aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/ip
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2016-12-19 23:05:39 +0100
committerDamjan Marion <damarion@cisco.com>2016-12-28 12:25:14 +0100
commit7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 (patch)
tree5de62f8dbd3a752f5a676ca600e43d2652d1ff1a /src/vnet/ip
parent696f1adec0df3b8f161862566dd9c86174302658 (diff)
Reorganize source tree to use single autotools instance
Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vnet/ip')
-rw-r--r--src/vnet/ip/dir.dox26
-rw-r--r--src/vnet/ip/format.c121
-rw-r--r--src/vnet/ip/format.h114
-rw-r--r--src/vnet/ip/icmp4.c784
-rw-r--r--src/vnet/ip/icmp4.h60
-rw-r--r--src/vnet/ip/icmp46_packet.h398
-rw-r--r--src/vnet/ip/icmp6.c882
-rw-r--r--src/vnet/ip/icmp6.h86
-rw-r--r--src/vnet/ip/igmp_packet.h155
-rw-r--r--src/vnet/ip/ip.api434
-rw-r--r--src/vnet/ip/ip.h195
-rw-r--r--src/vnet/ip/ip4.h322
-rw-r--r--src/vnet/ip/ip46_cli.c236
-rw-r--r--src/vnet/ip/ip4_error.h95
-rw-r--r--src/vnet/ip/ip4_format.c256
-rw-r--r--src/vnet/ip/ip4_forward.c3345
-rw-r--r--src/vnet/ip/ip4_input.c507
-rw-r--r--src/vnet/ip/ip4_mtrie.c568
-rw-r--r--src/vnet/ip/ip4_mtrie.h188
-rw-r--r--src/vnet/ip/ip4_packet.h384
-rw-r--r--src/vnet/ip/ip4_pg.c387
-rw-r--r--src/vnet/ip/ip4_source_and_port_range_check.c1415
-rw-r--r--src/vnet/ip/ip4_source_check.c573
-rw-r--r--src/vnet/ip/ip4_test.c340
-rw-r--r--src/vnet/ip/ip6.h476
-rw-r--r--src/vnet/ip/ip6_error.h92
-rw-r--r--src/vnet/ip/ip6_format.c383
-rw-r--r--src/vnet/ip/ip6_forward.c3402
-rw-r--r--src/vnet/ip/ip6_hop_by_hop.c1194
-rw-r--r--src/vnet/ip/ip6_hop_by_hop.h217
-rw-r--r--src/vnet/ip/ip6_hop_by_hop_packet.h66
-rw-r--r--src/vnet/ip/ip6_input.c353
-rw-r--r--src/vnet/ip/ip6_neighbor.c4088
-rw-r--r--src/vnet/ip/ip6_neighbor.h52
-rw-r--r--src/vnet/ip/ip6_packet.h499
-rw-r--r--src/vnet/ip/ip6_pg.c231
-rw-r--r--src/vnet/ip/ip_api.c1196
-rw-r--r--src/vnet/ip/ip_checksum.c228
-rw-r--r--src/vnet/ip/ip_frag.c581
-rw-r--r--src/vnet/ip/ip_frag.h96
-rw-r--r--src/vnet/ip/ip_init.c152
-rw-r--r--src/vnet/ip/ip_input_acl.c450
-rw-r--r--src/vnet/ip/ip_packet.h180
-rw-r--r--src/vnet/ip/ip_source_and_port_range_check.h148
-rw-r--r--src/vnet/ip/lookup.c967
-rw-r--r--src/vnet/ip/lookup.h498
-rw-r--r--src/vnet/ip/ping.c888
-rw-r--r--src/vnet/ip/ping.h108
-rw-r--r--src/vnet/ip/ports.def757
-rw-r--r--src/vnet/ip/protocols.def162
-rw-r--r--src/vnet/ip/punt.c323
-rw-r--r--src/vnet/ip/punt.h43
-rw-r--r--src/vnet/ip/punt_error.def19
-rw-r--r--src/vnet/ip/tcp_packet.h138
-rw-r--r--src/vnet/ip/udp.h313
-rw-r--r--src/vnet/ip/udp_error.def21
-rw-r--r--src/vnet/ip/udp_format.c91
-rw-r--r--src/vnet/ip/udp_init.c71
-rw-r--r--src/vnet/ip/udp_local.c645
-rw-r--r--src/vnet/ip/udp_packet.h65
-rw-r--r--src/vnet/ip/udp_pg.c237
61 files changed, 31301 insertions, 0 deletions
diff --git a/src/vnet/ip/dir.dox b/src/vnet/ip/dir.dox
new file mode 100644
index 00000000000..a4eb733774a
--- /dev/null
+++ b/src/vnet/ip/dir.dox
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Doxygen directory documentation */
+
+/**
+@dir
+@brief Layer 3 IP Code.
+
+This directory contains the source code for IP routing.
+
+*/
+/*? %%clicmd:group_label Layer 3 IP CLI %% ?*/
diff --git a/src/vnet/ip/format.c b/src/vnet/ip/format.c
new file mode 100644
index 00000000000..be1c4fd32fb
--- /dev/null
+++ b/src/vnet/ip/format.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_format.c: ip generic (4 or 6) formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format IP protocol. */
+u8 *
+format_ip_protocol (u8 * s, va_list * args)
+{
+ ip_protocol_t protocol = va_arg (*args, ip_protocol_t);
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi = ip_get_protocol_info (im, protocol);
+
+ if (pi)
+ return format (s, "%s", pi->name);
+ else
+ return format (s, "unknown %d", protocol);
+}
+
+uword
+unformat_ip_protocol (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi;
+ int i;
+
+ if (!unformat_user (input, unformat_vlib_number_by_name,
+ im->protocol_info_by_name, &i))
+ return 0;
+
+ pi = vec_elt_at_index (im->protocol_infos, i);
+ *result = pi->protocol;
+ return 1;
+}
+
+u8 *
+format_tcp_udp_port (u8 * s, va_list * args)
+{
+ int port = va_arg (*args, int);
+ ip_main_t *im = &ip_main;
+ tcp_udp_port_info_t *pi;
+
+ pi = ip_get_tcp_udp_port_info (im, port);
+ if (pi)
+ s = format (s, "%s", pi->name);
+ else
+ s = format (s, "%d", clib_net_to_host_u16 (port));
+
+ return s;
+}
+
+uword
+unformat_tcp_udp_port (unformat_input_t * input, va_list * args)
+{
+ u16 *result = va_arg (*args, u16 *);
+ ip_main_t *im = &ip_main;
+ tcp_udp_port_info_t *pi;
+ u32 i, port;
+
+
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ im->port_info_by_name, &i))
+ {
+ pi = vec_elt_at_index (im->port_infos, i);
+ port = pi->port;
+ }
+ else if (unformat_user (input, unformat_vlib_number, &port)
+ && port < (1 << 16))
+ port = clib_host_to_net_u16 (port);
+
+ else
+ return 0;
+
+ *result = port;
+ return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/format.h b/src/vnet/ip/format.h
new file mode 100644
index 00000000000..c35f0f4bb74
--- /dev/null
+++ b/src/vnet/ip/format.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/format.h: ip 4 and/or 6 formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_format_h
+#define included_ip_format_h
+
+/* IP4 or IP6. */
+
+format_function_t format_ip_protocol;
+unformat_function_t unformat_ip_protocol;
+
+format_function_t format_tcp_udp_port;
+unformat_function_t unformat_tcp_udp_port;
+
+typedef enum format_ip_adjacency_flags_t_
+{
+ FORMAT_IP_ADJACENCY_NONE,
+ FORMAT_IP_ADJACENCY_BRIEF = FORMAT_IP_ADJACENCY_NONE,
+ FORMAT_IP_ADJACENCY_DETAIL = (1 << 0),
+} format_ip_adjacency_flags_t;
+
+format_function_t format_ip_adjacency;
+format_function_t format_ip_adjacency_packet_data;
+
+format_function_t format_ip46_address;
+
+typedef enum
+{
+ IP46_TYPE_ANY,
+ IP46_TYPE_IP4,
+ IP46_TYPE_IP6
+} ip46_type_t;
+/* unformat_ip46_address expects arguments (ip46_address_t *, ip46_type_t)
+ * The type argument is used to enforce a particular IP version. */
+unformat_function_t unformat_ip46_address;
+
+/* IP4 */
+
+/* Parse an IP4 address %d.%d.%d.%d. */
+unformat_function_t unformat_ip4_address;
+
+/* Format an IP4 address. */
+format_function_t format_ip4_address;
+format_function_t format_ip4_address_and_length;
+
+/* Parse an IP4 header. */
+unformat_function_t unformat_ip4_header;
+
+/* Format an IP4 header. */
+format_function_t format_ip4_header;
+
+/* Parse an IP packet matching pattern. */
+unformat_function_t unformat_ip4_match;
+
+unformat_function_t unformat_pg_ip4_header;
+
+/* IP6 */
+unformat_function_t unformat_ip6_address;
+format_function_t format_ip6_address;
+format_function_t format_ip6_address_and_length;
+unformat_function_t unformat_ip6_header;
+format_function_t format_ip6_header;
+unformat_function_t unformat_pg_ip6_header;
+
+/* Format a TCP/UDP headers. */
+format_function_t format_tcp_header, format_udp_header;
+
+unformat_function_t unformat_pg_tcp_header, unformat_pg_udp_header;
+
+#endif /* included_ip_format_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c
new file mode 100644
index 00000000000..c3afff72f26
--- /dev/null
+++ b/src/vnet/ip/icmp4.c
@@ -0,0 +1,784 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/icmp4.c: ipv4 icmp
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+
+static char *icmp_error_strings[] = {
+#define _(f,s) s,
+ foreach_icmp4_error
+#undef _
+};
+
+static u8 *
+format_ip4_icmp_type_and_code (u8 * s, va_list * args)
+{
+ icmp4_type_t type = va_arg (*args, int);
+ u8 code = va_arg (*args, int);
+ char *t = 0;
+
+#define _(n,f) case n: t = #f; break;
+
+ switch (type)
+ {
+ foreach_icmp4_type;
+
+ default:
+ break;
+ }
+
+#undef _
+
+ if (!t)
+ return format (s, "unknown 0x%x", type);
+
+ s = format (s, "%s", t);
+
+ t = 0;
+ switch ((type << 8) | code)
+ {
+#define _(a,n,f) case (ICMP4_##a << 8) | (n): t = #f; break;
+
+ foreach_icmp4_code;
+
+#undef _
+ }
+
+ if (t)
+ s = format (s, " %s", t);
+
+ return s;
+}
+
+static u8 *
+format_ip4_icmp_header (u8 * s, va_list * args)
+{
+ icmp46_header_t *icmp = va_arg (*args, icmp46_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+
+ /* Nothing to do. */
+ if (max_header_bytes < sizeof (icmp[0]))
+ return format (s, "ICMP header truncated");
+
+ s = format (s, "ICMP %U checksum 0x%x",
+ format_ip4_icmp_type_and_code, icmp->type, icmp->code,
+ clib_net_to_host_u16 (icmp->checksum));
+
+ return s;
+}
+
+static u8 *
+format_icmp_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ icmp_input_trace_t *t = va_arg (*va, icmp_input_trace_t *);
+
+ s = format (s, "%U",
+ format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+typedef enum
+{
+ ICMP_INPUT_NEXT_ERROR,
+ ICMP_INPUT_N_NEXT,
+} icmp_input_next_t;
+
+typedef struct
+{
+ uword *type_and_code_by_name;
+
+ uword *type_by_name;
+
+ /* Vector dispatch table indexed by [icmp type]. */
+ u8 ip4_input_next_index_by_type[256];
+} icmp4_main_t;
+
+icmp4_main_t icmp4_main;
+
+static uword
+ip4_icmp_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ icmp4_main_t *im = &icmp4_main;
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ icmp46_header_t *icmp0;
+ icmp4_type_t type0;
+ u32 bi0, next0;
+
+ if (PREDICT_TRUE (n_left_from > 2))
+ {
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ p0 = vlib_get_buffer (vm, from[1]);
+ ip0 = vlib_buffer_get_current (p0);
+ CLIB_PREFETCH (ip0, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip4_next_header (ip0);
+ type0 = icmp0->type;
+ next0 = im->ip4_input_next_index_by_type[type0];
+
+ p0->error = node->errors[ICMP4_ERROR_UNKNOWN_TYPE];
+ if (PREDICT_FALSE (next0 != next))
+ {
+ vlib_put_next_frame (vm, node, next, n_left_to_next + 1);
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_icmp_input_node,static) = {
+ .function = ip4_icmp_input,
+ .name = "ip4-icmp-input",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp_input_trace,
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ICMP_INPUT_NEXT_ERROR] = "error-punt",
+ },
+};
+/* *INDENT-ON* */
+
+static uword
+ip4_icmp_echo_request (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next;
+ ip4_main_t *i4m = &ip4_main;
+ u16 *fragment_ids, *fid;
+ u8 host_config_ttl = i4m->host_config.ttl;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp_input_trace_t));
+
+ /* Get random fragment IDs for replies. */
+ fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer,
+ n_packets *
+ sizeof (fragment_ids[0]));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from > 2 && n_left_to_next > 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ icmp46_header_t *icmp0, *icmp1;
+ u32 bi0, src0, dst0;
+ u32 bi1, src1, dst1;
+ ip_csum_t sum0, sum1;
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ p1 = vlib_get_buffer (vm, bi1);
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ icmp0 = ip4_next_header (ip0);
+ icmp1 = ip4_next_header (ip1);
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+ vnet_buffer (p1)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ /* Update ICMP checksum. */
+ sum0 = icmp0->checksum;
+ sum1 = icmp1->checksum;
+
+ ASSERT (icmp0->type == ICMP4_echo_request);
+ ASSERT (icmp1->type == ICMP4_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
+ icmp46_header_t, type);
+ sum1 = ip_csum_update (sum1, ICMP4_echo_request, ICMP4_echo_reply,
+ icmp46_header_t, type);
+ icmp0->type = ICMP4_echo_reply;
+ icmp1->type = ICMP4_echo_reply;
+
+ icmp0->checksum = ip_csum_fold (sum0);
+ icmp1->checksum = ip_csum_fold (sum1);
+
+ src0 = ip0->src_address.data_u32;
+ src1 = ip1->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ dst1 = ip1->dst_address.data_u32;
+
+ /* Swap source and destination address.
+ Does not change checksum. */
+ ip0->src_address.data_u32 = dst0;
+ ip1->src_address.data_u32 = dst1;
+ ip0->dst_address.data_u32 = src0;
+ ip1->dst_address.data_u32 = src1;
+
+ /* Update IP checksum. */
+ sum0 = ip0->checksum;
+ sum1 = ip1->checksum;
+
+ sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
+ ip4_header_t, ttl);
+ sum1 = ip_csum_update (sum1, ip1->ttl, host_config_ttl,
+ ip4_header_t, ttl);
+ ip0->ttl = host_config_ttl;
+ ip1->ttl = host_config_ttl;
+
+ /* New fragment id. */
+ sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
+ ip4_header_t, fragment_id);
+ sum1 = ip_csum_update (sum1, ip1->fragment_id, fid[1],
+ ip4_header_t, fragment_id);
+ ip0->fragment_id = fid[0];
+ ip1->fragment_id = fid[1];
+ fid += 2;
+
+ ip0->checksum = ip_csum_fold (sum0);
+ ip1->checksum = ip_csum_fold (sum1);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ ASSERT (ip1->checksum == ip4_header_checksum (ip1));
+
+ p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
+ p1->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ icmp46_header_t *icmp0;
+ u32 bi0, src0, dst0;
+ ip_csum_t sum0;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip4_next_header (ip0);
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ /* Update ICMP checksum. */
+ sum0 = icmp0->checksum;
+
+ ASSERT (icmp0->type == ICMP4_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
+ icmp46_header_t, type);
+ icmp0->type = ICMP4_echo_reply;
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ src0 = ip0->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ ip0->src_address.data_u32 = dst0;
+ ip0->dst_address.data_u32 = src0;
+
+ /* Update IP checksum. */
+ sum0 = ip0->checksum;
+
+ sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
+ ip4_header_t, ttl);
+ ip0->ttl = host_config_ttl;
+
+ sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
+ ip4_header_t, fragment_id);
+ ip0->fragment_id = fid[0];
+ fid += 1;
+
+ ip0->checksum = ip_csum_fold (sum0);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+
+ p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ vlib_error_count (vm, ip4_icmp_input_node.index,
+ ICMP4_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
+ .function = ip4_icmp_echo_request,
+ .name = "ip4-icmp-echo-request",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp_input_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "ip4-load-balance",
+ },
+};
+/* *INDENT-ON* */
+
+typedef enum
+{
+ IP4_ICMP_ERROR_NEXT_DROP,
+ IP4_ICMP_ERROR_NEXT_LOOKUP,
+ IP4_ICMP_ERROR_N_NEXT,
+} ip4_icmp_error_next_t;
+
+void
+icmp4_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code, u32 data)
+{
+ vnet_buffer (b)->ip.icmp.type = type;
+ vnet_buffer (b)->ip.icmp.code = code;
+ vnet_buffer (b)->ip.icmp.data = data;
+}
+
+static u8
+icmp4_icmp_type_to_error (u8 type)
+{
+ switch (type)
+ {
+ case ICMP4_destination_unreachable:
+ return ICMP4_ERROR_DEST_UNREACH_SENT;
+ case ICMP4_time_exceeded:
+ return ICMP4_ERROR_TTL_EXPIRE_SENT;
+ case ICMP4_parameter_problem:
+ return ICMP4_ERROR_PARAM_PROBLEM_SENT;
+ default:
+ return ICMP4_ERROR_DROP;
+ }
+}
+
+static uword
+ip4_icmp_error (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from, *to_next;
+ uword n_left_from, n_left_to_next;
+ ip4_icmp_error_next_t next_index;
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0 = from[0];
+ u32 next0 = IP4_ICMP_ERROR_NEXT_LOOKUP;
+ u8 error0 = ICMP4_ERROR_NONE;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0, *out_ip0;
+ icmp46_header_t *icmp0;
+ u32 sw_if_index0, if_add_index0;
+ ip_csum_t sum;
+
+ /* Speculatively enqueue p0 to the current next frame */
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /*
+ * RFC1812 says to keep as much of the original packet as
+ * possible within the minimum MTU (576). We cheat "a little"
+ * here by keeping whatever fits in the first buffer, to be more
+ * efficient
+ */
+ if (PREDICT_FALSE (p0->total_length_not_including_first_buffer))
+ {
+ /* clear current_length of all other buffers in chain */
+ vlib_buffer_t *b = p0;
+ p0->total_length_not_including_first_buffer = 0;
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ b->current_length = 0;
+ }
+ }
+ p0->current_length =
+ p0->current_length > 576 ? 576 : p0->current_length;
+
+ /* Add IP header and ICMPv4 header including a 4 byte data field */
+ vlib_buffer_advance (p0,
+ -sizeof (ip4_header_t) -
+ sizeof (icmp46_header_t) - 4);
+ out_ip0 = vlib_buffer_get_current (p0);
+ icmp0 = (icmp46_header_t *) & out_ip0[1];
+
+ /* Fill ip header fields */
+ out_ip0->ip_version_and_header_length = 0x45;
+ out_ip0->tos = 0;
+ out_ip0->length = clib_host_to_net_u16 (p0->current_length);
+ out_ip0->fragment_id = 0;
+ out_ip0->flags_and_fragment_offset = 0;
+ out_ip0->ttl = 0xff;
+ out_ip0->protocol = IP_PROTOCOL_ICMP;
+ out_ip0->dst_address = ip0->src_address;
+ if_add_index0 = ~0;
+ if (PREDICT_TRUE (vec_len (lm->if_address_pool_index_by_sw_if_index)
+ > sw_if_index0))
+ if_add_index0 =
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
+ if (PREDICT_TRUE (if_add_index0 != ~0))
+ {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index (lm->if_address_pool, if_add_index0);
+ ip4_address_t *if_ip =
+ ip_interface_address_get_address (lm, if_add);
+ out_ip0->src_address = *if_ip;
+ }
+ else
+ {
+ /* interface has no IP4 address - should not happen */
+ next0 = IP4_ICMP_ERROR_NEXT_DROP;
+ error0 = ICMP4_ERROR_DROP;
+ }
+ out_ip0->checksum = ip4_header_checksum (out_ip0);
+
+ /* Fill icmp header fields */
+ icmp0->type = vnet_buffer (p0)->ip.icmp.type;
+ icmp0->code = vnet_buffer (p0)->ip.icmp.code;
+ *((u32 *) (icmp0 + 1)) =
+ clib_host_to_net_u32 (vnet_buffer (p0)->ip.icmp.data);
+ icmp0->checksum = 0;
+ sum =
+ ip_incremental_checksum (0, icmp0,
+ p0->current_length -
+ sizeof (ip4_header_t));
+ icmp0->checksum = ~ip_csum_fold (sum);
+
+ /* Update error status */
+ if (error0 == ICMP4_ERROR_NONE)
+ error0 = icmp4_icmp_type_to_error (icmp0->type);
+ vlib_error_count (vm, node->node_index, error0, 1);
+
+ /* Verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
+ .function = ip4_icmp_error,
+ .name = "ip4-icmp-error",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = IP4_ICMP_ERROR_N_NEXT,
+ .next_nodes = {
+ [IP4_ICMP_ERROR_NEXT_DROP] = "error-drop",
+ [IP4_ICMP_ERROR_NEXT_LOOKUP] = "ip4-lookup",
+ },
+
+ .format_trace = format_icmp_input_trace,
+};
+/* *INDENT-ON* */
+
+
+static uword
+unformat_icmp_type_and_code (unformat_input_t * input, va_list * args)
+{
+ icmp46_header_t *h = va_arg (*args, icmp46_header_t *);
+ icmp4_main_t *cm = &icmp4_main;
+ u32 i;
+
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ cm->type_and_code_by_name, &i))
+ {
+ h->type = (i >> 8) & 0xff;
+ h->code = (i >> 0) & 0xff;
+ }
+ else if (unformat_user (input, unformat_vlib_number_by_name,
+ cm->type_by_name, &i))
+ {
+ h->type = i;
+ h->code = 0;
+ }
+ else
+ return 0;
+
+ return 1;
+}
+
+static void
+icmp4_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_offset, icmp_offset;
+
+ icmp_offset = g->start_byte_offset;
+ ip_offset = (g - 1)->start_byte_offset;
+
+ while (n_packets >= 1)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ icmp46_header_t *icmp0;
+ u32 len0;
+
+ p0 = vlib_get_buffer (vm, packets[0]);
+ n_packets -= 1;
+ packets += 1;
+
+ ASSERT (p0->current_data == 0);
+ ip0 = (void *) (p0->data + ip_offset);
+ icmp0 = (void *) (p0->data + icmp_offset);
+ len0 = clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
+ icmp0->checksum =
+ ~ip_csum_fold (ip_incremental_checksum (0, icmp0, len0));
+ }
+}
+
+typedef struct
+{
+ pg_edit_t type, code;
+ pg_edit_t checksum;
+} pg_icmp46_header_t;
+
+always_inline void
+pg_icmp_header_init (pg_icmp46_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the IP header. */
+#define _(f) pg_edit_init (&p->f, icmp46_header_t, f);
+ _(type);
+ _(code);
+ _(checksum);
+#undef _
+}
+
+static uword
+unformat_pg_icmp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_icmp46_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (icmp46_header_t),
+ &group_index);
+ pg_icmp_header_init (p);
+
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+
+ {
+ icmp46_header_t tmp;
+
+ if (!unformat (input, "ICMP %U", unformat_icmp_type_and_code, &tmp))
+ goto error;
+
+ pg_edit_set_fixed (&p->type, tmp.type);
+ pg_edit_set_fixed (&p->code, tmp.code);
+ }
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "checksum %U",
+ unformat_pg_edit, unformat_pg_number, &p->checksum))
+ ;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = icmp4_pg_edit_function;
+ g->edit_function_opaque = 0;
+ }
+
+ return 1;
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+void
+ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type, u32 node_index)
+{
+ icmp4_main_t *im = &icmp4_main;
+
+ ASSERT ((int) type < ARRAY_LEN (im->ip4_input_next_index_by_type));
+ im->ip4_input_next_index_by_type[type]
+ = vlib_node_add_next (vm, ip4_icmp_input_node.index, node_index);
+}
+
+static clib_error_t *
+icmp4_init (vlib_main_t * vm)
+{
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi;
+ icmp4_main_t *cm = &icmp4_main;
+ clib_error_t *error;
+
+ error = vlib_call_init_function (vm, ip_main_init);
+
+ if (error)
+ return error;
+
+ pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP);
+ pi->format_header = format_ip4_icmp_header;
+ pi->unformat_pg_edit = unformat_pg_icmp_header;
+
+ cm->type_by_name = hash_create_string (0, sizeof (uword));
+#define _(n,t) hash_set_mem (cm->type_by_name, #t, (n));
+ foreach_icmp4_type;
+#undef _
+
+ cm->type_and_code_by_name = hash_create_string (0, sizeof (uword));
+#define _(a,n,t) hash_set_mem (cm->type_by_name, #t, (n) | (ICMP4_##a << 8));
+ foreach_icmp4_code;
+#undef _
+
+ memset (cm->ip4_input_next_index_by_type,
+ ICMP_INPUT_NEXT_ERROR, sizeof (cm->ip4_input_next_index_by_type));
+
+ ip4_icmp_register_type (vm, ICMP4_echo_request,
+ ip4_icmp_echo_request_node.index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (icmp4_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/icmp4.h b/src/vnet/ip/icmp4.h
new file mode 100644
index 00000000000..ae805148c89
--- /dev/null
+++ b/src/vnet/ip/icmp4.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_icmp4_h
+#define included_vnet_icmp4_h
+
+#define foreach_icmp4_error \
+ _ (NONE, "valid packets") \
+ _ (UNKNOWN_TYPE, "unknown type") \
+ _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
+ _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \
+ _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
+ _ (OPTIONS_WITH_ODD_LENGTH, \
+ "total option length not multiple of 8 bytes") \
+ _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
+ _ (ECHO_REPLIES_SENT, "echo replies sent") \
+ _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \
+ _ (DEST_UNREACH_SENT, "destination unreachable response sent") \
+ _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \
+ _ (PARAM_PROBLEM_SENT, "parameter problem response sent") \
+ _ (DROP, "error message dropped")
+
+typedef enum
+{
+#define _(f,s) ICMP4_ERROR_##f,
+ foreach_icmp4_error
+#undef _
+} icmp4_error_t;
+
+typedef struct
+{
+ u8 packet_data[64];
+} icmp_input_trace_t;
+
+format_function_t format_icmp4_input_trace;
+void ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type,
+ u32 node_index);
+void icmp4_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code,
+ u32 data);
+
+#endif /* included_vnet_icmp4_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/icmp46_packet.h b/src/vnet/ip/icmp46_packet.h
new file mode 100644
index 00000000000..a86cbd57bdb
--- /dev/null
+++ b/src/vnet/ip/icmp46_packet.h
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * icmp46_packet.h: ip4/ip6 icmp packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_icmp46_packet_h
+#define included_vnet_icmp46_packet_h
+
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+#define foreach_icmp4_type \
+ _ (0, echo_reply) \
+ _ (3, destination_unreachable) \
+ _ (4, source_quench) \
+ _ (5, redirect) \
+ _ (6, alternate_host_address) \
+ _ (8, echo_request) \
+ _ (9, router_advertisement) \
+ _ (10, router_solicitation) \
+ _ (11, time_exceeded) \
+ _ (12, parameter_problem) \
+ _ (13, timestamp_request) \
+ _ (14, timestamp_reply) \
+ _ (15, information_request) \
+ _ (16, information_reply) \
+ _ (17, address_mask_request) \
+ _ (18, address_mask_reply) \
+ _ (30, traceroute) \
+ _ (31, datagram_conversion_error) \
+ _ (32, mobile_host_redirect) \
+ _ (33, ip6_where_are_you) \
+ _ (34, ip6_i_am_here) \
+ _ (35, mobile_registration_request) \
+ _ (36, mobile_registration_reply) \
+ _ (37, domain_name_request) \
+ _ (38, domain_name_reply) \
+ _ (39, skip) \
+ _ (40, photuris)
+
+#define icmp_no_code 0
+
+#define foreach_icmp4_code \
+ _ (destination_unreachable, 0, destination_unreachable_net) \
+ _ (destination_unreachable, 1, destination_unreachable_host) \
+ _ (destination_unreachable, 2, protocol_unreachable) \
+ _ (destination_unreachable, 3, port_unreachable) \
+ _ (destination_unreachable, 4, fragmentation_needed_and_dont_fragment_set) \
+ _ (destination_unreachable, 5, source_route_failed) \
+ _ (destination_unreachable, 6, destination_network_unknown) \
+ _ (destination_unreachable, 7, destination_host_unknown) \
+ _ (destination_unreachable, 8, source_host_isolated) \
+ _ (destination_unreachable, 9, network_administratively_prohibited) \
+ _ (destination_unreachable, 10, host_administratively_prohibited) \
+ _ (destination_unreachable, 11, network_unreachable_for_type_of_service) \
+ _ (destination_unreachable, 12, host_unreachable_for_type_of_service) \
+ _ (destination_unreachable, 13, communication_administratively_prohibited) \
+ _ (destination_unreachable, 14, host_precedence_violation) \
+ _ (destination_unreachable, 15, precedence_cutoff_in_effect) \
+ _ (redirect, 0, network_redirect) \
+ _ (redirect, 1, host_redirect) \
+ _ (redirect, 2, type_of_service_and_network_redirect) \
+ _ (redirect, 3, type_of_service_and_host_redirect) \
+ _ (router_advertisement, 0, normal_router_advertisement) \
+ _ (router_advertisement, 16, does_not_route_common_traffic) \
+ _ (time_exceeded, 0, ttl_exceeded_in_transit) \
+ _ (time_exceeded, 1, fragment_reassembly_time_exceeded) \
+ _ (parameter_problem, 0, pointer_indicates_error) \
+ _ (parameter_problem, 1, missing_required_option) \
+ _ (parameter_problem, 2, bad_length)
+
+/* ICMPv6 */
+#define foreach_icmp6_type \
+ _ (1, destination_unreachable) \
+ _ (2, packet_too_big) \
+ _ (3, time_exceeded) \
+ _ (4, parameter_problem) \
+ _ (128, echo_request) \
+ _ (129, echo_reply) \
+ _ (130, multicast_listener_request) \
+ _ (131, multicast_listener_report) \
+ _ (132, multicast_listener_done) \
+ _ (133, router_solicitation) \
+ _ (134, router_advertisement) \
+ _ (135, neighbor_solicitation) \
+ _ (136, neighbor_advertisement) \
+ _ (137, redirect) \
+ _ (138, router_renumbering) \
+ _ (139, node_information_request) \
+ _ (140, node_information_response) \
+ _ (141, inverse_neighbor_solicitation) \
+ _ (142, inverse_neighbor_advertisement) \
+ _ (143, multicast_listener_report_v2) \
+ _ (144, home_agent_address_discovery_request) \
+ _ (145, home_agent_address_discovery_reply) \
+ _ (146, mobile_prefix_solicitation) \
+ _ (147, mobile_prefix_advertisement) \
+ _ (148, certification_path_solicitation) \
+ _ (149, certification_path_advertisement) \
+ _ (151, multicast_router_advertisement) \
+ _ (152, multicast_router_solicitation) \
+ _ (153, multicast_router_termination) \
+ _ (154, fmipv6_messages)
+
+#define foreach_icmp6_code \
+ _ (destination_unreachable, 0, no_route_to_destination) \
+ _ (destination_unreachable, 1, destination_administratively_prohibited) \
+ _ (destination_unreachable, 2, beyond_scope_of_source_address) \
+ _ (destination_unreachable, 3, address_unreachable) \
+ _ (destination_unreachable, 4, port_unreachable) \
+ _ (destination_unreachable, 5, source_address_failed_policy) \
+ _ (destination_unreachable, 6, reject_route_to_destination) \
+ _ (time_exceeded, 0, ttl_exceeded_in_transit) \
+ _ (time_exceeded, 1, fragment_reassembly_time_exceeded) \
+ _ (parameter_problem, 0, erroneous_header_field) \
+ _ (parameter_problem, 1, unrecognized_next_header) \
+ _ (parameter_problem, 2, unrecognized_option) \
+ _ (router_renumbering, 0, command) \
+ _ (router_renumbering, 1, result) \
+ _ (node_information_request, 0, data_contains_ip6_address) \
+ _ (node_information_request, 1, data_contains_name) \
+ _ (node_information_request, 2, data_contains_ip4_address) \
+ _ (node_information_response, 0, success) \
+ _ (node_information_response, 1, failed) \
+ _ (node_information_response, 2, unknown_request)
+
+typedef enum
+{
+#define _(n,f) ICMP4_##f = n,
+ foreach_icmp4_type
+#undef _
+} icmp4_type_t;
+
+typedef enum
+{
+#define _(t,n,f) ICMP4_##t##_##f = n,
+ foreach_icmp4_code
+#undef _
+} icmp4_code_t;
+
+typedef enum
+{
+#define _(n,f) ICMP6_##f = n,
+ foreach_icmp6_type
+#undef _
+} icmp6_type_t;
+
+typedef enum
+{
+#define _(t,n,f) ICMP6_##t##_##f = n,
+ foreach_icmp6_code
+#undef _
+} icmp6_code_t;
+
+typedef CLIB_PACKED (struct
+ {
+ u8 type;
+ u8 code;
+ /* IP checksum of icmp header plus data which follows. */
+ u16 checksum;
+ }) icmp46_header_t;
+
+/* ip6 neighbor discovery */
+#define foreach_icmp6_neighbor_discovery_option \
+ _ (1, source_link_layer_address) \
+ _ (2, target_link_layer_address) \
+ _ (3, prefix_information) \
+ _ (4, redirected_header) \
+ _ (5, mtu) \
+ _ (6, nbma_shortcut_limit) \
+ _ (7, advertisement_interval) \
+ _ (8, home_agent_information) \
+ _ (9, source_address_list) \
+ _ (10, target_address_list) \
+ _ (11, cryptographically_generated_address) \
+ _ (12, rsa_signature) \
+ _ (13, timestamp) \
+ _ (14, nonce) \
+ _ (15, trust_anchor) \
+ _ (16, certificate) \
+ _ (17, ip_address_and_prefix) \
+ _ (18, new_router_prefix_information) \
+ _ (19, mobile_link_layer_address) \
+ _ (20, neighbor_advertisement_acknowledgment) \
+ _ (23, map) \
+ _ (24, route_information) \
+ _ (25, recursive_dns_server) \
+ _ (26, ra_flags_extension) \
+ _ (27, handover_key_request) \
+ _ (28, handover_key_reply) \
+ _ (29, handover_assist_information) \
+ _ (30, mobile_node_identifier) \
+ _ (31, dns_search_list) \
+ _ (138, card_request) \
+ _ (139, card_reply)
+
+typedef enum icmp6_neighbor_discovery_option_type
+{
+#define _(n,f) ICMP6_NEIGHBOR_DISCOVERY_OPTION_##f = n,
+ foreach_icmp6_neighbor_discovery_option
+#undef _
+} icmp6_neighbor_discovery_option_type_t;
+
+typedef CLIB_PACKED (struct
+ {
+ /* Option type. */
+ u8 type;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+ /* Option data follows. */
+ u8 data[0];
+ }) icmp6_neighbor_discovery_option_header_t;
+
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 dst_address_length;
+ u8 flags;
+#define ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_FLAG_ON_LINK (1 << 7)
+#define ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_AUTO (1 << 6)
+ u32 valid_time;
+ u32 preferred_time;
+ u32 unused; ip6_address_t dst_address;
+ }) icmp6_neighbor_discovery_prefix_information_option_t;
+
+typedef CLIB_PACKED (struct
+ {
+ u8 type;
+ u8 aux_data_len_u32s;
+ u16 num_sources;
+ ip6_address_t mcast_addr; ip6_address_t source_addr[0];
+ }) icmp6_multicast_address_record_t;
+
+typedef CLIB_PACKED (struct
+ {
+ ip6_hop_by_hop_ext_t ext_hdr;
+ ip6_router_alert_option_t alert;
+ ip6_padN_option_t pad;
+ icmp46_header_t icmp;
+ u16 rsvd;
+ u16 num_addr_records;
+ icmp6_multicast_address_record_t records[0];
+ }) icmp6_multicast_listener_report_header_t;
+
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 reserved[6];
+ /* IP6 header plus payload follows. */
+ u8 data[0];
+ }) icmp6_neighbor_discovery_redirected_header_option_t;
+
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u16 unused; u32 mtu;
+ }) icmp6_neighbor_discovery_mtu_option_t;
+
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 ethernet_address[6];
+ })
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t;
+
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 max_l2_address[6 + 8];
+ })
+ icmp6_neighbor_discovery_max_link_layer_address_option_t;
+
+/* Generic neighbor discover header. Used for router solicitations,
+ etc. */
+typedef CLIB_PACKED (struct
+ {
+ icmp46_header_t icmp; u32 reserved_must_be_zero;
+ }) icmp6_neighbor_discovery_header_t;
+
+/* Router advertisement packet formats. */
+typedef CLIB_PACKED (struct
+ {
+ icmp46_header_t icmp;
+ /* Current hop limit to use for outgoing packets. */
+ u8 current_hop_limit;
+ u8 flags;
+#define ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP (1 << 7)
+#define ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP (1 << 6)
+ /* Zero means unspecified. */
+ u16 router_lifetime_in_sec;
+ /* Zero means unspecified. */
+ u32 neighbor_reachable_time_in_msec;
+ /* Zero means unspecified. */
+ u32
+ time_in_msec_between_retransmitted_neighbor_solicitations;
+ /* Options that may follow: source_link_layer_address, mtu, prefix_information. */
+ }) icmp6_router_advertisement_header_t;
+
+/* Neighbor solicitation/advertisement header. */
+typedef CLIB_PACKED (struct
+ {
+ icmp46_header_t icmp;
+ /* Zero for solicitation; flags for advertisement. */
+ u32 advertisement_flags;
+ /* Set when sent by a router. */
+#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_ROUTER (1 << 31)
+ /* Set when response to solicitation. */
+#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED (1 << 30)
+#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE (1 << 29)
+ ip6_address_t target_address;
+ /* Options that may follow: source_link_layer_address
+ (for solicitation) target_link_layer_address (for advertisement). */
+ }) icmp6_neighbor_solicitation_or_advertisement_header_t;
+
+typedef CLIB_PACKED (struct
+ {
+ icmp46_header_t icmp;
+ u32 reserved_must_be_zero;
+ /* Better next hop to use for given destination. */
+ ip6_address_t better_next_hop_address;
+ ip6_address_t dst_address;
+ /* Options that may follow: target_link_layer_address,
+ redirected_header. */
+ }) icmp6_redirect_header_t;
+
+/* Solicitation/advertisement packet format for ethernet. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip;
+ icmp6_neighbor_solicitation_or_advertisement_header_t
+ neighbor;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ link_layer_option;
+ }) icmp6_neighbor_solicitation_header_t;
+
+/* Router solicitation packet format for ethernet. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip;
+ icmp6_neighbor_discovery_header_t neighbor;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ link_layer_option;
+ }) icmp6_router_solicitation_header_t;
+
+/* router advertisement packet format for ethernet. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip;
+ icmp6_router_advertisement_header_t router;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ link_layer_option;
+ icmp6_neighbor_discovery_mtu_option_t mtu_option;
+ icmp6_neighbor_discovery_prefix_information_option_t
+ prefix[0];
+ }) icmp6_router_advertisement_packet_t;
+
+/* multicast listener report packet format for ethernet. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip;
+ icmp6_multicast_listener_report_header_t report_hdr;
+ }) icmp6_multicast_listener_report_packet_t;
+
+#endif /* included_vnet_icmp46_packet_h */
diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c
new file mode 100644
index 00000000000..70696d0c6cb
--- /dev/null
+++ b/src/vnet/ip/icmp6.c
@@ -0,0 +1,882 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/icmp6.c: ip6 icmp
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+static u8 *
+format_ip6_icmp_type_and_code (u8 * s, va_list * args)
+{
+ icmp6_type_t type = va_arg (*args, int);
+ u8 code = va_arg (*args, int);
+ char *t = 0;
+
+#define _(n,f) case n: t = #f; break;
+
+ switch (type)
+ {
+ foreach_icmp6_type;
+
+ default:
+ break;
+ }
+
+#undef _
+
+ if (!t)
+ return format (s, "unknown 0x%x", type);
+
+ s = format (s, "%s", t);
+
+ t = 0;
+ switch ((type << 8) | code)
+ {
+#define _(a,n,f) case (ICMP6_##a << 8) | (n): t = #f; break;
+
+ foreach_icmp6_code;
+
+#undef _
+ }
+
+ if (t)
+ s = format (s, " %s", t);
+
+ return s;
+}
+
+static u8 *
+format_icmp6_header (u8 * s, va_list * args)
+{
+ icmp46_header_t *icmp = va_arg (*args, icmp46_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+
+ /* Nothing to do. */
+ if (max_header_bytes < sizeof (icmp[0]))
+ return format (s, "ICMP header truncated");
+
+ s = format (s, "ICMP %U checksum 0x%x",
+ format_ip6_icmp_type_and_code, icmp->type, icmp->code,
+ clib_net_to_host_u16 (icmp->checksum));
+
+ if (max_header_bytes >=
+ sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t) &&
+ (icmp->type == ICMP6_neighbor_solicitation ||
+ icmp->type == ICMP6_neighbor_advertisement))
+ {
+ icmp6_neighbor_solicitation_or_advertisement_header_t *icmp6_nd =
+ (icmp6_neighbor_solicitation_or_advertisement_header_t *) icmp;
+ s = format (s, "\n target address %U",
+ format_ip6_address, &icmp6_nd->target_address);
+ }
+
+ return s;
+}
+
+u8 *
+format_icmp6_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ icmp6_input_trace_t *t = va_arg (*va, icmp6_input_trace_t *);
+
+ s = format (s, "%U",
+ format_ip6_header, t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+static char *icmp_error_strings[] = {
+#define _(f,s) s,
+ foreach_icmp6_error
+#undef _
+};
+
+typedef enum
+{
+ ICMP_INPUT_NEXT_DROP,
+ ICMP_INPUT_N_NEXT,
+} icmp_input_next_t;
+
+typedef struct
+{
+ uword *type_and_code_by_name;
+
+ uword *type_by_name;
+
+ /* Vector dispatch table indexed by [icmp type]. */
+ u8 input_next_index_by_type[256];
+
+ /* Max valid code indexed by icmp type. */
+ u8 max_valid_code_by_type[256];
+
+ /* hop_limit must be >= this value for this icmp type. */
+ u8 min_valid_hop_limit_by_type[256];
+
+ u8 min_valid_length_by_type[256];
+} icmp6_main_t;
+
+icmp6_main_t icmp6_main;
+
+static uword
+ip6_icmp_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ icmp6_main_t *im = &icmp6_main;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0;
+ icmp46_header_t *icmp0;
+ icmp6_type_t type0;
+ u32 bi0, next0, error0, len0;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+ icmp0 = ip6_next_header (ip0);
+ type0 = icmp0->type;
+
+ error0 = ICMP6_ERROR_NONE;
+
+ next0 = im->input_next_index_by_type[type0];
+ error0 =
+ next0 == ICMP_INPUT_NEXT_DROP ? ICMP6_ERROR_UNKNOWN_TYPE : error0;
+
+ /* Check code is valid for type. */
+ error0 =
+ icmp0->code >
+ im->max_valid_code_by_type[type0] ?
+ ICMP6_ERROR_INVALID_CODE_FOR_TYPE : error0;
+
+ /* Checksum is already validated by ip6_local node so we don't need to check that. */
+
+ /* Check that hop limit == 255 for certain types. */
+ error0 =
+ ip0->hop_limit <
+ im->min_valid_hop_limit_by_type[type0] ?
+ ICMP6_ERROR_INVALID_HOP_LIMIT_FOR_TYPE : error0;
+
+ len0 = clib_net_to_host_u16 (ip0->payload_length);
+ error0 =
+ len0 <
+ im->min_valid_length_by_type[type0] ?
+ ICMP6_ERROR_LENGTH_TOO_SMALL_FOR_TYPE : error0;
+
+ b0->error = node->errors[error0];
+
+ next0 = error0 != ICMP6_ERROR_NONE ? ICMP_INPUT_NEXT_DROP : next0;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_input_node) = {
+ .function = ip6_icmp_input,
+ .name = "ip6-icmp-input",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ICMP_INPUT_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+typedef enum
+{
+ ICMP6_ECHO_REQUEST_NEXT_LOOKUP,
+ ICMP6_ECHO_REQUEST_NEXT_OUTPUT,
+ ICMP6_ECHO_REQUEST_N_NEXT,
+} icmp6_echo_request_next_t;
+
+static uword
+ip6_icmp_echo_request (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+ ip6_main_t *im = &ip6_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 2 && n_left_to_next > 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ icmp46_header_t *icmp0, *icmp1;
+ ip6_address_t tmp0, tmp1;
+ ip_csum_t sum0, sum1;
+ u32 bi0, bi1;
+ u32 fib_index0, fib_index1;
+ u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+ u32 next1 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ p1 = vlib_get_buffer (vm, bi1);
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ icmp0 = ip6_next_header (ip0);
+ icmp1 = ip6_next_header (ip1);
+
+ /* Check icmp type to echo reply and update icmp checksum. */
+ sum0 = icmp0->checksum;
+ sum1 = icmp1->checksum;
+
+ ASSERT (icmp0->type == ICMP6_echo_request);
+ ASSERT (icmp1->type == ICMP6_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+ sum1 = ip_csum_update (sum1, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+
+ icmp0->checksum = ip_csum_fold (sum0);
+ icmp1->checksum = ip_csum_fold (sum1);
+
+ icmp0->type = ICMP6_echo_reply;
+ icmp1->type = ICMP6_echo_reply;
+
+ /* Swap source and destination address. */
+ tmp0 = ip0->src_address;
+ tmp1 = ip1->src_address;
+
+ ip0->src_address = ip0->dst_address;
+ ip1->src_address = ip1->dst_address;
+
+ ip0->dst_address = tmp0;
+ ip1->dst_address = tmp1;
+
+ /* New hop count. */
+ ip0->hop_limit = im->host_config.ttl;
+ ip1->hop_limit = im->host_config.ttl;
+
+ if (ip6_address_is_link_local_unicast (&ip0->dst_address))
+ {
+ ethernet_header_t *eth0;
+ u8 tmp_mac[6];
+ /* For link local, reuse current MAC header by sawpping
+ * SMAC to DMAC instead of IP6 lookup since link local
+ * is not in the IP6 FIB */
+ vlib_buffer_reset (p0);
+ eth0 = vlib_buffer_get_current (p0);
+ clib_memcpy (tmp_mac, eth0->dst_address, 6);
+ clib_memcpy (eth0->dst_address, eth0->src_address, 6);
+ clib_memcpy (eth0->src_address, tmp_mac, 6);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ next0 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT;
+ }
+ else
+ {
+ /* Determine the correct lookup fib indices... */
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+ }
+
+ if (ip6_address_is_link_local_unicast (&ip1->dst_address))
+ {
+ ethernet_header_t *eth1;
+ u8 tmp_mac[6];
+ /* For link local, reuse current MAC header by sawpping
+ * SMAC to DMAC instead of IP6 lookup since link local
+ * is not in the IP6 FIB */
+ vlib_buffer_reset (p1);
+ eth1 = vlib_buffer_get_current (p1);
+ clib_memcpy (tmp_mac, eth1->dst_address, 6);
+ clib_memcpy (eth1->dst_address, eth1->src_address, 6);
+ clib_memcpy (eth1->src_address, tmp_mac, 6);
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p1)->sw_if_index[VLIB_RX];
+ next1 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT;
+ }
+ else
+ {
+ /* Determine the correct lookup fib indices... */
+ fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = fib_index1;
+ }
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]
+ = vnet_main.local_interface_sw_if_index;
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]
+ = vnet_main.local_interface_sw_if_index;
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ icmp46_header_t *icmp0;
+ u32 bi0;
+ ip6_address_t tmp0;
+ ip_csum_t sum0;
+ u32 fib_index0;
+ u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip6_next_header (ip0);
+
+ /* Check icmp type to echo reply and update icmp checksum. */
+ sum0 = icmp0->checksum;
+
+ ASSERT (icmp0->type == ICMP6_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ icmp0->type = ICMP6_echo_reply;
+
+ /* Swap source and destination address. */
+ tmp0 = ip0->src_address;
+ ip0->src_address = ip0->dst_address;
+ ip0->dst_address = tmp0;
+
+ ip0->hop_limit = im->host_config.ttl;
+
+ if (ip6_address_is_link_local_unicast (&ip0->dst_address))
+ {
+ ethernet_header_t *eth0;
+ u8 tmp_mac[6];
+ /* For link local, reuse current MAC header by sawpping
+ * SMAC to DMAC instead of IP6 lookup since link local
+ * is not in the IP6 FIB */
+ vlib_buffer_reset (p0);
+ eth0 = vlib_buffer_get_current (p0);
+ clib_memcpy (tmp_mac, eth0->dst_address, 6);
+ clib_memcpy (eth0->dst_address, eth0->src_address, 6);
+ clib_memcpy (eth0->src_address, tmp_mac, 6);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ next0 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT;
+ }
+ else
+ {
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+ }
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]
+ = vnet_main.local_interface_sw_if_index;
+
+ /* Verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_error_count (vm, ip6_icmp_input_node.index,
+ ICMP6_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_echo_request_node,static) = {
+ .function = ip6_icmp_echo_request,
+ .name = "ip6-icmp-echo-request",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = ICMP6_ECHO_REQUEST_N_NEXT,
+ .next_nodes = {
+ [ICMP6_ECHO_REQUEST_NEXT_LOOKUP] = "ip6-lookup",
+ [ICMP6_ECHO_REQUEST_NEXT_OUTPUT] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+typedef enum
+{
+ IP6_ICMP_ERROR_NEXT_DROP,
+ IP6_ICMP_ERROR_NEXT_LOOKUP,
+ IP6_ICMP_ERROR_N_NEXT,
+} ip6_icmp_error_next_t;
+
+void
+icmp6_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code, u32 data)
+{
+ vnet_buffer (b)->ip.icmp.type = type;
+ vnet_buffer (b)->ip.icmp.code = code;
+ vnet_buffer (b)->ip.icmp.data = data;
+}
+
+static u8
+icmp6_icmp_type_to_error (u8 type)
+{
+ switch (type)
+ {
+ case ICMP6_destination_unreachable:
+ return ICMP6_ERROR_DEST_UNREACH_SENT;
+ case ICMP6_packet_too_big:
+ return ICMP6_ERROR_PACKET_TOO_BIG_SENT;
+ case ICMP6_time_exceeded:
+ return ICMP6_ERROR_TTL_EXPIRE_SENT;
+ case ICMP6_parameter_problem:
+ return ICMP6_ERROR_PARAM_PROBLEM_SENT;
+ default:
+ return ICMP6_ERROR_DROP;
+ }
+}
+
+static uword
+ip6_icmp_error (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from, *to_next;
+ uword n_left_from, n_left_to_next;
+ ip6_icmp_error_next_t next_index;
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0 = from[0];
+ u32 next0 = IP6_ICMP_ERROR_NEXT_LOOKUP;
+ u8 error0 = ICMP6_ERROR_NONE;
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0, *out_ip0;
+ icmp46_header_t *icmp0;
+ u32 sw_if_index0, if_add_index0;
+ int bogus_length;
+
+ /* Speculatively enqueue p0 to the current next frame */
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* RFC4443 says to keep as much of the original packet as possible
+ * within the minimum MTU. We cheat "a little" here by keeping whatever fits
+ * in the first buffer, to be more efficient */
+ if (PREDICT_FALSE (p0->total_length_not_including_first_buffer))
+ { /* clear current_length of all other buffers in chain */
+ vlib_buffer_t *b = p0;
+ p0->total_length_not_including_first_buffer = 0;
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ b->current_length = 0;
+ }
+ }
+ p0->current_length =
+ p0->current_length > 1280 ? 1280 : p0->current_length;
+
+ /* Add IP header and ICMPv6 header including a 4 byte data field */
+ vlib_buffer_advance (p0,
+ -sizeof (ip6_header_t) -
+ sizeof (icmp46_header_t) - 4);
+ out_ip0 = vlib_buffer_get_current (p0);
+ icmp0 = (icmp46_header_t *) & out_ip0[1];
+
+ /* Fill ip header fields */
+ out_ip0->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+
+ out_ip0->payload_length =
+ clib_host_to_net_u16 (p0->current_length - sizeof (ip6_header_t));
+ out_ip0->protocol = IP_PROTOCOL_ICMP6;
+ out_ip0->hop_limit = 0xff;
+ out_ip0->dst_address = ip0->src_address;
+ if_add_index0 =
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
+ if (PREDICT_TRUE (if_add_index0 != ~0))
+ {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index (lm->if_address_pool, if_add_index0);
+ ip6_address_t *if_ip =
+ ip_interface_address_get_address (lm, if_add);
+ out_ip0->src_address = *if_ip;
+ }
+ else /* interface has no IP6 address - should not happen */
+ {
+ next0 = IP6_ICMP_ERROR_NEXT_DROP;
+ error0 = ICMP6_ERROR_DROP;
+ }
+
+ /* Fill icmp header fields */
+ icmp0->type = vnet_buffer (p0)->ip.icmp.type;
+ icmp0->code = vnet_buffer (p0)->ip.icmp.code;
+ *((u32 *) (icmp0 + 1)) =
+ clib_host_to_net_u32 (vnet_buffer (p0)->ip.icmp.data);
+ icmp0->checksum = 0;
+ icmp0->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, out_ip0,
+ &bogus_length);
+
+
+
+ /* Update error status */
+ if (error0 == ICMP6_ERROR_NONE)
+ error0 = icmp6_icmp_type_to_error (icmp0->type);
+ vlib_error_count (vm, node->node_index, error0, 1);
+
+ /* Verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_error_node) = {
+ .function = ip6_icmp_error,
+ .name = "ip6-icmp-error",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = IP6_ICMP_ERROR_N_NEXT,
+ .next_nodes = {
+ [IP6_ICMP_ERROR_NEXT_DROP] = "error-drop",
+ [IP6_ICMP_ERROR_NEXT_LOOKUP] = "ip6-lookup",
+ },
+
+ .format_trace = format_icmp6_input_trace,
+};
+/* *INDENT-ON* */
+
+
+static uword
+unformat_icmp_type_and_code (unformat_input_t * input, va_list * args)
+{
+ icmp46_header_t *h = va_arg (*args, icmp46_header_t *);
+ icmp6_main_t *cm = &icmp6_main;
+ u32 i;
+
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ cm->type_and_code_by_name, &i))
+ {
+ h->type = (i >> 8) & 0xff;
+ h->code = (i >> 0) & 0xff;
+ }
+ else if (unformat_user (input, unformat_vlib_number_by_name,
+ cm->type_by_name, &i))
+ {
+ h->type = i;
+ h->code = 0;
+ }
+ else
+ return 0;
+
+ return 1;
+}
+
+static void
+icmp6_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_offset, icmp_offset;
+ int bogus_length;
+
+ icmp_offset = g->start_byte_offset;
+ ip_offset = (g - 1)->start_byte_offset;
+
+ while (n_packets >= 1)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ icmp46_header_t *icmp0;
+
+ p0 = vlib_get_buffer (vm, packets[0]);
+ n_packets -= 1;
+ packets += 1;
+
+ ASSERT (p0->current_data == 0);
+ ip0 = (void *) (p0->data + ip_offset);
+ icmp0 = (void *) (p0->data + icmp_offset);
+
+ icmp0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0,
+ &bogus_length);
+ ASSERT (bogus_length == 0);
+ }
+}
+
+typedef struct
+{
+ pg_edit_t type, code;
+ pg_edit_t checksum;
+} pg_icmp46_header_t;
+
+always_inline void
+pg_icmp_header_init (pg_icmp46_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the IP header. */
+#define _(f) pg_edit_init (&p->f, icmp46_header_t, f);
+ _(type);
+ _(code);
+ _(checksum);
+#undef _
+}
+
+static uword
+unformat_pg_icmp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_icmp46_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (icmp46_header_t),
+ &group_index);
+ pg_icmp_header_init (p);
+
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+
+ {
+ icmp46_header_t tmp;
+
+ if (!unformat (input, "ICMP %U", unformat_icmp_type_and_code, &tmp))
+ goto error;
+
+ pg_edit_set_fixed (&p->type, tmp.type);
+ pg_edit_set_fixed (&p->code, tmp.code);
+ }
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "checksum %U",
+ unformat_pg_edit, unformat_pg_number, &p->checksum))
+ ;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = icmp6_pg_edit_function;
+ g->edit_function_opaque = 0;
+ }
+
+ return 1;
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+void
+icmp6_register_type (vlib_main_t * vm, icmp6_type_t type, u32 node_index)
+{
+ icmp6_main_t *im = &icmp6_main;
+
+ ASSERT ((int) type < ARRAY_LEN (im->input_next_index_by_type));
+ im->input_next_index_by_type[type]
+ = vlib_node_add_next (vm, ip6_icmp_input_node.index, node_index);
+}
+
+static clib_error_t *
+icmp6_init (vlib_main_t * vm)
+{
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi;
+ icmp6_main_t *cm = &icmp6_main;
+ clib_error_t *error;
+
+ error = vlib_call_init_function (vm, ip_main_init);
+
+ if (error)
+ return error;
+
+ pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP6);
+ pi->format_header = format_icmp6_header;
+ pi->unformat_pg_edit = unformat_pg_icmp_header;
+
+ cm->type_by_name = hash_create_string (0, sizeof (uword));
+#define _(n,t) hash_set_mem (cm->type_by_name, #t, (n));
+ foreach_icmp6_type;
+#undef _
+
+ cm->type_and_code_by_name = hash_create_string (0, sizeof (uword));
+#define _(a,n,t) hash_set_mem (cm->type_by_name, #t, (n) | (ICMP6_##a << 8));
+ foreach_icmp6_code;
+#undef _
+
+ memset (cm->input_next_index_by_type,
+ ICMP_INPUT_NEXT_DROP, sizeof (cm->input_next_index_by_type));
+ memset (cm->max_valid_code_by_type, 0, sizeof (cm->max_valid_code_by_type));
+
+#define _(a,n,t) cm->max_valid_code_by_type[ICMP6_##a] = clib_max (cm->max_valid_code_by_type[ICMP6_##a], n);
+ foreach_icmp6_code;
+#undef _
+
+ memset (cm->min_valid_hop_limit_by_type, 0,
+ sizeof (cm->min_valid_hop_limit_by_type));
+ cm->min_valid_hop_limit_by_type[ICMP6_router_solicitation] = 255;
+ cm->min_valid_hop_limit_by_type[ICMP6_router_advertisement] = 255;
+ cm->min_valid_hop_limit_by_type[ICMP6_neighbor_solicitation] = 255;
+ cm->min_valid_hop_limit_by_type[ICMP6_neighbor_advertisement] = 255;
+ cm->min_valid_hop_limit_by_type[ICMP6_redirect] = 255;
+
+ memset (cm->min_valid_length_by_type, sizeof (icmp46_header_t),
+ sizeof (cm->min_valid_length_by_type));
+ cm->min_valid_length_by_type[ICMP6_router_solicitation] =
+ sizeof (icmp6_neighbor_discovery_header_t);
+ cm->min_valid_length_by_type[ICMP6_router_advertisement] =
+ sizeof (icmp6_router_advertisement_header_t);
+ cm->min_valid_length_by_type[ICMP6_neighbor_solicitation] =
+ sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t);
+ cm->min_valid_length_by_type[ICMP6_neighbor_advertisement] =
+ sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t);
+ cm->min_valid_length_by_type[ICMP6_redirect] =
+ sizeof (icmp6_redirect_header_t);
+
+ icmp6_register_type (vm, ICMP6_echo_request,
+ ip6_icmp_echo_request_node.index);
+
+ return vlib_call_init_function (vm, ip6_neighbor_init);
+}
+
+VLIB_INIT_FUNCTION (icmp6_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/icmp6.h b/src/vnet/ip/icmp6.h
new file mode 100644
index 00000000000..a426512ea2f
--- /dev/null
+++ b/src/vnet/ip/icmp6.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_icmp6_h
+#define included_vnet_icmp6_h
+
+#define foreach_icmp6_error \
+ _ (NONE, "valid packets") \
+ _ (UNKNOWN_TYPE, "unknown type") \
+ _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
+ _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \
+ _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
+ _ (OPTIONS_WITH_ODD_LENGTH, \
+ "total option length not multiple of 8 bytes") \
+ _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
+ _ (ECHO_REPLIES_SENT, "echo replies sent") \
+ _ (NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK, \
+ "neighbor solicitations from source not on link") \
+ _ (NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN, \
+ "neighbor solicitations for unknown targets") \
+ _ (NEIGHBOR_ADVERTISEMENTS_TX, "neighbor advertisements sent") \
+ _ (NEIGHBOR_ADVERTISEMENTS_RX, "neighbor advertisements received") \
+ _ (ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK, \
+ "router solicitations from source not on link") \
+ _ (ROUTER_SOLICITATION_UNSUPPORTED_INTF, \
+ "neighbor discovery unsupported interface") \
+ _ (ROUTER_SOLICITATION_RADV_NOT_CONFIG, \
+ "neighbor discovery not configured") \
+ _ (ROUTER_SOLICITATION_DEST_UNKNOWN, \
+ "router solicitations for unknown destination") \
+ _ (ROUTER_SOLICITATION_SOURCE_UNKNOWN, \
+ "router solicitations for unknown source") \
+ _ (ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL, \
+ "router advertisement source not link local") \
+ _ (ROUTER_ADVERTISEMENTS_TX, "router advertisements sent") \
+ _ (ROUTER_ADVERTISEMENTS_RX, "router advertisements received") \
+ _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \
+ _ (DEST_UNREACH_SENT, "destination unreachable response sent") \
+ _ (PACKET_TOO_BIG_SENT, "packet too big response sent") \
+ _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \
+ _ (PARAM_PROBLEM_SENT, "parameter Pproblem response sent") \
+ _ (DROP, "error message dropped")
+
+
+typedef enum
+{
+#define _(f,s) ICMP6_ERROR_##f,
+ foreach_icmp6_error
+#undef _
+} icmp6_error_t;
+
+typedef struct
+{
+ u8 packet_data[64];
+} icmp6_input_trace_t;
+
+format_function_t format_icmp6_input_trace;
+void icmp6_register_type (vlib_main_t * vm, icmp6_type_t type,
+ u32 node_index);
+void icmp6_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code,
+ u32 data);
+
+extern vlib_node_registration_t ip6_icmp_input_node;
+
+#endif /* included_vnet_icmp6_h */
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/igmp_packet.h b/src/vnet/ip/igmp_packet.h
new file mode 100644
index 00000000000..503259ece7c
--- /dev/null
+++ b/src/vnet/ip/igmp_packet.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * igmp_packet.h: igmp packet format
+ *
+ * Copyright (c) 2011 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_igmp_packet_h
+#define included_vnet_igmp_packet_h
+
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+#define foreach_igmp_type \
+ _ (0x11, membership_query) \
+ _ (0x12, membership_report_v1) \
+ _ (0x13, dvmrp) \
+ _ (0x14, pim_v1) \
+ _ (0x15, cisco_trace) \
+ _ (0x16, membership_report_v2) \
+ _ (0x17, leave_group_v2) \
+ _ (0x1e, traceroute_response) \
+ _ (0x1f, traceroute_request) \
+ _ (0x22, membership_report_v3) \
+ _ (0x30, router_advertisement) \
+ _ (0x31, router_solicitation) \
+ _ (0x32, router_termination)
+
+typedef enum
+{
+#define _(n,f) IGMP_TYPE_##f = n,
+ foreach_igmp_type
+#undef _
+} igmp_type_t;
+
+typedef struct
+{
+ igmp_type_t type:8;
+
+ u8 code;
+
+ u16 checksum;
+} igmp_header_t;
+
+typedef struct
+{
+ /* membership_query, version <= 2 reports. */
+ igmp_header_t header;
+
+ /* Multicast destination address. */
+ ip4_address_t dst;
+} igmp_message_t;
+
+#define foreach_igmp_membership_group_v3_type \
+ _ (1, mode_is_filter_include) \
+ _ (2, mode_is_filter_exclude) \
+ _ (3, change_to_filter_include) \
+ _ (4, change_to_filter_exclude) \
+ _ (5, allow_new_sources) \
+ _ (6, block_old_sources)
+
+typedef enum
+{
+#define _(n,f) IGMP_MEMBERSHIP_GROUP_##f = n,
+ foreach_igmp_membership_group_v3_type
+#undef _
+} igmp_membership_group_v3_type_t;
+
+typedef struct
+{
+ igmp_membership_group_v3_type_t type:8;
+
+ /* Number of 32 bit words of aux data after source addresses. */
+ u8 n_aux_u32s;
+
+ /* Number of source addresses that follow. */
+ u16 n_src_addresses;
+
+ /* Destination multicast address. */
+ ip4_address_t dst_address;
+
+ ip4_address_t src_addresses[0];
+} igmp_membership_group_v3_t;
+
+always_inline igmp_membership_group_v3_t *
+igmp_membership_group_v3_next (igmp_membership_group_v3_t * g)
+{
+ return ((void *) g
+ + g->n_src_addresses * sizeof (g->src_addresses[0])
+ + g->n_aux_u32s * sizeof (u32));
+}
+
+typedef struct
+{
+ /* Type 0x22. */
+ igmp_header_t header;
+
+ u16 unused;
+
+ /* Number of groups which follow. */
+ u16 n_groups;
+
+ igmp_membership_group_v3_t groups[0];
+} igmp_membership_report_v3_t;
+
+/* IP6 flavor of IGMP is called MLD which is embedded in ICMP6. */
+typedef struct
+{
+ /* Preceeded by ICMP v6 header. */
+ u16 max_response_delay_in_milliseconds;
+ u16 reserved;
+ ip6_address_t dst;
+} mld_header_t;
+
+#endif /* included_vnet_igmp_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api
new file mode 100644
index 00000000000..c811e465ea9
--- /dev/null
+++ b/src/vnet/ip/ip.api
@@ -0,0 +1,434 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+
+ This file defines vpp IP control-plane API messages which are generally
+ called through a shared memory interface.
+*/
+
+/** \brief Dump IP fib table
+ @param client_index - opaque cookie to identify the sender
+*/
+define ip_fib_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief FIB path
+ @param sw_if_index - index of the interface
+ @param weight - The weight, for UCMP
+ @param is_local - local if non-zero, else remote
+ @param is_drop - Drop the packet
+ @param is_unreach - Drop the packet and rate limit send ICMP unreachable
+ @param is_prohibit - Drop the packet and rate limit send ICMP prohibited
+ @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2
+ @param next_hop[16] - the next hop address
+
+ WARNING: this type is replicated, pending cleanup completion
+*/
+typeonly manual_print manual_endian define fib_path
+{
+ u32 sw_if_index;
+ u32 weight;
+ u8 is_local;
+ u8 is_drop;
+ u8 is_unreach;
+ u8 is_prohibit;
+ u8 afi;
+ u8 next_hop[16];
+};
+
+/** \brief IP FIB table response
+ @param table_id - IP fib table id
+ @address_length - mask length
+ @address - ip4 prefix
+ @param count - the number of fib_path in path
+ @param path - array of of fib_path structures
+*/
+manual_endian manual_print define ip_fib_details
+{
+ u32 context;
+ u32 table_id;
+ u8 address_length;
+ u8 address[4];
+ u32 count;
+ vl_api_fib_path_t path[count];
+};
+
+/** \brief Dump IP6 fib table
+ @param client_index - opaque cookie to identify the sender
+*/
+define ip6_fib_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief IP6 FIB table response
+ @param table_id - IP6 fib table id
+ @address_length - mask length
+ @address - ip6 prefix
+ @param count - the number of fib_path in path
+ @param path - array of of fib_path structures
+*/
+manual_endian manual_print define ip6_fib_details
+{
+ u32 context;
+ u32 table_id;
+ u8 address_length;
+ u8 address[16];
+ u32 count;
+ vl_api_fib_path_t path[count];
+};
+
+/** \brief Dump IP neighboors
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface to dump neighboors
+ @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4]
+*/
+define ip_neighbor_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+};
+
+/** \brief IP neighboors dump response
+ @param context - sender context which was passed in the request
+ @param is_static - [1|0] to indicate if neighbor is statically configured
+ @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4]
+*/
+define ip_neighbor_details {
+ u32 context;
+ u32 is_static;
+ u8 is_ipv6;
+ u8 mac_address[6];
+ u8 ip_address[16];
+};
+
+/** \brief IP neighbor add / del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vrf_id - vrf_id, only for IP4
+ @param sw_if_index - interface used to reach neighbor
+ @param is_add - 1 to add neighbor, 0 to delete
+ @param is_ipv6 - 1 for IPv6 neighbor, 0 for IPv4
+ @param is_static -
+ @param mac_address - l2 address of the neighbor
+ @param dst_address - ip4 or ip6 address of the neighbor
+*/
+define ip_neighbor_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 vrf_id; /* only makes sense for ip4 */
+ u32 sw_if_index;
+ /* 1 = add, 0 = delete */
+ u8 is_add;
+ u8 is_ipv6;
+ u8 is_static;
+ u8 mac_address[6];
+ u8 dst_address[16];
+};
+
+/** \brief Reply for IP Neighbor add / delete request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+*/
+define ip_neighbor_add_del_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Set the ip flow hash config for a fib request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vrf_id - vrf/fib id
+ @param is_ipv6 - if non-zero the fib is ip6, else ip4
+ @param src - if non-zero include src in flow hash
+ @param dst - if non-zero include dst in flow hash
+ @param sport - if non-zero include sport in flow hash
+ @param dport - if non-zero include dport in flow hash
+ @param proto -if non-zero include proto in flow hash
+ @param reverse - if non-zero include reverse in flow hash
+*/
+define set_ip_flow_hash
+{
+ u32 client_index;
+ u32 context;
+ u32 vrf_id;
+ u8 is_ipv6;
+ u8 src;
+ u8 dst;
+ u8 sport;
+ u8 dport;
+ u8 proto;
+ u8 reverse;
+};
+
+/** \brief Set the ip flow hash config for a fib response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+*/
+define set_ip_flow_hash_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief IPv6 router advertisement config request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param suppress -
+ @param managed -
+ @param other -
+ @param ll_option -
+ @param send_unicast -
+ @param cease -
+ @param is_no -
+ @param default_router -
+ @param max_interval -
+ @param min_interval -
+ @param lifetime -
+ @param initial_count -
+ @param initial_interval -
+*/
+define sw_interface_ip6nd_ra_config
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 suppress;
+ u8 managed;
+ u8 other;
+ u8 ll_option;
+ u8 send_unicast;
+ u8 cease;
+ u8 is_no;
+ u8 default_router;
+ u32 max_interval;
+ u32 min_interval;
+ u32 lifetime;
+ u32 initial_count;
+ u32 initial_interval;
+};
+
+/** \brief IPv6 router advertisement config response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+*/
+define sw_interface_ip6nd_ra_config_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief IPv6 router advertisement prefix config request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index -
+ @param address[] -
+ @param address_length -
+ @param use_default -
+ @param no_advertise -
+ @param off_link -
+ @param no_autoconfig -
+ @param no_onlink -
+ @param is_no -
+ @param val_lifetime -
+ @param pref_lifetime -
+*/
+define sw_interface_ip6nd_ra_prefix
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 address[16];
+ u8 address_length;
+ u8 use_default;
+ u8 no_advertise;
+ u8 off_link;
+ u8 no_autoconfig;
+ u8 no_onlink;
+ u8 is_no;
+ u32 val_lifetime;
+ u32 pref_lifetime;
+};
+
+/** \brief IPv6 router advertisement prefix config response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+*/
+define sw_interface_ip6nd_ra_prefix_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief IPv6 interface enable / disable request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface used to reach neighbor
+ @param enable - if non-zero enable ip6 on interface, else disable
+*/
+define sw_interface_ip6_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 enable; /* set to true if enable */
+};
+
+/** \brief IPv6 interface enable / disable response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+*/
+define sw_interface_ip6_enable_disable_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief IPv6 set link local address on interface request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface to set link local on
+ @param address[] - the new link local address
+ @param address_length - link local address length
+*/
+define sw_interface_ip6_set_link_local_address
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 address[16];
+ u8 address_length;
+};
+
+/** \brief IPv6 set link local address on interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - error code for the request
+*/
+define sw_interface_ip6_set_link_local_address_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Add / del route request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the new vlan's parent interface
+ @param vrf_id - fib table /vrf associated with the route
+ @param lookup_in_vrf -
+ @param classify_table_index -
+ @param create_vrf_if_needed -
+ @param is_add - 1 if adding the route, 0 if deleting
+ @param is_drop - Drop the packet
+ @param is_unreach - Drop the packet and rate limit send ICMP unreachable
+ @param is_prohibit - Drop the packet and rate limit send ICMP prohibited
+ @param is_ipv6 - 0 if an ip4 route, else ip6
+ @param is_local -
+ @param is_classify -
+ @param is_multipath - Set to 1 if this is a multipath route, else 0
+ @param not_last - Is last or not last msg in group of multiple add/del msgs
+ @param next_hop_weight -
+ @param dst_address_length -
+ @param dst_address[16] -
+ @param next_hop_address[16] -
+ @param next_hop_n_out_labels - the number of labels in the label stack
+ @param next_hop_out_label_stack - the next-hop output label stack, outer most first
+ @param next_hop_via_label - The next-hop is a resolved via a local label
+*/
+define ip_add_del_route
+{
+ u32 client_index;
+ u32 context;
+ u32 next_hop_sw_if_index;
+ u32 table_id;
+ u32 classify_table_index;
+ u32 next_hop_table_id;
+ u8 create_vrf_if_needed;
+ u8 is_add;
+ u8 is_drop;
+ u8 is_unreach;
+ u8 is_prohibit;
+ u8 is_ipv6;
+ u8 is_local;
+ u8 is_classify;
+ u8 is_multipath;
+ u8 is_resolve_host;
+ u8 is_resolve_attached;
+ /* Is last/not-last message in group of multiple add/del messages. */
+ u8 not_last;
+ u8 next_hop_weight;
+ u8 dst_address_length;
+ u8 dst_address[16];
+ u8 next_hop_address[16];
+ u8 next_hop_n_out_labels;
+ u32 next_hop_via_label;
+ u32 next_hop_out_label_stack[next_hop_n_out_labels];
+};
+
+/** \brief Reply for add / del route request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+*/
+define ip_add_del_route_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+define ip_address_details
+{
+ u32 client_index;
+ u32 context;
+ u8 ip[16];
+ u8 prefix_length;
+};
+
+define ip_address_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+};
+
+define ip_details
+{
+ u32 sw_if_index;
+ u32 context;
+};
+
+define ip_dump
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ipv6;
+};
+
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h
new file mode 100644
index 00000000000..02a1a9636ee
--- /dev/null
+++ b/src/vnet/ip/ip.h
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip.h: ip generic (4 or 6) main
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_main_h
+#define included_ip_main_h
+
+#include <vppinfra/hash.h>
+#include <vppinfra/heap.h> /* adjacency heap */
+#include <vppinfra/ptclosure.h>
+
+#include <vnet/vnet.h>
+
+#include <vnet/ip/format.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/lookup.h>
+
+#include <vnet/ip/tcp_packet.h>
+#include <vnet/ip/udp_packet.h>
+#include <vnet/ip/icmp46_packet.h>
+
+#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip4_error.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/icmp4.h>
+
+#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_error.h>
+#include <vnet/ip/icmp6.h>
+#include <vnet/classify/vnet_classify.h>
+
+/* Per protocol info. */
+typedef struct
+{
+ /* Protocol name (also used as hash key). */
+ u8 *name;
+
+ /* Protocol number. */
+ ip_protocol_t protocol;
+
+ /* Format function for this IP protocol. */
+ format_function_t *format_header;
+
+ /* Parser for header. */
+ unformat_function_t *unformat_header;
+
+ /* Parser for per-protocol matches. */
+ unformat_function_t *unformat_match;
+
+ /* Parser for packet generator edits for this protocol. */
+ unformat_function_t *unformat_pg_edit;
+} ip_protocol_info_t;
+
+/* Per TCP/UDP port info. */
+typedef struct
+{
+ /* Port name (used as hash key). */
+ u8 *name;
+
+ /* UDP/TCP port number in network byte order. */
+ u16 port;
+
+ /* Port specific format function. */
+ format_function_t *format_header;
+
+ /* Parser for packet generator edits for this protocol. */
+ unformat_function_t *unformat_pg_edit;
+} tcp_udp_port_info_t;
+
+typedef struct
+{
+ /* Per IP protocol info. */
+ ip_protocol_info_t *protocol_infos;
+
+ /* Protocol info index hashed by 8 bit IP protocol. */
+ uword *protocol_info_by_protocol;
+
+ /* Hash table mapping IP protocol name (see protocols.def)
+ to protocol number. */
+ uword *protocol_info_by_name;
+
+ /* Per TCP/UDP port info. */
+ tcp_udp_port_info_t *port_infos;
+
+ /* Hash table from network-byte-order port to port info index. */
+ uword *port_info_by_port;
+
+ /* Hash table mapping TCP/UDP name to port info index. */
+ uword *port_info_by_name;
+} ip_main_t;
+
+extern ip_main_t ip_main;
+
+clib_error_t *ip_main_init (vlib_main_t * vm);
+
+static inline ip_protocol_info_t *
+ip_get_protocol_info (ip_main_t * im, u32 protocol)
+{
+ uword *p;
+
+ p = hash_get (im->protocol_info_by_protocol, protocol);
+ return p ? vec_elt_at_index (im->protocol_infos, p[0]) : 0;
+}
+
+static inline tcp_udp_port_info_t *
+ip_get_tcp_udp_port_info (ip_main_t * im, u32 port)
+{
+ uword *p;
+
+ p = hash_get (im->port_info_by_port, port);
+ return p ? vec_elt_at_index (im->port_infos, p[0]) : 0;
+}
+
+always_inline ip_csum_t
+ip_incremental_checksum_buffer (vlib_main_t * vm,
+ vlib_buffer_t * first_buffer,
+ u32 first_buffer_offset,
+ u32 n_bytes_to_checksum, ip_csum_t sum)
+{
+ vlib_buffer_t *b = first_buffer;
+ u32 n_bytes_left = n_bytes_to_checksum;
+ ASSERT (b->current_length >= first_buffer_offset);
+ void *h;
+ u32 n;
+
+ n = clib_min (n_bytes_left, b->current_length);
+ h = vlib_buffer_get_current (b) + first_buffer_offset;
+ sum = ip_incremental_checksum (sum, h, n);
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ while (1)
+ {
+ n_bytes_left -= n;
+ if (n_bytes_left == 0)
+ break;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ n = clib_min (n_bytes_left, b->current_length);
+ h = vlib_buffer_get_current (b);
+ sum = ip_incremental_checksum (sum, h, n);
+ }
+ }
+
+ return sum;
+}
+
+void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index);
+
+extern vlib_node_registration_t ip4_inacl_node;
+extern vlib_node_registration_t ip6_inacl_node;
+
+#endif /* included_ip_main_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
new file mode 100644
index 00000000000..0331c44563e
--- /dev/null
+++ b/src/vnet/ip/ip4.h
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4.h: ip4 main include file
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip4_h
+#define included_ip_ip4_h
+
+#include <vnet/ip/ip4_mtrie.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/feature/feature.h>
+
+typedef struct ip4_fib_t
+{
+ /* Hash table for each prefix length mapping. */
+ uword *fib_entry_by_dst_address[33];
+
+ /* Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. */
+ ip4_fib_mtrie_t mtrie;
+
+ /* Table ID (hash key) for this FIB. */
+ u32 table_id;
+
+ /* Index into FIB vector. */
+ u32 index;
+
+ /* flow hash configuration */
+ flow_hash_config_t flow_hash_config;
+
+ /* N-tuple classifier indices */
+ u32 fwd_classify_table_index;
+ u32 rev_classify_table_index;
+
+} ip4_fib_t;
+
+struct ip4_main_t;
+
+typedef void (ip4_add_del_interface_address_function_t)
+ (struct ip4_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length, u32 if_address_index, u32 is_del);
+
+typedef struct
+{
+ ip4_add_del_interface_address_function_t *function;
+ uword function_opaque;
+} ip4_add_del_interface_address_callback_t;
+
+/**
+ * @brief IPv4 main type.
+ *
+ * State of IPv4 VPP processing including:
+ * - FIBs
+ * - Feature indices used in feature topological sort
+ * - Feature node run time references
+ */
+
+typedef struct ip4_main_t
+{
+ ip_lookup_main_t lookup_main;
+
+ /** Vector of FIBs. */
+ struct fib_table_t_ *fibs;
+
+ u32 fib_masks[33];
+
+ /** Table index indexed by software interface. */
+ u32 *fib_index_by_sw_if_index;
+
+ /* IP4 enabled count by software interface */
+ u8 *ip_enabled_by_sw_if_index;
+
+ /** Hash table mapping table id to fib index.
+ ID space is not necessarily dense; index space is dense. */
+ uword *fib_index_by_table_id;
+
+ /** Functions to call when interface address changes. */
+ ip4_add_del_interface_address_callback_t
+ * add_del_interface_address_callbacks;
+
+ /** Template used to generate IP4 ARP packets. */
+ vlib_packet_template_t ip4_arp_request_packet_template;
+
+ /** Seed for Jenkins hash used to compute ip4 flow hash. */
+ u32 flow_hash_seed;
+
+ /** @brief Template information for VPP generated packets */
+ struct
+ {
+ /** TTL to use for host generated packets. */
+ u8 ttl;
+
+ /** TOS byte to use for host generated packets. */
+ u8 tos;
+
+ u8 pad[2];
+ } host_config;
+} ip4_main_t;
+
+/** Global ip4 main structure. */
+extern ip4_main_t ip4_main;
+
+/** Global ip4 input node. Errors get attached to ip4 input node. */
+extern vlib_node_registration_t ip4_input_node;
+extern vlib_node_registration_t ip4_lookup_node;
+extern vlib_node_registration_t ip4_rewrite_node;
+extern vlib_node_registration_t ip4_rewrite_local_node;
+extern vlib_node_registration_t ip4_arp_node;
+extern vlib_node_registration_t ip4_glean_node;
+extern vlib_node_registration_t ip4_midchain_node;
+
+always_inline uword
+ip4_destination_matches_route (const ip4_main_t * im,
+ const ip4_address_t * key,
+ const ip4_address_t * dest, uword dest_length)
+{
+ return 0 == ((key->data_u32 ^ dest->data_u32) & im->fib_masks[dest_length]);
+}
+
+always_inline uword
+ip4_destination_matches_interface (ip4_main_t * im,
+ ip4_address_t * key,
+ ip_interface_address_t * ia)
+{
+ ip4_address_t *a = ip_interface_address_get_address (&im->lookup_main, ia);
+ return ip4_destination_matches_route (im, key, a, ia->address_length);
+}
+
+/* As above but allows for unaligned destinations (e.g. works right from IP header of packet). */
+always_inline uword
+ip4_unaligned_destination_matches_route (ip4_main_t * im,
+ ip4_address_t * key,
+ ip4_address_t * dest,
+ uword dest_length)
+{
+ return 0 ==
+ ((clib_mem_unaligned (&key->data_u32, u32) ^ dest->
+ data_u32) & im->fib_masks[dest_length]);
+}
+
+always_inline int
+ip4_src_address_for_packet (ip_lookup_main_t * lm,
+ u32 sw_if_index, ip4_address_t * src)
+{
+ u32 if_add_index = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+ if (PREDICT_TRUE (if_add_index != ~0))
+ {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index (lm->if_address_pool, if_add_index);
+ ip4_address_t *if_ip = ip_interface_address_get_address (lm, if_add);
+ *src = *if_ip;
+ return 0;
+ }
+ else
+ {
+ ASSERT (0);
+ src->as_u32 = 0;
+ }
+ return (!0);
+}
+
+/* Find interface address which matches destination. */
+always_inline ip4_address_t *
+ip4_interface_address_matching_destination (ip4_main_t * im,
+ ip4_address_t * dst,
+ u32 sw_if_index,
+ ip_interface_address_t **
+ result_ia)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_address_t *ia;
+ ip4_address_t *result = 0;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ ip4_address_t * a = ip_interface_address_get_address (lm, ia);
+ if (ip4_destination_matches_route (im, dst, a, ia->address_length))
+ {
+ result = a;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ if (result_ia)
+ *result_ia = result ? ia : 0;
+ return result;
+}
+
+ip4_address_t *ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
+ ip_interface_address_t **
+ result_ia);
+
+clib_error_t *ip4_add_del_interface_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length, u32 is_del);
+
+void ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable);
+
+int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2);
+
+/* Send an ARP request to see if given destination is reachable on given interface. */
+clib_error_t *ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst,
+ u32 sw_if_index);
+
+clib_error_t *ip4_set_arp_limit (u32 arp_limit);
+
+uword
+ip4_udp_register_listener (vlib_main_t * vm,
+ u16 dst_port, u32 next_node_index);
+
+void
+ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type, u32 node_index);
+
+u16 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+ ip4_header_t * ip0);
+
+void ip4_register_protocol (u32 protocol, u32 node_index);
+
+serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main;
+
+int vnet_set_ip4_flow_hash (u32 table_id,
+ flow_hash_config_t flow_hash_config);
+
+void ip4_mtrie_init (ip4_fib_mtrie_t * m);
+
+int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 table_index);
+
+/* Compute flow hash. We'll use it to select which adjacency to use for this
+ flow. And other things. */
+always_inline u32
+ip4_compute_flow_hash (const ip4_header_t * ip,
+ flow_hash_config_t flow_hash_config)
+{
+ tcp_header_t *tcp = (void *) (ip + 1);
+ u32 a, b, c, t1, t2;
+ uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP
+ || ip->protocol == IP_PROTOCOL_UDP);
+
+ t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR)
+ ? ip->src_address.data_u32 : 0;
+ t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR)
+ ? ip->dst_address.data_u32 : 0;
+
+ a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1;
+ b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2;
+ b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0;
+
+ t1 = is_tcp_udp ? tcp->ports.src : 0;
+ t2 = is_tcp_udp ? tcp->ports.dst : 0;
+
+ t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0;
+ t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0;
+
+ c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ?
+ (t1 << 16) | t2 : (t2 << 16) | t1;
+
+ hash_v3_mix32 (a, b, c);
+ hash_v3_finalize32 (a, b, c);
+
+ return c;
+}
+
+void
+ip4_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ vlib_rx_or_tx_t which_adj_index);
+
+u8 *format_ip4_forward_next_trace (u8 * s, va_list * args);
+
+u32 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0);
+
+#endif /* included_ip_ip4_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip46_cli.c b/src/vnet/ip/ip46_cli.c
new file mode 100644
index 00000000000..ce1ffa6242b
--- /dev/null
+++ b/src/vnet/ip/ip46_cli.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_cli.c: ip4 commands
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/**
+ * @file
+ * @brief Set IP Address.
+ *
+ * Configure an IPv4 or IPv6 address for on an interface.
+ */
+
+
+int
+ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2)
+{
+ return clib_net_to_host_u32 (a1->data_u32) -
+ clib_net_to_host_u32 (a2->data_u32);
+}
+
+int
+ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a1->as_u16); i++)
+ {
+ int cmp =
+ clib_net_to_host_u16 (a1->as_u16[i]) -
+ clib_net_to_host_u16 (a2->as_u16[i]);
+ if (cmp != 0)
+ return cmp;
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_command, static) = {
+ .path = "set interface ip",
+ .short_help = "IP4/IP6 commands",
+};
+/* *INDENT-ON* */
+
+void
+ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip4_address_t *ip4_addrs = 0;
+ u32 *ip4_masks = 0;
+ ip6_main_t *im6 = &ip6_main;
+ ip6_address_t *ip6_addrs = 0;
+ u32 *ip6_masks = 0;
+ ip_interface_address_t *ia;
+ int i;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&im4->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered */,
+ ({
+ ip4_address_t * x = (ip4_address_t *)
+ ip_interface_address_get_address (&im4->lookup_main, ia);
+ vec_add1 (ip4_addrs, x[0]);
+ vec_add1 (ip4_masks, ia->address_length);
+ }));
+ /* *INDENT-ON* */
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&im6->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered */,
+ ({
+ ip6_address_t * x = (ip6_address_t *)
+ ip_interface_address_get_address (&im6->lookup_main, ia);
+ vec_add1 (ip6_addrs, x[0]);
+ vec_add1 (ip6_masks, ia->address_length);
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (ip4_addrs); i++)
+ ip4_add_del_interface_address (vm, sw_if_index, &ip4_addrs[i],
+ ip4_masks[i], 1 /* is_del */ );
+ for (i = 0; i < vec_len (ip6_addrs); i++)
+ ip6_add_del_interface_address (vm, sw_if_index, &ip6_addrs[i],
+ ip6_masks[i], 1 /* is_del */ );
+
+ vec_free (ip4_addrs);
+ vec_free (ip4_masks);
+ vec_free (ip6_addrs);
+ vec_free (ip6_masks);
+}
+
+static clib_error_t *
+ip_address_delete_cleanup (vnet_main_t * vnm, u32 hw_if_index, u32 is_create)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_hw_interface_t *hw;
+
+ if (is_create)
+ return 0;
+
+ hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+ ip_del_all_interface_addresses (vm, hw->sw_if_index);
+ return 0;
+}
+
+VNET_HW_INTERFACE_ADD_DEL_FUNCTION (ip_address_delete_cleanup);
+
+static clib_error_t *
+add_del_ip_address (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_address_t a4;
+ ip6_address_t a6;
+ clib_error_t *error = 0;
+ u32 sw_if_index, length, is_del;
+
+ sw_if_index = ~0;
+ is_del = 0;
+
+ if (unformat (input, "del"))
+ is_del = 1;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (is_del && unformat (input, "all"))
+ ip_del_all_interface_addresses (vm, sw_if_index);
+ else if (unformat (input, "%U/%d", unformat_ip4_address, &a4, &length))
+ error = ip4_add_del_interface_address (vm, sw_if_index, &a4, length,
+ is_del);
+ else if (unformat (input, "%U/%d", unformat_ip6_address, &a6, &length))
+ error = ip6_add_del_interface_address (vm, sw_if_index, &a6, length,
+ is_del);
+ else
+ {
+ error = clib_error_return (0, "expected IP4/IP6 address/length `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+
+done:
+ return error;
+}
+
+/*?
+ * Add an IP Address to an interface or remove and IP Address from an interface.
+ * The IP Address can be an IPv4 or an IPv6 address. Interfaces may have multiple
+ * IPv4 and IPv6 addresses. There is no concept of primary vs. secondary
+ * interface addresses; they're just addresses.
+ *
+ * To display the addresses associated with a given interface, use the command
+ * '<em>show interface address <interface></em>'.
+ *
+ * Note that the debug CLI does not enforce classful mask-width / addressing
+ * constraints.
+ *
+ * @cliexpar
+ * @parblock
+ * An example of how to add an IPv4 address to an interface:
+ * @cliexcmd{set interface ip address GigabitEthernet2/0/0 172.16.2.12/24}
+ *
+ * An example of how to add an IPv6 address to an interface:
+ * @cliexcmd{set interface ip address GigabitEthernet2/0/0 @::a:1:1:0:7/126}
+ *
+ * To delete a specific interface ip address:
+ * @cliexcmd{set interface ip address GigabitEthernet2/0/0 172.16.2.12/24 del}
+ *
+ * To delete all interfaces addresses (IPv4 and IPv6):
+ * @cliexcmd{set interface ip address GigabitEthernet2/0/0 del all}
+ * @endparblock
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_address_command, static) = {
+ .path = "set interface ip address",
+ .function = add_del_ip_address,
+ .short_help = "set interface ip address <interface> [<ip-addr>/<mask> [del]] | [del all]",
+};
+/* *INDENT-ON* */
+
+/* Dummy init function to get us linked in. */
+static clib_error_t *
+ip4_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_error.h b/src/vnet/ip/ip4_error.h
new file mode 100644
index 00000000000..95d12ec22d5
--- /dev/null
+++ b/src/vnet/ip/ip4_error.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_error.h: ip4 fast path errors
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip4_error_h
+#define included_ip_ip4_error_h
+
+#define foreach_ip4_error \
+ /* Must be first. */ \
+ _ (NONE, "valid ip4 packets") \
+ \
+ /* Errors signalled by ip4-input */ \
+ _ (TOO_SHORT, "ip4 length < 20 bytes") \
+ _ (BAD_LENGTH, "ip4 length > l2 length") \
+ _ (BAD_CHECKSUM, "bad ip4 checksum") \
+ _ (VERSION, "ip4 version != 4") \
+ _ (OPTIONS, "ip4 options present") \
+ _ (FRAGMENT_OFFSET_ONE, "ip4 fragment offset == 1") \
+ _ (TIME_EXPIRED, "ip4 ttl <= 1") \
+ \
+ /* Errors signalled by ip4-rewrite. */ \
+ _ (MTU_EXCEEDED, "ip4 MTU exceeded and DF set") \
+ _ (DST_LOOKUP_MISS, "ip4 destination lookup miss") \
+ _ (SRC_LOOKUP_MISS, "ip4 source lookup miss") \
+ _ (ADJACENCY_DROP, "ip4 adjacency drop") \
+ _ (ADJACENCY_PUNT, "ip4 adjacency punt") \
+ \
+ /* Errors signalled by ip4-local. */ \
+ _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
+ _ (TCP_CHECKSUM, "bad tcp checksum") \
+ _ (UDP_CHECKSUM, "bad udp checksum") \
+ _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
+ \
+ /* Errors signalled by ip4-source-check. */ \
+ _ (UNICAST_SOURCE_CHECK_FAILS, "ip4 unicast source check fails") \
+ \
+ /* Spoofed packets in ip4-rewrite-local */ \
+ _(SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \
+ \
+ /* Errors singalled by ip4-inacl */ \
+ _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
+ _ (INACL_SESSION_DENY, "input ACL session deny drops")
+
+typedef enum
+{
+#define _(sym,str) IP4_ERROR_##sym,
+ foreach_ip4_error
+#undef _
+ IP4_N_ERROR,
+} ip4_error_t;
+
+#endif /* included_ip_ip4_error_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_format.c b/src/vnet/ip/ip4_format.c
new file mode 100644
index 00000000000..c803e0656db
--- /dev/null
+++ b/src/vnet/ip/ip4_format.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_format.c: ip4 formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format an IP4 address. */
+u8 *
+format_ip4_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
+}
+
+/* Format an IP4 route destination and length. */
+u8 *
+format_ip4_address_and_length (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ u8 l = va_arg (*args, u32);
+ return format (s, "%U/%d", format_ip4_address, a, l);
+}
+
+/* Parse an IP4 address %d.%d.%d.%d. */
+uword
+unformat_ip4_address (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ unsigned a[4];
+
+ if (!unformat (input, "%d.%d.%d.%d", &a[0], &a[1], &a[2], &a[3]))
+ return 0;
+
+ if (a[0] >= 256 || a[1] >= 256 || a[2] >= 256 || a[3] >= 256)
+ return 0;
+
+ result[0] = a[0];
+ result[1] = a[1];
+ result[2] = a[2];
+ result[3] = a[3];
+
+ return 1;
+}
+
+/* Format an IP4 header. */
+u8 *
+format_ip4_header (u8 * s, va_list * args)
+{
+ ip4_header_t *ip = va_arg (*args, ip4_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ u32 ip_version, header_bytes;
+ uword indent;
+
+ /* Nothing to do. */
+ if (max_header_bytes < sizeof (ip[0]))
+ return format (s, "IP header truncated");
+
+ indent = format_get_indent (s);
+ indent += 2;
+
+ ip_version = (ip->ip_version_and_header_length >> 4);
+ header_bytes = (ip->ip_version_and_header_length & 0xf) * sizeof (u32);
+
+ s = format (s, "%U: %U -> %U",
+ format_ip_protocol, ip->protocol,
+ format_ip4_address, ip->src_address.data,
+ format_ip4_address, ip->dst_address.data);
+
+ /* Show IP version and header length only with unexpected values. */
+ if (ip_version != 4 || header_bytes != sizeof (ip4_header_t))
+ s = format (s, "\n%Uversion %d, header length %d",
+ format_white_space, indent, ip_version, header_bytes);
+
+ s = format (s, "\n%Utos 0x%02x, ttl %d, length %d, checksum 0x%04x",
+ format_white_space, indent,
+ ip->tos, ip->ttl,
+ clib_net_to_host_u16 (ip->length),
+ clib_net_to_host_u16 (ip->checksum));
+
+ /* Check and report invalid checksums. */
+ {
+ u16 c = ip4_header_checksum (ip);
+ if (c != ip->checksum)
+ s = format (s, " (should be 0x%04x)", clib_net_to_host_u16 (c));
+ }
+
+ {
+ u32 f = clib_net_to_host_u16 (ip->flags_and_fragment_offset);
+ u32 o;
+
+ s = format (s, "\n%Ufragment id 0x%04x",
+ format_white_space, indent,
+ clib_net_to_host_u16 (ip->fragment_id));
+
+ /* Fragment offset. */
+ o = 8 * (f & 0x1fff);
+ f ^= o;
+ if (o != 0)
+ s = format (s, " offset %d", o);
+
+ if (f != 0)
+ {
+ s = format (s, ", flags ");
+#define _(l) if (f & IP4_HEADER_FLAG_##l) s = format (s, #l);
+ _(MORE_FRAGMENTS);
+ _(DONT_FRAGMENT);
+ _(CONGESTION);
+#undef _
+ }
+ }
+
+ /* Recurse into next protocol layer. */
+ if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol);
+
+ if (pi && pi->format_header)
+ s = format (s, "\n%U%U",
+ format_white_space, indent - 2, pi->format_header,
+ /* next protocol header */ (void *) ip + header_bytes,
+ max_header_bytes - header_bytes);
+ }
+
+ return s;
+}
+
+/* Parse an IP4 header. */
+uword
+unformat_ip4_header (unformat_input_t * input, va_list * args)
+{
+ u8 **result = va_arg (*args, u8 **);
+ ip4_header_t *ip;
+ int old_length;
+
+ /* Allocate space for IP header. */
+ {
+ void *p;
+
+ old_length = vec_len (*result);
+ vec_add2 (*result, p, sizeof (ip4_header_t));
+ ip = p;
+ }
+
+ memset (ip, 0, sizeof (ip[0]));
+ ip->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+
+ if (!unformat (input, "%U: %U -> %U",
+ unformat_ip_protocol, &ip->protocol,
+ unformat_ip4_address, &ip->src_address,
+ unformat_ip4_address, &ip->dst_address))
+ return 0;
+
+ /* Parse options. */
+ while (1)
+ {
+ int i, j;
+
+ if (unformat (input, "tos %U", unformat_vlib_number, &i))
+ ip->tos = i;
+
+ else if (unformat (input, "ttl %U", unformat_vlib_number, &i))
+ ip->ttl = i;
+
+ else if (unformat (input, "fragment id %U offset %U",
+ unformat_vlib_number, &i, unformat_vlib_number, &j))
+ {
+ ip->fragment_id = clib_host_to_net_u16 (i);
+ ip->flags_and_fragment_offset |=
+ clib_host_to_net_u16 ((i / 8) & 0x1fff);
+ }
+
+ /* Flags. */
+ else if (unformat (input, "mf") || unformat (input, "MF"))
+ ip->flags_and_fragment_offset |=
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+ else if (unformat (input, "df") || unformat (input, "DF"))
+ ip->flags_and_fragment_offset |=
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
+
+ else if (unformat (input, "ce") || unformat (input, "CE"))
+ ip->flags_and_fragment_offset |=
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_CONGESTION);
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ /* Fill in checksum. */
+ ip->checksum = ip4_header_checksum (ip);
+
+ /* Recurse into next protocol layer. */
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol);
+
+ if (pi && pi->unformat_header)
+ {
+ if (!unformat_user (input, pi->unformat_header, result))
+ return 0;
+
+ /* Result may have moved. */
+ ip = (void *) *result + old_length;
+ }
+ }
+
+ /* Fill in IP length. */
+ ip->length = clib_host_to_net_u16 (vec_len (*result) - old_length);
+
+ return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
new file mode 100644
index 00000000000..6e91b9e91e1
--- /dev/null
+++ b/src/vnet/ip/ip4_forward.c
@@ -0,0 +1,3345 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_forward.c: IP v4 forwarding
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
+#include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
+#include <vnet/ppp/ppp.h>
+#include <vnet/srp/srp.h> /* for srp_hw_interface_class */
+#include <vnet/api_errno.h> /* for API error numbers */
+#include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
+#include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
+#include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/classify_dpo.h>
+
+/**
+ * @file
+ * @brief IPv4 Forwarding.
+ *
+ * This file contains the source code for IPv4 forwarding.
+ */
+
+void
+ip4_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ vlib_rx_or_tx_t which_adj_index);
+
+always_inline uword
+ip4_lookup_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int lookup_for_responses_to_locally_received_packets)
+{
+ ip4_main_t *im = &ip4_main;
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
+ u32 n_left_from, n_left_to_next, *from, *to_next;
+ ip_lookup_next_t next;
+ u32 cpu_index = os_get_cpu_number ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ vlib_buffer_t *p0, *p1, *p2, *p3;
+ ip4_header_t *ip0, *ip1, *ip2, *ip3;
+ __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
+ ip_lookup_next_t next0, next1, next2, next3;
+ const load_balance_t *lb0, *lb1, *lb2, *lb3;
+ ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
+ ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
+ __attribute__ ((unused)) u32 pi0, fib_index0, lb_index0,
+ is_tcp_udp0;
+ __attribute__ ((unused)) u32 pi1, fib_index1, lb_index1,
+ is_tcp_udp1;
+ __attribute__ ((unused)) u32 pi2, fib_index2, lb_index2,
+ is_tcp_udp2;
+ __attribute__ ((unused)) u32 pi3, fib_index3, lb_index3,
+ is_tcp_udp3;
+ flow_hash_config_t flow_hash_config0, flow_hash_config1;
+ flow_hash_config_t flow_hash_config2, flow_hash_config3;
+ u32 hash_c0, hash_c1, hash_c2, hash_c3;
+ const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ pi2 = to_next[2] = from[2];
+ pi3 = to_next[3] = from[3];
+
+ from += 4;
+ to_next += 4;
+ n_left_to_next -= 4;
+ n_left_from -= 4;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ p2 = vlib_get_buffer (vm, pi2);
+ p3 = vlib_get_buffer (vm, pi3);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ ip2 = vlib_buffer_get_current (p2);
+ ip3 = vlib_buffer_get_current (p3);
+
+ dst_addr0 = &ip0->dst_address;
+ dst_addr1 = &ip1->dst_address;
+ dst_addr2 = &ip2->dst_address;
+ dst_addr3 = &ip3->dst_address;
+
+ fib_index0 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index1 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ fib_index2 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p2)->sw_if_index[VLIB_RX]);
+ fib_index3 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p3)->sw_if_index[VLIB_RX]);
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+ fib_index1 =
+ (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
+ fib_index2 =
+ (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
+ fib_index3 =
+ (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
+
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+ mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
+ mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
+ mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
+
+ leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
+ leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
+ leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
+ }
+
+ tcp0 = (void *) (ip0 + 1);
+ tcp1 = (void *) (ip1 + 1);
+ tcp2 = (void *) (ip2 + 1);
+ tcp3 = (void *) (ip3 + 1);
+
+ is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
+ || ip0->protocol == IP_PROTOCOL_UDP);
+ is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
+ || ip1->protocol == IP_PROTOCOL_UDP);
+ is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
+ || ip2->protocol == IP_PROTOCOL_UDP);
+ is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
+ || ip1->protocol == IP_PROTOCOL_UDP);
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
+ leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
+ leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
+ }
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
+ leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
+ leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
+ }
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
+ leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
+ leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
+ }
+
+ if (lookup_for_responses_to_locally_received_packets)
+ {
+ lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
+ lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
+ lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
+ lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
+ }
+ else
+ {
+ /* Handle default route. */
+ leaf0 =
+ (leaf0 ==
+ IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+ leaf1 =
+ (leaf1 ==
+ IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
+ leaf2 =
+ (leaf2 ==
+ IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
+ leaf3 =
+ (leaf3 ==
+ IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
+ lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
+ }
+
+ lb0 = load_balance_get (lb_index0);
+ lb1 = load_balance_get (lb_index1);
+ lb2 = load_balance_get (lb_index2);
+ lb3 = load_balance_get (lb_index3);
+
+ /* Use flow hash to compute multipath adjacency. */
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
+ hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
+ hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, flow_hash_config0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ flow_hash_config1 = lb1->lb_hash_config;
+ hash_c1 = vnet_buffer (p1)->ip.flow_hash =
+ ip4_compute_flow_hash (ip1, flow_hash_config1);
+ }
+ if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
+ {
+ flow_hash_config2 = lb2->lb_hash_config;
+ hash_c2 = vnet_buffer (p2)->ip.flow_hash =
+ ip4_compute_flow_hash (ip2, flow_hash_config2);
+ }
+ if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
+ {
+ flow_hash_config3 = lb3->lb_hash_config;
+ hash_c3 = vnet_buffer (p3)->ip.flow_hash =
+ ip4_compute_flow_hash (ip3, flow_hash_config3);
+ }
+
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+ ASSERT (lb1->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb1->lb_n_buckets));
+ ASSERT (lb2->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb2->lb_n_buckets));
+ ASSERT (lb3->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb3->lb_n_buckets));
+
+ dpo0 = load_balance_get_bucket_i (lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
+ dpo1 = load_balance_get_bucket_i (lb1,
+ (hash_c1 &
+ (lb1->lb_n_buckets_minus_1)));
+ dpo2 = load_balance_get_bucket_i (lb2,
+ (hash_c2 &
+ (lb2->lb_n_buckets_minus_1)));
+ dpo3 = load_balance_get_bucket_i (lb3,
+ (hash_c3 &
+ (lb3->lb_n_buckets_minus_1)));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ next1 = dpo1->dpoi_next_node;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+ next2 = dpo2->dpoi_next_node;
+ vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+ next3 = dpo3->dpoi_next_node;
+ vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lb_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0)
+ + sizeof (ethernet_header_t));
+ vlib_increment_combined_counter
+ (cm, cpu_index, lb_index1, 1,
+ vlib_buffer_length_in_chain (vm, p1)
+ + sizeof (ethernet_header_t));
+ vlib_increment_combined_counter
+ (cm, cpu_index, lb_index2, 1,
+ vlib_buffer_length_in_chain (vm, p2)
+ + sizeof (ethernet_header_t));
+ vlib_increment_combined_counter
+ (cm, cpu_index, lb_index3, 1,
+ vlib_buffer_length_in_chain (vm, p3)
+ + sizeof (ethernet_header_t));
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, pi1, pi2, pi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ __attribute__ ((unused)) tcp_header_t *tcp0;
+ ip_lookup_next_t next0;
+ const load_balance_t *lb0;
+ ip4_fib_mtrie_t *mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ ip4_address_t *dst_addr0;
+ __attribute__ ((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
+ flow_hash_config_t flow_hash_config0;
+ const dpo_id_t *dpo0;
+ u32 hash_c0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ dst_addr0 = &ip0->dst_address;
+
+ fib_index0 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+
+ leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
+ }
+
+ tcp0 = (void *) (ip0 + 1);
+
+ is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
+ || ip0->protocol == IP_PROTOCOL_UDP);
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
+
+ if (lookup_for_responses_to_locally_received_packets)
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
+ else
+ {
+ /* Handle default route. */
+ leaf0 =
+ (leaf0 ==
+ IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+ lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ }
+
+ lb0 = load_balance_get (lbi0);
+
+ /* Use flow hash to compute multipath adjacency. */
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, flow_hash_config0);
+ }
+
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+ dpo0 = load_balance_get_bucket_i (lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ if (PREDICT_FALSE (next0 != next))
+ {
+ n_left_to_next += 1;
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return frame->n_vectors;
+}
+
+/** @brief IPv4 lookup node.
+ @node ip4-lookup
+
+ This is the main IPv4 lookup dispatch node.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
+ - Indicates the @c sw_if_index value of the interface that the
+ packet was received on.
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
+ - When the value is @c ~0 then the node performs a longest prefix
+ match (LPM) for the packet destination address in the FIB attached
+ to the receive interface.
+ - Otherwise perform LPM for the packet destination address in the
+ indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
+ value (0, 1, ...) and not a VRF id.
+
+ @em Sets:
+ - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
+ - The lookup result adjacency index.
+
+ <em>Next Index:</em>
+ - Dispatches the packet to the node index found in
+ ip_adjacency_t @c adj->lookup_next_index
+ (where @c adj is the lookup result adjacency).
+*/
+static uword
+ip4_lookup (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_lookup_inline (vm, node, frame,
+ /* lookup_for_responses_to_locally_received_packets */
+ 0);
+
+}
+
+static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
+
+VLIB_REGISTER_NODE (ip4_lookup_node) =
+{
+.function = ip4_lookup,.name = "ip4-lookup",.vector_size =
+ sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
+ IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
+
+always_inline uword
+ip4_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
+ u32 n_left_from, n_left_to_next, *from, *to_next;
+ ip_lookup_next_t next;
+ u32 cpu_index = os_get_cpu_number ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ ip_lookup_next_t next0, next1;
+ const load_balance_t *lb0, *lb1;
+ vlib_buffer_t *p0, *p1;
+ u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
+ const ip4_header_t *ip0, *ip1;
+ const dpo_id_t *dpo0, *dpo1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
+
+ /*
+ * this node is for via FIBs we can re-use the hash value from the
+ * to node if present.
+ * We don't want to use the same hash value at each level in the recursion
+ * graph as that would lead to polarisation
+ */
+ hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, hc0);
+ }
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ vnet_buffer (p1)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ ip4_compute_flow_hash (ip1, hc1);
+ }
+ }
+
+ dpo0 =
+ load_balance_get_bucket_i (lb0,
+ hc0 & (lb0->lb_n_buckets_minus_1));
+ dpo1 =
+ load_balance_get_bucket_i (lb1,
+ hc1 & (lb1->lb_n_buckets_minus_1));
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_lookup_next_t next0;
+ const load_balance_t *lb0;
+ vlib_buffer_t *p0;
+ u32 pi0, lbi0, hc0;
+ const ip4_header_t *ip0;
+ const dpo_id_t *dpo0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get (lbi0);
+
+ hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, hc0);
+ }
+ }
+
+ dpo0 =
+ load_balance_get_bucket_i (lb0,
+ hc0 & (lb0->lb_n_buckets_minus_1));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (ip4_load_balance_node) =
+{
+.function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
+ sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
+ format_ip4_lookup_trace,};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
+
+/* get first interface address */
+ip4_address_t *
+ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
+ ip_interface_address_t ** result_ia)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_address_t *ia = 0;
+ ip4_address_t *result = 0;
+
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */ ,
+ (
+ {
+ ip4_address_t * a =
+ ip_interface_address_get_address (lm, ia);
+ result = a;
+ break;
+ }
+ ));
+ if (result_ia)
+ *result_ia = result ? ia : 0;
+ return result;
+}
+
+static void
+ip4_add_interface_routes (u32 sw_if_index,
+ ip4_main_t * im, u32 fib_index,
+ ip_interface_address_t * a)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip4_address_t *address = ip_interface_address_get_address (lm, a);
+ fib_prefix_t pfx = {
+ .fp_len = a->address_length,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4 = *address,
+ };
+
+ a->neighbor_probe_adj_index = ~0;
+
+ if (pfx.fp_len < 32)
+ {
+ fib_node_index_t fei;
+
+ fei = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), FIB_PROTOCOL_IP4, NULL, /* No next-hop address */
+ sw_if_index, ~0, // invalid FIB index
+ 1, NULL, // no out-label stack
+ FIB_ROUTE_PATH_FLAG_NONE);
+ a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
+ }
+
+ pfx.fp_len = 32;
+
+ if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
+ {
+ u32 classify_table_index =
+ lm->classify_table_index_by_sw_if_index[sw_if_index];
+ if (classify_table_index != (u32) ~ 0)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ dpo_set (&dpo,
+ DPO_CLASSIFY,
+ DPO_PROTO_IP4,
+ classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
+
+ fib_table_entry_special_dpo_add (fib_index,
+ &pfx,
+ FIB_SOURCE_CLASSIFY,
+ FIB_ENTRY_FLAG_NONE, &dpo);
+ dpo_reset (&dpo);
+ }
+ }
+
+ fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), FIB_PROTOCOL_IP4, &pfx.fp_addr, sw_if_index, ~0, // invalid FIB index
+ 1, NULL, // no out-label stack
+ FIB_ROUTE_PATH_FLAG_NONE);
+}
+
+static void
+ip4_del_interface_routes (ip4_main_t * im,
+ u32 fib_index,
+ ip4_address_t * address, u32 address_length)
+{
+ fib_prefix_t pfx = {
+ .fp_len = address_length,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4 = *address,
+ };
+
+ if (pfx.fp_len < 32)
+ {
+ fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
+ }
+
+ pfx.fp_len = 32;
+ fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
+}
+
+void
+ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
+{
+ ip4_main_t *im = &ip4_main;
+
+ vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
+
+ /*
+ * enable/disable only on the 1<->0 transition
+ */
+ if (is_enable)
+ {
+ if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
+ return;
+ }
+ else
+ {
+ ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
+ if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
+ return;
+ }
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
+ !is_enable, 0, 0);
+
+ vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
+ !is_enable, 0, 0);
+
+}
+
+static clib_error_t *
+ip4_add_del_interface_address_internal (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length, u32 is_del)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ clib_error_t *error = 0;
+ u32 if_address_index, elts_before;
+ ip4_address_fib_t ip4_af, *addr_fib = 0;
+
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+ ip4_addr_fib_init (&ip4_af, address,
+ vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
+ vec_add1 (addr_fib, ip4_af);
+
+ /* FIXME-LATER
+ * there is no support for adj-fib handling in the presence of overlapping
+ * subnets on interfaces. Easy fix - disallow overlapping subnets, like
+ * most routers do.
+ */
+ if (!is_del)
+ {
+ /* When adding an address check that it does not conflict
+ with an existing address. */
+ ip_interface_address_t *ia;
+ foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered */ ,
+ (
+ {
+ ip4_address_t * x =
+ ip_interface_address_get_address
+ (&im->lookup_main, ia);
+ if (ip4_destination_matches_route
+ (im, address, x, ia->address_length)
+ ||
+ ip4_destination_matches_route (im,
+ x,
+ address,
+ address_length))
+ return
+ clib_error_create
+ ("failed to add %U which conflicts with %U for interface %U",
+ format_ip4_address_and_length, address,
+ address_length,
+ format_ip4_address_and_length, x,
+ ia->address_length,
+ format_vnet_sw_if_index_name, vnm,
+ sw_if_index);}
+ ));
+ }
+
+ elts_before = pool_elts (lm->if_address_pool);
+
+ error = ip_interface_address_add_del
+ (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
+ if (error)
+ goto done;
+
+ ip4_sw_interface_enable_disable (sw_if_index, !is_del);
+
+ if (is_del)
+ ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
+ else
+ ip4_add_interface_routes (sw_if_index,
+ im, ip4_af.fib_index,
+ pool_elt_at_index
+ (lm->if_address_pool, if_address_index));
+
+ /* If pool did not grow/shrink: add duplicate address. */
+ if (elts_before != pool_elts (lm->if_address_pool))
+ {
+ ip4_add_del_interface_address_callback_t *cb;
+ vec_foreach (cb, im->add_del_interface_address_callbacks)
+ cb->function (im, cb->function_opaque, sw_if_index,
+ address, address_length, if_address_index, is_del);
+ }
+
+done:
+ vec_free (addr_fib);
+ return error;
+}
+
+clib_error_t *
+ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
+ ip4_address_t * address, u32 address_length,
+ u32 is_del)
+{
+ return ip4_add_del_interface_address_internal
+ (vm, sw_if_index, address, address_length, is_del);
+}
+
+/* Built-in ip4 unicast rx feature path definition */
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
+{
+ .arc_name = "ip4-unicast",
+ .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
+ .end_node = "ip4-lookup",
+ .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip4_flow_classify, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-flow-classify",
+ .runs_before = VNET_FEATURES ("ip4-inacl"),
+};
+
+VNET_FEATURE_INIT (ip4_inacl, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-inacl",
+ .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
+};
+
+VNET_FEATURE_INIT (ip4_source_check_1, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-source-check-via-rx",
+ .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
+};
+
+VNET_FEATURE_INIT (ip4_source_check_2, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-source-check-via-any",
+ .runs_before = VNET_FEATURES ("ip4-policer-classify"),
+};
+
+VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-source-and-port-range-check-rx",
+ .runs_before = VNET_FEATURES ("ip4-policer-classify"),
+};
+
+VNET_FEATURE_INIT (ip4_policer_classify, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-policer-classify",
+ .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
+};
+
+VNET_FEATURE_INIT (ip4_ipsec, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ipsec-input-ip4",
+ .runs_before = VNET_FEATURES ("vpath-input-ip4"),
+};
+
+VNET_FEATURE_INIT (ip4_vpath, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "vpath-input-ip4",
+ .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
+};
+
+VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-vxlan-bypass",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+
+VNET_FEATURE_INIT (ip4_lookup, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-lookup",
+ .runs_before = VNET_FEATURES ("ip4-drop"),
+};
+
+VNET_FEATURE_INIT (ip4_drop, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-drop",
+ .runs_before = 0, /* not before any other features */
+};
+
+
+/* Built-in ip4 multicast rx feature path definition */
+VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
+{
+ .arc_name = "ip4-multicast",
+ .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
+ .end_node = "ip4-lookup-multicast",
+ .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip4_vpath_mc, static) =
+{
+ .arc_name = "ip4-multicast",
+ .node_name = "vpath-input-ip4",
+ .runs_before = VNET_FEATURES ("ip4-lookup-multicast"),
+};
+
+VNET_FEATURE_INIT (ip4_lookup_mc, static) =
+{
+ .arc_name = "ip4-multicast",
+ .node_name = "ip4-lookup-multicast",
+ .runs_before = VNET_FEATURES ("ip4-drop"),
+};
+
+VNET_FEATURE_INIT (ip4_mc_drop, static) =
+{
+ .arc_name = "ip4-multicast",
+ .node_name = "ip4-drop",
+ .runs_before = 0, /* last feature */
+};
+
+/* Source and port-range check ip4 tx feature path definition */
+VNET_FEATURE_ARC_INIT (ip4_output, static) =
+{
+ .arc_name = "ip4-output",
+ .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
+ .end_node = "interface-output",
+ .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "ip4-source-and-port-range-check-tx",
+ .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
+};
+
+VNET_FEATURE_INIT (ip4_ipsec_output, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "ipsec-output-ip4",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+/* Built-in ip4 tx feature path definition */
+VNET_FEATURE_INIT (ip4_interface_output, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "interface-output",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
+{
+ ip4_main_t *im = &ip4_main;
+
+ /* Fill in lookup tables with default table (0). */
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
+ is_add, 0, 0);
+
+ vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
+ is_add, 0, 0);
+
+ return /* no error */ 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
+
+/* Global IP4 main. */
+ip4_main_t ip4_main;
+
+clib_error_t *
+ip4_lookup_init (vlib_main_t * vm)
+{
+ ip4_main_t *im = &ip4_main;
+ clib_error_t *error;
+ uword i;
+
+ if ((error = vlib_call_init_function (vm, vnet_feature_init)))
+ return error;
+
+ for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
+ {
+ u32 m;
+
+ if (i < 32)
+ m = pow2_mask (i) << (32 - i);
+ else
+ m = ~0;
+ im->fib_masks[i] = clib_host_to_net_u32 (m);
+ }
+
+ ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
+
+ /* Create FIB with index 0 and table id of 0. */
+ fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
+
+ {
+ pg_node_t *pn;
+ pn = pg_get_node (ip4_lookup_node.index);
+ pn->unformat_edit = unformat_pg_ip4_header;
+ }
+
+ {
+ ethernet_arp_header_t h;
+
+ memset (&h, 0, sizeof (h));
+
+ /* Set target ethernet address to all zeros. */
+ memset (h.ip4_over_ethernet[1].ethernet, 0,
+ sizeof (h.ip4_over_ethernet[1].ethernet));
+
+#define _16(f,v) h.f = clib_host_to_net_u16 (v);
+#define _8(f,v) h.f = v;
+ _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
+ _16 (l3_type, ETHERNET_TYPE_IP4);
+ _8 (n_l2_address_bytes, 6);
+ _8 (n_l3_address_bytes, 4);
+ _16 (opcode, ETHERNET_ARP_OPCODE_request);
+#undef _16
+#undef _8
+
+ vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
+ /* data */ &h,
+ sizeof (h),
+ /* alloc chunk size */ 8,
+ "ip4 arp");
+ }
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ip4_lookup_init);
+
+typedef struct
+{
+ /* Adjacency taken. */
+ u32 dpo_index;
+ u32 flow_hash;
+ u32 fib_index;
+
+ /* Packet data, possibly *after* rewrite. */
+ u8 packet_data[64 - 1 * sizeof (u32)];
+}
+ip4_forward_next_trace_t;
+
+u8 *
+format_ip4_forward_next_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
+ uword indent = format_get_indent (s);
+ s = format (s, "%U%U",
+ format_white_space, indent,
+ format_ip4_header, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+static u8 *
+format_ip4_lookup_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
+ t->fib_index, t->dpo_index, t->flow_hash);
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_ip4_header, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+static u8 *
+format_ip4_rewrite_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ uword indent = format_get_indent (s);
+
+ s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
+ t->fib_index, t->dpo_index, format_ip_adjacency,
+ t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_ip_adjacency_packet_data,
+ vnm, t->dpo_index, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+/* Common trace function for all ip4-forward next nodes. */
+void
+ip4_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
+{
+ u32 *from, n_left;
+ ip4_main_t *im = &ip4_main;
+
+ n_left = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ while (n_left >= 4)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ ip4_forward_next_trace_t *t0, *t1;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
+
+ bi0 = from[0];
+ bi1 = from[1];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
+ t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
+ t0->fib_index =
+ (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
+ (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+
+ clib_memcpy (t0->packet_data,
+ vlib_buffer_get_current (b0),
+ sizeof (t0->packet_data));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
+ t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
+ t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
+ t1->fib_index =
+ (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
+ (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (b1)->sw_if_index[VLIB_RX]);
+ clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
+ sizeof (t1->packet_data));
+ }
+ from += 2;
+ n_left -= 2;
+ }
+
+ while (n_left >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip4_forward_next_trace_t *t0;
+
+ bi0 = from[0];
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
+ t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
+ t0->fib_index =
+ (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
+ (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+ clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t0->packet_data));
+ }
+ from += 1;
+ n_left -= 1;
+ }
+}
+
+static uword
+ip4_drop_or_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, ip4_error_t error_code)
+{
+ u32 *buffers = vlib_frame_vector_args (frame);
+ uword n_packets = frame->n_vectors;
+
+ vlib_error_drop_buffers (vm, node, buffers,
+ /* stride */ 1,
+ n_packets,
+ /* next */ 0,
+ ip4_input_node.index, error_code);
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return n_packets;
+}
+
+static uword
+ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
+}
+
+static uword
+ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
+}
+
+VLIB_REGISTER_NODE (ip4_drop_node, static) =
+{
+ .function = ip4_drop,.name = "ip4-drop",.vector_size =
+ sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
+ 1,.next_nodes =
+ {
+ [0] = "error-drop",}
+,};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
+
+VLIB_REGISTER_NODE (ip4_punt_node, static) =
+{
+ .function = ip4_punt,.name = "ip4-punt",.vector_size =
+ sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
+ 1,.next_nodes =
+ {
+ [0] = "error-punt",}
+,};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
+
+/* Compute TCP/UDP/ICMP4 checksum in software. */
+u16
+ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+ ip4_header_t * ip0)
+{
+ ip_csum_t sum0;
+ u32 ip_header_length, payload_length_host_byte_order;
+ u32 n_this_buffer, n_bytes_left;
+ u16 sum16;
+ void *data_this_buffer;
+
+ /* Initialize checksum with ip header. */
+ ip_header_length = ip4_header_bytes (ip0);
+ payload_length_host_byte_order =
+ clib_net_to_host_u16 (ip0->length) - ip_header_length;
+ sum0 =
+ clib_host_to_net_u32 (payload_length_host_byte_order +
+ (ip0->protocol << 16));
+
+ if (BITS (uword) == 32)
+ {
+ sum0 =
+ ip_csum_with_carry (sum0,
+ clib_mem_unaligned (&ip0->src_address, u32));
+ sum0 =
+ ip_csum_with_carry (sum0,
+ clib_mem_unaligned (&ip0->dst_address, u32));
+ }
+ else
+ sum0 =
+ ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
+
+ n_bytes_left = n_this_buffer = payload_length_host_byte_order;
+ data_this_buffer = (void *) ip0 + ip_header_length;
+ if (n_this_buffer + ip_header_length > p0->current_length)
+ n_this_buffer =
+ p0->current_length >
+ ip_header_length ? p0->current_length - ip_header_length : 0;
+ while (1)
+ {
+ sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
+ n_bytes_left -= n_this_buffer;
+ if (n_bytes_left == 0)
+ break;
+
+ ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
+ p0 = vlib_get_buffer (vm, p0->next_buffer);
+ data_this_buffer = vlib_buffer_get_current (p0);
+ n_this_buffer = p0->current_length;
+ }
+
+ sum16 = ~ip_csum_fold (sum0);
+
+ return sum16;
+}
+
+u32
+ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
+{
+ ip4_header_t *ip0 = vlib_buffer_get_current (p0);
+ udp_header_t *udp0;
+ u16 sum16;
+
+ ASSERT (ip0->protocol == IP_PROTOCOL_TCP
+ || ip0->protocol == IP_PROTOCOL_UDP);
+
+ udp0 = (void *) (ip0 + 1);
+ if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
+ {
+ p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
+ | IP_BUFFER_L4_CHECKSUM_CORRECT);
+ return p0->flags;
+ }
+
+ sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
+
+ p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
+ | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
+
+ return p0->flags;
+}
+
+static uword
+ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_local_next_t next_index;
+ u32 *from, *to_next, n_left_from, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ udp_header_t *udp0, *udp1;
+ ip4_fib_mtrie_t *mtrie0, *mtrie1;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ const dpo_id_t *dpo0, *dpo1;
+ const load_balance_t *lb0, *lb1;
+ u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
+ u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
+ i32 len_diff0, len_diff1;
+ u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
+ u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
+ u8 enqueue_code;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
+ vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
+
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+ fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+ fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
+
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+ mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
+
+ leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
+ leaf1 =
+ ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
+
+ /* Treat IP frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
+ proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
+ is_udp0 = proto0 == IP_PROTOCOL_UDP;
+ is_udp1 = proto1 == IP_PROTOCOL_UDP;
+ is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
+ is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
+
+ flags0 = p0->flags;
+ flags1 = p1->flags;
+
+ good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+
+ udp0 = ip4_next_header (ip0);
+ udp1 = ip4_next_header (ip1);
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
+ good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
+ leaf1 =
+ ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
+
+ /* Verify UDP length. */
+ ip_len0 = clib_net_to_host_u16 (ip0->length);
+ ip_len1 = clib_net_to_host_u16 (ip1->length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ udp_len1 = clib_net_to_host_u16 (udp1->length);
+
+ len_diff0 = ip_len0 - udp_len0;
+ len_diff1 = ip_len1 - udp_len1;
+
+ len_diff0 = is_udp0 ? len_diff0 : 0;
+ len_diff1 = is_udp1 ? len_diff1 : 0;
+
+ if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
+ & good_tcp_udp0 & good_tcp_udp1)))
+ {
+ if (is_tcp_udp0)
+ {
+ if (is_tcp_udp0
+ && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
+ flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
+ good_tcp_udp0 =
+ (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
+ }
+ if (is_tcp_udp1)
+ {
+ if (is_tcp_udp1
+ && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
+ flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
+ good_tcp_udp1 =
+ (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
+ }
+ }
+
+ good_tcp_udp0 &= len_diff0 >= 0;
+ good_tcp_udp1 &= len_diff1 >= 0;
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
+ leaf1 =
+ ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
+
+ error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
+
+ error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
+ error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
+
+ ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
+ error0 = (is_tcp_udp0 && !good_tcp_udp0
+ ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
+ error1 = (is_tcp_udp1 && !good_tcp_udp1
+ ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+ leaf1 =
+ ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
+ leaf0 =
+ (leaf0 ==
+ IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+ leaf1 =
+ (leaf1 ==
+ IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
+ ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
+
+ vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
+ ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
+
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+
+ /*
+ * Must have a route to source otherwise we drop the packet.
+ * ip4 broadcasts are accepted, e.g. to make dhcp client work
+ *
+ * The checks are:
+ * - the source is a recieve => it's from us => bogus, do this
+ * first since it sets a different error code.
+ * - uRPF check for any route to source - accept if passes.
+ * - allow packets destined to the broadcast address from unknown sources
+ */
+ error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ dpo0->dpoi_type == DPO_RECEIVE) ?
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
+ error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ !fib_urpf_check_size (lb0->lb_urpf) &&
+ ip0->dst_address.as_u32 != 0xFFFFFFFF)
+ ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
+ error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ dpo1->dpoi_type == DPO_RECEIVE) ?
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
+ error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ !fib_urpf_check_size (lb1->lb_urpf) &&
+ ip1->dst_address.as_u32 != 0xFFFFFFFF)
+ ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
+
+ next0 = lm->local_next_by_ip_protocol[proto0];
+ next1 = lm->local_next_by_ip_protocol[proto1];
+
+ next0 =
+ error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+ next1 =
+ error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
+
+ p0->error = error0 ? error_node->errors[error0] : 0;
+ p1->error = error1 ? error_node->errors[error1] : 0;
+
+ enqueue_code = (next0 != next_index) + 2 * (next1 != next_index);
+
+ if (PREDICT_FALSE (enqueue_code != 0))
+ {
+ switch (enqueue_code)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = pi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ break;
+
+ case 3:
+ /* A B B or A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ if (next0 == next1)
+ {
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ next_index = next1;
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ break;
+ }
+ }
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ udp_header_t *udp0;
+ ip4_fib_mtrie_t *mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
+ i32 len_diff0;
+ u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
+ load_balance_t *lb0;
+ const dpo_id_t *dpo0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
+
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+ fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+
+ leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
+
+ /* Treat IP frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
+ is_udp0 = proto0 == IP_PROTOCOL_UDP;
+ is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
+
+ flags0 = p0->flags;
+
+ good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+
+ udp0 = ip4_next_header (ip0);
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
+
+ /* Verify UDP length. */
+ ip_len0 = clib_net_to_host_u16 (ip0->length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+
+ len_diff0 = ip_len0 - udp_len0;
+
+ len_diff0 = is_udp0 ? len_diff0 : 0;
+
+ if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
+ {
+ if (is_tcp_udp0)
+ {
+ if (is_tcp_udp0
+ && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
+ flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
+ good_tcp_udp0 =
+ (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
+ }
+ }
+
+ good_tcp_udp0 &= len_diff0 >= 0;
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
+
+ error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
+
+ error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
+
+ ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
+ error0 = (is_tcp_udp0 && !good_tcp_udp0
+ ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+ leaf0 =
+ (leaf0 ==
+ IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+
+ lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
+
+ lb0 = load_balance_get (lbi0);
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
+
+ error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ dpo0->dpoi_type == DPO_RECEIVE) ?
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
+ error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ !fib_urpf_check_size (lb0->lb_urpf) &&
+ ip0->dst_address.as_u32 != 0xFFFFFFFF)
+ ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
+
+ next0 = lm->local_next_by_ip_protocol[proto0];
+
+ next0 =
+ error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+
+ p0->error = error0 ? error_node->errors[error0] : 0;
+
+ if (PREDICT_FALSE (next0 != next_index))
+ {
+ n_left_to_next += 1;
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+ next_index = next0;
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (ip4_local_node, static) =
+{
+ .function = ip4_local,.name = "ip4-local",.vector_size =
+ sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
+ IP_LOCAL_N_NEXT,.next_nodes =
+ {
+ [IP_LOCAL_NEXT_DROP] = "error-drop",
+ [IP_LOCAL_NEXT_PUNT] = "error-punt",
+ [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
+ [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",}
+,};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
+
+void
+ip4_register_protocol (u32 protocol, u32 node_index)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+
+ ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
+ lm->local_next_by_ip_protocol[protocol] =
+ vlib_node_add_next (vm, ip4_local_node.index, node_index);
+}
+
+static clib_error_t *
+show_ip_local_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ int i;
+
+ vlib_cli_output (vm, "Protocols handled by ip4_local");
+ for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
+ {
+ if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
+ vlib_cli_output (vm, "%d", i);
+ }
+ return 0;
+}
+
+
+
+/*?
+ * Display the set of protocols handled by the local IPv4 stack.
+ *
+ * @cliexpar
+ * Example of how to display local protocol table:
+ * @cliexstart{show ip local}
+ * Protocols handled by ip4_local
+ * 1
+ * 17
+ * 47
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip_local, static) =
+{
+ .path = "show ip local",
+ .function = show_ip_local_command_fn,
+ .short_help = "show ip local",
+};
+/* *INDENT-ON* */
+
+always_inline uword
+ip4_arp_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_glean)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ u32 *from, *to_next_drop;
+ uword n_left_from, n_left_to_next_drop, next_index;
+ static f64 time_last_seed_change = -1e100;
+ static u32 hash_seeds[3];
+ static uword hash_bitmap[256 / BITS (uword)];
+ f64 time_now;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ time_now = vlib_time_now (vm);
+ if (time_now - time_last_seed_change > 1e-3)
+ {
+ uword i;
+ u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
+ sizeof (hash_seeds));
+ for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
+ hash_seeds[i] = r[i];
+
+ /* Mark all hash keys as been no-seen before. */
+ for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
+ hash_bitmap[i] = 0;
+
+ time_last_seed_change = time_now;
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ if (next_index == IP4_ARP_NEXT_DROP)
+ next_index = IP4_ARP_N_NEXT; /* point to first interface */
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
+ to_next_drop, n_left_to_next_drop);
+
+ while (n_left_from > 0 && n_left_to_next_drop > 0)
+ {
+ u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
+ ip_adjacency_t *adj0;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ uword bm0;
+
+ pi0 = from[0];
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj0 = ip_get_adjacency (lm, adj_index0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ a0 = hash_seeds[0];
+ b0 = hash_seeds[1];
+ c0 = hash_seeds[2];
+
+ sw_if_index0 = adj0->rewrite_header.sw_if_index;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+ if (is_glean)
+ {
+ /*
+ * this is the Glean case, so we are ARPing for the
+ * packet's destination
+ */
+ a0 ^= ip0->dst_address.data_u32;
+ }
+ else
+ {
+ a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
+ }
+ b0 ^= sw_if_index0;
+
+ hash_v3_finalize32 (a0, b0, c0);
+
+ c0 &= BITS (hash_bitmap) - 1;
+ c0 = c0 / BITS (uword);
+ m0 = (uword) 1 << (c0 % BITS (uword));
+
+ bm0 = hash_bitmap[c0];
+ drop0 = (bm0 & m0) != 0;
+
+ /* Mark it as seen. */
+ hash_bitmap[c0] = bm0 | m0;
+
+ from += 1;
+ n_left_from -= 1;
+ to_next_drop[0] = pi0;
+ to_next_drop += 1;
+ n_left_to_next_drop -= 1;
+
+ p0->error =
+ node->errors[drop0 ? IP4_ARP_ERROR_DROP :
+ IP4_ARP_ERROR_REQUEST_SENT];
+
+ /*
+ * the adj has been updated to a rewrite but the node the DPO that got
+ * us here hasn't - yet. no big deal. we'll drop while we wait.
+ */
+ if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
+ continue;
+
+ if (drop0)
+ continue;
+
+ /*
+ * Can happen if the control-plane is programming tables
+ * with traffic flowing; at least that's today's lame excuse.
+ */
+ if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
+ (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
+ {
+ p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
+ }
+ else
+ /* Send ARP request. */
+ {
+ u32 bi0 = 0;
+ vlib_buffer_t *b0;
+ ethernet_arp_header_t *h0;
+ vnet_hw_interface_t *hw_if0;
+
+ h0 =
+ vlib_packet_template_get_packet (vm,
+ &im->ip4_arp_request_packet_template,
+ &bi0);
+
+ /* Add rewrite/encap string for ARP packet. */
+ vnet_rewrite_one_header (adj0[0], h0,
+ sizeof (ethernet_header_t));
+
+ hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+ /* Src ethernet address in ARP header. */
+ clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
+ hw_if0->hw_address,
+ sizeof (h0->ip4_over_ethernet[0].ethernet));
+
+ if (is_glean)
+ {
+ /* The interface's source address is stashed in the Glean Adj */
+ h0->ip4_over_ethernet[0].ip4 =
+ adj0->sub_type.glean.receive_addr.ip4;
+
+ /* Copy in destination address we are requesting. This is the
+ * glean case, so it's the packet's destination.*/
+ h0->ip4_over_ethernet[1].ip4.data_u32 =
+ ip0->dst_address.data_u32;
+ }
+ else
+ {
+ /* Src IP address in ARP header. */
+ if (ip4_src_address_for_packet (lm, sw_if_index0,
+ &h0->
+ ip4_over_ethernet[0].ip4))
+ {
+ /* No source address available */
+ p0->error =
+ node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
+ vlib_buffer_free (vm, &bi0, 1);
+ continue;
+ }
+
+ /* Copy in destination address we are requesting from the
+ incomplete adj */
+ h0->ip4_over_ethernet[1].ip4.data_u32 =
+ adj0->sub_type.nbr.next_hop.ip4.as_u32;
+ }
+
+ vlib_buffer_copy_trace_flag (vm, p0, bi0);
+ b0 = vlib_get_buffer (vm, bi0);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+ vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+
+ vlib_set_next_frame_buffer (vm, node,
+ adj0->rewrite_header.next_index,
+ bi0);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return (ip4_arp_inline (vm, node, frame, 0));
+}
+
+static uword
+ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return (ip4_arp_inline (vm, node, frame, 1));
+}
+
+static char *ip4_arp_error_strings[] = {
+ [IP4_ARP_ERROR_DROP] = "address overflow drops",
+ [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
+ [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
+ [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
+ [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
+ [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
+};
+
+VLIB_REGISTER_NODE (ip4_arp_node) =
+{
+ .function = ip4_arp,.name = "ip4-arp",.vector_size =
+ sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
+ ARRAY_LEN (ip4_arp_error_strings),.error_strings =
+ ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
+ {
+ [IP4_ARP_NEXT_DROP] = "error-drop",}
+,};
+
+VLIB_REGISTER_NODE (ip4_glean_node) =
+{
+ .function = ip4_glean,.name = "ip4-glean",.vector_size =
+ sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
+ ARRAY_LEN (ip4_arp_error_strings),.error_strings =
+ ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
+ {
+ [IP4_ARP_NEXT_DROP] = "error-drop",}
+,};
+
+#define foreach_notrace_ip4_arp_error \
+_(DROP) \
+_(REQUEST_SENT) \
+_(REPLICATE_DROP) \
+_(REPLICATE_FAIL)
+
+clib_error_t *
+arp_notrace_init (vlib_main_t * vm)
+{
+ vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
+
+ /* don't trace ARP request packets */
+#define _(a) \
+ vnet_pcap_drop_trace_filter_add_del \
+ (rt->errors[IP4_ARP_ERROR_##a], \
+ 1 /* is_add */);
+ foreach_notrace_ip4_arp_error;
+#undef _
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (arp_notrace_init);
+
+
+/* Send an ARP request to see if given destination is reachable on given interface. */
+clib_error_t *
+ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im = &ip4_main;
+ ethernet_arp_header_t *h;
+ ip4_address_t *src;
+ ip_interface_address_t *ia;
+ ip_adjacency_t *adj;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ vlib_buffer_t *b;
+ u32 bi = 0;
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ return clib_error_return (0, "%U: interface %U down",
+ format_ip4_address, dst,
+ format_vnet_sw_if_index_name, vnm,
+ sw_if_index);
+ }
+
+ src =
+ ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
+ if (!src)
+ {
+ vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
+ return clib_error_return
+ (0, "no matching interface address for destination %U (interface %U)",
+ format_ip4_address, dst,
+ format_vnet_sw_if_index_name, vnm, sw_if_index);
+ }
+
+ adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
+
+ h =
+ vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
+ &bi);
+
+ hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+ clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
+ sizeof (h->ip4_over_ethernet[0].ethernet));
+
+ h->ip4_over_ethernet[0].ip4 = src[0];
+ h->ip4_over_ethernet[1].ip4 = dst[0];
+
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+
+ /* Add encapsulation string for software interface (e.g. ethernet header). */
+ vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+ vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+ {
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+ u32 *to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi->output_node_index, f);
+ }
+
+ return /* no error */ 0;
+}
+
+typedef enum
+{
+ IP4_REWRITE_NEXT_DROP,
+ IP4_REWRITE_NEXT_ICMP_ERROR,
+} ip4_rewrite_next_t;
+
+always_inline uword
+ip4_rewrite_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_midchain)
+{
+ ip_lookup_main_t *lm = &ip4_main.lookup_main;
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 cpu_index = os_get_cpu_number ();
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ ip_adjacency_t *adj0, *adj1;
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
+ u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
+ u32 tx_sw_if_index0, tx_sw_if_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ /* We should never rewrite a pkt using the MISS adjacency */
+ ASSERT (adj_index0 && adj_index1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ error0 = error1 = IP4_ERROR_NONE;
+ next0 = next1 = IP4_REWRITE_NEXT_DROP;
+
+ /* Decrement TTL & update checksum.
+ Works either endian, so no need for byte swap. */
+ if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
+ {
+ i32 ttl0 = ip0->ttl;
+
+ /* Input node should have reject packets with ttl 0. */
+ ASSERT (ip0->ttl > 0);
+
+ checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+ checksum0 += checksum0 >= 0xffff;
+
+ ip0->checksum = checksum0;
+ ttl0 -= 1;
+ ip0->ttl = ttl0;
+
+ /*
+ * If the ttl drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ if (PREDICT_FALSE (ttl0 <= 0))
+ {
+ error0 = IP4_ERROR_TIME_EXPIRED;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
+ }
+
+ /* Verify checksum. */
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ }
+ else
+ {
+ p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
+ if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
+ {
+ i32 ttl1 = ip1->ttl;
+
+ /* Input node should have reject packets with ttl 0. */
+ ASSERT (ip1->ttl > 0);
+
+ checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
+ checksum1 += checksum1 >= 0xffff;
+
+ ip1->checksum = checksum1;
+ ttl1 -= 1;
+ ip1->ttl = ttl1;
+
+ /*
+ * If the ttl drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ if (PREDICT_FALSE (ttl1 <= 0))
+ {
+ error1 = IP4_ERROR_TIME_EXPIRED;
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
+ }
+
+ /* Verify checksum. */
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ ASSERT (ip1->checksum == ip4_header_checksum (ip1));
+ }
+ else
+ {
+ p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
+
+ /* Rewrite packet header and updates lengths. */
+ adj0 = ip_get_adjacency (lm, adj_index0);
+ adj1 = ip_get_adjacency (lm, adj_index1);
+
+ /* Worth pipelining. No guarantee that adj0,1 are hot... */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ rw_len1 = adj1[0].rewrite_header.data_bytes;
+ vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+ vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
+
+ /* Check MTU of outgoing interface. */
+ error0 =
+ (vlib_buffer_length_in_chain (vm, p0) >
+ adj0[0].
+ rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
+ error0);
+ error1 =
+ (vlib_buffer_length_in_chain (vm, p1) >
+ adj1[0].
+ rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
+ error1);
+
+ /*
+ * We've already accounted for an ethernet_header_t elsewhere
+ */
+ if (PREDICT_FALSE (rw_len0 > sizeof (ethernet_header_t)))
+ vlib_increment_combined_counter
+ (&adjacency_counters, cpu_index, adj_index0,
+ /* packet increment */ 0,
+ /* byte increment */ rw_len0 - sizeof (ethernet_header_t));
+
+ if (PREDICT_FALSE (rw_len1 > sizeof (ethernet_header_t)))
+ vlib_increment_combined_counter
+ (&adjacency_counters, cpu_index, adj_index1,
+ /* packet increment */ 0,
+ /* byte increment */ rw_len1 - sizeof (ethernet_header_t));
+
+ /* Don't adjust the buffer for ttl issue; icmp-error node wants
+ * to see the IP headerr */
+ if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
+ {
+ next0 = adj0[0].rewrite_header.next_index;
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index0, &next0, p0);
+ }
+ if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
+ {
+ next1 = adj1[0].rewrite_header.next_index;
+ p1->current_data -= rw_len1;
+ p1->current_length += rw_len1;
+
+ tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
+
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index1, &next1, p1);
+ }
+
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_two_headers (adj0[0], adj1[0],
+ ip0, ip1, sizeof (ethernet_header_t));
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
+ adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_adjacency_t *adj0;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
+ u32 tx_sw_if_index0;
+
+ pi0 = to_next[0] = from[0];
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ /* We should never rewrite a pkt using the MISS adjacency */
+ ASSERT (adj_index0);
+
+ adj0 = ip_get_adjacency (lm, adj_index0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ error0 = IP4_ERROR_NONE;
+ next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
+
+ /* Decrement TTL & update checksum. */
+ if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
+ {
+ i32 ttl0 = ip0->ttl;
+
+ checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+
+ checksum0 += checksum0 >= 0xffff;
+
+ ip0->checksum = checksum0;
+
+ ASSERT (ip0->ttl > 0);
+
+ ttl0 -= 1;
+
+ ip0->ttl = ttl0;
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+
+ if (PREDICT_FALSE (ttl0 <= 0))
+ {
+ /*
+ * If the ttl drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ error0 = IP4_ERROR_TIME_EXPIRED;
+ next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ }
+ }
+ else
+ {
+ p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
+
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+
+ if (PREDICT_FALSE (rw_len0 > sizeof (ethernet_header_t)))
+ vlib_increment_combined_counter
+ (&adjacency_counters, cpu_index, adj_index0,
+ /* packet increment */ 0,
+ /* byte increment */ rw_len0 - sizeof (ethernet_header_t));
+
+ /* Check MTU of outgoing interface. */
+ error0 = (vlib_buffer_length_in_chain (vm, p0)
+ > adj0[0].rewrite_header.max_l3_packet_bytes
+ ? IP4_ERROR_MTU_EXCEEDED : error0);
+
+ p0->error = error_node->errors[error0];
+
+ /* Don't adjust the buffer for ttl issue; icmp-error node wants
+ * to see the IP headerr */
+ if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
+ {
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+ next0 = adj0[0].rewrite_header.next_index;
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
+ }
+
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index0, &next0, p0);
+
+ }
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Need to do trace after rewrites to pick up new packet data. */
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return frame->n_vectors;
+}
+
+
+/** @brief IPv4 rewrite node.
+ @node ip4-rewrite
+
+ This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
+ header checksum, fetch the ip adjacency, check the outbound mtu,
+ apply the adjacency rewrite, and send pkts to the adjacency
+ rewrite header's rewrite_next_index.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
+ - the rewrite adjacency index
+ - <code>adj->lookup_next_index</code>
+ - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
+ the packet will be dropped.
+ - <code>adj->rewrite_header</code>
+ - Rewrite string length, rewrite string, next_index
+
+ @em Sets:
+ - <code>b->current_data, b->current_length</code>
+ - Updated net of applying the rewrite string
+
+ <em>Next Indices:</em>
+ - <code> adj->rewrite_header.next_index </code>
+ or @c error-drop
+*/
+static uword
+ip4_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_rewrite_inline (vm, node, frame, 0);
+}
+
+static uword
+ip4_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_rewrite_inline (vm, node, frame, 1);
+}
+
+
+VLIB_REGISTER_NODE (ip4_rewrite_node) =
+{
+ .function = ip4_rewrite,.name = "ip4-rewrite",.vector_size =
+ sizeof (u32),.format_trace = format_ip4_rewrite_trace,.n_next_nodes =
+ 2,.next_nodes =
+ {
+ [IP4_REWRITE_NEXT_DROP] = "error-drop",
+ [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",}
+,};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite);
+
+VLIB_REGISTER_NODE (ip4_midchain_node) =
+{
+.function = ip4_midchain,.name = "ip4-midchain",.vector_size =
+ sizeof (u32),.format_trace = format_ip4_forward_next_trace,.sibling_of =
+ "ip4-rewrite",};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
+
+static clib_error_t *
+add_del_interface_table (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index, table_id;
+
+ sw_if_index = ~0;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (unformat (input, "%d", &table_id))
+ ;
+ else
+ {
+ error = clib_error_return (0, "expected table id `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ {
+ ip4_main_t *im = &ip4_main;
+ u32 fib_index;
+
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
+ table_id);
+
+ //
+ // FIXME-LATER
+ // changing an interface's table has consequences for any connecteds
+ // and adj-fibs already installed.
+ //
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+ im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
+ }
+
+done:
+ return error;
+}
+
+/*?
+ * Place the indicated interface into the supplied IPv4 FIB table (also known
+ * as a VRF). If the FIB table does not exist, this command creates it. To
+ * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
+ * FIB table will only be displayed if a route has been added to the table, or
+ * an IP Address is assigned to an interface in the table (which adds a route
+ * automatically).
+ *
+ * @note IP addresses added after setting the interface IP table end up in
+ * the indicated FIB table. If the IP address is added prior to adding the
+ * interface to the FIB table, it will NOT be part of the FIB table. Predictable
+ * but potentially counter-intuitive results occur if you provision interface
+ * addresses in multiple FIBs. Upon RX, packets will be processed in the last
+ * IP table ID provisioned. It might be marginally useful to evade source RPF
+ * drops to put an interface address into multiple FIBs.
+ *
+ * @cliexpar
+ * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
+ * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
+{
+ .path = "set interface ip table",
+ .function = add_del_interface_table,
+ .short_help = "set interface ip table <interface> <table-id>",
+};
+/* *INDENT-ON* */
+
+
+static uword
+ip4_lookup_multicast (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ip4_main_t *im = &ip4_main;
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
+ u32 n_left_from, n_left_to_next, *from, *to_next;
+ ip_lookup_next_t next;
+ u32 cpu_index = os_get_cpu_number ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
+ ip_lookup_next_t next0, next1;
+ ip4_header_t *ip0, *ip1;
+ u32 fib_index0, fib_index1;
+ const dpo_id_t *dpo0, *dpo1;
+ const load_balance_t *lb0, *lb1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ fib_index0 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index1 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+ fib_index1 =
+ (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
+
+ lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0),
+ &ip0->dst_address);
+ lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index1),
+ &ip1->dst_address);
+
+ lb0 = load_balance_get (lb_index0);
+ lb1 = load_balance_get (lb_index1);
+
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+ ASSERT (lb1->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb1->lb_n_buckets));
+
+ vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
+ (ip0, lb0->lb_hash_config);
+
+ vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
+ (ip1, lb1->lb_hash_config);
+
+ dpo0 = load_balance_get_bucket_i (lb0,
+ (vnet_buffer (p0)->ip.flow_hash &
+ (lb0->lb_n_buckets_minus_1)));
+ dpo1 = load_balance_get_bucket_i (lb1,
+ (vnet_buffer (p1)->ip.flow_hash &
+ (lb1->lb_n_buckets_minus_1)));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ next1 = dpo1->dpoi_next_node;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ if (1) /* $$$$$$ HACK FIXME */
+ vlib_increment_combined_counter
+ (cm, cpu_index, lb_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+ if (1) /* $$$$$$ HACK FIXME */
+ vlib_increment_combined_counter
+ (cm, cpu_index, lb_index1, 1,
+ vlib_buffer_length_in_chain (vm, p1));
+
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ wrong_next = (next0 != next) + 2 * (next1 != next);
+ if (PREDICT_FALSE (wrong_next != 0))
+ {
+ switch (wrong_next)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = pi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ break;
+
+ case 3:
+ /* A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ if (next0 == next1)
+ {
+ /* A B B */
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next1;
+ vlib_get_next_frame (vm, node, next, to_next,
+ n_left_to_next);
+ }
+ }
+ }
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ u32 pi0, lb_index0;
+ ip_lookup_next_t next0;
+ u32 fib_index0;
+ const dpo_id_t *dpo0;
+ const load_balance_t *lb0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+ fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0),
+ &ip0->dst_address);
+
+ lb0 = load_balance_get (lb_index0);
+
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+ vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
+ (ip0, lb0->lb_hash_config);
+
+ dpo0 = load_balance_get_bucket_i (lb0,
+ (vnet_buffer (p0)->ip.flow_hash &
+ (lb0->lb_n_buckets_minus_1)));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (1) /* $$$$$$ HACK FIXME */
+ vlib_increment_combined_counter
+ (cm, cpu_index, lb_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ if (PREDICT_FALSE (next0 != next))
+ {
+ n_left_to_next += 1;
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (ip4_lookup_multicast_node, static) =
+{
+.function = ip4_lookup_multicast,.name =
+ "ip4-lookup-multicast",.vector_size = sizeof (u32),.sibling_of =
+ "ip4-lookup",.format_trace = format_ip4_lookup_trace,.n_next_nodes = 0,};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node,
+ ip4_lookup_multicast);
+
+VLIB_REGISTER_NODE (ip4_multicast_node, static) =
+{
+ .function = ip4_drop,.name = "ip4-multicast",.vector_size =
+ sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
+ 1,.next_nodes =
+ {
+ [0] = "error-drop",}
+,};
+
+int
+ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
+{
+ ip4_fib_mtrie_t *mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ u32 lbi0;
+
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+
+ leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
+
+ /* Handle default route. */
+ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+
+ lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+
+ return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
+}
+
+static clib_error_t *
+test_lookup_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ip4_fib_t *fib;
+ u32 table_id = 0;
+ f64 count = 1;
+ u32 n;
+ int i;
+ ip4_address_t ip4_base_address;
+ u64 errors = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %d", &table_id))
+ {
+ /* Make sure the entry exists. */
+ fib = ip4_fib_get (table_id);
+ if ((fib) && (fib->index != table_id))
+ return clib_error_return (0, "<fib-index> %d does not exist",
+ table_id);
+ }
+ else if (unformat (input, "count %f", &count))
+ ;
+
+ else if (unformat (input, "%U",
+ unformat_ip4_address, &ip4_base_address))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ n = count;
+
+ for (i = 0; i < n; i++)
+ {
+ if (!ip4_lookup_validate (&ip4_base_address, table_id))
+ errors++;
+
+ ip4_base_address.as_u32 =
+ clib_host_to_net_u32 (1 +
+ clib_net_to_host_u32 (ip4_base_address.as_u32));
+ }
+
+ if (errors)
+ vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
+ else
+ vlib_cli_output (vm, "No errors in %d lookups\n", n);
+
+ return 0;
+}
+
+/*?
+ * Perform a lookup of an IPv4 Address (or range of addresses) in the
+ * given FIB table to determine if there is a conflict with the
+ * adjacency table. The fib-id can be determined by using the
+ * '<em>show ip fib</em>' command. If fib-id is not entered, default value
+ * of 0 is used.
+ *
+ * @todo This command uses fib-id, other commands use table-id (not
+ * just a name, they are different indexes). Would like to change this
+ * to table-id for consistency.
+ *
+ * @cliexpar
+ * Example of how to run the test lookup command:
+ * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
+ * No errors in 2 lookups
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (lookup_test_command, static) =
+{
+ .path = "test lookup",
+ .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
+ .function = test_lookup_command_fn,
+};
+/* *INDENT-ON* */
+
+int
+vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip4_fib_t *fib;
+ uword *p = hash_get (im4->fib_index_by_table_id, table_id);
+
+ if (p == 0)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ fib = ip4_fib_get (p[0]);
+
+ fib->flow_hash_config = flow_hash_config;
+ return 0;
+}
+
+static clib_error_t *
+set_ip_flow_hash_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int matched = 0;
+ u32 table_id = 0;
+ u32 flow_hash_config = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %d", &table_id))
+ matched = 1;
+#define _(a,v) \
+ else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
+ foreach_flow_hash_bit
+#undef _
+ else
+ break;
+ }
+
+ if (matched == 0)
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+
+ rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ return clib_error_return (0, "no such FIB table %d", table_id);
+
+ default:
+ clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
+ break;
+ }
+
+ return 0;
+}
+
+/*?
+ * Configure the set of IPv4 fields used by the flow hash.
+ *
+ * @cliexpar
+ * Example of how to set the flow hash on a given table:
+ * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
+ * Example of display the configured flow hash:
+ * @cliexstart{show ip fib}
+ * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ * 0.0.0.0/0
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 0.0.0.0/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 224.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 6.0.1.2/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
+ * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
+ * 7.0.0.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
+ * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
+ * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
+ * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
+ * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
+ * 240.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 255.255.255.255/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
+ * 0.0.0.0/0
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 0.0.0.0/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 172.16.1.0/24
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
+ * [0] [@4]: ipv4-glean: af_packet0
+ * 172.16.1.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
+ * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
+ * 172.16.1.2/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
+ * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
+ * 172.16.2.0/24
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
+ * [0] [@4]: ipv4-glean: af_packet1
+ * 172.16.2.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
+ * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
+ * 224.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 240.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 255.255.255.255/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
+{
+ .path = "set ip flow-hash",
+ .short_help =
+ "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
+ .function = set_ip_flow_hash_command_fn,
+};
+/* *INDENT-ON* */
+
+int
+vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 table_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ ip4_main_t *ipm = &ip4_main;
+ ip_lookup_main_t *lm = &ipm->lookup_main;
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ ip4_address_t *if_addr;
+
+ if (pool_is_free_index (im->sw_interfaces, sw_if_index))
+ return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+
+ if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
+ lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
+
+ if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
+
+ if (NULL != if_addr)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr.ip4 = *if_addr,
+ };
+ u32 fib_index;
+
+ fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ sw_if_index);
+
+
+ if (table_index != (u32) ~ 0)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ dpo_set (&dpo,
+ DPO_CLASSIFY,
+ DPO_PROTO_IP4,
+ classify_dpo_create (DPO_PROTO_IP4, table_index));
+
+ fib_table_entry_special_dpo_add (fib_index,
+ &pfx,
+ FIB_SOURCE_CLASSIFY,
+ FIB_ENTRY_FLAG_NONE, &dpo);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ fib_table_entry_special_remove (fib_index,
+ &pfx, FIB_SOURCE_CLASSIFY);
+ }
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+set_ip_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 table_index = ~0;
+ int table_index_set = 0;
+ u32 sw_if_index = ~0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table-index %d", &table_index))
+ table_index_set = 1;
+ else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+ vnet_get_main (), &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (table_index_set == 0)
+ return clib_error_return (0, "classify table-index must be specified");
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "interface / subif must be specified");
+
+ rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_MATCHING_INTERFACE:
+ return clib_error_return (0, "No such interface");
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "No such classifier table");
+ }
+ return 0;
+}
+
+/*?
+ * Assign a classification table to an interface. The classification
+ * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
+ * commands. Once the table is create, use this command to filter packets
+ * on an interface.
+ *
+ * @cliexpar
+ * Example of how to assign a classification table to an interface:
+ * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip_classify_command, static) =
+{
+ .path = "set ip classify",
+ .short_help =
+ "set ip classify intfc <interface> table-index <classify-idx>",
+ .function = set_ip_classify_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c
new file mode 100644
index 00000000000..1cf5e0b8517
--- /dev/null
+++ b/src/vnet/ip/ip4_input.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_input.c: IP v4 input node
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ppp/ppp.h>
+#include <vnet/hdlc/hdlc.h>
+
+typedef struct
+{
+ u8 packet_data[64];
+} ip4_input_trace_t;
+
+static u8 *
+format_ip4_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ip4_input_trace_t *t = va_arg (*va, ip4_input_trace_t *);
+
+ s = format (s, "%U",
+ format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+typedef enum
+{
+ IP4_INPUT_NEXT_DROP,
+ IP4_INPUT_NEXT_PUNT,
+ IP4_INPUT_NEXT_LOOKUP,
+ IP4_INPUT_NEXT_LOOKUP_MULTICAST,
+ IP4_INPUT_NEXT_ICMP_ERROR,
+ IP4_INPUT_N_NEXT,
+} ip4_input_next_t;
+
+/* Validate IP v4 packets and pass them either to forwarding code
+ or drop/punt exception packets. */
+always_inline uword
+ip4_input_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int verify_checksum)
+{
+ ip4_main_t *im = &ip4_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ ip_lookup_main_t *lm = &im->lookup_main;
+ u32 n_left_from, *from, *to_next;
+ ip4_input_next_t next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+ vlib_simple_counter_main_t *cm;
+ u32 cpu_index = os_get_cpu_number ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ip4_input_trace_t));
+
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_IP4);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ u32 sw_if_index0, pi0, ip_len0, cur_len0, next0;
+ u32 sw_if_index1, pi1, ip_len1, cur_len1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, error1, arc0, arc1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+ }
+
+ to_next[0] = pi0 = from[0];
+ to_next[1] = pi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
+
+ error0 = error1 = IP4_ERROR_NONE;
+
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip0->dst_address)))
+ {
+ arc0 = lm->mcast_feature_arc_index;
+ next0 = IP4_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc0 = lm->ucast_feature_arc_index;
+ next0 = IP4_INPUT_NEXT_LOOKUP;
+ if (PREDICT_FALSE (ip0->ttl < 1))
+ error0 = IP4_ERROR_TIME_EXPIRED;
+ }
+
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip1->dst_address)))
+ {
+ arc1 = lm->mcast_feature_arc_index;
+ next1 = IP4_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc1 = lm->ucast_feature_arc_index;
+ next1 = IP4_INPUT_NEXT_LOOKUP;
+ if (PREDICT_FALSE (ip1->ttl < 1))
+ error1 = IP4_ERROR_TIME_EXPIRED;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0;
+
+ vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+ vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1);
+
+ vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+ vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+
+ /* Punt packets with options or wrong version. */
+ if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45))
+ error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ?
+ IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+
+ if (PREDICT_FALSE (ip1->ip_version_and_header_length != 0x45))
+ error1 = (ip1->ip_version_and_header_length & 0xf) != 5 ?
+ IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+
+ /* Verify header checksum. */
+ if (verify_checksum)
+ {
+ ip_csum_t sum0, sum1;
+
+ ip4_partial_header_checksum_x1 (ip0, sum0);
+ ip4_partial_header_checksum_x1 (ip1, sum1);
+
+ error0 = 0xffff != ip_csum_fold (sum0) ?
+ IP4_ERROR_BAD_CHECKSUM : error0;
+ error1 = 0xffff != ip_csum_fold (sum1) ?
+ IP4_ERROR_BAD_CHECKSUM : error1;
+ }
+
+ /* Drop fragmentation offset 1 packets. */
+ error0 = ip4_get_fragment_offset (ip0) == 1 ?
+ IP4_ERROR_FRAGMENT_OFFSET_ONE : error0;
+ error1 = ip4_get_fragment_offset (ip1) == 1 ?
+ IP4_ERROR_FRAGMENT_OFFSET_ONE : error1;
+
+ /* Verify lengths. */
+ ip_len0 = clib_net_to_host_u16 (ip0->length);
+ ip_len1 = clib_net_to_host_u16 (ip1->length);
+
+ /* IP length must be at least minimal IP header. */
+ error0 = ip_len0 < sizeof (ip0[0]) ? IP4_ERROR_TOO_SHORT : error0;
+ error1 = ip_len1 < sizeof (ip1[0]) ? IP4_ERROR_TOO_SHORT : error1;
+
+ cur_len0 = vlib_buffer_length_in_chain (vm, p0);
+ cur_len1 = vlib_buffer_length_in_chain (vm, p1);
+
+ len_diff0 = cur_len0 - ip_len0;
+ len_diff1 = cur_len1 - ip_len1;
+
+ error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0;
+ error1 = len_diff1 < 0 ? IP4_ERROR_BAD_LENGTH : error1;
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ if (PREDICT_FALSE (error0 != IP4_ERROR_NONE))
+ {
+ if (error0 == IP4_ERROR_TIME_EXPIRED)
+ {
+ icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP4_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ next0 = error0 != IP4_ERROR_OPTIONS ?
+ IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+ }
+ if (PREDICT_FALSE (error1 != IP4_ERROR_NONE))
+ {
+ if (error1 == IP4_ERROR_TIME_EXPIRED)
+ {
+ icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next1 = IP4_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ next1 = error1 != IP4_ERROR_OPTIONS ?
+ IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ u32 sw_if_index0, pi0, ip_len0, cur_len0, next0;
+ i32 len_diff0;
+ u8 error0, arc0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ error0 = IP4_ERROR_NONE;
+
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip0->dst_address)))
+ {
+ arc0 = lm->mcast_feature_arc_index;
+ next0 = IP4_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc0 = lm->ucast_feature_arc_index;
+ next0 = IP4_INPUT_NEXT_LOOKUP;
+ if (PREDICT_FALSE (ip0->ttl < 1))
+ error0 = IP4_ERROR_TIME_EXPIRED;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+
+ vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+
+ /* Punt packets with options or wrong version. */
+ if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45))
+ error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ?
+ IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+
+ /* Verify header checksum. */
+ if (verify_checksum)
+ {
+ ip_csum_t sum0;
+
+ ip4_partial_header_checksum_x1 (ip0, sum0);
+ error0 =
+ 0xffff !=
+ ip_csum_fold (sum0) ? IP4_ERROR_BAD_CHECKSUM : error0;
+ }
+
+ /* Drop fragmentation offset 1 packets. */
+ error0 =
+ ip4_get_fragment_offset (ip0) ==
+ 1 ? IP4_ERROR_FRAGMENT_OFFSET_ONE : error0;
+
+ /* Verify lengths. */
+ ip_len0 = clib_net_to_host_u16 (ip0->length);
+
+ /* IP length must be at least minimal IP header. */
+ error0 = ip_len0 < sizeof (ip0[0]) ? IP4_ERROR_TOO_SHORT : error0;
+
+ cur_len0 = vlib_buffer_length_in_chain (vm, p0);
+ len_diff0 = cur_len0 - ip_len0;
+ error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0;
+
+ p0->error = error_node->errors[error0];
+ if (PREDICT_FALSE (error0 != IP4_ERROR_NONE))
+ {
+ if (error0 == IP4_ERROR_TIME_EXPIRED)
+ {
+ icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP4_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ next0 = error0 != IP4_ERROR_OPTIONS ?
+ IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/** \brief IPv4 input node.
+ @node ip4-input
+
+ This is the IPv4 input node: validates ip4 header checksums,
+ verifies ip header lengths, discards pkts with expired TTLs,
+ and sends pkts to the set of ip feature nodes configured on
+ the rx interface.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - vnet_feature_config_main_t cm corresponding to each pkt's dst address unicast /
+ multicast status.
+ - <code>b->current_config_index</code> corresponding to each pkt's
+ rx sw_if_index.
+ - This sets the per-packet graph trajectory, ensuring that
+ each packet visits the per-interface features in order.
+
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
+ - Indicates the @c sw_if_index value of the interface that the
+ packet was received on.
+
+ @em Sets:
+ - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
+ - The lookup result adjacency index.
+
+ <em>Next Indices:</em>
+ - Dispatches pkts to the (first) feature node:
+ <code> vnet_get_config_data (... &next0 ...); </code>
+ or @c error-drop
+*/
+static uword
+ip4_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_input_inline (vm, node, frame, /* verify_checksum */ 1);
+}
+
+static uword
+ip4_input_no_checksum (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0);
+}
+
+static char *ip4_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip4_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_input_node) = {
+ .function = ip4_input,
+ .name = "ip4-input",
+ .vector_size = sizeof (u32),
+
+ .n_errors = IP4_N_ERROR,
+ .error_strings = ip4_error_strings,
+
+ .n_next_nodes = IP4_INPUT_N_NEXT,
+ .next_nodes = {
+ [IP4_INPUT_NEXT_DROP] = "error-drop",
+ [IP4_INPUT_NEXT_PUNT] = "error-punt",
+ [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
+ [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-lookup-multicast",
+ [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_input_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_input_node, ip4_input);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = {
+ .function = ip4_input_no_checksum,
+ .name = "ip4-input-no-checksum",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP4_INPUT_N_NEXT,
+ .next_nodes = {
+ [IP4_INPUT_NEXT_DROP] = "error-drop",
+ [IP4_INPUT_NEXT_PUNT] = "error-punt",
+ [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
+ [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-lookup-multicast",
+ [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_input_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_input_no_checksum_node,
+ ip4_input_no_checksum);
+
+static clib_error_t *
+ip4_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+
+ ethernet_register_input_type (vm, ETHERNET_TYPE_IP4, ip4_input_node.index);
+ ppp_register_input_protocol (vm, PPP_PROTOCOL_ip4, ip4_input_node.index);
+ hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip4, ip4_input_node.index);
+
+ {
+ pg_node_t *pn;
+ pn = pg_get_node (ip4_input_node.index);
+ pn->unformat_edit = unformat_pg_ip4_header;
+ pn = pg_get_node (ip4_input_no_checksum_node.index);
+ pn->unformat_edit = unformat_pg_ip4_header;
+ }
+
+ if ((error = vlib_call_init_function (vm, ip4_cli_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_source_check_init)))
+ return error;
+
+ if ((error = vlib_call_init_function
+ (vm, ip4_source_and_port_range_check_init)))
+ return error;
+
+ /* Set flow hash to something non-zero. */
+ ip4_main.flow_hash_seed = 0xdeadbeef;
+
+ /* Default TTL for packets we generate. */
+ ip4_main.host_config.ttl = 64;
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ip4_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
new file mode 100644
index 00000000000..6e3d0e8068b
--- /dev/null
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -0,0 +1,568 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_fib.h: ip4 mtrie fib
+ *
+ * Copyright (c) 2012 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/fib/fib_entry.h>
+
+static void
+ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init,
+ uword prefix_len)
+{
+ p->n_non_empty_leafs =
+ ip4_fib_mtrie_leaf_is_empty (init) ? 0 : ARRAY_LEN (p->leaves);
+ memset (p->dst_address_bits_of_leaves, prefix_len,
+ sizeof (p->dst_address_bits_of_leaves));
+
+ /* Initialize leaves. */
+#ifdef CLIB_HAVE_VEC128
+ {
+ u32x4 *l, init_x4;
+
+#ifndef __ALTIVEC__
+ init_x4 = u32x4_splat (init);
+#else
+ {
+ u32x4_union_t y;
+ y.as_u32[0] = init;
+ y.as_u32[1] = init;
+ y.as_u32[2] = init;
+ y.as_u32[3] = init;
+ init_x4 = y.as_u32x4;
+ }
+#endif
+
+ for (l = p->leaves_as_u32x4;
+ l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4)
+ {
+ l[0] = init_x4;
+ l[1] = init_x4;
+ l[2] = init_x4;
+ l[3] = init_x4;
+ }
+ }
+#else
+ {
+ u32 *l;
+
+ for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4)
+ {
+ l[0] = init;
+ l[1] = init;
+ l[2] = init;
+ l[3] = init;
+ }
+ }
+#endif
+}
+
+static ip4_fib_mtrie_leaf_t
+ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf,
+ uword prefix_len)
+{
+ ip4_fib_mtrie_ply_t *p;
+
+ /* Get cache aligned ply. */
+ pool_get_aligned (m->ply_pool, p, sizeof (p[0]));
+
+ ply_init (p, init_leaf, prefix_len);
+ return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool);
+}
+
+always_inline ip4_fib_mtrie_ply_t *
+get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
+{
+ uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
+ /* It better not be the root ply. */
+ ASSERT (n != 0);
+ return pool_elt_at_index (m->ply_pool, n);
+}
+
+static void
+ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
+{
+ uword i, is_root;
+
+ is_root = p - m->ply_pool == 0;
+
+ for (i = 0; i < ARRAY_LEN (p->leaves); i++)
+ {
+ ip4_fib_mtrie_leaf_t l = p->leaves[i];
+ if (ip4_fib_mtrie_leaf_is_next_ply (l))
+ ply_free (m, get_next_ply_for_leaf (m, l));
+ }
+
+ if (is_root)
+ ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0);
+ else
+ pool_put (m->ply_pool, p);
+}
+
+void
+ip4_fib_free (ip4_fib_mtrie_t * m)
+{
+ ip4_fib_mtrie_ply_t *root_ply = pool_elt_at_index (m->ply_pool, 0);
+ ply_free (m, root_ply);
+}
+
+u32
+ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst)
+{
+ ip4_fib_mtrie_ply_t *p = pool_elt_at_index (m->ply_pool, 0);
+ ip4_fib_mtrie_leaf_t l;
+
+ l = p->leaves[dst.as_u8[0]];
+ if (ip4_fib_mtrie_leaf_is_terminal (l))
+ return ip4_fib_mtrie_leaf_get_adj_index (l);
+
+ p = get_next_ply_for_leaf (m, l);
+ l = p->leaves[dst.as_u8[1]];
+ if (ip4_fib_mtrie_leaf_is_terminal (l))
+ return ip4_fib_mtrie_leaf_get_adj_index (l);
+
+ p = get_next_ply_for_leaf (m, l);
+ l = p->leaves[dst.as_u8[2]];
+ if (ip4_fib_mtrie_leaf_is_terminal (l))
+ return ip4_fib_mtrie_leaf_get_adj_index (l);
+
+ p = get_next_ply_for_leaf (m, l);
+ l = p->leaves[dst.as_u8[3]];
+
+ ASSERT (ip4_fib_mtrie_leaf_is_terminal (l));
+ return ip4_fib_mtrie_leaf_get_adj_index (l);
+}
+
+typedef struct
+{
+ ip4_address_t dst_address;
+ u32 dst_address_length;
+ u32 adj_index;
+} ip4_fib_mtrie_set_unset_leaf_args_t;
+
+static void
+set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
+ ip4_fib_mtrie_ply_t * ply,
+ ip4_fib_mtrie_leaf_t new_leaf,
+ uword new_leaf_dst_address_bits)
+{
+ ip4_fib_mtrie_leaf_t old_leaf;
+ uword i;
+
+ ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
+ ASSERT (!ip4_fib_mtrie_leaf_is_empty (new_leaf));
+
+ for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
+ {
+ old_leaf = ply->leaves[i];
+
+ /* Recurse into sub plies. */
+ if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ {
+ ip4_fib_mtrie_ply_t *sub_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
+ new_leaf_dst_address_bits);
+ }
+
+ /* Replace less specific terminal leaves with new leaf. */
+ else if (new_leaf_dst_address_bits >=
+ ply->dst_address_bits_of_leaves[i])
+ {
+ __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf);
+ ASSERT (ply->leaves[i] == new_leaf);
+ ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
+ ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf);
+ }
+ }
+}
+
+static void
+set_leaf (ip4_fib_mtrie_t * m,
+ ip4_fib_mtrie_set_unset_leaf_args_t * a,
+ u32 old_ply_index, u32 dst_address_byte_index)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
+ i32 n_dst_bits_next_plies;
+ u8 dst_byte;
+
+ ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
+ ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
+
+ n_dst_bits_next_plies =
+ a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
+
+ dst_byte = a->dst_address.as_u8[dst_address_byte_index];
+
+ /* Number of bits next plies <= 0 => insert leaves this ply. */
+ if (n_dst_bits_next_plies <= 0)
+ {
+ uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
+
+ n_dst_bits_this_ply = -n_dst_bits_next_plies;
+ ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
+ pow2_mask (n_dst_bits_this_ply)) == 0);
+
+ for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
+ {
+ ip4_fib_mtrie_ply_t *old_ply, *new_ply;
+
+ old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+
+ old_leaf = old_ply->leaves[i];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ /* Is leaf to be inserted more specific? */
+ if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
+ {
+ new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ if (old_leaf_is_terminal)
+ {
+ old_ply->dst_address_bits_of_leaves[i] =
+ a->dst_address_length;
+ __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
+ new_leaf);
+ ASSERT (old_ply->leaves[i] == new_leaf);
+ old_ply->n_non_empty_leafs +=
+ ip4_fib_mtrie_leaf_is_empty (old_leaf);
+ ASSERT (old_ply->n_non_empty_leafs <=
+ ARRAY_LEN (old_ply->leaves));
+ }
+ else
+ {
+ /* Existing leaf points to another ply. We need to place new_leaf into all
+ more specific slots. */
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+ a->dst_address_length);
+ }
+ }
+
+ else if (!old_leaf_is_terminal)
+ {
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_leaf (m, a, new_ply - m->ply_pool,
+ dst_address_byte_index + 1);
+ }
+ }
+ }
+ else
+ {
+ ip4_fib_mtrie_ply_t *old_ply, *new_ply;
+
+ old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+ old_leaf = old_ply->leaves[dst_byte];
+ if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ {
+ new_leaf =
+ ply_create (m, old_leaf,
+ old_ply->dst_address_bits_of_leaves[dst_byte]);
+ new_ply = get_next_ply_for_leaf (m, new_leaf);
+
+ /* Refetch since ply_create may move pool. */
+ old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+
+ __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
+ new_leaf);
+ ASSERT (old_ply->leaves[dst_byte] == new_leaf);
+ old_ply->dst_address_bits_of_leaves[dst_byte] = 0;
+
+ old_ply->n_non_empty_leafs -=
+ ip4_fib_mtrie_leaf_is_non_empty (old_leaf);
+ ASSERT (old_ply->n_non_empty_leafs >= 0);
+
+ /* Account for the ply we just created. */
+ old_ply->n_non_empty_leafs += 1;
+ }
+ else
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+
+ set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1);
+ }
+}
+
+static uword
+unset_leaf (ip4_fib_mtrie_t * m,
+ ip4_fib_mtrie_set_unset_leaf_args_t * a,
+ ip4_fib_mtrie_ply_t * old_ply, u32 dst_address_byte_index)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
+ i32 n_dst_bits_next_plies;
+ i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
+ u8 dst_byte;
+
+ ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
+ ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
+
+ n_dst_bits_next_plies =
+ a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
+
+ dst_byte = a->dst_address.as_u8[dst_address_byte_index];
+ if (n_dst_bits_next_plies < 0)
+ dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);
+
+ n_dst_bits_this_ply =
+ n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
+ n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
+
+ del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
+ {
+ old_leaf = old_ply->leaves[i];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ if (old_leaf == del_leaf
+ || (!old_leaf_is_terminal
+ && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
+ dst_address_byte_index + 1)))
+ {
+ old_ply->leaves[i] = IP4_FIB_MTRIE_LEAF_EMPTY;
+ old_ply->dst_address_bits_of_leaves[i] = 0;
+
+ /* No matter what we just deleted a non-empty leaf. */
+ ASSERT (!ip4_fib_mtrie_leaf_is_empty (old_leaf));
+ old_ply->n_non_empty_leafs -= 1;
+
+ ASSERT (old_ply->n_non_empty_leafs >= 0);
+ if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
+ {
+ pool_put (m->ply_pool, old_ply);
+ /* Old ply was deleted. */
+ return 1;
+ }
+ }
+ }
+
+ /* Old ply was not deleted. */
+ return 0;
+}
+
+void
+ip4_mtrie_init (ip4_fib_mtrie_t * m)
+{
+ ip4_fib_mtrie_leaf_t root;
+ memset (m, 0, sizeof (m[0]));
+ m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
+ root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, /* dst_address_bits_of_leaves */
+ 0);
+ ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0);
+}
+
+void
+ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
+ ip4_address_t dst_address,
+ u32 dst_address_length,
+ u32 adj_index, u32 is_del)
+{
+ ip4_fib_mtrie_t *m = &fib->mtrie;
+ ip4_fib_mtrie_ply_t *root_ply;
+ ip4_fib_mtrie_set_unset_leaf_args_t a;
+ ip4_main_t *im = &ip4_main;
+
+ ASSERT (m->ply_pool != 0);
+
+ root_ply = pool_elt_at_index (m->ply_pool, 0);
+
+ /* Honor dst_address_length. Fib masks are in network byte order */
+ dst_address.as_u32 &= im->fib_masks[dst_address_length];
+ a.dst_address = dst_address;
+ a.dst_address_length = dst_address_length;
+ a.adj_index = adj_index;
+
+ if (!is_del)
+ {
+ if (dst_address_length == 0)
+ m->default_leaf = ip4_fib_mtrie_leaf_set_adj_index (adj_index);
+ else
+ set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
+ }
+ else
+ {
+ if (dst_address_length == 0)
+ m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
+
+ else
+ {
+ ip4_main_t *im = &ip4_main;
+ uword i;
+
+ unset_leaf (m, &a, root_ply, 0);
+
+ /* Find next less specific route and insert into mtrie. */
+ for (i = dst_address_length - 1; i >= 1; i--)
+ {
+ uword *p;
+ index_t lbi;
+ ip4_address_t key;
+
+ if (!fib->fib_entry_by_dst_address[i])
+ continue;
+
+ key.as_u32 = dst_address.as_u32 & im->fib_masks[i];
+ p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32);
+ if (p)
+ {
+ lbi = fib_entry_contribute_ip_forwarding (p[0])->dpoi_index;
+ if (INDEX_INVALID == lbi)
+ continue;
+
+ a.dst_address = key;
+ a.adj_index = lbi;
+ a.dst_address_length = i;
+
+ set_leaf (m, &a, /* ply_index */ 0,
+ /* dst_address_byte_index */ 0);
+ break;
+ }
+ }
+ }
+ }
+}
+
+/* Returns number of bytes of memory used by mtrie. */
+static uword
+mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
+{
+ uword bytes, i;
+
+ if (!p)
+ {
+ if (pool_is_free_index (m->ply_pool, 0))
+ return 0;
+ p = pool_elt_at_index (m->ply_pool, 0);
+ }
+
+ bytes = sizeof (p[0]);
+ for (i = 0; i < ARRAY_LEN (p->leaves); i++)
+ {
+ ip4_fib_mtrie_leaf_t l = p->leaves[i];
+ if (ip4_fib_mtrie_leaf_is_next_ply (l))
+ bytes += mtrie_memory_usage (m, get_next_ply_for_leaf (m, l));
+ }
+
+ return bytes;
+}
+
+static u8 *
+format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
+{
+ ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
+
+ if (ip4_fib_mtrie_leaf_is_empty (l))
+ s = format (s, "miss");
+ else if (ip4_fib_mtrie_leaf_is_terminal (l))
+ s = format (s, "adj %d", ip4_fib_mtrie_leaf_get_adj_index (l));
+ else
+ s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
+ return s;
+}
+
+static u8 *
+format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
+{
+ ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+ u32 base_address = va_arg (*va, u32);
+ u32 ply_index = va_arg (*va, u32);
+ u32 dst_address_byte_index = va_arg (*va, u32);
+ ip4_fib_mtrie_ply_t *p;
+ uword i, indent;
+
+ p = pool_elt_at_index (m->ply_pool, ply_index);
+ indent = format_get_indent (s);
+ s =
+ format (s, "ply index %d, %d non-empty leaves", ply_index,
+ p->n_non_empty_leafs);
+ for (i = 0; i < ARRAY_LEN (p->leaves); i++)
+ {
+ ip4_fib_mtrie_leaf_t l = p->leaves[i];
+
+ if (!ip4_fib_mtrie_leaf_is_empty (l))
+ {
+ u32 a, ia_length;
+ ip4_address_t ia;
+
+ a = base_address + (i << (24 - 8 * dst_address_byte_index));
+ ia.as_u32 = clib_host_to_net_u32 (a);
+ if (ip4_fib_mtrie_leaf_is_terminal (l))
+ ia_length = p->dst_address_bits_of_leaves[i];
+ else
+ ia_length = 8 * (1 + dst_address_byte_index);
+ s = format (s, "\n%U%20U %U",
+ format_white_space, indent + 2,
+ format_ip4_address_and_length, &ia, ia_length,
+ format_ip4_fib_mtrie_leaf, l);
+
+ if (ip4_fib_mtrie_leaf_is_next_ply (l))
+ s = format (s, "\n%U%U",
+ format_white_space, indent + 2,
+ format_ip4_fib_mtrie_ply, m, a,
+ ip4_fib_mtrie_leaf_get_next_ply_index (l),
+ dst_address_byte_index + 1);
+ }
+ }
+
+ return s;
+}
+
+u8 *
+format_ip4_fib_mtrie (u8 * s, va_list * va)
+{
+ ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+
+ s = format (s, "%d plies, memory usage %U",
+ pool_elts (m->ply_pool),
+ format_memory_size, mtrie_memory_usage (m, 0));
+
+ if (pool_elts (m->ply_pool) > 0)
+ {
+ ip4_address_t base_address;
+ base_address.as_u32 = 0;
+ s =
+ format (s, "\n %U", format_ip4_fib_mtrie_ply, m, base_address, 0, 0);
+ }
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
new file mode 100644
index 00000000000..c0afc2cf842
--- /dev/null
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_fib.h: ip4 mtrie fib
+ *
+ * Copyright (c) 2012 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip4_fib_h
+#define included_ip_ip4_fib_h
+
+#include <vppinfra/cache.h>
+#include <vppinfra/vector.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/ip/ip4_packet.h> /* for ip4_address_t */
+
+/* ip4 fib leafs: 4 ply 8-8-8-8 mtrie.
+ 1 + 2*adj_index for terminal leaves.
+ 0 + 2*next_ply_index for non-terminals.
+ 1 => empty (adjacency index of zero is special miss adjacency). */
+typedef u32 ip4_fib_mtrie_leaf_t;
+
+#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0)
+#define IP4_FIB_MTRIE_LEAF_ROOT (0 + 2*0)
+
+always_inline u32
+ip4_fib_mtrie_leaf_is_empty (ip4_fib_mtrie_leaf_t n)
+{
+ return n == IP4_FIB_MTRIE_LEAF_EMPTY;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_leaf_t n)
+{
+ return n != IP4_FIB_MTRIE_LEAF_EMPTY;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
+{
+ return n & 1;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
+{
+ ASSERT (ip4_fib_mtrie_leaf_is_terminal (n));
+ return n >> 1;
+}
+
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
+{
+ ip4_fib_mtrie_leaf_t l;
+ l = 1 + 2 * adj_index;
+ ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
+ return l;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
+{
+ return (n & 1) == 0;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
+{
+ ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
+ return n >> 1;
+}
+
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
+{
+ ip4_fib_mtrie_leaf_t l;
+ l = 0 + 2 * i;
+ ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
+ return l;
+}
+
+/* One ply of the 4 ply mtrie fib. */
+typedef struct
+{
+ union
+ {
+ ip4_fib_mtrie_leaf_t leaves[256];
+
+#ifdef CLIB_HAVE_VEC128
+ u32x4 leaves_as_u32x4[256 / 4];
+#endif
+ };
+
+ /* Prefix length for terminal leaves. */
+ u8 dst_address_bits_of_leaves[256];
+
+ /* Number of non-empty leafs (whether terminal or not). */
+ i32 n_non_empty_leafs;
+
+ /* Pad to cache line boundary. */
+ u8 pad[CLIB_CACHE_LINE_BYTES - 1 * sizeof (i32)];
+}
+ip4_fib_mtrie_ply_t;
+
+STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_ply_t) % CLIB_CACHE_LINE_BYTES,
+ "IP4 Mtrie ply cache line");
+
+typedef struct
+{
+ /* Pool of plies. Index zero is root ply. */
+ ip4_fib_mtrie_ply_t *ply_pool;
+
+ /* Special case leaf for default route 0.0.0.0/0. */
+ ip4_fib_mtrie_leaf_t default_leaf;
+} ip4_fib_mtrie_t;
+
+void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m);
+
+struct ip4_fib_t;
+
+void ip4_fib_mtrie_add_del_route (struct ip4_fib_t *f,
+ ip4_address_t dst_address,
+ u32 dst_address_length,
+ u32 adj_index, u32 is_del);
+
+/* Returns adjacency index. */
+u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst);
+
+format_function_t format_ip4_fib_mtrie;
+
+/* Lookup step. Processes 1 byte of 4 byte ip4 address. */
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_lookup_step (ip4_fib_mtrie_t * m,
+ ip4_fib_mtrie_leaf_t current_leaf,
+ const ip4_address_t * dst_address,
+ u32 dst_address_byte_index)
+{
+ ip4_fib_mtrie_leaf_t next_leaf;
+ ip4_fib_mtrie_ply_t *ply;
+ uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf);
+
+ ply = m->ply_pool + (current_is_terminal ? 0 : (current_leaf >> 1));
+ next_leaf = ply->leaves[dst_address->as_u8[dst_address_byte_index]];
+ next_leaf = current_is_terminal ? current_leaf : next_leaf;
+
+ return next_leaf;
+}
+
+#endif /* included_ip_ip4_fib_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h
new file mode 100644
index 00000000000..8da788b411f
--- /dev/null
+++ b/src/vnet/ip/ip4_packet.h
@@ -0,0 +1,384 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip4/packet.h: ip4 packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip4_packet_h
+#define included_ip4_packet_h
+
+#include <vnet/ip/ip_packet.h> /* for ip_csum_t */
+#include <vnet/ip/tcp_packet.h> /* for tcp_header_t */
+#include <vppinfra/byte_order.h> /* for clib_net_to_host_u16 */
+
+/* IP4 address which can be accessed either as 4 bytes
+ or as a 32-bit number. */
+typedef union
+{
+ u8 data[4];
+ u32 data_u32;
+ /* Aliases. */
+ u8 as_u8[4];
+ u32 as_u32;
+} ip4_address_t;
+
+typedef struct
+{
+ /* IP address must be first for ip_interface_address_get_address() to work */
+ ip4_address_t ip4_addr;
+ u32 fib_index;
+} ip4_address_fib_t;
+
+always_inline void
+ip4_addr_fib_init (ip4_address_fib_t * addr_fib, ip4_address_t * address,
+ u32 fib_index)
+{
+ clib_memcpy (&addr_fib->ip4_addr, address, sizeof (addr_fib->ip4_addr));
+ addr_fib->fib_index = fib_index;
+}
+
+/* (src,dst) pair of addresses as found in packet header. */
+typedef struct
+{
+ ip4_address_t src, dst;
+} ip4_address_pair_t;
+
+/* If address is a valid netmask, return length of mask. */
+always_inline uword
+ip4_address_netmask_length (ip4_address_t * a)
+{
+ uword result = 0;
+ uword i;
+ for (i = 0; i < ARRAY_LEN (a->as_u8); i++)
+ {
+ switch (a->as_u8[i])
+ {
+ case 0xff:
+ result += 8;
+ break;
+ case 0xfe:
+ result += 7;
+ goto done;
+ case 0xfc:
+ result += 6;
+ goto done;
+ case 0xf8:
+ result += 5;
+ goto done;
+ case 0xf0:
+ result += 4;
+ goto done;
+ case 0xe0:
+ result += 3;
+ goto done;
+ case 0xc0:
+ result += 2;
+ goto done;
+ case 0x80:
+ result += 1;
+ goto done;
+ case 0x00:
+ result += 0;
+ goto done;
+ default:
+ /* Not a valid netmask mask. */
+ return ~0;
+ }
+ }
+done:
+ return result;
+}
+
+typedef union
+{
+ struct
+ {
+ /* 4 bit packet length (in 32bit units) and version VVVVLLLL.
+ e.g. for packets w/ no options ip_version_and_header_length == 0x45. */
+ u8 ip_version_and_header_length;
+
+ /* Type of service. */
+ u8 tos;
+
+ /* Total layer 3 packet length including this header. */
+ u16 length;
+
+ /* Fragmentation ID. */
+ u16 fragment_id;
+
+ /* 3 bits of flags and 13 bits of fragment offset (in units
+ of 8 byte quantities). */
+ u16 flags_and_fragment_offset;
+#define IP4_HEADER_FLAG_MORE_FRAGMENTS (1 << 13)
+#define IP4_HEADER_FLAG_DONT_FRAGMENT (1 << 14)
+#define IP4_HEADER_FLAG_CONGESTION (1 << 15)
+
+ /* Time to live decremented by router at each hop. */
+ u8 ttl;
+
+ /* Next level protocol packet. */
+ u8 protocol;
+
+ /* Checksum. */
+ u16 checksum;
+
+ /* Source and destination address. */
+ union
+ {
+ struct
+ {
+ ip4_address_t src_address, dst_address;
+ };
+ ip4_address_pair_t address_pair;
+ };
+ };
+
+ /* For checksumming we'll want to access IP header in word sized chunks. */
+ /* For 64 bit machines. */
+ /* *INDENT-OFF* */
+ CLIB_PACKED (struct {
+ u64 checksum_data_64[2];
+ u32 checksum_data_64_32[1];
+ });
+ /* *INDENT-ON* */
+
+ /* For 32 bit machines. */
+ /* *INDENT-OFF* */
+ CLIB_PACKED (struct {
+ u32 checksum_data_32[5];
+ });
+ /* *INDENT-ON* */
+} ip4_header_t;
+
+/* Value of ip_version_and_header_length for packets w/o options. */
+#define IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS \
+ ((4 << 4) | (sizeof (ip4_header_t) / sizeof (u32)))
+
+always_inline int
+ip4_get_fragment_offset (ip4_header_t * i)
+{
+ return clib_net_to_host_u16 (i->flags_and_fragment_offset) & 0x1fff;
+}
+
+always_inline int
+ip4_get_fragment_more (ip4_header_t * i)
+{
+ return clib_net_to_host_u16 (i->flags_and_fragment_offset) &
+ IP4_HEADER_FLAG_MORE_FRAGMENTS;
+}
+
+always_inline int
+ip4_is_fragment (ip4_header_t * i)
+{
+ return (i->flags_and_fragment_offset &
+ clib_net_to_host_u16 (0x1fff | IP4_HEADER_FLAG_MORE_FRAGMENTS));
+}
+
+always_inline int
+ip4_is_first_fragment (ip4_header_t * i)
+{
+ return (i->flags_and_fragment_offset &
+ clib_net_to_host_u16 (0x1fff | IP4_HEADER_FLAG_MORE_FRAGMENTS)) ==
+ clib_net_to_host_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+}
+
+/* Fragment offset in bytes. */
+always_inline int
+ip4_get_fragment_offset_bytes (ip4_header_t * i)
+{
+ return 8 * ip4_get_fragment_offset (i);
+}
+
+always_inline int
+ip4_header_bytes (ip4_header_t * i)
+{
+ return sizeof (u32) * (i->ip_version_and_header_length & 0xf);
+}
+
+always_inline void *
+ip4_next_header (ip4_header_t * i)
+{
+ return (void *) i + ip4_header_bytes (i);
+}
+
+always_inline u16
+ip4_header_checksum (ip4_header_t * i)
+{
+ u16 save, csum;
+ ip_csum_t sum;
+
+ save = i->checksum;
+ i->checksum = 0;
+ sum = ip_incremental_checksum (0, i, ip4_header_bytes (i));
+ csum = ~ip_csum_fold (sum);
+
+ i->checksum = save;
+
+ /* Make checksum agree for special case where either
+ 0 or 0xffff would give same 1s complement sum. */
+ if (csum == 0 && save == 0xffff)
+ csum = save;
+
+ return csum;
+}
+
+static inline uword
+ip4_header_checksum_is_valid (ip4_header_t * i)
+{
+ return i->checksum == ip4_header_checksum (i);
+}
+
+#define ip4_partial_header_checksum_x1(ip0,sum0) \
+do { \
+ if (BITS (ip_csum_t) > 32) \
+ { \
+ sum0 = ip0->checksum_data_64[0]; \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64[1]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64_32[0]); \
+ } \
+ else \
+ { \
+ sum0 = ip0->checksum_data_32[0]; \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[1]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[2]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[3]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[4]); \
+ } \
+} while (0)
+
+#define ip4_partial_header_checksum_x2(ip0,ip1,sum0,sum1) \
+do { \
+ if (BITS (ip_csum_t) > 32) \
+ { \
+ sum0 = ip0->checksum_data_64[0]; \
+ sum1 = ip1->checksum_data_64[0]; \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64[1]); \
+ sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_64[1]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64_32[0]); \
+ sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_64_32[0]); \
+ } \
+ else \
+ { \
+ sum0 = ip0->checksum_data_32[0]; \
+ sum1 = ip1->checksum_data_32[0]; \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[1]); \
+ sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[1]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[2]); \
+ sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[2]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[3]); \
+ sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[3]); \
+ sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[4]); \
+ sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[4]); \
+ } \
+} while (0)
+
+always_inline uword
+ip4_address_is_multicast (ip4_address_t * a)
+{
+ return (a->data[0] & 0xf0) == 0xe0;
+}
+
+always_inline void
+ip4_multicast_address_set_for_group (ip4_address_t * a,
+ ip_multicast_group_t g)
+{
+ ASSERT ((u32) g < (1 << 28));
+ a->as_u32 = clib_host_to_net_u32 ((0xe << 28) + g);
+}
+
+always_inline void
+ip4_multicast_ethernet_address (u8 * ethernet_address, ip4_address_t * a)
+{
+ u8 *d = a->as_u8;
+
+ ethernet_address[0] = 0x01;
+ ethernet_address[1] = 0x00;
+ ethernet_address[2] = 0x5e;
+ ethernet_address[3] = d[1] & 0x7f;
+ ethernet_address[4] = d[2];
+ ethernet_address[5] = d[3];
+}
+
+always_inline void
+ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0)
+{
+ u32 src0, dst0;
+
+ src0 = ip0->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ ip0->src_address.data_u32 = dst0;
+ ip0->dst_address.data_u32 = src0;
+
+ src0 = tcp0->ports.src;
+ dst0 = tcp0->ports.dst;
+ tcp0->ports.src = dst0;
+ tcp0->ports.dst = src0;
+}
+
+always_inline void
+ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1,
+ tcp_header_t * tcp0, tcp_header_t * tcp1)
+{
+ u32 src0, dst0, src1, dst1;
+
+ src0 = ip0->src_address.data_u32;
+ src1 = ip1->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ dst1 = ip1->dst_address.data_u32;
+ ip0->src_address.data_u32 = dst0;
+ ip1->src_address.data_u32 = dst1;
+ ip0->dst_address.data_u32 = src0;
+ ip1->dst_address.data_u32 = src1;
+
+ src0 = tcp0->ports.src;
+ src1 = tcp1->ports.src;
+ dst0 = tcp0->ports.dst;
+ dst1 = tcp1->ports.dst;
+ tcp0->ports.src = dst0;
+ tcp1->ports.src = dst1;
+ tcp0->ports.dst = src0;
+ tcp1->ports.dst = src1;
+}
+
+#endif /* included_ip4_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_pg.c b/src/vnet/ip/ip4_pg.c
new file mode 100644
index 00000000000..9697a3b9c89
--- /dev/null
+++ b/src/vnet/ip/ip4_pg.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_pg: IP v4 packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+#define IP4_PG_EDIT_CHECKSUM (1 << 0)
+#define IP4_PG_EDIT_LENGTH (1 << 1)
+
+static_always_inline void
+compute_length_and_or_checksum (vlib_main_t * vm,
+ u32 * packets,
+ u32 n_packets,
+ u32 ip_header_offset, u32 flags)
+{
+ ASSERT (flags != 0);
+
+ while (n_packets >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ ip_csum_t sum0, sum1;
+
+ pi0 = packets[0];
+ pi1 = packets[1];
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ n_packets -= 2;
+ packets += 2;
+
+ ip0 = (void *) (p0->data + ip_header_offset);
+ ip1 = (void *) (p1->data + ip_header_offset);
+
+ if (flags & IP4_PG_EDIT_LENGTH)
+ {
+ ip0->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) -
+ ip_header_offset);
+ ip1->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p1) -
+ ip_header_offset);
+ }
+
+ if (flags & IP4_PG_EDIT_CHECKSUM)
+ {
+ ASSERT (ip4_header_bytes (ip0) == sizeof (ip0[0]));
+ ASSERT (ip4_header_bytes (ip1) == sizeof (ip1[0]));
+
+ ip0->checksum = 0;
+ ip1->checksum = 0;
+
+ ip4_partial_header_checksum_x2 (ip0, ip1, sum0, sum1);
+ ip0->checksum = ~ip_csum_fold (sum0);
+ ip1->checksum = ~ip_csum_fold (sum1);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ ASSERT (ip1->checksum == ip4_header_checksum (ip1));
+ }
+ }
+
+ while (n_packets >= 1)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ ip_csum_t sum0;
+
+ pi0 = packets[0];
+ p0 = vlib_get_buffer (vm, pi0);
+ n_packets -= 1;
+ packets += 1;
+
+ ip0 = (void *) (p0->data + ip_header_offset);
+
+ if (flags & IP4_PG_EDIT_LENGTH)
+ ip0->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) -
+ ip_header_offset);
+
+ if (flags & IP4_PG_EDIT_CHECKSUM)
+ {
+ ASSERT (ip4_header_bytes (ip0) == sizeof (ip0[0]));
+
+ ip0->checksum = 0;
+
+ ip4_partial_header_checksum_x1 (ip0, sum0);
+ ip0->checksum = ~ip_csum_fold (sum0);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ }
+ }
+}
+
+static void
+ip4_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_offset;
+
+ ip_offset = g->start_byte_offset;
+
+ switch (g->edit_function_opaque)
+ {
+ case IP4_PG_EDIT_LENGTH:
+ compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+ IP4_PG_EDIT_LENGTH);
+ break;
+
+ case IP4_PG_EDIT_CHECKSUM:
+ compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+ IP4_PG_EDIT_CHECKSUM);
+ break;
+
+ case IP4_PG_EDIT_LENGTH | IP4_PG_EDIT_CHECKSUM:
+ compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+ IP4_PG_EDIT_LENGTH
+ | IP4_PG_EDIT_CHECKSUM);
+ break;
+
+ default:
+ ASSERT (0);
+ break;
+ }
+}
+
+typedef struct
+{
+ pg_edit_t ip_version, header_length;
+ pg_edit_t tos;
+ pg_edit_t length;
+
+ pg_edit_t fragment_id, fragment_offset;
+
+ /* Flags together with fragment offset. */
+ pg_edit_t mf_flag, df_flag, ce_flag;
+
+ pg_edit_t ttl;
+
+ pg_edit_t protocol;
+
+ pg_edit_t checksum;
+
+ pg_edit_t src_address, dst_address;
+} pg_ip4_header_t;
+
+static inline void
+pg_ip4_header_init (pg_ip4_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the IP header. */
+#define _(f) pg_edit_init (&p->f, ip4_header_t, f);
+ _(tos);
+ _(length);
+ _(fragment_id);
+ _(ttl);
+ _(protocol);
+ _(checksum);
+ _(src_address);
+ _(dst_address);
+#undef _
+
+ /* Initialize bit fields. */
+ pg_edit_init_bitfield (&p->header_length, ip4_header_t,
+ ip_version_and_header_length, 0, 4);
+ pg_edit_init_bitfield (&p->ip_version, ip4_header_t,
+ ip_version_and_header_length, 4, 4);
+
+ pg_edit_init_bitfield (&p->fragment_offset, ip4_header_t,
+ flags_and_fragment_offset, 0, 13);
+ pg_edit_init_bitfield (&p->mf_flag, ip4_header_t,
+ flags_and_fragment_offset, 13, 1);
+ pg_edit_init_bitfield (&p->df_flag, ip4_header_t,
+ flags_and_fragment_offset, 14, 1);
+ pg_edit_init_bitfield (&p->ce_flag, ip4_header_t,
+ flags_and_fragment_offset, 15, 1);
+}
+
+uword
+unformat_pg_ip4_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_ip4_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ip4_header_t),
+ &group_index);
+ pg_ip4_header_init (p);
+
+ /* Defaults. */
+ pg_edit_set_fixed (&p->ip_version, 4);
+ pg_edit_set_fixed (&p->header_length, sizeof (ip4_header_t) / sizeof (u32));
+
+ pg_edit_set_fixed (&p->tos, 0);
+ pg_edit_set_fixed (&p->ttl, 64);
+
+ pg_edit_set_fixed (&p->fragment_id, 0);
+ pg_edit_set_fixed (&p->fragment_offset, 0);
+ pg_edit_set_fixed (&p->mf_flag, 0);
+ pg_edit_set_fixed (&p->df_flag, 0);
+ pg_edit_set_fixed (&p->ce_flag, 0);
+
+ p->length.type = PG_EDIT_UNSPECIFIED;
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+
+ if (unformat (input, "%U: %U -> %U",
+ unformat_pg_edit,
+ unformat_ip_protocol, &p->protocol,
+ unformat_pg_edit,
+ unformat_ip4_address, &p->src_address,
+ unformat_pg_edit, unformat_ip4_address, &p->dst_address))
+ goto found;
+
+ if (!unformat (input, "%U:",
+ unformat_pg_edit, unformat_ip_protocol, &p->protocol))
+ goto error;
+
+found:
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "version %U",
+ unformat_pg_edit, unformat_pg_number, &p->ip_version))
+ ;
+
+ else if (unformat (input, "header-length %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->header_length))
+ ;
+
+ else if (unformat (input, "tos %U",
+ unformat_pg_edit, unformat_pg_number, &p->tos))
+ ;
+
+ else if (unformat (input, "length %U",
+ unformat_pg_edit, unformat_pg_number, &p->length))
+ ;
+
+ else if (unformat (input, "checksum %U",
+ unformat_pg_edit, unformat_pg_number, &p->checksum))
+ ;
+
+ else if (unformat (input, "ttl %U",
+ unformat_pg_edit, unformat_pg_number, &p->ttl))
+ ;
+
+ else if (unformat (input, "fragment id %U offset %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->fragment_id,
+ unformat_pg_edit,
+ unformat_pg_number, &p->fragment_offset))
+ {
+ int i;
+ for (i = 0; i < ARRAY_LEN (p->fragment_offset.values); i++)
+ pg_edit_set_value (&p->fragment_offset, i,
+ pg_edit_get_value (&p->fragment_offset,
+ i) / 8);
+
+ }
+
+ /* Flags. */
+ else if (unformat (input, "mf") || unformat (input, "MF"))
+ pg_edit_set_fixed (&p->mf_flag, 1);
+
+ else if (unformat (input, "df") || unformat (input, "DF"))
+ pg_edit_set_fixed (&p->df_flag, 1);
+
+ else if (unformat (input, "ce") || unformat (input, "CE"))
+ pg_edit_set_fixed (&p->ce_flag, 1);
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_t protocol;
+ ip_protocol_info_t *pi;
+
+ pi = 0;
+ if (p->protocol.type == PG_EDIT_FIXED)
+ {
+ protocol = pg_edit_get_value (&p->protocol, PG_EDIT_LO);
+ pi = ip_get_protocol_info (im, protocol);
+ }
+
+ if (pi && pi->unformat_pg_edit
+ && unformat_user (input, pi->unformat_pg_edit, s))
+ ;
+
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ if (p->length.type == PG_EDIT_UNSPECIFIED
+ && s->min_packet_bytes == s->max_packet_bytes
+ && group_index + 1 < vec_len (s->edit_groups))
+ {
+ pg_edit_set_fixed (&p->length,
+ pg_edit_group_n_bytes (s, group_index));
+ }
+
+ /* Compute IP header checksum if all edits are fixed. */
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ ip4_header_t fixed_header, fixed_mask, cmp_mask;
+
+ /* See if header is all fixed and specified except for
+ checksum field. */
+ memset (&cmp_mask, ~0, sizeof (cmp_mask));
+ cmp_mask.checksum = 0;
+
+ pg_edit_group_get_fixed_packet_data (s, group_index,
+ &fixed_header, &fixed_mask);
+ if (!memcmp (&fixed_mask, &cmp_mask, sizeof (cmp_mask)))
+ pg_edit_set_fixed (&p->checksum,
+ clib_net_to_host_u16 (ip4_header_checksum
+ (&fixed_header)));
+ }
+
+ p = pg_get_edit_group (s, group_index);
+ if (p->length.type == PG_EDIT_UNSPECIFIED
+ || p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = ip4_pg_edit_function;
+ g->edit_function_opaque = 0;
+ if (p->length.type == PG_EDIT_UNSPECIFIED)
+ g->edit_function_opaque |= IP4_PG_EDIT_LENGTH;
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ g->edit_function_opaque |= IP4_PG_EDIT_CHECKSUM;
+ }
+
+ return 1;
+ }
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c
new file mode 100644
index 00000000000..ae836a113a5
--- /dev/null
+++ b/src/vnet/ip/ip4_source_and_port_range_check.c
@@ -0,0 +1,1415 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_source_and_port_range_check.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+
+/**
+ * @file
+ * @brief IPv4 Source and Port Range Checking.
+ *
+ * This file contains the source code for IPv4 source and port range
+ * checking.
+ */
+
+
+/**
+ * @brief The pool of range chack DPOs
+ */
+static protocol_port_range_dpo_t *ppr_dpo_pool;
+
+/**
+ * @brief Dynamically registered DPO type
+ */
+static dpo_type_t ppr_dpo_type;
+
+vlib_node_registration_t ip4_source_port_and_range_check_rx;
+vlib_node_registration_t ip4_source_port_and_range_check_tx;
+
+#define foreach_ip4_source_and_port_range_check_error \
+ _(CHECK_FAIL, "ip4 source and port range check bad packets") \
+ _(CHECK_OK, "ip4 source and port range check good packets")
+
+typedef enum
+{
+#define _(sym,str) IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_##sym,
+ foreach_ip4_source_and_port_range_check_error
+#undef _
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_N_ERROR,
+} ip4_source_and_port_range_check_error_t;
+
+static char *ip4_source_and_port_range_check_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip4_source_and_port_range_check_error
+#undef _
+};
+
+typedef struct
+{
+ u32 pass;
+ u32 bypass;
+ u32 is_tcp;
+ ip4_address_t src_addr;
+ u16 port;
+ u32 fib_index;
+} ip4_source_and_port_range_check_trace_t;
+
+static u8 *
+format_ip4_source_and_port_range_check_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ip4_source_and_port_range_check_trace_t *t =
+ va_arg (*va, ip4_source_and_port_range_check_trace_t *);
+
+ if (t->bypass)
+ s = format (s, "PASS (bypass case)");
+ else
+ s = format (s, "fib %d src ip %U %s dst port %d: %s",
+ t->fib_index, format_ip4_address, &t->src_addr,
+ t->is_tcp ? "TCP" : "UDP", (u32) t->port,
+ (t->pass == 1) ? "PASS" : "FAIL");
+ return s;
+}
+
+typedef enum
+{
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP,
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_N_NEXT,
+} ip4_source_and_port_range_check_next_t;
+
+
+static inline u32
+check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo,
+ u16 dst_port, u32 next)
+{
+ u16x8vec_t key;
+ u16x8vec_t diff1;
+ u16x8vec_t diff2;
+ u16x8vec_t sum, sum_equal_diff2;
+ u16 sum_nonzero, sum_equal, winner_mask;
+ int i;
+
+ if (NULL == ppr_dpo || dst_port == 0)
+ return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
+
+ /* Make the obvious screw-case work. A variant also works w/ no MMX */
+ if (PREDICT_FALSE (dst_port == 65535))
+ {
+ int j;
+
+ for (i = 0;
+ i < VLIB_BUFFER_PRE_DATA_SIZE / sizeof (protocol_port_range_t);
+ i++)
+ {
+ for (j = 0; j < 8; j++)
+ if (ppr_dpo->blocks[i].low.as_u16[j] == 65535)
+ return next;
+ }
+ return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
+ }
+
+ key.as_u16x8 = u16x8_splat (dst_port);
+
+ for (i = 0; i < ppr_dpo->n_used_blocks; i++)
+ {
+ diff1.as_u16x8 =
+ u16x8_sub_saturate (ppr_dpo->blocks[i].low.as_u16x8, key.as_u16x8);
+ diff2.as_u16x8 =
+ u16x8_sub_saturate (ppr_dpo->blocks[i].hi.as_u16x8, key.as_u16x8);
+ sum.as_u16x8 = u16x8_add (diff1.as_u16x8, diff2.as_u16x8);
+ sum_equal_diff2.as_u16x8 =
+ u16x8_is_equal (sum.as_u16x8, diff2.as_u16x8);
+ sum_nonzero = ~u16x8_zero_byte_mask (sum.as_u16x8);
+ sum_equal = ~u16x8_zero_byte_mask (sum_equal_diff2.as_u16x8);
+ winner_mask = sum_nonzero & sum_equal;
+ if (winner_mask)
+ return next;
+ }
+ return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
+}
+
+always_inline protocol_port_range_dpo_t *
+protocol_port_range_dpo_get (index_t index)
+{
+ return (pool_elt_at_index (ppr_dpo_pool, index));
+}
+
+always_inline uword
+ip4_source_and_port_range_check_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_tx)
+{
+ ip4_main_t *im = &ip4_main;
+ u32 n_left_from, *from, *to_next;
+ u32 next_index;
+ vlib_node_runtime_t *error_node = node;
+ u32 good_packets = 0;
+ int i;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+
+ /* while (n_left_from >= 4 && n_left_to_next >= 2) */
+ /* { */
+ /* vlib_buffer_t *b0, *b1; */
+ /* ip4_header_t *ip0, *ip1; */
+ /* ip4_fib_mtrie_t *mtrie0, *mtrie1; */
+ /* ip4_fib_mtrie_leaf_t leaf0, leaf1; */
+ /* ip_source_and_port_range_check_config_t *c0, *c1; */
+ /* ip_adjacency_t *adj0 = 0, *adj1 = 0; */
+ /* u32 bi0, next0, adj_index0, pass0, save_next0, fib_index0; */
+ /* u32 bi1, next1, adj_index1, pass1, save_next1, fib_index1; */
+ /* udp_header_t *udp0, *udp1; */
+
+ /* /\* Prefetch next iteration. *\/ */
+ /* { */
+ /* vlib_buffer_t *p2, *p3; */
+
+ /* p2 = vlib_get_buffer (vm, from[2]); */
+ /* p3 = vlib_get_buffer (vm, from[3]); */
+
+ /* vlib_prefetch_buffer_header (p2, LOAD); */
+ /* vlib_prefetch_buffer_header (p3, LOAD); */
+
+ /* CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); */
+ /* CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); */
+ /* } */
+
+ /* bi0 = to_next[0] = from[0]; */
+ /* bi1 = to_next[1] = from[1]; */
+ /* from += 2; */
+ /* to_next += 2; */
+ /* n_left_from -= 2; */
+ /* n_left_to_next -= 2; */
+
+ /* b0 = vlib_get_buffer (vm, bi0); */
+ /* b1 = vlib_get_buffer (vm, bi1); */
+
+ /* fib_index0 = */
+ /* vec_elt (im->fib_index_by_sw_if_index, */
+ /* vnet_buffer (b0)->sw_if_index[VLIB_RX]); */
+ /* fib_index1 = */
+ /* vec_elt (im->fib_index_by_sw_if_index, */
+ /* vnet_buffer (b1)->sw_if_index[VLIB_RX]); */
+
+ /* ip0 = vlib_buffer_get_current (b0); */
+ /* ip1 = vlib_buffer_get_current (b1); */
+
+ /* if (is_tx) */
+ /* { */
+ /* c0 = vnet_get_config_data (&tx_cm->config_main, */
+ /* &b0->current_config_index, */
+ /* &next0, sizeof (c0[0])); */
+ /* c1 = vnet_get_config_data (&tx_cm->config_main, */
+ /* &b1->current_config_index, */
+ /* &next1, sizeof (c1[0])); */
+ /* } */
+ /* else */
+ /* { */
+ /* c0 = vnet_get_config_data (&rx_cm->config_main, */
+ /* &b0->current_config_index, */
+ /* &next0, sizeof (c0[0])); */
+ /* c1 = vnet_get_config_data (&rx_cm->config_main, */
+ /* &b1->current_config_index, */
+ /* &next1, sizeof (c1[0])); */
+ /* } */
+
+ /* /\* we can't use the default VRF here... *\/ */
+ /* for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) */
+ /* { */
+ /* ASSERT (c0->fib_index[i] && c1->fib_index[i]); */
+ /* } */
+
+
+ /* if (is_tx) */
+ /* { */
+ /* if (ip0->protocol == IP_PROTOCOL_UDP) */
+ /* fib_index0 = */
+ /* c0->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; */
+ /* if (ip0->protocol == IP_PROTOCOL_TCP) */
+ /* fib_index0 = */
+ /* c0->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; */
+ /* } */
+ /* else */
+ /* { */
+ /* if (ip0->protocol == IP_PROTOCOL_UDP) */
+ /* fib_index0 = */
+ /* c0->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; */
+ /* if (ip0->protocol == IP_PROTOCOL_TCP) */
+ /* fib_index0 = */
+ /* c0->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; */
+ /* } */
+
+ /* if (PREDICT_TRUE (fib_index0 != ~0)) */
+ /* { */
+
+ /* mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; */
+
+ /* leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; */
+
+ /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */
+ /* &ip0->src_address, 0); */
+
+ /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */
+ /* &ip0->src_address, 1); */
+
+ /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */
+ /* &ip0->src_address, 2); */
+
+ /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */
+ /* &ip0->src_address, 3); */
+
+ /* adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); */
+
+ /* ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, */
+ /* &ip0->src_address, */
+ /* 0 */
+ /* /\* use dflt rt *\/ */
+ /* )); */
+ /* adj0 = ip_get_adjacency (lm, adj_index0); */
+ /* } */
+
+ /* if (is_tx) */
+ /* { */
+ /* if (ip1->protocol == IP_PROTOCOL_UDP) */
+ /* fib_index1 = */
+ /* c1->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; */
+ /* if (ip1->protocol == IP_PROTOCOL_TCP) */
+ /* fib_index1 = */
+ /* c1->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; */
+ /* } */
+ /* else */
+ /* { */
+ /* if (ip1->protocol == IP_PROTOCOL_UDP) */
+ /* fib_index1 = */
+ /* c1->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; */
+ /* if (ip1->protocol == IP_PROTOCOL_TCP) */
+ /* fib_index1 = */
+ /* c1->fib_index */
+ /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; */
+ /* } */
+
+ /* if (PREDICT_TRUE (fib_index1 != ~0)) */
+ /* { */
+
+ /* mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; */
+
+ /* leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; */
+
+ /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */
+ /* &ip1->src_address, 0); */
+
+ /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */
+ /* &ip1->src_address, 1); */
+
+ /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */
+ /* &ip1->src_address, 2); */
+
+ /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */
+ /* &ip1->src_address, 3); */
+
+ /* adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); */
+
+ /* ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, */
+ /* &ip1->src_address, */
+ /* 0)); */
+ /* adj1 = ip_get_adjacency (lm, adj_index1); */
+ /* } */
+
+ /* pass0 = 0; */
+ /* pass0 |= adj0 == 0; */
+ /* pass0 |= ip4_address_is_multicast (&ip0->src_address); */
+ /* pass0 |= */
+ /* ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); */
+ /* pass0 |= (ip0->protocol != IP_PROTOCOL_UDP) */
+ /* && (ip0->protocol != IP_PROTOCOL_TCP); */
+
+ /* pass1 = 0; */
+ /* pass1 |= adj1 == 0; */
+ /* pass1 |= ip4_address_is_multicast (&ip1->src_address); */
+ /* pass1 |= */
+ /* ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); */
+ /* pass1 |= (ip1->protocol != IP_PROTOCOL_UDP) */
+ /* && (ip1->protocol != IP_PROTOCOL_TCP); */
+
+ /* save_next0 = next0; */
+ /* udp0 = ip4_next_header (ip0); */
+ /* save_next1 = next1; */
+ /* udp1 = ip4_next_header (ip1); */
+
+ /* if (PREDICT_TRUE (pass0 == 0)) */
+ /* { */
+ /* good_packets++; */
+ /* next0 = check_adj_port_range_x1 */
+ /* (adj0, clib_net_to_host_u16 (udp0->dst_port), next0); */
+ /* good_packets -= (save_next0 != next0); */
+ /* b0->error = error_node->errors */
+ /* [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; */
+ /* } */
+
+ /* if (PREDICT_TRUE (pass1 == 0)) */
+ /* { */
+ /* good_packets++; */
+ /* next1 = check_adj_port_range_x1 */
+ /* (adj1, clib_net_to_host_u16 (udp1->dst_port), next1); */
+ /* good_packets -= (save_next1 != next1); */
+ /* b1->error = error_node->errors */
+ /* [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; */
+ /* } */
+
+ /* if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) */
+ /* && (b0->flags & VLIB_BUFFER_IS_TRACED))) */
+ /* { */
+ /* ip4_source_and_port_range_check_trace_t *t = */
+ /* vlib_add_trace (vm, node, b0, sizeof (*t)); */
+ /* t->pass = next0 == save_next0; */
+ /* t->bypass = pass0; */
+ /* t->fib_index = fib_index0; */
+ /* t->src_addr.as_u32 = ip0->src_address.as_u32; */
+ /* t->port = (pass0 == 0) ? */
+ /* clib_net_to_host_u16 (udp0->dst_port) : 0; */
+ /* t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP; */
+ /* } */
+
+ /* if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) */
+ /* && (b1->flags & VLIB_BUFFER_IS_TRACED))) */
+ /* { */
+ /* ip4_source_and_port_range_check_trace_t *t = */
+ /* vlib_add_trace (vm, node, b1, sizeof (*t)); */
+ /* t->pass = next1 == save_next1; */
+ /* t->bypass = pass1; */
+ /* t->fib_index = fib_index1; */
+ /* t->src_addr.as_u32 = ip1->src_address.as_u32; */
+ /* t->port = (pass1 == 0) ? */
+ /* clib_net_to_host_u16 (udp1->dst_port) : 0; */
+ /* t->is_tcp = ip1->protocol == IP_PROTOCOL_TCP; */
+ /* } */
+
+ /* vlib_validate_buffer_enqueue_x2 (vm, node, next_index, */
+ /* to_next, n_left_to_next, */
+ /* bi0, bi1, next0, next1); */
+ /* } */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ ip4_header_t *ip0;
+ ip_source_and_port_range_check_config_t *c0;
+ u32 bi0, next0, lb_index0, pass0, save_next0, fib_index0;
+ udp_header_t *udp0;
+ const protocol_port_range_dpo_t *ppr_dpo0 = NULL;
+ const dpo_id_t *dpo;
+ u32 sw_if_index0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
+
+ if (is_tx)
+ vlib_buffer_advance (b0, sizeof (ethernet_header_t));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ c0 = vnet_feature_next_with_data (sw_if_index0, &next0,
+ b0, sizeof (c0[0]));
+
+ /* we can't use the default VRF here... */
+ for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++)
+ {
+ ASSERT (c0->fib_index[i]);
+ }
+
+
+ if (is_tx)
+ {
+ if (ip0->protocol == IP_PROTOCOL_UDP)
+ fib_index0 =
+ c0->fib_index
+ [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN];
+ if (ip0->protocol == IP_PROTOCOL_TCP)
+ fib_index0 =
+ c0->fib_index
+ [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN];
+ }
+ else
+ {
+ if (ip0->protocol == IP_PROTOCOL_UDP)
+ fib_index0 =
+ c0->fib_index
+ [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT];
+ if (ip0->protocol == IP_PROTOCOL_TCP)
+ fib_index0 =
+ c0->fib_index
+ [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT];
+ }
+
+ if (fib_index0 != ~0)
+ {
+ lb_index0 = ip4_fib_forwarding_lookup (fib_index0,
+ &ip0->src_address);
+
+ dpo =
+ load_balance_get_bucket_i (load_balance_get (lb_index0), 0);
+
+ if (ppr_dpo_type == dpo->dpoi_type)
+ {
+ ppr_dpo0 = protocol_port_range_dpo_get (dpo->dpoi_index);
+ }
+ /*
+ * else the lookup hit an enty that was no inserted
+ * by this range checker, which is the default route
+ */
+ }
+ /*
+ * $$$ which (src,dst) categories should we always pass?
+ */
+ pass0 = 0;
+ pass0 |= ip4_address_is_multicast (&ip0->src_address);
+ pass0 |=
+ ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
+ pass0 |= (ip0->protocol != IP_PROTOCOL_UDP)
+ && (ip0->protocol != IP_PROTOCOL_TCP);
+
+ save_next0 = next0;
+ udp0 = ip4_next_header (ip0);
+
+ if (PREDICT_TRUE (pass0 == 0))
+ {
+ good_packets++;
+ next0 = check_adj_port_range_x1
+ (ppr_dpo0, clib_net_to_host_u16 (udp0->dst_port), next0);
+ good_packets -= (save_next0 != next0);
+ b0->error = error_node->errors
+ [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL];
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip4_source_and_port_range_check_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->pass = next0 == save_next0;
+ t->bypass = pass0;
+ t->fib_index = fib_index0;
+ t->src_addr.as_u32 = ip0->src_address.as_u32;
+ t->port = (pass0 == 0) ?
+ clib_net_to_host_u16 (udp0->dst_port) : 0;
+ t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP;
+ }
+
+ if (is_tx)
+ vlib_buffer_advance (b0, -sizeof (ethernet_header_t));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ if (is_tx)
+ vlib_node_increment_counter (vm, ip4_source_port_and_range_check_tx.index,
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_OK,
+ good_packets);
+ else
+ vlib_node_increment_counter (vm, ip4_source_port_and_range_check_rx.index,
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_OK,
+ good_packets);
+ return frame->n_vectors;
+}
+
+static uword
+ip4_source_and_port_range_check_rx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip4_source_and_port_range_check_inline (vm, node, frame,
+ 0 /* !is_tx */ );
+}
+
+static uword
+ip4_source_and_port_range_check_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip4_source_and_port_range_check_inline (vm, node, frame,
+ 1 /* is_tx */ );
+}
+
+/* Note: Calling same function for both RX and TX nodes
+ as always checking dst_port, although
+ if this changes can easily make new function
+*/
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = {
+ .function = ip4_source_and_port_range_check_rx,
+ .name = "ip4-source-and-port-range-check-rx",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN(ip4_source_and_port_range_check_error_strings),
+ .error_strings = ip4_source_and_port_range_check_error_strings,
+
+ .n_next_nodes = IP4_SOURCE_AND_PORT_RANGE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_and_port_range_check_trace,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = {
+ .function = ip4_source_and_port_range_check_tx,
+ .name = "ip4-source-and-port-range-check-tx",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN(ip4_source_and_port_range_check_error_strings),
+ .error_strings = ip4_source_and_port_range_check_error_strings,
+
+ .n_next_nodes = IP4_SOURCE_AND_PORT_RANGE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_and_port_range_check_trace,
+};
+/* *INDENT-ON* */
+
+int
+set_ip_source_and_port_range_check (vlib_main_t * vm,
+ u32 * fib_index,
+ u32 sw_if_index, u32 is_add)
+{
+ ip_source_and_port_range_check_config_t config;
+ int rv = 0;
+ int i;
+
+ for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++)
+ {
+ config.fib_index[i] = fib_index[i];
+ }
+
+ /* For OUT we are in the RX path */
+ if ((fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT] != ~0) ||
+ (fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT] != ~0))
+ {
+ vnet_feature_enable_disable ("ip4-unicast",
+ "ip4-source-and-port-range-check-rx",
+ sw_if_index, is_add, &config,
+ sizeof (config));
+ }
+
+ /* For IN we are in the TX path */
+ if ((fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN] != ~0) ||
+ (fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN] != ~0))
+ {
+ vnet_feature_enable_disable ("ip4-output",
+ "ip4-source-and-port-range-check-tx",
+ sw_if_index, is_add, &config,
+ sizeof (config));
+ }
+ return rv;
+}
+
+static clib_error_t *
+set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im = &ip4_main;
+ clib_error_t *error = 0;
+ u8 is_add = 1;
+ u32 sw_if_index = ~0;
+ u32 vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS];
+ u32 fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS];
+ int vrf_set = 0;
+ uword *p;
+ int rv = 0;
+ int i;
+
+ sw_if_index = ~0;
+ for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++)
+ {
+ fib_index[i] = ~0;
+ vrf_id[i] = ~0;
+ }
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else
+ if (unformat
+ (input, "tcp-out-vrf %d",
+ &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]))
+ vrf_set = 1;
+ else
+ if (unformat
+ (input, "udp-out-vrf %d",
+ &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]))
+ vrf_set = 1;
+ else
+ if (unformat
+ (input, "tcp-in-vrf %d",
+ &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]))
+ vrf_set = 1;
+ else
+ if (unformat
+ (input, "udp-in-vrf %d",
+ &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]))
+ vrf_set = 1;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Interface required but not specified");
+
+ if (!vrf_set)
+ return clib_error_return (0,
+ "TCP or UDP VRF ID required but not specified");
+
+ for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++)
+ {
+
+ if (vrf_id[i] == 0)
+ return clib_error_return (0,
+ "TCP, UDP VRF ID should not be 0 (default). Should be distinct VRF for this purpose. ");
+
+ if (vrf_id[i] != ~0)
+ {
+ p = hash_get (im->fib_index_by_table_id, vrf_id[i]);
+
+ if (p == 0)
+ return clib_error_return (0, "Invalid VRF ID %d", vrf_id[i]);
+
+ fib_index[i] = p[0];
+ }
+ }
+ rv =
+ set_ip_source_and_port_range_check (vm, fib_index, sw_if_index, is_add);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ default:
+ return clib_error_return
+ (0,
+ "set source and port-range on interface returned an unexpected value: %d",
+ rv);
+ }
+ return error;
+}
+
+/*?
+ * Add the 'ip4-source-and-port-range-check-rx' or
+ * 'ip4-source-and-port-range-check-tx' graph node for a given
+ * interface. 'tcp-out-vrf' and 'udp-out-vrf' will add to
+ * the RX path. 'tcp-in-vrf' and 'udp-in-vrf' will add to
+ * the TX path. A graph node will be inserted into the chain when
+ * the range check is added to the first interface. It will not
+ * be removed from when range check is removed from the last
+ * interface.
+ *
+ * By adding the range check graph node to the interface, incoming
+ * or outgoing TCP/UDP packets will be validated using the
+ * provided IPv4 FIB table (VRF).
+ *
+ * @note 'ip4-source-and-port-range-check-rx' and
+ * 'ip4-source-and-port-range-check-tx' strings are too long, so
+ * they are truncated on the 'show vlib graph' output.
+ *
+ * @todo This content needs to be validated and potentially more detail added.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before range checking is enabled:
+ * @cliexstart{show vlib graph ip4-source-and-port-range-check-tx}
+ * Name Next Previous
+ * ip4-source-and-port-range- error-drop [0]
+ * @cliexend
+ *
+ * Example of how to enable range checking on TX:
+ * @cliexcmd{set interface ip source-and-port-range-check GigabitEthernet2/0/0 udp-in-vrf 7}
+ *
+ * Example of graph node after range checking is enabled:
+ * @cliexstart{show vlib graph ip4-source-and-port-range-check-tx}
+ * Name Next Previous
+ * ip4-source-and-port-range- error-drop [0] ip4-rewrite
+ * interface-output [1]
+ * @cliexend
+ *
+ * Example of how to display the features enabed on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ *
+ * ipv4 unicast:
+ * ip4-source-and-port-range-check-rx
+ * ip4-lookup
+ *
+ * ipv4 multicast:
+ * ip4-lookup-multicast
+ *
+ * ipv4 multicast:
+ * interface-output
+ *
+ * ipv6 unicast:
+ * ip6-lookup
+ *
+ * ipv6 multicast:
+ * ip6-lookup
+ *
+ * ipv6 multicast:
+ * interface-output
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command, static) = {
+ .path = "set interface ip source-and-port-range-check",
+ .function = set_ip_source_and_port_range_check_fn,
+ .short_help = "set interface ip source-and-port-range-check <interface> [tcp-out-vrf <table-id>] [udp-out-vrf <table-id>] [tcp-in-vrf <table-id>] [udp-in-vrf <table-id>] [del]",
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_ppr_dpo (u8 * s, va_list * args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+
+ protocol_port_range_dpo_t *ppr_dpo;
+ int i, j;
+ int printed = 0;
+
+ ppr_dpo = protocol_port_range_dpo_get (index);
+
+ s = format (s, "allow ");
+
+ for (i = 0; i < ppr_dpo->n_used_blocks; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ if (ppr_dpo->blocks[i].low.as_u16[j])
+ {
+ if (printed)
+ s = format (s, ", ");
+ if (ppr_dpo->blocks[i].hi.as_u16[j] >
+ (ppr_dpo->blocks[i].low.as_u16[j] + 1))
+ s =
+ format (s, "%d-%d", (u32) ppr_dpo->blocks[i].low.as_u16[j],
+ (u32) ppr_dpo->blocks[i].hi.as_u16[j] - 1);
+ else
+ s = format (s, "%d", ppr_dpo->blocks[i].low.as_u16[j]);
+ printed = 1;
+ }
+ }
+ }
+ return s;
+}
+
+static void
+ppr_dpo_lock (dpo_id_t * dpo)
+{
+}
+
+static void
+ppr_dpo_unlock (dpo_id_t * dpo)
+{
+}
+
+const static dpo_vft_t ppr_vft = {
+ .dv_lock = ppr_dpo_lock,
+ .dv_unlock = ppr_dpo_unlock,
+ .dv_format = format_ppr_dpo,
+};
+
+const static char *const ppr_ip4_nodes[] = {
+ "ip4-source-and-port-range-check-rx",
+ NULL,
+};
+
+const static char *const *const ppr_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = ppr_ip4_nodes,
+};
+
+clib_error_t *
+ip4_source_and_port_range_check_init (vlib_main_t * vm)
+{
+ source_range_check_main_t *srm = &source_range_check_main;
+
+ srm->vlib_main = vm;
+ srm->vnet_main = vnet_get_main ();
+
+ ppr_dpo_type = dpo_register_new_type (&ppr_vft, ppr_nodes);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_source_and_port_range_check_init);
+
+protocol_port_range_dpo_t *
+protocol_port_range_dpo_alloc (void)
+{
+ protocol_port_range_dpo_t *ppr_dpo;
+
+ pool_get_aligned (ppr_dpo_pool, ppr_dpo, CLIB_CACHE_LINE_BYTES);
+ memset (ppr_dpo, 0, sizeof (*ppr_dpo));
+
+ ppr_dpo->n_free_ranges = N_PORT_RANGES_PER_DPO;
+
+ return (ppr_dpo);
+}
+
+
+static int
+add_port_range_adjacency (u32 fib_index,
+ ip4_address_t * address,
+ u32 length, u16 * low_ports, u16 * high_ports)
+{
+ protocol_port_range_dpo_t *ppr_dpo;
+ dpo_id_t dpop = DPO_INVALID;
+ int i, j, k;
+
+ fib_node_index_t fei;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = length,
+ .fp_addr = {
+ .ip4 = *address,
+ },
+ };
+
+ /*
+ * check to see if we have already sourced this prefix
+ */
+ fei = fib_table_lookup_exact_match (fib_index, &pfx);
+
+ if (FIB_NODE_INDEX_INVALID == fei)
+ {
+ /*
+ * this is a first time add for this prefix.
+ */
+ ppr_dpo = protocol_port_range_dpo_alloc ();
+ }
+ else
+ {
+ /*
+ * the prefix is already there.
+ * check it was sourced by us, and if so get the ragne DPO from it.
+ */
+ dpo_id_t dpo = DPO_INVALID;
+ const dpo_id_t *bucket;
+
+ if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo))
+ {
+ /*
+ * there is existing state. we'll want to add the new ranges to it
+ */
+ bucket =
+ load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0);
+ ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ /*
+ * there is no PPR state associated with this prefix,
+ * so we'll need a new DPO
+ */
+ ppr_dpo = protocol_port_range_dpo_alloc ();
+ }
+ }
+
+ if (vec_len (low_ports) > ppr_dpo->n_free_ranges)
+ return VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY;
+
+ j = k = 0;
+
+ for (i = 0; i < vec_len (low_ports); i++)
+ {
+ for (; j < N_BLOCKS_PER_DPO; j++)
+ {
+ for (; k < 8; k++)
+ {
+ if (ppr_dpo->blocks[j].low.as_u16[k] == 0)
+ {
+ ppr_dpo->blocks[j].low.as_u16[k] = low_ports[i];
+ ppr_dpo->blocks[j].hi.as_u16[k] = high_ports[i];
+ goto doublebreak;
+ }
+ }
+ }
+ doublebreak:;
+ }
+ ppr_dpo->n_used_blocks = j + 1;
+
+ /*
+ * add or update the entry in the FIB
+ */
+ dpo_set (&dpop, ppr_dpo_type, DPO_PROTO_IP4, (ppr_dpo - ppr_dpo_pool));
+
+ if (FIB_NODE_INDEX_INVALID == fei)
+ {
+ fib_table_entry_special_dpo_add (fib_index,
+ &pfx,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_NONE, &dpop);
+ }
+ else
+ {
+ fib_entry_special_update (fei,
+ FIB_SOURCE_SPECIAL,
+ FIB_ENTRY_FLAG_NONE, &dpop);
+ }
+
+ return 0;
+}
+
+static int
+remove_port_range_adjacency (u32 fib_index,
+ ip4_address_t * address,
+ u32 length, u16 * low_ports, u16 * high_ports)
+{
+ protocol_port_range_dpo_t *ppr_dpo;
+ fib_node_index_t fei;
+ int i, j, k;
+
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = length,
+ .fp_addr = {
+ .ip4 = *address,
+ },
+ };
+
+ /*
+ * check to see if we have sourced this prefix
+ */
+ fei = fib_table_lookup_exact_match (fib_index, &pfx);
+
+ if (FIB_NODE_INDEX_INVALID == fei)
+ {
+ /*
+ * not one of ours
+ */
+ return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE;
+ }
+ else
+ {
+ /*
+ * the prefix is already there.
+ * check it was sourced by us
+ */
+ dpo_id_t dpo = DPO_INVALID;
+ const dpo_id_t *bucket;
+
+ if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo))
+ {
+ /*
+ * there is existing state. we'll want to add the new ranges to it
+ */
+ bucket =
+ load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0);
+ ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ /*
+ * not one of ours
+ */
+ return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE;
+ }
+ }
+
+ for (i = 0; i < vec_len (low_ports); i++)
+ {
+ for (j = 0; j < N_BLOCKS_PER_DPO; j++)
+ {
+ for (k = 0; k < 8; k++)
+ {
+ if (low_ports[i] == ppr_dpo->blocks[j].low.as_u16[k] &&
+ high_ports[i] == ppr_dpo->blocks[j].hi.as_u16[k])
+ {
+ ppr_dpo->blocks[j].low.as_u16[k] =
+ ppr_dpo->blocks[j].hi.as_u16[k] = 0;
+ goto doublebreak;
+ }
+ }
+ }
+ doublebreak:;
+ }
+
+ ppr_dpo->n_free_ranges = 0;
+
+ /* Have we deleted all ranges yet? */
+ for (i = 0; i < N_BLOCKS_PER_DPO; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ if (ppr_dpo->blocks[j].low.as_u16[i] == 0)
+ ppr_dpo->n_free_ranges++;
+ }
+ }
+
+ if (N_PORT_RANGES_PER_DPO == ppr_dpo->n_free_ranges)
+ {
+ /* Yes, lose the adjacency... */
+ fib_table_entry_special_remove (fib_index, &pfx, FIB_SOURCE_SPECIAL);
+ }
+ else
+ {
+ /*
+ * compact the ranges down to a contiguous block
+ */
+ // FIXME. TODO.
+ }
+
+ return 0;
+}
+
+// This will be moved to another file and implemented post API freeze.
+int
+ip6_source_and_port_range_check_add_del (ip6_address_t * address,
+ u32 length,
+ u32 vrf_id,
+ u16 * low_ports,
+ u16 * high_ports, int is_add)
+{
+ return 0;
+}
+
+int
+ip4_source_and_port_range_check_add_del (ip4_address_t * address,
+ u32 length,
+ u32 vrf_id,
+ u16 * low_ports,
+ u16 * high_ports, int is_add)
+{
+ u32 fib_index;
+
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id);
+
+ if (is_add == 0)
+ {
+ remove_port_range_adjacency (fib_index, address, length,
+ low_ports, high_ports);
+ }
+ else
+ {
+ add_port_range_adjacency (fib_index, address, length,
+ low_ports, high_ports);
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+ip_source_and_port_range_check_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u16 *low_ports = 0;
+ u16 *high_ports = 0;
+ u16 this_low;
+ u16 this_hi;
+ ip4_address_t ip4_addr;
+ ip6_address_t ip6_addr; //This function will be moved to generic impl when v6 done.
+ u32 length;
+ u32 tmp, tmp2;
+ u32 vrf_id = ~0;
+ int is_add = 1, ip_ver = ~0;
+ int rv;
+
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U/%d", unformat_ip4_address, &ip4_addr, &length))
+ ip_ver = 4;
+ else
+ if (unformat
+ (input, "%U/%d", unformat_ip6_address, &ip6_addr, &length))
+ ip_ver = 6;
+ else if (unformat (input, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "port %d", &tmp))
+ {
+ if (tmp == 0 || tmp > 65535)
+ return clib_error_return (0, "port %d out of range", tmp);
+ this_low = tmp;
+ this_hi = this_low + 1;
+ vec_add1 (low_ports, this_low);
+ vec_add1 (high_ports, this_hi);
+ }
+ else if (unformat (input, "range %d - %d", &tmp, &tmp2))
+ {
+ if (tmp > tmp2)
+ return clib_error_return (0, "ports %d and %d out of order",
+ tmp, tmp2);
+ if (tmp == 0 || tmp > 65535)
+ return clib_error_return (0, "low port %d out of range", tmp);
+ if (tmp2 == 0 || tmp2 > 65535)
+ return clib_error_return (0, "high port %d out of range", tmp2);
+ this_low = tmp;
+ this_hi = tmp2 + 1;
+ vec_add1 (low_ports, this_low);
+ vec_add1 (high_ports, this_hi);
+ }
+ else
+ break;
+ }
+
+ if (ip_ver == ~0)
+ return clib_error_return (0, " <address>/<mask> not specified");
+
+ if (vrf_id == ~0)
+ return clib_error_return (0, " VRF ID required, not specified");
+
+ if (vec_len (low_ports) == 0)
+ return clib_error_return (0,
+ " Both VRF ID and range/port must be set for a protocol.");
+
+ if (vrf_id == 0)
+ return clib_error_return (0, " VRF ID can not be 0 (default).");
+
+
+ if (ip_ver == 4)
+ rv = ip4_source_and_port_range_check_add_del
+ (&ip4_addr, length, vrf_id, low_ports, high_ports, is_add);
+ else
+ return clib_error_return (0, " IPv6 in subsequent patch");
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE:
+ return clib_error_return
+ (0, " Incorrect adjacency for add/del operation");
+
+ case VNET_API_ERROR_EXCEEDED_NUMBER_OF_PORTS_CAPACITY:
+ return clib_error_return (0, " Too many ports in add/del operation");
+
+ case VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY:
+ return clib_error_return
+ (0, " Too many ranges requested for add operation");
+
+ default:
+ return clib_error_return (0, " returned an unexpected value: %d", rv);
+ }
+
+ return 0;
+}
+
+/*?
+ * This command adds an IP Subnet and range of ports to be validated
+ * by an IP FIB table (VRF).
+ *
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+ *
+ * @cliexpar
+ * Example of how to add an IPv4 subnet and single port to an IPv4 FIB table:
+ * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 port 23}
+ * Example of how to add an IPv4 subnet and range of ports to an IPv4 FIB table:
+ * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100}
+ * Example of how to delete an IPv4 subnet and single port from an IPv4 FIB table:
+ * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 port 23 del}
+ * Example of how to delete an IPv4 subnet and range of ports from an IPv4 FIB table:
+ * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100 del}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_source_and_port_range_check_command, static) = {
+ .path = "set ip source-and-port-range-check",
+ .function = ip_source_and_port_range_check_command_fn,
+ .short_help =
+ "set ip source-and-port-range-check vrf <table-id> <ip-addr>/<mask> {port nn | range <nn> - <nn>} [del]",
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+show_source_and_port_range_check_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ protocol_port_range_dpo_t *ppr_dpo;
+ u32 fib_index;
+ u8 addr_set = 0;
+ u32 vrf_id = ~0;
+ int rv, i, j;
+ u32 port = 0;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+ };
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_ip4_address, &pfx.fp_addr.ip4))
+ addr_set = 1;
+ else if (unformat (input, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (input, "port %d", &port))
+ ;
+ else
+ break;
+ }
+
+ if (addr_set == 0)
+ return clib_error_return (0, "<address> not specified");
+
+ if (vrf_id == ~0)
+ return clib_error_return (0, "VRF ID required, not specified");
+
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
+ if (~0 == fib_index)
+ return clib_error_return (0, "VRF %d not found", vrf_id);
+
+ /*
+ * find the longest prefix match on the address requested,
+ * check it was sourced by us
+ */
+ dpo_id_t dpo = DPO_INVALID;
+ const dpo_id_t *bucket;
+
+ if (!fib_entry_get_dpo_for_source (fib_table_lookup (fib_index, &pfx),
+ FIB_SOURCE_SPECIAL, &dpo))
+ {
+ /*
+ * not one of ours
+ */
+ vlib_cli_output (vm, "%U: src address drop", format_ip4_address,
+ &pfx.fp_addr.ip4);
+ return 0;
+ }
+
+ bucket = load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0);
+ ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index);
+ dpo_reset (&dpo);
+
+ if (port)
+ {
+ rv = check_adj_port_range_x1 (ppr_dpo, (u16) port, 1234);
+ if (rv == 1234)
+ vlib_cli_output (vm, "%U port %d PASS", format_ip4_address,
+ &pfx.fp_addr.ip4, port);
+ else
+ vlib_cli_output (vm, "%U port %d FAIL", format_ip4_address,
+ &pfx.fp_addr.ip4, port);
+ return 0;
+ }
+ else
+ {
+ u8 *s;
+
+ s = format (0, "%U: ", format_ip4_address, &pfx.fp_addr.ip4);
+
+ for (i = 0; i < N_BLOCKS_PER_DPO; i++)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ if (ppr_dpo->blocks[i].low.as_u16[j])
+ s = format (s, "%d - %d ",
+ (u32) ppr_dpo->blocks[i].low.as_u16[j],
+ (u32) ppr_dpo->blocks[i].hi.as_u16[j]);
+ }
+ }
+ vlib_cli_output (vm, "%s", s);
+ vec_free (s);
+ }
+
+ return 0;
+}
+
+/*?
+ * Display the range of ports being validated by an IPv4 FIB for a given
+ * IP or subnet, or test if a given IP and port are being validated.
+ *
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+ *
+ * @cliexpar
+ * Example of how to display the set of ports being validated for a given
+ * IPv4 subnet:
+ * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.0}
+ * 172.16.2.0: 23 - 101
+ * @cliexend
+ * Example of how to test to determine of a given Pv4 address and port
+ * are being validated:
+ * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.2 port 23}
+ * 172.16.2.2 port 23 PASS
+ * @cliexend
+ * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.2 port 250}
+ * 172.16.2.2 port 250 FAIL
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_source_and_port_range_check, static) = {
+ .path = "show ip source-and-port-range-check",
+ .function = show_source_and_port_range_check_fn,
+ .short_help =
+ "show ip source-and-port-range-check vrf <table-id> <ip-addr> [port <n>]",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c
new file mode 100644
index 00000000000..d461cc885d6
--- /dev/null
+++ b/src/vnet/ip/ip4_source_check.c
@@ -0,0 +1,573 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_source_check.c: IP v4 check source address (unicast RPF check)
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_urpf_list.h>
+#include <vnet/dpo/load_balance.h>
+
+/**
+ * @file
+ * @brief IPv4 Unicast Source Check.
+ *
+ * This file contains the IPv4 interface unicast source check.
+ */
+
+
+typedef struct
+{
+ u8 packet_data[64];
+ index_t urpf;
+} ip4_source_check_trace_t;
+
+static u8 *
+format_ip4_source_check_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ip4_source_check_trace_t *t = va_arg (*va, ip4_source_check_trace_t *);
+
+ s = format (s, "%U",
+ format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+typedef enum
+{
+ IP4_SOURCE_CHECK_NEXT_DROP,
+ IP4_SOURCE_CHECK_N_NEXT,
+} ip4_source_check_next_t;
+
+typedef enum
+{
+ IP4_SOURCE_CHECK_REACHABLE_VIA_RX,
+ IP4_SOURCE_CHECK_REACHABLE_VIA_ANY,
+} ip4_source_check_type_t;
+
+typedef union
+{
+ u32 fib_index;
+} ip4_source_check_config_t;
+
+always_inline uword
+ip4_source_check_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ ip4_source_check_type_t source_check_type)
+{
+ u32 n_left_from, *from, *to_next;
+ u32 next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ip4_source_check_trace_t));
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ ip4_fib_mtrie_t *mtrie0, *mtrie1;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ ip4_source_check_config_t *c0, *c1;
+ const load_balance_t *lb0, *lb1;
+ u32 pi0, next0, pass0, lb_index0;
+ u32 pi1, next1, pass1, lb_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ c0 =
+ vnet_feature_next_with_data (vnet_buffer (p0)->sw_if_index
+ [VLIB_RX], &next0, p0,
+ sizeof (c0[0]));
+ c1 =
+ vnet_feature_next_with_data (vnet_buffer (p1)->sw_if_index
+ [VLIB_RX], &next1, p1,
+ sizeof (c1[0]));
+
+ mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
+ mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;
+
+ leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
+ leaf1 =
+ ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
+ leaf1 =
+ ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
+ leaf1 =
+ ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+ leaf1 =
+ ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
+
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+
+ lb0 = load_balance_get (lb_index0);
+ lb1 = load_balance_get (lb_index1);
+
+ /* Pass multicast. */
+ pass0 = ip4_address_is_multicast (&ip0->src_address)
+ || ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
+ pass1 = ip4_address_is_multicast (&ip1->src_address)
+ || ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
+
+ if (IP4_SOURCE_CHECK_REACHABLE_VIA_RX == source_check_type)
+ {
+ pass0 |= fib_urpf_check (lb0->lb_urpf,
+ vnet_buffer (p0)->sw_if_index
+ [VLIB_RX]);
+ pass1 |=
+ fib_urpf_check (lb1->lb_urpf,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ }
+ else
+ {
+ pass0 |= fib_urpf_check_size (lb0->lb_urpf);
+ pass1 |= fib_urpf_check_size (lb1->lb_urpf);
+ }
+ next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP);
+ next1 = (pass1 ? next1 : IP4_SOURCE_CHECK_NEXT_DROP);
+
+ p0->error =
+ error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
+ p1->error =
+ error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ ip4_fib_mtrie_t *mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ ip4_source_check_config_t *c0;
+ u32 pi0, next0, pass0, lb_index0;
+ const load_balance_t *lb0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ c0 =
+ vnet_feature_next_with_data (vnet_buffer (p0)->sw_if_index
+ [VLIB_RX], &next0, p0,
+ sizeof (c0[0]));
+
+ mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
+
+ leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+
+ lb0 = load_balance_get (lb_index0);
+
+ /* Pass multicast. */
+ pass0 = ip4_address_is_multicast (&ip0->src_address)
+ || ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
+
+ if (IP4_SOURCE_CHECK_REACHABLE_VIA_RX == source_check_type)
+ {
+ pass0 |= fib_urpf_check (lb0->lb_urpf,
+ vnet_buffer (p0)->sw_if_index
+ [VLIB_RX]);
+ }
+ else
+ {
+ pass0 |= fib_urpf_check_size (lb0->lb_urpf);
+ }
+
+ next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP);
+ p0->error =
+ error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip4_source_check_reachable_via_any (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip4_source_check_inline (vm, node, frame,
+ IP4_SOURCE_CHECK_REACHABLE_VIA_ANY);
+}
+
+static uword
+ip4_source_check_reachable_via_rx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip4_source_check_inline (vm, node, frame,
+ IP4_SOURCE_CHECK_REACHABLE_VIA_RX);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_check_source_reachable_via_any) = {
+ .function = ip4_source_check_reachable_via_any,
+ .name = "ip4-source-check-via-any",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP4_SOURCE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_check_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_check_source_reachable_via_any,
+ ip4_source_check_reachable_via_any);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_check_source_reachable_via_rx) = {
+ .function = ip4_source_check_reachable_via_rx,
+ .name = "ip4-source-check-via-rx",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP4_SOURCE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_check_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_check_source_reachable_via_rx,
+ ip4_source_check_reachable_via_rx);
+
+static clib_error_t *
+set_ip_source_check (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im = &ip4_main;
+ clib_error_t *error = 0;
+ u32 sw_if_index, is_del;
+ ip4_source_check_config_t config;
+ char *feature_name = "ip4-source-check-via-rx";
+
+ sw_if_index = ~0;
+ is_del = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user
+ (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else if (unformat (line_input, "loose"))
+ feature_name = "ip4-source-check-via-any";
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == sw_if_index)
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ config.fib_index = im->fib_index_by_sw_if_index[sw_if_index];
+ vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index,
+ is_del == 0, &config, sizeof (config));
+done:
+ return error;
+}
+
+/*?
+ * This command adds the 'ip4-source-check-via-rx' graph node for
+ * a given interface. By adding the IPv4 source check graph node to
+ * an interface, the code verifies that the source address of incoming
+ * unicast packets are reachable over the incoming interface. Two flavours
+ * are supported (the default is strict):
+ * - loose: accept ingress packet if there is a route to reach the source
+ * - strict: accept ingress packet if it arrived on an interface which
+ * the route to the source uses. i.e. an interface that the source
+ * is reachable via.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before range checking is enabled:
+ * @cliexstart{show vlib graph ip4-source-check-via-rx}
+ * Name Next Previous
+ * ip4-source-check-via-rx error-drop [0]
+ * @cliexend
+ *
+ * Example of how to enable unicast source checking on an interface:
+ * @cliexcmd{set interface ip source-check GigabitEthernet2/0/0 loose}
+ *
+ * Example of graph node after range checking is enabled:
+ * @cliexstart{show vlib graph ip4-source-check-via-rx}
+ * Name Next Previous
+ * ip4-source-check-via-rx error-drop [0] ip4-input-no-checksum
+ * ip4-source-and-port-range- ip4-input
+ * @cliexend
+ *
+ * Example of how to display the feature enabed on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ *
+ * ipv4 unicast:
+ * ip4-source-check-via-rx
+ * ip4-lookup
+ *
+ * ipv4 multicast:
+ * ip4-lookup-multicast
+ *
+ * ipv4 multicast:
+ * interface-output
+ *
+ * ipv6 unicast:
+ * ip6-lookup
+ *
+ * ipv6 multicast:
+ * ip6-lookup
+ *
+ * ipv6 multicast:
+ * interface-output
+ * @cliexend
+ *
+ * Example of how to disable unicast source checking on an interface:
+ * @cliexcmd{set interface ip source-check GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_source_check_command, static) = {
+ .path = "set interface ip source-check",
+ .function = set_ip_source_check,
+ .short_help = "set interface ip source-check <interface> [strict|loose] [del]",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ip_source_check_accept (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ clib_error_t *error = NULL;
+ u32 table_id, is_add, fib_index;
+
+ is_add = 1;
+ table_id = ~0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "table %d", &table_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "%U/%d",
+ unformat_ip4_address, &pfx.fp_addr.ip4, &pfx.fp_len))
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 != table_id)
+ {
+ fib_index = fib_table_id_find_fib_index (pfx.fp_proto, table_id);
+ if (~0 == fib_index)
+ {
+ error = clib_error_return (0, "Nonexistent table id %d", table_id);
+ goto done;
+ }
+ }
+ else
+ {
+ fib_index = 0;
+ }
+
+ if (is_add)
+ {
+ fib_table_entry_special_add (fib_index,
+ &pfx,
+ FIB_SOURCE_URPF_EXEMPT,
+ FIB_ENTRY_FLAG_DROP, ADJ_INDEX_INVALID);
+ }
+ else
+ {
+ fib_table_entry_special_remove (fib_index,
+ &pfx, FIB_SOURCE_URPF_EXEMPT);
+ }
+
+done:
+ return (error);
+}
+
+/*?
+ * Add an exemption for a prefix to pass the Unicast Reverse Path
+ * Forwarding (uRPF) loose check. This is for testing purposes only.
+ * If the '<em>table</em>' is not enter it is defaulted to 0. Default
+ * is to '<em>add</em>'. VPP always performs a loose uRPF check for
+ * for-us traffic.
+ *
+ * @cliexpar
+ * Example of how to add a uRPF exception to a FIB table to pass the
+ * loose RPF tests:
+ * @cliexcmd{ip urpf-accept table 7 add}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_source_check_accept_command, static) = {
+ .path = "ip urpf-accept",
+ .function = ip_source_check_accept,
+ .short_help = "ip urpf-accept [table <table-id>] [add|del]",
+};
+/* *INDENT-ON* */
+
+
+/* Dummy init function to get us linked in. */
+clib_error_t *
+ip4_source_check_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_source_check_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_test.c b/src/vnet/ip/ip4_test.c
new file mode 100644
index 00000000000..45d171130df
--- /dev/null
+++ b/src/vnet/ip/ip4_test.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+/**
+ * @file
+ * @brief IPv4 FIB Tester.
+ *
+ * Not compiled in by default. IPv4 FIB tester. Add, probe, delete a bunch of
+ * random routes / masks and make sure that the mtrie agrees with
+ * the hash-table FIB.
+ *
+ * Manipulate the FIB by means of the debug CLI commands, to minimize
+ * the chances of doing something idiotic.
+ */
+
+/*
+ * These routines need to be redeclared non-static elsewhere.
+ *
+ * Also: rename ip_route() -> vnet_ip_route_cmd() and add the usual
+ * test_route_init() call to main.c
+ */
+clib_error_t *vnet_ip_route_cmd (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd_arg);
+
+int ip4_lookup_validate (ip4_address_t * a, u32 fib_index0);
+
+ip4_fib_t *find_fib_by_table_index_or_id (ip4_main_t * im,
+ u32 table_index_or_id, u32 flags);
+
+/* Routes to insert/delete/probe in FIB */
+typedef struct
+{
+ ip4_address_t address;
+ u32 mask_width;
+ u32 interface_id; /* not an xx_if_index */
+} test_route_t;
+
+typedef struct
+{
+ /* Test routes in use */
+ test_route_t *route_pool;
+
+ /* Number of fake ethernets created */
+ u32 test_interfaces_created;
+} test_main_t;
+
+test_main_t test_main;
+
+/* fake ethernet device class, distinct from "fake-ethX" */
+static u8 *
+format_test_interface_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "test-eth%d", dev_instance);
+}
+
+static uword
+dummy_interface_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ clib_warning ("you shouldn't be here, leaking buffers...");
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (test_interface_device_class,static) = {
+ .name = "Test interface",
+ .format_device_name = format_test_interface_name,
+ .tx_function = dummy_interface_tx,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+thrash (vlib_main_t * vm,
+ unformat_input_t * main_input, vlib_cli_command_t * cmd_arg)
+{
+ u32 seed = 0xdeaddabe;
+ u32 niter = 10;
+ u32 nroutes = 10;
+ u32 ninterfaces = 4;
+ f64 min_mask_bits = 7.0;
+ f64 max_mask_bits = 32.0;
+ u32 table_id = 11; /* my amp goes to 11 (use fib 11) */
+ u32 table_index;
+ int iter, i;
+ u8 *cmd;
+ test_route_t *tr;
+ test_main_t *tm = &test_main;
+ ip4_main_t *im = &ip4_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t cmd_input;
+ f64 rf;
+ u32 *masks = 0;
+ u32 tmp;
+ u32 hw_if_index;
+ clib_error_t *error = 0;
+ uword *p;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 hw_address[6];
+ ip4_fib_t *fib;
+ int verbose = 0;
+
+ /* Precompute mask width -> mask vector */
+ tmp = (u32) ~ 0;
+ vec_validate (masks, 32);
+ for (i = 32; i > 0; i--)
+ {
+ masks[i] = tmp;
+ tmp <<= 1;
+ }
+
+ if (unformat_user (main_input, unformat_line_input, line_input))
+ {
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "seed %d", &seed))
+ ;
+ else if (unformat (line_input, "niter %d", &niter))
+ ;
+ else if (unformat (line_input, "nroutes %d", &nroutes))
+ ;
+ else if (unformat (line_input, "ninterfaces %d", &ninterfaces))
+ ;
+ else if (unformat (line_input, "min-mask-bits %d", &tmp))
+ min_mask_bits = (f64) tmp;
+ else if (unformat (line_input, "max-mask-bits %d", &tmp))
+ max_mask_bits = (f64) tmp;
+ else if (unformat (line_input, "verbose"))
+ verbose = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ }
+
+ /* Find or create FIB table 11 */
+ fib = ip4_fib_find_or_create_fib_by_table_id (table_id);
+
+ for (i = tm->test_interfaces_created; i < ninterfaces; i++)
+ {
+ vnet_hw_interface_t *hw;
+ memset (hw_address, 0, sizeof (hw_address));
+ hw_address[0] = 0xd0;
+ hw_address[1] = 0x0f;
+ hw_address[5] = i;
+
+ error = ethernet_register_interface
+ (vnm, test_interface_device_class.index, i /* instance */ ,
+ hw_address, &hw_if_index,
+ /* flag change */ 0);
+
+ /* Fake interfaces use FIB table 11 */
+ hw = vnet_get_hw_interface (vnm, hw_if_index);
+ vec_validate (im->fib_index_by_sw_if_index, hw->sw_if_index);
+ im->fib_index_by_sw_if_index[hw->sw_if_index] = fib->index;
+ ip4_sw_interface_enable_disable (sw_if_index, 1);
+ }
+
+ tm->test_interfaces_created = ninterfaces;
+
+ /* Find fib index corresponding to FIB id 11 */
+ p = hash_get (im->fib_index_by_table_id, table_id);
+ if (p == 0)
+ {
+ vlib_cli_output (vm, "Couldn't map fib id %d to fib index\n", table_id);
+ return 0;
+ }
+ table_index = p[0];
+
+ for (iter = 0; iter < niter; iter++)
+ {
+ /* Pick random routes to install */
+ for (i = 0; i < nroutes; i++)
+ {
+ int j;
+
+ pool_get (tm->route_pool, tr);
+ memset (tr, 0, sizeof (*tr));
+
+ again:
+ rf = random_f64 (&seed);
+ tr->mask_width = (u32) (min_mask_bits
+ + rf * (max_mask_bits - min_mask_bits));
+ tmp = random_u32 (&seed);
+ tmp &= masks[tr->mask_width];
+ tr->address.as_u32 = clib_host_to_net_u32 (tmp);
+
+ /* We can't add the same address/mask twice... */
+ for (j = 0; j < i; j++)
+ {
+ test_route_t *prev;
+ prev = pool_elt_at_index (tm->route_pool, j);
+ if ((prev->address.as_u32 == tr->address.as_u32)
+ && (prev->mask_width == tr->mask_width))
+ goto again;
+ }
+
+ rf = random_f64 (&seed);
+ tr->interface_id = (u32) (rf * ninterfaces);
+ }
+
+ /* Add them */
+ for (i = 0; i < nroutes; i++)
+ {
+ tr = pool_elt_at_index (tm->route_pool, i);
+ cmd = format (0, "add table %d %U/%d via test-eth%d",
+ table_id,
+ format_ip4_address, &tr->address,
+ tr->mask_width, tr->interface_id);
+ vec_add1 (cmd, 0);
+ if (verbose)
+ fformat (stderr, "ip route %s\n", cmd);
+ unformat_init_string (&cmd_input, (char *) cmd, vec_len (cmd) - 1);
+ error = vnet_ip_route_cmd (vm, &cmd_input, cmd_arg);
+ if (error)
+ clib_error_report (error);
+ unformat_free (&cmd_input);
+ vec_free (cmd);
+ }
+ /* Probe them */
+ for (i = 0; i < nroutes; i++)
+ {
+ tr = pool_elt_at_index (tm->route_pool, i);
+ if (!ip4_lookup_validate (&tr->address, table_index))
+ {
+ if (verbose)
+ fformat (stderr, "test lookup table %d %U\n",
+ table_index, format_ip4_address, &tr->address);
+
+ fformat (stderr, "FAIL-after-insert: %U/%d\n",
+ format_ip4_address, &tr->address, tr->mask_width);
+ }
+ }
+
+ /* Delete them */
+ for (i = 0; i < nroutes; i++)
+ {
+ int j;
+ tr = pool_elt_at_index (tm->route_pool, i);
+ if (0)
+ cmd = format (0, "del table %d %U/%d via test-eth%d",
+ table_id,
+ format_ip4_address, &tr->address,
+ tr->mask_width, tr->interface_id);
+ else
+ cmd = format (0, "del table %d %U/%d",
+ table_id,
+ format_ip4_address, &tr->address, tr->mask_width);
+ vec_add1 (cmd, 0);
+ if (verbose)
+ fformat (stderr, "ip route %s\n", cmd);
+ unformat_init_string (&cmd_input, (char *) cmd, vec_len (cmd) - 1);
+ error = vnet_ip_route_cmd (vm, &cmd_input, cmd_arg);
+ if (error)
+ clib_error_report (error);
+ unformat_free (&cmd_input);
+ vec_free (cmd);
+
+ /* Make sure all undeleted routes still work */
+ for (j = i + 1; j < nroutes; j++)
+ {
+ test_route_t *rr; /* remaining route */
+ rr = pool_elt_at_index (tm->route_pool, j);
+ if (!ip4_lookup_validate (&rr->address, table_index))
+ {
+ if (verbose)
+ fformat (stderr, "test lookup table %d %U\n",
+ table_index, format_ip4_address, &rr->address);
+
+ fformat (stderr, "FAIL: %U/%d AWOL\n",
+ format_ip4_address, &rr->address, rr->mask_width);
+ fformat (stderr, " iter %d after %d of %d deletes\n",
+ iter, i, nroutes);
+ fformat (stderr, " last route deleted %U/%d\n",
+ format_ip4_address, &tr->address, tr->mask_width);
+ }
+ }
+ }
+
+ pool_free (tm->route_pool);
+ }
+ return 0;
+}
+
+/*?
+ * This command in not in the build by default. It is an internal
+ * command used to test the route functonality.
+ *
+ * Create test routes on IPv4 FIB table 11. Table will be created if it
+ * does not exist.
+ *
+ * There are several optional attributes:
+ * - If not provided, <seed> defaults to 0xdeaddabe.
+ * - If not provided, <num-iter> defaults to 10.
+ * - If not provided, <num-iface> defaults to 4.
+ * - If not provided, <min-mask> defaults to 7.0.
+ * - If not provided, <max-mask> defaults to 32.0.
+ *
+ * @cliexpar
+ * Example of how to run:
+ * @cliexcmd{test route}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_route_command, static) = {
+ .path = "test route",
+ .short_help = "test route [seed <seed-num>] [niter <num-iter>] [ninterfaces <num-iface>] [min-mask-bits <min-mask>] [max-mask-bits <max-mask>] [verbose]", .function = thrash,
+ .function = thrash,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+test_route_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (test_route_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
new file mode 100644
index 00000000000..586b7c1b7f2
--- /dev/null
+++ b/src/vnet/ip/ip6.h
@@ -0,0 +1,476 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6.h: ip6 main include file
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip6_h
+#define included_ip_ip6_h
+
+#include <vlib/mc.h>
+#include <vlib/buffer.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+#include <vnet/ip/lookup.h>
+#include <stdbool.h>
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_template.h>
+
+/*
+ * Default size of the ip6 fib hash table
+ */
+#define IP6_FIB_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define IP6_FIB_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+typedef struct
+{
+ ip6_address_t addr;
+ u32 dst_address_length;
+ u32 vrf_index;
+} ip6_fib_key_t;
+
+typedef struct
+{
+ /* Table ID (hash key) for this FIB. */
+ u32 table_id;
+
+ /* Index into FIB vector. */
+ u32 index;
+
+ /* flow hash configuration */
+ flow_hash_config_t flow_hash_config;
+} ip6_fib_t;
+
+struct ip6_main_t;
+
+typedef void (ip6_add_del_interface_address_function_t)
+ (struct ip6_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length, u32 if_address_index, u32 is_del);
+
+typedef struct
+{
+ ip6_add_del_interface_address_function_t *function;
+ uword function_opaque;
+} ip6_add_del_interface_address_callback_t;
+
+/**
+ * Enumeration of the FIB table instance types
+ */
+typedef enum ip6_fib_table_instance_type_t_
+{
+ /**
+ * This table stores the routes that are used to forward traffic.
+ * The key is the prefix, the result the adjacnecy to forward on.
+ */
+ IP6_FIB_TABLE_FWDING,
+ /**
+ * The table that stores ALL routes learned by the DP.
+ * Some of these routes may not be ready to install in forwarding
+ * at a given time.
+ * The key in this table is the prefix, the result is the fib_entry_t
+ */
+ IP6_FIB_TABLE_NON_FWDING,
+} ip6_fib_table_instance_type_t;
+
+#define IP6_FIB_NUM_TABLES (IP6_FIB_TABLE_NON_FWDING+1)
+
+/**
+ * A represenation of a single IP6 table
+ */
+typedef struct ip6_fib_table_instance_t_
+{
+ /* The hash table */
+ BVT (clib_bihash) ip6_hash;
+
+ /* bitmap / refcounts / vector of mask widths to search */
+ uword *non_empty_dst_address_length_bitmap;
+ u8 *prefix_lengths_in_search_order;
+ i32 dst_address_length_refcounts[129];
+} ip6_fib_table_instance_t;
+
+typedef struct ip6_main_t
+{
+ /**
+ * The two FIB tables; fwding and non-fwding
+ */
+ ip6_fib_table_instance_t ip6_table[IP6_FIB_NUM_TABLES];
+
+ ip_lookup_main_t lookup_main;
+
+ /* Pool of FIBs. */
+ struct fib_table_t_ *fibs;
+
+ /* Network byte orders subnet mask for each prefix length */
+ ip6_address_t fib_masks[129];
+
+ /* Table index indexed by software interface. */
+ u32 *fib_index_by_sw_if_index;
+
+ /* IP6 enabled count by software interface */
+ u8 *ip_enabled_by_sw_if_index;
+
+ /* Hash table mapping table id to fib index.
+ ID space is not necessarily dense; index space is dense. */
+ uword *fib_index_by_table_id;
+
+ /* Hash table mapping interface rewrite adjacency index by sw if index. */
+ uword *interface_route_adj_index_by_sw_if_index;
+
+ /* Functions to call when interface address changes. */
+ ip6_add_del_interface_address_callback_t
+ * add_del_interface_address_callbacks;
+
+ /* Template used to generate IP6 neighbor solicitation packets. */
+ vlib_packet_template_t discover_neighbor_packet_template;
+
+ /* ip6 lookup table config parameters */
+ u32 lookup_table_nbuckets;
+ uword lookup_table_size;
+
+ /* Seed for Jenkins hash used to compute ip6 flow hash. */
+ u32 flow_hash_seed;
+
+ struct
+ {
+ /* TTL to use for host generated packets. */
+ u8 ttl;
+
+ u8 pad[3];
+ } host_config;
+
+ /* HBH processing enabled? */
+ u8 hbh_enabled;
+} ip6_main_t;
+
+/* Global ip6 main structure. */
+extern ip6_main_t ip6_main;
+
+/* Global ip6 input node. Errors get attached to ip6 input node. */
+extern vlib_node_registration_t ip6_input_node;
+extern vlib_node_registration_t ip6_rewrite_node;
+extern vlib_node_registration_t ip6_rewrite_local_node;
+extern vlib_node_registration_t ip6_discover_neighbor_node;
+extern vlib_node_registration_t ip6_glean_node;
+extern vlib_node_registration_t ip6_midchain_node;
+
+extern vlib_node_registration_t ip6_icmp_neighbor_discovery_event_node;
+
+/* ipv6 neighbor discovery - timer/event types */
+typedef enum
+{
+ ICMP6_ND_EVENT_INIT,
+} ip6_icmp_neighbor_discovery_event_type_t;
+
+typedef union
+{
+ u32 add_del_swindex;
+ struct
+ {
+ u32 up_down_swindex;
+ u32 fib_index;
+ } up_down_event;
+} ip6_icmp_neighbor_discovery_event_data_t;
+
+always_inline uword
+ip6_destination_matches_route (const ip6_main_t * im,
+ const ip6_address_t * key,
+ const ip6_address_t * dest, uword dest_length)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (key->as_uword); i++)
+ {
+ if ((key->as_uword[i] ^ dest->as_uword[i]) & im->
+ fib_masks[dest_length].as_uword[i])
+ return 0;
+ }
+ return 1;
+}
+
+always_inline uword
+ip6_destination_matches_interface (ip6_main_t * im,
+ ip6_address_t * key,
+ ip_interface_address_t * ia)
+{
+ ip6_address_t *a = ip_interface_address_get_address (&im->lookup_main, ia);
+ return ip6_destination_matches_route (im, key, a, ia->address_length);
+}
+
+/* As above but allows for unaligned destinations (e.g. works right from IP header of packet). */
+always_inline uword
+ip6_unaligned_destination_matches_route (ip6_main_t * im,
+ ip6_address_t * key,
+ ip6_address_t * dest,
+ uword dest_length)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (key->as_uword); i++)
+ {
+ if ((clib_mem_unaligned (&key->as_uword[i], uword) ^ dest->as_uword[i])
+ & im->fib_masks[dest_length].as_uword[i])
+ return 0;
+ }
+ return 1;
+}
+
+always_inline int
+ip6_src_address_for_packet (ip_lookup_main_t * lm,
+ u32 sw_if_index, ip6_address_t * src)
+{
+ u32 if_add_index = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+ if (PREDICT_TRUE (if_add_index != ~0))
+ {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index (lm->if_address_pool, if_add_index);
+ ip6_address_t *if_ip = ip_interface_address_get_address (lm, if_add);
+ *src = *if_ip;
+ return (0);
+ }
+ else
+ {
+ src->as_u64[0] = 0;
+ src->as_u64[1] = 0;
+ }
+ return (!0);
+}
+
+/* Find interface address which matches destination. */
+always_inline ip6_address_t *
+ip6_interface_address_matching_destination (ip6_main_t * im,
+ ip6_address_t * dst,
+ u32 sw_if_index,
+ ip_interface_address_t **
+ result_ia)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_address_t *ia;
+ ip6_address_t *result = 0;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ ip6_address_t * a = ip_interface_address_get_address (lm, ia);
+ if (ip6_destination_matches_route (im, dst, a, ia->address_length))
+ {
+ result = a;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ if (result_ia)
+ *result_ia = result ? ia : 0;
+ return result;
+}
+
+clib_error_t *ip6_add_del_interface_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length, u32 is_del);
+void ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable);
+
+int ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2);
+
+clib_error_t *ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst,
+ u32 sw_if_index);
+
+clib_error_t *ip6_set_neighbor_limit (u32 neighbor_limit);
+
+uword
+ip6_udp_register_listener (vlib_main_t * vm,
+ u16 dst_port, u32 next_node_index);
+
+u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+ ip6_header_t * ip0,
+ int *bogus_lengthp);
+
+void ip6_register_protocol (u32 protocol, u32 node_index);
+
+serialize_function_t serialize_vnet_ip6_main, unserialize_vnet_ip6_main;
+
+void ip6_ethernet_update_adjacency (vnet_main_t * vnm,
+ u32 sw_if_index, u32 ai);
+
+int
+vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 * link_layer_address,
+ uword n_bytes_link_layer_address,
+ int is_static);
+int
+vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 * link_layer_address,
+ uword n_bytes_link_layer_address);
+
+void
+ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip,
+ u8 * mac);
+
+void
+ip6_ethernet_mac_address_from_link_local_address (u8 * mac,
+ ip6_address_t * ip);
+
+int vnet_set_ip6_flow_hash (u32 table_id,
+ flow_hash_config_t flow_hash_config);
+
+int
+ip6_neighbor_ra_config (vlib_main_t * vm, u32 sw_if_index,
+ u8 suppress, u8 managed, u8 other,
+ u8 ll_option, u8 send_unicast, u8 cease,
+ u8 use_lifetime, u32 lifetime,
+ u32 initial_count, u32 initial_interval,
+ u32 max_interval, u32 min_interval, u8 is_no);
+
+int
+ip6_neighbor_ra_prefix (vlib_main_t * vm, u32 sw_if_index,
+ ip6_address_t * prefix_addr, u8 prefix_len,
+ u8 use_default, u32 val_lifetime, u32 pref_lifetime,
+ u8 no_advertise, u8 off_link, u8 no_autoconfig,
+ u8 no_onlink, u8 is_no);
+
+
+clib_error_t *enable_ip6_interface (vlib_main_t * vm, u32 sw_if_index);
+
+clib_error_t *disable_ip6_interface (vlib_main_t * vm, u32 sw_if_index);
+
+int ip6_interface_enabled (vlib_main_t * vm, u32 sw_if_index);
+
+clib_error_t *set_ip6_link_local_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u8 address_length);
+
+void vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque,
+ uword data);
+
+int vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm,
+ void *data_callback,
+ u32 pid,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque,
+ uword data, int is_add);
+
+int vnet_ip6_nd_term (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_buffer_t * p0,
+ ethernet_header_t * eth,
+ ip6_header_t * ip,
+ u32 sw_if_index, u16 bd_index, u8 shg);
+
+int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 table_index);
+extern vlib_node_registration_t ip6_lookup_node;
+
+/* Compute flow hash. We'll use it to select which Sponge to use for this
+ flow. And other things. */
+always_inline u32
+ip6_compute_flow_hash (const ip6_header_t * ip,
+ flow_hash_config_t flow_hash_config)
+{
+ tcp_header_t *tcp = (void *) (ip + 1);
+ u64 a, b, c;
+ u64 t1, t2;
+ uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP
+ || ip->protocol == IP_PROTOCOL_UDP);
+
+ t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]);
+ t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR) ? t1 : 0;
+
+ t2 = (ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1]);
+ t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR) ? t2 : 0;
+
+ a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1;
+ b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2;
+ b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0;
+
+ t1 = is_tcp_udp ? tcp->ports.src : 0;
+ t2 = is_tcp_udp ? tcp->ports.dst : 0;
+
+ t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0;
+ t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0;
+
+ c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ?
+ ((t1 << 16) | t2) : ((t2 << 16) | t1);
+
+ hash_mix64 (a, b, c);
+ return (u32) c;
+}
+
+/*
+ * Hop-by-Hop handling
+ */
+typedef struct
+{
+ /* Array of function pointers to HBH option handling routines */
+ int (*options[256]) (vlib_buffer_t * b, ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt);
+ u8 *(*trace[256]) (u8 * s, ip6_hop_by_hop_option_t * opt);
+ uword next_override;
+} ip6_hop_by_hop_main_t;
+
+extern ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
+
+int ip6_hbh_register_option (u8 option,
+ int options (vlib_buffer_t * b,
+ ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt),
+ u8 * trace (u8 * s,
+ ip6_hop_by_hop_option_t * opt));
+int ip6_hbh_unregister_option (u8 option);
+void ip6_hbh_set_next_override (uword next);
+
+/* Flag used by IOAM code. Classifier sets it pop-hop-by-hop checks it */
+#define OI_DECAP 0x80000000
+
+#endif /* included_ip_ip6_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_error.h b/src/vnet/ip/ip6_error.h
new file mode 100644
index 00000000000..a2807169123
--- /dev/null
+++ b/src/vnet/ip/ip6_error.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_error.h: ip6 fast path errors
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip6_error_h
+#define included_ip_ip6_error_h
+
+#define foreach_ip6_error \
+ /* Must be first. */ \
+ _ (NONE, "valid ip6 packets") \
+ \
+ /* Errors signalled by ip6-input */ \
+ _ (TOO_SHORT, "ip6 length < 40 bytes") \
+ _ (BAD_LENGTH, "ip6 length > l2 length") \
+ _ (VERSION, "ip6 version != 6") \
+ _ (TIME_EXPIRED, "ip6 ttl <= 1") \
+ \
+ /* Errors signalled by ip6-rewrite. */ \
+ _ (MTU_EXCEEDED, "ip6 MTU exceeded") \
+ _ (DST_LOOKUP_MISS, "ip6 destination lookup miss") \
+ _ (SRC_LOOKUP_MISS, "ip6 source lookup miss") \
+ _ (ADJACENCY_DROP, "ip6 adjacency drop") \
+ _ (ADJACENCY_PUNT, "ip6 adjacency punt") \
+ \
+ /* Errors signalled by ip6-local. */ \
+ _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
+ _ (UDP_CHECKSUM, "bad udp checksum") \
+ _ (ICMP_CHECKSUM, "bad icmp checksum") \
+ _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
+ \
+ /* Errors signalled by udp6-lookup. */ \
+ _ (UNKNOWN_UDP_PORT, "no listener for udp port") \
+ \
+ /* Spoofed packets in ip6-rewrite-local */ \
+ _(SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \
+ \
+ /* Erros singalled by ip6-inacl */ \
+ _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
+ _ (INACL_SESSION_DENY, "input ACL session deny drops")
+
+typedef enum
+{
+#define _(sym,str) IP6_ERROR_##sym,
+ foreach_ip6_error
+#undef _
+ IP6_N_ERROR,
+} ip6_error_t;
+
+#endif /* included_ip_ip6_error_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_format.c b/src/vnet/ip/ip6_format.c
new file mode 100644
index 00000000000..56899b73d8b
--- /dev/null
+++ b/src/vnet/ip/ip6_format.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_format.c: ip6 formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format an IP6 address. */
+u8 *
+format_ip6_address (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u32 max_zero_run = 0, this_zero_run = 0;
+ int max_zero_run_index = -1, this_zero_run_index = 0;
+ int in_zero_run = 0, i;
+ int last_double_colon = 0;
+
+ /* Ugh, this is a pain. Scan forward looking for runs of 0's */
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (a->as_u16[i] == 0)
+ {
+ if (in_zero_run)
+ this_zero_run++;
+ else
+ {
+ in_zero_run = 1;
+ this_zero_run = 1;
+ this_zero_run_index = i;
+ }
+ }
+ else
+ {
+ if (in_zero_run)
+ {
+ /* offer to compress the biggest run of > 1 zero */
+ if (this_zero_run > max_zero_run && this_zero_run > 1)
+ {
+ max_zero_run_index = this_zero_run_index;
+ max_zero_run = this_zero_run;
+ }
+ }
+ in_zero_run = 0;
+ this_zero_run = 0;
+ }
+ }
+
+ if (in_zero_run)
+ {
+ if (this_zero_run > max_zero_run && this_zero_run > 1)
+ {
+ max_zero_run_index = this_zero_run_index;
+ max_zero_run = this_zero_run;
+ }
+ }
+
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (i == max_zero_run_index)
+ {
+ s = format (s, "::");
+ i += max_zero_run - 1;
+ last_double_colon = 1;
+ }
+ else
+ {
+ s = format (s, "%s%x",
+ (last_double_colon || i == 0) ? "" : ":",
+ clib_net_to_host_u16 (a->as_u16[i]));
+ last_double_colon = 0;
+ }
+ }
+
+ return s;
+}
+
+/* Format an IP6 route destination and length. */
+u8 *
+format_ip6_address_and_length (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u8 l = va_arg (*args, u32);
+ return format (s, "%U/%d", format_ip6_address, a, l);
+}
+
+/* Parse an IP6 address. */
+uword
+unformat_ip6_address (unformat_input_t * input, va_list * args)
+{
+ ip6_address_t *result = va_arg (*args, ip6_address_t *);
+ u16 hex_quads[8];
+ uword hex_quad, n_hex_quads, hex_digit, n_hex_digits;
+ uword c, n_colon, double_colon_index;
+
+ n_hex_quads = hex_quad = n_hex_digits = n_colon = 0;
+ double_colon_index = ARRAY_LEN (hex_quads);
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ hex_digit = 16;
+ if (c >= '0' && c <= '9')
+ hex_digit = c - '0';
+ else if (c >= 'a' && c <= 'f')
+ hex_digit = c + 10 - 'a';
+ else if (c >= 'A' && c <= 'F')
+ hex_digit = c + 10 - 'A';
+ else if (c == ':' && n_colon < 2)
+ n_colon++;
+ else
+ {
+ unformat_put_input (input);
+ break;
+ }
+
+ /* Too many hex quads. */
+ if (n_hex_quads >= ARRAY_LEN (hex_quads))
+ return 0;
+
+ if (hex_digit < 16)
+ {
+ hex_quad = (hex_quad << 4) | hex_digit;
+
+ /* Hex quad must fit in 16 bits. */
+ if (n_hex_digits >= 4)
+ return 0;
+
+ n_colon = 0;
+ n_hex_digits++;
+ }
+
+ /* Save position of :: */
+ if (n_colon == 2)
+ {
+ /* More than one :: ? */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ return 0;
+ double_colon_index = n_hex_quads;
+ }
+
+ if (n_colon > 0 && n_hex_digits > 0)
+ {
+ hex_quads[n_hex_quads++] = hex_quad;
+ hex_quad = 0;
+ n_hex_digits = 0;
+ }
+ }
+
+ if (n_hex_digits > 0)
+ hex_quads[n_hex_quads++] = hex_quad;
+
+ {
+ word i;
+
+ /* Expand :: to appropriate number of zero hex quads. */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ {
+ word n_zero = ARRAY_LEN (hex_quads) - n_hex_quads;
+
+ for (i = n_hex_quads - 1; i >= (signed) double_colon_index; i--)
+ hex_quads[n_zero + i] = hex_quads[i];
+
+ for (i = 0; i < n_zero; i++)
+ {
+ ASSERT ((double_colon_index + i) < ARRAY_LEN (hex_quads));
+ hex_quads[double_colon_index + i] = 0;
+ }
+
+ n_hex_quads = ARRAY_LEN (hex_quads);
+ }
+
+ /* Too few hex quads given. */
+ if (n_hex_quads < ARRAY_LEN (hex_quads))
+ return 0;
+
+ for (i = 0; i < ARRAY_LEN (hex_quads); i++)
+ result->as_u16[i] = clib_host_to_net_u16 (hex_quads[i]);
+
+ return 1;
+ }
+}
+
+/* Format an IP6 header. */
+u8 *
+format_ip6_header (u8 * s, va_list * args)
+{
+ ip6_header_t *ip = va_arg (*args, ip6_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ u32 i, ip_version, traffic_class, flow_label;
+ uword indent;
+
+ /* Nothing to do. */
+ if (max_header_bytes < sizeof (ip[0]))
+ return format (s, "IP header truncated");
+
+ indent = format_get_indent (s);
+ indent += 2;
+
+ s = format (s, "%U: %U -> %U",
+ format_ip_protocol, ip->protocol,
+ format_ip6_address, &ip->src_address,
+ format_ip6_address, &ip->dst_address);
+
+ i = clib_net_to_host_u32 (ip->ip_version_traffic_class_and_flow_label);
+ ip_version = (i >> 28);
+ traffic_class = (i >> 20) & 0xff;
+ flow_label = i & pow2_mask (20);
+
+ if (ip_version != 6)
+ s = format (s, "\n%Uversion %d", format_white_space, indent, ip_version);
+
+ s =
+ format (s,
+ "\n%Utos 0x%02x, flow label 0x%x, hop limit %d, payload length %d",
+ format_white_space, indent, traffic_class, flow_label,
+ ip->hop_limit, clib_net_to_host_u16 (ip->payload_length));
+
+ /* Recurse into next protocol layer. */
+ if (max_header_bytes != 0 && sizeof (ip[0]) < max_header_bytes)
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol);
+
+ if (pi && pi->format_header)
+ s = format (s, "\n%U%U",
+ format_white_space, indent - 2, pi->format_header,
+ /* next protocol header */ (void *) (ip + 1),
+ max_header_bytes - sizeof (ip[0]));
+ }
+
+ return s;
+}
+
+/* Parse an IP6 header. */
+uword
+unformat_ip6_header (unformat_input_t * input, va_list * args)
+{
+ u8 **result = va_arg (*args, u8 **);
+ ip6_header_t *ip;
+ int old_length;
+
+ /* Allocate space for IP header. */
+ {
+ void *p;
+
+ old_length = vec_len (*result);
+ vec_add2 (*result, p, sizeof (ip[0]));
+ ip = p;
+ }
+
+ memset (ip, 0, sizeof (ip[0]));
+ ip->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (6 << 28);
+
+ if (!unformat (input, "%U: %U -> %U",
+ unformat_ip_protocol, &ip->protocol,
+ unformat_ip6_address, &ip->src_address,
+ unformat_ip6_address, &ip->dst_address))
+ return 0;
+
+ /* Parse options. */
+ while (1)
+ {
+ int i;
+
+ if (unformat (input, "tos %U", unformat_vlib_number, &i))
+ ip->ip_version_traffic_class_and_flow_label |=
+ clib_host_to_net_u32 ((i & 0xff) << 20);
+
+ else if (unformat (input, "hop-limit %U", unformat_vlib_number, &i))
+ ip->hop_limit = i;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ /* Recurse into next protocol layer. */
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol);
+
+ if (pi && pi->unformat_header)
+ {
+ if (!unformat_user (input, pi->unformat_header, result))
+ return 0;
+
+ /* Result may have moved. */
+ ip = (void *) *result + old_length;
+ }
+ }
+
+ ip->payload_length =
+ clib_host_to_net_u16 (vec_len (*result) - (old_length + sizeof (ip[0])));
+
+ return 1;
+}
+
+/* Parse an IP46 address. */
+uword
+unformat_ip46_address (unformat_input_t * input, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ if ((type != IP46_TYPE_IP6) &&
+ unformat (input, "%U", unformat_ip4_address, &ip46->ip4))
+ {
+ ip46_address_mask_ip4 (ip46);
+ return 1;
+ }
+ else if ((type != IP46_TYPE_IP4) &&
+ unformat (input, "%U", unformat_ip6_address, &ip46->ip6))
+ {
+ return 1;
+ }
+ return 0;
+}
+
+/* Format an IP46 address. */
+u8 *
+format_ip46_address (u8 * s, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ int is_ip4 = 1;
+
+ switch (type)
+ {
+ case IP46_TYPE_ANY:
+ is_ip4 = ip46_address_is_ip4 (ip46);
+ break;
+ case IP46_TYPE_IP4:
+ is_ip4 = 1;
+ break;
+ case IP46_TYPE_IP6:
+ is_ip4 = 0;
+ break;
+ }
+
+ return is_ip4 ?
+ format (s, "%U", format_ip4_address, &ip46->ip4) :
+ format (s, "%U", format_ip6_address, &ip46->ip6);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
new file mode 100644
index 00000000000..b5c795523ca
--- /dev/null
+++ b/src/vnet/ip/ip6_forward.c
@@ -0,0 +1,3402 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_forward.c: IP v6 forwarding
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
+#include <vnet/srp/srp.h> /* for srp_hw_interface_class */
+#include <vppinfra/cache.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/classify_dpo.h>
+
+#include <vppinfra/bihash_template.c>
+
+/**
+ * @file
+ * @brief IPv6 Forwarding.
+ *
+ * This file contains the source code for IPv6 forwarding.
+ */
+
+void
+ip6_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ vlib_rx_or_tx_t which_adj_index);
+
+always_inline uword
+ip6_lookup_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ip6_main_t *im = &ip6_main;
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
+ u32 n_left_from, n_left_to_next, *from, *to_next;
+ ip_lookup_next_t next;
+ u32 cpu_index = os_get_cpu_number ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ u32 pi0, pi1, lbi0, lbi1, wrong_next;
+ ip_lookup_next_t next0, next1;
+ ip6_header_t *ip0, *ip1;
+ ip6_address_t *dst_addr0, *dst_addr1;
+ u32 fib_index0, fib_index1;
+ u32 flow_hash_config0, flow_hash_config1;
+ const dpo_id_t *dpo0, *dpo1;
+ const load_balance_t *lb0, *lb1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ dst_addr0 = &ip0->dst_address;
+ dst_addr1 = &ip1->dst_address;
+
+ fib_index0 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index1 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+
+ fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+ fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+ fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+ fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
+
+ lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
+ lbi1 = ip6_fib_table_fwding_lookup (im, fib_index1, dst_addr1);
+
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
+
+ vnet_buffer (p0)->ip.flow_hash = vnet_buffer (p1)->ip.flow_hash = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+ vnet_buffer (p0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, flow_hash_config0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ flow_hash_config1 = lb1->lb_hash_config;
+ vnet_buffer (p1)->ip.flow_hash =
+ ip6_compute_flow_hash (ip1, flow_hash_config1);
+ }
+
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (lb1->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+ ASSERT (is_pow2 (lb1->lb_n_buckets));
+ dpo0 = load_balance_get_bucket_i (lb0,
+ (vnet_buffer (p0)->ip.flow_hash &
+ lb0->lb_n_buckets_minus_1));
+ dpo1 = load_balance_get_bucket_i (lb1,
+ (vnet_buffer (p1)->ip.flow_hash &
+ lb1->lb_n_buckets_minus_1));
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+ }
+ if (PREDICT_FALSE
+ (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next1 = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1;
+ }
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ wrong_next = (next0 != next) + 2 * (next1 != next);
+ if (PREDICT_FALSE (wrong_next != 0))
+ {
+ switch (wrong_next)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = pi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ break;
+
+ case 3:
+ /* A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ if (next0 == next1)
+ {
+ /* A B B */
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next1;
+ vlib_get_next_frame (vm, node, next, to_next,
+ n_left_to_next);
+ }
+ }
+ }
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ u32 pi0, lbi0;
+ ip_lookup_next_t next0;
+ load_balance_t *lb0;
+ ip6_address_t *dst_addr0;
+ u32 fib_index0, flow_hash_config0;
+ const dpo_id_t *dpo0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ dst_addr0 = &ip0->dst_address;
+
+ fib_index0 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ flow_hash_config0 = ip6_fib_get (fib_index0)->flow_hash_config;
+
+ lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
+
+ lb0 = load_balance_get (lbi0);
+
+ vnet_buffer (p0)->ip.flow_hash = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+ vnet_buffer (p0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, flow_hash_config0);
+ }
+
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+ dpo0 = load_balance_get_bucket_i (lb0,
+ (vnet_buffer (p0)->ip.flow_hash &
+ lb0->lb_n_buckets_minus_1));
+ next0 = dpo0->dpoi_next_node;
+
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+ }
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ if (PREDICT_FALSE (next0 != next))
+ {
+ n_left_to_next += 1;
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return frame->n_vectors;
+}
+
+static void
+ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
+ ip6_main_t * im, u32 fib_index,
+ ip_interface_address_t * a)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip6_address_t *address = ip_interface_address_get_address (lm, a);
+ fib_prefix_t pfx = {
+ .fp_len = a->address_length,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr.ip6 = *address,
+ };
+
+ a->neighbor_probe_adj_index = ~0;
+ if (a->address_length < 128)
+ {
+ fib_node_index_t fei;
+
+ fei = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), FIB_PROTOCOL_IP6, NULL, /* No next-hop address */
+ sw_if_index, ~0, // invalid FIB index
+ 1, NULL, // no label stack
+ FIB_ROUTE_PATH_FLAG_NONE);
+ a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
+ }
+
+ pfx.fp_len = 128;
+ if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
+ {
+ u32 classify_table_index =
+ lm->classify_table_index_by_sw_if_index[sw_if_index];
+ if (classify_table_index != (u32) ~ 0)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ dpo_set (&dpo,
+ DPO_CLASSIFY,
+ DPO_PROTO_IP6,
+ classify_dpo_create (DPO_PROTO_IP6, classify_table_index));
+
+ fib_table_entry_special_dpo_add (fib_index,
+ &pfx,
+ FIB_SOURCE_CLASSIFY,
+ FIB_ENTRY_FLAG_NONE, &dpo);
+ dpo_reset (&dpo);
+ }
+ }
+
+ fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), FIB_PROTOCOL_IP6, &pfx.fp_addr, sw_if_index, ~0, // invalid FIB index
+ 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
+}
+
+static void
+ip6_del_interface_routes (ip6_main_t * im,
+ u32 fib_index,
+ ip6_address_t * address, u32 address_length)
+{
+ fib_prefix_t pfx = {
+ .fp_len = address_length,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr.ip6 = *address,
+ };
+
+ if (pfx.fp_len < 128)
+ {
+ fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
+
+ }
+
+ pfx.fp_len = 128;
+ fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
+}
+
+void
+ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
+{
+ ip6_main_t *im = &ip6_main;
+
+ vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
+
+ /*
+ * enable/disable only on the 1<->0 transition
+ */
+ if (is_enable)
+ {
+ if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
+ return;
+ }
+ else
+ {
+ ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
+ if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
+ return;
+ }
+
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-lookup", sw_if_index,
+ is_enable, 0, 0);
+
+ vnet_feature_enable_disable ("ip6-multicast", "ip6-lookup", sw_if_index,
+ is_enable, 0, 0);
+
+}
+
+/* get first interface address */
+ip6_address_t *
+ip6_interface_first_address (ip6_main_t * im,
+ u32 sw_if_index,
+ ip_interface_address_t ** result_ia)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_address_t *ia = 0;
+ ip6_address_t *result = 0;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ ip6_address_t * a = ip_interface_address_get_address (lm, ia);
+ result = a;
+ break;
+ }));
+ /* *INDENT-ON* */
+ if (result_ia)
+ *result_ia = result ? ia : 0;
+ return result;
+}
+
+clib_error_t *
+ip6_add_del_interface_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length, u32 is_del)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ clib_error_t *error;
+ u32 if_address_index;
+ ip6_address_fib_t ip6_af, *addr_fib = 0;
+
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+ ip6_addr_fib_init (&ip6_af, address,
+ vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
+ vec_add1 (addr_fib, ip6_af);
+
+ {
+ uword elts_before = pool_elts (lm->if_address_pool);
+
+ error = ip_interface_address_add_del
+ (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
+ if (error)
+ goto done;
+
+ /* Pool did not grow: add duplicate address. */
+ if (elts_before == pool_elts (lm->if_address_pool))
+ goto done;
+ }
+
+ ip6_sw_interface_enable_disable (sw_if_index, !is_del);
+
+ if (is_del)
+ ip6_del_interface_routes (im, ip6_af.fib_index, address, address_length);
+ else
+ ip6_add_interface_routes (vnm, sw_if_index,
+ im, ip6_af.fib_index,
+ pool_elt_at_index (lm->if_address_pool,
+ if_address_index));
+
+ {
+ ip6_add_del_interface_address_callback_t *cb;
+ vec_foreach (cb, im->add_del_interface_address_callbacks)
+ cb->function (im, cb->function_opaque, sw_if_index,
+ address, address_length, if_address_index, is_del);
+ }
+
+done:
+ vec_free (addr_fib);
+ return error;
+}
+
+clib_error_t *
+ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+ ip6_main_t *im = &ip6_main;
+ ip_interface_address_t *ia;
+ ip6_address_t *a;
+ u32 is_admin_up, fib_index;
+
+ /* Fill in lookup tables with default table (0). */
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+
+ vec_validate_init_empty (im->
+ lookup_main.if_address_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+
+ is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered */,
+ ({
+ a = ip_interface_address_get_address (&im->lookup_main, ia);
+ if (is_admin_up)
+ ip6_add_interface_routes (vnm, sw_if_index,
+ im, fib_index,
+ ia);
+ else
+ ip6_del_interface_routes (im, fib_index,
+ a, ia->address_length);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
+
+/* Built-in ip6 unicast rx feature path definition */
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
+{
+ .arc_name = "ip6-unicast",
+ .start_nodes = VNET_FEATURES ("ip6-input"),
+ .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip6_flow_classify, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-flow-classify",
+ .runs_before = VNET_FEATURES ("ip6-inacl"),
+};
+
+VNET_FEATURE_INIT (ip6_inacl, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-inacl",
+ .runs_before = VNET_FEATURES ("ip6-policer-classify"),
+};
+
+VNET_FEATURE_INIT (ip6_policer_classify, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-policer-classify",
+ .runs_before = VNET_FEATURES ("ipsec-input-ip6"),
+};
+
+VNET_FEATURE_INIT (ip6_ipsec, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ipsec-input-ip6",
+ .runs_before = VNET_FEATURES ("l2tp-decap"),
+};
+
+VNET_FEATURE_INIT (ip6_l2tp, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "l2tp-decap",
+ .runs_before = VNET_FEATURES ("vpath-input-ip6"),
+};
+
+VNET_FEATURE_INIT (ip6_vpath, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "vpath-input-ip6",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+
+VNET_FEATURE_INIT (ip6_lookup, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-lookup",
+ .runs_before = VNET_FEATURES ("ip6-drop"),
+};
+
+VNET_FEATURE_INIT (ip6_drop, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-drop",
+ .runs_before = 0, /*last feature*/
+};
+
+/* Built-in ip6 multicast rx feature path definition (none now) */
+VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
+{
+ .arc_name = "ip6-multicast",
+ .start_nodes = VNET_FEATURES ("ip6-input"),
+ .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
+ .arc_name = "ip6-multicast",
+ .node_name = "vpath-input-ip6",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+
+VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
+ .arc_name = "ip6-multicast",
+ .node_name = "ip6-lookup",
+ .runs_before = VNET_FEATURES ("ip6-drop"),
+};
+
+VNET_FEATURE_INIT (ip6_drop_mc, static) = {
+ .arc_name = "ip6-multicast",
+ .node_name = "ip6-drop",
+ .runs_before = 0, /* last feature */
+};
+
+/* Built-in ip4 tx feature path definition */
+VNET_FEATURE_ARC_INIT (ip6_output, static) =
+{
+ .arc_name = "ip6-output",
+ .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain"),
+ .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "ipsec-output-ip6",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+VNET_FEATURE_INIT (ip6_interface_output, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "interface-output",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
+{
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-drop", sw_if_index,
+ is_add, 0, 0);
+
+ vnet_feature_enable_disable ("ip6-multicast", "ip6-drop", sw_if_index,
+ is_add, 0, 0);
+
+ vnet_feature_enable_disable ("ip6-output", "interface-output", sw_if_index,
+ is_add, 0, 0);
+
+ return /* no error */ 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
+
+static uword
+ip6_lookup (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip6_lookup_inline (vm, node, frame);
+}
+
+static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_lookup_node) =
+{
+ .function = ip6_lookup,
+ .name = "ip6-lookup",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_lookup_trace,
+ .n_next_nodes = IP6_LOOKUP_N_NEXT,
+ .next_nodes = IP6_LOOKUP_NEXT_NODES,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup);
+
+always_inline uword
+ip6_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
+ u32 n_left_from, n_left_to_next, *from, *to_next;
+ ip_lookup_next_t next;
+ u32 cpu_index = os_get_cpu_number ();
+ ip6_main_t *im = &ip6_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ ip_lookup_next_t next0, next1;
+ const load_balance_t *lb0, *lb1;
+ vlib_buffer_t *p0, *p1;
+ u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
+ const ip6_header_t *ip0, *ip1;
+ const dpo_id_t *dpo0, *dpo1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
+
+ /*
+ * this node is for via FIBs we can re-use the hash value from the
+ * to node if present.
+ * We don't want to use the same hash value at each level in the recursion
+ * graph as that would lead to polarisation
+ */
+ hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, hc0);
+ }
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ vnet_buffer (p1)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ ip6_compute_flow_hash (ip1, hc1);
+ }
+ }
+
+ dpo0 =
+ load_balance_get_bucket_i (lb0,
+ hc0 & (lb0->lb_n_buckets_minus_1));
+ dpo1 =
+ load_balance_get_bucket_i (lb1,
+ hc1 & (lb1->lb_n_buckets_minus_1));
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+ }
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next1 = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_lookup_next_t next0;
+ const load_balance_t *lb0;
+ vlib_buffer_t *p0;
+ u32 pi0, lbi0, hc0;
+ const ip6_header_t *ip0;
+ const dpo_id_t *dpo0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get (lbi0);
+
+ hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, hc0);
+ }
+ }
+ dpo0 =
+ load_balance_get_bucket_i (lb0,
+ hc0 & (lb0->lb_n_buckets_minus_1));
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+ }
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_load_balance_node) =
+{
+ .function = ip6_load_balance,
+ .name = "ip6-load-balance",
+ .vector_size = sizeof (u32),
+ .sibling_of = "ip6-lookup",
+ .format_trace = format_ip6_lookup_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance);
+
+typedef struct
+{
+ /* Adjacency taken. */
+ u32 adj_index;
+ u32 flow_hash;
+ u32 fib_index;
+
+ /* Packet data, possibly *after* rewrite. */
+ u8 packet_data[128 - 1 * sizeof (u32)];
+}
+ip6_forward_next_trace_t;
+
+static u8 *
+format_ip6_forward_next_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "%U%U",
+ format_white_space, indent,
+ format_ip6_header, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+static u8 *
+format_ip6_lookup_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
+ t->fib_index, t->adj_index, t->flow_hash);
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_ip6_header, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+
+static u8 *
+format_ip6_rewrite_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ uword indent = format_get_indent (s);
+
+ s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
+ t->fib_index, t->adj_index, format_ip_adjacency,
+ t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_ip_adjacency_packet_data,
+ vnm, t->adj_index, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+/* Common trace function for all ip6-forward next nodes. */
+void
+ip6_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
+{
+ u32 *from, n_left;
+ ip6_main_t *im = &ip6_main;
+
+ n_left = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ while (n_left >= 4)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ ip6_forward_next_trace_t *t0, *t1;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
+
+ bi0 = from[0];
+ bi1 = from[1];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
+ t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
+ t0->fib_index =
+ (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
+ (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+
+ clib_memcpy (t0->packet_data,
+ vlib_buffer_get_current (b0),
+ sizeof (t0->packet_data));
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
+ t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
+ t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
+ t1->fib_index =
+ (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
+ (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (b1)->sw_if_index[VLIB_RX]);
+
+ clib_memcpy (t1->packet_data,
+ vlib_buffer_get_current (b1),
+ sizeof (t1->packet_data));
+ }
+ from += 2;
+ n_left -= 2;
+ }
+
+ while (n_left >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_forward_next_trace_t *t0;
+
+ bi0 = from[0];
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
+ t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
+ t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
+ t0->fib_index =
+ (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
+ (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+
+ clib_memcpy (t0->packet_data,
+ vlib_buffer_get_current (b0),
+ sizeof (t0->packet_data));
+ }
+ from += 1;
+ n_left -= 1;
+ }
+}
+
+static uword
+ip6_drop_or_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, ip6_error_t error_code)
+{
+ u32 *buffers = vlib_frame_vector_args (frame);
+ uword n_packets = frame->n_vectors;
+
+ vlib_error_drop_buffers (vm, node, buffers,
+ /* stride */ 1,
+ n_packets,
+ /* next */ 0,
+ ip6_input_node.index, error_code);
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return n_packets;
+}
+
+static uword
+ip6_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_DROP);
+}
+
+static uword
+ip6_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_drop_node, static) =
+{
+ .function = ip6_drop,
+ .name = "ip6-drop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_next_nodes = 1,
+ .next_nodes =
+ {
+ [0] = "error-drop",},
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_punt_node, static) =
+{
+ .function = ip6_punt,
+ .name = "ip6-punt",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_next_nodes = 1,
+ .next_nodes =
+ {
+ [0] = "error-punt",},
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_multicast_node, static) =
+{
+ .function = ip6_drop,
+ .name = "ip6-multicast",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_next_nodes = 1,
+ .next_nodes =
+ {
+ [0] = "error-drop",
+ },
+};
+
+/* *INDENT-ON* */
+
+/* Compute TCP/UDP/ICMP6 checksum in software. */
+u16
+ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+ ip6_header_t * ip0, int *bogus_lengthp)
+{
+ ip_csum_t sum0;
+ u16 sum16, payload_length_host_byte_order;
+ u32 i, n_this_buffer, n_bytes_left;
+ u32 headers_size = sizeof (ip0[0]);
+ void *data_this_buffer;
+
+ ASSERT (bogus_lengthp);
+ *bogus_lengthp = 0;
+
+ /* Initialize checksum with ip header. */
+ sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
+ payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
+ data_this_buffer = (void *) (ip0 + 1);
+
+ for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
+ {
+ sum0 = ip_csum_with_carry (sum0,
+ clib_mem_unaligned (&ip0->
+ src_address.as_uword[i],
+ uword));
+ sum0 =
+ ip_csum_with_carry (sum0,
+ clib_mem_unaligned (&ip0->dst_address.as_uword[i],
+ uword));
+ }
+
+ /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
+ if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ u32 skip_bytes;
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) data_this_buffer;
+
+ /* validate really icmp6 next */
+ ASSERT (ext_hdr->next_hdr == IP_PROTOCOL_ICMP6);
+
+ skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
+ data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
+
+ payload_length_host_byte_order -= skip_bytes;
+ headers_size += skip_bytes;
+ }
+
+ n_bytes_left = n_this_buffer = payload_length_host_byte_order;
+ if (p0 && n_this_buffer + headers_size > p0->current_length)
+ n_this_buffer =
+ p0->current_length >
+ headers_size ? p0->current_length - headers_size : 0;
+ while (1)
+ {
+ sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
+ n_bytes_left -= n_this_buffer;
+ if (n_bytes_left == 0)
+ break;
+
+ if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ *bogus_lengthp = 1;
+ return 0xfefe;
+ }
+ p0 = vlib_get_buffer (vm, p0->next_buffer);
+ data_this_buffer = vlib_buffer_get_current (p0);
+ n_this_buffer = p0->current_length;
+ }
+
+ sum16 = ~ip_csum_fold (sum0);
+
+ return sum16;
+}
+
+u32
+ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
+{
+ ip6_header_t *ip0 = vlib_buffer_get_current (p0);
+ udp_header_t *udp0;
+ u16 sum16;
+ int bogus_length;
+
+ /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
+ ASSERT (ip0->protocol == IP_PROTOCOL_TCP
+ || ip0->protocol == IP_PROTOCOL_ICMP6
+ || ip0->protocol == IP_PROTOCOL_UDP
+ || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
+
+ udp0 = (void *) (ip0 + 1);
+ if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
+ {
+ p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
+ | IP_BUFFER_L4_CHECKSUM_CORRECT);
+ return p0->flags;
+ }
+
+ sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
+
+ p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
+ | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
+
+ return p0->flags;
+}
+
+/* ip6_locate_header
+ *
+ * This function is to search for the header specified by the find_hdr number.
+ * 1. If the find_hdr < 0 then it finds and returns the protocol number and
+ * offset stored in *offset of the transport or ESP header in the chain if
+ * found.
+ * 2. If a header with find_hdr > 0 protocol number is found then the
+ * offset is stored in *offset and protocol number of the header is
+ * returned.
+ * 3. If find_hdr header is not found or packet is malformed or
+ * it is a non-first fragment -1 is returned.
+ */
+always_inline int
+ip6_locate_header (vlib_buffer_t * p0,
+ ip6_header_t * ip0, int find_hdr, u32 * offset)
+{
+ u8 next_proto = ip0->protocol;
+ u8 *next_header;
+ u8 done = 0;
+ u32 cur_offset;
+ u8 *temp_nxthdr = 0;
+ u32 exthdr_len = 0;
+
+ next_header = ip6_next_header (ip0);
+ cur_offset = sizeof (ip6_header_t);
+ while (1)
+ {
+ done = (next_proto == find_hdr);
+ if (PREDICT_FALSE
+ (next_header >=
+ (u8 *) vlib_buffer_get_current (p0) + p0->current_length))
+ {
+ //A malicious packet could set an extension header with a too big size
+ return (-1);
+ }
+ if (done)
+ break;
+ if ((!ip6_ext_hdr (next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT)
+ {
+ if (find_hdr < 0)
+ break;
+ return -1;
+ }
+ if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) next_header;
+ u16 frag_off = ip6_frag_hdr_offset (frag_hdr);
+ /* Non first fragment return -1 */
+ if (frag_off)
+ return (-1);
+ exthdr_len = sizeof (ip6_frag_hdr_t);
+ temp_nxthdr = next_header + exthdr_len;
+ }
+ else if (next_proto == IP_PROTOCOL_IPSEC_AH)
+ {
+ exthdr_len =
+ ip6_ext_authhdr_len (((ip6_ext_header_t *) next_header));
+ temp_nxthdr = next_header + exthdr_len;
+ }
+ else
+ {
+ exthdr_len =
+ ip6_ext_header_len (((ip6_ext_header_t *) next_header));
+ temp_nxthdr = next_header + exthdr_len;
+ }
+ next_proto = ((ip6_ext_header_t *) next_header)->next_hdr;
+ next_header = temp_nxthdr;
+ cur_offset += exthdr_len;
+ }
+
+ *offset = cur_offset;
+ return (next_proto);
+}
+
+static uword
+ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_local_next_t next_index;
+ u32 *from, *to_next, n_left_from, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_input_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ udp_header_t *udp0, *udp1;
+ u32 pi0, ip_len0, udp_len0, flags0, next0;
+ u32 pi1, ip_len1, udp_len1, flags1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, type0, good_l4_checksum0;
+ u8 error1, type1, good_l4_checksum1;
+ u32 udp_offset0, udp_offset1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
+ type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol];
+
+ next0 = lm->local_next_by_ip_protocol[ip0->protocol];
+ next1 = lm->local_next_by_ip_protocol[ip1->protocol];
+
+ flags0 = p0->flags;
+ flags1 = p1->flags;
+
+ good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ good_l4_checksum1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ len_diff0 = 0;
+ len_diff1 = 0;
+
+ /* Skip HBH local processing */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+ next0 = lm->local_next_by_ip_protocol[ext_hdr->next_hdr];
+ type0 = lm->builtin_protocol_by_ip_protocol[ext_hdr->next_hdr];
+ }
+ if (PREDICT_FALSE
+ (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1);
+ next1 = lm->local_next_by_ip_protocol[ext_hdr->next_hdr];
+ type1 = lm->builtin_protocol_by_ip_protocol[ext_hdr->next_hdr];
+ }
+ if (PREDICT_TRUE (IP_PROTOCOL_UDP == ip6_locate_header (p0, ip0,
+ IP_PROTOCOL_UDP,
+ &udp_offset0)))
+ {
+ udp0 = (udp_header_t *) ((u8 *) ip0 + udp_offset0);
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP
+ && udp0->checksum == 0;
+ /* Verify UDP length. */
+ ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+ }
+ if (PREDICT_TRUE (IP_PROTOCOL_UDP == ip6_locate_header (p1, ip1,
+ IP_PROTOCOL_UDP,
+ &udp_offset1)))
+ {
+ udp1 = (udp_header_t *) ((u8 *) ip1 + udp_offset1);
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP
+ && udp1->checksum == 0;
+ /* Verify UDP length. */
+ ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
+ udp_len1 = clib_net_to_host_u16 (udp1->length);
+ len_diff1 = ip_len1 - udp_len1;
+ }
+
+ good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
+ good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN;
+
+ len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
+ len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0;
+
+ if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
+ && !good_l4_checksum0
+ && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
+ {
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
+ good_l4_checksum0 =
+ (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ }
+ if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN
+ && !good_l4_checksum1
+ && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
+ {
+ flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1);
+ good_l4_checksum1 =
+ (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL;
+
+ error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
+ error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1;
+
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
+ IP6_ERROR_UDP_CHECKSUM);
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
+ IP6_ERROR_ICMP_CHECKSUM);
+ error0 =
+ (!good_l4_checksum0 ? IP6_ERROR_UDP_CHECKSUM + type0 : error0);
+ error1 =
+ (!good_l4_checksum1 ? IP6_ERROR_UDP_CHECKSUM + type1 : error1);
+
+ /* Drop packets from unroutable hosts. */
+ /* If this is a neighbor solicitation (ICMP), skip source RPF check */
+ if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
+ type0 != IP_BUILTIN_PROTOCOL_ICMP &&
+ !ip6_address_is_link_local_unicast (&ip0->src_address))
+ {
+ u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
+ error0 = (ADJ_INDEX_INVALID == src_adj_index0
+ ? IP6_ERROR_SRC_LOOKUP_MISS : error0);
+ }
+ if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL &&
+ type1 != IP_BUILTIN_PROTOCOL_ICMP &&
+ !ip6_address_is_link_local_unicast (&ip1->src_address))
+ {
+ u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1);
+ error1 = (ADJ_INDEX_INVALID == src_adj_index1
+ ? IP6_ERROR_SRC_LOOKUP_MISS : error1);
+ }
+
+ next0 =
+ error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+ next1 =
+ error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ udp_header_t *udp0;
+ u32 pi0, ip_len0, udp_len0, flags0, next0;
+ i32 len_diff0;
+ u8 error0, type0, good_l4_checksum0;
+ u32 udp_offset0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
+ next0 = lm->local_next_by_ip_protocol[ip0->protocol];
+
+ flags0 = p0->flags;
+
+ good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ len_diff0 = 0;
+
+ /* Skip HBH local processing */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+ next0 = lm->local_next_by_ip_protocol[ext_hdr->next_hdr];
+ type0 = lm->builtin_protocol_by_ip_protocol[ext_hdr->next_hdr];
+ }
+ if (PREDICT_TRUE (IP_PROTOCOL_UDP == ip6_locate_header (p0, ip0,
+ IP_PROTOCOL_UDP,
+ &udp_offset0)))
+ {
+ udp0 = (udp_header_t *) ((u8 *) ip0 + udp_offset0);
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP
+ && udp0->checksum == 0;
+ /* Verify UDP length. */
+ ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+ }
+
+ good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
+ len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
+
+ if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
+ && !good_l4_checksum0
+ && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
+ {
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
+ good_l4_checksum0 =
+ (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
+
+ error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
+
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
+ IP6_ERROR_UDP_CHECKSUM);
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
+ IP6_ERROR_ICMP_CHECKSUM);
+ error0 =
+ (!good_l4_checksum0 ? IP6_ERROR_UDP_CHECKSUM + type0 : error0);
+
+ /* If this is a neighbor solicitation (ICMP), skip source RPF check */
+ if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
+ type0 != IP_BUILTIN_PROTOCOL_ICMP &&
+ !ip6_address_is_link_local_unicast (&ip0->src_address))
+ {
+ u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
+ error0 = (ADJ_INDEX_INVALID == src_adj_index0
+ ? IP6_ERROR_SRC_LOOKUP_MISS : error0);
+ }
+
+ next0 =
+ error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+
+ p0->error = error_node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_local_node, static) =
+{
+ .function = ip6_local,
+ .name = "ip6-local",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_next_nodes = IP_LOCAL_N_NEXT,
+ .next_nodes =
+ {
+ [IP_LOCAL_NEXT_DROP] = "error-drop",
+ [IP_LOCAL_NEXT_PUNT] = "error-punt",
+ [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
+ [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local);
+
+void
+ip6_register_protocol (u32 protocol, u32 node_index)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+
+ ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
+ lm->local_next_by_ip_protocol[protocol] =
+ vlib_node_add_next (vm, ip6_local_node.index, node_index);
+}
+
+typedef enum
+{
+ IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
+ IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX,
+ IP6_DISCOVER_NEIGHBOR_N_NEXT,
+} ip6_discover_neighbor_next_t;
+
+typedef enum
+{
+ IP6_DISCOVER_NEIGHBOR_ERROR_DROP,
+ IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT,
+ IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS,
+} ip6_discover_neighbor_error_t;
+
+static uword
+ip6_discover_neighbor_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_glean)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ u32 *from, *to_next_drop;
+ uword n_left_from, n_left_to_next_drop;
+ static f64 time_last_seed_change = -1e100;
+ static u32 hash_seeds[3];
+ static uword hash_bitmap[256 / BITS (uword)];
+ f64 time_now;
+ int bogus_length;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ time_now = vlib_time_now (vm);
+ if (time_now - time_last_seed_change > 1e-3)
+ {
+ uword i;
+ u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
+ sizeof (hash_seeds));
+ for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
+ hash_seeds[i] = r[i];
+
+ /* Mark all hash keys as been not-seen before. */
+ for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
+ hash_bitmap[i] = 0;
+
+ time_last_seed_change = time_now;
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
+ to_next_drop, n_left_to_next_drop);
+
+ while (n_left_from > 0 && n_left_to_next_drop > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
+ uword bm0;
+ ip_adjacency_t *adj0;
+ vnet_hw_interface_t *hw_if0;
+ u32 next0;
+
+ pi0 = from[0];
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ adj0 = ip_get_adjacency (lm, adj_index0);
+
+ if (!is_glean)
+ {
+ ip0->dst_address.as_u64[0] =
+ adj0->sub_type.nbr.next_hop.ip6.as_u64[0];
+ ip0->dst_address.as_u64[1] =
+ adj0->sub_type.nbr.next_hop.ip6.as_u64[1];
+ }
+
+ a0 = hash_seeds[0];
+ b0 = hash_seeds[1];
+ c0 = hash_seeds[2];
+
+ sw_if_index0 = adj0->rewrite_header.sw_if_index;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+ a0 ^= sw_if_index0;
+ b0 ^= ip0->dst_address.as_u32[0];
+ c0 ^= ip0->dst_address.as_u32[1];
+
+ hash_v3_mix32 (a0, b0, c0);
+
+ b0 ^= ip0->dst_address.as_u32[2];
+ c0 ^= ip0->dst_address.as_u32[3];
+
+ hash_v3_finalize32 (a0, b0, c0);
+
+ c0 &= BITS (hash_bitmap) - 1;
+ c0 = c0 / BITS (uword);
+ m0 = (uword) 1 << (c0 % BITS (uword));
+
+ bm0 = hash_bitmap[c0];
+ drop0 = (bm0 & m0) != 0;
+
+ /* Mark it as seen. */
+ hash_bitmap[c0] = bm0 | m0;
+
+ from += 1;
+ n_left_from -= 1;
+ to_next_drop[0] = pi0;
+ to_next_drop += 1;
+ n_left_to_next_drop -= 1;
+
+ hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+ /* If the interface is link-down, drop the pkt */
+ if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
+ drop0 = 1;
+
+ p0->error =
+ node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP
+ : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT];
+ if (drop0)
+ continue;
+
+ /*
+ * the adj has been updated to a rewrite but the node the DPO that got
+ * us here hasn't - yet. no big deal. we'll drop while we wait.
+ */
+ if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
+ continue;
+
+ {
+ u32 bi0 = 0;
+ icmp6_neighbor_solicitation_header_t *h0;
+ vlib_buffer_t *b0;
+
+ h0 = vlib_packet_template_get_packet
+ (vm, &im->discover_neighbor_packet_template, &bi0);
+
+ /*
+ * Build ethernet header.
+ * Choose source address based on destination lookup
+ * adjacency.
+ */
+ if (ip6_src_address_for_packet (lm,
+ sw_if_index0,
+ &h0->ip.src_address))
+ {
+ /* There is no address on the interface */
+ p0->error =
+ node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS];
+ vlib_buffer_free (vm, &bi0, 1);
+ continue;
+ }
+
+ /*
+ * Destination address is a solicited node multicast address.
+ * We need to fill in
+ * the low 24 bits with low 24 bits of target's address.
+ */
+ h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13];
+ h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14];
+ h0->ip.dst_address.as_u8[15] = ip0->dst_address.as_u8[15];
+
+ h0->neighbor.target_address = ip0->dst_address;
+
+ clib_memcpy (h0->link_layer_option.ethernet_address,
+ hw_if0->hw_address, vec_len (hw_if0->hw_address));
+
+ /* $$$$ appears we need this; why is the checksum non-zero? */
+ h0->neighbor.icmp.checksum = 0;
+ h0->neighbor.icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip,
+ &bogus_length);
+
+ ASSERT (bogus_length == 0);
+
+ vlib_buffer_copy_trace_flag (vm, p0, bi0);
+ b0 = vlib_get_buffer (vm, bi0);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX]
+ = vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ /* Add rewrite/encap string. */
+ vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
+ vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+
+ next0 = IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX;
+
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
+ n_left_to_next_drop);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+ip6_discover_neighbor (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return (ip6_discover_neighbor_inline (vm, node, frame, 0));
+}
+
+static uword
+ip6_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return (ip6_discover_neighbor_inline (vm, node, frame, 1));
+}
+
+static char *ip6_discover_neighbor_error_strings[] = {
+ [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops",
+ [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT] = "neighbor solicitations sent",
+ [IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS]
+ = "no source address for ND solicitation",
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_discover_neighbor_node) =
+{
+ .function = ip6_discover_neighbor,
+ .name = "ip6-discover-neighbor",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
+ .error_strings = ip6_discover_neighbor_error_strings,
+ .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
+ [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_glean_node) =
+{
+ .function = ip6_glean,
+ .name = "ip6-glean",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
+ .error_strings = ip6_discover_neighbor_error_strings,
+ .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
+ [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ icmp6_neighbor_solicitation_header_t *h;
+ ip6_address_t *src;
+ ip_interface_address_t *ia;
+ ip_adjacency_t *adj;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ vlib_buffer_t *b;
+ u32 bi = 0;
+ int bogus_length;
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ return clib_error_return (0, "%U: interface %U down",
+ format_ip6_address, dst,
+ format_vnet_sw_if_index_name, vnm,
+ sw_if_index);
+ }
+
+ src =
+ ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
+ if (!src)
+ {
+ vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
+ return clib_error_return
+ (0, "no matching interface address for destination %U (interface %U)",
+ format_ip6_address, dst,
+ format_vnet_sw_if_index_name, vnm, sw_if_index);
+ }
+
+ h =
+ vlib_packet_template_get_packet (vm,
+ &im->discover_neighbor_packet_template,
+ &bi);
+
+ hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+ /* Destination address is a solicited node multicast address. We need to fill in
+ the low 24 bits with low 24 bits of target's address. */
+ h->ip.dst_address.as_u8[13] = dst->as_u8[13];
+ h->ip.dst_address.as_u8[14] = dst->as_u8[14];
+ h->ip.dst_address.as_u8[15] = dst->as_u8[15];
+
+ h->ip.src_address = src[0];
+ h->neighbor.target_address = dst[0];
+
+ clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address,
+ vec_len (hi->hw_address));
+
+ h->neighbor.icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+
+ /* Add encapsulation string for software interface (e.g. ethernet header). */
+ adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
+ vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+ vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+ {
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+ u32 *to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi->output_node_index, f);
+ }
+
+ return /* no error */ 0;
+}
+
+typedef enum
+{
+ IP6_REWRITE_NEXT_DROP,
+ IP6_REWRITE_NEXT_ICMP_ERROR,
+} ip6_rewrite_next_t;
+
+always_inline uword
+ip6_rewrite_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_midchain)
+{
+ ip_lookup_main_t *lm = &ip6_main.lookup_main;
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from, n_left_to_next, *to_next, next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_input_node.index);
+
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 cpu_index = os_get_cpu_number ();
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ ip_adjacency_t *adj0, *adj1;
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ u32 pi0, rw_len0, next0, error0, adj_index0;
+ u32 pi1, rw_len1, next1, error1, adj_index1;
+ u32 tx_sw_if_index0, tx_sw_if_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->pre_data, 32, STORE);
+ CLIB_PREFETCH (p3->pre_data, 32, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ /* We should never rewrite a pkt using the MISS adjacency */
+ ASSERT (adj_index0 && adj_index1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ error0 = error1 = IP6_ERROR_NONE;
+ next0 = next1 = IP6_REWRITE_NEXT_DROP;
+
+ if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
+ {
+ i32 hop_limit0 = ip0->hop_limit;
+
+ /* Input node should have reject packets with hop limit 0. */
+ ASSERT (ip0->hop_limit > 0);
+
+ hop_limit0 -= 1;
+
+ ip0->hop_limit = hop_limit0;
+
+ /*
+ * If the hop count drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ if (PREDICT_FALSE (hop_limit0 <= 0))
+ {
+ error0 = IP6_ERROR_TIME_EXPIRED;
+ next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ }
+ }
+ else
+ {
+ p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
+ if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
+ {
+ i32 hop_limit1 = ip1->hop_limit;
+
+ /* Input node should have reject packets with hop limit 0. */
+ ASSERT (ip1->hop_limit > 0);
+
+ hop_limit1 -= 1;
+
+ ip1->hop_limit = hop_limit1;
+
+ /*
+ * If the hop count drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ if (PREDICT_FALSE (hop_limit1 <= 0))
+ {
+ error1 = IP6_ERROR_TIME_EXPIRED;
+ next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ }
+ }
+ else
+ {
+ p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
+ adj0 = ip_get_adjacency (lm, adj_index0);
+ adj1 = ip_get_adjacency (lm, adj_index1);
+
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ rw_len1 = adj1[0].rewrite_header.data_bytes;
+ vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+ vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
+
+ vlib_increment_combined_counter (&adjacency_counters,
+ cpu_index, adj_index0,
+ /* packet increment */ 0,
+ /* byte increment */ rw_len0);
+ vlib_increment_combined_counter (&adjacency_counters,
+ cpu_index, adj_index1,
+ /* packet increment */ 0,
+ /* byte increment */ rw_len1);
+
+ /* Check MTU of outgoing interface. */
+ error0 =
+ (vlib_buffer_length_in_chain (vm, p0) >
+ adj0[0].
+ rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
+ error0);
+ error1 =
+ (vlib_buffer_length_in_chain (vm, p1) >
+ adj1[0].
+ rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
+ error1);
+
+ /* Don't adjust the buffer for hop count issue; icmp-error node
+ * wants to see the IP headerr */
+ if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
+ {
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+ next0 = adj0[0].rewrite_header.next_index;
+
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index0, &next0, p0);
+ }
+ if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
+ {
+ p1->current_data -= rw_len1;
+ p1->current_length += rw_len1;
+
+ tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
+ next1 = adj1[0].rewrite_header.next_index;
+
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index1, &next1, p1);
+ }
+
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_two_headers (adj0[0], adj1[0],
+ ip0, ip1, sizeof (ethernet_header_t));
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
+ adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_adjacency_t *adj0;
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ u32 pi0, rw_len0;
+ u32 adj_index0, next0, error0;
+ u32 tx_sw_if_index0;
+
+ pi0 = to_next[0] = from[0];
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ /* We should never rewrite a pkt using the MISS adjacency */
+ ASSERT (adj_index0);
+
+ adj0 = ip_get_adjacency (lm, adj_index0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ error0 = IP6_ERROR_NONE;
+ next0 = IP6_REWRITE_NEXT_DROP;
+
+ /* Check hop limit */
+ if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
+ {
+ i32 hop_limit0 = ip0->hop_limit;
+
+ ASSERT (ip0->hop_limit > 0);
+
+ hop_limit0 -= 1;
+
+ ip0->hop_limit = hop_limit0;
+
+ if (PREDICT_FALSE (hop_limit0 <= 0))
+ {
+ /*
+ * If the hop count drops below 1 when forwarding, generate
+ * an ICMP response.
+ */
+ error0 = IP6_ERROR_TIME_EXPIRED;
+ next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ }
+ }
+ else
+ {
+ p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
+ }
+
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+
+ vlib_increment_combined_counter (&adjacency_counters,
+ cpu_index, adj_index0,
+ /* packet increment */ 0,
+ /* byte increment */ rw_len0);
+
+ /* Check MTU of outgoing interface. */
+ error0 =
+ (vlib_buffer_length_in_chain (vm, p0) >
+ adj0[0].
+ rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
+ error0);
+
+ /* Don't adjust the buffer for hop count issue; icmp-error node
+ * wants to see the IP headerr */
+ if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
+ {
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+
+ tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+ next0 = adj0[0].rewrite_header.next_index;
+
+ vnet_feature_arc_start (lm->output_feature_arc_index,
+ tx_sw_if_index0, &next0, p0);
+ }
+
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
+ }
+
+ p0->error = error_node->errors[error0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Need to do trace after rewrites to pick up new packet data. */
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return frame->n_vectors;
+}
+
+static uword
+ip6_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip6_rewrite_inline (vm, node, frame,
+ /* midchain */ 0);
+}
+
+static uword
+ip6_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip6_rewrite_inline (vm, node, frame,
+ /* midchain */ 1);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_midchain_node) =
+{
+ .function = ip6_midchain,
+ .name = "ip6-midchain",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .sibling_of = "ip6-rewrite",
+ };
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_rewrite_node) =
+{
+ .function = ip6_rewrite,
+ .name = "ip6-rewrite",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_rewrite_trace,
+ .n_next_nodes = 2,
+ .next_nodes =
+ {
+ [IP6_REWRITE_NEXT_DROP] = "error-drop",
+ [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite);
+
+/*
+ * Hop-by-Hop handling
+ */
+
+ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
+
+#define foreach_ip6_hop_by_hop_error \
+_(PROCESSED, "pkts with ip6 hop-by-hop options") \
+_(FORMAT, "incorrectly formatted hop-by-hop options") \
+_(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
+
+typedef enum
+{
+#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip6_hop_by_hop_error
+#undef _
+ IP6_HOP_BY_HOP_N_ERROR,
+} ip6_hop_by_hop_error_t;
+
+/*
+ * Primary h-b-h handler trace support
+ * We work pretty hard on the problem for obvious reasons
+ */
+typedef struct
+{
+ u32 next_index;
+ u32 trace_len;
+ u8 option_data[256];
+} ip6_hop_by_hop_trace_t;
+
+vlib_node_registration_t ip6_hop_by_hop_node;
+
+static char *ip6_hop_by_hop_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_hop_by_hop_error
+#undef _
+};
+
+static u8 *
+format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *);
+ ip6_hop_by_hop_header_t *hbh0;
+ ip6_hop_by_hop_option_t *opt0, *limit0;
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+
+ u8 type0;
+
+ hbh0 = (ip6_hop_by_hop_header_t *) t->option_data;
+
+ s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
+ t->next_index, (hbh0->length + 1) << 3, t->trace_len);
+
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0) + t->trace_len;
+
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad, just stop */
+ opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
+ break;
+
+ default:
+ if (hm->trace[type0])
+ {
+ s = (*hm->trace[type0]) (s, opt0);
+ }
+ else
+ {
+ s =
+ format (s, "\n unrecognized option %d length %d", type0,
+ opt0->length);
+ }
+ opt0 =
+ (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (ip6_hop_by_hop_option_t));
+ break;
+ }
+ }
+ return s;
+}
+
+always_inline u8
+ip6_scan_hbh_options (vlib_buffer_t * b0,
+ ip6_header_t * ip0,
+ ip6_hop_by_hop_header_t * hbh0,
+ ip6_hop_by_hop_option_t * opt0,
+ ip6_hop_by_hop_option_t * limit0, u32 * next0)
+{
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+ u8 type0;
+ u8 error0 = 0;
+
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1 */
+ opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
+ continue;
+ case 1: /* PadN */
+ break;
+ default:
+ if (hm->options[type0])
+ {
+ if ((*hm->options[type0]) (b0, ip0, opt0) < 0)
+ {
+ error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+ return (error0);
+ }
+ }
+ else
+ {
+ /* Unrecognized mandatory option, check the two high order bits */
+ switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
+ {
+ case HBH_OPTION_TYPE_SKIP_UNKNOWN:
+ break;
+ case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
+ error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
+ *next0 = IP_LOOKUP_NEXT_DROP;
+ break;
+ case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
+ error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
+ *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
+ icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem,
+ ICMP6_parameter_problem_unrecognized_option,
+ (u8 *) opt0 - (u8 *) ip0);
+ break;
+ case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
+ error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
+ if (!ip6_address_is_multicast (&ip0->dst_address))
+ {
+ *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
+ icmp6_error_set_vnet_buffer (b0,
+ ICMP6_parameter_problem,
+ ICMP6_parameter_problem_unrecognized_option,
+ (u8 *) opt0 - (u8 *) ip0);
+ }
+ else
+ {
+ *next0 = IP_LOOKUP_NEXT_DROP;
+ }
+ break;
+ }
+ return (error0);
+ }
+ }
+ opt0 =
+ (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (ip6_hop_by_hop_option_t));
+ }
+ return (error0);
+}
+
+/*
+ * Process the Hop-by-Hop Options header
+ */
+static uword
+ip6_hop_by_hop (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index);
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+ u32 n_left_from, *from, *to_next;
+ ip_lookup_next_t next_index;
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ ip6_header_t *ip0, *ip1;
+ ip6_hop_by_hop_header_t *hbh0, *hbh1;
+ ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
+ u8 error0 = 0, error1 = 0;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ /* Speculatively enqueue b0, b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
+ u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ ip_adjacency_t *adj0 = ip_get_adjacency (lm, adj_index0);
+ u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
+ ip_adjacency_t *adj1 = ip_get_adjacency (lm, adj_index1);
+
+ /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
+ next0 = adj0->lookup_next_index;
+ next1 = adj1->lookup_next_index;
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
+ limit0 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
+ ((hbh0->length + 1) << 3));
+ limit1 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
+ ((hbh1->length + 1) << 3));
+
+ /*
+ * Basic validity checks
+ */
+ if ((hbh0->length + 1) << 3 >
+ clib_net_to_host_u16 (ip0->payload_length))
+ {
+ error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+ next0 = IP_LOOKUP_NEXT_DROP;
+ goto outdual;
+ }
+ /* Scan the set of h-b-h options, process ones that we understand */
+ error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
+
+ if ((hbh1->length + 1) << 3 >
+ clib_net_to_host_u16 (ip1->payload_length))
+ {
+ error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+ next1 = IP_LOOKUP_NEXT_DROP;
+ goto outdual;
+ }
+ /* Scan the set of h-b-h options, process ones that we understand */
+ error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1);
+
+ outdual:
+ /* Has the classifier flagged this buffer for special treatment? */
+ if (PREDICT_FALSE
+ ((error0 == 0)
+ && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
+ next0 = hm->next_override;
+
+ /* Has the classifier flagged this buffer for special treatment? */
+ if (PREDICT_FALSE
+ ((error1 == 0)
+ && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP)))
+ next1 = hm->next_override;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ u32 trace_len = (hbh0->length + 1) << 3;
+ t->next_index = next0;
+ /* Capture the h-b-h option verbatim */
+ trace_len =
+ trace_len <
+ ARRAY_LEN (t->option_data) ? trace_len :
+ ARRAY_LEN (t->option_data);
+ t->trace_len = trace_len;
+ clib_memcpy (t->option_data, hbh0, trace_len);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ u32 trace_len = (hbh1->length + 1) << 3;
+ t->next_index = next1;
+ /* Capture the h-b-h option verbatim */
+ trace_len =
+ trace_len <
+ ARRAY_LEN (t->option_data) ? trace_len :
+ ARRAY_LEN (t->option_data);
+ t->trace_len = trace_len;
+ clib_memcpy (t->option_data, hbh1, trace_len);
+ }
+
+ }
+
+ b0->error = error_node->errors[error0];
+ b1->error = error_node->errors[error1];
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ip6_header_t *ip0;
+ ip6_hop_by_hop_header_t *hbh0;
+ ip6_hop_by_hop_option_t *opt0, *limit0;
+ u8 error0 = 0;
+
+ /* Speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ /*
+ * Default use the next_index from the adjacency.
+ * A HBH option rarely redirects to a different node
+ */
+ u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ ip_adjacency_t *adj0 = ip_get_adjacency (lm, adj_index0);
+ next0 = adj0->lookup_next_index;
+
+ ip0 = vlib_buffer_get_current (b0);
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ limit0 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
+ ((hbh0->length + 1) << 3));
+
+ /*
+ * Basic validity checks
+ */
+ if ((hbh0->length + 1) << 3 >
+ clib_net_to_host_u16 (ip0->payload_length))
+ {
+ error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+ next0 = IP_LOOKUP_NEXT_DROP;
+ goto out0;
+ }
+
+ /* Scan the set of h-b-h options, process ones that we understand */
+ error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
+
+ out0:
+ /* Has the classifier flagged this buffer for special treatment? */
+ if (PREDICT_FALSE
+ ((error0 == 0)
+ && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
+ next0 = hm->next_override;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip6_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ u32 trace_len = (hbh0->length + 1) << 3;
+ t->next_index = next0;
+ /* Capture the h-b-h option verbatim */
+ trace_len =
+ trace_len <
+ ARRAY_LEN (t->option_data) ? trace_len :
+ ARRAY_LEN (t->option_data);
+ t->trace_len = trace_len;
+ clib_memcpy (t->option_data, hbh0, trace_len);
+ }
+
+ b0->error = error_node->errors[error0];
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
+{
+ .function = ip6_hop_by_hop,
+ .name = "ip6-hop-by-hop",
+ .sibling_of = "ip6-lookup",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_hop_by_hop_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings),
+ .error_strings = ip6_hop_by_hop_error_strings,
+ .n_next_nodes = 0,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop);
+
+static clib_error_t *
+ip6_hop_by_hop_init (vlib_main_t * vm)
+{
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+ memset (hm->options, 0, sizeof (hm->options));
+ memset (hm->trace, 0, sizeof (hm->trace));
+ hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
+
+void
+ip6_hbh_set_next_override (uword next)
+{
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+
+ hm->next_override = next;
+}
+
+int
+ip6_hbh_register_option (u8 option,
+ int options (vlib_buffer_t * b, ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt),
+ u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt))
+{
+ ip6_main_t *im = &ip6_main;
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+
+ ASSERT (option < ARRAY_LEN (hm->options));
+
+ /* Already registered */
+ if (hm->options[option])
+ return (-1);
+
+ hm->options[option] = options;
+ hm->trace[option] = trace;
+
+ /* Set global variable */
+ im->hbh_enabled = 1;
+
+ return (0);
+}
+
+int
+ip6_hbh_unregister_option (u8 option)
+{
+ ip6_main_t *im = &ip6_main;
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+
+ ASSERT (option < ARRAY_LEN (hm->options));
+
+ /* Not registered */
+ if (!hm->options[option])
+ return (-1);
+
+ hm->options[option] = NULL;
+ hm->trace[option] = NULL;
+
+ /* Disable global knob if this was the last option configured */
+ int i;
+ bool found = false;
+ for (i = 0; i < 256; i++)
+ {
+ if (hm->options[option])
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ im->hbh_enabled = 0;
+
+ return (0);
+}
+
+/* Global IP6 main. */
+ip6_main_t ip6_main;
+
+static clib_error_t *
+ip6_lookup_init (vlib_main_t * vm)
+{
+ ip6_main_t *im = &ip6_main;
+ clib_error_t *error;
+ uword i;
+
+ if ((error = vlib_call_init_function (vm, vnet_feature_init)))
+ return error;
+
+ for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
+ {
+ u32 j, i0, i1;
+
+ i0 = i / 32;
+ i1 = i % 32;
+
+ for (j = 0; j < i0; j++)
+ im->fib_masks[i].as_u32[j] = ~0;
+
+ if (i1)
+ im->fib_masks[i].as_u32[i0] =
+ clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
+ }
+
+ ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
+
+ if (im->lookup_table_nbuckets == 0)
+ im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
+
+ im->lookup_table_nbuckets = 1 << max_log2 (im->lookup_table_nbuckets);
+
+ if (im->lookup_table_size == 0)
+ im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
+
+ BV (clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
+ "ip6 FIB fwding table",
+ im->lookup_table_nbuckets, im->lookup_table_size);
+ BV (clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
+ "ip6 FIB non-fwding table",
+ im->lookup_table_nbuckets, im->lookup_table_size);
+
+ /* Create FIB with index 0 and table id of 0. */
+ fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0);
+
+ {
+ pg_node_t *pn;
+ pn = pg_get_node (ip6_lookup_node.index);
+ pn->unformat_edit = unformat_pg_ip6_header;
+ }
+
+ /* Unless explicitly configured, don't process HBH options */
+ im->hbh_enabled = 0;
+
+ {
+ icmp6_neighbor_solicitation_header_t p;
+
+ memset (&p, 0, sizeof (p));
+
+ p.ip.ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+ p.ip.payload_length =
+ clib_host_to_net_u16 (sizeof (p) -
+ STRUCT_OFFSET_OF
+ (icmp6_neighbor_solicitation_header_t, neighbor));
+ p.ip.protocol = IP_PROTOCOL_ICMP6;
+ p.ip.hop_limit = 255;
+ ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
+
+ p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
+
+ p.link_layer_option.header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
+ p.link_layer_option.header.n_data_u64s =
+ sizeof (p.link_layer_option) / sizeof (u64);
+
+ vlib_packet_template_init (vm,
+ &im->discover_neighbor_packet_template,
+ &p, sizeof (p),
+ /* alloc chunk size */ 8,
+ "ip6 neighbor discovery");
+ }
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ip6_lookup_init);
+
+static clib_error_t *
+add_del_ip6_interface_table (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index, table_id;
+
+ sw_if_index = ~0;
+
+ if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (unformat (input, "%d", &table_id))
+ ;
+ else
+ {
+ error = clib_error_return (0, "expected table id `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ {
+ u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6,
+ table_id);
+
+ vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
+ ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+ }
+
+
+done:
+ return error;
+}
+
+/*?
+ * Place the indicated interface into the supplied IPv6 FIB table (also known
+ * as a VRF). If the FIB table does not exist, this command creates it. To
+ * display the current IPv6 FIB table, use the command '<em>show ip6 fib</em>'.
+ * FIB table will only be displayed if a route has been added to the table, or
+ * an IP Address is assigned to an interface in the table (which adds a route
+ * automatically).
+ *
+ * @note IP addresses added after setting the interface IP table end up in
+ * the indicated FIB table. If the IP address is added prior to adding the
+ * interface to the FIB table, it will NOT be part of the FIB table. Predictable
+ * but potentially counter-intuitive results occur if you provision interface
+ * addresses in multiple FIBs. Upon RX, packets will be processed in the last
+ * IP table ID provisioned. It might be marginally useful to evade source RPF
+ * drops to put an interface address into multiple FIBs.
+ *
+ * @cliexpar
+ * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
+ * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) =
+{
+ .path = "set interface ip6 table",
+ .function = add_del_ip6_interface_table,
+ .short_help = "set interface ip6 table <interface> <table-id>"
+};
+/* *INDENT-ON* */
+
+void
+ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip,
+ u8 * mac)
+{
+ ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
+ /* Invert the "u" bit */
+ ip->as_u8[8] = mac[0] ^ (1 << 1);
+ ip->as_u8[9] = mac[1];
+ ip->as_u8[10] = mac[2];
+ ip->as_u8[11] = 0xFF;
+ ip->as_u8[12] = 0xFE;
+ ip->as_u8[13] = mac[3];
+ ip->as_u8[14] = mac[4];
+ ip->as_u8[15] = mac[5];
+}
+
+void
+ip6_ethernet_mac_address_from_link_local_address (u8 * mac,
+ ip6_address_t * ip)
+{
+ /* Invert the previously inverted "u" bit */
+ mac[0] = ip->as_u8[8] ^ (1 << 1);
+ mac[1] = ip->as_u8[9];
+ mac[2] = ip->as_u8[10];
+ mac[3] = ip->as_u8[13];
+ mac[4] = ip->as_u8[14];
+ mac[5] = ip->as_u8[15];
+}
+
+static clib_error_t *
+test_ip6_link_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u8 mac[6];
+ ip6_address_t _a, *a = &_a;
+
+ if (unformat (input, "%U", unformat_ethernet_address, mac))
+ {
+ ip6_link_local_address_from_ethernet_mac_address (a, mac);
+ vlib_cli_output (vm, "Link local address: %U", format_ip6_address, a);
+ ip6_ethernet_mac_address_from_link_local_address (mac, a);
+ vlib_cli_output (vm, "Original MAC address: %U",
+ format_ethernet_address, mac);
+ }
+
+ return 0;
+}
+
+/*?
+ * This command converts the given MAC Address into an IPv6 link-local
+ * address.
+ *
+ * @cliexpar
+ * Example of how to create an IPv6 link-local address:
+ * @cliexstart{test ip6 link 16:d9:e0:91:79:86}
+ * Link local address: fe80::14d9:e0ff:fe91:7986
+ * Original MAC address: 16:d9:e0:91:79:86
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_link_command, static) =
+{
+ .path = "test ip6 link",
+ .function = test_ip6_link_command_fn,
+ .short_help = "test ip6 link <mac-address>",
+};
+/* *INDENT-ON* */
+
+int
+vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
+{
+ ip6_main_t *im6 = &ip6_main;
+ ip6_fib_t *fib;
+ uword *p = hash_get (im6->fib_index_by_table_id, table_id);
+
+ if (p == 0)
+ return -1;
+
+ fib = ip6_fib_get (p[0]);
+
+ fib->flow_hash_config = flow_hash_config;
+ return 1;
+}
+
+static clib_error_t *
+set_ip6_flow_hash_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int matched = 0;
+ u32 table_id = 0;
+ u32 flow_hash_config = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %d", &table_id))
+ matched = 1;
+#define _(a,v) \
+ else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
+ foreach_flow_hash_bit
+#undef _
+ else
+ break;
+ }
+
+ if (matched == 0)
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+
+ rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
+ switch (rv)
+ {
+ case 1:
+ break;
+
+ case -1:
+ return clib_error_return (0, "no such FIB table %d", table_id);
+
+ default:
+ clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
+ break;
+ }
+
+ return 0;
+}
+
+/*?
+ * Configure the set of IPv6 fields used by the flow hash.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to set the flow hash on a given table:
+ * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
+ *
+ * Example of display the configured flow hash:
+ * @cliexstart{show ip6 fib}
+ * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ * @::/0
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * fe80::/10
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::1/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::2/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::16/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::1:ff00:0/104
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
+ * @::/0
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * @::a:1:1:0:4/126
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
+ * [0] [@4]: ipv6-glean: af_packet0
+ * @::a:1:1:0:7/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
+ * [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
+ * fe80::/10
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * fe80::fe:3eff:fe3e:9222/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
+ * [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
+ * ff02::1/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::2/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::16/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::1:ff00:0/104
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) =
+{
+ .path = "set ip6 flow-hash",
+ .short_help =
+ "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
+ .function = set_ip6_flow_hash_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_ip6_local_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ int i;
+
+ vlib_cli_output (vm, "Protocols handled by ip6_local");
+ for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
+ {
+ if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
+ vlib_cli_output (vm, "%d", i);
+ }
+ return 0;
+}
+
+
+
+/*?
+ * Display the set of protocols handled by the local IPv6 stack.
+ *
+ * @cliexpar
+ * Example of how to display local protocol table:
+ * @cliexstart{show ip6 local}
+ * Protocols handled by ip6_local
+ * 17
+ * 43
+ * 58
+ * 115
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip6_local, static) =
+{
+ .path = "show ip6 local",
+ .function = show_ip6_local_command_fn,
+ .short_help = "show ip6 local",
+};
+/* *INDENT-ON* */
+
+int
+vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 table_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ ip6_main_t *ipm = &ip6_main;
+ ip_lookup_main_t *lm = &ipm->lookup_main;
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ ip6_address_t *if_addr;
+
+ if (pool_is_free_index (im->sw_interfaces, sw_if_index))
+ return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+
+ if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
+ lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
+
+ if_addr = ip6_interface_first_address (ipm, sw_if_index, NULL);
+
+ if (NULL != if_addr)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr.ip6 = *if_addr,
+ };
+ u32 fib_index;
+
+ fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ sw_if_index);
+
+
+ if (table_index != (u32) ~ 0)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ dpo_set (&dpo,
+ DPO_CLASSIFY,
+ DPO_PROTO_IP6,
+ classify_dpo_create (DPO_PROTO_IP6, table_index));
+
+ fib_table_entry_special_dpo_add (fib_index,
+ &pfx,
+ FIB_SOURCE_CLASSIFY,
+ FIB_ENTRY_FLAG_NONE, &dpo);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ fib_table_entry_special_remove (fib_index,
+ &pfx, FIB_SOURCE_CLASSIFY);
+ }
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+set_ip6_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 table_index = ~0;
+ int table_index_set = 0;
+ u32 sw_if_index = ~0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table-index %d", &table_index))
+ table_index_set = 1;
+ else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+ vnet_get_main (), &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (table_index_set == 0)
+ return clib_error_return (0, "classify table-index must be specified");
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "interface / subif must be specified");
+
+ rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_MATCHING_INTERFACE:
+ return clib_error_return (0, "No such interface");
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "No such classifier table");
+ }
+ return 0;
+}
+
+/*?
+ * Assign a classification table to an interface. The classification
+ * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
+ * commands. Once the table is create, use this command to filter packets
+ * on an interface.
+ *
+ * @cliexpar
+ * Example of how to assign a classification table to an interface:
+ * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
+{
+ .path = "set ip6 classify",
+ .short_help =
+ "set ip6 classify intfc <interface> table-index <classify-idx>",
+ .function = set_ip6_classify_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ip6_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ ip6_main_t *im = &ip6_main;
+ uword heapsize = 0;
+ u32 tmp;
+ u32 nbuckets = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "hash-buckets %d", &tmp))
+ nbuckets = tmp;
+ else if (unformat (input, "heap-size %dm", &tmp))
+ heapsize = ((u64) tmp) << 20;
+ else if (unformat (input, "heap-size %dM", &tmp))
+ heapsize = ((u64) tmp) << 20;
+ else if (unformat (input, "heap-size %dg", &tmp))
+ heapsize = ((u64) tmp) << 30;
+ else if (unformat (input, "heap-size %dG", &tmp))
+ heapsize = ((u64) tmp) << 30;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+
+ im->lookup_table_nbuckets = nbuckets;
+ im->lookup_table_size = heapsize;
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_hop_by_hop.c b/src/vnet/ip/ip6_hop_by_hop.c
new file mode 100644
index 00000000000..3a820b3cefc
--- /dev/null
+++ b/src/vnet/ip/ip6_hop_by_hop.c
@@ -0,0 +1,1194 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/classify/vnet_classify.h>
+
+/**
+ * @file
+ * @brief In-band OAM (iOAM).
+ *
+ * In-band OAM (iOAM) is an implementation study to record operational
+ * information in the packet while the packet traverses a path between
+ * two points in the network.
+ *
+ * VPP can function as in-band OAM encapsulating, transit and
+ * decapsulating node. In this version of VPP in-band OAM data is
+ * transported as options in an IPv6 hop-by-hop extension header. Hence
+ * in-band OAM can be enabled for IPv6 traffic.
+ */
+
+ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main;
+
+#define foreach_ip6_hbyh_ioam_input_next \
+ _(IP6_REWRITE, "ip6-rewrite") \
+ _(IP6_LOOKUP, "ip6-lookup") \
+ _(DROP, "error-drop")
+
+typedef enum
+{
+#define _(s,n) IP6_HBYH_IOAM_INPUT_NEXT_##s,
+ foreach_ip6_hbyh_ioam_input_next
+#undef _
+ IP6_HBYH_IOAM_INPUT_N_NEXT,
+} ip6_hbyh_ioam_input_next_t;
+
+
+u32
+ioam_flow_add (u8 encap, u8 * flow_name)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ flow_data_t *flow = 0;
+ u32 index = 0;
+ u8 i;
+
+ pool_get (hm->flows, flow);
+ memset (flow, 0, sizeof (flow_data_t));
+
+ index = flow - hm->flows;
+ strncpy ((char *) flow->flow_name, (char *) flow_name, 31);
+
+ if (!encap)
+ IOAM_SET_DECAP (index);
+
+ for (i = 0; i < 255; i++)
+ {
+ if (hm->flow_handler[i])
+ flow->ctx[i] = hm->flow_handler[i] (index, 1);
+ }
+ return (index);
+}
+
+static uword
+unformat_opaque_ioam (unformat_input_t * input, va_list * args)
+{
+ u64 *opaquep = va_arg (*args, u64 *);
+ u8 *flow_name = NULL;
+ uword ret = 0;
+
+ if (unformat (input, "ioam-encap %s", &flow_name))
+ {
+ *opaquep = ioam_flow_add (1, flow_name);
+ ret = 1;
+ }
+ else if (unformat (input, "ioam-decap %s", &flow_name))
+ {
+ *opaquep = ioam_flow_add (0, flow_name);
+ ret = 1;
+ }
+
+ vec_free (flow_name);
+ return ret;
+}
+
+u8 *
+get_flow_name_from_flow_ctx (u32 flow_ctx)
+{
+ flow_data_t *flow = NULL;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u32 index;
+
+ index = IOAM_MASK_DECAP_BIT (flow_ctx);
+
+ if (pool_is_free_index (hm->flows, index))
+ return NULL;
+
+ flow = pool_elt_at_index (hm->flows, index);
+ return (flow->flow_name);
+}
+
+/* The main h-b-h tracer will be invoked, no need to do much here */
+int
+ip6_hbh_add_register_option (u8 option,
+ u8 size,
+ int rewrite_options (u8 * rewrite_string,
+ u8 * rewrite_size))
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->add_options));
+
+ /* Already registered */
+ if (hm->add_options[option])
+ return (-1);
+
+ hm->add_options[option] = rewrite_options;
+ hm->options_size[option] = size;
+
+ return (0);
+}
+
+int
+ip6_hbh_add_unregister_option (u8 option)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->add_options));
+
+ /* Not registered */
+ if (!hm->add_options[option])
+ return (-1);
+
+ hm->add_options[option] = NULL;
+ hm->options_size[option] = 0;
+ return (0);
+}
+
+/* Config handler registration */
+int
+ip6_hbh_config_handler_register (u8 option,
+ int config_handler (void *data, u8 disable))
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->config_handler));
+
+ /* Already registered */
+ if (hm->config_handler[option])
+ return (VNET_API_ERROR_INVALID_REGISTRATION);
+
+ hm->config_handler[option] = config_handler;
+
+ return (0);
+}
+
+int
+ip6_hbh_config_handler_unregister (u8 option)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->config_handler));
+
+ /* Not registered */
+ if (!hm->config_handler[option])
+ return (VNET_API_ERROR_INVALID_REGISTRATION);
+
+ hm->config_handler[option] = NULL;
+ return (0);
+}
+
+/* Flow handler registration */
+int
+ip6_hbh_flow_handler_register (u8 option,
+ u32 ioam_flow_handler (u32 flow_ctx, u8 add))
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->flow_handler));
+
+ /* Already registered */
+ if (hm->flow_handler[option])
+ return (VNET_API_ERROR_INVALID_REGISTRATION);
+
+ hm->flow_handler[option] = ioam_flow_handler;
+
+ return (0);
+}
+
+int
+ip6_hbh_flow_handler_unregister (u8 option)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->flow_handler));
+
+ /* Not registered */
+ if (!hm->flow_handler[option])
+ return (VNET_API_ERROR_INVALID_REGISTRATION);
+
+ hm->flow_handler[option] = NULL;
+ return (0);
+}
+
+typedef struct
+{
+ u32 next_index;
+} ip6_add_hop_by_hop_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ip6_add_hop_by_hop_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_add_hop_by_hop_trace_t *t = va_arg (*args,
+ ip6_add_hop_by_hop_trace_t *);
+
+ s = format (s, "IP6_ADD_HOP_BY_HOP: next index %d", t->next_index);
+ return s;
+}
+
+vlib_node_registration_t ip6_add_hop_by_hop_node;
+
+#define foreach_ip6_add_hop_by_hop_error \
+_(PROCESSED, "Pkts w/ added ip6 hop-by-hop options")
+
+typedef enum
+{
+#define _(sym,str) IP6_ADD_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip6_add_hop_by_hop_error
+#undef _
+ IP6_ADD_HOP_BY_HOP_N_ERROR,
+} ip6_add_hop_by_hop_error_t;
+
+static char *ip6_add_hop_by_hop_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_add_hop_by_hop_error
+#undef _
+};
+
+static uword
+ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u32 n_left_from, *from, *to_next;
+ ip_lookup_next_t next_index;
+ u32 processed = 0;
+ u8 *rewrite = hm->rewrite;
+ u32 rewrite_length = vec_len (rewrite);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ ip6_header_t *ip0, *ip1;
+ ip6_hop_by_hop_header_t *hbh0, *hbh1;
+ u64 *copy_src0, *copy_dst0, *copy_src1, *copy_dst1;
+ u16 new_l0, new_l1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data - rewrite_length,
+ 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data - rewrite_length,
+ 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ /* Copy the ip header left by the required amount */
+ copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
+ copy_dst1 = (u64 *) (((u8 *) ip1) - rewrite_length);
+ copy_src0 = (u64 *) ip0;
+ copy_src1 = (u64 *) ip1;
+
+ copy_dst0[0] = copy_src0[0];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[4] = copy_src0[4];
+
+ copy_dst1[0] = copy_src1[0];
+ copy_dst1[1] = copy_src1[1];
+ copy_dst1[2] = copy_src1[2];
+ copy_dst1[3] = copy_src1[3];
+ copy_dst1[4] = copy_src1[4];
+
+ vlib_buffer_advance (b0, -(word) rewrite_length);
+ vlib_buffer_advance (b1, -(word) rewrite_length);
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
+ /* $$$ tune, rewrite_length is a multiple of 8 */
+ clib_memcpy (hbh0, rewrite, rewrite_length);
+ clib_memcpy (hbh1, rewrite, rewrite_length);
+ /* Patch the protocol chain, insert the h-b-h (type 0) header */
+ hbh0->protocol = ip0->protocol;
+ hbh1->protocol = ip1->protocol;
+ ip0->protocol = 0;
+ ip1->protocol = 0;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
+ new_l1 =
+ clib_net_to_host_u16 (ip1->payload_length) + rewrite_length;
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip1->payload_length = clib_host_to_net_u16 (new_l1);
+
+ /* Populate the (first) h-b-h list elt */
+ next0 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
+ next1 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
+
+
+ /* $$$$$ End of processing 2 x packets $$$$$ */
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_add_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_add_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->next_index = next1;
+ }
+ }
+ processed += 2;
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ip6_header_t *ip0;
+ ip6_hop_by_hop_header_t *hbh0;
+ u64 *copy_src0, *copy_dst0;
+ u16 new_l0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Copy the ip header left by the required amount */
+ copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
+ copy_src0 = (u64 *) ip0;
+
+ copy_dst0[0] = copy_src0[0];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[4] = copy_src0[4];
+ vlib_buffer_advance (b0, -(word) rewrite_length);
+ ip0 = vlib_buffer_get_current (b0);
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ /* $$$ tune, rewrite_length is a multiple of 8 */
+ clib_memcpy (hbh0, rewrite, rewrite_length);
+ /* Patch the protocol chain, insert the h-b-h (type 0) header */
+ hbh0->protocol = ip0->protocol;
+ ip0->protocol = 0;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+ /* Populate the (first) h-b-h list elt */
+ next0 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip6_add_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+
+ processed++;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, ip6_add_hop_by_hop_node.index,
+ IP6_ADD_HOP_BY_HOP_ERROR_PROCESSED, processed);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */
+{
+ .function = ip6_add_hop_by_hop_node_fn,.name =
+ "ip6-add-hop-by-hop",.vector_size = sizeof (u32),.format_trace =
+ format_ip6_add_hop_by_hop_trace,.type =
+ VLIB_NODE_TYPE_INTERNAL,.n_errors =
+ ARRAY_LEN (ip6_add_hop_by_hop_error_strings),.error_strings =
+ ip6_add_hop_by_hop_error_strings,
+ /* See ip/lookup.h */
+ .n_next_nodes = IP6_HBYH_IOAM_INPUT_N_NEXT,.next_nodes =
+ {
+#define _(s,n) [IP6_HBYH_IOAM_INPUT_NEXT_##s] = n,
+ foreach_ip6_hbyh_ioam_input_next
+#undef _
+ }
+,};
+/* *INDENT-ON* */
+
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_add_hop_by_hop_node,
+ ip6_add_hop_by_hop_node_fn);
+/* The main h-b-h tracer was already invoked, no need to do much here */
+typedef struct
+{
+ u32 next_index;
+} ip6_pop_hop_by_hop_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ip6_pop_hop_by_hop_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_pop_hop_by_hop_trace_t *t =
+ va_arg (*args, ip6_pop_hop_by_hop_trace_t *);
+
+ s = format (s, "IP6_POP_HOP_BY_HOP: next index %d", t->next_index);
+ return s;
+}
+
+int
+ip6_hbh_pop_register_option (u8 option,
+ int options (vlib_buffer_t * b,
+ ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt))
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->pop_options));
+
+ /* Already registered */
+ if (hm->pop_options[option])
+ return (-1);
+
+ hm->pop_options[option] = options;
+
+ return (0);
+}
+
+int
+ip6_hbh_pop_unregister_option (u8 option)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->pop_options));
+
+ /* Not registered */
+ if (!hm->pop_options[option])
+ return (-1);
+
+ hm->pop_options[option] = NULL;
+ return (0);
+}
+
+vlib_node_registration_t ip6_pop_hop_by_hop_node;
+
+#define foreach_ip6_pop_hop_by_hop_error \
+_(PROCESSED, "Pkts w/ removed ip6 hop-by-hop options") \
+_(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options") \
+_(OPTION_FAILED, "ip6 pop hop-by-hop failed to process")
+
+typedef enum
+{
+#define _(sym,str) IP6_POP_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip6_pop_hop_by_hop_error
+#undef _
+ IP6_POP_HOP_BY_HOP_N_ERROR,
+} ip6_pop_hop_by_hop_error_t;
+
+static char *ip6_pop_hop_by_hop_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_pop_hop_by_hop_error
+#undef _
+};
+
+static inline void
+ioam_pop_hop_by_hop_processing (vlib_main_t * vm,
+ ip6_header_t * ip0,
+ ip6_hop_by_hop_header_t * hbh0,
+ vlib_buffer_t * b)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ ip6_hop_by_hop_option_t *opt0, *limit0;
+ u8 type0;
+
+ if (!hbh0 || !ip0)
+ return;
+
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ limit0 = (ip6_hop_by_hop_option_t *)
+ ((u8 *) hbh0 + ((hbh0->length + 1) << 3));
+
+ /* Scan the set of h-b-h options, process ones that we understand */
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1 */
+ opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1;
+ continue;
+ case 1: /* PadN */
+ break;
+ default:
+ if (hm->pop_options[type0])
+ {
+ if ((*hm->pop_options[type0]) (b, ip0, opt0) < 0)
+ {
+ vlib_node_increment_counter (vm,
+ ip6_pop_hop_by_hop_node.index,
+ IP6_POP_HOP_BY_HOP_ERROR_OPTION_FAILED,
+ 1);
+ }
+ }
+ }
+ opt0 =
+ (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (ip6_hop_by_hop_option_t));
+ }
+}
+
+static uword
+ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ u32 n_left_from, *from, *to_next;
+ ip_lookup_next_t next_index;
+ u32 processed = 0;
+ u32 no_header = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 adj_index0, adj_index1;
+ ip6_header_t *ip0, *ip1;
+ ip_adjacency_t *adj0, *adj1;
+ ip6_hop_by_hop_header_t *hbh0, *hbh1;
+ u64 *copy_dst0, *copy_src0, *copy_dst1, *copy_src1;
+ u16 new_l0, new_l1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
+ adj0 = ip_get_adjacency (lm, adj_index0);
+ adj1 = ip_get_adjacency (lm, adj_index1);
+
+ next0 = adj0->lookup_next_index;
+ next1 = adj1->lookup_next_index;
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
+
+ ioam_pop_hop_by_hop_processing (vm, ip0, hbh0, b0);
+ ioam_pop_hop_by_hop_processing (vm, ip1, hbh1, b1);
+
+ vlib_buffer_advance (b0, (hbh0->length + 1) << 3);
+ vlib_buffer_advance (b1, (hbh1->length + 1) << 3);
+
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
+ ((hbh0->length + 1) << 3);
+ new_l1 = clib_net_to_host_u16 (ip1->payload_length) -
+ ((hbh1->length + 1) << 3);
+
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip1->payload_length = clib_host_to_net_u16 (new_l1);
+
+ ip0->protocol = hbh0->protocol;
+ ip1->protocol = hbh1->protocol;
+
+ copy_src0 = (u64 *) ip0;
+ copy_src1 = (u64 *) ip1;
+ copy_dst0 = copy_src0 + (hbh0->length + 1);
+ copy_dst0[4] = copy_src0[4];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[0] = copy_src0[0];
+ copy_dst1 = copy_src1 + (hbh1->length + 1);
+ copy_dst1[4] = copy_src1[4];
+ copy_dst1[3] = copy_src1[3];
+ copy_dst1[2] = copy_src1[2];
+ copy_dst1[1] = copy_src1[1];
+ copy_dst1[0] = copy_src1[0];
+ processed += 2;
+ /* $$$$$ End of processing 2 x packets $$$$$ */
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_pop_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_pop_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->next_index = next1;
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 adj_index0;
+ ip6_header_t *ip0;
+ ip_adjacency_t *adj0;
+ ip6_hop_by_hop_header_t *hbh0;
+ u64 *copy_dst0, *copy_src0;
+ u16 new_l0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ip0 = vlib_buffer_get_current (b0);
+ adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ adj0 = ip_get_adjacency (lm, adj_index0);
+
+ /* Default use the next_index from the adjacency. */
+ next0 = adj0->lookup_next_index;
+
+ /* Perfectly normal to end up here w/ out h-b-h header */
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+
+ /* TODO:Temporarily doing it here.. do this validation in end_of_path_cb */
+ ioam_pop_hop_by_hop_processing (vm, ip0, hbh0, b0);
+ /* Pop the trace data */
+ vlib_buffer_advance (b0, (hbh0->length + 1) << 3);
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
+ ((hbh0->length + 1) << 3);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip0->protocol = hbh0->protocol;
+ copy_src0 = (u64 *) ip0;
+ copy_dst0 = copy_src0 + (hbh0->length + 1);
+ copy_dst0[4] = copy_src0[4];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[0] = copy_src0[0];
+ processed++;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip6_pop_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
+ IP6_POP_HOP_BY_HOP_ERROR_PROCESSED, processed);
+ vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
+ IP6_POP_HOP_BY_HOP_ERROR_NO_HOHO, no_header);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) =
+{
+ .function = ip6_pop_hop_by_hop_node_fn,.name =
+ "ip6-pop-hop-by-hop",.vector_size = sizeof (u32),.format_trace =
+ format_ip6_pop_hop_by_hop_trace,.type =
+ VLIB_NODE_TYPE_INTERNAL,.sibling_of = "ip6-lookup",.n_errors =
+ ARRAY_LEN (ip6_pop_hop_by_hop_error_strings),.error_strings =
+ ip6_pop_hop_by_hop_error_strings,
+ /* See ip/lookup.h */
+.n_next_nodes = 0,};
+
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_pop_hop_by_hop_node,
+ ip6_pop_hop_by_hop_node_fn);
+static clib_error_t *
+ip6_hop_by_hop_ioam_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return (error);
+
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ hm->vlib_main = vm;
+ hm->vnet_main = vnet_get_main ();
+ hm->unix_time_0 = (u32) time (0); /* Store starting time */
+ hm->vlib_time_0 = vlib_time_now (vm);
+ hm->ioam_flag = IOAM_HBYH_MOD;
+ memset (hm->add_options, 0, sizeof (hm->add_options));
+ memset (hm->pop_options, 0, sizeof (hm->pop_options));
+ memset (hm->options_size, 0, sizeof (hm->options_size));
+
+ vnet_classify_register_unformat_opaque_index_fn (unformat_opaque_ioam);
+
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_init);
+
+int
+ip6_ioam_set_rewrite (u8 ** rwp, int has_trace_option,
+ int has_pot_option, int has_seqno_option)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u8 *rewrite = NULL;
+ u32 size, rnd_size;
+ ip6_hop_by_hop_header_t *hbh;
+ u8 *current;
+ u8 *trace_data_size = NULL;
+ u8 *pot_data_size = NULL;
+
+ vec_free (*rwp);
+
+ if (has_trace_option == 0 && has_pot_option == 0)
+ return -1;
+
+ /* Work out how much space we need */
+ size = sizeof (ip6_hop_by_hop_header_t);
+
+ //if (has_trace_option && hm->get_sizeof_options[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] != 0)
+ if (has_trace_option
+ && hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] != 0)
+ {
+ size += hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST];
+ }
+ if (has_pot_option
+ && hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0)
+ {
+ size += hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT];
+ }
+
+ if (has_seqno_option)
+ {
+ size += hm->options_size[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE];
+ }
+
+ /* Round to a multiple of 8 octets */
+ rnd_size = (size + 7) & ~7;
+
+ /* allocate it, zero-fill / pad by construction */
+ vec_validate (rewrite, rnd_size - 1);
+
+ hbh = (ip6_hop_by_hop_header_t *) rewrite;
+ /* Length of header in 8 octet units, not incl first 8 octets */
+ hbh->length = (rnd_size >> 3) - 1;
+ current = (u8 *) (hbh + 1);
+
+ if (has_trace_option
+ && hm->add_options[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] != 0)
+ {
+ if (0 != (hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST]))
+ {
+ trace_data_size =
+ &hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST];
+ if (0 ==
+ hm->add_options[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] (current,
+ trace_data_size))
+ current += *trace_data_size;
+ }
+ }
+ if (has_pot_option
+ && hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0)
+ {
+ pot_data_size =
+ &hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT];
+ if (0 ==
+ hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] (current,
+ pot_data_size))
+ current += *pot_data_size;
+ }
+
+ if (has_seqno_option &&
+ (hm->add_options[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] != 0))
+ {
+ if (0 == hm->add_options[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] (current,
+ &
+ (hm->options_size
+ [HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE])))
+ current += hm->options_size[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE];
+ }
+
+ *rwp = rewrite;
+ return 0;
+}
+
+clib_error_t *
+clear_ioam_rewrite_fn (void)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ vec_free (hm->rewrite);
+ hm->rewrite = 0;
+ hm->has_trace_option = 0;
+ hm->has_pot_option = 0;
+ hm->has_seqno_option = 0;
+ hm->has_analyse_option = 0;
+ if (hm->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST])
+ hm->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] (NULL, 1);
+
+ if (hm->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT])
+ hm->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] (NULL, 1);
+
+ if (hm->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE])
+ {
+ hm->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] ((void *)
+ &hm->has_analyse_option,
+ 1);
+ }
+
+ return 0;
+}
+
+clib_error_t *
+clear_ioam_rewrite_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ return (clear_ioam_rewrite_fn ());
+}
+
+/*?
+ * This command clears all the In-band OAM (iOAM) features enabled by
+ * the '<em>set ioam rewrite</em>' command. Use '<em>show ioam summary</em>' to
+ * verify the configured settings cleared.
+ *
+ * @cliexpar
+ * Example of how to clear iOAM features:
+ * @cliexcmd{clear ioam rewrite}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_clear_ioam_rewrite_cmd, static) = {
+ .path = "clear ioam rewrite",
+ .short_help = "clear ioam rewrite",
+ .function = clear_ioam_rewrite_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+ip6_ioam_enable (int has_trace_option, int has_pot_option,
+ int has_seqno_option, int has_analyse_option)
+{
+ int rv;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ rv = ip6_ioam_set_rewrite (&hm->rewrite, has_trace_option,
+ has_pot_option, has_seqno_option);
+
+ switch (rv)
+ {
+ case 0:
+ if (has_trace_option)
+ {
+ hm->has_trace_option = has_trace_option;
+ if (hm->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST])
+ hm->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] (NULL,
+ 0);
+ }
+
+ if (has_pot_option)
+ {
+ hm->has_pot_option = has_pot_option;
+ if (hm->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT])
+ hm->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] (NULL,
+ 0);
+ }
+ hm->has_analyse_option = has_analyse_option;
+ if (has_seqno_option)
+ {
+ hm->has_seqno_option = has_seqno_option;
+ if (hm->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE])
+ {
+ hm->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] ((void *)
+ &has_analyse_option,
+ 0);
+ }
+ }
+ break;
+
+ default:
+ return clib_error_return_code (0, rv, 0,
+ "ip6_ioam_set_rewrite returned %d", rv);
+ }
+
+ return 0;
+}
+
+
+static clib_error_t *
+ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int has_trace_option = 0;
+ int has_pot_option = 0;
+ int has_seqno_option = 0;
+ int has_analyse_option = 0;
+ clib_error_t *rv = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "trace"))
+ has_trace_option = 1;
+ else if (unformat (input, "pot"))
+ has_pot_option = 1;
+ else if (unformat (input, "seqno"))
+ has_seqno_option = 1;
+ else if (unformat (input, "analyse"))
+ has_analyse_option = 1;
+ else
+ break;
+ }
+
+
+ rv = ip6_ioam_enable (has_trace_option, has_pot_option,
+ has_seqno_option, has_analyse_option);
+
+ return rv;
+}
+
+/*?
+ * This command is used to enable In-band OAM (iOAM) features on IPv6.
+ * '<em>trace</em>' is used to enable iOAM trace feature. '<em>pot</em>' is used to
+ * enable the Proof Of Transit feature. '<em>ppc</em>' is used to indicate the
+ * Per Packet Counter feature for Edge to Edge processing. '<em>ppc</em>' is
+ * used to indicate if this node is an '<em>encap</em>' node (iOAM edge node
+ * where packet enters iOAM domain), a '<em>decap</em>' node (iOAM edge node
+ * where packet leaves iOAM domain) or '<em>none</em>' (iOAM node where packet
+ * is in-transit through the iOAM domain). '<em>ppc</em>' can only be set if
+ * '<em>trace</em>' or '<em>pot</em>' is enabled.
+ *
+ * Use '<em>clear ioam rewrite</em>' to disable all features enabled by this
+ * command. Use '<em>show ioam summary</em>' to verify the configured settings.
+ *
+ * @cliexpar
+ * Example of how to enable trace and pot with ppc set to encap:
+ * @cliexcmd{set ioam rewrite trace pot ppc encap}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = {
+ .path = "set ioam rewrite",
+ .short_help = "set ioam [trace] [pot] [seqno] [analyse]",
+ .function = ip6_set_ioam_rewrite_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u8 *s = 0;
+
+
+ if (!is_zero_ip6_address (&hm->adj))
+ {
+ s = format (s, " REWRITE FLOW CONFIGS - \n");
+ s = format (s, " Destination Address : %U\n",
+ format_ip6_address, &hm->adj, sizeof (ip6_address_t));
+ s =
+ format (s, " Flow operation : %d (%s)\n",
+ hm->ioam_flag,
+ (hm->ioam_flag ==
+ IOAM_HBYH_ADD) ? "Add" : ((hm->ioam_flag ==
+ IOAM_HBYH_MOD) ? "Mod" : "Pop"));
+ }
+ else
+ {
+ s = format (s, " REWRITE FLOW CONFIGS - Not configured\n");
+ }
+
+
+ s = format (s, " TRACE OPTION - %d (%s)\n",
+ hm->has_trace_option,
+ (hm->has_trace_option ? "Enabled" : "Disabled"));
+ if (hm->has_trace_option)
+ s =
+ format (s,
+ "Try 'show ioam trace and show ioam-trace profile' for more information\n");
+
+
+ s = format (s, " POT OPTION - %d (%s)\n",
+ hm->has_pot_option,
+ (hm->has_pot_option ? "Enabled" : "Disabled"));
+ if (hm->has_pot_option)
+ s =
+ format (s,
+ "Try 'show ioam pot and show pot profile' for more information\n");
+
+ s = format (s, " EDGE TO EDGE - SeqNo OPTION - %d (%s)\n",
+ hm->has_seqno_option,
+ hm->has_seqno_option ? "Enabled" : "Disabled");
+ if (hm->has_seqno_option)
+ s = format (s, "Try 'show ioam e2e' for more information\n");
+
+ s = format (s, " iOAM Analyse OPTION - %d (%s)\n",
+ hm->has_analyse_option,
+ hm->has_analyse_option ? "Enabled" : "Disabled");
+
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+}
+
+/*?
+ * This command displays the current configuration data for In-band
+ * OAM (iOAM).
+ *
+ * @cliexpar
+ * Example to show the iOAM configuration:
+ * @cliexstart{show ioam summary}
+ * REWRITE FLOW CONFIGS -
+ * Destination Address : ff02::1
+ * Flow operation : 2 (Pop)
+ * TRACE OPTION - 1 (Enabled)
+ * Try 'show ioam trace and show ioam-trace profile' for more information
+ * POT OPTION - 1 (Enabled)
+ * Try 'show ioam pot and show pot profile' for more information
+ * EDGE TO EDGE - PPC OPTION - 1 (Encap)
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = {
+ .path = "show ioam summary",
+ .short_help = "show ioam summary",
+ .function = ip6_show_ioam_summary_cmd_fn,
+};
+/* *INDENT-ON* */
+
+void
+vnet_register_ioam_end_of_path_callback (void *cb)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ hm->ioam_end_of_path_cb = cb;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_hop_by_hop.h b/src/vnet/ip/ip6_hop_by_hop.h
new file mode 100644
index 00000000000..acfaa37ed2f
--- /dev/null
+++ b/src/vnet/ip/ip6_hop_by_hop.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ip6_hop_by_hop_ioam_h__
+#define __included_ip6_hop_by_hop_ioam_h__
+
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+#include <vnet/ip/ip.h>
+
+
+#define MAX_IP6_HBH_OPTION 256
+
+/* To determine whether a node is decap MS bit is set */
+#define IOAM_DECAP_BIT 0x80000000
+
+#define IOAM_DEAP_ENABLED(opaque_data) (opaque_data & IOAM_DECAP_BIT)
+
+#define IOAM_SET_DECAP(opaque_data) \
+ (opaque_data |= IOAM_DECAP_BIT)
+
+#define IOAM_MASK_DECAP_BIT(x) (x & ~IOAM_DECAP_BIT)
+
+/*
+ * Stores the run time flow data of hbh options
+ */
+typedef struct
+{
+ u32 ctx[MAX_IP6_HBH_OPTION];
+ u8 flow_name[64];
+} flow_data_t;
+
+typedef struct
+{
+ /* The current rewrite we're using */
+ u8 *rewrite;
+
+ /* Trace data processing callback */
+ void *ioam_end_of_path_cb;
+ /* Configuration data */
+ /* Adjacency */
+ ip6_address_t adj;
+#define IOAM_HBYH_ADD 0
+#define IOAM_HBYH_MOD 1
+#define IOAM_HBYH_POP 2
+ u8 ioam_flag;
+ /* time scale transform. Joy. */
+ u32 unix_time_0;
+ f64 vlib_time_0;
+
+
+ /* Trace option */
+ u8 has_trace_option;
+
+ /* Pot option */
+ u8 has_pot_option;
+
+ /* Per Packet Counter option */
+ u8 has_seqno_option;
+
+ /* Enabling analyis of iOAM data on decap node */
+ u8 has_analyse_option;
+
+ /* Array of function pointers to ADD and POP HBH option handling routines */
+ u8 options_size[MAX_IP6_HBH_OPTION];
+ int (*add_options[MAX_IP6_HBH_OPTION]) (u8 * rewrite_string,
+ u8 * rewrite_size);
+ int (*pop_options[MAX_IP6_HBH_OPTION]) (vlib_buffer_t * b,
+ ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt);
+ int (*get_sizeof_options[MAX_IP6_HBH_OPTION]) (u32 * rewrite_size);
+ int (*config_handler[MAX_IP6_HBH_OPTION]) (void *data, u8 disable);
+
+ /* Array of function pointers to handle hbh options being used with classifier */
+ u32 (*flow_handler[MAX_IP6_HBH_OPTION]) (u32 flow_ctx, u8 add);
+ flow_data_t *flows;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} ip6_hop_by_hop_ioam_main_t;
+
+extern ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main;
+
+extern u8 *format_path_map (u8 * s, va_list * args);
+
+extern clib_error_t *ip6_ioam_enable (int has_trace_option,
+ int has_pot_option,
+ int has_seqno_option,
+ int has_analyse_option);
+
+extern int ip6_ioam_set_destination (ip6_address_t * addr, u32 mask_width,
+ u32 vrf_id, int is_add, int is_pop,
+ int is_none);
+
+extern clib_error_t *clear_ioam_rewrite_fn (void);
+
+static inline u8
+is_zero_ip4_address (ip4_address_t * a)
+{
+ return (a->as_u32 == 0);
+}
+
+static inline void
+copy_ip6_address (ip6_address_t * dst, ip6_address_t * src)
+{
+ dst->as_u64[0] = src->as_u64[0];
+ dst->as_u64[1] = src->as_u64[1];
+}
+
+static inline void
+set_zero_ip6_address (ip6_address_t * a)
+{
+ a->as_u64[0] = 0;
+ a->as_u64[1] = 0;
+}
+
+static inline u8
+cmp_ip6_address (ip6_address_t * a1, ip6_address_t * a2)
+{
+ return ((a1->as_u64[0] == a2->as_u64[0])
+ && (a1->as_u64[1] == a2->as_u64[1]));
+}
+
+static inline u8
+is_zero_ip6_address (ip6_address_t * a)
+{
+ return ((a->as_u64[0] == 0) && (a->as_u64[1] == 0));
+}
+
+int ip6_hbh_add_register_option (u8 option,
+ u8 size,
+ int rewrite_options (u8 * rewrite_string,
+ u8 * size));
+int ip6_hbh_add_unregister_option (u8 option);
+
+int ip6_hbh_pop_register_option (u8 option,
+ int options (vlib_buffer_t * b,
+ ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt));
+int ip6_hbh_pop_unregister_option (u8 option);
+
+int
+ip6_hbh_get_sizeof_register_option (u8 option,
+ int get_sizeof_hdr_options (u32 *
+ rewrite_size));
+
+int
+ip6_ioam_set_rewrite (u8 ** rwp, int has_trace_option,
+ int has_pot_option, int has_seq_no);
+
+int
+ip6_hbh_config_handler_register (u8 option,
+ int config_handler (void *data, u8 disable));
+
+int ip6_hbh_config_handler_unregister (u8 option);
+
+int ip6_hbh_flow_handler_register (u8 option,
+ u32 ioam_flow_handler (u32 flow_ctx,
+ u8 add));
+
+int ip6_hbh_flow_handler_unregister (u8 option);
+
+u8 *get_flow_name_from_flow_ctx (u32 flow_ctx);
+
+static inline flow_data_t *
+get_flow (u32 index)
+{
+ flow_data_t *flow = NULL;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ if (pool_is_free_index (hm->flows, index))
+ return NULL;
+
+ flow = pool_elt_at_index (hm->flows, index);
+ return flow;
+}
+
+static inline u32
+get_flow_data_from_flow_ctx (u32 flow_ctx, u8 option)
+{
+ flow_data_t *flow = NULL;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u32 index;
+
+ index = IOAM_MASK_DECAP_BIT (flow_ctx);
+ //flow = pool_elt_at_index (hm->flows, index);
+ flow = &hm->flows[index];
+ return (flow->ctx[option]);
+}
+
+static inline u8
+is_seqno_enabled (void)
+{
+ return (ip6_hop_by_hop_ioam_main.has_seqno_option);
+}
+
+int ip6_trace_profile_setup ();
+#endif /* __included_ip6_hop_by_hop_ioam_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_hop_by_hop_packet.h b/src/vnet/ip/ip6_hop_by_hop_packet.h
new file mode 100644
index 00000000000..543ba8b0533
--- /dev/null
+++ b/src/vnet/ip/ip6_hop_by_hop_packet.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ip6_hop_by_hop_packet_h__
+#define __included_ip6_hop_by_hop_packet_h__
+
+typedef struct
+{
+ /* Protocol for next header */
+ u8 protocol;
+ /*
+ * Length of hop_by_hop header in 8 octet units,
+ * not including the first 8 octets
+ */
+ u8 length;
+} ip6_hop_by_hop_header_t;
+
+typedef struct
+{
+ /* Option Type */
+#define HBH_OPTION_TYPE_SKIP_UNKNOWN (0x00)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN (0x40)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP (0x80)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST (0xc0)
+#define HBH_OPTION_TYPE_HIGH_ORDER_BITS (0xc0)
+#define HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE (1<<5)
+ u8 type;
+ /* Length in octets of the option data field */
+ u8 length;
+} ip6_hop_by_hop_option_t;
+
+/* $$$$ IANA banana constants */
+#define HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST 59 /* Third highest bit set (change en-route) */
+#define HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT 60 /* Third highest bit set (change en-route) */
+#define HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE 29
+
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct {
+ ip6_hop_by_hop_option_t hdr;
+ u8 e2e_type;
+ u8 reserved;
+ u32 e2e_data;
+}) ioam_e2e_option_t;
+/* *INDENT-ON* */
+
+#endif /* __included_ip6_hop_by_hop_packet_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c
new file mode 100644
index 00000000000..bbc2cebaa39
--- /dev/null
+++ b/src/vnet/ip/ip6_input.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_input.c: IP v6 input node
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ppp/ppp.h>
+#include <vnet/hdlc/hdlc.h>
+
+typedef struct
+{
+ u8 packet_data[64];
+} ip6_input_trace_t;
+
+static u8 *
+format_ip6_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ip6_input_trace_t *t = va_arg (*va, ip6_input_trace_t *);
+
+ s = format (s, "%U",
+ format_ip6_header, t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+typedef enum
+{
+ IP6_INPUT_NEXT_DROP,
+ IP6_INPUT_NEXT_LOOKUP,
+ IP6_INPUT_NEXT_ICMP_ERROR,
+ IP6_INPUT_N_NEXT,
+} ip6_input_next_t;
+
+/* Validate IP v6 packets and pass them either to forwarding code
+ or drop exception packets. */
+static uword
+ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ u32 n_left_from, *from, *to_next;
+ ip6_input_next_t next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_input_node.index);
+ vlib_simple_counter_main_t *cm;
+ u32 cpu_index = os_get_cpu_number ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ip6_input_trace_t));
+
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_IP6);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ u32 pi0, sw_if_index0, next0 = 0;
+ u32 pi1, sw_if_index1, next1 = 0;
+ u8 error0, error1, arc0, arc1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+ }
+
+ pi0 = from[0];
+ pi1 = from[1];
+
+ to_next[0] = pi0;
+ to_next[1] = pi1;
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
+
+ arc0 =
+ ip6_address_is_multicast (&ip0->dst_address) ?
+ lm->mcast_feature_arc_index : lm->ucast_feature_arc_index;
+ arc1 =
+ ip6_address_is_multicast (&ip1->dst_address) ?
+ lm->mcast_feature_arc_index : lm->ucast_feature_arc_index;
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0;
+
+ vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+ vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1);
+
+ vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+ vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+
+ error0 = error1 = IP6_ERROR_NONE;
+
+ /* Version != 6? Drop it. */
+ error0 =
+ (clib_net_to_host_u32
+ (ip0->ip_version_traffic_class_and_flow_label) >> 28) !=
+ 6 ? IP6_ERROR_VERSION : error0;
+ error1 =
+ (clib_net_to_host_u32
+ (ip1->ip_version_traffic_class_and_flow_label) >> 28) !=
+ 6 ? IP6_ERROR_VERSION : error1;
+
+ /* hop limit < 1? Drop it. for link-local broadcast packets,
+ * like dhcpv6 packets from client has hop-limit 1, which should not
+ * be dropped.
+ */
+ error0 = ip0->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error0;
+ error1 = ip1->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error1;
+
+ /* L2 length must be at least minimal IP header. */
+ error0 =
+ p0->current_length <
+ sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
+ error1 =
+ p1->current_length <
+ sizeof (ip1[0]) ? IP6_ERROR_TOO_SHORT : error1;
+
+ if (PREDICT_FALSE (error0 != IP6_ERROR_NONE))
+ {
+ if (error0 == IP6_ERROR_TIME_EXPIRED)
+ {
+ icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP6_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ {
+ next0 = IP6_INPUT_NEXT_DROP;
+ }
+ }
+ if (PREDICT_FALSE (error1 != IP6_ERROR_NONE))
+ {
+ if (error1 == IP6_ERROR_TIME_EXPIRED)
+ {
+ icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next1 = IP6_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ {
+ next1 = IP6_INPUT_NEXT_DROP;
+ }
+ }
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ u32 pi0, sw_if_index0, next0 = 0;
+ u8 error0, arc0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ arc0 =
+ ip6_address_is_multicast (&ip0->dst_address) ?
+ lm->mcast_feature_arc_index : lm->ucast_feature_arc_index;
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+
+ vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+ error0 = IP6_ERROR_NONE;
+
+ /* Version != 6? Drop it. */
+ error0 =
+ (clib_net_to_host_u32
+ (ip0->ip_version_traffic_class_and_flow_label) >> 28) !=
+ 6 ? IP6_ERROR_VERSION : error0;
+
+ /* hop limit < 1? Drop it. for link-local broadcast packets,
+ * like dhcpv6 packets from client has hop-limit 1, which should not
+ * be dropped.
+ */
+ error0 = ip0->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error0;
+
+ /* L2 length must be at least minimal IP header. */
+ error0 =
+ p0->current_length <
+ sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
+
+ if (PREDICT_FALSE (error0 != IP6_ERROR_NONE))
+ {
+ if (error0 == IP6_ERROR_TIME_EXPIRED)
+ {
+ icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP6_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ {
+ next0 = IP6_INPUT_NEXT_DROP;
+ }
+ }
+ p0->error = error_node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static char *ip6_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_input_node) = {
+ .function = ip6_input,
+ .name = "ip6-input",
+ .vector_size = sizeof (u32),
+
+ .n_errors = IP6_N_ERROR,
+ .error_strings = ip6_error_strings,
+
+ .n_next_nodes = IP6_INPUT_N_NEXT,
+ .next_nodes = {
+ [IP6_INPUT_NEXT_DROP] = "error-drop",
+ [IP6_INPUT_NEXT_LOOKUP] = "ip6-lookup",
+ [IP6_INPUT_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ },
+
+ .format_buffer = format_ip6_header,
+ .format_trace = format_ip6_input_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_input_node, ip6_input)
+ static clib_error_t *ip6_init (vlib_main_t * vm)
+{
+ ethernet_register_input_type (vm, ETHERNET_TYPE_IP6, ip6_input_node.index);
+ ppp_register_input_protocol (vm, PPP_PROTOCOL_ip6, ip6_input_node.index);
+ hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip6, ip6_input_node.index);
+
+ {
+ pg_node_t *pn;
+ pn = pg_get_node (ip6_input_node.index);
+ pn->unformat_edit = unformat_pg_ip6_header;
+ }
+
+ /* Set flow hash to something non-zero. */
+ ip6_main.flow_hash_seed = 0xdeadbeef;
+
+ /* Default hop limit for packets we generate. */
+ ip6_main.host_config.ttl = 64;
+
+ return /* no error */ 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
new file mode 100644
index 00000000000..5a1c9e86b4f
--- /dev/null
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -0,0 +1,4088 @@
+/*
+ * ip/ip6_neighbor.c: IP6 neighbor handling
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6_neighbor.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/mhash.h>
+#include <vppinfra/md5.h>
+#include <vnet/adj/adj.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip6_fib.h>
+
+/**
+ * @file
+ * @brief IPv6 Neighbor Adjacency and Neighbor Discovery.
+ *
+ * The files contains the API and CLI code for managing IPv6 neighbor
+ * adjacency tables and neighbor discovery logic.
+ */
+
+/* can't use sizeof link_layer_address, that's 8 */
+#define ETHER_MAC_ADDR_LEN 6
+
+/* advertised prefix option */
+typedef struct
+{
+ /* basic advertised information */
+ ip6_address_t prefix;
+ u8 prefix_len;
+ int adv_on_link_flag;
+ int adv_autonomous_flag;
+ u32 adv_valid_lifetime_in_secs;
+ u32 adv_pref_lifetime_in_secs;
+
+ /* advertised values are computed from these times if decrementing */
+ f64 valid_lifetime_expires;
+ f64 pref_lifetime_expires;
+
+ /* local information */
+ int enabled;
+ int deprecated_prefix_flag;
+ int decrement_lifetime_flag;
+
+#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */
+#define DEF_ADV_VALID_LIFETIME 2592000
+#define DEF_ADV_PREF_LIFETIME 604800
+
+ /* extensions are added here, mobile, DNS etc.. */
+} ip6_radv_prefix_t;
+
+
+typedef struct
+{
+ /* group information */
+ u8 type;
+ ip6_address_t mcast_address;
+ u16 num_sources;
+ ip6_address_t *mcast_source_address_pool;
+} ip6_mldp_group_t;
+
+/* configured router advertisement information per ipv6 interface */
+typedef struct
+{
+
+ /* advertised config information, zero means unspecified */
+ u8 curr_hop_limit;
+ int adv_managed_flag;
+ int adv_other_flag;
+ u16 adv_router_lifetime_in_sec;
+ u32 adv_neighbor_reachable_time_in_msec;
+ u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations;
+
+ /* mtu option */
+ u32 adv_link_mtu;
+
+ /* source link layer option */
+ u8 link_layer_address[8];
+ u8 link_layer_addr_len;
+
+ /* prefix option */
+ ip6_radv_prefix_t *adv_prefixes_pool;
+
+ /* Hash table mapping address to index in interface advertised prefix pool. */
+ mhash_t address_to_prefix_index;
+
+ /* MLDP group information */
+ ip6_mldp_group_t *mldp_group_pool;
+
+ /* Hash table mapping address to index in mldp address pool. */
+ mhash_t address_to_mldp_index;
+
+ /* local information */
+ u32 sw_if_index;
+ u32 fib_index;
+ int send_radv; /* radv on/off on this interface - set by config */
+ int cease_radv; /* we are ceasing to send - set byf config */
+ int send_unicast;
+ int adv_link_layer_address;
+ int prefix_option;
+ int failed_device_check;
+ int all_routers_mcast;
+ u32 seed;
+ u64 randomizer;
+ int ref_count;
+ adj_index_t all_nodes_adj_index;
+ adj_index_t all_routers_adj_index;
+ adj_index_t all_mldv2_routers_adj_index;
+
+ /* timing information */
+#define DEF_MAX_RADV_INTERVAL 200
+#define DEF_MIN_RADV_INTERVAL .75 * DEF_MAX_RADV_INTERVAL
+#define DEF_CURR_HOP_LIMIT 64
+#define DEF_DEF_RTR_LIFETIME 3 * DEF_MAX_RADV_INTERVAL
+#define MAX_DEF_RTR_LIFETIME 9000
+
+#define MAX_INITIAL_RTR_ADVERT_INTERVAL 16 /* seconds */
+#define MAX_INITIAL_RTR_ADVERTISEMENTS 3 /*transmissions */
+#define MIN_DELAY_BETWEEN_RAS 3 /* seconds */
+#define MAX_DELAY_BETWEEN_RAS 1800 /* seconds */
+#define MAX_RA_DELAY_TIME .5 /* seconds */
+
+ f64 max_radv_interval;
+ f64 min_radv_interval;
+ f64 min_delay_between_radv;
+ f64 max_delay_between_radv;
+ f64 max_rtr_default_lifetime;
+
+ f64 last_radv_time;
+ f64 last_multicast_time;
+ f64 next_multicast_time;
+
+
+ u32 initial_adverts_count;
+ f64 initial_adverts_interval;
+ u32 initial_adverts_sent;
+
+ /* stats */
+ u32 n_advertisements_sent;
+ u32 n_solicitations_rcvd;
+ u32 n_solicitations_dropped;
+
+ /* Link local address to use (defaults to underlying physical for logical interfaces */
+ ip6_address_t link_local_address;
+ u8 link_local_prefix_len;
+
+} ip6_radv_t;
+
+typedef struct
+{
+ u32 next_index;
+ uword node_index;
+ uword type_opaque;
+ uword data;
+ /* Used for nd event notification only */
+ void *data_callback;
+ u32 pid;
+} pending_resolution_t;
+
+
+typedef struct
+{
+ /* Hash tables mapping name to opcode. */
+ uword *opcode_by_name;
+
+ /* lite beer "glean" adjacency handling */
+ mhash_t pending_resolutions_by_address;
+ pending_resolution_t *pending_resolutions;
+
+ /* Mac address change notification */
+ mhash_t mac_changes_by_address;
+ pending_resolution_t *mac_changes;
+
+ u32 *neighbor_input_next_index_by_hw_if_index;
+
+ ip6_neighbor_t *neighbor_pool;
+
+ mhash_t neighbor_index_by_key;
+
+ u32 *if_radv_pool_index_by_sw_if_index;
+
+ ip6_radv_t *if_radv_pool;
+
+ /* Neighbor attack mitigation */
+ u32 limit_neighbor_cache_size;
+ u32 neighbor_delete_rotor;
+
+} ip6_neighbor_main_t;
+
+static ip6_neighbor_main_t ip6_neighbor_main;
+static ip6_address_t ip6a_zero; /* ip6 address 0 */
+
+static u8 *
+format_ip6_neighbor_ip6_entry (u8 * s, va_list * va)
+{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+ ip6_neighbor_t *n = va_arg (*va, ip6_neighbor_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *si;
+ u8 *flags = 0;
+
+ if (!n)
+ return format (s, "%=12s%=20s%=6s%=20s%=40s", "Time", "Address", "Flags",
+ "Link layer", "Interface");
+
+ if (n->flags & IP6_NEIGHBOR_FLAG_DYNAMIC)
+ flags = format (flags, "D");
+
+ if (n->flags & IP6_NEIGHBOR_FLAG_STATIC)
+ flags = format (flags, "S");
+
+ si = vnet_get_sw_interface (vnm, n->key.sw_if_index);
+ s = format (s, "%=12U%=20U%=6s%=20U%=40U",
+ format_vlib_cpu_time, vm, n->cpu_time_last_updated,
+ format_ip6_address, &n->key.ip6_address,
+ flags ? (char *) flags : "",
+ format_ethernet_address, n->link_layer_address,
+ format_vnet_sw_interface_name, vnm, si);
+
+ vec_free (flags);
+ return s;
+}
+
+static clib_error_t *
+ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm,
+ u32 sw_if_index, u32 flags)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_t *n;
+
+ if (!(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ u32 i, *to_delete = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (n, nm->neighbor_pool,
+ ({
+ if (n->key.sw_if_index == sw_if_index)
+ vec_add1 (to_delete, n - nm->neighbor_pool);
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (to_delete); i++)
+ {
+ n = pool_elt_at_index (nm->neighbor_pool, to_delete[i]);
+ mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
+ fib_table_entry_delete_index (n->fib_entry_index, FIB_SOURCE_ADJ);
+ pool_put (nm->neighbor_pool, n);
+ }
+
+ vec_free (to_delete);
+ }
+
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_neighbor_sw_interface_up_down);
+
+static void
+unset_random_neighbor_entry (void)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vnm->vlib_main;
+ ip6_neighbor_t *e;
+ u32 index;
+
+ index = pool_next_index (nm->neighbor_pool, nm->neighbor_delete_rotor);
+ nm->neighbor_delete_rotor = index;
+
+ /* Try again from elt 0, could happen if an intfc goes down */
+ if (index == ~0)
+ {
+ index = pool_next_index (nm->neighbor_pool, nm->neighbor_delete_rotor);
+ nm->neighbor_delete_rotor = index;
+ }
+
+ /* Nothing left in the pool */
+ if (index == ~0)
+ return;
+
+ e = pool_elt_at_index (nm->neighbor_pool, index);
+
+ vnet_unset_ip6_ethernet_neighbor (vm, e->key.sw_if_index,
+ &e->key.ip6_address,
+ e->link_layer_address,
+ ETHER_MAC_ADDR_LEN);
+}
+
+typedef struct
+{
+ u8 is_add;
+ u8 is_static;
+ u8 link_layer_address[6];
+ u32 sw_if_index;
+ ip6_address_t addr;
+} ip6_neighbor_set_unset_rpc_args_t;
+
+static void ip6_neighbor_set_unset_rpc_callback
+ (ip6_neighbor_set_unset_rpc_args_t * a);
+
+static void set_unset_ip6_neighbor_rpc
+ (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a, u8 * link_layer_addreess, int is_add, int is_static)
+{
+ ip6_neighbor_set_unset_rpc_args_t args;
+ void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
+
+ args.sw_if_index = sw_if_index;
+ args.is_add = is_add;
+ args.is_static = is_static;
+ clib_memcpy (&args.addr, a, sizeof (*a));
+ clib_memcpy (args.link_layer_address, link_layer_addreess, 6);
+
+ vl_api_rpc_call_main_thread (ip6_neighbor_set_unset_rpc_callback,
+ (u8 *) & args, sizeof (args));
+}
+
+static void
+ip6_nbr_probe (ip_adjacency_t * adj)
+{
+ icmp6_neighbor_solicitation_header_t *h;
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip_interface_address_t *ia;
+ ip6_address_t *dst, *src;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ vlib_buffer_t *b;
+ int bogus_length;
+ vlib_main_t *vm;
+ u32 bi = 0;
+
+ vm = vlib_get_main ();
+
+ si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
+ dst = &adj->sub_type.nbr.next_hop.ip6;
+
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ return;
+ }
+ src = ip6_interface_address_matching_destination (im, dst,
+ adj->rewrite_header.
+ sw_if_index, &ia);
+ if (!src)
+ {
+ return;
+ }
+
+ h = vlib_packet_template_get_packet (vm,
+ &im->discover_neighbor_packet_template,
+ &bi);
+
+ hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
+
+ h->ip.dst_address.as_u8[13] = dst->as_u8[13];
+ h->ip.dst_address.as_u8[14] = dst->as_u8[14];
+ h->ip.dst_address.as_u8[15] = dst->as_u8[15];
+ h->ip.src_address = src[0];
+ h->neighbor.target_address = dst[0];
+
+ clib_memcpy (h->link_layer_option.ethernet_address,
+ hi->hw_address, vec_len (hi->hw_address));
+
+ h->neighbor.icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
+
+ /* Add encapsulation string for software interface (e.g. ethernet header). */
+ vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+ vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+ {
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+ u32 *to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi->output_node_index, f);
+ }
+}
+
+static void
+ip6_nd_mk_complete (adj_index_t ai, ip6_neighbor_t * nbr)
+{
+ adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ ethernet_build_rewrite (vnet_get_main (),
+ nbr->key.sw_if_index,
+ adj_get_link_type (ai),
+ nbr->link_layer_address));
+}
+
+static void
+ip6_nd_mk_incomplete (adj_index_t ai)
+{
+ ip_adjacency_t *adj = adj_get (ai);
+
+ adj_nbr_update_rewrite (ai,
+ ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+ ethernet_build_rewrite (vnet_get_main (),
+ adj->rewrite_header.
+ sw_if_index,
+ adj_get_link_type (ai),
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+}
+
+#define IP6_NBR_MK_KEY(k, sw_if_index, addr) \
+{ \
+ k.sw_if_index = sw_if_index; \
+ k.ip6_address = *addr; \
+ k.pad = 0; \
+}
+
+static ip6_neighbor_t *
+ip6_nd_find (u32 sw_if_index, const ip6_address_t * addr)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_t *n = NULL;
+ ip6_neighbor_key_t k;
+ uword *p;
+
+ IP6_NBR_MK_KEY (k, sw_if_index, addr);
+
+ p = mhash_get (&nm->neighbor_index_by_key, &k);
+ if (p)
+ {
+ n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+ }
+
+ return (n);
+}
+
+static adj_walk_rc_t
+ip6_nd_mk_complete_walk (adj_index_t ai, void *ctx)
+{
+ ip6_neighbor_t *nbr = ctx;
+
+ ip6_nd_mk_complete (ai, nbr);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static adj_walk_rc_t
+ip6_nd_mk_incomplete_walk (adj_index_t ai, void *ctx)
+{
+ ip6_nd_mk_incomplete (ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+void
+ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+ ip6_neighbor_t *nbr;
+ ip_adjacency_t *adj;
+
+ adj = adj_get (ai);
+
+ nbr = ip6_nd_find (sw_if_index, &adj->sub_type.nbr.next_hop.ip6);
+
+ if (NULL != nbr)
+ {
+ adj_nbr_walk_nh6 (sw_if_index, &nbr->key.ip6_address,
+ ip6_nd_mk_complete_walk, nbr);
+ }
+ else
+ {
+ /*
+ * no matching ND entry.
+ * construct the rewrite required to for an ND packet, and stick
+ * that in the adj's pipe to smoke.
+ */
+ adj_nbr_update_rewrite (ai,
+ ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+ ethernet_build_rewrite (vnm,
+ sw_if_index,
+ VNET_LINK_IP6,
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+
+ /*
+ * since the FIB has added this adj for a route, it makes sense it may
+ * want to forward traffic sometime soon. Let's send a speculative ND.
+ * just one. If we were to do periodically that wouldn't be bad either,
+ * but that's more code than i'm prepared to write at this time for
+ * relatively little reward.
+ */
+ ip6_nbr_probe (adj);
+ }
+}
+
+int
+vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 * link_layer_address,
+ uword n_bytes_link_layer_address,
+ int is_static)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_key_t k;
+ ip6_neighbor_t *n = 0;
+ int make_new_nd_cache_entry = 1;
+ uword *p;
+ u32 next_index;
+ pending_resolution_t *pr, *mc;
+
+ if (os_get_cpu_number ())
+ {
+ set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address,
+ 1 /* set new neighbor */ , is_static);
+ return 0;
+ }
+
+ k.sw_if_index = sw_if_index;
+ k.ip6_address = a[0];
+ k.pad = 0;
+
+ p = mhash_get (&nm->neighbor_index_by_key, &k);
+ if (p)
+ {
+ n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+ /* Refuse to over-write static neighbor entry. */
+ if (!is_static && (n->flags & IP6_NEIGHBOR_FLAG_STATIC))
+ return -2;
+ make_new_nd_cache_entry = 0;
+ }
+
+ if (make_new_nd_cache_entry)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = k.ip6_address,
+ }
+ ,
+ };
+ u32 fib_index;
+
+ pool_get (nm->neighbor_pool, n);
+ mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool,
+ /* old value */ 0);
+ n->key = k;
+
+ clib_memcpy (n->link_layer_address,
+ link_layer_address, n_bytes_link_layer_address);
+
+ /*
+ * create the adj-fib. the entry in the FIB table for and to the peer.
+ */
+ fib_index = ip6_main.fib_index_by_sw_if_index[n->key.sw_if_index];
+ n->fib_entry_index = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_NONE, FIB_PROTOCOL_IP6, &pfx.fp_addr, n->key.sw_if_index, ~0, 1, NULL, // no label stack
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
+ else
+ {
+ /*
+ * prevent a DoS attack from the data-plane that
+ * spams us with no-op updates to the MAC address
+ */
+ if (0 == memcmp (n->link_layer_address,
+ link_layer_address, n_bytes_link_layer_address))
+ return -1;
+
+ clib_memcpy (n->link_layer_address,
+ link_layer_address, n_bytes_link_layer_address);
+ }
+
+ /* Update time stamp and flags. */
+ n->cpu_time_last_updated = clib_cpu_time_now ();
+ if (is_static)
+ n->flags |= IP6_NEIGHBOR_FLAG_STATIC;
+ else
+ n->flags |= IP6_NEIGHBOR_FLAG_DYNAMIC;
+
+ adj_nbr_walk_nh6 (sw_if_index,
+ &n->key.ip6_address, ip6_nd_mk_complete_walk, n);
+
+ /* Customer(s) waiting for this address to be resolved? */
+ p = mhash_get (&nm->pending_resolutions_by_address, a);
+ if (p)
+ {
+ next_index = p[0];
+
+ while (next_index != (u32) ~ 0)
+ {
+ pr = pool_elt_at_index (nm->pending_resolutions, next_index);
+ vlib_process_signal_event (vm, pr->node_index,
+ pr->type_opaque, pr->data);
+ next_index = pr->next_index;
+ pool_put (nm->pending_resolutions, pr);
+ }
+
+ mhash_unset (&nm->pending_resolutions_by_address, a, 0);
+ }
+
+ /* Customer(s) requesting ND event for this address? */
+ p = mhash_get (&nm->mac_changes_by_address, a);
+ if (p)
+ {
+ next_index = p[0];
+
+ while (next_index != (u32) ~ 0)
+ {
+ int (*fp) (u32, u8 *, u32, ip6_address_t *);
+ int rv = 1;
+ mc = pool_elt_at_index (nm->mac_changes, next_index);
+ fp = mc->data_callback;
+
+ /* Call the user's data callback, return 1 to suppress dup events */
+ if (fp)
+ rv =
+ (*fp) (mc->data, link_layer_address, sw_if_index, &ip6a_zero);
+ /*
+ * Signal the resolver process, as long as the user
+ * says they want to be notified
+ */
+ if (rv == 0)
+ vlib_process_signal_event (vm, mc->node_index,
+ mc->type_opaque, mc->data);
+ next_index = mc->next_index;
+ }
+ }
+
+ return 0;
+}
+
+int
+vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 * link_layer_address,
+ uword n_bytes_link_layer_address)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_key_t k;
+ ip6_neighbor_t *n;
+ uword *p;
+ int rv = 0;
+
+ if (os_get_cpu_number ())
+ {
+ set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address,
+ 0 /* unset */ , 0);
+ return 0;
+ }
+
+ k.sw_if_index = sw_if_index;
+ k.ip6_address = a[0];
+ k.pad = 0;
+
+ p = mhash_get (&nm->neighbor_index_by_key, &k);
+ if (p == 0)
+ {
+ rv = -1;
+ goto out;
+ }
+
+ n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+ mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
+
+ adj_nbr_walk_nh6 (sw_if_index,
+ &n->key.ip6_address, ip6_nd_mk_incomplete_walk, NULL);
+
+ fib_table_entry_delete_index (n->fib_entry_index, FIB_SOURCE_ADJ);
+ pool_put (nm->neighbor_pool, n);
+
+out:
+ return rv;
+}
+
+static void ip6_neighbor_set_unset_rpc_callback
+ (ip6_neighbor_set_unset_rpc_args_t * a)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ if (a->is_add)
+ vnet_set_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr,
+ a->link_layer_address, 6, a->is_static);
+ else
+ vnet_unset_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr,
+ a->link_layer_address, 6);
+}
+
+static int
+ip6_neighbor_sort (void *a1, void *a2)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_t *n1 = a1, *n2 = a2;
+ int cmp;
+ cmp = vnet_sw_interface_compare (vnm, n1->key.sw_if_index,
+ n2->key.sw_if_index);
+ if (!cmp)
+ cmp = ip6_address_compare (&n1->key.ip6_address, &n2->key.ip6_address);
+ return cmp;
+}
+
+ip6_neighbor_t *
+ip6_neighbors_entries (u32 sw_if_index)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_t *n, *ns = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (n, nm->neighbor_pool,
+ ({
+ if (sw_if_index != ~0 && n->key.sw_if_index != sw_if_index)
+ continue;
+ vec_add1 (ns, n[0]);
+ }));
+ /* *INDENT-ON* */
+
+ if (ns)
+ vec_sort_with_function (ns, ip6_neighbor_sort);
+ return ns;
+}
+
+static clib_error_t *
+show_ip6_neighbors (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_t *n, *ns;
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+
+ /* Filter entries by interface if given. */
+ sw_if_index = ~0;
+ (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
+
+ ns = ip6_neighbors_entries (sw_if_index);
+ if (ns)
+ {
+ vlib_cli_output (vm, "%U", format_ip6_neighbor_ip6_entry, vm, 0);
+ vec_foreach (n, ns)
+ {
+ vlib_cli_output (vm, "%U", format_ip6_neighbor_ip6_entry, vm, n);
+ }
+ vec_free (ns);
+ }
+
+ return error;
+}
+
+/*?
+ * This command is used to display the adjacent IPv6 hosts found via
+ * neighbor discovery. Optionally, limit the output to the specified
+ * interface.
+ *
+ * @cliexpar
+ * Example of how to display the IPv6 neighbor adjacency table:
+ * @cliexstart{show ip6 neighbors}
+ * Time Address Flags Link layer Interface
+ * 34.0910 ::a:1:1:0:7 02:fe:6a:07:39:6f GigabitEthernet2/0/0
+ * 173.2916 ::b:5:1:c:2 02:fe:50:62:3a:94 GigabitEthernet2/0/0
+ * 886.6654 ::1:1:c:0:9 S 02:fe:e4:45:27:5b GigabitEthernet3/0/0
+ * @cliexend
+ * Example of how to display the IPv6 neighbor adjacency table for given interface:
+ * @cliexstart{show ip6 neighbors GigabitEthernet2/0/0}
+ * Time Address Flags Link layer Interface
+ * 34.0910 ::a:1:1:0:7 02:fe:6a:07:39:6f GigabitEthernet2/0/0
+ * 173.2916 ::b:5:1:c:2 02:fe:50:62:3a:94 GigabitEthernet2/0/0
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip6_neighbors_command, static) = {
+ .path = "show ip6 neighbors",
+ .function = show_ip6_neighbors,
+ .short_help = "show ip6 neighbors [<interface>]",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_ip6_neighbor (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_address_t addr;
+ u8 mac_address[6];
+ int addr_valid = 0;
+ int is_del = 0;
+ int is_static = 0;
+ u32 sw_if_index;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ /* intfc, ip6-address, mac-address */
+ if (unformat (input, "%U %U %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index,
+ unformat_ip6_address, &addr,
+ unformat_ethernet_address, mac_address))
+ addr_valid = 1;
+
+ else if (unformat (input, "delete") || unformat (input, "del"))
+ is_del = 1;
+ else if (unformat (input, "static"))
+ is_static = 1;
+ else
+ break;
+ }
+
+ if (!addr_valid)
+ return clib_error_return (0, "Missing interface, ip6 or hw address");
+
+ if (!is_del)
+ vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, &addr,
+ mac_address, sizeof (mac_address),
+ is_static);
+ else
+ vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index, &addr,
+ mac_address, sizeof (mac_address));
+ return 0;
+}
+
+/*?
+ * This command is used to manually add an entry to the IPv6 neighbor
+ * adjacency table. Optionally, the entry can be added as static. It is
+ * also used to remove an entry from the table. Use the '<em>show ip6
+ * neighbors</em>' command to display all learned and manually entered entries.
+ *
+ * @cliexpar
+ * Example of how to add a static entry to the IPv6 neighbor adjacency table:
+ * @cliexcmd{set ip6 neighbor GigabitEthernet2/0/0 ::1:1:c:0:9 02:fe:e4:45:27:5b static}
+ * Example of how to delete an entry from the IPv6 neighbor adjacency table:
+ * @cliexcmd{set ip6 neighbor del GigabitEthernet2/0/0 ::1:1:c:0:9 02:fe:e4:45:27:5b}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_neighbor_command, static) =
+{
+ .path = "set ip6 neighbor",
+ .function = set_ip6_neighbor,
+ .short_help = "set ip6 neighbor [del] <interface> <ip6-address> <mac-address> [static]",
+};
+/* *INDENT-ON* */
+
+typedef enum
+{
+ ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP,
+ ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY,
+ ICMP6_NEIGHBOR_SOLICITATION_N_NEXT,
+} icmp6_neighbor_solicitation_or_advertisement_next_t;
+
+static_always_inline uword
+icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ uword is_solicitation)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index, n_advertisements_sent;
+ icmp6_neighbor_discovery_option_type_t option_type;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
+ int bogus_length;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ option_type =
+ (is_solicitation
+ ? ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address
+ : ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address);
+ n_advertisements_sent = 0;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ icmp6_neighbor_solicitation_or_advertisement_header_t *h0;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *o0;
+ u32 bi0, options_len0, sw_if_index0, next0, error0;
+ u32 ip6_sadd_link_local, ip6_sadd_unspecified;
+ int is_rewrite0;
+ u32 ni0;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ h0 = ip6_next_header (ip0);
+ options_len0 =
+ clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]);
+
+ error0 = ICMP6_ERROR_NONE;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ ip6_sadd_link_local =
+ ip6_address_is_link_local_unicast (&ip0->src_address);
+ ip6_sadd_unspecified =
+ ip6_address_is_unspecified (&ip0->src_address);
+
+ /* Check that source address is unspecified, link-local or else on-link. */
+ if (!ip6_sadd_unspecified && !ip6_sadd_link_local)
+ {
+ u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
+
+ if (ADJ_INDEX_INVALID != src_adj_index0)
+ {
+ ip_adjacency_t *adj0 =
+ ip_get_adjacency (&im->lookup_main, src_adj_index0);
+
+ /* Allow all realistic-looking rewrite adjacencies to pass */
+ ni0 = adj0->lookup_next_index;
+ is_rewrite0 = (ni0 >= IP_LOOKUP_NEXT_ARP) &&
+ (ni0 < IP6_LOOKUP_N_NEXT);
+
+ error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0
+ || !is_rewrite0)
+ ?
+ ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK
+ : error0);
+ }
+ else
+ {
+ error0 =
+ ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK;
+ }
+ }
+
+ o0 = (void *) (h0 + 1);
+ o0 = ((options_len0 == 8 && o0->header.type == option_type
+ && o0->header.n_data_u64s == 1) ? o0 : 0);
+
+ /* If src address unspecified or link local, donot learn neighbor MAC */
+ if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
+ !ip6_sadd_unspecified && !ip6_sadd_link_local))
+ {
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ if (nm->limit_neighbor_cache_size &&
+ pool_elts (nm->neighbor_pool) >=
+ nm->limit_neighbor_cache_size)
+ unset_random_neighbor_entry ();
+ vnet_set_ip6_ethernet_neighbor (vm, sw_if_index0,
+ is_solicitation ?
+ &ip0->src_address :
+ &h0->target_address,
+ o0->ethernet_address,
+ sizeof (o0->ethernet_address),
+ 0);
+ }
+
+ if (is_solicitation && error0 == ICMP6_ERROR_NONE)
+ {
+ /* Check that target address is local to this router. */
+ fib_node_index_t fei;
+ u32 fib_index;
+
+ fib_index =
+ ip6_fib_table_get_index_for_sw_if_index (sw_if_index0);
+
+ if (~0 == fib_index)
+ {
+ error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN;
+ }
+ else
+ {
+ fei = ip6_fib_table_lookup_exact_match (fib_index,
+ &h0->target_address,
+ 128);
+
+ if (FIB_NODE_INDEX_INVALID == fei ||
+ !(FIB_ENTRY_FLAG_LOCAL &
+ fib_entry_get_flags_for_source (fei,
+ FIB_SOURCE_INTERFACE)))
+ {
+ error0 =
+ ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN;
+ }
+ }
+ }
+
+ if (is_solicitation)
+ next0 = (error0 != ICMP6_ERROR_NONE
+ ? ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP
+ : ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY);
+ else
+ {
+ next0 = 0;
+ error0 = error0 == ICMP6_ERROR_NONE ?
+ ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_RX : error0;
+ }
+
+ if (is_solicitation && error0 == ICMP6_ERROR_NONE)
+ {
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0;
+ ethernet_header_t *eth0;
+
+ /* dst address is either source address or the all-nodes mcast addr */
+ if (!ip6_sadd_unspecified)
+ ip0->dst_address = ip0->src_address;
+ else
+ ip6_set_reserved_multicast_address (&ip0->dst_address,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+
+ ip0->src_address = h0->target_address;
+ ip0->hop_limit = 255;
+ h0->icmp.type = ICMP6_neighbor_advertisement;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+ if (eth_if0 && o0)
+ {
+ clib_memcpy (o0->ethernet_address, eth_if0->address, 6);
+ o0->header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
+ }
+
+ h0->advertisement_flags = clib_host_to_net_u32
+ (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED
+ | ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+
+ h0->icmp.checksum = 0;
+ h0->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0,
+ &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ /* Reuse current MAC header, copy SMAC to DMAC and
+ * interface MAC to SMAC */
+ vlib_buffer_advance (p0, -ethernet_buffer_header_size (p0));
+ eth0 = vlib_buffer_get_current (p0);
+ clib_memcpy (eth0->dst_address, eth0->src_address, 6);
+ if (eth_if0)
+ clib_memcpy (eth0->src_address, eth_if0->address, 6);
+
+ /* Setup input and output sw_if_index for packet */
+ ASSERT (vnet_buffer (p0)->sw_if_index[VLIB_RX] == sw_if_index0);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ n_advertisements_sent++;
+ }
+
+ p0->error = error_node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Account for advertisements sent. */
+ vlib_error_count (vm, error_node->node_index,
+ ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX,
+ n_advertisements_sent);
+
+ return frame->n_vectors;
+}
+
+/* for "syslogging" - use elog for now */
+#define foreach_log_level \
+ _ (DEBUG, "DEBUG") \
+ _ (INFO, "INFORMATION") \
+ _ (NOTICE, "NOTICE") \
+ _ (WARNING, "WARNING") \
+ _ (ERR, "ERROR") \
+ _ (CRIT, "CRITICAL") \
+ _ (ALERT, "ALERT") \
+ _ (EMERG, "EMERGENCY")
+
+typedef enum
+{
+#define _(f,s) LOG_##f,
+ foreach_log_level
+#undef _
+} log_level_t;
+
+static char *log_level_strings[] = {
+#define _(f,s) s,
+ foreach_log_level
+#undef _
+};
+
+static int logmask = 1 << LOG_DEBUG;
+
+static void
+ip6_neighbor_syslog (vlib_main_t * vm, int priority, char *fmt, ...)
+{
+ /* just use elog for now */
+ u8 *what;
+ va_list va;
+
+ if ((priority > LOG_EMERG) || !(logmask & (1 << priority)))
+ return;
+
+ va_start (va, fmt);
+ if (fmt)
+ {
+ what = va_format (0, fmt, &va);
+
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "ip6 nd: (%s): %s",.format_args = "T4T4",};
+ struct
+ {
+ u32 s[2];
+ } *ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->s[0] = elog_string (&vm->elog_main, log_level_strings[priority]);
+ ed->s[1] = elog_string (&vm->elog_main, (char *) what);
+ }
+ va_end (va);
+ return;
+}
+
+/* ipv6 neighbor discovery - router advertisements */
+typedef enum
+{
+ ICMP6_ROUTER_SOLICITATION_NEXT_DROP,
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW,
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX,
+ ICMP6_ROUTER_SOLICITATION_N_NEXT,
+} icmp6_router_solicitation_or_advertisement_next_t;
+
+static_always_inline uword
+icmp6_router_solicitation (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+ u32 n_advertisements_sent = 0;
+ int bogus_length;
+
+ icmp6_neighbor_discovery_option_type_t option_type;
+
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ /* source may append his LL address */
+ option_type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ ip6_radv_t *radv_info = 0;
+
+ icmp6_neighbor_discovery_header_t *h0;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *o0;
+
+ u32 bi0, options_len0, sw_if_index0, next0, error0;
+ u32 is_solicitation = 1, is_dropped = 0;
+ u32 is_unspecified, is_link_local;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ h0 = ip6_next_header (ip0);
+ options_len0 =
+ clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]);
+ is_unspecified = ip6_address_is_unspecified (&ip0->src_address);
+ is_link_local =
+ ip6_address_is_link_local_unicast (&ip0->src_address);
+
+ error0 = ICMP6_ERROR_NONE;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* check if solicitation (not from nd_timer node) */
+ if (ip6_address_is_unspecified (&ip0->dst_address))
+ is_solicitation = 0;
+
+ /* Check that source address is unspecified, link-local or else on-link. */
+ if (!is_unspecified && !is_link_local)
+ {
+ u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
+
+ if (ADJ_INDEX_INVALID != src_adj_index0)
+ {
+ ip_adjacency_t *adj0 = ip_get_adjacency (&im->lookup_main,
+ src_adj_index0);
+
+ error0 = (adj0->rewrite_header.sw_if_index != sw_if_index0
+ ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK
+ : error0);
+ }
+ else
+ {
+ error0 = ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK;
+ }
+ }
+
+ /* check for source LL option and process */
+ o0 = (void *) (h0 + 1);
+ o0 = ((options_len0 == 8
+ && o0->header.type == option_type
+ && o0->header.n_data_u64s == 1) ? o0 : 0);
+
+ /* if src address unspecified IGNORE any options */
+ if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
+ !is_unspecified && !is_link_local))
+ {
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ if (nm->limit_neighbor_cache_size &&
+ pool_elts (nm->neighbor_pool) >=
+ nm->limit_neighbor_cache_size)
+ unset_random_neighbor_entry ();
+
+ vnet_set_ip6_ethernet_neighbor (vm, sw_if_index0,
+ &ip0->src_address,
+ o0->ethernet_address,
+ sizeof (o0->ethernet_address),
+ 0);
+ }
+
+ /* default is to drop */
+ next0 = ICMP6_ROUTER_SOLICITATION_NEXT_DROP;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0;
+ u32 adj_index0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ /* only support ethernet interface type for now */
+ error0 =
+ (!eth_if0) ? ICMP6_ERROR_ROUTER_SOLICITATION_UNSUPPORTED_INTF
+ : error0;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ u32 ri;
+
+ /* adjust the sizeof the buffer to just include the ipv6 header */
+ p0->current_length -=
+ (options_len0 +
+ sizeof (icmp6_neighbor_discovery_header_t));
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty
+ (nm->if_radv_pool_index_by_sw_if_index, sw_if_index0, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index0];
+
+ if (ri != ~0)
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ error0 =
+ ((!radv_info) ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG :
+ error0);
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ f64 now = vlib_time_now (vm);
+
+ /* for solicited adverts - need to rate limit */
+ if (is_solicitation)
+ {
+ if ((now - radv_info->last_radv_time) <
+ MIN_DELAY_BETWEEN_RAS)
+ is_dropped = 1;
+ else
+ radv_info->last_radv_time = now;
+ }
+
+ /* send now */
+ icmp6_router_advertisement_header_t rh;
+
+ rh.icmp.type = ICMP6_router_advertisement;
+ rh.icmp.code = 0;
+ rh.icmp.checksum = 0;
+
+ rh.current_hop_limit = radv_info->curr_hop_limit;
+ rh.router_lifetime_in_sec =
+ clib_host_to_net_u16
+ (radv_info->adv_router_lifetime_in_sec);
+ rh.
+ time_in_msec_between_retransmitted_neighbor_solicitations
+ =
+ clib_host_to_net_u32 (radv_info->
+ adv_time_in_msec_between_retransmitted_neighbor_solicitations);
+ rh.neighbor_reachable_time_in_msec =
+ clib_host_to_net_u32 (radv_info->
+ adv_neighbor_reachable_time_in_msec);
+
+ rh.flags =
+ (radv_info->adv_managed_flag) ?
+ ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP :
+ 0;
+ rh.flags |=
+ ((radv_info->adv_other_flag) ?
+ ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP :
+ 0);
+
+
+ u16 payload_length =
+ sizeof (icmp6_router_advertisement_header_t);
+
+ vlib_buffer_add_data (vm,
+ p0->free_list_index,
+ bi0,
+ (void *) &rh,
+ sizeof
+ (icmp6_router_advertisement_header_t));
+
+ if (radv_info->adv_link_layer_address)
+ {
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ h;
+
+ h.header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
+ h.header.n_data_u64s = 1;
+
+ /* copy ll address */
+ clib_memcpy (&h.ethernet_address[0],
+ eth_if0->address, 6);
+
+ vlib_buffer_add_data (vm,
+ p0->free_list_index,
+ bi0,
+ (void *) &h,
+ sizeof
+ (icmp6_neighbor_discovery_ethernet_link_layer_address_option_t));
+
+ payload_length +=
+ sizeof
+ (icmp6_neighbor_discovery_ethernet_link_layer_address_option_t);
+ }
+
+ /* add MTU option */
+ if (radv_info->adv_link_mtu)
+ {
+ icmp6_neighbor_discovery_mtu_option_t h;
+
+ h.unused = 0;
+ h.mtu =
+ clib_host_to_net_u32 (radv_info->adv_link_mtu);
+ h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_mtu;
+ h.header.n_data_u64s = 1;
+
+ payload_length +=
+ sizeof (icmp6_neighbor_discovery_mtu_option_t);
+
+ vlib_buffer_add_data (vm,
+ p0->free_list_index,
+ bi0,
+ (void *) &h,
+ sizeof
+ (icmp6_neighbor_discovery_mtu_option_t));
+ }
+
+ /* add advertised prefix options */
+ ip6_radv_prefix_t *pr_info;
+
+ /* *INDENT-OFF* */
+ pool_foreach (pr_info, radv_info->adv_prefixes_pool,
+ ({
+ if(pr_info->enabled &&
+ (!pr_info->decrement_lifetime_flag
+ || (pr_info->pref_lifetime_expires >0)))
+ {
+ /* advertise this prefix */
+ icmp6_neighbor_discovery_prefix_information_option_t h;
+
+ h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_prefix_information;
+ h.header.n_data_u64s = (sizeof(icmp6_neighbor_discovery_prefix_information_option_t) >> 3);
+
+ h.dst_address_length = pr_info->prefix_len;
+
+ h.flags = (pr_info->adv_on_link_flag) ? ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_FLAG_ON_LINK : 0;
+ h.flags |= (pr_info->adv_autonomous_flag) ? ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_AUTO : 0;
+
+ if(radv_info->cease_radv && pr_info->deprecated_prefix_flag)
+ {
+ h.valid_time = clib_host_to_net_u32(MIN_ADV_VALID_LIFETIME);
+ h.preferred_time = 0;
+ }
+ else
+ {
+ if(pr_info->decrement_lifetime_flag)
+ {
+ pr_info->adv_valid_lifetime_in_secs = ((pr_info->valid_lifetime_expires > now)) ?
+ (pr_info->valid_lifetime_expires - now) : 0;
+
+ pr_info->adv_pref_lifetime_in_secs = ((pr_info->pref_lifetime_expires > now)) ?
+ (pr_info->pref_lifetime_expires - now) : 0;
+ }
+
+ h.valid_time = clib_host_to_net_u32(pr_info->adv_valid_lifetime_in_secs);
+ h.preferred_time = clib_host_to_net_u32(pr_info->adv_pref_lifetime_in_secs) ;
+ }
+ h.unused = 0;
+
+ clib_memcpy(&h.dst_address, &pr_info->prefix, sizeof(ip6_address_t));
+
+ payload_length += sizeof( icmp6_neighbor_discovery_prefix_information_option_t);
+
+ vlib_buffer_add_data (vm,
+ p0->free_list_index,
+ bi0,
+ (void *)&h, sizeof(icmp6_neighbor_discovery_prefix_information_option_t));
+
+ }
+ }));
+ /* *INDENT-ON* */
+
+ /* add additional options before here */
+
+ /* finish building the router advertisement... */
+ if (!is_unspecified && radv_info->send_unicast)
+ {
+ ip0->dst_address = ip0->src_address;
+ }
+ else
+ {
+ /* target address is all-nodes mcast addr */
+ ip6_set_reserved_multicast_address
+ (&ip0->dst_address,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+ }
+
+ /* source address MUST be the link-local address */
+ ip0->src_address = radv_info->link_local_address;
+
+ ip0->hop_limit = 255;
+ ip0->payload_length =
+ clib_host_to_net_u16 (payload_length);
+
+ icmp6_router_advertisement_header_t *rh0 =
+ (icmp6_router_advertisement_header_t *) (ip0 + 1);
+ rh0->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0,
+ &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ /* setup output if and adjacency */
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ if (is_solicitation)
+ {
+ ethernet_header_t *eth0;
+ /* Reuse current MAC header, copy SMAC to DMAC and
+ * interface MAC to SMAC */
+ vlib_buffer_reset (p0);
+ eth0 = vlib_buffer_get_current (p0);
+ clib_memcpy (eth0->dst_address, eth0->src_address,
+ 6);
+ clib_memcpy (eth0->src_address, eth_if0->address,
+ 6);
+ next0 =
+ is_dropped ? next0 :
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ sw_if_index0;
+ }
+ else
+ {
+ adj_index0 = radv_info->all_nodes_adj_index;
+ if (adj_index0 == 0)
+ error0 = ICMP6_ERROR_DST_LOOKUP_MISS;
+ else
+ {
+ ip_adjacency_t *adj0 =
+ ip_get_adjacency (&im->lookup_main,
+ adj_index0);
+ error0 =
+ ((adj0->rewrite_header.sw_if_index !=
+ sw_if_index0
+ || adj0->lookup_next_index !=
+ IP_LOOKUP_NEXT_REWRITE) ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_DEST_UNKNOWN
+ : error0);
+ next0 =
+ is_dropped ? next0 :
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
+ adj_index0;
+ }
+ }
+ p0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
+
+ radv_info->n_solicitations_dropped += is_dropped;
+ radv_info->n_solicitations_rcvd += is_solicitation;
+
+ if ((error0 == ICMP6_ERROR_NONE) && !is_dropped)
+ {
+ radv_info->n_advertisements_sent++;
+ n_advertisements_sent++;
+ }
+ }
+ }
+ }
+
+ p0->error = error_node->errors[error0];
+
+ if (error0 != ICMP6_ERROR_NONE)
+ vlib_error_count (vm, error_node->node_index, error0, 1);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Account for router advertisements sent. */
+ vlib_error_count (vm, error_node->node_index,
+ ICMP6_ERROR_ROUTER_ADVERTISEMENTS_TX,
+ n_advertisements_sent);
+
+ return frame->n_vectors;
+}
+
+ /* validate advertised info for consistancy (see RFC-4861 section 6.2.7) - log any inconsistencies, packet will always be dropped */
+static_always_inline uword
+icmp6_router_advertisement (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+ u32 n_advertisements_rcvd = 0;
+
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ ip6_radv_t *radv_info = 0;
+ icmp6_router_advertisement_header_t *h0;
+ u32 bi0, options_len0, sw_if_index0, next0, error0;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ h0 = ip6_next_header (ip0);
+ options_len0 =
+ clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]);
+
+ error0 = ICMP6_ERROR_NONE;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* Check that source address is link-local */
+ error0 = (!ip6_address_is_link_local_unicast (&ip0->src_address)) ?
+ ICMP6_ERROR_ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL : error0;
+
+ /* default is to drop */
+ next0 = ICMP6_ROUTER_SOLICITATION_NEXT_DROP;
+
+ n_advertisements_rcvd++;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ /* only support ethernet interface type for now */
+ error0 =
+ (!eth_if0) ? ICMP6_ERROR_ROUTER_SOLICITATION_UNSUPPORTED_INTF
+ : error0;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty
+ (nm->if_radv_pool_index_by_sw_if_index, sw_if_index0, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index0];
+
+ if (ri != ~0)
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ error0 =
+ ((!radv_info) ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG :
+ error0);
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ /* validate advertised information */
+ if ((h0->current_hop_limit && radv_info->curr_hop_limit)
+ && (h0->current_hop_limit !=
+ radv_info->curr_hop_limit))
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvCurHopLimit on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ if ((h0->flags &
+ ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP)
+ != radv_info->adv_managed_flag)
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvManagedFlag on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ if ((h0->flags &
+ ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP)
+ != radv_info->adv_other_flag)
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvOtherConfigFlag on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ if ((h0->
+ time_in_msec_between_retransmitted_neighbor_solicitations
+ && radv_info->
+ adv_time_in_msec_between_retransmitted_neighbor_solicitations)
+ && (h0->
+ time_in_msec_between_retransmitted_neighbor_solicitations
+ !=
+ clib_host_to_net_u32 (radv_info->
+ adv_time_in_msec_between_retransmitted_neighbor_solicitations)))
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvRetransTimer on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ if ((h0->neighbor_reachable_time_in_msec &&
+ radv_info->adv_neighbor_reachable_time_in_msec) &&
+ (h0->neighbor_reachable_time_in_msec !=
+ clib_host_to_net_u32
+ (radv_info->adv_neighbor_reachable_time_in_msec)))
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvReachableTime on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ /* check for MTU or prefix options or .. */
+ u8 *opt_hdr = (u8 *) (h0 + 1);
+ while (options_len0 > 0)
+ {
+ icmp6_neighbor_discovery_option_header_t *o0 =
+ (icmp6_neighbor_discovery_option_header_t *)
+ opt_hdr;
+ int opt_len = o0->n_data_u64s << 3;
+ icmp6_neighbor_discovery_option_type_t option_type =
+ o0->type;
+
+ if (options_len0 < 2)
+ {
+ ip6_neighbor_syslog (vm, LOG_ERR,
+ "malformed RA packet on %U from %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ break;
+ }
+
+ if (opt_len == 0)
+ {
+ ip6_neighbor_syslog (vm, LOG_ERR,
+ " zero length option in RA on %U from %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ break;
+ }
+ else if (opt_len > options_len0)
+ {
+ ip6_neighbor_syslog (vm, LOG_ERR,
+ "option length in RA packet greater than total length on %U from %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ break;
+ }
+
+ options_len0 -= opt_len;
+ opt_hdr += opt_len;
+
+ switch (option_type)
+ {
+ case ICMP6_NEIGHBOR_DISCOVERY_OPTION_mtu:
+ {
+ icmp6_neighbor_discovery_mtu_option_t *h =
+ (icmp6_neighbor_discovery_mtu_option_t
+ *) (o0);
+
+ if (opt_len < sizeof (*h))
+ break;
+
+ if ((h->mtu && radv_info->adv_link_mtu) &&
+ (h->mtu !=
+ clib_host_to_net_u32
+ (radv_info->adv_link_mtu)))
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvLinkMTU on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+ }
+ break;
+
+ case ICMP6_NEIGHBOR_DISCOVERY_OPTION_prefix_information:
+ {
+ icmp6_neighbor_discovery_prefix_information_option_t
+ * h =
+ (icmp6_neighbor_discovery_prefix_information_option_t
+ *) (o0);
+
+ /* validate advertised prefix options */
+ ip6_radv_prefix_t *pr_info;
+ u32 preferred, valid;
+
+ if (opt_len < sizeof (*h))
+ break;
+
+ preferred =
+ clib_net_to_host_u32 (h->preferred_time);
+ valid = clib_net_to_host_u32 (h->valid_time);
+
+ /* look for matching prefix - if we our advertising it, it better be consistant */
+ /* *INDENT-OFF* */
+ pool_foreach (pr_info, radv_info->adv_prefixes_pool,
+ ({
+
+ ip6_address_t mask;
+ ip6_address_mask_from_width(&mask, pr_info->prefix_len);
+
+ if(pr_info->enabled &&
+ (pr_info->prefix_len == h->dst_address_length) &&
+ ip6_address_is_equal_masked (&pr_info->prefix, &h->dst_address, &mask))
+ {
+ /* found it */
+ if(!pr_info->decrement_lifetime_flag &&
+ valid != pr_info->adv_valid_lifetime_in_secs)
+ {
+ ip6_neighbor_syslog(vm, LOG_WARNING,
+ "our ADV validlifetime on %U for %U does not agree with %U",
+ format_vnet_sw_if_index_name, vnm, sw_if_index0,format_ip6_address, &pr_info->prefix,
+ format_ip6_address, &h->dst_address);
+ }
+ if(!pr_info->decrement_lifetime_flag &&
+ preferred != pr_info->adv_pref_lifetime_in_secs)
+ {
+ ip6_neighbor_syslog(vm, LOG_WARNING,
+ "our ADV preferredlifetime on %U for %U does not agree with %U",
+ format_vnet_sw_if_index_name, vnm, sw_if_index0,format_ip6_address, &pr_info->prefix,
+ format_ip6_address, &h->dst_address);
+ }
+ }
+ break;
+ }));
+ /* *INDENT-ON* */
+ break;
+ }
+ default:
+ /* skip this one */
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ p0->error = error_node->errors[error0];
+
+ if (error0 != ICMP6_ERROR_NONE)
+ vlib_error_count (vm, error_node->node_index, error0, 1);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Account for router advertisements sent. */
+ vlib_error_count (vm, error_node->node_index,
+ ICMP6_ERROR_ROUTER_ADVERTISEMENTS_RX,
+ n_advertisements_rcvd);
+
+ return frame->n_vectors;
+}
+
+/* create and initialize router advertisement parameters with default values for this intfc */
+static u32
+ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
+ u32 sw_if_index, u32 is_add)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_radv_t *a = 0;
+ u32 ri = ~0;
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0 = 0;
+
+ /* lookup radv container - ethernet interfaces only */
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ if (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+ eth_if0 = ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ if (!eth_if0)
+ return ri;
+
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri != ~0)
+ {
+ a = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ if (!is_add)
+ {
+ u32 i, *to_delete = 0;
+ ip6_radv_prefix_t *p;
+ ip6_mldp_group_t *m;
+
+ /* remove adjacencies */
+ adj_unlock (a->all_nodes_adj_index);
+ adj_unlock (a->all_routers_adj_index);
+ adj_unlock (a->all_mldv2_routers_adj_index);
+
+ /* clean up prefix_pool */
+ /* *INDENT-OFF* */
+ pool_foreach (p, a->adv_prefixes_pool,
+ ({
+ vec_add1 (to_delete, p - a->adv_prefixes_pool);
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (to_delete); i++)
+ {
+ p = pool_elt_at_index (a->adv_prefixes_pool, to_delete[i]);
+ mhash_unset (&a->address_to_prefix_index, &p->prefix, 0);
+ pool_put (a->adv_prefixes_pool, p);
+ }
+
+ vec_free (to_delete);
+ to_delete = 0;
+
+ /* clean up mldp group pool */
+ /* *INDENT-OFF* */
+ pool_foreach (m, a->mldp_group_pool,
+ ({
+ vec_add1 (to_delete, m - a->mldp_group_pool);
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (to_delete); i++)
+ {
+ m = pool_elt_at_index (a->mldp_group_pool, to_delete[i]);
+ mhash_unset (&a->address_to_mldp_index, &m->mcast_address, 0);
+ pool_put (a->mldp_group_pool, m);
+ }
+
+ vec_free (to_delete);
+
+ pool_put (nm->if_radv_pool, a);
+ nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ~0;
+ ri = ~0;
+ }
+ }
+ else
+ {
+ if (is_add)
+ {
+ vnet_hw_interface_t *hw_if0;
+
+ hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+ pool_get (nm->if_radv_pool, a);
+
+ ri = a - nm->if_radv_pool;
+ nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ri;
+
+ /* initialize default values (most of which are zero) */
+ memset (a, 0, sizeof (a[0]));
+
+ a->sw_if_index = sw_if_index;
+ a->fib_index = ~0;
+ a->max_radv_interval = DEF_MAX_RADV_INTERVAL;
+ a->min_radv_interval = DEF_MIN_RADV_INTERVAL;
+ a->curr_hop_limit = DEF_CURR_HOP_LIMIT;
+ a->adv_router_lifetime_in_sec = DEF_DEF_RTR_LIFETIME;
+
+ a->adv_link_layer_address = 1; /* send ll address source address option */
+
+ a->min_delay_between_radv = MIN_DELAY_BETWEEN_RAS;
+ a->max_delay_between_radv = MAX_DELAY_BETWEEN_RAS;
+ a->max_rtr_default_lifetime = MAX_DEF_RTR_LIFETIME;
+ a->seed = (u32) clib_cpu_time_now ();
+ (void) random_u32 (&a->seed);
+ a->randomizer = clib_cpu_time_now ();
+ (void) random_u64 (&a->randomizer);
+
+ a->initial_adverts_count = MAX_INITIAL_RTR_ADVERTISEMENTS;
+ a->initial_adverts_sent = a->initial_adverts_count - 1;
+ a->initial_adverts_interval = MAX_INITIAL_RTR_ADVERT_INTERVAL;
+
+ /* deafult is to send */
+ a->send_radv = 1;
+
+ /* fill in radv_info for this interface that will be needed later */
+ a->adv_link_mtu = hw_if0->max_l3_packet_bytes[VLIB_RX];
+
+ clib_memcpy (a->link_layer_address, eth_if0->address, 6);
+
+ /* fill in default link-local address (this may be overridden) */
+ ip6_link_local_address_from_ethernet_address
+ (&a->link_local_address, eth_if0->address);
+ a->link_local_prefix_len = 64;
+
+ mhash_init (&a->address_to_prefix_index, sizeof (uword),
+ sizeof (ip6_address_t));
+ mhash_init (&a->address_to_mldp_index, sizeof (uword),
+ sizeof (ip6_address_t));
+
+ {
+ u8 link_layer_address[6] = { 0x33, 0x33, 0x00, 0x00, 0x00,
+ IP6_MULTICAST_GROUP_ID_all_hosts
+ };
+
+ a->all_nodes_adj_index =
+ adj_rewrite_add_and_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6,
+ sw_if_index, link_layer_address);
+ }
+
+ {
+ u8 link_layer_address[6] = { 0x33, 0x33, 0x00, 0x00, 0x00,
+ IP6_MULTICAST_GROUP_ID_all_routers
+ };
+
+ a->all_routers_adj_index =
+ adj_rewrite_add_and_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6,
+ sw_if_index, link_layer_address);
+ }
+
+ {
+ u8 link_layer_address[6] = { 0x33, 0x33, 0x00, 0x00, 0x00,
+ IP6_MULTICAST_GROUP_ID_mldv2_routers
+ };
+
+ a->all_mldv2_routers_adj_index =
+ adj_rewrite_add_and_lock (FIB_PROTOCOL_IP6,
+ VNET_LINK_IP6,
+ sw_if_index, link_layer_address);
+ }
+
+ /* add multicast groups we will always be reporting */
+ ip6_address_t addr;
+ ip6_mldp_group_t *mcast_group_info;
+
+ ip6_set_reserved_multicast_address (&addr,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+
+ /* lookup mldp info for this interface */
+
+ uword *p = mhash_get (&a->address_to_mldp_index, &addr);
+ mcast_group_info =
+ p ? pool_elt_at_index (a->mldp_group_pool, p[0]) : 0;
+
+ /* add address */
+ if (!mcast_group_info)
+ {
+ /* add */
+ u32 mi;
+ pool_get (a->mldp_group_pool, mcast_group_info);
+
+ mi = mcast_group_info - a->mldp_group_pool;
+ mhash_set (&a->address_to_mldp_index, &addr, mi, /* old_value */
+ 0);
+
+ mcast_group_info->type = 4;
+ mcast_group_info->mcast_source_address_pool = 0;
+ mcast_group_info->num_sources = 0;
+ clib_memcpy (&mcast_group_info->mcast_address, &addr,
+ sizeof (ip6_address_t));
+ }
+
+ ip6_set_reserved_multicast_address (&addr,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_routers);
+
+ p = mhash_get (&a->address_to_mldp_index, &addr);
+ mcast_group_info =
+ p ? pool_elt_at_index (a->mldp_group_pool, p[0]) : 0;
+
+ if (!mcast_group_info)
+ {
+ /* add */
+ u32 mi;
+ pool_get (a->mldp_group_pool, mcast_group_info);
+
+ mi = mcast_group_info - a->mldp_group_pool;
+ mhash_set (&a->address_to_mldp_index, &addr, mi, /* old_value */
+ 0);
+
+ mcast_group_info->type = 4;
+ mcast_group_info->mcast_source_address_pool = 0;
+ mcast_group_info->num_sources = 0;
+ clib_memcpy (&mcast_group_info->mcast_address, &addr,
+ sizeof (ip6_address_t));
+ }
+
+ ip6_set_reserved_multicast_address (&addr,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_mldv2_routers);
+
+ p = mhash_get (&a->address_to_mldp_index, &addr);
+ mcast_group_info =
+ p ? pool_elt_at_index (a->mldp_group_pool, p[0]) : 0;
+
+ if (!mcast_group_info)
+ {
+ /* add */
+ u32 mi;
+ pool_get (a->mldp_group_pool, mcast_group_info);
+
+ mi = mcast_group_info - a->mldp_group_pool;
+ mhash_set (&a->address_to_mldp_index, &addr, mi, /* old_value */
+ 0);
+
+ mcast_group_info->type = 4;
+ mcast_group_info->mcast_source_address_pool = 0;
+ mcast_group_info->num_sources = 0;
+ clib_memcpy (&mcast_group_info->mcast_address, &addr,
+ sizeof (ip6_address_t));
+ }
+ }
+ }
+ return ri;
+}
+
+/* send an mldpv2 report */
+static void
+ip6_neighbor_send_mldpv2_report (u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vnm->vlib_main;
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0;
+ u32 ri;
+ int bogus_length;
+
+ ip6_radv_t *radv_info;
+ u16 payload_length;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0;
+ u32 *to_next;
+ vlib_frame_t *f;
+ u32 bo0;
+ u32 n_to_alloc = 1;
+ u32 n_allocated;
+
+ icmp6_multicast_listener_report_header_t *rh0;
+ icmp6_multicast_listener_report_packet_t *rp0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 = ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ if (!eth_if0 || !vnet_sw_interface_is_admin_up (vnm, sw_if_index))
+ return;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri == ~0)
+ return;
+
+ /* send report now - build a mldpv2 report packet */
+ n_allocated = vlib_buffer_alloc_from_free_list (vm,
+ &bo0,
+ n_to_alloc,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ if (PREDICT_FALSE (n_allocated == 0))
+ {
+ clib_warning ("buffer allocation failure");
+ return;
+ }
+
+ b0 = vlib_get_buffer (vm, bo0);
+
+ /* adjust the sizeof the buffer to just include the ipv6 header */
+ b0->current_length = sizeof (icmp6_multicast_listener_report_packet_t);
+
+ payload_length = sizeof (icmp6_multicast_listener_report_header_t);
+
+ b0->error = ICMP6_ERROR_NONE;
+
+ rp0 = vlib_buffer_get_current (b0);
+ ip0 = (ip6_header_t *) & rp0->ip;
+ rh0 = (icmp6_multicast_listener_report_header_t *) & rp0->report_hdr;
+
+ memset (rp0, 0x0, sizeof (icmp6_multicast_listener_report_packet_t));
+
+ ip0->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+
+ ip0->protocol = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS;
+ /* for DEBUG - vnet driver won't seem to emit router alerts */
+ /* ip0->protocol = IP_PROTOCOL_ICMP6; */
+ ip0->hop_limit = 1;
+
+ rh0->icmp.type = ICMP6_multicast_listener_report_v2;
+
+ /* source address MUST be the link-local address */
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+ ip0->src_address = radv_info->link_local_address;
+
+ /* destination is all mldpv2 routers */
+ ip6_set_reserved_multicast_address (&ip0->dst_address,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_mldv2_routers);
+
+ /* add reports here */
+ ip6_mldp_group_t *m;
+ int num_addr_records = 0;
+ icmp6_multicast_address_record_t rr;
+
+ /* fill in the hop-by-hop extension header (router alert) info */
+ rh0->ext_hdr.next_hdr = IP_PROTOCOL_ICMP6;
+ rh0->ext_hdr.n_data_u64s = 0;
+
+ rh0->alert.type = IP6_MLDP_ALERT_TYPE;
+ rh0->alert.len = 2;
+ rh0->alert.value = 0;
+
+ rh0->pad.type = 1;
+ rh0->pad.len = 0;
+
+ rh0->icmp.checksum = 0;
+
+ /* *INDENT-OFF* */
+ pool_foreach (m, radv_info->mldp_group_pool,
+ ({
+ rr.type = m->type;
+ rr.aux_data_len_u32s = 0;
+ rr.num_sources = clib_host_to_net_u16 (m->num_sources);
+ clib_memcpy(&rr.mcast_addr, &m->mcast_address, sizeof(ip6_address_t));
+
+ num_addr_records++;
+
+ vlib_buffer_add_data
+ (vm, b0->free_list_index, bo0,
+ (void *)&rr, sizeof(icmp6_multicast_address_record_t));
+
+ payload_length += sizeof( icmp6_multicast_address_record_t);
+ }));
+ /* *INDENT-ON* */
+
+ rh0->rsvd = 0;
+ rh0->num_addr_records = clib_host_to_net_u16 (num_addr_records);
+
+ /* update lengths */
+ ip0->payload_length = clib_host_to_net_u16 (payload_length);
+
+ rh0->icmp.checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0,
+ &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ /*
+ * OK to override w/ no regard for actual FIB, because
+ * ip6-rewrite only looks at the adjacency.
+ */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ radv_info->all_mldv2_routers_adj_index;
+ b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
+
+ vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite");
+
+ f = vlib_get_frame_to_node (vm, node->index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bo0;
+ f->n_vectors = 1;
+
+ vlib_put_frame_to_node (vm, node->index, f);
+ return;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) =
+{
+ .function = icmp6_router_solicitation,
+ .name = "icmp6-router-solicitation",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = ICMP6_ROUTER_SOLICITATION_N_NEXT,
+ .next_nodes = {
+ [ICMP6_ROUTER_SOLICITATION_NEXT_DROP] = "error-drop",
+ [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite",
+ [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* send a RA or update the timer info etc.. */
+static uword
+ip6_neighbor_process_timer_event (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_radv_t *radv_info;
+ vlib_frame_t *f = 0;
+ u32 n_this_frame = 0;
+ u32 n_left_to_next = 0;
+ u32 *to_next = 0;
+ u32 bo0;
+ icmp6_router_solicitation_header_t *h0;
+ vlib_buffer_t *b0;
+ f64 now = vlib_time_now (vm);
+
+ /* Interface ip6 radv info list */
+ /* *INDENT-OFF* */
+ pool_foreach (radv_info, nm->if_radv_pool,
+ ({
+ if( !vnet_sw_interface_is_admin_up (vnm, radv_info->sw_if_index))
+ {
+ radv_info->initial_adverts_sent = radv_info->initial_adverts_count-1;
+ radv_info->next_multicast_time = now;
+ radv_info->last_multicast_time = now;
+ radv_info->last_radv_time = 0;
+ radv_info->all_routers_mcast = 0;
+ continue;
+ }
+
+ /* Make sure that we've joined the all-routers multicast group */
+ if(!radv_info->all_routers_mcast)
+ {
+ /* send MDLP_REPORT_EVENT message */
+ ip6_neighbor_send_mldpv2_report(radv_info->sw_if_index);
+ radv_info->all_routers_mcast = 1;
+ }
+
+ /* is it time to send a multicast RA on this interface? */
+ if(radv_info->send_radv && (now >= radv_info->next_multicast_time))
+ {
+ u32 n_to_alloc = 1;
+ u32 n_allocated;
+
+ f64 rfn = (radv_info->max_radv_interval - radv_info->min_radv_interval) *
+ random_f64 (&radv_info->seed) + radv_info->min_radv_interval;
+
+ /* multicast send - compute next multicast send time */
+ if( radv_info->initial_adverts_sent > 0)
+ {
+ radv_info->initial_adverts_sent--;
+ if(rfn > radv_info-> initial_adverts_interval)
+ rfn = radv_info-> initial_adverts_interval;
+
+ /* check to see if we are ceasing to send */
+ if( radv_info->initial_adverts_sent == 0)
+ if(radv_info->cease_radv)
+ radv_info->send_radv = 0;
+ }
+
+ radv_info->next_multicast_time = rfn + now;
+ radv_info->last_multicast_time = now;
+
+ /* send advert now - build a "solicted" router advert with unspecified source address */
+ n_allocated = vlib_buffer_alloc_from_free_list
+ (vm, &bo0, n_to_alloc, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ if (PREDICT_FALSE(n_allocated == 0))
+ {
+ clib_warning ("buffer allocation failure");
+ continue;
+ }
+ b0 = vlib_get_buffer (vm, bo0);
+ b0->current_length = sizeof( icmp6_router_solicitation_header_t);
+ b0->error = ICMP6_ERROR_NONE;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = radv_info->sw_if_index;
+
+ h0 = vlib_buffer_get_current (b0);
+
+ memset (h0, 0, sizeof (icmp6_router_solicitation_header_t));
+
+ h0->ip.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
+ h0->ip.payload_length = clib_host_to_net_u16 (sizeof (icmp6_router_solicitation_header_t)
+ - STRUCT_OFFSET_OF (icmp6_router_solicitation_header_t, neighbor));
+ h0->ip.protocol = IP_PROTOCOL_ICMP6;
+ h0->ip.hop_limit = 255;
+
+ /* set src/dst address as "unspecified" this marks this packet as internally generated rather than recieved */
+ h0->ip.src_address.as_u64[0] = 0;
+ h0->ip.src_address.as_u64[1] = 0;
+
+ h0->ip.dst_address.as_u64[0] = 0;
+ h0->ip.dst_address.as_u64[1] = 0;
+
+ h0->neighbor.icmp.type = ICMP6_router_solicitation;
+
+ if (PREDICT_FALSE(f == 0))
+ {
+ f = vlib_get_frame_to_node (vm, ip6_icmp_router_solicitation_node.index);
+ to_next = vlib_frame_vector_args (f);
+ n_left_to_next = VLIB_FRAME_SIZE;
+ n_this_frame = 0;
+ }
+
+ n_this_frame++;
+ n_left_to_next--;
+ to_next[0] = bo0;
+ to_next += 1;
+
+ if (PREDICT_FALSE(n_left_to_next == 0))
+ {
+ f->n_vectors = n_this_frame;
+ vlib_put_frame_to_node (vm, ip6_icmp_router_solicitation_node.index, f);
+ f = 0;
+ }
+ }
+ }));
+ /* *INDENT-ON* */
+
+ if (f)
+ {
+ ASSERT (n_this_frame);
+ f->n_vectors = n_this_frame;
+ vlib_put_frame_to_node (vm, ip6_icmp_router_solicitation_node.index, f);
+ }
+ return 0;
+}
+
+static uword
+ip6_icmp_neighbor_discovery_event_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ uword event_type;
+ ip6_icmp_neighbor_discovery_event_data_t *event_data;
+
+ /* init code here */
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, 1. /* seconds */ );
+
+ event_data = vlib_process_get_event_data (vm, &event_type);
+
+ if (!event_data)
+ {
+ /* No events found: timer expired. */
+ /* process interface list and send RAs as appropriate, update timer info */
+ ip6_neighbor_process_timer_event (vm, node, frame);
+ }
+ else
+ {
+ switch (event_type)
+ {
+
+ case ICMP6_ND_EVENT_INIT:
+ break;
+
+ case ~0:
+ break;
+
+ default:
+ ASSERT (0);
+ }
+
+ if (event_data)
+ _vec_len (event_data) = 0;
+ }
+ }
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) =
+{
+ .function = icmp6_router_advertisement,
+ .name = "icmp6-router-advertisement",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+vlib_node_registration_t ip6_icmp_neighbor_discovery_event_node = {
+
+ .function = ip6_icmp_neighbor_discovery_event_process,
+ .name = "ip6-icmp-neighbor-discovery-event-process",
+ .type = VLIB_NODE_TYPE_PROCESS,
+};
+
+static uword
+icmp6_neighbor_solicitation (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return icmp6_neighbor_solicitation_or_advertisement (vm, node, frame,
+ /* is_solicitation */
+ 1);
+}
+
+static uword
+icmp6_neighbor_advertisement (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return icmp6_neighbor_solicitation_or_advertisement (vm, node, frame,
+ /* is_solicitation */
+ 0);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_neighbor_solicitation_node,static) =
+{
+ .function = icmp6_neighbor_solicitation,
+ .name = "icmp6-neighbor-solicitation",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = ICMP6_NEIGHBOR_SOLICITATION_N_NEXT,
+ .next_nodes = {
+ [ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP] = "error-drop",
+ [ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_neighbor_advertisement_node,static) =
+{
+ .function = icmp6_neighbor_advertisement,
+ .name = "icmp6-neighbor-advertisement",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* API support functions */
+int
+ip6_neighbor_ra_config (vlib_main_t * vm, u32 sw_if_index,
+ u8 suppress, u8 managed, u8 other,
+ u8 ll_option, u8 send_unicast, u8 cease,
+ u8 use_lifetime, u32 lifetime,
+ u32 initial_count, u32 initial_interval,
+ u32 max_interval, u32 min_interval, u8 is_no)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ int error;
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+ error = (ri != ~0) ? 0 : VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (!error)
+ {
+
+ ip6_radv_t *radv_info;
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ if ((max_interval != 0) && (min_interval == 0))
+ min_interval = .75 * max_interval;
+
+ max_interval =
+ (max_interval !=
+ 0) ? ((is_no) ? DEF_MAX_RADV_INTERVAL : max_interval) :
+ radv_info->max_radv_interval;
+ min_interval =
+ (min_interval !=
+ 0) ? ((is_no) ? DEF_MIN_RADV_INTERVAL : min_interval) :
+ radv_info->min_radv_interval;
+ lifetime =
+ (use_lifetime !=
+ 0) ? ((is_no) ? DEF_DEF_RTR_LIFETIME : lifetime) :
+ radv_info->adv_router_lifetime_in_sec;
+
+ if (lifetime)
+ {
+ if (lifetime > MAX_DEF_RTR_LIFETIME)
+ lifetime = MAX_DEF_RTR_LIFETIME;
+
+ if (lifetime <= max_interval)
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ if (min_interval != 0)
+ {
+ if ((min_interval > .75 * max_interval) || (min_interval < 3))
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ if ((initial_count > MAX_INITIAL_RTR_ADVERTISEMENTS) ||
+ (initial_interval > MAX_INITIAL_RTR_ADVERT_INTERVAL))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ /*
+ if "flag" is set and is_no is true then restore default value else set value corresponding to "flag"
+ if "flag" is clear don't change corresponding value
+ */
+ radv_info->send_radv =
+ (suppress != 0) ? ((is_no != 0) ? 1 : 0) : radv_info->send_radv;
+ radv_info->adv_managed_flag =
+ (managed != 0) ? ((is_no) ? 0 : 1) : radv_info->adv_managed_flag;
+ radv_info->adv_other_flag =
+ (other != 0) ? ((is_no) ? 0 : 1) : radv_info->adv_other_flag;
+ radv_info->adv_link_layer_address =
+ (ll_option !=
+ 0) ? ((is_no) ? 1 : 0) : radv_info->adv_link_layer_address;
+ radv_info->send_unicast =
+ (send_unicast != 0) ? ((is_no) ? 0 : 1) : radv_info->send_unicast;
+ radv_info->cease_radv =
+ (cease != 0) ? ((is_no) ? 0 : 1) : radv_info->cease_radv;
+
+ radv_info->min_radv_interval = min_interval;
+ radv_info->max_radv_interval = max_interval;
+ radv_info->adv_router_lifetime_in_sec = lifetime;
+
+ radv_info->initial_adverts_count =
+ (initial_count !=
+ 0) ? ((is_no) ? MAX_INITIAL_RTR_ADVERTISEMENTS : initial_count) :
+ radv_info->initial_adverts_count;
+ radv_info->initial_adverts_interval =
+ (initial_interval !=
+ 0) ? ((is_no) ? MAX_INITIAL_RTR_ADVERT_INTERVAL : initial_interval) :
+ radv_info->initial_adverts_interval;
+
+ /* restart */
+ if ((cease != 0) && (is_no))
+ radv_info->send_radv = 1;
+
+ radv_info->initial_adverts_sent = radv_info->initial_adverts_count - 1;
+ radv_info->next_multicast_time = vlib_time_now (vm);
+ radv_info->last_multicast_time = vlib_time_now (vm);
+ radv_info->last_radv_time = 0;
+ }
+ return (error);
+}
+
+int
+ip6_neighbor_ra_prefix (vlib_main_t * vm, u32 sw_if_index,
+ ip6_address_t * prefix_addr, u8 prefix_len,
+ u8 use_default, u32 val_lifetime, u32 pref_lifetime,
+ u8 no_advertise, u8 off_link, u8 no_autoconfig,
+ u8 no_onlink, u8 is_no)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ int error;
+
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ error = (ri != ~0) ? 0 : VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (!error)
+ {
+ f64 now = vlib_time_now (vm);
+ ip6_radv_t *radv_info;
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ /* prefix info add, delete or update */
+ ip6_radv_prefix_t *prefix;
+
+ /* lookup prefix info for this address on this interface */
+ uword *p = mhash_get (&radv_info->address_to_prefix_index, prefix_addr);
+
+ prefix = p ? pool_elt_at_index (radv_info->adv_prefixes_pool, p[0]) : 0;
+
+ if (is_no)
+ {
+ /* delete */
+ if (!prefix)
+ return VNET_API_ERROR_INVALID_VALUE; /* invalid prefix */
+
+ if (prefix->prefix_len != prefix_len)
+ return VNET_API_ERROR_INVALID_VALUE_2;
+
+ /* FIXME - Should the DP do this or the CP ? */
+ /* do specific delete processing here before returning */
+ /* try to remove from routing table */
+
+ mhash_unset (&radv_info->address_to_prefix_index, prefix_addr,
+ /* old_value */ 0);
+ pool_put (radv_info->adv_prefixes_pool, prefix);
+
+ radv_info->initial_adverts_sent =
+ radv_info->initial_adverts_count - 1;
+ radv_info->next_multicast_time = vlib_time_now (vm);
+ radv_info->last_multicast_time = vlib_time_now (vm);
+ radv_info->last_radv_time = 0;
+ return (error);
+ }
+
+ /* adding or changing */
+ if (!prefix)
+ {
+ /* add */
+ u32 pi;
+ pool_get (radv_info->adv_prefixes_pool, prefix);
+ pi = prefix - radv_info->adv_prefixes_pool;
+ mhash_set (&radv_info->address_to_prefix_index, prefix_addr, pi,
+ /* old_value */ 0);
+
+ memset (prefix, 0x0, sizeof (ip6_radv_prefix_t));
+
+ prefix->prefix_len = prefix_len;
+ clib_memcpy (&prefix->prefix, prefix_addr, sizeof (ip6_address_t));
+
+ /* initialize default values */
+ prefix->adv_on_link_flag = 1; /* L bit set */
+ prefix->adv_autonomous_flag = 1; /* A bit set */
+ prefix->adv_valid_lifetime_in_secs = DEF_ADV_VALID_LIFETIME;
+ prefix->adv_pref_lifetime_in_secs = DEF_ADV_PREF_LIFETIME;
+ prefix->enabled = 1;
+ prefix->decrement_lifetime_flag = 1;
+ prefix->deprecated_prefix_flag = 1;
+
+ if (off_link == 0)
+ {
+ /* FIXME - Should the DP do this or the CP ? */
+ /* insert prefix into routing table as a connected prefix */
+ }
+
+ if (use_default)
+ goto restart;
+ }
+ else
+ {
+
+ if (prefix->prefix_len != prefix_len)
+ return VNET_API_ERROR_INVALID_VALUE_2;
+
+ if (off_link != 0)
+ {
+ /* FIXME - Should the DP do this or the CP ? */
+ /* remove from routing table if already there */
+ }
+ }
+
+ if ((val_lifetime == ~0) || (pref_lifetime == ~0))
+ {
+ prefix->adv_valid_lifetime_in_secs = ~0;
+ prefix->adv_pref_lifetime_in_secs = ~0;
+ prefix->decrement_lifetime_flag = 0;
+ }
+ else
+ {
+ prefix->adv_valid_lifetime_in_secs = val_lifetime;;
+ prefix->adv_pref_lifetime_in_secs = pref_lifetime;
+ }
+
+ /* copy remaining */
+ prefix->enabled = !(no_advertise != 0);
+ prefix->adv_on_link_flag = !((off_link != 0) || (no_onlink != 0));
+ prefix->adv_autonomous_flag = !(no_autoconfig != 0);
+
+ restart:
+ /* restart */
+ /* fill in the expiration times */
+ prefix->valid_lifetime_expires =
+ now + prefix->adv_valid_lifetime_in_secs;
+ prefix->pref_lifetime_expires = now + prefix->adv_pref_lifetime_in_secs;
+
+ radv_info->initial_adverts_sent = radv_info->initial_adverts_count - 1;
+ radv_info->next_multicast_time = vlib_time_now (vm);
+ radv_info->last_multicast_time = vlib_time_now (vm);
+ radv_info->last_radv_time = 0;
+ }
+ return (error);
+}
+
+clib_error_t *
+ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ clib_error_t *error = 0;
+ u8 is_no = 0;
+ u8 suppress = 0, managed = 0, other = 0;
+ u8 suppress_ll_option = 0, send_unicast = 0, cease = 0;
+ u8 use_lifetime = 0;
+ u32 sw_if_index, ra_lifetime = 0, ra_initial_count =
+ 0, ra_initial_interval = 0;
+ u32 ra_max_interval = 0, ra_min_interval = 0;
+
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_sw_interface_t *sw_if0;
+
+ int add_radv_info = 1;
+ __attribute__ ((unused)) ip6_radv_t *radv_info = 0;
+ ip6_address_t ip6_addr;
+ u32 addr_len;
+
+
+ /* Get a line of input. */
+ if (!unformat_user (main_input, unformat_line_input, line_input))
+ return 0;
+
+ /* get basic radv info for this interface */
+ if (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+
+ if (unformat_user (line_input,
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ u32 ri;
+ ethernet_interface_t *eth_if0 = 0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ if (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ if (!eth_if0)
+ {
+ error =
+ clib_error_return (0, "Interface must be of ethernet type");
+ goto done;
+ }
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri != ~0)
+ {
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown interface %U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "invalid interface name %U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ /* get the rest of the command */
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "no"))
+ is_no = 1;
+ else if (unformat (line_input, "prefix %U/%d",
+ unformat_ip6_address, &ip6_addr, &addr_len))
+ {
+ add_radv_info = 0;
+ break;
+ }
+ else if (unformat (line_input, "ra-managed-config-flag"))
+ {
+ managed = 1;
+ break;
+ }
+ else if (unformat (line_input, "ra-other-config-flag"))
+ {
+ other = 1;
+ break;
+ }
+ else if (unformat (line_input, "ra-suppress") ||
+ unformat (line_input, "ra-surpress"))
+ {
+ suppress = 1;
+ break;
+ }
+ else if (unformat (line_input, "ra-suppress-link-layer") ||
+ unformat (line_input, "ra-surpress-link-layer"))
+ {
+ suppress_ll_option = 1;
+ break;
+ }
+ else if (unformat (line_input, "ra-send-unicast"))
+ {
+ send_unicast = 1;
+ break;
+ }
+ else if (unformat (line_input, "ra-lifetime"))
+ {
+ if (!unformat (line_input, "%d", &ra_lifetime))
+ return (error = unformat_parse_error (line_input));
+ use_lifetime = 1;
+ break;
+ }
+ else if (unformat (line_input, "ra-initial"))
+ {
+ if (!unformat
+ (line_input, "%d %d", &ra_initial_count, &ra_initial_interval))
+ return (error = unformat_parse_error (line_input));
+ break;
+ }
+ else if (unformat (line_input, "ra-interval"))
+ {
+ if (!unformat (line_input, "%d", &ra_max_interval))
+ return (error = unformat_parse_error (line_input));
+
+ if (!unformat (line_input, "%d", &ra_min_interval))
+ ra_min_interval = 0;
+ break;
+ }
+ else if (unformat (line_input, "ra-cease"))
+ {
+ cease = 1;
+ break;
+ }
+ else
+ return (unformat_parse_error (line_input));
+ }
+
+ if (add_radv_info)
+ {
+ ip6_neighbor_ra_config (vm, sw_if_index,
+ suppress, managed, other,
+ suppress_ll_option, send_unicast, cease,
+ use_lifetime, ra_lifetime,
+ ra_initial_count, ra_initial_interval,
+ ra_max_interval, ra_min_interval, is_no);
+ }
+ else
+ {
+ u32 valid_lifetime_in_secs = 0;
+ u32 pref_lifetime_in_secs = 0;
+ u8 use_prefix_default_values = 0;
+ u8 no_advertise = 0;
+ u8 off_link = 0;
+ u8 no_autoconfig = 0;
+ u8 no_onlink = 0;
+
+ /* get the rest of the command */
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "default"))
+ {
+ use_prefix_default_values = 1;
+ break;
+ }
+ else if (unformat (line_input, "infinite"))
+ {
+ valid_lifetime_in_secs = ~0;
+ pref_lifetime_in_secs = ~0;
+ break;
+ }
+ else if (unformat (line_input, "%d %d", &valid_lifetime_in_secs,
+ &pref_lifetime_in_secs))
+ break;
+ else
+ break;
+ }
+
+
+ /* get the rest of the command */
+ while (!use_prefix_default_values &&
+ unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "no-advertise"))
+ no_advertise = 1;
+ else if (unformat (line_input, "off-link"))
+ off_link = 1;
+ else if (unformat (line_input, "no-autoconfig"))
+ no_autoconfig = 1;
+ else if (unformat (line_input, "no-onlink"))
+ no_onlink = 1;
+ else
+ return (unformat_parse_error (line_input));
+ }
+
+ ip6_neighbor_ra_prefix (vm, sw_if_index,
+ &ip6_addr, addr_len,
+ use_prefix_default_values,
+ valid_lifetime_in_secs,
+ pref_lifetime_in_secs,
+ no_advertise,
+ off_link, no_autoconfig, no_onlink, is_no);
+ }
+
+ unformat_free (line_input);
+
+done:
+ return error;
+}
+
+static void
+ip6_print_addrs (vlib_main_t * vm, u32 * addrs)
+{
+ ip_lookup_main_t *lm = &ip6_main.lookup_main;
+ u32 i;
+
+ for (i = 0; i < vec_len (addrs); i++)
+ {
+ ip_interface_address_t *a =
+ pool_elt_at_index (lm->if_address_pool, addrs[i]);
+ ip6_address_t *address = ip_interface_address_get_address (lm, a);
+
+ vlib_cli_output (vm, "\t\t%U/%d",
+ format_ip6_address, address, a->address_length);
+ }
+}
+
+static clib_error_t *
+show_ip6_interface_cmd (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+
+ sw_if_index = ~0;
+
+ if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri != ~0)
+ {
+ ip_lookup_main_t *lm = &ip6_main.lookup_main;
+ ip6_radv_t *radv_info;
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ vlib_cli_output (vm, "%U is admin %s\n",
+ format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, sw_if_index),
+ (vnet_sw_interface_is_admin_up (vnm, sw_if_index) ?
+ "up" : "down"));
+
+ u32 ai;
+ u32 *link_scope = 0, *global_scope = 0;
+ u32 *local_scope = 0, *unknown_scope = 0;
+ ip_interface_address_t *a;
+
+ vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+ ai = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+
+ while (ai != (u32) ~ 0)
+ {
+ a = pool_elt_at_index (lm->if_address_pool, ai);
+ ip6_address_t *address =
+ ip_interface_address_get_address (lm, a);
+
+ if (ip6_address_is_link_local_unicast (address))
+ vec_add1 (link_scope, ai);
+ else if (ip6_address_is_global_unicast (address))
+ vec_add1 (global_scope, ai);
+ else if (ip6_address_is_local_unicast (address))
+ vec_add1 (local_scope, ai);
+ else
+ vec_add1 (unknown_scope, ai);
+
+ ai = a->next_this_sw_interface;
+ }
+
+ if (vec_len (link_scope))
+ {
+ vlib_cli_output (vm, "\tLink-local address(es):\n");
+ ip6_print_addrs (vm, link_scope);
+ vec_free (link_scope);
+ }
+
+ if (vec_len (local_scope))
+ {
+ vlib_cli_output (vm, "\tLocal unicast address(es):\n");
+ ip6_print_addrs (vm, local_scope);
+ vec_free (local_scope);
+ }
+
+ if (vec_len (global_scope))
+ {
+ vlib_cli_output (vm, "\tGlobal unicast address(es):\n");
+ ip6_print_addrs (vm, global_scope);
+ vec_free (global_scope);
+ }
+
+ if (vec_len (unknown_scope))
+ {
+ vlib_cli_output (vm, "\tOther-scope address(es):\n");
+ ip6_print_addrs (vm, unknown_scope);
+ vec_free (unknown_scope);
+ }
+
+ vlib_cli_output (vm, "\tJoined group address(es):\n");
+ ip6_mldp_group_t *m;
+ /* *INDENT-OFF* */
+ pool_foreach (m, radv_info->mldp_group_pool,
+ ({
+ vlib_cli_output (vm, "\t\t%U\n", format_ip6_address,
+ &m->mcast_address);
+ }));
+ /* *INDENT-ON* */
+
+ vlib_cli_output (vm, "\tAdvertised Prefixes:\n");
+ ip6_radv_prefix_t *p;
+ /* *INDENT-OFF* */
+ pool_foreach (p, radv_info->adv_prefixes_pool,
+ ({
+ vlib_cli_output (vm, "\t\tprefix %U, length %d\n",
+ format_ip6_address, &p->prefix, p->prefix_len);
+ }));
+ /* *INDENT-ON* */
+
+ vlib_cli_output (vm, "\tMTU is %d\n", radv_info->adv_link_mtu);
+ vlib_cli_output (vm, "\tICMP error messages are unlimited\n");
+ vlib_cli_output (vm, "\tICMP redirects are disabled\n");
+ vlib_cli_output (vm, "\tICMP unreachables are not sent\n");
+ vlib_cli_output (vm, "\tND DAD is disabled\n");
+ //vlib_cli_output (vm, "\tND reachable time is %d milliseconds\n",);
+ vlib_cli_output (vm, "\tND advertised reachable time is %d\n",
+ radv_info->adv_neighbor_reachable_time_in_msec);
+ vlib_cli_output (vm,
+ "\tND advertised retransmit interval is %d (msec)\n",
+ radv_info->
+ adv_time_in_msec_between_retransmitted_neighbor_solicitations);
+
+ u32 ra_interval = radv_info->max_radv_interval;
+ u32 ra_interval_min = radv_info->min_radv_interval;
+ vlib_cli_output (vm,
+ "\tND router advertisements are sent every %d seconds (min interval is %d)\n",
+ ra_interval, ra_interval_min);
+ vlib_cli_output (vm,
+ "\tND router advertisements live for %d seconds\n",
+ radv_info->adv_router_lifetime_in_sec);
+ vlib_cli_output (vm,
+ "\tHosts %s stateless autoconfig for addresses\n",
+ (radv_info->adv_managed_flag) ? "use" :
+ " don't use");
+ vlib_cli_output (vm, "\tND router advertisements sent %d\n",
+ radv_info->n_advertisements_sent);
+ vlib_cli_output (vm, "\tND router solicitations received %d\n",
+ radv_info->n_solicitations_rcvd);
+ vlib_cli_output (vm, "\tND router solicitations dropped %d\n",
+ radv_info->n_solicitations_dropped);
+ }
+ else
+ {
+ error = clib_error_return (0, "IPv6 not enabled on interface",
+ format_unformat_error, input);
+
+ }
+ }
+ return error;
+}
+
+/*?
+ * This command is used to display various IPv6 attributes on a given
+ * interface.
+ *
+ * @cliexpar
+ * Example of how to display IPv6 settings:
+ * @cliexstart{show ip6 interface GigabitEthernet2/0/0}
+ * GigabitEthernet2/0/0 is admin up
+ * Link-local address(es):
+ * fe80::ab8/64
+ * Joined group address(es):
+ * ff02::1
+ * ff02::2
+ * ff02::16
+ * ff02::1:ff00:ab8
+ * Advertised Prefixes:
+ * prefix fe80::fe:28ff:fe9c:75b3, length 64
+ * MTU is 1500
+ * ICMP error messages are unlimited
+ * ICMP redirects are disabled
+ * ICMP unreachables are not sent
+ * ND DAD is disabled
+ * ND advertised reachable time is 0
+ * ND advertised retransmit interval is 0 (msec)
+ * ND router advertisements are sent every 200 seconds (min interval is 150)
+ * ND router advertisements live for 600 seconds
+ * Hosts use stateless autoconfig for addresses
+ * ND router advertisements sent 19336
+ * ND router solicitations received 0
+ * ND router solicitations dropped 0
+ * @cliexend
+ * Example of output if IPv6 is not enabled on the interface:
+ * @cliexstart{show ip6 interface GigabitEthernet2/0/0}
+ * show ip6 interface: IPv6 not enabled on interface
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip6_interface_command, static) =
+{
+ .path = "show ip6 interface",
+ .function = show_ip6_interface_cmd,
+ .short_help = "show ip6 interface <interface>",
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+disable_ip6_interface (vlib_main_t * vm, u32 sw_if_index)
+{
+ clib_error_t *error = 0;
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ /* if not created - do nothing */
+ if (ri != ~0)
+ {
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_radv_t *radv_info;
+
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ /* check radv_info ref count for other ip6 addresses on this interface */
+ if (radv_info->ref_count == 0)
+ {
+ /* essentially "disables" ipv6 on this interface */
+ error = ip6_add_del_interface_address (vm, sw_if_index,
+ &radv_info->
+ link_local_address,
+ radv_info->
+ link_local_prefix_len,
+ 1 /* is_del */ );
+
+ ip6_neighbor_sw_interface_add_del (vnm, sw_if_index,
+ 0 /* is_add */ );
+ }
+ }
+ return error;
+}
+
+int
+ip6_interface_enabled (vlib_main_t * vm, u32 sw_if_index)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ u32 ri = ~0;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ return ri != ~0;
+}
+
+clib_error_t *
+enable_ip6_interface (vlib_main_t * vm, u32 sw_if_index)
+{
+ clib_error_t *error = 0;
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ u32 ri;
+ int is_add = 1;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ /* if not created yet */
+ if (ri == ~0)
+ {
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *sw_if0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ if (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+ {
+ ethernet_interface_t *eth_if0;
+
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+ if (eth_if0)
+ {
+ /* create radv_info. for this interface. This holds all the info needed for router adverts */
+ ri =
+ ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, is_add);
+
+ if (ri != ~0)
+ {
+ ip6_radv_t *radv_info;
+ ip6_address_t link_local_address;
+
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ ip6_link_local_address_from_ethernet_mac_address
+ (&link_local_address, eth_if0->address);
+
+ sw_if0 = vnet_get_sw_interface (vnm, sw_if_index);
+ if (sw_if0->type == VNET_SW_INTERFACE_TYPE_SUB)
+ {
+ /* make up an interface id */
+ md5_context_t m;
+ u8 digest[16];
+
+ link_local_address.as_u64[0] = radv_info->randomizer;
+
+ md5_init (&m);
+ md5_add (&m, &link_local_address, 16);
+ md5_finish (&m, digest);
+
+ clib_memcpy (&link_local_address, digest, 16);
+
+ radv_info->randomizer = link_local_address.as_u64[0];
+
+ link_local_address.as_u64[0] =
+ clib_host_to_net_u64 (0xFE80000000000000ULL);
+ /* clear u bit */
+ link_local_address.as_u8[8] &= 0xfd;
+ }
+
+ /* essentially "enables" ipv6 on this interface */
+ error = ip6_add_del_interface_address (vm, sw_if_index,
+ &link_local_address,
+ 128
+ /* address width */ ,
+ 0 /* is_del */ );
+
+ if (error)
+ ip6_neighbor_sw_interface_add_del (vnm, sw_if_index,
+ !is_add);
+ else
+ {
+ radv_info->link_local_address = link_local_address;
+ radv_info->link_local_prefix_len = 64;
+ }
+ }
+ }
+ }
+ }
+ return error;
+}
+
+static clib_error_t *
+enable_ip6_interface_cmd (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+
+ sw_if_index = ~0;
+
+ if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ enable_ip6_interface (vm, sw_if_index);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown interface\n'",
+ format_unformat_error, input);
+
+ }
+ return error;
+}
+
+/*?
+ * This command is used to enable IPv6 on a given interface.
+ *
+ * @cliexpar
+ * Example of how enable IPv6 on a given interface:
+ * @cliexcmd{enable ip6 interface GigabitEthernet2/0/0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (enable_ip6_interface_command, static) =
+{
+ .path = "enable ip6 interface",
+ .function = enable_ip6_interface_cmd,
+ .short_help = "enable ip6 interface <interface>",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+disable_ip6_interface_cmd (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+
+ sw_if_index = ~0;
+
+ if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = disable_ip6_interface (vm, sw_if_index);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown interface\n'",
+ format_unformat_error, input);
+
+ }
+ return error;
+}
+
+/*?
+ * This command is used to disable IPv6 on a given interface.
+ *
+ * @cliexpar
+ * Example of how disable IPv6 on a given interface:
+ * @cliexcmd{disable ip6 interface GigabitEthernet2/0/0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (disable_ip6_interface_command, static) =
+{
+ .path = "disable ip6 interface",
+ .function = disable_ip6_interface_cmd,
+ .short_help = "disable ip6 interface <interface>",
+};
+/* *INDENT-ON* */
+
+/*?
+ * This command is used to configure the neighbor discovery
+ * parameters on a given interface. Use the '<em>show ip6 interface</em>'
+ * command to display some of the current neighbor discovery parameters
+ * on a given interface. This command has three formats:
+ *
+ *
+ * <b>Format 1 - Router Advertisement Options:</b> (Only one can be entered in a single command)
+ *
+ * '<em><b>ip6 nd <interface> [no] [ra-managed-config-flag] | [ra-other-config-flag] | [ra-suppress] | [ra-suppress-link-layer] | [ra-send-unicast] | [ra-lifetime <lifetime>] | [ra-initial <cnt> <interval>] | [ra-interval <max-interval> [<min-interval>]] | [ra-cease]</b></em>'
+ *
+ * Where:
+ *
+ * <em>[no] ra-managed-config-flag</em> - Advertises in ICMPv6
+ * router-advertisement messages to use stateful address
+ * auto-configuration to obtain address information (sets the M-bit).
+ * Default is the M-bit is not set and the '<em>no</em>' option
+ * returns it to this default state.
+ *
+ * <em>[no] ra-other-config-flag</em> - Indicates in ICMPv6
+ * router-advertisement messages that hosts use stateful auto
+ * configuration to obtain nonaddress related information (sets
+ * the O-bit). Default is the O-bit is not set and the '<em>no</em>'
+ * option returns it to this default state.
+ *
+ * <em>[no] ra-suppress</em> - Disables sending ICMPv6 router-advertisement
+ * messages. The '<em>no</em>' option implies to enable sending ICMPv6
+ * router-advertisement messages.
+ *
+ * <em>[no] ra-suppress-link-layer</em> - Indicates not to include the
+ * optional source link-layer address in the ICMPv6 router-advertisement
+ * messages. Default is to include the optional source link-layer address
+ * and the '<em>no</em>' option returns it to this default state.
+ *
+ * <em>[no] ra-send-unicast</em> - Use the source address of the
+ * router-solicitation message if availiable. The default is to use
+ * multicast address of all nodes, and the '<em>no</em>' option returns
+ * it to this default state.
+ *
+ * <em>[no] ra-lifetime <lifetime></em> - Advertises the lifetime of a
+ * default router in ICMPv6 router-advertisement messages. The range is
+ * from 0 to 9000 seconds. '<em><lifetime></em>' must be greater than
+ * '<em><max-interval></em>'. The default value is 600 seconds and the
+ * '<em>no</em>' option returns it to this default value.
+ *
+ * <em>[no] ra-initial <cnt> <interval></em> - Number of initial ICMPv6
+ * router-advertisement messages sent and the interval between each
+ * message. Range for count is 1 - 3 and default is 3. Range for interval
+ * is 1 to 16 seconds, and default is 16 seconds. The '<em>no</em>' option
+ * returns both to their default value.
+ *
+ * <em>[no] ra-interval <max-interval> [<min-interval>]</em> - Configures the
+ * interval between sending ICMPv6 router-advertisement messages. The
+ * range for max-interval is from 4 to 200 seconds. min-interval can not
+ * be more than 75% of max-interval. If not set, min-interval will be
+ * set to 75% of max-interval. The range for min-interval is from 3 to
+ * 150 seconds. The '<em>no</em>' option returns both to their default
+ * value.
+ *
+ * <em>[no] ra-cease</em> - Cease sending ICMPv6 router-advertisement messages.
+ * The '<em>no</em>' options implies to start (or restart) sending
+ * ICMPv6 router-advertisement messages.
+ *
+ *
+ * <b>Format 2 - Prefix Options:</b>
+ *
+ * '<em><b>ip6 nd <interface> [no] prefix <ip6-address>/<width> [<valid-lifetime> <pref-lifetime> | infinite] [no-advertise] [off-link] [no-autoconfig] [no-onlink]</b></em>'
+ *
+ * Where:
+ *
+ * <em>no</em> - All additional flags are ignored and the prefix is deleted.
+ *
+ * <em><valid-lifetime> <pref-lifetime></em> - '<em><valid-lifetime></em>' is the
+ * length of time in seconds during what the prefix is valid for the purpose of
+ * on-link determination. Range is 7203 to 2592000 seconds and default is 2592000
+ * seconds (30 days). '<em><pref-lifetime></em>' is the prefered-lifetime and is the
+ * length of time in seconds during what addresses generated from the prefix remain
+ * preferred. Range is 0 to 604800 seconds and default is 604800 seconds (7 days).
+ *
+ * <em>infinite</em> - Both '<em><valid-lifetime></em>' and '<em><<pref-lifetime></em>'
+ * are inifinte, no timeout.
+ *
+ * <em>no-advertise</em> - Do not send full router address in prefix
+ * advertisement. Default is to advertise (i.e. - This flag is off by default).
+ *
+ * <em>off-link</em> - Prefix is off-link, clear L-bit in packet. Default is on-link
+ * (i.e. - This flag is off and L-bit in packet is set by default and this prefix can
+ * be used for on-link determination). '<em>no-onlink</em>' also controls the L-bit.
+ *
+ * <em>no-autoconfig</em> - Do not use prefix for autoconfiguration, clear A-bit in packet.
+ * Default is autoconfig (i.e. - This flag is off and A-bit in packet is set by default.
+ *
+ * <em>no-onlink</em> - Do not use prefix for onlink determination, clear L-bit in packet.
+ * Default is on-link (i.e. - This flag is off and L-bit in packet is set by default and
+ * this prefix can be used for on-link determination). '<em>off-link</em>' also controls
+ * the L-bit.
+ *
+ *
+ * <b>Format 3: - Default of Prefix:</b>
+ *
+ * '<em><b>ip6 nd <interface> [no] prefix <ip6-address>/<width> default</b></em>'
+ *
+ * When a new prefix is added (or existing one is being overwritten) <em>default</em>
+ * uses default values for the prefix. If <em>no</em> is used, the <em>default</em>
+ * is ignored and the prefix is deleted.
+ *
+ *
+ * @cliexpar
+ * Example of how set a router advertisement option:
+ * @cliexcmd{ip6 nd GigabitEthernet2/0/0 ra-interval 100 20}
+ * Example of how to add a prefix:
+ * @cliexcmd{ip6 nd GigabitEthernet2/0/0 prefix fe80::fe:28ff:fe9c:75b3/64 infinite no-advertise}
+ * Example of how to delete a prefix:
+ * @cliexcmd{ip6 nd GigabitEthernet2/0/0 no prefix fe80::fe:28ff:fe9c:75b3/64}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_nd_command, static) =
+{
+ .path = "ip6 nd",
+ .short_help = "ip6 nd <interface> ...",
+ .function = ip6_neighbor_cmd,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+set_ip6_link_local_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * address, u8 address_length)
+{
+ clib_error_t *error = 0;
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ u32 ri;
+ ip6_radv_t *radv_info;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ if (!ip6_address_is_link_local_unicast (address))
+ {
+ vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_LINK_LOCAL;
+ return (error = clib_error_return (0, "address not link-local",
+ format_unformat_error));
+ }
+
+ /* call enable ipv6 */
+ enable_ip6_interface (vm, sw_if_index);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri != ~0)
+ {
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ /* save if link local address (overwrite default) */
+
+ /* delete the old one */
+ error = ip6_add_del_interface_address (vm, sw_if_index,
+ &radv_info->link_local_address,
+ radv_info->link_local_prefix_len
+ /* address width */ ,
+ 1 /* is_del */ );
+
+ if (!error)
+ {
+ /* add the new one */
+ error = ip6_add_del_interface_address (vm, sw_if_index,
+ address, address_length
+ /* address width */ ,
+ 0 /* is_del */ );
+
+ if (!error)
+ {
+ radv_info->link_local_address = *address;
+ radv_info->link_local_prefix_len = address_length;
+ }
+ }
+ }
+ else
+ {
+ vnm->api_errno = VNET_API_ERROR_IP6_NOT_ENABLED;
+ error = clib_error_return (0, "ip6 not enabled for interface",
+ format_unformat_error);
+ }
+ return error;
+}
+
+clib_error_t *
+set_ip6_link_local_address_cmd (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index;
+ ip6_address_t ip6_addr;
+ u32 addr_len = 0;
+
+ if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ /* get the rest of the command */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U/%d",
+ unformat_ip6_address, &ip6_addr, &addr_len))
+ break;
+ else
+ return (unformat_parse_error (input));
+ }
+ }
+ error = set_ip6_link_local_address (vm, sw_if_index, &ip6_addr, addr_len);
+ return error;
+}
+
+/*?
+ * This command is used to assign an IPv6 Link-local address to an
+ * interface. This command will enable IPv6 on an interface if it
+ * is not already enabled. Use the '<em>show ip6 interface</em>' command
+ * to display the assigned Link-local address.
+ *
+ * @cliexpar
+ * Example of how to assign an IPv6 Link-local address to an interface:
+ * @cliexcmd{set ip6 link-local address GigabitEthernet2/0/0 FE80::AB8/64}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_link_local_address_command, static) =
+{
+ .path = "set ip6 link-local address",
+ .short_help = "set ip6 link-local address <interface> <ip6-address>/<width>",
+ .function = set_ip6_link_local_address_cmd,
+};
+/* *INDENT-ON* */
+
+/* callback when an interface address is added or deleted */
+static void
+ip6_neighbor_add_del_interface_address (ip6_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length,
+ u32 if_address_index, u32 is_delete)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ u32 ri;
+ vlib_main_t *vm = vnm->vlib_main;
+ ip6_radv_t *radv_info;
+ ip6_address_t a;
+ ip6_mldp_group_t *mcast_group_info;
+
+ /* create solicited node multicast address for this interface adddress */
+ ip6_set_solicited_node_multicast_address (&a, 0);
+
+ a.as_u8[0xd] = address->as_u8[0xd];
+ a.as_u8[0xe] = address->as_u8[0xe];
+ a.as_u8[0xf] = address->as_u8[0xf];
+
+ if (!is_delete)
+ {
+ /* try to create radv_info - does nothing if ipv6 already enabled */
+ enable_ip6_interface (vm, sw_if_index);
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+ if (ri != ~0)
+ {
+ /* get radv_info */
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ /* add address */
+ if (!ip6_address_is_link_local_unicast (address))
+ radv_info->ref_count++;
+
+ /* lookup prefix info for this address on this interface */
+ uword *p = mhash_get (&radv_info->address_to_mldp_index, &a);
+ mcast_group_info =
+ p ? pool_elt_at_index (radv_info->mldp_group_pool, p[0]) : 0;
+
+ /* add -solicted node multicast address */
+ if (!mcast_group_info)
+ {
+ /* add */
+ u32 mi;
+ pool_get (radv_info->mldp_group_pool, mcast_group_info);
+
+ mi = mcast_group_info - radv_info->mldp_group_pool;
+ mhash_set (&radv_info->address_to_mldp_index, &a, mi,
+ /* old_value */ 0);
+
+ mcast_group_info->type = 4;
+ mcast_group_info->mcast_source_address_pool = 0;
+ mcast_group_info->num_sources = 0;
+ clib_memcpy (&mcast_group_info->mcast_address, &a,
+ sizeof (ip6_address_t));
+ }
+ }
+ }
+ else
+ {
+
+ /* delete */
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+ if (ri != ~0)
+ {
+ /* get radv_info */
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ /* lookup prefix info for this address on this interface */
+ uword *p = mhash_get (&radv_info->address_to_mldp_index, &a);
+ mcast_group_info =
+ p ? pool_elt_at_index (radv_info->mldp_group_pool, p[0]) : 0;
+
+ if (mcast_group_info)
+ {
+ mhash_unset (&radv_info->address_to_mldp_index, &a,
+ /* old_value */ 0);
+ pool_put (radv_info->mldp_group_pool, mcast_group_info);
+ }
+
+ /* if interface up send MLDP "report" */
+ radv_info->all_routers_mcast = 0;
+
+ /* add address */
+ if (!ip6_address_is_link_local_unicast (address))
+ radv_info->ref_count--;
+ }
+ }
+}
+
+clib_error_t *
+ip6_set_neighbor_limit (u32 neighbor_limit)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+
+ nm->limit_neighbor_cache_size = neighbor_limit;
+ return 0;
+}
+
+static clib_error_t *
+ip6_neighbor_init (vlib_main_t * vm)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_main_t *im = &ip6_main;
+
+ mhash_init (&nm->neighbor_index_by_key,
+ /* value size */ sizeof (uword),
+ /* key size */ sizeof (ip6_neighbor_key_t));
+
+ icmp6_register_type (vm, ICMP6_neighbor_solicitation,
+ ip6_icmp_neighbor_solicitation_node.index);
+ icmp6_register_type (vm, ICMP6_neighbor_advertisement,
+ ip6_icmp_neighbor_advertisement_node.index);
+ icmp6_register_type (vm, ICMP6_router_solicitation,
+ ip6_icmp_router_solicitation_node.index);
+ icmp6_register_type (vm, ICMP6_router_advertisement,
+ ip6_icmp_router_advertisement_node.index);
+
+ /* handler node for ip6 neighbor discovery events and timers */
+ vlib_register_node (vm, &ip6_icmp_neighbor_discovery_event_node);
+
+ /* add call backs */
+ ip6_add_del_interface_address_callback_t cb;
+ memset (&cb, 0x0, sizeof (ip6_add_del_interface_address_callback_t));
+
+ /* when an interface address changes... */
+ cb.function = ip6_neighbor_add_del_interface_address;
+ cb.function_opaque = 0;
+ vec_add1 (im->add_del_interface_address_callbacks, cb);
+
+ mhash_init (&nm->pending_resolutions_by_address,
+ /* value size */ sizeof (uword),
+ /* key size */ sizeof (ip6_address_t));
+
+ mhash_init (&nm->mac_changes_by_address,
+ /* value size */ sizeof (uword),
+ /* key size */ sizeof (ip6_address_t));
+
+ /* default, configurable */
+ nm->limit_neighbor_cache_size = 50000;
+
+#if 0
+ /* $$$$ Hack fix for today */
+ vec_validate_init_empty
+ (im->discover_neighbor_next_index_by_hw_if_index, 32, 0 /* drop */ );
+#endif
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_neighbor_init);
+
+
+void
+vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque, uword data)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_address_t *address = address_arg;
+ uword *p;
+ pending_resolution_t *pr;
+
+ pool_get (nm->pending_resolutions, pr);
+
+ pr->next_index = ~0;
+ pr->node_index = node_index;
+ pr->type_opaque = type_opaque;
+ pr->data = data;
+
+ p = mhash_get (&nm->pending_resolutions_by_address, address);
+ if (p)
+ {
+ /* Insert new resolution at the head of the list */
+ pr->next_index = p[0];
+ mhash_unset (&nm->pending_resolutions_by_address, address, 0);
+ }
+
+ mhash_set (&nm->pending_resolutions_by_address, address,
+ pr - nm->pending_resolutions, 0 /* old value */ );
+}
+
+int
+vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm,
+ void *data_callback,
+ u32 pid,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque, uword data, int is_add)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_address_t *address = address_arg;
+ uword *p;
+ pending_resolution_t *mc;
+ void (*fp) (u32, u8 *) = data_callback;
+
+ if (is_add)
+ {
+ pool_get (nm->mac_changes, mc);
+
+ mc->next_index = ~0;
+ mc->node_index = node_index;
+ mc->type_opaque = type_opaque;
+ mc->data = data;
+ mc->data_callback = data_callback;
+ mc->pid = pid;
+
+ p = mhash_get (&nm->mac_changes_by_address, address);
+ if (p)
+ {
+ /* Insert new resolution at the head of the list */
+ mc->next_index = p[0];
+ mhash_unset (&nm->mac_changes_by_address, address, 0);
+ }
+
+ mhash_set (&nm->mac_changes_by_address, address,
+ mc - nm->mac_changes, 0);
+ return 0;
+ }
+ else
+ {
+ u32 index;
+ pending_resolution_t *mc_last = 0;
+
+ p = mhash_get (&nm->mac_changes_by_address, address);
+ if (p == 0)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ index = p[0];
+
+ while (index != (u32) ~ 0)
+ {
+ mc = pool_elt_at_index (nm->mac_changes, index);
+ if (mc->node_index == node_index &&
+ mc->type_opaque == type_opaque && mc->pid == pid)
+ {
+ /* Clients may need to clean up pool entries, too */
+ if (fp)
+ (*fp) (mc->data, 0 /* no new mac addrs */ );
+ if (index == p[0])
+ {
+ mhash_unset (&nm->mac_changes_by_address, address, 0);
+ if (mc->next_index != ~0)
+ mhash_set (&nm->mac_changes_by_address, address,
+ mc->next_index, 0);
+ pool_put (nm->mac_changes, mc);
+ return 0;
+ }
+ else
+ {
+ ASSERT (mc_last);
+ mc_last->next_index = mc->next_index;
+ pool_put (nm->mac_changes, mc);
+ return 0;
+ }
+ }
+ mc_last = mc;
+ index = mc->next_index;
+ }
+
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+}
+
+int
+vnet_ip6_nd_term (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_buffer_t * p0,
+ ethernet_header_t * eth,
+ ip6_header_t * ip, u32 sw_if_index, u16 bd_index, u8 shg)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ icmp6_neighbor_solicitation_or_advertisement_header_t *ndh;
+ pending_resolution_t *mc;
+ uword *p;
+
+ ndh = ip6_next_header (ip);
+ if (ndh->icmp.type != ICMP6_neighbor_solicitation &&
+ ndh->icmp.type != ICMP6_neighbor_advertisement)
+ return 0;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (p0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ u8 *t0 = vlib_add_trace (vm, node, p0,
+ sizeof (icmp6_input_trace_t));
+ clib_memcpy (t0, ip, sizeof (icmp6_input_trace_t));
+ }
+
+ /* Check if anyone want ND events for L2 BDs */
+ p = mhash_get (&nm->mac_changes_by_address, &ip6a_zero);
+ if (p && shg == 0 && /* Only SHG 0 interface which is more likely local */
+ !ip6_address_is_link_local_unicast (&ip->src_address))
+ {
+ u32 next_index = p[0];
+ while (next_index != (u32) ~ 0)
+ {
+ int (*fp) (u32, u8 *, u32, ip6_address_t *);
+ int rv = 1;
+ mc = pool_elt_at_index (nm->mac_changes, next_index);
+ fp = mc->data_callback;
+ /* Call the callback, return 1 to suppress dup events */
+ if (fp)
+ rv = (*fp) (mc->data,
+ eth->src_address, sw_if_index, &ip->src_address);
+ /* Signal the resolver process */
+ if (rv == 0)
+ vlib_process_signal_event (vm, mc->node_index,
+ mc->type_opaque, mc->data);
+ next_index = mc->next_index;
+ }
+ }
+
+ /* Check if MAC entry exsist for solicited target IP */
+ if (ndh->icmp.type == ICMP6_neighbor_solicitation)
+ {
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *opt;
+ l2_bridge_domain_t *bd_config;
+ u8 *macp;
+
+ opt = (void *) (ndh + 1);
+ if ((opt->header.type !=
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address) ||
+ (opt->header.n_data_u64s != 1))
+ return 0; /* source link layer address option not present */
+
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+ macp =
+ (u8 *) hash_get_mem (bd_config->mac_by_ip6, &ndh->target_address);
+ if (macp)
+ { /* found ip-mac entry, generate eighbor advertisement response */
+ int bogus_length;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
+ ip->dst_address = ip->src_address;
+ ip->src_address = ndh->target_address;
+ ip->hop_limit = 255;
+ opt->header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
+ clib_memcpy (opt->ethernet_address, macp, 6);
+ ndh->icmp.type = ICMP6_neighbor_advertisement;
+ ndh->advertisement_flags = clib_host_to_net_u32
+ (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED |
+ ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+ ndh->icmp.checksum = 0;
+ ndh->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip, &bogus_length);
+ clib_memcpy (eth->dst_address, eth->src_address, 6);
+ clib_memcpy (eth->src_address, macp, 6);
+ vlib_error_count (vm, error_node->node_index,
+ ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX, 1);
+ return 1;
+ }
+ }
+
+ return 0;
+
+}
+
+void
+ethernet_ndp_change_mac (u32 sw_if_index)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_neighbor_t *n;
+
+ /* *INDENT-OFF* */
+ pool_foreach (n, nm->neighbor_pool,
+ ({
+ if (n->key.sw_if_index == sw_if_index)
+ {
+ adj_nbr_walk_nh6 (sw_if_index,
+ &n->key.ip6_address,
+ ip6_nd_mk_complete_walk, n);
+ }
+ }));
+ /* *INDENT-ON* */
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_neighbor.h b/src/vnet/ip/ip6_neighbor.h
new file mode 100644
index 00000000000..b2c9f48ae8a
--- /dev/null
+++ b/src/vnet/ip/ip6_neighbor.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * ip6_neighboor.h: ip6 neighbor structures
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_ip6_neighbor_h
+#define included_ip6_neighbor_h
+
+#include <vnet/fib/fib_types.h>
+
+typedef struct
+{
+ ip6_address_t ip6_address;
+ u32 sw_if_index;
+ u32 pad;
+} ip6_neighbor_key_t;
+
+typedef struct
+{
+ ip6_neighbor_key_t key;
+ u8 link_layer_address[8];
+ u16 flags;
+#define IP6_NEIGHBOR_FLAG_STATIC (1 << 0)
+#define IP6_NEIGHBOR_FLAG_DYNAMIC (2 << 0)
+ u64 cpu_time_last_updated;
+ fib_node_index_t fib_entry_index;
+} ip6_neighbor_t;
+
+ip6_neighbor_t *ip6_neighbors_entries (u32 sw_if_index);
+
+#endif /* included_ip6_neighbor_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h
new file mode 100644
index 00000000000..1e551c8b67d
--- /dev/null
+++ b/src/vnet/ip/ip6_packet.h
@@ -0,0 +1,499 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip6/packet.h: ip6 packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip6_packet_h
+#define included_ip6_packet_h
+
+#include <vnet/ip/tcp_packet.h>
+#include <vnet/ip/ip4_packet.h>
+
+typedef union
+{
+ u8 as_u8[16];
+ u16 as_u16[8];
+ u32 as_u32[4];
+ u64 as_u64[2];
+ uword as_uword[16 / sizeof (uword)];
+}
+ip6_address_t;
+
+/* Packed so that the mhash key doesn't include uninitialized pad bytes */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ /* IP address must be first for ip_interface_address_get_address() to work */
+ ip6_address_t ip6_addr;
+ u32 fib_index;
+}) ip6_address_fib_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (union {
+ struct {
+ u32 pad[3];
+ ip4_address_t ip4;
+ };
+ ip6_address_t ip6;
+ u8 as_u8[16];
+ u64 as_u64[2];
+}) ip46_address_t;
+/* *INDENT-ON* */
+#define ip46_address_is_ip4(ip46) (((ip46)->pad[0] | (ip46)->pad[1] | (ip46)->pad[2]) == 0)
+#define ip46_address_mask_ip4(ip46) ((ip46)->pad[0] = (ip46)->pad[1] = (ip46)->pad[2] = 0)
+#define ip46_address_set_ip4(ip46, ip) (ip46_address_mask_ip4(ip46), (ip46)->ip4 = (ip)[0])
+#define ip46_address_reset(ip46) ((ip46)->as_u64[0] = (ip46)->as_u64[1] = 0)
+#define ip46_address_cmp(ip46_1, ip46_2) (memcmp(ip46_1, ip46_2, sizeof(*ip46_1)))
+#define ip46_address_is_zero(ip46) (((ip46)->as_u64[0] == 0) && ((ip46)->as_u64[1] == 0))
+
+always_inline void
+ip46_from_addr_buf (u32 is_ipv6, u8 * buf, ip46_address_t * ip)
+{
+ if (is_ipv6)
+ ip->ip6 = *((ip6_address_t *) buf);
+ else
+ ip46_address_set_ip4 (ip, (ip4_address_t *) buf);
+}
+
+always_inline void
+ip6_addr_fib_init (ip6_address_fib_t * addr_fib, ip6_address_t * address,
+ u32 fib_index)
+{
+ addr_fib->ip6_addr.as_u64[0] = address->as_u64[0];
+ addr_fib->ip6_addr.as_u64[1] = address->as_u64[1];
+ addr_fib->fib_index = fib_index;
+}
+
+/* Special addresses:
+ unspecified ::/128
+ loopback ::1/128
+ global unicast 2000::/3
+ unique local unicast fc00::/7
+ link local unicast fe80::/10
+ multicast ff00::/8
+ ietf reserved everything else. */
+
+#define foreach_ip6_multicast_address_scope \
+ _ (loopback, 0x1) \
+ _ (link_local, 0x2) \
+ _ (admin_local, 0x4) \
+ _ (site_local, 0x5) \
+ _ (organization_local, 0x8) \
+ _ (global, 0xe)
+
+#define foreach_ip6_multicast_link_local_group_id \
+ _ (all_hosts, 0x1) \
+ _ (all_routers, 0x2) \
+ _ (rip_routers, 0x9) \
+ _ (eigrp_routers, 0xa) \
+ _ (pim_routers, 0xd) \
+ _ (mldv2_routers, 0x16)
+
+typedef enum
+{
+#define _(f,n) IP6_MULTICAST_SCOPE_##f = n,
+ foreach_ip6_multicast_address_scope
+#undef _
+} ip6_multicast_address_scope_t;
+
+typedef enum
+{
+#define _(f,n) IP6_MULTICAST_GROUP_ID_##f = n,
+ foreach_ip6_multicast_link_local_group_id
+#undef _
+} ip6_multicast_link_local_group_id_t;
+
+always_inline uword
+ip6_address_is_multicast (ip6_address_t * a)
+{
+ return a->as_u8[0] == 0xff;
+}
+
+always_inline uword
+ip46_address_is_multicast (ip46_address_t * a)
+{
+ return ip46_address_is_ip4 (a) ? ip4_address_is_multicast (&a->ip4) :
+ ip6_address_is_multicast (&a->ip6);
+}
+
+always_inline void
+ip6_set_reserved_multicast_address (ip6_address_t * a,
+ ip6_multicast_address_scope_t scope,
+ u16 id)
+{
+ a->as_u64[0] = a->as_u64[1] = 0;
+ a->as_u16[0] = clib_host_to_net_u16 (0xff00 | scope);
+ a->as_u16[7] = clib_host_to_net_u16 (id);
+}
+
+always_inline void
+ip6_set_solicited_node_multicast_address (ip6_address_t * a, u32 id)
+{
+ /* 0xff02::1:ffXX:XXXX. */
+ a->as_u64[0] = a->as_u64[1] = 0;
+ a->as_u16[0] = clib_host_to_net_u16 (0xff02);
+ a->as_u8[11] = 1;
+ ASSERT ((id >> 24) == 0);
+ id |= 0xff << 24;
+ a->as_u32[3] = clib_host_to_net_u32 (id);
+}
+
+always_inline void
+ip6_link_local_address_from_ethernet_address (ip6_address_t * a,
+ u8 * ethernet_address)
+{
+ a->as_u64[0] = a->as_u64[1] = 0;
+ a->as_u16[0] = clib_host_to_net_u16 (0xfe80);
+ /* Always set locally administered bit (6). */
+ a->as_u8[0x8] = ethernet_address[0] | (1 << 6);
+ a->as_u8[0x9] = ethernet_address[1];
+ a->as_u8[0xa] = ethernet_address[2];
+ a->as_u8[0xb] = 0xff;
+ a->as_u8[0xc] = 0xfe;
+ a->as_u8[0xd] = ethernet_address[3];
+ a->as_u8[0xe] = ethernet_address[4];
+ a->as_u8[0xf] = ethernet_address[5];
+}
+
+always_inline void
+ip6_multicast_ethernet_address (u8 * ethernet_address, u32 group_id)
+{
+ ethernet_address[0] = 0x33;
+ ethernet_address[1] = 0x33;
+ ethernet_address[2] = ((group_id >> 24) & 0xff);
+ ethernet_address[3] = ((group_id >> 16) & 0xff);
+ ethernet_address[4] = ((group_id >> 8) & 0xff);
+ ethernet_address[5] = ((group_id >> 0) & 0xff);
+}
+
+always_inline uword
+ip6_address_is_equal (ip6_address_t * a, ip6_address_t * b)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
+ if (a->as_uword[i] != b->as_uword[i])
+ return 0;
+ return 1;
+}
+
+always_inline uword
+ip6_address_is_equal_masked (ip6_address_t * a, ip6_address_t * b,
+ ip6_address_t * mask)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
+ {
+ uword a_masked, b_masked;
+ a_masked = a->as_uword[i] & mask->as_uword[i];
+ b_masked = b->as_uword[i] & mask->as_uword[i];
+
+ if (a_masked != b_masked)
+ return 0;
+ }
+ return 1;
+}
+
+always_inline void
+ip6_address_mask (ip6_address_t * a, ip6_address_t * mask)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
+ a->as_uword[i] &= mask->as_uword[i];
+}
+
+always_inline void
+ip6_address_set_zero (ip6_address_t * a)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
+ a->as_uword[i] = 0;
+}
+
+always_inline void
+ip6_address_mask_from_width (ip6_address_t * a, u32 width)
+{
+ int i, byte, bit, bitnum;
+ ASSERT (width <= 128);
+ memset (a, 0, sizeof (a[0]));
+ for (i = 0; i < width; i++)
+ {
+ bitnum = (7 - (i & 7));
+ byte = i / 8;
+ bit = 1 << bitnum;
+ a->as_u8[byte] |= bit;
+ }
+}
+
+always_inline uword
+ip6_address_is_zero (ip6_address_t * a)
+{
+ int i;
+ for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
+ if (a->as_uword[i] != 0)
+ return 0;
+ return 1;
+}
+
+/* Check for unspecified address ::0 */
+always_inline uword
+ip6_address_is_unspecified (ip6_address_t * a)
+{
+ return ip6_address_is_zero (a);
+}
+
+/* Check for loopback address ::1 */
+always_inline uword
+ip6_address_is_loopback (ip6_address_t * a)
+{
+ uword is_loopback;
+ u8 save = a->as_u8[15];
+ a->as_u8[15] = save ^ 1;
+ is_loopback = ip6_address_is_zero (a);
+ a->as_u8[15] = save;
+ return is_loopback;
+}
+
+/* Check for link local unicast fe80::/10. */
+always_inline uword
+ip6_address_is_link_local_unicast (ip6_address_t * a)
+{
+ return a->as_u8[0] == 0xfe && (a->as_u8[1] & 0xc0) == 0x80;
+}
+
+/* Check for unique local unicast fc00::/7. */
+always_inline uword
+ip6_address_is_local_unicast (ip6_address_t * a)
+{
+ return (a->as_u8[0] & 0xfe) == 0xfc;
+}
+
+/* Check for unique global unicast 2000::/3. */
+always_inline uword
+ip6_address_is_global_unicast (ip6_address_t * a)
+{
+ return (a->as_u8[0] & 0xe0) == 0x20;
+}
+
+/* Check for solicited node multicast 0xff02::1:ff00:0/104 */
+always_inline uword
+ip6_is_solicited_node_multicast_address (ip6_address_t * a)
+{
+ return (a->as_u32[0] == clib_host_to_net_u32 (0xff020000)
+ && a->as_u32[1] == 0
+ && a->as_u32[2] == clib_host_to_net_u32 (1)
+ && a->as_u8[12] == 0xff);
+}
+
+typedef struct
+{
+ /* 4 bit version, 8 bit traffic class and 20 bit flow label. */
+ u32 ip_version_traffic_class_and_flow_label;
+
+ /* Total packet length not including this header (but including
+ any extension headers if present). */
+ u16 payload_length;
+
+ /* Protocol for next header. */
+ u8 protocol;
+
+ /* Hop limit decremented by router at each hop. */
+ u8 hop_limit;
+
+ /* Source and destination address. */
+ ip6_address_t src_address, dst_address;
+} ip6_header_t;
+
+always_inline void *
+ip6_next_header (ip6_header_t * i)
+{
+ return (void *) (i + 1);
+}
+
+always_inline void
+ip6_copy_header (ip6_header_t * dst, const ip6_header_t * src)
+{
+ dst->ip_version_traffic_class_and_flow_label =
+ src->ip_version_traffic_class_and_flow_label;
+ dst->payload_length = src->payload_length;
+ dst->protocol = src->protocol;
+ dst->hop_limit = src->hop_limit;
+
+ dst->src_address.as_uword[0] = src->src_address.as_uword[0];
+ dst->src_address.as_uword[1] = src->src_address.as_uword[1];
+ dst->dst_address.as_uword[0] = src->dst_address.as_uword[0];
+ dst->dst_address.as_uword[1] = src->dst_address.as_uword[1];
+}
+
+always_inline void
+ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0)
+{
+ {
+ ip6_address_t src0, dst0;
+
+ src0 = ip0->src_address;
+ dst0 = ip0->dst_address;
+ ip0->src_address = dst0;
+ ip0->dst_address = src0;
+ }
+
+ {
+ u16 src0, dst0;
+
+ src0 = tcp0->ports.src;
+ dst0 = tcp0->ports.dst;
+ tcp0->ports.src = dst0;
+ tcp0->ports.dst = src0;
+ }
+}
+
+always_inline void
+ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1,
+ tcp_header_t * tcp0, tcp_header_t * tcp1)
+{
+ {
+ ip6_address_t src0, dst0, src1, dst1;
+
+ src0 = ip0->src_address;
+ src1 = ip1->src_address;
+ dst0 = ip0->dst_address;
+ dst1 = ip1->dst_address;
+ ip0->src_address = dst0;
+ ip1->src_address = dst1;
+ ip0->dst_address = src0;
+ ip1->dst_address = src1;
+ }
+
+ {
+ u16 src0, dst0, src1, dst1;
+
+ src0 = tcp0->ports.src;
+ src1 = tcp1->ports.src;
+ dst0 = tcp0->ports.dst;
+ dst1 = tcp1->ports.dst;
+ tcp0->ports.src = dst0;
+ tcp1->ports.src = dst1;
+ tcp0->ports.dst = src0;
+ tcp1->ports.dst = src1;
+ }
+}
+
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 data;
+}) ip6_pad1_option_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 type;
+ u8 len;
+ u8 data[0];
+}) ip6_padN_option_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+#define IP6_MLDP_ALERT_TYPE 0x5
+ u8 type;
+ u8 len;
+ u16 value;
+}) ip6_router_alert_option_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+}) ip6_ext_header_t;
+
+always_inline u8 ip6_ext_hdr(u8 nexthdr)
+{
+ /*
+ * find out if nexthdr is an extension header or a protocol
+ */
+ return (nexthdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ||
+ (nexthdr == IP_PROTOCOL_IP6_NONXT) ||
+ (nexthdr == IP_PROTOCOL_IPV6_FRAGMENTATION) ||
+ (nexthdr == IP_PROTOCOL_IPSEC_AH) ||
+ (nexthdr == IP_PROTOCOL_IPV6_ROUTE) ||
+ (nexthdr == IP_PROTOCOL_IP6_DESTINATION_OPTIONS);
+}
+
+#define ip6_ext_header_len(p) (((p)->n_data_u64s+1) << 3)
+#define ip6_ext_authhdr_len(p) (((p)->n_data_u64s+2) << 2)
+
+always_inline void *
+ip6_ext_next_header (ip6_ext_header_t *ext_hdr )
+{ return (void *)((u8 *) ext_hdr + ip6_ext_header_len(ext_hdr)); }
+
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+ u8 data[0];
+}) ip6_hop_by_hop_ext_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ u8 rsv;
+ u16 fragment_offset_and_more;
+ u32 identification;
+}) ip6_frag_hdr_t;
+/* *INDENT-ON* */
+
+#define ip6_frag_hdr_offset(hdr) \
+ (clib_net_to_host_u16((hdr)->fragment_offset_and_more) >> 3)
+
+#define ip6_frag_hdr_more(hdr) \
+ (clib_net_to_host_u16((hdr)->fragment_offset_and_more) & 0x1)
+
+#define ip6_frag_hdr_offset_and_more(offset, more) \
+ clib_host_to_net_u16(((offset) << 3) + !!(more))
+
+#endif /* included_ip6_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_pg.c b/src/vnet/ip/ip6_pg.c
new file mode 100644
index 00000000000..ba1e4ad9a58
--- /dev/null
+++ b/src/vnet/ip/ip6_pg.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_pg: IP v4 packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+static void
+ip6_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_header_offset = g->start_byte_offset;
+
+ while (n_packets >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+
+ pi0 = packets[0];
+ pi1 = packets[1];
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ n_packets -= 2;
+ packets += 2;
+
+ ip0 = (void *) (p0->data + ip_header_offset);
+ ip1 = (void *) (p1->data + ip_header_offset);
+
+ ip0->payload_length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) -
+ ip_header_offset - sizeof (ip0[0]));
+ ip1->payload_length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p1) -
+ ip_header_offset - sizeof (ip1[0]));
+ }
+
+ while (n_packets >= 1)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+
+ pi0 = packets[0];
+ p0 = vlib_get_buffer (vm, pi0);
+ n_packets -= 1;
+ packets += 1;
+
+ ip0 = (void *) (p0->data + ip_header_offset);
+
+ ip0->payload_length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) -
+ ip_header_offset - sizeof (ip0[0]));
+ }
+}
+
+typedef struct
+{
+ pg_edit_t ip_version;
+ pg_edit_t traffic_class;
+ pg_edit_t flow_label;
+ pg_edit_t payload_length;
+ pg_edit_t protocol;
+ pg_edit_t hop_limit;
+ pg_edit_t src_address, dst_address;
+} pg_ip6_header_t;
+
+static inline void
+pg_ip6_header_init (pg_ip6_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the IP header. */
+#define _(f) pg_edit_init (&p->f, ip6_header_t, f);
+ _(payload_length);
+ _(hop_limit);
+ _(protocol);
+ _(src_address);
+ _(dst_address);
+#undef _
+
+ /* Initialize bit fields. */
+ pg_edit_init_bitfield (&p->ip_version, ip6_header_t,
+ ip_version_traffic_class_and_flow_label, 28, 4);
+ pg_edit_init_bitfield (&p->traffic_class, ip6_header_t,
+ ip_version_traffic_class_and_flow_label, 20, 8);
+ pg_edit_init_bitfield (&p->flow_label, ip6_header_t,
+ ip_version_traffic_class_and_flow_label, 0, 20);
+}
+
+uword
+unformat_pg_ip6_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_ip6_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ip6_header_t),
+ &group_index);
+ pg_ip6_header_init (p);
+
+ /* Defaults. */
+ pg_edit_set_fixed (&p->ip_version, 6);
+ pg_edit_set_fixed (&p->traffic_class, 0);
+ pg_edit_set_fixed (&p->flow_label, 0);
+ pg_edit_set_fixed (&p->hop_limit, 64);
+
+ p->payload_length.type = PG_EDIT_UNSPECIFIED;
+
+ if (!unformat (input, "%U: %U -> %U",
+ unformat_pg_edit,
+ unformat_ip_protocol, &p->protocol,
+ unformat_pg_edit,
+ unformat_ip6_address, &p->src_address,
+ unformat_pg_edit, unformat_ip6_address, &p->dst_address))
+ goto error;
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "version %U",
+ unformat_pg_edit, unformat_pg_number, &p->ip_version))
+ ;
+
+ else if (unformat (input, "traffic-class %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->traffic_class))
+ ;
+
+ else if (unformat (input, "length %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->payload_length))
+ ;
+
+ else if (unformat (input, "hop-limit %U",
+ unformat_pg_edit, unformat_pg_number, &p->hop_limit))
+ ;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_t protocol;
+ ip_protocol_info_t *pi;
+
+ pi = 0;
+ if (p->protocol.type == PG_EDIT_FIXED)
+ {
+ protocol = pg_edit_get_value (&p->protocol, PG_EDIT_LO);
+ pi = ip_get_protocol_info (im, protocol);
+ }
+
+ if (pi && pi->unformat_pg_edit
+ && unformat_user (input, pi->unformat_pg_edit, s))
+ ;
+
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ if (p->payload_length.type == PG_EDIT_UNSPECIFIED
+ && s->min_packet_bytes == s->max_packet_bytes
+ && group_index + 1 < vec_len (s->edit_groups))
+ {
+ pg_edit_set_fixed (&p->payload_length,
+ pg_edit_group_n_bytes (s,
+ group_index) -
+ sizeof (ip6_header_t));
+ }
+
+ p = pg_get_edit_group (s, group_index);
+ if (p->payload_length.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = ip6_pg_edit_function;
+ }
+
+ return 1;
+ }
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
new file mode 100644
index 00000000000..cd9b7397d29
--- /dev/null
+++ b/src/vnet/ip/ip_api.c
@@ -0,0 +1,1196 @@
+/*
+ *------------------------------------------------------------------
+ * ip_api.c - vnet ip api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6_neighbor.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_api.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/dpo/ip_null_dpo.h>
+#include <vnet/ethernet/arp_packet.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_ip_api_msg \
+_(IP_FIB_DUMP, ip_fib_dump) \
+_(IP_FIB_DETAILS, ip_fib_details) \
+_(IP6_FIB_DUMP, ip6_fib_dump) \
+_(IP6_FIB_DETAILS, ip6_fib_details) \
+_(IP_NEIGHBOR_DUMP, ip_neighbor_dump) \
+_(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \
+_(IP_ADDRESS_DUMP, ip_address_dump) \
+_(IP_DUMP, ip_dump) \
+_(IP_NEIGHBOR_ADD_DEL, ip_neighbor_add_del) \
+_(IP_ADD_DEL_ROUTE, ip_add_del_route) \
+_(SET_IP_FLOW_HASH,set_ip_flow_hash) \
+_(SW_INTERFACE_IP6ND_RA_CONFIG, sw_interface_ip6nd_ra_config) \
+_(SW_INTERFACE_IP6ND_RA_PREFIX, sw_interface_ip6nd_ra_prefix) \
+_(SW_INTERFACE_IP6_ENABLE_DISABLE, sw_interface_ip6_enable_disable ) \
+_(SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS, \
+ sw_interface_ip6_set_link_local_address)
+
+extern void stats_dslock_with_hint (int hint, int tag);
+extern void stats_dsunlock (void);
+
+static void
+send_ip_neighbor_details (u8 is_ipv6,
+ u8 is_static,
+ u8 * mac_address,
+ u8 * ip_address,
+ unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_ip_neighbor_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_DETAILS);
+ mp->context = context;
+ mp->is_ipv6 = is_ipv6;
+ mp->is_static = is_static;
+ memcpy (mp->mac_address, mac_address, 6);
+ memcpy (mp->ip_address, ip_address, (is_ipv6) ? 16 : 4);
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_ip_neighbor_details_t_handler (vl_api_ip_neighbor_details_t * mp)
+{
+ clib_warning ("BUG");
+}
+
+static void
+vl_api_ip_neighbor_dump_t_handler (vl_api_ip_neighbor_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ if (mp->is_ipv6)
+ {
+ ip6_neighbor_t *n, *ns;
+
+ ns = ip6_neighbors_entries (sw_if_index);
+ /* *INDENT-OFF* */
+ vec_foreach (n, ns)
+ {
+ send_ip_neighbor_details
+ (mp->is_ipv6, ((n->flags & IP6_NEIGHBOR_FLAG_STATIC) ? 1 : 0),
+ (u8 *) n->link_layer_address,
+ (u8 *) & (n->key.ip6_address.as_u8),
+ q, mp->context);
+ }
+ /* *INDENT-ON* */
+ vec_free (ns);
+ }
+ else
+ {
+ ethernet_arp_ip4_entry_t *n, *ns;
+
+ ns = ip4_neighbor_entries (sw_if_index);
+ /* *INDENT-OFF* */
+ vec_foreach (n, ns)
+ {
+ send_ip_neighbor_details (mp->is_ipv6,
+ ((n->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) ? 1 : 0),
+ (u8*) n->ethernet_address,
+ (u8*) & (n->ip4_address.as_u8),
+ q, mp->context);
+ }
+ /* *INDENT-ON* */
+ vec_free (ns);
+ }
+}
+
+
+void
+copy_fib_next_hop (fib_route_path_encode_t * api_rpath, void *fp_arg)
+{
+ int is_ip4;
+ vl_api_fib_path_t *fp = (vl_api_fib_path_t *) fp_arg;
+
+ if (api_rpath->rpath.frp_proto == FIB_PROTOCOL_IP4)
+ fp->afi = IP46_TYPE_IP4;
+ else if (api_rpath->rpath.frp_proto == FIB_PROTOCOL_IP6)
+ fp->afi = IP46_TYPE_IP6;
+ else
+ {
+ is_ip4 = ip46_address_is_ip4 (&api_rpath->rpath.frp_addr);
+ if (is_ip4)
+ fp->afi = IP46_TYPE_IP4;
+ else
+ fp->afi = IP46_TYPE_IP6;
+ }
+ if (fp->afi == IP46_TYPE_IP4)
+ memcpy (fp->next_hop, &api_rpath->rpath.frp_addr.ip4,
+ sizeof (api_rpath->rpath.frp_addr.ip4));
+ else
+ memcpy (fp->next_hop, &api_rpath->rpath.frp_addr.ip6,
+ sizeof (api_rpath->rpath.frp_addr.ip6));
+}
+
+static void
+vl_api_ip_fib_details_t_handler (vl_api_ip_fib_details_t * mp)
+{
+ clib_warning ("BUG");
+}
+
+static void
+vl_api_ip_fib_details_t_endian (vl_api_ip_fib_details_t * mp)
+{
+ clib_warning ("BUG");
+}
+
+static void
+vl_api_ip_fib_details_t_print (vl_api_ip_fib_details_t * mp)
+{
+ clib_warning ("BUG");
+}
+
+static void
+send_ip_fib_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ u32 table_id, fib_prefix_t * pfx,
+ fib_route_path_encode_t * api_rpaths, u32 context)
+{
+ vl_api_ip_fib_details_t *mp;
+ fib_route_path_encode_t *api_rpath;
+ vl_api_fib_path_t *fp;
+ int path_count;
+
+ path_count = vec_len (api_rpaths);
+ mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
+ if (!mp)
+ return;
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_FIB_DETAILS);
+ mp->context = context;
+
+ mp->table_id = htonl (table_id);
+ mp->address_length = pfx->fp_len;
+ memcpy (mp->address, &pfx->fp_addr.ip4, sizeof (pfx->fp_addr.ip4));
+
+ mp->count = htonl (path_count);
+ fp = mp->path;
+ vec_foreach (api_rpath, api_rpaths)
+ {
+ memset (fp, 0, sizeof (*fp));
+ switch (api_rpath->dpo.dpoi_type)
+ {
+ case DPO_RECEIVE:
+ fp->is_local = true;
+ break;
+ case DPO_DROP:
+ fp->is_drop = true;
+ break;
+ case DPO_IP_NULL:
+ switch (api_rpath->dpo.dpoi_index)
+ {
+ case IP_NULL_ACTION_NONE:
+ fp->is_drop = true;
+ break;
+ case IP_NULL_ACTION_SEND_ICMP_UNREACH:
+ fp->is_unreach = true;
+ break;
+ case IP_NULL_ACTION_SEND_ICMP_PROHIBIT:
+ fp->is_prohibit = true;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ fp->weight = htonl (api_rpath->rpath.frp_weight);
+ fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+ copy_fib_next_hop (api_rpath, fp);
+ fp++;
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ unix_shared_memory_queue_t *q;
+ ip4_main_t *im = &ip4_main;
+ fib_table_t *fib_table;
+ fib_node_index_t lfei, *lfeip, *lfeis = NULL;
+ mpls_label_t key;
+ fib_prefix_t pfx;
+ u32 fib_index;
+ fib_route_path_encode_t *api_rpaths;
+ int i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (fib_table, im->fibs,
+ ({
+ for (i = 0; i < ARRAY_LEN (fib_table->v4.fib_entry_by_dst_address); i++)
+ {
+ hash_foreach(key, lfei, fib_table->v4.fib_entry_by_dst_address[i],
+ ({
+ vec_add1(lfeis, lfei);
+ }));
+ }
+ }));
+ /* *INDENT-ON* */
+
+ vec_sort_with_function (lfeis, fib_entry_cmp_for_sort);
+
+ vec_foreach (lfeip, lfeis)
+ {
+ fib_entry_get_prefix (*lfeip, &pfx);
+ fib_index = fib_entry_get_fib_index (*lfeip);
+ fib_table = fib_table_get (fib_index, pfx.fp_proto);
+ api_rpaths = NULL;
+ fib_entry_encode (*lfeip, &api_rpaths);
+ send_ip_fib_details (am, q,
+ fib_table->ft_table_id, &pfx, api_rpaths,
+ mp->context);
+ vec_free (api_rpaths);
+ }
+
+ vec_free (lfeis);
+}
+
+static void
+vl_api_ip6_fib_details_t_handler (vl_api_ip6_fib_details_t * mp)
+{
+ clib_warning ("BUG");
+}
+
+static void
+vl_api_ip6_fib_details_t_endian (vl_api_ip6_fib_details_t * mp)
+{
+ clib_warning ("BUG");
+}
+
+static void
+vl_api_ip6_fib_details_t_print (vl_api_ip6_fib_details_t * mp)
+{
+ clib_warning ("BUG");
+}
+
+static void
+send_ip6_fib_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ u32 table_id, fib_prefix_t * pfx,
+ fib_route_path_encode_t * api_rpaths, u32 context)
+{
+ vl_api_ip6_fib_details_t *mp;
+ fib_route_path_encode_t *api_rpath;
+ vl_api_fib_path_t *fp;
+ int path_count;
+
+ path_count = vec_len (api_rpaths);
+ mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
+ if (!mp)
+ return;
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP6_FIB_DETAILS);
+ mp->context = context;
+
+ mp->table_id = htonl (table_id);
+ mp->address_length = pfx->fp_len;
+ memcpy (mp->address, &pfx->fp_addr.ip6, sizeof (pfx->fp_addr.ip6));
+
+ mp->count = htonl (path_count);
+ fp = mp->path;
+ vec_foreach (api_rpath, api_rpaths)
+ {
+ memset (fp, 0, sizeof (*fp));
+ switch (api_rpath->dpo.dpoi_type)
+ {
+ case DPO_RECEIVE:
+ fp->is_local = true;
+ break;
+ case DPO_DROP:
+ fp->is_drop = true;
+ break;
+ case DPO_IP_NULL:
+ switch (api_rpath->dpo.dpoi_index)
+ {
+ case IP_NULL_DPO_ACTION_NUM + IP_NULL_ACTION_NONE:
+ fp->is_drop = true;
+ break;
+ case IP_NULL_DPO_ACTION_NUM + IP_NULL_ACTION_SEND_ICMP_UNREACH:
+ fp->is_unreach = true;
+ break;
+ case IP_NULL_DPO_ACTION_NUM + IP_NULL_ACTION_SEND_ICMP_PROHIBIT:
+ fp->is_prohibit = true;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ fp->weight = htonl (api_rpath->rpath.frp_weight);
+ fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+ copy_fib_next_hop (api_rpath, fp);
+ fp++;
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+typedef struct apt_ip6_fib_show_ctx_t_
+{
+ u32 fib_index;
+ fib_node_index_t *entries;
+} api_ip6_fib_show_ctx_t;
+
+static void
+api_ip6_fib_table_put_entries (clib_bihash_kv_24_8_t * kvp, void *arg)
+{
+ api_ip6_fib_show_ctx_t *ctx = arg;
+
+ if ((kvp->key[2] >> 32) == ctx->fib_index)
+ {
+ vec_add1 (ctx->entries, kvp->value);
+ }
+}
+
+static void
+api_ip6_fib_table_get_all (unix_shared_memory_queue_t * q,
+ vl_api_ip6_fib_dump_t * mp,
+ fib_table_t * fib_table)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ ip6_main_t *im6 = &ip6_main;
+ ip6_fib_t *fib = &fib_table->v6;
+ fib_node_index_t *fib_entry_index;
+ api_ip6_fib_show_ctx_t ctx = {
+ .fib_index = fib->index,.entries = NULL,
+ };
+ fib_route_path_encode_t *api_rpaths;
+ fib_prefix_t pfx;
+
+ BV (clib_bihash_foreach_key_value_pair)
+ ((BVT (clib_bihash) *) & im6->ip6_table[IP6_FIB_TABLE_NON_FWDING].
+ ip6_hash, api_ip6_fib_table_put_entries, &ctx);
+
+ vec_sort_with_function (ctx.entries, fib_entry_cmp_for_sort);
+
+ vec_foreach (fib_entry_index, ctx.entries)
+ {
+ fib_entry_get_prefix (*fib_entry_index, &pfx);
+ api_rpaths = NULL;
+ fib_entry_encode (*fib_entry_index, &api_rpaths);
+ send_ip6_fib_details (am, q,
+ fib_table->ft_table_id,
+ &pfx, api_rpaths, mp->context);
+ vec_free (api_rpaths);
+ }
+
+ vec_free (ctx.entries);
+}
+
+static void
+vl_api_ip6_fib_dump_t_handler (vl_api_ip6_fib_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ ip6_main_t *im6 = &ip6_main;
+ fib_table_t *fib_table;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (fib_table, im6->fibs,
+ ({
+ api_ip6_fib_table_get_all(q, mp, fib_table);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp,
+ vlib_main_t * vm)
+{
+ vl_api_ip_neighbor_add_del_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ stats_dslock_with_hint (1 /* release hint */ , 7 /* tag */ );
+
+ /*
+ * there's no validation here of the ND/ARP entry being added.
+ * The expectation is that the FIB will ensure that nothing bad
+ * will come of adding bogus entries.
+ */
+ if (mp->is_ipv6)
+ {
+ if (mp->is_add)
+ rv = vnet_set_ip6_ethernet_neighbor
+ (vm, ntohl (mp->sw_if_index),
+ (ip6_address_t *) (mp->dst_address),
+ mp->mac_address, sizeof (mp->mac_address), mp->is_static);
+ else
+ rv = vnet_unset_ip6_ethernet_neighbor
+ (vm, ntohl (mp->sw_if_index),
+ (ip6_address_t *) (mp->dst_address),
+ mp->mac_address, sizeof (mp->mac_address));
+ }
+ else
+ {
+ ethernet_arp_ip4_over_ethernet_address_t a;
+
+ clib_memcpy (&a.ethernet, mp->mac_address, 6);
+ clib_memcpy (&a.ip4, mp->dst_address, 4);
+
+ if (mp->is_add)
+ rv = vnet_arp_set_ip4_over_ethernet (vnm, ntohl (mp->sw_if_index),
+ &a, mp->is_static);
+ else
+ rv =
+ vnet_arp_unset_ip4_over_ethernet (vnm, ntohl (mp->sw_if_index), &a);
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ stats_dsunlock ();
+ REPLY_MACRO (VL_API_IP_NEIGHBOR_ADD_DEL_REPLY);
+}
+
+int
+add_del_route_t_handler (u8 is_multipath,
+ u8 is_add,
+ u8 is_drop,
+ u8 is_unreach,
+ u8 is_prohibit,
+ u8 is_local,
+ u8 is_classify,
+ u32 classify_table_index,
+ u8 is_resolve_host,
+ u8 is_resolve_attached,
+ u32 fib_index,
+ const fib_prefix_t * prefix,
+ u8 next_hop_proto_is_ip4,
+ const ip46_address_t * next_hop,
+ u32 next_hop_sw_if_index,
+ u8 next_hop_fib_index,
+ u32 next_hop_weight,
+ mpls_label_t next_hop_via_label,
+ mpls_label_t * next_hop_out_label_stack)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ fib_route_path_flags_t path_flags = FIB_ROUTE_PATH_FLAG_NONE;
+ fib_route_path_t path = {
+ .frp_proto = (next_hop_proto_is_ip4 ?
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6),
+ .frp_addr = (NULL == next_hop ? zero_addr : *next_hop),
+ .frp_sw_if_index = next_hop_sw_if_index,
+ .frp_fib_index = next_hop_fib_index,
+ .frp_weight = next_hop_weight,
+ .frp_label_stack = next_hop_out_label_stack,
+ };
+ fib_route_path_t *paths = NULL;
+
+ if (MPLS_LABEL_INVALID != next_hop_via_label)
+ {
+ path.frp_proto = FIB_PROTOCOL_MPLS;
+ path.frp_local_label = next_hop_via_label;
+ }
+ if (is_resolve_host)
+ path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST;
+ if (is_resolve_attached)
+ path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED;
+
+ path.frp_flags = path_flags;
+
+ if (is_multipath)
+ {
+ stats_dslock_with_hint (1 /* release hint */ , 10 /* tag */ );
+
+
+ vec_add1 (paths, path);
+
+ if (is_add)
+ fib_table_entry_path_add2 (fib_index,
+ prefix,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE, paths);
+ else
+ fib_table_entry_path_remove2 (fib_index,
+ prefix, FIB_SOURCE_API, paths);
+
+ vec_free (paths);
+ stats_dsunlock ();
+ return 0;
+ }
+
+ stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ );
+
+ if (is_drop || is_local || is_classify || is_unreach || is_prohibit)
+ {
+ /*
+ * special route types that link directly to the adj
+ */
+ if (is_add)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+ dpo_proto_t dproto;
+
+ dproto = fib_proto_to_dpo (prefix->fp_proto);
+
+ if (is_drop)
+ ip_null_dpo_add_and_lock (dproto, IP_NULL_ACTION_NONE, &dpo);
+ else if (is_local)
+ receive_dpo_add_or_lock (dproto, ~0, NULL, &dpo);
+ else if (is_unreach)
+ ip_null_dpo_add_and_lock (dproto,
+ IP_NULL_ACTION_SEND_ICMP_UNREACH, &dpo);
+ else if (is_prohibit)
+ ip_null_dpo_add_and_lock (dproto,
+ IP_NULL_ACTION_SEND_ICMP_PROHIBIT,
+ &dpo);
+ else if (is_classify)
+ {
+ if (pool_is_free_index (cm->tables,
+ ntohl (classify_table_index)))
+ {
+ stats_dsunlock ();
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+ }
+
+ dpo_set (&dpo, DPO_CLASSIFY, dproto,
+ classify_dpo_create (dproto,
+ ntohl (classify_table_index)));
+ }
+ else
+ {
+ stats_dsunlock ();
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+ }
+
+ fib_table_entry_special_dpo_update (fib_index,
+ prefix,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ fib_table_entry_special_remove (fib_index, prefix, FIB_SOURCE_API);
+ }
+ }
+ else
+ {
+ if (is_add)
+ {
+ vec_add1 (paths, path);
+ fib_table_entry_update (fib_index,
+ prefix,
+ FIB_SOURCE_API, FIB_ENTRY_FLAG_NONE, paths);
+ vec_free (paths);
+ }
+ else
+ {
+ fib_table_entry_delete (fib_index, prefix, FIB_SOURCE_API);
+ }
+ }
+
+ stats_dsunlock ();
+ return (0);
+}
+
+int
+add_del_route_check (fib_protocol_t table_proto,
+ u32 table_id,
+ u32 next_hop_sw_if_index,
+ fib_protocol_t next_hop_table_proto,
+ u32 next_hop_table_id,
+ u8 create_missing_tables,
+ u32 * fib_index, u32 * next_hop_fib_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ *fib_index = fib_table_find (table_proto, ntohl (table_id));
+ if (~0 == *fib_index)
+ {
+ if (create_missing_tables)
+ {
+ *fib_index = fib_table_find_or_create_and_lock (table_proto,
+ ntohl (table_id));
+ }
+ else
+ {
+ /* No such VRF, and we weren't asked to create one */
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ }
+ }
+
+ if (~0 != ntohl (next_hop_sw_if_index))
+ {
+ if (pool_is_free_index (vnm->interface_main.sw_interfaces,
+ ntohl (next_hop_sw_if_index)))
+ {
+ return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+ }
+ }
+ else
+ {
+ *next_hop_fib_index = fib_table_find (next_hop_table_proto,
+ ntohl (next_hop_table_id));
+
+ if (~0 == *next_hop_fib_index)
+ {
+ if (create_missing_tables)
+ {
+ *next_hop_fib_index =
+ fib_table_find_or_create_and_lock (next_hop_table_proto,
+ ntohl (next_hop_table_id));
+ }
+ else
+ {
+ /* No such VRF, and we weren't asked to create one */
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ }
+ }
+ }
+
+ return (0);
+}
+
+static int
+ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
+{
+ u32 fib_index, next_hop_fib_index;
+ mpls_label_t *label_stack = NULL;
+ int rv, ii, n_labels;;
+
+ rv = add_del_route_check (FIB_PROTOCOL_IP4,
+ mp->table_id,
+ mp->next_hop_sw_if_index,
+ FIB_PROTOCOL_IP4,
+ mp->next_hop_table_id,
+ mp->create_vrf_if_needed,
+ &fib_index, &next_hop_fib_index);
+
+ if (0 != rv)
+ return (rv);
+
+ fib_prefix_t pfx = {
+ .fp_len = mp->dst_address_length,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ clib_memcpy (&pfx.fp_addr.ip4, mp->dst_address, sizeof (pfx.fp_addr.ip4));
+
+ ip46_address_t nh;
+ memset (&nh, 0, sizeof (nh));
+ memcpy (&nh.ip4, mp->next_hop_address, sizeof (nh.ip4));
+
+ n_labels = mp->next_hop_n_out_labels;
+ if (n_labels == 0)
+ ;
+ else if (1 == n_labels)
+ vec_add1 (label_stack, ntohl (mp->next_hop_out_label_stack[0]));
+ else
+ {
+ vec_validate (label_stack, n_labels - 1);
+ for (ii = 0; ii < n_labels; ii++)
+ label_stack[ii] = ntohl (mp->next_hop_out_label_stack[ii]);
+ }
+
+ return (add_del_route_t_handler (mp->is_multipath,
+ mp->is_add,
+ mp->is_drop,
+ mp->is_unreach,
+ mp->is_prohibit,
+ mp->is_local,
+ mp->is_classify,
+ mp->classify_table_index,
+ mp->is_resolve_host,
+ mp->is_resolve_attached,
+ fib_index, &pfx, 1,
+ &nh,
+ ntohl (mp->next_hop_sw_if_index),
+ next_hop_fib_index,
+ mp->next_hop_weight,
+ ntohl (mp->next_hop_via_label),
+ label_stack));
+}
+
+static int
+ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
+{
+ u32 fib_index, next_hop_fib_index;
+ mpls_label_t *label_stack = NULL;
+ int rv, ii, n_labels;;
+
+ rv = add_del_route_check (FIB_PROTOCOL_IP6,
+ mp->table_id,
+ mp->next_hop_sw_if_index,
+ FIB_PROTOCOL_IP6,
+ mp->next_hop_table_id,
+ mp->create_vrf_if_needed,
+ &fib_index, &next_hop_fib_index);
+
+ if (0 != rv)
+ return (rv);
+
+ fib_prefix_t pfx = {
+ .fp_len = mp->dst_address_length,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ };
+ clib_memcpy (&pfx.fp_addr.ip6, mp->dst_address, sizeof (pfx.fp_addr.ip6));
+
+ ip46_address_t nh;
+ memset (&nh, 0, sizeof (nh));
+ memcpy (&nh.ip6, mp->next_hop_address, sizeof (nh.ip6));
+
+ n_labels = mp->next_hop_n_out_labels;
+ if (n_labels == 0)
+ ;
+ else if (1 == n_labels)
+ vec_add1 (label_stack, ntohl (mp->next_hop_out_label_stack[0]));
+ else
+ {
+ vec_validate (label_stack, n_labels - 1);
+ for (ii = 0; ii < n_labels; ii++)
+ label_stack[ii] = ntohl (mp->next_hop_out_label_stack[ii]);
+ }
+
+ return (add_del_route_t_handler (mp->is_multipath,
+ mp->is_add,
+ mp->is_drop,
+ mp->is_unreach,
+ mp->is_prohibit,
+ mp->is_local,
+ mp->is_classify,
+ mp->classify_table_index,
+ mp->is_resolve_host,
+ mp->is_resolve_attached,
+ fib_index, &pfx, 0,
+ &nh, ntohl (mp->next_hop_sw_if_index),
+ next_hop_fib_index,
+ mp->next_hop_weight,
+ ntohl (mp->next_hop_via_label),
+ label_stack));
+}
+
+void
+vl_api_ip_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
+{
+ vl_api_ip_add_del_route_reply_t *rmp;
+ int rv;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ vnm->api_errno = 0;
+
+ if (mp->is_ipv6)
+ rv = ip6_add_del_route_t_handler (mp);
+ else
+ rv = ip4_add_del_route_t_handler (mp);
+
+ rv = (rv == 0) ? vnm->api_errno : rv;
+
+ REPLY_MACRO (VL_API_IP_ADD_DEL_ROUTE_REPLY);
+}
+
+static void
+send_ip_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q, u32 sw_if_index, u32 context)
+{
+ vl_api_ip_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_DETAILS);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+send_ip_address_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ u8 * ip, u16 prefix_length, u8 is_ipv6, u32 context)
+{
+ vl_api_ip_address_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_IP_ADDRESS_DETAILS);
+
+ if (is_ipv6)
+ {
+ clib_memcpy (&mp->ip, ip, sizeof (mp->ip));
+ }
+ else
+ {
+ u32 *tp = (u32 *) mp->ip;
+ *tp = *(u32 *) ip;
+ }
+ mp->prefix_length = prefix_length;
+ mp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ unix_shared_memory_queue_t *q;
+ ip6_address_t *r6;
+ ip4_address_t *r4;
+ ip6_main_t *im6 = &ip6_main;
+ ip4_main_t *im4 = &ip4_main;
+ ip_lookup_main_t *lm6 = &im6->lookup_main;
+ ip_lookup_main_t *lm4 = &im4->lookup_main;
+ ip_interface_address_t *ia = 0;
+ u32 sw_if_index = ~0;
+ int rv __attribute__ ((unused)) = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ if (mp->is_ipv6)
+ {
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm6, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ r6 = ip_interface_address_get_address (lm6, ia);
+ u16 prefix_length = ia->address_length;
+ send_ip_address_details(am, q, (u8*)r6, prefix_length, 1, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm4, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ r4 = ip_interface_address_get_address (lm4, ia);
+ u16 prefix_length = ia->address_length;
+ send_ip_address_details(am, q, (u8*)r4, prefix_length, 0, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ BAD_SW_IF_INDEX_LABEL;
+}
+
+static void
+vl_api_ip_dump_t_handler (vl_api_ip_dump_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ unix_shared_memory_queue_t *q;
+ vnet_sw_interface_t *si, *sorted_sis;
+ u32 sw_if_index = ~0;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ /* Gather interfaces. */
+ sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
+ _vec_len (sorted_sis) = 0;
+ /* *INDENT-OFF* */
+ pool_foreach (si, im->sw_interfaces,
+ ({
+ vec_add1 (sorted_sis, si[0]);
+ }));
+ /* *INDENT-ON* */
+
+ vec_foreach (si, sorted_sis)
+ {
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED))
+ {
+ if (mp->is_ipv6 && !ip6_interface_enabled (vm, si->sw_if_index))
+ {
+ continue;
+ }
+ sw_if_index = si->sw_if_index;
+ send_ip_details (am, q, sw_if_index, mp->context);
+ }
+ }
+}
+
+static void
+set_ip6_flow_hash (vl_api_set_ip_flow_hash_t * mp)
+{
+ vl_api_set_ip_flow_hash_reply_t *rmp;
+ int rv = VNET_API_ERROR_UNIMPLEMENTED;
+
+ clib_warning ("unimplemented...");
+
+ REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_REPLY);
+}
+
+static void
+set_ip4_flow_hash (vl_api_set_ip_flow_hash_t * mp)
+{
+ vl_api_set_ip_flow_hash_reply_t *rmp;
+ int rv;
+ u32 table_id;
+ flow_hash_config_t flow_hash_config = 0;
+
+ table_id = ntohl (mp->vrf_id);
+
+#define _(a,b) if (mp->a) flow_hash_config |= b;
+ foreach_flow_hash_bit;
+#undef _
+
+ rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
+
+ REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_REPLY);
+}
+
+
+static void
+vl_api_set_ip_flow_hash_t_handler (vl_api_set_ip_flow_hash_t * mp)
+{
+ if (mp->is_ipv6 == 0)
+ set_ip4_flow_hash (mp);
+ else
+ set_ip6_flow_hash (mp);
+}
+
+static void
+ vl_api_sw_interface_ip6nd_ra_config_t_handler
+ (vl_api_sw_interface_ip6nd_ra_config_t * mp)
+{
+ vl_api_sw_interface_ip6nd_ra_config_reply_t *rmp;
+ vlib_main_t *vm = vlib_get_main ();
+ int rv = 0;
+ u8 is_no, suppress, managed, other, ll_option, send_unicast, cease,
+ default_router;
+
+ is_no = mp->is_no == 1;
+ suppress = mp->suppress == 1;
+ managed = mp->managed == 1;
+ other = mp->other == 1;
+ ll_option = mp->ll_option == 1;
+ send_unicast = mp->send_unicast == 1;
+ cease = mp->cease == 1;
+ default_router = mp->default_router == 1;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = ip6_neighbor_ra_config (vm, ntohl (mp->sw_if_index),
+ suppress, managed, other,
+ ll_option, send_unicast, cease,
+ default_router, ntohl (mp->lifetime),
+ ntohl (mp->initial_count),
+ ntohl (mp->initial_interval),
+ ntohl (mp->max_interval),
+ ntohl (mp->min_interval), is_no);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_IP6ND_RA_CONFIG_REPLY);
+}
+
+static void
+ vl_api_sw_interface_ip6nd_ra_prefix_t_handler
+ (vl_api_sw_interface_ip6nd_ra_prefix_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_sw_interface_ip6nd_ra_prefix_reply_t *rmp;
+ int rv = 0;
+ u8 is_no, use_default, no_advertise, off_link, no_autoconfig, no_onlink;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ is_no = mp->is_no == 1;
+ use_default = mp->use_default == 1;
+ no_advertise = mp->no_advertise == 1;
+ off_link = mp->off_link == 1;
+ no_autoconfig = mp->no_autoconfig == 1;
+ no_onlink = mp->no_onlink == 1;
+
+ rv = ip6_neighbor_ra_prefix (vm, ntohl (mp->sw_if_index),
+ (ip6_address_t *) mp->address,
+ mp->address_length, use_default,
+ ntohl (mp->val_lifetime),
+ ntohl (mp->pref_lifetime), no_advertise,
+ off_link, no_autoconfig, no_onlink, is_no);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_SW_INTERFACE_IP6ND_RA_PREFIX_REPLY);
+}
+
+static void
+ vl_api_sw_interface_ip6_enable_disable_t_handler
+ (vl_api_sw_interface_ip6_enable_disable_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_sw_interface_ip6_enable_disable_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ int rv = 0;
+ clib_error_t *error;
+
+ vnm->api_errno = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ error =
+ (mp->enable == 1) ? enable_ip6_interface (vm,
+ ntohl (mp->sw_if_index)) :
+ disable_ip6_interface (vm, ntohl (mp->sw_if_index));
+
+ if (error)
+ {
+ clib_error_report (error);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
+ else
+ {
+ rv = vnm->api_errno;
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_IP6_ENABLE_DISABLE_REPLY);
+}
+
+static void
+ vl_api_sw_interface_ip6_set_link_local_address_t_handler
+ (vl_api_sw_interface_ip6_set_link_local_address_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_sw_interface_ip6_set_link_local_address_reply_t *rmp;
+ int rv = 0;
+ clib_error_t *error;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ vnm->api_errno = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ error = set_ip6_link_local_address (vm,
+ ntohl (mp->sw_if_index),
+ (ip6_address_t *) mp->address,
+ mp->address_length);
+ if (error)
+ {
+ clib_error_report (error);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
+ else
+ {
+ rv = vnm->api_errno;
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS_REPLY);
+}
+
+
+#define vl_msg_name_crc_list
+#include <vnet/ip/ip.api.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_ip;
+#undef _
+}
+
+static clib_error_t *
+ip_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_ip_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (ip_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_checksum.c b/src/vnet/ip/ip_checksum.c
new file mode 100644
index 00000000000..6a9cf657a5c
--- /dev/null
+++ b/src/vnet/ip/ip_checksum.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip4/ip_checksum.c: ip/tcp/udp checksums
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+ip_csum_t
+ip_incremental_checksum (ip_csum_t sum, void *_data, uword n_bytes)
+{
+ uword data = pointer_to_uword (_data);
+ ip_csum_t sum0, sum1;
+
+ sum0 = 0;
+ sum1 = sum;
+
+ /* Align data pointer to 64 bits. */
+#define _(t) \
+do { \
+ if (n_bytes >= sizeof (t) \
+ && sizeof (t) < sizeof (ip_csum_t) \
+ && (data % (2 * sizeof (t))) != 0) \
+ { \
+ sum0 += * uword_to_pointer (data, t *); \
+ data += sizeof (t); \
+ n_bytes -= sizeof (t); \
+ } \
+} while (0)
+
+ _(u8);
+ _(u16);
+ if (BITS (ip_csum_t) > 32)
+ _(u32);
+
+#undef _
+
+ {
+ ip_csum_t *d = uword_to_pointer (data, ip_csum_t *);
+
+ while (n_bytes >= 2 * sizeof (d[0]))
+ {
+ sum0 = ip_csum_with_carry (sum0, d[0]);
+ sum1 = ip_csum_with_carry (sum1, d[1]);
+ d += 2;
+ n_bytes -= 2 * sizeof (d[0]);
+ }
+
+ data = pointer_to_uword (d);
+ }
+
+#define _(t) \
+do { \
+ if (n_bytes >= sizeof (t) && sizeof (t) <= sizeof (ip_csum_t)) \
+ { \
+ sum0 = ip_csum_with_carry (sum0, * uword_to_pointer (data, t *)); \
+ data += sizeof (t); \
+ n_bytes -= sizeof (t); \
+ } \
+} while (0)
+
+ if (BITS (ip_csum_t) > 32)
+ _(u64);
+ _(u32);
+ _(u16);
+ _(u8);
+
+#undef _
+
+ /* Combine even and odd sums. */
+ sum0 = ip_csum_with_carry (sum0, sum1);
+
+ return sum0;
+}
+
+ip_csum_t
+ip_csum_and_memcpy (ip_csum_t sum, void *dst, void *src, uword n_bytes)
+{
+ uword n_left;
+ ip_csum_t sum0 = sum, sum1;
+ n_left = n_bytes;
+
+ if (n_left && (pointer_to_uword (dst) & sizeof (u8)))
+ {
+ u8 *d8, val;
+
+ d8 = dst;
+ val = ((u8 *) src)[0];
+ d8[0] = val;
+ dst += 1;
+ src += 1;
+ n_left -= 1;
+ sum0 =
+ ip_csum_with_carry (sum0, val << (8 * CLIB_ARCH_IS_LITTLE_ENDIAN));
+ }
+
+ while ((n_left >= sizeof (u16))
+ && (pointer_to_uword (dst) & (sizeof (sum) - sizeof (u16))))
+ {
+ u16 *d16, *s16;
+
+ d16 = dst;
+ s16 = src;
+
+ d16[0] = clib_mem_unaligned (&s16[0], u16);
+
+ sum0 = ip_csum_with_carry (sum0, d16[0]);
+ dst += sizeof (u16);
+ src += sizeof (u16);
+ n_left -= sizeof (u16);
+ }
+
+ sum1 = 0;
+ while (n_left >= 2 * sizeof (sum))
+ {
+ ip_csum_t dst0, dst1;
+ ip_csum_t *dst_even, *src_even;
+
+ dst_even = dst;
+ src_even = src;
+ dst0 = clib_mem_unaligned (&src_even[0], ip_csum_t);
+ dst1 = clib_mem_unaligned (&src_even[1], ip_csum_t);
+
+ dst_even[0] = dst0;
+ dst_even[1] = dst1;
+
+ dst += 2 * sizeof (dst_even[0]);
+ src += 2 * sizeof (dst_even[0]);
+ n_left -= 2 * sizeof (dst_even[0]);
+
+ sum0 = ip_csum_with_carry (sum0, dst0);
+ sum1 = ip_csum_with_carry (sum1, dst1);
+ }
+
+ sum0 = ip_csum_with_carry (sum0, sum1);
+ while (n_left >= 1 * sizeof (sum))
+ {
+ ip_csum_t dst0, *dst_even, *src_even;
+
+ dst_even = dst;
+ src_even = src;
+
+ dst0 = clib_mem_unaligned (&src_even[0], ip_csum_t);
+
+ dst_even[0] = dst0;
+
+ dst += 1 * sizeof (sum);
+ src += 1 * sizeof (sum);
+ n_left -= 1 * sizeof (sum);
+
+ sum0 = ip_csum_with_carry (sum0, dst0);
+ }
+
+ while (n_left >= sizeof (u16))
+ {
+ u16 dst0, *dst_short, *src_short;
+
+ dst_short = dst;
+ src_short = src;
+
+ dst0 = clib_mem_unaligned (&src_short[0], u16);
+
+ dst_short[0] = dst0;
+
+ sum0 = ip_csum_with_carry (sum0, dst_short[0]);
+ dst += 1 * sizeof (dst0);
+ src += 1 * sizeof (dst0);
+ n_left -= 1 * sizeof (dst0);
+
+ }
+
+ if (n_left == 1)
+ {
+ u8 *d8, *s8, val;
+
+ d8 = dst;
+ s8 = src;
+
+ d8[0] = val = s8[0];
+ d8 += 1;
+ s8 += 1;
+ n_left -= 1;
+ sum0 = ip_csum_with_carry (sum0, val << (8 * CLIB_ARCH_IS_BIG_ENDIAN));
+ }
+
+ return sum0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
new file mode 100644
index 00000000000..ca062bfd5e8
--- /dev/null
+++ b/src/vnet/ip/ip_frag.c
@@ -0,0 +1,581 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+/*
+ * IPv4 Fragmentation Node
+ *
+ *
+ */
+
+#include "ip_frag.h"
+
+#include <vnet/ip/ip.h>
+
+
+typedef struct
+{
+ u8 ipv6;
+ u16 header_offset;
+ u16 mtu;
+ u8 next;
+ u16 n_fragments;
+} ip_frag_trace_t;
+
+static u8 *
+format_ip_frag_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
+ s = format (s, "IPv%s offset: %u mtu: %u fragments: %u",
+ t->ipv6 ? "6" : "4", t->header_offset, t->mtu, t->n_fragments);
+ return s;
+}
+
+static u32 running_fragment_id;
+
+static void
+ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
+ ip_frag_error_t * error)
+{
+ vlib_buffer_t *p;
+ ip4_header_t *ip4;
+ u16 mtu, ptr, len, max, rem, offset, ip_frag_id, ip_frag_offset;
+ u8 *packet, more;
+
+ vec_add1 (*buffer, pi);
+ p = vlib_get_buffer (vm, pi);
+ offset = vnet_buffer (p)->ip_frag.header_offset;
+ mtu = vnet_buffer (p)->ip_frag.mtu;
+ packet = (u8 *) vlib_buffer_get_current (p);
+ ip4 = (ip4_header_t *) (packet + offset);
+
+ rem = clib_net_to_host_u16 (ip4->length) - sizeof (*ip4);
+ ptr = 0;
+ max = (mtu - sizeof (*ip4) - vnet_buffer (p)->ip_frag.header_offset) & ~0x7;
+
+ if (rem < (p->current_length - offset - sizeof (*ip4)))
+ {
+ *error = IP_FRAG_ERROR_MALFORMED;
+ return;
+ }
+
+ if (mtu < sizeof (*ip4))
+ {
+ *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
+ return;
+ }
+
+ if (ip4->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT))
+ {
+ *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
+ return;
+ }
+
+ if (ip4_is_fragment (ip4))
+ {
+ ip_frag_id = ip4->fragment_id;
+ ip_frag_offset = ip4_get_fragment_offset (ip4);
+ more =
+ ! !(ip4->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS));
+ }
+ else
+ {
+ ip_frag_id = (++running_fragment_id);
+ ip_frag_offset = 0;
+ more = 0;
+ }
+
+ //Do the actual fragmentation
+ while (rem)
+ {
+ u32 bi;
+ vlib_buffer_t *b;
+ ip4_header_t *fip4;
+
+ len =
+ (rem >
+ (mtu - sizeof (*ip4) -
+ vnet_buffer (p)->ip_frag.header_offset)) ? max : rem;
+
+ if (ptr == 0)
+ {
+ bi = pi;
+ b = p;
+ fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset);
+ }
+ else
+ {
+ if (!vlib_buffer_alloc (vm, &bi, 1))
+ {
+ *error = IP_FRAG_ERROR_MEMORY;
+ return;
+ }
+ vec_add1 (*buffer, bi);
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (p)->sw_if_index[VLIB_RX];
+ vnet_buffer (b)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p)->sw_if_index[VLIB_TX];
+ fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset);
+
+ //Copy offset and ip4 header
+ clib_memcpy (b->data, packet, offset + sizeof (*ip4));
+ //Copy data
+ clib_memcpy (((u8 *) (fip4)) + sizeof (*fip4),
+ packet + offset + sizeof (*fip4) + ptr, len);
+ }
+ b->current_length = offset + len + sizeof (*fip4);
+
+ fip4->fragment_id = ip_frag_id;
+ fip4->flags_and_fragment_offset =
+ clib_host_to_net_u16 ((ptr >> 3) + ip_frag_offset);
+ fip4->flags_and_fragment_offset |=
+ clib_host_to_net_u16 (((len != rem) || more) << 13);
+ // ((len0 != rem0) || more0) << 13 is optimization for
+ // ((len0 != rem0) || more0) ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0
+ fip4->length = clib_host_to_net_u16 (len + sizeof (*fip4));
+ fip4->checksum = ip4_header_checksum (fip4);
+
+ if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
+ {
+ //Encapsulating ipv4 header
+ ip4_header_t *encap_header4 =
+ (ip4_header_t *) vlib_buffer_get_current (b);
+ encap_header4->length = clib_host_to_net_u16 (b->current_length);
+ encap_header4->checksum = ip4_header_checksum (encap_header4);
+ }
+ else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
+ {
+ //Encapsulating ipv6 header
+ ip6_header_t *encap_header6 =
+ (ip6_header_t *) vlib_buffer_get_current (b);
+ encap_header6->payload_length =
+ clib_host_to_net_u16 (b->current_length -
+ sizeof (*encap_header6));
+ }
+
+ rem -= len;
+ ptr += len;
+ }
+}
+
+void
+ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
+ u8 next_index, u8 flags)
+{
+ vnet_buffer (b)->ip_frag.header_offset = offset;
+ vnet_buffer (b)->ip_frag.mtu = mtu;
+ vnet_buffer (b)->ip_frag.next_index = next_index;
+ vnet_buffer (b)->ip_frag.flags = flags;
+}
+
+static uword
+ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_frag_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 frag_sent = 0, small_packets = 0;
+ u32 *buffer = 0;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0, *frag_from, frag_left;
+ vlib_buffer_t *p0;
+ ip_frag_error_t error0;
+ ip4_frag_next_t next0;
+
+ //Note: The packet is not enqueued now.
+ //It is instead put in a vector where other fragments
+ //will be put as well.
+ pi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ error0 = IP_FRAG_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip4_frag_do_fragment (vm, pi0, &buffer, &error0);
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip_frag_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
+ tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
+ tr->ipv6 = 0;
+ tr->n_fragments = vec_len (buffer);
+ tr->next = vnet_buffer (p0)->ip_frag.next_index;
+ }
+
+ if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
+ {
+ icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+ vnet_buffer (p0)->ip_frag.mtu);
+ vlib_buffer_advance (p0,
+ vnet_buffer (p0)->ip_frag.header_offset);
+ next0 = IP4_FRAG_NEXT_ICMP_ERROR;
+ }
+ else
+ next0 =
+ (error0 ==
+ IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
+ ip_frag.next_index : IP4_FRAG_NEXT_DROP;
+
+ if (error0 == IP_FRAG_ERROR_NONE)
+ {
+ frag_sent += vec_len (buffer);
+ small_packets += (vec_len (buffer) == 1);
+ }
+ else
+ vlib_error_count (vm, ip4_frag_node.index, error0, 1);
+
+ //Send fragments that were added in the frame
+ frag_from = buffer;
+ frag_left = vec_len (buffer);
+
+ while (frag_left > 0)
+ {
+ while (frag_left > 0 && n_left_to_next > 0)
+ {
+ u32 i;
+ i = to_next[0] = frag_from[0];
+ frag_from += 1;
+ frag_left -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_get_buffer (vm, i)->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, i,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ vec_reset_length (buffer);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vec_free (buffer);
+
+ vlib_node_increment_counter (vm, ip4_frag_node.index,
+ IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
+ vlib_node_increment_counter (vm, ip4_frag_node.index,
+ IP_FRAG_ERROR_SMALL_PACKET, small_packets);
+
+ return frame->n_vectors;
+}
+
+
+static void
+ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
+ ip_frag_error_t * error)
+{
+ vlib_buffer_t *p;
+ ip6_header_t *ip6_hdr;
+ ip6_frag_hdr_t *frag_hdr;
+ u8 *payload, *next_header;
+
+ p = vlib_get_buffer (vm, pi);
+
+ //Parsing the IPv6 headers
+ ip6_hdr =
+ vlib_buffer_get_current (p) + vnet_buffer (p)->ip_frag.header_offset;
+ payload = (u8 *) (ip6_hdr + 1);
+ next_header = &ip6_hdr->protocol;
+ if (*next_header == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ {
+ next_header = payload;
+ payload += payload[1] * 8;
+ }
+
+ if (*next_header == IP_PROTOCOL_IP6_DESTINATION_OPTIONS)
+ {
+ next_header = payload;
+ payload += payload[1] * 8;
+ }
+
+ if (*next_header == IP_PROTOCOL_IPV6_ROUTE)
+ {
+ next_header = payload;
+ payload += payload[1] * 8;
+ }
+
+ if (PREDICT_FALSE
+ (payload >= (u8 *) vlib_buffer_get_current (p) + p->current_length))
+ {
+ //A malicious packet could set an extension header with a too big size
+ //and make us modify another vlib_buffer
+ *error = IP_FRAG_ERROR_MALFORMED;
+ return;
+ }
+
+ u8 has_more;
+ u16 initial_offset;
+ if (*next_header == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ //The fragmentation header is already there
+ frag_hdr = (ip6_frag_hdr_t *) payload;
+ has_more = ip6_frag_hdr_more (frag_hdr);
+ initial_offset = ip6_frag_hdr_offset (frag_hdr);
+ }
+ else
+ {
+ //Insert a fragmentation header in the packet
+ u8 nh = *next_header;
+ *next_header = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ vlib_buffer_advance (p, -sizeof (*frag_hdr));
+ u8 *start = vlib_buffer_get_current (p);
+ memmove (start, start + sizeof (*frag_hdr),
+ payload - (start + sizeof (*frag_hdr)));
+ frag_hdr = (ip6_frag_hdr_t *) (payload - sizeof (*frag_hdr));
+ frag_hdr->identification = ++running_fragment_id;
+ frag_hdr->next_hdr = nh;
+ frag_hdr->rsv = 0;
+ has_more = 0;
+ initial_offset = 0;
+ }
+ payload = (u8 *) (frag_hdr + 1);
+
+ u16 headers_len = payload - (u8 *) vlib_buffer_get_current (p);
+ u16 max_payload = vnet_buffer (p)->ip_frag.mtu - headers_len;
+ u16 rem = p->current_length - headers_len;
+ u16 ptr = 0;
+
+ if (max_payload < 8)
+ {
+ *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
+ return;
+ }
+
+ while (rem)
+ {
+ u32 bi;
+ vlib_buffer_t *b;
+ u16 len = (rem > max_payload) ? (max_payload & ~0x7) : rem;
+ rem -= len;
+
+ if (ptr != 0)
+ {
+ if (!vlib_buffer_alloc (vm, &bi, 1))
+ {
+ *error = IP_FRAG_ERROR_MEMORY;
+ return;
+ }
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (p)->sw_if_index[VLIB_RX];
+ vnet_buffer (b)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p)->sw_if_index[VLIB_TX];
+ clib_memcpy (vlib_buffer_get_current (b),
+ vlib_buffer_get_current (p), headers_len);
+ clib_memcpy (vlib_buffer_get_current (b) + headers_len,
+ payload + ptr, len);
+ frag_hdr =
+ vlib_buffer_get_current (b) + headers_len - sizeof (*frag_hdr);
+ }
+ else
+ {
+ bi = pi;
+ b = vlib_get_buffer (vm, bi);
+ //frag_hdr already set here
+ }
+
+ ip6_hdr =
+ vlib_buffer_get_current (b) + vnet_buffer (p)->ip_frag.header_offset;
+ frag_hdr->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more (initial_offset + (ptr >> 3),
+ (rem || has_more));
+ b->current_length = headers_len + len;
+ ip6_hdr->payload_length =
+ clib_host_to_net_u16 (b->current_length -
+ vnet_buffer (p)->ip_frag.header_offset -
+ sizeof (*ip6_hdr));
+
+ if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
+ {
+ //Encapsulating ipv4 header
+ ip4_header_t *encap_header4 =
+ (ip4_header_t *) vlib_buffer_get_current (b);
+ encap_header4->length = clib_host_to_net_u16 (b->current_length);
+ encap_header4->checksum = ip4_header_checksum (encap_header4);
+ }
+ else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
+ {
+ //Encapsulating ipv6 header
+ ip6_header_t *encap_header6 =
+ (ip6_header_t *) vlib_buffer_get_current (b);
+ encap_header6->payload_length =
+ clib_host_to_net_u16 (b->current_length -
+ sizeof (*encap_header6));
+ }
+
+ vec_add1 (*buffer, bi);
+
+ ptr += len;
+ }
+}
+
+static uword
+ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_frag_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 frag_sent = 0, small_packets = 0;
+ u32 *buffer = 0;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0, *frag_from, frag_left;
+ vlib_buffer_t *p0;
+ ip_frag_error_t error0;
+ ip6_frag_next_t next0;
+
+ pi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ error0 = IP_FRAG_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip6_frag_do_fragment (vm, pi0, &buffer, &error0);
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip_frag_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
+ tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
+ tr->ipv6 = 1;
+ tr->n_fragments = vec_len (buffer);
+ tr->next = vnet_buffer (p0)->ip_frag.next_index;
+ }
+
+ next0 =
+ (error0 ==
+ IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
+ ip_frag.next_index : IP6_FRAG_NEXT_DROP;
+ frag_sent += vec_len (buffer);
+ small_packets += (vec_len (buffer) == 1);
+
+ //Send fragments that were added in the frame
+ frag_from = buffer;
+ frag_left = vec_len (buffer);
+ while (frag_left > 0)
+ {
+ while (frag_left > 0 && n_left_to_next > 0)
+ {
+ u32 i;
+ i = to_next[0] = frag_from[0];
+ frag_from += 1;
+ frag_left -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_get_buffer (vm, i)->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, i,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ vec_reset_length (buffer);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vec_free (buffer);
+ vlib_node_increment_counter (vm, ip6_frag_node.index,
+ IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
+ vlib_node_increment_counter (vm, ip6_frag_node.index,
+ IP_FRAG_ERROR_SMALL_PACKET, small_packets);
+
+ return frame->n_vectors;
+}
+
+static char *ip4_frag_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip_frag_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_frag_node) = {
+ .function = ip4_frag,
+ .name = IP4_FRAG_NODE_NAME,
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_strings = ip4_frag_error_strings,
+
+ .n_next_nodes = IP4_FRAG_N_NEXT,
+ .next_nodes = {
+ [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [IP4_FRAG_NEXT_DROP] = "error-drop"
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_frag_node) = {
+ .function = ip6_frag,
+ .name = IP6_FRAG_NODE_NAME,
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_strings = ip4_frag_error_strings,
+
+ .n_next_nodes = IP6_FRAG_N_NEXT,
+ .next_nodes = {
+ [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP6_FRAG_NEXT_DROP] = "error-drop"
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h
new file mode 100644
index 00000000000..348f5a2fbc6
--- /dev/null
+++ b/src/vnet/ip/ip_frag.h
@@ -0,0 +1,96 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+/*
+ * IPv4 and IPv6 Fragmentation Nodes
+ *
+ * A packet sent to those nodes require the following
+ * buffer attributes to be set:
+ * ip_frag.header_offset :
+ * Where to find the IPv4 (or IPv6) header in the packet. Previous
+ * bytes are left untouched and copied in every fragment. The fragments
+ * are then appended. This option is used for fragmented packets
+ * that are encapsulated.
+ * ip_frag.mtu :
+ * Maximum size of IP packets, header included, but ignoring
+ * the 'ip_frag.header_offset' copied bytes.
+ * ip_frag.next_index :
+ * One of ip_frag_next_t, indicating to which exit node the fragments
+ * should be sent to.
+ *
+ */
+
+#ifndef IP_FRAG_H
+#define IP_FRAG_H
+
+#include <vnet/vnet.h>
+
+#define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header
+#define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header
+
+#define IP4_FRAG_NODE_NAME "ip4-frag"
+#define IP6_FRAG_NODE_NAME "ip6-frag"
+
+extern vlib_node_registration_t ip4_frag_node;
+extern vlib_node_registration_t ip6_frag_node;
+
+typedef enum
+{
+ IP4_FRAG_NEXT_IP4_LOOKUP,
+ IP4_FRAG_NEXT_IP6_LOOKUP,
+ IP4_FRAG_NEXT_ICMP_ERROR,
+ IP4_FRAG_NEXT_DROP,
+ IP4_FRAG_N_NEXT
+} ip4_frag_next_t;
+
+typedef enum
+{
+ IP6_FRAG_NEXT_IP4_LOOKUP,
+ IP6_FRAG_NEXT_IP6_LOOKUP,
+ IP6_FRAG_NEXT_DROP,
+ IP6_FRAG_N_NEXT
+} ip6_frag_next_t;
+
+#define foreach_ip_frag_error \
+ /* Must be first. */ \
+ _(NONE, "packet fragmented") \
+ _(SMALL_PACKET, "packet smaller than MTU") \
+ _(FRAGMENT_SENT, "number of sent fragments") \
+ _(CANT_FRAGMENT_HEADER, "can't fragment header") \
+ _(DONT_FRAGMENT_SET, "can't fragment this packet") \
+ _(MALFORMED, "malformed packet") \
+ _(MEMORY, "could not allocate buffer") \
+ _(UNKNOWN, "unknown error")
+
+typedef enum
+{
+#define _(sym,str) IP_FRAG_ERROR_##sym,
+ foreach_ip_frag_error
+#undef _
+ IP_FRAG_N_ERROR,
+} ip_frag_error_t;
+
+void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
+ u8 next_index, u8 flags);
+
+#endif /* ifndef IP_FRAG_H */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c
new file mode 100644
index 00000000000..f7635b35d0e
--- /dev/null
+++ b/src/vnet/ip/ip_init.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_init.c: ip generic initialization
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+ip_main_t ip_main;
+
+clib_error_t *
+ip_main_init (vlib_main_t * vm)
+{
+ ip_main_t *im = &ip_main;
+ clib_error_t *error = 0;
+
+ memset (im, 0, sizeof (im[0]));
+
+ {
+ ip_protocol_info_t *pi;
+ u32 i;
+
+#define ip_protocol(n,s) \
+do { \
+ vec_add2 (im->protocol_infos, pi, 1); \
+ pi->protocol = n; \
+ pi->name = (u8 *) #s; \
+} while (0);
+
+#include "protocols.def"
+
+#undef ip_protocol
+
+ im->protocol_info_by_name = hash_create_string (0, sizeof (uword));
+ for (i = 0; i < vec_len (im->protocol_infos); i++)
+ {
+ pi = im->protocol_infos + i;
+
+ hash_set_mem (im->protocol_info_by_name, pi->name, i);
+ hash_set (im->protocol_info_by_protocol, pi->protocol, i);
+ }
+ }
+
+ {
+ tcp_udp_port_info_t *pi;
+ u32 i;
+ static char *port_names[] = {
+#define ip_port(s,n) #s,
+#include "ports.def"
+#undef ip_port
+ };
+ static u16 ports[] = {
+#define ip_port(s,n) n,
+#include "ports.def"
+#undef ip_port
+ };
+
+ vec_resize (im->port_infos, ARRAY_LEN (port_names));
+ im->port_info_by_name = hash_create_string (0, sizeof (uword));
+
+ for (i = 0; i < vec_len (im->port_infos); i++)
+ {
+ pi = im->port_infos + i;
+ pi->port = clib_host_to_net_u16 (ports[i]);
+ pi->name = (u8 *) port_names[i];
+ hash_set_mem (im->port_info_by_name, pi->name, i);
+ hash_set (im->port_info_by_port, pi->port, i);
+ }
+ }
+
+ if ((error = vlib_call_init_function (vm, vnet_main_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, icmp4_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, icmp6_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, udp_local_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, udp_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip_classify_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, input_acl_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, policer_classify_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, flow_classify_init)))
+ return error;
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ip_main_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_input_acl.c b/src/vnet/ip/ip_input_acl.c
new file mode 100644
index 00000000000..b0b52ab11c3
--- /dev/null
+++ b/src/vnet/ip/ip_input_acl.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 next_index;
+ u32 table_index;
+ u32 offset;
+} ip_inacl_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ip_inacl_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip_inacl_trace_t *t = va_arg (*args, ip_inacl_trace_t *);
+
+ s = format (s, "INACL: sw_if_index %d, next_index %d, table %d, offset %d",
+ t->sw_if_index, t->next_index, t->table_index, t->offset);
+ return s;
+}
+
+vlib_node_registration_t ip4_inacl_node;
+vlib_node_registration_t ip6_inacl_node;
+
+#define foreach_ip_inacl_error \
+_(MISS, "input ACL misses") \
+_(HIT, "input ACL hits") \
+_(CHAIN_HIT, "input ACL hits after chain walk")
+
+typedef enum
+{
+#define _(sym,str) IP_INACL_ERROR_##sym,
+ foreach_ip_inacl_error
+#undef _
+ IP_INACL_N_ERROR,
+} ip_inacl_error_t;
+
+static char *ip_inacl_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip_inacl_error
+#undef _
+};
+
+static inline uword
+ip_inacl_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip4)
+{
+ u32 n_left_from, *from, *to_next;
+ acl_next_index_t next_index;
+ input_acl_main_t *am = &input_acl_main;
+ vnet_classify_main_t *vcm = am->vnet_classify_main;
+ f64 now = vlib_time_now (vm);
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+ input_acl_table_id_t tid;
+ vlib_node_runtime_t *error_node;
+ u32 n_next_nodes;
+
+ n_next_nodes = node->n_next_nodes;
+
+ if (is_ip4)
+ {
+ tid = INPUT_ACL_TABLE_IP4;
+ error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
+ }
+ else
+ {
+ tid = INPUT_ACL_TABLE_IP6;
+ error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ /* First pass: compute hashes */
+
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ u32 bi0, bi1;
+ u8 *h0, *h1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 table_index0, table_index1;
+ vnet_classify_table_t *t0, *t1;
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t *p1, *p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ table_index1 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index1];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ vnet_buffer (b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);
+
+ if (t1->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h1 = (void *) vlib_buffer_get_current (b1) + t1->current_data_offset;
+ else
+ h1 = b1->data;
+
+ vnet_buffer (b1)->l2_classify.hash =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+
+ vnet_classify_prefetch_bucket (t1, vnet_buffer (b1)->l2_classify.hash);
+
+ vnet_buffer (b0)->l2_classify.table_index = table_index0;
+
+ vnet_buffer (b1)->l2_classify.table_index = table_index1;
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0;
+ u8 *h0;
+ u32 sw_if_index0;
+ u32 table_index0;
+ vnet_classify_table_t *t0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ vnet_buffer (b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_buffer (b0)->l2_classify.table_index = table_index0;
+ vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);
+
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = ACL_NEXT_INDEX_DENY;
+ u32 table_index0;
+ vnet_classify_table_t *t0;
+ vnet_classify_entry_t *e0;
+ u64 hash0;
+ u8 *h0;
+ u8 error0;
+
+ /* Stride 3 seems to work best */
+ if (PREDICT_TRUE (n_left_from > 3))
+ {
+ vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
+ vnet_classify_table_t *tp1;
+ u32 table_index1;
+ u64 phash1;
+
+ table_index1 = vnet_buffer (p1)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index1 != ~0))
+ {
+ tp1 = pool_elt_at_index (vcm->tables, table_index1);
+ phash1 = vnet_buffer (p1)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp1, phash1);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ table_index0 = vnet_buffer (b0)->l2_classify.table_index;
+ e0 = 0;
+ t0 = 0;
+ vnet_get_config_data (am->vnet_config_main[tid],
+ &b0->current_config_index, &next0,
+ /* # bytes of config data */ 0);
+
+ vnet_buffer (b0)->l2_classify.opaque_index = ~0;
+
+ if (PREDICT_TRUE (table_index0 != ~0))
+ {
+ hash0 = vnet_buffer (b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 =
+ (void *) vlib_buffer_get_current (b0) +
+ t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+
+ hits++;
+
+ if (is_ip4)
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP4_ERROR_INACL_SESSION_DENY : IP4_ERROR_NONE;
+ else
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP6_ERROR_INACL_SESSION_DENY : IP6_ERROR_NONE;
+ b0->error = error_node->errors[error0];
+
+ if (e0->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX ||
+ e0->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = e0->metadata;
+ }
+ else
+ {
+ while (1)
+ {
+ if (PREDICT_TRUE (t0->next_table_index != ~0))
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 = (t0->miss_next_index < n_next_nodes) ?
+ t0->miss_next_index : next0;
+
+ misses++;
+
+ if (is_ip4)
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP4_ERROR_INACL_TABLE_MISS : IP4_ERROR_NONE;
+ else
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP6_ERROR_INACL_TABLE_MISS : IP6_ERROR_NONE;
+ b0->error = error_node->errors[error0];
+ break;
+ }
+
+ if (t0->current_data_flag ==
+ CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 =
+ (void *) vlib_buffer_get_current (b0) +
+ t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry
+ (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ chain_hits++;
+
+ if (is_ip4)
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP4_ERROR_INACL_SESSION_DENY : IP4_ERROR_NONE;
+ else
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP6_ERROR_INACL_SESSION_DENY : IP6_ERROR_NONE;
+ b0->error = error_node->errors[error0];
+
+ if (e0->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX
+ || e0->action ==
+ CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ e0->metadata;
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip_inacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->next_index = next0;
+ t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->offset = (e0 && t0) ? vnet_classify_get_offset (t0, e0) : ~0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_INACL_ERROR_MISS, misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_INACL_ERROR_HIT, hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_INACL_ERROR_CHAIN_HIT, chain_hits);
+ return frame->n_vectors;
+}
+
+static uword
+ip4_inacl (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip_inacl_inline (vm, node, frame, 1 /* is_ip4 */ );
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_inacl_node) = {
+ .function = ip4_inacl,
+ .name = "ip4-inacl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_inacl_trace,
+ .n_errors = ARRAY_LEN(ip_inacl_error_strings),
+ .error_strings = ip_inacl_error_strings,
+
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_inacl_node, ip4_inacl);
+
+static uword
+ip6_inacl (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip_inacl_inline (vm, node, frame, 0 /* is_ip4 */ );
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_inacl_node) = {
+ .function = ip6_inacl,
+ .name = "ip6-inacl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_inacl_trace,
+ .n_errors = ARRAY_LEN(ip_inacl_error_strings),
+ .error_strings = ip_inacl_error_strings,
+
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_inacl_node, ip6_inacl);
+
+static clib_error_t *
+ip_inacl_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip_inacl_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_packet.h b/src/vnet/ip/ip_packet.h
new file mode 100644
index 00000000000..d3f3de771bc
--- /dev/null
+++ b/src/vnet/ip/ip_packet.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_packet.h: packet format common between ip4 & ip6
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_packet_h
+#define included_ip_packet_h
+
+#include <vppinfra/byte_order.h>
+#include <vppinfra/error.h>
+
+typedef enum ip_protocol
+{
+#define ip_protocol(n,s) IP_PROTOCOL_##s = n,
+#include "protocols.def"
+#undef ip_protocol
+} ip_protocol_t;
+
+/* TCP/UDP ports. */
+typedef enum
+{
+#define ip_port(s,n) IP_PORT_##s = n,
+#include "ports.def"
+#undef ip_port
+} ip_port_t;
+
+/* Classifies protocols into UDP, ICMP or other. */
+typedef enum
+{
+ IP_BUILTIN_PROTOCOL_UDP,
+ IP_BUILTIN_PROTOCOL_ICMP,
+ IP_BUILTIN_PROTOCOL_UNKNOWN,
+} ip_builtin_protocol_t;
+
+#define foreach_ip_builtin_multicast_group \
+ _ (1, all_hosts_on_subnet) \
+ _ (2, all_routers_on_subnet) \
+ _ (4, dvmrp) \
+ _ (5, ospf_all_routers) \
+ _ (6, ospf_designated_routers) \
+ _ (13, pim) \
+ _ (18, vrrp) \
+ _ (102, hsrp) \
+ _ (22, igmp_v3)
+
+typedef enum
+{
+#define _(n,f) IP_MULTICAST_GROUP_##f = n,
+ foreach_ip_builtin_multicast_group
+#undef _
+} ip_multicast_group_t;
+
+/* IP checksum support. */
+
+/* Incremental checksum update. */
+typedef uword ip_csum_t;
+
+always_inline ip_csum_t
+ip_csum_with_carry (ip_csum_t sum, ip_csum_t x)
+{
+ ip_csum_t t = sum + x;
+ return t + (t < x);
+}
+
+/* Update checksum changing field at even byte offset from x -> 0. */
+always_inline ip_csum_t
+ip_csum_add_even (ip_csum_t c, ip_csum_t x)
+{
+ ip_csum_t d;
+
+ d = c - x;
+
+ /* Fold in carry from high bit. */
+ d -= d > c;
+
+ ASSERT (ip_csum_with_carry (d, x) == c);
+
+ return d;
+}
+
+/* Update checksum changing field at even byte offset from 0 -> x. */
+always_inline ip_csum_t
+ip_csum_sub_even (ip_csum_t c, ip_csum_t x)
+{
+ return ip_csum_with_carry (c, x);
+}
+
+always_inline ip_csum_t
+ip_csum_update_inline (ip_csum_t sum, ip_csum_t old, ip_csum_t new,
+ u32 field_byte_offset, u32 field_n_bytes)
+{
+ /* For even 1-byte fields on big-endian and odd 1-byte fields on little endian
+ we need to shift byte into place for checksum. */
+ if ((field_n_bytes % 2)
+ && (field_byte_offset % 2) == CLIB_ARCH_IS_LITTLE_ENDIAN)
+ {
+ old = old << 8;
+ new = new << 8;
+ }
+ sum = ip_csum_sub_even (sum, old);
+ sum = ip_csum_add_even (sum, new);
+ return sum;
+}
+
+#define ip_csum_update(sum,old,new,type,field) \
+ ip_csum_update_inline ((sum), (old), (new), \
+ STRUCT_OFFSET_OF (type, field), \
+ STRUCT_SIZE_OF (type, field))
+
+always_inline u16
+ip_csum_fold (ip_csum_t c)
+{
+ /* Reduce to 16 bits. */
+#if uword_bits == 64
+ c = (c & (ip_csum_t) 0xffffffff) + (c >> (ip_csum_t) 32);
+ c = (c & 0xffff) + (c >> 16);
+#endif
+
+ c = (c & 0xffff) + (c >> 16);
+ c = (c & 0xffff) + (c >> 16);
+
+ return c;
+}
+
+/* Copy data and checksum at the same time. */
+ip_csum_t ip_csum_and_memcpy (ip_csum_t sum, void *dst, void *src,
+ uword n_bytes);
+
+always_inline u16
+ip_csum_and_memcpy_fold (ip_csum_t sum, void *dst)
+{
+ return ip_csum_fold (sum);
+}
+
+/* Checksum routine. */
+ip_csum_t ip_incremental_checksum (ip_csum_t sum, void *data, uword n_bytes);
+
+#endif /* included_ip_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_source_and_port_range_check.h b/src/vnet/ip/ip_source_and_port_range_check.h
new file mode 100644
index 00000000000..fefe5ff1fd9
--- /dev/null
+++ b/src/vnet/ip/ip_source_and_port_range_check.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_ip_ip_source_and_port_range_check_h
+#define included_ip_ip_source_and_port_range_check_h
+
+
+typedef struct
+{
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} source_range_check_main_t;
+
+source_range_check_main_t source_range_check_main;
+
+typedef enum
+{
+ IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT,
+ IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT,
+ IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN,
+ IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN,
+ IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS,
+} ip_source_and_port_range_check_protocol_t;
+
+typedef struct
+{
+ u32 fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS];
+} ip_source_and_port_range_check_config_t;
+
+#define IP_SOURCE_AND_PORT_RANGE_CHECK_RANGE_LIMIT VLIB_BUFFER_PRE_DATA_SIZE/(2*sizeof(u16x8));
+
+typedef struct
+{
+ union
+ {
+ u16x8 as_u16x8;
+ u16 as_u16[8];
+ };
+} u16x8vec_t;
+
+typedef struct
+{
+ u16x8vec_t low;
+ u16x8vec_t hi;
+} protocol_port_range_t;
+
+/**
+ * @brief The number of supported ranges per-data path object.
+ * If more ranges are required, bump this number.
+ */
+#define N_PORT_RANGES_PER_DPO 64
+#define N_RANGES_PER_BLOCK (sizeof(u16x8vec_t)/2)
+#define N_BLOCKS_PER_DPO (N_PORT_RANGES_PER_DPO/N_RANGES_PER_BLOCK)
+
+/**
+ * @brief
+ * The object that is in the data-path to perform the check.
+ *
+ * Some trade-offs here; memory vs performance.
+ *
+ * performance:
+ * the principle factor is d-cache line misses/hits.
+ * so we want the data layout to minimise the d-cache misses. This
+ * means not following dependent reads. i.e. not doing
+ *
+ * struct B {
+ * u16 n_ranges;
+ * range_t *ragnes; // vector of ranges.
+ * }
+ *
+ * so to read ranges[0] we would first d-cache miss on the address
+ * of the object of type B, for which we would need to wait before we
+ * can get the address of B->ranges.
+ * So this layout is better:
+ *
+ * struct B {
+ * u16 n_ranges;
+ * range_t ragnes[N];
+ * }
+ *
+ * memory:
+ * the latter layout above is more memory hungry. And N needs to be:
+ * 1 - sized for the maximum required
+ * 2 - fixed, so that objects of type B can be pool allocated and so
+ * 'get'-able using an index.
+ * An option over fixed might be to allocate contiguous chunk from
+ * the pool (like we used to do for multi-path adjs).
+ */
+typedef struct protocol_port_range_dpo_t_
+{
+ /**
+ * The number of blocks from the 'block' array below
+ * that have rnages configured. We keep this count so that in the data-path
+ * we can limit the loop to be only over the blocks we need
+ */
+ u16 n_used_blocks;
+
+ /**
+ * The total number of free ranges from all blocks.
+ * Used to prevent overrun of the ranges available.
+ */
+ u16 n_free_ranges;
+
+ /**
+ * the fixed size array of ranges
+ */
+ protocol_port_range_t blocks[N_BLOCKS_PER_DPO];
+} protocol_port_range_dpo_t;
+
+int ip4_source_and_port_range_check_add_del (ip4_address_t * address,
+ u32 length,
+ u32 vrf_id,
+ u16 * low_ports,
+ u16 * hi_ports, int is_add);
+
+// This will be moved to another file in another patch -- for API freeze
+int ip6_source_and_port_range_check_add_del (ip6_address_t * address,
+ u32 length,
+ u32 vrf_id,
+ u16 * low_ports,
+ u16 * hi_ports, int is_add);
+
+int set_ip_source_and_port_range_check (vlib_main_t * vm,
+ u32 * fib_index,
+ u32 sw_if_index, u32 is_add);
+
+#endif /* included ip_source_and_port_range_check_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
new file mode 100644
index 00000000000..734a4cd7cfb
--- /dev/null
+++ b/src/vnet/ip/lookup.c
@@ -0,0 +1,967 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_lookup.c: ip4/6 adjacency and lookup table managment
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/adj/adj.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/dpo/punt_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/ip_null_dpo.h>
+
+/**
+ * @file
+ * @brief IPv4 and IPv6 adjacency and lookup table managment.
+ *
+ */
+
+clib_error_t *
+ip_interface_address_add_del (ip_lookup_main_t * lm,
+ u32 sw_if_index,
+ void *addr_fib,
+ u32 address_length,
+ u32 is_del, u32 * result_if_address_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip_interface_address_t *a, *prev, *next;
+ uword *p = mhash_get (&lm->address_to_if_address_index, addr_fib);
+
+ vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+ a = p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0;
+
+ /* Verify given length. */
+ if ((a && (address_length != a->address_length)) || (address_length == 0))
+ {
+ vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
+ return clib_error_create
+ ("%U wrong length (expected %d) for interface %U",
+ lm->format_address_and_length, addr_fib,
+ address_length, a ? a->address_length : -1,
+ format_vnet_sw_if_index_name, vnm, sw_if_index);
+ }
+
+ if (is_del)
+ {
+ if (!a)
+ {
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
+ return clib_error_create ("%U not found for interface %U",
+ lm->format_address_and_length,
+ addr_fib, address_length,
+ format_vnet_sw_interface_name, vnm, si);
+ }
+
+ if (a->prev_this_sw_interface != ~0)
+ {
+ prev =
+ pool_elt_at_index (lm->if_address_pool,
+ a->prev_this_sw_interface);
+ prev->next_this_sw_interface = a->next_this_sw_interface;
+ }
+ if (a->next_this_sw_interface != ~0)
+ {
+ next =
+ pool_elt_at_index (lm->if_address_pool,
+ a->next_this_sw_interface);
+ next->prev_this_sw_interface = a->prev_this_sw_interface;
+
+ if (a->prev_this_sw_interface == ~0)
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index] =
+ a->next_this_sw_interface;
+ }
+
+ if ((a->next_this_sw_interface == ~0)
+ && (a->prev_this_sw_interface == ~0))
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index] = ~0;
+
+ mhash_unset (&lm->address_to_if_address_index, addr_fib,
+ /* old_value */ 0);
+ pool_put (lm->if_address_pool, a);
+
+ if (result_if_address_index)
+ *result_if_address_index = ~0;
+ }
+
+ else if (!a)
+ {
+ u32 pi; /* previous index */
+ u32 ai;
+ u32 hi; /* head index */
+
+ pool_get (lm->if_address_pool, a);
+ memset (a, ~0, sizeof (a[0]));
+ ai = a - lm->if_address_pool;
+
+ hi = pi = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+ prev = 0;
+ while (pi != (u32) ~ 0)
+ {
+ prev = pool_elt_at_index (lm->if_address_pool, pi);
+ pi = prev->next_this_sw_interface;
+ }
+ pi = prev ? prev - lm->if_address_pool : (u32) ~ 0;
+
+ a->address_key = mhash_set (&lm->address_to_if_address_index,
+ addr_fib, ai, /* old_value */ 0);
+ a->address_length = address_length;
+ a->sw_if_index = sw_if_index;
+ a->flags = 0;
+ a->prev_this_sw_interface = pi;
+ a->next_this_sw_interface = ~0;
+ if (prev)
+ prev->next_this_sw_interface = ai;
+
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index] =
+ (hi != ~0) ? hi : ai;
+ if (result_if_address_index)
+ *result_if_address_index = ai;
+ }
+ else
+ {
+ if (result_if_address_index)
+ *result_if_address_index = a - lm->if_address_pool;
+ }
+
+
+ return /* no error */ 0;
+}
+
+void
+ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6)
+{
+ /* ensure that adjacency is cacheline aligned and sized */
+ STATIC_ASSERT (STRUCT_OFFSET_OF (ip_adjacency_t, cacheline0) == 0,
+ "Cache line marker must be 1st element in struct");
+ STATIC_ASSERT (STRUCT_OFFSET_OF (ip_adjacency_t, cacheline1) ==
+ CLIB_CACHE_LINE_BYTES,
+ "Data in cache line 0 is bigger than cache line size");
+
+ /* Preallocate three "special" adjacencies */
+ lm->adjacency_heap = adj_pool;
+
+ if (!lm->fib_result_n_bytes)
+ lm->fib_result_n_bytes = sizeof (uword);
+
+ lm->is_ip6 = is_ip6;
+ if (is_ip6)
+ {
+ lm->format_address_and_length = format_ip6_address_and_length;
+ mhash_init (&lm->address_to_if_address_index, sizeof (uword),
+ sizeof (ip6_address_fib_t));
+ }
+ else
+ {
+ lm->format_address_and_length = format_ip4_address_and_length;
+ mhash_init (&lm->address_to_if_address_index, sizeof (uword),
+ sizeof (ip4_address_fib_t));
+ }
+
+ {
+ int i;
+
+ /* Setup all IP protocols to be punted and builtin-unknown. */
+ for (i = 0; i < 256; i++)
+ {
+ lm->local_next_by_ip_protocol[i] = IP_LOCAL_NEXT_PUNT;
+ lm->builtin_protocol_by_ip_protocol[i] = IP_BUILTIN_PROTOCOL_UNKNOWN;
+ }
+
+ lm->local_next_by_ip_protocol[IP_PROTOCOL_UDP] = IP_LOCAL_NEXT_UDP_LOOKUP;
+ lm->local_next_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 :
+ IP_PROTOCOL_ICMP] = IP_LOCAL_NEXT_ICMP;
+ lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_UDP] =
+ IP_BUILTIN_PROTOCOL_UDP;
+ lm->builtin_protocol_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 :
+ IP_PROTOCOL_ICMP] =
+ IP_BUILTIN_PROTOCOL_ICMP;
+ }
+}
+
+u8 *
+format_ip_flow_hash_config (u8 * s, va_list * args)
+{
+ flow_hash_config_t flow_hash_config = va_arg (*args, u32);
+
+#define _(n,v) if (flow_hash_config & v) s = format (s, "%s ", #n);
+ foreach_flow_hash_bit;
+#undef _
+
+ return s;
+}
+
+u8 *
+format_ip_lookup_next (u8 * s, va_list * args)
+{
+ ip_lookup_next_t n = va_arg (*args, ip_lookup_next_t);
+ char *t = 0;
+
+ switch (n)
+ {
+ default:
+ s = format (s, "unknown %d", n);
+ return s;
+
+ case IP_LOOKUP_NEXT_DROP:
+ t = "drop";
+ break;
+ case IP_LOOKUP_NEXT_PUNT:
+ t = "punt";
+ break;
+ case IP_LOOKUP_NEXT_ARP:
+ t = "arp";
+ break;
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ t = "midchain";
+ break;
+ case IP_LOOKUP_NEXT_GLEAN:
+ t = "glean";
+ break;
+ case IP_LOOKUP_NEXT_REWRITE:
+ break;
+ }
+
+ if (t)
+ vec_add (s, t, strlen (t));
+
+ return s;
+}
+
+u8 *
+format_ip_adjacency_packet_data (u8 * s, va_list * args)
+{
+ vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+ u32 adj_index = va_arg (*args, u32);
+ u8 *packet_data = va_arg (*args, u8 *);
+ u32 n_packet_data_bytes = va_arg (*args, u32);
+ ip_adjacency_t *adj = adj_get (adj_index);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_REWRITE:
+ s = format (s, "%U",
+ format_vnet_rewrite_header,
+ vnm->vlib_main, &adj->rewrite_header, packet_data,
+ n_packet_data_bytes);
+ break;
+
+ default:
+ break;
+ }
+
+ return s;
+}
+
+static uword
+unformat_dpo (unformat_input_t * input, va_list * args)
+{
+ dpo_id_t *dpo = va_arg (*args, dpo_id_t *);
+ fib_protocol_t fp = va_arg (*args, int);
+ dpo_proto_t proto;
+
+ proto = fib_proto_to_dpo (fp);
+
+ if (unformat (input, "drop"))
+ dpo_copy (dpo, drop_dpo_get (proto));
+ else if (unformat (input, "punt"))
+ dpo_copy (dpo, punt_dpo_get (proto));
+ else if (unformat (input, "local"))
+ receive_dpo_add_or_lock (proto, ~0, NULL, dpo);
+ else if (unformat (input, "null-send-unreach"))
+ ip_null_dpo_add_and_lock (proto, IP_NULL_ACTION_SEND_ICMP_UNREACH, dpo);
+ else if (unformat (input, "null-send-prohibit"))
+ ip_null_dpo_add_and_lock (proto, IP_NULL_ACTION_SEND_ICMP_PROHIBIT, dpo);
+ else if (unformat (input, "null"))
+ ip_null_dpo_add_and_lock (proto, IP_NULL_ACTION_NONE, dpo);
+ else if (unformat (input, "classify"))
+ {
+ u32 classify_table_index;
+
+ if (!unformat (input, "%d", &classify_table_index))
+ {
+ clib_warning ("classify adj must specify table index");
+ return 0;
+ }
+
+ dpo_set (dpo, DPO_CLASSIFY, proto,
+ classify_dpo_create (proto, classify_table_index));
+ }
+ else
+ return 0;
+
+ return 1;
+}
+
+const ip46_address_t zero_addr = {
+ .as_u64 = {
+ 0, 0},
+};
+
+u32
+fib_table_id_find_fib_index (fib_protocol_t proto, u32 table_id)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ uword *p;
+
+ switch (proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ p = hash_get (im4->fib_index_by_table_id, table_id);
+ break;
+ case FIB_PROTOCOL_IP6:
+ p = hash_get (im6->fib_index_by_table_id, table_id);
+ break;
+ default:
+ p = NULL;
+ break;
+ }
+ if (NULL != p)
+ {
+ return (p[0]);
+ }
+ return (~0);
+}
+
+clib_error_t *
+vnet_ip_route_cmd (vlib_main_t * vm,
+ unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ fib_route_path_t *rpaths = NULL, rpath;
+ dpo_id_t dpo = DPO_INVALID, *dpos = NULL;
+ fib_prefix_t *prefixs = NULL, pfx;
+ mpls_label_t out_label, via_label;
+ clib_error_t *error = NULL;
+ u32 table_id, is_del;
+ vnet_main_t *vnm;
+ u32 fib_index;
+ f64 count;
+ int i;
+
+ vnm = vnet_get_main ();
+ is_del = 0;
+ table_id = 0;
+ count = 1;
+ memset (&pfx, 0, sizeof (pfx));
+ out_label = via_label = MPLS_LABEL_INVALID;
+
+ /* Get a line of input. */
+ if (!unformat_user (main_input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ memset (&rpath, 0, sizeof (rpath));
+
+ if (unformat (line_input, "table %d", &table_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else if (unformat (line_input, "add"))
+ is_del = 0;
+ else if (unformat (line_input, "resolve-via-host"))
+ {
+ if (vec_len (rpaths) == 0)
+ {
+ error = clib_error_return (0, "Paths then flags");
+ goto done;
+ }
+ rpaths[vec_len (rpaths) - 1].frp_flags |=
+ FIB_ROUTE_PATH_RESOLVE_VIA_HOST;
+ }
+ else if (unformat (line_input, "resolve-via-attached"))
+ {
+ if (vec_len (rpaths) == 0)
+ {
+ error = clib_error_return (0, "Paths then flags");
+ goto done;
+ }
+ rpaths[vec_len (rpaths) - 1].frp_flags |=
+ FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED;
+ }
+ else if (unformat (line_input, "out-label %U",
+ unformat_mpls_unicast_label, &out_label))
+ {
+ if (vec_len (rpaths) == 0)
+ {
+ error = clib_error_return (0, "Paths then labels");
+ goto done;
+ }
+ vec_add1 (rpaths[vec_len (rpaths) - 1].frp_label_stack, out_label);
+ }
+ else if (unformat (line_input, "via-label %U",
+ unformat_mpls_unicast_label, &rpath.frp_local_label))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = FIB_PROTOCOL_MPLS;
+ rpath.frp_sw_if_index = ~0;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (unformat (line_input, "count %f", &count))
+ ;
+
+ else if (unformat (line_input, "%U/%d",
+ unformat_ip4_address, &pfx.fp_addr.ip4, &pfx.fp_len))
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ vec_add1 (prefixs, pfx);
+ }
+ else if (unformat (line_input, "%U/%d",
+ unformat_ip6_address, &pfx.fp_addr.ip6, &pfx.fp_len))
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ vec_add1 (prefixs, pfx);
+ }
+ else if (unformat (line_input, "via %U %U weight %u",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index, &rpath.frp_weight))
+ {
+ rpath.frp_proto = FIB_PROTOCOL_IP4;
+ vec_add1 (rpaths, rpath);
+ }
+
+ else if (unformat (line_input, "via %U %U weight %u",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index, &rpath.frp_weight))
+ {
+ rpath.frp_proto = FIB_PROTOCOL_IP6;
+ vec_add1 (rpaths, rpath);
+ }
+
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = FIB_PROTOCOL_IP4;
+ vec_add1 (rpaths, rpath);
+ }
+
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = FIB_PROTOCOL_IP6;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U next-hop-table %d",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4, &rpath.frp_fib_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = FIB_PROTOCOL_IP4;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U next-hop-table %d",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6, &rpath.frp_fib_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = FIB_PROTOCOL_IP6;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip4_address, &rpath.frp_addr.ip4))
+ {
+ /*
+ * the recursive next-hops are by default in the same table
+ * as the prefix
+ */
+ rpath.frp_fib_index = table_id;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = FIB_PROTOCOL_IP4;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip6_address, &rpath.frp_addr.ip6))
+ {
+ rpath.frp_fib_index = table_id;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = FIB_PROTOCOL_IP6;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (unformat (line_input,
+ "lookup in table %d", &rpath.frp_fib_index))
+ {
+ rpath.frp_proto = pfx.fp_proto;
+ rpath.frp_sw_if_index = ~0;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (vec_len (prefixs) > 0 &&
+ unformat (line_input, "via %U",
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = prefixs[0].fp_proto;
+ vec_add1 (rpaths, rpath);
+ }
+ else if (vec_len (prefixs) > 0 &&
+ unformat (line_input, "via %U",
+ unformat_dpo, &dpo, prefixs[0].fp_proto))
+ {
+ vec_add1 (dpos, dpo);
+ }
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (vec_len (prefixs) == 0)
+ {
+ error =
+ clib_error_return (0, "expected ip4/ip6 destination address/length.");
+ goto done;
+ }
+
+ if (!is_del && vec_len (rpaths) + vec_len (dpos) == 0)
+ {
+ error = clib_error_return (0, "expected paths.");
+ goto done;
+ }
+
+ if (~0 == table_id)
+ {
+ /*
+ * if no table_id is passed we will manipulate the default
+ */
+ fib_index = 0;
+ }
+ else
+ {
+ fib_index = fib_table_id_find_fib_index (prefixs[0].fp_proto, table_id);
+
+ if (~0 == fib_index)
+ {
+ error = clib_error_return (0, "Nonexistent table id %d", table_id);
+ goto done;
+ }
+ }
+
+ for (i = 0; i < vec_len (prefixs); i++)
+ {
+ if (is_del && 0 == vec_len (rpaths))
+ {
+ fib_table_entry_delete (fib_index, &prefixs[i], FIB_SOURCE_CLI);
+ }
+ else if (!is_del && 1 == vec_len (dpos))
+ {
+ fib_table_entry_special_dpo_add (fib_index,
+ &prefixs[i],
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpos[0]);
+ dpo_reset (&dpos[0]);
+ }
+ else if (vec_len (dpos) > 0)
+ {
+ error =
+ clib_error_return (0,
+ "Load-balancing over multiple special adjacencies is unsupported");
+ goto done;
+ }
+ else if (0 < vec_len (rpaths))
+ {
+ u32 k, j, n, incr;
+ ip46_address_t dst = prefixs[i].fp_addr;
+ f64 t[2];
+ n = count;
+ t[0] = vlib_time_now (vm);
+ incr = 1 << ((FIB_PROTOCOL_IP4 == prefixs[0].fp_proto ? 32 : 128) -
+ prefixs[i].fp_len);
+
+ for (k = 0; k < n; k++)
+ {
+ for (j = 0; j < vec_len (rpaths); j++)
+ {
+ u32 fi;
+ /*
+ * the CLI parsing stored table Ids, swap to FIB indicies
+ */
+ fi = fib_table_id_find_fib_index (prefixs[i].fp_proto,
+ rpaths[i].frp_fib_index);
+
+ if (~0 == fi)
+ {
+ error =
+ clib_error_return (0, "Via table %d does not exist",
+ rpaths[i].frp_fib_index);
+ goto done;
+ }
+ rpaths[i].frp_fib_index = fi;
+
+ fib_prefix_t rpfx = {
+ .fp_len = prefixs[i].fp_len,
+ .fp_proto = prefixs[i].fp_proto,
+ .fp_addr = dst,
+ };
+
+ if (is_del)
+ fib_table_entry_path_remove2 (fib_index,
+ &rpfx,
+ FIB_SOURCE_CLI, &rpaths[j]);
+ else
+ fib_table_entry_path_add2 (fib_index,
+ &rpfx,
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_NONE,
+ &rpaths[j]);
+ }
+
+ if (FIB_PROTOCOL_IP4 == prefixs[0].fp_proto)
+ {
+ dst.ip4.as_u32 =
+ clib_host_to_net_u32 (incr +
+ clib_net_to_host_u32 (dst.
+ ip4.as_u32));
+ }
+ else
+ {
+ int bucket = (incr < 64 ? 0 : 1);
+ dst.ip6.as_u64[bucket] =
+ clib_host_to_net_u64 (incr +
+ clib_net_to_host_u64 (dst.ip6.as_u64
+ [bucket]));
+
+ }
+ }
+ t[1] = vlib_time_now (vm);
+ if (count > 1)
+ vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0]));
+ }
+ else
+ {
+ error = clib_error_return (0, "Don't understand what you want...");
+ goto done;
+ }
+ }
+
+
+done:
+ vec_free (dpos);
+ vec_free (prefixs);
+ vec_free (rpaths);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = {
+ .path = "ip",
+ .short_help = "Internet protocol (IP) commands",
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_ip6_command, static) = {
+ .path = "ip6",
+ .short_help = "Internet protocol version 6 (IPv6) commands",
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = {
+ .path = "show ip",
+ .short_help = "Internet protocol (IP) show commands",
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
+ .path = "show ip6",
+ .short_help = "Internet protocol version 6 (IPv6) show commands",
+};
+/* *INDENT-ON* */
+
+/*?
+ * This command is used to add or delete IPv4 or IPv6 routes. All
+ * IP Addresses ('<em><dst-ip-addr>/<width></em>',
+ * '<em><next-hop-ip-addr></em>' and '<em><adj-hop-ip-addr></em>')
+ * can be IPv4 or IPv6, but all must be of the same form in a single
+ * command. To display the current set of routes, use the commands
+ * '<em>show ip fib</em>' and '<em>show ip6 fib</em>'.
+ *
+ * @cliexpar
+ * Example of how to add a straight forward static route:
+ * @cliexcmd{ip route add 6.0.1.2/32 via 6.0.0.1 GigabitEthernet2/0/0}
+ * Example of how to delete a straight forward static route:
+ * @cliexcmd{ip route del 6.0.1.2/32 via 6.0.0.1 GigabitEthernet2/0/0}
+ * Mainly for route add/del performance testing, one can add or delete
+ * multiple routes by adding 'count N' to the previous item:
+ * @cliexcmd{ip route add count 10 7.0.0.0/24 via 6.0.0.1 GigabitEthernet2/0/0}
+ * Add multiple routes for the same destination to create equal-cost multipath:
+ * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.1 GigabitEthernet2/0/0}
+ * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.2 GigabitEthernet2/0/0}
+ * For unequal-cost multipath, specify the desired weights. This
+ * combination of weights results in 3/4 of the traffic following the
+ * second path, 1/4 following the first path:
+ * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.1 GigabitEthernet2/0/0 weight 1}
+ * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.2 GigabitEthernet2/0/0 weight 3}
+ * To add a route to a particular FIB table (VRF), use:
+ * @cliexcmd{ip route add 172.16.24.0/24 table 7 via GigabitEthernet2/0/0}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_route_command, static) = {
+ .path = "ip route",
+ .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table <table-id>] [via <next-hop-ip-addr> [<interface>] [weight <weight>]] | [via arp <interface> <adj-hop-ip-addr>] | [via drop|punt|local<id>|arp|classify <classify-idx>] [lookup in table <out-table-id>]",
+ .function = vnet_ip_route_cmd,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/*
+ * The next two routines address a longstanding script hemorrhoid.
+ * Probing a v4 or v6 neighbor needs to appear to be synchronous,
+ * or dependent route-adds will simply fail.
+ */
+static clib_error_t *
+ip6_probe_neighbor_wait (vlib_main_t * vm, ip6_address_t * a, u32 sw_if_index,
+ int retry_count)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *e;
+ int i;
+ int resolved = 0;
+ uword event_type;
+ uword *event_data = 0;
+
+ ASSERT (vlib_in_process_context (vm));
+
+ if (retry_count > 0)
+ vnet_register_ip6_neighbor_resolution_event
+ (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
+ 1 /* event */ , 0 /* data */ );
+
+ for (i = 0; i < retry_count; i++)
+ {
+ /* The interface may be down, etc. */
+ e = ip6_probe_neighbor (vm, a, sw_if_index);
+
+ if (e)
+ return e;
+
+ vlib_process_wait_for_event_or_clock (vm, 1.0);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case 1: /* resolved... */
+ vlib_cli_output (vm, "Resolved %U", format_ip6_address, a);
+ resolved = 1;
+ goto done;
+
+ case ~0: /* timeout */
+ break;
+
+ default:
+ clib_warning ("unknown event_type %d", event_type);
+ }
+ vec_reset_length (event_data);
+ }
+
+done:
+
+ if (!resolved)
+ return clib_error_return (0, "Resolution failed for %U",
+ format_ip6_address, a);
+ return 0;
+}
+
+static clib_error_t *
+ip4_probe_neighbor_wait (vlib_main_t * vm, ip4_address_t * a, u32 sw_if_index,
+ int retry_count)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *e;
+ int i;
+ int resolved = 0;
+ uword event_type;
+ uword *event_data = 0;
+
+ ASSERT (vlib_in_process_context (vm));
+
+ if (retry_count > 0)
+ vnet_register_ip4_arp_resolution_event
+ (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
+ 1 /* event */ , 0 /* data */ );
+
+ for (i = 0; i < retry_count; i++)
+ {
+ /* The interface may be down, etc. */
+ e = ip4_probe_neighbor (vm, a, sw_if_index);
+
+ if (e)
+ return e;
+
+ vlib_process_wait_for_event_or_clock (vm, 1.0);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case 1: /* resolved... */
+ vlib_cli_output (vm, "Resolved %U", format_ip4_address, a);
+ resolved = 1;
+ goto done;
+
+ case ~0: /* timeout */
+ break;
+
+ default:
+ clib_warning ("unknown event_type %d", event_type);
+ }
+ vec_reset_length (event_data);
+ }
+
+done:
+
+ vec_reset_length (event_data);
+
+ if (!resolved)
+ return clib_error_return (0, "Resolution failed for %U",
+ format_ip4_address, a);
+ return 0;
+}
+
+static clib_error_t *
+probe_neighbor_address (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t a4;
+ ip6_address_t a6;
+ clib_error_t *error = 0;
+ u32 sw_if_index = ~0;
+ int retry_count = 3;
+ int is_ip4 = 1;
+ int address_set = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user (line_input, unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "retry %d", &retry_count))
+ ;
+
+ else if (unformat (line_input, "%U", unformat_ip4_address, &a4))
+ address_set++;
+ else if (unformat (line_input, "%U", unformat_ip6_address, &a6))
+ {
+ address_set++;
+ is_ip4 = 0;
+ }
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Interface required, not set.");
+ if (address_set == 0)
+ return clib_error_return (0, "ip address required, not set.");
+ if (address_set > 1)
+ return clib_error_return (0, "Multiple ip addresses not supported.");
+
+ if (is_ip4)
+ error = ip4_probe_neighbor_wait (vm, &a4, sw_if_index, retry_count);
+ else
+ error = ip6_probe_neighbor_wait (vm, &a6, sw_if_index, retry_count);
+
+ return error;
+}
+
+/*?
+ * The '<em>ip probe-neighbor</em>' command ARPs for IPv4 addresses or
+ * attempts IPv6 neighbor discovery depending on the supplied IP address
+ * format.
+ *
+ * @note This command will not immediately affect the indicated FIB; it
+ * is not suitable for use in establishing a FIB entry prior to adding
+ * recursive FIB entries. As in: don't use it in a script to probe a
+ * gateway prior to adding a default route. It won't work. Instead,
+ * configure a static ARP cache entry [see '<em>set ip arp</em>'], or
+ * a static IPv6 neighbor [see '<em>set ip6 neighbor</em>'].
+ *
+ * @cliexpar
+ * Example of probe for an IPv4 address:
+ * @cliexcmd{ip probe-neighbor GigabitEthernet2/0/0 172.16.1.2}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) = {
+ .path = "ip probe-neighbor",
+ .function = probe_neighbor_address,
+ .short_help = "ip probe-neighbor <interface> <ip4-addr> | <ip6-addr> [retry nn]",
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/lookup.h b/src/vnet/ip/lookup.h
new file mode 100644
index 00000000000..3dbd7b3b8e8
--- /dev/null
+++ b/src/vnet/ip/lookup.h
@@ -0,0 +1,498 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_lookup.h: ip (4 or 6) lookup structures, adjacencies, ...
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @file
+ * Definitions for all things IP (v4|v6) unicast and multicast lookup related.
+ *
+ * - Adjacency definitions and registration.
+ * - Callbacks on route add.
+ * - Callbacks on interface address change.
+ */
+#ifndef included_ip_lookup_h
+#define included_ip_lookup_h
+
+#include <vnet/vnet.h>
+#include <vlib/buffer.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/feature/feature.h>
+
+/** @brief Common (IP4/IP6) next index stored in adjacency. */
+typedef enum
+{
+ /** Adjacency to drop this packet. */
+ IP_LOOKUP_NEXT_DROP,
+ /** Adjacency to punt this packet. */
+ IP_LOOKUP_NEXT_PUNT,
+
+ /** This packet is for one of our own IP addresses. */
+ IP_LOOKUP_NEXT_LOCAL,
+
+ /** This packet matches an "incomplete adjacency" and packets
+ need to be passed to ARP to find rewrite string for
+ this destination. */
+ IP_LOOKUP_NEXT_ARP,
+
+ /** This packet matches an "interface route" and packets
+ need to be passed to ARP to find rewrite string for
+ this destination. */
+ IP_LOOKUP_NEXT_GLEAN,
+
+ /** This packet is to be rewritten and forwarded to the next
+ processing node. This is typically the output interface but
+ might be another node for further output processing. */
+ IP_LOOKUP_NEXT_REWRITE,
+
+ /** This packets follow a load-balance */
+ IP_LOOKUP_NEXT_LOAD_BALANCE,
+
+ /** This packets follow a mid-chain adjacency */
+ IP_LOOKUP_NEXT_MIDCHAIN,
+
+ /** This packets needs to go to ICMP error */
+ IP_LOOKUP_NEXT_ICMP_ERROR,
+
+ IP_LOOKUP_N_NEXT,
+} ip_lookup_next_t;
+
+typedef enum
+{
+ IP4_LOOKUP_N_NEXT = IP_LOOKUP_N_NEXT,
+} ip4_lookup_next_t;
+
+typedef enum
+{
+ /* Hop-by-hop header handling */
+ IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT,
+ IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP,
+ IP6_LOOKUP_NEXT_POP_HOP_BY_HOP,
+ IP6_LOOKUP_N_NEXT,
+} ip6_lookup_next_t;
+
+#define IP4_LOOKUP_NEXT_NODES { \
+ [IP_LOOKUP_NEXT_DROP] = "ip4-drop", \
+ [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", \
+ [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \
+ [IP_LOOKUP_NEXT_ARP] = "ip4-arp", \
+ [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \
+ [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite", \
+ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \
+ [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip4-load-balance", \
+ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \
+}
+
+#define IP6_LOOKUP_NEXT_NODES { \
+ [IP_LOOKUP_NEXT_DROP] = "ip6-drop", \
+ [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", \
+ [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", \
+ [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", \
+ [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean", \
+ [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \
+ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \
+ [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip6-load-balance", \
+ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \
+ [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \
+ [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \
+ [IP6_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", \
+}
+
+/** Flow hash configuration */
+#define IP_FLOW_HASH_SRC_ADDR (1<<0)
+#define IP_FLOW_HASH_DST_ADDR (1<<1)
+#define IP_FLOW_HASH_PROTO (1<<2)
+#define IP_FLOW_HASH_SRC_PORT (1<<3)
+#define IP_FLOW_HASH_DST_PORT (1<<4)
+#define IP_FLOW_HASH_REVERSE_SRC_DST (1<<5)
+
+/** Default: 5-tuple without the "reverse" bit */
+#define IP_FLOW_HASH_DEFAULT (0x1F)
+
+#define foreach_flow_hash_bit \
+_(src, IP_FLOW_HASH_SRC_ADDR) \
+_(dst, IP_FLOW_HASH_DST_ADDR) \
+_(sport, IP_FLOW_HASH_SRC_PORT) \
+_(dport, IP_FLOW_HASH_DST_PORT) \
+_(proto, IP_FLOW_HASH_PROTO) \
+_(reverse, IP_FLOW_HASH_REVERSE_SRC_DST)
+
+/**
+ * A flow hash configuration is a mask of the flow hash options
+ */
+typedef u32 flow_hash_config_t;
+
+/**
+ * Forward delcartion
+ */
+struct ip_adjacency_t_;
+
+/**
+ * @brief A function type for post-rewrite fixups on midchain adjacency
+ */
+typedef void (*adj_midchain_fixup_t) (vlib_main_t * vm,
+ struct ip_adjacency_t_ * adj,
+ vlib_buffer_t * b0);
+
+/**
+ * @brief Flags on an IP adjacency
+ */
+typedef enum ip_adjacency_flags_t_
+{
+ /**
+ * Currently a sync walk is active. Used to prevent re-entrant walking
+ */
+ IP_ADJ_SYNC_WALK_ACTIVE = (1 << 0),
+} ip_adjacency_flags_t;
+
+/** @brief IP unicast adjacency.
+ @note cache aligned.
+*/
+typedef struct ip_adjacency_t_
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+ /** Number of adjecencies in block. Greater than 1 means multipath;
+ otherwise equal to 1. */
+ u16 n_adj;
+
+ /** Next hop after ip4-lookup. */
+ union
+ {
+ ip_lookup_next_t lookup_next_index:16;
+ u16 lookup_next_index_as_int;
+ };
+
+ /** Interface address index for this local/arp adjacency. */
+ u32 if_address_index;
+
+ /** Force re-lookup in a different FIB. ~0 => normal behavior */
+ u16 mcast_group_index;
+
+ /** Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */
+ u16 saved_lookup_next_index;
+
+ /*
+ * link/ether-type
+ */
+ vnet_link_t ia_link;
+ u8 ia_nh_proto;
+
+ union
+ {
+ /**
+ * IP_LOOKUP_NEXT_ARP/IP_LOOKUP_NEXT_REWRITE
+ *
+ * neighbour adjacency sub-type;
+ */
+ struct
+ {
+ ip46_address_t next_hop;
+ } nbr;
+ /**
+ * IP_LOOKUP_NEXT_MIDCHAIN
+ *
+ * A nbr adj that is also recursive. Think tunnels.
+ * A nbr adj can transition to be of type MDICHAIN
+ * so be sure to leave the two structs with the next_hop
+ * fields aligned.
+ */
+ struct
+ {
+ /**
+ * The recursive next-hop
+ */
+ ip46_address_t next_hop;
+ /**
+ * The node index of the tunnel's post rewrite/TX function.
+ */
+ u32 tx_function_node;
+ /**
+ * The next DPO to use
+ */
+ dpo_id_t next_dpo;
+ /**
+ * A function to perform the post-rewrite fixup
+ */
+ adj_midchain_fixup_t fixup_func;
+ } midchain;
+ /**
+ * IP_LOOKUP_NEXT_GLEAN
+ *
+ * Glean the address to ARP for from the packet's destination
+ */
+ struct
+ {
+ ip46_address_t receive_addr;
+ } glean;
+ } sub_type;
+
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+
+ /* Rewrite in second/third cache lines */
+ vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE);
+
+ /*
+ * member not accessed in the data plane are relgated to the
+ * remaining cachelines
+ */
+ fib_node_t ia_node;
+
+ /**
+ * Flags on the adjacency
+ */
+ ip_adjacency_flags_t ia_flags;
+
+} ip_adjacency_t;
+
+STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline0) == 0),
+ "IP adjacency cachline 0 is not offset");
+STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline1) ==
+ CLIB_CACHE_LINE_BYTES),
+ "IP adjacency cachline 1 is more than one cachline size offset");
+
+/* An all zeros address */
+extern const ip46_address_t zero_addr;
+
+/* IP multicast adjacency. */
+typedef struct
+{
+ /* Handle for this adjacency in adjacency heap. */
+ u32 heap_handle;
+
+ /* Number of adjecencies in block. */
+ u32 n_adj;
+
+ /* Rewrite string. */
+ vnet_declare_rewrite (64 - 2 * sizeof (u32));
+}
+ip_multicast_rewrite_t;
+
+typedef struct
+{
+ /* ip4-multicast-rewrite next index. */
+ u32 next_index;
+
+ u8 n_rewrite_bytes;
+
+ u8 rewrite_string[64 - 1 * sizeof (u32) - 1 * sizeof (u8)];
+}
+ip_multicast_rewrite_string_t;
+
+typedef struct
+{
+ ip_multicast_rewrite_t *rewrite_heap;
+
+ ip_multicast_rewrite_string_t *rewrite_strings;
+
+ /* Negative rewrite string index; >= 0 sw_if_index.
+ Sorted. Used to hash. */
+ i32 **adjacency_id_vector;
+
+ uword *adjacency_by_id_vector;
+} ip_multicast_lookup_main_t;
+
+typedef struct
+{
+ /* Key for mhash; in fact, just a byte offset into mhash key vector. */
+ u32 address_key;
+
+ /* Interface which has this address. */
+ u32 sw_if_index;
+
+ /* Adjacency for neighbor probe (ARP) for this interface address. */
+ u32 neighbor_probe_adj_index;
+
+ /* Address (prefix) length for this interface. */
+ u16 address_length;
+
+ /* Will be used for something eventually. Primary vs. secondary? */
+ u16 flags;
+
+ /* Next and previous pointers for doubly linked list of
+ addresses per software interface. */
+ u32 next_this_sw_interface;
+ u32 prev_this_sw_interface;
+} ip_interface_address_t;
+
+typedef enum
+{
+ IP_LOCAL_NEXT_DROP,
+ IP_LOCAL_NEXT_PUNT,
+ IP_LOCAL_NEXT_UDP_LOOKUP,
+ IP_LOCAL_NEXT_ICMP,
+ IP_LOCAL_N_NEXT,
+} ip_local_next_t;
+
+struct ip_lookup_main_t;
+
+typedef struct ip_lookup_main_t
+{
+ /* Adjacency heap. */
+ ip_adjacency_t *adjacency_heap;
+
+ /** load-balance packet/byte counters indexed by LB index. */
+ vlib_combined_counter_main_t load_balance_counters;
+
+ /** Pool of addresses that are assigned to interfaces. */
+ ip_interface_address_t *if_address_pool;
+
+ /** Hash table mapping address to index in interface address pool. */
+ mhash_t address_to_if_address_index;
+
+ /** Head of doubly linked list of interface addresses for each software interface.
+ ~0 means this interface has no address. */
+ u32 *if_address_pool_index_by_sw_if_index;
+
+ /** First table index to use for this interface, ~0 => none */
+ u32 *classify_table_index_by_sw_if_index;
+
+ /** Feature arc indices */
+ u8 mcast_feature_arc_index;
+ u8 ucast_feature_arc_index;
+ u8 output_feature_arc_index;
+
+ /** Number of bytes in a fib result. Must be at least
+ sizeof (uword). First word is always adjacency index. */
+ u32 fib_result_n_bytes, fib_result_n_words;
+
+ format_function_t *format_fib_result;
+
+ /** 1 for ip6; 0 for ip4. */
+ u32 is_ip6;
+
+ /** Either format_ip4_address_and_length or format_ip6_address_and_length. */
+ format_function_t *format_address_and_length;
+
+ /** Special adjacency format functions */
+ format_function_t **special_adjacency_format_functions;
+
+ /** Table mapping ip protocol to ip[46]-local node next index. */
+ u8 local_next_by_ip_protocol[256];
+
+ /** IP_BUILTIN_PROTOCOL_{TCP,UDP,ICMP,OTHER} by protocol in IP header. */
+ u8 builtin_protocol_by_ip_protocol[256];
+} ip_lookup_main_t;
+
+always_inline ip_adjacency_t *
+ip_get_adjacency (ip_lookup_main_t * lm, u32 adj_index)
+{
+ ip_adjacency_t *adj;
+
+ adj = vec_elt_at_index (lm->adjacency_heap, adj_index);
+
+ return adj;
+}
+
+#define ip_prefetch_adjacency(lm,adj_index,type) \
+do { \
+ ip_adjacency_t * _adj = (lm)->adjacency_heap + (adj_index); \
+ CLIB_PREFETCH (_adj, sizeof (_adj[0]), type); \
+} while (0)
+
+/* Create new block of given number of contiguous adjacencies. */
+ip_adjacency_t *ip_add_adjacency (ip_lookup_main_t * lm,
+ ip_adjacency_t * adj,
+ u32 n_adj, u32 * adj_index_result);
+
+clib_error_t *ip_interface_address_add_del (ip_lookup_main_t * lm,
+ u32 sw_if_index,
+ void *address,
+ u32 address_length,
+ u32 is_del, u32 * result_index);
+
+u8 *format_ip_flow_hash_config (u8 * s, va_list * args);
+
+always_inline ip_interface_address_t *
+ip_get_interface_address (ip_lookup_main_t * lm, void *addr_fib)
+{
+ uword *p = mhash_get (&lm->address_to_if_address_index, addr_fib);
+ return p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0;
+}
+
+u32 fib_table_id_find_fib_index (fib_protocol_t proto, u32 table_id);
+
+always_inline void *
+ip_interface_address_get_address (ip_lookup_main_t * lm,
+ ip_interface_address_t * a)
+{
+ return mhash_key_to_mem (&lm->address_to_if_address_index, a->address_key);
+}
+
+/* *INDENT-OFF* */
+#define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \
+do { \
+ vnet_main_t *_vnm = vnet_get_main(); \
+ u32 _sw_if_index = sw_if_index; \
+ vnet_sw_interface_t *_swif; \
+ _swif = vnet_get_sw_interface (_vnm, _sw_if_index); \
+ \
+ /* \
+ * Loop => honor unnumbered interface addressing. \
+ */ \
+ if (loop && _swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) \
+ _sw_if_index = _swif->unnumbered_sw_if_index; \
+ u32 _ia = \
+ (vec_len((lm)->if_address_pool_index_by_sw_if_index) \
+ > (_sw_if_index)) \
+ ? vec_elt ((lm)->if_address_pool_index_by_sw_if_index, \
+ (_sw_if_index)) : (u32)~0; \
+ ip_interface_address_t * _a; \
+ while (_ia != ~0) \
+ { \
+ _a = pool_elt_at_index ((lm)->if_address_pool, _ia); \
+ _ia = _a->next_this_sw_interface; \
+ (a) = _a; \
+ body; \
+ } \
+} while (0)
+/* *INDENT-ON* */
+
+void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
+
+#endif /* included_ip_lookup_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ping.c b/src/vnet/ip/ping.c
new file mode 100644
index 00000000000..68dbe759ebc
--- /dev/null
+++ b/src/vnet/ip/ping.c
@@ -0,0 +1,888 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ping.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_entry.h>
+
+/**
+ * @file
+ * @brief IPv4 and IPv6 ICMP Ping.
+ *
+ * This file contains code to suppport IPv4 or IPv6 ICMP ECHO_REQUEST to
+ * network hosts.
+ *
+ */
+
+
+u8 *
+format_icmp_echo_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ icmp_echo_trace_t *t = va_arg (*va, icmp_echo_trace_t *);
+
+ s = format (s, "ICMP echo id %d seq %d%s",
+ clib_net_to_host_u16 (t->id),
+ clib_net_to_host_u16 (t->seq), t->bound ? "" : " (unknown)");
+
+ return s;
+}
+
+/*
+ * If we can find the ping run by an ICMP ID, then we send the signal
+ * to the CLI process referenced by that ping run, alongside with
+ * a freshly made copy of the packet.
+ * I opted for a packet copy to keep the main packet processing path
+ * the same as for all the other nodes.
+ *
+ */
+
+static int
+signal_ip46_icmp_reply_event (vlib_main_t * vm,
+ u8 event_type, vlib_buffer_t * b0)
+{
+ ping_main_t *pm = &ping_main;
+ u16 net_icmp_id = 0;
+ u32 bi0_copy = 0;
+
+ switch (event_type)
+ {
+ case PING_RESPONSE_IP4:
+ {
+ icmp4_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+ net_icmp_id = h0->icmp_echo.id;
+ }
+ break;
+ case PING_RESPONSE_IP6:
+ {
+ icmp6_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+ net_icmp_id = h0->icmp_echo.id;
+ }
+ break;
+ default:
+ return 0;
+ }
+
+ uword *p = hash_get (pm->ping_run_by_icmp_id,
+ clib_net_to_host_u16 (net_icmp_id));
+ if (!p)
+ return 0;
+
+ ping_run_t *pr = vec_elt_at_index (pm->ping_runs, p[0]);
+ if (vlib_buffer_alloc (vm, &bi0_copy, 1) == 1)
+ {
+ void *dst = vlib_buffer_get_current (vlib_get_buffer (vm, bi0_copy));
+ clib_memcpy (dst, vlib_buffer_get_current (b0), b0->current_length);
+ }
+ /* If buffer_alloc failed, bi0_copy == 0 - just signaling an event. */
+
+ vlib_process_signal_event (vm, pr->cli_process_id, event_type, bi0_copy);
+ return 1;
+}
+
+/*
+ * Process ICMPv6 echo replies
+ */
+static uword
+ip6_icmp_echo_reply_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from;
+
+ from = vlib_frame_vector_args (frame); /* array of buffer indices */
+ n_left_from = frame->n_vectors; /* number of buffer indices */
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ next0 = signal_ip46_icmp_reply_event (vm, PING_RESPONSE_IP6, b0) ?
+ ICMP6_ECHO_REPLY_NEXT_DROP : ICMP6_ECHO_REPLY_NEXT_PUNT;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ icmp6_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+ icmp_echo_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->id = h0->icmp_echo.id;
+ tr->seq = h0->icmp_echo.seq;
+ tr->bound = (next0 == ICMP6_ECHO_REPLY_NEXT_DROP);
+ }
+
+ /* push this pkt to the next graph node */
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+
+ from += 1;
+ n_left_from -= 1;
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_echo_reply_node, static) =
+{
+ .function = ip6_icmp_echo_reply_node_fn,
+ .name = "ip6-icmp-echo-reply",
+ .vector_size = sizeof (u32),
+ .format_trace = format_icmp_echo_trace,
+ .n_next_nodes = ICMP6_ECHO_REPLY_N_NEXT,
+ .next_nodes = {
+ [ICMP6_ECHO_REPLY_NEXT_DROP] = "error-drop",
+ [ICMP6_ECHO_REPLY_NEXT_PUNT] = "error-punt",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * Process ICMPv4 echo replies
+ */
+static uword
+ip4_icmp_echo_reply_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from;
+
+ from = vlib_frame_vector_args (frame); /* array of buffer indices */
+ n_left_from = frame->n_vectors; /* number of buffer indices */
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ next0 = signal_ip46_icmp_reply_event (vm, PING_RESPONSE_IP4, b0) ?
+ ICMP4_ECHO_REPLY_NEXT_DROP : ICMP4_ECHO_REPLY_NEXT_PUNT;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ icmp4_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+ icmp_echo_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->id = h0->icmp_echo.id;
+ tr->seq = h0->icmp_echo.seq;
+ tr->bound = (next0 == ICMP4_ECHO_REPLY_NEXT_DROP);
+ }
+
+ /* push this pkt to the next graph node */
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+
+ from += 1;
+ n_left_from -= 1;
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_icmp_echo_reply_node, static) =
+{
+ .function = ip4_icmp_echo_reply_node_fn,
+ .name = "ip4-icmp-echo-reply",
+ .vector_size = sizeof (u32),
+ .format_trace = format_icmp_echo_trace,
+ .n_next_nodes = ICMP4_ECHO_REPLY_N_NEXT,
+ .next_nodes = {
+ [ICMP4_ECHO_REPLY_NEXT_DROP] = "error-drop",
+ [ICMP4_ECHO_REPLY_NEXT_PUNT] = "error-punt",
+ },
+};
+/* *INDENT-ON* */
+
+char *ip6_lookup_next_nodes[] = IP6_LOOKUP_NEXT_NODES;
+char *ip4_lookup_next_nodes[] = IP4_LOOKUP_NEXT_NODES;
+
+/* get first interface address */
+static ip6_address_t *
+ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_address_t *ia = 0;
+ ip6_address_t *result = 0;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */ ,
+ ({
+ ip6_address_t * a =
+ ip_interface_address_get_address (lm, ia);
+ result = a;
+ break;
+ }));
+ /* *INDENT-ON* */
+ return result;
+}
+
+/* Fill in the ICMP ECHO structure, return the safety-checked and possibly shrunk data_len */
+static u16
+init_icmp46_echo_request (icmp46_echo_request_t * icmp46_echo,
+ u16 seq_host, u16 id_host, u16 data_len)
+{
+ int i;
+ icmp46_echo->seq = clib_host_to_net_u16 (seq_host);
+ icmp46_echo->id = clib_host_to_net_u16 (id_host);
+
+ for (i = 0; i < sizeof (icmp46_echo->data); i++)
+ {
+ icmp46_echo->data[i] = i % 256;
+ }
+
+ if (data_len > sizeof (icmp46_echo_request_t))
+ {
+ data_len = sizeof (icmp46_echo_request_t);
+ }
+ return data_len;
+}
+
+static send_ip46_ping_result_t
+send_ip6_ping (vlib_main_t * vm, ip6_main_t * im,
+ u32 table_id, ip6_address_t * pa6,
+ u32 sw_if_index, u16 seq_host, u16 id_host, u16 data_len,
+ u8 verbose)
+{
+ icmp6_echo_request_header_t *h0;
+ u32 bi0 = 0;
+ int bogus_length = 0;
+ vlib_buffer_t *p0;
+ vlib_frame_t *f;
+ u32 *to_next;
+
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ return SEND_PING_ALLOC_FAIL;
+
+ p0 = vlib_get_buffer (vm, bi0);
+
+ /*
+ * if the user did not provide a source interface, use the any interface
+ * that the destination resolves via.
+ */
+ if (~0 == sw_if_index)
+ {
+ fib_node_index_t fib_entry_index;
+ u32 fib_index;
+
+ fib_index = ip6_fib_index_from_table_id (table_id);
+
+ if (~0 == fib_index)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_TABLE;
+ }
+
+ fib_entry_index = ip6_fib_table_lookup (fib_index, pa6, 128);
+ sw_if_index = fib_entry_get_resolving_interface (fib_entry_index);
+ /*
+ * Set the TX interface to force ip-lookup to use its table ID
+ */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index;
+ }
+ else
+ {
+ /*
+ * force an IP lookup in the table bound to the user's chosen
+ * source interface.
+ */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ ip6_fib_table_get_index_for_sw_if_index (sw_if_index);
+ }
+
+ if (~0 == sw_if_index)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_INTERFACE;
+ }
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index;
+
+ h0 = vlib_buffer_get_current (p0);
+
+ /* Fill in ip6 header fields */
+ h0->ip6.ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+ h0->ip6.payload_length = 0; /* Set below */
+ h0->ip6.protocol = IP_PROTOCOL_ICMP6;
+ h0->ip6.hop_limit = 255;
+ h0->ip6.dst_address = *pa6;
+ h0->ip6.src_address = *pa6;
+
+ /* Fill in the correct source now */
+ ip6_address_t *a = ip6_interface_first_address (im, sw_if_index);
+ h0->ip6.src_address = a[0];
+
+ /* Fill in icmp fields */
+ h0->icmp.type = ICMP6_echo_request;
+ h0->icmp.code = 0;
+ h0->icmp.checksum = 0;
+
+ data_len =
+ init_icmp46_echo_request (&h0->icmp_echo, seq_host, id_host, data_len);
+ h0->icmp_echo.time_sent = vlib_time_now (vm);
+
+ /* Fix up the lengths */
+ h0->ip6.payload_length =
+ clib_host_to_net_u16 (data_len + sizeof (icmp46_header_t));
+
+ p0->current_length = clib_net_to_host_u16 (h0->ip6.payload_length) +
+ STRUCT_OFFSET_OF (icmp6_echo_request_header_t, icmp);
+
+ /* Calculate the ICMP checksum */
+ h0->icmp.checksum = 0;
+ h0->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip6, &bogus_length);
+
+ /* Enqueue the packet right now */
+ f = vlib_get_frame_to_node (vm, ip6_lookup_node.index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, ip6_lookup_node.index, f);
+
+ return SEND_PING_OK;
+}
+
+static send_ip46_ping_result_t
+send_ip4_ping (vlib_main_t * vm,
+ ip4_main_t * im,
+ u32 table_id,
+ ip4_address_t * pa4,
+ u32 sw_if_index,
+ u16 seq_host, u16 id_host, u16 data_len, u8 verbose)
+{
+ icmp4_echo_request_header_t *h0;
+ u32 bi0 = 0;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ vlib_buffer_t *p0;
+ vlib_frame_t *f;
+ u32 *to_next;
+ u32 if_add_index0;
+
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ return SEND_PING_ALLOC_FAIL;
+
+ p0 = vlib_get_buffer (vm, bi0);
+
+ /*
+ * if the user did not provide a source interface, use the any interface
+ * that the destination resolves via.
+ */
+ if (~0 == sw_if_index)
+ {
+ fib_node_index_t fib_entry_index;
+ u32 fib_index;
+
+ fib_index = ip4_fib_index_from_table_id (table_id);
+
+ if (~0 == fib_index)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_TABLE;
+ }
+
+ fib_entry_index =
+ ip4_fib_table_lookup (ip4_fib_get (fib_index), pa4, 32);
+ sw_if_index = fib_entry_get_resolving_interface (fib_entry_index);
+ /*
+ * Set the TX interface to force ip-lookup to use the user's table ID
+ */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index;
+ }
+ else
+ {
+ /*
+ * force an IP lookup in the table bound to the user's chosen
+ * source interface.
+ */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+ }
+
+ if (~0 == sw_if_index)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ return SEND_PING_NO_INTERFACE;
+ }
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index;
+
+ h0 = vlib_buffer_get_current (p0);
+
+ /* Fill in ip4 header fields */
+ h0->ip4.checksum = 0;
+ h0->ip4.ip_version_and_header_length = 0x45;
+ h0->ip4.tos = 0;
+ h0->ip4.length = 0; /* Set below */
+ h0->ip4.fragment_id = 0;
+ h0->ip4.flags_and_fragment_offset = 0;
+ h0->ip4.ttl = 0xff;
+ h0->ip4.protocol = IP_PROTOCOL_ICMP;
+ h0->ip4.dst_address = *pa4;
+ h0->ip4.src_address = *pa4;
+
+ /* Fill in the correct source now */
+ if_add_index0 = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+ if (PREDICT_TRUE (if_add_index0 != ~0))
+ {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index (lm->if_address_pool, if_add_index0);
+ ip4_address_t *if_ip = ip_interface_address_get_address (lm, if_add);
+ h0->ip4.src_address = *if_ip;
+ if (verbose)
+ {
+ vlib_cli_output (vm, "Source address: %U",
+ format_ip4_address, &h0->ip4.src_address);
+ }
+ }
+
+ /* Fill in icmp fields */
+ h0->icmp.type = ICMP4_echo_request;
+ h0->icmp.code = 0;
+ h0->icmp.checksum = 0;
+
+ data_len =
+ init_icmp46_echo_request (&h0->icmp_echo, seq_host, id_host, data_len);
+ h0->icmp_echo.time_sent = vlib_time_now (vm);
+
+ /* Fix up the lengths */
+ h0->ip4.length =
+ clib_host_to_net_u16 (data_len + sizeof (icmp46_header_t) +
+ sizeof (ip4_header_t));
+
+ p0->current_length = clib_net_to_host_u16 (h0->ip4.length);
+
+ /* Calculate the IP and ICMP checksums */
+ h0->ip4.checksum = ip4_header_checksum (&(h0->ip4));
+ h0->icmp.checksum =
+ ~ip_csum_fold (ip_incremental_checksum (0, &(h0->icmp),
+ p0->current_length -
+ sizeof (ip4_header_t)));
+
+ /* Enqueue the packet right now */
+ f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+
+ return SEND_PING_OK;
+}
+
+
+static void
+print_ip6_icmp_reply (vlib_main_t * vm, u32 bi0)
+{
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ icmp6_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+ f64 rtt = vlib_time_now (vm) - h0->icmp_echo.time_sent;
+
+ vlib_cli_output (vm,
+ "%d bytes from %U: icmp_seq=%d ttl=%d time=%.4f ms",
+ clib_host_to_net_u16 (h0->ip6.payload_length),
+ format_ip6_address,
+ &h0->ip6.src_address,
+ clib_host_to_net_u16 (h0->icmp_echo.seq),
+ h0->ip6.hop_limit, rtt * 1000.0);
+}
+
+static void
+print_ip4_icmp_reply (vlib_main_t * vm, u32 bi0)
+{
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ icmp4_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+ f64 rtt = vlib_time_now (vm) - h0->icmp_echo.time_sent;
+ u32 rcvd_icmp_len =
+ clib_host_to_net_u16 (h0->ip4.length) -
+ (4 * (0xF & h0->ip4.ip_version_and_header_length));
+
+ vlib_cli_output (vm,
+ "%d bytes from %U: icmp_seq=%d ttl=%d time=%.4f ms",
+ rcvd_icmp_len,
+ format_ip4_address,
+ &h0->ip4.src_address,
+ clib_host_to_net_u16 (h0->icmp_echo.seq),
+ h0->ip4.ttl, rtt * 1000.0);
+}
+
+
+/*
+ * Perform the ping run with the given parameters in the current CLI process.
+ * Depending on whether pa4 or pa6 is set, runs IPv4 or IPv6 ping.
+ * The amusing side effect is of course if both are set, then both pings are sent.
+ * This behavior can be used to ping a dualstack host over IPv4 and IPv6 at once.
+ */
+
+static void
+run_ping_ip46_address (vlib_main_t * vm, u32 table_id, ip4_address_t * pa4,
+ ip6_address_t * pa6, u32 sw_if_index,
+ f64 ping_interval, u32 ping_repeat, u32 data_len,
+ u32 verbose)
+{
+ int i;
+ ping_main_t *pm = &ping_main;
+ uword curr_proc = vlib_current_process (vm);
+ u32 n_replies = 0;
+ u32 n_requests = 0;
+ ping_run_t *pr = 0;
+ u32 ping_run_index = 0;
+ u16 icmp_id;
+
+ static u32 rand_seed = 0;
+
+ if (PREDICT_FALSE (!rand_seed))
+ rand_seed = random_default_seed ();
+
+ icmp_id = random_u32 (&rand_seed) & 0xffff;
+
+ while (hash_get (pm->ping_run_by_icmp_id, icmp_id))
+ {
+ vlib_cli_output (vm, "ICMP ID collision at %d, incrementing", icmp_id);
+ icmp_id++;
+ }
+ pool_get (pm->ping_runs, pr);
+ ping_run_index = pr - pm->ping_runs;
+ pr->cli_process_id = curr_proc;
+ pr->icmp_id = icmp_id;
+ hash_set (pm->ping_run_by_icmp_id, icmp_id, ping_run_index);
+ for (i = 1; i <= ping_repeat; i++)
+ {
+ f64 sleep_interval;
+ f64 time_ping_sent = vlib_time_now (vm);
+ /* Reset pr: running ping in other process could have changed pm->ping_runs */
+ pr = vec_elt_at_index (pm->ping_runs, ping_run_index);
+ pr->curr_seq = i;
+ if (pa6 &&
+ (SEND_PING_OK ==
+ send_ip6_ping (vm, ping_main.ip6_main, table_id, pa6, sw_if_index,
+ i, icmp_id, data_len, verbose)))
+ {
+ n_requests++;
+ }
+ if (pa4 &&
+ (SEND_PING_OK ==
+ send_ip4_ping (vm, ping_main.ip4_main, table_id, pa4, sw_if_index,
+ i, icmp_id, data_len, verbose)))
+ {
+ n_requests++;
+ }
+ while ((i <= ping_repeat)
+ &&
+ ((sleep_interval =
+ time_ping_sent + ping_interval - vlib_time_now (vm)) > 0.0))
+ {
+ uword event_type, *event_data = 0;
+ vlib_process_wait_for_event_or_clock (vm, sleep_interval);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0: /* no events => timeout */
+ break;
+ case PING_RESPONSE_IP6:
+ {
+ int i;
+ for (i = 0; i < vec_len (event_data); i++)
+ {
+ u32 bi0 = event_data[0];
+ print_ip6_icmp_reply (vm, bi0);
+ n_replies++;
+ if (0 != bi0)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ }
+ }
+ }
+ break;
+ case PING_RESPONSE_IP4:
+ {
+ int i;
+ for (i = 0; i < vec_len (event_data); i++)
+ {
+ u32 bi0 = event_data[0];
+ print_ip4_icmp_reply (vm, bi0);
+ n_replies++;
+ if (0 != bi0)
+ {
+ vlib_buffer_free (vm, &bi0, 1);
+ }
+ }
+ }
+ break;
+ default:
+ /* someone pressed a key, abort */
+ vlib_cli_output (vm, "Aborted due to a keypress.");
+ i = 1 + ping_repeat;
+ break;
+ }
+ }
+ }
+ vlib_cli_output (vm, "\n");
+ {
+ float loss =
+ (0 ==
+ n_requests) ? 0 : 100.0 * ((float) n_requests -
+ (float) n_replies) / (float) n_requests;
+ vlib_cli_output (vm,
+ "Statistics: %u sent, %u received, %f%% packet loss\n",
+ n_requests, n_replies, loss);
+ /* Reset pr: running ping in other process could have changed pm->ping_runs */
+ pr = vec_elt_at_index (pm->ping_runs, ping_run_index);
+ hash_unset (pm->ping_run_by_icmp_id, icmp_id);
+ pool_put (pm->ping_runs, pr);
+ }
+}
+
+
+
+
+
+static clib_error_t *
+ping_ip_address (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ip4_address_t a4;
+ ip6_address_t a6;
+ clib_error_t *error = 0;
+ u32 ping_repeat = 5;
+ u8 ping_ip4, ping_ip6;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 data_len = PING_DEFAULT_DATA_LEN;
+ u32 verbose = 0;
+ f64 ping_interval = PING_DEFAULT_INTERVAL;
+ u32 sw_if_index, table_id;
+
+ table_id = 0;
+ ping_ip4 = ping_ip6 = 0;
+ sw_if_index = ~0;
+
+ if (unformat (input, "%U", unformat_ip4_address, &a4))
+ {
+ ping_ip4 = 1;
+ }
+ else if (unformat (input, "%U", unformat_ip6_address, &a6))
+ {
+ ping_ip6 = 1;
+ }
+ else if (unformat (input, "ipv4"))
+ {
+ if (unformat (input, "%U", unformat_ip4_address, &a4))
+ {
+ ping_ip4 = 1;
+ }
+ else
+ {
+ error =
+ clib_error_return (0,
+ "expecting IPv4 address but got `%U'",
+ format_unformat_error, input);
+ }
+ }
+ else if (unformat (input, "ipv6"))
+ {
+ if (unformat (input, "%U", unformat_ip6_address, &a6))
+ {
+ ping_ip6 = 1;
+ }
+ else
+ {
+ error =
+ clib_error_return (0,
+ "expecting IPv6 address but got `%U'",
+ format_unformat_error, input);
+ }
+ }
+ else
+ {
+ error =
+ clib_error_return (0,
+ "expecting IP4/IP6 address `%U'. Usage: ping <addr> [source <intf>] [size <datasz>] [repeat <count>] [verbose]",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ /* allow for the second AF in the same ping */
+ if (!ping_ip4 && (unformat (input, "ipv4")))
+ {
+ if (unformat (input, "%U", unformat_ip4_address, &a4))
+ {
+ ping_ip4 = 1;
+ }
+ }
+ else if (!ping_ip6 && (unformat (input, "ipv6")))
+ {
+ if (unformat (input, "%U", unformat_ip6_address, &a6))
+ {
+ ping_ip6 = 1;
+ }
+ }
+
+ /* parse the rest of the parameters in a cycle */
+ while (!unformat_eof (input, NULL))
+ {
+ if (unformat (input, "source"))
+ {
+ if (!unformat_user
+ (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error =
+ clib_error_return (0,
+ "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+ else if (unformat (input, "size"))
+ {
+ if (!unformat (input, "%u", &data_len))
+ {
+ error =
+ clib_error_return (0,
+ "expecting size but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+ else if (unformat (input, "table-id"))
+ {
+ if (!unformat (input, "du", &table_id))
+ {
+ error =
+ clib_error_return (0,
+ "expecting table-id but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+ else if (unformat (input, "interval"))
+ {
+ if (!unformat (input, "%f", &ping_interval))
+ {
+ error =
+ clib_error_return (0,
+ "expecting interval (floating point number) got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+ else if (unformat (input, "repeat"))
+ {
+ if (!unformat (input, "%u", &ping_repeat))
+ {
+ error =
+ clib_error_return (0,
+ "expecting repeat count but got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+ else if (unformat (input, "verbose"))
+ {
+ verbose = 1;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ run_ping_ip46_address (vm, table_id, ping_ip4 ? &a4 : NULL,
+ ping_ip6 ? &a6 : NULL, sw_if_index, ping_interval,
+ ping_repeat, data_len, verbose);
+done:
+ return error;
+}
+
+/*?
+ * This command sends an ICMP ECHO_REQUEST to network hosts. The address
+ * can be an IPv4 or IPv6 address (or both at the same time).
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how ping an IPv4 address:
+ * @cliexstart{ping 172.16.1.2 source GigabitEthernet2/0/0 repeat 2}
+ * 64 bytes from 172.16.1.2: icmp_seq=1 ttl=64 time=.1090 ms
+ * 64 bytes from 172.16.1.2: icmp_seq=2 ttl=64 time=.0914 ms
+ *
+ * Statistics: 2 sent, 2 received, 0% packet loss
+ * @cliexend
+ *
+ * Example of how ping both an IPv4 address and IPv6 address at the same time:
+ * @cliexstart{ping 172.16.1.2 ipv6 fe80::24a5:f6ff:fe9c:3a36 source GigabitEthernet2/0/0 repeat 2 verbose}
+ * Adjacency index: 10, sw_if_index: 1
+ * Adj: ip6-discover-neighbor
+ * Adj Interface: 0
+ * Forced set interface: 1
+ * Adjacency index: 0, sw_if_index: 4294967295
+ * Adj: ip4-miss
+ * Adj Interface: 0
+ * Forced set interface: 1
+ * Source address: 172.16.1.1
+ * 64 bytes from 172.16.1.2: icmp_seq=1 ttl=64 time=.1899 ms
+ * Adjacency index: 10, sw_if_index: 1
+ * Adj: ip6-discover-neighbor
+ * Adj Interface: 0
+ * Forced set interface: 1
+ * Adjacency index: 0, sw_if_index: 4294967295
+ * Adj: ip4-miss
+ * Adj Interface: 0
+ * Forced set interface: 1
+ * Source address: 172.16.1.1
+ * 64 bytes from 172.16.1.2: icmp_seq=2 ttl=64 time=.0910 ms
+ *
+ * Statistics: 4 sent, 2 received, 50% packet loss
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ping_command, static) =
+{
+ .path = "ping",
+ .function = ping_ip_address,
+ .short_help = "ping {<ip-addr> | ipv4 <ip4-addr> | ipv6 <ip6-addr>}"
+ " [ipv4 <ip4-addr> | ipv6 <ip6-addr>] [source <interface>]"
+ " [size <pktsize>] [interval <sec>] [repeat <cnt>] [table-id <id>]"
+ " [verbose]",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+ping_cli_init (vlib_main_t * vm)
+{
+ ping_main_t *pm = &ping_main;
+ pm->ip6_main = &ip6_main;
+ pm->ip4_main = &ip4_main;
+ icmp6_register_type (vm, ICMP6_echo_reply, ip6_icmp_echo_reply_node.index);
+ ip4_icmp_register_type (vm, ICMP4_echo_reply,
+ ip4_icmp_echo_reply_node.index);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ping_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ping.h b/src/vnet/ip/ping.h
new file mode 100644
index 00000000000..8f41f45c5f9
--- /dev/null
+++ b/src/vnet/ip/ping.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_ping_h
+#define included_vnet_ping_h
+
+
+#include <vnet/ip/ip.h>
+
+#include <vnet/ip/lookup.h>
+
+typedef enum
+{
+ PING_RESPONSE_IP6 = 42,
+ PING_RESPONSE_IP4,
+} ping_response_type_t;
+
+typedef enum
+{
+ SEND_PING_OK = 0,
+ SEND_PING_ALLOC_FAIL,
+ SEND_PING_NO_INTERFACE,
+ SEND_PING_NO_TABLE,
+} send_ip46_ping_result_t;
+
+/*
+ * Currently running ping command.
+ */
+typedef struct ping_run_t
+{
+ u16 icmp_id;
+ u16 curr_seq;
+ uword cli_process_id;
+} ping_run_t;
+
+typedef struct ping_main_t
+{
+ ip6_main_t *ip6_main;
+ ip4_main_t *ip4_main;
+ ping_run_t *ping_runs;
+ /* hash table to find back the CLI process for a reply */
+ // uword *cli_proc_by_icmp_id;
+ ping_run_t *ping_run_by_icmp_id;
+} ping_main_t;
+
+ping_main_t ping_main;
+
+#define PING_DEFAULT_DATA_LEN 60
+#define PING_DEFAULT_INTERVAL 1.0
+
+#define PING_MAXIMUM_DATA_SIZE 2000
+
+typedef CLIB_PACKED (struct
+ {
+ u16 id;
+ u16 seq; f64 time_sent; u8 data[PING_MAXIMUM_DATA_SIZE];
+ }) icmp46_echo_request_t;
+
+
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip6;
+ icmp46_header_t icmp; icmp46_echo_request_t icmp_echo;
+ }) icmp6_echo_request_header_t;
+
+typedef CLIB_PACKED (struct
+ {
+ ip4_header_t ip4;
+ icmp46_header_t icmp; icmp46_echo_request_t icmp_echo;
+ }) icmp4_echo_request_header_t;
+
+
+typedef struct
+{
+ u16 id;
+ u16 seq;
+ u8 bound;
+} icmp_echo_trace_t;
+
+
+
+
+typedef enum
+{
+ ICMP6_ECHO_REPLY_NEXT_DROP,
+ ICMP6_ECHO_REPLY_NEXT_PUNT,
+ ICMP6_ECHO_REPLY_N_NEXT,
+} icmp6_echo_reply_next_t;
+
+typedef enum
+{
+ ICMP4_ECHO_REPLY_NEXT_DROP,
+ ICMP4_ECHO_REPLY_NEXT_PUNT,
+ ICMP4_ECHO_REPLY_N_NEXT,
+} icmp4_echo_reply_next_t;
+
+#endif /* included_vnet_ping_h */
diff --git a/src/vnet/ip/ports.def b/src/vnet/ip/ports.def
new file mode 100644
index 00000000000..cdb754f5b2e
--- /dev/null
+++ b/src/vnet/ip/ports.def
@@ -0,0 +1,757 @@
+/*
+ * ip/ports.def: tcp/udp port definitions
+ *
+ * Eliot Dresselhaus
+ * August, 2005
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+PORT NUMBERS
+
+(last updated 18 October 2005)
+
+The port numbers are divided into three ranges: the Well Known Ports,
+the Registered Ports, and the Dynamic and/or Private Ports.
+
+The Well Known Ports are those from 0 through 1023.
+
+The Registered Ports are those from 1024 through 49151
+
+The Dynamic and/or Private Ports are those from 49152 through 65535
+
+
+************************************************************************
+* PLEASE NOTE THE FOLLOWING: *
+* *
+* 1. UNASSIGNED PORT NUMBERS SHOULD NOT BE USED. THE IANA WILL ASSIGN *
+* THE NUMBER FOR THE PORT AFTER YOUR APPLICATION HAS BEEN APPROVED. *
+* *
+* 2. ASSIGNMENT OF A PORT NUMBER DOES NOT IN ANY WAY IMPLY AN *
+* ENDORSEMENT OF AN APPLICATION OR PRODUCT, AND THE FACT THAT NETWORK *
+* TRAFFIC IS FLOWING TO OR FROM A REGISTERED PORT DOES NOT MEAN THAT *
+* IT IS "GOOD" TRAFFIC. FIREWALL AND SYSTEM ADMINISTRATORS SHOULD *
+* CHOOSE HOW TO CONFIGURE THEIR SYSTEMS BASED ON THEIR KNOWLEDGE OF *
+* THE TRAFFIC IN QUESTION, NOT WHETHER THERE IS A PORT NUMBER *
+* REGISTERED OR NOT. *
+************************************************************************
+
+
+WELL KNOWN PORT NUMBERS
+
+The Well Known Ports are assigned by the IANA and on most systems can
+only be used by system (or root) processes or by programs executed by
+privileged users.
+
+Ports are used in the TCP [RFC793] to name the ends of logical
+connections which carry long term conversations. For the purpose of
+providing services to unknown callers, a service contact port is
+defined. This list specifies the port used by the server process as
+its contact port. The contact port is sometimes called the
+"well-known port".
+
+To the extent possible, these same port assignments are used with the
+UDP [RFC768].
+
+The range for assigned ports managed by the IANA is 0-1023.
+*/
+ip_port (TCPMUX, 1)
+ip_port (COMPRESS_NET_MANAGEMENT, 2)
+ip_port (COMPRESS_NET, 3)
+ip_port (RJE, 5)
+ip_port (ECHO, 7)
+ip_port (DISCARD, 9)
+ip_port (SYSTAT, 11)
+ip_port (DAYTIME, 13)
+ip_port (QOTD, 17)
+ip_port (MSP, 18)
+ip_port (CHARGEN, 19)
+ip_port (FTP_DATA, 20)
+ip_port (FTP, 21)
+ip_port (SSH, 22)
+ip_port (TELNET, 23)
+ip_port (SMTP, 25)
+ip_port (NSW_FE, 27)
+ip_port (MSG_ICP, 29)
+ip_port (MSG_AUTH, 31)
+ip_port (DSP, 33)
+ip_port (TIME, 37)
+ip_port (RAP, 38)
+ip_port (RLP, 39)
+ip_port (GRAPHICS, 41)
+ip_port (NAME, 42)
+ip_port (NAMESERVER, 42)
+ip_port (NICNAME, 43)
+ip_port (MPM_FLAGS, 44)
+ip_port (MPM, 45)
+ip_port (MPM_SND, 46)
+ip_port (NI_FTP, 47)
+ip_port (AUDITD, 48)
+ip_port (TACACS, 49)
+ip_port (RE_MAIL_CK, 50)
+ip_port (LA_MAINT, 51)
+ip_port (XNS_TIME, 52)
+ip_port (DNS, 53)
+ip_port (XNS_CH, 54)
+ip_port (ISI_GL, 55)
+ip_port (XNS_AUTH, 56)
+ip_port (XNS_MAIL, 58)
+ip_port (NI_MAIL, 61)
+ip_port (ACAS, 62)
+ip_port (WHOIS_PLUS_PLUS, 63)
+ip_port (COVIA, 64)
+ip_port (TACACS_DS, 65)
+ip_port (ORACLE_SQL_NET, 66)
+ip_port (BOOTPS, 67)
+ip_port (BOOTPC, 68)
+ip_port (TFTP, 69)
+ip_port (GOPHER, 70)
+ip_port (NETRJS_1, 71)
+ip_port (NETRJS_2, 72)
+ip_port (NETRJS_3, 73)
+ip_port (NETRJS_4, 74)
+ip_port (DEOS, 76)
+ip_port (VETTCP, 78)
+ip_port (FINGER, 79)
+ip_port (WWW, 80)
+ip_port (HOSTS2_NS, 81)
+ip_port (XFER, 82)
+ip_port (MIT_ML_DEV, 83)
+ip_port (CTF, 84)
+ip_port (MIT_ML_DEV1, 85)
+ip_port (MFCOBOL, 86)
+ip_port (KERBEROS, 88)
+ip_port (SU_MIT_TG, 89)
+ip_port (DNSIX, 90)
+ip_port (MIT_DOV, 91)
+ip_port (NPP, 92)
+ip_port (DCP, 93)
+ip_port (OBJCALL, 94)
+ip_port (SUPDUP, 95)
+ip_port (DIXIE, 96)
+ip_port (SWIFT_RVF, 97)
+ip_port (TACNEWS, 98)
+ip_port (METAGRAM, 99)
+ip_port (NEWACCT, 100)
+ip_port (HOSTNAME, 101)
+ip_port (ISO_TSAP, 102)
+ip_port (GPPITNP, 103)
+ip_port (ACR_NEMA, 104)
+ip_port (CSO, 105)
+ip_port (CSNET_NS, 105)
+ip_port (3COM_TSMUX, 106)
+ip_port (RTELNET, 107)
+ip_port (SNAGAS, 108)
+ip_port (POP2, 109)
+ip_port (POP3, 110)
+ip_port (SUNRPC, 111)
+ip_port (MCIDAS, 112)
+ip_port (IDENT, 113)
+ip_port (SFTP, 115)
+ip_port (ANSANOTIFY, 116)
+ip_port (UUCP_PATH, 117)
+ip_port (SQLSERV, 118)
+ip_port (NNTP, 119)
+ip_port (CFDPTKT, 120)
+ip_port (ERPC, 121)
+ip_port (SMAKYNET, 122)
+ip_port (NTP, 123)
+ip_port (ANSATRADER, 124)
+ip_port (LOCUS_MAP, 125)
+ip_port (NXEDIT, 126)
+ip_port (LOCUS_CON, 127)
+ip_port (GSS_XLICEN, 128)
+ip_port (PWDGEN, 129)
+ip_port (CISCO_FNA, 130)
+ip_port (CISCO_TNA, 131)
+ip_port (CISCO_SYS, 132)
+ip_port (STATSRV, 133)
+ip_port (INGRES_NET, 134)
+ip_port (EPMAP, 135)
+ip_port (PROFILE, 136)
+ip_port (NETBIOS_NS, 137)
+ip_port (NETBIOS_DGM, 138)
+ip_port (NETBIOS_SSN, 139)
+ip_port (EMFIS_DATA, 140)
+ip_port (EMFIS_CNTL, 141)
+ip_port (BL_IDM, 142)
+ip_port (IMAP, 143)
+ip_port (UMA, 144)
+ip_port (UAAC, 145)
+ip_port (ISO_TP0, 146)
+ip_port (ISO_IP, 147)
+ip_port (JARGON, 148)
+ip_port (AED_512, 149)
+ip_port (SQL_NET, 150)
+ip_port (HEMS, 151)
+ip_port (BFTP, 152)
+ip_port (SGMP, 153)
+ip_port (NETSC_PROD, 154)
+ip_port (NETSC_DEV, 155)
+ip_port (SQLSRV, 156)
+ip_port (KNET_CMP, 157)
+ip_port (PCMAIL_SRV, 158)
+ip_port (NSS_ROUTING, 159)
+ip_port (SGMP_TRAPS, 160)
+ip_port (SNMP, 161)
+ip_port (SNMPTRAP, 162)
+ip_port (CMIP_MAN, 163)
+ip_port (CMIP_AGENT, 164)
+ip_port (XNS_COURIER, 165)
+ip_port (S_NET, 166)
+ip_port (NAMP, 167)
+ip_port (RSVD, 168)
+ip_port (SEND, 169)
+ip_port (PRINT_SRV, 170)
+ip_port (MULTIPLEX, 171)
+ip_port (CL1, 172)
+ip_port (XYPLEX_MUX, 173)
+ip_port (MAILQ, 174)
+ip_port (VMNET, 175)
+ip_port (GENRAD_MUX, 176)
+ip_port (XDMCP, 177)
+ip_port (NEXTSTEP, 178)
+ip_port (BGP, 179)
+ip_port (RIS, 180)
+ip_port (UNIFY, 181)
+ip_port (AUDIT, 182)
+ip_port (OCBINDER, 183)
+ip_port (OCSERVER, 184)
+ip_port (REMOTE_KIS, 185)
+ip_port (KIS, 186)
+ip_port (ACI, 187)
+ip_port (MUMPS, 188)
+ip_port (QFT, 189)
+ip_port (GACP, 190)
+ip_port (PROSPERO, 191)
+ip_port (OSU_NMS, 192)
+ip_port (SRMP, 193)
+ip_port (IRC, 194)
+ip_port (DN6_NLM_AUD, 195)
+ip_port (DN6_SMM_RED, 196)
+ip_port (DLS, 197)
+ip_port (DLS_MON, 198)
+ip_port (SMUX, 199)
+ip_port (SRC, 200)
+ip_port (AT_RTMP, 201)
+ip_port (AT_NBP, 202)
+ip_port (AT_3, 203)
+ip_port (AT_ECHO, 204)
+ip_port (AT_5, 205)
+ip_port (AT_ZIS, 206)
+ip_port (AT_7, 207)
+ip_port (AT_8, 208)
+ip_port (QMTP, 209)
+ip_port (Z39_50, 210)
+ip_port (TI914CG, 211)
+ip_port (ANET, 212)
+ip_port (IPX, 213)
+ip_port (VMPWSCS, 214)
+ip_port (SOFTPC, 215)
+ip_port (CAILIC, 216)
+ip_port (DBASE, 217)
+ip_port (MPP, 218)
+ip_port (UARPS, 219)
+ip_port (IMAP3, 220)
+ip_port (FLN_SPX, 221)
+ip_port (RSH_SPX, 222)
+ip_port (CDC, 223)
+ip_port (MASQDIALER, 224)
+ip_port (DIRECT, 242)
+ip_port (SUR_MEAS, 243)
+ip_port (INBUSINESS, 244)
+ip_port (LINK, 245)
+ip_port (DSP3270, 246)
+ip_port (SUBNTBCST_TFTP, 247)
+ip_port (BHFHS, 248)
+ip_port (RAP1, 256)
+ip_port (SET, 257)
+ip_port (YAK_CHAT, 258)
+ip_port (ESRO_GEN, 259)
+ip_port (OPENPORT, 260)
+ip_port (NSIIOPS, 261)
+ip_port (ARCISDMS, 262)
+ip_port (HDAP, 263)
+ip_port (BGMP, 264)
+ip_port (X_BONE_CTL, 265)
+ip_port (SST, 266)
+ip_port (TD_SERVICE, 267)
+ip_port (TD_REPLICA, 268)
+ip_port (HTTP_MGMT, 280)
+ip_port (PERSONAL_LINK, 281)
+ip_port (CABLEPORT_AX, 282)
+ip_port (RESCAP, 283)
+ip_port (CORERJD, 284)
+ip_port (FXP, 286)
+ip_port (K_BLOCK, 287)
+ip_port (NOVASTORBAKCUP, 308)
+ip_port (ENTRUSTTIME, 309)
+ip_port (BHMDS, 310)
+ip_port (ASIP_WEBADMIN, 311)
+ip_port (VSLMP, 312)
+ip_port (MAGENTA_LOGIC, 313)
+ip_port (OPALIS_ROBOT, 314)
+ip_port (DPSI, 315)
+ip_port (DECAUTH, 316)
+ip_port (ZANNET, 317)
+ip_port (PKIX_TIMESTAMP, 318)
+ip_port (PTP_EVENT, 319)
+ip_port (PTP_GENERAL, 320)
+ip_port (PIP, 321)
+ip_port (RTSPS, 322)
+ip_port (TEXAR, 333)
+ip_port (PDAP, 344)
+ip_port (PAWSERV, 345)
+ip_port (ZSERV, 346)
+ip_port (FATSERV, 347)
+ip_port (CSI_SGWP, 348)
+ip_port (MFTP, 349)
+ip_port (MATIP_TYPE_A, 350)
+ip_port (MATIP_TYPE_B, 351)
+ip_port (BHOETTY, 351)
+ip_port (DTAG_STE_SB, 352)
+ip_port (BHOEDAP4, 352)
+ip_port (NDSAUTH, 353)
+ip_port (BH611, 354)
+ip_port (DATEX_ASN, 355)
+ip_port (CLOANTO_NET_1, 356)
+ip_port (BHEVENT, 357)
+ip_port (SHRINKWRAP, 358)
+ip_port (NSRMP, 359)
+ip_port (SCOI2ODIALOG, 360)
+ip_port (SEMANTIX, 361)
+ip_port (SRSSEND, 362)
+ip_port (RSVP_TUNNEL, 363)
+ip_port (AURORA_CMGR, 364)
+ip_port (DTK, 365)
+ip_port (ODMR, 366)
+ip_port (MORTGAGEWARE, 367)
+ip_port (QBIKGDP, 368)
+ip_port (RPC2PORTMAP, 369)
+ip_port (CODAAUTH2, 370)
+ip_port (CLEARCASE, 371)
+ip_port (ULISTPROC, 372)
+ip_port (LEGENT_1, 373)
+ip_port (LEGENT_2, 374)
+ip_port (HASSLE, 375)
+ip_port (NIP, 376)
+ip_port (TNETOS, 377)
+ip_port (DSETOS, 378)
+ip_port (IS99C, 379)
+ip_port (IS99S, 380)
+ip_port (HP_COLLECTOR, 381)
+ip_port (HP_MANAGED_NODE, 382)
+ip_port (HP_ALARM_MGR, 383)
+ip_port (ARNS, 384)
+ip_port (IBM_APP, 385)
+ip_port (ASA, 386)
+ip_port (AURP, 387)
+ip_port (UNIDATA_LDM, 388)
+ip_port (LDAP, 389)
+ip_port (UIS, 390)
+ip_port (SYNOTICS_RELAY, 391)
+ip_port (SYNOTICS_BROKER, 392)
+ip_port (META5, 393)
+ip_port (EMBL_NDT, 394)
+ip_port (NETCP, 395)
+ip_port (NETWARE_IP, 396)
+ip_port (MPTN, 397)
+ip_port (KRYPTOLAN, 398)
+ip_port (ISO_TSAP_C2, 399)
+ip_port (WORK_SOL, 400)
+ip_port (UPS, 401)
+ip_port (GENIE, 402)
+ip_port (DECAP, 403)
+ip_port (NCED, 404)
+ip_port (NCLD, 405)
+ip_port (IMSP, 406)
+ip_port (TIMBUKTU, 407)
+ip_port (PRM_SM, 408)
+ip_port (PRM_NM, 409)
+ip_port (DECLADEBUG, 410)
+ip_port (RMT, 411)
+ip_port (SYNOPTICS_TRAP, 412)
+ip_port (SMSP, 413)
+ip_port (INFOSEEK, 414)
+ip_port (BNET, 415)
+ip_port (SILVERPLATTER, 416)
+ip_port (ONMUX, 417)
+ip_port (HYPER_G, 418)
+ip_port (ARIEL1, 419)
+ip_port (SMPTE, 420)
+ip_port (ARIEL2, 421)
+ip_port (ARIEL3, 422)
+ip_port (OPC_JOB_START, 423)
+ip_port (OPC_JOB_TRACK, 424)
+ip_port (ICAD_EL, 425)
+ip_port (SMARTSDP, 426)
+ip_port (SVRLOC, 427)
+ip_port (OCS_CMU, 428)
+ip_port (OCS_AMU, 429)
+ip_port (UTMPSD, 430)
+ip_port (UTMPCD, 431)
+ip_port (IASD, 432)
+ip_port (NNSP, 433)
+ip_port (MOBILEIP_AGENT, 434)
+ip_port (MOBILIP_MN, 435)
+ip_port (DNA_CML, 436)
+ip_port (COMSCM, 437)
+ip_port (DSFGW, 438)
+ip_port (DASP, 439)
+ip_port (SGCP, 440)
+ip_port (DECVMS_SYSMGT, 441)
+ip_port (CVC_HOSTD, 442)
+ip_port (HTTPS, 443)
+ip_port (SNPP, 444)
+ip_port (MICROSOFT_DS, 445)
+ip_port (DDM_RDB, 446)
+ip_port (DDM_DFM, 447)
+ip_port (DDM_SSL, 448)
+ip_port (AS_SERVERMAP, 449)
+ip_port (TSERVER, 450)
+ip_port (SFS_SMP_NET, 451)
+ip_port (SFS_CONFIG, 452)
+ip_port (CREATIVESERVER, 453)
+ip_port (CONTENTSERVER, 454)
+ip_port (CREATIVEPARTNR, 455)
+ip_port (MACON_TCP, 456)
+ip_port (SCOHELP, 457)
+ip_port (APPLEQTC, 458)
+ip_port (AMPR_RCMD, 459)
+ip_port (SKRONK, 460)
+ip_port (DATASURFSRV, 461)
+ip_port (DATASURFSRVSEC, 462)
+ip_port (ALPES, 463)
+ip_port (KPASSWD, 464)
+ip_port (URD, 465)
+ip_port (DIGITAL_VRC, 466)
+ip_port (MYLEX_MAPD, 467)
+ip_port (PHOTURIS, 468)
+ip_port (RCP, 469)
+ip_port (SCX_PROXY, 470)
+ip_port (MONDEX, 471)
+ip_port (LJK_LOGIN, 472)
+ip_port (HYBRID_POP, 473)
+ip_port (TN_TL_W1, 474)
+ip_port (TCPNETHASPSRV, 475)
+ip_port (TN_TL_FD1, 476)
+ip_port (SS7NS, 477)
+ip_port (SPSC, 478)
+ip_port (IAFSERVER, 479)
+ip_port (IAFDBASE, 480)
+ip_port (PH, 481)
+ip_port (BGS_NSI, 482)
+ip_port (ULPNET, 483)
+ip_port (INTEGRA_SME, 484)
+ip_port (POWERBURST, 485)
+ip_port (AVIAN, 486)
+ip_port (SAFT, 487)
+ip_port (GSS_HTTP, 488)
+ip_port (NEST_PROTOCOL, 489)
+ip_port (MICOM_PFS, 490)
+ip_port (GO_LOGIN, 491)
+ip_port (TICF_1, 492)
+ip_port (TICF_2, 493)
+ip_port (POV_RAY, 494)
+ip_port (INTECOURIER, 495)
+ip_port (PIM_RP_DISC, 496)
+ip_port (DANTZ, 497)
+ip_port (SIAM, 498)
+ip_port (ISO_ILL, 499)
+ip_port (ISAKMP, 500)
+ip_port (STMF, 501)
+ip_port (ASA_APPL_PROTO, 502)
+ip_port (INTRINSA, 503)
+ip_port (CITADEL, 504)
+ip_port (MAILBOX_LM, 505)
+ip_port (OHIMSRV, 506)
+ip_port (CRS, 507)
+ip_port (XVTTP, 508)
+ip_port (SNARE, 509)
+ip_port (FCP, 510)
+ip_port (PASSGO, 511)
+ip_port (EXEC, 512)
+ip_port (LOGIN, 513)
+ip_port (SHELL, 514)
+ip_port (PRINTER, 515)
+ip_port (VIDEOTEX, 516)
+ip_port (TALK, 517)
+ip_port (NTALK, 518)
+ip_port (UTIME, 519)
+ip_port (EFS, 520)
+ip_port (RIPNG, 521)
+ip_port (ULP, 522)
+ip_port (IBM_DB2, 523)
+ip_port (NCP, 524)
+ip_port (TIMED, 525)
+ip_port (TEMPO, 526)
+ip_port (STX, 527)
+ip_port (CUSTIX, 528)
+ip_port (IRC_SERV, 529)
+ip_port (COURIER, 530)
+ip_port (CONFERENCE, 531)
+ip_port (NETNEWS, 532)
+ip_port (NETWALL, 533)
+ip_port (MM_ADMIN, 534)
+ip_port (IIOP, 535)
+ip_port (OPALIS_RDV, 536)
+ip_port (NMSP, 537)
+ip_port (GDOMAP, 538)
+ip_port (APERTUS_LDP, 539)
+ip_port (UUCP, 540)
+ip_port (UUCP_RLOGIN, 541)
+ip_port (COMMERCE, 542)
+ip_port (KLOGIN, 543)
+ip_port (KSHELL, 544)
+ip_port (APPLEQTCSRVR, 545)
+ip_port (DHCPV6_CLIENT, 546)
+ip_port (DHCPV6_SERVER, 547)
+ip_port (AFPOVERTCP, 548)
+ip_port (IDFP, 549)
+ip_port (NEW_RWHO, 550)
+ip_port (CYBERCASH, 551)
+ip_port (DEVSHR_NTS, 552)
+ip_port (PIRP, 553)
+ip_port (RTSP, 554)
+ip_port (DSF, 555)
+ip_port (REMOTEFS, 556)
+ip_port (OPENVMS_SYSIPC, 557)
+ip_port (SDNSKMP, 558)
+ip_port (TEEDTAP, 559)
+ip_port (RMONITOR, 560)
+ip_port (MONITOR, 561)
+ip_port (CHSHELL, 562)
+ip_port (NNTPS, 563)
+ip_port (9PFS, 564)
+ip_port (WHOAMI, 565)
+ip_port (STREETTALK, 566)
+ip_port (BANYAN_RPC, 567)
+ip_port (MS_SHUTTLE, 568)
+ip_port (MS_ROME, 569)
+ip_port (METER, 570)
+ip_port (METER1, 571)
+ip_port (SONAR, 572)
+ip_port (BANYAN_VIP, 573)
+ip_port (FTP_AGENT, 574)
+ip_port (VEMMI, 575)
+ip_port (IPCD, 576)
+ip_port (VNAS, 577)
+ip_port (IPDD, 578)
+ip_port (DECBSRV, 579)
+ip_port (SNTP_HEARTBEAT, 580)
+ip_port (BDP, 581)
+ip_port (SCC_SECURITY, 582)
+ip_port (PHILIPS_VC, 583)
+ip_port (KEYSERVER, 584)
+ip_port (IMAP4_SSL, 585)
+ip_port (PASSWORD_CHG, 586)
+ip_port (SUBMISSION, 587)
+ip_port (CAL, 588)
+ip_port (EYELINK, 589)
+ip_port (TNS_CML, 590)
+ip_port (HTTP_ALT, 591)
+ip_port (EUDORA_SET, 592)
+ip_port (HTTP_RPC_EPMAP, 593)
+ip_port (TPIP, 594)
+ip_port (CAB_PROTOCOL, 595)
+ip_port (SMSD, 596)
+ip_port (PTCNAMESERVICE, 597)
+ip_port (SCO_WEBSRVRMG3, 598)
+ip_port (ACP, 599)
+ip_port (IPCSERVER, 600)
+ip_port (SYSLOG_CONN, 601)
+ip_port (XMLRPC_BEEP, 602)
+ip_port (IDXP, 603)
+ip_port (TUNNEL, 604)
+ip_port (SOAP_BEEP, 605)
+ip_port (URM, 606)
+ip_port (NQS, 607)
+ip_port (SIFT_UFT, 608)
+ip_port (NPMP_TRAP, 609)
+ip_port (NPMP_LOCAL, 610)
+ip_port (NPMP_GUI, 611)
+ip_port (HMMP_IND, 612)
+ip_port (HMMP_OP, 613)
+ip_port (SSHELL, 614)
+ip_port (SCO_INETMGR, 615)
+ip_port (SCO_SYSMGR, 616)
+ip_port (SCO_DTMGR, 617)
+ip_port (DEI_ICDA, 618)
+ip_port (COMPAQ_EVM, 619)
+ip_port (SCO_WEBSRVRMGR, 620)
+ip_port (ESCP_IP, 621)
+ip_port (COLLABORATOR, 622)
+ip_port (ASF_RMCP, 623)
+ip_port (CRYPTOADMIN, 624)
+ip_port (DEC_DLM, 625)
+ip_port (ASIA, 626)
+ip_port (PASSGO_TIVOLI, 627)
+ip_port (QMQP, 628)
+ip_port (3COM_AMP3, 629)
+ip_port (RDA, 630)
+ip_port (IPP, 631)
+ip_port (BMPP, 632)
+ip_port (SERVSTAT, 633)
+ip_port (GINAD, 634)
+ip_port (RLZDBASE, 635)
+ip_port (LDAPS, 636)
+ip_port (LANSERVER, 637)
+ip_port (MCNS_SEC, 638)
+ip_port (MSDP, 639)
+ip_port (ENTRUST_SPS, 640)
+ip_port (REPCMD, 641)
+ip_port (ESRO_EMSDP, 642)
+ip_port (SANITY, 643)
+ip_port (DWR, 644)
+ip_port (PSSC, 645)
+ip_port (LDP, 646)
+ip_port (DHCP_FAILOVER, 647)
+ip_port (RRP, 648)
+ip_port (CADVIEW_3D, 649)
+ip_port (OBEX, 650)
+ip_port (IEEE_MMS, 651)
+ip_port (HELLO_PORT, 652)
+ip_port (REPSCMD, 653)
+ip_port (AODV, 654)
+ip_port (TINC, 655)
+ip_port (SPMP, 656)
+ip_port (RMC, 657)
+ip_port (TENFOLD, 658)
+ip_port (MAC_SRVR_ADMIN, 660)
+ip_port (HAP, 661)
+ip_port (PFTP, 662)
+ip_port (PURENOISE, 663)
+ip_port (ASF_SECURE_RMCP, 664)
+ip_port (SUN_DR, 665)
+ip_port (MDQS, 666)
+ip_port (DOOM, 666)
+ip_port (DISCLOSE, 667)
+ip_port (MECOMM, 668)
+ip_port (MEREGISTER, 669)
+ip_port (VACDSM_SWS, 670)
+ip_port (VACDSM_APP, 671)
+ip_port (VPPS_QUA, 672)
+ip_port (CIMPLEX, 673)
+ip_port (ACAP, 674)
+ip_port (DCTP, 675)
+ip_port (VPPS_VIA, 676)
+ip_port (VPP, 677)
+ip_port (GGF_NCP, 678)
+ip_port (MRM, 679)
+ip_port (ENTRUST_AAAS, 680)
+ip_port (ENTRUST_AAMS, 681)
+ip_port (XFR, 682)
+ip_port (CORBA_IIOP, 683)
+ip_port (CORBA_IIOP_SSL, 684)
+ip_port (MDC_PORTMAPPER, 685)
+ip_port (HCP_WISMAR, 686)
+ip_port (ASIPREGISTRY, 687)
+ip_port (REALM_RUSD, 688)
+ip_port (NMAP, 689)
+ip_port (VATP, 690)
+ip_port (MSEXCH_ROUTING, 691)
+ip_port (HYPERWAVE_ISP, 692)
+ip_port (CONNENDP, 693)
+ip_port (HA_CLUSTER, 694)
+ip_port (IEEE_MMS_SSL, 695)
+ip_port (RUSHD, 696)
+ip_port (UUIDGEN, 697)
+ip_port (OLSR, 698)
+ip_port (ACCESSNETWORK, 699)
+ip_port (EPP, 700)
+ip_port (LMP, 701)
+ip_port (IRIS_BEEP, 702)
+ip_port (ELCSD, 704)
+ip_port (AGENTX, 705)
+ip_port (SILC, 706)
+ip_port (BORLAND_DSJ, 707)
+ip_port (ENTRUST_KMSH, 709)
+ip_port (ENTRUST_ASH, 710)
+ip_port (CISCO_TDP, 711)
+ip_port (TBRPF, 712)
+ip_port (NETVIEWDM1, 729)
+ip_port (NETVIEWDM2, 730)
+ip_port (NETVIEWDM3, 731)
+ip_port (NETGW, 741)
+ip_port (NETRCS, 742)
+ip_port (FLEXLM, 744)
+ip_port (FUJITSU_DEV, 747)
+ip_port (RIS_CM, 748)
+ip_port (KERBEROS_ADM, 749)
+ip_port (RFILE, 750)
+ip_port (PUMP, 751)
+ip_port (QRH, 752)
+ip_port (RRH, 753)
+ip_port (TELL, 754)
+ip_port (NLOGIN, 758)
+ip_port (CON, 759)
+ip_port (NS, 760)
+ip_port (RXE, 761)
+ip_port (QUOTAD, 762)
+ip_port (CYCLESERV, 763)
+ip_port (OMSERV, 764)
+ip_port (WEBSTER, 765)
+ip_port (PHONEBOOK, 767)
+ip_port (VID, 769)
+ip_port (CADLOCK, 770)
+ip_port (RTIP, 771)
+ip_port (CYCLESERV2, 772)
+ip_port (SUBMIT, 773)
+ip_port (RPASSWD, 774)
+ip_port (ENTOMB, 775)
+ip_port (WPAGES, 776)
+ip_port (MULTILING_HTTP, 777)
+ip_port (WPGS, 780)
+ip_port (MDBS_DAEMON, 800)
+ip_port (DEVICE, 801)
+ip_port (FCP_UDP, 810)
+ip_port (ITM_MCELL_S, 828)
+ip_port (PKIX_3_CA_RA, 829)
+ip_port (DHCP_FAILOVER2, 847)
+ip_port (GDOI, 848)
+ip_port (ISCSI, 860)
+ip_port (RSYNC, 873)
+ip_port (ICLCNET_LOCATE, 886)
+ip_port (ICLCNET_SVINFO, 887)
+ip_port (ACCESSBUILDER, 888)
+ip_port (CDDBP, 888)
+ip_port (OMGINITIALREFS, 900)
+ip_port (SMPNAMERES, 901)
+ip_port (IDEAFARM_CHAT, 902)
+ip_port (IDEAFARM_CATCH, 903)
+ip_port (XACT_BACKUP, 911)
+ip_port (APEX_MESH, 912)
+ip_port (APEX_EDGE, 913)
+ip_port (FTPS_DATA, 989)
+ip_port (FTPS, 990)
+ip_port (NAS, 991)
+ip_port (TELNETS, 992)
+ip_port (IMAPS, 993)
+ip_port (IRCS, 994)
+ip_port (POP3S, 995)
+ip_port (VSINET, 996)
+ip_port (MAITRD, 997)
+ip_port (BUSBOY, 998)
+ip_port (GARCON, 999)
+ip_port (PUPROUTER, 999)
+ip_port (CADLOCK2, 1000)
+ip_port (SURF, 1010)
+
diff --git a/src/vnet/ip/protocols.def b/src/vnet/ip/protocols.def
new file mode 100644
index 00000000000..77fab31da05
--- /dev/null
+++ b/src/vnet/ip/protocols.def
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Emacs editing mode -*-C-*-
+
+From http://www.iana.org/assignments/protocol-numbers
+
+PROTOCOL NUMBERS
+
+(last updated 18 October 2004)
+
+In the Internet Protocol version 4 (IPv4) [RFC791] there is a field,
+called "Protocol", to identify the next level protocol. This is an 8
+bit field. In Internet Protocol version 6 (IPv6) [RFC1883] this field
+is called the "Next Header" field.
+*/
+ip_protocol (0, IP6_HOP_BY_HOP_OPTIONS)
+ip_protocol (1, ICMP)
+ip_protocol (2, IGMP)
+ip_protocol (3, GGP)
+ip_protocol (4, IP_IN_IP)
+ip_protocol (5, ST)
+ip_protocol (6, TCP)
+ip_protocol (7, CBT)
+ip_protocol (8, EGP)
+ip_protocol (9, IGP)
+ip_protocol (10, BBN_RCC_MON)
+ip_protocol (11, NVP_II)
+ip_protocol (12, PUP)
+ip_protocol (13, ARGUS)
+ip_protocol (14, EMCON)
+ip_protocol (15, XNET)
+ip_protocol (16, CHAOS)
+ip_protocol (17, UDP)
+ip_protocol (18, MUX)
+ip_protocol (19, DCN_MEAS)
+ip_protocol (20, HMP)
+ip_protocol (21, PRM)
+ip_protocol (22, XNS_IDP)
+ip_protocol (23, TRUNK_1)
+ip_protocol (24, TRUNK_2)
+ip_protocol (25, LEAF_1)
+ip_protocol (26, LEAF_2)
+ip_protocol (27, RDP)
+ip_protocol (28, IRTP)
+ip_protocol (29, ISO_TP4)
+ip_protocol (30, NETBLT)
+ip_protocol (31, MFE_NSP)
+ip_protocol (32, MERIT_INP)
+ip_protocol (33, SEP)
+ip_protocol (34, 3PC)
+ip_protocol (35, IDPR)
+ip_protocol (36, XTP)
+ip_protocol (37, DDP)
+ip_protocol (38, IDPR_CMTP)
+ip_protocol (39, TP)
+ip_protocol (40, IL)
+ip_protocol (41, IPV6)
+ip_protocol (42, SDRP)
+ip_protocol (43, IPV6_ROUTE)
+ip_protocol (44, IPV6_FRAGMENTATION)
+ip_protocol (45, IDRP)
+ip_protocol (46, RSVP)
+ip_protocol (47, GRE)
+ip_protocol (48, MHRP)
+ip_protocol (49, BNA)
+ip_protocol (50, IPSEC_ESP)
+ip_protocol (51, IPSEC_AH)
+ip_protocol (52, I_NLSP)
+ip_protocol (53, SWIPE)
+ip_protocol (54, NARP)
+ip_protocol (55, MOBILE)
+ip_protocol (56, TLSP)
+ip_protocol (57, SKIP)
+ip_protocol (58, ICMP6)
+ip_protocol (59, IP6_NONXT)
+ip_protocol (60, IP6_DESTINATION_OPTIONS)
+ip_protocol (62, CFTP)
+ip_protocol (64, SAT_EXPAK)
+ip_protocol (65, KRYPTOLAN)
+ip_protocol (66, RVD)
+ip_protocol (67, IPPC)
+ip_protocol (69, SAT_MON)
+ip_protocol (70, VISA)
+ip_protocol (71, IPCV)
+ip_protocol (72, CPNX)
+ip_protocol (73, CPHB)
+ip_protocol (74, WSN)
+ip_protocol (75, PVP)
+ip_protocol (76, BR_SAT_MON)
+ip_protocol (77, SUN_ND)
+ip_protocol (78, WB_MON)
+ip_protocol (79, WB_EXPAK)
+ip_protocol (80, ISO_IP)
+ip_protocol (81, VMTP)
+ip_protocol (82, SECURE_VMTP)
+ip_protocol (83, VINES)
+ip_protocol (84, TTP)
+ip_protocol (85, NSFNET_IGP)
+ip_protocol (86, DGP)
+ip_protocol (87, TCF)
+ip_protocol (88, EIGRP)
+ip_protocol (89, OSPF)
+ip_protocol (90, SPRITE_RPC)
+ip_protocol (91, LARP)
+ip_protocol (92, MTP)
+ip_protocol (93, AX)
+ip_protocol (94, IPIP)
+ip_protocol (95, MICP)
+ip_protocol (96, SCC_SP)
+ip_protocol (97, ETHERIP)
+ip_protocol (98, ENCAP)
+ip_protocol (100, GMTP)
+ip_protocol (101, IFMP)
+ip_protocol (102, PNNI)
+ip_protocol (103, PIM)
+ip_protocol (104, ARIS)
+ip_protocol (105, SCPS)
+ip_protocol (106, QNX)
+ip_protocol (107, A)
+ip_protocol (108, IPCOMP)
+ip_protocol (109, SNP)
+ip_protocol (110, COMPAQ_PEER)
+ip_protocol (111, IPX_IN_IP)
+ip_protocol (112, VRRP)
+ip_protocol (113, PGM)
+ip_protocol (115, L2TP)
+ip_protocol (116, DDX)
+ip_protocol (117, IATP)
+ip_protocol (118, STP)
+ip_protocol (119, SRP)
+ip_protocol (120, UTI)
+ip_protocol (121, SMP)
+ip_protocol (122, SM)
+ip_protocol (123, PTP)
+ip_protocol (124, ISIS)
+ip_protocol (125, FIRE)
+ip_protocol (126, CRTP)
+ip_protocol (127, CRUDP)
+ip_protocol (128, SSCOPMCE)
+ip_protocol (129, IPLT)
+ip_protocol (130, SPS)
+ip_protocol (131, PIPE)
+ip_protocol (132, SCTP)
+ip_protocol (133, FC)
+ip_protocol (134, RSVP_E2E_IGNORE)
+ip_protocol (135, MOBILITY)
+ip_protocol (136, UDP_LITE)
+ip_protocol (137, MPLS_IN_IP)
+ip_protocol (255, RESERVED)
+
diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c
new file mode 100644
index 00000000000..9c735128a3b
--- /dev/null
+++ b/src/vnet/ip/punt.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Local TCP/IP stack punt infrastructure.
+ *
+ * Provides a set of VPP nodes togather with the relevant APIs and CLI
+ * commands in order to adjust and dispatch packets from the VPP data plane
+ * to the local TCP/IP stack
+ */
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/udp.h>
+#include <vnet/ip/punt.h>
+
+#define foreach_punt_next \
+ _ (PUNT, "error-punt")
+
+typedef enum
+{
+#define _(s,n) PUNT_NEXT_##s,
+ foreach_punt_next
+#undef _
+ PUNT_N_NEXT,
+} punt_next_t;
+
+vlib_node_registration_t udp4_punt_node;
+vlib_node_registration_t udp6_punt_node;
+
+/** @brief IPv4/IPv6 UDP punt node main loop.
+
+ This is the main loop inline function for IPv4/IPv6 UDP punt
+ transition node.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+ @param is_ipv4 indicates if called for IPv4 or IPv6 node
+*/
+always_inline uword
+udp46_punt_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int is_ip4)
+{
+ u32 n_left_from, *from, *to_next;
+ word advance;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ /* udp[46]_lookup hands us the data payload, not the IP header */
+ if (is_ip4)
+ advance = -(sizeof (ip4_header_t) + sizeof (udp_header_t));
+ else
+ advance = -(sizeof (ip6_header_t) + sizeof (udp_header_t));
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, PUNT_NEXT_PUNT, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ vlib_buffer_advance (b0, advance);
+ b0->error = node->errors[PUNT_ERROR_UDP_PORT];
+ }
+
+ vlib_put_next_frame (vm, node, PUNT_NEXT_PUNT, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static char *punt_error_strings[] = {
+#define punt_error(n,s) s,
+#include "punt_error.def"
+#undef punt_error
+};
+
+/** @brief IPv4 UDP punt node.
+ @node ip4-udp-punt
+
+ This is the IPv4 UDP punt transition node. It is registered as a next
+ node for the "ip4-udp-lookup" handling UDP port(s) requested for punt.
+ The buffer's current data pointer is adjusted to the original packet
+ IPv4 header. All buffers are dispatched to "error-punt".
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: next index usage
+
+ @em Sets:
+ - <code>vnet_buffer(b)->current_data</code>
+ - <code>vnet_buffer(b)->current_len</code>
+
+ <em>Next Index:</em>
+ - Dispatches the packet to the "error-punt" node
+*/
+static uword
+udp4_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_punt_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+/** @brief IPv6 UDP punt node.
+ @node ip6-udp-punt
+
+ This is the IPv6 UDP punt transition node. It is registered as a next
+ node for the "ip6-udp-lookup" handling UDP port(s) requested for punt.
+ The buffer's current data pointer is adjusted to the original packet
+ IPv6 header. All buffers are dispatched to "error-punt".
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: next index usage
+
+ @em Sets:
+ - <code>vnet_buffer(b)->current_data</code>
+ - <code>vnet_buffer(b)->current_len</code>
+
+ <em>Next Index:</em>
+ - Dispatches the packet to the "error-punt" node
+*/
+static uword
+udp6_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp4_punt_node) = {
+ .function = udp4_punt,
+ .name = "ip4-udp-punt",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+
+ .n_next_nodes = PUNT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [PUNT_NEXT_##s] = n,
+ foreach_punt_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (udp4_punt_node, udp4_punt);
+
+VLIB_REGISTER_NODE (udp6_punt_node) = {
+ .function = udp6_punt,
+ .name = "ip6-udp-punt",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+
+ .n_next_nodes = PUNT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [PUNT_NEXT_##s] = n,
+ foreach_punt_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (udp6_punt_node, udp6_punt);;
+
+/**
+ * @brief Request IP traffic punt to the local TCP/IP stack.
+ *
+ * @em Note
+ * - UDP is the only protocol supported in the current implementation
+ * - When requesting UDP punt port number(s) must be specified
+ * - All TCP traffic is currently punted to the host by default
+ *
+ * @param vm vlib_main_t corresponding to the current thread
+ * @param ipv IP protcol version.
+ * 4 - IPv4, 6 - IPv6, ~0 for both IPv6 and IPv4
+ * @param protocol 8-bits L4 protocol value
+ * Only value of 17 (UDP) is currently supported
+ * @param port 16-bits L4 (TCP/IP) port number when applicable
+ *
+ * @returns 0 on success, non-zero value otherwise
+ */
+clib_error_t *
+vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port,
+ int is_add)
+{
+ /* For now we only support UDP punt */
+ if (protocol != IP_PROTOCOL_UDP)
+ return clib_error_return (0,
+ "only UDP protocol (%d) is supported, got %d",
+ IP_PROTOCOL_UDP, protocol);
+
+ if (ipv != (u8) ~ 0 && ipv != 4 && ipv != 6)
+ return clib_error_return (0, "IP version must be 4 or 6, got %d", ipv);
+
+ if (port == (u16) ~ 0)
+ {
+ if (ipv == 4 || ipv == (u8) ~ 0)
+ udp_punt_unknown (vm, 1, is_add);
+
+ if (ipv == 6 || ipv == (u8) ~ 0)
+ udp_punt_unknown (vm, 0, is_add);
+
+ return 0;
+ }
+
+ else if (is_add)
+ {
+ if (ipv == 4 || ipv == (u8) ~ 0)
+ udp_register_dst_port (vm, port, udp4_punt_node.index, 1);
+
+ if (ipv == 6 || ipv == (u8) ~ 0)
+ udp_register_dst_port (vm, port, udp6_punt_node.index, 0);
+
+ return 0;
+ }
+
+ else
+ return clib_error_return (0, "punt delete is not supported yet");
+}
+
+static clib_error_t *
+udp_punt_cli (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u32 udp_port;
+ int is_add = 1;
+ clib_error_t *error;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_add = 0;
+ if (unformat (input, "all"))
+ {
+ /* punt both IPv6 and IPv4 when used in CLI */
+ error = vnet_punt_add_del (vm, ~0, IP_PROTOCOL_UDP, ~0, is_add);
+ if (error)
+ clib_error_report (error);
+ }
+ else if (unformat (input, "%d", &udp_port))
+ {
+ /* punt both IPv6 and IPv4 when used in CLI */
+ error = vnet_punt_add_del (vm, ~0, IP_PROTOCOL_UDP,
+ udp_port, is_add);
+ if (error)
+ clib_error_report (error);
+ }
+ }
+
+ return 0;
+}
+
+/*?
+ * The set of '<em>set punt</em>' commands allows specific IP traffic to
+ * be punted to the host TCP/IP stack
+ *
+ * @em Note
+ * - UDP is the only protocol supported in the current implementation
+ * - All TCP traffic is currently punted to the host by default
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to request NTP traffic to be punted
+ * @cliexcmd{set punt udp 125}
+ *
+ * Example of how to request all 'unknown' UDP traffic to be punted
+ * @cliexcmd{set punt udp all}
+ *
+ * Example of how to stop all 'unknown' UDP traffic to be punted
+ * @cliexcmd{set punt udp del all}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (punt_udp_command, static) = {
+ .path = "set punt udp",
+ .short_help = "set punt udp [del] <all | port-num1 [port-num2 ...]>",
+ .function = udp_punt_cli,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h
new file mode 100644
index 00000000000..09a9d4c55bf
--- /dev/null
+++ b/src/vnet/ip/punt.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Definitions for punt infrastructure.
+ */
+#ifndef included_punt_h
+#define included_punt_h
+
+typedef enum
+{
+#define punt_error(n,s) PUNT_ERROR_##n,
+#include <vnet/ip/punt_error.def>
+#undef punt_error
+ PUNT_N_ERROR,
+} punt_error_t;
+
+
+clib_error_t *vnet_punt_add_del (vlib_main_t * vm, u8 ipv,
+ u8 protocol, u16 port, int is_add);
+
+#endif /* included_punt_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/punt_error.def b/src/vnet/ip/punt_error.def
new file mode 100644
index 00000000000..a76d7e7b817
--- /dev/null
+++ b/src/vnet/ip/punt_error.def
@@ -0,0 +1,19 @@
+/*
+ * punt_error.def: punt errors
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+punt_error (NONE, "no error")
+punt_error (UDP_PORT, "udp port punt")
diff --git a/src/vnet/ip/tcp_packet.h b/src/vnet/ip/tcp_packet.h
new file mode 100644
index 00000000000..ed402403592
--- /dev/null
+++ b/src/vnet/ip/tcp_packet.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip4/tcp_packet.h: TCP packet format (see RFC 793)
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_tcp_packet_h
+#define included_tcp_packet_h
+
+/* TCP flags bit 0 first. */
+#define foreach_tcp_flag \
+ _ (FIN) \
+ _ (SYN) \
+ _ (RST) \
+ _ (PSH) \
+ _ (ACK) \
+ _ (URG) \
+ _ (ECE) \
+ _ (CWR)
+
+enum
+{
+#define _(f) TCP_FLAG_BIT_##f,
+ foreach_tcp_flag
+#undef _
+ TCP_N_FLAG_BITS,
+
+#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f,
+ foreach_tcp_flag
+#undef _
+};
+
+typedef struct
+{
+ /* Source and destination port. */
+ union
+ {
+ union
+ {
+ struct
+ {
+ u16 src, dst;
+ };
+ u32 src_and_dst;
+ } ports;
+ u16 src_port, dst_port;
+ };
+
+ /* Sequence and acknowledgment number. */
+ u32 seq_number, ack_number;
+
+ /* Size of TCP header in 32-bit units plus 4 reserved bits. */
+ u8 tcp_header_u32s_and_reserved;
+
+ /* see foreach_tcp_flag for enumation of tcp flags. */
+ u8 flags;
+
+ /* Current window advertised by sender.
+ This is the number of bytes sender is willing to receive
+ right now. */
+ u16 window;
+
+ /* Checksum of TCP pseudo header and data. */
+ u16 checksum;
+
+ u16 urgent_pointer;
+} tcp_header_t;
+
+always_inline int
+tcp_header_bytes (tcp_header_t * t)
+{
+ return (t->tcp_header_u32s_and_reserved >> 4) * sizeof (u32);
+}
+
+/* TCP options. */
+typedef enum tcp_option_type
+{
+ TCP_OPTION_END = 0,
+ TCP_OPTION_NOP = 1,
+ TCP_OPTION_MSS = 2,
+ TCP_OPTION_WINDOW_SCALE = 3,
+ TCP_OPTION_SACK_PERMITTED = 4,
+ TCP_OPTION_SACK_BLOCK = 5,
+ TCP_OPTION_TIME_STAMP = 8,
+} tcp_option_type_t;
+
+/* All except NOP and END have 1 byte length field. */
+typedef struct
+{
+ tcp_option_type_t type:8;
+
+ /* Length of this option in bytes. */
+ u8 length;
+} tcp_option_with_length_t;
+
+#endif /* included_tcp_packet_h */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/udp.h b/src/vnet/ip/udp.h
new file mode 100644
index 00000000000..03c62e0b684
--- /dev/null
+++ b/src/vnet/ip/udp.h
@@ -0,0 +1,313 @@
+/*
+ * ip/udp.h: udp protocol
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_udp_h
+#define included_udp_h
+
+#include <vnet/vnet.h>
+#include <vnet/ip/udp_packet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/format.h>
+
+typedef enum
+{
+#define udp_error(n,s) UDP_ERROR_##n,
+#include <vnet/ip/udp_error.def>
+#undef udp_error
+ UDP_N_ERROR,
+} udp_error_t;
+
+#define foreach_udp4_dst_port \
+_ (67, dhcp_to_server) \
+_ (68, dhcp_to_client) \
+_ (500, ikev2) \
+_ (3784, bfd4) \
+_ (4341, lisp_gpe) \
+_ (4342, lisp_cp) \
+_ (4739, ipfix) \
+_ (4789, vxlan) \
+_ (4789, vxlan6) \
+_ (4790, vxlan_gpe) \
+_ (6633, vpath_3)
+
+
+#define foreach_udp6_dst_port \
+_ (547, dhcpv6_to_server) \
+_ (546, dhcpv6_to_client) \
+_ (3784, bfd6) \
+_ (4341, lisp_gpe6) \
+_ (4342, lisp_cp6) \
+_ (4790, vxlan6_gpe) \
+_ (6633, vpath6_3)
+
+typedef enum
+{
+#define _(n,f) UDP_DST_PORT_##f = n,
+ foreach_udp4_dst_port foreach_udp6_dst_port
+#undef _
+} udp_dst_port_t;
+
+typedef enum
+{
+#define _(n,f) UDP6_DST_PORT_##f = n,
+ foreach_udp6_dst_port
+#undef _
+} udp6_dst_port_t;
+
+typedef struct
+{
+ /* Name (a c string). */
+ char *name;
+
+ /* GRE protocol type in host byte order. */
+ udp_dst_port_t dst_port;
+
+ /* Node which handles this type. */
+ u32 node_index;
+
+ /* Next index for this type. */
+ u32 next_index;
+} udp_dst_port_info_t;
+
+typedef enum
+{
+ UDP_IP6 = 0,
+ UDP_IP4, /* the code is full of is_ip4... */
+ N_UDP_AF,
+} udp_af_t;
+
+typedef struct
+{
+ udp_dst_port_info_t *dst_port_infos[N_UDP_AF];
+
+ /* Hash tables mapping name/protocol to protocol info index. */
+ uword *dst_port_info_by_name[N_UDP_AF];
+ uword *dst_port_info_by_dst_port[N_UDP_AF];
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+} udp_main_t;
+
+always_inline udp_dst_port_info_t *
+udp_get_dst_port_info (udp_main_t * um, udp_dst_port_t dst_port, u8 is_ip4)
+{
+ uword *p = hash_get (um->dst_port_info_by_dst_port[is_ip4], dst_port);
+ return p ? vec_elt_at_index (um->dst_port_infos[is_ip4], p[0]) : 0;
+}
+
+format_function_t format_udp_header;
+format_function_t format_udp_rx_trace;
+
+unformat_function_t unformat_udp_header;
+
+void udp_register_dst_port (vlib_main_t * vm,
+ udp_dst_port_t dst_port,
+ u32 node_index, u8 is_ip4);
+
+void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
+
+always_inline void
+ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
+{
+ u16 new_l0;
+ udp_header_t *udp0;
+
+ if (is_ip4)
+ {
+ ip4_header_t *ip0;
+ ip_csum_t sum0;
+ u16 old_l0 = 0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* fix the <bleep>ing outer-IP checksum */
+ sum0 = ip0->checksum;
+ /* old_l0 always 0, see the rewrite setup */
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */ );
+ ip0->checksum = ip_csum_fold (sum0);
+ ip0->length = new_l0;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *) (ip0 + 1);
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof (*ip0));
+ udp0->length = new_l0;
+ }
+ else
+ {
+ ip6_header_t *ip0;
+ int bogus0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof (*ip0));
+ ip0->payload_length = new_l0;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *) (ip0 + 1);
+ udp0->length = new_l0;
+
+ udp0->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0);
+ ASSERT (bogus0 == 0);
+
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ }
+}
+
+always_inline void
+ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
+ u8 is_ip4)
+{
+ vlib_buffer_advance (b0, -ec_len);
+
+ if (is_ip4)
+ {
+ ip4_header_t *ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Apply the encap string. */
+ clib_memcpy (ip0, ec0, ec_len);
+ ip_udp_fixup_one (vm, b0, 1);
+ }
+ else
+ {
+ ip6_header_t *ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Apply the encap string. */
+ clib_memcpy (ip0, ec0, ec_len);
+ ip_udp_fixup_one (vm, b0, 0);
+ }
+}
+
+always_inline void
+ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1,
+ u8 * ec0, u8 * ec1, word ec_len, u8 is_v4)
+{
+ u16 new_l0, new_l1;
+ udp_header_t *udp0, *udp1;
+
+ ASSERT (_vec_len (ec0) == _vec_len (ec1));
+
+ vlib_buffer_advance (b0, -ec_len);
+ vlib_buffer_advance (b1, -ec_len);
+
+ if (is_v4)
+ {
+ ip4_header_t *ip0, *ip1;
+ ip_csum_t sum0, sum1;
+ u16 old_l0 = 0, old_l1 = 0;
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ /* Apply the encap string */
+ clib_memcpy (ip0, ec0, ec_len);
+ clib_memcpy (ip1, ec1, ec_len);
+
+ /* fix the <bleep>ing outer-IP checksum */
+ sum0 = ip0->checksum;
+ sum1 = ip1->checksum;
+
+ /* old_l0 always 0, see the rewrite setup */
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1));
+
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */ );
+ sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t,
+ length /* changed member */ );
+
+ ip0->checksum = ip_csum_fold (sum0);
+ ip1->checksum = ip_csum_fold (sum1);
+
+ ip0->length = new_l0;
+ ip1->length = new_l1;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *) (ip0 + 1);
+ udp1 = (udp_header_t *) (ip1 + 1);
+
+ new_l0 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
+ sizeof (*ip0));
+ new_l1 =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1) -
+ sizeof (*ip1));
+ udp0->length = new_l0;
+ udp1->length = new_l1;
+ }
+ else
+ {
+ ip6_header_t *ip0, *ip1;
+ int bogus0, bogus1;
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ /* Apply the encap string. */
+ clib_memcpy (ip0, ec0, ec_len);
+ clib_memcpy (ip1, ec1, ec_len);
+
+ new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
+ - sizeof (*ip0));
+ new_l1 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)
+ - sizeof (*ip1));
+ ip0->payload_length = new_l0;
+ ip1->payload_length = new_l1;
+
+ /* Fix UDP length */
+ udp0 = (udp_header_t *) (ip0 + 1);
+ udp1 = (udp_header_t *) (ip1 + 1);
+
+ udp0->length = new_l0;
+ udp1->length = new_l1;
+
+ udp0->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0);
+ udp1->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b1, ip1, &bogus1);
+ ASSERT (bogus0 == 0);
+ ASSERT (bogus1 == 0);
+
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ if (udp1->checksum == 0)
+ udp1->checksum = 0xffff;
+ }
+}
+
+#endif /* included_udp_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/udp_error.def b/src/vnet/ip/udp_error.def
new file mode 100644
index 00000000000..bfdae0acc77
--- /dev/null
+++ b/src/vnet/ip/udp_error.def
@@ -0,0 +1,21 @@
+/*
+ * udp_error.def: udp errors
+ *
+ * Copyright (c) 2013-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+udp_error (NONE, "no error")
+udp_error (NO_LISTENER, "no listener for dst port")
+udp_error (LENGTH_ERROR, "UDP packets with length errors")
+udp_error (PUNT, "no listener punt")
diff --git a/src/vnet/ip/udp_format.c b/src/vnet/ip/udp_format.c
new file mode 100644
index 00000000000..abdf561e8c9
--- /dev/null
+++ b/src/vnet/ip/udp_format.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/udp_format.c: udp formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format UDP header. */
+u8 *
+format_udp_header (u8 * s, va_list * args)
+{
+ udp_header_t *udp = va_arg (*args, udp_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ uword indent;
+ u32 header_bytes = sizeof (udp[0]);
+
+ /* Nothing to do. */
+ if (max_header_bytes < sizeof (udp[0]))
+ return format (s, "UDP header truncated");
+
+ indent = format_get_indent (s);
+ indent += 2;
+
+ s = format (s, "UDP: %d -> %d",
+ clib_net_to_host_u16 (udp->src_port),
+ clib_net_to_host_u16 (udp->dst_port));
+
+ s = format (s, "\n%Ulength %d, checksum 0x%04x",
+ format_white_space, indent,
+ clib_net_to_host_u16 (udp->length),
+ clib_net_to_host_u16 (udp->checksum));
+
+ /* Recurse into next protocol layer. */
+ if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+ {
+ ip_main_t *im = &ip_main;
+ tcp_udp_port_info_t *pi;
+
+ pi = ip_get_tcp_udp_port_info (im, udp->dst_port);
+
+ if (pi && pi->format_header)
+ s = format (s, "\n%U%U",
+ format_white_space, indent - 2, pi->format_header,
+ /* next protocol header */ (udp + 1),
+ max_header_bytes - sizeof (udp[0]));
+ }
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/udp_init.c b/src/vnet/ip/udp_init.c
new file mode 100644
index 00000000000..1241ca4ab32
--- /dev/null
+++ b/src/vnet/ip/udp_init.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/udp_init.c: udp initialization
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+clib_error_t *
+udp_init (vlib_main_t * vm)
+{
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi;
+ clib_error_t *error;
+
+ error = vlib_call_init_function (vm, ip_main_init);
+
+ if (!error)
+ {
+ pi = ip_get_protocol_info (im, IP_PROTOCOL_UDP);
+ if (pi == 0)
+ return clib_error_return (0, "UDP protocol info AWOL");
+ pi->format_header = format_udp_header;
+ pi->unformat_pg_edit = unformat_pg_udp_header;
+ }
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (udp_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/udp_local.c b/src/vnet/ip/udp_local.c
new file mode 100644
index 00000000000..13ab6e4fb32
--- /dev/null
+++ b/src/vnet/ip/udp_local.c
@@ -0,0 +1,645 @@
+/*
+ * node.c: udp packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/udp.h>
+#include <vnet/ip/udp_packet.h>
+#include <vppinfra/sparse_vec.h>
+
+udp_main_t udp_main;
+
+#define foreach_udp_input_next \
+ _ (PUNT, "error-punt") \
+ _ (DROP, "error-drop") \
+ _ (ICMP4_ERROR, "ip4-icmp-error") \
+ _ (ICMP6_ERROR, "ip6-icmp-error")
+
+typedef enum
+{
+#define _(s,n) UDP_INPUT_NEXT_##s,
+ foreach_udp_input_next
+#undef _
+ UDP_INPUT_N_NEXT,
+} udp_input_next_t;
+
+typedef struct
+{
+ u16 src_port;
+ u16 dst_port;
+ u8 bound;
+} udp_rx_trace_t;
+
+u8 *
+format_udp_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ udp_rx_trace_t *t = va_arg (*args, udp_rx_trace_t *);
+
+ s = format (s, "UDP: src-port %d dst-port %d%s",
+ clib_net_to_host_u16 (t->src_port),
+ clib_net_to_host_u16 (t->dst_port),
+ t->bound ? "" : " (no listener)");
+ return s;
+}
+
+typedef struct
+{
+ /* Sparse vector mapping udp dst_port in network byte order
+ to next index. */
+ u16 *next_by_dst_port;
+ u8 punt_unknown;
+} udp_input_runtime_t;
+
+vlib_node_registration_t udp4_input_node;
+vlib_node_registration_t udp6_input_node;
+
+always_inline uword
+udp46_input_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int is_ip4)
+{
+ udp_input_runtime_t *rt = is_ip4 ?
+ (void *) vlib_node_get_runtime_data (vm, udp4_input_node.index)
+ : (void *) vlib_node_get_runtime_data (vm, udp6_input_node.index);
+ __attribute__ ((unused)) u32 n_left_from, next_index, *from, *to_next;
+ word n_no_listener = 0;
+ u8 punt_unknown = rt->punt_unknown;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ udp_header_t *h0 = 0, *h1 = 0;
+ u32 i0, i1, dst_port0, dst_port1;
+ u32 advance0, advance1;
+ u32 error0, next0, error1, next1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* ip4/6_local hands us the ip header, not the udp header */
+ if (is_ip4)
+ {
+ advance0 = sizeof (ip4_header_t);
+ advance1 = sizeof (ip4_header_t);
+ }
+ else
+ {
+ advance0 = sizeof (ip6_header_t);
+ advance1 = sizeof (ip6_header_t);
+ }
+
+ if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0)))
+ {
+ error0 = UDP_ERROR_LENGTH_ERROR;
+ next0 = UDP_INPUT_NEXT_DROP;
+ }
+ else
+ {
+ vlib_buffer_advance (b0, advance0);
+ h0 = vlib_buffer_get_current (b0);
+ error0 = next0 = 0;
+ if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) >
+ vlib_buffer_length_in_chain (vm, b0)))
+ {
+ error0 = UDP_ERROR_LENGTH_ERROR;
+ next0 = UDP_INPUT_NEXT_DROP;
+ }
+ }
+
+ if (PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1)))
+ {
+ error1 = UDP_ERROR_LENGTH_ERROR;
+ next1 = UDP_INPUT_NEXT_DROP;
+ }
+ else
+ {
+ vlib_buffer_advance (b1, advance1);
+ h1 = vlib_buffer_get_current (b1);
+ error1 = next1 = 0;
+ if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) >
+ vlib_buffer_length_in_chain (vm, b1)))
+ {
+ error1 = UDP_ERROR_LENGTH_ERROR;
+ next1 = UDP_INPUT_NEXT_DROP;
+ }
+ }
+
+ /* Index sparse array with network byte order. */
+ dst_port0 = (error0 == 0) ? h0->dst_port : 0;
+ dst_port1 = (error1 == 0) ? h1->dst_port : 0;
+ sparse_vec_index2 (rt->next_by_dst_port, dst_port0, dst_port1,
+ &i0, &i1);
+ next0 = (error0 == 0) ? vec_elt (rt->next_by_dst_port, i0) : next0;
+ next1 = (error1 == 0) ? vec_elt (rt->next_by_dst_port, i1) : next1;
+
+ if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX))
+ {
+ // move the pointer back so icmp-error can find the
+ // ip packet header
+ vlib_buffer_advance (b0, -(word) advance0);
+
+ if (PREDICT_FALSE (punt_unknown))
+ {
+ b0->error = node->errors[UDP_ERROR_PUNT];
+ next0 = UDP_INPUT_NEXT_PUNT;
+ }
+ else if (is_ip4)
+ {
+ icmp4_error_set_vnet_buffer (b0,
+ ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_port_unreachable,
+ 0);
+ next0 = UDP_INPUT_NEXT_ICMP4_ERROR;
+ n_no_listener++;
+ }
+ else
+ {
+ icmp6_error_set_vnet_buffer (b0,
+ ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_port_unreachable,
+ 0);
+ next0 = UDP_INPUT_NEXT_ICMP6_ERROR;
+ n_no_listener++;
+ }
+ }
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b0, sizeof (*h0));
+ }
+
+ if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX))
+ {
+ // move the pointer back so icmp-error can find the
+ // ip packet header
+ vlib_buffer_advance (b1, -(word) advance1);
+
+ if (PREDICT_FALSE (punt_unknown))
+ {
+ b1->error = node->errors[UDP_ERROR_PUNT];
+ next1 = UDP_INPUT_NEXT_PUNT;
+ }
+ else if (is_ip4)
+ {
+ icmp4_error_set_vnet_buffer (b1,
+ ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_port_unreachable,
+ 0);
+ next1 = UDP_INPUT_NEXT_ICMP4_ERROR;
+ n_no_listener++;
+ }
+ else
+ {
+ icmp6_error_set_vnet_buffer (b1,
+ ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_port_unreachable,
+ 0);
+ next1 = UDP_INPUT_NEXT_ICMP6_ERROR;
+ n_no_listener++;
+ }
+ }
+ else
+ {
+ b1->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b1, sizeof (*h1));
+ }
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ udp_rx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR])
+ {
+ tr->src_port = h0 ? h0->src_port : 0;
+ tr->dst_port = h0 ? h0->dst_port : 0;
+ tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR &&
+ next0 != UDP_INPUT_NEXT_ICMP6_ERROR);
+ }
+ }
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ udp_rx_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ if (b1->error != node->errors[UDP_ERROR_LENGTH_ERROR])
+ {
+ tr->src_port = h1 ? h1->src_port : 0;
+ tr->dst_port = h1 ? h1->dst_port : 0;
+ tr->bound = (next1 != UDP_INPUT_NEXT_ICMP4_ERROR &&
+ next1 != UDP_INPUT_NEXT_ICMP6_ERROR);
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ udp_header_t *h0 = 0;
+ u32 i0, next0;
+ u32 advance0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* ip4/6_local hands us the ip header, not the udp header */
+ if (is_ip4)
+ advance0 = sizeof (ip4_header_t);
+ else
+ advance0 = sizeof (ip6_header_t);
+
+ if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0)))
+ {
+ b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
+ next0 = UDP_INPUT_NEXT_DROP;
+ goto trace_x1;
+ }
+
+ vlib_buffer_advance (b0, advance0);
+
+ h0 = vlib_buffer_get_current (b0);
+
+ if (PREDICT_TRUE (clib_net_to_host_u16 (h0->length) <=
+ vlib_buffer_length_in_chain (vm, b0)))
+ {
+ i0 = sparse_vec_index (rt->next_by_dst_port, h0->dst_port);
+ next0 = vec_elt (rt->next_by_dst_port, i0);
+
+ if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX))
+ {
+ // move the pointer back so icmp-error can find the
+ // ip packet header
+ vlib_buffer_advance (b0, -(word) advance0);
+
+ if (PREDICT_FALSE (punt_unknown))
+ {
+ b0->error = node->errors[UDP_ERROR_PUNT];
+ next0 = UDP_INPUT_NEXT_PUNT;
+ }
+ else if (is_ip4)
+ {
+ icmp4_error_set_vnet_buffer (b0,
+ ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_port_unreachable,
+ 0);
+ next0 = UDP_INPUT_NEXT_ICMP4_ERROR;
+ n_no_listener++;
+ }
+ else
+ {
+ icmp6_error_set_vnet_buffer (b0,
+ ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_port_unreachable,
+ 0);
+ next0 = UDP_INPUT_NEXT_ICMP6_ERROR;
+ n_no_listener++;
+ }
+ }
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b0, sizeof (*h0));
+ }
+ }
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
+ next0 = UDP_INPUT_NEXT_DROP;
+ }
+
+ trace_x1:
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ udp_rx_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR])
+ {
+ tr->src_port = h0->src_port;
+ tr->dst_port = h0->dst_port;
+ tr->bound = (next0 != UDP_INPUT_NEXT_ICMP4_ERROR &&
+ next0 != UDP_INPUT_NEXT_ICMP6_ERROR);
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_error_count (vm, node->node_index, UDP_ERROR_NO_LISTENER,
+ n_no_listener);
+ return from_frame->n_vectors;
+}
+
+static char *udp_error_strings[] = {
+#define udp_error(n,s) s,
+#include "udp_error.def"
+#undef udp_error
+};
+
+static uword
+udp4_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+udp6_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp4_input_node) = {
+ .function = udp4_input,
+ .name = "ip4-udp-lookup",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .runtime_data_bytes = sizeof (udp_input_runtime_t),
+
+ .n_errors = UDP_N_ERROR,
+ .error_strings = udp_error_strings,
+
+ .n_next_nodes = UDP_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [UDP_INPUT_NEXT_##s] = n,
+ foreach_udp_input_next
+#undef _
+ },
+
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_rx_trace,
+ .unformat_buffer = unformat_udp_header,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (udp4_input_node, udp4_input);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp6_input_node) = {
+ .function = udp6_input,
+ .name = "ip6-udp-lookup",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .runtime_data_bytes = sizeof (udp_input_runtime_t),
+
+ .n_errors = UDP_N_ERROR,
+ .error_strings = udp_error_strings,
+
+ .n_next_nodes = UDP_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [UDP_INPUT_NEXT_##s] = n,
+ foreach_udp_input_next
+#undef _
+ },
+
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_rx_trace,
+ .unformat_buffer = unformat_udp_header,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (udp6_input_node, udp6_input);
+
+static void
+add_dst_port (udp_main_t * um,
+ udp_dst_port_t dst_port, char *dst_port_name, u8 is_ip4)
+{
+ udp_dst_port_info_t *pi;
+ u32 i;
+
+ vec_add2 (um->dst_port_infos[is_ip4], pi, 1);
+ i = pi - um->dst_port_infos[is_ip4];
+
+ pi->name = dst_port_name;
+ pi->dst_port = dst_port;
+ pi->next_index = pi->node_index = ~0;
+
+ hash_set (um->dst_port_info_by_dst_port[is_ip4], dst_port, i);
+
+ if (pi->name)
+ hash_set_mem (um->dst_port_info_by_name[is_ip4], pi->name, i);
+}
+
+void
+udp_register_dst_port (vlib_main_t * vm,
+ udp_dst_port_t dst_port, u32 node_index, u8 is_ip4)
+{
+ udp_main_t *um = &udp_main;
+ udp_dst_port_info_t *pi;
+ udp_input_runtime_t *rt;
+ u16 *n;
+
+ {
+ clib_error_t *error = vlib_call_init_function (vm, udp_local_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ pi = udp_get_dst_port_info (um, dst_port, is_ip4);
+ if (!pi)
+ {
+ add_dst_port (um, dst_port, 0, is_ip4);
+ pi = udp_get_dst_port_info (um, dst_port, is_ip4);
+ ASSERT (pi);
+ }
+
+ pi->node_index = node_index;
+ pi->next_index = vlib_node_add_next (vm,
+ is_ip4 ? udp4_input_node.index
+ : udp6_input_node.index, node_index);
+
+ /* Setup udp protocol -> next index sparse vector mapping. */
+ rt = vlib_node_get_runtime_data
+ (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index);
+ n = sparse_vec_validate (rt->next_by_dst_port,
+ clib_host_to_net_u16 (dst_port));
+ n[0] = pi->next_index;
+}
+
+void
+udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add)
+{
+ udp_input_runtime_t *rt;
+
+ {
+ clib_error_t *error = vlib_call_init_function (vm, udp_local_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ rt = vlib_node_get_runtime_data
+ (vm, is_ip4 ? udp4_input_node.index : udp6_input_node.index);
+
+ rt->punt_unknown = is_add;
+}
+
+/* Parse a UDP header. */
+uword
+unformat_udp_header (unformat_input_t * input, va_list * args)
+{
+ u8 **result = va_arg (*args, u8 **);
+ udp_header_t *udp;
+ __attribute__ ((unused)) int old_length;
+ u16 src_port, dst_port;
+
+ /* Allocate space for IP header. */
+ {
+ void *p;
+
+ old_length = vec_len (*result);
+ vec_add2 (*result, p, sizeof (ip4_header_t));
+ udp = p;
+ }
+
+ memset (udp, 0, sizeof (udp[0]));
+ if (unformat (input, "src-port %d dst-port %d", &src_port, &dst_port))
+ {
+ udp->src_port = clib_host_to_net_u16 (src_port);
+ udp->dst_port = clib_host_to_net_u16 (dst_port);
+ return 1;
+ }
+ return 0;
+}
+
+static void
+udp_setup_node (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ pg_node_t *pn = pg_get_node (node_index);
+
+ n->format_buffer = format_udp_header;
+ n->unformat_buffer = unformat_udp_header;
+ pn->unformat_edit = unformat_pg_udp_header;
+}
+
+clib_error_t *
+udp_local_init (vlib_main_t * vm)
+{
+ udp_input_runtime_t *rt;
+ udp_main_t *um = &udp_main;
+ int i;
+
+ {
+ clib_error_t *error;
+ error = vlib_call_init_function (vm, udp_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+
+ for (i = 0; i < 2; i++)
+ {
+ um->dst_port_info_by_name[i] = hash_create_string (0, sizeof (uword));
+ um->dst_port_info_by_dst_port[i] = hash_create (0, sizeof (uword));
+ }
+
+ udp_setup_node (vm, udp4_input_node.index);
+ udp_setup_node (vm, udp6_input_node.index);
+
+ rt = vlib_node_get_runtime_data (vm, udp4_input_node.index);
+
+ rt->next_by_dst_port = sparse_vec_new
+ ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]),
+ /* bits in index */ BITS (((udp_header_t *) 0)->dst_port));
+
+ rt->punt_unknown = 0;
+
+#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */);
+ foreach_udp4_dst_port
+#undef _
+ rt = vlib_node_get_runtime_data (vm, udp6_input_node.index);
+
+ rt->next_by_dst_port = sparse_vec_new
+ ( /* elt bytes */ sizeof (rt->next_by_dst_port[0]),
+ /* bits in index */ BITS (((udp_header_t *) 0)->dst_port));
+
+ rt->punt_unknown = 0;
+
+#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */);
+ foreach_udp6_dst_port
+#undef _
+ ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index);
+ /* Note: ip6 differs from ip4, UDP is hotwired to ip6-udp-lookup */
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (udp_local_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/udp_packet.h b/src/vnet/ip/udp_packet.h
new file mode 100644
index 00000000000..beea3059246
--- /dev/null
+++ b/src/vnet/ip/udp_packet.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip4/udp_packet.h: UDP packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_udp_packet_h
+#define included_udp_packet_h
+
+typedef struct
+{
+ /* Source and destination port. */
+ u16 src_port, dst_port;
+
+ /* Length of UDP header plus payload. */
+ u16 length;
+
+ /* Checksum of UDP pseudo-header and data or
+ zero if checksum is disabled. */
+ u16 checksum;
+} udp_header_t;
+
+#endif /* included_udp_packet_h */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/udp_pg.c b/src/vnet/ip/udp_pg.c
new file mode 100644
index 00000000000..c9d8d38ca4a
--- /dev/null
+++ b/src/vnet/ip/udp_pg.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/udp_pg: UDP packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/pg/pg.h>
+#include <vnet/ip/ip.h> /* for unformat_udp_udp_port */
+
+#define UDP_PG_EDIT_LENGTH (1 << 0)
+#define UDP_PG_EDIT_CHECKSUM (1 << 1)
+
+always_inline void
+udp_pg_edit_function_inline (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g,
+ u32 * packets, u32 n_packets, u32 flags)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_offset, udp_offset;
+
+ udp_offset = g->start_byte_offset;
+ ip_offset = (g - 1)->start_byte_offset;
+
+ while (n_packets >= 1)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ udp_header_t *udp0;
+ u32 udp_len0;
+
+ p0 = vlib_get_buffer (vm, packets[0]);
+ n_packets -= 1;
+ packets += 1;
+
+ ip0 = (void *) (p0->data + ip_offset);
+ udp0 = (void *) (p0->data + udp_offset);
+ udp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]);
+
+ if (flags & UDP_PG_EDIT_LENGTH)
+ udp0->length =
+ clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, p0)
+ - ip_offset);
+
+ /* Initialize checksum with header. */
+ if (flags & UDP_PG_EDIT_CHECKSUM)
+ {
+ ip_csum_t sum0;
+
+ sum0 = clib_mem_unaligned (&ip0->src_address, u64);
+
+ sum0 = ip_csum_with_carry
+ (sum0, clib_host_to_net_u32 (udp_len0 + (ip0->protocol << 16)));
+
+ /* Invalidate possibly old checksum. */
+ udp0->checksum = 0;
+
+ sum0 =
+ ip_incremental_checksum_buffer (vm, p0, udp_offset, udp_len0,
+ sum0);
+
+ sum0 = ~ip_csum_fold (sum0);
+
+ /* Zero checksum means checksumming disabled. */
+ sum0 = sum0 != 0 ? sum0 : 0xffff;
+
+ udp0->checksum = sum0;
+ }
+ }
+}
+
+static void
+udp_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ switch (g->edit_function_opaque)
+ {
+ case UDP_PG_EDIT_LENGTH:
+ udp_pg_edit_function_inline (pg, s, g, packets, n_packets,
+ UDP_PG_EDIT_LENGTH);
+ break;
+
+ case UDP_PG_EDIT_CHECKSUM:
+ udp_pg_edit_function_inline (pg, s, g, packets, n_packets,
+ UDP_PG_EDIT_CHECKSUM);
+ break;
+
+ case UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH:
+ udp_pg_edit_function_inline (pg, s, g, packets, n_packets,
+ UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH);
+ break;
+
+ default:
+ ASSERT (0);
+ break;
+ }
+}
+
+typedef struct
+{
+ pg_edit_t src_port, dst_port;
+ pg_edit_t length;
+ pg_edit_t checksum;
+} pg_udp_header_t;
+
+static inline void
+pg_udp_header_init (pg_udp_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the IP header. */
+#define _(f) pg_edit_init (&p->f, udp_header_t, f);
+ _(src_port);
+ _(dst_port);
+ _(length);
+ _(checksum);
+#undef _
+}
+
+uword
+unformat_pg_udp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_udp_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (udp_header_t),
+ &group_index);
+ pg_udp_header_init (p);
+
+ /* Defaults. */
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+ p->length.type = PG_EDIT_UNSPECIFIED;
+
+ if (!unformat (input, "UDP: %U -> %U",
+ unformat_pg_edit,
+ unformat_tcp_udp_port, &p->src_port,
+ unformat_pg_edit, unformat_tcp_udp_port, &p->dst_port))
+ goto error;
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "length %U",
+ unformat_pg_edit, unformat_pg_number, &p->length))
+ ;
+
+ else if (unformat (input, "checksum %U",
+ unformat_pg_edit, unformat_pg_number, &p->checksum))
+ ;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ {
+ ip_main_t *im = &ip_main;
+ u16 dst_port;
+ tcp_udp_port_info_t *pi;
+
+ pi = 0;
+ if (p->dst_port.type == PG_EDIT_FIXED)
+ {
+ dst_port = pg_edit_get_value (&p->dst_port, PG_EDIT_LO);
+ pi = ip_get_tcp_udp_port_info (im, dst_port);
+ }
+
+ if (pi && pi->unformat_pg_edit
+ && unformat_user (input, pi->unformat_pg_edit, s))
+ ;
+
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ p = pg_get_edit_group (s, group_index);
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED
+ || p->length.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = udp_pg_edit_function;
+ g->edit_function_opaque = 0;
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ g->edit_function_opaque |= UDP_PG_EDIT_CHECKSUM;
+ if (p->length.type == PG_EDIT_UNSPECIFIED)
+ g->edit_function_opaque |= UDP_PG_EDIT_LENGTH;
+ }
+
+ return 1;
+ }
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */