aboutsummaryrefslogtreecommitdiffstats
path: root/vnet/vnet/ip
diff options
context:
space:
mode:
Diffstat (limited to 'vnet/vnet/ip')
-rw-r--r--vnet/vnet/ip/format.c119
-rw-r--r--vnet/vnet/ip/format.h89
-rw-r--r--vnet/vnet/ip/icmp4.c734
-rw-r--r--vnet/vnet/ip/icmp46_packet.h392
-rw-r--r--vnet/vnet/ip/icmp6.c814
-rw-r--r--vnet/vnet/ip/icmp6.h70
-rw-r--r--vnet/vnet/ip/igmp_packet.h140
-rw-r--r--vnet/vnet/ip/ip.h222
-rw-r--r--vnet/vnet/ip/ip4.h409
-rw-r--r--vnet/vnet/ip/ip46_cli.c158
-rw-r--r--vnet/vnet/ip/ip4_error.h86
-rw-r--r--vnet/vnet/ip/ip4_format.c243
-rw-r--r--vnet/vnet/ip/ip4_forward.c3564
-rw-r--r--vnet/vnet/ip/ip4_hop_by_hop.c320
-rw-r--r--vnet/vnet/ip/ip4_input.c423
-rw-r--r--vnet/vnet/ip/ip4_mtrie.c561
-rw-r--r--vnet/vnet/ip/ip4_mtrie.h161
-rw-r--r--vnet/vnet/ip/ip4_packet.h314
-rw-r--r--vnet/vnet/ip/ip4_pg.c387
-rw-r--r--vnet/vnet/ip/ip4_source_check.c369
-rw-r--r--vnet/vnet/ip/ip4_test.c311
-rw-r--r--vnet/vnet/ip/ip6.h503
-rw-r--r--vnet/vnet/ip/ip6_error.h85
-rw-r--r--vnet/vnet/ip/ip6_format.c322
-rw-r--r--vnet/vnet/ip/ip6_forward.c2724
-rw-r--r--vnet/vnet/ip/ip6_hop_by_hop.c1139
-rw-r--r--vnet/vnet/ip/ip6_hop_by_hop.h35
-rw-r--r--vnet/vnet/ip/ip6_hop_by_hop_packet.h67
-rw-r--r--vnet/vnet/ip/ip6_input.c317
-rw-r--r--vnet/vnet/ip/ip6_neighbor.c3146
-rw-r--r--vnet/vnet/ip/ip6_packet.h378
-rw-r--r--vnet/vnet/ip/ip6_pg.c222
-rw-r--r--vnet/vnet/ip/ip_checksum.c184
-rw-r--r--vnet/vnet/ip/ip_frag.c449
-rw-r--r--vnet/vnet/ip/ip_frag.h81
-rw-r--r--vnet/vnet/ip/ip_init.c153
-rw-r--r--vnet/vnet/ip/ip_input_acl.c394
-rw-r--r--vnet/vnet/ip/ip_packet.h183
-rw-r--r--vnet/vnet/ip/lookup.c2271
-rw-r--r--vnet/vnet/ip/lookup.h442
-rw-r--r--vnet/vnet/ip/ports.def757
-rw-r--r--vnet/vnet/ip/protocols.def162
-rw-r--r--vnet/vnet/ip/tcp.c2983
-rw-r--r--vnet/vnet/ip/tcp.h396
-rw-r--r--vnet/vnet/ip/tcp_format.c132
-rw-r--r--vnet/vnet/ip/tcp_init.c65
-rw-r--r--vnet/vnet/ip/tcp_packet.h118
-rw-r--r--vnet/vnet/ip/tcp_pg.c224
-rw-r--r--vnet/vnet/ip/udp.h113
-rw-r--r--vnet/vnet/ip/udp_error.def20
-rw-r--r--vnet/vnet/ip/udp_format.c83
-rw-r--r--vnet/vnet/ip/udp_init.c63
-rw-r--r--vnet/vnet/ip/udp_local.c508
-rw-r--r--vnet/vnet/ip/udp_packet.h56
-rw-r--r--vnet/vnet/ip/udp_pg.c233
55 files changed, 28894 insertions, 0 deletions
diff --git a/vnet/vnet/ip/format.c b/vnet/vnet/ip/format.c
new file mode 100644
index 00000000000..9dda4c5e10b
--- /dev/null
+++ b/vnet/vnet/ip/format.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/format.c: ip generic (4 or 6) formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format an IP protocol number (ip_protocol_t vararg): prints the name
+ * found by ip_get_protocol_info() when there is one, otherwise
+ * "unknown <number>". */
+u8 * format_ip_protocol (u8 * s, va_list * args)
+{
+  ip_protocol_t proto = va_arg (*args, ip_protocol_t);
+  ip_protocol_info_t * info = ip_get_protocol_info (&ip_main, proto);
+
+  if (! info)
+    return format (s, "unknown %d", proto);
+
+  return format (s, "%s", info->name);
+}
+
+/* Parse an IP protocol by name.
+ * Vararg: u8 * that receives the protocol number.  The name is looked
+ * up in ip_main.protocol_info_by_name, which yields an index into
+ * ip_main.protocol_infos.  Returns 1 on success, 0 when the name is
+ * not found (the result is then left untouched). */
+uword unformat_ip_protocol (unformat_input_t * input, va_list * args)
+{
+  u8 * result = va_arg (*args, u8 *);
+  ip_main_t * im = &ip_main;
+  int info_index;
+
+  if (unformat_user (input, unformat_vlib_number_by_name,
+                     im->protocol_info_by_name, &info_index))
+    {
+      *result = vec_elt_at_index (im->protocol_infos, info_index)->protocol;
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Format a TCP/UDP port passed as an int vararg.  Prints the name from
+ * ip_get_tcp_udp_port_info() when one is found; otherwise prints the
+ * number after converting it with clib_net_to_host_u16. */
+u8 * format_tcp_udp_port (u8 * s, va_list * args)
+{
+  int port = va_arg (*args, int);
+  tcp_udp_port_info_t * info = ip_get_tcp_udp_port_info (&ip_main, port);
+
+  if (info)
+    return format (s, "%s", info->name);
+
+  return format (s, "%d", clib_net_to_host_u16 (port));
+}
+
+/* Parse a TCP/UDP port given either as a known port name or as a number.
+ * Vararg: u16 * that receives the port.  A name yields the port stored
+ * in the matching tcp_udp_port_info_t; a number must be below 65536 and
+ * is converted with clib_host_to_net_u16.  Returns 1 on success, 0
+ * otherwise (the result is then left untouched). */
+uword unformat_tcp_udp_port (unformat_input_t * input, va_list * args)
+{
+  u16 * result = va_arg (*args, u16 *);
+  ip_main_t * im = &ip_main;
+  u32 info_index, port;
+
+  if (unformat_user (input, unformat_vlib_number_by_name,
+                     im->port_info_by_name, &info_index))
+    port = vec_elt_at_index (im->port_infos, info_index)->port;
+  else if (! unformat_user (input, unformat_vlib_number, &port)
+           || port >= (1 << 16))
+    return 0;
+  else
+    port = clib_host_to_net_u16 (port);
+
+  *result = port;
+  return 1;
+}
+
+/* Parse an IPv4 or IPv6 address into an ip46_address_t.
+ * Varargs: ip46_address_t * destination, then a u32 flag; non-zero
+ * selects unformat_ip6_address into ->ip6, zero selects
+ * unformat_ip4_address into ->ip4.  Returns the selected parser's
+ * result. */
+uword unformat_ip46_address (unformat_input_t * input, va_list * args)
+{
+  ip46_address_t * addr = va_arg (*args, ip46_address_t *);
+  u32 want_ip6 = va_arg (*args, u32);
+
+  if (! want_ip6)
+    return unformat_user (input, unformat_ip4_address, &addr->ip4);
+
+  return unformat_user (input, unformat_ip6_address, &addr->ip6);
+}
diff --git a/vnet/vnet/ip/format.h b/vnet/vnet/ip/format.h
new file mode 100644
index 00000000000..511a9346bf6
--- /dev/null
+++ b/vnet/vnet/ip/format.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/format.h: ip 4 and/or 6 formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_format_h
+#define included_ip_format_h
+
+/* IP4 or IP6. */
+
+format_function_t format_ip_protocol;
+unformat_function_t unformat_ip_protocol;
+
+format_function_t format_tcp_udp_port;
+unformat_function_t unformat_tcp_udp_port;
+
+format_function_t format_ip_adjacency;
+format_function_t format_ip_adjacency_packet_data;
+
+unformat_function_t unformat_ip46_address;
+
+/* IP4 */
+
+/* Parse an IP4 address %d.%d.%d.%d. */
+unformat_function_t unformat_ip4_address;
+
+/* Format an IP4 address. */
+format_function_t format_ip4_address;
+format_function_t format_ip4_address_and_length;
+
+/* Parse an IP4 header. */
+unformat_function_t unformat_ip4_header;
+
+/* Format an IP4 header. */
+format_function_t format_ip4_header;
+
+/* Parse an IP packet matching pattern. */
+unformat_function_t unformat_ip4_match;
+
+unformat_function_t unformat_pg_ip4_header;
+
+/* IP6 */
+unformat_function_t unformat_ip6_address;
+format_function_t format_ip6_address;
+format_function_t format_ip6_address_and_length;
+unformat_function_t unformat_ip6_header;
+format_function_t format_ip6_header;
+unformat_function_t unformat_pg_ip6_header;
+
+/* Format TCP/UDP headers. */
+format_function_t format_tcp_header, format_udp_header;
+
+unformat_function_t unformat_pg_tcp_header, unformat_pg_udp_header;
+
+#endif /* included_ip_format_h */
diff --git a/vnet/vnet/ip/icmp4.c b/vnet/vnet/ip/icmp4.c
new file mode 100644
index 00000000000..e21f3bf047b
--- /dev/null
+++ b/vnet/vnet/ip/icmp4.c
@@ -0,0 +1,734 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/icmp4.c: ipv4 icmp
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+/* Format an ICMPv4 type and code as text.
+ * Varargs: type, then code (both fetched as int).
+ * An unrecognised type prints "unknown 0x<type>" and the code is not
+ * examined.  Otherwise the type name is printed, followed by a space
+ * and the code name when the (type, code) pair is listed in
+ * foreach_icmp4_code. */
+static u8 * format_ip4_icmp_type_and_code (u8 * s, va_list * args)
+{
+ icmp4_type_t type = va_arg (*args, int);
+ u8 code = va_arg (*args, int);
+ char * t = 0;
+
+#define _(n,f) case n: t = #f; break;
+
+ switch (type) /* one case per entry of foreach_icmp4_type */
+ {
+ foreach_icmp4_type;
+
+ default:
+ break;
+ }
+
+#undef _
+
+ if (! t) /* no case matched: type is not in the table */
+ return format (s, "unknown 0x%x", type);
+
+ s = format (s, "%s", t);
+
+ t = 0;
+ switch ((type << 8) | code) /* key: type in the high byte, code in the low byte */
+ {
+#define _(a,n,f) case (ICMP4_##a << 8) | (n): t = #f; break;
+
+ foreach_icmp4_code;
+
+#undef _
+ }
+
+ if (t) /* code name is optional; unknown codes print nothing extra */
+ s = format (s, " %s", t);
+
+ return s;
+}
+
+/* Format an ICMPv4 header.
+ * Varargs: icmp46_header_t * header, then u32 number of header bytes
+ * available.  Prints "ICMP header truncated" when fewer bytes than one
+ * icmp46_header_t are available; otherwise prints the type/code text
+ * and the checksum converted with clib_net_to_host_u16. */
+static u8 * format_ip4_icmp_header (u8 * s, va_list * args)
+{
+  icmp46_header_t * hdr = va_arg (*args, icmp46_header_t *);
+  u32 n_bytes = va_arg (*args, u32);
+
+  if (n_bytes >= sizeof (hdr[0]))
+    s = format (s, "ICMP %U checksum 0x%x",
+                format_ip4_icmp_type_and_code, hdr->type, hdr->code,
+                clib_net_to_host_u16 (hdr->checksum));
+  else
+    s = format (s, "ICMP header truncated");
+
+  return s;
+}
+/* Trace record: 64 bytes of packet data, printed as an IPv4 header by
+ * format_icmp_input_trace. */
+typedef struct {
+ u8 packet_data[64];
+} icmp_input_trace_t;
+
+/* Trace formatter for the ICMPv4 nodes in this file.
+ * Varargs: vlib_main_t * and vlib_node_t * (both fetched but unused),
+ * then the icmp_input_trace_t * whose packet_data is printed with
+ * format_ip4_header. */
+static u8 * format_icmp_input_trace (u8 * s, va_list * va)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  icmp_input_trace_t * trace = va_arg (*va, icmp_input_trace_t *);
+
+  return format (s, "%U", format_ip4_header,
+                 trace->packet_data, sizeof (trace->packet_data));
+}
+/* Error/counter indices used by the ICMPv4 nodes in this file.
+ * Each value indexes icmp_error_strings, so the two must stay in step. */
+typedef enum {
+ ICMP4_ERROR_UNKNOWN_TYPE,
+ ICMP4_ERROR_ECHO_REPLIES_SENT,
+ ICMP4_ERROR_TTL_EXPIRE_RESP_SENT,
+ ICMP4_ERROR_TTL_EXPIRE_RESP_DROP,
+} icmp_error_t;
+/* Text for each icmp_error_t value; registered as .error_strings by
+ * the ip4-icmp-input and ip4-icmp-ttl-expire nodes. */
+static char * icmp_error_strings[] = {
+ [ICMP4_ERROR_UNKNOWN_TYPE] = "unknown type",
+ [ICMP4_ERROR_ECHO_REPLIES_SENT] = "echo replies sent",
+ [ICMP4_ERROR_TTL_EXPIRE_RESP_SENT] = "TTL time exceeded response sent",
+ [ICMP4_ERROR_TTL_EXPIRE_RESP_DROP] = "TTL time exceeded response dropped",
+};
+/* Statically declared next-node indices of ip4-icmp-input. */
+typedef enum {
+ ICMP_INPUT_NEXT_ERROR,
+ ICMP_INPUT_N_NEXT,
+} icmp_input_next_t;
+/* ICMPv4 module state. */
+typedef struct {
+ uword * type_and_code_by_name; /* name hash; values are decoded as (type << 8) | code by unformat_icmp_type_and_code */
+
+ uword * type_by_name; /* name hash; values are used as the ICMP type by unformat_icmp_type_and_code */
+
+ /* Vector dispatch table indexed by [icmp type]. */
+ u8 ip4_input_next_index_by_type[256];
+} icmp4_main_t;
+
+icmp4_main_t icmp4_main;
+/* ip4-icmp-input node function: hands each ICMPv4 packet to the next
+ * node selected by its ICMP type through
+ * icmp4_main.ip4_input_next_index_by_type.  Every buffer's error is
+ * set to ICMP4_ERROR_UNKNOWN_TYPE before it is enqueued.
+ * Returns frame->n_vectors. */
+static uword
+ip4_icmp_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ icmp4_main_t * im = &icmp4_main;
+ uword n_packets = frame->n_vectors;
+ u32 * from, * to_next;
+ u32 n_left_from, n_left_to_next, next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ ip4_header_t * ip0;
+ icmp46_header_t * icmp0;
+ icmp4_type_t type0;
+ u32 bi0, next0;
+
+ if (PREDICT_TRUE (n_left_from > 2)) /* prefetch ahead: buffer from[2] and the header of from[1] */
+ {
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ p0 = vlib_get_buffer (vm, from[1]);
+ ip0 = vlib_buffer_get_current (p0);
+ CLIB_PREFETCH(ip0, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = to_next[0] = from[0]; /* speculatively enqueue to the current next frame */
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip4_next_header (ip0);
+ type0 = icmp0->type;
+ next0 = im->ip4_input_next_index_by_type[type0]; /* dispatch on ICMP type */
+
+ p0->error = node->errors[ICMP4_ERROR_UNKNOWN_TYPE];
+ if (PREDICT_FALSE (next0 != next)) /* speculation was wrong */
+ {
+ vlib_put_next_frame (vm, node, next, n_left_to_next + 1); /* give the slot back to the old frame */
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = bi0; /* re-enqueue on the correct frame */
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+/* Graph node "ip4-icmp-input".  Its only statically declared next is
+ * "error-punt"; ip4_icmp_register_type adds further nexts with
+ * vlib_node_add_next. */
+VLIB_REGISTER_NODE (ip4_icmp_input_node,static) = {
+ .function = ip4_icmp_input,
+ .name = "ip4-icmp-input",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp_input_trace,
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ICMP_INPUT_NEXT_ERROR] = "error-punt",
+ },
+};
+/* ip4-icmp-echo-request node function: turns each echo request into
+ * an echo reply in place.  For every packet it
+ *  - sets the RX sw_if_index to vnet_main.local_interface_sw_if_index,
+ *  - rewrites the ICMP type and incrementally updates the ICMP checksum,
+ *  - swaps the IP source and destination addresses,
+ *  - sets the TTL to ip4_main.host_config.ttl and installs a fragment
+ *    id taken from the random buffer, incrementally updating the IP
+ *    checksum.
+ * All packets stay on the cached next frame.  The whole frame is
+ * counted as ICMP4_ERROR_ECHO_REPLIES_SENT against ip4_icmp_input_node.
+ * Returns frame->n_vectors. */
+static uword
+ip4_icmp_echo_request (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ uword n_packets = frame->n_vectors;
+ u32 * from, * to_next;
+ u32 n_left_from, n_left_to_next, next;
+ ip4_main_t * i4m = &ip4_main;
+ u16 * fragment_ids, * fid;
+ u8 host_config_ttl = i4m->host_config.ttl;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp_input_trace_t));
+
+ /* Get random fragment IDs for replies. */
+ fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer,
+ n_packets * sizeof (fragment_ids[0]));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from > 2 && n_left_to_next > 2) /* dual loop: two packets per iteration */
+ {
+ vlib_buffer_t * p0, * p1;
+ ip4_header_t * ip0, * ip1;
+ icmp46_header_t * icmp0, * icmp1;
+ u32 bi0, src0, dst0;
+ u32 bi1, src1, dst1;
+ ip_csum_t sum0, sum1;
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ p1 = vlib_get_buffer (vm, bi1);
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ icmp0 = ip4_next_header (ip0);
+ icmp1 = ip4_next_header (ip1);
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] = vnet_main.local_interface_sw_if_index;
+ vnet_buffer (p1)->sw_if_index[VLIB_RX] = vnet_main.local_interface_sw_if_index;
+
+ /* Update ICMP checksum. */
+ sum0 = icmp0->checksum;
+ sum1 = icmp1->checksum;
+
+ ASSERT (icmp0->type == ICMP4_echo_request);
+ ASSERT (icmp1->type == ICMP4_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
+ icmp46_header_t, type);
+ sum1 = ip_csum_update (sum1, ICMP4_echo_request, ICMP4_echo_reply,
+ icmp46_header_t, type);
+ icmp0->type = ICMP4_echo_reply;
+ icmp1->type = ICMP4_echo_reply;
+
+ icmp0->checksum = ip_csum_fold (sum0);
+ icmp1->checksum = ip_csum_fold (sum1);
+
+ src0 = ip0->src_address.data_u32;
+ src1 = ip1->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ dst1 = ip1->dst_address.data_u32;
+
+ /* Swap source and destination address.
+ Does not change checksum. */
+ ip0->src_address.data_u32 = dst0;
+ ip1->src_address.data_u32 = dst1;
+ ip0->dst_address.data_u32 = src0;
+ ip1->dst_address.data_u32 = src1;
+
+ /* Update IP checksum. */
+ sum0 = ip0->checksum;
+ sum1 = ip1->checksum;
+
+ sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
+ ip4_header_t, ttl);
+ sum1 = ip_csum_update (sum1, ip1->ttl, host_config_ttl,
+ ip4_header_t, ttl);
+ ip0->ttl = host_config_ttl;
+ ip1->ttl = host_config_ttl;
+
+ /* New fragment id. */
+ sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
+ ip4_header_t, fragment_id);
+ sum1 = ip_csum_update (sum1, ip1->fragment_id, fid[1],
+ ip4_header_t, fragment_id);
+ ip0->fragment_id = fid[0];
+ ip1->fragment_id = fid[1];
+ fid += 2;
+
+ ip0->checksum = ip_csum_fold (sum0);
+ ip1->checksum = ip_csum_fold (sum1);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0)); /* incremental result must equal a full recompute */
+ ASSERT (ip1->checksum == ip4_header_checksum (ip1));
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0) /* single loop: same steps, one packet at a time */
+ {
+ vlib_buffer_t * p0;
+ ip4_header_t * ip0;
+ icmp46_header_t * icmp0;
+ u32 bi0, src0, dst0;
+ ip_csum_t sum0;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip4_next_header (ip0);
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] = vnet_main.local_interface_sw_if_index;
+
+ /* Update ICMP checksum. */
+ sum0 = icmp0->checksum;
+
+ ASSERT (icmp0->type == ICMP4_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
+ icmp46_header_t, type);
+ icmp0->type = ICMP4_echo_reply;
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ src0 = ip0->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ ip0->src_address.data_u32 = dst0;
+ ip0->dst_address.data_u32 = src0;
+
+ /* Update IP checksum. */
+ sum0 = ip0->checksum;
+
+ sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
+ ip4_header_t, ttl);
+ ip0->ttl = host_config_ttl;
+
+ sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
+ ip4_header_t, fragment_id);
+ ip0->fragment_id = fid[0];
+ fid += 1;
+
+ ip0->checksum = ip_csum_fold (sum0);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ vlib_error_count (vm, ip4_icmp_input_node.index,
+ ICMP4_ERROR_ECHO_REPLIES_SENT,
+ frame->n_vectors);
+
+ return frame->n_vectors;
+}
+/* Graph node "ip4-icmp-echo-request"; its single next node is
+ * "ip4-rewrite-local". */
+VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
+ .function = ip4_icmp_echo_request,
+ .name = "ip4-icmp-echo-request",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp_input_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "ip4-rewrite-local",
+ },
+};
+/* Next-node indices of ip4-icmp-ttl-expire. */
+typedef enum {
+ ICMP4_TTL_EXPIRE_NEXT_DROP,
+ ICMP4_TTL_EXPIRE_NEXT_LOOKUP,
+ ICMP4_TTL_EXPIRE_N_NEXT,
+} icmp_ttl_expire_next_t;
+
+/* ip4-icmp-ttl-expire node function.
+ *
+ * Rewrites each received packet in place into an ICMPv4 "time exceeded
+ * (TTL exceeded in transit)" message addressed to the packet's source:
+ *  - the payload is trimmed to the offending IP header plus 8 bytes,
+ *  - a new 20-byte IP header, the ICMP header and the 4-byte unused
+ *    field are prepended,
+ *  - the source address comes from the address-pool entry indexed by
+ *    the RX sw_if_index; when there is none the packet is sent to
+ *    the drop next with ICMP4_ERROR_TTL_EXPIRE_RESP_DROP.
+ *
+ * The prepended region holds whatever was in the buffer before, so
+ * flags_and_fragment_offset and the ICMP unused word are written
+ * explicitly rather than left as found.
+ *
+ * Returns frame->n_vectors. */
+static uword
+ip4_icmp_ttl_expire (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * frame)
+{
+  u32 * from, * to_next;
+  uword n_left_from, n_left_to_next;
+  icmp_ttl_expire_next_t next_index;
+  ip4_main_t * im = &ip4_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+                                   /* stride */ 1, sizeof (icmp_input_trace_t));
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 pi0 = from[0];
+          u32 next0 = ICMP4_TTL_EXPIRE_NEXT_LOOKUP;
+          u8 error0 = ICMP4_ERROR_TTL_EXPIRE_RESP_SENT;
+          u32 len0, new_len0;
+          vlib_buffer_t * p0;
+          ip4_header_t * ip0, * out_ip0;
+          icmp46_header_t * icmp0;
+          ip_csum_t sum;
+          u32 sw_if_index0, if_add_index0;
+
+          /* Speculatively enqueue p0 to the current next frame */
+          to_next[0] = pi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          ip0 = vlib_buffer_get_current (p0);
+          len0 = vlib_buffer_length_in_chain (vm, p0);
+          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+          /* Cut payload to just IP header plus first 8 bytes */
+          new_len0 = (ip0->ip_version_and_header_length & 0xf) * 4 + 8;
+          if (len0 > new_len0)
+            {
+              p0->current_length = new_len0; /* should fit in 1st buffer */
+              if (PREDICT_FALSE (p0->total_length_not_including_first_buffer))
+                {
+                  /* clear current_length of all other buffers in chain */
+                  vlib_buffer_t * b = p0;
+                  p0->total_length_not_including_first_buffer = 0;
+                  while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+                    {
+                      b = vlib_get_buffer (vm, b->next_buffer);
+                      b->current_length = 0;
+                    }
+                }
+            }
+
+          /* Add IP header and ICMP header including a 4 byte unused field */
+          vlib_buffer_advance (p0,
+                               -sizeof (ip4_header_t) - sizeof (icmp46_header_t) - 4);
+          out_ip0 = vlib_buffer_get_current (p0);
+          icmp0 = (icmp46_header_t *) &out_ip0[1];
+
+          /* Fill ip header fields */
+          out_ip0->ip_version_and_header_length = 0x45;
+          out_ip0->tos = 0;
+          out_ip0->length = clib_host_to_net_u16 (p0->current_length);
+          out_ip0->fragment_id = 0;
+          /* Not a fragment: without this the field keeps stale buffer
+             contents and is covered by the checksum computed below. */
+          out_ip0->flags_and_fragment_offset = 0;
+          out_ip0->ttl = 0xff;
+          out_ip0->protocol = IP_PROTOCOL_ICMP;
+          out_ip0->dst_address = ip0->src_address;
+          if_add_index0 =
+            lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
+          if (PREDICT_TRUE (if_add_index0 != ~0))
+            {
+              ip_interface_address_t * if_add =
+                pool_elt_at_index (lm->if_address_pool, if_add_index0);
+              ip4_address_t * if_ip =
+                ip_interface_address_get_address (lm, if_add);
+              out_ip0->src_address = *if_ip;
+              vlib_error_count (vm, node->node_index, error0, 1);
+            }
+          else /* interface has no IP4 address - should not happen */
+            {
+              next0 = ICMP4_TTL_EXPIRE_NEXT_DROP;
+              error0 = ICMP4_ERROR_TTL_EXPIRE_RESP_DROP;
+            }
+          out_ip0->checksum = ip4_header_checksum (out_ip0);
+
+          /* Fill icmp header fields */
+          icmp0->type = ICMP4_time_exceeded;
+          icmp0->code = ICMP4_time_exceeded_ttl_exceeded_in_transit;
+          icmp0->checksum = 0;
+          /* Zero the 4-byte unused field that follows the ICMP header;
+             it is part of the checksummed region. */
+          memset (icmp0 + 1, 0, 4);
+          sum = ip_incremental_checksum (
+            0, icmp0, p0->current_length - sizeof (ip4_header_t));
+          icmp0->checksum = ~ip_csum_fold (sum);
+
+          /* Update error status */
+          p0->error = node->errors[error0];
+
+          /* Verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, next0);
+        }
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+/* Graph node "ip4-icmp-ttl-expire"; nexts are "error-drop" and
+ * "ip4-lookup".  Uses the same error strings as ip4-icmp-input. */
+VLIB_REGISTER_NODE (ip4_icmp_ttl_expire_node) = {
+ .function = ip4_icmp_ttl_expire,
+ .name = "ip4-icmp-ttl-expire",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = ICMP4_TTL_EXPIRE_N_NEXT,
+ .next_nodes = {
+ [ICMP4_TTL_EXPIRE_NEXT_DROP] = "error-drop",
+ [ICMP4_TTL_EXPIRE_NEXT_LOOKUP] = "ip4-lookup",
+ },
+
+ .format_trace = format_icmp_input_trace,
+};
+
+
+/* Parse an ICMPv4 type (and optionally code) given by name.
+ * Vararg: icmp46_header_t * whose type and code fields are filled in.
+ * The name is first looked up in icmp4_main.type_and_code_by_name,
+ * whose value is split into type (bits 8-15) and code (bits 0-7);
+ * failing that, in icmp4_main.type_by_name, with the code set to 0.
+ * Returns 1 on a match, 0 otherwise. */
+static uword unformat_icmp_type_and_code (unformat_input_t * input, va_list * args)
+{
+  icmp46_header_t * h = va_arg (*args, icmp46_header_t *);
+  icmp4_main_t * cm = &icmp4_main;
+  u32 value;
+
+  if (unformat_user (input, unformat_vlib_number_by_name,
+                     cm->type_and_code_by_name, &value))
+    {
+      h->type = (value >> 8) & 0xff;
+      h->code = (value >> 0) & 0xff;
+      return 1;
+    }
+
+  if (unformat_user (input, unformat_vlib_number_by_name,
+                     cm->type_by_name, &value))
+    {
+      h->type = value;
+      h->code = 0;
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Packet-generator edit function for ICMPv4: writes the ICMP checksum
+ * of every packet in packets[0 .. n_packets-1].
+ * The ICMP header offset is g->start_byte_offset; the IP header offset
+ * is read from the edit group just before g.  The checksummed length
+ * is the IP total length minus the IP header length. */
+static void
+icmp4_pg_edit_function (pg_main_t * pg,
+                        pg_stream_t * s,
+                        pg_edit_group_t * g,
+                        u32 * packets,
+                        u32 n_packets)
+{
+  vlib_main_t * vm = pg->vlib_main;
+  u32 icmp_offset = g->start_byte_offset;
+  u32 ip_offset = g[-1].start_byte_offset;
+  u32 i;
+
+  for (i = 0; i < n_packets; i++)
+    {
+      vlib_buffer_t * p0 = vlib_get_buffer (vm, packets[i]);
+      ip4_header_t * ip;
+      icmp46_header_t * icmp;
+      u32 icmp_len;
+      ip_csum_t sum;
+
+      ASSERT (p0->current_data == 0);
+      ip = (void *) (p0->data + ip_offset);
+      icmp = (void *) (p0->data + icmp_offset);
+      icmp_len = clib_net_to_host_u16 (ip->length) - ip4_header_bytes (ip);
+      sum = ip_incremental_checksum (0, icmp, icmp_len);
+      icmp->checksum = ~ ip_csum_fold (sum);
+    }
+}
+/* Packet-generator edits for an ICMP header: one pg_edit_t per
+ * icmp46_header_t field initialised by pg_icmp_header_init. */
+typedef struct {
+ pg_edit_t type, code;
+ pg_edit_t checksum;
+} pg_icmp46_header_t;
+/* Initialise each edit of *p with pg_edit_init for the icmp46_header_t
+ * field of the same name. */
+always_inline void
+pg_icmp_header_init (pg_icmp46_header_t * p)
+{
+ /* Initialize the type, code and checksum edits of the ICMP header. */
+#define _(f) pg_edit_init (&p->f, icmp46_header_t, f);
+ _ (type);
+ _ (code);
+ _ (checksum);
+#undef _
+}
+/* Packet-generator parser for an ICMPv4 header.
+ * Vararg: pg_stream_t * to which an edit group is added.
+ * Parses "ICMP <type-and-code>", then an optional "checksum <value>",
+ * then the payload via unformat_pg_payload.  When no checksum was
+ * given, icmp4_pg_edit_function is installed on the group.
+ * Returns 1 on success; on a parse failure frees the edit group and
+ * returns 0. */
+static uword
+unformat_pg_icmp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t * s = va_arg (*args, pg_stream_t *);
+ pg_icmp46_header_t * p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (icmp46_header_t),
+ &group_index);
+ pg_icmp_header_init (p);
+
+ p->checksum.type = PG_EDIT_UNSPECIFIED; /* stays unspecified unless "checksum" is parsed below */
+
+ {
+ icmp46_header_t tmp;
+
+ if (! unformat (input, "ICMP %U", unformat_icmp_type_and_code, &tmp))
+ goto error;
+
+ pg_edit_set_fixed (&p->type, tmp.type);
+ pg_edit_set_fixed (&p->code, tmp.code);
+ }
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "checksum %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->checksum))
+ ;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ if (! unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED) /* no explicit checksum: have it computed per packet */
+ {
+ pg_edit_group_t * g = pg_stream_get_group (s, group_index);
+ g->edit_function = icmp4_pg_edit_function;
+ g->edit_function_opaque = 0;
+ }
+
+ return 1;
+
+ error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+/* Route ICMPv4 packets of the given type from ip4-icmp-input to the
+ * node node_index: adds that node as a next of ip4_icmp_input_node and
+ * stores the resulting next index in the per-type dispatch table. */
+void ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type,
+                             u32 node_index)
+{
+  icmp4_main_t * im = &icmp4_main;
+  uword next_index;
+
+  ASSERT (type < ARRAY_LEN (im->ip4_input_next_index_by_type));
+
+  next_index = vlib_node_add_next (vm, ip4_icmp_input_node.index, node_index);
+  im->ip4_input_next_index_by_type[type] = next_index;
+}
+
+/* One-time ICMPv4 initialisation.
+ *  - Runs ip_main_init first and returns its error, if any.
+ *  - Installs the ICMP header formatter and packet-generator parser on
+ *    the IP_PROTOCOL_ICMP protocol info.
+ *  - Fills type_by_name (type name -> type) and type_and_code_by_name
+ *    (code name -> (type << 8) | code), the two hashes consulted by
+ *    unformat_icmp_type_and_code.
+ *  - Points every ICMP type at ICMP_INPUT_NEXT_ERROR, then registers
+ *    the echo-request node for ICMP4_echo_request.
+ * Returns 0 on success. */
+static clib_error_t *
+icmp4_init (vlib_main_t * vm)
+{
+  ip_main_t * im = &ip_main;
+  ip_protocol_info_t * pi;
+  icmp4_main_t * cm = &icmp4_main;
+  clib_error_t * error;
+
+  error = vlib_call_init_function (vm, ip_main_init);
+
+  if (error)
+    return error;
+
+  pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP);
+  pi->format_header = format_ip4_icmp_header;
+  pi->unformat_pg_edit = unformat_pg_icmp_header;
+
+  cm->type_by_name = hash_create_string (0, sizeof (uword));
+#define _(n,t) hash_set_mem (cm->type_by_name, #t, (n));
+  foreach_icmp4_type;
+#undef _
+
+  /* Combined entries belong in type_and_code_by_name: the parser decodes
+     values from that hash as (type << 8) | code, whereas values from
+     type_by_name are stored straight into the 8-bit type field. */
+  cm->type_and_code_by_name = hash_create_string (0, sizeof (uword));
+#define _(a,n,t) hash_set_mem (cm->type_and_code_by_name, #t, (n) | (ICMP4_##a << 8));
+  foreach_icmp4_code;
+#undef _
+
+  memset (cm->ip4_input_next_index_by_type,
+          ICMP_INPUT_NEXT_ERROR,
+          sizeof (cm->ip4_input_next_index_by_type));
+
+  ip4_icmp_register_type (vm, ICMP4_echo_request, ip4_icmp_echo_request_node.index);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (icmp4_init);
diff --git a/vnet/vnet/ip/icmp46_packet.h b/vnet/vnet/ip/icmp46_packet.h
new file mode 100644
index 00000000000..fa3fed4d081
--- /dev/null
+++ b/vnet/vnet/ip/icmp46_packet.h
@@ -0,0 +1,392 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * icmp46_packet.h: ip4/ip6 icmp packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_icmp46_packet_h
+#define included_vnet_icmp46_packet_h
+
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+/* X-macro table of ICMPv4 message types.  Each entry is
+   _ (numeric type value, symbolic name).  The user defines _() before
+   expanding the list; icmp4_type_t in this header turns every entry into an
+   ICMP4_<name> enumerator. */
+#define foreach_icmp4_type \
+ _ (0, echo_reply) \
+ _ (3, destination_unreachable) \
+ _ (4, source_quench) \
+ _ (5, redirect) \
+ _ (6, alternate_host_address) \
+ _ (8, echo_request) \
+ _ (9, router_advertisement) \
+ _ (10, router_solicitation) \
+ _ (11, time_exceeded) \
+ _ (12, parameter_problem) \
+ _ (13, timestamp_request) \
+ _ (14, timestamp_reply) \
+ _ (15, information_request) \
+ _ (16, information_reply) \
+ _ (17, address_mask_request) \
+ _ (18, address_mask_reply) \
+ _ (30, traceroute) \
+ _ (31, datagram_conversion_error) \
+ _ (32, mobile_host_redirect) \
+ _ (33, ip6_where_are_you) \
+ _ (34, ip6_i_am_here) \
+ _ (35, mobile_registration_request) \
+ _ (36, mobile_registration_reply) \
+ _ (37, domain_name_request) \
+ _ (38, domain_name_reply) \
+ _ (39, skip) \
+ _ (40, photuris)
+
+/* Code value 0 (presumably for message types that define no specific
+   codes -- confirm against callers). */
+#define icmp_no_code 0
+
+/* X-macro table of ICMPv4 codes.  Each entry is
+   _ (type name, numeric code value, code name), where the type name is one
+   of the names listed in foreach_icmp4_type.  icmp4_code_t in this header
+   turns every entry into an ICMP4_<type>_<code> enumerator. */
+#define foreach_icmp4_code \
+ _ (destination_unreachable, 0, destination_unreachable_net) \
+ _ (destination_unreachable, 1, destination_unreachable_host) \
+ _ (destination_unreachable, 2, protocol_unreachable) \
+ _ (destination_unreachable, 3, port_unreachable) \
+ _ (destination_unreachable, 4, fragmentation_needed_and_dont_fragment_set) \
+ _ (destination_unreachable, 5, source_route_failed) \
+ _ (destination_unreachable, 6, destination_network_unknown) \
+ _ (destination_unreachable, 7, destination_host_unknown) \
+ _ (destination_unreachable, 8, source_host_isolated) \
+ _ (destination_unreachable, 9, network_administratively_prohibited) \
+ _ (destination_unreachable, 10, host_administratively_prohibited) \
+ _ (destination_unreachable, 11, network_unreachable_for_type_of_service) \
+ _ (destination_unreachable, 12, host_unreachable_for_type_of_service) \
+ _ (destination_unreachable, 13, communication_administratively_prohibited) \
+ _ (destination_unreachable, 14, host_precedence_violation) \
+ _ (destination_unreachable, 15, precedence_cutoff_in_effect) \
+ _ (redirect, 0, network_redirect) \
+ _ (redirect, 1, host_redirect) \
+ _ (redirect, 2, type_of_service_and_network_redirect) \
+ _ (redirect, 3, type_of_service_and_host_redirect) \
+ _ (router_advertisement, 0, normal_router_advertisement) \
+ _ (router_advertisement, 16, does_not_route_common_traffic) \
+ _ (time_exceeded, 0, ttl_exceeded_in_transit) \
+ _ (time_exceeded, 1, fragment_reassembly_time_exceeded) \
+ _ (parameter_problem, 0, pointer_indicates_error) \
+ _ (parameter_problem, 1, missing_required_option) \
+ _ (parameter_problem, 2, bad_length)
+
+/* ICMPv6 */
+/* X-macro table of ICMPv6 message types.  Each entry is
+   _ (numeric type value, symbolic name).  icmp6_type_t in this header turns
+   every entry into an ICMP6_<name> enumerator. */
+#define foreach_icmp6_type \
+ _ (1, destination_unreachable) \
+ _ (2, packet_too_big) \
+ _ (3, time_exceeded) \
+ _ (4, parameter_problem) \
+ _ (128, echo_request) \
+ _ (129, echo_reply) \
+ _ (130, multicast_listener_request) \
+ _ (131, multicast_listener_report) \
+ _ (132, multicast_listener_done) \
+ _ (133, router_solicitation) \
+ _ (134, router_advertisement) \
+ _ (135, neighbor_solicitation) \
+ _ (136, neighbor_advertisement) \
+ _ (137, redirect) \
+ _ (138, router_renumbering) \
+ _ (139, node_information_request) \
+ _ (140, node_information_response) \
+ _ (141, inverse_neighbor_solicitation) \
+ _ (142, inverse_neighbor_advertisement) \
+ _ (143, multicast_listener_report_v2) \
+ _ (144, home_agent_address_discovery_request) \
+ _ (145, home_agent_address_discovery_reply) \
+ _ (146, mobile_prefix_solicitation) \
+ _ (147, mobile_prefix_advertisement) \
+ _ (148, certification_path_solicitation) \
+ _ (149, certification_path_advertisement) \
+ _ (151, multicast_router_advertisement) \
+ _ (152, multicast_router_solicitation) \
+ _ (153, multicast_router_termination) \
+ _ (154, fmipv6_messages)
+
+/* X-macro table of ICMPv6 codes.  Each entry is
+   _ (type name, numeric code value, code name), where the type name is one
+   of the names listed in foreach_icmp6_type.  icmp6_code_t in this header
+   turns every entry into an ICMP6_<type>_<code> enumerator. */
+#define foreach_icmp6_code \
+ _ (destination_unreachable, 0, no_route_to_destination) \
+ _ (destination_unreachable, 1, destination_administratively_prohibited) \
+ _ (destination_unreachable, 2, beyond_scope_of_source_address) \
+ _ (destination_unreachable, 3, address_unreachable) \
+ _ (destination_unreachable, 4, port_unreachable) \
+ _ (destination_unreachable, 5, source_address_failed_policy) \
+ _ (destination_unreachable, 6, reject_route_to_destination) \
+ _ (time_exceeded, 0, ttl_exceeded_in_transit) \
+ _ (time_exceeded, 1, fragment_reassembly_time_exceeded) \
+ _ (parameter_problem, 0, erroneous_header_field) \
+ _ (parameter_problem, 1, unrecognized_next_header) \
+ _ (parameter_problem, 2, unrecognized_option) \
+ _ (router_renumbering, 0, command) \
+ _ (router_renumbering, 1, result) \
+ _ (node_information_request, 0, data_contains_ip6_address) \
+ _ (node_information_request, 1, data_contains_name) \
+ _ (node_information_request, 2, data_contains_ip4_address) \
+ _ (node_information_response, 0, success) \
+ _ (node_information_response, 1, failed) \
+ _ (node_information_response, 2, unknown_request)
+
+/* ICMPv4 message types: one ICMP4_<name> enumerator per entry of
+   foreach_icmp4_type, valued with that entry's numeric type. */
+typedef enum {
+#define _(n,f) ICMP4_##f = n,
+ foreach_icmp4_type
+#undef _
+} icmp4_type_t;
+
+/* ICMPv4 codes: one ICMP4_<type>_<code> enumerator per entry of
+   foreach_icmp4_code.  Values are only unique within a given type. */
+typedef enum {
+#define _(t,n,f) ICMP4_##t##_##f = n,
+ foreach_icmp4_code
+#undef _
+} icmp4_code_t;
+
+/* ICMPv6 message types: one ICMP6_<name> enumerator per entry of
+   foreach_icmp6_type, valued with that entry's numeric type. */
+typedef enum {
+#define _(n,f) ICMP6_##f = n,
+ foreach_icmp6_type
+#undef _
+} icmp6_type_t;
+
+/* ICMPv6 codes: one ICMP6_<type>_<code> enumerator per entry of
+   foreach_icmp6_code.  Values are only unique within a given type. */
+typedef enum {
+#define _(t,n,f) ICMP6_##t##_##f = n,
+ foreach_icmp6_code
+#undef _
+} icmp6_code_t;
+
+/* Packed 4-byte header used for both ICMPv4 and ICMPv6 messages:
+   type, code and checksum, in that order. */
+typedef CLIB_PACKED (struct {
+ /* Message type (see icmp4_type_t / icmp6_type_t). */
+ u8 type;
+
+ /* Code within the message type (see icmp4_code_t / icmp6_code_t). */
+ u8 code;
+
+ /* IP checksum of icmp header plus data which follows. */
+ u16 checksum;
+}) icmp46_header_t;
+
+/* ip6 neighbor discovery */
+/* X-macro table of neighbor discovery option types.  Each entry is
+   _ (numeric option type, symbolic name); the enum that follows turns every
+   entry into an ICMP6_NEIGHBOR_DISCOVERY_OPTION_<name> enumerator. */
+#define foreach_icmp6_neighbor_discovery_option \
+ _ (1, source_link_layer_address) \
+ _ (2, target_link_layer_address) \
+ _ (3, prefix_information) \
+ _ (4, redirected_header) \
+ _ (5, mtu) \
+ _ (6, nbma_shortcut_limit) \
+ _ (7, advertisement_interval) \
+ _ (8, home_agent_information) \
+ _ (9, source_address_list) \
+ _ (10, target_address_list) \
+ _ (11, cryptographically_generated_address) \
+ _ (12, rsa_signature) \
+ _ (13, timestamp) \
+ _ (14, nonce) \
+ _ (15, trust_anchor) \
+ _ (16, certificate) \
+ _ (17, ip_address_and_prefix) \
+ _ (18, new_router_prefix_information) \
+ _ (19, mobile_link_layer_address) \
+ _ (20, neighbor_advertisement_acknowledgment) \
+ _ (23, map) \
+ _ (24, route_information) \
+ _ (25, recursive_dns_server) \
+ _ (26, ra_flags_extension) \
+ _ (27, handover_key_request) \
+ _ (28, handover_key_reply) \
+ _ (29, handover_assist_information) \
+ _ (30, mobile_node_identifier) \
+ _ (31, dns_search_list) \
+ _ (138, card_request) \
+ _ (139, card_reply)
+
+/* Neighbor discovery option types: one
+   ICMP6_NEIGHBOR_DISCOVERY_OPTION_<name> enumerator per entry of
+   foreach_icmp6_neighbor_discovery_option. */
+typedef enum icmp6_neighbor_discovery_option_type {
+#define _(n,f) ICMP6_NEIGHBOR_DISCOVERY_OPTION_##f = n,
+ foreach_icmp6_neighbor_discovery_option
+#undef _
+} icmp6_neighbor_discovery_option_type_t;
+
+/* Common 2-byte header that starts every neighbor discovery option.  It is
+   embedded as the first member of the specific option structs below. */
+typedef CLIB_PACKED (struct {
+ /* Option type. */
+ u8 type;
+
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+
+ /* Option data follows. */
+ u8 data[0];
+}) icmp6_neighbor_discovery_option_header_t;
+
+/* Packed layout of the prefix_information neighbor discovery option:
+   option header, prefix length, flags, two lifetimes, a reserved word and
+   the prefix address. */
+typedef CLIB_PACKED (struct {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 dst_address_length;
+ /* Bit mask built from the two flag values defined just below. */
+ u8 flags;
+#define ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_FLAG_ON_LINK (1 << 7)
+#define ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_AUTO (1 << 6)
+ u32 valid_time;
+ u32 preferred_time;
+ u32 unused;
+ ip6_address_t dst_address;
+}) icmp6_neighbor_discovery_prefix_information_option_t;
+
+/* Packed multicast address record: fixed fields followed by a variable
+   number of source addresses (source_addr is a zero-length trailing array;
+   presumably num_sources gives its element count -- confirm with users). */
+typedef CLIB_PACKED (struct {
+ u8 type;
+ u8 aux_data_len_u32s;
+ u16 num_sources;
+ ip6_address_t mcast_addr;
+ ip6_address_t source_addr[0];
+}) icmp6_multicast_address_record_t;
+
+/* Packed multicast listener report: a hop-by-hop extension header carrying
+   a router alert option and a padN option, then the ICMP header, a reserved
+   field, the record count and the trailing address records. */
+typedef CLIB_PACKED (struct {
+ ip6_hop_by_hop_ext_t ext_hdr;
+ ip6_router_alert_option_t alert;
+ ip6_padN_option_t pad;
+ icmp46_header_t icmp;
+ u16 rsvd;
+ u16 num_addr_records;
+ icmp6_multicast_address_record_t records[0];
+}) icmp6_multicast_listener_report_header_t;
+
+/* Packed redirected_header option: option header plus 6 reserved bytes
+   (8 bytes in total), followed by the variable-length packet data. */
+typedef CLIB_PACKED (struct {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 reserved[6];
+ /* IP6 header plus payload follows. */
+ u8 data[0];
+}) icmp6_neighbor_discovery_redirected_header_option_t;
+
+/* Packed mtu option: option header, 2 unused bytes and a 32-bit MTU
+   (8 bytes in total). */
+typedef CLIB_PACKED (struct {
+ icmp6_neighbor_discovery_option_header_t header;
+ u16 unused;
+ u32 mtu;
+}) icmp6_neighbor_discovery_mtu_option_t;
+
+/* Packed link-layer address option for ethernet: option header followed by
+   a 6-byte ethernet address (8 bytes in total). */
+typedef CLIB_PACKED (struct {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 ethernet_address[6];
+}) icmp6_neighbor_discovery_ethernet_link_layer_address_option_t;
+
+/* Packed link-layer address option with room for a 6+8 = 14 byte address
+   (16 bytes in total, i.e. two 8-byte units). */
+typedef CLIB_PACKED (struct {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 max_l2_address[6+8];
+}) icmp6_neighbor_discovery_max_link_layer_address_option_t;
+
+/* Generic neighbor discovery header. Used for router solicitations,
+ etc.  Consists of the ICMP header followed by a 32-bit reserved word. */
+typedef CLIB_PACKED (struct {
+ icmp46_header_t icmp;
+
+ u32 reserved_must_be_zero;
+}) icmp6_neighbor_discovery_header_t;
+
+/* Router advertisement packet formats. */
+/* Packed fixed part of a router advertisement: ICMP header followed by
+   hop limit, flags, router lifetime and two timer values. */
+typedef CLIB_PACKED (struct {
+ icmp46_header_t icmp;
+
+ /* Current hop limit to use for outgoing packets. */
+ u8 current_hop_limit;
+
+ /* Bit mask built from the two flag values defined just below. */
+ u8 flags;
+#define ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP (1 << 7)
+#define ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP (1 << 6)
+
+ /* Zero means unspecified. */
+ u16 router_lifetime_in_sec;
+
+ /* Zero means unspecified. */
+ u32 neighbor_reachable_time_in_msec;
+
+ /* Zero means unspecified. */
+ u32 time_in_msec_between_retransmitted_neighbor_solicitations;
+
+ /* Options that may follow: source_link_layer_address, mtu, prefix_information. */
+}) icmp6_router_advertisement_header_t;
+
+/* Neighbor solicitation/advertisement header: ICMP header, a 32-bit flags
+   word and the target address. */
+typedef CLIB_PACKED (struct {
+  icmp46_header_t icmp;
+
+  /* Zero for solicitation; flags for advertisement. */
+  u32 advertisement_flags;
+  /* The masks are unsigned to match the u32 field: (1 << 31) on a signed
+     int shifts into the sign bit, which is undefined behaviour. */
+  /* Set when sent by a router. */
+#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_ROUTER (1u << 31)
+  /* Set when response to solicitation. */
+#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED (1u << 30)
+#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE (1u << 29)
+
+  ip6_address_t target_address;
+
+  /* Options that may follow: source_link_layer_address
+     (for solicitation) target_link_layer_address (for advertisement). */
+}) icmp6_neighbor_solicitation_or_advertisement_header_t;
+
+/* Packed redirect message header: ICMP header, a reserved word and two
+   addresses (the better next hop and the destination). */
+typedef CLIB_PACKED (struct {
+ icmp46_header_t icmp;
+
+ u32 reserved_must_be_zero;
+
+ /* Better next hop to use for given destination. */
+ ip6_address_t better_next_hop_address;
+
+ ip6_address_t dst_address;
+
+ /* Options that may follow: target_link_layer_address,
+ redirected_header. */
+}) icmp6_redirect_header_t;
+
+/* Solicitation/advertisement packet format for ethernet.
+   Complete packet from the IPv6 header on: ip6 header, neighbor
+   solicitation/advertisement header and one ethernet link-layer address
+   option. */
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip;
+
+ icmp6_neighbor_solicitation_or_advertisement_header_t neighbor;
+
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t link_layer_option;
+}) icmp6_neighbor_solicitation_header_t;
+
+/* Router solicitation packet format for ethernet.
+   Complete packet from the IPv6 header on: ip6 header, generic neighbor
+   discovery header and one ethernet link-layer address option. */
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip;
+ icmp6_neighbor_discovery_header_t neighbor;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t link_layer_option;
+}) icmp6_router_solicitation_header_t;
+
+/* Router advertisement packet format for ethernet.
+   Complete packet from the IPv6 header on: ip6 header, router advertisement
+   header, an ethernet link-layer address option, an mtu option and a
+   variable number of trailing prefix information options. */
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip;
+ icmp6_router_advertisement_header_t router;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t link_layer_option;
+ icmp6_neighbor_discovery_mtu_option_t mtu_option;
+ icmp6_neighbor_discovery_prefix_information_option_t prefix[0];
+}) icmp6_router_advertisement_packet_t;
+
+/* Multicast listener report packet format for ethernet.
+   Complete packet from the IPv6 header on: ip6 header followed by the
+   listener report header (which itself starts with a hop-by-hop extension
+   header). */
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip;
+ icmp6_multicast_listener_report_header_t report_hdr;
+}) icmp6_multicast_listener_report_packet_t;
+
+#endif /* included_vnet_icmp46_packet_h */
diff --git a/vnet/vnet/ip/icmp6.c b/vnet/vnet/ip/icmp6.c
new file mode 100644
index 00000000000..2d265d2b5b2
--- /dev/null
+++ b/vnet/vnet/ip/icmp6.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/icmp6.c: ip6 icmp
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+/* format() helper: takes an ICMPv6 type and code (both passed as int) and
+   appends the symbolic type name, followed by the symbolic code name when
+   the (type, code) pair is listed in foreach_icmp6_code.  Types missing
+   from foreach_icmp6_type are printed as "unknown 0x<type>". */
+static u8 * format_ip6_icmp_type_and_code (u8 * s, va_list * args)
+{
+  icmp6_type_t type = va_arg (*args, int);
+  u8 code = va_arg (*args, int);
+  char * type_name = 0;
+  char * code_name = 0;
+
+  /* Map the numeric type to its name. */
+  switch (type)
+    {
+#define _(n,f) case n: type_name = #f; break;
+      foreach_icmp6_type;
+#undef _
+
+    default:
+      break;
+    }
+
+  if (type_name == 0)
+    return format (s, "unknown 0x%x", type);
+
+  s = format (s, "%s", type_name);
+
+  /* Codes are only meaningful per type, so match on the combined value. */
+  switch ((type << 8) | code)
+    {
+#define _(a,n,f) case (ICMP6_##a << 8) | (n): code_name = #f; break;
+      foreach_icmp6_code;
+#undef _
+    }
+
+  if (code_name != 0)
+    s = format (s, " %s", code_name);
+
+  return s;
+}
+
+/* format() helper for an ICMPv6 header.  Arguments: pointer to the header
+   and the number of header bytes available.  Prints type, code and checksum;
+   for neighbor solicitations/advertisements with enough bytes available it
+   also prints the target address. */
+static u8 * format_icmp6_header (u8 * s, va_list * args)
+{
+  icmp46_header_t * icmp = va_arg (*args, icmp46_header_t *);
+  u32 max_header_bytes = va_arg (*args, u32);
+  icmp6_neighbor_solicitation_or_advertisement_header_t * nd;
+
+  /* Not even the fixed header is present. */
+  if (max_header_bytes < sizeof (icmp[0]))
+    return format (s, "ICMP header truncated");
+
+  s = format (s, "ICMP %U checksum 0x%x",
+              format_ip6_icmp_type_and_code, icmp->type, icmp->code,
+              clib_net_to_host_u16 (icmp->checksum));
+
+  /* The target address is only printed when the whole ND header fits ... */
+  if (max_header_bytes
+      < sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t))
+    return s;
+
+  /* ... and the message actually is a solicitation or advertisement. */
+  if (icmp->type != ICMP6_neighbor_solicitation
+      && icmp->type != ICMP6_neighbor_advertisement)
+    return s;
+
+  nd = (icmp6_neighbor_solicitation_or_advertisement_header_t *) icmp;
+  return format (s, "\n target address %U",
+                 format_ip6_address, &nd->target_address);
+}
+
+/* Trace formatter shared by the ICMPv6 nodes in this file: prints the
+   captured packet bytes as an IPv6 header.  The vlib main and node arguments
+   are consumed from the va_list but not used. */
+u8 * format_icmp6_input_trace (u8 * s, va_list * va)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  icmp6_input_trace_t * trace = va_arg (*va, icmp6_input_trace_t *);
+
+  return format (s, "%U",
+                 format_ip6_header,
+                 trace->packet_data, sizeof (trace->packet_data));
+}
+
+/* Error strings, one per entry of foreach_icmp6_error (defined outside this
+   file); the second macro argument of each entry is the string.  Used as the
+   error_strings table of the ICMPv6 node registrations in this file. */
+static char * icmp_error_strings[] = {
+#define _(f,s) s,
+ foreach_icmp6_error
+#undef _
+};
+
+/* Next-node indices statically defined for ip6-icmp-input.  The drop index
+   doubles as the "no handler for this type" marker in
+   input_next_index_by_type. */
+typedef enum {
+ ICMP_INPUT_NEXT_DROP,
+ ICMP_INPUT_N_NEXT,
+} icmp_input_next_t;
+
+/* Global ICMPv6 state: name lookup tables used by the packet-generator
+   parser and per-type tables consulted by ip6_icmp_input. */
+typedef struct {
+ /* Hash: name -> (type << 8) | code, as decoded by
+ unformat_icmp_type_and_code. */
+ uword * type_and_code_by_name;
+
+ /* Hash: name -> type. */
+ uword * type_by_name;
+
+ /* Vector dispatch table indexed by [icmp type]. */
+ u8 input_next_index_by_type[256];
+
+ /* Max valid code indexed by icmp type. */
+ u8 max_valid_code_by_type[256];
+
+ /* hop_limit must be >= this value for this icmp type. */
+ u8 min_valid_hop_limit_by_type[256];
+
+ /* IPv6 payload length must be >= this value for this icmp type. */
+ u8 min_valid_length_by_type[256];
+} icmp6_main_t;
+
+/* The single instance of the ICMPv6 state. */
+icmp6_main_t icmp6_main;
+
+/* Node function for "ip6-icmp-input".  For every packet in the frame it
+   looks at the ICMP type, validates code, hop limit and payload length
+   against the per-type tables in icmp6_main, and enqueues the packet to the
+   next node registered for that type, or to drop when any check fails.
+   Returns the number of packets processed. */
+static uword
+ip6_icmp_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ icmp6_main_t * im = &icmp6_main;
+ u32 * from, * to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * b0;
+ ip6_header_t * ip0;
+ icmp46_header_t * icmp0;
+ icmp6_type_t type0;
+ u32 bi0, next0, error0, len0;
+
+ /* Speculatively enqueue to the current next frame. */
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+ icmp0 = ip6_next_header (ip0);
+ type0 = icmp0->type;
+
+ /* The checks below each overwrite error0, so when several fail the
+ last failing check determines the reported error. */
+ error0 = ICMP6_ERROR_NONE;
+
+ /* A type whose table slot is the drop next has no handler. */
+ next0 = im->input_next_index_by_type[type0];
+ error0 = next0 == ICMP_INPUT_NEXT_DROP ? ICMP6_ERROR_UNKNOWN_TYPE : error0;
+
+ /* Check code is valid for type. */
+ error0 = icmp0->code > im->max_valid_code_by_type[type0] ? ICMP6_ERROR_INVALID_CODE_FOR_TYPE : error0;
+
+ /* Checksum is already validated by ip6_local node so we don't need to check that. */
+
+ /* Check that hop limit == 255 for certain types. */
+ error0 = ip0->hop_limit < im->min_valid_hop_limit_by_type[type0] ? ICMP6_ERROR_INVALID_HOP_LIMIT_FOR_TYPE : error0;
+
+ /* Check the IPv6 payload length against the per-type minimum. */
+ len0 = clib_net_to_host_u16 (ip0->payload_length);
+ error0 = len0 < im->min_valid_length_by_type[type0] ? ICMP6_ERROR_LENGTH_TOO_SMALL_FOR_TYPE : error0;
+
+ b0->error = node->errors[error0];
+
+ /* Any error sends the packet to drop. */
+ next0 = error0 != ICMP6_ERROR_NONE ? ICMP_INPUT_NEXT_DROP : next0;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* Registration of the "ip6-icmp-input" graph node.  Only the drop next is
+   declared statically here. */
+VLIB_REGISTER_NODE (ip6_icmp_input_node) = {
+ .function = ip6_icmp_input,
+ .name = "ip6-icmp-input",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ICMP_INPUT_NEXT_DROP] = "error-drop",
+ },
+};
+
+/* Next-node indices of ip6-icmp-echo-request: replies go through ip6-lookup,
+   except replies to link-local destinations, which go straight to
+   interface-output. */
+typedef enum {
+ ICMP6_ECHO_REQUEST_NEXT_LOOKUP,
+ ICMP6_ECHO_REQUEST_NEXT_OUTPUT,
+ ICMP6_ECHO_REQUEST_N_NEXT,
+} icmp6_echo_request_next_t;
+
+/* Node function for "ip6-icmp-echo-request".  Rewrites every echo request in
+   the frame into an echo reply in place: the ICMP type is changed (with an
+   incremental checksum update), source and destination addresses are
+   swapped and the hop limit is reset.  Replies to link-local destinations
+   reuse the received ethernet header with the MAC addresses swapped and are
+   sent to interface-output; all other replies go to ip6-lookup.  Packets are
+   handled two at a time, then one at a time for the remainder.
+   Returns the number of packets processed. */
+static uword
+ip6_icmp_echo_request (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 * from, * to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+ ip6_main_t * im = &ip6_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Dual loop: two packets per iteration. */
+ while (n_left_from > 2 && n_left_to_next > 2)
+ {
+ vlib_buffer_t * p0, * p1;
+ ip6_header_t * ip0, * ip1;
+ icmp46_header_t * icmp0, * icmp1;
+ ip6_address_t tmp0, tmp1;
+ ip_csum_t sum0, sum1;
+ u32 bi0, bi1;
+ u32 fib_index0, fib_index1;
+ u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+ u32 next1 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ p1 = vlib_get_buffer (vm, bi1);
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ icmp0 = ip6_next_header (ip0);
+ icmp1 = ip6_next_header (ip1);
+
+ /* Change icmp type to echo reply and update icmp checksum. */
+ sum0 = icmp0->checksum;
+ sum1 = icmp1->checksum;
+
+ ASSERT (icmp0->type == ICMP6_echo_request);
+ ASSERT (icmp1->type == ICMP6_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+ sum1 = ip_csum_update (sum1, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+
+ icmp0->checksum = ip_csum_fold (sum0);
+ icmp1->checksum = ip_csum_fold (sum1);
+
+ icmp0->type = ICMP6_echo_reply;
+ icmp1->type = ICMP6_echo_reply;
+
+ /* Swap source and destination address. */
+ tmp0 = ip0->src_address;
+ tmp1 = ip1->src_address;
+
+ ip0->src_address = ip0->dst_address;
+ ip1->src_address = ip1->dst_address;
+
+ ip0->dst_address = tmp0;
+ ip1->dst_address = tmp1;
+
+ /* New hop count. */
+ ip0->hop_limit = im->host_config.ttl;
+ ip1->hop_limit = im->host_config.ttl;
+
+ if (ip6_address_is_link_local_unicast (&ip0->dst_address))
+ {
+ ethernet_header_t *eth0;
+ u8 tmp_mac[6];
+ /* For link local, reuse current MAC header by swapping
+ * SMAC to DMAC instead of IP6 lookup since link local
+ * is not in the IP6 FIB */
+ vlib_buffer_reset (p0);
+ eth0 = vlib_buffer_get_current (p0);
+ memcpy (tmp_mac, eth0->dst_address, 6);
+ memcpy (eth0->dst_address, eth0->src_address, 6);
+ memcpy (eth0->src_address, tmp_mac, 6);
+ vnet_buffer(p0)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ next0 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT;
+ }
+ else
+ {
+ /* Determine the correct lookup fib indices... */
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+ }
+
+ if (ip6_address_is_link_local_unicast (&ip1->dst_address))
+ {
+ ethernet_header_t *eth1;
+ u8 tmp_mac[6];
+ /* For link local, reuse current MAC header by swapping
+ * SMAC to DMAC instead of IP6 lookup since link local
+ * is not in the IP6 FIB */
+ vlib_buffer_reset (p1);
+ eth1 = vlib_buffer_get_current (p1);
+ memcpy (tmp_mac, eth1->dst_address, 6);
+ memcpy (eth1->dst_address, eth1->src_address, 6);
+ memcpy (eth1->src_address, tmp_mac, 6);
+ vnet_buffer(p1)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p1)->sw_if_index[VLIB_RX];
+ next1 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT;
+ }
+ else
+ {
+ /* Determine the correct lookup fib indices... */
+ fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = fib_index1;
+ }
+
+ /* The RX interface is overwritten only after it has been read
+ above. */
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]
+ = vnet_main.local_interface_sw_if_index;
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]
+ = vnet_main.local_interface_sw_if_index;
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ /* Single loop: remaining packets, one per iteration. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ ip6_header_t * ip0;
+ icmp46_header_t * icmp0;
+ u32 bi0;
+ ip6_address_t tmp0;
+ ip_csum_t sum0;
+ u32 fib_index0;
+ u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip6_next_header (ip0);
+
+ /* Change icmp type to echo reply and update icmp checksum. */
+ sum0 = icmp0->checksum;
+
+ ASSERT (icmp0->type == ICMP6_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ icmp0->type = ICMP6_echo_reply;
+
+ /* Swap source and destination address. */
+ tmp0 = ip0->src_address;
+ ip0->src_address = ip0->dst_address;
+ ip0->dst_address = tmp0;
+
+ ip0->hop_limit = im->host_config.ttl;
+
+ if (ip6_address_is_link_local_unicast (&ip0->dst_address))
+ {
+ ethernet_header_t *eth0;
+ u8 tmp_mac[6];
+ /* For link local, reuse current MAC header by swapping
+ * SMAC to DMAC instead of IP6 lookup since link local
+ * is not in the IP6 FIB */
+ vlib_buffer_reset (p0);
+ eth0 = vlib_buffer_get_current (p0);
+ memcpy (tmp_mac, eth0->dst_address, 6);
+ memcpy (eth0->dst_address, eth0->src_address, 6);
+ memcpy (eth0->src_address, tmp_mac, 6);
+ vnet_buffer(p0)->sw_if_index[VLIB_TX] =
+ vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ next0 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT;
+ }
+ else
+ {
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+ }
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]
+ = vnet_main.local_interface_sw_if_index;
+
+ /* Verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* The counter is charged to the ip6-icmp-input node, once per packet in
+ the frame. */
+ vlib_error_count (vm, ip6_icmp_input_node.index,
+ ICMP6_ERROR_ECHO_REPLIES_SENT,
+ frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+/* Registration of the "ip6-icmp-echo-request" graph node (file-local). */
+VLIB_REGISTER_NODE (ip6_icmp_echo_request_node,static) = {
+ .function = ip6_icmp_echo_request,
+ .name = "ip6-icmp-echo-request",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = ICMP6_ECHO_REQUEST_N_NEXT,
+ .next_nodes = {
+ [ICMP6_ECHO_REQUEST_NEXT_LOOKUP] = "ip6-lookup",
+ [ICMP6_ECHO_REQUEST_NEXT_OUTPUT] = "interface-output",
+ },
+};
+
+/* Next-node indices of ip6-icmp-ttl-expire: drop when no reply can be built,
+   otherwise ip6-lookup to route the generated ICMP error. */
+typedef enum {
+ ICMP6_TTL_EXPIRE_NEXT_DROP,
+ ICMP6_TTL_EXPIRE_NEXT_LOOKUP,
+ ICMP6_TTL_EXPIRE_N_NEXT,
+} icmp_ttl_expire_next_t;
+
+/* Node function for "ip6-icmp-ttl-expire".  Turns every packet in the frame
+   into an ICMPv6 time-exceeded (hop limit exceeded in transit) error
+   addressed to the original sender:
+
+   - only the first buffer of the offending packet is kept as payload;
+   - a new IPv6 header, the ICMP header and the 4-byte unused field are
+     prepended in the buffer's headroom;
+   - the source address is taken from the receiving interface's address
+     entry; when the interface has none the packet is dropped;
+   - the ICMP checksum is computed over the finished message.
+
+   Successful replies are sent to ip6-lookup.
+   Returns the number of packets processed.
+
+   NOTE(review): the code relies on the buffer having at least 48 bytes of
+   headroom and does not cap the message at the minimum IPv6 MTU. */
+static uword
+ip6_icmp_ttl_expire (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * frame)
+{
+  u32 * from, * to_next;
+  uword n_left_from, n_left_to_next;
+  icmp_ttl_expire_next_t next_index;
+  ip6_main_t *im = &ip6_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+                                   /* stride */ 1, sizeof (icmp6_input_trace_t));
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 pi0 = from[0];
+          u32 next0 = ICMP6_TTL_EXPIRE_NEXT_LOOKUP;
+          u8 error0 = ICMP6_ERROR_TTL_EXPIRE_RESP_SENT;
+          vlib_buffer_t * p0;
+          ip6_header_t * ip0, * out_ip0;
+          icmp46_header_t * icmp0;
+          u32 sw_if_index0, if_add_index0;
+          int bogus_length;
+
+          /* Speculatively enqueue p0 to the current next frame */
+          to_next[0] = pi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          p0 = vlib_get_buffer(vm, pi0);
+          ip0 = vlib_buffer_get_current(p0);
+          sw_if_index0 = vnet_buffer(p0)->sw_if_index[VLIB_RX];
+
+          /* RFC2463 says to keep as much of the original packet as possible
+           * within the MTU. We cheat "a little" here by keeping whatever fits
+           * in the first buffer, to be more efficient */
+          if (PREDICT_FALSE(p0->total_length_not_including_first_buffer))
+            { /* clear current_length of all other buffers in chain */
+              vlib_buffer_t *b = p0;
+              p0->total_length_not_including_first_buffer = 0;
+              while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+                {
+                  b = vlib_get_buffer (vm, b->next_buffer);
+                  b->current_length = 0;
+                }
+            }
+
+          /* Add IP header and ICMPv6 header including a 4 byte unused field */
+          vlib_buffer_advance(p0,
+                              -sizeof(ip6_header_t)-sizeof(icmp46_header_t)-4);
+          out_ip0 = vlib_buffer_get_current(p0);
+          icmp0 = (icmp46_header_t *) &out_ip0[1];
+
+          /* Fill ip header fields */
+          out_ip0->ip_version_traffic_class_and_flow_label =
+            clib_host_to_net_u32(0x6<<28);
+          out_ip0->payload_length =
+            clib_host_to_net_u16(p0->current_length - sizeof(ip6_header_t));
+          out_ip0->protocol = IP_PROTOCOL_ICMP6;
+          out_ip0->hop_limit = 0xff;
+          /* ip0 still points at the original header, which now sits right
+             after the prepended headers. */
+          out_ip0->dst_address = ip0->src_address;
+          if_add_index0 =
+            lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
+          if (PREDICT_TRUE(if_add_index0 != ~0))
+            {
+              ip_interface_address_t *if_add =
+                pool_elt_at_index(lm->if_address_pool, if_add_index0);
+              ip6_address_t *if_ip =
+                ip_interface_address_get_address(lm, if_add);
+              out_ip0->src_address = *if_ip;
+              vlib_error_count (vm, node->node_index, error0, 1);
+            }
+          else /* interface has no IP6 address - should not happen */
+            {
+              next0 = ICMP6_TTL_EXPIRE_NEXT_DROP;
+              error0 = ICMP6_ERROR_TTL_EXPIRE_RESP_DROP;
+            }
+
+          /* Fill icmp header fields */
+          icmp0->type = ICMP6_time_exceeded;
+          icmp0->code = ICMP6_time_exceeded_ttl_exceeded_in_transit;
+          /* The 4-byte unused field after the ICMP header lives in what used
+             to be headroom and holds stale bytes; it must be sent as zero and
+             is covered by the checksum, so clear it first. */
+          memset (icmp0 + 1, 0, 4);
+          icmp0->checksum = 0;
+          icmp0->checksum = ip6_tcp_udp_icmp_compute_checksum(
+            vm, p0, out_ip0, &bogus_length);
+
+          /* Update error status */
+          p0->error = node->errors[error0];
+
+          /* Verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1(vm, node, next_index,
+                                          to_next, n_left_to_next,
+                                          pi0, next0);
+        }
+      vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/* Registration of the "ip6-icmp-ttl-expire" graph node.  It shares the
+   ICMPv6 error string table and trace formatter with ip6-icmp-input. */
+VLIB_REGISTER_NODE (ip6_icmp_ttl_expire_node) = {
+ .function = ip6_icmp_ttl_expire,
+ .name = "ip6-icmp-ttl-expire",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = ICMP6_TTL_EXPIRE_N_NEXT,
+ .next_nodes = {
+ [ICMP6_TTL_EXPIRE_NEXT_DROP] = "error-drop",
+ [ICMP6_TTL_EXPIRE_NEXT_LOOKUP] = "ip6-lookup",
+ },
+
+ .format_trace = format_icmp6_input_trace,
+};
+
+
+/* unformat() helper: parses a symbolic ICMPv6 name into the type and code
+   fields of the icmp46_header_t passed as argument.  Names in
+   type_and_code_by_name are tried first and set both fields; otherwise a
+   name in type_by_name sets the type and a code of 0.
+   Returns 1 on a match, 0 when the input matches neither table. */
+static uword unformat_icmp_type_and_code (unformat_input_t * input, va_list * args)
+{
+  icmp46_header_t * h = va_arg (*args, icmp46_header_t *);
+  icmp6_main_t * cm = &icmp6_main;
+  u32 value;
+
+  /* Combined entries are stored as (type << 8) | code. */
+  if (unformat_user (input, unformat_vlib_number_by_name,
+                     cm->type_and_code_by_name, &value))
+    {
+      h->type = (value >> 8) & 0xff;
+      h->code = (value >> 0) & 0xff;
+      return 1;
+    }
+
+  /* A plain type name carries no code. */
+  if (unformat_user (input, unformat_vlib_number_by_name,
+                     cm->type_by_name, &value))
+    {
+      h->type = value;
+      h->code = 0;
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Packet-generator edit function for the ICMPv6 edit group: for each
+   generated packet, computes the ICMP checksum and stores it in the ICMP
+   header.  The IPv6 header is located through the start offset of the edit
+   group preceding G, the ICMP header through G's own start offset. */
+static void
+icmp6_pg_edit_function (pg_main_t * pg,
+                        pg_stream_t * s,
+                        pg_edit_group_t * g,
+                        u32 * packets,
+                        u32 n_packets)
+{
+  vlib_main_t * vm = pg->vlib_main;
+  u32 icmp_offset = g->start_byte_offset;
+  u32 ip_offset = (g-1)->start_byte_offset;
+  u32 i;
+
+  for (i = 0; i < n_packets; i++)
+    {
+      vlib_buffer_t * b = vlib_get_buffer (vm, packets[i]);
+      ip6_header_t * ip;
+      icmp46_header_t * icmp;
+      int bogus_length;
+
+      /* Offsets are relative to the start of the buffer data. */
+      ASSERT (b->current_data == 0);
+      ip = (void *) (b->data + ip_offset);
+      icmp = (void *) (b->data + icmp_offset);
+
+      icmp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip,
+                                                          &bogus_length);
+      ASSERT (bogus_length == 0);
+    }
+}
+
+typedef struct { /* Packet-generator edits, one per icmp46_header_t field. */
+ pg_edit_t type, code;
+ pg_edit_t checksum;
+} pg_icmp46_header_t;
+
+/* Initialize each pg edit in P against the icmp46_header_t field of the
+ * same name (type, code, checksum). */
+always_inline void
+pg_icmp_header_init (pg_icmp46_header_t * p)
+{
+  pg_edit_init (&p->type, icmp46_header_t, type);
+  pg_edit_init (&p->code, icmp46_header_t, code);
+  pg_edit_init (&p->checksum, icmp46_header_t, checksum);
+}
+
+/* unformat function for a packet-generator ICMP6 header.
+ *
+ * The va_list carries the pg_stream_t * being built.  Accepted input is
+ * "ICMP <type-or-type-and-code name>", optionally followed by
+ * "checksum <edit>", then whatever unformat_pg_payload accepts.  When no
+ * checksum is given, icmp6_pg_edit_function is installed on the edit group
+ * so the checksum is computed per packet.
+ *
+ * Returns 1 on success; on a parse failure the edit group created here is
+ * freed and 0 is returned. */
+static uword
+unformat_pg_icmp_header (unformat_input_t * input, va_list * args)
+{
+  pg_stream_t * s = va_arg (*args, pg_stream_t *);
+  pg_icmp46_header_t * edits;
+  icmp46_header_t parsed;
+  u32 group_index;
+
+  edits = pg_create_edit_group (s, sizeof (edits[0]), sizeof (icmp46_header_t),
+                                &group_index);
+  pg_icmp_header_init (edits);
+
+  edits->checksum.type = PG_EDIT_UNSPECIFIED;
+
+  if (! unformat (input, "ICMP %U", unformat_icmp_type_and_code, &parsed))
+    goto error;
+
+  pg_edit_set_fixed (&edits->type, parsed.type);
+  pg_edit_set_fixed (&edits->code, parsed.code);
+
+  /* Parse options; anything else is left for the next protocol level. */
+  while (unformat (input, "checksum %U",
+                   unformat_pg_edit,
+                   unformat_pg_number, &edits->checksum))
+    ;
+
+  if (! unformat_user (input, unformat_pg_payload, s))
+    goto error;
+
+  if (edits->checksum.type == PG_EDIT_UNSPECIFIED)
+    {
+      pg_edit_group_t * g = pg_stream_get_group (s, group_index);
+      g->edit_function = icmp6_pg_edit_function;
+      g->edit_function_opaque = 0;
+    }
+
+  return 1;
+
+ error:
+  /* Free up any edits we may have added. */
+  pg_free_edit_group (s);
+  return 0;
+}
+
+/* Route ICMP6 messages of TYPE to the graph node NODE_INDEX: the node is
+ * added as a next node of ip6_icmp_input_node and the resulting next index
+ * is recorded in the per-type dispatch table. */
+void icmp6_register_type (vlib_main_t * vm, icmp6_type_t type, u32 node_index)
+{
+  icmp6_main_t * cm = &icmp6_main;
+
+  ASSERT (type < ARRAY_LEN (cm->input_next_index_by_type));
+
+  cm->input_next_index_by_type[type] =
+    vlib_node_add_next (vm, ip6_icmp_input_node.index, node_index);
+}
+
+/* One-time ICMP6 initialisation.
+ *
+ * Runs ip_main_init first, hooks the ICMP6 header formatter and the
+ * packet-generator parser into the IP protocol table, builds the name
+ * lookup hashes used by unformat_icmp_type_and_code, fills the per-type
+ * validation tables (max code, required hop limit, minimum length),
+ * registers the echo-request node, and finally runs ip6_neighbor_init.
+ *
+ * Returns 0 on success or the error reported by a dependency's init. */
+static clib_error_t *
+icmp6_init (vlib_main_t * vm)
+{
+  ip_main_t * im = &ip_main;
+  ip_protocol_info_t * pi;
+  icmp6_main_t * cm = &icmp6_main;
+  clib_error_t * error;
+
+  error = vlib_call_init_function (vm, ip_main_init);
+
+  if (error)
+    return error;
+
+  pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP6);
+  pi->format_header = format_icmp6_header;
+  pi->unformat_pg_edit = unformat_pg_icmp_header;
+
+  /* Type name -> type number. */
+  cm->type_by_name = hash_create_string (0, sizeof (uword));
+#define _(n,t) hash_set_mem (cm->type_by_name, #t, (n));
+  foreach_icmp6_type;
+#undef _
+
+  /* Code name -> (type << 8) | code.  These entries must go into
+     type_and_code_by_name: putting them in type_by_name left this table
+     empty and made code names parse as bare (and wrong) type numbers. */
+  cm->type_and_code_by_name = hash_create_string (0, sizeof (uword));
+#define _(a,n,t) hash_set_mem (cm->type_and_code_by_name, #t, (n) | (ICMP6_##a << 8));
+  foreach_icmp6_code;
+#undef _
+
+  /* NOTE(review): the memset fills below write one byte value per byte, so
+     they assume these tables have one-byte elements -- confirm against the
+     icmp6_main_t declaration. */
+  memset (cm->input_next_index_by_type,
+          ICMP_INPUT_NEXT_DROP,
+          sizeof (cm->input_next_index_by_type));
+  memset (cm->max_valid_code_by_type, 0, sizeof (cm->max_valid_code_by_type));
+
+  /* Highest code listed for each type in foreach_icmp6_code. */
+#define _(a,n,t) cm->max_valid_code_by_type[ICMP6_##a] = clib_max (cm->max_valid_code_by_type[ICMP6_##a], n);
+  foreach_icmp6_code;
+#undef _
+
+  /* Neighbor-discovery message types get a required hop limit of 255;
+     every other type keeps 0. */
+  memset (cm->min_valid_hop_limit_by_type, 0, sizeof (cm->min_valid_hop_limit_by_type));
+  cm->min_valid_hop_limit_by_type[ICMP6_router_solicitation] = 255;
+  cm->min_valid_hop_limit_by_type[ICMP6_router_advertisement] = 255;
+  cm->min_valid_hop_limit_by_type[ICMP6_neighbor_solicitation] = 255;
+  cm->min_valid_hop_limit_by_type[ICMP6_neighbor_advertisement] = 255;
+  cm->min_valid_hop_limit_by_type[ICMP6_redirect] = 255;
+
+  /* Default minimum length is the bare ICMP header; the neighbor-discovery
+     types below need their larger fixed headers. */
+  memset (cm->min_valid_length_by_type, sizeof (icmp46_header_t), sizeof (cm->min_valid_length_by_type));
+  cm->min_valid_length_by_type[ICMP6_router_solicitation] = sizeof (icmp6_neighbor_discovery_header_t);
+  cm->min_valid_length_by_type[ICMP6_router_advertisement] = sizeof (icmp6_router_advertisement_header_t);
+  cm->min_valid_length_by_type[ICMP6_neighbor_solicitation]
+    = sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t);
+  cm->min_valid_length_by_type[ICMP6_neighbor_advertisement]
+    = sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t);
+  cm->min_valid_length_by_type[ICMP6_redirect] = sizeof (icmp6_redirect_header_t);
+
+  icmp6_register_type (vm, ICMP6_echo_request, ip6_icmp_echo_request_node.index);
+
+  return vlib_call_init_function (vm, ip6_neighbor_init);
+}
+
+VLIB_INIT_FUNCTION (icmp6_init);
diff --git a/vnet/vnet/ip/icmp6.h b/vnet/vnet/ip/icmp6.h
new file mode 100644
index 00000000000..92f6913a454
--- /dev/null
+++ b/vnet/vnet/ip/icmp6.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_icmp6_h
+#define included_vnet_icmp6_h
+
+#define foreach_icmp6_error /* X-macro list: one _ (SYMBOL, "description") entry per ICMP6 error */ \
+ _ (NONE, "valid packets") \
+ _ (UNKNOWN_TYPE, "unknown type") \
+ _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
+ _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \
+ _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
+ _ (OPTIONS_WITH_ODD_LENGTH, \
+ "total option length not multiple of 8 bytes") \
+ _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
+ _ (ECHO_REPLIES_SENT, "echo replies sent") \
+ _ (NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK, \
+ "neighbor solicitations from source not on link") \
+ _ (NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN, \
+ "neighbor solicitations for unknown targets") \
+ _ (NEIGHBOR_ADVERTISEMENTS_TX, "neighbor advertisements sent") \
+ _ (NEIGHBOR_ADVERTISEMENTS_RX, "neighbor advertisements received") \
+ _ (ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK, \
+ "router solicitations from source not on link") \
+ _ (ROUTER_SOLICITATION_UNSUPPORTED_INTF, \
+ "neighbor discovery unsupported interface") \
+ _ (ROUTER_SOLICITATION_RADV_NOT_CONFIG, \
+ "neighbor discovery not configured") \
+ _ (ROUTER_SOLICITATION_DEST_UNKNOWN, \
+ "router solicitations for unknown destination") \
+ _ (ROUTER_SOLICITATION_SOURCE_UNKNOWN, \
+ "router solicitations for unknown source") \
+ _ (ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL, \
+ "router advertisement source not link local") \
+ _ (ROUTER_ADVERTISEMENTS_TX, "router advertisements sent") \
+ _ (ROUTER_ADVERTISEMENTS_RX, "router advertisements received") \
+ _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \
+ _ (TTL_EXPIRE_RESP_SENT, "TTL time exceeded response sent") \
+ _ (TTL_EXPIRE_RESP_DROP, "TTL time exceeded response dropped")
+
+
+typedef enum { /* ICMP6_ERROR_<SYMBOL> for each foreach_icmp6_error entry, numbered from 0 in list order. */
+#define _(f,s) ICMP6_ERROR_##f,
+ foreach_icmp6_error
+#undef _
+} icmp6_error_t;
+
+typedef struct { /* Trace record holding 64 bytes of packet data. */
+ u8 packet_data[64];
+} icmp6_input_trace_t;
+
+format_function_t format_icmp6_input_trace;
+void icmp6_register_type (vlib_main_t * vm, icmp6_type_t type, u32 node_index);
+
+extern vlib_node_registration_t ip6_icmp_input_node;
+
+#endif /* included_vnet_icmp6_h */
+
+
diff --git a/vnet/vnet/ip/igmp_packet.h b/vnet/vnet/ip/igmp_packet.h
new file mode 100644
index 00000000000..00b1e0deeb7
--- /dev/null
+++ b/vnet/vnet/ip/igmp_packet.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * igmp_packet.h: igmp packet format
+ *
+ * Copyright (c) 2011 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_igmp_packet_h
+#define included_vnet_igmp_packet_h
+
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+#define foreach_igmp_type /* X-macro list: _ (type value, name) per IGMP message type */ \
+ _ (0x11, membership_query) \
+ _ (0x12, membership_report_v1) \
+ _ (0x13, dvmrp) \
+ _ (0x14, pim_v1) \
+ _ (0x15, cisco_trace) \
+ _ (0x16, membership_report_v2) \
+ _ (0x17, leave_group_v2) \
+ _ (0x1e, traceroute_response) \
+ _ (0x1f, traceroute_request) \
+ _ (0x22, membership_report_v3) \
+ _ (0x30, router_advertisement) \
+ _ (0x31, router_solicitation) \
+ _ (0x32, router_termination)
+
+typedef enum { /* IGMP_TYPE_<name> = type value, generated from foreach_igmp_type. */
+#define _(n,f) IGMP_TYPE_##f = n,
+ foreach_igmp_type
+#undef _
+} igmp_type_t;
+
+typedef struct { /* Common IGMP header: type, code and checksum. */
+ igmp_type_t type : 8; /* enum stored in an 8-bit field */
+
+ u8 code;
+
+ u16 checksum;
+} igmp_header_t;
+
+typedef struct { /* IGMP header followed by a single IP4 address. */
+ /* membership_query, version <= 2 reports. */
+ igmp_header_t header;
+
+ /* Multicast destination address. */
+ ip4_address_t dst;
+} igmp_message_t;
+
+#define foreach_igmp_membership_group_v3_type /* X-macro list: _ (value, name) per v3 group record type */ \
+ _ (1, mode_is_filter_include) \
+ _ (2, mode_is_filter_exclude) \
+ _ (3, change_to_filter_include) \
+ _ (4, change_to_filter_exclude) \
+ _ (5, allow_new_sources) \
+ _ (6, block_old_sources)
+
+typedef enum { /* IGMP_MEMBERSHIP_GROUP_<name> = value, generated from the list above. */
+#define _(n,f) IGMP_MEMBERSHIP_GROUP_##f = n,
+ foreach_igmp_membership_group_v3_type
+#undef _
+} igmp_membership_group_v3_type_t;
+
+typedef struct { /* One group record of a version 3 membership report
+ * (element type of igmp_membership_report_v3_t.groups).  The fixed fields
+ * are followed by n_src_addresses source addresses and then n_aux_u32s
+ * words of auxiliary data. */
+ igmp_membership_group_v3_type_t type : 8;
+
+ /* Number of 32 bit words of aux data after source addresses. */
+ u8 n_aux_u32s;
+
+ /* Number of source addresses that follow. */
+ u16 n_src_addresses; /* NOTE(review): presumably network byte order when read from a packet -- confirm */
+
+ /* Destination multicast address. */
+ ip4_address_t dst_address;
+
+ ip4_address_t src_addresses[0]; /* zero-length trailer marking where the source list starts */
+} igmp_membership_group_v3_t;
+
+/* Return a pointer to the group record that follows G in a version 3
+ * membership report.
+ *
+ * A record consists of its fixed header, then n_src_addresses source
+ * addresses, then n_aux_u32s 32-bit words of auxiliary data; all three
+ * parts are skipped.  (Skipping only the variable part, as before, yields a
+ * pointer inside the current record.)
+ *
+ * NOTE(review): n_src_addresses is used exactly as stored.  If G points at
+ * wire data the count is presumably in network byte order and would need
+ * converting first -- confirm against callers. */
+always_inline igmp_membership_group_v3_t *
+igmp_membership_group_v3_next (igmp_membership_group_v3_t * g)
+{
+  return ((void *) (g + 1)
+          + g->n_src_addresses * sizeof (g->src_addresses[0])
+          + g->n_aux_u32s * sizeof (u32));
+}
+
+typedef struct { /* Version 3 membership report: header, group count, then the group records. */
+ /* Type 0x22. */
+ igmp_header_t header;
+
+ u16 unused;
+
+ /* Number of groups which follow. */
+ u16 n_groups;
+
+ igmp_membership_group_v3_t groups[0]; /* records vary in size; step with igmp_membership_group_v3_next */
+} igmp_membership_report_v3_t;
+
+/* IP6 flavor of IGMP is called MLD which is embedded in ICMP6. */
+typedef struct {
+ /* Preceded by ICMP v6 header. */
+ u16 max_response_delay_in_milliseconds;
+ u16 reserved;
+ ip6_address_t dst;
+} mld_header_t;
+
+#endif /* included_vnet_igmp_packet_h */
diff --git a/vnet/vnet/ip/ip.h b/vnet/vnet/ip/ip.h
new file mode 100644
index 00000000000..e47512a960d
--- /dev/null
+++ b/vnet/vnet/ip/ip.h
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip.h: ip generic (4 or 6) main
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_main_h
+#define included_ip_main_h
+
+#include <vppinfra/hash.h>
+#include <vppinfra/heap.h> /* adjacency heap */
+
+#include <vnet/vnet.h>
+
+#include <vnet/ip/format.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/lookup.h>
+
+#include <vnet/ip/tcp_packet.h>
+#include <vnet/ip/udp_packet.h>
+#include <vnet/ip/icmp46_packet.h>
+
+#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip4_error.h>
+#include <vnet/ip/ip4_packet.h>
+
+#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_error.h>
+#include <vnet/ip/icmp6.h>
+
+#include <vnet/ip/tcp.h>
+
+#if DPDK > 0
+#include <vnet/devices/dpdk/dpdk.h>
+#endif
+
+#include <vnet/classify/vnet_classify.h>
+
+typedef union { /* Holds either an IP4 or an IP6 address; the union itself does not record which. */
+ ip4_address_t ip4;
+ ip6_address_t ip6;
+} ip46_address_t;
+
+/* Per protocol info.  Entries are stored in ip_main_t.protocol_infos and
+ * fetched by protocol number with ip_get_protocol_info. */
+typedef struct {
+ /* Protocol name (also used as hash key). */
+ u8 * name;
+
+ /* Protocol number. */
+ ip_protocol_t protocol;
+
+ /* Format function for this IP protocol. */
+ format_function_t * format_header;
+
+ /* Parser for header. */
+ unformat_function_t * unformat_header;
+
+ /* Parser for per-protocol matches. */
+ unformat_function_t * unformat_match;
+
+ /* Parser for packet generator edits for this protocol. */
+ unformat_function_t * unformat_pg_edit;
+} ip_protocol_info_t;
+
+/* Per TCP/UDP port info.  Entries are stored in ip_main_t.port_infos and
+ * fetched by port with ip_get_tcp_udp_port_info. */
+typedef struct {
+ /* Port name (used as hash key). */
+ u8 * name;
+
+ /* UDP/TCP port number in network byte order. */
+ u16 port;
+
+ /* Port specific format function. */
+ format_function_t * format_header;
+
+ /* Parser for packet generator edits for this protocol. */
+ unformat_function_t * unformat_pg_edit;
+} tcp_udp_port_info_t;
+
+typedef struct { /* IP-generic (4 and 6) registry of per-protocol and per-port info records. */
+ /* Per IP protocol info. */
+ ip_protocol_info_t * protocol_infos;
+
+ /* Protocol info index hashed by 8 bit IP protocol. */
+ uword * protocol_info_by_protocol;
+
+ /* Hash table mapping IP protocol name (see protocols.def)
+ to protocol number. */
+ uword * protocol_info_by_name;
+
+ /* Per TCP/UDP port info. */
+ tcp_udp_port_info_t * port_infos;
+
+ /* Hash table from network-byte-order port to port info index. */
+ uword * port_info_by_port;
+
+ /* Hash table mapping TCP/UDP name to port info index. */
+ uword * port_info_by_name;
+} ip_main_t;
+
+extern ip_main_t ip_main;
+
+clib_error_t *
+ip_main_init (vlib_main_t * vm);
+
+/* Return the info record registered for IP protocol number PROTOCOL, or 0
+ * when the protocol is not in the table. */
+static inline ip_protocol_info_t *
+ip_get_protocol_info (ip_main_t * im, u32 protocol)
+{
+  uword * slot = hash_get (im->protocol_info_by_protocol, protocol);
+
+  if (! slot)
+    return 0;
+
+  return vec_elt_at_index (im->protocol_infos, slot[0]);
+}
+
+/* Return the info record registered for TCP/UDP port PORT (the key the
+ * port_info_by_port hash was filled with), or 0 when there is none. */
+static inline tcp_udp_port_info_t *
+ip_get_tcp_udp_port_info (ip_main_t * im, u32 port)
+{
+  uword * slot = hash_get (im->port_info_by_port, port);
+
+  if (! slot)
+    return 0;
+
+  return vec_elt_at_index (im->port_infos, slot[0]);
+}
+
+/* Fold N_BYTES_TO_CHECKSUM bytes of a (possibly chained) buffer into the
+ * running checksum SUM and return the updated sum.
+ *
+ * Checksumming starts FIRST_BUFFER_OFFSET bytes past the current data of
+ * FIRST_BUFFER and continues through the following segments.  The first
+ * segment only has (length - first_buffer_offset) bytes available from
+ * that starting point, so the offset is subtracted when sizing the first
+ * chunk; otherwise data spanning several segments would be read past the
+ * end of the first one and the later segments under-counted. */
+always_inline ip_csum_t
+ip_incremental_checksum_buffer (vlib_main_t * vm, vlib_buffer_t * first_buffer,
+                                u32 first_buffer_offset,
+                                u32 n_bytes_to_checksum,
+                                ip_csum_t sum)
+#if DPDK > 0
+{
+  u32 n_bytes_left = n_bytes_to_checksum;
+  struct rte_mbuf * mb = ((struct rte_mbuf *)first_buffer)-1;
+  u8 nb_segs = mb->nb_segs;
+  ASSERT(mb->data_len >= first_buffer_offset);
+  void * h;
+  u32 n;
+
+  /* Bytes usable in the first segment start after the offset. */
+  n = clib_min (n_bytes_left, mb->data_len - first_buffer_offset);
+  h = vlib_buffer_get_current (first_buffer) + first_buffer_offset;
+  while (n_bytes_left)
+    {
+      sum = ip_incremental_checksum (sum, h, n);
+      n_bytes_left -= n;
+      nb_segs--;
+      mb = mb->next;
+      if ((nb_segs == 0) || (mb == 0))
+        break;
+
+      n = clib_min (n_bytes_left, mb->data_len);
+      h = rte_ctrlmbuf_data(mb);
+    }
+
+  ASSERT(n_bytes_left == 0);
+  ASSERT(nb_segs == 0);
+  return sum;
+}
+#else
+{
+  vlib_buffer_t * b = first_buffer;
+  u32 n_bytes_left = n_bytes_to_checksum;
+  ASSERT (b->current_length >= first_buffer_offset);
+  void * h;
+  u32 n;
+
+  /* Bytes usable in the first buffer start after the offset. */
+  n = clib_min (n_bytes_left, b->current_length - first_buffer_offset);
+  h = vlib_buffer_get_current (b) + first_buffer_offset;
+  sum = ip_incremental_checksum (sum, h, n);
+  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+    {
+      while (1)
+        {
+          n_bytes_left -= n;
+          /* Done, or the chain ended before the requested length: never
+             follow next_buffer unless the flag says it is valid. */
+          if (n_bytes_left == 0
+              || ! (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+            break;
+          b = vlib_get_buffer (vm, b->next_buffer);
+          n = clib_min (n_bytes_left, b->current_length);
+          h = vlib_buffer_get_current (b);
+          sum = ip_incremental_checksum (sum, h, n);
+        }
+    }
+
+  return sum;
+}
+#endif /* DPDK */
+
+void ip_del_all_interface_addresses (vlib_main_t *vm, u32 sw_if_index);
+
+#endif /* included_ip_main_h */
diff --git a/vnet/vnet/ip/ip4.h b/vnet/vnet/ip/ip4.h
new file mode 100644
index 00000000000..6b8fd59a022
--- /dev/null
+++ b/vnet/vnet/ip/ip4.h
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4.h: ip4 main include file
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip4_h
+#define included_ip_ip4_h
+
+#include <vnet/ip/ip4_mtrie.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/lookup.h>
+
+typedef struct ip4_fib_t { /* One IP4 forwarding table (an element of ip4_main_t.fibs). */
+ /* Hash table for each prefix length mapping. */
+ uword * adj_index_by_dst_address[33]; /* 33 entries: one per prefix length 0..32 */
+
+ /* Temporary vectors for holding new/old values for hash_set. */
+ uword * new_hash_values, * old_hash_values;
+
+ /* Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. */
+ ip4_fib_mtrie_t mtrie;
+
+ /* Table ID (hash key) for this FIB. */
+ u32 table_id;
+
+ /* Index into FIB vector. */
+ u32 index;
+
+ /* flow hash configuration */
+ u32 flow_hash_config; /* IP_FLOW_HASH_* bits, as consumed by ip4_compute_flow_hash */
+
+ /* N-tuple classifier indices */
+ u32 fwd_classify_table_index;
+ u32 rev_classify_table_index;
+
+} ip4_fib_t;
+
+struct ip4_main_t;
+
+typedef void (ip4_add_del_route_function_t) /* signature of a route add/delete callback */
+ (struct ip4_main_t * im,
+ uword opaque,
+ ip4_fib_t * fib,
+ u32 flags,
+ ip4_address_t * address,
+ u32 address_length,
+ void * old_result,
+ void * new_result);
+
+typedef struct { /* One entry of ip4_main_t.add_del_route_callbacks. */
+ ip4_add_del_route_function_t * function;
+ uword required_flags; /* NOTE(review): presumably IP4_ROUTE_FLAG_* bits that must be set for the callback to run -- confirm */
+ uword function_opaque; /* NOTE(review): presumably handed back as the callback's opaque argument -- confirm */
+} ip4_add_del_route_callback_t;
+
+typedef void (ip4_add_del_interface_address_function_t) /* signature of an interface-address change callback */
+ (struct ip4_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length,
+ u32 if_address_index,
+ u32 is_del);
+
+typedef struct { /* One entry of ip4_main_t.add_del_interface_address_callbacks. */
+ ip4_add_del_interface_address_function_t * function;
+ uword function_opaque; /* NOTE(review): presumably handed back as the callback's opaque argument -- confirm */
+} ip4_add_del_interface_address_callback_t;
+
+typedef enum { /* IP4 receive-path features; the comments below give the intended ordering. */
+ /* First check access list to either permit or deny this
+ packet based on classification. */
+ IP4_RX_FEATURE_CHECK_ACCESS,
+
+ /* RPF check: verify that source address is reachable via
+ RX interface or via any interface. */
+ IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX,
+ IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY,
+
+ /* IPSec */
+ IP4_RX_FEATURE_IPSEC,
+
+ /* vPath forwarding: won't return to call next feature
+ so any feature needed before vPath forwarding must be prior
+ to this entry */
+ IP4_RX_FEATURE_VPATH,
+
+ /* Must be last: perform forwarding lookup. */
+ IP4_RX_FEATURE_LOOKUP,
+
+ IP4_N_RX_FEATURE, /* number of features listed above; not a feature itself */
+} ip4_rx_feature_type_t;
+
+typedef struct ip4_main_t { /* Global IP4 state; the single instance is ip4_main. */
+ ip_lookup_main_t lookup_main; /* lookup state shared with the generic ip_lookup / interface-address helpers */
+
+ /* Vector of FIBs. */
+ ip4_fib_t * fibs;
+
+ u32 fib_masks[33]; /* address mask per prefix length 0..32, used by ip4_destination_matches_route */
+
+ /* Table index indexed by software interface. */
+ u32 * fib_index_by_sw_if_index;
+
+ /* Hash table mapping table id to fib index.
+ ID space is not necessarily dense; index space is dense. */
+ uword * fib_index_by_table_id;
+
+ /* Vector of functions to call when routes are added/deleted. */
+ ip4_add_del_route_callback_t * add_del_route_callbacks;
+
+ /* Hash table mapping interface route rewrite adjacency index by sw if index. */
+ uword * interface_route_adj_index_by_sw_if_index;
+
+ /* Functions to call when interface address changes. */
+ ip4_add_del_interface_address_callback_t * add_del_interface_address_callbacks;
+
+ /* Template used to generate IP4 ARP packets. */
+ vlib_packet_template_t ip4_arp_request_packet_template;
+
+ /* Seed for Jenkins hash used to compute ip4 flow hash. */
+ u32 flow_hash_seed;
+
+ struct {
+ /* TTL to use for host generated packets. */
+ u8 ttl;
+
+ /* TOS byte to use for host generated packets. */
+ u8 tos;
+
+ u8 pad[2];
+ } host_config;
+} ip4_main_t;
+
+/* Global ip4 main structure. */
+extern ip4_main_t ip4_main;
+
+/* Global ip4 input node. Errors get attached to ip4 input node. */
+extern vlib_node_registration_t ip4_input_node;
+extern vlib_node_registration_t ip4_lookup_node;
+extern vlib_node_registration_t ip4_rewrite_node;
+extern vlib_node_registration_t ip4_arp_node;
+
+u32 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index, ip4_address_t * dst,
+ u32 disable_default_route);
+
+/* Look DST up in the FIB with index FIB_INDEX, default route enabled, and
+ * return what ip4_fib_lookup_with_table returns.  B is not used. */
+always_inline u32
+ip4_fib_lookup_buffer (ip4_main_t * im, u32 fib_index, ip4_address_t * dst,
+                       vlib_buffer_t * b)
+{
+  u32 disable_default_route = 0;
+
+  return ip4_fib_lookup_with_table (im, fib_index, dst, disable_default_route);
+}
+
+/* Look DST up in the FIB bound to software interface SW_IF_INDEX, default
+ * route enabled, and return what ip4_fib_lookup_with_table returns. */
+always_inline u32
+ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst)
+{
+  u32 disable_default_route = 0;
+  u32 table = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
+
+  return ip4_fib_lookup_with_table (im, table, dst, disable_default_route);
+}
+
+/* Non-zero when KEY and DEST agree on every bit selected by the mask for
+ * prefix length DEST_LENGTH, i.e. KEY lies inside DEST/DEST_LENGTH. */
+always_inline uword
+ip4_destination_matches_route (ip4_main_t * im,
+                               ip4_address_t * key,
+                               ip4_address_t * dest,
+                               uword dest_length)
+{
+  u32 differing_bits = key->data_u32 ^ dest->data_u32;
+
+  return (differing_bits & im->fib_masks[dest_length]) == 0;
+}
+
+/* Non-zero when KEY lies inside the prefix (address / address_length) of
+ * interface address IA. */
+always_inline uword
+ip4_destination_matches_interface (ip4_main_t * im,
+                                   ip4_address_t * key,
+                                   ip_interface_address_t * ia)
+{
+  ip4_address_t * if_addr;
+
+  if_addr = ip_interface_address_get_address (&im->lookup_main, ia);
+  return ip4_destination_matches_route (im, key, if_addr, ia->address_length);
+}
+
+/* Same test as ip4_destination_matches_route, but KEY is read with an
+ * unaligned load so it may point straight into a packet's IP header.
+ * DEST is still read as an ordinary aligned word. */
+always_inline uword
+ip4_unaligned_destination_matches_route (ip4_main_t * im,
+                                         ip4_address_t * key,
+                                         ip4_address_t * dest,
+                                         uword dest_length)
+{
+  u32 key_bits = clib_mem_unaligned (&key->data_u32, u32);
+  u32 differing_bits = key_bits ^ dest->data_u32;
+
+  return (differing_bits & im->fib_masks[dest_length]) == 0;
+}
+
+/* Store in *SRC the interface address that ip_interface_address_for_packet
+ * selects for buffer P on software interface SW_IF_INDEX. */
+always_inline void
+ip4_src_address_for_packet (ip4_main_t * im, vlib_buffer_t * p, ip4_address_t * src, u32 sw_if_index)
+{
+  ip_lookup_main_t * lookup = &im->lookup_main;
+  ip_interface_address_t * if_addr;
+  ip4_address_t * addr;
+
+  if_addr = ip_interface_address_for_packet (lookup, p, sw_if_index);
+  addr = ip_interface_address_get_address (lookup, if_addr);
+  src[0] = *addr;
+}
+
+/* Find interface address which matches destination.
+ * Walks the addresses of sw_if_index and returns the first one whose prefix
+ * (address / address_length) contains DST, or 0 when none does.  If
+ * RESULT_IA is non-null it receives the matching ip_interface_address_t,
+ * or 0 on a miss. */
+always_inline ip4_address_t *
+ip4_interface_address_matching_destination (ip4_main_t * im, ip4_address_t * dst, u32 sw_if_index,
+ ip_interface_address_t ** result_ia)
+{
+ ip_lookup_main_t * lm = &im->lookup_main;
+ ip_interface_address_t * ia;
+ ip4_address_t * result = 0;
+
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ ip4_address_t * a = ip_interface_address_get_address (lm, ia);
+ if (ip4_destination_matches_route (im, dst, a, ia->address_length))
+ {
+ result = a;
+ break; /* stop at the first match so ia still refers to it below */
+ }
+ }));
+ if (result_ia)
+ *result_ia = result ? ia : 0; /* ia is only meaningful when a match was found */
+ return result;
+}
+
+clib_error_t *
+ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
+ ip4_address_t * address, u32 address_length,
+ u32 is_del);
+
+int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2);
+
+/* Add/del a route to the FIB. */
+
+#define IP4_ROUTE_FLAG_ADD (0 << 0)
+#define IP4_ROUTE_FLAG_DEL (1 << 0)
+#define IP4_ROUTE_FLAG_TABLE_ID (0 << 1)
+#define IP4_ROUTE_FLAG_FIB_INDEX (1 << 1)
+#define IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY (1 << 2)
+#define IP4_ROUTE_FLAG_NO_REDISTRIBUTE (1 << 3)
+/* Not last add/del in group. Facilitates batching requests into packets. */
+#define IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP (1 << 4)
+/* Dynamic route created via ARP reply. */
+#define IP4_ROUTE_FLAG_NEIGHBOR (1 << 5)
+
+typedef struct { /* Argument bundle for ip4_add_del_route. */
+ /* IP4_ROUTE_FLAG_* */
+ u32 flags;
+
+ /* Either index of fib or table_id to hash and get fib.
+ IP4_ROUTE_FLAG_FIB_INDEX specifies index; otherwise table_id is assumed. */
+ u32 table_index_or_table_id;
+
+ /* Destination address (prefix) and length. */
+ ip4_address_t dst_address;
+ u32 dst_address_length;
+
+ /* Adjacency to use for this destination. */
+ u32 adj_index;
+
+ /* If specified, adjacencies to add and then
+ use for this destination. add_adj/n_add_adj
+ override adj_index if specified. */
+ ip_adjacency_t * add_adj;
+ u32 n_add_adj;
+} ip4_add_del_route_args_t;
+
+ip4_fib_t *
+find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
+ u32 table_index_or_id, u32 flags);
+
+void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * args);
+
+void ip4_add_del_route_next_hop (ip4_main_t * im,
+ u32 flags,
+ ip4_address_t * dst_address,
+ u32 dst_address_length,
+ ip4_address_t * next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_weight, u32 adj_index,
+ u32 explicit_fib_index);
+
+void *
+ip4_get_route (ip4_main_t * im,
+ u32 fib_index_or_table_id,
+ u32 flags,
+ u8 * address,
+ u32 address_length);
+
+void
+ip4_foreach_matching_route (ip4_main_t * im,
+ u32 table_index_or_table_id,
+ u32 flags,
+ ip4_address_t * address,
+ u32 address_length,
+ ip4_address_t ** results,
+ u8 ** result_lengths);
+
+void ip4_delete_matching_routes (ip4_main_t * im,
+ u32 table_index_or_table_id,
+ u32 flags,
+ ip4_address_t * address,
+ u32 address_length);
+
+void ip4_maybe_remap_adjacencies (ip4_main_t * im,
+ u32 table_index_or_table_id,
+ u32 flags);
+
+void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
+ ip_adjacency_t * adj,
+ u32 sw_if_index,
+ u32 if_address_index);
+
+/* Send an ARP request to see if given destination is reachable on given interface. */
+clib_error_t *
+ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index);
+
+clib_error_t *
+ip4_set_arp_limit (u32 arp_limit);
+
+uword
+ip4_tcp_register_listener (vlib_main_t * vm,
+ u16 dst_port,
+ u32 next_node_index);
+uword
+ip4_udp_register_listener (vlib_main_t * vm,
+ u16 dst_port,
+ u32 next_node_index);
+
+void
+ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type,
+ u32 node_index);
+
+u16 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip4_header_t * ip0);
+
+void ip4_register_protocol (u32 protocol, u32 node_index);
+
+serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main;
+
+int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config);
+
+void ip4_mtrie_init (ip4_fib_mtrie_t * m);
+
+int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 table_index);
+
+/* Compute flow hash. We'll use it to select which adjacency to use for this
+ flow. And other things. */
+/* Compute the flow hash of an IP4 packet.
+ *
+ * FLOW_HASH_CONFIG (IP_FLOW_HASH_* bits) selects which of source address,
+ * destination address, protocol, source port and destination port feed the
+ * hash; unselected fields contribute 0.  Ports are only taken for TCP and
+ * UDP.  IP_FLOW_HASH_REVERSE_SRC_DST swaps the roles of source and
+ * destination for both addresses and ports.
+ *
+ * NOTE(review): the L4 header is located at ip + 1, i.e. directly after a
+ * fixed-size ip4_header_t; packets carrying IP options would presumably
+ * need the real header length -- confirm. */
+always_inline u32
+ip4_compute_flow_hash (ip4_header_t * ip, u32 flow_hash_config)
+{
+  tcp_header_t * l4 = (void *) (ip + 1);
+  uword has_ports = (ip->protocol == IP_PROTOCOL_TCP
+                     || ip->protocol == IP_PROTOCOL_UDP);
+  uword swap = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) != 0;
+  u32 src_addr, dst_addr, src_port, dst_port;
+  u32 a, b, c;
+
+  src_addr = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR)
+    ? ip->src_address.data_u32 : 0;
+  dst_addr = (flow_hash_config & IP_FLOW_HASH_DST_ADDR)
+    ? ip->dst_address.data_u32 : 0;
+
+  a = swap ? dst_addr : src_addr;
+  b = swap ? src_addr : dst_addr;
+  b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0;
+
+  src_port = (has_ports && (flow_hash_config & IP_FLOW_HASH_SRC_PORT))
+    ? l4->ports.src : 0;
+  dst_port = (has_ports && (flow_hash_config & IP_FLOW_HASH_DST_PORT))
+    ? l4->ports.dst : 0;
+
+  if (swap)
+    c = (src_port << 16) | dst_port;
+  else
+    c = (dst_port << 16) | src_port;
+
+  hash_v3_mix32 (a, b, c);
+  hash_v3_finalize32 (a, b, c);
+
+  return c;
+}
+
+#endif /* included_ip_ip4_h */
diff --git a/vnet/vnet/ip/ip46_cli.c b/vnet/vnet/ip/ip46_cli.c
new file mode 100644
index 00000000000..44dde9bf3e7
--- /dev/null
+++ b/vnet/vnet/ip/ip46_cli.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip46_cli.c: ip4 and ip6 commands
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Order two IP4 addresses by their numeric value in host byte order.
+   Returns a negative value, zero or a positive value when a1 is less
+   than, equal to or greater than a2.
+   The values are compared explicitly instead of being subtracted: the
+   difference of two u32 values does not fit in an int, so a subtraction
+   reports the wrong sign whenever the addresses are more than 2^31
+   apart (for example 0.0.0.1 against 255.255.255.255). */
+int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2)
+{
+  u32 v1 = clib_net_to_host_u32 (a1->data_u32);
+  u32 v2 = clib_net_to_host_u32 (a2->data_u32);
+
+  return (v1 > v2) - (v1 < v2);
+}
+
+/* Order two IP6 addresses.  The 16 bit groups are compared in host byte
+   order starting with the first one; the difference of the first pair
+   that differs is returned, or zero when all groups are equal. */
+int ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2)
+{
+  int i = 0;
+
+  while (i < ARRAY_LEN (a1->as_u16))
+    {
+      int lhs = clib_net_to_host_u16 (a1->as_u16[i]);
+      int rhs = clib_net_to_host_u16 (a2->as_u16[i]);
+
+      if (lhs != rhs)
+        return lhs - rhs;
+      i++;
+    }
+
+  return 0;
+}
+
+/* CLI entry for the "set interface ip" path.  It carries help text only:
+   no handler function is registered here. */
+VLIB_CLI_COMMAND (set_interface_ip_command, static) = {
+ .path = "set interface ip",
+ .short_help = "IP4/IP6 commands",
+};
+
+/* Delete every IP4 and IP6 address configured on sw_if_index.
+   Addresses and prefix lengths are first copied into temporary vectors
+   and deleted afterwards, so nothing is deleted from inside the
+   foreach_ip_interface_address walk.
+   NOTE(review): the values returned by the add_del calls are ignored
+   here; add_del_ip_address treats them as clib_error_t *. */
+void ip_del_all_interface_addresses (vlib_main_t *vm, u32 sw_if_index)
+{
+ ip4_main_t * im4 = &ip4_main;
+ ip4_address_t * ip4_addrs = 0;
+ u32 *ip4_masks = 0;
+ ip6_main_t * im6 = &ip6_main;
+ ip6_address_t * ip6_addrs = 0;
+ u32 *ip6_masks = 0;
+ ip_interface_address_t * ia;
+ int i;
+
+ /* Collect the IP4 addresses and their prefix lengths. */
+ foreach_ip_interface_address (&im4->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered */,
+ ({
+ ip4_address_t * x = (ip4_address_t *)
+ ip_interface_address_get_address (&im4->lookup_main, ia);
+ vec_add1 (ip4_addrs, x[0]);
+ vec_add1 (ip4_masks, ia->address_length);
+ }));
+
+ /* Collect the IP6 addresses and their prefix lengths. */
+ foreach_ip_interface_address (&im6->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered */,
+ ({
+ ip6_address_t * x = (ip6_address_t *)
+ ip_interface_address_get_address (&im6->lookup_main, ia);
+ vec_add1 (ip6_addrs, x[0]);
+ vec_add1 (ip6_masks, ia->address_length);
+ }));
+
+ /* Now delete everything that was collected. */
+ for (i = 0; i < vec_len (ip4_addrs); i++)
+ ip4_add_del_interface_address (vm, sw_if_index, &ip4_addrs[i],
+ ip4_masks[i], 1 /* is_del */);
+ for (i = 0; i < vec_len (ip6_addrs); i++)
+ ip6_add_del_interface_address (vm, sw_if_index, &ip6_addrs[i],
+ ip6_masks[i], 1 /* is_del */);
+
+ /* Release the temporary vectors. */
+ vec_free (ip4_addrs);
+ vec_free (ip4_masks);
+ vec_free (ip6_addrs);
+ vec_free (ip6_masks);
+}
+
+/* CLI handler for "set interface ip address".
+   Input: an optional leading "del", an interface, then either "all"
+   (accepted only together with "del") or an IP4 or IP6 address/length.
+   Returns 0 on success, otherwise the parse error or the error returned
+   by the add/del interface address call. */
+static clib_error_t *
+add_del_ip_address (vlib_main_t * vm,
+                    unformat_input_t * input,
+                    vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip4_address_t a4;
+  ip6_address_t a6;
+  u32 sw_if_index = ~0;
+  u32 is_del = 0;
+  u32 length;
+
+  if (unformat (input, "del"))
+    is_del = 1;
+
+  if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    return clib_error_return (0, "unknown interface `%U'",
+                              format_unformat_error, input);
+
+  /* "all" is only looked for when deleting. */
+  if (is_del && unformat (input, "all"))
+    {
+      ip_del_all_interface_addresses (vm, sw_if_index);
+      return 0;
+    }
+
+  if (unformat (input, "%U/%d", unformat_ip4_address, &a4, &length))
+    return ip4_add_del_interface_address (vm, sw_if_index, &a4, length,
+                                          is_del);
+
+  if (unformat (input, "%U/%d", unformat_ip6_address, &a6, &length))
+    return ip6_add_del_interface_address (vm, sw_if_index, &a6, length,
+                                          is_del);
+
+  return clib_error_return (0, "expected IP4/IP6 address/length `%U'",
+                            format_unformat_error, input);
+}
+
+/* CLI entry binding the "set interface ip address" path to
+   add_del_ip_address. */
+VLIB_CLI_COMMAND (set_interface_ip_address_command, static) = {
+ .path = "set interface ip address",
+ .function = add_del_ip_address,
+ .short_help = "Add/delete IP4/IP6 address for interface",
+};
+
+/* Init function that does nothing and reports success; it is registered
+   only so that this file gets linked in. */
+static clib_error_t *
+ip4_cli_init (vlib_main_t * vm)
+{
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_cli_init);
diff --git a/vnet/vnet/ip/ip4_error.h b/vnet/vnet/ip/ip4_error.h
new file mode 100644
index 00000000000..b84b082b993
--- /dev/null
+++ b/vnet/vnet/ip/ip4_error.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_error.h: ip4 fast path errors
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip4_error_h
+#define included_ip_ip4_error_h
+
+/* X-macro list of the ip4 errors.  Each entry is _ (SYMBOL, description);
+   users define _ before expanding the list and undefine it afterwards.
+   NONE has to stay the first entry. */
+#define foreach_ip4_error \
+ /* Must be first. */ \
+ _ (NONE, "valid ip4 packets") \
+ \
+ /* Errors signalled by ip4-input */ \
+ _ (TOO_SHORT, "ip4 length < 20 bytes") \
+ _ (BAD_LENGTH, "ip4 length > l2 length") \
+ _ (BAD_CHECKSUM, "bad ip4 checksum") \
+ _ (VERSION, "ip4 version != 4") \
+ _ (OPTIONS, "ip4 options present") \
+ _ (FRAGMENT_OFFSET_ONE, "ip4 fragment offset == 1") \
+ _ (TIME_EXPIRED, "ip4 ttl <= 1") \
+ \
+ /* Errors signalled by ip4-rewrite. */ \
+ _ (MTU_EXCEEDED, "ip4 MTU exceeded and DF set") \
+ _ (DST_LOOKUP_MISS, "ip4 destination lookup miss") \
+ _ (SRC_LOOKUP_MISS, "ip4 source lookup miss") \
+ _ (ADJACENCY_DROP, "ip4 adjacency drop") \
+ _ (ADJACENCY_PUNT, "ip4 adjacency punt") \
+ \
+ /* Errors signalled by ip4-local. */ \
+ _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
+ _ (TCP_CHECKSUM, "bad tcp checksum") \
+ _ (UDP_CHECKSUM, "bad udp checksum") \
+ _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
+ \
+ /* Errors signalled by ip4-source-check. */ \
+ _ (UNICAST_SOURCE_CHECK_FAILS, "ip4 unicast source check fails") \
+ \
+ /* Spoofed packets in ip4-rewrite-local */ \
+ _(SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \
+ \
+ /* Errors signalled by ip4-inacl */ \
+ _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
+ _ (INACL_SESSION_DENY, "input ACL session deny drops")
+
+/* One IP4_ERROR_<SYMBOL> enumerator per foreach_ip4_error entry, in list
+   order; IP4_N_ERROR follows the last entry and so equals the number of
+   errors. */
+typedef enum {
+#define _(sym,str) IP4_ERROR_##sym,
+ foreach_ip4_error
+#undef _
+ IP4_N_ERROR,
+} ip4_error_t;
+
+#endif /* included_ip_ip4_error_h */
diff --git a/vnet/vnet/ip/ip4_format.c b/vnet/vnet/ip/ip4_format.c
new file mode 100644
index 00000000000..5f4f8e3667d
--- /dev/null
+++ b/vnet/vnet/ip/ip4_format.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_format.c: ip4 formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format an IP4 address as a dotted quad.  The va_list carries one
+   argument: a pointer to the four address bytes. */
+u8 * format_ip4_address (u8 * s, va_list * args)
+{
+  u8 * octets = va_arg (*args, u8 *);
+
+  s = format (s, "%d.%d.%d.%d",
+              octets[0], octets[1], octets[2], octets[3]);
+  return s;
+}
+
+/* Format an IP4 route destination as address/length.  The va_list
+   carries a pointer to the address bytes followed by the prefix length
+   (passed as a u32). */
+u8 * format_ip4_address_and_length (u8 * s, va_list * args)
+{
+  u8 * address = va_arg (*args, u8 *);
+  u8 prefix_length = va_arg (*args, u32);
+
+  s = format (s, "%U/%d", format_ip4_address, address, prefix_length);
+  return s;
+}
+
+/* Parse an IP4 address written as %d.%d.%d.%d into the four bytes the
+   va_list argument points at.  Returns 1 on success and 0 when the input
+   does not match or a component is larger than 255; the result bytes are
+   written only on success. */
+uword unformat_ip4_address (unformat_input_t * input, va_list * args)
+{
+  u8 * result = va_arg (*args, u8 *);
+  unsigned octet[4];
+  int i;
+
+  if (! unformat (input, "%d.%d.%d.%d",
+                  &octet[0], &octet[1], &octet[2], &octet[3]))
+    return 0;
+
+  /* Validate every component before touching the result. */
+  for (i = 0; i < 4; i++)
+    if (octet[i] > 255)
+      return 0;
+
+  for (i = 0; i < 4; i++)
+    result[i] = octet[i];
+
+  return 1;
+}
+
+/* Format an IP4 header.
+   The va_list carries an ip4_header_t * and a u32 max_header_bytes that
+   bounds how much of the packet may be examined.  The output lists the
+   protocol and addresses, tos/ttl/length/checksum (plus the expected
+   checksum when the stored one does not match), the fragment id, offset
+   and flags, and finally the next protocol header when a formatter is
+   registered for that protocol. */
+u8 * format_ip4_header (u8 * s, va_list * args)
+{
+  ip4_header_t * ip = va_arg (*args, ip4_header_t *);
+  u32 max_header_bytes = va_arg (*args, u32);
+  u32 ip_version, header_bytes;
+  uword indent;
+
+  /* Not enough bytes for a complete header: nothing to do. */
+  if (max_header_bytes < sizeof (ip[0]))
+    return format (s, "IP header truncated");
+
+  indent = format_get_indent (s);
+  indent += 2;
+
+  ip_version = (ip->ip_version_and_header_length >> 4);
+  header_bytes = (ip->ip_version_and_header_length & 0xf) * sizeof (u32);
+
+  s = format (s, "%U: %U -> %U",
+              format_ip_protocol, ip->protocol,
+              format_ip4_address, ip->src_address.data,
+              format_ip4_address, ip->dst_address.data);
+
+  /* Show IP version and header length only with unexpected values. */
+  if (ip_version != 4 || header_bytes != sizeof (ip4_header_t))
+    s = format (s, "\n%Uversion %d, header length %d",
+                format_white_space, indent,
+                ip_version, header_bytes);
+
+  s = format (s, "\n%Utos 0x%02x, ttl %d, length %d, checksum 0x%04x",
+              format_white_space, indent,
+              ip->tos, ip->ttl,
+              clib_net_to_host_u16 (ip->length),
+              clib_net_to_host_u16 (ip->checksum));
+
+  /* Check and report invalid checksums. */
+  {
+    u16 c = ip4_header_checksum (ip);
+    if (c != ip->checksum)
+      s = format (s, " (should be 0x%04x)", clib_net_to_host_u16 (c));
+  }
+
+  {
+    u32 f = clib_net_to_host_u16 (ip->flags_and_fragment_offset);
+    u32 o;
+    char * sep = "";
+
+    s = format (s, "\n%Ufragment id 0x%04x",
+                format_white_space, indent,
+                clib_net_to_host_u16 (ip->fragment_id));
+
+    /* Fragment offset: the low 13 bits, counted in units of 8 bytes. */
+    o = 8 * (f & 0x1fff);
+
+    /* Strip the offset field so that only flag bits are left in f.  This
+       has to clear the raw 13 bit field, not the offset already scaled
+       to bytes, otherwise leftover offset bits are mistaken for flags. */
+    f &= ~0x1fff;
+    if (o != 0)
+      s = format (s, " offset %d", o);
+
+    if (f != 0)
+      {
+        s = format (s, ", flags ");
+        /* Separate the flag names with a space when several are set. */
+#define _(l) if (f & IP4_HEADER_FLAG_##l) { s = format (s, "%s%s", sep, #l); sep = " "; }
+        _ (MORE_FRAGMENTS);
+        _ (DONT_FRAGMENT);
+        _ (CONGESTION);
+#undef _
+      }
+  }
+
+  /* Recurse into next protocol layer. */
+  if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+    {
+      ip_main_t * im = &ip_main;
+      ip_protocol_info_t * pi = ip_get_protocol_info (im, ip->protocol);
+
+      if (pi && pi->format_header)
+        s = format (s, "\n%U%U",
+                    format_white_space, indent - 2,
+                    pi->format_header,
+                    /* next protocol header */ (void*) ip + header_bytes,
+                    max_header_bytes - header_bytes);
+    }
+
+  return s;
+}
+
+/* Parse an IP4 header.
+   The va_list carries a u8 ** byte vector.  Space for one ip4_header_t is
+   appended to it and filled from input of the form
+   "<protocol>: <src> -> <dst>" followed by any of "tos N", "ttl N",
+   "fragment id N offset N" and the flags mf/df/ce.  When an unformat
+   function is registered for the protocol it is then run on the same
+   vector to append the next layer.
+   Returns 1 on success, 0 when the mandatory part or the next layer
+   fails to parse; in that case the space appended for the header stays
+   in the vector. */
+uword unformat_ip4_header (unformat_input_t * input, va_list * args)
+{
+  u8 ** result = va_arg (*args, u8 **);
+  ip4_header_t * ip;
+  int old_length;
+
+  /* Allocate space for IP header. */
+  {
+    void * p;
+
+    old_length = vec_len (*result);
+    vec_add2 (*result, p, sizeof (ip4_header_t));
+    ip = p;
+  }
+
+  memset (ip, 0, sizeof (ip[0]));
+  ip->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+
+  if (! unformat (input, "%U: %U -> %U",
+                  unformat_ip_protocol, &ip->protocol,
+                  unformat_ip4_address, &ip->src_address,
+                  unformat_ip4_address, &ip->dst_address))
+    return 0;
+
+  /* Parse options. */
+  while (1)
+    {
+      int i, j;
+
+      if (unformat (input, "tos %U", unformat_vlib_number, &i))
+        ip->tos = i;
+
+      else if (unformat (input, "ttl %U", unformat_vlib_number, &i))
+        ip->ttl = i;
+
+      else if (unformat (input, "fragment id %U offset %U",
+                         unformat_vlib_number, &i,
+                         unformat_vlib_number, &j))
+        {
+          ip->fragment_id = clib_host_to_net_u16 (i);
+          /* The offset is the second number (j), given in bytes; the
+             header stores it in units of 8 bytes in the low 13 bits. */
+          ip->flags_and_fragment_offset |=
+            clib_host_to_net_u16 ((j / 8) & 0x1fff);
+        }
+
+      /* Flags. */
+      else if (unformat (input, "mf") || unformat (input, "MF"))
+        ip->flags_and_fragment_offset |= clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+      else if (unformat (input, "df") || unformat (input, "DF"))
+        ip->flags_and_fragment_offset |= clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
+
+      else if (unformat (input, "ce") || unformat (input, "CE"))
+        ip->flags_and_fragment_offset |= clib_host_to_net_u16 (IP4_HEADER_FLAG_CONGESTION);
+
+      /* Can't parse input: try next protocol level. */
+      else
+        break;
+    }
+
+  /* Recurse into next protocol layer. */
+  {
+    ip_main_t * im = &ip_main;
+    ip_protocol_info_t * pi = ip_get_protocol_info (im, ip->protocol);
+
+    if (pi && pi->unformat_header)
+      {
+        if (! unformat_user (input, pi->unformat_header, result))
+          return 0;
+
+        /* Result may have moved. */
+        ip = (void *) *result + old_length;
+      }
+  }
+
+  /* Fill in IP length. */
+  ip->length = clib_host_to_net_u16 (vec_len (*result) - old_length);
+
+  /* Fill in checksum.  This is done last so that the checksum covers the
+     final contents of the header, including the length set just above. */
+  ip->checksum = ip4_header_checksum (ip);
+
+  return 1;
+}
diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c
new file mode 100644
index 00000000000..fd304163a6b
--- /dev/null
+++ b/vnet/vnet/ip/ip4_forward.c
@@ -0,0 +1,3564 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_forward.c: IP v4 forwarding
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
+#include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
+#include <vnet/ppp/ppp.h>
+#include <vnet/srp/srp.h> /* for srp_hw_interface_class */
+#include <vnet/api_errno.h> /* for API error numbers */
+
+/* A very simple, unoptimized FIB lookup.
+   The per prefix length hash tables of FIB fib_index are probed from the
+   longest prefix length down to the shortest, and the first hit supplies
+   the adjacency index.  A non-zero disable_default_route stops the search
+   before prefix length 0.  Without any hit the lookup main's
+   miss_adj_index is returned. */
+u32
+ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
+                           ip4_address_t * dst,
+                           u32 disable_default_route)
+{
+  ip_lookup_main_t * lm = &im->lookup_main;
+  ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
+  i32 shortest_len = disable_default_route ? 1 : 0;
+  i32 addr = clib_mem_unaligned (&dst->data_u32, u32);
+  i32 len;
+
+  for (len = ARRAY_LEN (fib->adj_index_by_dst_address) - 1;
+       len >= shortest_len;
+       len--)
+    {
+      uword * table = fib->adj_index_by_dst_address[len];
+      uword * hit, key;
+
+      /* No routes of this length. */
+      if (table == 0)
+        continue;
+
+      key = addr & im->fib_masks[len];
+      hit = hash_get (table, key);
+      if (hit != 0)
+        return hit[0];
+    }
+
+  /* Nothing matches in table. */
+  return lm->miss_adj_index;
+}
+
+/* Append a new FIB for table_id to im->fibs, record its index in
+   fib_index_by_table_id, give it default settings and return it. */
+static ip4_fib_t *
+create_fib_with_table_id (ip4_main_t * im, u32 table_id)
+{
+  ip4_fib_t * new_fib;
+  uword new_index = vec_len (im->fibs);
+
+  /* The current vector length is the index the new element will get. */
+  hash_set (im->fib_index_by_table_id, table_id, new_index);
+
+  vec_add2 (im->fibs, new_fib, 1);
+  new_fib->index = new_fib - im->fibs;
+  new_fib->table_id = table_id;
+  new_fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
+
+  /* ~0: no classify table assigned. */
+  new_fib->rev_classify_table_index = ~0;
+  new_fib->fwd_classify_table_index = ~0;
+
+  ip4_mtrie_init (&new_fib->mtrie);
+
+  return new_fib;
+}
+
+/* Return the FIB named by table_index_or_id.
+   With IP4_ROUTE_FLAG_FIB_INDEX set in flags the value is used directly
+   as an index into im->fibs.  Otherwise it is a table id that is looked
+   up in fib_index_by_table_id, and a FIB is created for it when the id is
+   not known yet. */
+ip4_fib_t *
+find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
+                                   u32 table_index_or_id, u32 flags)
+{
+  uword * entry;
+  uword fib_index;
+
+  if (flags & IP4_ROUTE_FLAG_FIB_INDEX)
+    fib_index = table_index_or_id;
+  else
+    {
+      entry = hash_get (im->fib_index_by_table_id, table_index_or_id);
+      if (entry == 0)
+        return create_fib_with_table_id (im, table_index_or_id);
+      fib_index = entry[0];
+    }
+
+  return vec_elt_at_index (im->fibs, fib_index);
+}
+
+/* Create the hash table of FIB fib that maps destination addresses of
+   prefix length address_length to their hash values, and make sure the
+   FIB's new/old scratch value vectors can hold one hash value. */
+static void
+ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
+ ip4_fib_t * fib,
+ u32 address_length)
+{
+ hash_t * h;
+ uword max_index;
+
+ /* Size of one hash value, rounded up to whole uwords. */
+ ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
+ lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
+
+ fib->adj_index_by_dst_address[address_length] =
+ hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
+
+ hash_set_flags (fib->adj_index_by_dst_address[address_length],
+ HASH_FLAG_NO_AUTO_SHRINK);
+
+ /* Last valid index of a scratch vector holding one hash value. */
+ h = hash_header (fib->adj_index_by_dst_address[address_length]);
+ max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
+
+ /* Initialize new/old hash value vectors. */
+ vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
+ vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
+}
+
+/* Serialize all bytes of the ip4_address_t the va_list argument points
+   at. */
+static void serialize_ip4_address (serialize_main_t * m, va_list * va)
+{
+  ip4_address_t * addr = va_arg (*va, ip4_address_t *);
+  uword n_bytes = sizeof (addr->as_u8);
+  u8 * out = serialize_get (m, n_bytes);
+
+  memcpy (out, addr->as_u8, n_bytes);
+}
+
+/* Unserialize a complete address into the ip4_address_t the va_list
+   argument points at. */
+static void unserialize_ip4_address (serialize_main_t * m, va_list * va)
+{
+  ip4_address_t * addr = va_arg (*va, ip4_address_t *);
+  uword n_bytes = sizeof (addr->as_u8);
+  u8 * in = unserialize_get (m, n_bytes);
+
+  memcpy (addr->as_u8, in, n_bytes);
+}
+
+/* Serialize an IP4 prefix: one byte holding the prefix length, followed
+   by only as many leading address bytes as the length covers.  The
+   va_list carries the ip4_address_t * and the length as a u32. */
+static void serialize_ip4_address_and_length (serialize_main_t * m, va_list * va)
+{
+  ip4_address_t * addr = va_arg (*va, ip4_address_t *);
+  u32 l = va_arg (*va, u32);
+  u32 n_bytes = l / 8;
+  u8 * out;
+
+  /* A partially covered byte is sent as well. */
+  if ((l % 8) != 0)
+    n_bytes++;
+
+  out = serialize_get (m, 1 + n_bytes);
+  ASSERT (l <= 32);
+  out[0] = l;
+  memcpy (out + 1, addr->as_u8, n_bytes);
+}
+
+/* Unserialize an IP4 prefix: one byte holding the prefix length,
+   followed by only as many leading address bytes as the length covers.
+   The va_list carries the ip4_address_t * to fill and a u32 * that
+   receives the prefix length.  Address bytes that are not part of the
+   message are set to zero. */
+static void unserialize_ip4_address_and_length (serialize_main_t * m, va_list * va)
+{
+  ip4_address_t * a = va_arg (*va, ip4_address_t *);
+  u32 * al = va_arg (*va, u32 *);
+  u8 * p = unserialize_get (m, 1);
+  u32 l, n_bytes;
+
+  al[0] = l = p[0];
+  ASSERT (l <= 32);
+  n_bytes = (l / 8) + ((l % 8) != 0);
+
+  /* Callers pass addresses that live on their stack without initializing
+     them.  Clear the address first so the bytes beyond the prefix are
+     well defined instead of being whatever was on the stack. */
+  memset (a->as_u8, 0, sizeof (a->as_u8));
+
+  if (n_bytes)
+    {
+      p = unserialize_get (m, n_bytes);
+      memcpy (a->as_u8, p, n_bytes);
+    }
+}
+
+/* Serialize the ip4_add_del_route_args_t the va_list argument points at.
+   Field order: table index/id, flags, destination prefix, adjacency
+   index, number of adjacencies to add and, when that number is not zero,
+   the adjacencies themselves.  unserialize_ip4_add_del_route_msg reads
+   the fields back in this order. */
+static void serialize_ip4_add_del_route_msg (serialize_main_t * m, va_list * va)
+{
+ ip4_add_del_route_args_t * a = va_arg (*va, ip4_add_del_route_args_t *);
+
+ serialize_likely_small_unsigned_integer (m, a->table_index_or_table_id);
+ serialize_likely_small_unsigned_integer (m, a->flags);
+ serialize (m, serialize_ip4_address_and_length, &a->dst_address, a->dst_address_length);
+ serialize_likely_small_unsigned_integer (m, a->adj_index);
+ serialize_likely_small_unsigned_integer (m, a->n_add_adj);
+ if (a->n_add_adj > 0)
+ serialize (m, serialize_vec_ip_adjacency, a->add_adj, a->n_add_adj);
+}
+
+/* Serialized adjacencies for arp/rewrite do not send graph next_index
+ since graph hookup is not guaranteed to be the same for both sides
+ of serialize/unserialize.
+ For each of the n_adj adjacencies that is a rewrite or arp adjacency
+ this function sets the node index and next index again from the local
+ graph; arp adjacencies additionally get their rewrite rebuilt.  Other
+ adjacency types are left untouched. */
+static void
+unserialize_fixup_ip4_rewrite_adjacencies (vlib_main_t * vm,
+ ip_adjacency_t * adj,
+ u32 n_adj)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ u32 i, ni, sw_if_index, is_arp;
+ vnet_hw_interface_t * hw;
+
+ for (i = 0; i < n_adj; i++)
+ {
+ switch (adj[i].lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_REWRITE:
+ case IP_LOOKUP_NEXT_ARP:
+ is_arp = adj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP;
+ sw_if_index = adj[i].rewrite_header.sw_if_index;
+ hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ /* The owning node is ip4-arp or ip4-rewrite; its next is the output
+ node of the hardware interface found for sw_if_index. */
+ ni = is_arp ? ip4_arp_node.index : ip4_rewrite_node.index;
+ adj[i].rewrite_header.node_index = ni;
+ adj[i].rewrite_header.next_index = vlib_node_add_next (vm, ni, hw->output_node_index);
+ if (is_arp)
+ vnet_rewrite_for_sw_interface
+ (vnm,
+ VNET_L3_PACKET_TYPE_ARP,
+ sw_if_index,
+ ni,
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
+ &adj[i].rewrite_header,
+ sizeof (adj->rewrite_data));
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+/* Unserialize a message written by serialize_ip4_add_del_route_msg and
+   apply it to ip4_main through ip4_add_del_route. */
+static void unserialize_ip4_add_del_route_msg (serialize_main_t * m, va_list * va)
+{
+ ip4_main_t * i4m = &ip4_main;
+ ip4_add_del_route_args_t a;
+
+ /* Read the fields in the order they were serialized. */
+ a.table_index_or_table_id = unserialize_likely_small_unsigned_integer (m);
+ a.flags = unserialize_likely_small_unsigned_integer (m);
+ unserialize (m, unserialize_ip4_address_and_length, &a.dst_address, &a.dst_address_length);
+ a.adj_index = unserialize_likely_small_unsigned_integer (m);
+ a.n_add_adj = unserialize_likely_small_unsigned_integer (m);
+ a.add_adj = 0;
+ if (a.n_add_adj > 0)
+ {
+ /* Read the adjacencies into a temporary vector and redo their graph
+ hookup locally. */
+ vec_resize (a.add_adj, a.n_add_adj);
+ unserialize (m, unserialize_vec_ip_adjacency, a.add_adj, a.n_add_adj);
+ unserialize_fixup_ip4_rewrite_adjacencies (vlib_get_main(),
+ a.add_adj, a.n_add_adj);
+ }
+
+ /* Prevent re-re-distribution. */
+ a.flags |= IP4_ROUTE_FLAG_NO_REDISTRIBUTE;
+
+ ip4_add_del_route (i4m, &a);
+
+ vec_free (a.add_adj);
+}
+
+/* Message descriptor pairing the route add/del serialize and unserialize
+   functions; ip4_add_del_route passes it to mc_serialize2. */
+MC_SERIALIZE_MSG (ip4_add_del_route_msg, static) = {
+ .name = "vnet_ip4_add_del_route",
+ .serialize = serialize_ip4_add_del_route_msg,
+ .unserialize = unserialize_ip4_add_del_route_msg,
+};
+
+/* Set the hash entry for dst_address_u32 in the table for prefix length
+   dst_address_length of FIB fib so that it holds adj_index, then run the
+   registered add/del route callbacks.  After the call
+   fib->old_hash_values is what _hash_set3 stored there; ip4_add_del_route
+   reads element 0 of it as the previous adjacency index, with ~0 meaning
+   none. */
+static void
+ip4_fib_set_adj_index (ip4_main_t * im,
+ ip4_fib_t * fib,
+ u32 flags,
+ u32 dst_address_u32,
+ u32 dst_address_length,
+ u32 adj_index)
+{
+ ip_lookup_main_t * lm = &im->lookup_main;
+ uword * hash;
+
+ /* Reset both scratch vectors to all ones, then put the new adjacency
+ index into the first slot of the new value. */
+ if (vec_bytes(fib->old_hash_values))
+ memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
+ if (vec_bytes(fib->new_hash_values))
+ memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
+ fib->new_hash_values[0] = adj_index;
+
+ /* Make sure adj index is valid. */
+ if (CLIB_DEBUG > 0)
+ (void) ip_get_adjacency (lm, adj_index);
+
+ hash = fib->adj_index_by_dst_address[dst_address_length];
+
+ hash = _hash_set3 (hash, dst_address_u32,
+ fib->new_hash_values,
+ fib->old_hash_values);
+
+ /* Store the returned table pointer back into the FIB. */
+ fib->adj_index_by_dst_address[dst_address_length] = hash;
+
+ if (vec_len (im->add_del_route_callbacks) > 0)
+ {
+ ip4_add_del_route_callback_t * cb;
+ ip4_address_t d;
+ uword * p;
+
+ /* Call every callback whose required flags are all present. */
+ d.data_u32 = dst_address_u32;
+ vec_foreach (cb, im->add_del_route_callbacks)
+ if ((flags & cb->required_flags) == cb->required_flags)
+ cb->function (im, cb->function_opaque,
+ fib, flags,
+ &d, dst_address_length,
+ fib->old_hash_values,
+ fib->new_hash_values);
+
+ /* Copy new_hash_values into the hash entry again, presumably because
+ the callbacks may have modified it -- TODO confirm. */
+ p = hash_get (hash, dst_address_u32);
+ memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
+ }
+}
+
+/* Add or, with IP4_ROUTE_FLAG_DEL set in a->flags, delete the route
+   described by a.
+   When vm->mc_main is set and IP4_ROUTE_FLAG_NO_REDISTRIBUTE is not, the
+   request is only handed to mc_serialize2 and the function returns;
+   unserialize_ip4_add_del_route_msg calls back in here with
+   NO_REDISTRIBUTE set.
+   Otherwise both the per prefix length hash table and the mtrie of the
+   FIB are updated, the registered route callbacks are run, and a
+   replaced adjacency is deleted unless
+   IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY is set. */
+void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
+{
+ vlib_main_t * vm = vlib_get_main();
+ ip_lookup_main_t * lm = &im->lookup_main;
+ ip4_fib_t * fib;
+ u32 dst_address, dst_address_length, adj_index, old_adj_index;
+ uword * hash, is_del;
+ ip4_add_del_route_callback_t * cb;
+
+ if (vm->mc_main && ! (a->flags & IP4_ROUTE_FLAG_NO_REDISTRIBUTE))
+ {
+ u32 multiple_messages_per_vlib_buffer = (a->flags & IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP);
+ mc_serialize2 (vm->mc_main, multiple_messages_per_vlib_buffer,
+ &ip4_add_del_route_msg, a);
+ return;
+ }
+
+ /* Either create new adjacency or use given one depending on arguments. */
+ if (a->n_add_adj > 0)
+ {
+ ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
+ ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
+ }
+ else
+ adj_index = a->adj_index;
+
+ dst_address = a->dst_address.data_u32;
+ dst_address_length = a->dst_address_length;
+ fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
+
+ /* The hash key is the destination masked to the prefix length. */
+ ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
+ dst_address &= im->fib_masks[dst_address_length];
+
+ /* Create the table for this prefix length on first use. */
+ if (! fib->adj_index_by_dst_address[dst_address_length])
+ ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
+
+ hash = fib->adj_index_by_dst_address[dst_address_length];
+
+ is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
+
+ if (is_del)
+ {
+ /* old_hash_values[0] staying ~0 after the unset is taken to mean that
+ the destination was not in the table. */
+ fib->old_hash_values[0] = ~0;
+ hash = _hash_unset (hash, dst_address, fib->old_hash_values);
+ fib->adj_index_by_dst_address[dst_address_length] = hash;
+
+ if (vec_len (im->add_del_route_callbacks) > 0
+ && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
+ {
+ fib->new_hash_values[0] = ~0;
+ vec_foreach (cb, im->add_del_route_callbacks)
+ if ((a->flags & cb->required_flags) == cb->required_flags)
+ cb->function (im, cb->function_opaque,
+ fib, a->flags,
+ &a->dst_address, dst_address_length,
+ fib->old_hash_values,
+ fib->new_hash_values);
+ }
+ }
+ else
+ ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
+ adj_index);
+
+ old_adj_index = fib->old_hash_values[0];
+
+ /* NOTE(review): for a delete of a destination that was not found,
+ old_adj_index is ~0 here and the mtrie call is still made with it.
+ The mtrie also receives the unmasked a->dst_address. */
+ ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
+ is_del ? old_adj_index : adj_index,
+ is_del);
+
+ /* Delete old adjacency index if present and changed. */
+ if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
+ && old_adj_index != ~0
+ && old_adj_index != adj_index)
+ ip_del_adjacency (lm, old_adj_index);
+}
+
+/* Serialize a next hop add/del request.  The va_list carries, in this
+   order: flags, destination address pointer, destination prefix length,
+   next hop address pointer, next hop sw_if_index and next hop weight.
+   They are written in the same order, with the destination sent as a
+   prefix and the next hop as a full address. */
+static void serialize_ip4_add_del_route_next_hop_msg (serialize_main_t * m, va_list * va)
+{
+ u32 flags = va_arg (*va, u32);
+ ip4_address_t * dst_address = va_arg (*va, ip4_address_t *);
+ u32 dst_address_length = va_arg (*va, u32);
+ ip4_address_t * next_hop_address = va_arg (*va, ip4_address_t *);
+ u32 next_hop_sw_if_index = va_arg (*va, u32);
+ u32 next_hop_weight = va_arg (*va, u32);
+
+ serialize_likely_small_unsigned_integer (m, flags);
+ serialize (m, serialize_ip4_address_and_length, dst_address, dst_address_length);
+ serialize (m, serialize_ip4_address, next_hop_address);
+ serialize_likely_small_unsigned_integer (m, next_hop_sw_if_index);
+ serialize_likely_small_unsigned_integer (m, next_hop_weight);
+}
+
+/* Unserialize a message written by
+   serialize_ip4_add_del_route_next_hop_msg and apply it to ip4_main
+   through ip4_add_del_route_next_hop.  IP4_ROUTE_FLAG_NO_REDISTRIBUTE is
+   added to the flags, and ~0 is passed for both the adjacency index and
+   the explicit FIB index. */
+static void unserialize_ip4_add_del_route_next_hop_msg (serialize_main_t * m, va_list * va)
+{
+ ip4_main_t * im = &ip4_main;
+ u32 flags, dst_address_length, next_hop_sw_if_index, next_hop_weight;
+ ip4_address_t dst_address, next_hop_address;
+
+ /* Read the fields in the order they were serialized. */
+ flags = unserialize_likely_small_unsigned_integer (m);
+ unserialize (m, unserialize_ip4_address_and_length, &dst_address, &dst_address_length);
+ unserialize (m, unserialize_ip4_address, &next_hop_address);
+ next_hop_sw_if_index = unserialize_likely_small_unsigned_integer (m);
+ next_hop_weight = unserialize_likely_small_unsigned_integer (m);
+
+ ip4_add_del_route_next_hop
+ (im,
+ flags | IP4_ROUTE_FLAG_NO_REDISTRIBUTE,
+ &dst_address,
+ dst_address_length,
+ &next_hop_address,
+ next_hop_sw_if_index,
+ next_hop_weight, (u32)~0,
+ (u32)~0 /* explicit FIB index */);
+}
+
+/* Message descriptor pairing the next hop add/del serialize and
+   unserialize functions; ip4_add_del_route_next_hop passes it to
+   mc_serialize2. */
+MC_SERIALIZE_MSG (ip4_add_del_route_next_hop_msg, static) = {
+ .name = "vnet_ip4_add_del_route_next_hop",
+ .serialize = serialize_ip4_add_del_route_next_hop_msg,
+ .unserialize = unserialize_ip4_add_del_route_next_hop_msg,
+};
+
+void
+ip4_add_del_route_next_hop (ip4_main_t * im,
+ u32 flags,
+ ip4_address_t * dst_address,
+ u32 dst_address_length,
+ ip4_address_t * next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_weight, u32 adj_index,
+ u32 explicit_fib_index)
+{ /* Add or delete a single next hop of the route dst_address/dst_address_length.
+ *
+ * flags              IP4_ROUTE_FLAG_* bits; IP4_ROUTE_FLAG_DEL requests a delete.
+ * next_hop           next-hop address; an all-zero address selects the
+ *                    interface-route adjacency of next_hop_sw_if_index.
+ * next_hop_weight    passed to ip_multipath_adjacency_add_del_next_hop ().
+ * adj_index          next-hop adjacency to use, or ~0 to look it up here.
+ * explicit_fib_index FIB index to modify, or ~0 to use the FIB bound to
+ *                    next_hop_sw_if_index.
+ *
+ * Nothing is returned: every failure sets vnm->api_errno and the error is
+ * handed to clib_error_report () at the "done" label. */
+ vnet_main_t * vnm = vnet_get_main();
+ vlib_main_t * vm = vlib_get_main();
+ ip_lookup_main_t * lm = &im->lookup_main;
+ u32 fib_index;
+ ip4_fib_t * fib;
+ u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
+ u32 dst_adj_index, nh_adj_index;
+ uword * dst_hash, * dst_result;
+ uword * nh_hash, * nh_result;
+ ip_adjacency_t * dst_adj;
+ ip_multipath_adjacency_t * old_mp, * new_mp;
+ int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
+ int is_interface_next_hop;
+ clib_error_t * error = 0;
+ /* With an mc_main present and redistribution not suppressed, the request is
+  * only serialized and the function returns without touching local tables.
+  * Note that adj_index and explicit_fib_index are not part of the message. */
+ if (vm->mc_main && ! (flags & IP4_ROUTE_FLAG_NO_REDISTRIBUTE))
+ {
+ u32 multiple_messages_per_vlib_buffer = (flags & IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP);
+ mc_serialize2 (vm->mc_main,
+ multiple_messages_per_vlib_buffer,
+ &ip4_add_del_route_next_hop_msg,
+ flags,
+ dst_address, dst_address_length,
+ next_hop, next_hop_sw_if_index, next_hop_weight);
+ return;
+ }
+ /* Pick the FIB: the caller's explicit index, else the one of the next-hop interface. */
+ if (explicit_fib_index == (u32)~0)
+ fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
+ else
+ fib_index = explicit_fib_index;
+
+ fib = vec_elt_at_index (im->fibs, fib_index);
+
+ /* Lookup next hop to be added or deleted (skipped when the caller supplied adj_index). */
+ is_interface_next_hop = next_hop->data_u32 == 0;
+ if (adj_index == (u32)~0)
+ {
+ if (is_interface_next_hop)
+ {
+ nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
+ if (nh_result)
+ nh_adj_index = *nh_result;
+ else
+ { /* No interface-route adjacency cached for this interface yet: create one and remember it. */
+ ip_adjacency_t * adj;
+ adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
+ &nh_adj_index);
+ ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
+ ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
+ hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
+ }
+ }
+ else
+ {
+ nh_hash = fib->adj_index_by_dst_address[32]; /* the /32 table of the chosen FIB */
+ nh_result = hash_get (nh_hash, next_hop->data_u32);
+
+ /* Next hop must be known. */
+ if (! nh_result)
+ {
+ vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
+ error = clib_error_return (0, "next-hop %U/32 not in FIB",
+ format_ip4_address, next_hop);
+ goto done;
+ }
+ nh_adj_index = *nh_result;
+ }
+ }
+ else
+ {
+ nh_adj_index = adj_index;
+ }
+ ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
+ dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
+ /* Find the adjacency currently installed for the masked destination, if any. */
+ dst_hash = fib->adj_index_by_dst_address[dst_address_length];
+ dst_result = hash_get (dst_hash, dst_address_u32);
+ if (dst_result)
+ {
+ dst_adj_index = dst_result[0];
+ dst_adj = ip_get_adjacency (lm, dst_adj_index);
+ }
+ else
+ {
+ /* For deletes destination must be known. */
+ if (is_del)
+ {
+ vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
+ error = clib_error_return (0, "unknown destination %U/%d",
+ format_ip4_address, dst_address,
+ dst_address_length);
+ goto done;
+ }
+
+ dst_adj_index = ~0;
+ dst_adj = 0;
+ }
+
+ /* Reject adds of X/32 with next hop of X; the check only fires when the caller passed an explicit adj_index. */
+ if (! is_del
+ && dst_address_length == 32
+ && dst_address->data_u32 == next_hop->data_u32
+ && adj_index != (u32)~0)
+ {
+ vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
+ error = clib_error_return (0, "prefix matches next hop %U/%d",
+ format_ip4_address, dst_address,
+ dst_address_length);
+ goto done;
+ }
+ /* heap_handle of the existing adjacency is used as the index of its multipath adjacency (~0: none). */
+ old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
+ /* Add or remove the next hop; a zero return is reported as "next hop not found". */
+ if (! ip_multipath_adjacency_add_del_next_hop
+ (lm, is_del,
+ old_mp_adj_index,
+ nh_adj_index,
+ next_hop_weight,
+ &new_mp_adj_index))
+ {
+ vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
+ error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
+ format_ip4_address, next_hop);
+ goto done;
+ }
+
+ old_mp = new_mp = 0;
+ if (old_mp_adj_index != ~0)
+ old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
+ if (new_mp_adj_index != ~0)
+ new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
+ /* Rewrite the FIB entry only when the multipath adjacency actually changed. */
+ if (old_mp != new_mp)
+ {
+ ip4_add_del_route_args_t a;
+ a.table_index_or_table_id = fib_index;
+ a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD) /* the prefix is deleted only when no multipath adjacency remains */
+ | IP4_ROUTE_FLAG_FIB_INDEX
+ | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
+ | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
+ a.dst_address = dst_address[0];
+ a.dst_address_length = dst_address_length;
+ a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
+ a.add_adj = 0;
+ a.n_add_adj = 0;
+
+ ip4_add_del_route (im, &a);
+ }
+
+ done: /* common exit: report the error, if one was recorded */
+ if (error)
+ clib_error_report (error);
+}
+
+/*
+ * Exact-match lookup of address/address_length in one FIB.
+ *
+ * table_index_or_table_id and flags are passed unchanged to
+ * find_ip4_fib_by_table_index_or_id () to select the FIB.  address points
+ * at the 4 address bytes, in the same byte order as the FIB hash keys.
+ *
+ * Returns the pointer produced by hash_get () on the per-length hash
+ * (null when the prefix is not present), cast to void *.
+ */
+void *
+ip4_get_route (ip4_main_t * im,
+               u32 table_index_or_table_id,
+               u32 flags,
+               u8 * address,
+               u32 address_length)
+{
+  ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
+  u32 dst_address;
+  uword * hash, * p;
+
+  ASSERT (address_length < ARRAY_LEN (im->fib_masks));
+
+  /* address is a byte pointer with no alignment guarantee: copy the four
+     bytes out instead of dereferencing it through a u32 pointer. */
+  memcpy (&dst_address, address, sizeof (dst_address));
+  dst_address &= im->fib_masks[address_length];
+
+  hash = fib->adj_index_by_dst_address[address_length];
+  p = hash_get (hash, dst_address);
+  return (void *) p;
+}
+
+/*
+ * Collect the routes of one FIB that agree with address in its first
+ * address_length bits.
+ *
+ * Prefix lengths are scanned upwards starting at address_length; the scan
+ * stops after the first length that produced at least one match, so all
+ * results share a single prefix length.
+ *
+ * results and result_lengths point at caller-owned vectors.  Both are
+ * truncated to length zero on entry (their storage is reused) and then
+ * filled in parallel: (*results)[i] is the route key and
+ * (*result_lengths)[i] its prefix length.
+ */
+void
+ip4_foreach_matching_route (ip4_main_t * im,
+                            u32 table_index_or_table_id,
+                            u32 flags,
+                            ip4_address_t * address,
+                            u32 address_length,
+                            ip4_address_t ** results,
+                            u8 ** result_lengths)
+{
+  ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
+  u32 dst_address = address->data_u32;
+  u32 this_length = address_length;
+
+  if (*results)
+    _vec_len (*results) = 0;
+  if (*result_lengths)
+    _vec_len (*result_lengths) = 0;
+
+  /* The length test must look at the vector itself (*results).  Applying
+     vec_len () to the pointer-to-vector reads whatever memory happens to
+     sit in front of the caller's variable. */
+  while (this_length <= 32 && vec_len (*results) == 0)
+    {
+      uword k, v;
+      hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
+        if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
+          {
+            ip4_address_t a;
+            a.data_u32 = k;
+            vec_add1 (*results, a);
+            vec_add1 (*result_lengths, this_length);
+          }
+      }));
+
+      this_length++;
+    }
+}
+
+/*
+ * Apply the pending adjacency remaps recorded in
+ * lm->adjacency_remap_table to one FIB.
+ *
+ * Does nothing when lm->n_adjacency_remaps is zero.  Otherwise every
+ * prefix of every length is visited and its adjacency index is looked up
+ * in the remap table:
+ *   0   - adjacency is not remapped, prefix left alone;
+ *   ~0  - adjacency no longer exists, prefix is removed from the FIB;
+ *   m   - prefix is re-pointed at adjacency m - 1.
+ * Registered add/del route callbacks whose required_flags are all present
+ * in flags are invoked for each change.  Finally the mtrie is remapped,
+ * the remap table is zeroed and the remap counter reset.
+ */
+void ip4_maybe_remap_adjacencies (ip4_main_t * im,
+                                  u32 table_index_or_table_id,
+                                  u32 flags)
+{
+  ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
+  ip_lookup_main_t * lm = &im->lookup_main;
+  u32 i, l;
+  ip4_address_t a;
+  ip4_add_del_route_callback_t * cb;
+  static ip4_address_t * to_delete;
+
+  if (lm->n_adjacency_remaps == 0)
+    return;
+
+  for (l = 0; l <= 32; l++)
+    {
+      hash_pair_t * p;
+      uword * hash = fib->adj_index_by_dst_address[l];
+
+      if (hash_elts (hash) == 0)
+        continue;
+
+      /* Reuse the scratch vector from the previous prefix length. */
+      if (to_delete)
+        _vec_len (to_delete) = 0;
+
+      hash_foreach_pair (p, hash, ({
+        u32 adj_index = p->value[0];
+        u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
+
+        if (m)
+          {
+            /* Record destination address from hash key. */
+            a.data_u32 = p->key;
+
+            /* New adjacency points to nothing: so delete prefix.  Removal
+               is deferred until the iteration over the hash is finished. */
+            if (m == ~0)
+              vec_add1 (to_delete, a);
+            else
+              {
+                /* Remap to new adjacency. */
+                memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
+
+                /* Set new adjacency value. */
+                fib->new_hash_values[0] = p->value[0] = m - 1;
+
+                vec_foreach (cb, im->add_del_route_callbacks)
+                  if ((flags & cb->required_flags) == cb->required_flags)
+                    cb->function (im, cb->function_opaque,
+                                  fib, flags | IP4_ROUTE_FLAG_ADD,
+                                  &a, l,
+                                  fib->old_hash_values,
+                                  fib->new_hash_values);
+              }
+          }
+      }));
+
+      fib->new_hash_values[0] = ~0;
+      for (i = 0; i < vec_len (to_delete); i++)
+        {
+          /* _hash_unset () returns the hash pointer to use from now on;
+             store it back into the FIB so the table never keeps a stale
+             pointer. */
+          fib->adj_index_by_dst_address[l] =
+            _hash_unset (fib->adj_index_by_dst_address[l],
+                         to_delete[i].data_u32,
+                         fib->old_hash_values);
+
+          /* Notify with the prefix that was actually removed, not with
+             whatever key the iteration above happened to visit last. */
+          vec_foreach (cb, im->add_del_route_callbacks)
+            if ((flags & cb->required_flags) == cb->required_flags)
+              cb->function (im, cb->function_opaque,
+                            fib, flags | IP4_ROUTE_FLAG_DEL,
+                            &to_delete[i], l,
+                            fib->old_hash_values,
+                            fib->new_hash_values);
+        }
+    }
+
+  /* Also remap adjacencies in mtrie. */
+  ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
+
+  /* Reset mapping table. */
+  vec_zero (lm->adjacency_remap_table);
+
+  /* All remaps have been performed. */
+  lm->n_adjacency_remaps = 0;
+}
+
+/*
+ * Delete routes longer than address/address_length that
+ * ip4_foreach_matching_route () reports for this address, then apply any
+ * adjacency remaps that the deletions left pending.
+ *
+ * Each prefix length from address_length + 1 up to 32 is handed to
+ * ip4_foreach_matching_route () in turn and every reported route is
+ * removed with ip4_add_del_route () (DEL | NO_REDISTRIBUTE | flags).
+ */
+void ip4_delete_matching_routes (ip4_main_t * im,
+                                 u32 table_index_or_table_id,
+                                 u32 flags,
+                                 ip4_address_t * address,
+                                 u32 address_length)
+{
+  /* Scratch vectors kept across calls so their storage is reused. */
+  static ip4_address_t * matching_addresses;
+  static u8 * matching_address_lengths;
+  ip4_add_del_route_args_t del_args;
+  u32 len;
+
+  del_args.table_index_or_table_id = table_index_or_table_id;
+  del_args.flags = flags | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_DEL;
+  del_args.n_add_adj = 0;
+  del_args.add_adj = 0;
+  del_args.adj_index = ~0;
+
+  len = address_length + 1;
+  while (len <= 32)
+    {
+      u32 n = 0;
+
+      ip4_foreach_matching_route (im, table_index_or_table_id, flags,
+                                  address, len,
+                                  &matching_addresses,
+                                  &matching_address_lengths);
+
+      while (n < vec_len (matching_addresses))
+        {
+          del_args.dst_address_length = matching_address_lengths[n];
+          del_args.dst_address = matching_addresses[n];
+          ip4_add_del_route (im, &del_args);
+          n++;
+        }
+
+      len++;
+    }
+
+  ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
+}
+
+always_inline uword
+ip4_lookup_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int lookup_for_responses_to_locally_received_packets)
+{ /* Shared body of the IPv4 lookup node function(s).
+ *
+ * For every buffer in the frame: choose a FIB, find the adjacency for the
+ * destination address, pick one member of a multipath adjacency by flow
+ * hash, store the result in vnet_buffer (p)->ip.adj_index[VLIB_TX], bump the
+ * per-adjacency combined counter and enqueue the buffer to the adjacency's
+ * lookup_next_index.
+ *
+ * lookup_for_responses_to_locally_received_packets: when non-zero the mtrie
+ * is not walked; the adjacency index is taken from
+ * vnet_buffer (p)->ip.adj_index[VLIB_RX] instead.
+ *
+ * Returns frame->n_vectors. */
+ ip4_main_t * im = &ip4_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
+ u32 n_left_from, n_left_to_next, * from, * to_next;
+ ip_lookup_next_t next;
+ u32 cpu_index = os_get_cpu_number();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index; /* speculate that packets keep going to the last next node used */
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+ /* Dual loop: two packets per iteration; needs four left because from[2] and from[3] are prefetched. */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t * p0, * p1;
+ ip4_header_t * ip0, * ip1;
+ __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
+ ip_lookup_next_t next0, next1;
+ ip_adjacency_t * adj0, * adj1;
+ ip4_fib_mtrie_t * mtrie0, * mtrie1;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
+ __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
+ u32 flow_hash_config0, flow_hash_config1;
+ u32 hash_c0, hash_c1;
+ u32 wrong_next;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
+ }
+ /* Speculatively enqueue both buffers to the current next frame. */
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ /* FIB of the RX interface, unless sw_if_index[VLIB_TX] is set (!= ~0), in which case that value is used as the FIB index. */
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
+ fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
+ fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
+ fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
+
+ /* The mtrie walk takes four lookup steps (0..3) on the destination address; other work is interleaved between them. */
+ if (! lookup_for_responses_to_locally_received_packets)
+ {
+ mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
+ mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
+
+ leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 0);
+ }
+ /* tcp0/tcp1 and is_tcp_udp0/1 are computed but not read again in this function (hence the unused attribute). */
+ tcp0 = (void *) (ip0 + 1);
+ tcp1 = (void *) (ip1 + 1);
+
+ is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
+ || ip0->protocol == IP_PROTOCOL_UDP);
+ is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
+ || ip1->protocol == IP_PROTOCOL_UDP);
+
+ if (! lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 1);
+ }
+
+ if (! lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 2);
+ }
+
+ if (! lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->dst_address, 3);
+ }
+
+ if (lookup_for_responses_to_locally_received_packets)
+ {
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
+ adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
+ }
+ else
+ {
+ /* Handle default route. */
+ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+ leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
+
+ adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ }
+ /* Debug cross-check of the result against ip4_fib_lookup_with_table (). */
+ ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
+ &ip0->dst_address,
+ /* no_default_route */ 0));
+ ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
+ &ip1->dst_address,
+ /* no_default_route */ 0));
+ adj0 = ip_get_adjacency (lm, adj_index0);
+ adj1 = ip_get_adjacency (lm, adj_index1);
+
+ next0 = adj0->lookup_next_index;
+ next1 = adj1->lookup_next_index;
+
+ /* Use flow hash to compute multipath adjacency; the stored hash stays 0 when the adjacency has a single member. */
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
+ if (PREDICT_FALSE (adj0->n_adj > 1))
+ {
+ flow_hash_config0 =
+ vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, flow_hash_config0);
+ }
+ if (PREDICT_FALSE(adj1->n_adj > 1))
+ {
+ flow_hash_config1 =
+ vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
+ hash_c1 = vnet_buffer (p1)->ip.flow_hash =
+ ip4_compute_flow_hash (ip1, flow_hash_config1);
+ }
+
+ ASSERT (adj0->n_adj > 0);
+ ASSERT (adj1->n_adj > 0);
+ ASSERT (is_pow2 (adj0->n_adj));
+ ASSERT (is_pow2 (adj1->n_adj));
+ adj_index0 += (hash_c0 & (adj0->n_adj - 1)); /* n_adj is a power of two, so the mask selects one member of the block */
+ adj_index1 += (hash_c1 & (adj1->n_adj - 1));
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
+ /* Count one packet and chain length plus sizeof (ethernet_header_t) bytes against the chosen adjacency. */
+ vlib_increment_combined_counter
+ (cm, cpu_index, adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0)
+ + sizeof(ethernet_header_t));
+ vlib_increment_combined_counter
+ (cm, cpu_index, adj_index1, 1,
+ vlib_buffer_length_in_chain (vm, p1)
+ + sizeof(ethernet_header_t));
+
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+ /* Bit 0 set: p0 does not belong in the current next frame; bit 1 set: p1 does not. */
+ wrong_next = (next0 != next) + 2*(next1 != next);
+ if (PREDICT_FALSE (wrong_next != 0))
+ {
+ switch (wrong_next)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = pi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ break;
+
+ case 3:
+ /* A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ if (next0 == next1)
+ {
+ /* A B B */
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next1;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ }
+ }
+ }
+ }
+ /* Single loop: same steps as the dual loop, one packet at a time. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ ip4_header_t * ip0;
+ __attribute__((unused)) tcp_header_t * tcp0;
+ ip_lookup_next_t next0;
+ ip_adjacency_t * adj0;
+ ip4_fib_mtrie_t * mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
+ u32 flow_hash_config0, hash_c0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
+ fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
+
+ if (! lookup_for_responses_to_locally_received_packets)
+ {
+ mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
+
+ leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 0);
+ }
+
+ tcp0 = (void *) (ip0 + 1);
+
+ is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
+ || ip0->protocol == IP_PROTOCOL_UDP);
+
+ if (! lookup_for_responses_to_locally_received_packets)
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 1);
+
+ if (! lookup_for_responses_to_locally_received_packets)
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 2);
+
+ if (! lookup_for_responses_to_locally_received_packets)
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->dst_address, 3);
+
+ if (lookup_for_responses_to_locally_received_packets)
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
+ else
+ {
+ /* Handle default route. */
+ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
+ adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ }
+
+ ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
+ &ip0->dst_address,
+ /* no_default_route */ 0));
+
+ adj0 = ip_get_adjacency (lm, adj_index0);
+
+ next0 = adj0->lookup_next_index;
+
+ /* Use flow hash to compute multipath adjacency. */
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ if (PREDICT_FALSE(adj0->n_adj > 1))
+ {
+ flow_hash_config0 =
+ vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
+
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, flow_hash_config0);
+ }
+
+ ASSERT (adj0->n_adj > 0);
+ ASSERT (is_pow2 (adj0->n_adj));
+ adj_index0 += (hash_c0 & (adj0->n_adj - 1));
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0)
+ + sizeof(ethernet_header_t));
+
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+ /* Wrong guess: take the buffer back out, hand the frame over, switch to next0 and enqueue the buffer there. */
+ if (PREDICT_FALSE (next0 != next))
+ {
+ n_left_to_next += 1;
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next0;
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/*
+ * Node function registered for "ip4-lookup": runs the shared lookup body
+ * with the mtrie walk enabled.
+ */
+static uword
+ip4_lookup (vlib_main_t * vm,
+            vlib_node_runtime_t * node,
+            vlib_frame_t * frame)
+{
+  const int lookup_for_responses_to_locally_received_packets = 0;
+
+  return ip4_lookup_inline (vm, node, frame,
+                            lookup_for_responses_to_locally_received_packets);
+}
+
+/*
+ * Turn adj into the interface-route adjacency for sw_if_index.
+ *
+ * When the supporting hardware interface is of the ethernet or srp class
+ * the adjacency goes to ARP (and remembers if_address_index); for every
+ * other class it becomes a plain rewrite adjacency.  In both cases the
+ * rewrite header is filled in by vnet_rewrite_for_sw_interface ().
+ */
+void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
+                                        ip_adjacency_t * adj,
+                                        u32 sw_if_index,
+                                        u32 if_address_index)
+{
+  vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  int goes_to_arp = (hw->hw_class_index == ethernet_hw_interface_class.index
+                     || hw->hw_class_index == srp_hw_interface_class.index);
+  vnet_l3_packet_type_t l3_type;
+  ip_lookup_next_t lookup_next;
+  u32 next_node_index;
+
+  if (! goes_to_arp)
+    {
+      lookup_next = IP_LOOKUP_NEXT_REWRITE;
+      next_node_index = ip4_rewrite_node.index;
+      l3_type = VNET_L3_PACKET_TYPE_IP4;
+    }
+  else
+    {
+      /*
+       * We have a bit of a problem in this case. ip4-arp uses
+       * the rewrite_header.next_index to hand pkts to the
+       * indicated interface output node. We can end up in
+       * ip4_rewrite_local, too, which also pays attention to
+       * rewrite_header.next index. Net result: a hack in
+       * ip4_rewrite_local...
+       */
+      lookup_next = IP_LOOKUP_NEXT_ARP;
+      next_node_index = ip4_arp_node.index;
+      adj->if_address_index = if_address_index;
+      l3_type = VNET_L3_PACKET_TYPE_ARP;
+    }
+
+  adj->lookup_next_index = lookup_next;
+  vnet_rewrite_for_sw_interface (vnm,
+                                 l3_type,
+                                 sw_if_index,
+                                 next_node_index,
+                                 VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
+                                 &adj->rewrite_header,
+                                 sizeof (adj->rewrite_data));
+}
+
+/*
+ * Install the routes that belong to one interface address:
+ *   - the connected prefix (only when the prefix is shorter than /32),
+ *     using an interface-route adjacency that is also remembered in
+ *     a->neighbor_probe_adj_index;
+ *   - the address itself as a /32, pointing at a local adjacency, or at a
+ *     classify adjacency when a classify table is configured for the
+ *     interface.
+ */
+static void
+ip4_add_interface_routes (u32 sw_if_index,
+                          ip4_main_t * im, u32 fib_index,
+                          ip_interface_address_t * a)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip_lookup_main_t * lm = &im->lookup_main;
+  ip4_address_t * if_addr = ip_interface_address_get_address (lm, a);
+  vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  u32 this_address_index = a - lm->if_address_pool;
+  u32 table_index = ~0;
+  ip4_add_del_route_args_t route;
+  ip_adjacency_t * adj;
+
+  route.table_index_or_table_id = fib_index;
+  route.flags = (IP4_ROUTE_FLAG_ADD
+                 | IP4_ROUTE_FLAG_FIB_INDEX
+                 | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
+  route.dst_address = if_addr[0];
+  route.dst_address_length = a->address_length;
+  route.add_adj = 0;
+  route.n_add_adj = 0;
+
+  /* Connected prefix, e.g. 1.0.0.0/8 (arp for Ethernet). */
+  a->neighbor_probe_adj_index = ~0;
+  if (a->address_length < 32)
+    {
+      adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
+                              &route.adj_index);
+      ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, this_address_index);
+      ip_call_add_del_adjacency_callbacks (lm, route.adj_index, /* is_del */ 0);
+      ip4_add_del_route (im, &route);
+      a->neighbor_probe_adj_index = route.adj_index;
+    }
+
+  /* Host route, e.g. 1.1.1.1/32, delivered to this host. */
+  adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
+                          &route.adj_index);
+
+  if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
+    table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
+
+  if (table_index == (u32) ~0)
+    adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
+  else
+    {
+      adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
+      adj->classify_table_index = table_index;
+    }
+
+  adj->if_address_index = this_address_index;
+  adj->rewrite_header.sw_if_index = sw_if_index;
+  adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
+  /*
+   * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
+   * fail an RPF-ish check, but still go thru the rewrite code...
+   */
+  adj->rewrite_header.data_bytes = 0;
+
+  ip_call_add_del_adjacency_callbacks (lm, route.adj_index, /* is_del */ 0);
+  route.dst_address_length = 32;
+  ip4_add_del_route (im, &route);
+}
+
+/*
+ * Remove the routes that belong to one interface address: the connected
+ * prefix (when shorter than /32), the /32 host route, and finally whatever
+ * ip4_delete_matching_routes () finds below address/address_length.
+ */
+static void
+ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
+{
+  ip4_add_del_route_args_t route;
+
+  route.flags = (IP4_ROUTE_FLAG_DEL
+                 | IP4_ROUTE_FLAG_FIB_INDEX
+                 | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
+  route.table_index_or_table_id = fib_index;
+  route.dst_address = address[0];
+  route.dst_address_length = address_length;
+  route.adj_index = ~0;
+  route.add_adj = 0;
+  route.n_add_adj = 0;
+
+  /* Delete e.g. 1.0.0.0/8, the interface route. */
+  if (address_length < 32)
+    ip4_add_del_route (im, &route);
+
+  /* Delete e.g. 1.1.1.1/32, the host route. */
+  route.dst_address_length = 32;
+  ip4_add_del_route (im, &route);
+
+  ip4_delete_matching_routes (im,
+                              fib_index,
+                              IP4_ROUTE_FLAG_FIB_INDEX,
+                              address,
+                              address_length);
+}
+
+typedef struct { /* One (interface, address, prefix length) triple as carried by the interface-address serialize messages. */
+ u32 sw_if_index; /* software interface the address belongs to */
+ ip4_address_t address; /* the IPv4 address */
+ u32 length; /* prefix length; used as the address_length argument by the code in this file */
+} ip4_interface_address_t;
+
+/*
+ * Serialize n ip4_interface_address_t records.  Per record the stream
+ * receives sw_if_index, the address, then the prefix length.
+ * va_list arguments: ip4_interface_address_t * (first record), u32 n.
+ */
+static void serialize_vec_ip4_set_interface_address (serialize_main_t * m, va_list * va)
+{
+  ip4_interface_address_t * rec = va_arg (*va, ip4_interface_address_t *);
+  u32 n = va_arg (*va, u32);
+  ip4_interface_address_t * end = rec + n;
+
+  for (; rec < end; rec++)
+    {
+      serialize_integer (m, rec->sw_if_index, sizeof (rec->sw_if_index));
+      serialize (m, serialize_ip4_address, &rec->address);
+      serialize_integer (m, rec->length, sizeof (rec->length));
+    }
+}
+
+/*
+ * Read n ip4_interface_address_t records back from the stream, in the
+ * field order written by serialize_vec_ip4_set_interface_address ().
+ * va_list arguments: ip4_interface_address_t * (storage for n records),
+ * u32 n.
+ */
+static void unserialize_vec_ip4_set_interface_address (serialize_main_t * m, va_list * va)
+{
+  ip4_interface_address_t * rec = va_arg (*va, ip4_interface_address_t *);
+  u32 n = va_arg (*va, u32);
+  ip4_interface_address_t * end = rec + n;
+
+  for (; rec < end; rec++)
+    {
+      unserialize_integer (m, &rec->sw_if_index, sizeof (rec->sw_if_index));
+      unserialize (m, unserialize_ip4_address, &rec->address);
+      unserialize_integer (m, &rec->length, sizeof (rec->length));
+    }
+}
+
+/*
+ * Serializer of the "vnet_ip4_set_interface_address" message: a single
+ * address record followed by the is_del flag.
+ * va_list arguments: ip4_interface_address_t *, int is_del.
+ */
+static void serialize_ip4_set_interface_address_msg (serialize_main_t * m, va_list * va)
+{
+  ip4_interface_address_t * record = va_arg (*va, ip4_interface_address_t *);
+  int is_del = va_arg (*va, int);
+
+  serialize (m, serialize_vec_ip4_set_interface_address, record, /* n records */ 1);
+  serialize_integer (m, is_del, sizeof (is_del));
+}
+
+/* Forward declaration: the unserialize handler below calls this before its definition. */
+static clib_error_t *
+ip4_add_del_interface_address_internal (vlib_main_t * vm,
+                                        u32 sw_if_index,
+                                        ip4_address_t * address,
+                                        u32 address_length,
+                                        u32 redistribute,
+                                        u32 insert_routes,
+                                        u32 is_del);
+
+/*
+ * Handler for a received "vnet_ip4_set_interface_address" message: decode
+ * the record and the is_del flag, then apply the change locally with
+ * redistribution turned off and route insertion turned on.  Any error is
+ * reported with clib_error_report ().
+ * va_list argument: mc_main_t *.
+ */
+static void unserialize_ip4_set_interface_address_msg (serialize_main_t * m, va_list * va)
+{
+  mc_main_t * mcm = va_arg (*va, mc_main_t *);
+  vlib_main_t * vm = mcm->vlib_main;
+  ip4_interface_address_t record;
+  clib_error_t * err;
+  int is_del;
+
+  unserialize (m, unserialize_vec_ip4_set_interface_address, &record, /* n records */ 1);
+  unserialize_integer (m, &is_del, sizeof (is_del));
+
+  err = ip4_add_del_interface_address_internal (vm,
+                                                record.sw_if_index,
+                                                &record.address,
+                                                record.length,
+                                                /* redistribute */ 0,
+                                                /* insert_routes */ 1,
+                                                is_del);
+  if (! err)
+    return;
+
+  clib_error_report (err);
+}
+
+MC_SERIALIZE_MSG (ip4_set_interface_address_msg, static) = { /* Message descriptor that ip4_add_del_interface_address_internal () hands to mc_serialize (). */
+ .name = "vnet_ip4_set_interface_address",
+ .serialize = serialize_ip4_set_interface_address_msg,
+ .unserialize = unserialize_ip4_set_interface_address_msg,
+};
+
+/*
+ * Add or delete the IPv4 address address/address_length on sw_if_index.
+ *
+ * redistribute   when non-zero and vm->mc_main is set, the request is only
+ *                serialized as an ip4_set_interface_address_msg and no
+ *                local state is changed by this call.
+ * insert_routes  when non-zero and the interface is admin-up, the
+ *                interface routes are added or deleted as well.
+ * is_del         non-zero to delete, zero to add.
+ *
+ * Adding an address that overlaps an address already configured on the
+ * interface fails.  Registered add/del interface address callbacks run
+ * only when the address pool size actually changed.
+ *
+ * Returns 0 on success or a clib_error_t describing the failure.
+ */
+static clib_error_t *
+ip4_add_del_interface_address_internal (vlib_main_t * vm,
+                                        u32 sw_if_index,
+                                        ip4_address_t * address,
+                                        u32 address_length,
+                                        u32 redistribute,
+                                        u32 insert_routes,
+                                        u32 is_del)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip4_main_t * im = &ip4_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+  clib_error_t * error = 0;
+  u32 if_address_index, elts_before;
+  ip4_address_fib_t ip4_af, * addr_fib = 0;
+
+  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+  ip4_addr_fib_init (&ip4_af, address,
+                     vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
+  vec_add1 (addr_fib, ip4_af);
+
+  /* When adding an address check that it does not conflict with an existing address. */
+  if (! is_del)
+    {
+      ip_interface_address_t * ia;
+      foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
+                                    0 /* honor unnumbered */,
+      ({
+        ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
+
+        if (ip4_destination_matches_route (im, address, x, ia->address_length)
+            || ip4_destination_matches_route (im, x, address, address_length))
+          {
+            /* Leave through "done" so that addr_fib is freed; returning
+               from here would leak the vector allocated above. */
+            error = clib_error_create ("failed to add %U which conflicts with %U for interface %U",
+                                       format_ip4_address_and_length, address, address_length,
+                                       format_ip4_address_and_length, x, ia->address_length,
+                                       format_vnet_sw_if_index_name, vnm, sw_if_index);
+            goto done;
+          }
+      }));
+    }
+
+  if (vm->mc_main && redistribute)
+    {
+      ip4_interface_address_t a;
+      a.sw_if_index = sw_if_index;
+      a.address = address[0];
+      a.length = address_length;
+      mc_serialize (vm->mc_main, &ip4_set_interface_address_msg,
+                    &a, (int)is_del);
+      goto done;
+    }
+
+  elts_before = pool_elts (lm->if_address_pool);
+
+  error = ip_interface_address_add_del
+    (lm,
+     sw_if_index,
+     addr_fib,
+     address_length,
+     is_del,
+     &if_address_index);
+  if (error)
+    goto done;
+
+  if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
+    {
+      if (is_del)
+        ip4_del_interface_routes (im, ip4_af.fib_index, address,
+                                  address_length);
+
+      else
+        ip4_add_interface_routes (sw_if_index,
+                                  im, ip4_af.fib_index,
+                                  pool_elt_at_index
+                                  (lm->if_address_pool, if_address_index));
+    }
+
+  /* An unchanged pool size means nothing was really added or removed
+     (e.g. a duplicate address), so the callbacks are skipped. */
+  if (elts_before != pool_elts (lm->if_address_pool))
+    {
+      ip4_add_del_interface_address_callback_t * cb;
+      vec_foreach (cb, im->add_del_interface_address_callbacks)
+        cb->function (im, cb->function_opaque, sw_if_index,
+                      address, address_length,
+                      if_address_index,
+                      is_del);
+    }
+
+ done:
+  vec_free (addr_fib);
+  return error;
+}
+
+/*
+ * Public entry point for adding (is_del == 0) or deleting an interface
+ * address: the request is redistributed and interface routes are
+ * maintained.  Returns 0 on success, otherwise the error from
+ * ip4_add_del_interface_address_internal ().
+ */
+clib_error_t *
+ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
+                               ip4_address_t * address, u32 address_length,
+                               u32 is_del)
+{
+  const u32 redistribute = 1;
+  const u32 insert_routes = 1;
+
+  return ip4_add_del_interface_address_internal (vm, sw_if_index,
+                                                 address, address_length,
+                                                 redistribute,
+                                                 insert_routes,
+                                                 is_del);
+}
+
+/*
+ * Serialize one FIB: its table_id, then for every prefix length the number
+ * of routes followed by (address, adjacency index) pairs.
+ * va_list argument: ip4_fib_t *.
+ */
+static void serialize_ip4_fib (serialize_main_t * m, va_list * va)
+{
+  ip4_fib_t * fib = va_arg (*va, ip4_fib_t *);
+  u32 len, key, value;
+
+  serialize_integer (m, fib->table_id, sizeof (fib->table_id));
+
+  for (len = 0; len < ARRAY_LEN (fib->adj_index_by_dst_address); len++)
+    {
+      uword * routes = fib->adj_index_by_dst_address[len];
+      u32 n_routes = hash_elts (routes);
+
+      serialize_integer (m, n_routes, sizeof (n_routes));
+      hash_foreach (key, value, routes, ({
+        ip4_address_t dst;
+        dst.as_u32 = key;
+        serialize (m, serialize_ip4_address, &dst);
+        serialize_integer (m, value, sizeof (value));
+      }));
+    }
+}
+
+/*
+ * Read one FIB in the layout written by serialize_ip4_fib () and add every
+ * route to ip4_main with ip4_add_del_route (), addressing the table by
+ * table id and without redistribution.
+ */
+static void unserialize_ip4_fib (serialize_main_t * m, va_list * va)
+{
+  ip4_add_del_route_args_t route;
+  u32 len;
+
+  route.add_adj = 0;
+  route.n_add_adj = 0;
+  route.flags = (IP4_ROUTE_FLAG_ADD
+                 | IP4_ROUTE_FLAG_NO_REDISTRIBUTE
+                 | IP4_ROUTE_FLAG_TABLE_ID);
+
+  unserialize_integer (m, &route.table_index_or_table_id,
+                       sizeof (route.table_index_or_table_id));
+
+  for (len = 0; len < STRUCT_ARRAY_LEN (ip4_fib_t, adj_index_by_dst_address); len++)
+    {
+      u32 remaining;
+
+      unserialize_integer (m, &remaining, sizeof (u32));
+      route.dst_address_length = len;
+
+      for (; remaining > 0; remaining--)
+        {
+          unserialize (m, unserialize_ip4_address, &route.dst_address);
+          unserialize_integer (m, &route.adj_index, sizeof (route.adj_index));
+          ip4_add_del_route (&ip4_main, &route);
+        }
+    }
+}
+
+/*
+ * Serialize the IPv4 state of ip4_main, in this order: the lookup main,
+ * the number of FIBs and each FIB, the sw_if_index -> FIB index vector,
+ * and the vector of all interface addresses.
+ * va_list argument: vnet_main_t *.
+ */
+void serialize_vnet_ip4_main (serialize_main_t * m, va_list * va)
+{
+  vnet_main_t * vnm = va_arg (*va, vnet_main_t *);
+  vnet_interface_main_t * vim = &vnm->interface_main;
+  ip4_main_t * i4m = &ip4_main;
+  ip_lookup_main_t * lm = &i4m->lookup_main;
+  ip4_interface_address_t * addrs = 0, * rec;
+  vnet_sw_interface_t * si;
+  u32 n_fibs, fi;
+
+  /* Download adjacency tables & multipath stuff. */
+  serialize (m, serialize_ip_lookup_main, lm);
+
+  /* FIBs. */
+  n_fibs = vec_len (i4m->fibs);
+  serialize_integer (m, n_fibs, sizeof (n_fibs));
+  for (fi = 0; fi < vec_len (i4m->fibs); fi++)
+    serialize (m, serialize_ip4_fib, i4m->fibs + fi);
+
+  /* FIB interface config. */
+  vec_serialize (m, i4m->fib_index_by_sw_if_index, serialize_vec_32);
+
+  /* Interface ip4 addresses: gather them all into one vector first. */
+  pool_foreach (si, vim->sw_interfaces, ({
+    u32 sw_if_index = si->sw_if_index;
+    ip_interface_address_t * ia;
+    foreach_ip_interface_address (lm, ia, sw_if_index,
+                                  0 /* honor unnumbered */,
+    ({
+      ip4_address_t * x = ip_interface_address_get_address (lm, ia);
+      vec_add2 (addrs, rec, 1);
+      rec->sw_if_index = sw_if_index;
+      rec->address = x[0];
+      rec->length = ia->address_length;
+    }));
+  }));
+  vec_serialize (m, addrs, serialize_vec_ip4_set_interface_address);
+  vec_free (addrs);
+}
+
+/*
+ * Rebuild the IPv4 state of ip4_main from a stream written by
+ * serialize_vnet_ip4_main (): lookup main, adjacency fix-ups, FIBs, the
+ * sw_if_index -> FIB index vector and the interface addresses.
+ *
+ * Interface addresses are re-added without redistribution and without
+ * inserting routes (the routes come from the serialized FIBs).  A failure
+ * to add an address is reported with clib_error_report () and the
+ * remaining addresses are still processed.
+ * va_list argument: vlib_main_t *.
+ */
+void unserialize_vnet_ip4_main (serialize_main_t * m, va_list * va)
+{
+  vlib_main_t * vm = va_arg (*va, vlib_main_t *);
+  ip4_main_t * i4m = &ip4_main;
+  ip4_interface_address_t * as = 0, * a;
+
+  unserialize (m, unserialize_ip_lookup_main, &i4m->lookup_main);
+
+  /* Fix up every adjacency block just read and announce it to the callbacks. */
+  {
+    ip_adjacency_t * adj, * adj_heap;
+    u32 n_adj;
+    adj_heap = i4m->lookup_main.adjacency_heap;
+    heap_foreach (adj, n_adj, adj_heap, ({
+      unserialize_fixup_ip4_rewrite_adjacencies (vm, adj, n_adj);
+      ip_call_add_del_adjacency_callbacks (&i4m->lookup_main, adj - adj_heap, /* is_del */ 0);
+    }));
+  }
+
+  /* FIBs */
+  {
+    u32 i, n_fibs;
+    unserialize_integer (m, &n_fibs, sizeof (n_fibs));
+    for (i = 0; i < n_fibs; i++)
+      unserialize (m, unserialize_ip4_fib);
+  }
+
+  vec_unserialize (m, &i4m->fib_index_by_sw_if_index, unserialize_vec_32);
+
+  vec_unserialize (m, &as, unserialize_vec_ip4_set_interface_address);
+  vec_foreach (a, as) {
+    clib_error_t * error;
+
+    error = ip4_add_del_interface_address_internal
+      (vm, a->sw_if_index, &a->address, a->length,
+       /* redistribute */ 0,
+       /* insert_routes */ 0,
+       /* is_del */ 0);
+    /* Do not drop the error object silently. */
+    if (error)
+      clib_error_report (error);
+  }
+  vec_free (as);
+}
+
+/*
+ * Admin up/down hook: when the interface comes up the routes of all its
+ * addresses are installed, when it goes down they are removed.  Also
+ * makes sure the per-interface vectors cover sw_if_index.  Always
+ * returns 0.
+ */
+static clib_error_t *
+ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
+                                u32 sw_if_index,
+                                u32 flags)
+{
+  ip4_main_t * im = &ip4_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+  u32 going_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+  ip_interface_address_t * ia;
+  ip4_address_t * addr;
+  u32 fib_index;
+
+  /* Fill in lookup tables with default table (0). */
+  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+
+  vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
+
+  fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
+
+  foreach_ip_interface_address (lm, ia, sw_if_index,
+                                0 /* honor unnumbered */,
+  ({
+    addr = ip_interface_address_get_address (lm, ia);
+    if (! going_up)
+      ip4_del_interface_routes (im, fib_index, addr, ia->address_length);
+    else
+      ip4_add_interface_routes (sw_if_index, im, fib_index, ia);
+  }));
+
+  return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
+
+/*
+ * Interface add/delete hook.  For every cast type the rx feature
+ * configuration is initialised on first use, and the lookup feature is
+ * then added to (is_add) or removed from the interface's configuration.
+ * Always returns 0.
+ */
+static clib_error_t *
+ip4_sw_interface_add_del (vnet_main_t * vnm,
+                          u32 sw_if_index,
+                          u32 is_add)
+{
+  static char * unicast_start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
+  static char * unicast_feature_nodes[] = {
+    [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
+    [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
+    [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
+    [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
+    [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
+    [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
+  };
+  static char * multicast_start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
+  static char * multicast_feature_nodes[] = {
+    [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
+    [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
+  };
+  vlib_main_t * vm = vnm->vlib_main;
+  ip_lookup_main_t * lm = &ip4_main.lookup_main;
+  u32 cast;
+
+  for (cast = 0; cast < VNET_N_CAST; cast++)
+    {
+      ip_config_main_t * cm = &lm->rx_config_mains[cast];
+      vnet_config_main_t * vcm = &cm->config_main;
+      u32 config_index;
+
+      /* First interface seen for this cast type: set up the feature graph. */
+      if (! vcm->node_index_by_feature_index)
+        {
+          if (cast != VNET_UNICAST)
+            vnet_config_init (vm, vcm,
+                              multicast_start_nodes, ARRAY_LEN (multicast_start_nodes),
+                              multicast_feature_nodes, ARRAY_LEN (multicast_feature_nodes));
+          else
+            vnet_config_init (vm, vcm,
+                              unicast_start_nodes, ARRAY_LEN (unicast_start_nodes),
+                              unicast_feature_nodes, ARRAY_LEN (unicast_feature_nodes));
+        }
+
+      vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
+      config_index = cm->config_index_by_sw_if_index[sw_if_index];
+
+      if (! is_add)
+        config_index = vnet_config_del_feature (vm, vcm,
+                                                config_index,
+                                                IP4_RX_FEATURE_LOOKUP,
+                                                /* config data */ 0,
+                                                /* # bytes of config data */ 0);
+      else
+        config_index = vnet_config_add_feature (vm, vcm,
+                                                config_index,
+                                                IP4_RX_FEATURE_LOOKUP,
+                                                /* config data */ 0,
+                                                /* # bytes of config data */ 0);
+
+      cm->config_index_by_sw_if_index[sw_if_index] = config_index;
+    }
+
+  return /* no error */ 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
+
+/* Graph node registration for "ip4-lookup".
+   The node runs ip4_lookup, its vector elements are u32-sized, and it has
+   one next node per IP_LOOKUP_NEXT_* value (IP_LOOKUP_N_NEXT in total),
+   each named in the table below. */
+VLIB_REGISTER_NODE (ip4_lookup_node) = {
+ .function = ip4_lookup,
+ .name = "ip4-lookup",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP_LOOKUP_N_NEXT,
+ .next_nodes = {
+ [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
+ [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
+ [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
+ [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
+ [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
+ [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
+ [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
+ [IP_LOOKUP_NEXT_MAP] = "ip4-map",
+ [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
+ [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
+ [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
+ [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop",
+ [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop",
+ },
+};
+
+/* Global IP4 main: the single ip4_main_t instance that the functions in
+   this file reach through &ip4_main (FIBs, lookup state, ARP template). */
+ip4_main_t ip4_main;
+
+/* One-time IPv4 lookup initialization.
+
+   - fills im->fib_masks[] with one network-byte-order mask per index;
+   - creates the FIB for table id 0;
+   - initializes the shared lookup state (is_ip6 = 0);
+   - installs unformat_pg_ip4_header as the packet-generator edit function
+     of the ip4-lookup node;
+   - builds the ARP request packet template stored in
+     im->ip4_arp_request_packet_template.
+
+   Returns 0 (no error). */
+clib_error_t *
+ip4_lookup_init (vlib_main_t * vm)
+{
+  ip4_main_t * im = &ip4_main;
+  ethernet_arp_header_t arp_request;
+  pg_node_t * pg_node;
+  uword len;
+
+  /* Index LEN gets the mask with the top LEN bits set; 32 and above
+     get all ones. */
+  for (len = 0; len < ARRAY_LEN (im->fib_masks); len++)
+    {
+      u32 mask = (len < 32) ? (pow2_mask (len) << (32 - len)) : ~0;
+
+      im->fib_masks[len] = clib_host_to_net_u32 (mask);
+    }
+
+  /* Create FIB with index 0 and table id of 0. */
+  find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
+
+  ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
+
+  pg_node = pg_get_node (ip4_lookup_node.index);
+  pg_node->unformat_edit = unformat_pg_ip4_header;
+
+  /* ARP request template: everything zero except the fixed header fields. */
+  memset (&arp_request, 0, sizeof (arp_request));
+
+  /* Set target ethernet address to all zeros. */
+  memset (arp_request.ip4_over_ethernet[1].ethernet, 0,
+          sizeof (arp_request.ip4_over_ethernet[1].ethernet));
+
+  arp_request.l2_type = clib_host_to_net_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet);
+  arp_request.l3_type = clib_host_to_net_u16 (ETHERNET_TYPE_IP4);
+  arp_request.n_l2_address_bytes = 6;
+  arp_request.n_l3_address_bytes = 4;
+  arp_request.opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
+
+  vlib_packet_template_init (vm,
+                             &im->ip4_arp_request_packet_template,
+                             /* data */ &arp_request,
+                             sizeof (arp_request),
+                             /* alloc chunk size */ 8,
+                             "ip4 arp");
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_lookup_init);
+
+/* Per-packet trace record written by ip4_forward_next_trace and rendered
+   by format_ip4_forward_next_trace. */
+typedef struct {
+ /* Adjacency taken. */
+ u32 adj_index;
+ /* Flow hash copied from the buffer's ip.flow_hash. */
+ u32 flow_hash;
+ /* FIB index looked up from the packet's RX software interface. */
+ u32 fib_index;
+
+ /* Packet data, possibly *after* rewrite.
+    Holds the first 64 - sizeof(u32) bytes at the buffer's current
+    position. */
+ u8 packet_data[64 - 1*sizeof(u32)];
+} ip4_forward_next_trace_t;
+
+/* Trace formatter for ip4_forward_next_trace_t records.
+
+   va_list arguments, in order: vlib_main_t * (unused), vlib_node_t *
+   (unused), ip4_forward_next_trace_t *.
+
+   Appends the FIB index, the formatted adjacency and the flow hash to S.
+   When the traced adjacency is a rewrite adjacency the captured packet
+   data is appended on a new line, indented to match.  Returns the
+   (possibly reallocated) string S. */
+static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip4_forward_next_trace_t * trace = va_arg (*args, ip4_forward_next_trace_t *);
+  vnet_main_t * vnm = vnet_get_main();
+  ip_lookup_main_t * lm = &ip4_main.lookup_main;
+  /* Capture the indent before anything is appended to S. */
+  uword indent = format_get_indent (s);
+  ip_adjacency_t * adj = ip_get_adjacency (lm, trace->adj_index);
+
+  s = format (s, "fib: %d adjacency: %U flow hash: 0x%08x",
+              trace->fib_index, format_ip_adjacency,
+              vnm, lm, trace->adj_index, trace->flow_hash);
+
+  /* Only rewrite adjacencies get the packet bytes dumped. */
+  if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
+    s = format (s, "\n%U%U",
+                format_white_space, indent,
+                format_ip_adjacency_packet_data,
+                vnm, lm, trace->adj_index,
+                trace->packet_data, sizeof (trace->packet_data));
+
+  return s;
+}
+
+/* Add one trace record for buffer B, if B is flagged as traced.
+   The record captures the adjacency index selected by WHICH_ADJ_INDEX,
+   the flow hash, the FIB index of the RX interface and the leading
+   packet bytes at the buffer's current position. */
+always_inline void
+ip4_forward_next_trace_one (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            vlib_buffer_t * b,
+                            vlib_rx_or_tx_t which_adj_index)
+{
+  ip4_main_t * im = &ip4_main;
+  ip4_forward_next_trace_t * t;
+
+  if (! (b->flags & VLIB_BUFFER_IS_TRACED))
+    return;
+
+  t = vlib_add_trace (vm, node, b, sizeof (t[0]));
+  t->adj_index = vnet_buffer (b)->ip.adj_index[which_adj_index];
+  t->flow_hash = vnet_buffer (b)->ip.flow_hash;
+  t->fib_index = vec_elt (im->fib_index_by_sw_if_index,
+                          vnet_buffer (b)->sw_if_index[VLIB_RX]);
+  memcpy (t->packet_data,
+          vlib_buffer_get_current (b),
+          sizeof (t->packet_data));
+}
+
+/* Common trace function for all ip4-forward next nodes.
+
+   Walks every buffer index in FRAME and records a trace entry for each
+   buffer that has VLIB_BUFFER_IS_TRACED set.  WHICH_ADJ_INDEX selects
+   whether the RX or TX adjacency index of the buffer is recorded.
+   Buffers are handled two at a time while at least four remain, so the
+   following pair can be prefetched; the rest are handled one by one. */
+void
+ip4_forward_next_trace (vlib_main_t * vm,
+                        vlib_node_runtime_t * node,
+                        vlib_frame_t * frame,
+                        vlib_rx_or_tx_t which_adj_index)
+{
+  u32 * from, n_left;
+
+  n_left = frame->n_vectors;
+  from = vlib_frame_vector_args (frame);
+
+  while (n_left >= 4)
+    {
+      vlib_buffer_t * b0, * b1;
+
+      /* Prefetch next iteration. */
+      vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+      vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
+
+      b0 = vlib_get_buffer (vm, from[0]);
+      b1 = vlib_get_buffer (vm, from[1]);
+
+      ip4_forward_next_trace_one (vm, node, b0, which_adj_index);
+      ip4_forward_next_trace_one (vm, node, b1, which_adj_index);
+
+      from += 2;
+      n_left -= 2;
+    }
+
+  while (n_left >= 1)
+    {
+      vlib_buffer_t * b0 = vlib_get_buffer (vm, from[0]);
+
+      ip4_forward_next_trace_one (vm, node, b0, which_adj_index);
+
+      from += 1;
+      n_left -= 1;
+    }
+}
+
+/* Shared body of the ip4-drop, ip4-punt and ip4-miss nodes.
+
+   Hands every buffer in FRAME to vlib_error_drop_buffers with next index
+   0, passing ERROR_CODE together with the ip4-input node's index.  When
+   tracing is enabled on NODE the packets are also traced using their TX
+   adjacency index.  Returns the number of packets in the frame. */
+static uword
+ip4_drop_or_punt (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_frame_t * frame,
+                  ip4_error_t error_code)
+{
+  u32 * buffer_indices = vlib_frame_vector_args (frame);
+  uword n_dropped = frame->n_vectors;
+
+  vlib_error_drop_buffers (vm, node,
+                           buffer_indices,
+                           1 /* stride */,
+                           n_dropped,
+                           0 /* next */,
+                           ip4_input_node.index,
+                           error_code);
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    {
+      ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+    }
+
+  return n_dropped;
+}
+
+/* Node function for "ip4-drop": drop the whole frame, counting each
+   packet as IP4_ERROR_ADJACENCY_DROP. */
+static uword
+ip4_drop (vlib_main_t * vm,
+          vlib_node_runtime_t * node,
+          vlib_frame_t * frame)
+{
+  return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
+}
+
+/* Node function for "ip4-punt": pass the whole frame on, counting each
+   packet as IP4_ERROR_ADJACENCY_PUNT. */
+static uword
+ip4_punt (vlib_main_t * vm,
+          vlib_node_runtime_t * node,
+          vlib_frame_t * frame)
+{
+  return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
+}
+
+/* Node function for "ip4-miss": drop the whole frame, counting each
+   packet as IP4_ERROR_DST_LOOKUP_MISS. */
+static uword
+ip4_miss (vlib_main_t * vm,
+          vlib_node_runtime_t * node,
+          vlib_frame_t * frame)
+{
+  return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS);
+}
+
+/* Graph node "ip4-drop": runs ip4_drop; its only next node (index 0)
+   is "error-drop".  Traces use the common ip4-forward trace format. */
+VLIB_REGISTER_NODE (ip4_drop_node,static) = {
+ .function = ip4_drop,
+ .name = "ip4-drop",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_forward_next_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+/* Graph node "ip4-punt": runs ip4_punt; its only next node (index 0)
+   is "error-punt".  Traces use the common ip4-forward trace format. */
+VLIB_REGISTER_NODE (ip4_punt_node,static) = {
+ .function = ip4_punt,
+ .name = "ip4-punt",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_forward_next_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-punt",
+ },
+};
+
+/* Graph node "ip4-miss": runs ip4_miss; its only next node (index 0)
+   is "error-drop".  Traces use the common ip4-forward trace format. */
+VLIB_REGISTER_NODE (ip4_miss_node,static) = {
+ .function = ip4_miss,
+ .name = "ip4-miss",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_forward_next_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+/* Compute TCP/UDP/ICMP4 checksum in software.
+
+   P0 is the first buffer of the packet and IP0 its IPv4 header.  The sum
+   is seeded with the pseudo-header (payload length, protocol, source and
+   destination addresses) and then accumulated over the L4 payload,
+   following the buffer chain when the payload spans several buffers.
+
+   Returns the one's complement of the folded sum: 0 when the checksum
+   already stored in the packet is consistent with the data. */
+u16
+ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+                              ip4_header_t * ip0)
+{
+  ip_csum_t sum0;
+  u32 ip_header_length, payload_length_host_byte_order;
+  u32 n_this_buffer, n_bytes_left;
+  u16 sum16;
+  void * data_this_buffer;
+
+  /* Initialize checksum with ip header. */
+  ip_header_length = ip4_header_bytes (ip0);
+  payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
+  sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
+
+  if (BITS (uword) == 32)
+    {
+      sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
+      sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
+    }
+  else
+    /* One 64-bit load starting at src_address covers both addresses. */
+    sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
+
+  n_bytes_left = n_this_buffer = payload_length_host_byte_order;
+  data_this_buffer = (void *) ip0 + ip_header_length;
+  /* The first buffer may hold only part of the payload. */
+  if (n_this_buffer + ip_header_length > p0->current_length)
+    n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
+  while (1)
+    {
+      sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
+      n_bytes_left -= n_this_buffer;
+      if (n_bytes_left == 0)
+        break;
+
+      ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
+      p0 = vlib_get_buffer (vm, p0->next_buffer);
+      data_this_buffer = vlib_buffer_get_current (p0);
+      n_this_buffer = p0->current_length;
+
+      /* Never consume more than the IP length says is left: a chained
+         buffer carrying bytes beyond the end of the datagram would
+         otherwise make the unsigned n_bytes_left wrap around and the
+         walk continue past the end of the packet. */
+      if (n_this_buffer > n_bytes_left)
+        n_this_buffer = n_bytes_left;
+    }
+
+  sum16 = ~ ip_csum_fold (sum0);
+
+  return sum16;
+}
+
+/* Validate the TCP or UDP checksum of the IPv4 packet at P0's current
+   position and record the outcome in the buffer flags.
+
+   IP_BUFFER_L4_CHECKSUM_COMPUTED is always set.
+   IP_BUFFER_L4_CHECKSUM_CORRECT is set when the computed checksum is 0,
+   or without computing anything for a UDP packet whose checksum field is
+   zero.  Returns the updated p0->flags. */
+static u32
+ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
+{
+  ip4_header_t * ip0 = vlib_buffer_get_current (p0);
+  udp_header_t * udp0;
+  u16 sum16;
+
+  ASSERT (ip0->protocol == IP_PROTOCOL_TCP
+          || ip0->protocol == IP_PROTOCOL_UDP);
+
+  /* Locate the L4 header the same way ip4_local does, so that IP options
+     are skipped; "ip0 + 1" is only right for a 20-byte IP header. */
+  udp0 = ip4_next_header (ip0);
+  if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
+    {
+      /* Explicit zero UDP checksum: accept without computing. */
+      p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
+                    | IP_BUFFER_L4_CHECKSUM_CORRECT);
+      return p0->flags;
+    }
+
+  sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
+
+  p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
+                | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
+
+  return p0->flags;
+}
+
+/* Node function for "ip4-local".
+
+   For every packet in FRAME:
+     - the TCP/UDP checksum is validated (computed in software unless the
+       buffer flags say it was already computed) and, for UDP, the UDP
+       length is checked against the IP length;
+     - the source address is looked up in the mtrie of the FIB bound to
+       the RX interface, and the resulting adjacency index is stored in
+       both the RX and TX adjacency slots of the buffer;
+     - packets whose source adjacency is not a rewrite, ARP or local
+       adjacency are flagged IP4_ERROR_SRC_LOOKUP_MISS, unless the
+       destination is the limited broadcast address;
+     - packets without error go to lm->local_next_by_ip_protocol[protocol],
+       all others to IP_LOCAL_NEXT_DROP.
+
+   The mtrie lookup steps are interleaved with the other checks.
+   Returns the number of packets in the frame. */
+static uword
+ip4_local (vlib_main_t * vm,
+           vlib_node_runtime_t * node,
+           vlib_frame_t * frame)
+{
+  ip4_main_t * im = &ip4_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+  ip_local_next_t next_index;
+  u32 * from, * to_next, n_left_from, n_left_to_next;
+  vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          vlib_buffer_t * p0, * p1;
+          ip4_header_t * ip0, * ip1;
+          udp_header_t * udp0, * udp1;
+          ip4_fib_mtrie_t * mtrie0, * mtrie1;
+          ip4_fib_mtrie_leaf_t leaf0, leaf1;
+          ip_adjacency_t * adj0, * adj1;
+          u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
+          u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
+          i32 len_diff0, len_diff1;
+          u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
+          u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
+          u8 enqueue_code;
+
+          /* Speculatively enqueue both packets to the current next frame. */
+          pi0 = to_next[0] = from[0];
+          pi1 = to_next[1] = from[1];
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          ip0 = vlib_buffer_get_current (p0);
+          ip1 = vlib_buffer_get_current (p1);
+
+          fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+                                vnet_buffer(p0)->sw_if_index[VLIB_RX]);
+          fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
+                                vnet_buffer(p1)->sw_if_index[VLIB_RX]);
+
+          mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
+          mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
+
+          leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
+          leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
+
+          proto0 = ip0->protocol;
+          proto1 = ip1->protocol;
+          is_udp0 = proto0 == IP_PROTOCOL_UDP;
+          is_udp1 = proto1 == IP_PROTOCOL_UDP;
+          is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
+          is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
+
+          flags0 = p0->flags;
+          flags1 = p1->flags;
+
+          good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+          good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+
+          udp0 = ip4_next_header (ip0);
+          udp1 = ip4_next_header (ip1);
+
+          /* Don't verify UDP checksum for packets with explicit zero checksum. */
+          good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
+          good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
+
+          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
+          leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
+
+          /* Verify UDP length. */
+          ip_len0 = clib_net_to_host_u16 (ip0->length);
+          ip_len1 = clib_net_to_host_u16 (ip1->length);
+          udp_len0 = clib_net_to_host_u16 (udp0->length);
+          udp_len1 = clib_net_to_host_u16 (udp1->length);
+
+          len_diff0 = ip_len0 - udp_len0;
+          len_diff1 = ip_len1 - udp_len1;
+
+          /* The length comparison only means something for UDP. */
+          len_diff0 = is_udp0 ? len_diff0 : 0;
+          len_diff1 = is_udp1 ? len_diff1 : 0;
+
+          if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
+                                & good_tcp_udp0 & good_tcp_udp1)))
+            {
+              if (is_tcp_udp0)
+                {
+                  if (is_tcp_udp0
+                      && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
+                    flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
+                  good_tcp_udp0 =
+                    (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+                  good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
+                }
+              if (is_tcp_udp1)
+                {
+                  if (is_tcp_udp1
+                      && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
+                    flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
+                  good_tcp_udp1 =
+                    (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+                  good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
+                }
+            }
+
+          good_tcp_udp0 &= len_diff0 >= 0;
+          good_tcp_udp1 &= len_diff1 >= 0;
+
+          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
+          leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
+
+          /* IP4_ERROR_UNKNOWN_PROTOCOL doubles as the "no error so far"
+             value until the next index has been chosen. */
+          error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
+
+          error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
+          error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
+
+          ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
+          error0 = (is_tcp_udp0 && ! good_tcp_udp0
+                    ? IP4_ERROR_TCP_CHECKSUM + is_udp0
+                    : error0);
+          error1 = (is_tcp_udp1 && ! good_tcp_udp1
+                    ? IP4_ERROR_TCP_CHECKSUM + is_udp1
+                    : error1);
+
+          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+          leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
+
+          vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+
+          vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
+
+          ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
+                                                           &ip0->src_address,
+                                                           /* no_default_route */ 1));
+          ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
+                                                           &ip1->src_address,
+                                                           /* no_default_route */ 1));
+
+          adj0 = ip_get_adjacency (lm, adj_index0);
+          adj1 = ip_get_adjacency (lm, adj_index1);
+
+          /*
+           * Must have a route to source otherwise we drop the packet.
+           * ip4 broadcasts are accepted, e.g. to make dhcp client work
+           */
+          error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
+                    && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
+                    && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
+                    && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
+                    && ip0->dst_address.as_u32 != 0xFFFFFFFF
+                    ? IP4_ERROR_SRC_LOOKUP_MISS
+                    : error0);
+          /* The broadcast exemption must look at packet 1's own
+             destination address, not packet 0's. */
+          error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
+                    && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
+                    && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
+                    && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
+                    && ip1->dst_address.as_u32 != 0xFFFFFFFF
+                    ? IP4_ERROR_SRC_LOOKUP_MISS
+                    : error1);
+
+          next0 = lm->local_next_by_ip_protocol[proto0];
+          next1 = lm->local_next_by_ip_protocol[proto1];
+
+          next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+          next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
+
+          p0->error = error0 ? error_node->errors[error0] : 0;
+          p1->error = error1 ? error_node->errors[error1] : 0;
+
+          /* Bit 0: packet 0 mis-speculated, bit 1: packet 1 mis-speculated. */
+          enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
+
+          if (PREDICT_FALSE (enqueue_code != 0))
+            {
+              switch (enqueue_code)
+                {
+                case 1:
+                  /* A B A */
+                  to_next[-2] = pi1;
+                  to_next -= 1;
+                  n_left_to_next += 1;
+                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
+                  break;
+
+                case 2:
+                  /* A A B */
+                  to_next -= 1;
+                  n_left_to_next += 1;
+                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
+                  break;
+
+                case 3:
+                  /* A B B or A B C */
+                  to_next -= 2;
+                  n_left_to_next += 2;
+                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
+                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
+                  if (next0 == next1)
+                    {
+                      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+                      next_index = next1;
+                      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+                    }
+                  break;
+                }
+            }
+        }
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t * p0;
+          ip4_header_t * ip0;
+          udp_header_t * udp0;
+          ip4_fib_mtrie_t * mtrie0;
+          ip4_fib_mtrie_leaf_t leaf0;
+          ip_adjacency_t * adj0;
+          u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
+          i32 len_diff0;
+          u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
+
+          pi0 = to_next[0] = from[0];
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+
+          p0 = vlib_get_buffer (vm, pi0);
+
+          ip0 = vlib_buffer_get_current (p0);
+
+          fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+                                vnet_buffer(p0)->sw_if_index[VLIB_RX]);
+
+          mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
+
+          leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
+
+          proto0 = ip0->protocol;
+          is_udp0 = proto0 == IP_PROTOCOL_UDP;
+          is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
+
+          flags0 = p0->flags;
+
+          good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+
+          udp0 = ip4_next_header (ip0);
+
+          /* Don't verify UDP checksum for packets with explicit zero checksum. */
+          good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
+
+          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
+
+          /* Verify UDP length. */
+          ip_len0 = clib_net_to_host_u16 (ip0->length);
+          udp_len0 = clib_net_to_host_u16 (udp0->length);
+
+          len_diff0 = ip_len0 - udp_len0;
+
+          len_diff0 = is_udp0 ? len_diff0 : 0;
+
+          if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
+            {
+              if (is_tcp_udp0)
+                {
+                  if (is_tcp_udp0
+                      && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
+                    flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
+                  good_tcp_udp0 =
+                    (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+                  good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
+                }
+            }
+
+          good_tcp_udp0 &= len_diff0 >= 0;
+
+          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
+
+          error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
+
+          error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
+
+          ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
+          error0 = (is_tcp_udp0 && ! good_tcp_udp0
+                    ? IP4_ERROR_TCP_CHECKSUM + is_udp0
+                    : error0);
+
+          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+
+          vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+
+          ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
+                                                           &ip0->src_address,
+                                                           /* no_default_route */ 1));
+
+          adj0 = ip_get_adjacency (lm, adj_index0);
+
+          /* Must have a route to source otherwise we drop the packet.
+             Limited broadcasts are exempt, as in the dual loop above. */
+          error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
+                    && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
+                    && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
+                    && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
+                    && ip0->dst_address.as_u32 != 0xFFFFFFFF
+                    ? IP4_ERROR_SRC_LOOKUP_MISS
+                    : error0);
+
+          next0 = lm->local_next_by_ip_protocol[proto0];
+
+          next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+
+          p0->error = error0? error_node->errors[error0] : 0;
+
+          if (PREDICT_FALSE (next0 != next_index))
+            {
+              /* Undo the speculative enqueue and switch next frames. */
+              n_left_to_next += 1;
+              vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+              next_index = next0;
+              vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+              to_next[0] = pi0;
+              to_next += 1;
+              n_left_to_next -= 1;
+            }
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/* Graph node "ip4-local": runs ip4_local.  Next nodes are indexed by
+   IP_LOCAL_NEXT_*; further next nodes are added at run time through
+   ip4_register_protocol.  The TCP lookup entry is currently disabled. */
+VLIB_REGISTER_NODE (ip4_local_node,static) = {
+ .function = ip4_local,
+ .name = "ip4-local",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_forward_next_trace,
+
+ .n_next_nodes = IP_LOCAL_N_NEXT,
+ .next_nodes = {
+ [IP_LOCAL_NEXT_DROP] = "error-drop",
+ [IP_LOCAL_NEXT_PUNT] = "error-punt",
+ // [IP_LOCAL_NEXT_TCP_LOOKUP] = "ip4-tcp-lookup",
+ [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
+ [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
+ },
+};
+
+/* Route locally received packets of IP protocol PROTOCOL to the graph
+   node NODE_INDEX: the node is added as a next node of ip4-local and the
+   resulting next index is stored in local_next_by_ip_protocol[protocol].
+   PROTOCOL must be a valid index into that table (checked by ASSERT). */
+void ip4_register_protocol (u32 protocol, u32 node_index)
+{
+  vlib_main_t * vm = vlib_get_main();
+  ip_lookup_main_t * lm = &ip4_main.lookup_main;
+  uword next_slot;
+
+  ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
+
+  next_slot = vlib_node_add_next (vm, ip4_local_node.index, node_index);
+  lm->local_next_by_ip_protocol[protocol] = next_slot;
+}
+
+/* CLI handler for "show ip local".
+   Prints a heading followed by one line per IP protocol number whose
+   entry in local_next_by_ip_protocol is something other than
+   IP_LOCAL_NEXT_PUNT.  INPUT and CMD are not used.  Returns 0. */
+static clib_error_t *
+show_ip_local_command_fn (vlib_main_t * vm,
+                          unformat_input_t * input,
+                          vlib_cli_command_t * cmd)
+{
+  ip_lookup_main_t * lm = &ip4_main.lookup_main;
+  int protocol;
+
+  vlib_cli_output (vm, "Protocols handled by ip4_local");
+
+  for (protocol = 0;
+       protocol < ARRAY_LEN(lm->local_next_by_ip_protocol);
+       protocol++)
+    {
+      /* Punted protocols have no local handler; skip them. */
+      if (lm->local_next_by_ip_protocol[protocol] == IP_LOCAL_NEXT_PUNT)
+        continue;
+
+      vlib_cli_output (vm, "%d", protocol);
+    }
+
+  return 0;
+}
+
+
+
+/* CLI registration: "show ip local" is handled by
+   show_ip_local_command_fn. */
+VLIB_CLI_COMMAND (show_ip_local, static) = {
+ .path = "show ip local",
+ .function = show_ip_local_command_fn,
+ .short_help = "Show ip local protocol table",
+};
+
+/* Node function for "ip4-arp".
+
+   Every packet of FRAME is enqueued to the drop next
+   (IP4_ARP_NEXT_DROP) with one of the ip4-arp error counters.  In
+   addition, for the first packet seen for a given (destination address,
+   software interface) hash slot, an ARP request is built from the
+   packet template and sent to the adjacency's next node.
+
+   Requests are throttled with a 256-bit bitmap indexed by a seeded hash
+   of the destination address and interface; the seeds are refreshed and
+   the bitmap cleared whenever more than 1e-3 has elapsed on the
+   vlib_time_now clock since the last refresh (NOTE(review): presumably
+   seconds, i.e. 1 ms -- confirm).
+
+   Returns the number of packets in the frame. */
+static uword
+ip4_arp (vlib_main_t * vm,
+         vlib_node_runtime_t * node,
+         vlib_frame_t * frame)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip4_main_t * im = &ip4_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+  u32 * from, * to_next_drop;
+  uword n_left_from, n_left_to_next_drop;
+  static f64 time_last_seed_change = -1e100;
+  static u32 hash_seeds[3];
+  static uword hash_bitmap[256 / BITS (uword)];
+  f64 time_now;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+  time_now = vlib_time_now (vm);
+  if (time_now - time_last_seed_change > 1e-3)
+    {
+      uword i;
+      u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
+                                             sizeof (hash_seeds));
+      for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
+        hash_seeds[i] = r[i];
+
+      /* Mark all hash keys as not seen before. */
+      for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
+        hash_bitmap[i] = 0;
+
+      time_last_seed_change = time_now;
+    }
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
+                           to_next_drop, n_left_to_next_drop);
+
+      while (n_left_from > 0 && n_left_to_next_drop > 0)
+        {
+          vlib_buffer_t * p0;
+          ip4_header_t * ip0;
+          ethernet_header_t * eh0;
+          u32 pi0, adj_index0, a0, b0, c0, sw_if_index0, drop0;
+          uword bm0, m0;
+          ip_adjacency_t * adj0;
+
+          pi0 = from[0];
+
+          p0 = vlib_get_buffer (vm, pi0);
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+          adj0 = ip_get_adjacency (lm, adj_index0);
+          ip0 = vlib_buffer_get_current (p0);
+
+          /*
+           * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
+           * rewrite to this packet, we need to skip it here.
+           * Such a packet does not start with an IPv4 header, so the
+           * version nibble of the first byte is tested; when it is
+           * not 4 the data is parsed as an ethernet header (with any
+           * number of VLAN tags) and, if the ethertype is ARP, the
+           * buffer is advanced past that header.
+           */
+          eh0 = (ethernet_header_t*)ip0;
+          if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
+            {
+              u32 vlan_num = 0;
+              u16 * etype = &eh0->type;
+              while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q
+                     || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad
+                {
+                  vlan_num += 1;
+                  etype += 2; //vlan tag also 16 bits, same as etype
+                }
+              if (*etype == clib_host_to_net_u16 (0x0806)) //arp
+                {
+                  vlib_buffer_advance (
+                    p0, sizeof(ethernet_header_t) + (4*vlan_num));
+                  ip0 = vlib_buffer_get_current (p0);
+                }
+            }
+
+          a0 = hash_seeds[0];
+          b0 = hash_seeds[1];
+          c0 = hash_seeds[2];
+
+          sw_if_index0 = adj0->rewrite_header.sw_if_index;
+          vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+          a0 ^= ip0->dst_address.data_u32;
+          b0 ^= sw_if_index0;
+
+          hash_v3_finalize32 (a0, b0, c0);
+
+          /* Reduce the hash to a bit number within the bitmap, then
+             split it into a bit mask and a word index.  The mask must
+             be derived from the bit number *before* it is divided
+             down to the word index, and must be uword-wide so bits
+             above 31 survive on 64-bit targets. */
+          c0 &= BITS (hash_bitmap) - 1;
+          m0 = (uword) 1 << (c0 % BITS (uword));
+          c0 = c0 / BITS (uword);
+
+          bm0 = hash_bitmap[c0];
+          drop0 = (bm0 & m0) != 0;
+
+          /* Mark it as seen. */
+          hash_bitmap[c0] = bm0 | m0;
+
+          /* The original packet always goes to the drop next. */
+          from += 1;
+          n_left_from -= 1;
+          to_next_drop[0] = pi0;
+          to_next_drop += 1;
+          n_left_to_next_drop -= 1;
+
+          p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
+
+          if (drop0)
+            continue;
+
+          /*
+           * Can happen if the control-plane is programming tables
+           * with traffic flowing; at least that's today's lame excuse.
+           */
+          if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)
+            {
+              p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
+            }
+          else
+            /* Send ARP request. */
+            {
+              u32 arp_bi0 = 0;
+              vlib_buffer_t * arp_b0;
+              ethernet_arp_header_t * h0;
+              vnet_hw_interface_t * hw_if0;
+
+              h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &arp_bi0);
+
+              /* Add rewrite/encap string for ARP packet. */
+              vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
+
+              hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+              /* Src ethernet address in ARP header. */
+              memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
+                      sizeof (h0->ip4_over_ethernet[0].ethernet));
+
+              ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
+
+              /* Copy in destination address we are requesting. */
+              h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
+
+              vlib_buffer_copy_trace_flag (vm, p0, arp_bi0);
+              arp_b0 = vlib_get_buffer (vm, arp_bi0);
+              vnet_buffer (arp_b0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+              /* Expose the rewrite bytes placed in front of the ARP header. */
+              vlib_buffer_advance (arp_b0, -adj0->rewrite_header.data_bytes);
+
+              vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, arp_bi0);
+            }
+        }
+
+      vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
+    }
+
+  return frame->n_vectors;
+}
+
+/* Counter descriptions for the ip4-arp node, indexed by IP4_ARP_ERROR_*. */
+static char * ip4_arp_error_strings[] = {
+ [IP4_ARP_ERROR_DROP] = "address overflow drops",
+ [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
+ [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
+ [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
+ [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
+};
+
+/* Graph node "ip4-arp": runs ip4_arp and exposes the error counters in
+   ip4_arp_error_strings.  Only the drop next is declared statically
+   here; ip4_arp sends ARP requests to adj->rewrite_header.next_index. */
+VLIB_REGISTER_NODE (ip4_arp_node) = {
+ .function = ip4_arp,
+ .name = "ip4-arp",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_forward_next_trace,
+
+ .n_errors = ARRAY_LEN (ip4_arp_error_strings),
+ .error_strings = ip4_arp_error_strings,
+
+ .n_next_nodes = IP4_ARP_N_NEXT,
+ .next_nodes = {
+ [IP4_ARP_NEXT_DROP] = "error-drop",
+ },
+};
+
+#define foreach_notrace_ip4_arp_error \
+_(DROP) \
+_(REQUEST_SENT) \
+_(REPLICATE_DROP) \
+_(REPLICATE_FAIL)
+
+clib_error_t * arp_notrace_init (vlib_main_t * vm)
+{
+ vlib_node_runtime_t *rt =
+ vlib_node_get_runtime (vm, ip4_arp_node.index);
+
+ /* don't trace ARP request packets */
+#define _(a) \
+ vnet_pcap_drop_trace_filter_add_del \
+ (rt->errors[IP4_ARP_ERROR_##a], \
+ 1 /* is_add */);
+ foreach_notrace_ip4_arp_error;
+#undef _
+ return 0;
+}
+
+VLIB_INIT_FUNCTION(arp_notrace_init);
+
+
+/* Send an ARP request to see if given destination is reachable on given
+   interface.
+
+   Fails with an error when SW_IF_INDEX is not admin-up, or when no
+   address configured on the interface matches DST (in which case
+   vnm->api_errno is also set to VNET_API_ERROR_NO_MATCHING_INTERFACE).
+   Otherwise an ARP request is built from the packet template, given the
+   encapsulation of the interface address's neighbor-probe adjacency and
+   handed to the hardware interface's output node.  Returns 0 on
+   success. */
+clib_error_t *
+ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip4_main_t * im = &ip4_main;
+  vnet_sw_interface_t * sw_if;
+  vnet_hw_interface_t * hw_if;
+  ip_interface_address_t * if_addr;
+  ip4_address_t * src;
+  ip_adjacency_t * probe_adj;
+  ethernet_arp_header_t * arp;
+  vlib_buffer_t * buf;
+  vlib_frame_t * out_frame;
+  u32 * out_vector;
+  u32 bi = 0;
+
+  sw_if = vnet_get_sw_interface (vnm, sw_if_index);
+  if ((sw_if->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0)
+    return clib_error_return (0, "%U: interface %U down",
+                              format_ip4_address, dst,
+                              format_vnet_sw_if_index_name, vnm,
+                              sw_if_index);
+
+  src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &if_addr);
+  if (src == 0)
+    {
+      vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
+      return clib_error_return
+        (0, "no matching interface address for destination %U (interface %U)",
+         format_ip4_address, dst,
+         format_vnet_sw_if_index_name, vnm, sw_if_index);
+    }
+
+  probe_adj = ip_get_adjacency (&im->lookup_main, if_addr->neighbor_probe_adj_index);
+
+  arp = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
+
+  hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+  /* Sender hardware/protocol addresses, then the address being probed. */
+  memcpy (arp->ip4_over_ethernet[0].ethernet, hw_if->hw_address,
+          sizeof (arp->ip4_over_ethernet[0].ethernet));
+  arp->ip4_over_ethernet[0].ip4 = src[0];
+  arp->ip4_over_ethernet[1].ip4 = dst[0];
+
+  buf = vlib_get_buffer (vm, bi);
+  vnet_buffer (buf)->sw_if_index[VLIB_TX] = sw_if_index;
+  vnet_buffer (buf)->sw_if_index[VLIB_RX] = sw_if_index;
+
+  /* Add encapsulation string for software interface (e.g. ethernet header). */
+  vnet_rewrite_one_header (probe_adj[0], arp, sizeof (ethernet_header_t));
+  vlib_buffer_advance (buf, -probe_adj->rewrite_header.data_bytes);
+
+  /* Hand a single-packet frame to the interface's output node. */
+  out_frame = vlib_get_frame_to_node (vm, hw_if->output_node_index);
+  out_vector = vlib_frame_vector_args (out_frame);
+  out_vector[0] = bi;
+  out_frame->n_vectors = 1;
+  vlib_put_frame_to_node (vm, hw_if->output_node_index, out_frame);
+
+  return /* no error */ 0;
+}
+
+/* Next indices used by the ip4 rewrite code below. */
+typedef enum {
+ IP4_REWRITE_NEXT_DROP,
+ /* Chosen as the override next when the adjacency is an ARP adjacency. */
+ IP4_REWRITE_NEXT_ARP,
+} ip4_rewrite_next_t;
+
+/*
+ * Common body of the ip4-rewrite-transit and ip4-rewrite-local nodes.
+ *
+ * For every buffer in the frame: fetch the adjacency recorded in
+ * ip.adj_index[] (the VLIB_TX slot for transit packets, the VLIB_RX slot
+ * for locally received ones), optionally decrement the TTL and patch the
+ * header checksum, check the packet length against the adjacency's
+ * max_l3_packet_bytes, prepend the adjacency's rewrite string and set the
+ * TX sw_if_index.  A buffer that picks up an error is sent to next index 0
+ * (IP4_REWRITE_NEXT_DROP) with p->error set; otherwise it goes to the
+ * adjacency's rewrite_header.next_index.
+ *
+ * rewrite_for_locally_received_packets: non-zero skips the TTL/checksum
+ * update and enables the local / ARP adjacency special cases.  Both callers
+ * pass a literal constant.
+ *
+ * Returns the number of vectors in the frame.
+ */
+always_inline uword
+ip4_rewrite_inline (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * frame,
+                    int rewrite_for_locally_received_packets)
+{
+  ip_lookup_main_t * lm = &ip4_main.lookup_main;
+  u32 * from = vlib_frame_vector_args (frame);
+  u32 n_left_from, n_left_to_next, * to_next, next_index;
+  /* Error codes stored on buffers index the ip4-input node's error table. */
+  vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
+  vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
+  u32 cpu_index = os_get_cpu_number();
+
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Dual loop: two packets per iteration, prefetching the next two. */
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          ip_adjacency_t * adj0, * adj1;
+          vlib_buffer_t * p0, * p1;
+          ip4_header_t * ip0, * ip1;
+          u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
+          u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
+          /* Only consulted on the locally-received path. */
+          u32 next0_override = 0, next1_override = 0;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, STORE);
+            vlib_prefetch_buffer_header (p3, STORE);
+
+            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+          }
+
+          /* Speculatively enqueue both buffers to the current next frame. */
+          pi0 = to_next[0] = from[0];
+          pi1 = to_next[1] = from[1];
+
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
+          adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
+
+          /* We should never rewrite a pkt using the MISS adjacency */
+          ASSERT(adj_index0 && adj_index1);
+
+          ip0 = vlib_buffer_get_current (p0);
+          ip1 = vlib_buffer_get_current (p1);
+
+          error0 = error1 = IP4_ERROR_NONE;
+
+          /* Decrement TTL & update checksum.
+             Works either endian, so no need for byte swap. */
+          if (! rewrite_for_locally_received_packets)
+            {
+              i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
+
+              /* Input node should have rejected packets with ttl 0. */
+              ASSERT (ip0->ttl > 0);
+              ASSERT (ip1->ttl > 0);
+
+              /* Incremental checksum update for a TTL decrement of one,
+                 with end-around carry. */
+              checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+              checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
+
+              checksum0 += checksum0 >= 0xffff;
+              checksum1 += checksum1 >= 0xffff;
+
+              ip0->checksum = checksum0;
+              ip1->checksum = checksum1;
+
+              ttl0 -= 1;
+              ttl1 -= 1;
+
+              ip0->ttl = ttl0;
+              ip1->ttl = ttl1;
+
+              error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
+              error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
+
+              /* Verify checksum. */
+              ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+              ASSERT (ip1->checksum == ip4_header_checksum (ip1));
+            }
+
+          /* Rewrite packet header and updates lengths. */
+          adj0 = ip_get_adjacency (lm, adj_index0);
+          adj1 = ip_get_adjacency (lm, adj_index1);
+
+          if (rewrite_for_locally_received_packets)
+            {
+              /*
+               * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
+               * we end up here with a local adjacency in hand
+               * The local adj rewrite data is 0xfefe on purpose.
+               * Bad engineer, no donut for you.
+               */
+              if (PREDICT_FALSE(adj0->lookup_next_index
+                                == IP_LOOKUP_NEXT_LOCAL))
+                error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
+              /*
+               * ARP adjacencies carry a next_index meant for another node,
+               * so it is overridden with this node's own ARP slot.
+               */
+              if (PREDICT_FALSE(adj0->lookup_next_index
+                                == IP_LOOKUP_NEXT_ARP))
+                next0_override = IP4_REWRITE_NEXT_ARP;
+              if (PREDICT_FALSE(adj1->lookup_next_index
+                                == IP_LOOKUP_NEXT_LOCAL))
+                error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
+              if (PREDICT_FALSE(adj1->lookup_next_index
+                                == IP_LOOKUP_NEXT_ARP))
+                next1_override = IP4_REWRITE_NEXT_ARP;
+            }
+
+          /* Worth pipelining. No guarantee that adj0,1 are hot... */
+          rw_len0 = adj0[0].rewrite_header.data_bytes;
+          rw_len1 = adj1[0].rewrite_header.data_bytes;
+
+          /* Check MTU of outgoing interface.  This must precede the
+             next-node selection below so that oversize packets are
+             dropped, exactly as in the single-packet loop. */
+          error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
+                    ? IP4_ERROR_MTU_EXCEEDED
+                    : error0);
+          error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
+                    ? IP4_ERROR_MTU_EXCEEDED
+                    : error1);
+
+          /* Any error sends the packet to next index 0 (drop). */
+          next0 = (error0 == IP4_ERROR_NONE)
+            ? adj0[0].rewrite_header.next_index : 0;
+
+          if (rewrite_for_locally_received_packets)
+            next0 = next0 && next0_override ? next0_override : next0;
+
+          next1 = (error1 == IP4_ERROR_NONE)
+            ? adj1[0].rewrite_header.next_index : 0;
+
+          if (rewrite_for_locally_received_packets)
+            next1 = next1 && next1_override ? next1_override : next1;
+
+          /*
+           * We've already accounted for an ethernet_header_t elsewhere
+           */
+          if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
+            vlib_increment_combined_counter
+              (&lm->adjacency_counters,
+               cpu_index, adj_index0,
+               /* packet increment */ 0,
+               /* byte increment */ rw_len0-sizeof(ethernet_header_t));
+
+          if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
+            vlib_increment_combined_counter
+              (&lm->adjacency_counters,
+               cpu_index, adj_index1,
+               /* packet increment */ 0,
+               /* byte increment */ rw_len1-sizeof(ethernet_header_t));
+
+          /* Make room in front of the IP header for the rewrite string. */
+          p0->current_data -= rw_len0;
+          p1->current_data -= rw_len1;
+
+          p0->current_length += rw_len0;
+          p1->current_length += rw_len1;
+
+          vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
+          vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
+
+          p0->error = error_node->errors[error0];
+          p1->error = error_node->errors[error1];
+
+          /* Guess we are only writing on simple Ethernet header. */
+          vnet_rewrite_two_headers (adj0[0], adj1[0],
+                                    ip0, ip1,
+                                    sizeof (ethernet_header_t));
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, pi1, next0, next1);
+        }
+
+      /* Single loop: remaining packets, one at a time. */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          ip_adjacency_t * adj0;
+          vlib_buffer_t * p0;
+          ip4_header_t * ip0;
+          u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
+          /* Only consulted on the locally-received path. */
+          u32 next0_override = 0;
+
+          pi0 = to_next[0] = from[0];
+
+          p0 = vlib_get_buffer (vm, pi0);
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
+
+          /* We should never rewrite a pkt using the MISS adjacency */
+          ASSERT(adj_index0);
+
+          adj0 = ip_get_adjacency (lm, adj_index0);
+
+          ip0 = vlib_buffer_get_current (p0);
+
+          error0 = IP4_ERROR_NONE;
+
+          /* Decrement TTL & update checksum. */
+          if (! rewrite_for_locally_received_packets)
+            {
+              i32 ttl0 = ip0->ttl;
+
+              checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+
+              checksum0 += checksum0 >= 0xffff;
+
+              ip0->checksum = checksum0;
+
+              ASSERT (ip0->ttl > 0);
+
+              ttl0 -= 1;
+
+              ip0->ttl = ttl0;
+
+              ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+
+              error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
+            }
+
+          if (rewrite_for_locally_received_packets)
+            {
+              /*
+               * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
+               * we end up here with a local adjacency in hand
+               * The local adj rewrite data is 0xfefe on purpose.
+               * Bad engineer, no donut for you.
+               */
+              if (PREDICT_FALSE(adj0->lookup_next_index
+                                == IP_LOOKUP_NEXT_LOCAL))
+                error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
+              /*
+               * We have to override the next_index in ARP adjacencies,
+               * because they're set up for ip4-arp, not this node...
+               */
+              if (PREDICT_FALSE(adj0->lookup_next_index
+                                == IP_LOOKUP_NEXT_ARP))
+                next0_override = IP4_REWRITE_NEXT_ARP;
+            }
+
+          /* Guess we are only writing on simple Ethernet header. */
+          vnet_rewrite_one_header (adj0[0], ip0,
+                                   sizeof (ethernet_header_t));
+
+          /* Update packet buffer attributes/set output interface. */
+          rw_len0 = adj0[0].rewrite_header.data_bytes;
+
+          if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
+            vlib_increment_combined_counter
+              (&lm->adjacency_counters,
+               cpu_index, adj_index0,
+               /* packet increment */ 0,
+               /* byte increment */ rw_len0-sizeof(ethernet_header_t));
+
+          /* Check MTU of outgoing interface. */
+          error0 = (vlib_buffer_length_in_chain (vm, p0)
+                    > adj0[0].rewrite_header.max_l3_packet_bytes
+                    ? IP4_ERROR_MTU_EXCEEDED
+                    : error0);
+
+          p0->error = error_node->errors[error0];
+          p0->current_data -= rw_len0;
+          p0->current_length += rw_len0;
+          vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+            adj0[0].rewrite_header.sw_if_index;
+
+          /* Any error sends the packet to next index 0 (drop). */
+          next0 = (error0 == IP4_ERROR_NONE)
+            ? adj0[0].rewrite_header.next_index : 0;
+
+          if (rewrite_for_locally_received_packets)
+            next0 = next0 && next0_override ? next0_override : next0;
+
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Need to do trace after rewrites to pick up new packet data. */
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
+
+  return frame->n_vectors;
+}
+
+/* Node function for "ip4-rewrite-transit": runs the shared rewrite body
+ * with the TTL decrement / checksum update enabled. */
+static uword
+ip4_rewrite_transit (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * frame)
+{
+  const int rewrite_for_locally_received_packets = 0;
+
+  return ip4_rewrite_inline (vm, node, frame,
+                             rewrite_for_locally_received_packets);
+}
+
+/* Node function for "ip4-rewrite-local": runs the shared rewrite body in
+ * locally-received mode (no TTL decrement, local/ARP adjacency checks). */
+static uword
+ip4_rewrite_local (vlib_main_t * vm,
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * frame)
+{
+  const int rewrite_for_locally_received_packets = 1;
+
+  return ip4_rewrite_inline (vm, node, frame,
+                             rewrite_for_locally_received_packets);
+}
+
+/* Graph node that rewrites forwarded (transit) IPv4 packets. */
+VLIB_REGISTER_NODE (ip4_rewrite_node) = {
+  .name = "ip4-rewrite-transit",
+  .function = ip4_rewrite_transit,
+  .format_trace = format_ip4_forward_next_trace,
+  .vector_size = sizeof (u32),
+
+  /* Slots are indexed by ip4_rewrite_next_t. */
+  .n_next_nodes = 2,
+  .next_nodes = {
+    [IP4_REWRITE_NEXT_DROP] = "error-drop",
+    [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
+  },
+};
+
+/* Graph node that rewrites locally received IPv4 packets; registered as a
+ * sibling of "ip4-rewrite-transit". */
+VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
+  .name = "ip4-rewrite-local",
+  .sibling_of = "ip4-rewrite-transit",
+  .function = ip4_rewrite_local,
+  .format_trace = format_ip4_forward_next_trace,
+  .vector_size = sizeof (u32),
+
+  /* Slots are indexed by ip4_rewrite_next_t. */
+  .n_next_nodes = 2,
+  .next_nodes = {
+    [IP4_REWRITE_NEXT_DROP] = "error-drop",
+    [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
+  },
+};
+
+/*
+ * CLI handler for "set interface ip table": parses an interface followed
+ * by a numeric table id and, when the FIB lookup yields a table, records
+ * that FIB's index for the interface in fib_index_by_sw_if_index.
+ *
+ * Returns 0 on success or a clib error describing the unparsable input.
+ */
+static clib_error_t *
+add_del_interface_table (vlib_main_t * vm,
+                         unformat_input_t * input,
+                         vlib_cli_command_t * cmd)
+{
+  ip4_main_t * im = &ip4_main;
+  vnet_main_t * vnm = vnet_get_main();
+  u32 sw_if_index = ~0;
+  u32 table_id;
+  ip4_fib_t * fib;
+
+  if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    return clib_error_return (0, "unknown interface `%U'",
+                              format_unformat_error, input);
+
+  if (! unformat (input, "%d", &table_id))
+    return clib_error_return (0, "expected table id `%U'",
+                              format_unformat_error, input);
+
+  fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
+  if (fib)
+    {
+      /* Grow the per-interface vector as needed before storing. */
+      vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+      im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
+    }
+
+  return 0;
+}
+
+/* "set interface ip table <interface> <table-id>": bind an interface to an
+ * IPv4 FIB table.  The handler only sets the binding; it has no delete
+ * form, so the help text shows the actual syntax. */
+VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
+  .path = "set interface ip table",
+  .function = add_del_interface_table,
+  .short_help = "set interface ip table <interface> <table-id>",
+};
+
+
+/*
+ * Node function for "ip4-lookup-multicast".
+ *
+ * For each buffer: pick a FIB index (the RX interface's FIB, unless
+ * sw_if_index[VLIB_TX] is not ~0, in which case that value is used as the
+ * FIB index), look up the destination address, compute the flow hash, use
+ * it to select one adjacency out of the n_adj block, store the result in
+ * ip.adj_index[VLIB_TX], bump the adjacency counter and enqueue the buffer
+ * to the next node named by the adjacency's lookup_next_index.
+ *
+ * Returns the number of vectors in the frame.
+ */
+static uword
+ip4_lookup_multicast (vlib_main_t * vm,
+                      vlib_node_runtime_t * node,
+                      vlib_frame_t * frame)
+{
+  ip4_main_t * im = &ip4_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+  vlib_combined_counter_main_t * cm = &lm->adjacency_counters;
+  u32 cpu_index = os_get_cpu_number();
+  u32 * from = vlib_frame_vector_args (frame);
+  u32 n_left_from = frame->n_vectors;
+  ip_lookup_next_t next = node->cached_next_index;
+  u32 n_left_to_next, * to_next;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+      /* Dual loop: two packets per iteration, prefetching the next two. */
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          vlib_buffer_t * p0, * p1;
+          ip4_header_t * ip0, * ip1;
+          ip_adjacency_t * adj0, * adj1;
+          ip_lookup_next_t next0, next1;
+          u32 pi0, pi1;
+          u32 adj_index0, adj_index1;
+          u32 fib_index0, fib_index1;
+          u32 flow_hash_config0, flow_hash_config1;
+          u32 wrong_next;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2 = vlib_get_buffer (vm, from[2]);
+            vlib_buffer_t * p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+
+            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
+          }
+
+          /* Speculatively enqueue both buffers to the current next frame. */
+          pi0 = to_next[0] = from[0];
+          pi1 = to_next[1] = from[1];
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          ip0 = vlib_buffer_get_current (p0);
+          ip1 = vlib_buffer_get_current (p1);
+
+          /* Default to the RX interface's FIB; a TX value other than ~0
+             replaces it. */
+          fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+                                vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+          fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
+                                vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+          if (vnet_buffer (p0)->sw_if_index[VLIB_TX] != (u32)~0)
+            fib_index0 = vnet_buffer (p0)->sw_if_index[VLIB_TX];
+          if (vnet_buffer (p1)->sw_if_index[VLIB_TX] != (u32)~0)
+            fib_index1 = vnet_buffer (p1)->sw_if_index[VLIB_TX];
+
+          adj_index0 = ip4_fib_lookup_buffer (im, fib_index0,
+                                              &ip0->dst_address, p0);
+          adj_index1 = ip4_fib_lookup_buffer (im, fib_index1,
+                                              &ip1->dst_address, p1);
+
+          adj0 = ip_get_adjacency (lm, adj_index0);
+          adj1 = ip_get_adjacency (lm, adj_index1);
+
+          next0 = adj0->lookup_next_index;
+          next1 = adj1->lookup_next_index;
+
+          flow_hash_config0 =
+            vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
+          flow_hash_config1 =
+            vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
+
+          vnet_buffer (p0)->ip.flow_hash =
+            ip4_compute_flow_hash (ip0, flow_hash_config0);
+          vnet_buffer (p1)->ip.flow_hash =
+            ip4_compute_flow_hash (ip1, flow_hash_config1);
+
+          /* n_adj is a power of two, so masking the hash selects one
+             member of the adjacency block. */
+          ASSERT (adj0->n_adj > 0);
+          ASSERT (adj1->n_adj > 0);
+          ASSERT (is_pow2 (adj0->n_adj));
+          ASSERT (is_pow2 (adj1->n_adj));
+          adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
+          adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
+
+          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
+
+          /* $$$$$$ HACK FIXME: counters are always incremented here */
+          vlib_increment_combined_counter
+            (cm, cpu_index, adj_index0, 1,
+             vlib_buffer_length_in_chain (vm, p0));
+          vlib_increment_combined_counter
+            (cm, cpu_index, adj_index1, 1,
+             vlib_buffer_length_in_chain (vm, p1));
+
+          from += 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+          n_left_from -= 2;
+
+          /* Bit 0: packet 0 mis-speculated; bit 1: packet 1 did. */
+          wrong_next = (next0 != next) + 2*(next1 != next);
+          if (PREDICT_FALSE (wrong_next != 0))
+            {
+              switch (wrong_next)
+                {
+                case 1:
+                  /* A B A */
+                  to_next[-2] = pi1;
+                  to_next -= 1;
+                  n_left_to_next += 1;
+                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
+                  break;
+
+                case 2:
+                  /* A A B */
+                  to_next -= 1;
+                  n_left_to_next += 1;
+                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
+                  break;
+
+                case 3:
+                  /* A B C */
+                  to_next -= 2;
+                  n_left_to_next += 2;
+                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
+                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
+                  if (next0 == next1)
+                    {
+                      /* A B B: switch the current next frame to B */
+                      vlib_put_next_frame (vm, node, next, n_left_to_next);
+                      next = next1;
+                      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+                    }
+                }
+            }
+        }
+
+      /* Single loop: remaining packets, one at a time. */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t * p0;
+          ip4_header_t * ip0;
+          ip_adjacency_t * adj0;
+          ip_lookup_next_t next0;
+          u32 pi0;
+          u32 adj_index0;
+          u32 fib_index0;
+          u32 flow_hash_config0;
+
+          pi0 = to_next[0] = from[0];
+
+          p0 = vlib_get_buffer (vm, pi0);
+
+          ip0 = vlib_buffer_get_current (p0);
+
+          fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+                                vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+          if (vnet_buffer (p0)->sw_if_index[VLIB_TX] != (u32)~0)
+            fib_index0 = vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+          adj_index0 = ip4_fib_lookup_buffer (im, fib_index0,
+                                              &ip0->dst_address, p0);
+
+          adj0 = ip_get_adjacency (lm, adj_index0);
+
+          next0 = adj0->lookup_next_index;
+
+          flow_hash_config0 =
+            vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
+
+          vnet_buffer (p0)->ip.flow_hash =
+            ip4_compute_flow_hash (ip0, flow_hash_config0);
+
+          ASSERT (adj0->n_adj > 0);
+          ASSERT (is_pow2 (adj0->n_adj));
+          adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
+
+          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+
+          /* $$$$$$ HACK FIXME: counters are always incremented here */
+          vlib_increment_combined_counter
+            (cm, cpu_index, adj_index0, 1,
+             vlib_buffer_length_in_chain (vm, p0));
+
+          from += 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+          n_left_from -= 1;
+
+          if (PREDICT_FALSE (next0 != next))
+            {
+              /* Undo the speculative enqueue and move to next0's frame. */
+              n_left_to_next += 1;
+              vlib_put_next_frame (vm, node, next, n_left_to_next);
+              next = next0;
+              vlib_get_next_frame (vm, node, next,
+                                   to_next, n_left_to_next);
+              to_next[0] = pi0;
+              to_next += 1;
+              n_left_to_next -= 1;
+            }
+        }
+
+      vlib_put_next_frame (vm, node, next, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/* Graph node for IPv4 multicast lookup; its next-node table is indexed by
+ * ip_lookup_next_t. */
+VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
+  .name = "ip4-lookup-multicast",
+  .function = ip4_lookup_multicast,
+  .vector_size = sizeof (u32),
+
+  .n_next_nodes = IP_LOOKUP_N_NEXT,
+  .next_nodes = {
+    [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
+    [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
+    [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
+    [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
+    [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
+    [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
+    [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
+    [IP_LOOKUP_NEXT_MAP] = "ip4-map",
+    [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
+    [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
+    [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
+    [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop",
+    [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop",
+  },
+};
+
+VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
+ .function = ip4_drop,
+ .name = "ip4-multicast",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_forward_next_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+/*
+ * Cross-check the two lookup paths for one address: walk the mtrie of FIB
+ * fib_index0 (four lookup steps, indices 0..3, falling back to the
+ * mtrie's default leaf when the result is empty) and compare the
+ * resulting adjacency index with what ip4_fib_lookup_with_table() returns.
+ *
+ * Returns non-zero when both agree, 0 otherwise.
+ */
+int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
+{
+  ip4_main_t * im = &ip4_main;
+  ip4_fib_mtrie_t * mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
+  ip4_fib_mtrie_leaf_t leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
+  u32 mtrie_adj_index;
+  u32 step;
+
+  for (step = 0; step < 4; step++)
+    leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, step);
+
+  /* Handle default route. */
+  if (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY)
+    leaf0 = mtrie0->default_leaf;
+
+  mtrie_adj_index = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+
+  return mtrie_adj_index == ip4_fib_lookup_with_table (im, fib_index0,
+                                                       a,
+                                                       /* no_default_route */ 0);
+}
+
+/*
+ * CLI handler for "test lookup".
+ *
+ * Accepts "table <n>", "count <f>" and a base IPv4 address in any order,
+ * then runs ip4_lookup_validate() on <count> consecutive addresses
+ * starting at the base address and reports how many lookups disagreed.
+ *
+ * The base address defaults to 0.0.0.0 when none is given, and the table
+ * value is range-checked because ip4_lookup_validate() uses it directly as
+ * an index into the FIB vector.
+ *
+ * Returns 0 on success or a clib error for bad input.
+ */
+static clib_error_t *
+test_lookup_command_fn (vlib_main_t * vm,
+                        unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+  ip4_main_t * im = &ip4_main;
+  u32 table_id = 0;
+  f64 count = 1;
+  u32 n;
+  u32 i;
+  ip4_address_t ip4_base_address;
+  u64 errors = 0;
+
+  /* Never read the base address uninitialised. */
+  ip4_base_address.as_u32 = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "table %d", &table_id))
+        ;
+      else if (unformat (input, "count %f", &count))
+        ;
+      else if (unformat (input, "%U",
+                         unformat_ip4_address, &ip4_base_address))
+        ;
+      else
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+    }
+
+  /* Reject values that would index past the end of the FIB vector. */
+  if (table_id >= vec_len (im->fibs))
+    return clib_error_return (0, "no such FIB index %d", table_id);
+
+  n = count;
+
+  for (i = 0; i < n; i++)
+    {
+      if (!ip4_lookup_validate (&ip4_base_address, table_id))
+        errors++;
+
+      /* Advance to the next address, incrementing in host byte order. */
+      ip4_base_address.as_u32 =
+        clib_host_to_net_u32 (1 +
+                              clib_net_to_host_u32 (ip4_base_address.as_u32));
+    }
+
+  if (errors)
+    vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
+  else
+    vlib_cli_output (vm, "No errors in %d lookups\n", n);
+
+  return 0;
+}
+
+/* CLI registration for the mtrie-versus-table lookup self-test. */
+VLIB_CLI_COMMAND (lookup_test_command, static) = {
+  .path = "test lookup",
+  .function = test_lookup_command_fn,
+  .short_help = "test lookup",
+};
+
+/*
+ * Set the flow-hash configuration of the IPv4 FIB with the given table id.
+ *
+ * Returns 0 on success, or VNET_API_ERROR_NO_SUCH_FIB when no FIB is
+ * registered under table_id.
+ */
+int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
+{
+  ip4_main_t * im4 = &ip4_main;
+  uword * fib_index_p = hash_get (im4->fib_index_by_table_id, table_id);
+
+  if (! fib_index_p)
+    return VNET_API_ERROR_NO_SUCH_FIB;
+
+  vec_elt_at_index (im4->fibs, fib_index_p[0])->flow_hash_config =
+    flow_hash_config;
+
+  return 0;
+}
+
+/*
+ * CLI handler for "set ip flow-hash".
+ *
+ * Parses an optional "table <id>" plus any of the keywords generated from
+ * foreach_flow_hash_bit, ORs the matching bits into a flow-hash config and
+ * applies it with vnet_set_ip4_flow_hash().
+ *
+ * Returns 0 on success, or a clib error when nothing was recognised or the
+ * table does not exist.
+ */
+static clib_error_t *
+set_ip_flow_hash_command_fn (vlib_main_t * vm,
+                             unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
+{
+  u32 flow_hash_config = 0;
+  u32 table_id = 0;
+  int matched = 0;
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "table %d", &table_id))
+        matched = 1;
+      /* One "else if" arm per flow-hash keyword. */
+#define _(a,v) \
+      else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
+      foreach_flow_hash_bit
+#undef _
+      else
+        break;
+    }
+
+  if (! matched)
+    return clib_error_return (0, "unknown input `%U'",
+                              format_unformat_error, input);
+
+  rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
+
+  if (rv == VNET_API_ERROR_NO_SUCH_FIB)
+    return clib_error_return (0, "no such FIB table %d", table_id);
+
+  /* Any other non-zero result is only logged. */
+  if (rv != 0)
+    clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
+
+  return 0;
+}
+
+/* CLI registration for "set ip flow-hash".  The help text now starts with
+ * the command path actually registered below. */
+VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
+  .path = "set ip flow-hash",
+  .short_help =
+    "set ip flow-hash table <fib-id> src dst sport dport proto reverse",
+  .function = set_ip_flow_hash_command_fn,
+};
+
+/*
+ * Record table_index as the classify table for interface sw_if_index.
+ *
+ * A table_index of ~0 is stored without being checked against the
+ * classifier's table pool; any other value must name an in-use table.
+ *
+ * Returns 0 on success, VNET_API_ERROR_NO_MATCHING_INTERFACE for an unknown
+ * interface, or VNET_API_ERROR_NO_SUCH_ENTRY for an unknown table.
+ */
+int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+                                 u32 table_index)
+{
+  vnet_interface_main_t * im = &vnet_get_main()->interface_main;
+  ip_lookup_main_t * lm = &ip4_main.lookup_main;
+  vnet_classify_main_t * cm = &vnet_classify_main;
+
+  if (pool_is_free_index (im->sw_interfaces, sw_if_index))
+    return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+
+  if (table_index != ~0)
+    {
+      if (pool_is_free_index (cm->tables, table_index))
+        return VNET_API_ERROR_NO_SUCH_ENTRY;
+    }
+
+  /* Grow the per-interface vector as needed before storing. */
+  vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
+  lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
+
+  return 0;
+}
+
+/*
+ * CLI handler for "set ip classify": requires both "table-index <n>" and
+ * "intfc <interface>", then calls vnet_set_ip4_classify_intfc().
+ *
+ * Returns 0 on success or a clib error naming the missing or invalid
+ * argument.
+ */
+static clib_error_t *
+set_ip_classify_command_fn (vlib_main_t * vm,
+                            unformat_input_t * input,
+                            vlib_cli_command_t * cmd)
+{
+  u32 sw_if_index = ~0;
+  u32 table_index = ~0;
+  int table_index_set = 0;
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "table-index %d", &table_index))
+        {
+          table_index_set = 1;
+          continue;
+        }
+
+      if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+                    vnet_get_main(), &sw_if_index))
+        continue;
+
+      /* Stop at the first token that is not recognised. */
+      break;
+    }
+
+  if (! table_index_set)
+    return clib_error_return (0, "classify table-index must be specified");
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "interface / subif must be specified");
+
+  rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
+
+  if (rv == VNET_API_ERROR_NO_MATCHING_INTERFACE)
+    return clib_error_return (0, "No such interface");
+
+  if (rv == VNET_API_ERROR_NO_SUCH_ENTRY)
+    return clib_error_return (0, "No such classifier table");
+
+  return 0;
+}
+
+/* CLI registration for attaching a classify table to an interface. */
+VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
+  .path = "set ip classify",
+  .function = set_ip_classify_command_fn,
+  .short_help =
+    "set ip classify intfc <int> table-index <index>",
+};
+
diff --git a/vnet/vnet/ip/ip4_hop_by_hop.c b/vnet/vnet/ip/ip4_hop_by_hop.c
new file mode 100644
index 00000000000..ee2bcc0ae75
--- /dev/null
+++ b/vnet/vnet/ip/ip4_hop_by_hop.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+typedef struct { /* module-wide state for the ip4 hop-by-hop nodes */
+ /* convenience: cached pointers to the vlib and vnet main structures,
+ * filled in by ip4_hop_by_hop_init() */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} ip4_hop_by_hop_main_t;
+
+ip4_hop_by_hop_main_t ip4_hop_by_hop_main; /* the single global instance */
+
+vlib_node_registration_t ip4_hop_by_hop_node; /* declared ahead of its VLIB_REGISTER_NODE definition further down */
+
+typedef struct { /* per-packet trace record written by ip4_hop_by_hop_node_fn */
+ u32 next_index; /* next-node index chosen for the traced buffer */
+} ip4_hop_by_hop_trace_t;
+
+/* Packet trace formatter: appends the recorded next index to s and
+ * returns the extended vector. */
+static u8 * format_ip4_hop_by_hop_trace (u8 * s, va_list * args)
+{
+  ip4_hop_by_hop_trace_t * t;
+
+  /* vm and node are unused, but must be consumed to reach the record. */
+  (void) va_arg (*args, vlib_main_t *);
+  (void) va_arg (*args, vlib_node_t *);
+  t = va_arg (*args, ip4_hop_by_hop_trace_t *);
+
+  return format (s, "IP4_HOP_BY_HOP: next index %d",
+                 t->next_index);
+}
+
+vlib_node_registration_t ip4_hop_by_hop_node; /* repeats the declaration made earlier in this file */
+
+#define foreach_ip4_hop_by_hop_error \
+_(PROCESSED, "Pkts with ip4 hop-by-hop options")
+
+typedef enum { /* one IP4_HOP_BY_HOP_ERROR_<sym> per foreach_ip4_hop_by_hop_error entry */
+#define _(sym,str) IP4_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip4_hop_by_hop_error
+#undef _
+ IP4_HOP_BY_HOP_N_ERROR, /* number of error codes defined above */
+} ip4_hop_by_hop_error_t;
+
+static char * ip4_hop_by_hop_error_strings[] = { /* counter descriptions, generated from the same list and so in enum order */
+#define _(sym,string) string,
+ foreach_ip4_hop_by_hop_error
+#undef _
+};
+
+/*
+ * Node function shared by the "ip4-hop-by-hop", "ip4-add-hop-by-hop" and
+ * "ip4-pop-hop-by-hop" nodes.
+ *
+ * Option processing is still a placeholder: each buffer is handed to the
+ * next node named by the lookup_next_index of its TX adjacency, traced if
+ * requested, and counted.  The PROCESSED counter is charged to the node
+ * that is actually running (node->node_index), so the three registrations
+ * sharing this function keep separate counts.
+ *
+ * Only a single-packet loop exists; the earlier disabled dual-loop
+ * template did not compile and was removed.
+ *
+ * Returns the number of vectors in the frame.
+ */
+static uword
+ip4_hop_by_hop_node_fn (vlib_main_t * vm,
+                        vlib_node_runtime_t * node,
+                        vlib_frame_t * frame)
+{
+  ip4_main_t * im = &ip4_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+  u32 n_left_from, * from, * to_next;
+  ip_lookup_next_t next_index;
+  u32 processed = 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t * b0;
+          u32 next0;
+          u32 adj_index0;
+          ip_adjacency_t * adj0;
+
+          /* speculatively enqueue b0 to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+          adj0 = ip_get_adjacency (lm, adj_index0);
+
+          /* $$$$$$$$$$$$ process one (or more) hop-by-hop header(s) here */
+
+
+          /* $$$$$$$$$$$$ */
+
+          /* Send the packet e.g. to ip4_rewrite */
+          next0 = adj0->lookup_next_index;
+
+          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              ip4_hop_by_hop_trace_t *t =
+                 vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->next_index = next0;
+            }
+
+          processed++;
+
+          /* $$$$$ Done processing 1 packet here $$$$$ */
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Charge the counter to whichever of the three nodes is running. */
+  vlib_node_increment_counter (vm, node->node_index,
+                               IP4_HOP_BY_HOP_ERROR_PROCESSED, processed);
+  return frame->n_vectors;
+}
+
+/* "ip4-hop-by-hop" graph node; its next-node table mirrors the lookup
+ * node's so that lookup_next_index values can be used directly. */
+VLIB_REGISTER_NODE (ip4_hop_by_hop_node) = {
+  .name = "ip4-hop-by-hop",
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .function = ip4_hop_by_hop_node_fn,
+  .format_trace = format_ip4_hop_by_hop_trace,
+  .vector_size = sizeof (u32),
+
+  .error_strings = ip4_hop_by_hop_error_strings,
+  .n_errors = ARRAY_LEN(ip4_hop_by_hop_error_strings),
+
+  /* See ip/lookup.h */
+  .n_next_nodes = IP_LOOKUP_N_NEXT,
+  .next_nodes = {
+    [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
+    [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
+    [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
+    [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
+    [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
+    [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
+    [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
+    [IP_LOOKUP_NEXT_MAP] = "ip4-map",
+    [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
+    [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
+    [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop", /* probably not */
+    [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop",
+    [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop",
+  },
+};
+
+/* "ip4-add-hop-by-hop" graph node; currently runs the same node function
+ * and next-node table as "ip4-hop-by-hop". */
+VLIB_REGISTER_NODE (ip4_add_hop_by_hop_node) = {
+  .name = "ip4-add-hop-by-hop",
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .function = ip4_hop_by_hop_node_fn,
+  .format_trace = format_ip4_hop_by_hop_trace,
+  .vector_size = sizeof (u32),
+
+  .error_strings = ip4_hop_by_hop_error_strings,
+  .n_errors = ARRAY_LEN(ip4_hop_by_hop_error_strings),
+
+  /* See ip/lookup.h */
+  .n_next_nodes = IP_LOOKUP_N_NEXT,
+  .next_nodes = {
+    [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
+    [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
+    [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
+    [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
+    [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
+    [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
+    [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
+    [IP_LOOKUP_NEXT_MAP] = "ip4-map",
+    [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
+    [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
+    [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop", /* probably not */
+    [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop",
+    [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop",
+  },
+};
+
+/* "ip4-pop-hop-by-hop" graph node; currently runs the same node function
+ * and next-node table as "ip4-hop-by-hop".  Each next-node slot is
+ * initialised exactly once. */
+VLIB_REGISTER_NODE (ip4_pop_hop_by_hop_node) = {
+  .function = ip4_hop_by_hop_node_fn,
+  .name = "ip4-pop-hop-by-hop",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip4_hop_by_hop_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(ip4_hop_by_hop_error_strings),
+  .error_strings = ip4_hop_by_hop_error_strings,
+
+  /* See ip/lookup.h */
+  .n_next_nodes = IP_LOOKUP_N_NEXT,
+  .next_nodes = {
+    [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
+    [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
+    [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
+    [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
+    [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
+    [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
+    [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify",
+    [IP_LOOKUP_NEXT_MAP] = "ip4-map",
+    [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
+    [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
+    [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop", /* probably not */
+    [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop",
+    [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop",
+  },
+};
+
+/* Init function: caches the vlib and vnet main pointers in
+ * ip4_hop_by_hop_main.  Always returns 0 (no error). */
+static clib_error_t *
+ip4_hop_by_hop_init (vlib_main_t * vm)
+{
+  ip4_hop_by_hop_main.vlib_main = vm;
+  ip4_hop_by_hop_main.vnet_main = vnet_get_main();
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_hop_by_hop_init);
diff --git a/vnet/vnet/ip/ip4_input.c b/vnet/vnet/ip/ip4_input.c
new file mode 100644
index 00000000000..68edc0fa918
--- /dev/null
+++ b/vnet/vnet/ip/ip4_input.c
@@ -0,0 +1,423 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_input.c: IP v4 input node
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ppp/ppp.h>
+#include <vnet/hdlc/hdlc.h>
+
+/* Trace record used by the ip4 input nodes: 64 bytes of packet data that
+   format_ip4_input_trace hands to format_ip4_header. */
+typedef struct {
+ u8 packet_data[64];
+} ip4_input_trace_t;
+
+/* Trace formatter for the ip4 input nodes. Pulls (vm, node, trace record)
+   off the va_list, ignores the first two, and appends the recorded bytes
+   rendered by format_ip4_header to s. Returns the extended vector. */
+static u8 * format_ip4_input_trace (u8 * s, va_list * va)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  ip4_input_trace_t * trace = va_arg (*va, ip4_input_trace_t *);
+
+  return format (s, "%U",
+                 format_ip4_header,
+                 trace->packet_data, sizeof (trace->packet_data));
+}
+
+/* Next-node slots of the ip4 input nodes. The values index the .next_nodes
+   tables of ip4_input_node and ip4_input_no_checksum_node; IP4_INPUT_N_NEXT
+   is the slot count. */
+typedef enum {
+ IP4_INPUT_NEXT_DROP,
+ IP4_INPUT_NEXT_PUNT,
+ IP4_INPUT_NEXT_LOOKUP,
+ IP4_INPUT_NEXT_LOOKUP_MULTICAST,
+ IP4_INPUT_NEXT_TTL_EXPIRE,
+ IP4_INPUT_N_NEXT,
+} ip4_input_next_t;
+
+/* Validate IP v4 packets and pass them either to forwarding code
+ or drop/punt exception packets.
+
+ vm, node, frame: the usual vlib node dispatch arguments; frame holds the
+ buffer indices to process.
+ verify_checksum: when non-zero the IP header checksum is verified.
+
+ For every packet the rx config index for its cast (unicast/multicast) and
+ rx interface is loaded, next0 is filled in by vnet_get_config_data, the
+ per-interface VNET_INTERFACE_COUNTER_IP4 counter is bumped, and the header
+ checks run. Each check overwrites the error when it fires, so the last
+ failing check decides the reported error. Packets with an error are
+ redirected: IP4_ERROR_OPTIONS -> punt, IP4_ERROR_TIME_EXPIRED -> TTL expire
+ next, anything else -> drop.
+
+ Error counters come from ip4_input_node's runtime, whichever node is
+ actually running this function.
+
+ NOTE(review): unicast packets with TTL == 1 are flagged as expired here,
+ before any lookup -- confirm this is intended for locally destined traffic.
+
+ Returns frame->n_vectors. */
+always_inline uword
+ip4_input_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int verify_checksum)
+{
+ ip4_main_t * im = &ip4_main;
+ vnet_main_t * vnm = vnet_get_main();
+ ip_lookup_main_t * lm = &im->lookup_main;
+ u32 n_left_from, * from, * to_next;
+ ip4_input_next_t next_index;
+ vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
+ vlib_simple_counter_main_t * cm;
+ u32 cpu_index = os_get_cpu_number();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ip4_input_trace_t));
+
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_IP4);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ /* Dual loop: two packets per iteration. Four must remain because
+ from[2] and from[3] are prefetched below. */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t * p0, * p1;
+ ip4_header_t * ip0, * ip1;
+ ip_config_main_t * cm0, * cm1;
+ u32 sw_if_index0, pi0, ip_len0, cur_len0, next0;
+ u32 sw_if_index1, pi1, ip_len1, cur_len1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, error1, cast0, cast1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+ }
+
+ /* Speculatively enqueue both buffers to the current next frame. */
+ to_next[0] = pi0 = from[0];
+ to_next[1] = pi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
+
+ cast0 = ip4_address_is_multicast (&ip0->dst_address) ? VNET_MULTICAST : VNET_UNICAST;
+ cast1 = ip4_address_is_multicast (&ip1->dst_address) ? VNET_MULTICAST : VNET_UNICAST;
+
+ /* Pick the rx config main for the packet's cast. */
+ cm0 = lm->rx_config_mains + cast0;
+ cm1 = lm->rx_config_mains + cast1;
+
+ vnet_buffer (p0)->ip.current_config_index = vec_elt (cm0->config_index_by_sw_if_index, sw_if_index0);
+ vnet_buffer (p1)->ip.current_config_index = vec_elt (cm1->config_index_by_sw_if_index, sw_if_index1);
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0;
+
+ /* next0/next1 are written by vnet_get_config_data. */
+ vnet_get_config_data (&cm0->config_main,
+ &vnet_buffer (p0)->ip.current_config_index,
+ &next0,
+ /* # bytes of config data */ 0);
+ vnet_get_config_data (&cm1->config_main,
+ &vnet_buffer (p1)->ip.current_config_index,
+ &next1,
+ /* # bytes of config data */ 0);
+
+ vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+ vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+
+ /* Later checks overwrite earlier ones when they fire. */
+ error0 = error1 = IP4_ERROR_NONE;
+
+ /* Punt packets with options. */
+ error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ? IP4_ERROR_OPTIONS : error0;
+ error1 = (ip1->ip_version_and_header_length & 0xf) != 5 ? IP4_ERROR_OPTIONS : error1;
+
+ /* Version != 4? Drop it. */
+ error0 = (ip0->ip_version_and_header_length >> 4) != 4 ? IP4_ERROR_VERSION : error0;
+ error1 = (ip1->ip_version_and_header_length >> 4) != 4 ? IP4_ERROR_VERSION : error1;
+
+ /* Verify header checksum. */
+ if (verify_checksum)
+ {
+ ip_csum_t sum0, sum1;
+
+ ip4_partial_header_checksum_x1 (ip0, sum0);
+ ip4_partial_header_checksum_x1 (ip1, sum1);
+
+ error0 = 0xffff != ip_csum_fold (sum0) ? IP4_ERROR_BAD_CHECKSUM : error0;
+ error1 = 0xffff != ip_csum_fold (sum1) ? IP4_ERROR_BAD_CHECKSUM : error1;
+ }
+
+ /* Drop fragmentation offset 1 packets. */
+ error0 = ip4_get_fragment_offset (ip0) == 1 ? IP4_ERROR_FRAGMENT_OFFSET_ONE : error0;
+ error1 = ip4_get_fragment_offset (ip1) == 1 ? IP4_ERROR_FRAGMENT_OFFSET_ONE : error1;
+
+ /* TTL <= 1? Drop it. */
+ error0 = (ip0->ttl <= 1 && cast0 == VNET_UNICAST) ? IP4_ERROR_TIME_EXPIRED : error0;
+ error1 = (ip1->ttl <= 1 && cast1 == VNET_UNICAST) ? IP4_ERROR_TIME_EXPIRED : error1;
+
+ /* Verify lengths. */
+ ip_len0 = clib_net_to_host_u16 (ip0->length);
+ ip_len1 = clib_net_to_host_u16 (ip1->length);
+
+ /* IP length must be at least minimal IP header. */
+ error0 = ip_len0 < sizeof (ip0[0]) ? IP4_ERROR_TOO_SHORT : error0;
+ error1 = ip_len1 < sizeof (ip1[0]) ? IP4_ERROR_TOO_SHORT : error1;
+
+ cur_len0 = vlib_buffer_length_in_chain (vm, p0);
+ cur_len1 = vlib_buffer_length_in_chain (vm, p1);
+
+ /* Negative difference: the header claims more bytes than the buffer chain holds. */
+ len_diff0 = cur_len0 - ip_len0;
+ len_diff1 = cur_len1 - ip_len1;
+
+ error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0;
+ error1 = len_diff1 < 0 ? IP4_ERROR_BAD_LENGTH : error1;
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ /* On error, override the configured next: options -> punt,
+ TTL expiry -> TTL expire next, anything else -> drop. */
+ if (PREDICT_FALSE(error0 != IP4_ERROR_NONE))
+ {
+ next0 = (error0 != IP4_ERROR_OPTIONS
+ ? (error0 == IP4_ERROR_TIME_EXPIRED
+ ? IP4_INPUT_NEXT_TTL_EXPIRE
+ : IP4_INPUT_NEXT_DROP)
+ : IP4_INPUT_NEXT_PUNT);
+ }
+ if (PREDICT_FALSE(error1 != IP4_ERROR_NONE))
+ {
+ next1 = (error1 != IP4_ERROR_OPTIONS
+ ? (error1 == IP4_ERROR_TIME_EXPIRED
+ ? IP4_INPUT_NEXT_TTL_EXPIRE
+ : IP4_INPUT_NEXT_DROP)
+ : IP4_INPUT_NEXT_PUNT);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+ /* Single loop: same processing, one packet per iteration. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ ip4_header_t * ip0;
+ ip_config_main_t * cm0;
+ u32 sw_if_index0, pi0, ip_len0, cur_len0, next0;
+ i32 len_diff0;
+ u8 error0, cast0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ cast0 = ip4_address_is_multicast (&ip0->dst_address) ? VNET_MULTICAST : VNET_UNICAST;
+ cm0 = lm->rx_config_mains + cast0;
+ vnet_buffer (p0)->ip.current_config_index = vec_elt (cm0->config_index_by_sw_if_index, sw_if_index0);
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_get_config_data (&cm0->config_main,
+ &vnet_buffer (p0)->ip.current_config_index,
+ &next0,
+ /* # bytes of config data */ 0);
+
+ vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+
+ error0 = IP4_ERROR_NONE;
+
+ /* Punt packets with options. */
+ error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ? IP4_ERROR_OPTIONS : error0;
+
+ /* Version != 4? Drop it. */
+ error0 = (ip0->ip_version_and_header_length >> 4) != 4 ? IP4_ERROR_VERSION : error0;
+
+ /* Verify header checksum. */
+ if (verify_checksum)
+ {
+ ip_csum_t sum0;
+
+ ip4_partial_header_checksum_x1 (ip0, sum0);
+ error0 = 0xffff != ip_csum_fold (sum0) ? IP4_ERROR_BAD_CHECKSUM : error0;
+ }
+
+ /* Drop fragmentation offset 1 packets. */
+ error0 = ip4_get_fragment_offset (ip0) == 1 ? IP4_ERROR_FRAGMENT_OFFSET_ONE : error0;
+
+ /* TTL <= 1? Drop it. */
+ error0 = (ip0->ttl <= 1 && cast0 == VNET_UNICAST) ? IP4_ERROR_TIME_EXPIRED : error0;
+
+ /* Verify lengths. */
+ ip_len0 = clib_net_to_host_u16 (ip0->length);
+
+ /* IP length must be at least minimal IP header. */
+ error0 = ip_len0 < sizeof (ip0[0]) ? IP4_ERROR_TOO_SHORT : error0;
+
+ cur_len0 = vlib_buffer_length_in_chain (vm, p0);
+ len_diff0 = cur_len0 - ip_len0;
+ error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0;
+
+ p0->error = error_node->errors[error0];
+ if (PREDICT_FALSE(error0 != IP4_ERROR_NONE))
+ {
+ next0 = (error0 != IP4_ERROR_OPTIONS
+ ? (error0 == IP4_ERROR_TIME_EXPIRED
+ ? IP4_INPUT_NEXT_TTL_EXPIRE
+ : IP4_INPUT_NEXT_DROP)
+ : IP4_INPUT_NEXT_PUNT);
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* Node function for "ip4-input": runs ip4_input_inline with header
+   checksum verification enabled and returns its result. */
+static uword
+ip4_input (vlib_main_t * vm,
+           vlib_node_runtime_t * node,
+           vlib_frame_t * frame)
+{
+  const int verify_checksum = 1;
+
+  return ip4_input_inline (vm, node, frame, verify_checksum);
+}
+
+/* Node function for "ip4-input-no-checksum": runs ip4_input_inline with
+   header checksum verification disabled and returns its result. */
+static uword
+ip4_input_no_checksum (vlib_main_t * vm,
+                       vlib_node_runtime_t * node,
+                       vlib_frame_t * frame)
+{
+  const int verify_checksum = 0;
+
+  return ip4_input_inline (vm, node, frame, verify_checksum);
+}
+
+/* Error strings for the ip4 input node, generated by expanding
+   foreach_ip4_error with _(sym,string) defined to emit just the string. */
+static char * ip4_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip4_error
+#undef _
+};
+
+/* Graph node "ip4-input": runs ip4_input (checksum verified). It owns the
+   IP4_N_ERROR error strings; ip4_input_inline takes its error codes from
+   this node's runtime. Next-node slots follow ip4_input_next_t. */
+VLIB_REGISTER_NODE (ip4_input_node) = {
+ .function = ip4_input,
+ .name = "ip4-input",
+ .vector_size = sizeof (u32),
+
+ .n_errors = IP4_N_ERROR,
+ .error_strings = ip4_error_strings,
+
+ .n_next_nodes = IP4_INPUT_N_NEXT,
+ .next_nodes = {
+ [IP4_INPUT_NEXT_DROP] = "error-drop",
+ [IP4_INPUT_NEXT_PUNT] = "error-punt",
+ [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
+ [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-lookup-multicast",
+ [IP4_INPUT_NEXT_TTL_EXPIRE] = "ip4-icmp-ttl-expire",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_input_trace,
+};
+
+/* Graph node "ip4-input-no-checksum": runs ip4_input_no_checksum (checksum
+   not verified). It registers no error strings of its own; the shared
+   ip4_input_inline uses ip4_input_node's runtime for error codes. The
+   next-node table matches ip4_input_node's. */
+VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = {
+ .function = ip4_input_no_checksum,
+ .name = "ip4-input-no-checksum",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP4_INPUT_N_NEXT,
+ .next_nodes = {
+ [IP4_INPUT_NEXT_DROP] = "error-drop",
+ [IP4_INPUT_NEXT_PUNT] = "error-punt",
+ [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
+ [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-lookup-multicast",
+ [IP4_INPUT_NEXT_TTL_EXPIRE] = "ip4-icmp-ttl-expire",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_input_trace,
+};
+
+/* IP v4 init hook.
+
+   Registers ip4-input as the handler for IP4 on ethernet, PPP and HDLC,
+   installs the packet-generator header parser on both ip4 input nodes, runs
+   the ip4_cli_init and ip4_source_check_init init functions, then sets the
+   flow hash seed and the default TTL.
+
+   Returns the first error reported by a dependent init function, else 0. */
+static clib_error_t * ip4_init (vlib_main_t * vm)
+{
+  clib_error_t * error;
+  pg_node_t * pg_checksum, * pg_no_checksum;
+
+  ethernet_register_input_type (vm, ETHERNET_TYPE_IP4,
+                                ip4_input_node.index);
+  ppp_register_input_protocol (vm, PPP_PROTOCOL_ip4,
+                               ip4_input_node.index);
+  hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip4,
+                                ip4_input_node.index);
+
+  /* Both input nodes accept the same packet-generator edits. */
+  pg_checksum = pg_get_node (ip4_input_node.index);
+  pg_checksum->unformat_edit = unformat_pg_ip4_header;
+  pg_no_checksum = pg_get_node (ip4_input_no_checksum_node.index);
+  pg_no_checksum->unformat_edit = unformat_pg_ip4_header;
+
+  error = vlib_call_init_function (vm, ip4_cli_init);
+  if (error)
+    return error;
+
+  error = vlib_call_init_function (vm, ip4_source_check_init);
+  if (error)
+    return error;
+
+  /* Set flow hash to something non-zero. */
+  ip4_main.flow_hash_seed = 0xdeadbeef;
+
+  /* Default TTL for packets we generate. */
+  ip4_main.host_config.ttl = 64;
+
+  /* error is 0 here: both init calls above succeeded. */
+  return error;
+}
+
+VLIB_INIT_FUNCTION (ip4_init);
diff --git a/vnet/vnet/ip/ip4_mtrie.c b/vnet/vnet/ip/ip4_mtrie.c
new file mode 100644
index 00000000000..ed4a0d9f44f
--- /dev/null
+++ b/vnet/vnet/ip/ip4_mtrie.c
@@ -0,0 +1,561 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_mtrie.c: ip4 mtrie fib
+ *
+ * Copyright (c) 2012 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Initialise ply p so that every leaf equals init and every leaf's recorded
+   prefix length is prefix_len.
+
+   n_non_empty_leafs becomes 0 when init is the empty leaf, otherwise the
+   number of leaves in the ply. Note that memset stores prefix_len as a
+   single byte value in each dst_address_bits_of_leaves[] entry. */
+static void
+ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init, uword prefix_len)
+{
+ p->n_non_empty_leafs = ip4_fib_mtrie_leaf_is_empty (init) ? 0 : ARRAY_LEN (p->leaves);
+ memset (p->dst_address_bits_of_leaves, prefix_len, sizeof (p->dst_address_bits_of_leaves));
+
+ /* Initialize leaves. */
+#ifdef CLIB_HAVE_VEC128
+ {
+ u32x4 * l, init_x4;
+
+ /* Build a vector holding init in all four lanes. */
+#ifndef __ALTIVEC__
+ init_x4 = u32x4_splat (init);
+#else
+ {
+ u32x4_union_t y;
+ y.as_u32[0] = init;
+ y.as_u32[1] = init;
+ y.as_u32[2] = init;
+ y.as_u32[3] = init;
+ init_x4 = y.as_u32x4;
+ }
+#endif
+
+ /* Unrolled by four: the vector leaf count must be a multiple of 4. */
+ for (l = p->leaves_as_u32x4; l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4)
+ {
+ l[0] = init_x4;
+ l[1] = init_x4;
+ l[2] = init_x4;
+ l[3] = init_x4;
+ }
+ }
+#else
+ {
+ u32 * l;
+
+ /* Scalar fallback, also unrolled by four. */
+ for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4)
+ {
+ l[0] = init;
+ l[1] = init;
+ l[2] = init;
+ l[3] = init;
+ }
+ }
+#endif
+}
+
+/* Allocate a ply from m->ply_pool, fill it with init_leaf / prefix_len via
+   ply_init, and return a leaf value that refers to the new ply by its pool
+   index. The allocation may move the pool, so callers must not keep ply
+   pointers across this call. */
+static ip4_fib_mtrie_leaf_t
+ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf, uword prefix_len)
+{
+  ip4_fib_mtrie_ply_t * ply;
+  uword ply_index;
+
+  /* Alignment requested from the pool is the size of one ply. */
+  pool_get_aligned (m->ply_pool, ply, sizeof (ply[0]));
+  ply_init (ply, init_leaf, prefix_len);
+
+  ply_index = ply - m->ply_pool;
+  return ip4_fib_mtrie_leaf_set_next_ply_index (ply_index);
+}
+
+/* Return the ply that non-terminal leaf l refers to. A leaf must never
+   refer back to the root ply (pool index 0); this is asserted. */
+always_inline ip4_fib_mtrie_ply_t *
+get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
+{
+  uword ply_index = ip4_fib_mtrie_leaf_get_next_ply_index (l);
+
+  ASSERT (ply_index != 0);
+  return pool_elt_at_index (m->ply_pool, ply_index);
+}
+
+/* Recursively release ply p and every ply reachable from it. Child plies
+   are returned to m->ply_pool; the root ply (pool index 0) is kept and
+   reset to all-empty leaves instead. */
+static void
+ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
+{
+  uword slot;
+  uword p_is_root = (p - m->ply_pool) == 0;
+
+  for (slot = 0; slot < ARRAY_LEN (p->leaves); slot++)
+    {
+      ip4_fib_mtrie_leaf_t leaf = p->leaves[slot];
+
+      if (! ip4_fib_mtrie_leaf_is_next_ply (leaf))
+        continue;
+
+      ply_free (m, get_next_ply_for_leaf (m, leaf));
+    }
+
+  if (! p_is_root)
+    {
+      pool_put (m->ply_pool, p);
+      return;
+    }
+
+  ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0);
+}
+
+/* Release every ply of mtrie m, starting from the root ply at pool
+   index 0 (see ply_free for what happens to the root itself). */
+void ip4_fib_free (ip4_fib_mtrie_t * m)
+{
+  ip4_fib_mtrie_ply_t * root = pool_elt_at_index (m->ply_pool, 0);
+
+  ply_free (m, root);
+}
+
+/* Look up dst in mtrie m and return the adjacency index stored in the
+   terminal leaf reached. The walk starts at the root ply and consumes one
+   address byte per ply; the leaf selected by the fourth byte is asserted to
+   be terminal. m->default_leaf is not consulted here. */
+u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst)
+{
+  ip4_fib_mtrie_ply_t * ply = pool_elt_at_index (m->ply_pool, 0);
+  ip4_fib_mtrie_leaf_t leaf;
+  uword byte_index;
+
+  /* First three bytes: stop as soon as a terminal leaf is found. */
+  for (byte_index = 0; byte_index < 3; byte_index++)
+    {
+      leaf = ply->leaves[dst.as_u8[byte_index]];
+      if (ip4_fib_mtrie_leaf_is_terminal (leaf))
+        return ip4_fib_mtrie_leaf_get_adj_index (leaf);
+
+      ply = get_next_ply_for_leaf (m, leaf);
+    }
+
+  /* Last byte: nothing can hang below this ply. */
+  leaf = ply->leaves[dst.as_u8[3]];
+
+  ASSERT (ip4_fib_mtrie_leaf_is_terminal (leaf));
+  return ip4_fib_mtrie_leaf_get_adj_index (leaf);
+}
+
+/* Arguments shared by set_leaf and unset_leaf: the route's destination
+   address, its prefix length in bits (asserted to be 1..32 by both
+   functions) and the adjacency index stored in / matched against leaves. */
+typedef struct {
+ ip4_address_t dst_address;
+ u32 dst_address_length;
+ u32 adj_index;
+} ip4_fib_mtrie_set_unset_leaf_args_t;
+
+/* Push new_leaf (a non-empty terminal leaf whose route has
+   new_leaf_dst_address_bits prefix bits) into ply and, recursively, into
+   every ply below it. A terminal slot is overwritten only when its recorded
+   prefix length is not longer than the new one; slots owned by longer
+   prefixes are left alone. */
+static void
+set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
+                                 ip4_fib_mtrie_ply_t * ply,
+                                 ip4_fib_mtrie_leaf_t new_leaf,
+                                 uword new_leaf_dst_address_bits)
+{
+  uword slot;
+
+  ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
+  ASSERT (! ip4_fib_mtrie_leaf_is_empty (new_leaf));
+
+  for (slot = 0; slot < ARRAY_LEN (ply->leaves); slot++)
+    {
+      ip4_fib_mtrie_leaf_t current = ply->leaves[slot];
+
+      /* Non-terminal slot: descend into the child ply. */
+      if (! ip4_fib_mtrie_leaf_is_terminal (current))
+        {
+          set_ply_with_more_specific_leaf (m, get_next_ply_for_leaf (m, current),
+                                           new_leaf, new_leaf_dst_address_bits);
+          continue;
+        }
+
+      /* Slot already holds a longer prefix: keep it. */
+      if (new_leaf_dst_address_bits < ply->dst_address_bits_of_leaves[slot])
+        continue;
+
+      ply->leaves[slot] = new_leaf;
+      ply->dst_address_bits_of_leaves[slot] = new_leaf_dst_address_bits;
+      /* Only a previously empty slot adds to the non-empty count. */
+      ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (current);
+    }
+}
+
+/* Insert route a into the mtrie, working on the ply at pool index
+   old_ply_index, which is indexed by byte dst_address_byte_index of the
+   destination address.
+
+   If the prefix ends within this byte (n_dst_bits_next_plies <= 0) the
+   2^n_dst_bits_this_ply slots starting at dst_byte are visited: terminal
+   slots with a prefix length not longer than a's are overwritten,
+   non-terminal ones have the new leaf pushed into the plies below, and
+   non-terminal slots that record a longer prefix length are recursed into.
+   Otherwise the walk descends one ply, first creating a child ply
+   (initialised from the old terminal leaf) if the slot was terminal.
+
+   The ply is passed by index, not pointer, because ply_create may move the
+   pool. */
+static void
+set_leaf (ip4_fib_mtrie_t * m,
+ ip4_fib_mtrie_set_unset_leaf_args_t * a,
+ u32 old_ply_index,
+ u32 dst_address_byte_index)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
+ i32 n_dst_bits_next_plies;
+ u8 dst_byte;
+
+ ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
+ ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
+
+ /* Prefix bits left over for plies below this one; <= 0 means the prefix
+ ends inside this ply's byte. */
+ n_dst_bits_next_plies = a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
+
+ dst_byte = a->dst_address.as_u8[dst_address_byte_index];
+
+ /* Number of bits next plies <= 0 => insert leaves this ply. */
+ if (n_dst_bits_next_plies <= 0)
+ {
+ uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
+
+ /* Number of host (don't-care) bits within this byte. */
+ n_dst_bits_this_ply = -n_dst_bits_next_plies;
+ ASSERT ((a->dst_address.as_u8[dst_address_byte_index] & pow2_mask (n_dst_bits_this_ply)) == 0);
+
+ for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
+ {
+ ip4_fib_mtrie_ply_t * old_ply, * new_ply;
+
+ /* Fetched every iteration rather than cached across the loop. */
+ old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+
+ old_leaf = old_ply->leaves[i];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ /* Is leaf to be inserted more specific? */
+ if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
+ {
+ new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ if (old_leaf_is_terminal)
+ {
+ old_ply->dst_address_bits_of_leaves[i] = a->dst_address_length;
+ old_ply->leaves[i] = new_leaf;
+ old_ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf);
+ ASSERT (old_ply->n_non_empty_leafs <= ARRAY_LEN (old_ply->leaves));
+ }
+ else
+ {
+ /* Existing leaf points to another ply. We need to place new_leaf into all
+ more specific slots. */
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_ply_with_more_specific_leaf (m, new_ply, new_leaf, a->dst_address_length);
+ }
+ }
+
+ else if (! old_leaf_is_terminal)
+ {
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1);
+ }
+ }
+ }
+ else
+ {
+ ip4_fib_mtrie_ply_t * old_ply, * new_ply;
+
+ old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+ old_leaf = old_ply->leaves[dst_byte];
+ if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ {
+ /* The new child ply inherits the old terminal leaf and its prefix
+ length in every slot. */
+ new_leaf = ply_create (m, old_leaf, old_ply->dst_address_bits_of_leaves[dst_byte]);
+ new_ply = get_next_ply_for_leaf (m, new_leaf);
+
+ /* Refetch since ply_create may move pool. */
+ old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
+
+ old_ply->leaves[dst_byte] = new_leaf;
+ old_ply->dst_address_bits_of_leaves[dst_byte] = 0;
+
+ old_ply->n_non_empty_leafs -= ip4_fib_mtrie_leaf_is_non_empty (old_leaf);
+ ASSERT (old_ply->n_non_empty_leafs >= 0);
+
+ /* Account for the ply we just created. */
+ old_ply->n_non_empty_leafs += 1;
+ }
+ else
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+
+ set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1);
+ }
+}
+
+/* Remove route a's leaves from old_ply, which is indexed by byte
+   dst_address_byte_index of the destination address.
+
+   Within the slot range covered by the prefix, a slot is cleared (set to
+   the empty leaf with prefix length 0) when it holds exactly the leaf for
+   a->adj_index, or when it points to a child ply that the recursive call
+   reports as freed. Leaves are matched by adjacency index only, not by
+   prefix length.
+
+   Returns 1 if old_ply itself became empty and was returned to the pool,
+   0 otherwise. The root ply (byte index 0) is never freed. */
+static uword
+unset_leaf (ip4_fib_mtrie_t * m,
+ ip4_fib_mtrie_set_unset_leaf_args_t * a,
+ ip4_fib_mtrie_ply_t * old_ply,
+ u32 dst_address_byte_index)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
+ i32 n_dst_bits_next_plies;
+ uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
+ u8 dst_byte;
+
+ ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
+ ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
+
+ /* Prefix bits left over for plies below this one; negative when the
+ prefix ends inside (or above) this ply's byte. */
+ n_dst_bits_next_plies = a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
+
+ dst_byte = a->dst_address.as_u8[dst_address_byte_index];
+ /* Clear the host bits so the loop starts at the first covered slot. */
+ if (n_dst_bits_next_plies < 0)
+ dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);
+
+ /* Clamp to 8: when recursing below the byte where the prefix ends, the
+ whole ply is covered. */
+ n_dst_bits_this_ply = n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
+ n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
+
+ del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
+ {
+ old_leaf = old_ply->leaves[i];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ if (old_leaf == del_leaf
+ || (! old_leaf_is_terminal
+ && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), dst_address_byte_index + 1)))
+ {
+ old_ply->leaves[i] = IP4_FIB_MTRIE_LEAF_EMPTY;
+ old_ply->dst_address_bits_of_leaves[i] = 0;
+
+ /* No matter what we just deleted a non-empty leaf. */
+ ASSERT (! ip4_fib_mtrie_leaf_is_empty (old_leaf));
+ old_ply->n_non_empty_leafs -= 1;
+
+ ASSERT (old_ply->n_non_empty_leafs >= 0);
+ if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
+ {
+ pool_put (m->ply_pool, old_ply);
+ /* Old ply was deleted. */
+ return 1;
+ }
+ }
+ }
+
+ /* Old ply was not deleted. */
+ return 0;
+}
+
+/* Reset mtrie m to its initial state: zero the structure, mark the default
+   leaf empty and create the root ply, which is asserted to land at pool
+   index 0. */
+void ip4_mtrie_init (ip4_fib_mtrie_t * m)
+{
+  ip4_fib_mtrie_leaf_t root_leaf;
+
+  memset (m, 0, sizeof (m[0]));
+  m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
+
+  root_leaf = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY,
+                          /* dst_address_bits_of_leaves */ 0);
+  ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root_leaf) == 0);
+}
+
+/* Add or delete one route in fib's mtrie.
+
+   fib:                fib whose mtrie is updated; its per-length hash tables
+                       fib->adj_index_by_dst_address[] are consulted on delete.
+   dst_address:        route destination; masked to dst_address_length bits
+                       before use.
+   dst_address_length: prefix length in bits. Length 0 only updates
+                       m->default_leaf.
+   adj_index:          adjacency to install, or the one being removed.
+   is_del:             non-zero to delete, zero to add.
+
+   On delete the leaves holding adj_index are cleared and then every route
+   found in the fib hashes that covers the masked address is put back,
+   longest prefix first. set_leaf never overwrites a slot that records a
+   longer prefix, so re-inserting routes that are still in the trie is
+   harmless, and the shorter covering routes refill the slots just emptied.
+   The scan must not stop at the first hit: a more specific route on the same
+   network address would otherwise end it before the covering route is
+   reached. */
+void
+ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
+                             ip4_address_t dst_address,
+                             u32 dst_address_length,
+                             u32 adj_index,
+                             u32 is_del)
+{
+  ip4_fib_mtrie_t * m = &fib->mtrie;
+  ip4_fib_mtrie_ply_t * root_ply;
+  ip4_fib_mtrie_set_unset_leaf_args_t a;
+  ip4_main_t * im = &ip4_main;
+  uword i;
+
+  ASSERT(m->ply_pool != 0);
+
+  root_ply = pool_elt_at_index (m->ply_pool, 0);
+
+  /* Honor dst_address_length. Fib masks are in network byte order */
+  dst_address.as_u32 &= im->fib_masks[dst_address_length];
+  a.dst_address = dst_address;
+  a.dst_address_length = dst_address_length;
+  a.adj_index = adj_index;
+
+  if (! is_del)
+    {
+      if (dst_address_length == 0)
+        m->default_leaf = ip4_fib_mtrie_leaf_set_adj_index (adj_index);
+      else
+        set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
+      return;
+    }
+
+  if (dst_address_length == 0)
+    {
+      m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
+      return;
+    }
+
+  unset_leaf (m, &a, root_ply, 0);
+
+  /* Re-insert every remaining route that covers the masked address, from
+     the longest prefix down to /1. */
+  for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= 1; i--)
+    {
+      uword * p;
+      ip4_address_t key;
+
+      if (! fib->adj_index_by_dst_address[i])
+        continue;
+
+      key.as_u32 = dst_address.as_u32 & im->fib_masks[i];
+      p = hash_get (fib->adj_index_by_dst_address[i], key.as_u32);
+      if (! p)
+        continue;
+
+      a.dst_address = key;
+      a.dst_address_length = i;
+      a.adj_index = p[0];
+      set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
+    }
+}
+
+/* Apply lm->adjacency_remap_table to the terminal leaf at p. If the leaf's
+   adjacency index maps to ~0 the leaf is replaced by the empty leaf and 1 is
+   returned; in every other case the leaf is left untouched and 0 is
+   returned.
+
+   NOTE(review): non-zero remap entries other than ~0 are ignored here, so a
+   leaf is never re-pointed at a different adjacency -- confirm such remaps
+   are applied elsewhere. */
+always_inline uword
+maybe_remap_leaf (ip_lookup_main_t * lm, ip4_fib_mtrie_leaf_t * p)
+{
+  ip4_fib_mtrie_leaf_t leaf = p[0];
+  u32 adj_index, remap;
+
+  if (! ip4_fib_mtrie_leaf_is_terminal (leaf))
+    return 0;
+
+  adj_index = ip4_fib_mtrie_leaf_get_adj_index (leaf);
+  remap = vec_elt (lm->adjacency_remap_table, adj_index);
+
+  /* Covers both "no remap" (0) and any remap other than ~0. */
+  if (remap != ~0)
+    return 0;
+
+  p[0] = IP4_FIB_MTRIE_LEAF_EMPTY;
+  return 1;
+}
+
+/* Run maybe_remap_leaf over every leaf of ply and lower n_non_empty_leafs
+   by the number of leaves that were emptied. Panics if that leaves the ply
+   with no non-empty leaves. */
+static void maybe_remap_ply (ip_lookup_main_t * lm, ip4_fib_mtrie_ply_t * ply)
+{
+  u32 n_emptied = 0;
+  u32 slot;
+
+  for (slot = 0; slot < ARRAY_LEN (ply->leaves); slot++)
+    n_emptied += maybe_remap_leaf (lm, &ply->leaves[slot]);
+
+  if (n_emptied == 0)
+    return;
+
+  ASSERT (n_emptied <= ply->n_non_empty_leafs);
+  ply->n_non_empty_leafs -= n_emptied;
+
+  /* A ply emptied this way is not reclaimed here. */
+  if (ply->n_non_empty_leafs == 0)
+    os_panic ();
+}
+
+/* Apply the adjacency remap table of lm to every ply in m's pool and then
+   to m->default_leaf. */
+void ip4_mtrie_maybe_remap_adjacencies (ip_lookup_main_t * lm, ip4_fib_mtrie_t * m)
+{
+  ip4_fib_mtrie_ply_t * ply;
+
+  pool_foreach (ply, m->ply_pool, ({
+    maybe_remap_ply (lm, ply);
+  }));
+
+  maybe_remap_leaf (lm, &m->default_leaf);
+}
+
+/* Returns the number of bytes occupied by ply p and all plies below it.
+   Passing p == 0 starts from the root ply; in that case 0 is returned when
+   pool index 0 is free. */
+static uword mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
+{
+  uword total, slot;
+
+  if (p == 0)
+    {
+      if (pool_is_free_index (m->ply_pool, 0))
+        return 0;
+      p = pool_elt_at_index (m->ply_pool, 0);
+    }
+
+  total = sizeof (p[0]);
+  for (slot = 0; slot < ARRAY_LEN (p->leaves); slot++)
+    {
+      ip4_fib_mtrie_leaf_t leaf = p->leaves[slot];
+
+      if (! ip4_fib_mtrie_leaf_is_next_ply (leaf))
+        continue;
+
+      total += mtrie_memory_usage (m, get_next_ply_for_leaf (m, leaf));
+    }
+
+  return total;
+}
+
+/* Format one mtrie leaf taken from the va_list: "miss" for the empty leaf,
+   "adj N" for a terminal leaf, "next ply N" otherwise. */
+static u8 * format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
+{
+  ip4_fib_mtrie_leaf_t leaf = va_arg (*va, ip4_fib_mtrie_leaf_t);
+
+  /* The empty check comes first: it takes precedence over the terminal test. */
+  if (ip4_fib_mtrie_leaf_is_empty (leaf))
+    return format (s, "miss");
+
+  if (ip4_fib_mtrie_leaf_is_terminal (leaf))
+    return format (s, "adj %d", ip4_fib_mtrie_leaf_get_adj_index (leaf));
+
+  return format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (leaf));
+}
+
+/* Format one ply and, recursively, the plies below it.
+
+   va_list arguments, in order: the mtrie, the host-order base address of
+   the ply (u32), the ply's pool index (u32) and the index of the address
+   byte this ply is keyed on (u32). Empty leaves are skipped; each remaining
+   leaf is printed with its address/length on its own line. */
+static u8 * format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
+{
+  ip4_fib_mtrie_t * m = va_arg (*va, ip4_fib_mtrie_t *);
+  u32 base_address = va_arg (*va, u32);
+  u32 ply_index = va_arg (*va, u32);
+  u32 dst_address_byte_index = va_arg (*va, u32);
+  ip4_fib_mtrie_ply_t * ply = pool_elt_at_index (m->ply_pool, ply_index);
+  uword indent = format_get_indent (s);
+  uword slot;
+
+  s = format (s, "ply index %d, %d non-empty leaves", ply_index, ply->n_non_empty_leafs);
+
+  for (slot = 0; slot < ARRAY_LEN (ply->leaves); slot++)
+    {
+      ip4_fib_mtrie_leaf_t leaf = ply->leaves[slot];
+      u32 a, ia_length;
+      ip4_address_t ia;
+
+      if (ip4_fib_mtrie_leaf_is_empty (leaf))
+        continue;
+
+      /* Place the slot number in the byte this ply is keyed on. */
+      a = base_address + (slot << (24 - 8*dst_address_byte_index));
+      ia.as_u32 = clib_host_to_net_u32 (a);
+
+      /* Terminal leaves show their route's prefix length; next-ply leaves
+         show the number of bits consumed so far. */
+      ia_length = (ip4_fib_mtrie_leaf_is_terminal (leaf)
+                   ? ply->dst_address_bits_of_leaves[slot]
+                   : 8*(1 + dst_address_byte_index));
+
+      s = format (s, "\n%U%20U %U",
+                  format_white_space, indent + 2,
+                  format_ip4_address_and_length, &ia, ia_length,
+                  format_ip4_fib_mtrie_leaf, leaf);
+
+      if (ip4_fib_mtrie_leaf_is_next_ply (leaf))
+        s = format (s, "\n%U%U",
+                    format_white_space, indent + 2,
+                    format_ip4_fib_mtrie_ply, m, a,
+                    ip4_fib_mtrie_leaf_get_next_ply_index (leaf),
+                    dst_address_byte_index + 1);
+    }
+
+  return s;
+}
+
+/* Format a whole mtrie (taken from the va_list): a summary line with the
+   ply count and memory usage, followed by the ply tree starting at the root
+   when the pool is not empty.
+
+   format_ip4_fib_mtrie_ply reads its base address, ply index and byte index
+   with va_arg (*va, u32), so all three are passed as u32 values here rather
+   than as an ip4_address_t union. */
+u8 * format_ip4_fib_mtrie (u8 * s, va_list * va)
+{
+  ip4_fib_mtrie_t * m = va_arg (*va, ip4_fib_mtrie_t *);
+
+  s = format (s, "%d plies, memory usage %U",
+              pool_elts (m->ply_pool),
+              format_memory_size, mtrie_memory_usage (m, 0));
+
+  if (pool_elts (m->ply_pool) > 0)
+    s = format (s, "\n %U", format_ip4_fib_mtrie_ply, m,
+                /* base_address */ (u32) 0,
+                /* ply_index */ (u32) 0,
+                /* dst_address_byte_index */ (u32) 0);
+
+  return s;
+}
diff --git a/vnet/vnet/ip/ip4_mtrie.h b/vnet/vnet/ip/ip4_mtrie.h
new file mode 100644
index 00000000000..31de41e14fa
--- /dev/null
+++ b/vnet/vnet/ip/ip4_mtrie.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_fib.h: ip4 mtrie fib
+ *
+ * Copyright (c) 2012 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip4_fib_h
+#define included_ip_ip4_fib_h
+
+#include <vppinfra/cache.h>
+#include <vppinfra/vector.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/ip/ip4_packet.h> /* for ip4_address_t */
+
+/* ip4 fib leaves: 4 ply 8-8-8-8 mtrie. Each leaf is a u32 tagged by bit 0.
+ 1 + 2*adj_index for terminal leaves.
+ 0 + 2*next_ply_index for non-terminals.
+ 1 => empty (adjacency index of zero is special miss adjacency). */
+typedef u32 ip4_fib_mtrie_leaf_t;
+
+#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*IP_LOOKUP_MISS_ADJ_INDEX) /* terminal leaf naming the miss adjacency */
+#define IP4_FIB_MTRIE_LEAF_ROOT (0 + 2*0) /* non-terminal leaf naming ply index 0 */
+
+/* Non-zero when leaf N is exactly the empty encoding IP4_FIB_MTRIE_LEAF_EMPTY. */
+always_inline u32 ip4_fib_mtrie_leaf_is_empty (ip4_fib_mtrie_leaf_t n)
+{
+  return IP4_FIB_MTRIE_LEAF_EMPTY == n;
+}
+
+/* Non-zero when leaf N is anything other than IP4_FIB_MTRIE_LEAF_EMPTY. */
+always_inline u32 ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_leaf_t n)
+{
+  return ! (IP4_FIB_MTRIE_LEAF_EMPTY == n);
+}
+
+/* Returns the tag (low) bit of leaf N: 1 for a terminal leaf, 0 for a
+   leaf that names another ply. */
+always_inline u32 ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
+{
+  u32 tag_bit = n & 1;
+
+  return tag_bit;
+}
+
+/* Decode the adjacency index carried by terminal leaf N.
+   Asserts that N really is a terminal leaf. */
+always_inline u32 ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
+{
+  u32 adj_index;
+
+  ASSERT (ip4_fib_mtrie_leaf_is_terminal (n));
+  adj_index = n >> 1;
+  return adj_index;
+}
+
+/* Encode ADJ_INDEX as a terminal leaf (index in the upper 31 bits, tag bit
+   set).  The assert checks that the index survives the round trip, i.e.
+   that it fits in 31 bits. */
+always_inline ip4_fib_mtrie_leaf_t ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
+{
+  ip4_fib_mtrie_leaf_t leaf = (adj_index << 1) | 1;
+
+  ASSERT (ip4_fib_mtrie_leaf_get_adj_index (leaf) == adj_index);
+  return leaf;
+}
+
+/* Non-zero when the tag (low) bit of leaf N is clear, i.e. N names another
+   ply rather than an adjacency. */
+always_inline u32 ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
+{
+  return ! (n & 1);
+}
+
+/* Decode the ply index carried by non-terminal leaf N.
+   Asserts that N really is a next-ply leaf. */
+always_inline u32 ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
+{
+  u32 next_ply_index;
+
+  ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
+  next_ply_index = n >> 1;
+  return next_ply_index;
+}
+
+/* Encode ply index I as a non-terminal leaf (index in the upper 31 bits,
+   tag bit clear).  The assert checks that the index survives the round
+   trip, i.e. that it fits in 31 bits. */
+always_inline ip4_fib_mtrie_leaf_t ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
+{
+  ip4_fib_mtrie_leaf_t leaf = i << 1;
+
+  ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (leaf) == i);
+  return leaf;
+}
+
+/* One ply of the 4 ply mtrie fib. */
+typedef struct {
+ union {
+ ip4_fib_mtrie_leaf_t leaves[256]; /* indexed by the address byte this ply decodes */
+
+#ifdef CLIB_HAVE_VEC128
+ u32x4 leaves_as_u32x4[256 / 4]; /* same storage viewed four leaves at a time */
+#endif
+ };
+
+ /* Prefix length, in bits, recorded per leaf; used for terminal leaves. */
+ u8 dst_address_bits_of_leaves[256];
+
+ /* Number of non-empty leaves (whether terminal or not). */
+ i32 n_non_empty_leafs;
+
+ /* Pad so that n_non_empty_leafs plus pad occupy CLIB_CACHE_LINE_BYTES bytes. */
+ u8 pad[CLIB_CACHE_LINE_BYTES
+ - 1 * sizeof (i32)];
+} ip4_fib_mtrie_ply_t;
+/* An ip4 mtrie: a pool of plies plus a separately stored default-route leaf. */
+typedef struct {
+ /* Pool of plies. Index zero is root ply. */
+ ip4_fib_mtrie_ply_t * ply_pool;
+
+ /* Special case leaf for default route 0.0.0.0/0. */
+ ip4_fib_mtrie_leaf_t default_leaf;
+} ip4_fib_mtrie_t;
+
+void ip4_fib_mtrie_init (ip4_fib_mtrie_t * m);
+
+struct ip4_fib_t; /* forward declaration: only a pointer to it is used below */
+
+void ip4_fib_mtrie_add_del_route (struct ip4_fib_t * f,
+ ip4_address_t dst_address,
+ u32 dst_address_length,
+ u32 adj_index,
+ u32 is_del); /* NOTE(review): presumably non-zero is_del removes the route -- confirm in ip4_mtrie.c */
+
+/* Returns adjacency index. */
+u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst);
+
+void ip4_mtrie_maybe_remap_adjacencies (ip_lookup_main_t * lm, ip4_fib_mtrie_t * m);
+
+format_function_t format_ip4_fib_mtrie; /* format argument: ip4_fib_mtrie_t * */
+
+/* Lookup step.  Processes byte DST_ADDRESS_BYTE_INDEX of the 4 byte ip4
+   address DST_ADDRESS.
+   A terminal CURRENT_LEAF is returned unchanged; otherwise the result is
+   the leaf stored, for that address byte, in the ply CURRENT_LEAF names.
+   The ply load is always performed (from ply 0 when CURRENT_LEAF is
+   terminal) and the result is selected afterwards. */
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_lookup_step (ip4_fib_mtrie_t * m,
+                           ip4_fib_mtrie_leaf_t current_leaf,
+                           ip4_address_t * dst_address,
+                           u32 dst_address_byte_index)
+{
+  uword is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf);
+  uword ply_index = is_terminal ? 0 : (current_leaf >> 1);
+  u8 dst_byte = dst_address->as_u8[dst_address_byte_index];
+  ip4_fib_mtrie_leaf_t loaded_leaf = m->ply_pool[ply_index].leaves[dst_byte];
+
+  return is_terminal ? current_leaf : loaded_leaf;
+}
+
+#endif /* included_ip_ip4_fib_h */
diff --git a/vnet/vnet/ip/ip4_packet.h b/vnet/vnet/ip/ip4_packet.h
new file mode 100644
index 00000000000..69467eb4e03
--- /dev/null
+++ b/vnet/vnet/ip/ip4_packet.h
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip4/packet.h: ip4 packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip4_packet_h
+#define included_ip4_packet_h
+
+#include <vnet/ip/ip_packet.h> /* for ip_csum_t */
+#include <vnet/ip/tcp_packet.h> /* for tcp_header_t */
+#include <vppinfra/byte_order.h> /* for clib_net_to_host_u16 */
+
+/* IP4 address which can be accessed either as 4 bytes
+ or as a 32-bit number. All four members overlay the same 4 bytes. */
+typedef union {
+ u8 data[4];
+ u32 data_u32;
+ /* Aliases. */
+ u8 as_u8[4]; /* same bytes as data */
+ u32 as_u32; /* same word as data_u32 */
+} ip4_address_t;
+/* An IP4 address paired with a FIB index. */
+typedef struct {
+ /* IP address must be first for ip_interface_address_get_address() to work */
+ ip4_address_t ip4_addr;
+ u32 fib_index;
+} ip4_address_fib_t;
+
+/* Fill in ADDR_FIB with a byte copy of ADDRESS and the given FIB_INDEX. */
+always_inline void
+ip4_addr_fib_init (ip4_address_fib_t * addr_fib, ip4_address_t * address,
+                   u32 fib_index)
+{
+  ip4_address_t * dst = &addr_fib->ip4_addr;
+
+  memcpy (dst, address, sizeof (dst[0]));
+  addr_fib->fib_index = fib_index;
+}
+
+/* (src,dst) pair of addresses as found in packet header (source first). */
+typedef struct {
+ ip4_address_t src, dst;
+} ip4_address_pair_t;
+
+/* If address is a valid netmask, return length of mask; otherwise return ~0.
+   A valid netmask is a run of one bits followed only by zero bits, so once
+   a byte other than 0xff has been seen every later byte must be zero.
+   Masks such as 255.0.255.0 are therefore rejected instead of being
+   reported as /8. */
+always_inline uword
+ip4_address_netmask_length (ip4_address_t * a)
+{
+  uword result = 0;
+  uword i;
+
+  for (i = 0; i < ARRAY_LEN (a->as_u8); i++)
+    {
+      switch (a->as_u8[i])
+        {
+        case 0xff: result += 8; break;
+        case 0xfe: result += 7; goto check_tail;
+        case 0xfc: result += 6; goto check_tail;
+        case 0xf8: result += 5; goto check_tail;
+        case 0xf0: result += 4; goto check_tail;
+        case 0xe0: result += 3; goto check_tail;
+        case 0xc0: result += 2; goto check_tail;
+        case 0x80: result += 1; goto check_tail;
+        case 0x00: result += 0; goto check_tail;
+        default:
+          /* Byte is not a contiguous run of leading one bits. */
+          return ~0;
+        }
+    }
+
+  /* All four bytes were 0xff. */
+  return result;
+
+ check_tail:
+  /* Byte I ended the run of ones: every remaining byte must be clear. */
+  for (i++; i < ARRAY_LEN (a->as_u8); i++)
+    {
+      if (a->as_u8[i] != 0)
+        return ~0;
+    }
+
+  return result;
+}
+/* IP4 header without options, overlaid with word sized views used for checksumming. */
+typedef union {
+ struct {
+ /* Version in the high 4 bits, header length (in 32bit units) in the low 4 bits: VVVVLLLL.
+ e.g. for packets w/ no options ip_version_and_header_length == 0x45. */
+ u8 ip_version_and_header_length;
+
+ /* Type of service. */
+ u8 tos;
+
+ /* Total layer 3 packet length including this header. */
+ u16 length;
+
+ /* Fragmentation ID. */
+ u16 fragment_id;
+
+ /* 3 bits of flags and 13 bits of fragment offset (in units
+ of 8 byte quantities). The flag masks below apply to the host byte order value. */
+ u16 flags_and_fragment_offset;
+#define IP4_HEADER_FLAG_MORE_FRAGMENTS (1 << 13)
+#define IP4_HEADER_FLAG_DONT_FRAGMENT (1 << 14)
+#define IP4_HEADER_FLAG_CONGESTION (1 << 15)
+
+ /* Time to live decremented by router at each hop. */
+ u8 ttl;
+
+ /* Next level protocol packet. */
+ u8 protocol;
+
+ /* Checksum. */
+ u16 checksum;
+
+ /* Source and destination address. */
+ union {
+ struct {
+ ip4_address_t src_address, dst_address;
+ };
+ ip4_address_pair_t address_pair;
+ };
+ };
+
+ /* For checksumming we'll want to access IP header in word sized chunks. */
+ /* For 64 bit machines: 2 * 8 + 4 = 20 bytes. */
+ CLIB_PACKED (struct {
+ u64 checksum_data_64[2];
+ u32 checksum_data_64_32[1];
+ });
+
+ /* For 32 bit machines: 5 * 4 = 20 bytes. */
+ CLIB_PACKED (struct {
+ u32 checksum_data_32[5];
+ });
+} ip4_header_t;
+
+/* Value of ip_version_and_header_length for packets w/o options: version 4 in the high nibble, header size in 32 bit words in the low nibble. */
+#define IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS \
+ ((4 << 4) | (sizeof (ip4_header_t) / sizeof (u32)))
+
+/* Fragment offset field of header I (low 13 bits of the host byte order
+   flags/offset word), in units of 8 bytes. */
+always_inline int
+ip4_get_fragment_offset (ip4_header_t * i)
+{
+  u16 flags_and_offset = clib_net_to_host_u16 (i->flags_and_fragment_offset);
+
+  return flags_and_offset & 0x1fff;
+}
+
+/* More-fragments flag of header I.  Returns the masked bit itself
+   (IP4_HEADER_FLAG_MORE_FRAGMENTS or 0), not a normalized 0/1. */
+always_inline int
+ip4_get_fragment_more (ip4_header_t * i)
+{
+  u16 flags_and_offset = clib_net_to_host_u16 (i->flags_and_fragment_offset);
+
+  return flags_and_offset & IP4_HEADER_FLAG_MORE_FRAGMENTS;
+}
+
+/* Non-zero when header I has a non-zero fragment offset or the
+   more-fragments flag set.  The byte swap is applied to the constant mask,
+   so the header field is tested as stored. */
+always_inline int
+ip4_is_fragment (ip4_header_t * i)
+{
+  u16 fragment_bits = clib_net_to_host_u16 (0x1fff | IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+  return i->flags_and_fragment_offset & fragment_bits;
+}
+
+/* Non-zero when header I has fragment offset zero and the more-fragments
+   flag set.  Both constants are byte swapped so the header field is
+   compared as stored. */
+always_inline int
+ip4_is_first_fragment (ip4_header_t * i)
+{
+  u16 fragment_bits = clib_net_to_host_u16 (0x1fff | IP4_HEADER_FLAG_MORE_FRAGMENTS);
+  u16 more_fragments_only = clib_net_to_host_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+  return (i->flags_and_fragment_offset & fragment_bits) == more_fragments_only;
+}
+
+/* Fragment offset in bytes: the header field counts 8 byte units. */
+always_inline int
+ip4_get_fragment_offset_bytes (ip4_header_t * i)
+{
+  int offset_in_8_byte_units = ip4_get_fragment_offset (i);
+
+  return 8 * offset_in_8_byte_units;
+}
+
+/* Length of header I in bytes: the low nibble of the first header byte
+   counts 32 bit words. */
+always_inline int
+ip4_header_bytes (ip4_header_t * i)
+{
+  u8 n_words = i->ip_version_and_header_length & 0xf;
+
+  return sizeof (u32) * n_words;
+}
+
+/* Pointer to the first byte after IP4 header I, i.e. I advanced by
+   ip4_header_bytes (i).  The arithmetic is done on a byte pointer because
+   arithmetic on void * is a GNU extension, not standard C. */
+always_inline void *
+ip4_next_header (ip4_header_t * i)
+{
+  return (u8 *) i + ip4_header_bytes (i);
+}
+/* Compute the checksum of IP4 header I over ip4_header_bytes (i) bytes with
+   the checksum field taken as zero.  The field is cleared for the
+   computation and restored before returning, so the header is written to
+   even though it ends up unchanged. */
+always_inline u16
+ip4_header_checksum (ip4_header_t * i)
+{
+ u16 save, csum;
+ ip_csum_t sum;
+
+ save = i->checksum; /* remember the stored value so it can be put back */
+ i->checksum = 0;
+ sum = ip_incremental_checksum (0, i, ip4_header_bytes (i));
+ csum = ~ip_csum_fold (sum);
+
+ i->checksum = save;
+
+ /* Make checksum agree for special case where either
+ 0 or 0xffff would give same 1s complement sum. */
+ if (csum == 0 && save == 0xffff)
+ csum = save;
+
+ return csum;
+}
+
+/* Non-zero when the checksum stored in header I equals the value
+   ip4_header_checksum computes for it. */
+static inline uword
+ip4_header_checksum_is_valid (ip4_header_t * i)
+{
+  u16 expected = ip4_header_checksum (i);
+
+  return i->checksum == expected;
+}
+
+/* Accumulate the 20 bytes of an option-less IP4 header IP0 into SUM0 (an
+   ip_csum_t lvalue), combining words with ip_csum_with_carry.  The sum is
+   neither folded nor complemented here.  When ip_csum_t is wider than 32
+   bits two 64 bit words and one 32 bit word are read; otherwise five 32 bit
+   words are read.
+   Arguments are parenthesized so that non-trivial expressions may be
+   passed; IP0 is still evaluated several times, so it must be free of side
+   effects. */
+#define ip4_partial_header_checksum_x1(ip0,sum0) \
+do { \
+  if (BITS (ip_csum_t) > 32) \
+    { \
+      (sum0) = (ip0)->checksum_data_64[0]; \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_64[1]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_64_32[0]); \
+    } \
+  else \
+    { \
+      (sum0) = (ip0)->checksum_data_32[0]; \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[1]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[2]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[3]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[4]); \
+    } \
+} while (0)
+
+/* Two-header variant of the partial header checksum: accumulate the 20
+   bytes of option-less IP4 headers IP0 and IP1 into SUM0 and SUM1
+   (ip_csum_t lvalues), with the two computations interleaved.  The sums
+   are neither folded nor complemented here.
+   Arguments are parenthesized so that non-trivial expressions may be
+   passed; IP0 and IP1 are still evaluated several times, so they must be
+   free of side effects. */
+#define ip4_partial_header_checksum_x2(ip0,ip1,sum0,sum1) \
+do { \
+  if (BITS (ip_csum_t) > 32) \
+    { \
+      (sum0) = (ip0)->checksum_data_64[0]; \
+      (sum1) = (ip1)->checksum_data_64[0]; \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_64[1]); \
+      (sum1) = ip_csum_with_carry ((sum1), (ip1)->checksum_data_64[1]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_64_32[0]); \
+      (sum1) = ip_csum_with_carry ((sum1), (ip1)->checksum_data_64_32[0]); \
+    } \
+  else \
+    { \
+      (sum0) = (ip0)->checksum_data_32[0]; \
+      (sum1) = (ip1)->checksum_data_32[0]; \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[1]); \
+      (sum1) = ip_csum_with_carry ((sum1), (ip1)->checksum_data_32[1]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[2]); \
+      (sum1) = ip_csum_with_carry ((sum1), (ip1)->checksum_data_32[2]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[3]); \
+      (sum1) = ip_csum_with_carry ((sum1), (ip1)->checksum_data_32[3]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[4]); \
+      (sum1) = ip_csum_with_carry ((sum1), (ip1)->checksum_data_32[4]); \
+    } \
+} while (0)
+
+/* Non-zero when the top four bits of the first address byte are 1110,
+   i.e. the address lies in 224.0.0.0/4. */
+always_inline uword
+ip4_address_is_multicast (ip4_address_t * a)
+{
+  u8 top_nibble = a->data[0] >> 4;
+
+  return top_nibble == 0xe;
+}
+
+/* Store in A the multicast address whose low 28 bits are group G and whose
+   top four bits are 1110, converted to network byte order.
+   Asserts that G fits in 28 bits.
+   The prefix is shifted as an unsigned value: 0xe << 28 does not fit in a
+   signed int, and overflowing a signed left shift is undefined behaviour. */
+always_inline void
+ip4_multicast_address_set_for_group (ip4_address_t * a, ip_multicast_group_t g)
+{
+  ASSERT (g < (1 << 28));
+  a->as_u32 = clib_host_to_net_u32 (((u32) 0xe << 28) + g);
+}
+
+/* Turn a packet around in place: exchange the source and destination
+   addresses of IP0 and the source and destination ports of TCP0. */
+always_inline void
+ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0)
+{
+  u32 address_src = ip0->src_address.data_u32;
+  u32 address_dst = ip0->dst_address.data_u32;
+  u32 port_src, port_dst;
+
+  ip0->src_address.data_u32 = address_dst;
+  ip0->dst_address.data_u32 = address_src;
+
+  port_src = tcp0->ports.src;
+  port_dst = tcp0->ports.dst;
+  tcp0->ports.src = port_dst;
+  tcp0->ports.dst = port_src;
+}
+
+/* Two-packet variant of ip4_tcp_reply_x1: exchange source and destination
+   addresses in IP0 and IP1, then source and destination ports in TCP0 and
+   TCP1.  Within each stage all four values are read before any is written,
+   in the same order as the loads and stores were originally issued. */
+always_inline void
+ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1,
+                  tcp_header_t * tcp0, tcp_header_t * tcp1)
+{
+  u32 address_src0 = ip0->src_address.data_u32;
+  u32 address_src1 = ip1->src_address.data_u32;
+  u32 address_dst0 = ip0->dst_address.data_u32;
+  u32 address_dst1 = ip1->dst_address.data_u32;
+  u32 port_src0, port_src1, port_dst0, port_dst1;
+
+  ip0->src_address.data_u32 = address_dst0;
+  ip1->src_address.data_u32 = address_dst1;
+  ip0->dst_address.data_u32 = address_src0;
+  ip1->dst_address.data_u32 = address_src1;
+
+  port_src0 = tcp0->ports.src;
+  port_src1 = tcp1->ports.src;
+  port_dst0 = tcp0->ports.dst;
+  port_dst1 = tcp1->ports.dst;
+
+  tcp0->ports.src = port_dst0;
+  tcp1->ports.src = port_dst1;
+  tcp0->ports.dst = port_src0;
+  tcp1->ports.dst = port_src1;
+}
+
+#endif /* included_ip4_packet_h */
diff --git a/vnet/vnet/ip/ip4_pg.c b/vnet/vnet/ip/ip4_pg.c
new file mode 100644
index 00000000000..9710d8d4c5a
--- /dev/null
+++ b/vnet/vnet/ip/ip4_pg.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_pg: IP v4 packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+#define IP4_PG_EDIT_CHECKSUM (1 << 0) /* edit function must fill in the header checksum */
+#define IP4_PG_EDIT_LENGTH (1 << 1) /* edit function must fill in the length field */
+/* For each of the N_PACKETS buffer indices in PACKETS, fill in the IP4
+   length and/or header checksum of the header found IP_HEADER_OFFSET bytes
+   into the buffer data, as selected by FLAGS (IP4_PG_EDIT_LENGTH,
+   IP4_PG_EDIT_CHECKSUM or both; must be non-zero).  Packets are handled two
+   at a time, then one at a time for any remainder. */
+static_always_inline void
+compute_length_and_or_checksum (vlib_main_t * vm,
+ u32 * packets,
+ u32 n_packets,
+ u32 ip_header_offset,
+ u32 flags)
+{
+ ASSERT (flags != 0);
+
+ while (n_packets >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t * p0, * p1;
+ ip4_header_t * ip0, * ip1;
+ ip_csum_t sum0, sum1;
+
+ pi0 = packets[0];
+ pi1 = packets[1];
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ n_packets -= 2;
+ packets += 2;
+
+ ip0 = (void *) (p0->data + ip_header_offset);
+ ip1 = (void *) (p1->data + ip_header_offset);
+
+ if (flags & IP4_PG_EDIT_LENGTH)
+ {
+ ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) - ip_header_offset); /* chain length minus the bytes before the IP header */
+ ip1->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p1) - ip_header_offset);
+ }
+
+ if (flags & IP4_PG_EDIT_CHECKSUM)
+ {
+ ASSERT (ip4_header_bytes (ip0) == sizeof (ip0[0])); /* the partial checksum macro covers exactly an option-less header */
+ ASSERT (ip4_header_bytes (ip1) == sizeof (ip1[0]));
+
+ ip0->checksum = 0;
+ ip1->checksum = 0;
+
+ ip4_partial_header_checksum_x2 (ip0, ip1, sum0, sum1);
+ ip0->checksum = ~ ip_csum_fold (sum0);
+ ip1->checksum = ~ ip_csum_fold (sum1);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0)); /* cross-check against the generic routine */
+ ASSERT (ip1->checksum == ip4_header_checksum (ip1));
+ }
+ }
+
+ while (n_packets >= 1)
+ {
+ u32 pi0;
+ vlib_buffer_t * p0;
+ ip4_header_t * ip0;
+ ip_csum_t sum0;
+
+ pi0 = packets[0];
+ p0 = vlib_get_buffer (vm, pi0);
+ n_packets -= 1;
+ packets += 1;
+
+ ip0 = (void *) (p0->data + ip_header_offset);
+
+ if (flags & IP4_PG_EDIT_LENGTH)
+ ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) - ip_header_offset);
+
+ if (flags & IP4_PG_EDIT_CHECKSUM)
+ {
+ ASSERT (ip4_header_bytes (ip0) == sizeof (ip0[0]));
+
+ ip0->checksum = 0;
+
+ ip4_partial_header_checksum_x1 (ip0, sum0);
+ ip0->checksum = ~ ip_csum_fold (sum0);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ }
+ }
+}
+/* Packet generator edit callback for an IP4 edit group: fills in length
+   and/or checksum for N_PACKETS packets according to the flags stored in
+   g->edit_function_opaque.  Each case calls the inlined helper with literal
+   flags -- NOTE(review): presumably so each call is specialized at compile
+   time; confirm before collapsing the switch. */
+static void
+ip4_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g,
+ u32 * packets,
+ u32 n_packets)
+{
+ vlib_main_t * vm = pg->vlib_main;
+ u32 ip_offset;
+
+ ip_offset = g->start_byte_offset; /* offset of this group's IP header within the packet data */
+
+ switch (g->edit_function_opaque)
+ {
+ case IP4_PG_EDIT_LENGTH:
+ compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+ IP4_PG_EDIT_LENGTH);
+ break;
+
+ case IP4_PG_EDIT_CHECKSUM:
+ compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+ IP4_PG_EDIT_CHECKSUM);
+ break;
+
+ case IP4_PG_EDIT_LENGTH | IP4_PG_EDIT_CHECKSUM:
+ compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+ IP4_PG_EDIT_LENGTH
+ | IP4_PG_EDIT_CHECKSUM);
+ break;
+
+ default:
+ ASSERT (0); /* the opaque value is only ever built from the two flags above */
+ break;
+ }
+}
+/* Packet generator view of an IP4 header: one pg_edit_t per header field, with the two packed header words split into separate bitfield edits. */
+typedef struct {
+ pg_edit_t ip_version, header_length; /* both live in ip_version_and_header_length */
+ pg_edit_t tos;
+ pg_edit_t length;
+
+ pg_edit_t fragment_id, fragment_offset;
+
+ /* Flags together with fragment offset. */
+ pg_edit_t mf_flag, df_flag, ce_flag;
+
+ pg_edit_t ttl;
+
+ pg_edit_t protocol;
+
+ pg_edit_t checksum;
+
+ pg_edit_t src_address, dst_address;
+} pg_ip4_header_t;
+/* Bind every edit in P to its field (or bit range) of ip4_header_t. */
+static inline void
+pg_ip4_header_init (pg_ip4_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the IP header. */
+#define _(f) pg_edit_init (&p->f, ip4_header_t, f);
+ _ (tos);
+ _ (length);
+ _ (fragment_id);
+ _ (ttl);
+ _ (protocol);
+ _ (checksum);
+ _ (src_address);
+ _ (dst_address);
+#undef _
+
+ /* Initialize bit fields. NOTE(review): the last two arguments look like (first bit, number of bits) -- confirm against pg_edit_init_bitfield. */
+ pg_edit_init_bitfield (&p->header_length, ip4_header_t,
+ ip_version_and_header_length,
+ 0, 4);
+ pg_edit_init_bitfield (&p->ip_version, ip4_header_t,
+ ip_version_and_header_length,
+ 4, 4);
+
+ pg_edit_init_bitfield (&p->fragment_offset, ip4_header_t,
+ flags_and_fragment_offset,
+ 0, 13);
+ pg_edit_init_bitfield (&p->mf_flag, ip4_header_t,
+ flags_and_fragment_offset,
+ 13, 1);
+ pg_edit_init_bitfield (&p->df_flag, ip4_header_t,
+ flags_and_fragment_offset,
+ 14, 1);
+ pg_edit_init_bitfield (&p->ce_flag, ip4_header_t,
+ flags_and_fragment_offset,
+ 15, 1);
+}
+/* Parse a packet generator description of an IP4 header from INPUT into a
+   new edit group on the stream passed as the single va_list argument
+   (pg_stream_t *).  After the header and its options, the remaining input
+   is handed to the protocol's own parser when one is registered, otherwise
+   to the generic payload parser.
+   Returns 1 on success.  On failure frees the edit group and returns 0. */
+uword
+unformat_pg_ip4_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t * s = va_arg (*args, pg_stream_t *);
+ pg_ip4_header_t * p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ip4_header_t),
+ &group_index);
+ pg_ip4_header_init (p);
+
+ /* Defaults. */
+ pg_edit_set_fixed (&p->ip_version, 4);
+ pg_edit_set_fixed (&p->header_length,
+ sizeof (ip4_header_t) / sizeof (u32));
+
+ pg_edit_set_fixed (&p->tos, 0);
+ pg_edit_set_fixed (&p->ttl, 64);
+
+ pg_edit_set_fixed (&p->fragment_id, 0);
+ pg_edit_set_fixed (&p->fragment_offset, 0);
+ pg_edit_set_fixed (&p->mf_flag, 0);
+ pg_edit_set_fixed (&p->df_flag, 0);
+ pg_edit_set_fixed (&p->ce_flag, 0);
+ /* Length and checksum stay unspecified unless given as options below. */
+ p->length.type = PG_EDIT_UNSPECIFIED;
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+ /* Accept either protocol, source and destination, or the protocol alone. */
+ if (unformat (input, "%U: %U -> %U",
+ unformat_pg_edit,
+ unformat_ip_protocol, &p->protocol,
+ unformat_pg_edit,
+ unformat_ip4_address, &p->src_address,
+ unformat_pg_edit,
+ unformat_ip4_address, &p->dst_address))
+ goto found;
+
+ if (! unformat (input, "%U:",
+ unformat_pg_edit,
+ unformat_ip_protocol, &p->protocol))
+ goto error;
+
+found:
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "version %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->ip_version))
+ ;
+
+ else if (unformat (input, "header-length %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->header_length))
+ ;
+
+ else if (unformat (input, "tos %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->tos))
+ ;
+
+ else if (unformat (input, "length %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->length))
+ ;
+
+ else if (unformat (input, "checksum %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->checksum))
+ ;
+
+ else if (unformat (input, "ttl %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->ttl))
+ ;
+
+ else if (unformat (input, "fragment id %U offset %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->fragment_id,
+ unformat_pg_edit,
+ unformat_pg_number, &p->fragment_offset))
+ { /* every parsed offset value is divided by 8 before being stored in the edit */
+ int i;
+ for (i = 0; i< ARRAY_LEN (p->fragment_offset.values); i++)
+ pg_edit_set_value (&p->fragment_offset, i,
+ pg_edit_get_value (&p->fragment_offset, i) / 8);
+
+ }
+
+ /* Flags. */
+ else if (unformat (input, "mf") || unformat (input, "MF"))
+ pg_edit_set_fixed (&p->mf_flag, 1);
+
+ else if (unformat (input, "df") || unformat (input, "DF"))
+ pg_edit_set_fixed (&p->df_flag, 1);
+
+ else if (unformat (input, "ce") || unformat (input, "CE"))
+ pg_edit_set_fixed (&p->ce_flag, 1);
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ {
+ ip_main_t * im = &ip_main;
+ ip_protocol_t protocol;
+ ip_protocol_info_t * pi;
+ /* Protocol info is only looked up when the protocol edit is a fixed value. */
+ pi = 0;
+ if (p->protocol.type == PG_EDIT_FIXED)
+ {
+ protocol = pg_edit_get_value (&p->protocol, PG_EDIT_LO);
+ pi = ip_get_protocol_info (im, protocol);
+ }
+ /* Try the protocol's own parser first; otherwise parse a generic payload. */
+ if (pi && pi->unformat_pg_edit
+ && unformat_user (input, pi->unformat_pg_edit, s))
+ ;
+
+ else if (! unformat_user (input, unformat_pg_payload, s))
+ goto error;
+ /* NOTE(review): p is used here before it is re-fetched further down; confirm the nested parsers above cannot move this group's edits. */
+ if (p->length.type == PG_EDIT_UNSPECIFIED
+ && s->min_packet_bytes == s->max_packet_bytes
+ && group_index + 1 < vec_len (s->edit_groups))
+ {
+ pg_edit_set_fixed (&p->length,
+ pg_edit_group_n_bytes (s, group_index));
+ }
+
+ /* Compute IP header checksum if all edits are fixed. */
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ ip4_header_t fixed_header, fixed_mask, cmp_mask;
+
+ /* See if header is all fixed and specified except for
+ checksum field. */
+ memset (&cmp_mask, ~0, sizeof (cmp_mask));
+ cmp_mask.checksum = 0;
+
+ pg_edit_group_get_fixed_packet_data (s, group_index,
+ &fixed_header, &fixed_mask);
+ if (! memcmp (&fixed_mask, &cmp_mask, sizeof (cmp_mask)))
+ pg_edit_set_fixed (&p->checksum,
+ clib_net_to_host_u16 (ip4_header_checksum (&fixed_header)));
+ }
+ /* Whatever is still unspecified is computed per packet by the edit function. */
+ p = pg_get_edit_group (s, group_index);
+ if (p->length.type == PG_EDIT_UNSPECIFIED
+ || p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t * g = pg_stream_get_group (s, group_index);
+ g->edit_function = ip4_pg_edit_function;
+ g->edit_function_opaque = 0;
+ if (p->length.type == PG_EDIT_UNSPECIFIED)
+ g->edit_function_opaque |= IP4_PG_EDIT_LENGTH;
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ g->edit_function_opaque |= IP4_PG_EDIT_CHECKSUM;
+ }
+
+ return 1;
+ }
+
+ error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
diff --git a/vnet/vnet/ip/ip4_source_check.c b/vnet/vnet/ip/ip4_source_check.c
new file mode 100644
index 00000000000..47e22f2392e
--- /dev/null
+++ b/vnet/vnet/ip/ip4_source_check.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_source_check.c: IP v4 check source address (unicast RPF check)
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+/* Per-packet trace record: leading packet bytes, later formatted as an IP4 header. */
+typedef struct {
+ u8 packet_data[64];
+} ip4_source_check_trace_t;
+
+/* Trace formatter for the source check node.
+   va_list arguments: vlib_main_t * and vlib_node_t * (both unused), then
+   the ip4_source_check_trace_t * whose captured bytes are printed with
+   format_ip4_header. */
+static u8 * format_ip4_source_check_trace (u8 * s, va_list * va)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  ip4_source_check_trace_t * trace = va_arg (*va, ip4_source_check_trace_t *);
+
+  return format (s, "%U",
+                 format_ip4_header,
+                 trace->packet_data, sizeof (trace->packet_data));
+}
+/* Next-node indices of the source check node; DROP is selected for packets that fail the check. */
+typedef enum {
+ IP4_SOURCE_CHECK_NEXT_DROP,
+ IP4_SOURCE_CHECK_N_NEXT,
+} ip4_source_check_next_t;
+/* Check flavour: VIA_RX also requires the source's rewrite adjacency to use the RX interface; VIA_ANY accepts any rewrite adjacency. */
+typedef enum {
+ IP4_SOURCE_CHECK_REACHABLE_VIA_RX,
+ IP4_SOURCE_CHECK_REACHABLE_VIA_ANY,
+} ip4_source_check_type_t;
+/* Per-feature config word fetched with vnet_get_config_data: a flag and a FIB index packed into one u32. */
+typedef union {
+ struct {
+ u32 no_default_route : 1; /* passed to ip4_fib_lookup_with_table in the cross-check assert */
+ u32 fib_index : 31; /* selects the fib whose mtrie is searched for the source address */
+ };
+ u32 as_u32[1];
+} ip4_source_check_config_t;
+
+always_inline uword
+ip4_source_check_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ ip4_source_check_type_t source_check_type)
+{
+ ip4_main_t * im = &ip4_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ ip_config_main_t * cm = &lm->rx_config_mains[VNET_UNICAST];
+ u32 n_left_from, * from, * to_next;
+ u32 next_index;
+ vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ip4_source_check_trace_t));
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t * p0, * p1;
+ ip4_header_t * ip0, * ip1;
+ ip4_fib_mtrie_t * mtrie0, * mtrie1;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ ip4_source_check_config_t * c0, * c1;
+ ip_adjacency_t * adj0, * adj1;
+ u32 pi0, next0, pass0, adj_index0;
+ u32 pi1, next1, pass1, adj_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ c0 = vnet_get_config_data (&cm->config_main,
+ &vnet_buffer (p0)->ip.current_config_index,
+ &next0,
+ sizeof (c0[0]));
+ c1 = vnet_get_config_data (&cm->config_main,
+ &vnet_buffer (p1)->ip.current_config_index,
+ &next1,
+ sizeof (c1[0]));
+
+ mtrie0 = &vec_elt_at_index (im->fibs, c0->fib_index)->mtrie;
+ mtrie1 = &vec_elt_at_index (im->fibs, c1->fib_index)->mtrie;
+
+ leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
+
+ adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+
+ ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, c0->fib_index,
+ &ip0->src_address,
+ c0->no_default_route));
+ ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, c1->fib_index,
+ &ip1->src_address,
+ c1->no_default_route));
+
+ adj0 = ip_get_adjacency (lm, adj_index0);
+ adj1 = ip_get_adjacency (lm, adj_index1);
+
+ /* Pass multicast. */
+ pass0 = ip4_address_is_multicast (&ip0->src_address) || ip0->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF);
+ pass1 = ip4_address_is_multicast (&ip1->src_address) || ip1->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF);
+
+ pass0 |= (adj0->lookup_next_index == IP_LOOKUP_NEXT_REWRITE
+ && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY
+ || vnet_buffer (p0)->sw_if_index[VLIB_RX] == adj0->rewrite_header.sw_if_index));
+ pass1 |= (adj1->lookup_next_index == IP_LOOKUP_NEXT_REWRITE
+ && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY
+ || vnet_buffer (p1)->sw_if_index[VLIB_RX] == adj1->rewrite_header.sw_if_index));
+
+ next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP);
+ next1 = (pass1 ? next1 : IP4_SOURCE_CHECK_NEXT_DROP);
+
+ p0->error = error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
+ p1->error = error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ ip4_header_t * ip0;
+ ip4_fib_mtrie_t * mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ ip4_source_check_config_t * c0;
+ ip_adjacency_t * adj0;
+ u32 pi0, next0, pass0, adj_index0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ c0 = vnet_get_config_data (&cm->config_main,
+ &vnet_buffer (p0)->ip.current_config_index,
+ &next0,
+ sizeof (c0[0]));
+
+ mtrie0 = &vec_elt_at_index (im->fibs, c0->fib_index)->mtrie;
+
+ leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
+
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+
+ adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+
+ ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, c0->fib_index,
+ &ip0->src_address,
+ c0->no_default_route));
+ adj0 = ip_get_adjacency (lm, adj_index0);
+
+ /* Pass multicast. */
+ pass0 = ip4_address_is_multicast (&ip0->src_address) || ip0->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF);
+
+ pass0 |= (adj0->lookup_next_index == IP_LOOKUP_NEXT_REWRITE
+ && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY
+ || vnet_buffer (p0)->sw_if_index[VLIB_RX] == adj0->rewrite_header.sw_if_index));
+
+ next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP);
+ p0->error = error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* Node function for "ip4-source-check-via-any": runs the shared source-check
+   loop with the IP4_SOURCE_CHECK_REACHABLE_VIA_ANY check type and hands back
+   whatever that loop returns. */
+static uword
+ip4_source_check_reachable_via_any (vlib_main_t * vm,
+				    vlib_node_runtime_t * node,
+				    vlib_frame_t * frame)
+{
+  uword n_vectors;
+
+  n_vectors = ip4_source_check_inline (vm, node, frame,
+				       IP4_SOURCE_CHECK_REACHABLE_VIA_ANY);
+  return n_vectors;
+}
+
+/* Node function for "ip4-source-check-via-rx": runs the shared source-check
+   loop with the IP4_SOURCE_CHECK_REACHABLE_VIA_RX check type and hands back
+   whatever that loop returns. */
+static uword
+ip4_source_check_reachable_via_rx (vlib_main_t * vm,
+				   vlib_node_runtime_t * node,
+				   vlib_frame_t * frame)
+{
+  uword n_vectors;
+
+  n_vectors = ip4_source_check_inline (vm, node, frame,
+				       IP4_SOURCE_CHECK_REACHABLE_VIA_RX);
+  return n_vectors;
+}
+
+/* Graph node "ip4-source-check-via-any", dispatched through
+   ip4_source_check_reachable_via_any().  Only the drop next node
+   ("error-drop") is named here. */
+VLIB_REGISTER_NODE (ip4_check_source_reachable_via_any) = {
+ .function = ip4_source_check_reachable_via_any,
+ .name = "ip4-source-check-via-any",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP4_SOURCE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_check_trace,
+};
+
+/* Graph node "ip4-source-check-via-rx", dispatched through
+   ip4_source_check_reachable_via_rx().  Only the drop next node
+   ("error-drop") is named here. */
+VLIB_REGISTER_NODE (ip4_check_source_reachable_via_rx) = {
+ .function = ip4_source_check_reachable_via_rx,
+ .name = "ip4-source-check-via-rx",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP4_SOURCE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_check_trace,
+};
+
+/*
+ * CLI handler for "set interface ip source-check".
+ *
+ * Parses an interface, then an optional "del" keyword, and adds (or, with
+ * "del", removes) the IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX feature in
+ * that interface's unicast rx config.  The feature config carries the
+ * interface's current FIB index with no_default_route set to 0.
+ *
+ * Returns 0 on success, or an error when no interface could be parsed.
+ */
+static clib_error_t *
+set_ip_source_check (vlib_main_t * vm,
+		     unformat_input_t * input,
+		     vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip4_main_t * im = &ip4_main;
+  ip_config_main_t * rx_cm = &im->lookup_main.rx_config_mains[VNET_UNICAST];
+  ip4_source_check_config_t config;
+  u32 sw_if_index = ~0;
+  u32 old_ci, new_ci;
+
+  if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    return clib_error_return (0, "unknown interface `%U'",
+			      format_unformat_error, input);
+
+  /* Same assignment order as always: no_default_route, then fib_index. */
+  config.no_default_route = 0;
+  config.fib_index = im->fib_index_by_sw_if_index[sw_if_index];
+
+  old_ci = rx_cm->config_index_by_sw_if_index[sw_if_index];
+
+  if (unformat (input, "del"))
+    new_ci = vnet_config_del_feature
+      (vm, &rx_cm->config_main,
+       old_ci,
+       IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX,
+       &config,
+       sizeof (config));
+  else
+    new_ci = vnet_config_add_feature
+      (vm, &rx_cm->config_main,
+       old_ci,
+       IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX,
+       &config,
+       sizeof (config));
+
+  /* Adding or removing a feature yields a new config index; store it. */
+  rx_cm->config_index_by_sw_if_index[sw_if_index] = new_ci;
+
+  return 0;
+}
+
+/* Debug CLI binding: "set interface ip source-check" is handled by
+   set_ip_source_check().
+   NOTE(review): the help text mentions IP4/IP6, but the handler only touches
+   ip4_main - confirm whether IP6 is meant to be covered. */
+VLIB_CLI_COMMAND (set_interface_ip_source_check_command, static) = {
+ .path = "set interface ip source-check",
+ .function = set_ip_source_check,
+ .short_help = "Set IP4/IP6 interface unicast source check",
+};
+
+/* Init function with nothing to do: it exists only so that this object file
+   gets linked in. */
+clib_error_t *
+ip4_source_check_init (vlib_main_t * vm)
+{
+  clib_error_t * error = 0;
+
+  return error;
+}
+
+VLIB_INIT_FUNCTION (ip4_source_check_init);
diff --git a/vnet/vnet/ip/ip4_test.c b/vnet/vnet/ip/ip4_test.c
new file mode 100644
index 00000000000..ff088e78f3e
--- /dev/null
+++ b/vnet/vnet/ip/ip4_test.c
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+/*
+ * ip4 FIB tester. Add, probe, delete a bunch of
+ * random routes / masks and make sure that the mtrie agrees with
+ * the hash-table FIB.
+ *
+ * Manipulate the FIB by means of the debug CLI commands, to minimize
+ * the chances of doing something idiotic.
+ */
+
+/*
+ * These routines need to be redeclared non-static elsewhere.
+ *
+ * Also: rename ip_route() -> vnet_ip_route_cmd() and add the usual
+ * test_route_init() call to main.c
+ */
+/* Handler behind the "ip route" debug CLI (the renamed ip_route() mentioned
+   above).  thrash() feeds it "add ..." / "del ..." command strings. */
+clib_error_t *
+vnet_ip_route_cmd (vlib_main_t * vm,
+		   unformat_input_t * input, vlib_cli_command_t * cmd_arg);
+
+/* Lookup cross-check for address a in FIB index fib_index0.  thrash() treats
+   a zero return as a failed lookup. */
+int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0);
+
+/* Declared under the name thrash() actually calls; the previous declaration
+   (find_fib_by_table_index_or_id) named a function nothing in this file
+   uses. */
+ip4_fib_t *
+find_ip4_fib_by_table_index_or_id (ip4_main_t * im, u32 table_index_or_id,
+				   u32 flags);
+
+/* Routes to insert/delete/probe in FIB.  One entry per randomly generated
+   prefix; entries live in test_main_t.route_pool. */
+typedef struct {
+ /* Prefix address; thrash() stores it via clib_host_to_net_u32(). */
+ ip4_address_t address;
+ /* Prefix length in bits, printed as the "/%d" part of the route. */
+ u32 mask_width;
+ /* N of the "test-eth%d" interface the route points at. */
+ u32 interface_id; /* not an xx_if_index */
+} test_route_t;
+
+/* State of the FIB tester that persists across "test route" invocations. */
+typedef struct {
+ /* Test routes in use */
+ test_route_t *route_pool;
+
+ /* Number of fake ethernets created */
+ u32 test_interfaces_created;
+} test_main_t;
+
+/* The single tester instance, used by thrash(). */
+test_main_t test_main;
+
+/* Name formatter for the test device class: appends "test-eth<instance>" to
+   s, which keeps these devices distinct from "fake-ethX". */
+static u8 * format_test_interface_name (u8 * s, va_list * args)
+{
+  u32 instance;
+
+  instance = va_arg (*args, u32);
+  s = format (s, "test-eth%d", instance);
+  return s;
+}
+
+/* TX function of the test device class.  Test interfaces are not expected to
+   transmit: warn, and report the whole frame as handled. */
+static uword dummy_interface_tx (vlib_main_t * vm,
+				 vlib_node_runtime_t * node,
+				 vlib_frame_t * frame)
+{
+  uword n_vectors = frame->n_vectors;
+
+  clib_warning ("you shouldn't be here, leaking buffers...");
+
+  return n_vectors;
+}
+
+/* Device class for the tester's fake ethernets: named through
+   format_test_interface_name(), transmitting through dummy_interface_tx(). */
+VNET_DEVICE_CLASS (test_interface_device_class,static) = {
+ .name = "Test interface",
+ .format_device_name = format_test_interface_name,
+ .tx_function = dummy_interface_tx,
+};
+
+/*
+ * "test route" CLI handler: ip4 FIB thrash test.
+ *
+ * For each of niter iterations: picks nroutes random, distinct prefixes,
+ * installs them in FIB table 11 through the "ip route" command handler,
+ * probes each one with ip4_lookup_validate(), then deletes them one at a
+ * time, re-probing every not-yet-deleted route after each delete.  Lookup
+ * failures are printed to stderr.
+ *
+ * Options: seed, niter, nroutes, ninterfaces, min-mask-bits, max-mask-bits
+ * (each followed by a number) and verbose.
+ *
+ * Returns 0, or an error for unparsable input, mask bits above 32, or a
+ * failed test interface registration.
+ */
+static clib_error_t *
+thrash (vlib_main_t * vm,
+	unformat_input_t * main_input, vlib_cli_command_t * cmd_arg)
+{
+  u32 seed = 0xdeaddabe;
+  u32 niter = 10;
+  u32 nroutes = 10;
+  u32 ninterfaces = 4;
+  f64 min_mask_bits = 7.0;
+  f64 max_mask_bits = 32.0;
+  u32 table_id = 11; /* my amp goes to 11 (use fib 11) */
+  u32 table_index;
+  int iter, i;
+  u8 * cmd;
+  test_route_t *tr;
+  test_main_t *tm = &test_main;
+  ip4_main_t * im = &ip4_main;
+  vnet_main_t * vnm = vnet_get_main();
+  unformat_input_t cmd_input;
+  f64 rf;
+  u32 *masks = 0;
+  u32 tmp;
+  u32 hw_if_index;
+  clib_error_t * error = 0;
+  uword *p;
+  unformat_input_t _line_input, * line_input = &_line_input;
+  u8 hw_address[6];
+  ip4_fib_t * fib;
+  int verbose = 0;
+
+  /* Precompute mask width -> mask vector.  masks[0] stays 0. */
+  tmp = (u32)~0;
+  vec_validate (masks, 32);
+  for (i = 32; i > 0; i--)
+    {
+      masks [i] = tmp;
+      tmp <<= 1;
+    }
+
+  if (unformat_user (main_input, unformat_line_input, line_input))
+    {
+      while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+	{
+	  if (unformat (line_input, "seed %d", &seed))
+	    ;
+	  else if (unformat (line_input, "niter %d", &niter))
+	    ;
+	  else if (unformat (line_input, "nroutes %d", &nroutes))
+	    ;
+	  else if (unformat (line_input, "ninterfaces %d", &ninterfaces))
+	    ;
+	  else if (unformat (line_input, "min-mask-bits %d", &tmp))
+	    min_mask_bits = (f64) tmp;
+	  else if (unformat (line_input, "max-mask-bits %d", &tmp))
+	    max_mask_bits = (f64) tmp;
+	  else if (unformat (line_input, "verbose"))
+	    verbose = 1;
+	  else
+	    {
+	      /* Build the message before line_input is released below. */
+	      error = clib_error_return (0, "unknown input `%U'",
+					 format_unformat_error, line_input);
+	      break;
+	    }
+	}
+
+      /* The line input owns a buffer; release it on every path. */
+      unformat_free (line_input);
+      if (error)
+	goto done;
+    }
+
+  /* mask_width indexes masks[0..32]; refuse anything that could go past. */
+  if (min_mask_bits > 32.0 || max_mask_bits > 32.0)
+    {
+      error = clib_error_return (0, "mask bits must be <= 32");
+      goto done;
+    }
+
+  /* Find or create FIB table 11 */
+  fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
+
+  for (i = tm->test_interfaces_created; i < ninterfaces; i++)
+    {
+      vnet_hw_interface_t * hw;
+      memset (hw_address, 0, sizeof (hw_address));
+      hw_address[0] = 0xd0;
+      hw_address[1] = 0x0f;
+      hw_address[5] = i;
+
+      error = ethernet_register_interface
+	(vnm,
+	 test_interface_device_class.index,
+	 i /* instance */,
+	 hw_address,
+	 &hw_if_index,
+	 /* flag change */ 0);
+      if (error)
+	{
+	  /* hw_if_index is not valid; remember how far we got and bail. */
+	  tm->test_interfaces_created = i;
+	  goto done;
+	}
+
+      /* Fake interfaces use FIB table 11 */
+      hw = vnet_get_hw_interface (vnm, hw_if_index);
+      vec_validate (im->fib_index_by_sw_if_index, hw->sw_if_index);
+      im->fib_index_by_sw_if_index[hw->sw_if_index] = fib->index;
+    }
+
+  /* Never lower the count: interfaces created by an earlier run still exist
+     and must not be registered a second time. */
+  if (tm->test_interfaces_created < ninterfaces)
+    tm->test_interfaces_created = ninterfaces;
+
+  /* Find fib index corresponding to FIB id 11 */
+  p = hash_get (im->fib_index_by_table_id, table_id);
+  if (p == 0)
+    {
+      vlib_cli_output (vm, "Couldn't map fib id %d to fib index\n",
+		       table_id);
+      goto done;
+    }
+  table_index = p[0];
+
+  for (iter = 0; iter < niter; iter++)
+    {
+      /* Pick random routes to install */
+      for (i = 0; i < nroutes; i++)
+	{
+	  int j;
+
+	  pool_get (tm->route_pool, tr);
+	  memset (tr, 0, sizeof (*tr));
+
+	again:
+	  rf = random_f64 (&seed);
+	  tr->mask_width = (u32) (min_mask_bits
+				  + rf * (max_mask_bits - min_mask_bits));
+	  tmp = random_u32 (&seed);
+	  tmp &= masks[tr->mask_width];
+	  tr->address.as_u32 = clib_host_to_net_u32(tmp);
+
+	  /* We can't add the same address/mask twice... */
+	  for (j = 0; j < i; j++)
+	    {
+	      test_route_t *prev;
+	      prev = pool_elt_at_index (tm->route_pool, j);
+	      if ((prev->address.as_u32 == tr->address.as_u32)
+		  && (prev->mask_width == tr->mask_width))
+		goto again;
+	    }
+
+	  rf = random_f64 (&seed);
+	  tr->interface_id = (u32) (rf * ninterfaces);
+	}
+
+      /* Add them */
+      for (i = 0; i < nroutes; i++)
+	{
+	  clib_error_t * e;
+
+	  tr = pool_elt_at_index (tm->route_pool, i);
+	  cmd = format (0, "add table %d %U/%d via test-eth%d",
+			table_id,
+			format_ip4_address, &tr->address,
+			tr->mask_width, tr->interface_id);
+	  vec_add1(cmd,0);
+	  if (verbose)
+	    fformat(stderr, "ip route %s\n", cmd);
+	  unformat_init_string (&cmd_input, (char *) cmd, vec_len(cmd)-1);
+	  e = vnet_ip_route_cmd (vm, &cmd_input, cmd_arg);
+	  if (e)
+	    clib_error_report(e);
+	  unformat_free (&cmd_input);
+	  vec_free(cmd);
+	}
+
+      /* Probe them */
+      for (i = 0; i < nroutes; i++)
+	{
+	  tr = pool_elt_at_index (tm->route_pool, i);
+	  if (!ip4_lookup_validate (&tr->address, table_index))
+	    {
+	      if (verbose)
+		fformat (stderr, "test lookup table %d %U\n",
+			 table_index, format_ip4_address, &tr->address);
+
+	      fformat (stderr, "FAIL-after-insert: %U/%d\n",
+		       format_ip4_address, &tr->address,
+		       tr->mask_width);
+	    }
+	}
+
+      /* Delete them */
+      for (i = 0; i < nroutes; i++)
+	{
+	  clib_error_t * e;
+	  int j;
+
+	  tr = pool_elt_at_index (tm->route_pool, i);
+	  cmd = format (0, "del table %d %U/%d",
+			table_id,
+			format_ip4_address, &tr->address,
+			tr->mask_width);
+	  vec_add1(cmd,0);
+	  if (verbose)
+	    fformat(stderr, "ip route %s\n", cmd);
+	  unformat_init_string (&cmd_input, (char *) cmd, vec_len(cmd)-1);
+	  e = vnet_ip_route_cmd (vm, &cmd_input, cmd_arg);
+	  if (e)
+	    clib_error_report(e);
+	  unformat_free (&cmd_input);
+	  vec_free(cmd);
+
+	  /* Make sure all undeleted routes still work */
+	  for (j = i+1; j < nroutes; j++)
+	    {
+	      test_route_t *rr; /* remaining route */
+	      rr = pool_elt_at_index (tm->route_pool, j);
+	      if (!ip4_lookup_validate (&rr->address, table_index))
+		{
+		  if (verbose)
+		    fformat (stderr, "test lookup table %d %U\n",
+			     table_index, format_ip4_address, &rr->address);
+
+		  fformat (stderr, "FAIL: %U/%d AWOL\n",
+			   format_ip4_address, &rr->address,
+			   rr->mask_width);
+		  fformat (stderr, " iter %d after %d of %d deletes\n",
+			   iter, i, nroutes);
+		  fformat (stderr, " last route deleted %U/%d\n",
+			   format_ip4_address, &tr->address,
+			   tr->mask_width);
+		}
+	    }
+	}
+
+      /* Start each iteration from an empty pool so indices run 0..nroutes-1. */
+      pool_free (tm->route_pool);
+    }
+
+ done:
+  vec_free (masks);
+  return error;
+}
+
+/* Debug CLI binding: "test route" runs thrash().  The options thrash()
+   parses are seed, niter, nroutes, ninterfaces, min-mask-bits,
+   max-mask-bits and verbose. */
+VLIB_CLI_COMMAND (test_route_command, static) = {
+ .path = "test route",
+ .short_help = "test route",
+ .function = thrash,
+};
+
+/* Init function with nothing to do; it is the test_route_init() that the
+   note at the top of this file asks to have called from main.c. */
+clib_error_t *
+test_route_init (vlib_main_t *vm)
+{
+  clib_error_t * error = 0;
+
+  return error;
+}
+
+VLIB_INIT_FUNCTION (test_route_init);
diff --git a/vnet/vnet/ip/ip6.h b/vnet/vnet/ip/ip6.h
new file mode 100644
index 00000000000..a5c322a2fa5
--- /dev/null
+++ b/vnet/vnet/ip/ip6.h
@@ -0,0 +1,503 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6.h: ip6 main include file
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip6_h
+#define included_ip_ip6_h
+
+#include <vlib/mc.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/lookup.h>
+
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_template.h>
+
+/*
+ * Default size of the ip6 fib hash table: 64K buckets and a memory size of
+ * 32<<20 (32 MiB).
+ * NOTE(review): presumably the defaults for ip6_main_t.lookup_table_nbuckets
+ * and ip6_main_t.lookup_table_size - confirm against the init code.
+ */
+#define IP6_FIB_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define IP6_FIB_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+/* Address / prefix length / vrf index triple.
+   NOTE(review): presumably the key layout for ip6_main_t.ip6_lookup_table
+   (a 24_8 bihash) - confirm where the key is built. */
+typedef struct {
+ ip6_address_t addr;
+ u32 dst_address_length;
+ u32 vrf_index;
+} ip6_fib_key_t;
+
+/* Per-table FIB record; instances live in the ip6_main_t.fibs vector. */
+typedef struct {
+ /* Table ID (hash key) for this FIB. */
+ u32 table_id;
+
+ /* Index into FIB vector. */
+ u32 index;
+
+ /* flow hash configuration */
+ u32 flow_hash_config;
+} ip6_fib_t;
+
+/* Forward declaration so the callback types below can take an ip6_main_t
+   pointer before the struct itself is defined. */
+struct ip6_main_t;
+
+/* Signature of a route add/delete callback. */
+typedef void (ip6_add_del_route_function_t)
+ (struct ip6_main_t * im,
+ uword opaque,
+ ip6_fib_t * fib,
+ u32 flags,
+ ip6_address_t * address,
+ u32 address_length,
+ void * old_result,
+ void * new_result);
+
+/* One registered route callback; see ip6_main_t.add_del_route_callbacks.
+   NOTE(review): function_opaque is presumably passed back as the `opaque'
+   argument, and required_flags presumably filters on the route flags -
+   confirm at the call site. */
+typedef struct {
+ ip6_add_del_route_function_t * function;
+ uword required_flags;
+ uword function_opaque;
+} ip6_add_del_route_callback_t;
+
+/* Signature of an interface-address add/delete callback. */
+typedef void (ip6_add_del_interface_address_function_t)
+ (struct ip6_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length,
+ u32 if_address_index,
+ u32 is_del);
+
+/* One registered interface-address callback; see
+   ip6_main_t.add_del_interface_address_callbacks.
+   NOTE(review): function_opaque is presumably passed back as the `opaque'
+   argument - confirm at the call site. */
+typedef struct {
+ ip6_add_del_interface_address_function_t * function;
+ uword function_opaque;
+} ip6_add_del_interface_address_callback_t;
+
+/* IP6 rx features.  Several entries document ordering constraints ("First",
+   "prior to this entry", "Must be last"), so the enumerator order is
+   significant; IP6_N_RX_FEATURE is the number of features. */
+typedef enum {
+ /* First check access list to either permit or deny this
+ packet based on classification. */
+ IP6_RX_FEATURE_CHECK_ACCESS,
+
+ /* RPF check: verify that source address is reachable via
+ RX interface or via any interface. */
+ IP6_RX_FEATURE_CHECK_SOURCE_REACHABLE_VIA_RX,
+ IP6_RX_FEATURE_CHECK_SOURCE_REACHABLE_VIA_ANY,
+
+ /* IPSec */
+ IP6_RX_FEATURE_IPSEC,
+
+ /* Intercept and decap L2TPv3 packets. */
+ IP6_RX_FEATURE_L2TPV3,
+
+ /* vPath forwarding: won't return to call next feature
+ so any feature needed before vPath forwarding must be prior
+ to this entry */
+ IP6_RX_FEATURE_VPATH,
+
+ /* Must be last: perform forwarding lookup. */
+ IP6_RX_FEATURE_LOOKUP,
+
+ IP6_N_RX_FEATURE,
+} ip6_rx_feature_type_t;
+
+/* Top-level IP6 state; the single global instance is ip6_main. */
+typedef struct ip6_main_t {
+ /* Bihash table (24_8 template) holding the ip6 lookup entries. */
+ BVT(clib_bihash) ip6_lookup_table;
+
+ /* Lookup state (interface addresses, adjacencies, ...) from lookup.h. */
+ ip_lookup_main_t lookup_main;
+
+ /* bitmap / refcounts / vector of mask widths to search */
+ uword * non_empty_dst_address_length_bitmap;
+ u8 * prefix_lengths_in_search_order;
+ /* 129 slots: presumably one per prefix length 0..128 - TODO confirm. */
+ i32 dst_address_length_refcounts[129];
+
+ /* Vector of FIBs. */
+ ip6_fib_t * fibs;
+
+ /* Address mask per prefix length; indexed by destination length in
+ ip6_destination_matches_route(). */
+ ip6_address_t fib_masks[129];
+
+ /* Table index indexed by software interface. */
+ u32 * fib_index_by_sw_if_index;
+
+ /* Hash table mapping table id to fib index.
+ ID space is not necessarily dense; index space is dense. */
+ uword * fib_index_by_table_id;
+
+ /* Vector of functions to call when routes are added/deleted. */
+ ip6_add_del_route_callback_t * add_del_route_callbacks;
+
+ /* Hash table mapping interface rewrite adjacency index by sw if index. */
+ uword * interface_route_adj_index_by_sw_if_index;
+
+ /* Functions to call when interface address changes. */
+ ip6_add_del_interface_address_callback_t * add_del_interface_address_callbacks;
+
+ /* Template used to generate IP6 neighbor solicitation packets. */
+ vlib_packet_template_t discover_neighbor_packet_template;
+
+ /* Per the name, a next index for neighbor discovery indexed by hw
+ interface index - TODO confirm against ip6_forward.c. */
+ u32 * discover_neighbor_next_index_by_hw_if_index;
+
+ /* ip6 lookup table config parameters */
+ u32 lookup_table_nbuckets;
+ uword lookup_table_size;
+
+ /* Seed for Jenkins hash used to compute ip6 flow hash. */
+ u32 flow_hash_seed;
+
+ struct {
+ /* TTL to use for host generated packets. */
+ u8 ttl;
+
+ u8 pad[3];
+ } host_config;
+} ip6_main_t;
+
+/* Global ip6 main structure. */
+extern ip6_main_t ip6_main;
+
+/* Global ip6 input node. Errors get attached to ip6 input node. */
+extern vlib_node_registration_t ip6_input_node;
+extern vlib_node_registration_t ip6_rewrite_node;
+extern vlib_node_registration_t ip6_discover_neighbor_node;
+
+extern vlib_node_registration_t ip6_icmp_neighbor_discovery_event_node;
+
+/* ipv6 neighbor discovery - timer/event types */
+typedef enum {
+ ICMP6_ND_EVENT_INIT,
+} ip6_icmp_neighbor_discovery_event_type_t;
+
+/* Data carried with a neighbor discovery event: either the sw interface
+   index of an add/del event, or the sw interface index plus fib index of an
+   up/down event.  Being a union, only one of the two views is meaningful for
+   a given event. */
+typedef union {
+ u32 add_del_swindex;
+ struct {
+ u32 up_down_swindex;
+ u32 fib_index;
+ } up_down_event;
+} ip6_icmp_neighbor_discovery_event_data_t;
+
+u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst);
+u32 ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index,
+ ip6_address_t * dst);
+ip6_fib_t * find_ip6_fib_by_table_index_or_id (ip6_main_t * im,
+ u32 table_index_or_id,
+ u32 flags);
+
+/* Return 1 when key and dest agree on every bit selected by the FIB mask for
+   a dest_length-bit prefix (im->fib_masks[dest_length]), 0 otherwise. */
+always_inline uword
+ip6_destination_matches_route (ip6_main_t * im,
+			       ip6_address_t * key,
+			       ip6_address_t * dest,
+			       uword dest_length)
+{
+  ip6_address_t * mask = &im->fib_masks[dest_length];
+  uword mismatch = 0;
+  int w;
+
+  /* Walk the address one machine word at a time; stop at the first word
+     that differs under the mask. */
+  for (w = 0; w < ARRAY_LEN (key->as_uword) && ! mismatch; w++)
+    mismatch = (key->as_uword[w] ^ dest->as_uword[w]) & mask->as_uword[w];
+
+  return mismatch == 0;
+}
+
+/* Return 1 when key falls inside the prefix configured by interface address
+   ia (its address and address_length), 0 otherwise. */
+always_inline uword
+ip6_destination_matches_interface (ip6_main_t * im,
+				   ip6_address_t * key,
+				   ip_interface_address_t * ia)
+{
+  ip6_address_t * if_addr;
+
+  if_addr = ip_interface_address_get_address (&im->lookup_main, ia);
+
+  return ip6_destination_matches_route (im, key, if_addr,
+					ia->address_length);
+}
+
+/* Same test as ip6_destination_matches_route(), except that key is read with
+   clib_mem_unaligned() so it may point at an unaligned address, e.g. straight
+   into the IP header of a packet. */
+always_inline uword
+ip6_unaligned_destination_matches_route (ip6_main_t * im,
+					 ip6_address_t * key,
+					 ip6_address_t * dest,
+					 uword dest_length)
+{
+  ip6_address_t * mask = &im->fib_masks[dest_length];
+  uword mismatch = 0;
+  int w;
+
+  for (w = 0; w < ARRAY_LEN (key->as_uword) && ! mismatch; w++)
+    mismatch = ((clib_mem_unaligned (&key->as_uword[w], uword)
+		 ^ dest->as_uword[w])
+		& mask->as_uword[w]);
+
+  return mismatch == 0;
+}
+
+/* Copy into *src the address of the interface address that
+   ip_interface_address_for_packet() selects for packet p on sw_if_index. */
+always_inline void
+ip6_src_address_for_packet (ip6_main_t * im, vlib_buffer_t * p, ip6_address_t * src, u32 sw_if_index)
+{
+  ip_lookup_main_t * lm = &im->lookup_main;
+  ip_interface_address_t * ia;
+  ip6_address_t * addr;
+
+  ia = ip_interface_address_for_packet (lm, p, sw_if_index);
+  addr = ip_interface_address_get_address (lm, ia);
+  *src = *addr;
+}
+
+/* Return the adjacency index for the source address of header i, caching it
+   in the buffer: the FIB lookup only runs while the buffer's RX adj_index is
+   still ~0. */
+always_inline u32
+ip6_src_lookup_for_packet (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
+{
+  /* Already looked up for this buffer: reuse the cached result. */
+  if (vnet_buffer (b)->ip.adj_index[VLIB_RX] != ~0)
+    return vnet_buffer (b)->ip.adj_index[VLIB_RX];
+
+  vnet_buffer (b)->ip.adj_index[VLIB_RX]
+    = ip6_fib_lookup (im, vnet_buffer (b)->sw_if_index[VLIB_RX],
+		      &i->src_address);
+
+  return vnet_buffer (b)->ip.adj_index[VLIB_RX];
+}
+
+/* Find interface address which matches destination.
+   Walks the addresses of sw_if_index (honoring unnumbered interfaces) and
+   returns the first one whose prefix covers dst, or 0 when none does.  When
+   result_ia is non-null it receives the matching ip_interface_address_t, or
+   0 when there is no match. */
+always_inline ip6_address_t *
+ip6_interface_address_matching_destination (ip6_main_t * im, ip6_address_t * dst, u32 sw_if_index,
+ ip_interface_address_t ** result_ia)
+{
+ ip_lookup_main_t * lm = &im->lookup_main;
+ ip_interface_address_t * ia;
+ ip6_address_t * result = 0;
+
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ ip6_address_t * a = ip_interface_address_get_address (lm, ia);
+ if (ip6_destination_matches_route (im, dst, a, ia->address_length))
+ {
+ result = a;
+ /* NOTE(review): presumably leaves the macro's iteration with ia still
+ pointing at the match - confirm in lookup.h. */
+ break;
+ }
+ }));
+ /* ia is only reported when a match was found. */
+ if (result_ia)
+ *result_ia = result ? ia : 0;
+ return result;
+}
+
+clib_error_t *
+ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
+ ip6_address_t * address, u32 address_length,
+ u32 is_del);
+
+int ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2);
+
+/* Add/del a route to the FIB. */
+
+/* Flag bits for route operations.  ADD and TABLE_ID are the zero ("bit
+   clear") values of bits 0 and 1, so they cannot be tested with `flags &
+   ...'; test for IP6_ROUTE_FLAG_DEL / IP6_ROUTE_FLAG_FIB_INDEX instead. */
+#define IP6_ROUTE_FLAG_ADD (0 << 0)
+#define IP6_ROUTE_FLAG_DEL (1 << 0)
+#define IP6_ROUTE_FLAG_TABLE_ID (0 << 1)
+#define IP6_ROUTE_FLAG_FIB_INDEX (1 << 1)
+#define IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY (1 << 2)
+#define IP6_ROUTE_FLAG_NO_REDISTRIBUTE (1 << 3)
+#define IP6_ROUTE_FLAG_NOT_LAST_IN_GROUP (1 << 4)
+/* Dynamic route created via neighbor discovery. */
+#define IP6_ROUTE_FLAG_NEIGHBOR (1 << 5)
+
+/* Argument block for ip6_add_del_route(). */
+typedef struct {
+ /* IP6_ROUTE_FLAG_* */
+ u32 flags;
+
+ /* Either index of fib or table_id to hash and get fib.
+ IP6_ROUTE_FLAG_FIB_INDEX specifies index; otherwise table_id is assumed. */
+ u32 table_index_or_table_id;
+
+ /* Destination address (prefix) and length. */
+ ip6_address_t dst_address;
+ u32 dst_address_length;
+
+ /* Adjacency to use for this destination. */
+ u32 adj_index;
+
+ /* If specified, adjacencies to add and then use for this
+ destination.  add_adj/n_add_adj override adj_index when
+ specified. */
+ ip_adjacency_t * add_adj;
+ u32 n_add_adj;
+} ip6_add_del_route_args_t;
+
+void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * args);
+
+void ip6_add_del_route_next_hop (ip6_main_t * im,
+ u32 flags,
+ ip6_address_t * dst_address,
+ u32 dst_address_length,
+ ip6_address_t * next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_weight, u32 adj_index,
+ u32 explicit_fib_index);
+u32
+ip6_get_route (ip6_main_t * im,
+ u32 fib_index_or_table_id,
+ u32 flags,
+ ip6_address_t * address,
+ u32 address_length);
+
+void
+ip6_foreach_matching_route (ip6_main_t * im,
+ u32 table_index_or_table_id,
+ u32 flags,
+ ip6_address_t * address,
+ u32 address_length,
+ ip6_address_t ** results,
+ u8 ** result_length);
+
+void ip6_delete_matching_routes (ip6_main_t * im,
+ u32 table_index_or_table_id,
+ u32 flags,
+ ip6_address_t * address,
+ u32 address_length);
+
+void ip6_maybe_remap_adjacencies (ip6_main_t * im,
+ u32 table_index_or_table_id,
+ u32 flags);
+
+void ip6_adjacency_set_interface_route (vnet_main_t * vnm,
+ ip_adjacency_t * adj,
+ u32 sw_if_index,
+ u32 if_address_index);
+
+clib_error_t *
+ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index);
+
+clib_error_t *
+ip6_set_neighbor_limit (u32 neighbor_limit);
+
+uword
+ip6_tcp_register_listener (vlib_main_t * vm,
+ u16 dst_port,
+ u32 next_node_index);
+uword
+ip6_udp_register_listener (vlib_main_t * vm,
+ u16 dst_port,
+ u32 next_node_index);
+
+u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6_header_t * ip0, int *bogus_lengthp);
+
+void ip6_register_protocol (u32 protocol, u32 node_index);
+
+serialize_function_t serialize_vnet_ip6_main, unserialize_vnet_ip6_main;
+
+int
+vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 * link_layer_address,
+ uword n_bytes_link_layer_address);
+int
+vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 * link_layer_address,
+ uword n_bytes_link_layer_address);
+void
+vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index);
+
+void
+ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip,
+ u8 *mac);
+
+void
+ip6_ethernet_mac_address_from_link_local_address (u8 *mac,
+ ip6_address_t *ip);
+
+int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config);
+
+int
+ip6_neighbor_ra_config(vlib_main_t * vm, u32 sw_if_index,
+ u8 surpress, u8 managed, u8 other,
+ u8 ll_option, u8 send_unicast, u8 cease,
+ u8 use_lifetime, u32 lifetime,
+ u32 initial_count, u32 initial_interval,
+ u32 max_interval, u32 min_interval,
+ u8 is_no);
+
+int
+ip6_neighbor_ra_prefix(vlib_main_t * vm, u32 sw_if_index,
+ ip6_address_t *prefix_addr, u8 prefix_len,
+ u8 use_default, u32 val_lifetime, u32 pref_lifetime,
+ u8 no_advertise, u8 off_link, u8 no_autoconfig, u8 no_onlink,
+ u8 is_no);
+
+
+clib_error_t *
+enable_ip6_interface(vlib_main_t * vm,
+ u32 sw_if_index);
+
+clib_error_t *
+disable_ip6_interface(vlib_main_t * vm,
+ u32 sw_if_index);
+
+int
+ip6_interface_enabled(vlib_main_t * vm,
+ u32 sw_if_index);
+
+clib_error_t *
+set_ip6_link_local_address(vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t *address,
+ u8 address_length);
+
+void vnet_register_ip6_neighbor_resolution_event(vnet_main_t * vnm,
+ void * address_arg,
+ uword node_index,
+ uword type_opaque,
+ uword data);
+
+int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 table_index);
+extern vlib_node_registration_t ip6_lookup_node;
+
+/* Compute the flow hash of an ip6 packet.
+   flow_hash_config (IP_FLOW_HASH_* bits) selects which of source address,
+   destination address, protocol, source port and destination port take part,
+   and whether the source/destination roles are swapped.  Ports are read from
+   the bytes right after the fixed ip6 header, and only when the header's
+   protocol field is TCP or UDP.  Returns the low 32 bits of the mixed
+   value. */
+always_inline u32
+ip6_compute_flow_hash (ip6_header_t * ip, u32 flow_hash_config)
+{
+  tcp_header_t * l4 = (void *) (ip + 1);
+  uword has_ports = (ip->protocol == IP_PROTOCOL_TCP
+		     || ip->protocol == IP_PROTOCOL_UDP);
+  uword swap = flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST;
+  u64 src_mix = 0, dst_mix = 0;
+  u64 src_port = 0, dst_port = 0;
+  u64 a, b, c;
+
+  /* Fold each 128-bit address to 64 bits; unselected inputs stay 0. */
+  if (flow_hash_config & IP_FLOW_HASH_SRC_ADDR)
+    src_mix = ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1];
+  if (flow_hash_config & IP_FLOW_HASH_DST_ADDR)
+    dst_mix = ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1];
+
+  a = swap ? dst_mix : src_mix;
+  b = swap ? src_mix : dst_mix;
+  if (flow_hash_config & IP_FLOW_HASH_PROTO)
+    b ^= ip->protocol;
+
+  if (has_ports && (flow_hash_config & IP_FLOW_HASH_SRC_PORT))
+    src_port = l4->ports.src;
+  if (has_ports && (flow_hash_config & IP_FLOW_HASH_DST_PORT))
+    dst_port = l4->ports.dst;
+
+  c = swap ? ((src_port << 16) | dst_port) : ((dst_port << 16) | src_port);
+
+  hash_mix64 (a, b, c);
+  return (u32) c;
+}
+
+#endif /* included_ip_ip6_h */
diff --git a/vnet/vnet/ip/ip6_error.h b/vnet/vnet/ip/ip6_error.h
new file mode 100644
index 00000000000..93754a10fcc
--- /dev/null
+++ b/vnet/vnet/ip/ip6_error.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_error.h: ip6 fast path errors
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip6_error_h
+#define included_ip_ip6_error_h
+
+/* X-macro listing every ip6 error as _ (SYMBOL, "counter description").
+   Expand it with a local definition of _; ip6_error_t is built this way.
+   NONE must stay first. */
+#define foreach_ip6_error \
+  /* Must be first. */ \
+  _ (NONE, "valid ip6 packets") \
+ \
+  /* Errors signalled by ip6-input */ \
+  _ (TOO_SHORT, "ip6 length < 40 bytes") \
+  _ (BAD_LENGTH, "ip6 length > l2 length") \
+  _ (VERSION, "ip6 version != 6") \
+  _ (TIME_EXPIRED, "ip6 ttl <= 1") \
+ \
+  /* Errors signalled by ip6-rewrite. */ \
+  _ (MTU_EXCEEDED, "ip6 MTU exceeded") \
+  _ (DST_LOOKUP_MISS, "ip6 destination lookup miss") \
+  _ (SRC_LOOKUP_MISS, "ip6 source lookup miss") \
+  _ (ADJACENCY_DROP, "ip6 adjacency drop") \
+  _ (ADJACENCY_PUNT, "ip6 adjacency punt") \
+ \
+  /* Errors signalled by ip6-local. */ \
+  _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
+  _ (UDP_CHECKSUM, "bad udp checksum") \
+  _ (TCP_CHECKSUM, "bad tcp checksum") \
+  _ (ICMP_CHECKSUM, "bad icmp checksum") \
+  _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
+ \
+  /* Errors signalled by {tcp6,udp6}-lookup. */ \
+  _ (UNKNOWN_UDP_PORT, "no listener for udp port") \
+  _ (UNKNOWN_TCP_PORT, "no listener for tcp port") \
+ \
+  /* Spoofed packets in ip6-rewrite-local */ \
+  _(SPOOFED_LOCAL_PACKETS, "ip6 spoofed local-address packet drops") \
+ \
+  /* Errors signalled by ip6-inacl */ \
+  _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
+  _ (INACL_SESSION_DENY, "input ACL session deny drops")
+
+/* Error codes generated from foreach_ip6_error: one IP6_ERROR_<SYMBOL> per
+   entry, in list order, followed by IP6_N_ERROR as the count. */
+typedef enum {
+#define _(sym,str) IP6_ERROR_##sym,
+ foreach_ip6_error
+#undef _
+ IP6_N_ERROR,
+} ip6_error_t;
+
+#endif /* included_ip_ip6_error_h */
diff --git a/vnet/vnet/ip/ip6_format.c b/vnet/vnet/ip/ip6_format.c
new file mode 100644
index 00000000000..1a2810e16ec
--- /dev/null
+++ b/vnet/vnet/ip/ip6_format.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_format.c: ip6 formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format an IP6 address as colon-separated hex groups.  The longest run of
+   two or more all-zero 16-bit groups (the earliest one on a tie) is
+   replaced by "::".
+   va_args: ip6_address_t * address.  Returns the extended vector. */
+u8 * format_ip6_address (u8 * s, va_list * args)
+{
+  ip6_address_t * a = va_arg (*args, ip6_address_t *);
+  u32 best_len = 0, run_len = 0;
+  int best_start = -1, run_start = 0;
+  int after_double_colon = 0;
+  int i;
+
+  /* Pass 1: locate the zero run to compress.  The extra iteration at
+     i == ARRAY_LEN closes a run that reaches the end of the address. */
+  for (i = 0; i <= ARRAY_LEN (a->as_u16); i++)
+    {
+      if (i < ARRAY_LEN (a->as_u16) && a->as_u16[i] == 0)
+        {
+          if (run_len == 0)
+            run_start = i;
+          run_len++;
+          continue;
+        }
+
+      /* A run (if any) just ended: keep it only when it spans more than
+         one group and is strictly longer than the best seen so far. */
+      if (run_len > 1 && run_len > best_len)
+        {
+          best_start = run_start;
+          best_len = run_len;
+        }
+      run_len = 0;
+    }
+
+  /* Pass 2: emit the groups, substituting "::" for the chosen run. */
+  for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+    {
+      if (i == best_start)
+        {
+          s = format (s, "::");
+          /* Skip the rest of the run; the loop increment adds the last step. */
+          i += best_len - 1;
+          after_double_colon = 1;
+          continue;
+        }
+
+      /* No separator before the first group or directly after "::". */
+      s = format (s, "%s%x",
+                  (after_double_colon || i == 0) ? "" : ":",
+                  clib_net_to_host_u16 (a->as_u16[i]));
+      after_double_colon = 0;
+    }
+
+  return s;
+}
+
+/* Format an IP6 route destination as "<address>/<length>".
+   va_args: ip6_address_t * address, u32 prefix length (stored in a u8). */
+u8 * format_ip6_address_and_length (u8 * s, va_list * args)
+{
+  ip6_address_t * address = va_arg (*args, ip6_address_t *);
+  u8 prefix_length = va_arg (*args, u32);
+
+  s = format (s, "%U/%d", format_ip6_address, address, prefix_length);
+  return s;
+}
+
+/* Parse an IP6 address written as colon-separated hex groups, with at most
+   one "::" standing for a run of zero groups.
+   va_args: ip6_address_t * result.
+   Characters are consumed until one that is neither a hex digit nor an
+   acceptable ':' is met; that character is pushed back onto the input.
+   Returns 1 and fills *result (groups stored in network byte order) on
+   success; returns 0 on a malformed address, leaving *result untouched. */
+uword unformat_ip6_address (unformat_input_t * input, va_list * args)
+{
+ ip6_address_t * result = va_arg (*args, ip6_address_t *);
+ u16 hex_quads[8];
+ uword hex_quad, n_hex_quads, hex_digit, n_hex_digits;
+ uword c, n_colon, double_colon_index;
+
+ n_hex_quads = hex_quad = n_hex_digits = n_colon = 0;
+ /* ARRAY_LEN (hex_quads) doubles as the "no :: seen yet" marker. */
+ double_colon_index = ARRAY_LEN (hex_quads);
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ /* 16 means "current character is not a hex digit". */
+ hex_digit = 16;
+ if (c >= '0' && c <= '9')
+ hex_digit = c - '0';
+ else if (c >= 'a' && c <= 'f')
+ hex_digit = c + 10 - 'a';
+ else if (c >= 'A' && c <= 'F')
+ hex_digit = c + 10 - 'A';
+ /* n_colon counts consecutive colons; a third one in a row ends the parse. */
+ else if (c == ':' && n_colon < 2)
+ n_colon++;
+ else
+ {
+ unformat_put_input (input);
+ break;
+ }
+
+ /* Too many hex quads. */
+ if (n_hex_quads >= ARRAY_LEN (hex_quads))
+ return 0;
+
+ if (hex_digit < 16)
+ {
+ hex_quad = (hex_quad << 4) | hex_digit;
+
+ /* Hex quad must fit in 16 bits. */
+ if (n_hex_digits >= 4)
+ return 0;
+
+ n_colon = 0;
+ n_hex_digits++;
+ }
+
+ /* Save position of :: */
+ if (n_colon == 2)
+ {
+ /* More than one :: ? */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ return 0;
+ double_colon_index = n_hex_quads;
+ }
+
+ /* A colon after at least one digit terminates the current group. */
+ if (n_colon > 0 && n_hex_digits > 0)
+ {
+ hex_quads[n_hex_quads++] = hex_quad;
+ hex_quad = 0;
+ n_hex_digits = 0;
+ }
+ }
+
+ /* Flush a final group that was not followed by a colon. */
+ if (n_hex_digits > 0)
+ hex_quads[n_hex_quads++] = hex_quad;
+
+ {
+ word i;
+
+ /* Expand :: to appropriate number of zero hex quads. */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ {
+ word n_zero = ARRAY_LEN (hex_quads) - n_hex_quads;
+
+ /* Shift the groups that followed "::" to the tail, highest index first
+ so nothing is overwritten before it has been moved. */
+ for (i = n_hex_quads - 1; i >= (signed) double_colon_index; i--)
+ hex_quads[n_zero + i] = hex_quads[i];
+
+ /* Zero-fill the gap opened at the "::" position. */
+ for (i = 0; i < n_zero; i++)
+ {
+ ASSERT ((double_colon_index + i) < ARRAY_LEN (hex_quads));
+ hex_quads[double_colon_index + i] = 0;
+ }
+
+ n_hex_quads = ARRAY_LEN (hex_quads);
+ }
+
+ /* Too few hex quads given. */
+ if (n_hex_quads < ARRAY_LEN (hex_quads))
+ return 0;
+
+ /* Commit: convert each group to network byte order. */
+ for (i = 0; i < ARRAY_LEN (hex_quads); i++)
+ result->as_u16[i] = clib_host_to_net_u16 (hex_quads[i]);
+
+ return 1;
+ }
+}
+
+/* Format an IP6 header followed, when possible, by its payload header.
+   va_args: ip6_header_t * header, u32 max_header_bytes (bytes available
+   starting at the header).  Emits "IP header truncated" when fewer bytes
+   than one IP6 header are available. */
+u8 * format_ip6_header (u8 * s, va_list * args)
+{
+  ip6_header_t * ip = va_arg (*args, ip6_header_t *);
+  u32 max_header_bytes = va_arg (*args, u32);
+  u32 first_word, ip_version, traffic_class, flow_label;
+  ip_protocol_info_t * pi;
+  uword indent;
+
+  if (max_header_bytes < sizeof (ip[0]))
+    return format (s, "IP header truncated");
+
+  /* Detail lines are indented two columns past the summary line. */
+  indent = format_get_indent (s) + 2;
+
+  s = format (s, "%U: %U -> %U",
+              format_ip_protocol, ip->protocol,
+              format_ip6_address, &ip->src_address,
+              format_ip6_address, &ip->dst_address);
+
+  /* First 32 bits: version (4 bits), traffic class (8), flow label (20). */
+  first_word = clib_net_to_host_u32 (ip->ip_version_traffic_class_and_flow_label);
+  ip_version = first_word >> 28;
+  traffic_class = (first_word >> 20) & 0xff;
+  flow_label = first_word & pow2_mask (20);
+
+  /* The version is only worth printing when it is unexpected. */
+  if (ip_version != 6)
+    s = format (s, "\n%Uversion %d",
+                format_white_space, indent, ip_version);
+
+  s = format (s, "\n%Utos 0x%02x, flow label 0x%x, hop limit %d, payload length %d",
+              format_white_space, indent,
+              traffic_class, flow_label, ip->hop_limit,
+              clib_net_to_host_u16 (ip->payload_length));
+
+  /* Nothing beyond the IP6 header itself: no next layer to describe. */
+  if (max_header_bytes == 0 || max_header_bytes <= sizeof (ip[0]))
+    return s;
+
+  /* Recurse into the next protocol layer if a formatter is registered. */
+  pi = ip_get_protocol_info (&ip_main, ip->protocol);
+  if (pi && pi->format_header)
+    s = format (s, "\n%U%U",
+                format_white_space, indent - 2,
+                pi->format_header,
+                /* next protocol header */ (void*) (ip + 1),
+                max_header_bytes - sizeof (ip[0]));
+
+  return s;
+}
+
+/* Parse "<protocol>: <src> -> <dst>" plus optional "tos N" / "hop-limit N"
+   settings, then hand the remaining input to the payload protocol's parser
+   if one is registered.
+   va_args: u8 ** result, a byte vector to which the header (and whatever
+   the payload parser adds) is appended.
+   Returns 1 on success, 0 on parse failure. */
+uword unformat_ip6_header (unformat_input_t * input, va_list * args)
+{
+  u8 ** result = va_arg (*args, u8 **);
+  ip6_header_t * ip;
+  ip_protocol_info_t * pi;
+  void * new_bytes;
+  int header_offset;
+  int value;
+
+  /* Append room for one IP6 header; remember where it starts because the
+     vector may be reallocated later. */
+  header_offset = vec_len (*result);
+  vec_add2 (*result, new_bytes, sizeof (ip[0]));
+  ip = new_bytes;
+
+  /* Start from an all-zero header carrying only the version number. */
+  memset (ip, 0, sizeof (ip[0]));
+  ip->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (6 << 28);
+
+  if (! unformat (input, "%U: %U -> %U",
+                  unformat_ip_protocol, &ip->protocol,
+                  unformat_ip6_address, &ip->src_address,
+                  unformat_ip6_address, &ip->dst_address))
+    return 0;
+
+  /* Consume options until something unrecognised is met; that input is
+     left for the next protocol level. */
+  for (;;)
+    {
+      if (unformat (input, "tos %U", unformat_vlib_number, &value))
+        ip->ip_version_traffic_class_and_flow_label |= clib_host_to_net_u32 ((value & 0xff) << 20);
+      else if (unformat (input, "hop-limit %U", unformat_vlib_number, &value))
+        ip->hop_limit = value;
+      else
+        break;
+    }
+
+  /* Recurse into the next protocol layer. */
+  pi = ip_get_protocol_info (&ip_main, ip->protocol);
+  if (pi && pi->unformat_header)
+    {
+      if (! unformat_user (input, pi->unformat_header, result))
+        return 0;
+
+      /* The vector may have moved while the payload was appended. */
+      ip = (void *) *result + header_offset;
+    }
+
+  /* Payload length is everything appended after this header. */
+  ip->payload_length = clib_host_to_net_u16 (vec_len (*result) - (header_offset + sizeof (ip[0])));
+
+  return 1;
+}
diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c
new file mode 100644
index 00000000000..f0065e969f8
--- /dev/null
+++ b/vnet/vnet/ip/ip6_forward.c
@@ -0,0 +1,2724 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_forward.c: IP v6 forwarding
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
+#include <vnet/srp/srp.h> /* for srp_hw_interface_class */
+#include <vppinfra/cache.h>
+
+#include <vppinfra/bihash_template.c>
+
+/* Rebuild im->prefix_lengths_in_search_order from the bitmap of prefix
+   lengths that currently have at least one route. */
+static void compute_prefix_lengths_in_search_order (ip6_main_t * im)
+{
+  int bit;
+
+  vec_reset_length (im->prefix_lengths_in_search_order);
+
+  /* Bit b of the bitmap stands for prefix length 128 - b; the bitmap is
+     reversed so that the resulting order is in fact a longest prefix
+     match. */
+  clib_bitmap_foreach (bit, im->non_empty_dst_address_length_bitmap,
+  ({
+    vec_add1 (im->prefix_lengths_in_search_order, 128 - bit);
+  }));
+}
+
+/* Look up dst in the FIB identified by fib_index.  Each prefix length in
+   im->prefix_lengths_in_search_order is tried in turn; the value stored
+   for the first hit is returned.  Returns lm->miss_adj_index when no
+   prefix matches. */
+u32
+ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index, ip6_address_t * dst)
+{
+  ip_lookup_main_t * lm = &im->lookup_main;
+  BVT(clib_bihash_kv) kv, value;
+  int n_lengths = vec_len (im->prefix_lengths_in_search_order);
+  int idx;
+
+  for (idx = 0; idx < n_lengths; idx++)
+    {
+      int dst_address_length = im->prefix_lengths_in_search_order[idx];
+      ip6_address_t * mask = &im->fib_masks[dst_address_length];
+
+      ASSERT(dst_address_length >= 0 && dst_address_length <= 128);
+
+      /* Key: masked destination in words 0-1, FIB index and prefix length
+         packed into word 2. */
+      kv.key[0] = dst->as_u64[0] & mask->as_u64[0];
+      kv.key[1] = dst->as_u64[1] & mask->as_u64[1];
+      kv.key[2] = ((u64)((fib_index))<<32) | dst_address_length;
+
+      if (BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value) == 0)
+        return value.value;
+    }
+
+  return lm->miss_adj_index;
+}
+
+/* Look up dst in the FIB bound to interface sw_if_index. */
+u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst)
+{
+  return ip6_fib_lookup_with_table
+    (im, vec_elt (im->fib_index_by_sw_if_index, sw_if_index), dst);
+}
+
+/* Allocate one adjacency that sends matching packets to ip6-local and
+   record its index in a->adj_index. */
+static void
+ip6_fib_init_local_adjacency (ip_lookup_main_t * lm,
+                              ip6_add_del_route_args_t * a)
+{
+  ip_adjacency_t * adj;
+
+  adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
+                          &a->adj_index);
+  adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
+  adj->if_address_index = ~0;
+  adj->rewrite_header.data_bytes = 0;
+}
+
+/* Install the local routes every ip6 FIB starts with: the solicited-node
+   multicast prefix and the link-local all-routers, all-hosts and
+   mldv2-routers groups. */
+void
+vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index)
+{
+  ip_lookup_main_t * lm = &im->lookup_main;
+  ip6_add_del_route_args_t a;
+
+  memset (&a, 0, sizeof (a));
+
+  a.table_index_or_table_id = fib_index;
+  a.flags = (IP6_ROUTE_FLAG_ADD
+             | IP6_ROUTE_FLAG_FIB_INDEX
+             | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY
+             | IP6_ROUTE_FLAG_NO_REDISTRIBUTE);
+
+  /* Add ff02::1:ff00:0/104 via local route for all tables.
+     This is required for neighbor discovery to work. */
+  ip6_fib_init_local_adjacency (lm, &a);
+  ip6_set_solicited_node_multicast_address (&a.dst_address, 0);
+  a.dst_address_length = 104;
+  ip6_add_del_route (im, &a);
+
+  /* Add all-routers multicast address via local route for all tables */
+  ip6_fib_init_local_adjacency (lm, &a);
+  ip6_set_reserved_multicast_address (&a.dst_address,
+                                      IP6_MULTICAST_SCOPE_link_local,
+                                      IP6_MULTICAST_GROUP_ID_all_routers);
+  a.dst_address_length = 128;
+  ip6_add_del_route (im, &a);
+
+  /* Add all-nodes multicast address via local route for all tables */
+  ip6_fib_init_local_adjacency (lm, &a);
+  ip6_set_reserved_multicast_address (&a.dst_address,
+                                      IP6_MULTICAST_SCOPE_link_local,
+                                      IP6_MULTICAST_GROUP_ID_all_hosts);
+  a.dst_address_length = 128;
+  ip6_add_del_route (im, &a);
+
+  /* Add all-mldv2 multicast address via local route for all tables */
+  ip6_fib_init_local_adjacency (lm, &a);
+  ip6_set_reserved_multicast_address (&a.dst_address,
+                                      IP6_MULTICAST_SCOPE_link_local,
+                                      IP6_MULTICAST_GROUP_ID_mldv2_routers);
+  a.dst_address_length = 128;
+  ip6_add_del_route (im, &a);
+}
+
+/* Append a new FIB for table_id to im->fibs, register it in
+   im->fib_index_by_table_id, give it the default flow-hash configuration
+   and populate it with the standard local routes.  Returns the new FIB. */
+static ip6_fib_t *
+create_fib_with_table_id (ip6_main_t * im, u32 table_id)
+{
+  ip6_fib_t * new_fib;
+
+  /* The FIB is appended below, so its index is the current vector length. */
+  hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
+  vec_add2 (im->fibs, new_fib, 1);
+
+  new_fib->table_id = table_id;
+  new_fib->index = new_fib - im->fibs;
+  new_fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
+
+  vnet_ip6_fib_init (im, new_fib->index);
+
+  return new_fib;
+}
+
+/* Resolve a FIB from either a FIB index (IP6_ROUTE_FLAG_FIB_INDEX set in
+   flags) or a table id (flag clear).  An unknown table id causes a new
+   FIB to be created for it. */
+ip6_fib_t *
+find_ip6_fib_by_table_index_or_id (ip6_main_t * im, u32 table_index_or_id, u32 flags)
+{
+  uword * p;
+  uword fib_index;
+
+  if (flags & IP6_ROUTE_FLAG_FIB_INDEX)
+    fib_index = table_index_or_id;
+  else if ((p = hash_get (im->fib_index_by_table_id, table_index_or_id)) != 0)
+    fib_index = p[0];
+  else
+    return create_fib_with_table_id (im, table_index_or_id);
+
+  return vec_elt_at_index (im->fibs, fib_index);
+}
+
+/* Add or delete one route in an ip6 FIB.
+   a->flags selects delete (IP6_ROUTE_FLAG_DEL) versus add, and how
+   a->table_index_or_table_id is interpreted.  When a->n_add_adj > 0 a new
+   adjacency block is created from a->add_adj; otherwise a->adj_index is
+   used as given.  The destination is masked to a->dst_address_length
+   before being used as the hash key. */
+void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * a)
+{
+ ip_lookup_main_t * lm = &im->lookup_main;
+ ip6_fib_t * fib;
+ ip6_address_t dst_address;
+ u32 dst_address_length, adj_index;
+ uword is_del;
+ u32 old_adj_index = ~0;
+ BVT(clib_bihash_kv) kv, value;
+
+ vlib_smp_unsafe_warning();
+
+ is_del = (a->flags & IP6_ROUTE_FLAG_DEL) != 0;
+
+ /* Either create new adjacency or use given one depending on arguments. */
+ if (a->n_add_adj > 0)
+ {
+ ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
+ ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
+ }
+ else
+ adj_index = a->adj_index;
+
+ dst_address = a->dst_address;
+ dst_address_length = a->dst_address_length;
+ fib = find_ip6_fib_by_table_index_or_id (im, a->table_index_or_table_id,
+ a->flags);
+
+ ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
+ ip6_address_mask (&dst_address, &im->fib_masks[dst_address_length]);
+
+ /* refcount accounting */
+ /* NOTE(review): the per-length refcount is adjusted before checking
+ whether the key is already in the table, so re-adding an existing route
+ or deleting an absent one appears to skew the count - confirm. */
+ if (is_del)
+ {
+ ASSERT (im->dst_address_length_refcounts[dst_address_length] > 0);
+ if (--im->dst_address_length_refcounts[dst_address_length] == 0)
+ {
+ /* Last route of this length is gone: drop it from the search order. */
+ im->non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (im->non_empty_dst_address_length_bitmap,
+ 128 - dst_address_length, 0);
+ compute_prefix_lengths_in_search_order (im);
+ }
+ }
+ else
+ {
+ im->dst_address_length_refcounts[dst_address_length]++;
+
+ im->non_empty_dst_address_length_bitmap =
+ clib_bitmap_set (im->non_empty_dst_address_length_bitmap,
+ 128 - dst_address_length, 1);
+ compute_prefix_lengths_in_search_order (im);
+ }
+
+ /* Key: masked address in words 0-1, FIB index and prefix length packed
+ into word 2. */
+ kv.key[0] = dst_address.as_u64[0];
+ kv.key[1] = dst_address.as_u64[1];
+ kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length;
+
+ /* Remember the adjacency currently bound to this key, if any. */
+ if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0)
+ old_adj_index = value.value;
+
+ if (is_del)
+ BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 0 /* is_add */);
+ else
+ {
+ /* Make sure adj index is valid. */
+ if (CLIB_DEBUG > 0)
+ (void) ip_get_adjacency (lm, adj_index);
+
+ kv.value = adj_index;
+
+ BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 1 /* is_add */);
+ }
+
+ /* Delete old adjacency index if present and changed. */
+ {
+ if (! (a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
+ && old_adj_index != ~0
+ && old_adj_index != adj_index)
+ ip_del_adjacency (lm, old_adj_index);
+ }
+}
+
+/* Add or remove one next hop of the (possibly multipath) route
+   dst_address/dst_address_length.
+   flags: IP6_ROUTE_FLAG_DEL removes the next hop, otherwise it is added;
+   IP6_ROUTE_FLAG_NO_REDISTRIBUTE is passed through to ip6_add_del_route.
+   next_hop: next-hop address; the all-zero address means "route via the
+   interface next_hop_sw_if_index itself".
+   next_hop_weight: weight handed to the multipath code.
+   adj_index: ~0 selects interface/next-hop resolution below.
+   explicit_fib_index: FIB to use, or ~0 to take the FIB bound to
+   next_hop_sw_if_index.
+   Failures set vnm->api_errno and are reported via clib_error_report. */
+void
+ip6_add_del_route_next_hop (ip6_main_t * im,
+ u32 flags,
+ ip6_address_t * dst_address,
+ u32 dst_address_length,
+ ip6_address_t * next_hop,
+ u32 next_hop_sw_if_index,
+ u32 next_hop_weight, u32 adj_index,
+ u32 explicit_fib_index)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ ip_lookup_main_t * lm = &im->lookup_main;
+ u32 fib_index;
+ ip6_fib_t * fib;
+ ip6_address_t masked_dst_address;
+ u32 old_mp_adj_index, new_mp_adj_index;
+ u32 dst_adj_index, nh_adj_index;
+ int rv;
+ ip_adjacency_t * dst_adj;
+ ip_multipath_adjacency_t * old_mp, * new_mp;
+ int is_del = (flags & IP6_ROUTE_FLAG_DEL) != 0;
+ int is_interface_next_hop;
+ clib_error_t * error = 0;
+ uword * nh_result;
+ BVT(clib_bihash_kv) kv, value;
+
+ vlib_smp_unsafe_warning();
+
+ if (explicit_fib_index == (u32)~0)
+ fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
+ else
+ fib_index = explicit_fib_index;
+
+ fib = vec_elt_at_index (im->fibs, fib_index);
+
+ /* Lookup next hop to be added or deleted. */
+ is_interface_next_hop = ip6_address_is_zero (next_hop);
+ if (adj_index == (u32)~0)
+ {
+ if (is_interface_next_hop)
+ {
+ /* One interface-route adjacency is cached per sw_if_index. */
+ nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index,
+ next_hop_sw_if_index);
+ if (nh_result)
+ nh_adj_index = *nh_result;
+ else
+ {
+ ip_adjacency_t * adj;
+ adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
+ &nh_adj_index);
+ ip6_adjacency_set_interface_route (vnm, adj,
+ next_hop_sw_if_index, ~0);
+ /* The callbacks take an adjacency index, so announce the adjacency
+ just created rather than the interface index. */
+ ip_call_add_del_adjacency_callbacks
+ (lm, nh_adj_index, /* is_del */ 0);
+ hash_set (im->interface_route_adj_index_by_sw_if_index,
+ next_hop_sw_if_index, nh_adj_index);
+ }
+ }
+ else
+ {
+ /* Look for the interface /128 route */
+ kv.key[0] = next_hop->as_u64[0];
+ kv.key[1] = next_hop->as_u64[1];
+ kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128;
+
+ if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0)
+ {
+ vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
+ error = clib_error_return (0, "next-hop %U/128 not in FIB",
+ format_ip6_address, next_hop);
+ goto done;
+ }
+
+ nh_adj_index = value.value;
+ }
+ }
+ else
+ {
+ /* NOTE(review): the caller-supplied adj_index is not used on this path;
+ the next hop is still resolved through its /128 route - confirm this
+ is intended. */
+ /* Look for the interface /128 route */
+ kv.key[0] = next_hop->as_u64[0];
+ kv.key[1] = next_hop->as_u64[1];
+ kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128;
+
+ if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0)
+ {
+ vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
+ error = clib_error_return (0, "next-hop %U/128 not in FIB",
+ format_ip6_address, next_hop);
+ goto done;
+ }
+
+ nh_adj_index = value.value;
+ }
+
+ ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
+ masked_dst_address = dst_address[0];
+ ip6_address_mask (&masked_dst_address, &im->fib_masks[dst_address_length]);
+
+ /* Find the adjacency currently installed for the destination, if any. */
+ kv.key[0] = masked_dst_address.as_u64[0];
+ kv.key[1] = masked_dst_address.as_u64[1];
+ kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length;
+
+ rv = BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value);
+
+ if (rv == 0)
+ {
+ dst_adj_index = value.value;
+ dst_adj = ip_get_adjacency (lm, dst_adj_index);
+ }
+ else
+ {
+ /* For deletes destination must be known. */
+ if (is_del)
+ {
+ vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
+ error = clib_error_return (0, "unknown destination %U/%d",
+ format_ip6_address, dst_address,
+ dst_address_length);
+ goto done;
+ }
+
+ dst_adj_index = ~0;
+ dst_adj = 0;
+ }
+
+ /* Ignore adds of X/128 with next hop of X. */
+ if (! is_del
+ && dst_address_length == 128
+ && ip6_address_is_equal (dst_address, next_hop))
+ {
+ vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
+ error = clib_error_return (0, "prefix matches next hop %U/%d",
+ format_ip6_address, dst_address,
+ dst_address_length);
+ goto done;
+ }
+
+ old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
+
+ /* Let the multipath code compute the next-hop set after the change. */
+ if (! ip_multipath_adjacency_add_del_next_hop
+ (lm, is_del,
+ dst_adj ? dst_adj->heap_handle : ~0,
+ nh_adj_index,
+ next_hop_weight,
+ &new_mp_adj_index))
+ {
+ vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
+ error = clib_error_return
+ (0, "requested deleting next-hop %U not found in multi-path",
+ format_ip6_address, next_hop);
+ goto done;
+ }
+
+ old_mp = new_mp = 0;
+ if (old_mp_adj_index != ~0)
+ old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
+ if (new_mp_adj_index != ~0)
+ new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
+
+ /* Only touch the FIB when the multipath adjacency actually changed. */
+ if (old_mp != new_mp)
+ {
+ ip6_add_del_route_args_t a;
+ a.table_index_or_table_id = fib_index;
+ a.flags = ((is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD)
+ | IP6_ROUTE_FLAG_FIB_INDEX
+ | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY
+ | (flags & IP6_ROUTE_FLAG_NO_REDISTRIBUTE));
+ a.dst_address = dst_address[0];
+ a.dst_address_length = dst_address_length;
+ a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
+ a.add_adj = 0;
+ a.n_add_adj = 0;
+
+ ip6_add_del_route (im, &a);
+ }
+
+ done:
+ if (error)
+ clib_error_report (error);
+}
+
+/* Exact-match lookup of address/address_length in the FIB selected by
+   table_index_or_table_id and flags.  Returns the value stored for the
+   route, or 0 when there is no such route. */
+u32
+ip6_get_route (ip6_main_t * im,
+               u32 table_index_or_table_id,
+               u32 flags,
+               ip6_address_t * address,
+               u32 address_length)
+{
+  ip6_fib_t * fib;
+  ip6_address_t masked_address;
+  BVT(clib_bihash_kv) kv, value;
+
+  fib = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
+
+  ASSERT (address_length < ARRAY_LEN (im->fib_masks));
+
+  /* Work on a masked copy so the caller's address is left alone. */
+  masked_address = address[0];
+  ip6_address_mask (&masked_address, &im->fib_masks[address_length]);
+
+  kv.key[0] = masked_address.as_u64[0];
+  kv.key[1] = masked_address.as_u64[1];
+  kv.key[2] = ((u64)((fib - im->fibs))<<32) | address_length;
+
+  if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) != 0)
+    return 0;
+
+  return (value.value);
+}
+
+/* Collect every route of prefix length address_length in the selected FIB
+   for which ip6_destination_matches_route accepts dst_address.
+   *results and *result_lengths are vectors: they are emptied first, then
+   receive one route address and one length per match.  The whole hash
+   table is scanned, bucket by bucket. */
+void
+ip6_foreach_matching_route (ip6_main_t * im,
+ u32 table_index_or_table_id,
+ u32 flags,
+ ip6_address_t * dst_address,
+ u32 address_length,
+ ip6_address_t ** results,
+ u8 ** result_lengths)
+{
+ ip6_fib_t * fib =
+ find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
+ BVT(clib_bihash) * h = &im->ip6_lookup_table;
+ BVT(clib_bihash_value) * v;
+ clib_bihash_bucket_t * b;
+ int i, j, k;
+
+ /* Reuse the callers' vectors: reset their lengths, keep the storage. */
+ if (*results)
+ _vec_len (*results) = 0;
+ if (*result_lengths)
+ _vec_len (*result_lengths) = 0;
+
+ /* Walk the table looking for routes which match the supplied address */
+ for (i = 0; i < h->nbuckets; i++)
+ {
+ b = &h->buckets [i];
+ /* Offset 0 marks a bucket with nothing in it. */
+ if (b->offset == 0)
+ continue;
+
+ v = BV(clib_bihash_get_value) (h, b->offset);
+ /* A bucket spans 2^log2_pages consecutive pages. */
+ for (j = 0; j < (1<<b->log2_pages); j++)
+ {
+ for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
+ {
+ if (BV(clib_bihash_is_free)(&v->kvp[k]))
+ continue;
+
+ /* key[2] must name this FIB and this prefix length.  The cast below
+ treats the start of the kv pair as the route address - presumably
+ key[0..1] sit at offset 0 of the pair; confirm against the bihash
+ kv layout. */
+ if ((v->kvp[k].key[2]
+ == (((u64)((fib - im->fibs))<<32) | address_length))
+ && ip6_destination_matches_route
+ (im, dst_address, (ip6_address_t *) &v->kvp[k],
+ address_length))
+ {
+ ip6_address_t * a;
+
+ a = (ip6_address_t *)(&v->kvp[k]);
+
+ vec_add1 (*results, a[0]);
+ vec_add1 (*result_lengths, address_length);
+ }
+ }
+ v++;
+ }
+ }
+}
+
+/* Adjacency remapping is not implemented for ip6: when remaps are
+   pending, log a warning and clear the pending count. */
+void ip6_maybe_remap_adjacencies (ip6_main_t * im,
+                                  u32 table_index_or_table_id,
+                                  u32 flags)
+{
+#if SOONE
+  ip6_fib_t * fib
+    = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
+#endif
+  ip_lookup_main_t * lm = &im->lookup_main;
+
+  if (lm->n_adjacency_remaps != 0)
+    {
+      clib_warning ("unimplemented, please report to vpp-dev@cisco.com");
+
+      /* Treat all remaps as done so the warning is not repeated for them. */
+      lm->n_adjacency_remaps = 0;
+    }
+}
+
+/* Delete every route strictly more specific than address/address_length
+   (prefix lengths address_length + 1 through 128) that matches address,
+   then give adjacency remapping a chance to run. */
+void ip6_delete_matching_routes (ip6_main_t * im,
+                                 u32 table_index_or_table_id,
+                                 u32 flags,
+                                 ip6_address_t * address,
+                                 u32 address_length)
+{
+  /* Scratch vectors kept across calls.  $$$$ static may be OK - this
+     should happen only on thread 0 */
+  static ip6_address_t * matching_addresses;
+  static u8 * matching_address_lengths;
+  ip6_add_del_route_args_t del_args;
+  u32 prefix_len, n;
+
+  vlib_smp_unsafe_warning();
+
+  del_args.flags = IP6_ROUTE_FLAG_DEL | IP6_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
+  del_args.table_index_or_table_id = table_index_or_table_id;
+  del_args.adj_index = ~0;
+  del_args.add_adj = 0;
+  del_args.n_add_adj = 0;
+
+  prefix_len = address_length + 1;
+  while (prefix_len <= 128)
+    {
+      /* Gather the matches first, then delete them one at a time. */
+      ip6_foreach_matching_route (im, table_index_or_table_id, flags,
+                                  address,
+                                  prefix_len,
+                                  &matching_addresses,
+                                  &matching_address_lengths);
+
+      for (n = 0; n < vec_len (matching_addresses); n++)
+        {
+          del_args.dst_address = matching_addresses[n];
+          del_args.dst_address_length = matching_address_lengths[n];
+          ip6_add_del_route (im, &del_args);
+        }
+
+      prefix_len++;
+    }
+
+  ip6_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
+}
+
+/* Graph-node function performing the ip6 destination lookup.
+   For each buffer in the frame: select a FIB (from the RX interface, or
+   from sw_if_index[VLIB_TX] when that is not ~0), look up the destination
+   address, follow adj->explicit_fib_index once if it is set, choose the
+   next node from the adjacency (hop-by-hop packets always go to
+   IP_LOOKUP_NEXT_HOP_BY_HOP), pick one member of a multipath adjacency
+   block from the flow hash, store the adjacency index in the buffer and
+   bump the per-adjacency counter.
+   Buffers are handled two at a time while at least four remain, then one
+   at a time.  Returns the number of buffers in the frame. */
+static uword
+ip6_lookup (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ ip6_main_t * im = &ip6_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
+ u32 n_left_from, n_left_to_next, * from, * to_next;
+ ip_lookup_next_t next;
+ u32 cpu_index = os_get_cpu_number();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+
+ /* Dual loop: needs four buffers left because from[2] and from[3] are
+ prefetched while from[0] and from[1] are processed. */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t * p0, * p1;
+ u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
+ ip_lookup_next_t next0, next1;
+ ip6_header_t * ip0, * ip1;
+ ip_adjacency_t * adj0, * adj1;
+ u32 fib_index0, fib_index1;
+ u32 flow_hash_config0, flow_hash_config1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
+ }
+
+ /* Speculatively enqueue both buffers to the current next frame. */
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+
+ /* A sw_if_index[VLIB_TX] other than ~0 overrides the RX-derived FIB. */
+ fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
+ fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
+ fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
+ fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
+
+ adj_index0 = ip6_fib_lookup_with_table (im, fib_index0,
+ &ip0->dst_address);
+ adj_index1 = ip6_fib_lookup_with_table (im, fib_index1,
+ &ip1->dst_address);
+
+ adj0 = ip_get_adjacency (lm, adj_index0);
+ adj1 = ip_get_adjacency (lm, adj_index1);
+
+ /* The adjacency may redirect the lookup into another FIB (one level). */
+ if (PREDICT_FALSE (adj0->explicit_fib_index != ~0))
+ {
+ adj_index0 = ip6_fib_lookup_with_table
+ (im, adj0->explicit_fib_index, &ip0->dst_address);
+ adj0 = ip_get_adjacency (lm, adj_index0);
+ }
+ if (PREDICT_FALSE (adj1->explicit_fib_index != ~0))
+ {
+ adj_index1 = ip6_fib_lookup_with_table
+ (im, adj1->explicit_fib_index, &ip1->dst_address);
+ adj1 = ip_get_adjacency (lm, adj_index1);
+ }
+
+ next0 = adj0->lookup_next_index;
+ next1 = adj1->lookup_next_index;
+
+ /* Process hop-by-hop options if present */
+ next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ?
+ IP_LOOKUP_NEXT_HOP_BY_HOP : next0;
+ next1 = (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ?
+ IP_LOOKUP_NEXT_HOP_BY_HOP : next1;
+
+ vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer(p1)->ip.flow_hash = 0;
+
+ /* The flow hash is only needed to choose among multiple adjacencies. */
+ if (PREDICT_FALSE(adj0->n_adj > 1))
+ {
+ flow_hash_config0 =
+ vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
+ vnet_buffer (p0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, flow_hash_config0);
+ }
+
+ if (PREDICT_FALSE(adj1->n_adj > 1))
+ {
+ /* Packet 1 must use the hash configuration of its own FIB. */
+ flow_hash_config1 =
+ vec_elt_at_index (im->fibs,fib_index1)->flow_hash_config;
+
+ vnet_buffer (p1)->ip.flow_hash =
+ ip6_compute_flow_hash (ip1, flow_hash_config1);
+ }
+
+ ASSERT (adj0->n_adj > 0);
+ ASSERT (adj1->n_adj > 0);
+ ASSERT (is_pow2 (adj0->n_adj));
+ ASSERT (is_pow2 (adj1->n_adj));
+ /* n_adj is a power of two, so masking the hash selects one member. */
+ adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
+ adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, cpu_index, adj_index1, 1,
+ vlib_buffer_length_in_chain (vm, p1));
+
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ /* Bit 0: packet 0 mispredicted; bit 1: packet 1 mispredicted. */
+ wrong_next = (next0 != next) + 2*(next1 != next);
+ if (PREDICT_FALSE (wrong_next != 0))
+ {
+ switch (wrong_next)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = pi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ break;
+
+ case 3:
+ /* A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node, next0, pi0);
+ vlib_set_next_frame_buffer (vm, node, next1, pi1);
+ if (next0 == next1)
+ {
+ /* A B B */
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next1;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ }
+ }
+ }
+ }
+
+ /* Single loop: same processing, one buffer at a time. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ ip6_header_t * ip0;
+ u32 pi0, adj_index0;
+ ip_lookup_next_t next0;
+ ip_adjacency_t * adj0;
+ u32 fib_index0, flow_hash_config0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
+ fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
+
+ flow_hash_config0 =
+ vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
+
+ adj_index0 = ip6_fib_lookup_with_table (im, fib_index0,
+ &ip0->dst_address);
+
+ adj0 = ip_get_adjacency (lm, adj_index0);
+
+ if (PREDICT_FALSE (adj0->explicit_fib_index != ~0))
+ {
+ adj_index0 = ip6_fib_lookup_with_table
+ (im, adj0->explicit_fib_index, &ip0->dst_address);
+ adj0 = ip_get_adjacency (lm, adj_index0);
+ }
+
+ next0 = adj0->lookup_next_index;
+ next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ?
+ IP_LOOKUP_NEXT_HOP_BY_HOP : next0;
+
+ vnet_buffer (p0)->ip.flow_hash = 0;
+
+ if (PREDICT_FALSE(adj0->n_adj > 1))
+ {
+ flow_hash_config0 =
+ vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config;
+ vnet_buffer (p0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, flow_hash_config0);
+ }
+
+ ASSERT (adj0->n_adj > 0);
+ ASSERT (is_pow2 (adj0->n_adj));
+ adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+
+ vlib_increment_combined_counter
+ (cm, cpu_index, adj_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ /* Misprediction: undo the speculative enqueue, switch to the frame
+ for next0 and enqueue the buffer there instead. */
+ if (PREDICT_FALSE (next0 != next))
+ {
+ n_left_to_next += 1;
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next0;
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* Initialize ADJ as the interface (connected prefix) adjacency for
+ * SW_IF_INDEX.
+ *
+ * When the supporting hardware interface is of the ethernet or SRP class,
+ * the adjacency points at ip6_discover_neighbor_node and records
+ * IF_ADDRESS_INDEX; for every other hardware class it points at
+ * ip6_rewrite_node.  In both cases the explicit FIB index is cleared (~0)
+ * and the rewrite header is filled in by vnet_rewrite_for_sw_interface. */
+void ip6_adjacency_set_interface_route (vnet_main_t * vnm,
+                                        ip_adjacency_t * adj,
+                                        u32 sw_if_index,
+                                        u32 if_address_index)
+{
+  vnet_hw_interface_t * sup_hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  u32 rewrite_node_index;
+
+  if (sup_hw->hw_class_index != ethernet_hw_interface_class.index
+      && sup_hw->hw_class_index != srp_hw_interface_class.index)
+    {
+      adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+      rewrite_node_index = ip6_rewrite_node.index;
+    }
+  else
+    {
+      adj->if_address_index = if_address_index;
+      adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
+      rewrite_node_index = ip6_discover_neighbor_node.index;
+    }
+
+  adj->explicit_fib_index = ~0;
+
+  vnet_rewrite_for_sw_interface (vnm,
+                                 VNET_L3_PACKET_TYPE_IP6,
+                                 sw_if_index,
+                                 rewrite_node_index,
+                                 VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
+                                 &adj->rewrite_header,
+                                 sizeof (adj->rewrite_data));
+}
+
+/* Install the FIB entries implied by interface address A on SW_IF_INDEX
+ * into the table FIB_INDEX:
+ *   - when the prefix is shorter than /128, a route for the prefix itself
+ *     using a freshly allocated interface adjacency, whose index is also
+ *     remembered in a->neighbor_probe_adj_index;
+ *   - always, a /128 route for the address using a second adjacency that
+ *     points at either the classifier or the local node.
+ * Adjacency add callbacks are invoked for every adjacency created here.
+ */ static void
+ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
+ ip6_main_t * im, u32 fib_index,
+ ip_interface_address_t * a)
+{
+ ip_lookup_main_t * lm = &im->lookup_main;
+ ip_adjacency_t * adj;
+ ip6_address_t * address = ip_interface_address_get_address (lm, a);
+ ip6_add_del_route_args_t x;
+ vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ u32 classify_table_index;
+
+ /* Add the connected prefix (e.g. 2001:db8::/64) as an interface route
+    (neighbor discovery for Ethernet). */
+ x.table_index_or_table_id = fib_index;
+ x.flags = (IP6_ROUTE_FLAG_ADD
+ | IP6_ROUTE_FLAG_FIB_INDEX
+ | IP6_ROUTE_FLAG_NO_REDISTRIBUTE);
+ x.dst_address = address[0];
+ x.dst_address_length = a->address_length;
+ x.n_add_adj = 0;
+ x.add_adj = 0;
+
+ a->neighbor_probe_adj_index = ~0; /* stays ~0 when no prefix route is installed below */
+ if (a->address_length < 128)
+ {
+ adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
+ &x.adj_index);
+ ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool); /* last argument: pool index of A */
+ ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
+ ip6_add_del_route (im, &x);
+ a->neighbor_probe_adj_index = x.adj_index;
+ }
+
+ /* Add e.g. ::1/128 as local to this host. */
+ adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
+ &x.adj_index);
+
+ classify_table_index = ~0; /* ~0 means: no classify table for this interface */
+ if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
+ classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
+ if (classify_table_index != (u32) ~0)
+ {
+ adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
+ adj->classify_table_index = classify_table_index;
+ }
+ else
+ adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
+
+ adj->if_address_index = a - lm->if_address_pool;
+ adj->rewrite_header.sw_if_index = sw_if_index;
+ adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
+ adj->rewrite_header.data_bytes = 0;
+ ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
+ x.dst_address_length = 128; /* reuse X, now as the host route for the address itself */
+ ip6_add_del_route (im, &x);
+}
+
+/* Remove the FIB entries associated with interface address
+ * ADDRESS/ADDRESS_LENGTH from the table FIB_INDEX: the prefix route
+ * (when shorter than /128 and not the fe80::/64 prefix), the /128 route
+ * for the address, and finally whatever ip6_delete_matching_routes
+ * removes for the same address and length.
+ */ static void
+ip6_del_interface_routes (ip6_main_t * im, u32 fib_index,
+ ip6_address_t * address, u32 address_length)
+{
+ ip6_add_del_route_args_t x;
+
+ /* Delete the connected prefix route (e.g. 2001:db8::/64) installed for
+    this interface address. */
+ x.table_index_or_table_id = fib_index;
+ x.flags = (IP6_ROUTE_FLAG_DEL
+ | IP6_ROUTE_FLAG_FIB_INDEX
+ | IP6_ROUTE_FLAG_NO_REDISTRIBUTE);
+ x.dst_address = address[0];
+ x.dst_address_length = address_length;
+ x.adj_index = ~0;
+ x.n_add_adj = 0;
+ x.add_adj = 0;
+
+ if (address_length < 128)
+ {
+ /* Don't wipe out fe80::0/64 */
+ if (address_length != 64 ||
+ address[0].as_u64[0] != clib_net_to_host_u64(0xfe80000000000000ULL)) /* constant is converted so it can be compared with the stored address word */
+ ip6_add_del_route (im, &x);
+ }
+
+ x.dst_address_length = 128; /* now delete the host route for the address itself */
+ ip6_add_del_route (im, &x);
+
+ ip6_delete_matching_routes (im,
+ fib_index,
+ IP6_ROUTE_FLAG_FIB_INDEX,
+ address,
+ address_length);
+}
+
+/* Triple naming one address on one software interface.
+ * NOTE(review): not referenced in the visible part of this file - confirm
+ * whether it is still used.
+ */ typedef struct {
+ u32 sw_if_index; /* software interface index */
+ ip6_address_t address; /* the IPv6 address */
+ u32 length; /* presumably the prefix length in bits - TODO confirm */
+} ip6_interface_address_t;
+
+/* Forward declaration; the definition follows immediately below (it uses
+ * different parameter names for the address and its length).
+ */ static clib_error_t *
+ip6_add_del_interface_address_internal (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * new_address,
+ u32 new_length,
+ u32 redistribute,
+ u32 insert_routes,
+ u32 is_del);
+
+/* Add (is_del == 0) or delete (is_del != 0) ADDRESS/ADDRESS_LENGTH on
+ * SW_IF_INDEX.
+ *
+ * The address is registered with / removed from the lookup main's
+ * interface address pool.  If that changed the pool, and the interface is
+ * admin-up and INSERT_ROUTES is set, the matching interface routes are
+ * added or removed; afterwards every registered add/del interface address
+ * callback is invoked.
+ *
+ * Returns the error reported by ip_interface_address_add_del, or 0.
+ * VM and REDISTRIBUTE are not used by this body.
+ */ static clib_error_t *
+ip6_add_del_interface_address_internal (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length,
+ u32 redistribute,
+ u32 insert_routes,
+ u32 is_del)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ ip6_main_t * im = &ip6_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ clib_error_t * error;
+ u32 if_address_index;
+ ip6_address_fib_t ip6_af, * addr_fib = 0;
+
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+ ip6_addr_fib_init (&ip6_af, address,
+ vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
+ vec_add1 (addr_fib, ip6_af); /* one-element vector handed to ip_interface_address_add_del; freed at done: */
+
+ {
+ uword elts_before = pool_elts (lm->if_address_pool);
+
+ error = ip_interface_address_add_del
+ (lm,
+ sw_if_index,
+ addr_fib,
+ address_length,
+ is_del,
+ &if_address_index);
+ if (error)
+ goto done;
+
+ /* Pool size unchanged (e.g. a duplicate address was added): skip the
+    route update and the callbacks. */
+ if (elts_before == pool_elts (lm->if_address_pool))
+ goto done;
+ }
+
+ if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
+ {
+ if (is_del)
+ ip6_del_interface_routes (im, ip6_af.fib_index, address,
+ address_length);
+
+ else
+ ip6_add_interface_routes (vnm, sw_if_index,
+ im, ip6_af.fib_index,
+ pool_elt_at_index (lm->if_address_pool, if_address_index));
+ }
+
+ {
+ ip6_add_del_interface_address_callback_t * cb;
+ vec_foreach (cb, im->add_del_interface_address_callbacks)
+ cb->function (im, cb->function_opaque, sw_if_index,
+ address, address_length,
+ if_address_index,
+ is_del);
+ }
+
+ done:
+ vec_free (addr_fib);
+ return error;
+}
+
+/* Public entry point for adding (is_del == 0) or deleting an IPv6 address
+ * on SW_IF_INDEX.  Delegates to the internal routine with both the
+ * redistribute and the insert-routes options switched on, and returns
+ * its result unchanged. */
+clib_error_t *
+ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
+                               ip6_address_t * address, u32 address_length,
+                               u32 is_del)
+{
+  const u32 redistribute = 1;
+  const u32 insert_routes = 1;
+
+  return ip6_add_del_interface_address_internal (vm, sw_if_index,
+                                                 address, address_length,
+                                                 redistribute,
+                                                 insert_routes,
+                                                 is_del);
+}
+
+/* Software interface admin up/down handler (registered below).
+ *
+ * Makes sure the per-interface tables cover SW_IF_INDEX, then walks every
+ * address configured on the interface: when FLAGS has the admin-up bit set
+ * the interface routes for the address are installed, otherwise they are
+ * removed.  Always returns 0 (no error).
+ */ clib_error_t *
+ip6_sw_interface_admin_up_down (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 flags)
+{
+ ip6_main_t * im = &ip6_main;
+ ip_interface_address_t * ia;
+ ip6_address_t * a;
+ u32 is_admin_up, fib_index;
+
+ /* Fill in lookup tables with default table (0). */
+ vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+
+ vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
+
+ is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
+
+ foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered */,
+ ({
+ a = ip_interface_address_get_address (&im->lookup_main, ia);
+ if (is_admin_up)
+ ip6_add_interface_routes (vnm, sw_if_index,
+ im, fib_index,
+ ia);
+ else
+ ip6_del_interface_routes (im, fib_index,
+ a, ia->address_length);
+ }));
+
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
+
+/* Software interface add/delete handler (registered below).
+ *
+ * For every cast type, lazily initializes the rx feature configuration
+ * (start node "ip6-input" plus the feature node table below) the first
+ * time it is needed, then adds (is_add != 0) or removes the
+ * IP6_RX_FEATURE_LOOKUP feature for SW_IF_INDEX and stores the resulting
+ * config index.  Always returns 0 (no error).
+ */ clib_error_t *
+ip6_sw_interface_add_del (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 is_add)
+{
+ vlib_main_t * vm = vnm->vlib_main;
+ ip6_main_t * im = &ip6_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ u32 ci, cast;
+
+ for (cast = 0; cast < VNET_N_CAST; cast++)
+ {
+ ip_config_main_t * cm = &lm->rx_config_mains[cast];
+ vnet_config_main_t * vcm = &cm->config_main;
+
+ /* FIXME multicast. */
+ if (! vcm->node_index_by_feature_index) /* not yet initialized for this cast type */
+ {
+ char * start_nodes[] = { "ip6-input", };
+ char * feature_nodes[] = {
+ [IP6_RX_FEATURE_CHECK_ACCESS] = "ip6-inacl",
+ [IP6_RX_FEATURE_IPSEC] = "ipsec-input-ip6",
+ [IP6_RX_FEATURE_L2TPV3] = "l2tp-decap",
+ [IP6_RX_FEATURE_VPATH] = "vpath-input-ip6",
+ [IP6_RX_FEATURE_LOOKUP] = "ip6-lookup",
+ };
+ vnet_config_init (vm, vcm,
+ start_nodes, ARRAY_LEN (start_nodes),
+ feature_nodes, ARRAY_LEN (feature_nodes));
+ }
+
+ vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
+ ci = cm->config_index_by_sw_if_index[sw_if_index];
+
+ if (is_add)
+ ci = vnet_config_add_feature (vm, vcm,
+ ci,
+ IP6_RX_FEATURE_LOOKUP,
+ /* config data */ 0,
+ /* # bytes of config data */ 0);
+ else
+ ci = vnet_config_del_feature (vm, vcm,
+ ci,
+ IP6_RX_FEATURE_LOOKUP,
+ /* config data */ 0,
+ /* # bytes of config data */ 0);
+
+ cm->config_index_by_sw_if_index[sw_if_index] = ci; /* remember the updated config index */
+ }
+ return /* no error */ 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
+
+/* Graph node "ip6-lookup": runs ip6_lookup over vectors of u32 buffer
+ * indices.  The next-node table is indexed by IP_LOOKUP_NEXT_* and names
+ * the IPv6 node used for each lookup result.
+ */ VLIB_REGISTER_NODE (ip6_lookup_node) = {
+ .function = ip6_lookup,
+ .name = "ip6-lookup",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP_LOOKUP_N_NEXT,
+ .next_nodes = {
+ [IP_LOOKUP_NEXT_MISS] = "ip6-miss",
+ [IP_LOOKUP_NEXT_DROP] = "ip6-drop",
+ [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",
+ [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",
+ [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",
+ [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",
+ [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify",
+ [IP_LOOKUP_NEXT_MAP] = "ip6-map",
+ [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t",
+ [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd",
+ [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",
+ [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop",
+ [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop",
+ },
+};
+
+/* Per-packet trace record written by ip6_forward_next_trace and rendered
+ * by format_ip6_forward_next_trace.
+ */ typedef struct {
+ /* Adjacency taken. */
+ u32 adj_index;
+ u32 flow_hash; /* copied from the buffer's ip.flow_hash */
+
+ /* Packet data, possibly *after* rewrite. */
+ u8 packet_data[64 - 1*sizeof(u32)]; /* NOTE(review): only one u32 is subtracted although two u32 fields precede this array - confirm the intended record size */
+} ip6_forward_next_trace_t;
+
+/* Format callback for ip6_forward_next_trace_t records.
+ *
+ * Consumes (vlib_main_t *, vlib_node_t *, ip6_forward_next_trace_t *) from
+ * ARGS, appends the adjacency and flow hash to S and, when the adjacency
+ * is a rewrite adjacency, a second line with the captured packet data.
+ * Returns the extended string. */
+static u8 * format_ip6_forward_next_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip6_forward_next_trace_t * trace = va_arg (*args, ip6_forward_next_trace_t *);
+  vnet_main_t * vnm = vnet_get_main();
+  ip_lookup_main_t * lookup = &ip6_main.lookup_main;
+  uword indent = format_get_indent (s);
+  ip_adjacency_t * adj = ip_get_adjacency (lookup, trace->adj_index);
+
+  s = format (s, "adjacency: %U flow hash: 0x%08x",
+              format_ip_adjacency,
+              vnm, lookup, trace->adj_index, trace->flow_hash);
+
+  /* Only rewrite adjacencies get the packet data dump. */
+  if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
+    s = format (s, "\n%U%U",
+                format_white_space, indent,
+                format_ip_adjacency_packet_data,
+                vnm, lookup, trace->adj_index,
+                trace->packet_data, sizeof (trace->packet_data));
+
+  return s;
+}
+
+/* Write one trace record for buffer B if it carries the traced flag:
+ * adjacency index (rx or tx slot selected by WHICH_ADJ_INDEX), flow hash
+ * and the leading bytes of the packet. */
+static inline void
+ip6_forward_next_trace_one (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            vlib_buffer_t * b,
+                            vlib_rx_or_tx_t which_adj_index)
+{
+  ip6_forward_next_trace_t * rec;
+
+  if (! (b->flags & VLIB_BUFFER_IS_TRACED))
+    return;
+
+  rec = vlib_add_trace (vm, node, b, sizeof (rec[0]));
+  rec->adj_index = vnet_buffer (b)->ip.adj_index[which_adj_index];
+  rec->flow_hash = vnet_buffer (b)->ip.flow_hash;
+  memcpy (rec->packet_data,
+          vlib_buffer_get_current (b),
+          sizeof (rec->packet_data));
+}
+
+/* Common trace function for all ip6-forward next nodes: walks every buffer
+ * of FRAME and records a trace entry for each traced buffer. */
+void
+ip6_forward_next_trace (vlib_main_t * vm,
+                        vlib_node_runtime_t * node,
+                        vlib_frame_t * frame,
+                        vlib_rx_or_tx_t which_adj_index)
+{
+  u32 n_left = frame->n_vectors;
+  u32 * from = vlib_frame_vector_args (frame);
+
+  /* Two buffers per iteration while at least four remain, so that the
+     following pair can be prefetched. */
+  while (n_left >= 4)
+    {
+      vlib_buffer_t * first, * second;
+
+      vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+      vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
+
+      first = vlib_get_buffer (vm, from[0]);
+      second = vlib_get_buffer (vm, from[1]);
+
+      ip6_forward_next_trace_one (vm, node, first, which_adj_index);
+      ip6_forward_next_trace_one (vm, node, second, which_adj_index);
+
+      from += 2;
+      n_left -= 2;
+    }
+
+  /* Remaining buffers, one at a time. */
+  for (; n_left >= 1; from += 1, n_left -= 1)
+    ip6_forward_next_trace_one (vm, node,
+                                vlib_get_buffer (vm, from[0]),
+                                which_adj_index);
+}
+
+/* Shared body of the ip6 drop/punt/miss nodes: hands every buffer of FRAME
+ * to next index 0 via vlib_error_drop_buffers, tagged with ERROR_CODE
+ * against the ip6-input node, then traces the frame if tracing is enabled
+ * on NODE.  Returns the number of packets in the frame. */
+static uword
+ip6_drop_or_punt (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_frame_t * frame,
+                  ip6_error_t error_code)
+{
+  uword n_in_frame = frame->n_vectors;
+  u32 * buffer_indices = vlib_frame_vector_args (frame);
+
+  vlib_error_drop_buffers (vm, node,
+                           buffer_indices,
+                           /* stride */ 1,
+                           n_in_frame,
+                           /* next */ 0,
+                           ip6_input_node.index,
+                           error_code);
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    {
+      ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+    }
+
+  return n_in_frame;
+}
+
+/* Node function for "ip6-drop": disposes of the whole frame with the
+ * adjacency-drop error. */
+static uword
+ip6_drop (vlib_main_t * vm,
+          vlib_node_runtime_t * node,
+          vlib_frame_t * frame)
+{
+  uword n_handled;
+
+  n_handled = ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_DROP);
+  return n_handled;
+}
+
+/* Node function for "ip6-punt": disposes of the whole frame with the
+ * adjacency-punt error. */
+static uword
+ip6_punt (vlib_main_t * vm,
+          vlib_node_runtime_t * node,
+          vlib_frame_t * frame)
+{
+  uword n_handled;
+
+  n_handled = ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT);
+  return n_handled;
+}
+
+/* Node function for "ip6-miss": disposes of the whole frame with the
+ * destination-lookup-miss error. */
+static uword
+ip6_miss (vlib_main_t * vm,
+          vlib_node_runtime_t * node,
+          vlib_frame_t * frame)
+{
+  uword n_handled;
+
+  n_handled = ip6_drop_or_punt (vm, node, frame, IP6_ERROR_DST_LOOKUP_MISS);
+  return n_handled;
+}
+
+/* Graph node "ip6-drop": runs ip6_drop; its single next node is
+ * "error-drop".
+ */ VLIB_REGISTER_NODE (ip6_drop_node,static) = {
+ .function = ip6_drop,
+ .name = "ip6-drop",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip6_forward_next_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+/* Graph node "ip6-punt": runs ip6_punt; its single next node is
+ * "error-punt".
+ */ VLIB_REGISTER_NODE (ip6_punt_node,static) = {
+ .function = ip6_punt,
+ .name = "ip6-punt",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip6_forward_next_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-punt",
+ },
+};
+
+/* Graph node "ip6-miss": runs ip6_miss; its single next node is
+ * "error-drop".
+ */ VLIB_REGISTER_NODE (ip6_miss_node,static) = {
+ .function = ip6_miss,
+ .name = "ip6-miss",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip6_forward_next_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+/* Graph node "ip6-multicast".  It reuses ip6_drop as its node function,
+ * so every packet reaching it is sent to "error-drop" with the
+ * adjacency-drop error.
+ */ VLIB_REGISTER_NODE (ip6_multicast_node,static) = {
+ .function = ip6_drop,
+ .name = "ip6-multicast",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip6_forward_next_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+/* Compute TCP/UDP/ICMP6 checksum in software.
+ *
+ * IP0 is the IPv6 header; the data to checksum starts right behind it.
+ * P0 is the buffer holding the packet and may be 0, in which case all
+ * payload bytes are read contiguously from behind IP0.
+ *
+ * The sum is seeded with the payload length and protocol fields plus the
+ * source and destination addresses.  A hop-by-hop extension header directly
+ * behind the IPv6 header is skipped (it is asserted to be followed by
+ * ICMP6).
+ *
+ * *BOGUS_LENGTHP is cleared on entry.  When the buffer chain ends before
+ * the advertised payload length is consumed (or, in the DPDK build, when
+ * segments are left over), it is set to 1 and 0xfefe is returned.
+ * Otherwise the complemented, folded sum is returned.
+ *
+ * NOTE(review): when a hop-by-hop header is skipped, the seed above still
+ * uses ip0->payload_length and ip0->protocol unchanged - confirm this
+ * matches the intended upper-layer pseudo-header.
+ * NOTE(review): payload_length_host_byte_order is a u16 and skip_bytes is
+ * not checked against it - confirm callers never pass a header whose
+ * extension length exceeds the payload length.
+ */
+u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6_header_t * ip0, int *bogus_lengthp)
+{
+ ip_csum_t sum0;
+ u16 sum16, payload_length_host_byte_order;
+ u32 i, n_this_buffer, n_bytes_left;
+ u32 headers_size = sizeof(ip0[0]); /* bytes in front of the checksummed data within the first buffer */
+ void * data_this_buffer;
+
+ ASSERT(bogus_lengthp);
+ *bogus_lengthp = 0;
+
+ /* Initialize checksum with ip header. */
+ sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
+ payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
+ data_this_buffer = (void *) (ip0 + 1);
+
+ for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
+ {
+ sum0 = ip_csum_with_carry (sum0,
+ clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
+ sum0 = ip_csum_with_carry (sum0,
+ clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
+ }
+
+ /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
+ if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ u32 skip_bytes;
+ ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *)data_this_buffer;
+
+ /* validate really icmp6 next */
+ ASSERT(ext_hdr->next_hdr == IP_PROTOCOL_ICMP6);
+
+ skip_bytes = 8* (1 + ext_hdr->n_data_u64s); /* extension header size: 8 bytes plus 8 per additional unit */
+ data_this_buffer = (void *)((u8 *)data_this_buffer + skip_bytes);
+
+ payload_length_host_byte_order -= skip_bytes;
+ headers_size += skip_bytes;
+ }
+
+ n_bytes_left = n_this_buffer = payload_length_host_byte_order;
+#if DPDK > 0
+ if (p0)
+ {
+ struct rte_mbuf *mb = ((struct rte_mbuf *)p0)-1; /* the mbuf header is located directly in front of the vlib buffer */
+ u8 nb_segs = mb->nb_segs;
+
+ n_this_buffer = (p0->current_length > headers_size ?
+ p0->current_length - headers_size : 0);
+ while (n_bytes_left)
+ {
+ sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
+ n_bytes_left -= n_this_buffer;
+
+ mb = mb->next;
+ nb_segs--;
+ if ((nb_segs == 0) || (mb == 0))
+ break;
+
+ data_this_buffer = rte_ctrlmbuf_data(mb);
+ n_this_buffer = mb->data_len;
+ }
+ if (n_bytes_left || nb_segs) /* payload length and segment chain disagree */
+ {
+ *bogus_lengthp = 1;
+ return 0xfefe;
+ }
+ }
+ else sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
+#else
+ if (p0 && n_this_buffer + headers_size > p0->current_length) /* payload continues past the first buffer: clamp to what it holds */
+ n_this_buffer = p0->current_length > headers_size ? p0->current_length - headers_size : 0;
+ while (1)
+ {
+ sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
+ n_bytes_left -= n_this_buffer; /* NOTE(review): unsigned; wraps if a chained buffer holds more than the bytes still expected - confirm that cannot happen */
+ if (n_bytes_left == 0)
+ break;
+
+ if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT)) /* with p0 == 0 the loop has already exited above, since n_this_buffer == n_bytes_left */
+ {
+ *bogus_lengthp = 1;
+ return 0xfefe;
+ }
+ p0 = vlib_get_buffer (vm, p0->next_buffer);
+ data_this_buffer = vlib_buffer_get_current (p0);
+ n_this_buffer = p0->current_length;
+ }
+#endif /* DPDK */
+
+ sum16 = ~ ip_csum_fold (sum0);
+
+ return sum16;
+}
+
+/* Verify the L4 checksum of the IPv6 packet at the current position of P0
+ * and record the outcome in the buffer flags.
+ *
+ * The "computed" flag is always set.  The "correct" flag is set when the
+ * packet is UDP with a zero checksum field, or when the software checksum
+ * over the packet evaluates to zero.  Returns the updated buffer flags. */
+u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
+{
+  ip6_header_t * ip6 = vlib_buffer_get_current (p0);
+  udp_header_t * udp = (void *) (ip6 + 1);
+  int bogus_length;
+
+  /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */
+  ASSERT (ip6->protocol == IP_PROTOCOL_TCP
+          || ip6->protocol == IP_PROTOCOL_ICMP6
+          || ip6->protocol == IP_PROTOCOL_UDP
+          || ip6->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
+
+  if (ip6->protocol == IP_PROTOCOL_UDP && udp->checksum == 0)
+    {
+      /* Explicit zero UDP checksum: accepted without computing anything. */
+      p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
+                    | IP_BUFFER_L4_CHECKSUM_CORRECT);
+    }
+  else
+    {
+      u16 folded = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip6, &bogus_length);
+
+      p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
+                    | ((folded == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
+    }
+
+  return p0->flags;
+}
+
+/* Node function for "ip6-local": packets addressed to this host.
+ *
+ * For each packet the builtin protocol type and the next node are looked
+ * up by IPv6 protocol number.  The L4 checksum is verified in software
+ * unless it is already known good, is an explicit zero UDP checksum, or the
+ * protocol is not a builtin one; for UDP the UDP length is checked against
+ * the IPv6 payload length.  Non-ICMP packets that passed so far also get a
+ * source address lookup.  IP6_ERROR_UNKNOWN_PROTOCOL doubles as the
+ * "no error" value: any other error code redirects the packet to
+ * IP_LOCAL_NEXT_DROP.  Errors are accounted against the ip6-input node.
+ *
+ * Returns the number of packets in FRAME.
+ */ static uword
+ip6_local (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ ip6_main_t * im = &ip6_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ ip_local_next_t next_index;
+ u32 * from, * to_next, n_left_from, n_left_to_next;
+ vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ /* Dual loop: two packets per iteration. */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t * p0, * p1;
+ ip6_header_t * ip0, * ip1;
+ udp_header_t * udp0, * udp1;
+ u32 pi0, ip_len0, udp_len0, flags0, next0;
+ u32 pi1, ip_len1, udp_len1, flags1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, type0, good_l4_checksum0;
+ u8 error1, type1, good_l4_checksum1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
+ type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol];
+
+ next0 = lm->local_next_by_ip_protocol[ip0->protocol];
+ next1 = lm->local_next_by_ip_protocol[ip1->protocol];
+
+ flags0 = p0->flags;
+ flags1 = p1->flags;
+
+ good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ good_l4_checksum1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+
+ udp0 = ip6_next_header (ip0);
+ udp1 = ip6_next_header (ip1);
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
+ good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0;
+
+ good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN; /* nothing to verify for non-builtin protocols */
+ good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN;
+
+ /* Verify UDP length. */
+ ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
+ ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ udp_len1 = clib_net_to_host_u16 (udp1->length);
+
+ len_diff0 = ip_len0 - udp_len0;
+ len_diff1 = ip_len1 - udp_len1;
+
+ len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0; /* the difference is only meaningful for UDP */
+ len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0;
+
+ if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
+ && ! good_l4_checksum0
+ && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
+ {
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
+ good_l4_checksum0 =
+ (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ }
+ if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN
+ && ! good_l4_checksum1
+ && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
+ {
+ flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1);
+ good_l4_checksum1 =
+ (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL; /* also serves as the "no error so far" value, see next0/next1 below */
+
+ error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
+ error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1;
+
+ /* The checksum error code is derived by adding the builtin protocol
+    type to IP6_ERROR_UDP_CHECKSUM; these asserts pin that enum layout. */
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM);
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_TCP == IP6_ERROR_TCP_CHECKSUM);
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM);
+ error0 = (! good_l4_checksum0
+ ? IP6_ERROR_UDP_CHECKSUM + type0
+ : error0);
+ error1 = (! good_l4_checksum1
+ ? IP6_ERROR_UDP_CHECKSUM + type1
+ : error1);
+
+ /* Drop packets from unroutable hosts. */
+ /* If this is a neighbor solicitation (ICMP), skip source RPF check */
+ if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP)
+ {
+ u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
+ error0 = (lm->miss_adj_index == src_adj_index0
+ ? IP6_ERROR_SRC_LOOKUP_MISS
+ : error0);
+ }
+ if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && type1 != IP_BUILTIN_PROTOCOL_ICMP)
+ {
+ u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1);
+ error1 = (lm->miss_adj_index == src_adj_index1
+ ? IP6_ERROR_SRC_LOOKUP_MISS
+ : error1);
+ }
+
+ next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+ next1 = error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ /* Single loop: same processing, one packet per iteration. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ ip6_header_t * ip0;
+ udp_header_t * udp0;
+ u32 pi0, ip_len0, udp_len0, flags0, next0;
+ i32 len_diff0;
+ u8 error0, type0, good_l4_checksum0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
+ next0 = lm->local_next_by_ip_protocol[ip0->protocol];
+
+ flags0 = p0->flags;
+
+ good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+
+ udp0 = ip6_next_header (ip0);
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
+
+ good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
+
+ /* Verify UDP length. */
+ ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+
+ len_diff0 = ip_len0 - udp_len0;
+
+ len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
+
+ if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
+ && ! good_l4_checksum0
+ && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)))
+ {
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
+ good_l4_checksum0 =
+ (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
+
+ error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
+
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == IP6_ERROR_UDP_CHECKSUM);
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_TCP == IP6_ERROR_TCP_CHECKSUM);
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == IP6_ERROR_ICMP_CHECKSUM);
+ error0 = (! good_l4_checksum0
+ ? IP6_ERROR_UDP_CHECKSUM + type0
+ : error0);
+
+ /* If this is a neighbor solicitation (ICMP), skip source RPF check */
+ if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP)
+ {
+ u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
+ error0 = (lm->miss_adj_index == src_adj_index0
+ ? IP6_ERROR_SRC_LOOKUP_MISS
+ : error0);
+ }
+
+ next0 = error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+
+ p0->error = error_node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* Graph node "ip6-local": runs ip6_local.  The next-node table is indexed
+ * by IP_LOCAL_NEXT_*; the TCP lookup entry is commented out, so no name is
+ * given for that slot here.  Further next nodes are appended at run time
+ * by ip6_register_protocol.
+ */ VLIB_REGISTER_NODE (ip6_local_node,static) = {
+ .function = ip6_local,
+ .name = "ip6-local",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip6_forward_next_trace,
+
+ .n_next_nodes = IP_LOCAL_N_NEXT,
+ .next_nodes = {
+ [IP_LOCAL_NEXT_DROP] = "error-drop",
+ [IP_LOCAL_NEXT_PUNT] = "error-punt",
+ // [IP_LOCAL_NEXT_TCP_LOOKUP] = "ip6-tcp-lookup",
+ [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
+ [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
+ },
+};
+
+/* Make ip6-local hand packets carrying IP protocol number PROTOCOL to the
+ * graph node NODE_INDEX: the node is added as a next node of ip6-local and
+ * the resulting next index is stored in the per-protocol dispatch table. */
+void ip6_register_protocol (u32 protocol, u32 node_index)
+{
+  vlib_main_t * vm = vlib_get_main();
+  ip_lookup_main_t * lookup = &ip6_main.lookup_main;
+
+  ASSERT (protocol < ARRAY_LEN (lookup->local_next_by_ip_protocol));
+
+  lookup->local_next_by_ip_protocol[protocol] =
+    vlib_node_add_next (vm, ip6_local_node.index, node_index);
+}
+
+/* Statically declared next nodes of ip6-discover-neighbor. */ typedef enum {
+ IP6_DISCOVER_NEIGHBOR_NEXT_DROP, /* mapped to "error-drop" in the node registration */
+ IP6_DISCOVER_NEIGHBOR_N_NEXT, /* number of static next nodes */
+} ip6_discover_neighbor_next_t;
+
+/* Error/counter indices of ip6-discover-neighbor; the matching strings are
+ * in ip6_discover_neighbor_error_strings.
+ */ typedef enum {
+ IP6_DISCOVER_NEIGHBOR_ERROR_DROP, /* packet dropped without sending a solicitation */
+ IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT, /* a neighbor solicitation was sent for this packet */
+} ip6_discover_neighbor_error_t;
+
+/* Node function for "ip6-discover-neighbor".
+ *
+ * Every packet of FRAME is enqueued to the drop next node, tagged with
+ * either the DROP or the REQUEST_SENT error.  For each packet that is not
+ * throttled and whose outgoing hardware interface has link up, a neighbor
+ * solicitation is built from im->discover_neighbor_packet_template and
+ * sent to the next index recorded for that hardware interface.
+ *
+ * Throttling uses a 256-bit bitmap indexed by a hash of the outgoing
+ * software interface and the destination address.  Seeds and bitmap are
+ * reset whenever more than 1e-3 (in vlib_time_now units) has passed since
+ * the previous reset; a packet whose bit is already set is dropped without
+ * sending another solicitation.
+ *
+ * Returns the number of packets in FRAME.
+ */
+static uword
+ip6_discover_neighbor (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ ip6_main_t * im = &ip6_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ u32 * from, * to_next_drop;
+ uword n_left_from, n_left_to_next_drop;
+ static f64 time_last_seed_change = -1e100;
+ static u32 hash_seeds[3];
+ static uword hash_bitmap[256 / BITS (uword)];
+ f64 time_now;
+ int bogus_length;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ time_now = vlib_time_now (vm);
+ if (time_now - time_last_seed_change > 1e-3)
+ {
+ uword i;
+ u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
+ sizeof (hash_seeds));
+ for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
+ hash_seeds[i] = r[i];
+
+ /* Mark all hash keys as been not-seen before. */
+ for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
+ hash_bitmap[i] = 0;
+
+ time_last_seed_change = time_now;
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
+ to_next_drop, n_left_to_next_drop);
+
+ while (n_left_from > 0 && n_left_to_next_drop > 0)
+ {
+ vlib_buffer_t * p0;
+ ip6_header_t * ip0;
+ u32 pi0, adj_index0, a0, b0, c0, sw_if_index0, drop0;
+ /* The mask must be as wide as a bitmap word, otherwise bits above 31
+    would be lost on 64-bit targets. */
+ uword bm0, m0;
+ ip_adjacency_t * adj0;
+ vnet_hw_interface_t * hw_if0;
+ u32 next0;
+
+ pi0 = from[0];
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ adj0 = ip_get_adjacency (lm, adj_index0);
+
+ a0 = hash_seeds[0];
+ b0 = hash_seeds[1];
+ c0 = hash_seeds[2];
+
+ sw_if_index0 = adj0->rewrite_header.sw_if_index;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+ /* Hash the outgoing interface together with the full destination. */
+ a0 ^= sw_if_index0;
+ b0 ^= ip0->dst_address.as_u32[0];
+ c0 ^= ip0->dst_address.as_u32[1];
+
+ hash_v3_mix32 (a0, b0, c0);
+
+ b0 ^= ip0->dst_address.as_u32[2];
+ c0 ^= ip0->dst_address.as_u32[3];
+
+ hash_v3_finalize32 (a0, b0, c0);
+
+ /* Reduce the hash to a bit number within the bitmap, then split it
+    into bit-within-word (mask) and word index.  The mask has to be
+    derived from the bit number before it is divided down to the word
+    index; otherwise only a few bitmap slots are ever used. */
+ c0 &= BITS (hash_bitmap) - 1;
+ m0 = (uword) 1 << (c0 % BITS (uword));
+ c0 = c0 / BITS (uword);
+
+ bm0 = hash_bitmap[c0];
+ drop0 = (bm0 & m0) != 0;
+
+ /* Mark it as seen. */
+ hash_bitmap[c0] = bm0 | m0;
+
+ /* The original packet always goes to the drop next. */
+ from += 1;
+ n_left_from -= 1;
+ to_next_drop[0] = pi0;
+ to_next_drop += 1;
+ n_left_to_next_drop -= 1;
+
+ hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+ /* If the interface is link-down, drop the pkt */
+ if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
+ drop0 = 1;
+
+ p0->error =
+ node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP
+ : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT];
+ if (drop0)
+ continue;
+
+ {
+ u32 bi0 = 0;
+ icmp6_neighbor_solicitation_header_t * h0;
+ vlib_buffer_t * b0; /* shadows the hash variable b0, which is no longer needed here */
+
+ h0 = vlib_packet_template_get_packet
+ (vm, &im->discover_neighbor_packet_template, &bi0);
+
+ /*
+ * Build ethernet header.
+ * Choose source address based on destination lookup
+ * adjacency.
+ */
+ ip6_src_address_for_packet (im, p0, &h0->ip.src_address,
+ sw_if_index0);
+
+ /*
+ * Destination address is a solicited node multicast address.
+ * We need to fill in
+ * the low 24 bits with low 24 bits of target's address.
+ */
+ h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13];
+ h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14];
+ h0->ip.dst_address.as_u8[15] = ip0->dst_address.as_u8[15];
+
+ h0->neighbor.target_address = ip0->dst_address;
+
+ /* NOTE(review): copies vec_len (hw_address) bytes; confirm this never
+    exceeds the size of the ethernet_address option field. */
+ memcpy (h0->link_layer_option.ethernet_address,
+ hw_if0->hw_address, vec_len (hw_if0->hw_address));
+
+ /* $$$$ appears we need this; why is the checksum non-zero? */
+ h0->neighbor.icmp.checksum = 0;
+ h0->neighbor.icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip,
+ &bogus_length);
+
+ ASSERT (bogus_length == 0);
+
+ vlib_buffer_copy_trace_flag (vm, p0, bi0);
+ b0 = vlib_get_buffer (vm, bi0);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX]
+ = vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ /* Add rewrite/encap string. */
+ vnet_rewrite_one_header (adj0[0], h0,
+ sizeof (ethernet_header_t));
+ vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
+
+ /* $$$$ hack in case next0 == 0 */
+ b0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_DROP];
+ next0 =
+ vec_elt (im->discover_neighbor_next_index_by_hw_if_index,
+ hw_if0->hw_if_index);
+
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+ }
+ }
+
+ vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
+ n_left_to_next_drop);
+ }
+
+ return frame->n_vectors;
+}
+
+/* Counter names for ip6-discover-neighbor, indexed by
+ * ip6_discover_neighbor_error_t.
+ */ static char * ip6_discover_neighbor_error_strings[] = {
+ [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops",
+ [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT]
+ = "neighbor solicitations sent",
+};
+
+/* Graph node "ip6-discover-neighbor": runs ip6_discover_neighbor.  Only the
+ * drop next node is declared statically; per-interface output next nodes
+ * are added by ip6_discover_neighbor_hw_interface_link_up_down.
+ */ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = {
+ .function = ip6_discover_neighbor,
+ .name = "ip6-discover-neighbor",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip6_forward_next_trace,
+
+ .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
+ .error_strings = ip6_discover_neighbor_error_strings,
+
+ .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
+ .next_nodes = {
+ [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
+ },
+};
+
+/* Hardware interface link up/down handler (registered below).
+ *
+ * Adds the interface's output node as a next node of
+ * ip6-discover-neighbor and stores the resulting next index in
+ * im->discover_neighbor_next_index_by_hw_if_index[hw_if_index], growing
+ * the vector (zero filled) as needed.  FLAGS is not examined, so the same
+ * work is done for link-up and link-down events.  Always returns 0.
+ */ clib_error_t *
+ip6_discover_neighbor_hw_interface_link_up_down (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ vlib_main_t * vm = vnm->vlib_main;
+ ip6_main_t * im = &ip6_main;
+ vnet_hw_interface_t * hw_if;
+
+ hw_if = vnet_get_hw_interface (vnm, hw_if_index);
+
+ vec_validate_init_empty
+ (im->discover_neighbor_next_index_by_hw_if_index, hw_if_index, 0);
+ im->discover_neighbor_next_index_by_hw_if_index[hw_if_index]
+ = vlib_node_add_next (vm, ip6_discover_neighbor_node.index,
+ hw_if->output_node_index);
+ return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION
+(ip6_discover_neighbor_hw_interface_link_up_down);
+
+/*
+ * Build a packet from the neighbor-discovery template for target `dst'
+ * and hand it straight to the output node of the hardware interface
+ * underneath `sw_if_index'.
+ *
+ * Returns an error when the software interface is not admin-up, or when
+ * no address on the interface matches `dst' (vnm->api_errno is also set
+ * in that case).  Returns 0 once the packet has been enqueued.
+ *
+ * NOTE(review): the template packet pointer is used without a null
+ * check; confirm vlib_packet_template_get_packet cannot fail here.
+ */
+clib_error_t *
+ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip6_main_t * im = &ip6_main;
+  icmp6_neighbor_solicitation_header_t * ns;
+  ip_interface_address_t * if_addr;
+  ip6_address_t * src_addr;
+  ip_adjacency_t * probe_adj;
+  vnet_hw_interface_t * hw;
+  vnet_sw_interface_t * sw;
+  vlib_buffer_t * buf;
+  vlib_frame_t * out_frame;
+  u32 * out_vector;
+  u32 buffer_index = 0;
+  int bogus_length;
+  int byte;
+
+  sw = vnet_get_sw_interface (vnm, sw_if_index);
+  if ((sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) == 0)
+    return clib_error_return (0, "%U: interface %U down",
+                              format_ip6_address, dst,
+                              format_vnet_sw_if_index_name, vnm,
+                              sw_if_index);
+
+  src_addr = ip6_interface_address_matching_destination (im, dst, sw_if_index,
+                                                         &if_addr);
+  if (src_addr == 0)
+    {
+      vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
+      return clib_error_return
+        (0, "no matching interface address for destination %U (interface %U)",
+         format_ip6_address, dst,
+         format_vnet_sw_if_index_name, vnm, sw_if_index);
+    }
+
+  ns = vlib_packet_template_get_packet (vm,
+                                        &im->discover_neighbor_packet_template,
+                                        &buffer_index);
+
+  hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+  /* The template destination is a solicited-node multicast address;
+     only its low 24 bits come from the target address. */
+  for (byte = 13; byte <= 15; byte++)
+    ns->ip.dst_address.as_u8[byte] = dst->as_u8[byte];
+
+  ns->ip.src_address = src_addr[0];
+  ns->neighbor.target_address = dst[0];
+
+  memcpy (ns->link_layer_option.ethernet_address,
+          hw->hw_address, vec_len (hw->hw_address));
+
+  ns->neighbor.icmp.checksum =
+    ip6_tcp_udp_icmp_compute_checksum (vm, 0, &ns->ip, &bogus_length);
+  ASSERT(bogus_length == 0);
+
+  buf = vlib_get_buffer (vm, buffer_index);
+  vnet_buffer (buf)->sw_if_index[VLIB_TX] = sw_if_index;
+  vnet_buffer (buf)->sw_if_index[VLIB_RX] = sw_if_index;
+
+  /* Prepend the encapsulation (e.g. ethernet header) taken from the
+     interface address's neighbor-probe adjacency. */
+  probe_adj = ip_get_adjacency (&im->lookup_main,
+                                if_addr->neighbor_probe_adj_index);
+  vnet_rewrite_one_header (probe_adj[0], ns, sizeof (ethernet_header_t));
+  vlib_buffer_advance (buf, -probe_adj->rewrite_header.data_bytes);
+
+  /* Ship a one-buffer frame directly to the interface output node. */
+  out_frame = vlib_get_frame_to_node (vm, hw->output_node_index);
+  out_vector = vlib_frame_vector_args (out_frame);
+  out_vector[0] = buffer_index;
+  out_frame->n_vectors = 1;
+  vlib_put_frame_to_node (vm, hw->output_node_index, out_frame);
+
+  return /* no error */ 0;
+}
+
+typedef enum { /* statically declared next-node slots of the ip6 rewrite nodes */
+ IP6_REWRITE_NEXT_DROP, /* used when a packet picked up an error during rewrite */
+} ip6_rewrite_next_t;
+
+/*
+ * Shared worker for the "ip6-rewrite" and "ip6-rewrite-local" nodes.
+ *
+ * For every buffer in the frame:
+ *   - picks the adjacency from the buffer's adj_index slot (VLIB_TX for
+ *     transit traffic, VLIB_RX when
+ *     rewrite_for_locally_received_packets is non-zero);
+ *   - transit only: decrements the hop limit and flags
+ *     IP6_ERROR_TIME_EXPIRED when it reaches zero;
+ *   - local only: flags IP6_ERROR_SPOOFED_LOCAL_PACKETS when the
+ *     adjacency is itself a LOCAL adjacency;
+ *   - flags IP6_ERROR_MTU_EXCEEDED when the chain is longer than the
+ *     adjacency's max_l3_packet_bytes;
+ *   - prepends the adjacency rewrite string and sets the TX interface;
+ *   - forwards to the adjacency's next node, or to
+ *     IP6_REWRITE_NEXT_DROP when an error was flagged.
+ *
+ * Errors are recorded against the ip6-input node's error table.  Both
+ * the dual-packet and the single-packet loop set the buffer error, so a
+ * dropped buffer always carries the error that caused the drop.
+ *
+ * Returns the number of buffers in the frame.
+ */
+always_inline uword
+ip6_rewrite_inline (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * frame,
+                    int rewrite_for_locally_received_packets)
+{
+  ip_lookup_main_t * lm = &ip6_main.lookup_main;
+  u32 * from = vlib_frame_vector_args (frame);
+  u32 n_left_from, n_left_to_next, * to_next, next_index;
+  vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
+  vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
+
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  u32 cpu_index = os_get_cpu_number();
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          ip_adjacency_t * adj0, * adj1;
+          vlib_buffer_t * p0, * p1;
+          ip6_header_t * ip0, * ip1;
+          u32 pi0, rw_len0, next0, error0, adj_index0;
+          u32 pi1, rw_len1, next1, error1, adj_index1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+
+            CLIB_PREFETCH (p2->pre_data, 32, STORE);
+            CLIB_PREFETCH (p3->pre_data, 32, STORE);
+
+            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+          }
+
+          pi0 = to_next[0] = from[0];
+          pi1 = to_next[1] = from[1];
+
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
+          adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
+
+          /* We should never rewrite a pkt using the MISS adjacency */
+          ASSERT(adj_index0 && adj_index1);
+
+          ip0 = vlib_buffer_get_current (p0);
+          ip1 = vlib_buffer_get_current (p1);
+
+          error0 = error1 = IP6_ERROR_NONE;
+
+          if (! rewrite_for_locally_received_packets)
+            {
+              i32 hop_limit0 = ip0->hop_limit, hop_limit1 = ip1->hop_limit;
+
+              /* Input node should have rejected packets with hop limit 0. */
+              ASSERT (ip0->hop_limit > 0);
+              ASSERT (ip1->hop_limit > 0);
+
+              hop_limit0 -= 1;
+              hop_limit1 -= 1;
+
+              ip0->hop_limit = hop_limit0;
+              ip1->hop_limit = hop_limit1;
+
+              error0 = hop_limit0 <= 0 ? IP6_ERROR_TIME_EXPIRED : error0;
+              error1 = hop_limit1 <= 0 ? IP6_ERROR_TIME_EXPIRED : error1;
+            }
+
+          adj0 = ip_get_adjacency (lm, adj_index0);
+          adj1 = ip_get_adjacency (lm, adj_index1);
+
+          if (rewrite_for_locally_received_packets)
+            {
+              /*
+               * If someone sends e.g. an icmp6 w/ src = dst = interface addr,
+               * we end up here with a local adjacency in hand
+               */
+              if (PREDICT_FALSE(adj0->lookup_next_index
+                                == IP_LOOKUP_NEXT_LOCAL))
+                error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS;
+              if (PREDICT_FALSE(adj1->lookup_next_index
+                                == IP_LOOKUP_NEXT_LOCAL))
+                error1 = IP6_ERROR_SPOOFED_LOCAL_PACKETS;
+            }
+
+          rw_len0 = adj0[0].rewrite_header.data_bytes;
+          rw_len1 = adj1[0].rewrite_header.data_bytes;
+
+          vlib_increment_combined_counter (&lm->adjacency_counters,
+                                           cpu_index,
+                                           adj_index0,
+                                           /* packet increment */ 0,
+                                           /* byte increment */ rw_len0);
+          vlib_increment_combined_counter (&lm->adjacency_counters,
+                                           cpu_index,
+                                           adj_index1,
+                                           /* packet increment */ 0,
+                                           /* byte increment */ rw_len1);
+
+          /* Check MTU of outgoing interface. */
+          error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
+                    ? IP6_ERROR_MTU_EXCEEDED
+                    : error0);
+          error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
+                    ? IP6_ERROR_MTU_EXCEEDED
+                    : error1);
+
+          p0->current_data -= rw_len0;
+          p1->current_data -= rw_len1;
+
+          p0->current_length += rw_len0;
+          p1->current_length += rw_len1;
+
+          vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
+          vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
+
+          next0 = (error0 == IP6_ERROR_NONE) ?
+            adj0[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP;
+          next1 = (error1 == IP6_ERROR_NONE) ?
+            adj1[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP;
+
+          /* Record the error on the buffer, exactly as the single-packet
+             loop does, so error-drop reports the real drop reason. */
+          p0->error = error_node->errors[error0];
+          p1->error = error_node->errors[error1];
+
+          /* Guess we are only writing on simple Ethernet header. */
+          vnet_rewrite_two_headers (adj0[0], adj1[0],
+                                    ip0, ip1,
+                                    sizeof (ethernet_header_t));
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, pi1, next0, next1);
+        }
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          ip_adjacency_t * adj0;
+          vlib_buffer_t * p0;
+          ip6_header_t * ip0;
+          u32 pi0, rw_len0;
+          u32 adj_index0, next0, error0;
+
+          pi0 = to_next[0] = from[0];
+
+          p0 = vlib_get_buffer (vm, pi0);
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
+
+          /* We should never rewrite a pkt using the MISS adjacency */
+          ASSERT(adj_index0);
+
+          adj0 = ip_get_adjacency (lm, adj_index0);
+
+          ip0 = vlib_buffer_get_current (p0);
+
+          error0 = IP6_ERROR_NONE;
+
+          /* Check hop limit */
+          if (! rewrite_for_locally_received_packets)
+            {
+              i32 hop_limit0 = ip0->hop_limit;
+
+              ASSERT (ip0->hop_limit > 0);
+
+              hop_limit0 -= 1;
+
+              ip0->hop_limit = hop_limit0;
+
+              error0 = hop_limit0 <= 0 ? IP6_ERROR_TIME_EXPIRED : error0;
+            }
+
+          if (rewrite_for_locally_received_packets)
+            {
+              if (PREDICT_FALSE(adj0->lookup_next_index
+                                == IP_LOOKUP_NEXT_LOCAL))
+                error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS;
+            }
+
+          /* Guess we are only writing on simple Ethernet header. */
+          vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+
+          /* Update packet buffer attributes/set output interface. */
+          rw_len0 = adj0[0].rewrite_header.data_bytes;
+
+          vlib_increment_combined_counter (&lm->adjacency_counters,
+                                           cpu_index,
+                                           adj_index0,
+                                           /* packet increment */ 0,
+                                           /* byte increment */ rw_len0);
+
+          /* Check MTU of outgoing interface. */
+          error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
+                    ? IP6_ERROR_MTU_EXCEEDED
+                    : error0);
+
+          p0->current_data -= rw_len0;
+          p0->current_length += rw_len0;
+          vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
+
+          next0 = (error0 == IP6_ERROR_NONE) ?
+            adj0[0].rewrite_header.next_index : IP6_REWRITE_NEXT_DROP;
+
+          p0->error = error_node->errors[error0];
+
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Need to do trace after rewrites to pick up new packet data. */
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    ip6_forward_next_trace (vm, node, frame, adj_rx_tx);
+
+  return frame->n_vectors;
+}
+
+/* Node function for "ip6-rewrite": runs the shared rewrite worker in
+ * transit mode (rewrite_for_locally_received_packets == 0). */
+static uword
+ip6_rewrite_transit (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * frame)
+{
+  enum { locally_received = 0 };
+
+  return ip6_rewrite_inline (vm, node, frame, locally_received);
+}
+
+/* Node function for "ip6-rewrite-local": runs the shared rewrite worker
+ * in locally-received mode (rewrite_for_locally_received_packets == 1). */
+static uword
+ip6_rewrite_local (vlib_main_t * vm,
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * frame)
+{
+  enum { locally_received = 1 };
+
+  return ip6_rewrite_inline (vm, node, frame, locally_received);
+}
+
+/* Graph node "ip6-rewrite": transit rewrite; one static next (drop). */
+VLIB_REGISTER_NODE (ip6_rewrite_node) = {
+  .name = "ip6-rewrite",
+  .function = ip6_rewrite_transit,
+  .vector_size = sizeof (u32),
+
+  .format_trace = format_ip6_forward_next_trace,
+
+  .next_nodes = {
+    [IP6_REWRITE_NEXT_DROP] = "error-drop",
+  },
+  .n_next_nodes = 1,
+};
+
+/* Graph node "ip6-rewrite-local": rewrite for locally received packets.
+ * Declared as a sibling of "ip6-rewrite". */
+VLIB_REGISTER_NODE (ip6_rewrite_local_node,static) = {
+  .name = "ip6-rewrite-local",
+  .function = ip6_rewrite_local,
+  .vector_size = sizeof (u32),
+
+  .sibling_of = "ip6-rewrite",
+
+  .format_trace = format_ip6_forward_next_trace,
+
+  .next_nodes = {
+    [IP6_REWRITE_NEXT_DROP] = "error-drop",
+  },
+  .n_next_nodes = 1,
+};
+
+/* Global IP6 main: the single ip6_main_t instance used throughout this file (lookup tables, FIB masks, neighbor-discovery packet template). */
+ip6_main_t ip6_main;
+
+/*
+ * One-time initialisation of the ip6 lookup machinery:
+ *   - fills im->fib_masks[] with one network-byte-order mask per
+ *     prefix length,
+ *   - initialises the generic lookup main for ip6,
+ *   - applies defaults for the lookup hash bucket count and memory size
+ *     when they are still zero, then creates the hash table,
+ *   - creates the FIB with table id 0,
+ *   - hooks the packet-generator header parser onto ip6-lookup,
+ *   - builds the neighbor-solicitation packet template.
+ * Always returns 0.
+ */
+static clib_error_t *
+ip6_lookup_init (vlib_main_t * vm)
+{
+  ip6_main_t * im = &ip6_main;
+  icmp6_neighbor_solicitation_header_t ns_template;
+  pg_node_t * pg_node;
+  uword prefix_len;
+
+  for (prefix_len = 0; prefix_len < ARRAY_LEN (im->fib_masks); prefix_len++)
+    {
+      u32 n_full_words = prefix_len / 32;
+      u32 n_extra_bits = prefix_len % 32;
+      u32 word;
+
+      /* Whole 32-bit words covered by the prefix are all ones. */
+      for (word = 0; word < n_full_words; word++)
+        im->fib_masks[prefix_len].as_u32[word] = ~0;
+
+      /* The remaining bits occupy the top of the next word. */
+      if (n_extra_bits != 0)
+        im->fib_masks[prefix_len].as_u32[n_full_words] =
+          clib_host_to_net_u32 (pow2_mask (n_extra_bits) << (32 - n_extra_bits));
+    }
+
+  ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
+
+  if (im->lookup_table_nbuckets == 0)
+    im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
+
+  im->lookup_table_nbuckets = 1<< max_log2 (im->lookup_table_nbuckets);
+
+  if (im->lookup_table_size == 0)
+    im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
+
+  BV(clib_bihash_init) (&im->ip6_lookup_table, "ip6 lookup table",
+                        im->lookup_table_nbuckets,
+                        im->lookup_table_size);
+
+  /* Create FIB with index 0 and table id of 0. */
+  find_ip6_fib_by_table_index_or_id (im, /* table id */ 0, IP6_ROUTE_FLAG_TABLE_ID);
+
+  pg_node = pg_get_node (ip6_lookup_node.index);
+  pg_node->unformat_edit = unformat_pg_ip6_header;
+
+  /* Neighbor-solicitation template; the per-packet fields are filled in
+     by the code that sends it. */
+  memset (&ns_template, 0, sizeof (ns_template));
+
+  ns_template.ip.ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 (0x6 << 28);
+  ns_template.ip.payload_length =
+    clib_host_to_net_u16 (sizeof (ns_template)
+                          - STRUCT_OFFSET_OF (icmp6_neighbor_solicitation_header_t, neighbor));
+  ns_template.ip.protocol = IP_PROTOCOL_ICMP6;
+  ns_template.ip.hop_limit = 255;
+  ip6_set_solicited_node_multicast_address (&ns_template.ip.dst_address, 0);
+
+  ns_template.neighbor.icmp.type = ICMP6_neighbor_solicitation;
+
+  ns_template.link_layer_option.header.type =
+    ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
+  ns_template.link_layer_option.header.n_data_u64s =
+    sizeof (ns_template.link_layer_option) / sizeof (u64);
+
+  vlib_packet_template_init (vm,
+                             &im->discover_neighbor_packet_template,
+                             &ns_template, sizeof (ns_template),
+                             /* alloc chunk size */ 8,
+                             "ip6 neighbor discovery");
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_lookup_init);
+
+/*
+ * CLI handler for "set interface ip6 table <intfc> <table-id>".
+ *
+ * Parses an interface and a table id, finds the FIB for that table id
+ * and, when one is returned, records its index for the interface in
+ * im->fib_index_by_sw_if_index.  Returns a parse error when either
+ * argument is missing or malformed, 0 otherwise.
+ */
+static clib_error_t *
+add_del_ip6_interface_table (vlib_main_t * vm,
+                             unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip6_main_t * im = &ip6_main;
+  ip6_fib_t * fib;
+  u32 sw_if_index = ~0;
+  u32 table_id;
+
+  if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    return clib_error_return (0, "unknown interface `%U'",
+                              format_unformat_error, input);
+
+  if (! unformat (input, "%d", &table_id))
+    return clib_error_return (0, "expected table id `%U'",
+                              format_unformat_error, input);
+
+  fib = find_ip6_fib_by_table_index_or_id (im, table_id,
+                                           IP6_ROUTE_FLAG_TABLE_ID);
+  if (fib)
+    {
+      vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+      im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
+    }
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
+  .path = "set interface ip6 table",
+  .short_help = "set interface ip6 table <intfc> <table-id>",
+  .function = add_del_ip6_interface_table,
+};
+
+/*
+ * Derive a link-local address from a 6-byte MAC address.
+ *
+ * The upper 64 bits become FE80::/64; the lower 64 bits are the MAC
+ * with 0xFF 0xFE inserted in the middle and bit 1 of the first MAC
+ * byte (the "u" bit) inverted.
+ */
+void
+ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip,
+                                                  u8 *mac)
+{
+  u8 * iid = &ip->as_u8[8];
+
+  ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
+
+  /* Invert the "u" bit */
+  iid[0] = mac[0] ^ 0x02;
+  iid[1] = mac[1];
+  iid[2] = mac[2];
+  iid[3] = 0xFF;
+  iid[4] = 0xFE;
+  iid[5] = mac[3];
+  iid[6] = mac[4];
+  iid[7] = mac[5];
+}
+
+/*
+ * Recover a 6-byte MAC address from the lower 64 bits of an address
+ * built by ip6_link_local_address_from_ethernet_mac_address: the two
+ * filler bytes (address bytes 11 and 12) are skipped and bit 1 of the
+ * first byte is inverted back.
+ */
+void
+ip6_ethernet_mac_address_from_link_local_address (u8 *mac,
+                                                  ip6_address_t *ip)
+{
+  u8 * iid = &ip->as_u8[8];
+
+  /* Invert the previously inverted "u" bit */
+  mac[0] = iid[0] ^ 0x02;
+  mac[1] = iid[1];
+  mac[2] = iid[2];
+  mac[3] = iid[5];
+  mac[4] = iid[6];
+  mac[5] = iid[7];
+}
+
+/*
+ * CLI handler for "test ip6 link <mac-address>": converts the MAC to a
+ * link-local address, prints it, converts it back and prints the
+ * recovered MAC.  Input that does not parse as a MAC address is
+ * silently ignored.  Always returns 0.
+ */
+static clib_error_t *
+test_ip6_link_command_fn (vlib_main_t * vm,
+                          unformat_input_t * input,
+                          vlib_cli_command_t * cmd)
+{
+  ip6_address_t link_local;
+  u8 mac[6];
+
+  if (! unformat (input, "%U", unformat_ethernet_address, mac))
+    return 0;
+
+  ip6_link_local_address_from_ethernet_mac_address (&link_local, mac);
+  vlib_cli_output (vm, "Link local address: %U",
+                   format_ip6_address, &link_local);
+
+  ip6_ethernet_mac_address_from_link_local_address (mac, &link_local);
+  vlib_cli_output (vm, "Original MAC address: %U",
+                   format_ethernet_address, mac);
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (test_link_command, static) = {
+  .path = "test ip6 link",
+  .short_help = "test ip6 link <mac-address>",
+  .function = test_ip6_link_command_fn,
+};
+
+/*
+ * Set the flow-hash configuration of the ip6 FIB with the given table id.
+ * Returns 1 on success, -1 when no FIB is registered under table_id.
+ */
+int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
+{
+  ip6_main_t * im6 = &ip6_main;
+  uword * fib_index_slot = hash_get (im6->fib_index_by_table_id, table_id);
+  ip6_fib_t * fib;
+
+  if (! fib_index_slot)
+    return -1;
+
+  fib = vec_elt_at_index (im6->fibs, fib_index_slot[0]);
+  fib->flow_hash_config = flow_hash_config;
+
+  return 1;
+}
+
+/*
+ * CLI handler for "set ip6 flow-hash".
+ *
+ * Accepts "table <id>" plus any of the keywords generated from
+ * foreach_flow_hash_bit, OR-ing the corresponding bits into the new
+ * flow-hash configuration.  Parsing stops at the first unrecognised
+ * token; if nothing at all matched, an "unknown input" error is
+ * returned.  The table id defaults to 0.
+ *
+ * Returns an error when the FIB table does not exist, 0 otherwise.
+ */
+static clib_error_t *
+set_ip6_flow_hash_command_fn (vlib_main_t * vm,
+                              unformat_input_t * input,
+                              vlib_cli_command_t * cmd)
+{
+  int matched = 0;
+  u32 table_id = 0;
+  u32 flow_hash_config = 0;
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+    if (unformat (input, "table %d", &table_id))
+      matched = 1;
+    /* One "else if" arm per flow-hash keyword. */
+#define _(a,v) \
+    else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
+    foreach_flow_hash_bit
+#undef _
+    else break;
+  }
+
+  if (matched == 0)
+    return clib_error_return (0, "unknown input `%U'",
+                              format_unformat_error, input);
+
+  rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
+  switch (rv)
+    {
+    case 1:
+      break;
+
+    case -1:
+      return clib_error_return (0, "no such FIB table %d", table_id);
+
+    default:
+      clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
+      break;
+    }
+
+  return 0;
+}
+
+/* The help text names the ip6 command; it previously described
+ * "set ip table flow-hash", which is not this command's path. */
+VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
+  .path = "set ip6 flow-hash",
+  .short_help =
+  "set ip6 flow-hash table <fib-id> src dst sport dport proto reverse",
+  .function = set_ip6_flow_hash_command_fn,
+};
+
+/*
+ * CLI handler for "show ip6 local": prints the number of every IP
+ * protocol whose entry in lm->local_next_by_ip_protocol is something
+ * other than IP_LOCAL_NEXT_PUNT.  Always returns 0.
+ */
+static clib_error_t *
+show_ip6_local_command_fn (vlib_main_t * vm,
+                           unformat_input_t * input,
+                           vlib_cli_command_t * cmd)
+{
+  ip_lookup_main_t * lm = &ip6_main.lookup_main;
+  int protocol;
+
+  vlib_cli_output (vm, "Protocols handled by ip6_local");
+
+  for (protocol = 0;
+       protocol < ARRAY_LEN(lm->local_next_by_ip_protocol);
+       protocol++)
+    {
+      if (lm->local_next_by_ip_protocol[protocol] == IP_LOCAL_NEXT_PUNT)
+        continue;
+
+      vlib_cli_output (vm, "%d", protocol);
+    }
+
+  return 0;
+}
+
+
+
+VLIB_CLI_COMMAND (show_ip_local, static) = {
+  .path = "show ip6 local",
+  .short_help = "Show ip6 local protocol table",
+  .function = show_ip6_local_command_fn,
+};
+
+/*
+ * Bind (or, with table_index == ~0, unbind) a classifier table to an
+ * interface for ip6.
+ *
+ * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is not a
+ * live software interface, VNET_API_ERROR_NO_SUCH_ENTRY when
+ * table_index is neither ~0 nor a live classifier table, and 0 on
+ * success.  The vm argument is not used.
+ */
+int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+                                 u32 table_index)
+{
+  vnet_interface_main_t * vim = &vnet_get_main()->interface_main;
+  ip_lookup_main_t * lm = &ip6_main.lookup_main;
+  vnet_classify_main_t * cm = &vnet_classify_main;
+
+  if (pool_is_free_index (vim->sw_interfaces, sw_if_index))
+    return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+
+  if (table_index != ~0)
+    {
+      if (pool_is_free_index (cm->tables, table_index))
+        return VNET_API_ERROR_NO_SUCH_ENTRY;
+    }
+
+  vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
+  lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
+
+  return 0;
+}
+
+/*
+ * CLI handler for "set ip6 classify intfc <int> table-index <index>".
+ *
+ * Both arguments are mandatory and may appear in either order.  Maps
+ * the two error codes returned by vnet_set_ip6_classify_intfc onto CLI
+ * errors; any other return value is treated as success.
+ */
+static clib_error_t *
+set_ip6_classify_command_fn (vlib_main_t * vm,
+                             unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
+{
+  u32 sw_if_index = ~0;
+  u32 table_index = ~0;
+  int have_table_index = 0;
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "table-index %d", &table_index))
+        {
+          have_table_index = 1;
+          continue;
+        }
+
+      if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+                    vnet_get_main(), &sw_if_index))
+        continue;
+
+      break;
+    }
+
+  if (! have_table_index)
+    return clib_error_return (0, "classify table-index must be specified");
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "interface / subif must be specified");
+
+  rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index);
+
+  if (rv == VNET_API_ERROR_NO_MATCHING_INTERFACE)
+    return clib_error_return (0, "No such interface");
+
+  if (rv == VNET_API_ERROR_NO_SUCH_ENTRY)
+    return clib_error_return (0, "No such classifier table");
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (set_ip6_classify_command, static) = {
+  .path = "set ip6 classify",
+  .function = set_ip6_classify_command_fn,
+  .short_help =
+  "set ip6 classify intfc <int> table-index <index>",
+};
+
+/*
+ * Early config handler for the "ip6" startup section.
+ *
+ * Recognises "hash-buckets <n>" and "heap-size <n>" with an m/M
+ * (shift by 20) or g/G (shift by 30) suffix.  Any other token is an
+ * error.  The parsed values (0 when not given) are stored in
+ * im->lookup_table_nbuckets and im->lookup_table_size.
+ */
+static clib_error_t *
+ip6_config (vlib_main_t * vm, unformat_input_t * input)
+{
+  ip6_main_t * im = &ip6_main;
+  u32 n_buckets = 0;
+  uword heap_bytes = 0;
+  u32 value;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "hash-buckets %d", &value))
+        n_buckets = value;
+      else if (unformat (input, "heap-size %dm", &value)
+               || unformat (input, "heap-size %dM", &value))
+        heap_bytes = ((u64)value) << 20;
+      else if (unformat (input, "heap-size %dg", &value)
+               || unformat (input, "heap-size %dG", &value))
+        heap_bytes = ((u64)value) << 30;
+      else
+        return clib_error_return (0, "unknown input '%U'",
+                                  format_unformat_error, input);
+    }
+
+  im->lookup_table_nbuckets = n_buckets;
+  im->lookup_table_size = heap_bytes;
+
+  return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
+
diff --git a/vnet/vnet/ip/ip6_hop_by_hop.c b/vnet/vnet/ip/ip6_hop_by_hop.c
new file mode 100644
index 00000000000..64edfd249c3
--- /dev/null
+++ b/vnet/vnet/ip/ip6_hop_by_hop.c
@@ -0,0 +1,1139 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <vnet/ip/ip6_hop_by_hop.h>
+
+ip6_hop_by_hop_main_t ip6_hop_by_hop_main; /* shared state of the hop-by-hop nodes; this file reads its node_id and rewrite fields */
+
+/*
+ * ip6 hop-by-hop option handling. We push pkts with h-b-h options to
+ * ip6_hop_by_hop_node_fn from ip6-lookup at a cost of ~2 clocks/pkt in
+ * the speed path.
+ *
+ * We parse through the h-b-h option TLVs, specifically looking for
+ * HBH_OPTION_TYPE_IOAM_DATA_LIST. [Someone needs to get bananas from
+ * IANA, aka to actually allocate the option TLV codes.]
+ *
+ * If we find the indicated option type, and we have remaining list
+ * elements in the trace list, allocate and populate the trace list
+ * element.
+ *
+ * At the ingress edge: punch in the h-b-h rewrite, then visit the
+ * standard h-b-h option handler. We have to be careful in the standard
+ * h-b-h handler, to avoid looping until we run out of rewrite space.
+ * Ask me how I know that.
+ *
+ * Remaining work:
+ * decide on egress point "pop and count" scheme
+ * time stamp handling: usec since the top of the hour?
+ * configure the node id
+ * trace list application data support
+ * cons up analysis / steering plug-in(s)
+ * add configuration binary APIs, vpe_api_test_support, yang models and
+ * orca code
+ * perf tune: dual loop, replace memcpy w/ N x 8-byte load/stores
+ *
+ */
+
+/*
+ * primary h-b-h handler trace support
+ * We work pretty hard on the problem for obvious reasons
+ */
+typedef struct { /* per-packet trace record written by ip6_hop_by_hop_node_fn */
+ u32 next_index; /* next node index chosen for the packet */
+ u32 trace_len; /* number of valid bytes in option_data */
+ u8 option_data[256]; /* verbatim copy of the hop-by-hop header, truncated to this array's size */
+} ip6_hop_by_hop_trace_t;
+
+/*
+ * format() helper for one ioam trace list element.  The ttl_node_id
+ * word is converted from network byte order and split into its top
+ * 8 bits (ttl) and low 24 bits (node id); the remaining fields are
+ * printed as stored.
+ */
+static u8 * format_ioam_data_list_element (u8 * s, va_list * args)
+{
+  ioam_data_list_element_t * element =
+    va_arg (*args, ioam_data_list_element_t *);
+  u32 packed = clib_net_to_host_u32 (element->ttl_node_id);
+  u32 ttl = packed >> 24;
+  u32 node_id = packed & 0x00FFFFFF;
+
+  return format (s, "ttl %d node id %d ingress %d egress %d ts %u",
+                 ttl,
+                 node_id,
+                 element->ingress_if,
+                 element->egress_if,
+                 element->timestamp);
+}
+
+/*
+ * Trace formatter for the ip6-hop-by-hop node.
+ *
+ * Walks the option TLVs captured in t->option_data (t->trace_len valid
+ * bytes) and prints:
+ *   - for an ioam data list option: the remaining-element count and
+ *     every element slot,
+ *   - for a proof-of-work option: a one-line marker,
+ *   - for any other non-pad option: its type.
+ *
+ * The end-of-data limit and the pad step are computed on byte pointers
+ * before casting back to an option pointer.  Adding to an already-cast
+ * option pointer would scale the offset by the option struct size and
+ * walk past the captured bytes.
+ */
+static u8 * format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip6_hop_by_hop_trace_t * t = va_arg (*args, ip6_hop_by_hop_trace_t *);
+  ip6_hop_by_hop_header_t *hbh0;
+  ip6_hop_by_hop_option_t *opt0, *limit0;
+  ioam_trace_option_t * trace0;
+  ioam_data_list_element_t * elt0;
+  u8 * elts_end0;
+  int elt_index;
+  u8 type0;
+
+  hbh0 = (ip6_hop_by_hop_header_t *)t->option_data;
+
+  s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d\n",
+              t->next_index, (hbh0->length+1)<<3, t->trace_len);
+
+  opt0 = (ip6_hop_by_hop_option_t *) (hbh0+1);
+  /* Byte offset first, cast second: trace_len is a byte count. */
+  limit0 = (ip6_hop_by_hop_option_t *) (((u8 *)hbh0) + t->trace_len);
+
+  while (opt0 < limit0)
+    {
+      type0 = opt0->type & HBH_OPTION_TYPE_MASK;
+      elt_index = 0;
+      switch (type0)
+        {
+        case HBH_OPTION_TYPE_IOAM_DATA_LIST:
+          trace0 = (ioam_trace_option_t *)opt0;
+          s = format (s, "  Trace %d elts left\n",
+                      trace0->data_list_elts_left);
+          elt0 = &trace0->elts[0];
+          elts_end0 = (u8 *)(&trace0->elts[0]) + trace0->hdr.length - 1
+            /* -1 accounts for elts_left */;
+          /* Never start an element beyond the captured bytes. */
+          if (elts_end0 > (u8 *) limit0)
+            elts_end0 = (u8 *) limit0;
+          while ((u8 *) elt0 < elts_end0)
+            {
+              s = format (s, "    [%d] %U\n",elt_index,
+                          format_ioam_data_list_element, elt0);
+              elt_index++;
+              elt0++;
+            }
+
+          opt0 = (ip6_hop_by_hop_option_t *)
+            (((u8 *)opt0) + opt0->length
+             + sizeof (ip6_hop_by_hop_option_t));
+          break;
+
+        case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK:
+          s = format (s, "  POW opt present\n");
+          opt0 = (ip6_hop_by_hop_option_t *)
+            (((u8 *)opt0) + sizeof (ioam_pow_option_t));
+          break;
+
+        case 0: /* Pad: a single byte, step over it */
+          opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + 1);
+          break;
+
+        default:
+          s = format (s, "Unknown %d", type0);
+          opt0 = (ip6_hop_by_hop_option_t *)
+            (((u8 *)opt0) + opt0->length
+             + sizeof (ip6_hop_by_hop_option_t));
+          break;
+        }
+    }
+  return s;
+}
+
+vlib_node_registration_t ip6_hop_by_hop_node; /* declared ahead of the node function, which reads ip6_hop_by_hop_node.index */
+
+#define foreach_ip6_hop_by_hop_error \
+_(PROCESSED, "Pkts with ip6 hop-by-hop options")
+
+typedef enum { /* one IP6_HOP_BY_HOP_ERROR_<sym> per entry of the list above */
+#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip6_hop_by_hop_error
+#undef _
+ IP6_HOP_BY_HOP_N_ERROR, /* number of counters; keep last */
+} ip6_hop_by_hop_error_t;
+
+static char * ip6_hop_by_hop_error_strings[] = { /* counter descriptions, in the same order as the enum */
+#define _(sym,string) string,
+ foreach_ip6_hop_by_hop_error
+#undef _
+};
+
+/*
+ * Node function for "ip6-hop-by-hop".
+ *
+ * For each packet, scans the hop-by-hop option TLVs that follow the
+ * ip6 header:
+ *   - ioam data list: when slots remain, consumes one slot and records
+ *     hop limit + node id, RX interface, the adjacency's TX interface
+ *     and a (placeholder) timestamp;
+ *   - ioam proof of work: skipped;
+ *   - pad (type 0): stops the scan;
+ *   - any other type: skipped using its length field.  Without this
+ *     arm the option pointer would never advance and the scan would
+ *     spin forever on the first unrecognised option.
+ *
+ * The packet is then sent to the adjacency's lookup_next_index, or to
+ * its saved_lookup_next_index when the adjacency points at the
+ * add-hop-by-hop node (packets arrive here from that node as well).
+ *
+ * Returns the number of buffers in the frame.
+ *
+ * TODO: add a dual-packet loop.
+ *
+ * NOTE(review): option lengths and data_list_elts_left come from the
+ * packet and are not validated against the header length; confirm
+ * whether an upstream node guarantees well-formed options.
+ */
+static uword
+ip6_hop_by_hop_node_fn (vlib_main_t * vm,
+                        vlib_node_runtime_t * node,
+                        vlib_frame_t * frame)
+{
+  ip6_main_t * im = &ip6_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
+  u32 n_left_from, * from, * to_next;
+  ip_lookup_next_t next_index;
+  u32 processed = 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t * b0;
+          u32 next0;
+          u32 adj_index0;
+          ip6_header_t * ip0;
+          ip_adjacency_t * adj0;
+          ip6_hop_by_hop_header_t *hbh0;
+          ip6_hop_by_hop_option_t *opt0, *limit0;
+          ioam_trace_option_t * trace0;
+          ioam_data_list_element_t * elt0;
+          u8 type0;
+
+          /* speculatively enqueue b0 to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          ip0 = vlib_buffer_get_current (b0);
+          adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+          adj0 = ip_get_adjacency (lm, adj_index0);
+          hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
+          opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
+          limit0 = (ip6_hop_by_hop_option_t *)
+            ((u8 *)hbh0 + ((hbh0->length+1)<<3));
+
+          /* Scan the set of h-b-h options, process ones that we understand */
+          while (opt0 < limit0)
+            {
+              type0 = opt0->type & HBH_OPTION_TYPE_MASK;
+              switch (type0)
+                {
+                case HBH_OPTION_TYPE_IOAM_DATA_LIST:
+                  trace0 = (ioam_trace_option_t *)opt0;
+                  if (PREDICT_TRUE (trace0->data_list_elts_left))
+                    {
+                      trace0->data_list_elts_left--;
+                      elt0 = &trace0->elts[trace0->data_list_elts_left];
+                      elt0->ttl_node_id =
+                        clib_host_to_net_u32 ((ip0->hop_limit<<24)
+                                              | hm->node_id);
+                      elt0->ingress_if =
+                        vnet_buffer(b0)->sw_if_index[VLIB_RX];
+                      elt0->egress_if = adj0->rewrite_header.sw_if_index;
+                      elt0->timestamp = 123; /* $$$$ */
+                      /* $$$ set elt0->app_data */
+                    }
+
+                  opt0 = (ip6_hop_by_hop_option_t *)
+                    (((u8 *)opt0) + opt0->length
+                     + sizeof (ip6_hop_by_hop_option_t));
+                  break;
+
+                case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK:
+                  opt0 = (ip6_hop_by_hop_option_t *)
+                    (((u8 *)opt0) + sizeof (ioam_pow_option_t));
+                  break;
+
+                case 0: /* Pad */
+                  /* Step one byte (byte arithmetic before the cast),
+                     then stop scanning. */
+                  opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + 1);
+                  goto out0;
+
+                default:
+                  /* Unrecognised option: skip its header and data so
+                     the scan always makes forward progress. */
+                  opt0 = (ip6_hop_by_hop_option_t *)
+                    (((u8 *)opt0) + opt0->length
+                     + sizeof (ip6_hop_by_hop_option_t));
+                  break;
+                }
+            }
+
+        out0:
+
+          /*
+           * Since we push pkts here from the h-b-h header imposition code
+           * we have to be careful what we wish for...
+           */
+          next0 = adj0->lookup_next_index != IP_LOOKUP_NEXT_ADD_HOP_BY_HOP ?
+            adj0->lookup_next_index : adj0->saved_lookup_next_index;
+
+          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              ip6_hop_by_hop_trace_t *t =
+                vlib_add_trace (vm, node, b0, sizeof (*t));
+              u32 trace_len = (hbh0->length+1)<<3;
+              t->next_index = next0;
+              /* Capture the h-b-h option verbatim */
+              trace_len = trace_len < ARRAY_LEN(t->option_data) ?
+                trace_len : ARRAY_LEN(t->option_data);
+              t->trace_len = trace_len;
+              memcpy (t->option_data, hbh0, trace_len);
+            }
+
+          processed++;
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, ip6_hop_by_hop_node.index,
+                               IP6_HOP_BY_HOP_ERROR_PROCESSED, processed);
+  return frame->n_vectors;
+}
+
+/* Graph node "ip6-hop-by-hop".  Its next-node slots are indexed by
+ * ip_lookup_next_t, because the node function forwards packets using
+ * the adjacency's lookup next index directly. */
+VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = {
+  .name = "ip6-hop-by-hop",
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .function = ip6_hop_by_hop_node_fn,
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip6_hop_by_hop_trace,
+
+  .error_strings = ip6_hop_by_hop_error_strings,
+  .n_errors = ARRAY_LEN(ip6_hop_by_hop_error_strings),
+
+  /* See ip/lookup.h */
+  .n_next_nodes = IP_LOOKUP_N_NEXT,
+  .next_nodes = {
+    [IP_LOOKUP_NEXT_MISS] = "ip6-miss",
+    [IP_LOOKUP_NEXT_DROP] = "ip6-drop",
+    [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",
+    [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",
+    [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",
+    [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",
+    [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify",
+    [IP_LOOKUP_NEXT_MAP] = "ip6-map",
+    [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t",
+    [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd",
+    /* Next 3 arcs probably never used */
+    [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",
+    [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop",
+    [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop",
+  },
+};
+
+/* Trace record for the add-hop-by-hop node. The main h-b-h tracer will be invoked, no need to do much here */
+typedef struct {
+ u32 next_index; /* printed by format_ip6_add_hop_by_hop_trace */
+} ip6_add_hop_by_hop_trace_t;
+
+/* Packet trace formatter for the add-hop-by-hop node: prints the
+ * recorded next index.  The vm and node arguments are consumed from
+ * the va_list but not used. */
+static u8 * format_ip6_add_hop_by_hop_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip6_add_hop_by_hop_trace_t * trace;
+
+  trace = va_arg (*args, ip6_add_hop_by_hop_trace_t *);
+
+  return format (s, "IP6_ADD_HOP_BY_HOP: next index %d",
+                 trace->next_index);
+}
+
+/* Forward declaration; the node function refers to the registration's index */
+vlib_node_registration_t ip6_add_hop_by_hop_node;
+
+/*
+ * X-macro list of ip6-add-hop-by-hop counters: _(symbol, description).
+ * Expanded below into both the counter enum and the matching string table.
+ */
+#define foreach_ip6_add_hop_by_hop_error \
+_(PROCESSED, "Pkts w/ added ip6 hop-by-hop options")
+
+/* Counter indices, one IP6_ADD_HOP_BY_HOP_ERROR_<symbol> per list entry */
+typedef enum {
+#define _(sym,str) IP6_ADD_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip6_add_hop_by_hop_error
+#undef _
+ IP6_ADD_HOP_BY_HOP_N_ERROR,
+} ip6_add_hop_by_hop_error_t;
+
+/* Counter descriptions, in the same order as the enum above */
+static char * ip6_add_hop_by_hop_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_add_hop_by_hop_error
+#undef _
+};
+
+/*
+ * ip6-add-hop-by-hop node function.
+ *
+ * For each buffer in the frame:
+ *  - slide the 40-octet ip6 header towards the start of the buffer by
+ *    vec_len (hm->rewrite) octets,
+ *  - copy the configured rewrite string into the gap, directly after the
+ *    ip6 header, as a hop-by-hop extension header,
+ *  - chain the protocol fields (ip6 -> h-b-h -> original protocol) and
+ *    grow the ip6 payload length by the rewrite length,
+ *  - enqueue the packet to the IP_LOOKUP_NEXT_HOP_BY_HOP arc.
+ *
+ * Bumps the PROCESSED counter by the number of packets handled and
+ * returns the number of vectors in the frame.
+ */
+static uword
+ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            vlib_frame_t * frame)
+{
+  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
+  u32 n_left_from, * from, * to_next;
+  ip_lookup_next_t next_index;
+  u32 processed = 0;
+  u8 * rewrite = hm->rewrite;
+  u32 rewrite_length = vec_len (rewrite);
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      /* Single-packet loop only; no dual loop exists for this node */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t * b0;
+          u32 next0;
+          ip6_header_t * ip0;
+          ip6_hop_by_hop_header_t * hbh0;
+          u64 * copy_src0, * copy_dst0;
+          u16 new_l0;
+
+          /* speculatively enqueue b0 to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          ip0 = vlib_buffer_get_current (b0);
+
+          /* Copy the ip header left by the required amount */
+          copy_dst0 = (u64 *)(((u8 *)ip0) - rewrite_length);
+          copy_src0 = (u64 *) ip0;
+
+          /*
+           * 5 x 8 octets = the 40 octet ip6 header. The destination is
+           * below the source, so copy in ascending order: that stays
+           * correct when the two regions overlap.
+           */
+          copy_dst0 [0] = copy_src0 [0];
+          copy_dst0 [1] = copy_src0 [1];
+          copy_dst0 [2] = copy_src0 [2];
+          copy_dst0 [3] = copy_src0 [3];
+          copy_dst0 [4] = copy_src0 [4];
+          vlib_buffer_advance (b0, - (word)rewrite_length);
+          ip0 = vlib_buffer_get_current (b0);
+
+          hbh0 = (ip6_hop_by_hop_header_t *)(ip0 + 1);
+          /* $$$ tune, rewrite_length is a multiple of 8 */
+          memcpy (hbh0, rewrite, rewrite_length);
+          /* Patch the protocol chain, insert the h-b-h (type 0) header */
+          hbh0->protocol = ip0->protocol;
+          ip0->protocol = 0;
+          new_l0 = clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
+          ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+          /* Hand off to ip6-hop-by-hop, which processes the new header */
+          next0 = IP_LOOKUP_NEXT_HOP_BY_HOP;
+
+          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              ip6_add_hop_by_hop_trace_t *t =
+                vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->next_index = next0;
+            }
+
+          processed++;
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, ip6_add_hop_by_hop_node.index,
+                               IP6_ADD_HOP_BY_HOP_ERROR_PROCESSED, processed);
+  return frame->n_vectors;
+}
+
+/*
+ * Graph node registration for "ip6-add-hop-by-hop".
+ * The next-node table has IP_LOOKUP_N_NEXT entries and is indexed by the
+ * IP_LOOKUP_NEXT_* values from ip/lookup.h.
+ */
+VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = {
+ .function = ip6_add_hop_by_hop_node_fn,
+ .name = "ip6-add-hop-by-hop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_add_hop_by_hop_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ /* Error counter names come from the node's error string table */
+ .n_errors = ARRAY_LEN(ip6_add_hop_by_hop_error_strings),
+ .error_strings = ip6_add_hop_by_hop_error_strings,
+
+ /* See ip/lookup.h */
+ .n_next_nodes = IP_LOOKUP_N_NEXT,
+ .next_nodes = {
+ [IP_LOOKUP_NEXT_MISS] = "ip6-miss",
+ [IP_LOOKUP_NEXT_DROP] = "ip6-drop",
+ [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",
+ [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",
+ [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",
+ [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",
+ [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify",
+ [IP_LOOKUP_NEXT_MAP] = "ip6-map",
+ [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t",
+ [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd",
+ /* Next 3 arcs probably never used */
+ [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",
+ [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop",
+ [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop",
+ },
+};
+
+
+/*
+ * Trace record for the ip6-pop-hop-by-hop node.
+ * The main h-b-h tracer was already invoked, no need to do much here.
+ */
+typedef struct {
+ /* Next index chosen for the traced packet */
+ u32 next_index;
+} ip6_pop_hop_by_hop_trace_t;
+
+/*
+ * Packet trace formatter for ip6-pop-hop-by-hop.
+ * Consumes (vlib_main_t *, vlib_node_t *, ip6_pop_hop_by_hop_trace_t *)
+ * from the va_list and appends the recorded next index to s.
+ */
+static u8 * format_ip6_pop_hop_by_hop_trace (u8 * s, va_list * args)
+{
+  ip6_pop_hop_by_hop_trace_t * trace;
+
+  /* The first two arguments are not needed; just step over them */
+  (void) va_arg (*args, vlib_main_t *);
+  (void) va_arg (*args, vlib_node_t *);
+  trace = va_arg (*args, ip6_pop_hop_by_hop_trace_t *);
+
+  return format (s, "IP6_POP_HOP_BY_HOP: next index %d", trace->next_index);
+}
+
+/* Forward declaration; the node function refers to the registration's index */
+vlib_node_registration_t ip6_pop_hop_by_hop_node;
+
+/*
+ * X-macro list of ip6-pop-hop-by-hop counters: _(symbol, description).
+ * Expanded below into both the counter enum and the matching string table.
+ */
+#define foreach_ip6_pop_hop_by_hop_error \
+_(PROCESSED, "Pkts w/ removed ip6 hop-by-hop options") \
+_(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options")
+
+/* Counter indices, one IP6_POP_HOP_BY_HOP_ERROR_<symbol> per list entry */
+typedef enum {
+#define _(sym,str) IP6_POP_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip6_pop_hop_by_hop_error
+#undef _
+ IP6_POP_HOP_BY_HOP_N_ERROR,
+} ip6_pop_hop_by_hop_error_t;
+
+/* Counter descriptions, in the same order as the enum above */
+static char * ip6_pop_hop_by_hop_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_pop_hop_by_hop_error
+#undef _
+};
+
+/*
+ * ip6-pop-hop-by-hop node function.
+ *
+ * For each buffer in the frame whose ip6 protocol field is 0 (hop-by-hop):
+ *  - choose the next index: the registered end-of-path callback's return
+ *    value if one is installed, otherwise the adjacency's
+ *    saved_lookup_next_index. The callback runs before the header is
+ *    removed, so it still sees the hop-by-hop data;
+ *  - advance the buffer past the hop-by-hop header, shrink the ip6 payload
+ *    length, restore the protocol field from the hop-by-hop header and
+ *    slide the 40-octet ip6 header up against the remaining payload.
+ *
+ * Packets without a hop-by-hop header are passed, unmodified, to the
+ * adjacency's saved_lookup_next_index and counted as NO_HOHO.
+ *
+ * Returns the number of vectors in the frame.
+ */
+static uword
+ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            vlib_frame_t * frame)
+{
+  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
+  ip6_main_t * im = &ip6_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+  u32 n_left_from, * from, * to_next;
+  ip_lookup_next_t next_index;
+  u32 processed = 0;
+  u32 no_header = 0;
+  /* hm stores the callback as void *; give it its call signature here */
+  u32 (*ioam_end_of_path_cb) (vlib_main_t *, vlib_node_runtime_t *,
+                              vlib_buffer_t *, ip6_header_t *,
+                              ip_adjacency_t *);
+
+  ioam_end_of_path_cb = hm->ioam_end_of_path_cb;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      /* Single-packet loop only; no dual loop exists for this node */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t * b0;
+          u32 next0;
+          u32 adj_index0;
+          ip6_header_t * ip0;
+          ip_adjacency_t * adj0;
+          ip6_hop_by_hop_header_t *hbh0;
+          u64 * copy_dst0, * copy_src0;
+          u16 new_l0;
+
+          /* speculatively enqueue b0 to the current next frame */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          ip0 = vlib_buffer_get_current (b0);
+          adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+          adj0 = ip_get_adjacency (lm, adj_index0);
+
+          /* Perfectly normal to end up here w/ out h-b-h header */
+          if (PREDICT_TRUE (ip0->protocol == 0))
+            {
+              hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
+
+              /* Collect data from trace via callback */
+              next0 = ioam_end_of_path_cb ?
+                ioam_end_of_path_cb (vm, node, b0, ip0, adj0)
+                : adj0->saved_lookup_next_index;
+
+              /* Pop the trace data: (length+1)*8 octets of h-b-h header */
+              vlib_buffer_advance (b0, (hbh0->length+1)<<3);
+              new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
+                ((hbh0->length+1)<<3);
+              ip0->payload_length = clib_host_to_net_u16 (new_l0);
+              ip0->protocol = hbh0->protocol;
+              copy_src0 = (u64 *)ip0;
+              copy_dst0 = copy_src0 + (hbh0->length+1);
+              /*
+               * Move the 40 octet ip6 header. The destination is above the
+               * source, so copy in descending order: that stays correct
+               * when the two regions overlap. The first store may land on
+               * hbh0, which is why everything needed from it was read above.
+               */
+              copy_dst0 [4] = copy_src0[4];
+              copy_dst0 [3] = copy_src0[3];
+              copy_dst0 [2] = copy_src0[2];
+              copy_dst0 [1] = copy_src0[1];
+              copy_dst0 [0] = copy_src0[0];
+              processed++;
+            }
+          else
+            {
+              next0 = adj0->saved_lookup_next_index;
+              no_header++;
+            }
+
+          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              ip6_pop_hop_by_hop_trace_t *t =
+                vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->next_index = next0;
+            }
+
+          /* verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
+                               IP6_POP_HOP_BY_HOP_ERROR_PROCESSED, processed);
+  vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
+                               IP6_POP_HOP_BY_HOP_ERROR_NO_HOHO, no_header);
+  return frame->n_vectors;
+}
+
+/*
+ * Graph node registration for "ip6-pop-hop-by-hop".
+ * The next-node table has IP_LOOKUP_N_NEXT entries and is indexed by the
+ * IP_LOOKUP_NEXT_* values from ip/lookup.h, which lets the node function
+ * use an adjacency's saved_lookup_next_index directly as its next index.
+ */
+VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = {
+ .function = ip6_pop_hop_by_hop_node_fn,
+ .name = "ip6-pop-hop-by-hop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_pop_hop_by_hop_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ /* Error counter names come from the node's error string table */
+ .n_errors = ARRAY_LEN(ip6_pop_hop_by_hop_error_strings),
+ .error_strings = ip6_pop_hop_by_hop_error_strings,
+
+ /* See ip/lookup.h */
+ .n_next_nodes = IP_LOOKUP_N_NEXT,
+ .next_nodes = {
+ [IP_LOOKUP_NEXT_MISS] = "ip6-miss",
+ [IP_LOOKUP_NEXT_DROP] = "ip6-drop",
+ [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",
+ [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",
+ [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",
+ [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",
+ [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify",
+ [IP_LOOKUP_NEXT_MAP] = "ip6-map",
+ [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t",
+ [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd",
+ /* Next 3 arcs probably never used */
+ [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",
+ [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop",
+ [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop",
+ },
+};
+
+
+/*
+ * Init function: cache the vlib and vnet main pointers in
+ * ip6_hop_by_hop_main. Always succeeds (returns 0).
+ */
+static clib_error_t *
+ip6_hop_by_hop_init (vlib_main_t * vm)
+{
+  ip6_hop_by_hop_main.vlib_main = vm;
+  ip6_hop_by_hop_main.vnet_main = vnet_get_main();
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
+
+/*
+ * Build the hop-by-hop rewrite string that ip6-add-hop-by-hop inserts.
+ *
+ * rwp               - in/out rewrite vector; any existing vector is freed
+ *                     first, and on success *rwp holds the new one
+ * trace_option_elts - number of ioam_data_list_element_t slots to reserve
+ *                     in the trace option; 0 means no trace option
+ * has_pow_option    - non-zero to append the proof-of-work option
+ *
+ * Layout: ip6_hop_by_hop_header_t, then the optional trace option, then
+ * the optional pow option, zero padded to a multiple of 8 octets. The
+ * header's protocol octet is left 0; ip6-add-hop-by-hop fills it in per
+ * packet.
+ *
+ * Returns 0 on success (when both options are off the old rewrite is
+ * freed and nothing is built), or VNET_API_ERROR_INVALID_VALUE when the
+ * trace data would not fit the one-octet option length field.
+ */
+int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_option_elts, int has_pow_option)
+{
+  u8 *rewrite = 0;
+  u32 size, rnd_size;
+  ip6_hop_by_hop_header_t *hbh;
+  ioam_trace_option_t * trace_option;
+  ioam_pow_option_t * pow_option;
+  u8 *current;
+
+  vec_free (*rwp);
+
+  if (trace_option_elts == 0 && has_pow_option == 0)
+    return 0;
+
+  /*
+   * Same limit as "elts * sizeof (element) > 254", but written as a
+   * division so the product cannot wrap where size_t is 32 bits wide.
+   */
+  if (trace_option_elts > 254 / sizeof (ioam_data_list_element_t))
+    return VNET_API_ERROR_INVALID_VALUE;
+
+  /*
+   * Work out how much space we need. Size each option from the struct
+   * that is actually written below, so the allocation can never be
+   * smaller than the region the fill code walks over.
+   */
+  size = sizeof (ip6_hop_by_hop_header_t);
+
+  if (trace_option_elts)
+    {
+      size += sizeof (ioam_trace_option_t);
+      size += trace_option_elts * (sizeof (ioam_data_list_element_t));
+    }
+  if (has_pow_option)
+    size += sizeof (ioam_pow_option_t);
+
+  /* Round to a multiple of 8 octets */
+  rnd_size = (size + 7) & ~7;
+
+  /* allocate it, zero-fill / pad by construction */
+  vec_validate (rewrite, rnd_size-1);
+
+  hbh = (ip6_hop_by_hop_header_t *) rewrite;
+  /* Length of header in 8 octet units, not incl first 8 octets */
+  hbh->length = (rnd_size>>3) - 1;
+  current = (u8 *)(hbh+1);
+
+  if (trace_option_elts)
+    {
+      trace_option = (ioam_trace_option_t *)current;
+      trace_option->hdr.type = HBH_OPTION_TYPE_IOAM_DATA_LIST
+        | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
+      /* Option data = the elts-left octet plus the element slots */
+      trace_option->hdr.length = 1 /*data_list_elts_left */ +
+        trace_option_elts * sizeof (ioam_data_list_element_t);
+      trace_option->data_list_elts_left = trace_option_elts;
+      current += sizeof (ioam_trace_option_t) +
+        trace_option_elts * sizeof (ioam_data_list_element_t);
+    }
+  if (has_pow_option)
+    {
+      pow_option = (ioam_pow_option_t *)current;
+      pow_option->hdr.type = HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK
+        | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
+      /* Option data = everything after the 2 octet option header */
+      pow_option->hdr.length = sizeof (ioam_pow_option_t) -
+        sizeof (ip6_hop_by_hop_option_t);
+    }
+
+  *rwp = rewrite;
+  return 0;
+}
+
+/*
+ * CLI handler for "ioam set rewrite [trace-elts <nn>] [pow]".
+ * Parses the optional arguments (parsing stops at the first token that
+ * matches neither keyword) and rebuilds the global rewrite string.
+ * Returns a clib error if ip6_ioam_set_rewrite reports a non-zero status.
+ */
+static clib_error_t *
+ip6_ioam_set_rewrite_command_fn (vlib_main_t * vm,
+                                 unformat_input_t * input,
+                                 vlib_cli_command_t * cmd)
+{
+  u32 n_trace_elts = 0;
+  int want_pow = 0;
+  int status;
+
+  for (;;)
+    {
+      if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+        break;
+
+      if (unformat (input, "trace-elts %d", &n_trace_elts))
+        continue;
+
+      if (unformat (input, "pow"))
+        {
+          want_pow = 1;
+          continue;
+        }
+
+      /* Unrecognized token: stop parsing */
+      break;
+    }
+
+  status = ip6_ioam_set_rewrite (&ip6_hop_by_hop_main.rewrite,
+                                 n_trace_elts, want_pow);
+  if (status != 0)
+    return clib_error_return (0, "ip6_ioam_set_rewrite returned %d", status);
+
+  return 0;
+}
+
+/* CLI registration for the handler above */
+VLIB_CLI_COMMAND (ip6_ioam_set_rewrite_cmd, static) = {
+  .path = "ioam set rewrite",
+  .short_help = "ioam set rewrite [trace-elts <nn>] [pow]",
+  .function = ip6_ioam_set_rewrite_command_fn,
+};
+
+/*
+ * Change the lookup-next action of the adjacency behind an ip6 FIB entry.
+ *
+ * addr/mask_width - prefix identifying the FIB entry
+ * vrf_id          - table id, mapped through im->fib_index_by_table_id
+ * is_add          - send matching packets to ip6-add-hop-by-hop
+ * is_pop          - send matching packets to ip6-pop-hop-by-hop
+ * is_none         - only undo a previous add / pop
+ *
+ * The three flags must sum to exactly 1. Any previously saved action is
+ * restored first; for add / pop the current action is then saved in
+ * saved_lookup_next_index before being replaced.
+ *
+ * Returns 0, VNET_API_ERROR_INVALID_VALUE_2 (bad flag combination),
+ * VNET_API_ERROR_NO_SUCH_FIB or VNET_API_ERROR_NO_SUCH_ENTRY.
+ */
+int ip6_ioam_set_destination (ip6_address_t *addr, u32 mask_width, u32 vrf_id,
+                              int is_add, int is_pop, int is_none)
+{
+  ip6_main_t * im = &ip6_main;
+  ip_adjacency_t * adj;
+  uword * fib_entry;
+  u32 fib_index, n_lengths, adj_index;
+  int idx;
+  int found = 0;
+  BVT(clib_bihash_kv) kv, value;
+
+  if (is_add + is_pop + is_none != 1)
+    return VNET_API_ERROR_INVALID_VALUE_2;
+
+  /* Go find the adjacency we're supposed to tickle */
+  fib_entry = hash_get (im->fib_index_by_table_id, vrf_id);
+  if (fib_entry == 0)
+    return VNET_API_ERROR_NO_SUCH_FIB;
+  fib_index = fib_entry[0];
+
+  /* Only prefix lengths currently present in the FIB can match */
+  n_lengths = vec_len (im->prefix_lengths_in_search_order);
+  for (idx = 0; idx < n_lengths && !found; idx++)
+    {
+      int prefix_len = im->prefix_lengths_in_search_order[idx];
+      ip6_address_t * mask;
+
+      if (prefix_len != mask_width)
+        continue;
+
+      /* Key = masked address, then (fib index << 32 | prefix length) */
+      mask = &im->fib_masks[prefix_len];
+      kv.key[0] = addr->as_u64[0] & mask->as_u64[0];
+      kv.key[1] = addr->as_u64[1] & mask->as_u64[1];
+      kv.key[2] = ((u64)((fib_index))<<32) | prefix_len;
+
+      if (BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value) == 0)
+        found = 1;
+    }
+
+  if (!found)
+    return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+  /* Got it, modify as directed... */
+  adj_index = value.value;
+  adj = ip_get_adjacency (&im->lookup_main, adj_index);
+
+  /* Restore original lookup-next action */
+  if (adj->saved_lookup_next_index)
+    {
+      adj->lookup_next_index = adj->saved_lookup_next_index;
+      adj->saved_lookup_next_index = 0;
+    }
+
+  /* Save current action */
+  if (is_add || is_pop)
+    adj->saved_lookup_next_index = adj->lookup_next_index;
+
+  if (is_add)
+    adj->lookup_next_index = IP_LOOKUP_NEXT_ADD_HOP_BY_HOP;
+
+  if (is_pop)
+    adj->lookup_next_index = IP_LOOKUP_NEXT_POP_HOP_BY_HOP;
+
+  return 0;
+}
+
+/*
+ * CLI handler for "ioam set destination <ip6-address>/<width> add|pop|none".
+ * Also accepts "vrf-id <n>" (default 0). Parsing stops at the first token
+ * that matches nothing. Exactly one of add / pop / none and a prefix are
+ * required; any non-zero status from ip6_ioam_set_destination is reported
+ * as a clib error.
+ */
+static clib_error_t *
+ip6_ioam_set_destination_command_fn (vlib_main_t * vm,
+                                     unformat_input_t * input,
+                                     vlib_cli_command_t * cmd)
+{
+  ip6_address_t prefix;
+  u32 prefix_len = ~0;   /* ~0 marks "no prefix parsed yet" */
+  u32 table_id = 0;
+  int add = 0, pop = 0, none = 0;
+  int status;
+
+  for (;;)
+    {
+      if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+        break;
+
+      if (unformat (input, "%U/%d",
+                    unformat_ip6_address, &prefix, &prefix_len))
+        continue;
+      if (unformat (input, "vrf-id %d", &table_id))
+        continue;
+
+      if (unformat (input, "add"))
+        add = 1;
+      else if (unformat (input, "pop"))
+        pop = 1;
+      else if (unformat (input, "none"))
+        none = 1;
+      else
+        break;
+    }
+
+  if (add + pop + none != 1)
+    return clib_error_return (0, "One of (add, pop, none) required");
+  if (prefix_len == ~0)
+    return clib_error_return (0, "<address>/<mask-width> required");
+
+  status = ip6_ioam_set_destination (&prefix, prefix_len, table_id,
+                                     add, pop, none);
+  if (status != 0)
+    return clib_error_return (0, "ip6_ioam_set_destination returned %d", status);
+
+  return 0;
+}
+
+/* CLI registration for the handler above */
+VLIB_CLI_COMMAND (ip6_ioam_set_destination_cmd, static) = {
+  .path = "ioam set destination",
+  .short_help = "ioam set destination <ip6-address>/<width> add | pop | none",
+  .function = ip6_ioam_set_destination_command_fn,
+};
+
+/*
+ * Install the end-of-path callback stored in ip6_hop_by_hop_main.
+ * ip6-pop-hop-by-hop calls it (when non-null) for packets carrying a
+ * hop-by-hop header and uses its return value as the next index.
+ */
+void vnet_register_ioam_end_of_path_callback (void *cb)
+{
+  ip6_hop_by_hop_main.ioam_end_of_path_cb = cb;
+}
+
diff --git a/vnet/vnet/ip/ip6_hop_by_hop.h b/vnet/vnet/ip/ip6_hop_by_hop.h
new file mode 100644
index 00000000000..82bafc5777b
--- /dev/null
+++ b/vnet/vnet/ip/ip6_hop_by_hop.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ip6_hop_by_hop_h__
+#define __included_ip6_hop_by_hop_h__
+
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+
+/* Global state shared by the ip6 hop-by-hop / iOAM code */
+typedef struct {
+ /* The current rewrite we're using */
+ u8 * rewrite;
+
+ /* Trace data processing callback; stored untyped, null when not set */
+ void *ioam_end_of_path_cb;
+
+ /* Configured node-id */
+ u32 node_id;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} ip6_hop_by_hop_main_t;
+
+#endif /* __included_ip6_hop_by_hop_h__ */
diff --git a/vnet/vnet/ip/ip6_hop_by_hop_packet.h b/vnet/vnet/ip/ip6_hop_by_hop_packet.h
new file mode 100644
index 00000000000..a3d19035dae
--- /dev/null
+++ b/vnet/vnet/ip/ip6_hop_by_hop_packet.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ip6_hop_by_hop_packet_h__
+#define __included_ip6_hop_by_hop_packet_h__
+
+/* Fixed 2-octet start of an ip6 hop-by-hop extension header */
+typedef struct {
+ /* Protocol for next header */
+ u8 protocol;
+ /*
+ * Length of hop_by_hop header in 8 octet units,
+ * not including the first 8 octets
+ * (total octets = (length + 1) << 3)
+ */
+ u8 length;
+} ip6_hop_by_hop_header_t;
+
+/* Type / length header that starts every hop-by-hop option */
+typedef struct {
+ /*
+ * Option Type.
+ * Bits 7..6: HBH_OPTION_TYPE_{SKIP,DISCARD}_UNKNOWN* action value.
+ * Bit 5: HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE flag.
+ * Bits 4..0: option number, extracted with HBH_OPTION_TYPE_MASK.
+ */
+#define HBH_OPTION_TYPE_SKIP_UNKNOWN (0x0 << 6)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN (0x1 << 6)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP (0x2 << 6)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST (0x3 << 6)
+#define HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE (1<<5)
+#define HBH_OPTION_TYPE_MASK (0x1F)
+ u8 type;
+ /* Length in octets of the option data field */
+ u8 length;
+} ip6_hop_by_hop_option_t;
+
+/*
+ * Option numbers (low 5 bits of the option type) used for the iOAM options.
+ * $$$$ Placeholder values - NOTE(review): presumably not IANA assigned; confirm.
+ */
+#define HBH_OPTION_TYPE_IOAM_DATA_LIST 1
+#define HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK 2
+
+/*
+ * One 16-octet record in the iOAM trace option's data list.
+ * NOTE(review): field semantics are inferred from the names only - confirm
+ * the packing of ttl_node_id and the timestamp units against the iOAM spec.
+ */
+typedef struct {
+ u32 ttl_node_id;
+ u16 ingress_if;
+ u16 egress_if;
+ u32 timestamp;
+ u32 app_data;
+} ioam_data_list_element_t;
+
+/*
+ * iOAM trace option: option header, a one-octet count of list elements
+ * still unused, then the element slots themselves (variable length).
+ * Packed, so the fixed part is 3 octets.
+ */
+typedef CLIB_PACKED(struct {
+ ip6_hop_by_hop_option_t hdr;
+ u8 data_list_elts_left;
+ ioam_data_list_element_t elts[0];
+}) ioam_trace_option_t;
+
+/*
+ * iOAM proof-of-work option: option header followed by 18 octets of
+ * option data. Packed, 20 octets in total.
+ */
+typedef CLIB_PACKED(struct {
+ ip6_hop_by_hop_option_t hdr;
+ u8 pow_type;
+ u8 reserved;
+ u32 random[2];
+ u32 cumulative[2];
+}) ioam_pow_option_t;
+
+#endif /* __included_ip6_hop_by_hop_packet_h__ */
diff --git a/vnet/vnet/ip/ip6_input.c b/vnet/vnet/ip/ip6_input.c
new file mode 100644
index 00000000000..ef8c7762625
--- /dev/null
+++ b/vnet/vnet/ip/ip6_input.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_input.c: IP v6 input node
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ppp/ppp.h>
+#include <vnet/hdlc/hdlc.h>
+
+/* Trace record for ip6-input; formatted as an ip6 header by the tracer */
+typedef struct {
+ u8 packet_data[64];
+} ip6_input_trace_t;
+
+/*
+ * Packet trace formatter for ip6-input.
+ * Consumes (vlib_main_t *, vlib_node_t *, ip6_input_trace_t *) from the
+ * va_list and formats the captured packet bytes as an ip6 header.
+ */
+static u8 * format_ip6_input_trace (u8 * s, va_list * va)
+{
+  ip6_input_trace_t * trace;
+
+  /* The first two arguments are not needed; just step over them */
+  (void) va_arg (*va, vlib_main_t *);
+  (void) va_arg (*va, vlib_node_t *);
+  trace = va_arg (*va, ip6_input_trace_t *);
+
+  return format (s, "%U",
+                 format_ip6_header,
+                 trace->packet_data, sizeof (trace->packet_data));
+}
+
+/* Next-node indices for ip6-input; names are bound in the node registration */
+typedef enum {
+ IP6_INPUT_NEXT_DROP,
+ IP6_INPUT_NEXT_LOOKUP,
+ IP6_INPUT_NEXT_TTL_EXPIRE,
+ IP6_INPUT_N_NEXT,
+} ip6_input_next_t;
+
+/*
+ * Validate IP v6 packets and pass them either to forwarding code
+ * or drop exception packets.
+ *
+ * For each buffer: pick the rx feature configuration (unicast or multicast,
+ * by destination address) for the receiving interface, take the default
+ * next index from it, bump the per-interface IP6 counter, then check:
+ *   - version field is 6          (else IP6_ERROR_VERSION)
+ *   - hop limit is at least 1     (else IP6_ERROR_TIME_EXPIRED)
+ *   - buffer holds a full header  (else IP6_ERROR_TOO_SHORT)
+ * When several checks fail the last one listed wins. Expired packets go
+ * to IP6_INPUT_NEXT_TTL_EXPIRE, other failures to IP6_INPUT_NEXT_DROP.
+ *
+ * Returns the number of vectors in the frame.
+ */
+static uword
+ip6_input (vlib_main_t * vm,
+           vlib_node_runtime_t * node,
+           vlib_frame_t * frame)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip6_main_t * im = &ip6_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+  u32 n_left_from, * from, * to_next;
+  ip6_input_next_t next_index;
+  vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
+  vlib_simple_counter_main_t * cm;
+  u32 cpu_index = os_get_cpu_number();
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  /* Tracing is done for the whole frame up front, not per packet */
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+                                   /* stride */ 1,
+                                   sizeof (ip6_input_trace_t));
+
+  cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+                         VNET_INTERFACE_COUNTER_IP6);
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      /* Dual loop: two packets per iteration, prefetching the next two */
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          vlib_buffer_t * p0, * p1;
+          ip6_header_t * ip0, * ip1;
+          ip_config_main_t * cm0, * cm1;
+          u32 pi0, sw_if_index0, next0;
+          u32 pi1, sw_if_index1, next1;
+          u8 error0, error1, cast0, cast1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+
+            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+            CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+          }
+
+          pi0 = from[0];
+          pi1 = from[1];
+
+          to_next[0] = pi0;
+          to_next[1] = pi1;
+          from += 2;
+          to_next += 2;
+          n_left_from -= 2;
+          n_left_to_next -= 2;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          ip0 = vlib_buffer_get_current (p0);
+          ip1 = vlib_buffer_get_current (p1);
+
+          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+          sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
+
+          cast0 = ip6_address_is_multicast (&ip0->dst_address) ? VNET_MULTICAST : VNET_UNICAST;
+          cast1 = ip6_address_is_multicast (&ip1->dst_address) ? VNET_MULTICAST : VNET_UNICAST;
+
+          cm0 = lm->rx_config_mains + cast0;
+          cm1 = lm->rx_config_mains + cast1;
+
+          vnet_buffer (p0)->ip.current_config_index = vec_elt (cm0->config_index_by_sw_if_index, sw_if_index0);
+          vnet_buffer (p1)->ip.current_config_index = vec_elt (cm1->config_index_by_sw_if_index, sw_if_index1);
+
+          vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+          vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0;
+
+          /* Default next index comes from the interface's rx config */
+          vnet_get_config_data (&cm0->config_main,
+                                &vnet_buffer (p0)->ip.current_config_index,
+                                &next0,
+                                /* # bytes of config data */ 0);
+          vnet_get_config_data (&cm1->config_main,
+                                &vnet_buffer (p1)->ip.current_config_index,
+                                &next1,
+                                /* # bytes of config data */ 0);
+
+          vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+          vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
+
+          error0 = error1 = IP6_ERROR_NONE;
+
+          /* Version != 6?  Drop it. */
+          error0 = (clib_net_to_host_u32 (ip0->ip_version_traffic_class_and_flow_label) >> 28) != 6 ? IP6_ERROR_VERSION : error0;
+          error1 = (clib_net_to_host_u32 (ip1->ip_version_traffic_class_and_flow_label) >> 28) != 6 ? IP6_ERROR_VERSION : error1;
+
+          /* hop limit < 1? Drop it.  Packets received with hop limit 1
+           * are valid: link-local traffic such as dhcpv6 packets from
+           * clients is sent with hop-limit 1 and must not be dropped.
+           */
+          error0 = ip0->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error0;
+          error1 = ip1->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error1;
+
+          /* L2 length must be at least minimal IP header. */
+          error0 = p0->current_length < sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
+          error1 = p1->current_length < sizeof (ip1[0]) ? IP6_ERROR_TOO_SHORT : error1;
+
+          if (PREDICT_FALSE(error0 != IP6_ERROR_NONE))
+            {
+              next0 = (error0 == IP6_ERROR_TIME_EXPIRED) ?
+                IP6_INPUT_NEXT_TTL_EXPIRE : IP6_INPUT_NEXT_DROP;
+            }
+          if (PREDICT_FALSE(error1 != IP6_ERROR_NONE))
+            {
+              next1 = (error1 == IP6_ERROR_TIME_EXPIRED) ?
+                IP6_INPUT_NEXT_TTL_EXPIRE : IP6_INPUT_NEXT_DROP;
+            }
+
+          p0->error = error_node->errors[error0];
+          p1->error = error_node->errors[error1];
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, pi1, next0, next1);
+        }
+
+      /* Single loop: same processing, one packet at a time */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t * p0;
+          ip6_header_t * ip0;
+          ip_config_main_t * cm0;
+          u32 pi0, sw_if_index0, next0;
+          u8 error0, cast0;
+
+          pi0 = from[0];
+          to_next[0] = pi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          ip0 = vlib_buffer_get_current (p0);
+
+          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+          cast0 = ip6_address_is_multicast (&ip0->dst_address) ? VNET_MULTICAST : VNET_UNICAST;
+          cm0 = lm->rx_config_mains + cast0;
+          vnet_buffer (p0)->ip.current_config_index = vec_elt (cm0->config_index_by_sw_if_index, sw_if_index0);
+          vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+
+          /* Default next index comes from the interface's rx config */
+          vnet_get_config_data (&cm0->config_main,
+                                &vnet_buffer (p0)->ip.current_config_index,
+                                &next0,
+                                /* # bytes of config data */ 0);
+
+          vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
+          error0 = IP6_ERROR_NONE;
+
+          /* Version != 6?  Drop it. */
+          error0 = (clib_net_to_host_u32 (ip0->ip_version_traffic_class_and_flow_label) >> 28) != 6 ? IP6_ERROR_VERSION : error0;
+
+          /* hop limit < 1? Drop it.  Packets received with hop limit 1
+           * are valid: link-local traffic such as dhcpv6 packets from
+           * clients is sent with hop-limit 1 and must not be dropped.
+           */
+          error0 = ip0->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error0;
+
+          /* L2 length must be at least minimal IP header. */
+          error0 = p0->current_length < sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
+
+          if (PREDICT_FALSE(error0 != IP6_ERROR_NONE))
+            {
+              next0 = (error0 == IP6_ERROR_TIME_EXPIRED) ?
+                IP6_INPUT_NEXT_TTL_EXPIRE : IP6_INPUT_NEXT_DROP;
+            }
+          p0->error = error_node->errors[error0];
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/* Error descriptions, one per entry of the foreach_ip6_error x-macro list */
+static char * ip6_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_error
+#undef _
+};
+
+/* Graph node registration for "ip6-input" */
+VLIB_REGISTER_NODE (ip6_input_node) = {
+ .function = ip6_input,
+ .name = "ip6-input",
+ .vector_size = sizeof (u32),
+
+ .n_errors = IP6_N_ERROR,
+ .error_strings = ip6_error_strings,
+
+ /* Next nodes, indexed by ip6_input_next_t */
+ .n_next_nodes = IP6_INPUT_N_NEXT,
+ .next_nodes = {
+ [IP6_INPUT_NEXT_DROP] = "error-drop",
+ [IP6_INPUT_NEXT_LOOKUP] = "ip6-lookup",
+ [IP6_INPUT_NEXT_TTL_EXPIRE] = "ip6-icmp-ttl-expire",
+ },
+
+ .format_buffer = format_ip6_header,
+ .format_trace = format_ip6_input_trace,
+};
+
+/*
+ * Init function for ip6 input: registers ip6-input as the handler for
+ * ip6 payloads on ethernet, ppp and hdlc, hooks up the packet generator
+ * header parser, and sets the flow hash seed and default hop limit.
+ * Always succeeds (returns 0).
+ */
+static clib_error_t * ip6_init (vlib_main_t * vm)
+{
+  pg_node_t * pg_node;
+
+  ethernet_register_input_type (vm, ETHERNET_TYPE_IP6,
+                                ip6_input_node.index);
+  ppp_register_input_protocol (vm, PPP_PROTOCOL_ip6,
+                               ip6_input_node.index);
+  hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip6,
+                                ip6_input_node.index);
+
+  /* Let the packet generator parse ip6 headers for this node */
+  pg_node = pg_get_node (ip6_input_node.index);
+  pg_node->unformat_edit = unformat_pg_ip6_header;
+
+  /* Set flow hash to something non-zero. */
+  ip6_main.flow_hash_seed = 0xdeadbeef;
+
+  /* Default hop limit for packets we generate. */
+  ip6_main.host_config.ttl = 64;
+
+  return /* no error */ 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_init);
diff --git a/vnet/vnet/ip/ip6_neighbor.c b/vnet/vnet/ip/ip6_neighbor.c
new file mode 100644
index 00000000000..28f964c804f
--- /dev/null
+++ b/vnet/vnet/ip/ip6_neighbor.c
@@ -0,0 +1,3146 @@
+/*
+ * ip/ip6_neighbor.c: IP6 neighbor handling
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/mhash.h>
+#include <vppinfra/md5.h>
+
+#if DPDK==1
+#include <vnet/devices/dpdk/dpdk.h>
+#endif
+
+/* Lookup key of a neighbor cache entry: the neighbor's IP6 address together
+ * with the software interface it belongs to. */
+typedef struct {
+ ip6_address_t ip6_address;
+ u32 sw_if_index;
+ /* Explicitly zeroed by the set/unset functions before the key is looked up. */
+ u32 pad;
+} ip6_neighbor_key_t;
+
+/* can't use sizeof link_layer_address, that's 8 */
+#define ETHER_MAC_ADDR_LEN 6
+
+/* One entry of the neighbor cache (element of neighbor_pool). */
+typedef struct {
+ /* Same key under which the entry is stored in neighbor_index_by_key. */
+ ip6_neighbor_key_t key;
+ /* 8 bytes of storage; ethernet callers in this file copy 6 bytes. */
+ u8 link_layer_address[8];
+ /* clib_cpu_time_now () at the last time the entry was set or refreshed. */
+ u64 cpu_time_last_updated;
+} ip6_neighbor_t;
+
+/* advertised prefix option: one prefix carried in router advertisements */
+typedef struct {
+ /* basic advertised information */
+ ip6_address_t prefix;
+ u8 prefix_len;
+ /* non-zero sets the on-link / autonomous flag in the prefix option */
+ int adv_on_link_flag;
+ int adv_autonomous_flag;
+ u32 adv_valid_lifetime_in_secs;
+ u32 adv_pref_lifetime_in_secs;
+
+ /* advertised values are computed from these times if decrementing */
+ f64 valid_lifetime_expires;
+ f64 pref_lifetime_expires;
+
+ /* local information */
+ /* prefix is only advertised while enabled is non-zero */
+ int enabled;
+ int deprecated_prefix_flag;
+ /* non-zero: advertised lifetimes count down towards the *_expires times */
+ int decrement_lifetime_flag;
+
+#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */
+#define DEF_ADV_VALID_LIFETIME 2592000
+#define DEF_ADV_PREF_LIFETIME 604800
+
+ /* extensions are added here, mobile, DNS etc.. */
+} ip6_radv_prefix_t;
+
+
+/* One MLDP multicast group record (element of mldp_group_pool). */
+typedef struct {
+ /* group information */
+ /* NOTE(review): presumably the MLD record type -- confirm against the users. */
+ u8 type;
+ ip6_address_t mcast_address;
+ u16 num_sources;
+ ip6_address_t *mcast_source_address_pool;
+} ip6_mldp_group_t;
+
+/* configured router advertisement information per ipv6 interface */
+typedef struct {
+
+  /* advertised config information, zero means unspecified */
+  u8 curr_hop_limit;
+  int adv_managed_flag;
+  int adv_other_flag;
+  u16 adv_router_lifetime_in_sec;
+  u32 adv_neighbor_reachable_time_in_msec;
+  u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations;
+
+  /* mtu option, only advertised when non-zero */
+  u32 adv_link_mtu;
+
+  /* source link layer option */
+  u8 link_layer_address[8];
+  u8 link_layer_addr_len;
+
+  /* prefix option */
+  ip6_radv_prefix_t * adv_prefixes_pool;
+
+  /* Hash table mapping address to index in interface advertised prefix pool. */
+  mhash_t address_to_prefix_index;
+
+  /* MLDP group information */
+  ip6_mldp_group_t * mldp_group_pool;
+
+  /* Hash table mapping address to index in mldp address pool. */
+  mhash_t address_to_mldp_index;
+
+  /* local information */
+  u32 sw_if_index;
+  u32 fib_index;
+  int send_radv;  /* radv on/off on this interface - set by config */
+  int cease_radv; /* we are ceasing to send - set by config */
+  int send_unicast;
+  int adv_link_layer_address;
+  int prefix_option;
+  int failed_device_check;
+  int all_routers_mcast;
+  u32 seed;
+  u64 randomizer;
+  int ref_count;
+  u32 all_nodes_adj_index;
+  u32 all_routers_adj_index;
+  u32 all_mldv2_routers_adj_index;
+
+  /* timing information */
+  /* Derived defaults are parenthesized so that they expand safely inside
+   * larger expressions (e.g. x / DEF_DEF_RTR_LIFETIME). */
+#define DEF_MAX_RADV_INTERVAL 200
+#define DEF_MIN_RADV_INTERVAL (.75 * DEF_MAX_RADV_INTERVAL)
+#define DEF_CURR_HOP_LIMIT 64
+#define DEF_DEF_RTR_LIFETIME (3 * DEF_MAX_RADV_INTERVAL)
+#define MAX_DEF_RTR_LIFETIME 9000
+
+#define MAX_INITIAL_RTR_ADVERT_INTERVAL 16 /* seconds */
+#define MAX_INITIAL_RTR_ADVERTISEMENTS 3   /* transmissions */
+#define MIN_DELAY_BETWEEN_RAS 3            /* seconds */
+#define MAX_DELAY_BETWEEN_RAS 1800         /* seconds */
+#define MAX_RA_DELAY_TIME .5               /* seconds */
+
+  f64 max_radv_interval;
+  f64 min_radv_interval;
+  f64 min_delay_between_radv;
+  f64 max_delay_between_radv;
+  f64 max_rtr_default_lifetime;
+
+  /* time of the last solicited advertisement, used for rate limiting */
+  f64 last_radv_time;
+  f64 last_multicast_time;
+  f64 next_multicast_time;
+
+
+  u32 initial_adverts_count;
+  f64 initial_adverts_interval;
+  u32 initial_adverts_sent;
+
+  /* stats */
+  u32 n_advertisements_sent;
+  u32 n_solicitations_rcvd;
+  u32 n_solicitations_dropped;
+
+  /* Link local address to use (defaults to underlying physical for logical interfaces) */
+  ip6_address_t link_local_address;
+  u8 link_local_prefix_len;
+
+} ip6_radv_t;
+
+/* One waiter for an address that is not resolved yet.  Waiters for the same
+ * address are chained through next_index; ~0 ends the chain. */
+typedef struct {
+ u32 next_index;
+ /* Arguments handed to vlib_process_signal_event () once the address resolves. */
+ uword node_index;
+ uword type_opaque;
+ uword data;
+} pending_resolution_t;
+
+
+/* Global state of the IP6 neighbor / router discovery code. */
+typedef struct {
+ /* Hash tables mapping name to opcode. */
+ uword * opcode_by_name;
+
+ /* lite beer "glean" adjacency handling */
+ /* Maps an ip6 address to the first pending_resolutions index waiting on it. */
+ mhash_t pending_resolutions_by_address;
+ pending_resolution_t * pending_resolutions;
+
+ u32 * neighbor_input_next_index_by_hw_if_index;
+
+ /* Pool of neighbor cache entries. */
+ ip6_neighbor_t * neighbor_pool;
+
+ /* Maps ip6_neighbor_key_t to the entry's index in neighbor_pool. */
+ mhash_t neighbor_index_by_key;
+
+ /* Per sw_if_index index into if_radv_pool; ~0 means no entry. */
+ u32 * if_radv_pool_index_by_sw_if_index;
+
+ ip6_radv_t * if_radv_pool;
+
+ /* Neighbor attack mitigation */
+ /* Zero disables the limit; otherwise an entry is evicted once the pool
+ * holds this many elements. */
+ u32 limit_neighbor_cache_size;
+ /* Pool index where the last eviction scan stopped. */
+ u32 neighbor_delete_rotor;
+
+} ip6_neighbor_main_t;
+
+static ip6_neighbor_main_t ip6_neighbor_main;
+
+/* format () callback printing one neighbor cache entry as a table row.
+ * Arguments taken from the va_list: vlib_main_t *, ip6_neighbor_t *.
+ * A null entry pointer prints the column headings instead. */
+static u8 * format_ip6_neighbor_ip6_entry (u8 * s, va_list * va)
+{
+  vlib_main_t * vm = va_arg (*va, vlib_main_t *);
+  ip6_neighbor_t * entry = va_arg (*va, ip6_neighbor_t *);
+  vnet_main_t * vnm = vnet_get_main();
+  vnet_sw_interface_t * sw_if;
+
+  if (entry != 0)
+    {
+      sw_if = vnet_get_sw_interface (vnm, entry->key.sw_if_index);
+      return format (s, "%=12U%=20U%=20U%=40U",
+                     format_vlib_cpu_time, vm, entry->cpu_time_last_updated,
+                     format_ip6_address, &entry->key.ip6_address,
+                     format_ethernet_address, entry->link_layer_address,
+                     format_vnet_sw_interface_name, vnm, sw_if);
+    }
+
+  /* No entry given: emit the header line. */
+  return format (s, "%=12s%=20s%=20s%=40s", "Time", "Address", "Link layer", "Interface");
+}
+
+/* Admin up/down callback: when an interface goes admin-down, every neighbor
+ * cache entry learned on it is removed from the hash and the pool.
+ * Nothing happens on admin-up.  Always returns 0. */
+static clib_error_t *
+ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm,
+                                   u32 sw_if_index,
+                                   u32 flags)
+{
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  ip6_neighbor_t * nbr;
+  u32 * doomed = 0;
+  u32 * idx;
+
+  if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+    return 0;
+
+  /* Collect the indices first; entries are freed in a second pass so the
+   * pool is not modified while it is being walked. */
+  pool_foreach (nbr, nm->neighbor_pool, ({
+    if (nbr->key.sw_if_index == sw_if_index)
+      vec_add1 (doomed, nbr - nm->neighbor_pool);
+  }));
+
+  vec_foreach (idx, doomed)
+    {
+      nbr = pool_elt_at_index (nm->neighbor_pool, idx[0]);
+      mhash_unset (&nm->neighbor_index_by_key, &nbr->key, 0);
+      pool_put (nm->neighbor_pool, nbr);
+    }
+
+  vec_free (doomed);
+  return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_neighbor_sw_interface_up_down);
+
+/* Evict one neighbor cache entry.  The victim is the next in-use pool
+ * element after neighbor_delete_rotor; the rotor is advanced to it so that
+ * successive calls walk through the pool. */
+static void unset_random_neighbor_entry (void)
+{
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  vnet_main_t * vnm = vnet_get_main();
+  vlib_main_t * vm = vnm->vlib_main;
+  ip6_neighbor_t * victim;
+  u32 index;
+  int attempt;
+
+  /* Second attempt restarts from elt 0, could happen if an intfc goes down */
+  for (attempt = 0; attempt < 2; attempt++)
+    {
+      index = pool_next_index (nm->neighbor_pool, nm->neighbor_delete_rotor);
+      nm->neighbor_delete_rotor = index;
+      if (index != ~0)
+        break;
+    }
+
+  /* Nothing left in the pool */
+  if (index == ~0)
+    return;
+
+  victim = pool_elt_at_index (nm->neighbor_pool, index);
+
+  vnet_unset_ip6_ethernet_neighbor (vm, victim->key.sw_if_index,
+                                    &victim->key.ip6_address,
+                                    victim->link_layer_address,
+                                    ETHER_MAC_ADDR_LEN);
+}
+
+/* Argument block passed to ip6_neighbor_set_unset_rpc_callback () when a
+ * neighbor set/unset request is forwarded via vl_api_rpc_call_main_thread (). */
+typedef struct {
+ /* Non-zero: set the neighbor; zero: unset it. */
+ u8 is_add;
+ u8 pad;
+ u8 link_layer_address[6];
+ u32 sw_if_index;
+ ip6_address_t addr;
+} ip6_neighbor_set_unset_rpc_args_t;
+
+static void ip6_neighbor_set_unset_rpc_callback
+( ip6_neighbor_set_unset_rpc_args_t * a);
+
+#if DPDK > 0
+/* Package a neighbor set (is_add != 0) or unset (is_add == 0) request and
+ * hand it to vl_api_rpc_call_main_thread (), which invokes
+ * ip6_neighbor_set_unset_rpc_callback () with a copy of the arguments.
+ * Exactly 6 bytes are read from link_layer_address. */
+static void set_unset_ip6_neighbor_rpc
+(vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 *link_layer_address,
+ int is_add)
+{
+  ip6_neighbor_set_unset_rpc_args_t args;
+
+  /* Clear the whole block first so the pad byte is not copied into the
+   * RPC message uninitialized. */
+  memset (&args, 0, sizeof (args));
+
+  args.sw_if_index = sw_if_index;
+  args.is_add = is_add;
+  memcpy (&args.addr, a, sizeof (*a));
+  memcpy (args.link_layer_address, link_layer_address,
+          sizeof (args.link_layer_address));
+
+  vl_api_rpc_call_main_thread (ip6_neighbor_set_unset_rpc_callback,
+                               (u8 *) &args, sizeof (args));
+}
+#endif
+
+/* Add or refresh the neighbor cache entry for address a on sw_if_index.
+ *
+ * A new entry also installs a /128 rewrite adjacency for the address in the
+ * interface's FIB.  In both cases the stored link layer address and time
+ * stamp are updated, and every process waiting on the address in
+ * pending_resolutions_by_address is signalled and removed.
+ *
+ * With DPDK enabled, calls made on a cpu other than 0 are forwarded through
+ * set_unset_ip6_neighbor_rpc () and return 0 immediately.
+ *
+ * Returns 0 on success, -1 if n_bytes_link_layer_address does not fit in
+ * the entry's link_layer_address field. */
+int
+vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
+                                u32 sw_if_index,
+                                ip6_address_t * a,
+                                u8 * link_layer_address,
+                                uword n_bytes_link_layer_address)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  ip6_neighbor_key_t k;
+  ip6_neighbor_t * n;
+  ip6_main_t * im = &ip6_main;
+  uword * p;
+  u32 next_index;
+  pending_resolution_t * pr;
+
+  /* Refuse lengths that would make the memcpy below write past the end of
+   * the entry's link layer address storage. */
+  if (n_bytes_link_layer_address > sizeof (n->link_layer_address))
+    return -1;
+
+#if DPDK > 0
+  if (os_get_cpu_number())
+    {
+      set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address,
+                                  1 /* set new neighbor */);
+      return 0;
+    }
+#endif
+
+  k.sw_if_index = sw_if_index;
+  k.ip6_address = a[0];
+  k.pad = 0;
+
+  vlib_worker_thread_barrier_sync (vm);
+
+  p = mhash_get (&nm->neighbor_index_by_key, &k);
+  if (p)
+    n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+  else
+    {
+      ip6_add_del_route_args_t args;
+      ip_adjacency_t adj;
+
+      /* First time we see this neighbor: build a rewrite adjacency and
+       * add a host route pointing at it. */
+      memset (&adj, 0, sizeof(adj));
+      adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+      adj.explicit_fib_index = ~0;
+
+      vnet_rewrite_for_sw_interface
+        (vnm,
+         VNET_L3_PACKET_TYPE_IP6,
+         sw_if_index,
+         ip6_rewrite_node.index,
+         link_layer_address,
+         &adj.rewrite_header,
+         sizeof (adj.rewrite_data));
+
+      args.table_index_or_table_id = im->fib_index_by_sw_if_index[sw_if_index];
+      args.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_ADD | IP6_ROUTE_FLAG_NEIGHBOR;
+      args.dst_address = a[0];
+      args.dst_address_length = 128;
+      args.adj_index = ~0;
+      args.add_adj = &adj;
+      args.n_add_adj = 1;
+
+      ip6_add_del_route (im, &args);
+      pool_get (nm->neighbor_pool, n);
+      mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool,
+                 /* old value */ 0);
+      n->key = k;
+    }
+
+  /* Update time stamp and ethernet address. */
+  memcpy (n->link_layer_address, link_layer_address, n_bytes_link_layer_address);
+  n->cpu_time_last_updated = clib_cpu_time_now ();
+
+  /* Customer(s) waiting for this address to be resolved? */
+  p = mhash_get (&nm->pending_resolutions_by_address, a);
+  if (p == 0)
+    goto out;
+
+  next_index = p[0];
+
+  /* Walk the chain of waiters, signalling and freeing each one. */
+  while (next_index != (u32)~0)
+    {
+      pr = pool_elt_at_index (nm->pending_resolutions, next_index);
+      vlib_process_signal_event (vm, pr->node_index,
+                                 pr->type_opaque,
+                                 pr->data);
+      next_index = pr->next_index;
+      pool_put (nm->pending_resolutions, pr);
+    }
+
+  mhash_unset (&nm->pending_resolutions_by_address, a, 0);
+
+out:
+  vlib_worker_thread_barrier_release(vm);
+  return 0;
+}
+
+/* Remove the neighbor cache entry for address a on sw_if_index and delete
+ * the matching /128 neighbor route from the interface's FIB.
+ *
+ * With DPDK enabled, calls made on a cpu other than 0 are forwarded through
+ * set_unset_ip6_neighbor_rpc () and return 0 immediately.
+ *
+ * Returns 0 on success, -1 when no entry exists for the key. */
+int
+vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
+                                  u32 sw_if_index,
+                                  ip6_address_t * a,
+                                  u8 * link_layer_address,
+                                  uword n_bytes_link_layer_address)
+{
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  ip6_main_t * im = &ip6_main;
+  ip6_neighbor_key_t key;
+  ip6_neighbor_t * entry;
+  ip6_add_del_route_args_t route;
+  uword * slot;
+  int rv;
+
+#if DPDK > 0
+  if (os_get_cpu_number())
+    {
+      set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address,
+                                  0 /* unset */);
+      return 0;
+    }
+#endif
+
+  key.sw_if_index = sw_if_index;
+  key.ip6_address = a[0];
+  key.pad = 0;
+
+  vlib_worker_thread_barrier_sync (vm);
+
+  slot = mhash_get (&nm->neighbor_index_by_key, &key);
+  if (slot != 0)
+    {
+      /* Drop the cache entry ... */
+      entry = pool_elt_at_index (nm->neighbor_pool, slot[0]);
+      mhash_unset (&nm->neighbor_index_by_key, &entry->key, 0);
+      pool_put (nm->neighbor_pool, entry);
+
+      /* ... and then the host route that pointed at it. */
+      route.table_index_or_table_id = im->fib_index_by_sw_if_index[sw_if_index];
+      route.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_DEL
+        | IP6_ROUTE_FLAG_NEIGHBOR;
+      route.dst_address = a[0];
+      route.dst_address_length = 128;
+      route.adj_index = ~0;
+      route.add_adj = NULL;
+      route.n_add_adj = 0;
+      ip6_add_del_route (im, &route);
+      rv = 0;
+    }
+  else
+    rv = -1;
+
+  vlib_worker_thread_barrier_release(vm);
+  return rv;
+}
+
+/* RPC entry point: replays a forwarded request by calling the set or unset
+ * function, selected by a->is_add, with the 6-byte address from the
+ * argument block. */
+static void ip6_neighbor_set_unset_rpc_callback
+( ip6_neighbor_set_unset_rpc_args_t * a)
+{
+  vlib_main_t * vm = vlib_get_main();
+
+  if (! a->is_add)
+    {
+      vnet_unset_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr,
+                                        a->link_layer_address, 6);
+      return;
+    }
+
+  vnet_set_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr,
+                                  a->link_layer_address, 6);
+}
+
+/* Sort comparator for ip6_neighbor_t elements: orders by interface first
+ * (vnet_sw_interface_compare) and by IP6 address within one interface. */
+static int
+ip6_neighbor_sort (void *a1, void *a2)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip6_neighbor_t * lhs = a1;
+  ip6_neighbor_t * rhs = a2;
+  int by_interface;
+
+  by_interface = vnet_sw_interface_compare (vnm, lhs->key.sw_if_index,
+                                            rhs->key.sw_if_index);
+  if (by_interface)
+    return by_interface;
+
+  return ip6_address_compare (&lhs->key.ip6_address, &rhs->key.ip6_address);
+}
+
+/* CLI handler for "show ip6 neighbors": prints a header line followed by
+ * the neighbor cache sorted with ip6_neighbor_sort.  An optional interface
+ * argument restricts the output to entries on that interface.
+ * Always returns 0. */
+static clib_error_t *
+show_ip6_neighbors (vlib_main_t * vm,
+                    unformat_input_t * input,
+                    vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  ip6_neighbor_t * entry;
+  ip6_neighbor_t * sorted = 0;
+  u32 sw_if_index = ~0;
+
+  /* Filter entries by interface if given. */
+  (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
+
+  /* Sort a copy so the pool itself is left untouched. */
+  pool_foreach (entry, nm->neighbor_pool, ({ vec_add1 (sorted, entry[0]); }));
+  vec_sort_with_function (sorted, ip6_neighbor_sort);
+
+  vlib_cli_output (vm, "%U", format_ip6_neighbor_ip6_entry, vm, 0);
+  vec_foreach (entry, sorted) {
+    if (sw_if_index == ~0 || entry->key.sw_if_index == sw_if_index)
+      vlib_cli_output (vm, "%U", format_ip6_neighbor_ip6_entry, vm, entry);
+  }
+  vec_free (sorted);
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_ip6_neighbors_command, static) = {
+ .path = "show ip6 neighbors",
+ .function = show_ip6_neighbors,
+ .short_help = "Show ip6 neighbors",
+};
+
+/* CLI handler for "set ip6 neighbor": parses an interface, an IP6 address
+ * and a MAC address, plus an optional "delete"/"del" keyword, and then sets
+ * or unsets the corresponding neighbor cache entry.  Returns an error when
+ * the interface/address/MAC triple was not supplied. */
+static clib_error_t *
+set_ip6_neighbor (vlib_main_t * vm,
+                  unformat_input_t * input,
+                  vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip6_address_t addr;
+  u8 mac_address[6];
+  u32 sw_if_index;
+  int have_triple = 0;
+  int remove = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      /* intfc, ip6-address, mac-address */
+      if (unformat (input, "%U %U %U",
+                    unformat_vnet_sw_interface, vnm, &sw_if_index,
+                    unformat_ip6_address, &addr,
+                    unformat_ethernet_address, mac_address))
+        {
+          have_triple = 1;
+          continue;
+        }
+
+      if (unformat (input, "delete") || unformat (input, "del"))
+        {
+          remove = 1;
+          continue;
+        }
+
+      /* Anything else ends parsing. */
+      break;
+    }
+
+  if (!have_triple)
+    return clib_error_return (0, "Missing interface, ip6 or hw address");
+
+  if (remove)
+    vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index, &addr,
+                                      mac_address, sizeof(mac_address));
+  else
+    vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, &addr,
+                                    mac_address, sizeof(mac_address));
+  return 0;
+}
+
+VLIB_CLI_COMMAND (set_ip6_neighbor_command, static) = {
+ .path = "set ip6 neighbor",
+ .function = set_ip6_neighbor,
+ .short_help = "set ip6 neighbor [del] <intfc> <ip6-address> <mac-address>",
+};
+
+/* Next-node indices used by the neighbor solicitation/advertisement handler:
+ * DROP for rejected (and all advertisement) packets, REPLY for solicitations
+ * that were turned into an advertisement. */
+typedef enum {
+ ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP,
+ ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY,
+ ICMP6_NEIGHBOR_SOLICITATION_N_NEXT,
+} icmp6_neighbor_solicitation_or_advertisement_next_t;
+
+/* Shared handler for received neighbor solicitations (is_solicitation != 0)
+ * and neighbor advertisements (is_solicitation == 0).
+ *
+ * For each packet in the frame:
+ * - a source address that is neither unspecified nor link-local must
+ *   resolve to an adjacency on the receive interface, otherwise the packet
+ *   is flagged SOURCE_NOT_ON_LINK;
+ * - if the packet carries exactly one 8-byte link layer address option of
+ *   the expected type, the neighbor cache is updated from it (evicting an
+ *   entry first when the configured cache limit is reached);
+ * - a valid solicitation for one of our own addresses is rewritten in place
+ *   into a solicited advertisement and sent to the REPLY next node;
+ * - advertisements are counted and always go to next index 0 (DROP).
+ *
+ * Returns the number of packets in the frame. */
+static_always_inline uword
+icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ uword is_solicitation)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ ip6_main_t * im = &ip6_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ uword n_packets = frame->n_vectors;
+ u32 * from, * to_next;
+ u32 n_left_from, n_left_to_next, next_index, n_advertisements_sent;
+ icmp6_neighbor_discovery_option_type_t option_type;
+ vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
+ int bogus_length;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ /* Solicitations carry the sender's address in a source option,
+ * advertisements in a target option. */
+ option_type =
+ (is_solicitation
+ ? ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address
+ : ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address);
+ n_advertisements_sent = 0;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ ip6_header_t * ip0;
+ icmp6_neighbor_solicitation_or_advertisement_header_t * h0;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t * o0;
+ u32 bi0, options_len0, sw_if_index0, next0, error0;
+ u32 ip6_sadd_link_local, ip6_sadd_unspecified;
+ int is_rewrite0;
+ u32 ni0;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ h0 = ip6_next_header (ip0);
+ options_len0 = clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]);
+
+ error0 = ICMP6_ERROR_NONE;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ ip6_sadd_link_local = ip6_address_is_link_local_unicast(&ip0->src_address);
+ ip6_sadd_unspecified = ip6_address_is_unspecified (&ip0->src_address);
+
+ /* Check that source address is unspecified, link-local or else on-link. */
+ if (!ip6_sadd_unspecified && !ip6_sadd_link_local)
+ {
+ u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
+ ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, src_adj_index0);
+
+ /* Allow all realistic-looking rewrite adjacencies to pass */
+ ni0 = adj0->lookup_next_index;
+ is_rewrite0 = (ni0 >= IP_LOOKUP_NEXT_ARP) &&
+ (ni0 < IP_LOOKUP_N_NEXT);
+
+ error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0
+ || ! is_rewrite0)
+ ? ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK
+ : error0);
+ }
+
+ /* Only a single, well-formed option of the expected type is accepted;
+ * anything else is treated as "no option". */
+ o0 = (void *) (h0 + 1);
+ o0 = ((options_len0 == 8 && o0->header.type == option_type
+ && o0->header.n_data_u64s == 1) ? o0 : 0);
+
+ /* If src address unspecified or link local, do not learn neighbor MAC */
+ if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
+ !ip6_sadd_unspecified && !ip6_sadd_link_local))
+ {
+ ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+ if (nm->limit_neighbor_cache_size &&
+ pool_elts (nm->neighbor_pool) >= nm->limit_neighbor_cache_size)
+ unset_random_neighbor_entry();
+ vnet_set_ip6_ethernet_neighbor (
+ vm, sw_if_index0,
+ is_solicitation ? &ip0->src_address : &h0->target_address,
+ o0->ethernet_address, sizeof (o0->ethernet_address));
+ }
+
+ if (is_solicitation && error0 == ICMP6_ERROR_NONE)
+ {
+ /* Check that target address is one that we know about. */
+ ip_interface_address_t * ia0;
+ ip6_address_fib_t ip6_af0;
+ void * oldheap;
+
+ ip6_addr_fib_init (&ip6_af0, &h0->target_address,
+ vec_elt (im->fib_index_by_sw_if_index,
+ sw_if_index0));
+
+ /* Gross kludge, "thank you" MJ, don't even ask */
+ oldheap = clib_mem_set_heap (clib_per_cpu_mheaps[0]);
+ ia0 = ip_get_interface_address (lm, &ip6_af0);
+ clib_mem_set_heap (oldheap);
+ error0 = ia0 == 0 ?
+ ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN : error0;
+ }
+
+ if (is_solicitation)
+ next0 = (error0 != ICMP6_ERROR_NONE
+ ? ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP
+ : ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY);
+ else
+ {
+ /* Advertisements are consumed here; valid ones are only counted. */
+ next0 = 0;
+ error0 = error0 == ICMP6_ERROR_NONE ?
+ ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_RX : error0;
+ }
+
+ if (is_solicitation && error0 == ICMP6_ERROR_NONE)
+ {
+ vnet_sw_interface_t * sw_if0;
+ ethernet_interface_t * eth_if0;
+ ethernet_header_t *eth0;
+
+ /* dst address is either source address or the all-nodes mcast addr */
+ if(!ip6_sadd_unspecified)
+ ip0->dst_address = ip0->src_address;
+ else
+ ip6_set_reserved_multicast_address(&ip0->dst_address,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+
+ ip0->src_address = h0->target_address;
+ ip0->hop_limit = 255;
+ h0->icmp.type = ICMP6_neighbor_advertisement;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 = ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+ if (eth_if0 && o0)
+ {
+ memcpy (o0->ethernet_address, eth_if0->address, 6);
+ o0->header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
+ }
+
+ h0->advertisement_flags = clib_host_to_net_u32
+ (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED
+ | ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+
+ h0->icmp.checksum = 0;
+ h0->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0,
+ &bogus_length);
+ ASSERT(bogus_length == 0);
+
+ /* Reuse current MAC header, copy SMAC to DMAC and
+ * interface MAC to SMAC */
+ vlib_buffer_reset (p0);
+ eth0 = vlib_buffer_get_current(p0);
+ memcpy(eth0->dst_address, eth0->src_address, 6);
+ /* eth_if0 may be null (it is tested above), so only overwrite the
+ * source MAC when an ethernet interface was actually found. */
+ if (eth_if0)
+ memcpy(eth0->src_address, eth_if0->address, 6);
+
+ /* Setup input and output sw_if_index for packet */
+ ASSERT(vnet_buffer(p0)->sw_if_index[VLIB_RX] == sw_if_index0);
+ vnet_buffer(p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ vnet_buffer(p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ n_advertisements_sent++;
+ }
+
+ p0->error = error_node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Account for advertisements sent. */
+ vlib_error_count (vm, error_node->node_index, ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX, n_advertisements_sent);
+
+ return frame->n_vectors;
+}
+
+/* for "syslogging" - use elog for now */
+/* X-macro list of log levels: _(enum suffix, display string). */
+#define foreach_log_level \
+ _ (DEBUG, "DEBUG") \
+ _ (INFO, "INFORMATION") \
+ _ (NOTICE, "NOTICE") \
+ _ (WARNING, "WARNING") \
+ _ (ERR, "ERROR") \
+ _ (CRIT, "CRITICAL") \
+ _ (ALERT, "ALERT") \
+ _ (EMERG, "EMERGENCY")
+
+/* LOG_DEBUG = 0 ... LOG_EMERG, in the order listed above. */
+typedef enum {
+#define _(f,s) LOG_##f,
+ foreach_log_level
+#undef _
+} log_level_t;
+
+/* Display strings, indexed by log_level_t. */
+static char * log_level_strings[] = {
+#define _(f,s) s,
+ foreach_log_level
+#undef _
+};
+
+/* Bit mask of enabled levels (bit n enables level n); only LOG_DEBUG is
+ * enabled by this initializer. */
+static int logmask = 1 << LOG_DEBUG;
+
+/* Log a printf-style message at the given priority (a log_level_t value)
+ * through the event logger.  Nothing is logged when the priority is out of
+ * range, when its bit is not set in logmask, or when fmt is null. */
+static void
+ip6_neighbor_syslog(vlib_main_t *vm, int priority, char * fmt, ...)
+{
+  /* just use elog for now */
+  u8 *what;
+  va_list va;
+
+  /* A negative priority would index before log_level_strings and make the
+   * shift below undefined, so reject it together with too-large values. */
+  if( (priority < 0) || (priority > LOG_EMERG) ||
+      !(logmask & (1 << priority)))
+    return;
+
+  if (fmt == 0)
+    return;
+
+  va_start (va, fmt);
+  what = va_format (0, fmt, &va);
+  va_end (va);
+
+  /* The formatted vector is not NUL terminated; terminate it before it is
+   * consumed as a C string. */
+  vec_add1 (what, 0);
+
+  {
+    ELOG_TYPE_DECLARE (e) = {
+      .format = "ip6 nd: (%s): %s",
+      .format_args = "T4T4",
+    };
+    struct { u32 s[2]; } * ed;
+    ed = ELOG_DATA (&vm->elog_main, e);
+    ed->s[0] = elog_string(&vm->elog_main, log_level_strings[priority]);
+    /* Pass the message as data, not as the format, so '%' characters in it
+     * are not interpreted a second time. */
+    ed->s[1] = elog_string(&vm->elog_main, "%s", what);
+  }
+
+  /* elog_string keeps its own copy in the elog string table
+   * (NOTE(review): confirm against vppinfra elog.c); release ours. */
+  vec_free (what);
+}
+
+/* ipv6 neighbor discovery - router advertisements */
+/* Next-node indices of the router solicitation handler: DROP is the default,
+ * REPLY_TX is chosen for advertisements answering a solicitation, and
+ * REPLY_RW for advertisements sent via the all-nodes adjacency. */
+typedef enum {
+ ICMP6_ROUTER_SOLICITATION_NEXT_DROP,
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW,
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX,
+ ICMP6_ROUTER_SOLICITATION_N_NEXT,
+} icmp6_router_solicitation_or_advertisement_next_t;
+
+/* Handle router solicitations and turn them into router advertisements.
+ *
+ * Packets whose destination address is unspecified are not treated as
+ * solicitations (per the comment below they do not come from the wire but
+ * from the nd_timer node); they take the unsolicited path and are sent via
+ * the interface's all-nodes adjacency.
+ *
+ * For each packet: validate the source address, optionally learn the
+ * sender's link layer address, trim the buffer back to the IP6 header and
+ * append a router advertisement header plus link-layer, MTU and prefix
+ * options taken from the interface's ip6_radv_t.  Solicited advertisements
+ * are rate limited with MIN_DELAY_BETWEEN_RAS.
+ *
+ * Returns the number of packets in the frame. */
+static_always_inline uword
+icmp6_router_solicitation(vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ ip6_main_t * im = &ip6_main;
+ ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+ uword n_packets = frame->n_vectors;
+ u32 * from, * to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+ u32 n_advertisements_sent = 0;
+ int bogus_length;
+
+ icmp6_neighbor_discovery_option_type_t option_type;
+
+ vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ /* source may append his LL address */
+ option_type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ ip6_header_t * ip0;
+ ip6_radv_t *radv_info = 0;
+
+ icmp6_neighbor_discovery_header_t * h0;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t * o0;
+
+ u32 bi0, options_len0, sw_if_index0, next0, error0;
+ u32 is_solicitation = 1, is_dropped = 0;
+ u32 is_unspecified, is_link_local;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ h0 = ip6_next_header (ip0);
+ options_len0 = clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]);
+ is_unspecified = ip6_address_is_unspecified (&ip0->src_address);
+ is_link_local = ip6_address_is_link_local_unicast (&ip0->src_address);
+
+ error0 = ICMP6_ERROR_NONE;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* check if solicitation (not from nd_timer node) */
+ if (ip6_address_is_unspecified (&ip0->dst_address))
+ is_solicitation = 0;
+
+ /* Check that source address is unspecified, link-local or else on-link. */
+ if (!is_unspecified && !is_link_local)
+ {
+ u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
+ ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, src_adj_index0);
+
+ error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0
+ || (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
+ && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE))
+ ? ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK
+ : error0);
+ }
+
+ /* check for source LL option and process */
+ o0 = (void *) (h0 + 1);
+ o0 = ((options_len0 == 8
+ && o0->header.type == option_type
+ && o0->header.n_data_u64s == 1)
+ ? o0
+ : 0);
+
+ /* if src address unspecified IGNORE any options */
+ if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
+ !is_unspecified && !is_link_local)) {
+ ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+ /* Make room first when the configured cache limit has been reached. */
+ if (nm->limit_neighbor_cache_size &&
+ pool_elts (nm->neighbor_pool) >= nm->limit_neighbor_cache_size)
+ unset_random_neighbor_entry();
+
+ vnet_set_ip6_ethernet_neighbor (vm, sw_if_index0,
+ &ip0->src_address,
+ o0->ethernet_address,
+ sizeof (o0->ethernet_address));
+ }
+
+ /* default is to drop */
+ next0 = ICMP6_ROUTER_SOLICITATION_NEXT_DROP;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ vnet_sw_interface_t * sw_if0;
+ ethernet_interface_t * eth_if0;
+ u32 adj_index0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 = ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ /* only support ethernet interface type for now */
+ error0 = (!eth_if0) ? ICMP6_ERROR_ROUTER_SOLICITATION_UNSUPPORTED_INTF : error0;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ u32 ri;
+
+ /* adjust the sizeof the buffer to just include the ipv6 header */
+ p0->current_length -= (options_len0 + sizeof(icmp6_neighbor_discovery_header_t));
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index0, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index0];
+
+ if(ri != ~0)
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ error0 = ((!radv_info) ? ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG : error0);
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ f64 now = vlib_time_now (vm);
+
+ /* for solicited adverts - need to rate limit */
+ if(is_solicitation)
+ {
+ if( (now - radv_info->last_radv_time) < MIN_DELAY_BETWEEN_RAS )
+ is_dropped = 1;
+ else
+ radv_info->last_radv_time = now;
+ }
+
+ /* send now */
+ /* The advertisement is still built in full when is_dropped is set;
+ * is_dropped only keeps next0 at DROP further down. */
+ icmp6_router_advertisement_header_t rh;
+
+ rh.icmp.type = ICMP6_router_advertisement;
+ rh.icmp.code = 0;
+ rh.icmp.checksum = 0;
+
+ rh.current_hop_limit = radv_info->curr_hop_limit;
+ rh.router_lifetime_in_sec = clib_host_to_net_u16(radv_info->adv_router_lifetime_in_sec);
+ rh.time_in_msec_between_retransmitted_neighbor_solicitations =
+ clib_host_to_net_u32(radv_info->adv_time_in_msec_between_retransmitted_neighbor_solicitations);
+ rh.neighbor_reachable_time_in_msec =
+ clib_host_to_net_u32(radv_info->adv_neighbor_reachable_time_in_msec);
+
+ rh.flags = (radv_info->adv_managed_flag) ? ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP : 0;
+ rh.flags |= ( (radv_info->adv_other_flag) ? ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP : 0);
+
+
+ /* Running total of everything appended after the IP6 header. */
+ u16 payload_length = sizeof(icmp6_router_advertisement_header_t);
+
+ vlib_buffer_add_data (vm,
+ p0->free_list_index,
+ bi0,
+ (void *)&rh, sizeof(icmp6_router_advertisement_header_t));
+
+ if(radv_info->adv_link_layer_address)
+ {
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t h;
+
+ h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
+ h.header.n_data_u64s = 1;
+
+ /* copy ll address */
+ memcpy(&h.ethernet_address[0], eth_if0->address, 6);
+
+ vlib_buffer_add_data (vm,
+ p0->free_list_index,
+ bi0,
+ (void *)&h, sizeof(icmp6_neighbor_discovery_ethernet_link_layer_address_option_t));
+
+ payload_length += sizeof(icmp6_neighbor_discovery_ethernet_link_layer_address_option_t);
+ }
+
+ /* add MTU option */
+ if(radv_info->adv_link_mtu)
+ {
+ icmp6_neighbor_discovery_mtu_option_t h;
+
+ h.unused = 0;
+ h.mtu = clib_host_to_net_u32(radv_info->adv_link_mtu);
+ h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_mtu;
+ h.header.n_data_u64s = 1;
+
+ payload_length += sizeof( icmp6_neighbor_discovery_mtu_option_t);
+
+ vlib_buffer_add_data (vm,
+ p0->free_list_index,
+ bi0,
+ (void *)&h, sizeof(icmp6_neighbor_discovery_mtu_option_t));
+ }
+
+ /* add advertised prefix options */
+ ip6_radv_prefix_t *pr_info;
+
+ pool_foreach (pr_info, radv_info->adv_prefixes_pool, ({
+
+ if(pr_info->enabled &&
+ (!pr_info->decrement_lifetime_flag || (pr_info->pref_lifetime_expires >0)))
+ {
+ /* advertise this prefix */
+ icmp6_neighbor_discovery_prefix_information_option_t h;
+
+ h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_prefix_information;
+ h.header.n_data_u64s = (sizeof(icmp6_neighbor_discovery_prefix_information_option_t) >> 3);
+
+ h.dst_address_length = pr_info->prefix_len;
+
+ h.flags = (pr_info->adv_on_link_flag) ? ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_FLAG_ON_LINK : 0;
+ h.flags |= (pr_info->adv_autonomous_flag) ? ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_AUTO : 0;
+
+ if(radv_info->cease_radv && pr_info->deprecated_prefix_flag)
+ {
+ h.valid_time = clib_host_to_net_u32(MIN_ADV_VALID_LIFETIME);
+ h.preferred_time = 0;
+ }
+ else
+ {
+ /* Decrementing prefixes advertise the time left until expiry
+ * (clamped at 0) and store it back in pr_info. */
+ if(pr_info->decrement_lifetime_flag)
+ {
+ pr_info->adv_valid_lifetime_in_secs = ((pr_info->valid_lifetime_expires > now)) ?
+ (pr_info->valid_lifetime_expires - now) : 0;
+
+ pr_info->adv_pref_lifetime_in_secs = ((pr_info->pref_lifetime_expires > now)) ?
+ (pr_info->pref_lifetime_expires - now) : 0;
+ }
+
+ h.valid_time = clib_host_to_net_u32(pr_info->adv_valid_lifetime_in_secs);
+ h.preferred_time = clib_host_to_net_u32(pr_info->adv_pref_lifetime_in_secs) ;
+ }
+ h.unused = 0;
+
+ memcpy(&h.dst_address, &pr_info->prefix, sizeof(ip6_address_t));
+
+ payload_length += sizeof( icmp6_neighbor_discovery_prefix_information_option_t);
+
+ vlib_buffer_add_data (vm,
+ p0->free_list_index,
+ bi0,
+ (void *)&h, sizeof(icmp6_neighbor_discovery_prefix_information_option_t));
+
+ }
+ }));
+
+ /* add additional options before here */
+
+ /* finish building the router advertisement... */
+ if(!is_unspecified && radv_info->send_unicast)
+ {
+ ip0->dst_address = ip0->src_address;
+ }
+ else
+ {
+ /* target address is all-nodes mcast addr */
+ ip6_set_reserved_multicast_address(&ip0->dst_address,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+ }
+
+ /* source address MUST be the link-local address */
+ ip0->src_address = radv_info->link_local_address;
+
+ ip0->hop_limit = 255;
+ ip0->payload_length = clib_host_to_net_u16 (payload_length);
+
+ /* rh was copied into the buffer with a zero checksum; fill in the real
+ * one now that all options are in place. */
+ icmp6_router_advertisement_header_t * rh0 = (icmp6_router_advertisement_header_t *)(ip0 + 1);
+ rh0->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0,
+ &bogus_length);
+ ASSERT(bogus_length == 0);
+
+ /* setup output if and adjacency */
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ if (is_solicitation)
+ {
+ ethernet_header_t *eth0;
+ /* Reuse current MAC header, copy SMAC to DMAC and
+ * interface MAC to SMAC */
+ vlib_buffer_reset (p0);
+ eth0 = vlib_buffer_get_current(p0);
+ memcpy(eth0->dst_address, eth0->src_address, 6);
+ memcpy(eth0->src_address, eth_if0->address, 6);
+ next0 = is_dropped ?
+ next0 : ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX;
+ vnet_buffer(p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ }
+ else
+ {
+ /* Not a solicitation: send through the interface's all-nodes
+ * multicast adjacency, which must be a rewrite on this interface. */
+ adj_index0 = radv_info->all_nodes_adj_index;
+ if (adj_index0 == 0)
+ error0 = ICMP6_ERROR_DST_LOOKUP_MISS;
+ else
+ {
+ ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, adj_index0);
+ error0 =
+ ((adj0->rewrite_header.sw_if_index != sw_if_index0
+ || adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE)
+ ? ICMP6_ERROR_ROUTER_SOLICITATION_DEST_UNKNOWN
+ : error0);
+ next0 = is_dropped ?
+ next0 : ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW;
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0;
+ }
+ }
+
+ radv_info->n_solicitations_dropped += is_dropped;
+ radv_info->n_solicitations_rcvd += is_solicitation;
+
+ if((error0 == ICMP6_ERROR_NONE) && !is_dropped)
+ {
+ radv_info->n_advertisements_sent++;
+ n_advertisements_sent++;
+ }
+ }
+ }
+ }
+
+ p0->error = error_node->errors[error0];
+
+ if(error0 != ICMP6_ERROR_NONE)
+ vlib_error_count (vm, error_node->node_index, error0, 1);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Account for router advertisements sent. */
+ vlib_error_count (vm, error_node->node_index, ICMP6_ERROR_ROUTER_ADVERTISEMENTS_TX, n_advertisements_sent);
+
+ return frame->n_vectors;
+}
+
+/*
+ * Input function of the "icmp6-router-advertisement" node.
+ *
+ * Validates each received router advertisement for consistency with what
+ * this router advertises itself (see RFC 4861 section 6.2.7).  Any
+ * inconsistency is only logged through ip6_neighbor_syslog; next0 is never
+ * changed from the drop next, so every packet is dropped.
+ *
+ * vm    - vlib main
+ * node  - runtime of this node
+ * frame - frame of buffer indices holding received RAs
+ *
+ * Returns the number of vectors in the incoming frame.
+ */
+static_always_inline uword
+icmp6_router_advertisement(vlib_main_t * vm,
+                           vlib_node_runtime_t * node,
+                           vlib_frame_t * frame)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  uword n_packets = frame->n_vectors;
+  u32 * from, * to_next;
+  u32 n_left_from, n_left_to_next, next_index;
+  u32 n_advertisements_rcvd = 0;
+
+  vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = n_packets;
+  next_index = node->cached_next_index;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+                                   /* stride */ 1,
+                                   sizeof (icmp6_input_trace_t));
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t * p0;
+          ip6_header_t * ip0;
+          ip6_radv_t *radv_info = 0;
+          icmp6_router_advertisement_header_t * h0;
+          u32 bi0, options_len0, payload_len0, sw_if_index0, next0, error0;
+
+          bi0 = to_next[0] = from[0];
+
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          p0 = vlib_get_buffer (vm, bi0);
+          ip0 = vlib_buffer_get_current (p0);
+          h0 = ip6_next_header (ip0);
+
+          /* A payload shorter than the RA header must not wrap the
+           * unsigned option length around to a huge value. */
+          payload_len0 = clib_net_to_host_u16 (ip0->payload_length);
+          options_len0 = (payload_len0 > sizeof (h0[0])) ?
+            (payload_len0 - sizeof (h0[0])) : 0;
+
+          error0 = ICMP6_ERROR_NONE;
+          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+          /* Check that source address is link-local */
+          error0 = (!ip6_address_is_link_local_unicast (&ip0->src_address)) ?
+            ICMP6_ERROR_ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL : error0;
+
+          /* default is to drop */
+          next0 = ICMP6_ROUTER_SOLICITATION_NEXT_DROP;
+
+          n_advertisements_rcvd++;
+
+          if (error0 == ICMP6_ERROR_NONE)
+            {
+              vnet_sw_interface_t * sw_if0;
+              ethernet_interface_t * eth_if0;
+
+              sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+              ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+              eth_if0 = ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+              /* only support ethernet interface type for now */
+              error0 = (!eth_if0) ? ICMP6_ERROR_ROUTER_SOLICITATION_UNSUPPORTED_INTF : error0;
+
+              if (error0 == ICMP6_ERROR_NONE)
+                {
+                  u32 ri;
+
+                  /* look up the radv_t information for this interface */
+                  vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index0, ~0);
+
+                  ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index0];
+
+                  if(ri != ~0)
+                    radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+                  error0 = ((!radv_info) ? ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG : error0);
+
+                  if (error0 == ICMP6_ERROR_NONE)
+                    {
+                      /* validate advertised information; a zero on either
+                       * side of a value comparison skips that comparison */
+                      if((h0->current_hop_limit && radv_info->curr_hop_limit) &&
+                         (h0->current_hop_limit != radv_info->curr_hop_limit))
+                        {
+                          ip6_neighbor_syslog(vm, LOG_WARNING,
+                                              "our AdvCurHopLimit on %U doesn't agree with %U",
+                                              format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address);
+                        }
+
+                      if((h0->flags & ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP) !=
+                         radv_info->adv_managed_flag)
+                        {
+                          ip6_neighbor_syslog(vm, LOG_WARNING,
+                                              "our AdvManagedFlag on %U doesn't agree with %U",
+                                              format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address);
+                        }
+
+                      if((h0->flags & ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP) !=
+                         radv_info->adv_other_flag)
+                        {
+                          ip6_neighbor_syslog(vm, LOG_WARNING,
+                                              "our AdvOtherConfigFlag on %U doesn't agree with %U",
+                                              format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address);
+                        }
+
+                      if((h0->time_in_msec_between_retransmitted_neighbor_solicitations &&
+                          radv_info->adv_time_in_msec_between_retransmitted_neighbor_solicitations) &&
+                         (h0->time_in_msec_between_retransmitted_neighbor_solicitations !=
+                          clib_host_to_net_u32(radv_info->adv_time_in_msec_between_retransmitted_neighbor_solicitations)))
+                        {
+                          ip6_neighbor_syslog(vm, LOG_WARNING,
+                                              "our AdvRetransTimer on %U doesn't agree with %U",
+                                              format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address);
+                        }
+
+                      if((h0->neighbor_reachable_time_in_msec &&
+                          radv_info->adv_neighbor_reachable_time_in_msec) &&
+                         (h0->neighbor_reachable_time_in_msec !=
+                          clib_host_to_net_u32(radv_info->adv_neighbor_reachable_time_in_msec)))
+                        {
+                          ip6_neighbor_syslog(vm, LOG_WARNING,
+                                              "our AdvReachableTime on %U doesn't agree with %U",
+                                              format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address);
+                        }
+
+                      /* check for MTU or prefix options or .. */
+                      u8 * opt_hdr = (u8 *)(h0 + 1);
+                      while( options_len0 > 0)
+                        {
+                          icmp6_neighbor_discovery_option_header_t *o0 = ( icmp6_neighbor_discovery_option_header_t *)opt_hdr;
+                          int opt_len;
+                          icmp6_neighbor_discovery_option_type_t option_type;
+
+                          /* the type and length bytes must both be inside
+                           * the packet before they are read */
+                          if(options_len0 < 2)
+                            {
+                              ip6_neighbor_syslog(vm, LOG_ERR,
+                                                  "malformed RA packet on %U from %U",
+                                                  format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address);
+                              break;
+                            }
+
+                          /* option length is carried in units of 8 bytes */
+                          opt_len = o0->n_data_u64s << 3;
+                          option_type = o0->type;
+
+                          if(opt_len == 0)
+                            {
+                              ip6_neighbor_syslog(vm, LOG_ERR,
+                                                  " zero length option in RA on %U from %U",
+                                                  format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address);
+                              break;
+                            }
+                          else if( opt_len > options_len0)
+                            {
+                              ip6_neighbor_syslog(vm, LOG_ERR,
+                                                  "option length in RA packet greater than total length on %U from %U",
+                                                  format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address);
+                              break;
+                            }
+
+                          /* advance past this option; o0 still points at it */
+                          options_len0 -= opt_len;
+                          opt_hdr += opt_len;
+
+                          switch(option_type)
+                            {
+                            case ICMP6_NEIGHBOR_DISCOVERY_OPTION_mtu:
+                              {
+                                icmp6_neighbor_discovery_mtu_option_t *h =
+                                  (icmp6_neighbor_discovery_mtu_option_t *)(o0);
+
+                                if(opt_len < sizeof(*h))
+                                  break;
+
+                                if((h->mtu && radv_info->adv_link_mtu) &&
+                                   (h->mtu != clib_host_to_net_u32(radv_info->adv_link_mtu)))
+                                  {
+                                    ip6_neighbor_syslog(vm, LOG_WARNING,
+                                                        "our AdvLinkMTU on %U doesn't agree with %U",
+                                                        format_vnet_sw_if_index_name, vnm, sw_if_index0, format_ip6_address, &ip0->src_address);
+                                  }
+                              }
+                              break;
+
+                            case ICMP6_NEIGHBOR_DISCOVERY_OPTION_prefix_information:
+                              {
+                                icmp6_neighbor_discovery_prefix_information_option_t *h =
+                                  (icmp6_neighbor_discovery_prefix_information_option_t *)(o0);
+
+                                /* validate advertised prefix options */
+                                ip6_radv_prefix_t *pr_info;
+                                u32 preferred, valid;
+
+                                if(opt_len < sizeof(*h))
+                                  break;
+
+                                preferred = clib_net_to_host_u32(h->preferred_time);
+                                valid = clib_net_to_host_u32(h->valid_time);
+
+                                /* look for a matching prefix - if we are advertising it, it had better be consistent */
+                                pool_foreach (pr_info, radv_info->adv_prefixes_pool, ({
+
+                                  ip6_address_t mask;
+                                  ip6_address_mask_from_width(&mask, pr_info->prefix_len);
+
+                                  if(pr_info->enabled &&
+                                     (pr_info->prefix_len == h->dst_address_length) &&
+                                     ip6_address_is_equal_masked (&pr_info->prefix, &h->dst_address, &mask))
+                                    {
+                                      /* found it; lifetimes are only compared
+                                       * when we do not decrement them ourselves */
+                                      if(!pr_info->decrement_lifetime_flag &&
+                                         valid != pr_info->adv_valid_lifetime_in_secs)
+                                        {
+                                          ip6_neighbor_syslog(vm, LOG_WARNING,
+                                                              "our ADV validlifetime on %U for %U does not agree with %U",
+                                                              format_vnet_sw_if_index_name, vnm, sw_if_index0,format_ip6_address, &pr_info->prefix,
+                                                              format_ip6_address, &h->dst_address);
+                                        }
+                                      if(!pr_info->decrement_lifetime_flag &&
+                                         preferred != pr_info->adv_pref_lifetime_in_secs)
+                                        {
+                                          ip6_neighbor_syslog(vm, LOG_WARNING,
+                                                              "our ADV preferredlifetime on %U for %U does not agree with %U",
+                                                              format_vnet_sw_if_index_name, vnm, sw_if_index0,format_ip6_address, &pr_info->prefix,
+                                                              format_ip6_address, &h->dst_address);
+                                        }
+                                    }
+                                  /* NOTE(review): this break sits directly in the
+                                   * pool_foreach body, outside the match test;
+                                   * confirm against the macro expansion whether
+                                   * it ends the walk or only moves on to the
+                                   * next pool element. */
+                                  break;
+                                }));
+                                break;
+                              }
+                            default:
+                              /* skip this one */
+                              break;
+                            }
+                        }
+                    }
+                }
+            }
+
+          p0->error = error_node->errors[error0];
+
+          if(error0 != ICMP6_ERROR_NONE)
+            vlib_error_count (vm, error_node->node_index, error0, 1);
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Account for router advertisements received. */
+  vlib_error_count (vm, error_node->node_index, ICMP6_ERROR_ROUTER_ADVERTISEMENTS_RX, n_advertisements_rcvd);
+
+  return frame->n_vectors;
+}
+
+/*
+ * Add a rewrite adjacency on sw_if_index whose destination MAC is the
+ * IPv6 multicast address 33:33:00:00:00:<group_id>.  The index of the new
+ * adjacency is stored through adj_index.
+ */
+static void
+radv_add_reserved_mcast_adjacency (vnet_main_t * vnm,
+                                   ip_lookup_main_t * lm,
+                                   u32 sw_if_index,
+                                   u8 group_id,
+                                   u32 * adj_index)
+{
+  ip_adjacency_t * adj;
+  u8 link_layer_address[6] = {0x33, 0x33, 0x00, 0x00, 0x00, group_id};
+
+  adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
+                          adj_index);
+
+  adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+  adj->if_address_index = ~0;
+
+  vnet_rewrite_for_sw_interface
+    (vnm,
+     VNET_L3_PACKET_TYPE_IP6,
+     sw_if_index,
+     ip6_rewrite_node.index,
+     link_layer_address,
+     &adj->rewrite_header,
+     sizeof (adj->rewrite_data));
+}
+
+/*
+ * Make sure the link-local reserved multicast group group_id has an entry
+ * in the MLDP group pool of a; an existing entry is left untouched.
+ */
+static void
+radv_add_reserved_mldp_group (ip6_radv_t * a, u32 group_id)
+{
+  ip6_address_t addr;
+  ip6_mldp_group_t * mcast_group_info;
+  uword * p;
+  u32 mi;
+
+  ip6_set_reserved_multicast_address (&addr,
+                                      IP6_MULTICAST_SCOPE_link_local,
+                                      group_id);
+
+  /* lookup mldp info for this address on this interface */
+  p = mhash_get (&a->address_to_mldp_index, &addr);
+  mcast_group_info = p ? pool_elt_at_index (a->mldp_group_pool, p[0]) : 0;
+
+  if (mcast_group_info)
+    return;
+
+  /* add */
+  pool_get (a->mldp_group_pool, mcast_group_info);
+
+  mi = mcast_group_info - a->mldp_group_pool;
+  mhash_set (&a->address_to_mldp_index, &addr, mi, /* old_value */ 0);
+
+  mcast_group_info->type = 4;
+  mcast_group_info->mcast_source_address_pool = 0;
+  mcast_group_info->num_sources = 0;
+  memcpy(&mcast_group_info->mcast_address, &addr, sizeof(ip6_address_t));
+}
+
+/*
+ * Create (is_add != 0) or destroy (is_add == 0) the router advertisement
+ * state for an interface.  Only interfaces whose super interface is an
+ * ethernet hardware interface are handled.
+ *
+ * On add, the new entry gets default values, a link-local address derived
+ * from the ethernet address, rewrite adjacencies for the all-hosts,
+ * all-routers and mldv2-routers multicast groups, and MLDP group entries
+ * for those same three groups.  If an entry already exists it is returned
+ * unchanged.
+ *
+ * On delete, the adjacencies are removed and the prefix pool, the MLDP
+ * group pool and both address hashes are emptied and freed before the
+ * entry is returned to the pool.
+ *
+ * Returns the index of the interface's entry in nm->if_radv_pool, or ~0
+ * when there is none (unsupported interface, entry deleted, or no entry
+ * and is_add == 0).
+ */
+static u32
+ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
+                                   u32 sw_if_index,
+                                   u32 is_add)
+{
+  ip6_main_t * im = &ip6_main;
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  ip_lookup_main_t * lm = &im->lookup_main;
+  ip6_radv_t * a = 0;
+  u32 ri = ~0;
+  vnet_sw_interface_t * sw_if0;
+  ethernet_interface_t * eth_if0 = 0;
+
+  /* lookup radv container - ethernet interfaces only */
+  sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+  if(sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+    eth_if0 = ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+  if(!eth_if0)
+    return ri;
+
+  vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0);
+  ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+  if(ri != ~0)
+    {
+      a = pool_elt_at_index (nm->if_radv_pool, ri);
+
+      if(!is_add)
+        {
+          u32 i, * to_delete = 0;
+          ip6_radv_prefix_t *p;
+          ip6_mldp_group_t *m;
+
+          /* remove adjacencies */
+          ip_del_adjacency (lm, a->all_nodes_adj_index);
+          ip_del_adjacency (lm, a->all_routers_adj_index);
+          ip_del_adjacency (lm, a->all_mldv2_routers_adj_index);
+
+          /* clean up prefix_pool; indices are collected first so the pool
+           * is not modified while it is being walked */
+          pool_foreach (p, a->adv_prefixes_pool, ({
+            vec_add1 (to_delete, p - a->adv_prefixes_pool);
+          }));
+
+          for (i = 0; i < vec_len (to_delete); i++)
+            {
+              p = pool_elt_at_index (a->adv_prefixes_pool, to_delete[i]);
+              mhash_unset (&a->address_to_prefix_index, &p->prefix, 0);
+              pool_put (a->adv_prefixes_pool, p);
+            }
+
+          vec_free (to_delete);
+          to_delete = 0;
+
+          /* clean up mldp group pool */
+          pool_foreach (m, a->mldp_group_pool, ({
+            vec_add1 (to_delete, m - a->mldp_group_pool);
+          }));
+
+          for (i = 0; i < vec_len (to_delete); i++)
+            {
+              m = pool_elt_at_index (a->mldp_group_pool, to_delete[i]);
+              mhash_unset (&a->address_to_mldp_index, &m->mcast_address, 0);
+              pool_put (a->mldp_group_pool, m);
+            }
+
+          vec_free (to_delete);
+
+          /* Release the now-empty containers.  A later add memset()s the
+           * entry and re-inits the hashes, so anything still allocated
+           * here would be leaked. */
+          pool_free (a->adv_prefixes_pool);
+          pool_free (a->mldp_group_pool);
+          mhash_free (&a->address_to_prefix_index);
+          mhash_free (&a->address_to_mldp_index);
+
+          pool_put (nm->if_radv_pool, a);
+          nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ~0;
+          ri = ~0;
+        }
+    }
+  else
+    {
+      if(is_add)
+        {
+          vnet_hw_interface_t * hw_if0;
+
+          hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+          pool_get (nm->if_radv_pool, a);
+
+          ri = a - nm->if_radv_pool;
+          nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ri;
+
+          /* initialize default values (most of which are zero) */
+          memset (a, 0, sizeof (a[0]));
+
+          a->sw_if_index = sw_if_index;
+          a->fib_index = ~0;
+          a->max_radv_interval = DEF_MAX_RADV_INTERVAL;
+          a->min_radv_interval = DEF_MIN_RADV_INTERVAL;
+          a->curr_hop_limit = DEF_CURR_HOP_LIMIT;
+          a->adv_router_lifetime_in_sec = DEF_DEF_RTR_LIFETIME;
+
+          a->adv_link_layer_address = 1; /* send ll address source address option */
+
+          a->min_delay_between_radv = MIN_DELAY_BETWEEN_RAS;
+          a->max_delay_between_radv = MAX_DELAY_BETWEEN_RAS;
+          a->max_rtr_default_lifetime = MAX_DEF_RTR_LIFETIME;
+          a->seed = random_default_seed();
+
+          /* for generating random interface ids */
+          a->randomizer = 0x1119194911191949;
+          a->randomizer = random_u64 ((u32 *)&a->randomizer);
+
+          a->initial_adverts_count = MAX_INITIAL_RTR_ADVERTISEMENTS ;
+          a->initial_adverts_sent = a->initial_adverts_count-1;
+          a->initial_adverts_interval = MAX_INITIAL_RTR_ADVERT_INTERVAL;
+
+          /* default is to send */
+          a->send_radv = 1;
+
+          /* fill in radv_info for this interface that will be needed later */
+          a->adv_link_mtu = hw_if0->max_l3_packet_bytes[VLIB_RX];
+
+          memcpy (a->link_layer_address, eth_if0->address, 6);
+
+          /* fill in default link-local address (this may be overridden) */
+          ip6_link_local_address_from_ethernet_address (&a->link_local_address, eth_if0->address);
+          a->link_local_prefix_len = 64;
+
+          mhash_init (&a->address_to_prefix_index, sizeof (uword), sizeof (ip6_address_t));
+          mhash_init (&a->address_to_mldp_index, sizeof (uword), sizeof (ip6_address_t));
+
+          /* one multicast rewrite adjacency per reserved group */
+          radv_add_reserved_mcast_adjacency (vnm, lm, sw_if_index,
+                                             IP6_MULTICAST_GROUP_ID_all_hosts,
+                                             &a->all_nodes_adj_index);
+          radv_add_reserved_mcast_adjacency (vnm, lm, sw_if_index,
+                                             IP6_MULTICAST_GROUP_ID_all_routers,
+                                             &a->all_routers_adj_index);
+          radv_add_reserved_mcast_adjacency (vnm, lm, sw_if_index,
+                                             IP6_MULTICAST_GROUP_ID_mldv2_routers,
+                                             &a->all_mldv2_routers_adj_index);
+
+          /* add multicast groups we will always be reporting */
+          radv_add_reserved_mldp_group (a, IP6_MULTICAST_GROUP_ID_all_hosts);
+          radv_add_reserved_mldp_group (a, IP6_MULTICAST_GROUP_ID_all_routers);
+          radv_add_reserved_mldp_group (a, IP6_MULTICAST_GROUP_ID_mldv2_routers);
+        }
+    }
+  return ri;
+}
+
+/*
+ * Build an MLDv2 listener report for sw_if_index and hand it to the
+ * "ip6-rewrite-local" node.
+ *
+ * The report carries one address record per entry in the interface's MLDP
+ * group pool.  Nothing is sent when the interface is not ethernet, is not
+ * admin up, has no radv state, or when no buffer can be allocated.
+ */
+static void
+ip6_neighbor_send_mldpv2_report(u32 sw_if_index)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  vlib_main_t * vm = vnm->vlib_main;
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  vnet_sw_interface_t * sup_sw_if;
+  ethernet_interface_t * eth_if;
+  ip6_radv_t * radv_info;
+  ip6_mldp_group_t * group;
+  icmp6_multicast_listener_report_packet_t * pkt;
+  icmp6_multicast_listener_report_header_t * report;
+  icmp6_multicast_address_record_t record;
+  ip6_header_t * ip6;
+  vlib_buffer_t * buf;
+  vlib_node_t * rewrite_node;
+  vlib_frame_t * out_frame;
+  u32 * to_next;
+  u32 radv_index;
+  u32 buf_index;
+  u32 n_wanted = 1;
+  u32 n_got;
+  u16 payload_length;
+  int n_records = 0;
+  int bogus_length;
+
+  sup_sw_if = vnet_get_sup_sw_interface (vnm, sw_if_index);
+  ASSERT (sup_sw_if->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+  eth_if = ethernet_get_interface (&ethernet_main, sup_sw_if->hw_if_index);
+
+  if (!eth_if)
+    return;
+  if (!vnet_sw_interface_is_admin_up (vnm, sw_if_index))
+    return;
+
+  /* find the radv state that belongs to this interface */
+  vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0);
+  radv_index = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+  if (radv_index == ~0)
+    return;
+
+  /* grab one buffer to build the report in */
+  n_got = vlib_buffer_alloc_from_free_list (vm, &buf_index, n_wanted,
+                                            VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+  if (PREDICT_FALSE (n_got == 0))
+    {
+      clib_warning ("buffer allocation failure");
+      return;
+    }
+
+  buf = vlib_get_buffer (vm, buf_index);
+
+  /* the buffer initially holds the fixed part of the report packet */
+  buf->current_length = sizeof (icmp6_multicast_listener_report_packet_t);
+  buf->error = ICMP6_ERROR_NONE;
+
+  /* the IPv6 payload starts out as just the report header */
+  payload_length = sizeof (icmp6_multicast_listener_report_header_t);
+
+  pkt = vlib_buffer_get_current (buf);
+  ip6 = (ip6_header_t *) &pkt->ip;
+  report = (icmp6_multicast_listener_report_header_t *) &pkt->report_hdr;
+
+  memset (pkt, 0x0, sizeof (icmp6_multicast_listener_report_packet_t));
+
+  /* IPv6 header */
+  ip6->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
+  /* for DEBUG - vnet driver won't seem to emit router alerts; */
+  /* use IP_PROTOCOL_ICMP6 here instead when debugging */
+  ip6->protocol = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS;
+  ip6->hop_limit = 1;
+
+  report->icmp.type = ICMP6_multicast_listener_report_v2;
+
+  /* the source MUST be the link-local address of the interface */
+  radv_info = pool_elt_at_index (nm->if_radv_pool, radv_index);
+  ip6->src_address = radv_info->link_local_address;
+
+  /* the destination is the all-mldv2-routers group */
+  ip6_set_reserved_multicast_address (&ip6->dst_address,
+                                      IP6_MULTICAST_SCOPE_link_local,
+                                      IP6_MULTICAST_GROUP_ID_mldv2_routers);
+
+  /* hop-by-hop extension header carrying the router alert option */
+  report->ext_hdr.next_hdr = IP_PROTOCOL_ICMP6;
+  report->ext_hdr.n_data_u64s = 0;
+
+  report->alert.type = IP6_MLDP_ALERT_TYPE;
+  report->alert.len = 2;
+  report->alert.value = 0;
+
+  report->pad.type = 1;
+  report->pad.len = 0;
+
+  report->icmp.checksum = 0;
+
+  /* append one address record for every group on this interface */
+  pool_foreach (group, radv_info->mldp_group_pool, ({
+
+    record.type = group->type;
+    record.aux_data_len_u32s = 0;
+    record.num_sources = clib_host_to_net_u16 (group->num_sources);
+    memcpy (&record.mcast_addr, &group->mcast_address, sizeof (ip6_address_t));
+
+    n_records++;
+
+    vlib_buffer_add_data (vm,
+                          buf->free_list_index,
+                          buf_index,
+                          (void *) &record,
+                          sizeof (icmp6_multicast_address_record_t));
+
+    payload_length += sizeof (icmp6_multicast_address_record_t);
+  }));
+
+  report->rsvd = 0;
+  report->num_addr_records = clib_host_to_net_u16 (n_records);
+
+  /* the lengths are final now, so the checksum can be computed */
+  ip6->payload_length = clib_host_to_net_u16 (payload_length);
+
+  report->icmp.checksum =
+    ip6_tcp_udp_icmp_compute_checksum (vm, buf, ip6, &bogus_length);
+  ASSERT (bogus_length == 0);
+
+  /*
+   * Overriding the RX interface without consulting the FIB is fine:
+   * ip6-rewrite-local only looks at the adjacency.
+   */
+  vnet_buffer (buf)->sw_if_index[VLIB_RX] =
+    vnet_main.local_interface_sw_if_index;
+
+  vnet_buffer (buf)->ip.adj_index[VLIB_RX] =
+    radv_info->all_mldv2_routers_adj_index;
+
+  /* hand a single-buffer frame to the rewrite node */
+  rewrite_node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite-local");
+
+  out_frame = vlib_get_frame_to_node (vm, rewrite_node->index);
+  to_next = vlib_frame_vector_args (out_frame);
+  to_next[0] = buf_index;
+  out_frame->n_vectors = 1;
+
+  vlib_put_frame_to_node (vm, rewrite_node->index, out_frame);
+}
+
+/*
+ * Graph node running icmp6_router_solicitation.  Its next nodes are the
+ * error drop, the local rewrite path and direct interface output.
+ */
+VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) = {
+  .name = "icmp6-router-solicitation",
+  .function = icmp6_router_solicitation,
+  .format_trace = format_icmp6_input_trace,
+  .vector_size = sizeof (u32),
+
+  .next_nodes = {
+    [ICMP6_ROUTER_SOLICITATION_NEXT_DROP] = "error-drop",
+    [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite-local",
+    [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output",
+  },
+  .n_next_nodes = ICMP6_ROUTER_SOLICITATION_N_NEXT,
+};
+
+/*
+ * Turn b0 into an internally generated router solicitation for
+ * sw_if_index.  Source and destination are left as the unspecified
+ * address, which marks the packet as generated locally rather than
+ * received.
+ */
+static void
+radv_fill_internal_solicitation (vlib_buffer_t * b0, u32 sw_if_index)
+{
+  icmp6_router_solicitation_header_t * rs;
+
+  b0->current_length = sizeof (icmp6_router_solicitation_header_t);
+  b0->error = ICMP6_ERROR_NONE;
+  vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index;
+
+  rs = vlib_buffer_get_current (b0);
+  memset (rs, 0, sizeof (icmp6_router_solicitation_header_t));
+
+  rs->ip.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
+  rs->ip.payload_length =
+    clib_host_to_net_u16 (sizeof (icmp6_router_solicitation_header_t)
+                          - STRUCT_OFFSET_OF (icmp6_router_solicitation_header_t, neighbor));
+  rs->ip.protocol = IP_PROTOCOL_ICMP6;
+  rs->ip.hop_limit = 255;
+
+  rs->ip.src_address.as_u64[0] = 0;
+  rs->ip.src_address.as_u64[1] = 0;
+
+  rs->ip.dst_address.as_u64[0] = 0;
+  rs->ip.dst_address.as_u64[1] = 0;
+
+  rs->neighbor.icmp.type = ICMP6_router_solicitation;
+}
+
+/*
+ * Periodic router advertisement housekeeping, run for every entry of the
+ * radv pool:
+ *  - interfaces that are admin down get their advertisement state reset;
+ *  - an MLDv2 report is sent once per "up" period so that the all-routers
+ *    group is joined;
+ *  - when a multicast RA is due, the next send time is computed and an
+ *    internally generated router solicitation is queued to the
+ *    router-solicitation node, which builds the actual advertisement.
+ *
+ * node and frame are not used.  Always returns 0.
+ */
+static uword
+ip6_neighbor_process_timer_event (vlib_main_t * vm,
+                                  vlib_node_runtime_t * node,
+                                  vlib_frame_t * frame)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  ip6_radv_t * radv_info;
+  vlib_frame_t * out_frame = 0;
+  vlib_buffer_t * b0;
+  u32 * to_next;
+  u32 n_left_to_next;
+  u32 n_queued = 0;
+  u32 bi0;
+  f64 now = vlib_time_now (vm);
+
+  /* walk the per-interface radv state */
+  pool_foreach (radv_info, nm->if_radv_pool, ({
+
+    u32 n_wanted = 1;
+    u32 n_got;
+    f64 delay;
+
+    if (!vnet_sw_interface_is_admin_up (vnm, radv_info->sw_if_index))
+      {
+        /* interface is down: start over once it comes back up */
+        radv_info->initial_adverts_sent = radv_info->initial_adverts_count-1;
+        radv_info->next_multicast_time = now;
+        radv_info->last_multicast_time = now;
+        radv_info->last_radv_time = 0;
+        radv_info->all_routers_mcast = 0;
+        continue;
+      }
+
+    /* join the all-routers multicast group if that has not happened yet */
+    if (!radv_info->all_routers_mcast)
+      {
+        ip6_neighbor_send_mldpv2_report (radv_info->sw_if_index);
+        radv_info->all_routers_mcast = 1;
+      }
+
+    /* nothing more to do unless a multicast RA is due on this interface */
+    if (!(radv_info->send_radv && (now >= radv_info->next_multicast_time)))
+      continue;
+
+    /* random delay between the minimum and maximum interval */
+    delay = (radv_info->max_radv_interval - radv_info->min_radv_interval) *
+      random_f64 (&radv_info->seed) + radv_info->min_radv_interval;
+
+    /* the first few adverts go out at the shorter initial interval */
+    if (radv_info->initial_adverts_sent > 0)
+      {
+        radv_info->initial_adverts_sent--;
+
+        if (delay > radv_info->initial_adverts_interval)
+          delay = radv_info->initial_adverts_interval;
+
+        /* check to see if we are ceasing to send */
+        if (radv_info->initial_adverts_sent == 0 && radv_info->cease_radv)
+          radv_info->send_radv = 0;
+      }
+
+    radv_info->next_multicast_time = delay + now;
+    radv_info->last_multicast_time = now;
+
+    /* build a "solicited" advert request with unspecified addresses */
+    n_got = vlib_buffer_alloc_from_free_list (vm, &bi0, n_wanted,
+                                              VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+    if (PREDICT_FALSE (n_got == 0))
+      {
+        clib_warning ("buffer allocation failure");
+        continue;
+      }
+
+    b0 = vlib_get_buffer (vm, bi0);
+    radv_fill_internal_solicitation (b0, radv_info->sw_if_index);
+
+    /* open a frame towards the router-solicitation node on demand */
+    if (PREDICT_FALSE (out_frame == 0))
+      {
+        out_frame = vlib_get_frame_to_node (vm, ip6_icmp_router_solicitation_node.index);
+        to_next = vlib_frame_vector_args (out_frame);
+        n_left_to_next = VLIB_FRAME_SIZE;
+        n_queued = 0;
+      }
+
+    to_next[0] = bi0;
+    to_next += 1;
+    n_queued++;
+    n_left_to_next--;
+
+    /* frame is full: ship it */
+    if (PREDICT_FALSE (n_left_to_next == 0))
+      {
+        out_frame->n_vectors = n_queued;
+        vlib_put_frame_to_node (vm, ip6_icmp_router_solicitation_node.index, out_frame);
+        out_frame = 0;
+      }
+  }));
+
+  /* ship whatever is left in a partially filled frame */
+  if (out_frame)
+    {
+      ASSERT(n_queued);
+      out_frame->n_vectors = n_queued;
+      vlib_put_frame_to_node (vm, ip6_icmp_router_solicitation_node.index, out_frame);
+    }
+  return 0;
+}
+
+/*
+ * Process node loop for neighbor discovery events.
+ *
+ * Waits for an event or for one second to pass.  When no event data is
+ * returned, the wait timed out and the periodic RA housekeeping runs.
+ * Otherwise the event type is checked (only ICMP6_ND_EVENT_INIT and ~0 are
+ * accepted, neither triggers any work) and the event vector is reset.
+ * The loop never terminates.
+ */
+static uword
+ip6_icmp_neighbor_discovery_event_process (vlib_main_t * vm,
+                                           vlib_node_runtime_t * node,
+                                           vlib_frame_t * frame)
+{
+  ip6_icmp_neighbor_discovery_event_data_t * event_data;
+  uword event_type;
+
+  /* init code here */
+
+  for (;;)
+    {
+      vlib_process_wait_for_event_or_clock (vm, 1. /* seconds */);
+
+      event_data = vlib_process_get_event_data (vm, &event_type);
+
+      if (!event_data)
+        {
+          /* timer expired: walk the interface list, send RAs that are
+           * due and update the timer info */
+          ip6_neighbor_process_timer_event (vm, node, frame);
+          continue;
+        }
+
+      switch (event_type)
+        {
+        case ICMP6_ND_EVENT_INIT:
+        case ~0:
+          break;
+
+        default:
+          ASSERT (0);
+        }
+
+      /* event_data is known to be non-null here; recycle the vector */
+      _vec_len (event_data) = 0;
+    }
+  return frame->n_vectors;
+}
+
+/*
+ * Graph node running icmp6_router_advertisement.  Its only next node is
+ * the error drop.
+ */
+VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) = {
+  .name = "icmp6-router-advertisement",
+  .function = icmp6_router_advertisement,
+  .format_trace = format_icmp6_input_trace,
+  .vector_size = sizeof (u32),
+
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+  .n_next_nodes = 1,
+};
+
+/* Registration data for the process node that runs the neighbor discovery event loop. */
+vlib_node_registration_t ip6_icmp_neighbor_discovery_event_node = {
+  .name = "ip6-icmp-neighbor-discovery-event-process",
+  .type = VLIB_NODE_TYPE_PROCESS,
+  .function = ip6_icmp_neighbor_discovery_event_process,
+};
+
+/* Node function for neighbor solicitations: the shared handler with is_solicitation set. */
+static uword
+icmp6_neighbor_solicitation (vlib_main_t * vm,
+                             vlib_node_runtime_t * node,
+                             vlib_frame_t * frame)
+{
+  return icmp6_neighbor_solicitation_or_advertisement (vm, node, frame,
+                                                       /* is_solicitation */ 1);
+}
+
+/* Node function for neighbor advertisements: the shared handler with is_solicitation clear. */
+static uword
+icmp6_neighbor_advertisement (vlib_main_t * vm,
+                              vlib_node_runtime_t * node,
+                              vlib_frame_t * frame)
+{
+  return icmp6_neighbor_solicitation_or_advertisement (vm, node, frame,
+                                                       /* is_solicitation */ 0);
+}
+
+/*
+ * Graph node running icmp6_neighbor_solicitation.  Its next nodes are the
+ * error drop and interface output.
+ */
+VLIB_REGISTER_NODE (ip6_icmp_neighbor_solicitation_node,static) = {
+  .name = "icmp6-neighbor-solicitation",
+  .function = icmp6_neighbor_solicitation,
+  .format_trace = format_icmp6_input_trace,
+  .vector_size = sizeof (u32),
+
+  .next_nodes = {
+    [ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP] = "error-drop",
+    [ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY] = "interface-output",
+  },
+  .n_next_nodes = ICMP6_NEIGHBOR_SOLICITATION_N_NEXT,
+};
+
+/*
+ * Graph node running icmp6_neighbor_advertisement.  Its only next node is
+ * the error drop.
+ */
+VLIB_REGISTER_NODE (ip6_icmp_neighbor_advertisement_node,static) = {
+  .name = "icmp6-neighbor-advertisement",
+  .function = icmp6_neighbor_advertisement,
+  .format_trace = format_icmp6_input_trace,
+  .vector_size = sizeof (u32),
+
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+  .n_next_nodes = 1,
+};
+
+/* API support functions */
+/*
+ * Configure router advertisement behaviour on an interface.
+ *
+ * Each option is only touched when its request argument is non-zero.  For
+ * a touched option, is_no restores the default, otherwise the requested
+ * value is applied.  An option whose request argument is zero keeps its
+ * current value.
+ *
+ *   surpress      - stop sending RAs (is_no: send again)
+ *   managed       - set the managed flag (is_no: clear it)
+ *   other         - set the other-config flag (is_no: clear it)
+ *   ll_option     - leave out the link-layer address option (is_no: send it)
+ *   send_unicast  - enable unicast replies (is_no: disable)
+ *   cease         - cease advertising (is_no: resume and restart sending)
+ *   use_lifetime / lifetime     - router lifetime, capped at
+ *                                 MAX_DEF_RTR_LIFETIME
+ *   initial_count / initial_interval - initial advert count and interval
+ *   max_interval / min_interval - advert interval bounds; when only a
+ *                                 maximum is given the minimum becomes
+ *                                 .75 * maximum
+ *
+ * Returns 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX when the
+ * interface has no radv state, or VNET_API_ERROR_INVALID_VALUE when the
+ * resulting lifetime, intervals or initial values are out of range.  A
+ * successful call also restarts the initial advertisement sequence.
+ */
+int
+ip6_neighbor_ra_config(vlib_main_t * vm, u32 sw_if_index,
+                       u8 surpress, u8 managed, u8 other,
+                       u8 ll_option, u8 send_unicast, u8 cease,
+                       u8 use_lifetime, u32 lifetime,
+                       u32 initial_count, u32 initial_interval,
+                       u32 max_interval, u32 min_interval,
+                       u8 is_no)
+{
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  ip6_radv_t * radv_info;
+  u32 ri;
+
+  /* look up the radv_t information for this interface */
+  vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0);
+  ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+  if (ri == ~0)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+  /* derive a minimum from the maximum when only the maximum was given */
+  if (max_interval != 0 && min_interval == 0)
+    min_interval = .75 * max_interval;
+
+  /* resolve the effective maximum interval */
+  if (max_interval == 0)
+    max_interval = radv_info->max_radv_interval;
+  else if (is_no)
+    max_interval = DEF_MAX_RADV_INTERVAL;
+
+  /* resolve the effective minimum interval */
+  if (min_interval == 0)
+    min_interval = radv_info->min_radv_interval;
+  else if (is_no)
+    min_interval = DEF_MIN_RADV_INTERVAL;
+
+  /* resolve the effective router lifetime */
+  if (use_lifetime == 0)
+    lifetime = radv_info->adv_router_lifetime_in_sec;
+  else if (is_no)
+    lifetime = DEF_DEF_RTR_LIFETIME;
+
+  /* a non-zero lifetime is capped and must exceed the maximum interval */
+  if (lifetime)
+    {
+      if (lifetime > MAX_DEF_RTR_LIFETIME)
+        lifetime = MAX_DEF_RTR_LIFETIME;
+
+      if (lifetime <= max_interval)
+        return VNET_API_ERROR_INVALID_VALUE;
+    }
+
+  /* a non-zero minimum must lie within [3, .75 * max_interval] */
+  if (min_interval != 0 &&
+      ((min_interval > .75 * max_interval) || (min_interval < 3)))
+    return VNET_API_ERROR_INVALID_VALUE;
+
+  if ((initial_count > MAX_INITIAL_RTR_ADVERTISEMENTS) ||
+      (initial_interval > MAX_INITIAL_RTR_ADVERT_INTERVAL))
+    return VNET_API_ERROR_INVALID_VALUE;
+
+  /* all checks passed: apply the requested flags */
+  if (surpress != 0)
+    radv_info->send_radv = (is_no != 0) ? 1 : 0;
+  if (managed != 0)
+    radv_info->adv_managed_flag = (is_no) ? 0 : 1;
+  if (other != 0)
+    radv_info->adv_other_flag = (is_no) ? 0 : 1;
+  if (ll_option != 0)
+    radv_info->adv_link_layer_address = (is_no) ? 1 : 0;
+  if (send_unicast != 0)
+    radv_info->send_unicast = (is_no) ? 0 : 1;
+  if (cease != 0)
+    radv_info->cease_radv = (is_no) ? 0 : 1;
+
+  radv_info->min_radv_interval = min_interval;
+  radv_info->max_radv_interval = max_interval;
+  radv_info->adv_router_lifetime_in_sec = lifetime;
+
+  if (initial_count != 0)
+    radv_info->initial_adverts_count =
+      (is_no) ? MAX_INITIAL_RTR_ADVERTISEMENTS : initial_count;
+  if (initial_interval != 0)
+    radv_info->initial_adverts_interval =
+      (is_no) ? MAX_INITIAL_RTR_ADVERT_INTERVAL : initial_interval;
+
+  /* "no cease" restarts sending */
+  if ((cease != 0) && (is_no))
+    radv_info->send_radv = 1;
+
+  /* restart the initial advertisement sequence right away */
+  radv_info->initial_adverts_sent = radv_info->initial_adverts_count -1;
+  radv_info->next_multicast_time = vlib_time_now (vm);
+  radv_info->last_multicast_time = vlib_time_now (vm);
+  radv_info->last_radv_time = 0;
+
+  return 0;
+}
+
+int /* Add, update or (when is_no) delete an advertised prefix on sw_if_index; returns 0 or a VNET_API_ERROR_* value. */
+ip6_neighbor_ra_prefix(vlib_main_t * vm, u32 sw_if_index,
+ ip6_address_t *prefix_addr, u8 prefix_len,
+ u8 use_default, u32 val_lifetime, u32 pref_lifetime,
+ u8 no_advertise, u8 off_link, u8 no_autoconfig, u8 no_onlink,
+ u8 is_no)
+{
+ ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+ int error;
+
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ error = (ri != ~0) ? 0 : VNET_API_ERROR_INVALID_SW_IF_INDEX; /* ~0 marks an interface without RA state */
+
+ if(!error)
+ {
+ f64 now = vlib_time_now (vm);
+ ip6_radv_t * radv_info;
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ /* prefix info add, delete or update */
+ ip6_radv_prefix_t * prefix;
+
+ /* lookup prefix info for this address on this interface */
+ uword * p = mhash_get (&radv_info->address_to_prefix_index, prefix_addr);
+
+ prefix = p ? pool_elt_at_index (radv_info->adv_prefixes_pool, p[0]) : 0; /* 0 when the prefix is not configured yet */
+
+ if(is_no)
+ {
+ /* delete */
+ if(!prefix)
+ return VNET_API_ERROR_INVALID_VALUE; /* prefix is not configured on this interface */
+
+ if(prefix->prefix_len != prefix_len)
+ return VNET_API_ERROR_INVALID_VALUE_2; /* prefix is known, but with a different length */
+
+ /* FIXME - Should the DP do this or the CP ?*/
+ /* do specific delete processing here before returning */
+ /* try to remove from routing table */
+
+ mhash_unset (&radv_info->address_to_prefix_index, prefix_addr,/* old_value */ 0);
+ pool_put (radv_info->adv_prefixes_pool, prefix);
+
+ radv_info->initial_adverts_sent = radv_info->initial_adverts_count -1; /* reset advertisement bookkeeping; same four assignments as at "restart" below */
+ radv_info->next_multicast_time = vlib_time_now (vm);
+ radv_info->last_multicast_time = vlib_time_now (vm);
+ radv_info->last_radv_time = 0;
+ return(error);
+ }
+
+ /* adding or changing */
+ if(!prefix)
+ {
+ /* add */
+ u32 pi;
+ pool_get (radv_info->adv_prefixes_pool, prefix);
+ pi = prefix - radv_info->adv_prefixes_pool; /* pool index stored as the hash value */
+ mhash_set (&radv_info->address_to_prefix_index, prefix_addr, pi, /* old_value */ 0);
+
+ memset(prefix, 0x0, sizeof(ip6_radv_prefix_t));
+
+ prefix->prefix_len = prefix_len;
+ memcpy(&prefix->prefix, prefix_addr, sizeof(ip6_address_t));
+
+ /* initialize default values */
+ prefix->adv_on_link_flag = 1; /* L bit set */
+ prefix->adv_autonomous_flag = 1; /* A bit set */
+ prefix->adv_valid_lifetime_in_secs = DEF_ADV_VALID_LIFETIME;
+ prefix->adv_pref_lifetime_in_secs = DEF_ADV_PREF_LIFETIME;
+ prefix->enabled = 1;
+ prefix->decrement_lifetime_flag = 1;
+ prefix->deprecated_prefix_flag = 1;
+
+ if(off_link == 0)
+ {
+ /* FIXME - Should the DP do this or the CP ?*/
+ /* insert prefix into routing table as a connected prefix */
+ }
+
+ if(use_default)
+ goto restart; /* new prefix: keep the defaults set above and skip the caller-supplied overrides */
+ }
+ else /* NOTE(review): for an existing prefix use_default is not consulted; the caller-supplied values below are applied - confirm this is intended */
+ {
+
+ if(prefix->prefix_len != prefix_len)
+ return VNET_API_ERROR_INVALID_VALUE_2; /* an existing prefix cannot change its length */
+
+ if(off_link != 0)
+ {
+ /* FIXME - Should the DP do this or the CP ?*/
+ /* remove from routing table if already there */
+ }
+ }
+
+ if((val_lifetime == ~0) || (pref_lifetime == ~0)) /* ~0 in either lifetime makes both ~0 and stops lifetime decrementing */
+ {
+ prefix->adv_valid_lifetime_in_secs = ~0;
+ prefix->adv_pref_lifetime_in_secs = ~0;
+ prefix->decrement_lifetime_flag = 0;
+ }
+ else
+ {
+ prefix->adv_valid_lifetime_in_secs = val_lifetime;;
+ prefix->adv_pref_lifetime_in_secs = pref_lifetime;
+ }
+
+ /* copy remaining: each stored flag is the negation of the corresponding "no"/"off" argument */
+ prefix->enabled = !(no_advertise != 0);
+ prefix->adv_on_link_flag = !((off_link != 0) || (no_onlink != 0));
+ prefix->adv_autonomous_flag = !(no_autoconfig != 0);
+
+ restart:
+ /* restart */
+ /* fill in the expiration times */
+ prefix->valid_lifetime_expires = now + prefix->adv_valid_lifetime_in_secs;
+ prefix->pref_lifetime_expires = now + prefix->adv_pref_lifetime_in_secs;
+
+ radv_info->initial_adverts_sent = radv_info->initial_adverts_count -1; /* reset advertisement bookkeeping (presumably so a fresh RA is sent soon - confirm against the RA timer code) */
+ radv_info->next_multicast_time = vlib_time_now (vm);
+ radv_info->last_multicast_time = vlib_time_now (vm);
+ radv_info->last_radv_time = 0;
+ }
+ return(error);
+}
+
+/*
+ * CLI handler for "ip6 nd <interface> [no] <option>": parses one router
+ * advertisement option (or one "prefix ..." clause) and applies it through
+ * ip6_neighbor_ra_config() or ip6_neighbor_ra_prefix().  Every exit taken
+ * after the line input was obtained goes through "done" so line_input is
+ * always freed, and a missing interface name is rejected up front.
+ * Returns 0 on success or a clib_error_t describing the failure.
+ */
+clib_error_t *
+ip6_neighbor_cmd(vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  clib_error_t * error = 0;
+  u8 is_no = 0;
+  u8 surpress = 0, managed = 0, other = 0;
+  u8 surpress_ll_option = 0, send_unicast = 0, cease= 0;
+  u8 use_lifetime = 0;
+  u32 sw_if_index = ~0;
+  u32 ra_lifetime = 0, ra_initial_count = 0, ra_initial_interval = 0;
+  u32 ra_max_interval = 0 , ra_min_interval = 0;
+
+  unformat_input_t _line_input, * line_input = &_line_input;
+  vnet_sw_interface_t * sw_if0;
+  ethernet_interface_t * eth_if0 = 0;
+
+  int add_radv_info = 1;
+  ip6_address_t ip6_addr;
+  u32 addr_len;
+
+  /* Get a line of input. */
+  if (! unformat_user (main_input, unformat_line_input, line_input))
+    return 0;
+
+  /* The interface name is mandatory and must come first (an empty line fails here). */
+  if (! unformat_user (line_input,
+                       unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      error = clib_error_return (0, "invalid interface name %U'",
+                                 format_unformat_error, line_input);
+      goto done;
+    }
+
+  sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+  if (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+    eth_if0 = ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+  if (!eth_if0)
+    {
+      error = clib_error_return (0, "Interface must be of ethernet type");
+      goto done;
+    }
+
+  /* RA state must already exist for this interface (pool index != ~0). */
+  vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0);
+  if (nm->if_radv_pool_index_by_sw_if_index[sw_if_index] == ~0)
+    {
+      error = clib_error_return (0, "unknown interface %U'",
+                                 format_unformat_error, line_input);
+      goto done;
+    }
+
+  /* get the rest of the command: optional "no", then exactly one option */
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "no"))
+        is_no = 1;
+      else if (unformat (line_input, "prefix %U/%d",
+                         unformat_ip6_address, &ip6_addr,
+                         &addr_len))
+        {
+          add_radv_info = 0;
+          break;
+        }
+      else if (unformat (line_input, "ra-managed-config-flag"))
+        {
+          managed = 1;
+          break;
+        }
+      else if (unformat (line_input, "ra-other-config-flag"))
+        {
+          other = 1;
+          break;
+        }
+      else if (unformat (line_input, "ra-surpress-link-layer")) /* longer keyword first so "ra-surpress" cannot consume its prefix */
+        {
+          surpress_ll_option = 1;
+          break;
+        }
+      else if (unformat (line_input, "ra-surpress"))
+        {
+          surpress = 1;
+          break;
+        }
+      else if (unformat (line_input, "ra-send-unicast"))
+        {
+          send_unicast = 1;
+          break;
+        }
+      else if (unformat (line_input, "ra-lifetime"))
+        {
+          if (!unformat (line_input, "%d", &ra_lifetime))
+            {
+              error = unformat_parse_error (line_input);
+              goto done;
+            }
+          use_lifetime = 1;
+          break;
+        }
+      else if (unformat (line_input, "ra-initial"))
+        {
+          if (!unformat (line_input, "%d %d", &ra_initial_count, &ra_initial_interval))
+            {
+              error = unformat_parse_error (line_input);
+              goto done;
+            }
+          break;
+        }
+      else if (unformat (line_input, "ra-interval"))
+        {
+          if (!unformat (line_input, "%d", &ra_max_interval))
+            {
+              error = unformat_parse_error (line_input);
+              goto done;
+            }
+
+          /* the minimum interval is optional */
+          if (!unformat (line_input, "%d", &ra_min_interval))
+            ra_min_interval = 0;
+          break;
+        }
+      else if (unformat (line_input, "ra-cease"))
+        {
+          cease = 1;
+          break;
+        }
+      else
+        {
+          error = unformat_parse_error (line_input);
+          goto done;
+        }
+    }
+
+  if (add_radv_info)
+    {
+      ip6_neighbor_ra_config(vm, sw_if_index,
+                             surpress, managed, other,
+                             surpress_ll_option, send_unicast, cease,
+                             use_lifetime, ra_lifetime,
+                             ra_initial_count, ra_initial_interval,
+                             ra_max_interval, ra_min_interval,
+                             is_no);
+    }
+  else
+    {
+      u32 valid_lifetime_in_secs = 0;
+      u32 pref_lifetime_in_secs = 0;
+      u8 use_prefix_default_values = 0;
+      u8 no_advertise = 0;
+      u8 off_link= 0;
+      u8 no_autoconfig = 0;
+      u8 no_onlink= 0;
+
+      /* optional lifetime clause: "default", "infinite" or "<valid> <preferred>" */
+      if (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+        {
+          if (unformat (line_input, "default"))
+            use_prefix_default_values = 1;
+          else if (unformat (line_input, "infinite"))
+            {
+              valid_lifetime_in_secs = ~0;
+              pref_lifetime_in_secs = ~0;
+            }
+          else
+            (void) unformat (line_input, "%d %d", &valid_lifetime_in_secs,
+                             &pref_lifetime_in_secs);
+        }
+
+      /* remaining per-prefix flags (not parsed after "default") */
+      while (!use_prefix_default_values &&
+             unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+        {
+          if (unformat (line_input, "no-advertise"))
+            no_advertise = 1;
+          else if (unformat (line_input, "off-link"))
+            off_link = 1;
+          else if (unformat (line_input, "no-autoconfig"))
+            no_autoconfig = 1;
+          else if (unformat (line_input, "no-onlink"))
+            no_onlink = 1;
+          else
+            {
+              error = unformat_parse_error (line_input);
+              goto done;
+            }
+        }
+
+      ip6_neighbor_ra_prefix(vm, sw_if_index,
+                             &ip6_addr, addr_len,
+                             use_prefix_default_values,
+                             valid_lifetime_in_secs,
+                             pref_lifetime_in_secs,
+                             no_advertise,
+                             off_link,
+                             no_autoconfig,
+                             no_onlink,
+                             is_no);
+    }
+
+ done:
+  unformat_free (line_input);
+  return error;
+}
+
+static clib_error_t * /* CLI handler for "show ip6 interface <iface name>": prints addresses, groups, prefixes and RA settings/counters */
+show_ip6_interface_cmd (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+ clib_error_t * error = 0;
+ u32 sw_if_index;
+
+ sw_if_index = ~0;
+
+ if (unformat_user (input, /* NOTE(review): when no interface parses, nothing is printed and 0 is returned */
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if(ri != ~0)
+ {
+ ip_lookup_main_t * lm = &ip6_main.lookup_main;
+ ip6_radv_t * radv_info;
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ vlib_cli_output (vm, "%U is admin %s\n", format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, sw_if_index),
+ (vnet_sw_interface_is_admin_up (vnm, sw_if_index) ? "up" : "down"));
+
+ u32 ai;
+ u32 *global_scope = 0,i; /* vector of address-pool indices collected in the first pass */
+ ip_interface_address_t * a;
+
+ vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
+ ai = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+
+ while (ai != (u32)~0) /* walk this interface's address list via next_this_sw_interface */
+ {
+ a = pool_elt_at_index(lm->if_address_pool, ai);
+ ip6_address_t * address = ip_interface_address_get_address (lm, a);
+
+ if( ip6_address_is_link_local_unicast (address))
+ vlib_cli_output (vm, "\tIPv6 is enabled, link-local address is %U\n", format_ip6_address,
+ address);
+
+ if((address->as_u8[0] & 0xe0) == 0x20) /* top three bits are 001, i.e. 2000::/3 */
+ vec_add1 (global_scope, ai);
+
+ ai = a->next_this_sw_interface;
+ }
+
+ vlib_cli_output (vm, "\tGlobal unicast address(es):\n");
+ for (i = 0; i < vec_len (global_scope); i++)
+ {
+ a = pool_elt_at_index(lm->if_address_pool, global_scope[i]);
+ ip6_address_t * address = ip_interface_address_get_address (lm, a);
+ ip6_address_t mask, subnet;
+
+ subnet = *address; /* subnet = address with the host bits masked off */
+ ip6_address_mask_from_width(&mask, a->address_length);
+ ip6_address_mask(&subnet, &mask);
+
+ vlib_cli_output (vm, "\t\t%U, subnet is %U/%d",
+ format_ip6_address, address,
+ format_ip6_address,&subnet,
+ a->address_length);
+ }
+ vec_free (global_scope);
+ vlib_cli_output (vm, "\tJoined group address(es):\n");
+ ip6_mldp_group_t *m;
+ pool_foreach (m, radv_info->mldp_group_pool, ({
+ vlib_cli_output (vm, "\t\t%U\n", format_ip6_address, &m->mcast_address);
+ }));
+
+ vlib_cli_output (vm, "\tAdvertised Prefixes:\n");
+ ip6_radv_prefix_t * p;
+ pool_foreach (p, radv_info->adv_prefixes_pool, ({
+ vlib_cli_output (vm, "\t\tprefix %U, length %d\n",
+ format_ip6_address, &p->prefix, p->prefix_len);
+ }));
+
+ vlib_cli_output (vm, "\tMTU is %d\n", radv_info->adv_link_mtu);
+ vlib_cli_output (vm, "\tICMP error messages are unlimited\n"); /* this and the next three lines print fixed text, not state */
+ vlib_cli_output (vm, "\tICMP redirects are disabled\n");
+ vlib_cli_output (vm, "\tICMP unreachables are not sent\n");
+ vlib_cli_output (vm, "\tND DAD is disabled\n");
+ //vlib_cli_output (vm, "\tND reachable time is %d milliseconds\n",);
+ vlib_cli_output (vm, "\tND advertised reachable time is %d\n",
+ radv_info->adv_neighbor_reachable_time_in_msec);
+ vlib_cli_output (vm, "\tND advertised retransmit interval is %d (msec)\n",
+ radv_info->adv_time_in_msec_between_retransmitted_neighbor_solicitations);
+
+ u32 ra_interval = radv_info->max_radv_interval;
+ u32 ra_interval_min = radv_info->min_radv_interval;
+ vlib_cli_output (vm, "\tND router advertisements are sent every %d seconds (min interval is %d)\n",
+ ra_interval, ra_interval_min);
+ vlib_cli_output (vm, "\tND router advertisements live for %d seconds\n",
+ radv_info->adv_router_lifetime_in_sec);
+ vlib_cli_output (vm, "\tHosts %s stateless autoconfig for addresses\n",
+ (radv_info->adv_managed_flag) ? "use" :" don't use"); /* NOTE(review): prints "use" when the managed flag is set - confirm the polarity and the leading space */
+ vlib_cli_output (vm, "\tND router advertisements sent %d\n", radv_info->n_advertisements_sent);
+ vlib_cli_output (vm, "\tND router solicitations received %d\n", radv_info->n_solicitations_rcvd);
+ vlib_cli_output (vm, "\tND router solicitations dropped %d\n", radv_info->n_solicitations_dropped);
+ }
+ else
+ {
+ error = clib_error_return (0, "Ipv6 not enabled on interface", /* NOTE(review): format has no conversion for the two arguments that follow */
+ format_unformat_error, input);
+
+ }
+ }
+ return error;
+}
+
+VLIB_CLI_COMMAND (show_ip6_interface_command, static) = { /* registers the "show ip6 interface" CLI command */
+  .short_help = "Show ip6 interface <iface name>",
+  .function = show_ip6_interface_cmd,
+  .path = "show ip6 interface",
+};
+
+/*
+ * Disable IPv6 on sw_if_index.  Nothing happens when the interface has no
+ * RA state, or while its ref_count is non-zero.  Otherwise the stored
+ * link-local address is deleted and the RA state is torn down.
+ * Returns the result of deleting the link-local address (0 when nothing
+ * was done).
+ */
+clib_error_t *
+disable_ip6_interface(vlib_main_t * vm,
+                      u32 sw_if_index)
+{
+  ip6_neighbor_main_t * nbr_main = &ip6_neighbor_main;
+  ip6_radv_t * ra;
+  u32 ra_index;
+
+  /* look up the radv_t information for this interface */
+  vec_validate_init_empty (nbr_main->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0);
+  ra_index = nbr_main->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+  /* never created: nothing to disable */
+  if (ra_index == ~0)
+    return 0;
+
+  ra = pool_elt_at_index (nbr_main->if_radv_pool, ra_index);
+
+  /* check radv_info ref count for other ip6 addresses on this interface */
+  if (ra->ref_count != 0)
+    return 0;
+
+  /* essentially "disables" ipv6 on this interface */
+  clib_error_t * error = ip6_add_del_interface_address (vm, sw_if_index, &ra->link_local_address, ra->link_local_prefix_len, 1 /* is_del */);
+  ip6_neighbor_sw_interface_add_del (vnet_get_main(), sw_if_index, 0/* is_add */);
+  return error;
+}
+
+/* Returns non-zero when RA state exists for sw_if_index (pool index != ~0). */
+int
+ip6_interface_enabled(vlib_main_t * vm,
+                      u32 sw_if_index)
+{
+  u32 * index_by_sw_if;
+
+  /* grow the per-interface table on demand; new slots are filled with ~0 */
+  vec_validate_init_empty (ip6_neighbor_main.if_radv_pool_index_by_sw_if_index,
+                           sw_if_index, ~0);
+  index_by_sw_if = ip6_neighbor_main.if_radv_pool_index_by_sw_if_index;
+
+  return index_by_sw_if[sw_if_index] != ~0;
+}
+
+/*
+ * Enable IPv6 on sw_if_index: create the RA state and install a link-local
+ * address.  Returns 0 without doing anything when RA state already exists,
+ * the super interface is not an ethernet hardware interface, or the RA state
+ * cannot be created.  For sub-interfaces the interface id comes from an MD5
+ * digest seeded with the RA randomizer.  If installing the address fails the
+ * new RA state is removed again and the error is returned.
+ */
+clib_error_t *
+enable_ip6_interface(vlib_main_t * vm,
+                     u32 sw_if_index)
+{
+  ip6_neighbor_main_t * nbr_main = &ip6_neighbor_main;
+  vnet_main_t * vnm;
+  vnet_sw_interface_t * sw_if;
+  ethernet_interface_t * eth_if;
+  ip6_radv_t * ra;
+  ip6_address_t ll_addr;
+  clib_error_t * error;
+  int is_add = 1;
+  u32 ra_index;
+
+  /* look up the radv_t information for this interface */
+  vec_validate_init_empty (nbr_main->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0);
+
+  /* already created: nothing to do */
+  if (nbr_main->if_radv_pool_index_by_sw_if_index[sw_if_index] != ~0)
+    return 0;
+
+  vnm = vnet_get_main();
+  sw_if = vnet_get_sup_sw_interface (vnm, sw_if_index);
+  if (sw_if->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+    return 0;
+
+  eth_if = ethernet_get_interface (&ethernet_main, sw_if->hw_if_index);
+  if (!eth_if)
+    return 0;
+
+  /* create radv_info. for this interface.  This holds all the info needed for router adverts */
+  ra_index = ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, is_add);
+  if (ra_index == ~0)
+    return 0;
+
+  ra = pool_elt_at_index (nbr_main->if_radv_pool, ra_index);
+
+  ip6_link_local_address_from_ethernet_mac_address (&ll_addr, eth_if->address);
+
+  sw_if = vnet_get_sw_interface (vnm, sw_if_index);
+  if (sw_if->type == VNET_SW_INTERFACE_TYPE_SUB)
+    {
+      /* make up an interface id */
+      md5_context_t md5;
+      u8 digest[16];
+
+      ll_addr.as_u64[0] = ra->randomizer;
+
+      md5_init (&md5);
+      md5_add (&md5, &ll_addr, 16);
+      md5_finish (&md5, digest);
+
+      memcpy(&ll_addr, digest, 16);
+
+      ra->randomizer = ll_addr.as_u64[0];
+
+      ll_addr.as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
+      /* clear u bit */
+      ll_addr.as_u8[8] &= 0xfd;
+    }
+
+  /* essentially "enables" ipv6 on this interface */
+  error = ip6_add_del_interface_address (vm, sw_if_index, &ll_addr, 64 /* address width */, 0 /* is_del */);
+  if (error)
+    {
+      ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, !is_add);
+      return error;
+    }
+
+  ra->link_local_address = ll_addr;
+  ra->link_local_prefix_len = 64;
+  return error;
+}
+
+/*
+ * CLI handler for "enable ip6 interface <iface name>".
+ * Returns the error reported by enable_ip6_interface(), or an error naming
+ * the offending input when no valid interface name was supplied.
+ */
+static clib_error_t *
+enable_ip6_interface_cmd (vlib_main_t * vm,
+                          unformat_input_t * input,
+                          vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  u32 sw_if_index = ~0;
+
+  if (! unformat_user (input,
+                       unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      /* %U consumes the (format_unformat_error, input) pair */
+      return clib_error_return (0, "unknown interface '%U'",
+                                format_unformat_error, input);
+    }
+
+  /* propagate failures instead of silently dropping them */
+  return enable_ip6_interface (vm, sw_if_index);
+}
+
+VLIB_CLI_COMMAND (enable_ip6_interface_command, static) = { /* registers the "enable ip6 interface" CLI command */
+  .short_help = "enable ip6 interface <iface name>",
+  .function = enable_ip6_interface_cmd,
+  .path = "enable ip6 interface",
+};
+
+/*
+ * CLI handler for "disable ip6 interface <iface name>".
+ * Returns the error reported by disable_ip6_interface(), or an error naming
+ * the offending input when no valid interface name was supplied.
+ */
+static clib_error_t *
+disable_ip6_interface_cmd (vlib_main_t * vm,
+                           unformat_input_t * input,
+                           vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  u32 sw_if_index = ~0;
+
+  if (! unformat_user (input,
+                       unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      /* %U consumes the (format_unformat_error, input) pair */
+      return clib_error_return (0, "unknown interface '%U'",
+                                format_unformat_error, input);
+    }
+
+  /* valid interface: hand over to the worker and report its result */
+  return disable_ip6_interface (vm, sw_if_index);
+}
+
+VLIB_CLI_COMMAND (disable_ip6_interface_command, static) = { /* registers the "disable ip6 interface" CLI command */
+  .short_help = "disable ip6 interface <iface name>",
+  .function = disable_ip6_interface_cmd,
+  .path = "disable ip6 interface",
+};
+
+VLIB_CLI_COMMAND (ip6_nd_command, static) = { /* registers the "ip6 nd" CLI command */
+  .function = ip6_neighbor_cmd,
+  .path = "ip6 nd",
+  .short_help = "Set ip6 neighbor discovery parameters",
+};
+
+/*
+ * Replace the link-local address of sw_if_index (enabling IPv6 on it first
+ * if needed).  Sets vnm->api_errno for the two validation failures.  If the
+ * new address cannot be installed the previous one is put back.
+ */
+clib_error_t *
+set_ip6_link_local_address(vlib_main_t * vm,
+                           u32 sw_if_index,
+                           ip6_address_t *address,
+                           u8 address_length)
+{
+  clib_error_t * error = 0;
+  clib_error_t * rollback_error;
+  ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+  u32 ri;
+  ip6_radv_t * radv_info;
+  vnet_main_t * vnm = vnet_get_main();
+
+  if( !ip6_address_is_link_local_unicast (address))
+    {
+      vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_LINK_LOCAL;
+      return clib_error_return (0, "address not link-local");
+    }
+
+  /* call enable ipv6 */
+  enable_ip6_interface(vm, sw_if_index);
+
+  ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+  if(ri == ~0)
+    {
+      vnm->api_errno = VNET_API_ERROR_IP6_NOT_ENABLED;
+      return clib_error_return (0, "ip6 not enabled for interface");
+    }
+
+  radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+  /* delete the old one */
+  error = ip6_add_del_interface_address (vm, sw_if_index,
+                                         &radv_info->link_local_address,
+                                         radv_info->link_local_prefix_len, 1 /* is_del */);
+  if(error)
+    return error;
+
+  /* add the new one */
+  error = ip6_add_del_interface_address (vm, sw_if_index, address,
+                                         address_length, 0 /* is_del */);
+  if(!error)
+    {
+      /* remember the new link-local address (overwrites the default) */
+      radv_info->link_local_address = *address;
+      radv_info->link_local_prefix_len = address_length;
+      return 0;
+    }
+
+  /* roll back so the interface and radv_info stay consistent */
+  rollback_error = ip6_add_del_interface_address (vm, sw_if_index, &radv_info->link_local_address, radv_info->link_local_prefix_len, 0 /* is_del */);
+  if (rollback_error) clib_error_free (rollback_error);
+  return error;
+}
+
+/*
+ * CLI handler for "set ip6 link-local address <intfc> <address>/<len>".
+ * Both the interface and the address must parse before anything is applied;
+ * otherwise a parse error is returned and no uninitialized value is used.
+ */
+clib_error_t *
+set_ip6_link_local_address_cmd (vlib_main_t * vm,
+                                unformat_input_t * input,
+                                vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  u32 sw_if_index = ~0;
+  ip6_address_t ip6_addr;
+  u32 addr_len = 0;
+
+  /* interface first ... */
+  if (! unformat_user (input,
+                       unformat_vnet_sw_interface, vnm, &sw_if_index))
+    return unformat_parse_error (input);
+
+  /* ... then the mandatory <address>/<len> */
+  if (! unformat (input, "%U/%d",
+                  unformat_ip6_address, &ip6_addr,
+                  &addr_len))
+    return unformat_parse_error (input);
+
+  return set_ip6_link_local_address(vm,
+                                    sw_if_index,
+                                    &ip6_addr,
+                                    addr_len);
+}
+
+VLIB_CLI_COMMAND (set_ip6_link_local_address_command, static) = { /* registers "set ip6 link-local address" */
+  .path = "set ip6 link-local address",
+  .short_help = "set ip6 link-local address <intfc> <address>/<len>", /* matches the path and the "%U/%d" the handler parses */
+  .function = set_ip6_link_local_address_cmd,
+};
+
+/*
+ * Callback invoked when an interface address is added or deleted.
+ *
+ * Maintains, per interface, the solicited-node multicast group derived from
+ * the address (its low three bytes) and the ref_count of non link-local
+ * addresses.  On add, IPv6 is enabled on the interface first if it was not
+ * already.  Nothing else is done when the interface has no RA state.
+ *
+ * im, opaque, address_length and if_address_index are not used here.
+ * The add and delete paths share the RA-state and group lookups; only the
+ * final bookkeeping differs.
+ */
+static void
+ip6_neighbor_add_del_interface_address (ip6_main_t * im,
+                                        uword opaque,
+                                        u32 sw_if_index,
+                                        ip6_address_t * address,
+                                        u32 address_length,
+                                        u32 if_address_index,
+                                        u32 is_delete)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  vlib_main_t * vm = vnm->vlib_main;
+  ip6_neighbor_main_t * nbr_main = &ip6_neighbor_main;
+  ip6_radv_t * ra;
+  ip6_mldp_group_t * group = 0;
+  ip6_address_t snm_addr;
+  uword * slot;
+  u32 ra_index;
+  uword is_link_local;
+
+  /* build the solicited-node multicast address for this interface address */
+  ip6_set_solicited_node_multicast_address (&snm_addr, 0);
+  snm_addr.as_u8[0xd] = address->as_u8[0xd];
+  snm_addr.as_u8[0xe] = address->as_u8[0xe];
+  snm_addr.as_u8[0xf] = address->as_u8[0xf];
+
+  /* on add: try to create radv_info - does nothing if ipv6 already enabled */
+  if (!is_delete)
+    enable_ip6_interface(vm, sw_if_index);
+
+  /* look up the radv_t information for this interface */
+  vec_validate_init_empty (nbr_main->if_radv_pool_index_by_sw_if_index, sw_if_index, ~0);
+  ra_index = nbr_main->if_radv_pool_index_by_sw_if_index[sw_if_index];
+  if (ra_index == ~0)
+    return;
+
+  /* get radv_info */
+  ra = pool_elt_at_index (nbr_main->if_radv_pool, ra_index);
+  is_link_local = ip6_address_is_link_local_unicast (address);
+
+  /* find the group entry for the solicited-node address, if any */
+  slot = mhash_get (&ra->address_to_mldp_index, &snm_addr);
+  if (slot)
+    group = pool_elt_at_index (ra->mldp_group_pool, slot[0]);
+
+  if (is_delete)
+    {
+      /* drop the solicited-node multicast group */
+      if (group)
+        {
+          mhash_unset (&ra->address_to_mldp_index, &snm_addr,/* old_value */ 0);
+          pool_put (ra->mldp_group_pool, group);
+        }
+
+      /* if interface up send MLDP "report" */
+      ra->all_routers_mcast = 0;
+
+      /* one fewer non link-local address on this interface */
+      if (!is_link_local)
+        ra->ref_count--;
+
+      return;
+    }
+
+  /* one more non link-local address on this interface */
+  if (!is_link_local)
+    ra->ref_count++;
+
+  /* add the solicited-node multicast group unless already present */
+  if (!group)
+    {
+      u32 group_index;
+
+      pool_get (ra->mldp_group_pool, group);
+      group_index = group - ra->mldp_group_pool;
+      mhash_set (&ra->address_to_mldp_index, &snm_addr, group_index, /* old_value */ 0);
+
+      group->type = 4;
+      group->mcast_source_address_pool = 0;
+      group->num_sources = 0;
+      memcpy(&group->mcast_address, &snm_addr, sizeof(ip6_address_t));
+    }
+}
+
+/* Set the neighbor cache size limit; always returns 0 (no error). */
+clib_error_t *ip6_set_neighbor_limit (u32 neighbor_limit)
+{
+  ip6_neighbor_main.limit_neighbor_cache_size = neighbor_limit;
+
+  return 0;
+}
+
+static clib_error_t * ip6_neighbor_init (vlib_main_t * vm) /* one-time setup of the ip6 neighbor module; always returns 0 */
+{
+ ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+ ip6_main_t * im = &ip6_main;
+
+ mhash_init (&nm->neighbor_index_by_key, /* hash keyed by ip6_neighbor_key_t, holding uword values */
+ /* value size */ sizeof (uword),
+ /* key size */ sizeof (ip6_neighbor_key_t));
+
+ icmp6_register_type (vm, ICMP6_neighbor_solicitation, ip6_icmp_neighbor_solicitation_node.index); /* route the four ND/RA ICMPv6 types to their nodes */
+ icmp6_register_type (vm, ICMP6_neighbor_advertisement, ip6_icmp_neighbor_advertisement_node.index);
+ icmp6_register_type (vm, ICMP6_router_solicitation, ip6_icmp_router_solicitation_node.index);
+ icmp6_register_type (vm, ICMP6_router_advertisement, ip6_icmp_router_advertisement_node.index);
+
+ /* handler node for ip6 neighbor discovery events and timers */
+ vlib_register_node (vm, &ip6_icmp_neighbor_discovery_event_node);
+
+ /* add call backs */
+ ip6_add_del_interface_address_callback_t cb;
+ memset(&cb, 0x0, sizeof(ip6_add_del_interface_address_callback_t));
+
+ /* when an interface address changes... */
+ cb.function = ip6_neighbor_add_del_interface_address;
+ cb.function_opaque = 0;
+ vec_add1 (im->add_del_interface_address_callbacks, cb); /* cb is appended to the vector by value */
+
+ mhash_init (&nm->pending_resolutions_by_address, /* hash keyed by ip6_address_t, holding uword values */
+ /* value size */ sizeof (uword),
+ /* key size */ sizeof (ip6_address_t));
+
+ /* default, configurable */
+ nm->limit_neighbor_cache_size = 50000;
+
+#if 0
+ /* $$$$ Hack fix for today */
+ vec_validate_init_empty
+ (im->discover_neighbor_next_index_by_hw_if_index, 32, 0 /* drop */);
+#endif
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_neighbor_init); /* registers ip6_neighbor_init as a vlib init function */
+
+
+/*
+ * Record that node_index wants an event (type_opaque, data) for address.
+ * Entries for the same address form a list chained through next_index;
+ * the newest registration becomes the head stored in the hash.
+ */
+void vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm,
+                                                  void * address_arg,
+                                                  uword node_index,
+                                                  uword type_opaque,
+                                                  uword data)
+{
+  ip6_neighbor_main_t * nbr_main = &ip6_neighbor_main;
+  ip6_address_t * key = address_arg;
+  pending_resolution_t * entry;
+  uword * head;
+
+  pool_get (nbr_main->pending_resolutions, entry);
+  entry->node_index = node_index;
+  entry->type_opaque = type_opaque;
+  entry->data = data;
+
+  /* chain to the current head if one exists, else terminate with ~0 */
+  head = mhash_get (&nbr_main->pending_resolutions_by_address, key);
+  entry->next_index = head ? head[0] : ~0;
+  if (head)
+    mhash_unset (&nbr_main->pending_resolutions_by_address, key, 0);
+
+  mhash_set (&nbr_main->pending_resolutions_by_address, key, entry - nbr_main->pending_resolutions, 0 /* old value */);
+}
+
diff --git a/vnet/vnet/ip/ip6_packet.h b/vnet/vnet/ip/ip6_packet.h
new file mode 100644
index 00000000000..9a52cf72586
--- /dev/null
+++ b/vnet/vnet/ip/ip6_packet.h
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip6/packet.h: ip6 packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip6_packet_h
+#define included_ip6_packet_h
+
+typedef union { /* 16-byte IPv6 address, accessible at several element widths */
+ u8 as_u8[16];
+ u16 as_u16[8];
+ u32 as_u32[4];
+ u64 as_u64[2];
+ uword as_uword[16 / sizeof (uword)]; /* element count depends on sizeof (uword) */
+} ip6_address_t;
+
+/* IPv6 address paired with a FIB index. Packed so that the mhash key doesn't include uninitialized pad bytes */
+typedef CLIB_PACKED (struct {
+ /* IP address must be first for ip_interface_address_get_address() to work */
+ ip6_address_t ip6_addr;
+ u32 fib_index; /* filled in by ip6_addr_fib_init() */
+}) ip6_address_fib_t;
+
+/* Fill addr_fib with a copy of address and the given fib_index. */
+always_inline void ip6_addr_fib_init (ip6_address_fib_t * addr_fib,
+                                      ip6_address_t * address, u32 fib_index)
+{
+  addr_fib->fib_index = fib_index;
+  addr_fib->ip6_addr.as_u64[1] = address->as_u64[1];
+  addr_fib->ip6_addr.as_u64[0] = address->as_u64[0];
+}
+
+/* Special addresses:
+ unspecified ::/128
+ loopback ::1/128
+ global unicast 2000::/3
+ unique local unicast fc00::/7
+ link local unicast fe80::/10
+ multicast ff00::/8
+ ietf reserved everything else. */
+
+#define foreach_ip6_multicast_address_scope /* X-macro list: _ (scope name, scope value) */ \
+ _ (loopback, 0x1) \
+ _ (link_local, 0x2) \
+ _ (admin_local, 0x4) \
+ _ (site_local, 0x5) \
+ _ (organization_local, 0x8) \
+ _ (global, 0xe)
+
+#define foreach_ip6_multicast_link_local_group_id /* X-macro list: _ (group name, group id) */ \
+ _ (all_hosts, 0x1) \
+ _ (all_routers, 0x2) \
+ _ (rip_routers, 0x9) \
+ _ (eigrp_routers, 0xa) \
+ _ (pim_routers, 0xd) \
+ _ (mldv2_routers, 0x16)
+
+typedef enum { /* IP6_MULTICAST_SCOPE_<name> = value, one per foreach_ip6_multicast_address_scope entry */
+#define _(f,n) IP6_MULTICAST_SCOPE_##f = n,
+ foreach_ip6_multicast_address_scope
+#undef _
+} ip6_multicast_address_scope_t;
+
+typedef enum { /* IP6_MULTICAST_GROUP_ID_<name> = value, one per foreach_ip6_multicast_link_local_group_id entry */
+#define _(f,n) IP6_MULTICAST_GROUP_ID_##f = n,
+ foreach_ip6_multicast_link_local_group_id
+#undef _
+} ip6_multicast_link_local_group_id_t;
+
+always_inline uword /* non-zero when the first octet is 0xff, i.e. ff00::/8 */
+ip6_address_is_multicast (ip6_address_t * a)
+{ const u8 first_octet = a->as_u8[0]; return first_octet == 0xff; }
+
+/* Set *a to a multicast address: first 16 bits 0xff00 | scope, last 16 bits id, zero in between. */
+always_inline void
+ip6_set_reserved_multicast_address (ip6_address_t * a, ip6_multicast_address_scope_t scope, u16 id)
+{
+  a->as_u64[0] = 0;
+  a->as_u64[1] = 0;
+  a->as_u16[7] = clib_host_to_net_u16 (id);
+  a->as_u16[0] = clib_host_to_net_u16 (0xff00 | scope);
+}
+
+/* Set *a to the solicited-node multicast address ff02::1:ffXX:XXXX, where
+ * XX:XXXX are the low 24 bits supplied in id (id must fit in 24 bits). */
+always_inline void
+ip6_set_solicited_node_multicast_address (ip6_address_t * a, u32 id)
+{
+  a->as_u64[0] = a->as_u64[1] = 0;
+  a->as_u16[0] = clib_host_to_net_u16 (0xff02);
+  a->as_u8[11] = 1;
+  ASSERT ((id >> 24) == 0);
+  a->as_u32[3] = clib_host_to_net_u32 (id | (0xffu << 24)); /* unsigned constant: 0xff << 24 overflows a signed int */
+}
+
+always_inline void /* build fe80::/64 plus a modified EUI-64 interface id from a 6-byte MAC address */
+ip6_link_local_address_from_ethernet_address (ip6_address_t * a, u8 * ethernet_address)
+{
+  a->as_u64[0] = a->as_u64[1] = 0;
+  a->as_u16[0] = clib_host_to_net_u16 (0xfe80);
+  /* Invert the universal/local bit: mask 0x02 of the first octet (RFC 4291 App. A), not 0x40. */
+  a->as_u8[0x8] = ethernet_address[0] ^ 0x02;
+  a->as_u8[0x9] = ethernet_address[1];
+  a->as_u8[0xa] = ethernet_address[2];
+  a->as_u8[0xb] = 0xff;
+  a->as_u8[0xc] = 0xfe;
+  a->as_u8[0xd] = ethernet_address[3];
+  a->as_u8[0xe] = ethernet_address[4];
+  a->as_u8[0xf] = ethernet_address[5];
+}
+
+/* Write the 6-byte Ethernet address 33:33 followed by group_id, most significant byte first. */
+always_inline void
+ip6_multicast_ethernet_address (u8 * ethernet_address, u32 group_id)
+{
+  int byte;
+
+  ethernet_address[0] = ethernet_address[1] = 0x33;
+  for (byte = 0; byte < 4; byte++)
+    ethernet_address[2 + byte] = (group_id >> (24 - 8 * byte)) & 0xff;
+}
+
+always_inline uword /* returns 1 when a and b hold the same 128 bits, else 0 */
+ip6_address_is_equal (ip6_address_t * a, ip6_address_t * b)
+{
+  uword n_left = ARRAY_LEN (a->as_uword);
+
+  while (n_left-- > 0)
+    if (a->as_uword[n_left] != b->as_uword[n_left]) return 0;
+  return 1;
+}
+
+/* Returns 1 when a and b agree on every bit that is set in mask, else 0. */
+always_inline uword
+ip6_address_is_equal_masked (ip6_address_t * a, ip6_address_t * b,
+                             ip6_address_t * mask)
+{
+  uword w;
+
+  for (w = 0; w < ARRAY_LEN (a->as_uword); w++)
+    {
+      /* (a & m) == (b & m)  <=>  ((a ^ b) & m) == 0 */
+      uword differing = (a->as_uword[w] ^ b->as_uword[w]) & mask->as_uword[w];
+      if (differing != 0)
+        return 0;
+    }
+  return 1;
+}
+
+always_inline void /* in place: a &= mask */
+ip6_address_mask (ip6_address_t * a, ip6_address_t * mask)
+{
+  uword w = 0;
+  for (; w < ARRAY_LEN (a->as_uword); w++)
+    a->as_uword[w] = a->as_uword[w] & mask->as_uword[w];
+}
+
+always_inline void /* clear all 128 bits of *a */
+ip6_address_set_zero (ip6_address_t * a)
+{
+  uword w = ARRAY_LEN (a->as_uword);
+  while (w > 0)
+    a->as_uword[--w] = 0;
+}
+
+/* Set *a to a prefix mask whose first `width` bits (<= 128) are ones. */
+always_inline void
+ip6_address_mask_from_width (ip6_address_t * a, u32 width)
+{
+  u32 n_full_bytes = width / 8, n_extra_bits = width % 8, i;
+
+  ASSERT (width <= 128);
+  memset (a, 0, sizeof (a[0]));
+  for (i = 0; i < n_full_bytes; i++)
+    a->as_u8[i] = 0xff;
+  /* leading n_extra_bits of the next byte, most significant bit first */
+  if (n_extra_bits)
+    a->as_u8[n_full_bytes] |= (u8) (0xff << (8 - n_extra_bits));
+}
+
+always_inline uword /* returns 1 when all 128 bits of *a are zero, else 0 */
+ip6_address_is_zero (ip6_address_t * a)
+{
+  uword any_bits = 0, w;
+
+  for (w = 0; w < ARRAY_LEN (a->as_uword); w++)
+    any_bits |= a->as_uword[w];
+  return any_bits == 0;
+}
+
+/* Check for the unspecified address :: (all bits zero). */
+always_inline uword
+ip6_address_is_unspecified (ip6_address_t * a)
+{ uword all_zero = ip6_address_is_zero (a); return all_zero; }
+
+/* Check for loopback address ::1.
+ * Pure read-only test: the address is compared in place instead of being
+ * temporarily modified, so it is safe on shared or read-only memory. */
+always_inline uword
+ip6_address_is_loopback (ip6_address_t * a)
+{
+  return (a->as_u64[0] == 0
+          && a->as_u32[2] == 0
+          && a->as_u16[6] == 0 && a->as_u8[14] == 0
+          && a->as_u8[15] == 1);
+}
+
+/* Check for link local unicast fe80::/10 (first octet 0xfe, top two bits of the second octet 10). */
+always_inline uword
+ip6_address_is_link_local_unicast (ip6_address_t * a)
+{ return (a->as_u8[1] & 0xc0) == 0x80 && a->as_u8[0] == 0xfe; }
+
+/* Check for unique local unicast fc00::/7 (first octet 0xfc or 0xfd). */
+always_inline uword
+ip6_address_is_local_unicast (ip6_address_t * a)
+{ return 0xfc == (a->as_u8[0] & 0xfe); }
+
+/* Check for solicited node multicast 0xff02::1:ff00:0/104 */
+always_inline uword
+ip6_is_solicited_node_multicast_address (ip6_address_t * a)
+{
+  if (a->as_u32[0] != clib_host_to_net_u32 (0xff020000)) return 0;
+  if (a->as_u32[1] != 0) return 0;
+  if (a->as_u32[2] != clib_host_to_net_u32 (1)) return 0;
+  return a->as_u8[12] == 0xff;
+}
+
+typedef struct { /* fixed IPv6 header; multi-byte fields presumably hold network byte order - confirm against users */
+ /* 4 bit version, 8 bit traffic class and 20 bit flow label. */
+ u32 ip_version_traffic_class_and_flow_label;
+
+ /* Total packet length not including this header (but including
+ any extension headers if present). */
+ u16 payload_length;
+
+ /* Protocol for next header. */
+ u8 protocol;
+
+ /* Hop limit decremented by router at each hop. */
+ u8 hop_limit;
+
+ /* Source and destination address. */
+ ip6_address_t src_address, dst_address;
+} ip6_header_t; /* ip6_next_header() returns the address right after this struct */
+
+always_inline void * /* address immediately after the fixed IPv6 header */
+ip6_next_header (ip6_header_t * i)
+{ return (void *) &i[1]; }
+
+/*
+ * Turn a packet around in place: swap the IPv6 source and destination
+ * addresses, then the TCP source and destination ports.
+ */
+always_inline void
+ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0)
+{
+  ip6_address_t saved_addr;
+  u16 saved_port;
+
+  /* addresses */
+  saved_addr = ip0->src_address;
+  ip0->src_address = ip0->dst_address;
+  ip0->dst_address = saved_addr;
+
+  /* ports */
+  saved_port = tcp0->ports.src;
+  tcp0->ports.src = tcp0->ports.dst;
+  tcp0->ports.dst = saved_port;
+
+}
+
+/* Two-packet variant of ip6_tcp_reply_x1: swaps addresses and ports of both
+ * packets; each group of four values is read before any is written. */
+always_inline void
+ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1,
+                  tcp_header_t * tcp0, tcp_header_t * tcp1)
+{
+  ip6_address_t ip0_src, ip0_dst, ip1_src, ip1_dst;
+  u16 tcp0_src, tcp0_dst, tcp1_src, tcp1_dst;
+
+  /* addresses: load all four, then store them swapped */
+  ip0_src = ip0->src_address;
+  ip1_src = ip1->src_address;
+  ip0_dst = ip0->dst_address;
+  ip1_dst = ip1->dst_address;
+  ip0->src_address = ip0_dst;
+  ip1->src_address = ip1_dst;
+  ip0->dst_address = ip0_src;
+  ip1->dst_address = ip1_src;
+
+  /* ports: same pattern */
+  tcp0_src = tcp0->ports.src;
+  tcp1_src = tcp1->ports.src;
+  tcp0_dst = tcp0->ports.dst;
+  tcp1_dst = tcp1->ports.dst;
+  tcp0->ports.src = tcp0_dst;
+  tcp1->ports.src = tcp1_dst;
+  tcp0->ports.dst = tcp0_src;
+  tcp1->ports.dst = tcp1_src;
+}
+
+
+typedef CLIB_PACKED (struct { /* one-byte option; presumably the Pad1 option, going by the name */
+ u8 data;
+}) ip6_pad1_option_t;
+
+typedef CLIB_PACKED (struct { /* option with a type byte, a length byte and trailing data; presumably PadN, going by the name */
+ u8 type;
+ u8 len;
+ u8 data[0]; /* zero-length array marking where the variable-length data starts */
+}) ip6_padN_option_t;
+
+typedef CLIB_PACKED (struct { /* four-byte option: type, len and a 16-bit value */
+#define IP6_MLDP_ALERT_TYPE 0x5
+ u8 type; /* presumably compared against IP6_MLDP_ALERT_TYPE by users - confirm */
+ u8 len;
+ u16 value;
+}) ip6_router_alert_option_t;
+
+typedef CLIB_PACKED (struct { /* leading bytes of a hop-by-hop extension header, followed by option data */
+ u8 next_hdr;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+ u8 data[0]; /* zero-length array marking where the option data starts */
+}) ip6_hop_by_hop_ext_t;
+
+typedef CLIB_PACKED (struct { /* eight-byte fragment extension header */
+ u8 next_hdr;
+ u8 rsv;
+ u16 fragment_offset_and_more; /* read and written through the ip6_frag_hdr_* macros, which byte-swap it */
+ u32 identification;
+}) ip6_frag_hdr_t;
+
+#define ip6_frag_hdr_offset(hdr) \
+ (clib_net_to_host_u16((hdr)->fragment_offset_and_more) >> 3) /* upper 13 bits of the host-order field */
+
+#define ip6_frag_hdr_more(hdr) \
+ (clib_net_to_host_u16((hdr)->fragment_offset_and_more) & 0x1) /* lowest bit of the host-order field */
+
+#define ip6_frag_hdr_offset_and_more(offset, more) \
+ clib_host_to_net_u16(((offset) << 3) + !!(more)) /* packs offset and a 0/1 more flag, then converts to network order */
+
+#endif /* included_ip6_packet_h */
diff --git a/vnet/vnet/ip/ip6_pg.c b/vnet/vnet/ip/ip6_pg.c
new file mode 100644
index 00000000000..2c3852765d4
--- /dev/null
+++ b/vnet/vnet/ip/ip6_pg.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_pg: IP v6 packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+/* Packet-generator edit function for IPv6 headers whose payload length was
+   left unspecified.  For every generated packet, the IPv6 header found
+   g->start_byte_offset bytes into the buffer data gets its payload_length
+   set to the buffer chain length minus that offset and minus the size of
+   the IPv6 header itself, stored in network byte order. */
+static void
+ip6_pg_edit_function (pg_main_t * pg,
+                      pg_stream_t * s,
+                      pg_edit_group_t * g,
+                      u32 * packets,
+                      u32 n_packets)
+{
+  vlib_main_t * vm = pg->vlib_main;
+  u32 hdr_offset = g->start_byte_offset;
+  u32 i;
+
+  for (i = 0; i < n_packets; i++)
+    {
+      vlib_buffer_t * b = vlib_get_buffer (vm, packets[i]);
+      ip6_header_t * ip = (void *) (b->data + hdr_offset);
+
+      ip->payload_length =
+        clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b)
+                              - hdr_offset - sizeof (ip[0]));
+    }
+}
+
+/* Packet-generator edit group for an IPv6 header: one pg_edit_t per header
+   field.  ip_version, traffic_class and flow_label are bit fields of the
+   header's ip_version_traffic_class_and_flow_label word (see
+   pg_ip6_header_init). */
+typedef struct {
+ pg_edit_t ip_version;
+ pg_edit_t traffic_class;
+ pg_edit_t flow_label;
+ pg_edit_t payload_length;
+ pg_edit_t protocol;
+ pg_edit_t hop_limit;
+ pg_edit_t src_address, dst_address;
+} pg_ip6_header_t;
+
+/* Bind each edit in P to the ip6_header_t field it controls. */
+static inline void
+pg_ip6_header_init (pg_ip6_header_t * p)
+{
+  /* Edits that cover a whole header field. */
+  pg_edit_init (&p->payload_length, ip6_header_t, payload_length);
+  pg_edit_init (&p->hop_limit, ip6_header_t, hop_limit);
+  pg_edit_init (&p->protocol, ip6_header_t, protocol);
+  pg_edit_init (&p->src_address, ip6_header_t, src_address);
+  pg_edit_init (&p->dst_address, ip6_header_t, dst_address);
+
+  /* Edits that cover part of the first header word.  The two trailing
+     arguments are presumably (bit offset, bit count) -- TODO confirm
+     against pg_edit_init_bitfield. */
+  pg_edit_init_bitfield (&p->ip_version, ip6_header_t,
+                         ip_version_traffic_class_and_flow_label,
+                         28, 4);
+  pg_edit_init_bitfield (&p->traffic_class, ip6_header_t,
+                         ip_version_traffic_class_and_flow_label,
+                         20, 8);
+  pg_edit_init_bitfield (&p->flow_label, ip6_header_t,
+                         ip_version_traffic_class_and_flow_label,
+                         0, 20);
+}
+
+/*
+ * unformat handler that parses an IPv6 header description for a
+ * packet-generator stream.
+ *
+ * Takes one va_list argument: the pg_stream_t being built.  A new edit
+ * group sized for ip6_header_t is created, then the input must match
+ *   "<protocol>: <src-address> -> <dst-address>"
+ * optionally followed by any number of "version", "traffic-class",
+ * "length" and "hop-limit" settings.  The rest of the input is handed to
+ * the protocol's own pg parser when one is registered and matches,
+ * otherwise to unformat_pg_payload.
+ *
+ * Returns 1 on success.  On a parse failure the edit group is released
+ * with pg_free_edit_group and 0 is returned.
+ */
+uword
+unformat_pg_ip6_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t * s = va_arg (*args, pg_stream_t *);
+ pg_ip6_header_t * p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ip6_header_t),
+ &group_index);
+ pg_ip6_header_init (p);
+
+ /* Defaults. */
+ pg_edit_set_fixed (&p->ip_version, 6);
+ pg_edit_set_fixed (&p->traffic_class, 0);
+ pg_edit_set_fixed (&p->flow_label, 0);
+ pg_edit_set_fixed (&p->hop_limit, 64);
+
+ /* Stays unspecified unless "length" is parsed or it is derived below. */
+ p->payload_length.type = PG_EDIT_UNSPECIFIED;
+
+ /* Mandatory part: protocol and the two addresses. */
+ if (! unformat (input, "%U: %U -> %U",
+ unformat_pg_edit,
+ unformat_ip_protocol, &p->protocol,
+ unformat_pg_edit,
+ unformat_ip6_address, &p->src_address,
+ unformat_pg_edit,
+ unformat_ip6_address, &p->dst_address))
+ goto error;
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "version %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->ip_version))
+ ;
+
+ else if (unformat (input, "traffic-class %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->traffic_class))
+ ;
+
+ else if (unformat (input, "length %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->payload_length))
+ ;
+
+ else if (unformat (input, "hop-limit %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->hop_limit))
+ ;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ {
+ ip_main_t * im = &ip_main;
+ ip_protocol_t protocol;
+ ip_protocol_info_t * pi;
+
+ /* A protocol-specific parser can only be looked up when the protocol
+ edit is a fixed value. */
+ pi = 0;
+ if (p->protocol.type == PG_EDIT_FIXED)
+ {
+ protocol = pg_edit_get_value (&p->protocol, PG_EDIT_LO);
+ pi = ip_get_protocol_info (im, protocol);
+ }
+
+ if (pi && pi->unformat_pg_edit
+ && unformat_user (input, pi->unformat_pg_edit, s))
+ ;
+
+ /* No protocol parser, or it did not match: treat the rest as payload. */
+ else if (! unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ /* Length not given, packet size fixed, and at least one more edit group
+ follows this one: the payload length can be fixed once here. */
+ if (p->payload_length.type == PG_EDIT_UNSPECIFIED
+ && s->min_packet_bytes == s->max_packet_bytes
+ && group_index + 1 < vec_len (s->edit_groups))
+ {
+ pg_edit_set_fixed (&p->payload_length,
+ pg_edit_group_n_bytes (s, group_index) - sizeof (ip6_header_t));
+ }
+
+ /* Re-fetch the group pointer -- presumably because parsing the inner
+ layers may have added edit groups to the stream; TODO confirm. */
+ p = pg_get_edit_group (s, group_index);
+ if (p->payload_length.type == PG_EDIT_UNSPECIFIED)
+ {
+ /* Still unknown: have ip6_pg_edit_function fill it in per packet. */
+ pg_edit_group_t * g = pg_stream_get_group (s, group_index);
+ g->edit_function = ip6_pg_edit_function;
+ }
+
+ return 1;
+ }
+
+ error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
diff --git a/vnet/vnet/ip/ip_checksum.c b/vnet/vnet/ip/ip_checksum.c
new file mode 100644
index 00000000000..23e7889bc7e
--- /dev/null
+++ b/vnet/vnet/ip/ip_checksum.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_checksum.c: ip/tcp/udp checksums
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/*
+ * Add n_bytes of data starting at _data to the running checksum
+ * accumulator `sum' and return the new accumulator.
+ *
+ * Two partial sums are kept: sum0 starts at zero and sum1 starts at the
+ * caller's `sum'.  The bulk of the data is consumed two ip_csum_t words
+ * per iteration, alternating between the two sums, and the sums are
+ * merged with ip_csum_with_carry before returning.
+ *
+ * NOTE(review): when _data is odd the leading u8 is added on its own and
+ * all following bytes are then read at a shifted position relative to the
+ * start of the data -- confirm that callers only pass suitably aligned
+ * pointers.
+ */
+ip_csum_t
+ip_incremental_checksum (ip_csum_t sum, void * _data, uword n_bytes)
+{
+ uword data = pointer_to_uword (_data);
+ ip_csum_t sum0, sum1;
+
+ sum0 = 0;
+ sum1 = sum;
+
+ /* Align data pointer to 64 bits. */
+ /* Each step below consumes one leading item of type t when at least that
+ many bytes remain, t is narrower than ip_csum_t, and the address is not
+ yet a multiple of 2 * sizeof (t). */
+#define _(t) \
+do { \
+ if (n_bytes >= sizeof (t) \
+ && sizeof (t) < sizeof (ip_csum_t) \
+ && (data % (2 * sizeof (t))) != 0) \
+ { \
+ sum0 += * uword_to_pointer (data, t *); \
+ data += sizeof (t); \
+ n_bytes -= sizeof (t); \
+ } \
+} while (0)
+
+ _ (u8);
+ _ (u16);
+ if (BITS (ip_csum_t) > 32)
+ _ (u32);
+
+#undef _
+
+ /* Main loop: two accumulator-sized words per iteration. */
+ {
+ ip_csum_t * d = uword_to_pointer (data, ip_csum_t *);
+
+ while (n_bytes >= 2 * sizeof (d[0]))
+ {
+ sum0 = ip_csum_with_carry (sum0, d[0]);
+ sum1 = ip_csum_with_carry (sum1, d[1]);
+ d += 2;
+ n_bytes -= 2 * sizeof (d[0]);
+ }
+
+ data = pointer_to_uword (d);
+ }
+
+ /* Tail: at most one item of each size, widest first. */
+#define _(t) \
+do { \
+ if (n_bytes >= sizeof (t) && sizeof (t) <= sizeof (ip_csum_t)) \
+ { \
+ sum0 = ip_csum_with_carry (sum0, * uword_to_pointer (data, t *)); \
+ data += sizeof (t); \
+ n_bytes -= sizeof (t); \
+ } \
+} while (0)
+
+ if (BITS (ip_csum_t) > 32)
+ _ (u64);
+ _ (u32);
+ _ (u16);
+ _ (u8);
+
+#undef _
+
+ /* Combine even and odd sums. */
+ sum0 = ip_csum_with_carry (sum0, sum1);
+
+ return sum0;
+}
+
+/*
+ * Copy n_bytes from src to dst and add the copied bytes to the running
+ * checksum accumulator `sum'.  Returns the new accumulator.
+ *
+ * Neither pointer needs any particular alignment: whole words are moved
+ * with clib_mem_unaligned loads and stores.  Words are always taken at
+ * offsets that are multiples of sizeof (ip_csum_t) from the start of src,
+ * so the checksum does not depend on where dst happens to be placed.
+ *
+ * Fixes relative to the previous implementation:
+ *  - the final partial word was copied from the start of src to the start
+ *    of dst instead of from the current position, so trailing bytes were
+ *    never copied;
+ *  - the final partial word was not added to the checksum;
+ *  - the misaligned-destination path copied the wrong number of leading
+ *    bytes and summed a destination word that included bytes located
+ *    before dst.
+ */
+ip_csum_t
+ip_csum_and_memcpy (ip_csum_t sum, void * dst, void * src, uword n_bytes)
+{
+  u8 * d8 = dst, * s8 = src;
+  ip_csum_t sum0 = sum, sum1 = 0;
+  uword n_left = n_bytes;
+
+  /* Two words per iteration, accumulated into independent sums. */
+  while (n_left >= 2 * sizeof (ip_csum_t))
+    {
+      ip_csum_t w0, w1;
+
+      w0 = clib_mem_unaligned (s8, ip_csum_t);
+      w1 = clib_mem_unaligned (s8 + sizeof (ip_csum_t), ip_csum_t);
+
+      clib_mem_unaligned (d8, ip_csum_t) = w0;
+      clib_mem_unaligned (d8 + sizeof (ip_csum_t), ip_csum_t) = w1;
+
+      d8 += 2 * sizeof (ip_csum_t);
+      s8 += 2 * sizeof (ip_csum_t);
+      n_left -= 2 * sizeof (ip_csum_t);
+
+      sum0 = ip_csum_with_carry (sum0, w0);
+      sum1 = ip_csum_with_carry (sum1, w1);
+    }
+
+  /* At most one whole word remains. */
+  if (n_left >= sizeof (ip_csum_t))
+    {
+      ip_csum_t w0 = clib_mem_unaligned (s8, ip_csum_t);
+
+      clib_mem_unaligned (d8, ip_csum_t) = w0;
+
+      d8 += sizeof (ip_csum_t);
+      s8 += sizeof (ip_csum_t);
+      n_left -= sizeof (ip_csum_t);
+
+      sum0 = ip_csum_with_carry (sum0, w0);
+    }
+
+  /* Trailing bytes: copy them from the current position and add them to
+     the checksum as one word padded with zero bytes at the end. */
+  if (n_left > 0)
+    {
+      ip_csum_t tail = 0;
+      u8 * t8 = (u8 *) &tail;
+      uword i;
+
+      for (i = 0; i < n_left; i++)
+        {
+          d8[i] = s8[i];
+          t8[i] = s8[i];
+        }
+
+      sum0 = ip_csum_with_carry (sum0, tail);
+    }
+
+  return ip_csum_with_carry (sum0, sum1);
+}
diff --git a/vnet/vnet/ip/ip_frag.c b/vnet/vnet/ip/ip_frag.c
new file mode 100644
index 00000000000..22176187a9c
--- /dev/null
+++ b/vnet/vnet/ip/ip_frag.c
@@ -0,0 +1,449 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+/*
+ * IPv4 and IPv6 Fragmentation Nodes
+ *
+ *
+ */
+
+#include "ip_frag.h"
+
+#include <vnet/ip/ip.h>
+
+
+/* Per-packet trace record shared by the ip4-frag and ip6-frag nodes. */
+typedef struct {
+ u8 ipv6; /* 0 when recorded by ip4_frag, 1 when recorded by ip6_frag */
+ u16 header_offset; /* copy of the buffer's ip_frag.header_offset */
+ u16 mtu; /* copy of the buffer's ip_frag.mtu */
+ u8 next; /* copy of the buffer's ip_frag.next_index */
+ u16 n_fragments; /* number of buffers produced for this packet */
+} ip_frag_trace_t;
+
+/* Trace formatter registered by both fragmentation nodes.  Consumes the
+   (vm, node, trace) va_list triple and appends one line describing the
+   ip_frag_trace_t record to s. */
+static u8 * format_ip_frag_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip_frag_trace_t * trace = va_arg (*args, ip_frag_trace_t *);
+  char * ip_version = trace->ipv6 ? "6" : "4";
+
+  return format (s, "IPv%s offset: %u mtu: %u fragments: %u next: %s",
+                 ip_version,
+                 trace->header_offset, trace->mtu, trace->n_fragments,
+                 node->next_node_names[trace->next]);
+}
+
+/* Source of fresh fragment identifications for both nodes: pre-incremented
+   each time a packet that is not already a fragment gets fragmented.  The
+   IPv4 path stores it in a u16.
+   NOTE(review): plain static with no synchronization visible in this
+   file -- confirm the nodes never run concurrently. */
+static u32 running_fragment_id;
+
+/*
+ * Split the IPv4 packet held in buffer `pi' into fragments.
+ *
+ *  vm      vlib main.
+ *  pi      buffer index of the packet.  Its buffer metadata supplies
+ *          ip_frag.header_offset (bytes in front of the IPv4 header that
+ *          are copied into every fragment), ip_frag.mtu and ip_frag.flags.
+ *  buffer  vector that receives the resulting buffer indices: `pi' itself
+ *          first (it becomes the first fragment, or is the only entry on
+ *          error), followed by one newly allocated buffer per extra
+ *          fragment.
+ *  error   set to an IP_FRAG_ERROR_* code on failure; not written on
+ *          success.
+ *
+ * Each emitted packet carries header_offset + sizeof (ip4_header_t) +
+ * payload bytes, and that total is kept within ip_frag.mtu.  Every payload
+ * except the last is a multiple of 8 bytes.
+ *
+ * NOTE(review): the header is assumed to be exactly sizeof (ip4_header_t)
+ * bytes, and only the first buffer of `pi' is read -- confirm that callers
+ * never pass IPv4 options or chained buffers.
+ */
+static void
+ip4_frag_do_fragment(vlib_main_t *vm, u32 pi, u32 **buffer, ip_frag_error_t *error)
+{
+  vlib_buffer_t *p;
+  ip4_header_t *ip4;
+  u16 mtu, ptr, len, max, rem, avail,
+    offset, ip_frag_id, ip_frag_offset;
+  u8 *packet, more;
+
+  //The original packet is always reported back, even on error,
+  //so that the caller can enqueue (and thereby drop) it.
+  vec_add1(*buffer, pi);
+  p = vlib_get_buffer(vm, pi);
+  offset = vnet_buffer(p)->ip_frag.header_offset;
+  mtu = vnet_buffer(p)->ip_frag.mtu;
+  packet = (u8 *)vlib_buffer_get_current(p);
+  ip4 = (ip4_header_t *)(packet + offset);
+
+  rem = clib_net_to_host_u16(ip4->length) - sizeof(*ip4);
+  ptr = 0;
+
+  if (rem < (p->current_length - offset - sizeof(*ip4))) {
+    *error = IP_FRAG_ERROR_MALFORMED;
+    return;
+  }
+
+  //The MTU has to cover the copied prefix and the IPv4 header.
+  //Checking this before the subtraction below keeps the payload
+  //budget from wrapping around.
+  if (mtu < sizeof(*ip4) + offset) {
+    *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
+    return;
+  }
+
+  //Payload bytes available per packet, and the same rounded down to
+  //the 8 byte unit required for every fragment but the last.
+  avail = mtu - sizeof(*ip4) - offset;
+  max = avail & ~0x7;
+
+  //Fragmentation is needed but not even 8 payload bytes fit: without
+  //this check the loop below would emit zero-length fragments forever.
+  if (rem > avail && max == 0) {
+    *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
+    return;
+  }
+
+  if (ip4->flags_and_fragment_offset &
+      clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT)) {
+    *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
+    return;
+  }
+
+  if (ip4_is_fragment(ip4)) {
+    //Already a fragment: keep its id, start from its offset and
+    //remember whether more fragments follow it.
+    ip_frag_id = ip4->fragment_id;
+    ip_frag_offset = ip4_get_fragment_offset(ip4);
+    more = !!(ip4->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS));
+  } else {
+    ip_frag_id = (++running_fragment_id);
+    ip_frag_offset = 0;
+    more = 0;
+  }
+
+  //Do the actual fragmentation
+  while (rem) {
+    u32 bi;
+    vlib_buffer_t *b;
+    ip4_header_t *fip4;
+
+    len = (rem > avail) ? max : rem;
+
+    if (ptr == 0) {
+      //First fragment reuses the original buffer in place.
+      bi = pi;
+      b = p;
+      fip4 = (ip4_header_t *)(vlib_buffer_get_current(b) + offset);
+    } else {
+      if (!vlib_buffer_alloc(vm, &bi, 1)) {
+        *error = IP_FRAG_ERROR_MEMORY;
+        return;
+      }
+      vec_add1(*buffer, bi);
+      b = vlib_get_buffer(vm, bi);
+      vnet_buffer(b)->sw_if_index[VLIB_RX] = vnet_buffer(p)->sw_if_index[VLIB_RX];
+      vnet_buffer(b)->sw_if_index[VLIB_TX] = vnet_buffer(p)->sw_if_index[VLIB_TX];
+      fip4 = (ip4_header_t *)(vlib_buffer_get_current(b) + offset);
+
+      //Copy offset and ip4 header. The destination is the buffer's
+      //current data pointer, the same base fip4 is derived from.
+      memcpy(vlib_buffer_get_current(b), packet, offset + sizeof(*ip4));
+      //Copy data
+      memcpy(((u8*)(fip4)) + sizeof(*fip4),
+             packet + offset + sizeof(*fip4) + ptr, len);
+    }
+    b->current_length = offset + len + sizeof(*fip4);
+
+    fip4->fragment_id = ip_frag_id;
+    fip4->flags_and_fragment_offset = clib_host_to_net_u16((ptr >> 3) + ip_frag_offset);
+    fip4->flags_and_fragment_offset |= clib_host_to_net_u16(((len != rem) || more) << 13);
+    // ((len0 != rem0) || more0) << 13 is optimization for
+    // ((len0 != rem0) || more0) ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0
+    fip4->length = clib_host_to_net_u16(len + sizeof(*fip4));
+    fip4->checksum = ip4_header_checksum(fip4);
+
+    if(vnet_buffer(p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER) {
+      //Encapsulating ipv4 header
+      ip4_header_t *encap_header4 = (ip4_header_t *)vlib_buffer_get_current(b);
+      encap_header4->length = clib_host_to_net_u16(b->current_length);
+      encap_header4->checksum = ip4_header_checksum(encap_header4);
+    } else if (vnet_buffer(p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER) {
+      //Encapsulating ipv6 header
+      ip6_header_t *encap_header6 = (ip6_header_t *)vlib_buffer_get_current(b);
+      encap_header6->payload_length = clib_host_to_net_u16(b->current_length - sizeof(*encap_header6));
+    }
+
+    rem -= len;
+    ptr += len;
+  }
+}
+
+
+/*
+ * Node function of the IPv4 fragmentation node.
+ *
+ * Every packet of the input frame is handed to ip4_frag_do_fragment, which
+ * fills `buffer' with the resulting buffer indices.  All of them are then
+ * enqueued to the next node stored in the packet's ip_frag.next_index, or
+ * to IP4_FRAG_NEXT_DROP when fragmentation reported an error.  Returns the
+ * number of packets in the input frame.
+ */
+static uword
+ip4_frag (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t * error_node = vlib_node_get_runtime(vm, ip4_frag_node.index);
+ from = vlib_frame_vector_args(frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 frag_sent = 0, small_packets = 0;
+ /* Scratch vector of buffer indices, reused for every input packet. */
+ u32 *buffer = 0;
+
+ while (n_left_from > 0) {
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0) {
+ u32 pi0, *frag_from, frag_left;
+ vlib_buffer_t *p0;
+ ip_frag_error_t error0;
+ ip4_frag_next_t next0;
+
+ //Note: The packet is not enqueued now.
+ //It is instead put in a vector where other fragments
+ //will be put as well.
+ pi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ error0 = IP_FRAG_ERROR_NONE;
+
+ p0 = vlib_get_buffer(vm, pi0);
+ ip4_frag_do_fragment(vm, pi0, &buffer, &error0);
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+ ip_frag_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof (*tr));
+ tr->header_offset = vnet_buffer(p0)->ip_frag.header_offset;
+ tr->mtu = vnet_buffer(p0)->ip_frag.mtu;
+ tr->ipv6 = 0;
+ tr->n_fragments = vec_len(buffer);
+ tr->next = vnet_buffer(p0)->ip_frag.next_index;
+ }
+
+ next0 = (error0 == IP_FRAG_ERROR_NONE) ? vnet_buffer(p0)->ip_frag.next_index : IP4_FRAG_NEXT_DROP;
+ /* Both counters are updated whether or not fragmentation succeeded. */
+ frag_sent += vec_len(buffer);
+ small_packets += (vec_len(buffer) == 1);
+
+ //Send fragments that were added in the frame
+ frag_from = buffer;
+ frag_left = vec_len(buffer);
+ while (frag_left > 0) {
+ while (frag_left > 0 && n_left_to_next > 0) {
+ u32 i;
+ i = to_next[0] = frag_from[0];
+ frag_from += 1;
+ frag_left -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ /* Every buffer of this packet gets the same error counter and next. */
+ vlib_get_buffer(vm, i)->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, i,
+ next0);
+ }
+ /* Hand the frame over and fetch one again -- presumably so that
+ fragments which did not fit continue in a fresh frame. */
+ vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+ }
+ vec_reset_length(buffer);
+ }
+ vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+ }
+ vec_free(buffer);
+ vlib_node_increment_counter(vm, ip4_frag_node.index, IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
+ vlib_node_increment_counter(vm, ip4_frag_node.index, IP_FRAG_ERROR_SMALL_PACKET, small_packets);
+
+ return frame->n_vectors;
+}
+
+
+/*
+ * Split the IPv6 packet held in buffer `pi' into fragments.
+ *
+ *  vm      vlib main.
+ *  pi      buffer index of the packet.  Its buffer metadata supplies
+ *          ip_frag.header_offset (bytes in front of the IPv6 header that
+ *          are copied into every fragment), ip_frag.mtu and ip_frag.flags.
+ *  buffer  vector that receives the resulting buffer indices: `pi' first
+ *          (it becomes the first fragment), then one newly allocated
+ *          buffer per extra fragment.
+ *  error   set to an IP_FRAG_ERROR_* code on failure; not written on
+ *          success.
+ *
+ * Hop-by-hop, destination-options and routing headers (at most one of
+ * each, in that order) are skipped and repeated in every fragment.  When
+ * the packet has no fragment header yet, one is inserted by moving all
+ * preceding bytes 8 bytes towards the start of the buffer.
+ *
+ * Fixes relative to the previous implementation:
+ *  - extension headers are skipped by (Hdr Ext Len + 1) * 8 bytes, since
+ *    the length byte does not count the first 8 bytes of the header;
+ *  - the MTU is checked against the header length before subtracting, so
+ *    an MTU smaller than the headers can no longer wrap around and pass;
+ *  - on that error `pi' is still added to *buffer, so the caller enqueues
+ *    it to the drop node instead of losing track of it.
+ *
+ * NOTE(review): header parsing is not bounds-checked against
+ * current_length, and only the first buffer of `pi' is read.
+ */
+static void
+ip6_frag_do_fragment(vlib_main_t *vm, u32 pi, u32 **buffer, ip_frag_error_t *error)
+{
+  vlib_buffer_t *p;
+  ip6_header_t *ip6_hdr;
+  ip6_frag_hdr_t *frag_hdr;
+  u8 *payload, *next_header;
+
+  p = vlib_get_buffer(vm, pi);
+
+  //Parsing the IPv6 headers
+  ip6_hdr = vlib_buffer_get_current(p) + vnet_buffer(p)->ip_frag.header_offset;
+  payload = (u8 *)(ip6_hdr + 1);
+  next_header = &ip6_hdr->protocol;
+  if (*next_header == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) {
+    next_header = payload;
+    payload += (payload[1] + 1) * 8;
+  }
+
+  if (*next_header == IP_PROTOCOL_IP6_DESTINATION_OPTIONS) {
+    next_header = payload;
+    payload += (payload[1] + 1) * 8;
+  }
+
+  if (*next_header == IP_PROTOCOL_IPV6_ROUTE) {
+    next_header = payload;
+    payload += (payload[1] + 1) * 8;
+  }
+
+  u8 has_more;
+  u16 initial_offset;
+  if (*next_header == IP_PROTOCOL_IPV6_FRAGMENTATION) {
+    //The fragmentation header is already there
+    frag_hdr = (ip6_frag_hdr_t *)payload;
+    has_more = ip6_frag_hdr_more(frag_hdr);
+    initial_offset = ip6_frag_hdr_offset(frag_hdr);
+  } else {
+    //Insert a fragmentation header in the packet
+    u8 nh = *next_header;
+    *next_header = IP_PROTOCOL_IPV6_FRAGMENTATION;
+    vlib_buffer_advance(p, -sizeof(*frag_hdr));
+    u8 *start = vlib_buffer_get_current(p);
+    //Shift everything in front of the payload down to open a gap
+    memmove(start, start + sizeof(*frag_hdr), payload - (start + sizeof(*frag_hdr)));
+    frag_hdr = (ip6_frag_hdr_t *)(payload - sizeof(*frag_hdr));
+    frag_hdr->identification = ++running_fragment_id;
+    frag_hdr->next_hdr = nh;
+    frag_hdr->rsv = 0;
+    has_more = 0;
+    initial_offset = 0;
+  }
+  payload = (u8 *)(frag_hdr + 1);
+
+  u16 headers_len = payload - (u8 *)vlib_buffer_get_current(p);
+  u16 mtu = vnet_buffer(p)->ip_frag.mtu;
+
+  //Every fragment needs all headers plus at least 8 payload bytes
+  if(mtu < headers_len + 8) {
+    vec_add1(*buffer, pi);
+    *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
+    return;
+  }
+
+  u16 max_payload = mtu - headers_len;
+  u16 rem = p->current_length - headers_len;
+  u16 ptr = 0;
+
+  while (rem) {
+    u32 bi;
+    vlib_buffer_t *b;
+    u16 len = (rem > max_payload)?(max_payload & ~0x7):rem;
+    rem -= len;
+
+    if (ptr != 0) {
+      if (!vlib_buffer_alloc(vm, &bi, 1)) {
+        *error = IP_FRAG_ERROR_MEMORY;
+        return;
+      }
+      b = vlib_get_buffer(vm, bi);
+      vnet_buffer(b)->sw_if_index[VLIB_RX] = vnet_buffer(p)->sw_if_index[VLIB_RX];
+      vnet_buffer(b)->sw_if_index[VLIB_TX] = vnet_buffer(p)->sw_if_index[VLIB_TX];
+      memcpy(vlib_buffer_get_current(b), vlib_buffer_get_current(p), headers_len);
+      memcpy(vlib_buffer_get_current(b) + headers_len, payload + ptr, len);
+      frag_hdr = vlib_buffer_get_current(b) + headers_len - sizeof(*frag_hdr);
+    } else {
+      //First fragment reuses the original buffer in place
+      bi = pi;
+      b = vlib_get_buffer(vm, bi);
+      //frag_hdr already set here
+    }
+
+    ip6_hdr = vlib_buffer_get_current(b) + vnet_buffer(p)->ip_frag.header_offset;
+    //rem was already reduced, so it is non-zero exactly when
+    //another fragment follows this one
+    frag_hdr->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(initial_offset + (ptr >> 3), (rem || has_more));
+    b->current_length = headers_len + len;
+    ip6_hdr->payload_length = clib_host_to_net_u16(b->current_length - vnet_buffer(p)->ip_frag.header_offset - sizeof(*ip6_hdr));
+
+    if(vnet_buffer(p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER) {
+      //Encapsulating ipv4 header
+      ip4_header_t *encap_header4 = (ip4_header_t *)vlib_buffer_get_current(b);
+      encap_header4->length = clib_host_to_net_u16(b->current_length);
+      encap_header4->checksum = ip4_header_checksum(encap_header4);
+    } else if (vnet_buffer(p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER) {
+      //Encapsulating ipv6 header
+      ip6_header_t *encap_header6 = (ip6_header_t *)vlib_buffer_get_current(b);
+      encap_header6->payload_length = clib_host_to_net_u16(b->current_length - sizeof(*encap_header6));
+    }
+
+    vec_add1(*buffer, bi);
+
+    ptr += len;
+  }
+}
+
+/*
+ * Node function of the IPv6 fragmentation node.
+ *
+ * Every packet of the input frame is handed to ip6_frag_do_fragment, which
+ * fills `buffer' with the resulting buffer indices.  All of them are then
+ * enqueued to the next node stored in the packet's ip_frag.next_index, or
+ * to IP6_FRAG_NEXT_DROP when fragmentation reported an error.  Returns the
+ * number of packets in the input frame.
+ */
+static uword
+ip6_frag (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip6_frag_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 frag_sent = 0, small_packets = 0;
+ /* Scratch vector of buffer indices, reused for every input packet. */
+ u32 *buffer = 0;
+
+ while (n_left_from > 0) {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0) {
+ u32 pi0, *frag_from, frag_left;
+ vlib_buffer_t * p0;
+ ip_frag_error_t error0;
+ ip6_frag_next_t next0;
+
+ /* The packet is not enqueued here; it travels through `buffer'
+ together with the fragments created from it. */
+ pi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ error0 = IP_FRAG_ERROR_NONE;
+
+ p0 = vlib_get_buffer(vm, pi0);
+ ip6_frag_do_fragment(vm, pi0, &buffer, &error0);
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+ ip_frag_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof (*tr));
+ tr->header_offset = vnet_buffer(p0)->ip_frag.header_offset;
+ tr->mtu = vnet_buffer(p0)->ip_frag.mtu;
+ tr->ipv6 = 1;
+ tr->n_fragments = vec_len(buffer);
+ tr->next = vnet_buffer(p0)->ip_frag.next_index;
+ }
+
+ next0 = (error0 == IP_FRAG_ERROR_NONE) ? vnet_buffer(p0)->ip_frag.next_index : IP6_FRAG_NEXT_DROP;
+ /* Both counters are updated whether or not fragmentation succeeded. */
+ frag_sent += vec_len(buffer);
+ small_packets += (vec_len(buffer) == 1);
+
+ //Send fragments that were added in the frame
+ frag_from = buffer;
+ frag_left = vec_len(buffer);
+ while (frag_left > 0) {
+ while (frag_left > 0 && n_left_to_next > 0) {
+ u32 i;
+ i = to_next[0] = frag_from[0];
+ frag_from += 1;
+ frag_left -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ /* Every buffer of this packet gets the same error counter and next. */
+ vlib_get_buffer(vm, i)->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, i,
+ next0);
+ }
+ /* Hand the frame over and fetch one again -- presumably so that
+ fragments which did not fit continue in a fresh frame. */
+ vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+ }
+ vec_reset_length(buffer);
+ }
+ vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+ }
+ vec_free(buffer);
+ vlib_node_increment_counter(vm, ip6_frag_node.index, IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
+ vlib_node_increment_counter(vm, ip6_frag_node.index, IP_FRAG_ERROR_SMALL_PACKET, small_packets);
+
+ return frame->n_vectors;
+}
+
+/* Counter strings generated from foreach_ip_frag_error.  Despite the name,
+   this table is registered by both the IPv4 and the IPv6 node below. */
+static char * ip4_frag_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip_frag_error
+#undef _
+};
+
+/* Registration of the IPv4 fragmentation node: ip4_frag as node function,
+   the shared trace formatter and error strings, and one next node per
+   ip4_frag_next_t value. */
+VLIB_REGISTER_NODE (ip4_frag_node) = {
+ .function = ip4_frag,
+ .name = IP4_FRAG_NODE_NAME,
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_strings = ip4_frag_error_strings,
+
+ .n_next_nodes = IP4_FRAG_N_NEXT,
+ .next_nodes = {
+ [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_FRAG_NEXT_DROP] = "error-drop"
+ },
+};
+
+/* Registration of the IPv6 fragmentation node: ip6_frag as node function,
+   the shared trace formatter and error strings, and one next node per
+   ip6_frag_next_t value. */
+VLIB_REGISTER_NODE (ip6_frag_node) = {
+ .function = ip6_frag,
+ .name = IP6_FRAG_NODE_NAME,
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_strings = ip4_frag_error_strings,
+
+ .n_next_nodes = IP6_FRAG_N_NEXT,
+ .next_nodes = {
+ [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP6_FRAG_NEXT_DROP] = "error-drop"
+ },
+};
diff --git a/vnet/vnet/ip/ip_frag.h b/vnet/vnet/ip/ip_frag.h
new file mode 100644
index 00000000000..04566904e5f
--- /dev/null
+++ b/vnet/vnet/ip/ip_frag.h
@@ -0,0 +1,81 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+/*
+ * IPv4 and IPv6 Fragmentation Nodes
+ *
+ * A packet sent to these nodes requires the following
+ * buffer attributes to be set:
+ * ip_frag.header_offset :
+ * Where to find the IPv4 (or IPv6) header in the packet. Previous
+ * bytes are left untouched and copied in every fragment. The fragments
+ * are then appended. This option is used for fragmented packets
+ * that are encapsulated.
+ * ip_frag.mtu :
+ * Maximum size of IP packets, header included, but ignoring
+ * the 'ip_frag.header_offset' copied bytes.
+ * ip_frag.next_index :
+ * One of ip4_frag_next_t (ip4-frag node) or ip6_frag_next_t (ip6-frag
+ * node), indicating to which exit node the fragments should be sent.
+ *
+ */
+
+#ifndef IP_FRAG_H
+#define IP_FRAG_H
+
+#include <vnet/vnet.h>
+
+/* Bits of the buffer's ip_frag.flags.  They tell the fragmentation code
+   which kind of header sits at the start of the buffer, so that its length
+   field (and, for IPv4, its checksum) is rewritten in every fragment. */
+#define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header
+#define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header
+
+/* Graph node names used when registering the two nodes. */
+#define IP4_FRAG_NODE_NAME "ip4-frag"
+#define IP6_FRAG_NODE_NAME "ip6-frag"
+
+/* Declarations only: the node registrations themselves are defined by the
+   VLIB_REGISTER_NODE invocations in ip_frag.c.  Without `extern' every
+   file including this header would emit its own tentative definition. */
+extern vlib_node_registration_t ip4_frag_node;
+extern vlib_node_registration_t ip6_frag_node;
+
+/* Next-node indices of the ip4-frag node; the values index the next_nodes
+   table of its registration and are what ip_frag.next_index must hold. */
+typedef enum {
+ IP4_FRAG_NEXT_IP4_LOOKUP,
+ IP4_FRAG_NEXT_IP6_LOOKUP,
+ IP4_FRAG_NEXT_DROP,
+ IP4_FRAG_N_NEXT
+} ip4_frag_next_t;
+
+/* Next-node indices of the ip6-frag node, laid out like ip4_frag_next_t. */
+typedef enum {
+ IP6_FRAG_NEXT_IP4_LOOKUP,
+ IP6_FRAG_NEXT_IP6_LOOKUP,
+ IP6_FRAG_NEXT_DROP,
+ IP6_FRAG_N_NEXT
+} ip6_frag_next_t;
+
+/* X-macro listing the error/counter codes shared by both fragmentation
+   nodes as (symbol, counter string) pairs.  It is expanded once to build
+   ip_frag_error_t and once to build the counter string table, so the two
+   stay in the same order.  The stray trailing apostrophes previously
+   present in two of the strings have been removed. */
+#define foreach_ip_frag_error \
+  /* Must be first. */ \
+ _(NONE, "packet fragmented") \
+ _(SMALL_PACKET, "packet smaller than MTU") \
+ _(FRAGMENT_SENT, "number of sent fragments") \
+ _(CANT_FRAGMENT_HEADER, "can't fragment header") \
+ _(DONT_FRAGMENT_SET, "can't fragment this packet") \
+ _(MALFORMED, "malformed packet") \
+ _(MEMORY, "could not allocate buffer") \
+ _(UNKNOWN, "unknown error")
+
+/* Error codes IP_FRAG_ERROR_<sym>, one per foreach_ip_frag_error entry and
+   in the same order; IP_FRAG_N_ERROR is the number of codes. */
+typedef enum {
+#define _(sym,str) IP_FRAG_ERROR_##sym,
+ foreach_ip_frag_error
+#undef _
+ IP_FRAG_N_ERROR,
+ } ip_frag_error_t;
+
+#endif /* ifndef IP_FRAG_H */
diff --git a/vnet/vnet/ip/ip_init.c b/vnet/vnet/ip/ip_init.c
new file mode 100644
index 00000000000..0654daa7685
--- /dev/null
+++ b/vnet/vnet/ip/ip_init.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_init.c: ip generic initialization
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Global IP state: the protocol and port tables filled in by ip_main_init. */
+ip_main_t ip_main;
+
+/*
+ * Init function for the generic IP layer.
+ *
+ * Clears ip_main, builds the protocol table from protocols.def and the
+ * TCP/UDP port table from ports.def (each with a lookup by name and a
+ * lookup by number), then runs the init functions of the components listed
+ * below in order.  Returns the first error reported by one of them, or 0.
+ */
+clib_error_t *
+ip_main_init (vlib_main_t * vm)
+{
+ ip_main_t * im = &ip_main;
+ clib_error_t * error = 0;
+
+ memset (im, 0, sizeof (im[0]));
+
+ {
+ ip_protocol_info_t * pi;
+ u32 i;
+
+ /* Each ip_protocol (n, s) entry of protocols.def appends one element to
+ im->protocol_infos; the name is the stringified symbol. */
+#define ip_protocol(n,s) \
+do { \
+ vec_add2 (im->protocol_infos, pi, 1); \
+ pi->protocol = n; \
+ pi->name = (u8 *) #s; \
+} while (0);
+
+#include "protocols.def"
+
+#undef ip_protocol
+
+ /* Both hashes map to the index of the entry in im->protocol_infos. */
+ im->protocol_info_by_name = hash_create_string (0, sizeof (uword));
+ for (i = 0; i < vec_len (im->protocol_infos); i++)
+ {
+ pi = im->protocol_infos + i;
+
+ hash_set_mem (im->protocol_info_by_name, pi->name, i);
+ hash_set (im->protocol_info_by_protocol, pi->protocol, i);
+ }
+ }
+
+ {
+ tcp_udp_port_info_t * pi;
+ u32 i;
+ /* ports.def is expanded twice, giving two parallel arrays: the
+ stringified names and the port numbers. */
+ static char * port_names[] =
+ {
+#define ip_port(s,n) #s,
+#include "ports.def"
+#undef ip_port
+ };
+ static u16 ports[] =
+ {
+#define ip_port(s,n) n,
+#include "ports.def"
+#undef ip_port
+ };
+
+ vec_resize (im->port_infos, ARRAY_LEN (port_names));
+ im->port_info_by_name = hash_create_string (0, sizeof (uword));
+
+ for (i = 0; i < vec_len (im->port_infos); i++)
+ {
+ pi = im->port_infos + i;
+ /* Ports are stored, and used as hash keys, in network byte order. */
+ pi->port = clib_host_to_net_u16 (ports[i]);
+ pi->name = (u8 *) port_names[i];
+ hash_set_mem (im->port_info_by_name, pi->name, i);
+ hash_set (im->port_info_by_port, pi->port, i);
+ }
+ }
+
+ /* Run the other init functions in this order; stop at the first error. */
+ if ((error = vlib_call_init_function (vm, vnet_main_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, icmp4_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, icmp6_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_hop_by_hop_init)))
+ return error;
+
+ /* Compiled out. */
+#if 0
+ if ((error = vlib_call_init_function (vm, tcp_udp_lookup_init)))
+ return error;
+
+#endif
+
+ if ((error = vlib_call_init_function (vm, udp_local_init)))
+ return error;
+
+ /* Compiled out. */
+#if 0
+ if ((error = vlib_call_init_function (vm, tcp_init)))
+ return error;
+#endif
+
+ if ((error = vlib_call_init_function (vm, udp_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip_classify_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, input_acl_init)))
+ return error;
+
+ return error;
+}
+
+/* Register ip_main_init as a vlib init function. */
+VLIB_INIT_FUNCTION (ip_main_init);
diff --git a/vnet/vnet/ip/ip_input_acl.c b/vnet/vnet/ip/ip_input_acl.c
new file mode 100644
index 00000000000..75aa9ef818f
--- /dev/null
+++ b/vnet/vnet/ip/ip_input_acl.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+
+typedef struct { /* Per-packet trace record written by the input ACL nodes. */
+ u32 sw_if_index; /* RX software interface index of the traced buffer */
+ u32 next_index; /* next node index selected for the packet */
+ u32 table_index; /* index of the last classify table consulted, ~0 if none */
+ u32 offset; /* vnet_classify_get_offset() of the matched entry, ~0 on a miss */
+} ip_inacl_trace_t;
+
+/* Packet trace formatter: appends one ip_inacl_trace_t record to s.
+   The vlib_main_t and vlib_node_t arguments are consumed from the
+   va_list but otherwise unused. */
+static u8 * format_ip_inacl_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip_inacl_trace_t * trace = va_arg (*args, ip_inacl_trace_t *);
+
+  return format (s, "INACL: sw_if_index %d, next_index %d, table %d, offset %d",
+                 trace->sw_if_index, trace->next_index,
+                 trace->table_index, trace->offset);
+}
+
+vlib_node_registration_t ip4_inacl_node; /* node registered under the name "ip4-inacl" */
+vlib_node_registration_t ip6_inacl_node; /* node registered under the name "ip6-inacl" */
+
+#define foreach_ip_inacl_error \
+_(MISS, "input ACL misses") \
+_(HIT, "input ACL hits") \
+_(CHAIN_HIT, "input ACL hits after chain walk")
+
+typedef enum { /* Counter indices: one IP_INACL_ERROR_<sym> per foreach_ip_inacl_error entry. */
+#define _(sym,str) IP_INACL_ERROR_##sym,
+ foreach_ip_inacl_error
+#undef _
+ IP_INACL_N_ERROR,
+} ip_inacl_error_t;
+
+static char * ip_inacl_error_strings[] = { /* Counter descriptions, in ip_inacl_error_t order. */
+#define _(sym,string) string,
+ foreach_ip_inacl_error
+#undef _
+};
+
+static inline uword
+ip_inacl_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_ip4)
+{ /* Shared worker for the ip4-inacl and ip6-inacl nodes; is_ip4 selects
+ * the table id, the error node and the error codes.
+ *
+ * Pass 1 walks the frame, hashes each packet against the classify
+ * table configured for its RX interface and stores the hash and table
+ * index in the buffer's l2_classify metadata.
+ * Pass 2 walks the frame again, looks each packet up (following
+ * next_table_index chains on a miss), picks the next node, sets
+ * b0->error and enqueues the buffer.
+ *
+ * Returns frame->n_vectors. */
+ u32 n_left_from, * from, * to_next;
+ acl_next_index_t next_index;
+ input_acl_main_t * am = &input_acl_main;
+ vnet_classify_main_t * vcm = am->vnet_classify_main;
+ f64 now = vlib_time_now (vm);
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+ input_acl_table_id_t tid;
+ vlib_node_runtime_t * error_node;
+
+ if (is_ip4)
+ {
+ tid = INPUT_ACL_TABLE_IP4;
+ error_node = vlib_node_get_runtime (vm, ip4_input_node.index); /* drops are charged to the ip4 input node's errors */
+ }
+ else
+ {
+ tid = INPUT_ACL_TABLE_IP6;
+ error_node = vlib_node_get_runtime (vm, ip6_input_node.index); /* drops are charged to the ip6 input node's errors */
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ /* First pass: compute hashes, two packets per iteration.
+ * NOTE(review): table_index is not compared against ~0 here, although
+ * the second pass does check it -- confirm every interface reaching
+ * this node has a table configured. */
+
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t * b0, * b1;
+ u32 bi0, bi1;
+ u8 * h0, * h1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 table_index0, table_index1;
+ vnet_classify_table_t * t0, * t1;
+
+ /* prefetch next iteration (from[1] and from[2]; hence the "> 2" loop bound) */
+ {
+ vlib_buffer_t * p1, * p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data; /* hashing starts at b0->data, not at the current-data offset */
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+ h1 = b1->data;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 = am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ table_index1 = am->classify_table_index_by_sw_if_index[tid][sw_if_index1];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ vnet_buffer(b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash);
+
+ vnet_buffer(b1)->l2_classify.hash =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+
+ vnet_classify_prefetch_bucket (t1, vnet_buffer(b1)->l2_classify.hash);
+
+ vnet_buffer(b0)->l2_classify.table_index = table_index0;
+
+ vnet_buffer(b1)->l2_classify.table_index = table_index1;
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0) /* first pass, remaining packets one at a time */
+ {
+ vlib_buffer_t * b0;
+ u32 bi0;
+ u8 * h0;
+ u32 sw_if_index0;
+ u32 table_index0;
+ vnet_classify_table_t * t0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 = am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+ vnet_buffer(b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_buffer(b0)->l2_classify.table_index = table_index0;
+ vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash);
+
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index; /* second pass: rewind to the start of the frame */
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = ACL_NEXT_INDEX_DENY;
+ u32 table_index0;
+ vnet_classify_table_t * t0;
+ vnet_classify_entry_t * e0;
+ u64 hash0;
+ u8 * h0;
+ u8 error0; /* only assigned and read on the table_index0 != ~0 path */
+
+ /* Stride 3 seems to work best: prefetch the classify entry for
+ * the packet three slots ahead, using the hash stored in pass 1 */
+ if (PREDICT_TRUE (n_left_from > 3))
+ {
+ vlib_buffer_t * p1 = vlib_get_buffer(vm, from[3]);
+ vnet_classify_table_t * tp1;
+ u32 table_index1;
+ u64 phash1;
+
+ table_index1 = vnet_buffer(p1)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index1 != ~0))
+ {
+ tp1 = pool_elt_at_index (vcm->tables, table_index1);
+ phash1 = vnet_buffer(p1)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp1, phash1);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+ table_index0 = vnet_buffer(b0)->l2_classify.table_index;
+ e0 = 0;
+ t0 = 0;
+
+ vnet_get_config_data (am->vnet_config_main[tid],
+ &vnet_buffer(b0)->ip.current_config_index,
+ &next0,
+ /* # bytes of config data */ 0); /* next0 is passed by address here; presumably it receives the default next node from the feature config -- confirm. Below it is only replaced by in-range classifier results. */
+
+ if (PREDICT_TRUE(table_index0 != ~0))
+ {
+ hash0 = vnet_buffer(b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0,
+ now);
+ if (e0) /* hit in the first table */
+ {
+ vlib_buffer_advance (b0, e0->advance);
+
+ next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT)?
+ e0->next_index:next0;
+
+ hits++;
+
+ if (is_ip4)
+ error0 = (next0 == ACL_NEXT_INDEX_DENY)?
+ IP4_ERROR_INACL_SESSION_DENY:IP4_ERROR_NONE;
+ else
+ error0 = (next0 == ACL_NEXT_INDEX_DENY)?
+ IP6_ERROR_INACL_SESSION_DENY:IP6_ERROR_NONE;
+ b0->error = error_node->errors[error0];
+ }
+ else
+ {
+ while (1) /* miss: walk the next_table_index chain until a hit or the end of the chain */
+ {
+ if (PREDICT_TRUE(t0->next_table_index != ~0))
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 = (t0->miss_next_index < ACL_NEXT_INDEX_N_NEXT)?
+ t0->miss_next_index:next0; /* end of chain: the last table's miss_next_index applies when in range */
+
+ misses++;
+
+ if (is_ip4)
+ error0 = (next0 == ACL_NEXT_INDEX_DENY)?
+ IP4_ERROR_INACL_TABLE_MISS:IP4_ERROR_NONE;
+ else
+ error0 = (next0 == ACL_NEXT_INDEX_DENY)?
+ IP6_ERROR_INACL_TABLE_MISS:IP6_ERROR_NONE;
+ b0->error = error_node->errors[error0];
+ break;
+ }
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0); /* chained tables are hashed here; pass 1 only hashed the first table */
+ e0 = vnet_classify_find_entry
+ (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < ACL_NEXT_INDEX_N_NEXT)?
+ e0->next_index:next0;
+ hits++;
+ chain_hits++; /* counted in addition to hits */
+
+ if (is_ip4)
+ error0 = (next0 == ACL_NEXT_INDEX_DENY)?
+ IP4_ERROR_INACL_SESSION_DENY:IP4_ERROR_NONE;
+ else
+ error0 = (next0 == ACL_NEXT_INDEX_DENY)?
+ IP6_ERROR_INACL_SESSION_DENY:IP6_ERROR_NONE;
+ b0->error = error_node->errors[error0];
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip_inacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ t->next_index = next0;
+ t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->offset = e0 ? vnet_classify_get_offset (t0, e0): ~0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_INACL_ERROR_MISS,
+ misses); /* per-frame totals are flushed to the node counters once, after the loop */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_INACL_ERROR_HIT,
+ hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_INACL_ERROR_CHAIN_HIT,
+ chain_hits);
+ return frame->n_vectors;
+}
+
+/* Node function for "ip4-inacl": runs the shared input ACL worker in
+   IPv4 mode and returns its result. */
+static uword
+ip4_inacl (vlib_main_t * vm,
+           vlib_node_runtime_t * node,
+           vlib_frame_t * frame)
+{
+  const int is_ip4 = 1;
+
+  return ip_inacl_inline (vm, node, frame, is_ip4);
+}
+
+
+VLIB_REGISTER_NODE (ip4_inacl_node) = { /* Graph node "ip4-inacl", dispatching to ip4_inacl. */
+ .function = ip4_inacl,
+ .name = "ip4-inacl",
+ .vector_size = sizeof (u32), /* frame entries are u32 buffer indices */
+ .format_trace = format_ip_inacl_trace,
+ .n_errors = ARRAY_LEN(ip_inacl_error_strings),
+ .error_strings = ip_inacl_error_strings,
+
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "error-drop", /* the only next node named here; NOTE(review): any other ACL_NEXT_INDEX_* slots must be filled elsewhere -- confirm */
+ },
+};
+
+/* Node function for "ip6-inacl": runs the shared input ACL worker in
+   IPv6 mode and returns its result. */
+static uword
+ip6_inacl (vlib_main_t * vm,
+           vlib_node_runtime_t * node,
+           vlib_frame_t * frame)
+{
+  const int is_ip4 = 0;
+
+  return ip_inacl_inline (vm, node, frame, is_ip4);
+}
+
+
+VLIB_REGISTER_NODE (ip6_inacl_node) = { /* Graph node "ip6-inacl", dispatching to ip6_inacl. */
+ .function = ip6_inacl,
+ .name = "ip6-inacl",
+ .vector_size = sizeof (u32), /* frame entries are u32 buffer indices */
+ .format_trace = format_ip_inacl_trace,
+ .n_errors = ARRAY_LEN(ip_inacl_error_strings),
+ .error_strings = ip_inacl_error_strings,
+
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "error-drop", /* the only next node named here; NOTE(review): any other ACL_NEXT_INDEX_* slots must be filled elsewhere -- confirm */
+ },
+};
+
+/* Init hook for the input ACL nodes.  There is nothing to set up; it
+   always reports success (a null error). */
+static clib_error_t *
+ip_inacl_init (vlib_main_t * vm)
+{
+  clib_error_t * error = 0;
+
+  return error;
+}
+
+VLIB_INIT_FUNCTION (ip_inacl_init);
+
diff --git a/vnet/vnet/ip/ip_packet.h b/vnet/vnet/ip/ip_packet.h
new file mode 100644
index 00000000000..fb9a23604e1
--- /dev/null
+++ b/vnet/vnet/ip/ip_packet.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_packet.h: packet format common between ip4 & ip6
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_packet_h
+#define included_ip_packet_h
+
+#include <vppinfra/byte_order.h>
+#include <vppinfra/error.h>
+
+typedef enum ip_protocol { /* IP_PROTOCOL_<s> = <n> for every ip_protocol(n,s) entry in protocols.def. */
+#define ip_protocol(n,s) IP_PROTOCOL_##s = n,
+#include "protocols.def"
+#undef ip_protocol
+} ip_protocol_t;
+
+/* TCP/UDP ports: IP_PORT_<s> = <n> for every ip_port(s,n) entry in ports.def. */
+typedef enum {
+#define ip_port(s,n) IP_PORT_##s = n,
+#include "ports.def"
+#undef ip_port
+} ip_port_t;
+
+/* Classifies protocols into TCP, UDP, ICMP or other.
+ * Enumerators take the default values 0..3 in declaration order; these
+ * are not IP protocol numbers. */
+typedef enum {
+ IP_BUILTIN_PROTOCOL_UDP,
+ IP_BUILTIN_PROTOCOL_TCP,
+ IP_BUILTIN_PROTOCOL_ICMP,
+ IP_BUILTIN_PROTOCOL_UNKNOWN,
+} ip_builtin_protocol_t;
+
+#define foreach_ip_builtin_multicast_group \
+ _ (1, all_hosts_on_subnet) \
+ _ (2, all_routers_on_subnet) \
+ _ (4, dvmrp) \
+ _ (5, ospf_all_routers) \
+ _ (6, ospf_designated_routers) \
+ _ (13, pim) \
+ _ (18, vrrp) \
+ _ (102, hsrp) \
+ _ (22, igmp_v3)
+
+typedef enum { /* IP_MULTICAST_GROUP_<f> = <n> for each _(n,f) entry above.
+ * NOTE(review): n is presumably the group's low-order number within the
+ * link-local multicast range -- confirm.  Entries are not in numeric
+ * order (102 precedes 22); harmless for this enum because every value
+ * is explicit. */
+#define _(n,f) IP_MULTICAST_GROUP_##f = n,
+ foreach_ip_builtin_multicast_group
+#undef _
+} ip_multicast_group_t;
+
+/* IP checksum support. */
+
+/* Incremental checksum update.
+ * Running sums are held in a full machine word (uword): carries are
+ * wrapped around at word width by ip_csum_with_carry, and ip_csum_fold
+ * reduces the final value to 16 bits. */
+typedef uword ip_csum_t;
+
+/* Word-wide ones'-complement addition: returns sum + x with the carry
+   out of the top bit added back into the low bit. */
+always_inline ip_csum_t
+ip_csum_with_carry (ip_csum_t sum, ip_csum_t x)
+{
+  ip_csum_t total = sum + x;
+
+  /* An unsigned result smaller than an operand means the add wrapped. */
+  if (total < x)
+    total += 1;
+
+  return total;
+}
+
+/* Update checksum changing field at even byte offset from x -> 0.
+
+   Performs a word-wide ones'-complement subtraction of x from the
+   running sum c and returns the result: the borrow out of the top bit
+   is folded back in, mirroring the carry handling of
+   ip_csum_with_carry.
+
+   Debug builds verify the result by adding x back.  Ones'-complement
+   arithmetic has two representations of zero (0 and ~0), so when
+   c == 0 and x != 0 the round trip yields ~0 rather than 0.  That is
+   the same checksum value and must not trip the assertion. */
+always_inline ip_csum_t
+ip_csum_add_even (ip_csum_t c, ip_csum_t x)
+{
+  ip_csum_t d, t;
+
+  d = c - x;
+
+  /* Fold in carry from high bit. */
+  d -= d > c;
+
+  t = ip_csum_with_carry (d, x);
+  ASSERT (t == c || (c == 0 && t == ~(ip_csum_t) 0));
+
+  return d;
+}
+
+/* Update checksum changing field at even byte offset from 0 -> x.
+   Implemented as a ones'-complement add of x into the running sum c. */
+always_inline ip_csum_t
+ip_csum_sub_even (ip_csum_t c, ip_csum_t x)
+{
+  ip_csum_t updated = ip_csum_with_carry (c, x);
+
+  return updated;
+}
+
+/* Incrementally update checksum `sum` for a field whose value changes
+   from `old` to `new`.  field_byte_offset and field_n_bytes describe
+   where the field sits in its header; they only matter for fields with
+   an odd byte count, which may need shifting into the other half of
+   their 16-bit checksum word. */
+always_inline ip_csum_t
+ip_csum_update_inline (ip_csum_t sum, ip_csum_t old, ip_csum_t new,
+                       u32 field_byte_offset, u32 field_n_bytes)
+{
+  u32 odd_sized = field_n_bytes % 2;
+  u32 offset_parity = field_byte_offset % 2;
+
+  /* For even 1-byte fields on big-endian and odd 1-byte fields on
+     little endian the byte has to be moved up by 8 bits first. */
+  if (odd_sized && offset_parity == CLIB_ARCH_IS_LITTLE_ENDIAN)
+    {
+      old <<= 8;
+      new <<= 8;
+    }
+
+  return ip_csum_add_even (ip_csum_sub_even (sum, old), new);
+}
+
+#define ip_csum_update(sum,old,new,type,field) \
+ ip_csum_update_inline ((sum), (old), (new), \
+ STRUCT_OFFSET_OF (type, field), \
+ STRUCT_SIZE_OF (type, field)) /* convenience wrapper: derives the field's byte offset and size from `type` and `field` and forwards to ip_csum_update_inline */
+
+/* Reduce a word-wide running sum to 16 bits by repeatedly adding the
+   upper part onto the lower part.  No complement is taken here. */
+always_inline u16 ip_csum_fold (ip_csum_t c)
+{
+  int pass;
+
+#if uword_bits == 64
+  /* 64 -> 33 bits, then one extra 16-bit fold for the wider sum. */
+  c = (c >> (ip_csum_t) 32) + (c & (ip_csum_t) 0xffffffff);
+  c = (c >> 16) + (c & 0xffff);
+#endif
+
+  /* Two folds: the first can itself produce a carry into bit 16. */
+  for (pass = 0; pass < 2; pass++)
+    c = (c >> 16) + (c & 0xffff);
+
+  return (u16) c;
+}
+
+/* Copy data and checksum at the same time. */
+ip_csum_t ip_csum_and_memcpy (ip_csum_t sum, void * dst, void * src, uword n_bytes);
+
+/* Finish a checksum started with ip_csum_and_memcpy and fold it to 16
+   bits.  When dst is not aligned to sizeof (ip_csum_t), the first
+   (dst - aligned address) bytes starting at dst are zeroed and the
+   aligned word at or below dst is added into the sum before folding. */
+always_inline u16
+ip_csum_and_memcpy_fold (ip_csum_t sum, void * dst)
+{
+  ip_csum_t * aligned;
+  uword n_zero;
+
+  /* Round dst down to a checksum-word boundary. */
+  aligned = uword_to_pointer
+    (pointer_to_uword (dst) &~ (sizeof (sum) - 1),
+     ip_csum_t *);
+
+  n_zero = dst - (void *) aligned;
+  if (n_zero != 0)
+    {
+      u8 * bytes = dst;
+      uword k = 0;
+
+      while (k < n_zero)
+        bytes[k++] = 0;
+
+      sum = ip_csum_with_carry (sum, aligned[0]);
+    }
+
+  return ip_csum_fold (sum);
+}
+
+/* Checksum routine. */
+ip_csum_t ip_incremental_checksum (ip_csum_t sum, void * data, uword n_bytes);
+
+#endif /* included_ip_packet_h */
diff --git a/vnet/vnet/ip/lookup.c b/vnet/vnet/ip/lookup.c
new file mode 100644
index 00000000000..80f0a33e731
--- /dev/null
+++ b/vnet/vnet/ip/lookup.c
@@ -0,0 +1,2271 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_lookup.c: ip4/6 adjacency and lookup table management
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vppinfra/math.h> /* for fabs */
+#include <vnet/ip/ip.h>
+
+static void
+ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index);
+
+/* Debug aid: in builds with CLIB_DEBUG > 0, overwrite n_adj adjacencies
+   starting at adj with the byte 0xfe.  Does nothing otherwise. */
+always_inline void
+ip_poison_adjacencies (ip_adjacency_t * adj, uword n_adj)
+{
+  if (CLIB_DEBUG <= 0)
+    return;
+
+  memset (adj, 0xfe, n_adj * sizeof (*adj));
+}
+
+/* Create new block of given number of contiguous adjacencies.
+
+   Allocates n_adj entries from lm->adjacency_heap.  Each entry is
+   either a copy of the matching copy_adj[] element (when copy_adj is
+   non-null) or gets the default "unset" field values.  Every entry is
+   then stamped with the block's heap handle and size, and its combined
+   counter is zeroed.
+
+   The index of the first entry is stored in *adj_index_return; the
+   return value points at that first entry. */
+ip_adjacency_t *
+ip_add_adjacency (ip_lookup_main_t * lm,
+                  ip_adjacency_t * copy_adj,
+                  u32 n_adj,
+                  u32 * adj_index_return)
+{
+  ip_adjacency_t * block, * a;
+  u32 first, k, handle;
+
+  first = heap_alloc (lm->adjacency_heap, n_adj, handle);
+  block = heap_elt_at_index (lm->adjacency_heap, first);
+
+  ip_poison_adjacencies (block, n_adj);
+
+  /* Make sure counters exist for the whole block. */
+  vlib_validate_combined_counter (&lm->adjacency_counters, first + n_adj - 1);
+
+  for (k = 0; k < n_adj; k++)
+    {
+      a = &block[k];
+
+      if (copy_adj)
+        *a = copy_adj[k];
+      else
+        {
+          /* No template: make sure certain fields are always initialized. */
+          a->rewrite_header.sw_if_index = ~0;
+          a->explicit_fib_index = ~0;
+          a->mcast_group_index = ~0;
+          a->classify_table_index = ~0;
+          a->saved_lookup_next_index = 0;
+        }
+
+      a->heap_handle = handle;
+      a->n_adj = n_adj;
+
+      /* A re-used slot may still carry counts from its previous owner. */
+      vlib_zero_combined_counter (&lm->adjacency_counters, first + k);
+    }
+
+  *adj_index_return = first;
+  return block;
+}
+
+/* Delete the adjacency block containing adj_index.
+
+   Runs the registered add/del callbacks with is_del = 1.  When
+   delete_multipath_adjacency is non-zero, multipath adjacencies that
+   use adj_index as a next hop are fixed up first.  The block is then
+   poisoned (debug builds) and returned to lm->adjacency_heap.
+
+   ip_multipath_del_adjacency can create replacement multipath blocks
+   through ip_add_adjacency, which allocates from lm->adjacency_heap
+   and may therefore move the heap's storage.  The heap handle is saved
+   up front and the adjacency pointer is fetched again after that call
+   so a stale pointer is never dereferenced. */
+static void ip_del_adjacency2 (ip_lookup_main_t * lm, u32 adj_index, u32 delete_multipath_adjacency)
+{
+  ip_adjacency_t * adj;
+  uword handle;
+
+  ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 1);
+
+  adj = ip_get_adjacency (lm, adj_index);
+  handle = adj->heap_handle;
+
+  if (delete_multipath_adjacency)
+    {
+      ip_multipath_del_adjacency (lm, adj_index);
+
+      /* Fetch again since the adjacency heap may have moved. */
+      adj = ip_get_adjacency (lm, adj_index);
+    }
+
+  ip_poison_adjacencies (adj, adj->n_adj);
+
+  heap_dealloc (lm->adjacency_heap, handle);
+}
+
+/* Public entry point: delete an adjacency and also fix up any
+   multipath adjacencies that reference it. */
+void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index)
+{
+  const u32 delete_multipath_adjacency = 1;
+
+  ip_del_adjacency2 (lm, adj_index, delete_multipath_adjacency);
+}
+
+/* Ordering used to sort next hops: ascending weight, ties broken by
+   ascending next_hop_adj_index.  Returns -1 / +1 when the weights
+   differ, otherwise the signed difference of the adjacency indices. */
+static int
+next_hop_sort_by_weight (ip_multipath_next_hop_t * n1,
+                         ip_multipath_next_hop_t * n2)
+{
+  int delta = (int) n1->weight - (int) n2->weight;
+
+  if (delta > 0)
+    return +1;
+  if (delta < 0)
+    return -1;
+
+  return (int) n1->next_hop_adj_index - (int) n2->next_hop_adj_index;
+}
+
+/* Given next hop vector is over-written with normalized one with sorted weights and
+ with weights corresponding to the number of adjacencies for each next hop.
+ Returns number of adjacencies in block.
+
+ raw_next_hops is only read.  The normalized result is built in the scratch
+ vector *normalized_next_hops, which is grown as needed and stored back
+ through the pointer for reuse on the next call.  Returns 0 (after an
+ ASSERT) when raw_next_hops is empty. */
+static u32 ip_multipath_normalize_next_hops (ip_lookup_main_t * lm,
+ ip_multipath_next_hop_t * raw_next_hops,
+ ip_multipath_next_hop_t ** normalized_next_hops)
+{
+ ip_multipath_next_hop_t * nhs;
+ uword n_nhs, n_adj, n_adj_left, i;
+ f64 sum_weight, norm, error;
+
+ n_nhs = vec_len (raw_next_hops);
+ ASSERT (n_nhs > 0);
+ if (n_nhs == 0)
+ return 0;
+
+ /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
+ nhs = *normalized_next_hops;
+ vec_validate (nhs, 2*n_nhs - 1);
+
+ /* Fast path: 1 next hop in block. */
+ n_adj = n_nhs;
+ if (n_nhs == 1)
+ {
+ nhs[0] = raw_next_hops[0];
+ nhs[0].weight = 1;
+ _vec_len (nhs) = 1;
+ goto done;
+ }
+
+ else if (n_nhs == 2)
+ {
+ int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;
+
+ /* Fast sort: cmp is 1 when hop 0 orders before hop 1, so the hop that
+ * compares greater lands in nhs[0].
+ * NOTE(review): this is the opposite order from the qsort path below. */
+ nhs[0] = raw_next_hops[cmp];
+ nhs[1] = raw_next_hops[cmp ^ 1];
+
+ /* Fast path: equal cost multipath with 2 next hops. */
+ if (nhs[0].weight == nhs[1].weight)
+ {
+ nhs[0].weight = nhs[1].weight = 1;
+ _vec_len (nhs) = 2;
+ goto done;
+ }
+ }
+ else
+ {
+ memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
+ qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
+ }
+
+ /* Find total weight to normalize weights. */
+ sum_weight = 0;
+ for (i = 0; i < n_nhs; i++)
+ sum_weight += nhs[i].weight;
+
+ /* In the unlikely case that all weights are given as 0, set them all to 1. */
+ if (sum_weight == 0)
+ {
+ for (i = 0; i < n_nhs; i++)
+ nhs[i].weight = 1;
+ sum_weight = n_nhs;
+ }
+
+ /* Save copies of all next hop weights to avoid being overwritten in loop below. */
+ for (i = 0; i < n_nhs; i++)
+ nhs[n_nhs + i].weight = nhs[i].weight;
+
+ /* Try larger and larger power of 2 sized adjacency blocks until we
+ find one where the rounding error is within the configured tolerance
+ (lm->multipath_next_hop_error_tolerance, tested below). */
+ for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
+ {
+ error = 0;
+
+ norm = n_adj / sum_weight;
+ n_adj_left = n_adj;
+ for (i = 0; i < n_nhs; i++)
+ {
+ f64 nf = nhs[n_nhs + i].weight * norm; /* use saved weights */
+ word n = flt_round_nearest (nf);
+
+ n = n > n_adj_left ? n_adj_left : n;
+ n_adj_left -= n;
+ error += fabs (nf - n);
+ nhs[i].weight = n;
+ }
+
+ nhs[0].weight += n_adj_left; /* adjacencies left over after rounding go to the first next hop */
+
+ /* Average rounding error per adjacency within tolerance for this block size? */
+ if (error <= lm->multipath_next_hop_error_tolerance*n_adj)
+ {
+ /* i == n_nhs here, so this trims the vector to the n_nhs normalized
+ * entries and drops the saved-weights copy.
+ * NOTE(review): next hops whose weight rounded to zero are kept. */
+ _vec_len (nhs) = i;
+ break;
+ }
+ }
+
+ done:
+ /* Save vector for next call. */
+ *normalized_next_hops = nhs;
+ return n_adj;
+}
+
+/* Encode a next-hop heap handle as a hash key.  Handle keys are always
+   odd (2 * handle + 1), which sets them apart from keys that are
+   pointers to next-hop vectors. */
+always_inline uword
+ip_next_hop_hash_key_from_handle (uword handle)
+{
+  uword doubled = 2 * handle;
+
+  return doubled + 1;
+}
+
+/* Non-zero when hash key k is an encoded heap handle, i.e. its low bit
+   is set. */
+always_inline uword
+ip_next_hop_hash_key_is_heap_handle (uword k)
+{
+  return k % 2;
+}
+
+/* Decode the heap handle from hash key k.  The key must be a handle
+   key (low bit set); this is asserted. */
+always_inline uword
+ip_next_hop_hash_key_get_heap_handle (uword k)
+{
+  ASSERT (ip_next_hop_hash_key_is_heap_handle (k));
+
+  /* Dropping the tag bit undoes the 2 * handle + 1 encoding. */
+  return k >> 1;
+}
+
+static u32
+ip_multipath_adjacency_get (ip_lookup_main_t * lm,
+ ip_multipath_next_hop_t * raw_next_hops,
+ uword create_if_non_existent)
+{ /* Find, or optionally create, the multipath adjacency for a set of
+ * next hops.
+ *
+ * raw_next_hops is normalized into
+ * lm->next_hop_hash_lookup_key_normalized and the normalized vector
+ * is looked up in lm->multipath_adjacency_by_next_hops.  On a hit
+ * the stored value is returned.  On a miss with
+ * create_if_non_existent set, a new adjacency block is allocated and
+ * filled, both the normalized and the raw next hops are copied into
+ * lm->next_hop_heap, and the new entry's index in
+ * lm->multipath_adjacencies (the block's heap handle) is returned.
+ *
+ * NOTE(review): a miss with create_if_non_existent == 0 returns 0,
+ * which is not distinguishable from index 0. */
+ uword * p;
+ u32 i, j, n_adj, adj_index, adj_heap_handle;
+ ip_adjacency_t * adj, * copy_adj;
+ ip_multipath_next_hop_t * nh, * nhs;
+ ip_multipath_adjacency_t * madj;
+
+ n_adj = ip_multipath_normalize_next_hops (lm, raw_next_hops, &lm->next_hop_hash_lookup_key_normalized);
+ nhs = lm->next_hop_hash_lookup_key_normalized;
+
+ /* Basic sanity. */
+ ASSERT (n_adj >= vec_len (raw_next_hops));
+
+ /* Use normalized next hops to see if we've seen a block equivalent to this one before.
+ * The lookup key is the vector pointer itself; stored keys are encoded heap handles. */
+ p = hash_get_mem (lm->multipath_adjacency_by_next_hops, nhs);
+ if (p)
+ return p[0];
+
+ if (! create_if_non_existent)
+ return 0;
+
+ adj = ip_add_adjacency (lm, /* copy_adj */ 0, n_adj, &adj_index);
+ adj_heap_handle = adj[0].heap_handle;
+
+ /* Fill in adjacencies in block based on corresponding next hop adjacencies:
+ * each next hop contributes `weight` consecutive copies of its adjacency. */
+ i = 0;
+ vec_foreach (nh, nhs)
+ {
+ copy_adj = ip_get_adjacency (lm, nh->next_hop_adj_index);
+ for (j = 0; j < nh->weight; j++)
+ {
+ adj[i] = copy_adj[0];
+ adj[i].heap_handle = adj_heap_handle; /* the copy overwrote the block bookkeeping; restore it */
+ adj[i].n_adj = n_adj;
+ i++;
+ }
+ }
+
+ /* All adjacencies should have been initialized. */
+ ASSERT (i == n_adj);
+
+ vec_validate (lm->multipath_adjacencies, adj_heap_handle); /* may move the vector; callers must re-fetch pointers into it */
+ madj = vec_elt_at_index (lm->multipath_adjacencies, adj_heap_handle);
+
+ madj->adj_index = adj_index;
+ madj->n_adj_in_block = n_adj;
+ madj->reference_count = 0; /* caller will set to one. */
+
+ madj->normalized_next_hops.count = vec_len (nhs);
+ madj->normalized_next_hops.heap_offset
+ = heap_alloc (lm->next_hop_heap, vec_len (nhs),
+ madj->normalized_next_hops.heap_handle);
+ memcpy (lm->next_hop_heap + madj->normalized_next_hops.heap_offset,
+ nhs, vec_bytes (nhs));
+
+ hash_set (lm->multipath_adjacency_by_next_hops,
+ ip_next_hop_hash_key_from_handle (madj->normalized_next_hops.heap_handle),
+ madj - lm->multipath_adjacencies);
+
+ madj->unnormalized_next_hops.count = vec_len (raw_next_hops);
+ madj->unnormalized_next_hops.heap_offset
+ = heap_alloc (lm->next_hop_heap, vec_len (raw_next_hops),
+ madj->unnormalized_next_hops.heap_handle);
+ memcpy (lm->next_hop_heap + madj->unnormalized_next_hops.heap_offset,
+ raw_next_hops, vec_bytes (raw_next_hops));
+
+ ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
+
+ return adj_heap_handle;
+}
+
+/* Returns 0 for next hop not found.
+ *
+ * Adds a next hop to, re-weights a next hop of, or deletes a next hop
+ * from the multipath adjacency old_mp_adj_index.  The index of the
+ * resulting multipath adjacency is written to new_mp_adj_index[0]; ~0
+ * is written when no next hops remain, and also when an add finds the
+ * hop already present with the same weight.
+ *
+ * The only 0 return is a delete of a next hop that is not in a valid
+ * old multipath adjacency; every other path returns 1.  Reference
+ * counts move from the old entry to the new one when they differ, and
+ * the old entry is freed once its count reaches zero. */
+u32
+ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm,
+ u32 is_del,
+ u32 old_mp_adj_index,
+ u32 next_hop_adj_index,
+ u32 next_hop_weight,
+ u32 * new_mp_adj_index)
+{
+ ip_multipath_adjacency_t * mp_old, * mp_new;
+ ip_multipath_next_hop_t * nh, * nhs, * hash_nhs;
+ u32 n_nhs, i_nh;
+
+ mp_new = mp_old = 0;
+ n_nhs = 0;
+ i_nh = 0;
+ nhs = 0;
+
+ /* If old multipath adjacency is valid, find requested next hop. */
+ if (old_mp_adj_index < vec_len (lm->multipath_adjacencies)
+ && lm->multipath_adjacencies[old_mp_adj_index].normalized_next_hops.count > 0)
+ {
+ mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
+
+ nhs = vec_elt_at_index (lm->next_hop_heap, mp_old->unnormalized_next_hops.heap_offset);
+ n_nhs = mp_old->unnormalized_next_hops.count;
+
+ /* Linear search: ok since n_next_hops is small. */
+ for (i_nh = 0; i_nh < n_nhs; i_nh++)
+ if (nhs[i_nh].next_hop_adj_index == next_hop_adj_index)
+ break;
+
+ /* Given next hop not found. */
+ if (i_nh >= n_nhs && is_del)
+ return 0;
+ }
+
+ hash_nhs = lm->next_hop_hash_lookup_key; /* reusable scratch vector; reset to empty below */
+ if (hash_nhs)
+ _vec_len (hash_nhs) = 0;
+
+ if (is_del)
+ {
+ if (n_nhs > 1)
+ {
+ /* Prepare lookup key for multipath with target next hop deleted. */
+ if (i_nh > 0)
+ vec_add (hash_nhs, nhs + 0, i_nh);
+ if (i_nh + 1 < n_nhs)
+ vec_add (hash_nhs, nhs + i_nh + 1, n_nhs - (i_nh + 1));
+ }
+ }
+ else /* it's an add. */
+ {
+ /* If next hop is already there with the same weight, we have nothing to do. */
+ if (i_nh < n_nhs && nhs[i_nh].weight == next_hop_weight)
+ {
+ new_mp_adj_index[0] = ~0;
+ goto done;
+ }
+
+ /* Copy old next hops to lookup key vector. */
+ if (n_nhs > 0)
+ vec_add (hash_nhs, nhs, n_nhs);
+
+ if (i_nh < n_nhs)
+ {
+ /* Change weight of existing next hop. */
+ nh = vec_elt_at_index (hash_nhs, i_nh);
+ }
+ else
+ {
+ /* Add a new next hop. */
+ vec_add2 (hash_nhs, nh, 1);
+ nh->next_hop_adj_index = next_hop_adj_index;
+ }
+
+ /* Set weight for added or old next hop. */
+ nh->weight = next_hop_weight;
+ }
+
+ if (vec_len (hash_nhs) > 0)
+ {
+ u32 tmp = ip_multipath_adjacency_get (lm, hash_nhs,
+ /* create_if_non_existent */ 1);
+ if (tmp != ~0)
+ mp_new = vec_elt_at_index (lm->multipath_adjacencies, tmp);
+
+ /* Fetch again since pool may have moved. */
+ if (mp_old)
+ mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
+ }
+
+ new_mp_adj_index[0] = mp_new ? mp_new - lm->multipath_adjacencies : ~0;
+
+ if (mp_new != mp_old)
+ {
+ if (mp_old)
+ {
+ ASSERT (mp_old->reference_count > 0);
+ mp_old->reference_count -= 1;
+ }
+ if (mp_new)
+ mp_new->reference_count += 1;
+ }
+
+ if (mp_old && mp_old->reference_count == 0)
+ ip_multipath_adjacency_free (lm, mp_old);
+
+ done:
+ /* Save key vector next call. */
+ lm->next_hop_hash_lookup_key = hash_nhs;
+
+ return 1;
+}
+
+/* Fix up multipath adjacencies when adjacency del_adj_index goes away.
+
+   Every multipath adjacency whose unnormalized next hops include
+   del_adj_index is replaced by one built from the remaining next hops,
+   or by nothing when it was the only next hop.  The replacement is
+   recorded in lm->adjacency_remap_table as 1 + new adjacency index, or
+   ~0 when there is no replacement, and lm->n_adjacency_remaps is
+   bumped.  The old multipath adjacency is then freed.
+
+   ip_multipath_adjacency_get may grow lm->multipath_adjacencies, so
+   the pointer to the current entry is fetched again after calling it. */
+static void
+ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index)
+{
+  ip_adjacency_t * adj = ip_get_adjacency (lm, del_adj_index);
+  ip_multipath_adjacency_t * madj, * new_madj;
+  ip_multipath_next_hop_t * nhs, * hash_nhs;
+  u32 i, n_nhs, madj_index, new_madj_index;
+
+  if (adj->heap_handle >= vec_len (lm->multipath_adjacencies))
+    return;
+
+  vec_validate (lm->adjacency_remap_table, vec_len (lm->adjacency_heap) - 1);
+
+  for (madj_index = 0; madj_index < vec_len (lm->multipath_adjacencies); madj_index++)
+    {
+      madj = vec_elt_at_index (lm->multipath_adjacencies, madj_index);
+
+      /* Unused slot. */
+      if (madj->n_adj_in_block == 0)
+        continue;
+
+      nhs = heap_elt_at_index (lm->next_hop_heap, madj->unnormalized_next_hops.heap_offset);
+      n_nhs = madj->unnormalized_next_hops.count;
+      for (i = 0; i < n_nhs; i++)
+        if (nhs[i].next_hop_adj_index == del_adj_index)
+          break;
+
+      /* del_adj_index not found in unnormalized_next_hops?  Nothing to do
+         for this multipath adjacency. */
+      if (i >= n_nhs)
+        continue;
+
+      new_madj = 0;
+      if (n_nhs > 1)
+        {
+          /* Build the lookup key: all next hops except the deleted one.
+             nhs points into lm->next_hop_heap and is not used again after
+             the copy, since the call below may allocate from that heap. */
+          hash_nhs = lm->next_hop_hash_lookup_key;
+          if (hash_nhs)
+            _vec_len (hash_nhs) = 0;
+          if (i > 0)
+            vec_add (hash_nhs, nhs + 0, i);
+          if (i + 1 < n_nhs)
+            vec_add (hash_nhs, nhs + i + 1, n_nhs - (i + 1));
+
+          new_madj_index = ip_multipath_adjacency_get (lm, hash_nhs, /* create_if_non_existent */ 1);
+
+          lm->next_hop_hash_lookup_key = hash_nhs;
+
+          if (new_madj_index == madj_index)
+            continue;
+
+          /* Fetch again since the multipath adjacency vector may have moved. */
+          madj = vec_elt_at_index (lm->multipath_adjacencies, madj_index);
+          new_madj = vec_elt_at_index (lm->multipath_adjacencies, new_madj_index);
+        }
+
+      lm->adjacency_remap_table[madj->adj_index] = new_madj ? 1 + new_madj->adj_index : ~0;
+      lm->n_adjacency_remaps += 1;
+      ip_multipath_adjacency_free (lm, madj);
+    }
+}
+
+/* Release multipath adjacency a: remove it from the next-hop hash, free
+   both of its next-hop lists, delete its adjacency block and zero the
+   entry.  The hash entry is removed before the normalized next-hop list
+   is freed, because the hash key refers to that list by heap handle. */
+void
+ip_multipath_adjacency_free (ip_lookup_main_t * lm,
+                             ip_multipath_adjacency_t * a)
+{
+  uword key = ip_next_hop_hash_key_from_handle (a->normalized_next_hops.heap_handle);
+
+  hash_unset (lm->multipath_adjacency_by_next_hops, key);
+
+  heap_dealloc (lm->next_hop_heap, a->normalized_next_hops.heap_handle);
+  heap_dealloc (lm->next_hop_heap, a->unnormalized_next_hops.heap_handle);
+
+  /* Multipath fix-up is requested only when nothing references a. */
+  ip_del_adjacency2 (lm, a->adj_index, a->reference_count == 0);
+
+  memset (a, 0, sizeof (*a));
+}
+
+/* Resolve hash key k to its next-hop array.  An odd key is an encoded
+   handle into lm->next_hop_heap; any other key is a pointer to a
+   next-hop vector.  The element count is stored in *n_next_hops and the
+   array is returned. */
+always_inline ip_multipath_next_hop_t *
+ip_next_hop_hash_key_get_next_hops (ip_lookup_main_t * lm, uword k,
+                                    uword * n_next_hops)
+{
+  ip_multipath_next_hop_t * hops;
+
+  if (! ip_next_hop_hash_key_is_heap_handle (k))
+    {
+      hops = uword_to_pointer (k, ip_multipath_next_hop_t *);
+      *n_next_hops = vec_len (hops);
+    }
+  else
+    {
+      uword heap_handle = ip_next_hop_hash_key_get_heap_handle (k);
+
+      hops = heap_elt_with_handle (lm->next_hop_heap, heap_handle);
+      *n_next_hops = heap_len (lm->next_hop_heap, heap_handle);
+    }
+
+  return hops;
+}
+
+/* Hash callback: hashes the bytes of the next-hop array that key0
+   resolves to, seeded with its element count.  h->user carries the
+   ip_lookup_main_t. */
+static uword
+ip_next_hop_hash_key_sum (hash_t * h, uword key0)
+{
+  ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *);
+  ip_multipath_next_hop_t * hops;
+  uword n_hops;
+
+  hops = ip_next_hop_hash_key_get_next_hops (lm, key0, &n_hops);
+
+  return hash_memory (hops, n_hops * sizeof (hops[0]), /* seed */ n_hops);
+}
+
+/* Hash callback: two keys are equal when they resolve to next-hop
+   arrays of the same length with identical bytes. */
+static uword
+ip_next_hop_hash_key_equal (hash_t * h, uword key0, uword key1)
+{
+  ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *);
+  ip_multipath_next_hop_t * hops0, * hops1;
+  uword n0, n1;
+
+  hops0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0);
+  hops1 = ip_next_hop_hash_key_get_next_hops (lm, key1, &n1);
+
+  if (n0 != n1)
+    return 0;
+
+  return ! memcmp (hops0, hops1, n0 * sizeof (hops0[0]));
+}
+
+/* Add or delete the interface address addr_fib/address_length on
+   sw_if_index.
+
+   Addresses live in lm->if_address_pool and are found through the
+   lm->address_to_if_address_index hash.  The addresses of one interface
+   form a doubly linked list (prev/next_this_sw_interface) whose head index
+   is kept in lm->if_address_pool_index_by_sw_if_index[sw_if_index].
+
+   On success returns 0 and, when result_if_address_index is non-null,
+   stores the pool index of the address (~0 after a delete).  Adding an
+   address that already exists just reports its index.  On failure sets
+   vnm->api_errno and returns an error: zero length, length different from
+   the stored one, or deleting an address that is not present. */
+clib_error_t *
+ip_interface_address_add_del (ip_lookup_main_t * lm,
+                              u32 sw_if_index,
+                              void * addr_fib,
+                              u32 address_length,
+                              u32 is_del,
+                              u32 * result_if_address_index)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip_interface_address_t * a = 0;
+  uword * found = mhash_get (&lm->address_to_if_address_index, addr_fib);
+
+  vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
+  if (found)
+    a = pool_elt_at_index (lm->if_address_pool, found[0]);
+
+  /* Verify given length. */
+  if ((address_length == 0) || (a && (address_length != a->address_length)))
+    {
+      vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
+      return clib_error_create
+        ( "%U wrong length (expected %d) for interface %U",
+          lm->format_address_and_length, addr_fib,
+          address_length, a? a->address_length : -1,
+          format_vnet_sw_if_index_name, vnm, sw_if_index);
+    }
+
+  if (is_del)
+    {
+      u32 prev_index, next_index;
+
+      if (!a)
+        {
+          vnet_sw_interface_t * si = vnet_get_sw_interface (vnm, sw_if_index);
+          vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
+          return clib_error_create ("%U not found for interface %U",
+                                    lm->format_address_and_length,
+                                    addr_fib, address_length,
+                                    format_vnet_sw_interface_name, vnm, si);
+        }
+
+      prev_index = a->prev_this_sw_interface;
+      next_index = a->next_this_sw_interface;
+
+      /* Unlink from the per-interface list. */
+      if (prev_index != (u32) ~0)
+        {
+          ip_interface_address_t * before
+            = pool_elt_at_index (lm->if_address_pool, prev_index);
+          before->next_this_sw_interface = next_index;
+        }
+      if (next_index != (u32) ~0)
+        {
+          ip_interface_address_t * after
+            = pool_elt_at_index (lm->if_address_pool, next_index);
+          after->prev_this_sw_interface = prev_index;
+        }
+
+      /* Removing the head: its successor (~0 when there is none) becomes
+         the new head of the interface's list. */
+      if (prev_index == (u32) ~0)
+        lm->if_address_pool_index_by_sw_if_index[sw_if_index] = next_index;
+
+      mhash_unset (&lm->address_to_if_address_index, addr_fib,
+                   /* old_value */ 0);
+      pool_put (lm->if_address_pool, a);
+
+      if (result_if_address_index)
+        *result_if_address_index = ~0;
+      return /* no error */ 0;
+    }
+
+  if (a)
+    {
+      /* Address already present: just report where it lives. */
+      if (result_if_address_index)
+        *result_if_address_index = a - lm->if_address_pool;
+      return /* no error */ 0;
+    }
+
+  {
+    u32 new_index;
+    u32 head_index;
+    u32 tail_index = (u32) ~0;
+    u32 walk;
+    ip_interface_address_t * tail = 0;
+
+    pool_get (lm->if_address_pool, a);
+    memset (a, ~0, sizeof (a[0]));
+    new_index = a - lm->if_address_pool;
+
+    /* Find the current last address of this interface, if any. */
+    head_index = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+    for (walk = head_index; walk != (u32) ~0;
+         walk = tail->next_this_sw_interface)
+      {
+        tail = pool_elt_at_index (lm->if_address_pool, walk);
+        tail_index = walk;
+      }
+
+    a->address_key = mhash_set (&lm->address_to_if_address_index,
+                                addr_fib, new_index, /* old_value */ 0);
+    a->address_length = address_length;
+    a->sw_if_index = sw_if_index;
+    a->flags = 0;
+    a->prev_this_sw_interface = tail_index;
+    a->next_this_sw_interface = ~0;
+
+    /* Append after the tail, or become the head of an empty list. */
+    if (tail)
+      tail->next_this_sw_interface = new_index;
+    if (head_index == (u32) ~0)
+      lm->if_address_pool_index_by_sw_if_index[sw_if_index] = new_index;
+
+    if (result_if_address_index)
+      *result_if_address_index = new_index;
+  }
+
+  return /* no error */ 0;
+}
+
+/* Serialize n adjacencies starting at the ip_adjacency_t pointer taken
+   from va.  Each one writes heap_handle, n_adj and the lookup-next index,
+   then the fields that matter for that next index (local, arp, rewrite). */
+void serialize_vec_ip_adjacency (serialize_main_t * m, va_list * va)
+{
+  ip_adjacency_t * first = va_arg (*va, ip_adjacency_t *);
+  u32 count = va_arg (*va, u32);
+  ip_adjacency_t * adj;
+
+  for (adj = first; adj < first + count; adj++)
+    {
+      serialize_integer (m, adj->heap_handle, sizeof (adj->heap_handle));
+      serialize_integer (m, adj->n_adj, sizeof (adj->n_adj));
+      serialize_integer (m, adj->lookup_next_index, sizeof (adj->lookup_next_index_as_int));
+
+      switch (adj->lookup_next_index)
+        {
+        case IP_LOOKUP_NEXT_REWRITE:
+          serialize (m, serialize_vnet_rewrite, &adj->rewrite_header, sizeof (adj->rewrite_data));
+          break;
+
+        case IP_LOOKUP_NEXT_ARP:
+          serialize_integer (m, adj->if_address_index, sizeof (adj->if_address_index));
+          serialize_integer (m, adj->rewrite_header.sw_if_index, sizeof (adj->rewrite_header.sw_if_index));
+          break;
+
+        case IP_LOOKUP_NEXT_LOCAL:
+          serialize_integer (m, adj->if_address_index, sizeof (adj->if_address_index));
+          break;
+
+        default:
+          /* No per-type payload for the remaining next indices. */
+          break;
+        }
+    }
+}
+
+/* Inverse of serialize_vec_ip_adjacency: poisons the n destination
+   adjacencies, then reads each one back field by field in the order in
+   which it was written. */
+void unserialize_vec_ip_adjacency (serialize_main_t * m, va_list * va)
+{
+  ip_adjacency_t * first = va_arg (*va, ip_adjacency_t *);
+  u32 count = va_arg (*va, u32);
+  ip_adjacency_t * adj;
+
+  ip_poison_adjacencies (first, count);
+
+  for (adj = first; adj < first + count; adj++)
+    {
+      unserialize_integer (m, &adj->heap_handle, sizeof (adj->heap_handle));
+      unserialize_integer (m, &adj->n_adj, sizeof (adj->n_adj));
+      unserialize_integer (m, &adj->lookup_next_index_as_int, sizeof (adj->lookup_next_index_as_int));
+
+      switch (adj->lookup_next_index)
+        {
+        case IP_LOOKUP_NEXT_REWRITE:
+          unserialize (m, unserialize_vnet_rewrite, &adj->rewrite_header, sizeof (adj->rewrite_data));
+          break;
+
+        case IP_LOOKUP_NEXT_ARP:
+          unserialize_integer (m, &adj->if_address_index, sizeof (adj->if_address_index));
+          unserialize_integer (m, &adj->rewrite_header.sw_if_index, sizeof (adj->rewrite_header.sw_if_index));
+          break;
+
+        case IP_LOOKUP_NEXT_LOCAL:
+          unserialize_integer (m, &adj->if_address_index, sizeof (adj->if_address_index));
+          break;
+
+        default:
+          /* No per-type payload for the remaining next indices. */
+          break;
+        }
+    }
+}
+
+/* Serialize n multipath next hops: adjacency index, then weight. */
+static void serialize_vec_ip_multipath_next_hop (serialize_main_t * m, va_list * va)
+{
+  ip_multipath_next_hop_t * first = va_arg (*va, ip_multipath_next_hop_t *);
+  u32 count = va_arg (*va, u32);
+  ip_multipath_next_hop_t * hop;
+
+  for (hop = first; hop < first + count; hop++)
+    {
+      serialize_integer (m, hop->next_hop_adj_index, sizeof (hop->next_hop_adj_index));
+      serialize_integer (m, hop->weight, sizeof (hop->weight));
+    }
+}
+
+/* Read back n multipath next hops: adjacency index, then weight. */
+static void unserialize_vec_ip_multipath_next_hop (serialize_main_t * m, va_list * va)
+{
+  ip_multipath_next_hop_t * first = va_arg (*va, ip_multipath_next_hop_t *);
+  u32 count = va_arg (*va, u32);
+  ip_multipath_next_hop_t * hop;
+
+  for (hop = first; hop < first + count; hop++)
+    {
+      unserialize_integer (m, &hop->next_hop_adj_index, sizeof (hop->next_hop_adj_index));
+      unserialize_integer (m, &hop->weight, sizeof (hop->weight));
+    }
+}
+
+/* Serialize n multipath adjacencies.  The field list macro defined here is
+   deliberately left defined: the matching unserialize routine reuses it so
+   both sides walk the fields in the same order. */
+static void serialize_vec_ip_multipath_adjacency (serialize_main_t * m, va_list * va)
+{
+  ip_multipath_adjacency_t * first = va_arg (*va, ip_multipath_adjacency_t *);
+  u32 count = va_arg (*va, u32);
+  ip_multipath_adjacency_t * madj;
+
+#define foreach_ip_multipath_adjacency_field		\
+  _ (adj_index) _ (n_adj_in_block) _ (reference_count)	\
+  _ (normalized_next_hops.count)			\
+  _ (normalized_next_hops.heap_offset)			\
+  _ (normalized_next_hops.heap_handle)			\
+  _ (unnormalized_next_hops.count)			\
+  _ (unnormalized_next_hops.heap_offset)		\
+  _ (unnormalized_next_hops.heap_handle)
+
+  for (madj = first; madj < first + count; madj++)
+    {
+#define _(f) serialize_integer (m, madj->f, sizeof (madj->f));
+      foreach_ip_multipath_adjacency_field;
+#undef _
+    }
+}
+
+/* Read back n multipath adjacencies, one field at a time, in the order
+   given by foreach_ip_multipath_adjacency_field. */
+static void unserialize_vec_ip_multipath_adjacency (serialize_main_t * m, va_list * va)
+{
+  ip_multipath_adjacency_t * first = va_arg (*va, ip_multipath_adjacency_t *);
+  u32 count = va_arg (*va, u32);
+  ip_multipath_adjacency_t * madj;
+
+  for (madj = first; madj < first + count; madj++)
+    {
+#define _(f) unserialize_integer (m, &madj->f, sizeof (madj->f));
+      foreach_ip_multipath_adjacency_field;
+#undef _
+    }
+}
+
+/* Serialize the lookup state: the adjacency heap, the next-hop heap and
+   the multipath adjacency vector, in that order.  Adjacency counters are
+   not written (the code for it is compiled but never runs). */
+void serialize_ip_lookup_main (serialize_main_t * m, va_list * va)
+{
+  ip_lookup_main_t * lm = va_arg (*va, ip_lookup_main_t *);
+
+  /* Pending remaps must be flushed first, e.g. with
+     ip4_maybe_remap_adjacencies. */
+  ASSERT (lm->n_adjacency_remaps == 0);
+
+  serialize (m, serialize_heap, lm->adjacency_heap, serialize_vec_ip_adjacency);
+  serialize (m, serialize_heap, lm->next_hop_heap, serialize_vec_ip_multipath_next_hop);
+  vec_serialize (m, lm->multipath_adjacencies, serialize_vec_ip_multipath_adjacency);
+
+  /* Adjacency counters (FIXME disabled for now). */
+  if (0)
+    {
+      serialize (m, serialize_vlib_combined_counter_main, &lm->adjacency_counters, /* incremental */ 0);
+    }
+}
+
+/* Read back what serialize_ip_lookup_main wrote, then rebuild the derived
+   state: the next-hops -> multipath adjacency hash and the size of the
+   adjacency counter vectors. */
+void unserialize_ip_lookup_main (serialize_main_t * m, va_list * va)
+{
+  ip_lookup_main_t * lm = va_arg (*va, ip_lookup_main_t *);
+  uword madj_index;
+
+  unserialize (m, unserialize_heap, &lm->adjacency_heap, unserialize_vec_ip_adjacency);
+  unserialize (m, unserialize_heap, &lm->next_hop_heap, unserialize_vec_ip_multipath_next_hop);
+  vec_unserialize (m, &lm->multipath_adjacencies, unserialize_vec_ip_multipath_adjacency);
+
+  /* Re-create the hash entries: only multipath adjacencies that own a
+     block and are still referenced get one. */
+  for (madj_index = 0; madj_index < vec_len (lm->multipath_adjacencies); madj_index++)
+    {
+      ip_multipath_adjacency_t * madj = lm->multipath_adjacencies + madj_index;
+
+      if (! (madj->n_adj_in_block > 0 && madj->reference_count > 0))
+        continue;
+
+      hash_set (lm->multipath_adjacency_by_next_hops,
+                ip_next_hop_hash_key_from_handle (madj->normalized_next_hops.heap_handle),
+                madj_index);
+    }
+
+  /* Make the counters cover every adjacency index in the heap. */
+  vlib_validate_combined_counter (&lm->adjacency_counters,
+                                  vec_len (lm->adjacency_heap) - 1);
+
+  /* Adjacency counters (FIXME disabled for now). */
+  if (0)
+    {
+      unserialize (m, unserialize_vlib_combined_counter_main, &lm->adjacency_counters, /* incremental */ 0);
+    }
+}
+
+/* Initialize a lookup main for ip4 (is_ip6 == 0) or ip6.
+   Creates the miss, drop and local adjacencies (in that order), the
+   multipath hash, the interface address hash and the per-protocol
+   local-delivery tables. */
+void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6)
+{
+  ip_adjacency_t * special;
+  uword address_key_bytes;
+  int protocol;
+  int icmp_protocol;
+
+  /* Adjacency used when nothing matches in the routing table. */
+  special = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, &lm->miss_adj_index);
+  special->lookup_next_index = IP_LOOKUP_NEXT_MISS;
+  ASSERT (lm->miss_adj_index == IP_LOOKUP_MISS_ADJ_INDEX);
+
+  /* Drop adjacency. */
+  special = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, &lm->drop_adj_index);
+  special->lookup_next_index = IP_LOOKUP_NEXT_DROP;
+
+  /* Local adjacency, not tied to any interface address. */
+  special = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, &lm->local_adj_index);
+  special->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
+  special->if_address_index = ~0;
+
+  /* Keep a caller-provided result size; otherwise default to one word. */
+  if (lm->fib_result_n_bytes == 0)
+    lm->fib_result_n_bytes = sizeof (uword);
+
+  lm->multipath_adjacency_by_next_hops
+    = hash_create2 (/* elts */ 0,
+                    /* user */ pointer_to_uword (lm),
+                    /* value_bytes */ sizeof (uword),
+                    ip_next_hop_hash_key_sum,
+                    ip_next_hop_hash_key_equal,
+                    /* format pair/arg */
+                    0, 0);
+
+  /* 1% max error tolerance for multipath. */
+  lm->multipath_next_hop_error_tolerance = .01;
+
+  /* Address family specific formatter and address-hash key size. */
+  lm->is_ip6 = is_ip6;
+  if (is_ip6)
+    {
+      lm->format_address_and_length = format_ip6_address_and_length;
+      address_key_bytes = sizeof (ip6_address_fib_t);
+    }
+  else
+    {
+      lm->format_address_and_length = format_ip4_address_and_length;
+      address_key_bytes = sizeof (ip4_address_fib_t);
+    }
+  mhash_init (&lm->address_to_if_address_index, sizeof (uword),
+              address_key_bytes);
+
+  /* Every IP protocol starts out punted and builtin-unknown. */
+  for (protocol = 0; protocol < 256; protocol++)
+    {
+      lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
+      lm->builtin_protocol_by_ip_protocol[protocol] = IP_BUILTIN_PROTOCOL_UNKNOWN;
+    }
+#if 0
+  /* Eliot's TCP doesn't actually work */
+  lm->local_next_by_ip_protocol[IP_PROTOCOL_TCP] = IP_LOCAL_NEXT_TCP_LOOKUP;
+  lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_TCP] =
+    IP_BUILTIN_PROTOCOL_TCP;
+#endif
+
+  /* UDP and the family's ICMP are the protocols handled locally. */
+  icmp_protocol = is_ip6 ? IP_PROTOCOL_ICMP6 : IP_PROTOCOL_ICMP;
+
+  lm->local_next_by_ip_protocol[IP_PROTOCOL_UDP] = IP_LOCAL_NEXT_UDP_LOOKUP;
+  lm->local_next_by_ip_protocol[icmp_protocol] = IP_LOCAL_NEXT_ICMP;
+  lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_UDP] = IP_BUILTIN_PROTOCOL_UDP;
+  lm->builtin_protocol_by_ip_protocol[icmp_protocol] = IP_BUILTIN_PROTOCOL_ICMP;
+}
+
+/* Format helper: appends the name of every flow-hash bit that is set in
+   the u32 argument, each followed by a space. */
+u8 * format_ip_flow_hash_config (u8 * s, va_list * args)
+{
+  u32 enabled_bits = va_arg (*args, u32);
+
+#define _(n,v)					\
+  if (enabled_bits & v)				\
+    s = format (s, "%s ", #n);
+  foreach_flow_hash_bit;
+#undef _
+
+  return s;
+}
+
+/* Format helper: appends the short name of an ip_lookup_next_t.
+   The rewrite next appends nothing; values without a name here are
+   printed as "unknown <number>". */
+u8 * format_ip_lookup_next (u8 * s, va_list * args)
+{
+  ip_lookup_next_t n = va_arg (*args, ip_lookup_next_t);
+  char * name;
+
+  switch (n)
+    {
+    case IP_LOOKUP_NEXT_REWRITE:
+      /* Nothing to append for rewrite. */
+      return s;
+
+    case IP_LOOKUP_NEXT_MISS:     name = "miss";     break;
+    case IP_LOOKUP_NEXT_DROP:     name = "drop";     break;
+    case IP_LOOKUP_NEXT_PUNT:     name = "punt";     break;
+    case IP_LOOKUP_NEXT_LOCAL:    name = "local";    break;
+    case IP_LOOKUP_NEXT_ARP:      name = "arp";      break;
+    case IP_LOOKUP_NEXT_CLASSIFY: name = "classify"; break;
+    case IP_LOOKUP_NEXT_MAP:      name = "map";      break;
+    case IP_LOOKUP_NEXT_MAP_T:    name = "map-t";    break;
+    case IP_LOOKUP_NEXT_SIXRD:    name = "sixrd";    break;
+
+    default:
+      return format (s, "unknown %d", n);
+    }
+
+  vec_add (s, name, strlen (name));
+  return s;
+}
+
+/* Format helper taking (ip_lookup_main_t *, u32 if_address_index):
+   prints the pool entry's address and length with the ip4 or ip6
+   formatter, chosen by lm->is_ip6. */
+static u8 * format_ip_interface_address (u8 * s, va_list * args)
+{
+  ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
+  u32 if_address_index = va_arg (*args, u32);
+  ip_interface_address_t * entry;
+  void * address;
+
+  entry = pool_elt_at_index (lm->if_address_pool, if_address_index);
+  address = ip_interface_address_get_address (lm, entry);
+
+  if (! lm->is_ip6)
+    return format (s, "%U", format_ip4_address_and_length, address, entry->address_length);
+
+  return format (s, "%U", format_ip6_address_and_length, address, entry->address_length);
+}
+
+/* Format helper taking (vnet_main_t *, ip_lookup_main_t *, u32 adj_index).
+   Rewrite adjacencies print their rewrite.  Others print the next name,
+   followed by the interface for arp, the interface address for arp and
+   local (when one is set) and the table index for classify.  A non-zero
+   explicit fib index that is not ~0 is appended in every case. */
+u8 * format_ip_adjacency (u8 * s, va_list * args)
+{
+  vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
+  ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
+  u32 adj_index = va_arg (*args, u32);
+  ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
+
+  if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
+    {
+      s = format (s, "%U",
+                  format_vnet_rewrite,
+                  vnm->vlib_main, &adj->rewrite_header, sizeof (adj->rewrite_data));
+    }
+  else
+    {
+      int is_arp = adj->lookup_next_index == IP_LOOKUP_NEXT_ARP;
+      int is_local = adj->lookup_next_index == IP_LOOKUP_NEXT_LOCAL;
+
+      s = format (s, "%U", format_ip_lookup_next, adj->lookup_next_index);
+
+      if (is_arp)
+        s = format (s, " %U",
+                    format_vnet_sw_interface_name,
+                    vnm,
+                    vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index));
+
+      if (is_arp || is_local)
+        {
+          if (adj->if_address_index != ~0)
+            s = format (s, " %U", format_ip_interface_address, lm, adj->if_address_index);
+        }
+      else if (adj->lookup_next_index == IP_LOOKUP_NEXT_CLASSIFY)
+        {
+          s = format (s, " table %d", adj->classify_table_index);
+        }
+    }
+
+  if (adj->explicit_fib_index != ~0 && adj->explicit_fib_index != 0)
+    s = format (s, " lookup fib index %d", adj->explicit_fib_index);
+
+  return s;
+}
+
+/* Format helper taking (vnet_main_t *, ip_lookup_main_t *, u32 adj_index,
+   u8 * packet_data, u32 n_packet_data_bytes).  Only rewrite adjacencies
+   print anything: the packet data formatted as that adjacency's rewrite
+   header. */
+u8 * format_ip_adjacency_packet_data (u8 * s, va_list * args)
+{
+  vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
+  ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *);
+  u32 adj_index = va_arg (*args, u32);
+  u8 * packet_data = va_arg (*args, u8 *);
+  u32 n_packet_data_bytes = va_arg (*args, u32);
+  ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
+
+  if (adj->lookup_next_index != IP_LOOKUP_NEXT_REWRITE)
+    return s;
+
+  return format (s, "%U",
+                 format_vnet_rewrite_header,
+                 vnm->vlib_main, &adj->rewrite_header, packet_data, n_packet_data_bytes);
+}
+
+/* Unformat helper: parses one of the keywords drop, punt, local, arp or
+   classify into the ip_lookup_next_t the caller passed.  Returns 1 on a
+   match, 0 (result untouched) otherwise. */
+static uword unformat_ip_lookup_next (unformat_input_t * input, va_list * args)
+{
+  /* Tried in this order, first match wins. */
+  static struct {
+    char * keyword;
+    ip_lookup_next_t next;
+  } keywords[] = {
+    { "drop", IP_LOOKUP_NEXT_DROP },
+    { "punt", IP_LOOKUP_NEXT_PUNT },
+    { "local", IP_LOOKUP_NEXT_LOCAL },
+    { "arp", IP_LOOKUP_NEXT_ARP },
+    { "classify", IP_LOOKUP_NEXT_CLASSIFY },
+  };
+  ip_lookup_next_t * result = va_arg (*args, ip_lookup_next_t *);
+  uword k;
+
+  for (k = 0; k < sizeof (keywords) / sizeof (keywords[0]); k++)
+    {
+      if (unformat (input, keywords[k].keyword))
+        {
+          *result = keywords[k].next;
+          return 1;
+        }
+    }
+
+  return 0;
+}
+
+/* Unformat helper taking (vlib_main_t *, ip_adjacency_t *, u32 node_index).
+   Fills the adjacency from one of three input forms, tried in order:
+     "arp <interface> <address>"  - interface route adjacency;
+     a lookup-next keyword       - optionally followed by an interface
+                                   address index (local) or a mandatory
+                                   table index (classify);
+     a rewrite string            - rewrite adjacency.
+   The address family is ip6 when node_index is the ip6 rewrite node.
+   Returns 1 on success, 0 when nothing matched. */
+static uword unformat_ip_adjacency (unformat_input_t * input, va_list * args)
+{
+  vlib_main_t * vm = va_arg (*args, vlib_main_t *);
+  ip_adjacency_t * adj = va_arg (*args, ip_adjacency_t *);
+  u32 node_index = va_arg (*args, u32);
+  vnet_main_t * vnm = vnet_get_main();
+  u32 sw_if_index;
+  u32 is_ip6 = node_index == ip6_rewrite_node.index;
+  ip46_address_t a46;
+  ip_lookup_next_t next;
+
+  adj->rewrite_header.node_index = node_index;
+  adj->explicit_fib_index = ~0;
+
+  if (unformat (input, "arp %U %U",
+                unformat_vnet_sw_interface, vnm, &sw_if_index,
+                unformat_ip46_address, &a46, is_ip6))
+    {
+      ip_lookup_main_t * lm;
+      ip_adjacency_t * covering_adj;
+      u32 covering_adj_index;
+
+      /* Look the address up to find the adjacency currently covering it. */
+      if (is_ip6)
+        {
+          lm = &ip6_main.lookup_main;
+          covering_adj_index = ip6_fib_lookup (&ip6_main, sw_if_index, &a46.ip6);
+        }
+      else
+        {
+          lm = &ip4_main.lookup_main;
+          covering_adj_index = ip4_fib_lookup (&ip4_main, sw_if_index, &a46.ip4);
+        }
+
+      covering_adj = ip_get_adjacency (lm, covering_adj_index);
+
+      /* The address must be reached through the named interface. */
+      if (covering_adj->rewrite_header.sw_if_index != sw_if_index)
+        return 0;
+
+      if (is_ip6)
+        ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, covering_adj->if_address_index);
+      else
+        ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, covering_adj->if_address_index);
+
+      return 1;
+    }
+
+  if (unformat_user (input, unformat_ip_lookup_next, &next))
+    {
+      adj->lookup_next_index = next;
+      adj->if_address_index = ~0;
+
+      if (next == IP_LOOKUP_NEXT_LOCAL)
+        {
+          /* The interface address index is optional. */
+          (void) unformat (input, "%d", &adj->if_address_index);
+        }
+      else if (next == IP_LOOKUP_NEXT_CLASSIFY)
+        {
+          if (!unformat (input, "%d", &adj->classify_table_index))
+            {
+              clib_warning ("classify adj must specify table index");
+              return 0;
+            }
+        }
+
+      return 1;
+    }
+
+  if (unformat_user (input,
+                     unformat_vnet_rewrite,
+                     vm, &adj->rewrite_header, sizeof (adj->rewrite_data)))
+    {
+      adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Helper for vnet_ip_route_cmd: add or delete (add_del_flag is
+   IP4_ROUTE_FLAG_ADD or IP4_ROUTE_FLAG_DEL) every "via" next hop on
+   `count' consecutive prefixes of length dst_length, starting at
+   first_dst.  A table id attached to a next hop is translated to a fib
+   index on every use; the caller's table_ids vector is never modified, so
+   the translation stays correct when count > 1.  When count > 1 the
+   achieved rate is printed. */
+static void
+ip4_route_cmd_next_hops (vlib_main_t * vm, ip4_main_t * im4, u32 add_del_flag,
+                         ip4_address_t first_dst, u32 dst_length, f64 count,
+                         ip4_address_t * via_next_hops, u32 * sw_if_indices,
+                         u32 * weights, u32 * table_ids)
+{
+  u32 r, j, n_routes, group_flag, incr;
+  ip4_address_t dst = first_dst;
+  f64 t[2];
+
+  n_routes = count;
+  t[0] = vlib_time_now (vm);
+
+  /* Distance between consecutive prefixes.  A /0 covers everything, so
+     the step wraps to 0; shifting by 32 would be undefined. */
+  incr = dst_length ? ((u32) 1 << (32 - dst_length)) : 0;
+
+  for (r = 0; r < n_routes; r++)
+    {
+      group_flag = r + 1 < n_routes ? IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0;
+
+      for (j = 0; j < vec_len (via_next_hops); j++)
+        {
+          u32 fib_index = (u32)~0;
+
+          if (table_ids[j] != (u32)~0)
+            {
+              uword * p = hash_get (im4->fib_index_by_table_id,
+                                    table_ids[j]);
+              if (p == 0)
+                {
+                  clib_warning ("no such FIB table %d",
+                                table_ids[j]);
+                  continue;
+                }
+              fib_index = p[0];
+            }
+
+          ip4_add_del_route_next_hop (im4,
+                                      add_del_flag | group_flag,
+                                      &dst,
+                                      dst_length,
+                                      &via_next_hops[j],
+                                      sw_if_indices[j],
+                                      weights[j], (u32)~0,
+                                      fib_index);
+        }
+      dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32));
+    }
+
+  t[1] = vlib_time_now (vm);
+  if (count > 1)
+    vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0]));
+}
+
+/* Helper for vnet_ip_route_cmd: add or delete (add_del_flag is
+   IP6_ROUTE_FLAG_ADD or IP6_ROUTE_FLAG_DEL) every "via" next hop of one
+   ip6 destination.  table_ids[] entries are passed through unchanged. */
+static void
+ip6_route_cmd_next_hops (ip6_main_t * im6, u32 add_del_flag,
+                         ip6_address_t * dst, u32 dst_length,
+                         ip6_address_t * via_next_hops, u32 * sw_if_indices,
+                         u32 * weights, u32 * table_ids)
+{
+  u32 j;
+
+  for (j = 0; j < vec_len (via_next_hops); j++)
+    ip6_add_del_route_next_hop (im6,
+                                add_del_flag,
+                                dst,
+                                dst_length,
+                                &via_next_hops[j],
+                                sw_if_indices[j],
+                                weights[j], (u32)~0,
+                                table_ids[j]);
+}
+
+/* CLI handler for "ip route".
+
+   Parses one line made of: "table <id>", "add" / "del", "count <n>", one
+   or more destination prefixes (all ip4 or all ip6), and either next hops
+   ("via <addr> [<interface> [weight <w>]]"), explicit adjacencies
+   ("via arp ...", "via drop", "via <rewrite>", ...) or
+   "lookup in table <id>".  The routes are then added or deleted.
+
+   Returns 0 on success or a clib error describing the problem.  All
+   temporary vectors and the line input are released on every path that
+   reaches the done label. */
+clib_error_t *
+vnet_ip_route_cmd (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  clib_error_t * error = 0;
+  u32 table_id, is_del;
+  u32 weight, * weights = 0;
+  u32 * table_ids = 0;
+  u32 sw_if_index, * sw_if_indices = 0;
+  ip4_address_t ip4_addr, * ip4_dst_addresses = 0, * ip4_via_next_hops = 0;
+  ip6_address_t ip6_addr, * ip6_dst_addresses = 0, * ip6_via_next_hops = 0;
+  u32 dst_address_length, * dst_address_lengths = 0;
+  ip_adjacency_t parse_adj, * add_adj = 0;
+  unformat_input_t _line_input, * line_input = &_line_input;
+  f64 count;
+  u32 outer_table_id;
+
+  is_del = 0;
+  table_id = 0;
+  count = 1;
+
+  /* Get a line of input. */
+  if (! unformat_user (main_input, unformat_line_input, line_input))
+    return 0;
+
+  memset(&parse_adj, 0, sizeof (parse_adj));
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "table %d", &table_id))
+        ;
+      else if (unformat (line_input, "del"))
+        is_del = 1;
+      else if (unformat (line_input, "add"))
+        is_del = 0;
+      else if (unformat (line_input, "count %f", &count))
+        ;
+
+      else if (unformat (line_input, "%U/%d",
+                         unformat_ip4_address, &ip4_addr,
+                         &dst_address_length))
+        {
+          /* The length later indexes fib_masks[] and is used as a shift
+             count, so refuse anything outside 0..32. */
+          if (dst_address_length > 32)
+            {
+              error = clib_error_return (0, "bad ip4 prefix length %d",
+                                         dst_address_length);
+              goto done;
+            }
+          vec_add1 (ip4_dst_addresses, ip4_addr);
+          vec_add1 (dst_address_lengths, dst_address_length);
+        }
+
+      else if (unformat (line_input, "%U/%d",
+                         unformat_ip6_address, &ip6_addr,
+                         &dst_address_length))
+        {
+          /* Same reasoning as for ip4: the length indexes fib_masks[]. */
+          if (dst_address_length > 128)
+            {
+              error = clib_error_return (0, "bad ip6 prefix length %d",
+                                         dst_address_length);
+              goto done;
+            }
+          vec_add1 (ip6_dst_addresses, ip6_addr);
+          vec_add1 (dst_address_lengths, dst_address_length);
+        }
+
+      else if (unformat (line_input, "via %U %U weight %u",
+                         unformat_ip4_address, &ip4_addr,
+                         unformat_vnet_sw_interface, vnm, &sw_if_index,
+                         &weight))
+        {
+          vec_add1 (ip4_via_next_hops, ip4_addr);
+          vec_add1 (sw_if_indices, sw_if_index);
+          vec_add1 (weights, weight);
+          vec_add1 (table_ids, (u32)~0);
+        }
+
+      else if (unformat (line_input, "via %U %U weight %u",
+                         unformat_ip6_address, &ip6_addr,
+                         unformat_vnet_sw_interface, vnm, &sw_if_index,
+                         &weight))
+        {
+          vec_add1 (ip6_via_next_hops, ip6_addr);
+          vec_add1 (sw_if_indices, sw_if_index);
+          vec_add1 (weights, weight);
+          vec_add1 (table_ids, (u32)~0);
+        }
+
+      else if (unformat (line_input, "via %U %U",
+                         unformat_ip4_address, &ip4_addr,
+                         unformat_vnet_sw_interface, vnm, &sw_if_index))
+        {
+          vec_add1 (ip4_via_next_hops, ip4_addr);
+          vec_add1 (sw_if_indices, sw_if_index);
+          vec_add1 (weights, 1);
+          vec_add1 (table_ids, (u32)~0);
+        }
+
+      else if (unformat (line_input, "via %U %U",
+                         unformat_ip6_address, &ip6_addr,
+                         unformat_vnet_sw_interface, vnm, &sw_if_index))
+        {
+          vec_add1 (ip6_via_next_hops, ip6_addr);
+          vec_add1 (sw_if_indices, sw_if_index);
+          vec_add1 (weights, 1);
+          vec_add1 (table_ids, (u32)~0);
+        }
+      /* Next hop without an interface: remember the table to resolve
+         it in. */
+      else if (unformat (line_input, "via %U",
+                         unformat_ip4_address, &ip4_addr))
+        {
+          vec_add1 (ip4_via_next_hops, ip4_addr);
+          vec_add1 (sw_if_indices, (u32)~0);
+          vec_add1 (weights, 1);
+          vec_add1 (table_ids, table_id);
+        }
+      else if (unformat (line_input, "via %U",
+                         unformat_ip6_address, &ip6_addr))
+        {
+          vec_add1 (ip6_via_next_hops, ip6_addr);
+          vec_add1 (sw_if_indices, (u32)~0);
+          vec_add1 (weights, 1);
+          vec_add1 (table_ids, (u32)table_id);
+        }
+
+      /* Explicit adjacency; the family follows the destinations parsed
+         so far. */
+      else if (vec_len (ip4_dst_addresses) > 0
+               && unformat (line_input, "via %U",
+                            unformat_ip_adjacency, vm, &parse_adj, ip4_rewrite_node.index))
+        vec_add1 (add_adj, parse_adj);
+
+      else if (vec_len (ip6_dst_addresses) > 0
+               && unformat (line_input, "via %U",
+                            unformat_ip_adjacency, vm, &parse_adj, ip6_rewrite_node.index))
+        vec_add1 (add_adj, parse_adj);
+      else if (unformat (line_input, "lookup in table %d", &outer_table_id))
+        {
+          uword * p;
+
+          if (vec_len (ip4_dst_addresses) > 0)
+            p = hash_get (ip4_main.fib_index_by_table_id, outer_table_id);
+          else
+            p = hash_get (ip6_main.fib_index_by_table_id, outer_table_id);
+
+          if (p == 0)
+            {
+              error = clib_error_return (0, "Nonexistent outer table id %d",
+                                         outer_table_id);
+              goto done;
+            }
+
+          parse_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
+          parse_adj.explicit_fib_index = p[0];
+          vec_add1 (add_adj, parse_adj);
+        }
+      else
+        {
+          error = unformat_parse_error (line_input);
+          goto done;
+        }
+    }
+
+  if (vec_len (ip4_dst_addresses) + vec_len (ip6_dst_addresses) == 0)
+    {
+      error = clib_error_return (0, "expected ip4/ip6 destination address/length.");
+      goto done;
+    }
+
+  if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_dst_addresses) > 0)
+    {
+      error = clib_error_return (0, "mixed ip4/ip6 address/length.");
+      goto done;
+    }
+
+  if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_via_next_hops) > 0)
+    {
+      error = clib_error_return (0, "ip4 destinations with ip6 next hops.");
+      goto done;
+    }
+
+  if (vec_len (ip6_dst_addresses) > 0 && vec_len (ip4_via_next_hops) > 0)
+    {
+      error = clib_error_return (0, "ip6 destinations with ip4 next hops.");
+      goto done;
+    }
+
+  if (! is_del && vec_len (add_adj) + vec_len (weights) == 0)
+    {
+      error = clib_error_return (0, "no next hops or adjacencies to add.");
+      goto done;
+    }
+
+  /* A first next hop given without an interface is resolved through the
+     FIB; the adjacency found there is what gets installed. */
+  if (vec_len(ip4_via_next_hops))
+    {
+      if (sw_if_indices[0] == (u32)~0)
+        {
+          u32 ai;
+          uword * p;
+          u32 fib_index;
+          ip_adjacency_t *nh_adj;
+
+          p = hash_get (ip4_main.fib_index_by_table_id, table_ids[0]);
+          if (p == 0)
+            {
+              error = clib_error_return (0, "Nonexistent FIB id %d",
+                                         table_ids[0]);
+              goto done;
+            }
+
+          fib_index = p[0];
+
+          ai = ip4_fib_lookup_with_table (&ip4_main,
+                                          fib_index,
+                                          ip4_via_next_hops,
+                                          1 /* disable default route */);
+          if (ai == 0)
+            {
+              error = clib_error_return (0, "next hop %U not in FIB",
+                                         format_ip4_address,
+                                         ip4_via_next_hops);
+              goto done;
+            }
+          nh_adj = ip_get_adjacency (&ip4_main.lookup_main, ai);
+          vec_add1 (add_adj, nh_adj[0]);
+        }
+    }
+  if (vec_len(ip6_via_next_hops))
+    {
+      if (sw_if_indices[0] == (u32)~0)
+        {
+          u32 ai;
+          uword * p;
+          u32 fib_index;
+          ip_adjacency_t *nh_adj;
+
+          p = hash_get (ip6_main.fib_index_by_table_id, table_ids[0]);
+          if (p == 0)
+            {
+              error = clib_error_return (0, "Nonexistent FIB id %d",
+                                         table_ids[0]);
+              goto done;
+            }
+
+          fib_index = p[0];
+          ai = ip6_fib_lookup_with_table (&ip6_main,
+                                          fib_index,
+                                          ip6_via_next_hops);
+          if (ai == 0)
+            {
+              error = clib_error_return (0, "next hop %U not in FIB",
+                                         format_ip6_address,
+                                         ip6_via_next_hops);
+              goto done;
+            }
+          nh_adj = ip_get_adjacency (&ip6_main.lookup_main, ai);
+          vec_add1 (add_adj, nh_adj[0]);
+        }
+    }
+
+  {
+    int i;
+    ip4_main_t * im4 = &ip4_main;
+    ip6_main_t * im6 = &ip6_main;
+
+    for (i = 0; i < vec_len (ip4_dst_addresses); i++)
+      {
+        ip4_add_del_route_args_t a;
+
+        memset (&a, 0, sizeof (a));
+        a.flags = IP4_ROUTE_FLAG_TABLE_ID;
+        a.table_index_or_table_id = table_id;
+        a.dst_address = ip4_dst_addresses[i];
+        a.dst_address_length = dst_address_lengths[i];
+        a.adj_index = ~0;
+
+        if (is_del)
+          {
+            if (vec_len (ip4_via_next_hops) == 0)
+              {
+                /* Delete the whole route: find its adjacency first. */
+                uword * dst_hash, * dst_result;
+                u32 dst_address_u32;
+                ip4_fib_t * fib;
+
+                fib = find_ip4_fib_by_table_index_or_id (im4, table_id,
+                                                         0 /* by table id */);
+
+                a.flags |= IP4_ROUTE_FLAG_DEL;
+                dst_address_u32 = a.dst_address.as_u32
+                  & im4->fib_masks[a.dst_address_length];
+
+                dst_hash =
+                  fib->adj_index_by_dst_address[a.dst_address_length];
+                dst_result = hash_get (dst_hash, dst_address_u32);
+                if (dst_result)
+                  a.adj_index = dst_result[0];
+                else
+                  {
+                    clib_warning ("%U/%d not in FIB",
+                                  format_ip4_address, &a.dst_address,
+                                  a.dst_address_length);
+                    continue;
+                  }
+
+                ip4_add_del_route (im4, &a);
+                ip4_maybe_remap_adjacencies (im4, table_id,
+                                             IP4_ROUTE_FLAG_TABLE_ID);
+              }
+            else
+              {
+                /* Delete only the listed next hops. */
+                ip4_route_cmd_next_hops (vm, im4, IP4_ROUTE_FLAG_DEL,
+                                         a.dst_address, a.dst_address_length,
+                                         count, ip4_via_next_hops,
+                                         sw_if_indices, weights, table_ids);
+              }
+          }
+        else
+          {
+            if (vec_len (add_adj) > 0)
+              {
+                a.flags |= IP4_ROUTE_FLAG_ADD;
+                a.add_adj = add_adj;
+                a.n_add_adj = vec_len (add_adj);
+
+                ip4_add_del_route (im4, &a);
+              }
+            else if (vec_len (ip4_via_next_hops) > 0)
+              {
+                ip4_route_cmd_next_hops (vm, im4, IP4_ROUTE_FLAG_ADD,
+                                         a.dst_address, a.dst_address_length,
+                                         count, ip4_via_next_hops,
+                                         sw_if_indices, weights, table_ids);
+              }
+          }
+      }
+
+    for (i = 0; i < vec_len (ip6_dst_addresses); i++)
+      {
+        ip6_add_del_route_args_t a;
+
+        memset (&a, 0, sizeof (a));
+        a.flags = IP6_ROUTE_FLAG_TABLE_ID;
+        a.table_index_or_table_id = table_id;
+        a.dst_address = ip6_dst_addresses[i];
+        a.dst_address_length = dst_address_lengths[i];
+        a.adj_index = ~0;
+
+        if (is_del)
+          {
+            if (vec_len (ip6_via_next_hops) == 0)
+              {
+                /* Delete the whole route: find its adjacency first. */
+                BVT(clib_bihash_kv) kv, value;
+                ip6_address_t dst_address;
+                ip6_fib_t * fib;
+
+                fib = find_ip6_fib_by_table_index_or_id (im6, table_id,
+                                                         0 /* by table id */);
+
+                dst_address = ip6_dst_addresses[i];
+
+                /* Mask with this route's own length, not with whatever
+                   length happened to be parsed last. */
+                ip6_address_mask (&dst_address,
+                                  &im6->fib_masks[a.dst_address_length]);
+
+                kv.key[0] = dst_address.as_u64[0];
+                kv.key[1] = dst_address.as_u64[1];
+                kv.key[2] = ((u64)(fib - im6->fibs)<<32)
+                  | a.dst_address_length;
+
+                if (BV(clib_bihash_search)(&im6->ip6_lookup_table,
+                                           &kv, &value) == 0)
+                  a.adj_index = value.value;
+                else
+                  {
+                    clib_warning ("%U/%d not in FIB",
+                                  format_ip6_address, &a.dst_address,
+                                  a.dst_address_length);
+                    continue;
+                  }
+
+                a.flags |= IP6_ROUTE_FLAG_DEL;
+                ip6_add_del_route (im6, &a);
+                ip6_maybe_remap_adjacencies (im6, table_id,
+                                             IP6_ROUTE_FLAG_TABLE_ID);
+              }
+            else
+              {
+                /* Delete only the listed next hops. */
+                ip6_route_cmd_next_hops (im6, IP6_ROUTE_FLAG_DEL,
+                                         &a.dst_address, a.dst_address_length,
+                                         ip6_via_next_hops, sw_if_indices,
+                                         weights, table_ids);
+              }
+          }
+        else
+          {
+            if (vec_len (add_adj) > 0)
+              {
+                a.flags |= IP6_ROUTE_FLAG_ADD;
+                a.add_adj = add_adj;
+                a.n_add_adj = vec_len (add_adj);
+
+                ip6_add_del_route (im6, &a);
+              }
+            else if (vec_len (ip6_via_next_hops) > 0)
+              {
+                ip6_route_cmd_next_hops (im6, IP6_ROUTE_FLAG_ADD,
+                                         &a.dst_address, a.dst_address_length,
+                                         ip6_via_next_hops, sw_if_indices,
+                                         weights, table_ids);
+              }
+          }
+      }
+  }
+
+ done:
+  /* Single release point, reached on success and on every error after the
+     line was read. */
+  unformat_free (line_input);
+  vec_free (add_adj);
+  vec_free (weights);
+  vec_free (table_ids);
+  vec_free (sw_if_indices);
+  vec_free (dst_address_lengths);
+  vec_free (ip4_dst_addresses);
+  vec_free (ip6_dst_addresses);
+  vec_free (ip4_via_next_hops);
+  vec_free (ip6_via_next_hops);
+  return error;
+}
+
+VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = { /* "ip" path; only help text, no .function set here */
+ .path = "ip",
+ .short_help = "Internet protocol (IP) commands",
+};
+
+VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = { /* "show ip" path; only help text, no .function set here */
+ .path = "show ip",
+ .short_help = "Internet protocol (IP) show commands",
+};
+
+VLIB_CLI_COMMAND (vlib_cli_show_ip4_command, static) = { /* "show ip4" path; only help text, no .function set here */
+ .path = "show ip4",
+ .short_help = "Internet protocol version 4 (IP4) show commands",
+};
+
+VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = { /* "show ip6" path; only help text, no .function set here */
+ .path = "show ip6",
+ .short_help = "Internet protocol version 6 (IP6) show commands",
+};
+
+VLIB_CLI_COMMAND (ip_route_command, static) = { /* "ip route": handled by vnet_ip_route_cmd */
+ .path = "ip route",
+ .short_help = "Add/delete IP routes",
+ .function = vnet_ip_route_cmd,
+};
+
+/*
+ * The next two routines address a longstanding script hemorrhoid.
+ * Probing a v4 or v6 neighbor needs to appear to be synchronous,
+ * or dependent route-adds will simply fail.
+ */
+/* Probe ip6 neighbor `a' on sw_if_index and wait for it to resolve.
+
+   Must run in process context.  Registers the current process for the
+   neighbor resolution event (event type 1), then sends up to retry_count
+   probes, waiting up to one second after each for the event.
+
+   Returns 0 once resolved, the probe's own error if sending fails, or a
+   "Resolution failed" error when no attempt succeeded (including
+   retry_count <= 0).  The event data vector filled by
+   vlib_process_get_events is freed on every exit path. */
+static clib_error_t *
+ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index,
+                         int retry_count)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  clib_error_t * e = 0;
+  int i;
+  int resolved = 0;
+  uword event_type;
+  uword *event_data = 0;
+
+  ASSERT (vlib_in_process_context(vm));
+
+  if (retry_count > 0)
+    vnet_register_ip6_neighbor_resolution_event
+      (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
+       1 /* event */, 0 /* data */);
+
+  for (i = 0; i < retry_count; i++)
+    {
+      /* The interface may be down, etc. */
+      e = ip6_probe_neighbor (vm, a, sw_if_index);
+
+      if (e)
+        goto done;
+
+      vlib_process_wait_for_event_or_clock (vm, 1.0);
+      event_type = vlib_process_get_events (vm, &event_data);
+      switch (event_type)
+        {
+        case 1: /* resolved... */
+          vlib_cli_output (vm, "Resolved %U",
+                           format_ip6_address, a);
+          resolved = 1;
+          goto done;
+
+        case ~0: /* timeout */
+          break;
+
+        default:
+          /* event_type is a uword; narrow it to match %d. */
+          clib_warning ("unknown event_type %d", (int) event_type);
+        }
+    }
+
+ done:
+  /* The vector is local to this call: free it, a length reset would
+     leak it. */
+  vec_free (event_data);
+
+  if (e)
+    return e;
+
+  if (!resolved)
+    return clib_error_return (0, "Resolution failed for %U",
+                              format_ip6_address, a);
+  return 0;
+}
+
+/* Probe ip4 neighbor `a' on sw_if_index and wait for it to resolve.
+
+   Must run in process context.  Registers the current process for the
+   arp resolution event (event type 1), then sends up to retry_count
+   probes, waiting up to one second after each for the event.
+
+   Returns 0 once resolved, the probe's own error if sending fails, or a
+   "Resolution failed" error when no attempt succeeded (including
+   retry_count <= 0).  The event data vector filled by
+   vlib_process_get_events is freed on every exit path. */
+static clib_error_t *
+ip4_probe_neighbor_wait (vlib_main_t *vm, ip4_address_t * a, u32 sw_if_index,
+                         int retry_count)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  clib_error_t * e = 0;
+  int i;
+  int resolved = 0;
+  uword event_type;
+  uword *event_data = 0;
+
+  ASSERT (vlib_in_process_context(vm));
+
+  if (retry_count > 0)
+    vnet_register_ip4_arp_resolution_event
+      (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
+       1 /* event */, 0 /* data */);
+
+  for (i = 0; i < retry_count; i++)
+    {
+      /* The interface may be down, etc. */
+      e = ip4_probe_neighbor (vm, a, sw_if_index);
+
+      if (e)
+        goto done;
+
+      vlib_process_wait_for_event_or_clock (vm, 1.0);
+      event_type = vlib_process_get_events (vm, &event_data);
+      switch (event_type)
+        {
+        case 1: /* resolved... */
+          vlib_cli_output (vm, "Resolved %U",
+                           format_ip4_address, a);
+          resolved = 1;
+          goto done;
+
+        case ~0: /* timeout */
+          break;
+
+        default:
+          /* event_type is a uword; narrow it to match %d. */
+          clib_warning ("unknown event_type %d", (int) event_type);
+        }
+    }
+
+ done:
+  /* The vector is local to this call: free it, a length reset would
+     leak it. */
+  vec_free (event_data);
+
+  if (e)
+    return e;
+
+  if (!resolved)
+    return clib_error_return (0, "Resolution failed for %U",
+                              format_ip4_address, a);
+  return 0;
+}
+
+/* CLI handler for "ip probe-neighbor".
+
+   Parses an interface, exactly one ip4 or ip6 address and an optional
+   "retry <n>" (default 3), then probes the neighbor and waits for it to
+   resolve.  Returns 0 on success or an error for bad input, a failed
+   probe or an unresolved neighbor.  The line input is released on every
+   path once it has been read. */
+static clib_error_t *
+probe_neighbor_address (vlib_main_t * vm,
+                        unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  unformat_input_t _line_input, * line_input = &_line_input;
+  ip4_address_t a4;
+  ip6_address_t a6;
+  clib_error_t * error = 0;
+  u32 sw_if_index = ~0;
+  int retry_count = 3;
+  int is_ip4 = 1;
+  int address_set = 0;
+
+  /* Get a line of input. */
+  if (! unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat_user (line_input, unformat_vnet_sw_interface, vnm,
+                         &sw_if_index))
+        ;
+      else if (unformat (line_input, "retry %d", &retry_count))
+        ;
+
+      else if (unformat (line_input, "%U", unformat_ip4_address, &a4))
+        address_set++;
+      else if (unformat (line_input, "%U", unformat_ip6_address, &a6))
+        {
+          address_set++;
+          is_ip4 = 0;
+        }
+      else
+        {
+          /* Build the message while the input is still alive, then
+             release the input before returning. */
+          error = clib_error_return (0, "unknown input '%U'",
+                                     format_unformat_error, line_input);
+          unformat_free (line_input);
+          return error;
+        }
+    }
+
+  unformat_free (line_input);
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "Interface required, not set.");
+  if (address_set == 0)
+    return clib_error_return (0, "ip address required, not set.");
+  if (address_set > 1)
+    return clib_error_return (0, "Multiple ip addresses not supported.");
+
+  if (is_ip4)
+    error = ip4_probe_neighbor_wait (vm, &a4, sw_if_index, retry_count);
+  else
+    error = ip6_probe_neighbor_wait (vm, &a6, sw_if_index, retry_count);
+
+  return error;
+}
+
+/* Registers the "ip probe-neighbor" CLI command, handled by probe_neighbor_address. */
+VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) = {
+  .path = "ip probe-neighbor",
+  .short_help = "ip probe-neighbor <intfc> <ip4-addr> | <ip6-addr> [retry nn]",
+  .function = probe_neighbor_address,
+};
+
+typedef CLIB_PACKED (struct { /* One ip4 route collected by ip4_show_fib for sorting and display. */
+ ip4_address_t address;
+
+ u32 address_length : 6; /* Prefix length; ip4_show_fib sets it from the per-prefix-length hash index. */
+
+ u32 index : 26; /* Adjacency index, or offset into the results vector when fib results span several words. */
+}) ip4_route_t;
+
+static clib_error_t *
+ip4_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{ /* CLI handler for "show ip fib". For every ip4 fib (optionally filtered by
+     "table <n>") prints either a per-prefix-length route count
+     (brief/summary/sum) or every route with its per-next-hop packet/byte
+     counters and adjacency. "clear" zeroes each counter after reading it,
+     "mtrie" also dumps the fib's mtrie, "include-empty" shows fibs with no
+     routes, and an ip4 address restricts output to prefixes matching it.
+     Always returns 0. */
+ vnet_main_t * vnm = vnet_get_main();
+ ip4_main_t * im4 = &ip4_main;
+ ip4_route_t * routes, * r;
+ ip4_fib_t * fib;
+ ip_lookup_main_t * lm = &im4->lookup_main;
+ uword * results, i;
+ int verbose, matching, mtrie, include_empty_fibs;
+ ip4_address_t matching_address;
+ u8 clear = 0;
+ int table_id = -1;
+
+ routes = 0;
+ results = 0;
+ verbose = 1;
+ include_empty_fibs = 0;
+ matching = 0;
+ mtrie = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) /* Options may come in any order; parsing stops at the first unrecognized token. */
+ {
+ if (unformat (input, "brief") || unformat (input, "summary")
+ || unformat (input, "sum"))
+ verbose = 0;
+
+ else if (unformat (input, "mtrie"))
+ mtrie = 1;
+
+ else if (unformat (input, "include-empty"))
+ include_empty_fibs = 1;
+
+ else if (unformat (input, "%U", unformat_ip4_address, &matching_address))
+ matching = 1;
+
+ else if (unformat (input, "clear"))
+ clear = 1;
+
+ else if (unformat (input, "table %d", &table_id))
+ ;
+ else
+ break;
+ }
+
+ vec_foreach (fib, im4->fibs)
+ {
+ int fib_not_empty;
+
+ fib_not_empty = 0; /* Set once any per-prefix-length hash has at least one element. */
+ for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++)
+ {
+ uword * hash = fib->adj_index_by_dst_address[i];
+ uword n_elts = hash_elts (hash);
+ if (n_elts)
+ {
+ fib_not_empty = 1;
+ break;
+ }
+ }
+
+ if (fib_not_empty == 0 && include_empty_fibs == 0)
+ continue;
+
+ if (table_id >= 0 && table_id != (int)fib->table_id) /* Optional "table <n>" filter. */
+ continue;
+
+ if (include_empty_fibs)
+ vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U",
+ fib->table_id, fib - im4->fibs,
+ format_ip_flow_hash_config, fib->flow_hash_config);
+
+ /* Show summary? Print only the route count for each populated prefix length. */
+ if (! verbose)
+ {
+ if (include_empty_fibs == 0)
+ vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U",
+ fib->table_id, fib - im4->fibs,
+ format_ip_flow_hash_config, fib->flow_hash_config);
+ vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
+ for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++)
+ {
+ uword * hash = fib->adj_index_by_dst_address[i];
+ uword n_elts = hash_elts (hash);
+ if (n_elts > 0)
+ vlib_cli_output (vm, "%20d%16d", i, n_elts);
+ }
+ continue;
+ }
+
+ if (routes) /* Reuse the scratch vectors from the previous fib. */
+ _vec_len (routes) = 0;
+ if (results)
+ _vec_len (results) = 0;
+
+ for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) /* Collect routes, one prefix length at a time. */
+ {
+ uword * hash = fib->adj_index_by_dst_address[i];
+ hash_pair_t * p;
+ ip4_route_t x;
+
+ x.address_length = i;
+
+ if (matching) /* Look up only the requested address, masked to this prefix length. */
+ {
+ x.address.as_u32 = matching_address.as_u32 & im4->fib_masks[i];
+ p = hash_get_pair (hash, x.address.as_u32);
+ if (p)
+ {
+ if (lm->fib_result_n_words > 1) /* Multi-word results are copied into results[]; the route keeps the offset. */
+ {
+ x.index = vec_len (results);
+ vec_add (results, p->value, lm->fib_result_n_words);
+ }
+ else
+ x.index = p->value[0];
+ vec_add1 (routes, x);
+ }
+ }
+ else
+ {
+ hash_foreach_pair (p, hash, ({
+ x.address.data_u32 = p->key;
+ if (lm->fib_result_n_words > 1)
+ {
+ x.index = vec_len (results);
+ vec_add (results, p->value, lm->fib_result_n_words);
+ }
+ else
+ x.index = p->value[0];
+
+ vec_add1 (routes, x);
+ }));
+ }
+ }
+
+ vec_sort (routes, r1, r2, /* Order by address, then by prefix length. */
+ ({ int cmp = ip4_address_compare (&r1->address, &r2->address);
+ cmp ? cmp : ((int) r1->address_length - (int) r2->address_length); }));
+ if (vec_len(routes)) {
+ if (include_empty_fibs == 0)
+ vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U",
+ fib->table_id, fib - im4->fibs,
+ format_ip_flow_hash_config, fib->flow_hash_config);
+ if (mtrie)
+ vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie);
+ vlib_cli_output (vm, "%=20s%=16s%=16s%=16s",
+ "Destination", "Packets", "Bytes", "Adjacency");
+ }
+ vec_foreach (r, routes)
+ {
+ vlib_counter_t c, sum;
+ uword i, j, n_left, n_nhs, adj_index, * result = 0;
+ ip_adjacency_t * adj;
+ ip_multipath_next_hop_t * nhs, tmp_nhs[1];
+
+ adj_index = r->index;
+ if (lm->fib_result_n_words > 1) /* r->index is then an offset into results[], whose first word is the adjacency index. */
+ {
+ result = vec_elt_at_index (results, adj_index);
+ adj_index = result[0];
+ }
+
+ adj = ip_get_adjacency (lm, adj_index);
+ if (adj->n_adj == 1) /* Single path: use a local one-entry next-hop list of weight 1. */
+ {
+ nhs = &tmp_nhs[0];
+ nhs[0].next_hop_adj_index = ~0; /* not used */
+ nhs[0].weight = 1;
+ n_nhs = 1;
+ }
+ else
+ {
+ ip_multipath_adjacency_t * madj;
+ madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle);
+ nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset);
+ n_nhs = madj->normalized_next_hops.count;
+ }
+
+ n_left = nhs[0].weight; /* Adjacencies still to be summed for the current next hop. */
+ vlib_counter_zero (&sum);
+ for (i = j = 0; i < adj->n_adj; i++)
+ {
+ n_left -= 1;
+ vlib_get_combined_counter (&lm->adjacency_counters,
+ adj_index + i, &c);
+ if (clear)
+ vlib_zero_combined_counter (&lm->adjacency_counters,
+ adj_index + i);
+ vlib_counter_add (&sum, &c);
+ if (n_left == 0) /* All adjacencies of next hop j have been summed: print one line for it. */
+ {
+ u8 * msg = 0;
+ uword indent;
+
+ if (j == 0) /* Only the first line of a route shows the prefix; later lines are padded. */
+ msg = format (msg, "%-20U",
+ format_ip4_address_and_length,
+ r->address.data, r->address_length);
+ else
+ msg = format (msg, "%U", format_white_space, 20);
+
+ msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes);
+
+ indent = vec_len (msg);
+ msg = format (msg, "weight %d, index %d\n%U%U",
+ nhs[j].weight, adj_index + i,
+ format_white_space, indent,
+ format_ip_adjacency,
+ vnm, lm, adj_index + i);
+
+ vlib_cli_output (vm, "%v", msg);
+ vec_free (msg);
+
+ if (result && lm->format_fib_result)
+ vlib_cli_output (vm, "%20s%U", "",
+ lm->format_fib_result, vm, lm, result,
+ i + 1 - nhs[j].weight,
+ nhs[j].weight);
+
+ j++;
+ if (j < n_nhs) /* Start accumulating for the next next hop. */
+ {
+ n_left = nhs[j].weight;
+ vlib_counter_zero (&sum);
+ }
+ }
+ }
+ }
+ }
+
+ vec_free (routes);
+ vec_free (results);
+
+ return 0;
+}
+
+/* Registers the "show ip fib" CLI command, handled by ip4_show_fib. */
+VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
+  .path = "show ip fib",
+  .function = ip4_show_fib,
+  .short_help = "show ip fib [mtrie] [summary] [table <n>] [<ip4-addr>] [clear] [include-empty]",
+};
+
+typedef struct { /* One ip6 route collected by add_routes_in_fib for sorting and display. */
+ ip6_address_t address;
+
+ u32 address_length; /* Prefix length; taken from the low byte of the bihash key's third word. */
+
+ u32 index; /* Copied from the bihash value; ip6_show_fib uses it as the adjacency index. */
+} ip6_route_t;
+
+typedef struct { /* Argument block passed to add_routes_in_fib through the bihash walk. */
+ u32 fib_index; /* Only entries belonging to this fib are collected. */
+ ip6_route_t ** routep; /* Address of the route vector that entries are appended to. */
+} add_routes_in_fib_arg_t;
+
+/*
+ * bihash walk callback: append the entry described by kvp to the route
+ * vector referenced by arg (an add_routes_in_fib_arg_t) when the entry
+ * belongs to the requested fib.
+ */
+static void add_routes_in_fib (BVT(clib_bihash_kv) * kvp, void *arg)
+{
+  add_routes_in_fib_arg_t * walk = arg;
+  ip6_route_t * route;
+
+  /* The upper 32 bits of key[2] are compared with the fib index. */
+  if (kvp->key[2]>>32 != walk->fib_index)
+    return;
+
+  vec_add2 (*walk->routep, route, 1);
+
+  /* The start of the key/value pair is read as the ip6 address. */
+  route->address = ((ip6_address_t *) kvp)[0];
+  route->address_length = kvp->key[2] & 0xFF;
+  route->index = kvp->value;
+}
+
+typedef struct { /* Argument block passed to count_routes_in_fib_at_prefix_length through the bihash walk. */
+ u32 fib_index; /* Only entries belonging to this fib are counted. */
+ u64 count_by_prefix_length[129]; /* One counter per prefix length 0..128. */
+} count_routes_in_fib_at_prefix_length_arg_t;
+
+/*
+ * bihash walk callback: for an entry that belongs to the fib named in arg
+ * (a count_routes_in_fib_at_prefix_length_arg_t), increment the counter for
+ * the entry's prefix length.
+ */
+static void count_routes_in_fib_at_prefix_length
+(BVT(clib_bihash_kv) * kvp, void *arg)
+{
+  count_routes_in_fib_at_prefix_length_arg_t * ap = arg;
+  u32 mask_width;
+
+  /* The upper 32 bits of key[2] are compared with the fib index. */
+  if ((kvp->key[2]>>32) != ap->fib_index)
+    return;
+
+  mask_width = kvp->key[2] & 0xFF;
+
+  /* The low byte can hold values up to 255 but there are only 129
+     counters; never write past the end of the caller's array. */
+  if (mask_width >= ARRAY_LEN (ap->count_by_prefix_length))
+    return;
+
+  ap->count_by_prefix_length[mask_width]++;
+}
+
+
+/*
+ * CLI handler for "show ip6 fib".
+ *
+ * Prints the lookup table size and heap usage, then for every ip6 fib
+ * either a per-prefix-length route count ("brief", "summary" or "sum") or
+ * every route with its per-next-hop packet/byte counters and adjacency.
+ * "clear" zeroes each adjacency counter after it has been read.
+ *
+ * Always returns 0.
+ */
+static clib_error_t *
+ip6_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  ip6_main_t * im6 = &ip6_main;
+  ip6_route_t * routes, * r;
+  ip6_fib_t * fib;
+  ip_lookup_main_t * lm = &im6->lookup_main;
+  uword * results;
+  int verbose;
+  BVT(clib_bihash) * h = &im6->ip6_lookup_table;
+  u8 clear = 0;
+  add_routes_in_fib_arg_t _a, *a=&_a;
+  count_routes_in_fib_at_prefix_length_arg_t _ca, *ca = &_ca;
+
+  routes = 0;
+  results = 0;
+  verbose = 1;
+  if (unformat (input, "brief") || unformat (input, "summary")
+      || unformat (input, "sum"))
+    verbose = 0;
+
+  if (unformat (input, "clear"))
+    clear = 1;
+
+  vlib_cli_output (vm, "FIB lookup table: %d buckets, %lld MB heap",
+                   im6->lookup_table_nbuckets, im6->lookup_table_size>>20);
+  vlib_cli_output (vm, "%U", format_mheap, h->mheap, 0 /*verbose*/);
+  vlib_cli_output (vm, " ");
+
+  vec_foreach (fib, im6->fibs)
+    {
+      vlib_cli_output (vm, "VRF %d, fib_index %d, flow hash: %U",
+                       fib->table_id, fib - im6->fibs,
+                       format_ip_flow_hash_config, fib->flow_hash_config);
+
+      /* Show summary? */
+      if (! verbose)
+        {
+          int len;
+          vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
+
+          memset (ca, 0, sizeof(*ca));
+          ca->fib_index = fib - im6->fibs;
+
+          /* All fibs share one table: walk it, counting this fib's entries. */
+          BV(clib_bihash_foreach_key_value_pair)
+            (h, count_routes_in_fib_at_prefix_length, ca);
+
+          for (len = 128; len >= 0; len--)
+            {
+              if (ca->count_by_prefix_length[len])
+                vlib_cli_output (vm, "%=20d%=16lld",
+                                 len, ca->count_by_prefix_length[len]);
+            }
+          continue;
+        }
+
+      /* Reuse the scratch vectors from the previous fib. */
+      if (routes)
+        _vec_len (routes) = 0;
+      if (results)
+        _vec_len (results) = 0;
+
+      a->fib_index = fib - im6->fibs;
+      a->routep = &routes;
+
+      BV(clib_bihash_foreach_key_value_pair)(h, add_routes_in_fib, a);
+
+      /* Order by address, then by prefix length. */
+      vec_sort (routes, r1, r2,
+                ({ int cmp = ip6_address_compare (&r1->address, &r2->address);
+                  cmp ? cmp : ((int) r1->address_length - (int) r2->address_length); }));
+
+      vlib_cli_output (vm, "%=45s%=16s%=16s%=16s",
+                       "Destination", "Packets", "Bytes", "Adjacency");
+      vec_foreach (r, routes)
+        {
+          vlib_counter_t c, sum;
+          uword i, j, n_left, n_nhs, adj_index, * result = 0;
+          ip_adjacency_t * adj;
+          ip_multipath_next_hop_t * nhs, tmp_nhs[1];
+
+          adj_index = r->index;
+          /* NOTE(review): nothing in this function appends to results, so
+             this branch would index an empty vector if an ip6 lookup main
+             ever had fib_result_n_words > 1 -- confirm it cannot. */
+          if (lm->fib_result_n_words > 1)
+            {
+              result = vec_elt_at_index (results, adj_index);
+              adj_index = result[0];
+            }
+
+          adj = ip_get_adjacency (lm, adj_index);
+          if (adj->n_adj == 1)
+            {
+              /* Single path: use a local one-entry next-hop list of weight 1. */
+              nhs = &tmp_nhs[0];
+              nhs[0].next_hop_adj_index = ~0; /* not used */
+              nhs[0].weight = 1;
+              n_nhs = 1;
+            }
+          else
+            {
+              ip_multipath_adjacency_t * madj;
+              madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle);
+              nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset);
+              n_nhs = madj->normalized_next_hops.count;
+            }
+
+          /* n_left counts the adjacencies still to be summed for next hop j. */
+          n_left = nhs[0].weight;
+          vlib_counter_zero (&sum);
+          for (i = j = 0; i < adj->n_adj; i++)
+            {
+              n_left -= 1;
+              vlib_get_combined_counter (&lm->adjacency_counters,
+                                         adj_index + i, &c);
+              if (clear)
+                vlib_zero_combined_counter (&lm->adjacency_counters,
+                                            adj_index + i);
+              vlib_counter_add (&sum, &c);
+              if (n_left == 0)
+                {
+                  u8 * msg = 0;
+                  uword indent;
+
+                  /* Only the first line of a route shows the prefix; later
+                     lines are padded to the same 45-column width so the
+                     counters stay aligned under the header. */
+                  if (j == 0)
+                    msg = format (msg, "%-45U",
+                                  format_ip6_address_and_length,
+                                  r->address.as_u8, r->address_length);
+                  else
+                    msg = format (msg, "%U", format_white_space, 45);
+
+                  msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes);
+
+                  indent = vec_len (msg);
+                  msg = format (msg, "weight %d, index %d\n%U%U",
+                                nhs[j].weight, adj_index + i,
+                                format_white_space, indent,
+                                format_ip_adjacency,
+                                vnm, lm, adj_index + i);
+
+                  vlib_cli_output (vm, "%v", msg);
+                  vec_free (msg);
+
+                  j++;
+                  if (j < n_nhs)
+                    {
+                      n_left = nhs[j].weight;
+                      vlib_counter_zero (&sum);
+                    }
+                }
+            }
+
+          if (result && lm->format_fib_result)
+            vlib_cli_output (vm, "%20s%U", "", lm->format_fib_result, vm, lm, result, 0);
+        }
+      vlib_cli_output (vm, " ");
+    }
+
+  vec_free (routes);
+  vec_free (results);
+
+  return 0;
+}
+
+/* Registers the "show ip6 fib" CLI command, handled by ip6_show_fib. */
+VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
+  .path = "show ip6 fib",
+  .function = ip6_show_fib,
+  .short_help = "show ip6 fib [summary] [clear]",
+};
diff --git a/vnet/vnet/ip/lookup.h b/vnet/vnet/ip/lookup.h
new file mode 100644
index 00000000000..e4e5acfece3
--- /dev/null
+++ b/vnet/vnet/ip/lookup.h
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/lookup.h: ip (4 or 6) lookup structures, adjacencies, ...
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_lookup_h
+#define included_ip_lookup_h
+
+#include <vnet/vnet.h>
+#include <vlib/buffer.h>
+
+/* Next index stored in adjacency (see ip_adjacency_t.lookup_next_index). */
+typedef enum {
+ /* Packet does not match any route in table. */
+ IP_LOOKUP_NEXT_MISS,
+
+ /* Adjacency says to drop or punt this packet. */
+ IP_LOOKUP_NEXT_DROP,
+ IP_LOOKUP_NEXT_PUNT,
+
+ /* This packet is for one of our own IP addresses. */
+ IP_LOOKUP_NEXT_LOCAL,
+
+ /* This packet matches an "interface route" and packets
+ need to be passed to ARP to find rewrite string for
+ this destination. */
+ IP_LOOKUP_NEXT_ARP,
+
+ /* This packet is to be rewritten and forwarded to the next
+ processing node. This is typically the output interface but
+ might be another node for further output processing. */
+ IP_LOOKUP_NEXT_REWRITE,
+
+ /* This packet needs to be classified. */
+ IP_LOOKUP_NEXT_CLASSIFY,
+
+ /* This packet needs to go to MAP - RFC7596, RFC7597 */
+ IP_LOOKUP_NEXT_MAP,
+
+ /* This packet needs to go to MAP with Translation - RFC7599 */
+ IP_LOOKUP_NEXT_MAP_T,
+
+ /* This packet needs to go to 6RD (RFC5969) */
+ IP_LOOKUP_NEXT_SIXRD,
+
+ /* Hop-by-hop header handling */
+ IP_LOOKUP_NEXT_HOP_BY_HOP,
+ IP_LOOKUP_NEXT_ADD_HOP_BY_HOP,
+ IP_LOOKUP_NEXT_POP_HOP_BY_HOP,
+/* Last enumerator: equals the number of next indices declared above. */
+ IP_LOOKUP_N_NEXT,
+} ip_lookup_next_t;
+
+/* Flow hash configuration bits. */
+#define IP_FLOW_HASH_SRC_ADDR (1<<0)
+#define IP_FLOW_HASH_DST_ADDR (1<<1)
+#define IP_FLOW_HASH_PROTO (1<<2)
+#define IP_FLOW_HASH_SRC_PORT (1<<3)
+#define IP_FLOW_HASH_DST_PORT (1<<4)
+#define IP_FLOW_HASH_REVERSE_SRC_DST (1<<5)
+
+/* Default: 5-tuple (bits 0 through 4) without the "reverse" bit */
+#define IP_FLOW_HASH_DEFAULT (0x1F)
+/* X-macro: expands _(name, bit) once for every flow hash bit. */
+#define foreach_flow_hash_bit \
+_(src, IP_FLOW_HASH_SRC_ADDR) \
+_(dst, IP_FLOW_HASH_DST_ADDR) \
+_(sport, IP_FLOW_HASH_SRC_PORT) \
+_(dport, IP_FLOW_HASH_DST_PORT) \
+_(proto, IP_FLOW_HASH_PROTO) \
+_(reverse, IP_FLOW_HASH_REVERSE_SRC_DST)
+
+/* IP unicast adjacency. */
+typedef struct {
+ /* Handle for this adjacency in adjacency heap. */
+ u32 heap_handle;
+
+ /* Interface address index for this local/arp adjacency. */
+ u32 if_address_index;
+
+ /* Number of adjacencies in block. Greater than 1 means multipath;
+ otherwise equal to 1. */
+ u16 n_adj;
+
+ /* Next hop after ip4-lookup. The union gives the same 16 bits both as
+ an ip_lookup_next_t and as a plain u16. */
+ union {
+ ip_lookup_next_t lookup_next_index : 16;
+ u16 lookup_next_index_as_int;
+ };
+
+ /* Force re-lookup in a different FIB. ~0 => normal behavior */
+ i16 explicit_fib_index;
+ u16 mcast_group_index;
+
+ /* When classifying, start here */
+ u16 classify_table_index;
+ /* Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */
+ u16 saved_lookup_next_index;
+/* Rewrite data. NOTE(review): the 5*sizeof(u32) presumably accounts for the fields above -- confirm against vnet_declare_rewrite. */
+ vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE - 5*sizeof(u32));
+} ip_adjacency_t;
+
+/* Index into adjacency table (cf. the adj_index argument of ip_get_adjacency). */
+typedef u32 ip_adjacency_index_t;
+
+typedef struct { /* One (next hop, weight) entry of a multipath next-hop block. */
+ /* Directly connected next-hop adjacency index. */
+ u32 next_hop_adj_index;
+
+ /* Path weight for this adjacency. */
+ u32 weight;
+} ip_multipath_next_hop_t;
+
+typedef struct { /* Bookkeeping for one multipath block of adjacencies. */
+ /* Adjacency index of first index in block. */
+ u32 adj_index;
+
+ /* Power of 2 size of adjacency block. */
+ u32 n_adj_in_block;
+
+ /* Number of prefixes that point to this adjacency. */
+ u32 reference_count;
+
+ /* Normalized next hops are used as hash keys: they are sorted by weight
+ and weights are chosen so they add up to 1 << log2_n_adj_in_block (with
+ zero-weighted next hops being deleted).
+ Unnormalized next hops are saved so that control plane has a record of exactly
+ what the RIB told it. */
+ struct {
+ /* Number of hops in the multipath. */
+ u32 count;
+
+ /* Offset into next hop heap for this block. */
+ u32 heap_offset;
+
+ /* Heap handle, used e.g. to free the block when we're done with it. */
+ u32 heap_handle;
+ } normalized_next_hops, unnormalized_next_hops;
+} ip_multipath_adjacency_t;
+
+/* IP multicast adjacency. */
+typedef struct {
+ /* Handle for this adjacency in adjacency heap. */
+ u32 heap_handle;
+
+ /* Number of adjacencies in block. */
+ u32 n_adj;
+
+ /* Rewrite string; the size argument is 64 minus the two u32 fields above. */
+ vnet_declare_rewrite (64 - 2*sizeof(u32));
+} ip_multicast_rewrite_t;
+
+typedef struct { /* A multicast rewrite string together with its next index. */
+ /* ip4-multicast-rewrite next index. */
+ u32 next_index;
+
+ u8 n_rewrite_bytes; /* presumably the number of valid bytes in rewrite_string -- TODO confirm */
+
+ u8 rewrite_string[64 - 1*sizeof(u32) - 1*sizeof(u8)]; /* Sized as 64 minus the two fields above. */
+} ip_multicast_rewrite_string_t;
+
+typedef struct { /* Multicast lookup state: rewrite heap, rewrite strings and adjacency id tables. */
+ ip_multicast_rewrite_t * rewrite_heap;
+
+ ip_multicast_rewrite_string_t * rewrite_strings;
+
+ /* Negative rewrite string index; >= 0 sw_if_index.
+ Sorted. Used to hash. */
+ i32 ** adjacency_id_vector;
+
+ uword * adjacency_by_id_vector; /* presumably a hash keyed by an adjacency id vector -- TODO confirm */
+} ip_multicast_lookup_main_t;
+
+typedef struct { /* One address assigned to a software interface (element of if_address_pool). */
+ /* Key for mhash; in fact, just a byte offset into mhash key vector. */
+ u32 address_key;
+
+ /* Interface which has this address. */
+ u32 sw_if_index;
+
+ /* Adjacency for neighbor probe (ARP) for this interface address. */
+ u32 neighbor_probe_adj_index;
+
+ /* Address (prefix) length for this interface. */
+ u16 address_length;
+
+ /* Will be used for something eventually. Primary vs. secondary? */
+ u16 flags;
+
+ /* Next and previous pointers for doubly linked list of
+ addresses per software interface. These are pool indices;
+ foreach_ip_interface_address stops at ~0. */
+ u32 next_this_sw_interface;
+ u32 prev_this_sw_interface;
+} ip_interface_address_t;
+
+typedef enum { /* presumably the next indices of the ip[46]-local node (cf. local_next_by_ip_protocol) -- TODO confirm */
+ IP_LOCAL_NEXT_DROP,
+ IP_LOCAL_NEXT_PUNT,
+ // IP_LOCAL_NEXT_TCP_LOOKUP,
+ IP_LOCAL_NEXT_UDP_LOOKUP,
+ IP_LOCAL_NEXT_ICMP,
+ IP_LOCAL_N_NEXT, /* Last enumerator: equals the number of entries above. */
+} ip_local_next_t;
+
+struct ip_lookup_main_t; /* Forward declaration: the callback type below takes a pointer to it. */
+/* Callback type invoked by ip_call_add_del_adjacency_callbacks with the adjacency, its index and is_del. */
+typedef void (* ip_add_del_adjacency_callback_t) (struct ip_lookup_main_t * lm,
+ u32 adj_index,
+ ip_adjacency_t * adj,
+ u32 is_del);
+
+typedef struct { /* A vnet config main plus a per-software-interface config index vector. */
+ vnet_config_main_t config_main;
+
+ u32 * config_index_by_sw_if_index; /* Indexed by sw_if_index, as the name says; contents not shown in this header. */
+} ip_config_main_t;
+
+typedef struct ip_lookup_main_t { /* State shared by the ip4 and ip6 lookup code: adjacencies, multipath data and interface addresses. */
+ /* Adjacency heap. */
+ ip_adjacency_t * adjacency_heap;
+
+ /* Adjacency packet/byte counters indexed by adjacency index. */
+ vlib_combined_counter_main_t adjacency_counters;
+
+ /* Heap of (next hop, weight) blocks. Sorted by next hop. */
+ ip_multipath_next_hop_t * next_hop_heap;
+
+ /* Indexed by heap_handle from ip_adjacency_t. */
+ ip_multipath_adjacency_t * multipath_adjacencies;
+
+ /* Temporary vectors for looking up next hops in hash. */
+ ip_multipath_next_hop_t * next_hop_hash_lookup_key;
+ ip_multipath_next_hop_t * next_hop_hash_lookup_key_normalized;
+
+ /* Hash table mapping normalized next hops and weights
+ to multipath adjacency index. */
+ uword * multipath_adjacency_by_next_hops;
+
+ u32 * adjacency_remap_table;
+ u32 n_adjacency_remaps;
+
+ /* If average error per adjacency is less than this threshold adjacency block
+ size is accepted. */
+ f64 multipath_next_hop_error_tolerance;
+
+ /* Adjacency index for routing table misses, local punts, and drops. */
+ u32 miss_adj_index, drop_adj_index, local_adj_index;
+
+ /* Miss adjacency is always first in adjacency table. */
+#define IP_LOOKUP_MISS_ADJ_INDEX 0
+/* Vector of callbacks run by ip_call_add_del_adjacency_callbacks. */
+ ip_add_del_adjacency_callback_t * add_del_adjacency_callbacks;
+
+ /* Pool of addresses that are assigned to interfaces. */
+ ip_interface_address_t * if_address_pool;
+
+ /* Hash table mapping address to index in interface address pool. */
+ mhash_t address_to_if_address_index;
+
+ /* Head of doubly linked list of interface addresses for each software interface.
+ ~0 means this interface has no address. */
+ u32 * if_address_pool_index_by_sw_if_index;
+
+ /* First table index to use for this interface, ~0 => none */
+ u32 * classify_table_index_by_sw_if_index;
+
+ /* rx/tx interface/feature configuration. */
+ ip_config_main_t rx_config_mains[VNET_N_CAST], tx_config_main;
+
+ /* Number of bytes in a fib result. Must be at least
+ sizeof (uword). First word is always adjacency index. */
+ u32 fib_result_n_bytes, fib_result_n_words;
+/* Optional formatter for a fib result; the show-fib CLI code checks it for null before use. */
+ format_function_t * format_fib_result;
+
+ /* 1 for ip6; 0 for ip4. */
+ u32 is_ip6;
+
+ /* Either format_ip4_address_and_length or format_ip6_address_and_length. */
+ format_function_t * format_address_and_length;
+
+ /* Table mapping ip protocol to ip[46]-local node next index. */
+ u8 local_next_by_ip_protocol[256];
+
+ /* IP_BUILTIN_PROTOCOL_{TCP,UDP,ICMP,OTHER} by protocol in IP header. */
+ u8 builtin_protocol_by_ip_protocol[256];
+} ip_lookup_main_t;
+
+/* Return the adjacency at adj_index in lm's adjacency heap.  Asserts that
+   the adjacency's heap handle is not a free one. */
+always_inline ip_adjacency_t *
+ip_get_adjacency (ip_lookup_main_t * lm,
+                  u32 adj_index)
+{
+  ip_adjacency_t * entry = heap_elt_at_index (lm->adjacency_heap, adj_index);
+
+  ASSERT (! heap_is_free_handle (lm->adjacency_heap, entry->heap_handle));
+
+  return entry;
+}
+
+/* Issue a CLIB_PREFETCH of the given type for adjacency adj_index of lm. */
+#define ip_prefetch_adjacency(lm,adj_index,type)                \
+do {                                                            \
+  ip_adjacency_t * _adj = &(lm)->adjacency_heap[(adj_index)];   \
+  CLIB_PREFETCH (_adj, sizeof (*_adj), type);                   \
+} while (0)
+
+/* Invoke every registered add/del adjacency callback, in vector order, for
+   the adjacency at adj_index. */
+always_inline void
+ip_call_add_del_adjacency_callbacks (ip_lookup_main_t * lm, u32 adj_index, u32 is_del)
+{
+  ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
+  uword cb = 0;
+
+  /* The vector length is re-read on every pass, as in a plain for loop. */
+  while (cb < vec_len (lm->add_del_adjacency_callbacks))
+    {
+      lm->add_del_adjacency_callbacks[cb] (lm, adj_index, adj, is_del);
+      cb++;
+    }
+}
+
+/* Create new block of given number of contiguous adjacencies. */
+ip_adjacency_t *
+ip_add_adjacency (ip_lookup_main_t * lm, ip_adjacency_t * adj,
+                  u32 n_adj, u32 * adj_index_result);
+
+/* Delete the adjacency at adj_index (implementation not in this header). */
+void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index);
+
+/* Free multipath adjacency a (implementation not in this header). */
+void
+ip_multipath_adjacency_free (ip_lookup_main_t * lm,
+                             ip_multipath_adjacency_t * a);
+
+/* Add or delete (is_del) a weighted next hop of a multipath adjacency;
+   presumably stores the resulting index through new_mp_adj_index -- confirm
+   against the implementation. */
+u32
+ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm, u32 is_del,
+                                         u32 old_mp_adj_index,
+                                         u32 next_hop_adj_index,
+                                         u32 next_hop_weight,
+                                         u32 * new_mp_adj_index);
+
+/* Add or delete (is_del) an interface address on sw_if_index
+   (implementation not in this header). */
+clib_error_t *
+ip_interface_address_add_del (ip_lookup_main_t * lm, u32 sw_if_index,
+                              void * address, u32 address_length,
+                              u32 is_del, u32 * result_index);
+
+/* Look up addr_fib in the address hash.  Returns the matching interface
+   address pool element, or 0 when the key is not present. */
+always_inline ip_interface_address_t *
+ip_get_interface_address (ip_lookup_main_t * lm, void * addr_fib)
+{
+  uword * slot = mhash_get (&lm->address_to_if_address_index, addr_fib);
+
+  if (! slot)
+    return 0;
+
+  return pool_elt_at_index (lm->if_address_pool, slot[0]);
+}
+
+/* Return the memory of the mhash key stored for interface address a. */
+always_inline void *
+ip_interface_address_get_address (ip_lookup_main_t * lm, ip_interface_address_t * a)
+{
+  return mhash_key_to_mem (&lm->address_to_if_address_index, a->address_key);
+}
+
+/* Return the interface address for buffer b, found through its TX
+   adjacency (asserted to be an ARP or LOCAL adjacency).  When the adjacency
+   has no address index (~0), the head of sw_if_index's address list is
+   used instead. */
+always_inline ip_interface_address_t *
+ip_interface_address_for_packet (ip_lookup_main_t * lm, vlib_buffer_t * b, u32 sw_if_index)
+{
+  ip_adjacency_t * adj = ip_get_adjacency (lm, vnet_buffer (b)->ip.adj_index[VLIB_TX]);
+  u32 if_address_index;
+
+  ASSERT (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP
+          || adj->lookup_next_index == IP_LOOKUP_NEXT_LOCAL);
+
+  if_address_index = adj->if_address_index;
+  if (if_address_index == ~0)
+    if_address_index = vec_elt (lm->if_address_pool_index_by_sw_if_index, sw_if_index);
+
+  return pool_elt_at_index (lm->if_address_pool, if_address_index);
+}
+
+/*
+ * Walk the interface address list of sw_if_index in lm, assigning each
+ * ip_interface_address_t pointer to (a) and then executing body.
+ *
+ * When loop is non-zero and the interface is flagged unnumbered, the
+ * addresses of its unnumbered_sw_if_index are walked instead.  An interface
+ * index beyond the per-interface head vector yields no iterations.  The
+ * next link is read before body runs.
+ *
+ * sw_if_index and loop are parenthesized so that arbitrary expressions can
+ * be passed safely.
+ */
+#define foreach_ip_interface_address(lm,a,sw_if_index,loop,body)        \
+do {                                                                    \
+    vnet_main_t *_vnm = vnet_get_main();                                \
+    u32 _sw_if_index = (sw_if_index);                                   \
+    vnet_sw_interface_t *_swif;                                         \
+    _swif = vnet_get_sw_interface (_vnm, _sw_if_index);                 \
+                                                                        \
+    /*                                                                  \
+     * Loop => honor unnumbered interface addressing.                   \
+     */                                                                 \
+    if ((loop) && (_swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED))   \
+      _sw_if_index = _swif->unnumbered_sw_if_index;                     \
+    u32 _ia =                                                           \
+      (vec_len((lm)->if_address_pool_index_by_sw_if_index)              \
+       > (_sw_if_index))                                                \
+      ? vec_elt ((lm)->if_address_pool_index_by_sw_if_index,            \
+                 (_sw_if_index)) : (u32)~0;                             \
+    ip_interface_address_t * _a;                                        \
+    while (_ia != ~0)                                                   \
+      {                                                                 \
+        _a = pool_elt_at_index ((lm)->if_address_pool, _ia);            \
+        _ia = _a->next_this_sw_interface;                               \
+        (a) = _a;                                                       \
+        body;                                                           \
+      }                                                                 \
+} while (0)
+
+/* Initialize lm; takes the index of the ip lookup node (implementation not
+   in this header). */
+void
+ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
+
+/* Serialize/unserialize functions for the lookup main and for vectors of
+   adjacencies. */
+serialize_function_t serialize_ip_lookup_main;
+serialize_function_t unserialize_ip_lookup_main;
+serialize_function_t serialize_vec_ip_adjacency;
+serialize_function_t unserialize_vec_ip_adjacency;
+
+#endif /* included_ip_lookup_h */
diff --git a/vnet/vnet/ip/ports.def b/vnet/vnet/ip/ports.def
new file mode 100644
index 00000000000..cdb754f5b2e
--- /dev/null
+++ b/vnet/vnet/ip/ports.def
@@ -0,0 +1,757 @@
+/*
+ * ip/ports.def: tcp/udp port definitions
+ *
+ * Eliot Dresselhaus
+ * August, 2005
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+PORT NUMBERS
+
+(last updated 18 October 2005)
+
+The port numbers are divided into three ranges: the Well Known Ports,
+the Registered Ports, and the Dynamic and/or Private Ports.
+
+The Well Known Ports are those from 0 through 1023.
+
+The Registered Ports are those from 1024 through 49151.
+
+The Dynamic and/or Private Ports are those from 49152 through 65535.
+
+
+************************************************************************
+* PLEASE NOTE THE FOLLOWING: *
+* *
+* 1. UNASSIGNED PORT NUMBERS SHOULD NOT BE USED. THE IANA WILL ASSIGN *
+* THE NUMBER FOR THE PORT AFTER YOUR APPLICATION HAS BEEN APPROVED. *
+* *
+* 2. ASSIGNMENT OF A PORT NUMBER DOES NOT IN ANY WAY IMPLY AN *
+* ENDORSEMENT OF AN APPLICATION OR PRODUCT, AND THE FACT THAT NETWORK *
+* TRAFFIC IS FLOWING TO OR FROM A REGISTERED PORT DOES NOT MEAN THAT *
+* IT IS "GOOD" TRAFFIC. FIREWALL AND SYSTEM ADMINISTRATORS SHOULD *
+* CHOOSE HOW TO CONFIGURE THEIR SYSTEMS BASED ON THEIR KNOWLEDGE OF *
+* THE TRAFFIC IN QUESTION, NOT WHETHER THERE IS A PORT NUMBER *
+* REGISTERED OR NOT. *
+************************************************************************
+
+
+WELL KNOWN PORT NUMBERS
+
+The Well Known Ports are assigned by the IANA and on most systems can
+only be used by system (or root) processes or by programs executed by
+privileged users.
+
+Ports are used in the TCP [RFC793] to name the ends of logical
+connections which carry long term conversations. For the purpose of
+providing services to unknown callers, a service contact port is
+defined. This list specifies the port used by the server process as
+its contact port. The contact port is sometimes called the
+"well-known port".
+
+To the extent possible, these same port assignments are used with the
+UDP [RFC768].
+
+The range for assigned ports managed by the IANA is 0-1023.
+*/
+ip_port (TCPMUX, 1)
+ip_port (COMPRESS_NET_MANAGEMENT, 2)
+ip_port (COMPRESS_NET, 3)
+ip_port (RJE, 5)
+ip_port (ECHO, 7)
+ip_port (DISCARD, 9)
+ip_port (SYSTAT, 11)
+ip_port (DAYTIME, 13)
+ip_port (QOTD, 17)
+ip_port (MSP, 18)
+ip_port (CHARGEN, 19)
+ip_port (FTP_DATA, 20)
+ip_port (FTP, 21)
+ip_port (SSH, 22)
+ip_port (TELNET, 23)
+ip_port (SMTP, 25)
+ip_port (NSW_FE, 27)
+ip_port (MSG_ICP, 29)
+ip_port (MSG_AUTH, 31)
+ip_port (DSP, 33)
+ip_port (TIME, 37)
+ip_port (RAP, 38)
+ip_port (RLP, 39)
+ip_port (GRAPHICS, 41)
+ip_port (NAME, 42)
+ip_port (NAMESERVER, 42)
+ip_port (NICNAME, 43)
+ip_port (MPM_FLAGS, 44)
+ip_port (MPM, 45)
+ip_port (MPM_SND, 46)
+ip_port (NI_FTP, 47)
+ip_port (AUDITD, 48)
+ip_port (TACACS, 49)
+ip_port (RE_MAIL_CK, 50)
+ip_port (LA_MAINT, 51)
+ip_port (XNS_TIME, 52)
+ip_port (DNS, 53)
+ip_port (XNS_CH, 54)
+ip_port (ISI_GL, 55)
+ip_port (XNS_AUTH, 56)
+ip_port (XNS_MAIL, 58)
+ip_port (NI_MAIL, 61)
+ip_port (ACAS, 62)
+ip_port (WHOIS_PLUS_PLUS, 63)
+ip_port (COVIA, 64)
+ip_port (TACACS_DS, 65)
+ip_port (ORACLE_SQL_NET, 66)
+ip_port (BOOTPS, 67)
+ip_port (BOOTPC, 68)
+ip_port (TFTP, 69)
+ip_port (GOPHER, 70)
+ip_port (NETRJS_1, 71)
+ip_port (NETRJS_2, 72)
+ip_port (NETRJS_3, 73)
+ip_port (NETRJS_4, 74)
+ip_port (DEOS, 76)
+ip_port (VETTCP, 78)
+ip_port (FINGER, 79)
+ip_port (WWW, 80)
+ip_port (HOSTS2_NS, 81)
+ip_port (XFER, 82)
+ip_port (MIT_ML_DEV, 83)
+ip_port (CTF, 84)
+ip_port (MIT_ML_DEV1, 85)
+ip_port (MFCOBOL, 86)
+ip_port (KERBEROS, 88)
+ip_port (SU_MIT_TG, 89)
+ip_port (DNSIX, 90)
+ip_port (MIT_DOV, 91)
+ip_port (NPP, 92)
+ip_port (DCP, 93)
+ip_port (OBJCALL, 94)
+ip_port (SUPDUP, 95)
+ip_port (DIXIE, 96)
+ip_port (SWIFT_RVF, 97)
+ip_port (TACNEWS, 98)
+ip_port (METAGRAM, 99)
+ip_port (NEWACCT, 100)
+ip_port (HOSTNAME, 101)
+ip_port (ISO_TSAP, 102)
+ip_port (GPPITNP, 103)
+ip_port (ACR_NEMA, 104)
+ip_port (CSO, 105)
+ip_port (CSNET_NS, 105)
+ip_port (3COM_TSMUX, 106)
+ip_port (RTELNET, 107)
+ip_port (SNAGAS, 108)
+ip_port (POP2, 109)
+ip_port (POP3, 110)
+ip_port (SUNRPC, 111)
+ip_port (MCIDAS, 112)
+ip_port (IDENT, 113)
+ip_port (SFTP, 115)
+ip_port (ANSANOTIFY, 116)
+ip_port (UUCP_PATH, 117)
+ip_port (SQLSERV, 118)
+ip_port (NNTP, 119)
+ip_port (CFDPTKT, 120)
+ip_port (ERPC, 121)
+ip_port (SMAKYNET, 122)
+ip_port (NTP, 123)
+ip_port (ANSATRADER, 124)
+ip_port (LOCUS_MAP, 125)
+ip_port (NXEDIT, 126)
+ip_port (LOCUS_CON, 127)
+ip_port (GSS_XLICEN, 128)
+ip_port (PWDGEN, 129)
+ip_port (CISCO_FNA, 130)
+ip_port (CISCO_TNA, 131)
+ip_port (CISCO_SYS, 132)
+ip_port (STATSRV, 133)
+ip_port (INGRES_NET, 134)
+ip_port (EPMAP, 135)
+ip_port (PROFILE, 136)
+ip_port (NETBIOS_NS, 137)
+ip_port (NETBIOS_DGM, 138)
+ip_port (NETBIOS_SSN, 139)
+ip_port (EMFIS_DATA, 140)
+ip_port (EMFIS_CNTL, 141)
+ip_port (BL_IDM, 142)
+ip_port (IMAP, 143)
+ip_port (UMA, 144)
+ip_port (UAAC, 145)
+ip_port (ISO_TP0, 146)
+ip_port (ISO_IP, 147)
+ip_port (JARGON, 148)
+ip_port (AED_512, 149)
+ip_port (SQL_NET, 150)
+ip_port (HEMS, 151)
+ip_port (BFTP, 152)
+ip_port (SGMP, 153)
+ip_port (NETSC_PROD, 154)
+ip_port (NETSC_DEV, 155)
+ip_port (SQLSRV, 156)
+ip_port (KNET_CMP, 157)
+ip_port (PCMAIL_SRV, 158)
+ip_port (NSS_ROUTING, 159)
+ip_port (SGMP_TRAPS, 160)
+ip_port (SNMP, 161)
+ip_port (SNMPTRAP, 162)
+ip_port (CMIP_MAN, 163)
+ip_port (CMIP_AGENT, 164)
+ip_port (XNS_COURIER, 165)
+ip_port (S_NET, 166)
+ip_port (NAMP, 167)
+ip_port (RSVD, 168)
+ip_port (SEND, 169)
+ip_port (PRINT_SRV, 170)
+ip_port (MULTIPLEX, 171)
+ip_port (CL1, 172)
+ip_port (XYPLEX_MUX, 173)
+ip_port (MAILQ, 174)
+ip_port (VMNET, 175)
+ip_port (GENRAD_MUX, 176)
+ip_port (XDMCP, 177)
+ip_port (NEXTSTEP, 178)
+ip_port (BGP, 179)
+ip_port (RIS, 180)
+ip_port (UNIFY, 181)
+ip_port (AUDIT, 182)
+ip_port (OCBINDER, 183)
+ip_port (OCSERVER, 184)
+ip_port (REMOTE_KIS, 185)
+ip_port (KIS, 186)
+ip_port (ACI, 187)
+ip_port (MUMPS, 188)
+ip_port (QFT, 189)
+ip_port (GACP, 190)
+ip_port (PROSPERO, 191)
+ip_port (OSU_NMS, 192)
+ip_port (SRMP, 193)
+ip_port (IRC, 194)
+ip_port (DN6_NLM_AUD, 195)
+ip_port (DN6_SMM_RED, 196)
+ip_port (DLS, 197)
+ip_port (DLS_MON, 198)
+ip_port (SMUX, 199)
+ip_port (SRC, 200)
+ip_port (AT_RTMP, 201)
+ip_port (AT_NBP, 202)
+ip_port (AT_3, 203)
+ip_port (AT_ECHO, 204)
+ip_port (AT_5, 205)
+ip_port (AT_ZIS, 206)
+ip_port (AT_7, 207)
+ip_port (AT_8, 208)
+ip_port (QMTP, 209)
+ip_port (Z39_50, 210)
+ip_port (TI914CG, 211)
+ip_port (ANET, 212)
+ip_port (IPX, 213)
+ip_port (VMPWSCS, 214)
+ip_port (SOFTPC, 215)
+ip_port (CAILIC, 216)
+ip_port (DBASE, 217)
+ip_port (MPP, 218)
+ip_port (UARPS, 219)
+ip_port (IMAP3, 220)
+ip_port (FLN_SPX, 221)
+ip_port (RSH_SPX, 222)
+ip_port (CDC, 223)
+ip_port (MASQDIALER, 224)
+ip_port (DIRECT, 242)
+ip_port (SUR_MEAS, 243)
+ip_port (INBUSINESS, 244)
+ip_port (LINK, 245)
+ip_port (DSP3270, 246)
+ip_port (SUBNTBCST_TFTP, 247)
+ip_port (BHFHS, 248)
+ip_port (RAP1, 256)
+ip_port (SET, 257)
+ip_port (YAK_CHAT, 258)
+ip_port (ESRO_GEN, 259)
+ip_port (OPENPORT, 260)
+ip_port (NSIIOPS, 261)
+ip_port (ARCISDMS, 262)
+ip_port (HDAP, 263)
+ip_port (BGMP, 264)
+ip_port (X_BONE_CTL, 265)
+ip_port (SST, 266)
+ip_port (TD_SERVICE, 267)
+ip_port (TD_REPLICA, 268)
+ip_port (HTTP_MGMT, 280)
+ip_port (PERSONAL_LINK, 281)
+ip_port (CABLEPORT_AX, 282)
+ip_port (RESCAP, 283)
+ip_port (CORERJD, 284)
+ip_port (FXP, 286)
+ip_port (K_BLOCK, 287)
+ip_port (NOVASTORBAKCUP, 308)
+ip_port (ENTRUSTTIME, 309)
+ip_port (BHMDS, 310)
+ip_port (ASIP_WEBADMIN, 311)
+ip_port (VSLMP, 312)
+ip_port (MAGENTA_LOGIC, 313)
+ip_port (OPALIS_ROBOT, 314)
+ip_port (DPSI, 315)
+ip_port (DECAUTH, 316)
+ip_port (ZANNET, 317)
+ip_port (PKIX_TIMESTAMP, 318)
+ip_port (PTP_EVENT, 319)
+ip_port (PTP_GENERAL, 320)
+ip_port (PIP, 321)
+ip_port (RTSPS, 322)
+ip_port (TEXAR, 333)
+ip_port (PDAP, 344)
+ip_port (PAWSERV, 345)
+ip_port (ZSERV, 346)
+ip_port (FATSERV, 347)
+ip_port (CSI_SGWP, 348)
+ip_port (MFTP, 349)
+ip_port (MATIP_TYPE_A, 350)
+ip_port (MATIP_TYPE_B, 351)
+ip_port (BHOETTY, 351)
+ip_port (DTAG_STE_SB, 352)
+ip_port (BHOEDAP4, 352)
+ip_port (NDSAUTH, 353)
+ip_port (BH611, 354)
+ip_port (DATEX_ASN, 355)
+ip_port (CLOANTO_NET_1, 356)
+ip_port (BHEVENT, 357)
+ip_port (SHRINKWRAP, 358)
+ip_port (NSRMP, 359)
+ip_port (SCOI2ODIALOG, 360)
+ip_port (SEMANTIX, 361)
+ip_port (SRSSEND, 362)
+ip_port (RSVP_TUNNEL, 363)
+ip_port (AURORA_CMGR, 364)
+ip_port (DTK, 365)
+ip_port (ODMR, 366)
+ip_port (MORTGAGEWARE, 367)
+ip_port (QBIKGDP, 368)
+ip_port (RPC2PORTMAP, 369)
+ip_port (CODAAUTH2, 370)
+ip_port (CLEARCASE, 371)
+ip_port (ULISTPROC, 372)
+ip_port (LEGENT_1, 373)
+ip_port (LEGENT_2, 374)
+ip_port (HASSLE, 375)
+ip_port (NIP, 376)
+ip_port (TNETOS, 377)
+ip_port (DSETOS, 378)
+ip_port (IS99C, 379)
+ip_port (IS99S, 380)
+ip_port (HP_COLLECTOR, 381)
+ip_port (HP_MANAGED_NODE, 382)
+ip_port (HP_ALARM_MGR, 383)
+ip_port (ARNS, 384)
+ip_port (IBM_APP, 385)
+ip_port (ASA, 386)
+ip_port (AURP, 387)
+ip_port (UNIDATA_LDM, 388)
+ip_port (LDAP, 389)
+ip_port (UIS, 390)
+ip_port (SYNOTICS_RELAY, 391)
+ip_port (SYNOTICS_BROKER, 392)
+ip_port (META5, 393)
+ip_port (EMBL_NDT, 394)
+ip_port (NETCP, 395)
+ip_port (NETWARE_IP, 396)
+ip_port (MPTN, 397)
+ip_port (KRYPTOLAN, 398)
+ip_port (ISO_TSAP_C2, 399)
+ip_port (WORK_SOL, 400)
+ip_port (UPS, 401)
+ip_port (GENIE, 402)
+ip_port (DECAP, 403)
+ip_port (NCED, 404)
+ip_port (NCLD, 405)
+ip_port (IMSP, 406)
+ip_port (TIMBUKTU, 407)
+ip_port (PRM_SM, 408)
+ip_port (PRM_NM, 409)
+ip_port (DECLADEBUG, 410)
+ip_port (RMT, 411)
+ip_port (SYNOPTICS_TRAP, 412)
+ip_port (SMSP, 413)
+ip_port (INFOSEEK, 414)
+ip_port (BNET, 415)
+ip_port (SILVERPLATTER, 416)
+ip_port (ONMUX, 417)
+ip_port (HYPER_G, 418)
+ip_port (ARIEL1, 419)
+ip_port (SMPTE, 420)
+ip_port (ARIEL2, 421)
+ip_port (ARIEL3, 422)
+ip_port (OPC_JOB_START, 423)
+ip_port (OPC_JOB_TRACK, 424)
+ip_port (ICAD_EL, 425)
+ip_port (SMARTSDP, 426)
+ip_port (SVRLOC, 427)
+ip_port (OCS_CMU, 428)
+ip_port (OCS_AMU, 429)
+ip_port (UTMPSD, 430)
+ip_port (UTMPCD, 431)
+ip_port (IASD, 432)
+ip_port (NNSP, 433)
+ip_port (MOBILEIP_AGENT, 434)
+ip_port (MOBILIP_MN, 435)
+ip_port (DNA_CML, 436)
+ip_port (COMSCM, 437)
+ip_port (DSFGW, 438)
+ip_port (DASP, 439)
+ip_port (SGCP, 440)
+ip_port (DECVMS_SYSMGT, 441)
+ip_port (CVC_HOSTD, 442)
+ip_port (HTTPS, 443)
+ip_port (SNPP, 444)
+ip_port (MICROSOFT_DS, 445)
+ip_port (DDM_RDB, 446)
+ip_port (DDM_DFM, 447)
+ip_port (DDM_SSL, 448)
+ip_port (AS_SERVERMAP, 449)
+ip_port (TSERVER, 450)
+ip_port (SFS_SMP_NET, 451)
+ip_port (SFS_CONFIG, 452)
+ip_port (CREATIVESERVER, 453)
+ip_port (CONTENTSERVER, 454)
+ip_port (CREATIVEPARTNR, 455)
+ip_port (MACON_TCP, 456)
+ip_port (SCOHELP, 457)
+ip_port (APPLEQTC, 458)
+ip_port (AMPR_RCMD, 459)
+ip_port (SKRONK, 460)
+ip_port (DATASURFSRV, 461)
+ip_port (DATASURFSRVSEC, 462)
+ip_port (ALPES, 463)
+ip_port (KPASSWD, 464)
+ip_port (URD, 465)
+ip_port (DIGITAL_VRC, 466)
+ip_port (MYLEX_MAPD, 467)
+ip_port (PHOTURIS, 468)
+ip_port (RCP, 469)
+ip_port (SCX_PROXY, 470)
+ip_port (MONDEX, 471)
+ip_port (LJK_LOGIN, 472)
+ip_port (HYBRID_POP, 473)
+ip_port (TN_TL_W1, 474)
+ip_port (TCPNETHASPSRV, 475)
+ip_port (TN_TL_FD1, 476)
+ip_port (SS7NS, 477)
+ip_port (SPSC, 478)
+ip_port (IAFSERVER, 479)
+ip_port (IAFDBASE, 480)
+ip_port (PH, 481)
+ip_port (BGS_NSI, 482)
+ip_port (ULPNET, 483)
+ip_port (INTEGRA_SME, 484)
+ip_port (POWERBURST, 485)
+ip_port (AVIAN, 486)
+ip_port (SAFT, 487)
+ip_port (GSS_HTTP, 488)
+ip_port (NEST_PROTOCOL, 489)
+ip_port (MICOM_PFS, 490)
+ip_port (GO_LOGIN, 491)
+ip_port (TICF_1, 492)
+ip_port (TICF_2, 493)
+ip_port (POV_RAY, 494)
+ip_port (INTECOURIER, 495)
+ip_port (PIM_RP_DISC, 496)
+ip_port (DANTZ, 497)
+ip_port (SIAM, 498)
+ip_port (ISO_ILL, 499)
+ip_port (ISAKMP, 500)
+ip_port (STMF, 501)
+ip_port (ASA_APPL_PROTO, 502)
+ip_port (INTRINSA, 503)
+ip_port (CITADEL, 504)
+ip_port (MAILBOX_LM, 505)
+ip_port (OHIMSRV, 506)
+ip_port (CRS, 507)
+ip_port (XVTTP, 508)
+ip_port (SNARE, 509)
+ip_port (FCP, 510)
+ip_port (PASSGO, 511)
+ip_port (EXEC, 512)
+ip_port (LOGIN, 513)
+ip_port (SHELL, 514)
+ip_port (PRINTER, 515)
+ip_port (VIDEOTEX, 516)
+ip_port (TALK, 517)
+ip_port (NTALK, 518)
+ip_port (UTIME, 519)
+ip_port (EFS, 520)
+ip_port (RIPNG, 521)
+ip_port (ULP, 522)
+ip_port (IBM_DB2, 523)
+ip_port (NCP, 524)
+ip_port (TIMED, 525)
+ip_port (TEMPO, 526)
+ip_port (STX, 527)
+ip_port (CUSTIX, 528)
+ip_port (IRC_SERV, 529)
+ip_port (COURIER, 530)
+ip_port (CONFERENCE, 531)
+ip_port (NETNEWS, 532)
+ip_port (NETWALL, 533)
+ip_port (MM_ADMIN, 534)
+ip_port (IIOP, 535)
+ip_port (OPALIS_RDV, 536)
+ip_port (NMSP, 537)
+ip_port (GDOMAP, 538)
+ip_port (APERTUS_LDP, 539)
+ip_port (UUCP, 540)
+ip_port (UUCP_RLOGIN, 541)
+ip_port (COMMERCE, 542)
+ip_port (KLOGIN, 543)
+ip_port (KSHELL, 544)
+ip_port (APPLEQTCSRVR, 545)
+ip_port (DHCPV6_CLIENT, 546)
+ip_port (DHCPV6_SERVER, 547)
+ip_port (AFPOVERTCP, 548)
+ip_port (IDFP, 549)
+ip_port (NEW_RWHO, 550)
+ip_port (CYBERCASH, 551)
+ip_port (DEVSHR_NTS, 552)
+ip_port (PIRP, 553)
+ip_port (RTSP, 554)
+ip_port (DSF, 555)
+ip_port (REMOTEFS, 556)
+ip_port (OPENVMS_SYSIPC, 557)
+ip_port (SDNSKMP, 558)
+ip_port (TEEDTAP, 559)
+ip_port (RMONITOR, 560)
+ip_port (MONITOR, 561)
+ip_port (CHSHELL, 562)
+ip_port (NNTPS, 563)
+ip_port (9PFS, 564)
+ip_port (WHOAMI, 565)
+ip_port (STREETTALK, 566)
+ip_port (BANYAN_RPC, 567)
+ip_port (MS_SHUTTLE, 568)
+ip_port (MS_ROME, 569)
+ip_port (METER, 570)
+ip_port (METER1, 571)
+ip_port (SONAR, 572)
+ip_port (BANYAN_VIP, 573)
+ip_port (FTP_AGENT, 574)
+ip_port (VEMMI, 575)
+ip_port (IPCD, 576)
+ip_port (VNAS, 577)
+ip_port (IPDD, 578)
+ip_port (DECBSRV, 579)
+ip_port (SNTP_HEARTBEAT, 580)
+ip_port (BDP, 581)
+ip_port (SCC_SECURITY, 582)
+ip_port (PHILIPS_VC, 583)
+ip_port (KEYSERVER, 584)
+ip_port (IMAP4_SSL, 585)
+ip_port (PASSWORD_CHG, 586)
+ip_port (SUBMISSION, 587)
+ip_port (CAL, 588)
+ip_port (EYELINK, 589)
+ip_port (TNS_CML, 590)
+ip_port (HTTP_ALT, 591)
+ip_port (EUDORA_SET, 592)
+ip_port (HTTP_RPC_EPMAP, 593)
+ip_port (TPIP, 594)
+ip_port (CAB_PROTOCOL, 595)
+ip_port (SMSD, 596)
+ip_port (PTCNAMESERVICE, 597)
+ip_port (SCO_WEBSRVRMG3, 598)
+ip_port (ACP, 599)
+ip_port (IPCSERVER, 600)
+ip_port (SYSLOG_CONN, 601)
+ip_port (XMLRPC_BEEP, 602)
+ip_port (IDXP, 603)
+ip_port (TUNNEL, 604)
+ip_port (SOAP_BEEP, 605)
+ip_port (URM, 606)
+ip_port (NQS, 607)
+ip_port (SIFT_UFT, 608)
+ip_port (NPMP_TRAP, 609)
+ip_port (NPMP_LOCAL, 610)
+ip_port (NPMP_GUI, 611)
+ip_port (HMMP_IND, 612)
+ip_port (HMMP_OP, 613)
+ip_port (SSHELL, 614)
+ip_port (SCO_INETMGR, 615)
+ip_port (SCO_SYSMGR, 616)
+ip_port (SCO_DTMGR, 617)
+ip_port (DEI_ICDA, 618)
+ip_port (COMPAQ_EVM, 619)
+ip_port (SCO_WEBSRVRMGR, 620)
+ip_port (ESCP_IP, 621)
+ip_port (COLLABORATOR, 622)
+ip_port (ASF_RMCP, 623)
+ip_port (CRYPTOADMIN, 624)
+ip_port (DEC_DLM, 625)
+ip_port (ASIA, 626)
+ip_port (PASSGO_TIVOLI, 627)
+ip_port (QMQP, 628)
+ip_port (3COM_AMP3, 629)
+ip_port (RDA, 630)
+ip_port (IPP, 631)
+ip_port (BMPP, 632)
+ip_port (SERVSTAT, 633)
+ip_port (GINAD, 634)
+ip_port (RLZDBASE, 635)
+ip_port (LDAPS, 636)
+ip_port (LANSERVER, 637)
+ip_port (MCNS_SEC, 638)
+ip_port (MSDP, 639)
+ip_port (ENTRUST_SPS, 640)
+ip_port (REPCMD, 641)
+ip_port (ESRO_EMSDP, 642)
+ip_port (SANITY, 643)
+ip_port (DWR, 644)
+ip_port (PSSC, 645)
+ip_port (LDP, 646)
+ip_port (DHCP_FAILOVER, 647)
+ip_port (RRP, 648)
+ip_port (CADVIEW_3D, 649)
+ip_port (OBEX, 650)
+ip_port (IEEE_MMS, 651)
+ip_port (HELLO_PORT, 652)
+ip_port (REPSCMD, 653)
+ip_port (AODV, 654)
+ip_port (TINC, 655)
+ip_port (SPMP, 656)
+ip_port (RMC, 657)
+ip_port (TENFOLD, 658)
+ip_port (MAC_SRVR_ADMIN, 660)
+ip_port (HAP, 661)
+ip_port (PFTP, 662)
+ip_port (PURENOISE, 663)
+ip_port (ASF_SECURE_RMCP, 664)
+ip_port (SUN_DR, 665)
+ip_port (MDQS, 666)
+ip_port (DOOM, 666)
+ip_port (DISCLOSE, 667)
+ip_port (MECOMM, 668)
+ip_port (MEREGISTER, 669)
+ip_port (VACDSM_SWS, 670)
+ip_port (VACDSM_APP, 671)
+ip_port (VPPS_QUA, 672)
+ip_port (CIMPLEX, 673)
+ip_port (ACAP, 674)
+ip_port (DCTP, 675)
+ip_port (VPPS_VIA, 676)
+ip_port (VPP, 677)
+ip_port (GGF_NCP, 678)
+ip_port (MRM, 679)
+ip_port (ENTRUST_AAAS, 680)
+ip_port (ENTRUST_AAMS, 681)
+ip_port (XFR, 682)
+ip_port (CORBA_IIOP, 683)
+ip_port (CORBA_IIOP_SSL, 684)
+ip_port (MDC_PORTMAPPER, 685)
+ip_port (HCP_WISMAR, 686)
+ip_port (ASIPREGISTRY, 687)
+ip_port (REALM_RUSD, 688)
+ip_port (NMAP, 689)
+ip_port (VATP, 690)
+ip_port (MSEXCH_ROUTING, 691)
+ip_port (HYPERWAVE_ISP, 692)
+ip_port (CONNENDP, 693)
+ip_port (HA_CLUSTER, 694)
+ip_port (IEEE_MMS_SSL, 695)
+ip_port (RUSHD, 696)
+ip_port (UUIDGEN, 697)
+ip_port (OLSR, 698)
+ip_port (ACCESSNETWORK, 699)
+ip_port (EPP, 700)
+ip_port (LMP, 701)
+ip_port (IRIS_BEEP, 702)
+ip_port (ELCSD, 704)
+ip_port (AGENTX, 705)
+ip_port (SILC, 706)
+ip_port (BORLAND_DSJ, 707)
+ip_port (ENTRUST_KMSH, 709)
+ip_port (ENTRUST_ASH, 710)
+ip_port (CISCO_TDP, 711)
+ip_port (TBRPF, 712)
+ip_port (NETVIEWDM1, 729)
+ip_port (NETVIEWDM2, 730)
+ip_port (NETVIEWDM3, 731)
+ip_port (NETGW, 741)
+ip_port (NETRCS, 742)
+ip_port (FLEXLM, 744)
+ip_port (FUJITSU_DEV, 747)
+ip_port (RIS_CM, 748)
+ip_port (KERBEROS_ADM, 749)
+ip_port (RFILE, 750)
+ip_port (PUMP, 751)
+ip_port (QRH, 752)
+ip_port (RRH, 753)
+ip_port (TELL, 754)
+ip_port (NLOGIN, 758)
+ip_port (CON, 759)
+ip_port (NS, 760)
+ip_port (RXE, 761)
+ip_port (QUOTAD, 762)
+ip_port (CYCLESERV, 763)
+ip_port (OMSERV, 764)
+ip_port (WEBSTER, 765)
+ip_port (PHONEBOOK, 767)
+ip_port (VID, 769)
+ip_port (CADLOCK, 770)
+ip_port (RTIP, 771)
+ip_port (CYCLESERV2, 772)
+ip_port (SUBMIT, 773)
+ip_port (RPASSWD, 774)
+ip_port (ENTOMB, 775)
+ip_port (WPAGES, 776)
+ip_port (MULTILING_HTTP, 777)
+ip_port (WPGS, 780)
+ip_port (MDBS_DAEMON, 800)
+ip_port (DEVICE, 801)
+ip_port (FCP_UDP, 810)
+ip_port (ITM_MCELL_S, 828)
+ip_port (PKIX_3_CA_RA, 829)
+ip_port (DHCP_FAILOVER2, 847)
+ip_port (GDOI, 848)
+ip_port (ISCSI, 860)
+ip_port (RSYNC, 873)
+ip_port (ICLCNET_LOCATE, 886)
+ip_port (ICLCNET_SVINFO, 887)
+ip_port (ACCESSBUILDER, 888)
+ip_port (CDDBP, 888)
+ip_port (OMGINITIALREFS, 900)
+ip_port (SMPNAMERES, 901)
+ip_port (IDEAFARM_CHAT, 902)
+ip_port (IDEAFARM_CATCH, 903)
+ip_port (XACT_BACKUP, 911)
+ip_port (APEX_MESH, 912)
+ip_port (APEX_EDGE, 913)
+ip_port (FTPS_DATA, 989)
+ip_port (FTPS, 990)
+ip_port (NAS, 991)
+ip_port (TELNETS, 992)
+ip_port (IMAPS, 993)
+ip_port (IRCS, 994)
+ip_port (POP3S, 995)
+ip_port (VSINET, 996)
+ip_port (MAITRD, 997)
+ip_port (BUSBOY, 998)
+ip_port (GARCON, 999)
+ip_port (PUPROUTER, 999)
+ip_port (CADLOCK2, 1000)
+ip_port (SURF, 1010)
+
diff --git a/vnet/vnet/ip/protocols.def b/vnet/vnet/ip/protocols.def
new file mode 100644
index 00000000000..77fab31da05
--- /dev/null
+++ b/vnet/vnet/ip/protocols.def
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Emacs editing mode -*-C-*-
+
+From http://www.iana.org/assignments/protocol-numbers
+
+PROTOCOL NUMBERS
+
+(last updated 18 October 2004)
+
+In the Internet Protocol version 4 (IPv4) [RFC791] there is a field,
+called "Protocol", to identify the next level protocol. This is an 8
+bit field. In Internet Protocol version 6 (IPv6) [RFC1883] this field
+is called the "Next Header" field.
+*/
+ip_protocol (0, IP6_HOP_BY_HOP_OPTIONS)
+ip_protocol (1, ICMP)
+ip_protocol (2, IGMP)
+ip_protocol (3, GGP)
+ip_protocol (4, IP_IN_IP)
+ip_protocol (5, ST)
+ip_protocol (6, TCP)
+ip_protocol (7, CBT)
+ip_protocol (8, EGP)
+ip_protocol (9, IGP)
+ip_protocol (10, BBN_RCC_MON)
+ip_protocol (11, NVP_II)
+ip_protocol (12, PUP)
+ip_protocol (13, ARGUS)
+ip_protocol (14, EMCON)
+ip_protocol (15, XNET)
+ip_protocol (16, CHAOS)
+ip_protocol (17, UDP)
+ip_protocol (18, MUX)
+ip_protocol (19, DCN_MEAS)
+ip_protocol (20, HMP)
+ip_protocol (21, PRM)
+ip_protocol (22, XNS_IDP)
+ip_protocol (23, TRUNK_1)
+ip_protocol (24, TRUNK_2)
+ip_protocol (25, LEAF_1)
+ip_protocol (26, LEAF_2)
+ip_protocol (27, RDP)
+ip_protocol (28, IRTP)
+ip_protocol (29, ISO_TP4)
+ip_protocol (30, NETBLT)
+ip_protocol (31, MFE_NSP)
+ip_protocol (32, MERIT_INP)
+ip_protocol (33, SEP)
+ip_protocol (34, 3PC)
+ip_protocol (35, IDPR)
+ip_protocol (36, XTP)
+ip_protocol (37, DDP)
+ip_protocol (38, IDPR_CMTP)
+ip_protocol (39, TP)
+ip_protocol (40, IL)
+ip_protocol (41, IPV6)
+ip_protocol (42, SDRP)
+ip_protocol (43, IPV6_ROUTE)
+ip_protocol (44, IPV6_FRAGMENTATION)
+ip_protocol (45, IDRP)
+ip_protocol (46, RSVP)
+ip_protocol (47, GRE)
+ip_protocol (48, MHRP)
+ip_protocol (49, BNA)
+ip_protocol (50, IPSEC_ESP)
+ip_protocol (51, IPSEC_AH)
+ip_protocol (52, I_NLSP)
+ip_protocol (53, SWIPE)
+ip_protocol (54, NARP)
+ip_protocol (55, MOBILE)
+ip_protocol (56, TLSP)
+ip_protocol (57, SKIP)
+ip_protocol (58, ICMP6)
+ip_protocol (59, IP6_NONXT)
+ip_protocol (60, IP6_DESTINATION_OPTIONS)
+ip_protocol (62, CFTP)
+ip_protocol (64, SAT_EXPAK)
+ip_protocol (65, KRYPTOLAN)
+ip_protocol (66, RVD)
+ip_protocol (67, IPPC)
+ip_protocol (69, SAT_MON)
+ip_protocol (70, VISA)
+ip_protocol (71, IPCV)
+ip_protocol (72, CPNX)
+ip_protocol (73, CPHB)
+ip_protocol (74, WSN)
+ip_protocol (75, PVP)
+ip_protocol (76, BR_SAT_MON)
+ip_protocol (77, SUN_ND)
+ip_protocol (78, WB_MON)
+ip_protocol (79, WB_EXPAK)
+ip_protocol (80, ISO_IP)
+ip_protocol (81, VMTP)
+ip_protocol (82, SECURE_VMTP)
+ip_protocol (83, VINES)
+ip_protocol (84, TTP)
+ip_protocol (85, NSFNET_IGP)
+ip_protocol (86, DGP)
+ip_protocol (87, TCF)
+ip_protocol (88, EIGRP)
+ip_protocol (89, OSPF)
+ip_protocol (90, SPRITE_RPC)
+ip_protocol (91, LARP)
+ip_protocol (92, MTP)
+ip_protocol (93, AX)
+ip_protocol (94, IPIP)
+ip_protocol (95, MICP)
+ip_protocol (96, SCC_SP)
+ip_protocol (97, ETHERIP)
+ip_protocol (98, ENCAP)
+ip_protocol (100, GMTP)
+ip_protocol (101, IFMP)
+ip_protocol (102, PNNI)
+ip_protocol (103, PIM)
+ip_protocol (104, ARIS)
+ip_protocol (105, SCPS)
+ip_protocol (106, QNX)
+ip_protocol (107, A)
+ip_protocol (108, IPCOMP)
+ip_protocol (109, SNP)
+ip_protocol (110, COMPAQ_PEER)
+ip_protocol (111, IPX_IN_IP)
+ip_protocol (112, VRRP)
+ip_protocol (113, PGM)
+ip_protocol (115, L2TP)
+ip_protocol (116, DDX)
+ip_protocol (117, IATP)
+ip_protocol (118, STP)
+ip_protocol (119, SRP)
+ip_protocol (120, UTI)
+ip_protocol (121, SMP)
+ip_protocol (122, SM)
+ip_protocol (123, PTP)
+ip_protocol (124, ISIS)
+ip_protocol (125, FIRE)
+ip_protocol (126, CRTP)
+ip_protocol (127, CRUDP)
+ip_protocol (128, SSCOPMCE)
+ip_protocol (129, IPLT)
+ip_protocol (130, SPS)
+ip_protocol (131, PIPE)
+ip_protocol (132, SCTP)
+ip_protocol (133, FC)
+ip_protocol (134, RSVP_E2E_IGNORE)
+ip_protocol (135, MOBILITY)
+ip_protocol (136, UDP_LITE)
+ip_protocol (137, MPLS_IN_IP)
+ip_protocol (255, RESERVED)
+
diff --git a/vnet/vnet/ip/tcp.c b/vnet/vnet/ip/tcp.c
new file mode 100644
index 00000000000..53f82f1c5b9
--- /dev/null
+++ b/vnet/vnet/ip/tcp.c
@@ -0,0 +1,2983 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/tcp.c: tcp protocol
+ *
+ * Copyright (c) 2011 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ip/tcp.h>
+#include <math.h>
+
+/* Byte-indexed lookup producing a 2-bit result: bit 0 is set for an index
+   whose low nibble is 0xf, bit 1 for one whose high nibble is 0xf.  Only the
+   indices 0x0f, 0xf0 and 0xff are populated; every other entry is zero. */
+static u8 my_zero_mask_table[256] = {
+  [0x0f] = 0x1,
+  [0xf0] = 0x2,
+  [0xff] = 0x3,
+};
+
+/* Reduce the low 16 bits of X (four nibbles) to a 4-bit mask, one bit per
+   nibble, using my_zero_mask_table on each of the two low bytes.  Bit k of
+   the result is set when nibble k of X is 0xf (for the byte values the table
+   populates). */
+static_always_inline u32 my_zero_mask (u32 x)
+{
+  u32 low_pair = my_zero_mask_table[x & 0xff];
+  u32 high_pair = my_zero_mask_table[(x >> 8) & 0xff];
+
+  return low_pair | (high_pair << 2);
+}
+
+/* Byte-indexed lookup giving the position (0 or 1) of the lowest nibble equal
+   to 0xf, for the byte values 0x0f, 0xf0 and 0xff.  Index 0x00 maps to the
+   sentinel 4 ("no nibble set"); unlisted indices default to 0. */
+static u8 my_first_set_table[256] = {
+  [0x0f] = 0,
+  [0xff] = 0,
+  [0xf0] = 1,
+  [0x00] = 4,
+};
+
+/* Given a 16-bit mask made of four nibbles, return the index (0..3) of the
+   lowest nibble that is 0xf, as reported by my_first_set_table.  When the low
+   byte is zero the answer comes from the high byte, offset by 2; if that
+   byte is zero as well the result is 6 (2 + the table's sentinel 4), i.e. a
+   value outside 0..3. */
+static_always_inline u32 my_first_set (u32 zero_mask)
+{
+  u8 low_slot = my_first_set_table[zero_mask & 0xff];
+
+  if (low_slot != 4)
+    return low_slot;
+
+  return (u8) (2 + my_first_set_table[(zero_mask >> 8) & 0xff]);
+}
+
+/* Fill entry I of the 4-wide address block A from the given headers: the
+   IPv4 source and destination addresses come from IP, and the combined
+   source/destination port word comes from TCP. */
+static_always_inline void
+ip4_tcp_udp_address_x4_set_from_headers (ip4_tcp_udp_address_x4_t * a,
+                                         ip4_header_t * ip,
+                                         tcp_header_t * tcp,
+                                         u32 i)
+{
+  /* Ports first, then the two addresses; the stores are independent. */
+  a->ports.as_ports[i].as_u32 = tcp->ports.src_and_dst;
+  a->dst.as_ip4_address[i] = ip->dst_address;
+  a->src.as_ip4_address[i] = ip->src_address;
+}
+
+/* Move entry SRC_I of SRC into entry DST_I of DST.  Each field (source
+   address, destination address, ports) is copied and the originating field is
+   then zeroed, leaving the source entry all-zero. */
+static_always_inline void
+ip4_tcp_udp_address_x4_copy_and_invalidate (ip4_tcp_udp_address_x4_t * dst,
+                                            ip4_tcp_udp_address_x4_t * src,
+                                            u32 dst_i, u32 src_i)
+{
+  /* Source address. */
+  dst->src.as_ip4_address[dst_i].as_u32 = src->src.as_ip4_address[src_i].as_u32;
+  src->src.as_ip4_address[src_i].as_u32 = 0;
+
+  /* Destination address. */
+  dst->dst.as_ip4_address[dst_i].as_u32 = src->dst.as_ip4_address[src_i].as_u32;
+  src->dst.as_ip4_address[src_i].as_u32 = 0;
+
+  /* Source/destination port pair. */
+  dst->ports.as_ports[dst_i].as_u32 = src->ports.as_ports[src_i].as_u32;
+  src->ports.as_ports[src_i].as_u32 = 0;
+}
+
+/* Mark entry I of A as unused by zeroing its source address, destination
+   address and port word. */
+static_always_inline void
+ip4_tcp_udp_address_x4_invalidate (ip4_tcp_udp_address_x4_t * a, u32 i)
+{
+  a->ports.as_ports[i].as_u32 = 0;
+  a->dst.as_ip4_address[i].as_u32 = 0;
+  a->src.as_ip4_address[i].as_u32 = 0;
+}
+
+/* Return non-zero when entry I of A is in use, i.e. when at least one of its
+   source address, destination address or port word is non-zero. */
+static_always_inline uword
+ip4_tcp_udp_address_x4_is_valid (ip4_tcp_udp_address_x4_t * a, u32 i)
+{
+  return (a->src.as_ip4_address[i].as_u32 != 0
+          || a->dst.as_ip4_address[i].as_u32 != 0
+          || a->ports.as_ports[i].as_u32 != 0);
+}
+
+#ifdef TCP_HAVE_VEC128
+/* Vector variant: compare SRC, DST and PORTS lane-by-lane against the four
+   entries held in AX4 and return the byte mask of the combined result as
+   produced by u8x16_compare_byte_mask. */
+static_always_inline uword
+ip4_tcp_udp_address_x4_match_helper (ip4_tcp_udp_address_x4_t * ax4,
+                                     u32x4 src, u32x4 dst, u32x4 ports)
+{
+  u32x4 src_eq = u32x4_is_equal (src, ax4->src.as_u32x4);
+  u32x4 dst_eq = u32x4_is_equal (dst, ax4->dst.as_u32x4);
+  u32x4 ports_eq = u32x4_is_equal (ports, ax4->ports.as_u32x4);
+  u32x4 all_eq = src_eq & dst_eq & ports_eq;
+  u32 byte_mask;
+
+  /* all_eq is now all zeros if nothing matched, otherwise it holds 32 one
+     bits in each lane where every field compared equal. */
+  byte_mask = u8x16_compare_byte_mask ((u8x16) all_eq);
+
+  return byte_mask;
+}
+
+/* Vector variant: look up the addresses/ports of the given headers among the
+   four entries of AX4.  Returns the my_first_set() result for the match mask:
+   the index of the first matching entry, or a value outside 0..3 when no
+   entry matches. */
+static_always_inline uword
+ip4_tcp_udp_address_x4_match (ip4_tcp_udp_address_x4_t * ax4,
+                              ip4_header_t * ip,
+                              tcp_header_t * tcp)
+{
+  /* Broadcast each header field across all four lanes. */
+  u32x4 src_x4 = u32x4_splat (ip->src_address.as_u32);
+  u32x4 dst_x4 = u32x4_splat (ip->dst_address.as_u32);
+  u32x4 ports_x4 = u32x4_splat (tcp->ports.src_and_dst);
+  uword match_mask;
+
+  match_mask = ip4_tcp_udp_address_x4_match_helper (ax4, src_x4, dst_x4, ports_x4);
+
+  return my_first_set (match_mask);
+}
+
+/* Vector variant: find the first all-zero (unused) entry of AX4 by matching
+   against zero addresses and ports.  Returns the my_first_set() result for
+   that match mask. */
+static_always_inline uword
+ip4_tcp_udp_address_x4_first_empty (ip4_tcp_udp_address_x4_t * ax4)
+{
+  u32x4 all_zero = {0};
+  uword empty_lanes;
+
+  empty_lanes = ip4_tcp_udp_address_x4_match_helper (ax4, all_zero, all_zero, all_zero);
+
+  return my_first_set (empty_lanes);
+}
+
+/* Vector variant: return the my_zero_mask() reduction of the match of AX4
+   against all-zero addresses and ports, i.e. one bit per all-zero entry. */
+static_always_inline uword
+ip4_tcp_udp_address_x4_empty_mask (ip4_tcp_udp_address_x4_t * ax4)
+{
+  u32x4 all_zero = {0};
+  uword empty_lanes;
+
+  empty_lanes = ip4_tcp_udp_address_x4_match_helper (ax4, all_zero, all_zero, all_zero);
+
+  return my_zero_mask (empty_lanes);
+}
+#else /* TCP_HAVE_VEC128 */
+/* Scalar variant: compare SRC, DST and PORTS against each of the four entries
+   in AX4.  The result carries one nibble per entry (entry k occupies bits
+   4k..4k+3); a nibble is 0xf when all three fields of that entry match and
+   0x0 otherwise. */
+static_always_inline uword
+ip4_tcp_udp_address_x4_match_helper (ip4_tcp_udp_address_x4_t * ax4,
+                                     u32 src, u32 dst, u32 ports)
+{
+  uword nibble_mask = 0;
+  u32 slot;
+
+  for (slot = 0; slot < 4; slot++)
+    {
+      if (src == ax4->src.as_ip4_address[slot].as_u32
+          && dst == ax4->dst.as_ip4_address[slot].as_u32
+          && ports == ax4->ports.as_ports[slot].as_u32)
+        nibble_mask |= (uword) 0xf << (4 * slot);
+    }
+
+  return nibble_mask;
+}
+
+/* Scalar variant: look up the addresses/ports of the given headers among the
+   four entries of AX4.  Returns the my_first_set() result for the match mask:
+   the index of the first matching entry, or a value outside 0..3 when no
+   entry matches. */
+static_always_inline uword
+ip4_tcp_udp_address_x4_match (ip4_tcp_udp_address_x4_t * ax4,
+                              ip4_header_t * ip,
+                              tcp_header_t * tcp)
+{
+  uword match_mask;
+
+  match_mask = ip4_tcp_udp_address_x4_match_helper (ax4,
+                                                    ip->src_address.as_u32,
+                                                    ip->dst_address.as_u32,
+                                                    tcp->ports.src_and_dst);
+
+  return my_first_set (match_mask);
+}
+
+/* Scalar variant: find the first all-zero (unused) entry of AX4 by matching
+   against zero addresses and ports.  Returns the my_first_set() result for
+   that match mask. */
+static_always_inline uword
+ip4_tcp_udp_address_x4_first_empty (ip4_tcp_udp_address_x4_t * ax4)
+{
+  uword empty_slots = ip4_tcp_udp_address_x4_match_helper (ax4, 0, 0, 0);
+
+  return my_first_set (empty_slots);
+}
+
+/* Scalar variant: return the my_zero_mask() reduction of the match of AX4
+   against all-zero addresses and ports, i.e. one bit per all-zero entry. */
+static_always_inline uword
+ip4_tcp_udp_address_x4_empty_mask (ip4_tcp_udp_address_x4_t * ax4)
+{
+  uword empty_slots = ip4_tcp_udp_address_x4_match_helper (ax4, 0, 0, 0);
+
+  return my_zero_mask (empty_slots);
+}
+#endif
+
+/* format() callback printing one entry of a 4-wide IPv4 address block as
+   "src:port -> dst:port".  Consumes two varargs, in order: a pointer to the
+   ip4_tcp_udp_address_x4_t and the u32 entry index (asserted to be < 4).
+   Ports are converted with clib_net_to_host_u16 before printing. */
+static u8 * format_ip4_tcp_udp_address_x4 (u8 * s, va_list * va)
+{
+  ip4_tcp_udp_address_x4_t * a = va_arg (*va, ip4_tcp_udp_address_x4_t *);
+  u32 ai = va_arg (*va, u32);
+  int src_port, dst_port;
+
+  ASSERT (ai < 4);
+
+  src_port = clib_net_to_host_u16 (a->ports.as_ports[ai].src);
+  dst_port = clib_net_to_host_u16 (a->ports.as_ports[ai].dst);
+
+  return format (s, "%U:%d -> %U:%d",
+                 format_ip4_address, &a->src.as_ip4_address[ai], src_port,
+                 format_ip4_address, &a->dst.as_ip4_address[ai], dst_port);
+}
+
+/* Fill entry I of the 4-wide IPv6 address block A from the given headers.
+   The block stores addresses word-major (as_u32[word][entry]), so each of the
+   four 32-bit words of the source and destination addresses is written to
+   column I of its row.  The port word is taken from TCP. */
+static_always_inline void
+ip6_tcp_udp_address_x4_set_from_headers (ip6_tcp_udp_address_x4_t * a,
+                                         ip6_header_t * ip,
+                                         tcp_header_t * tcp,
+                                         u32 i)
+{
+  u32 word;
+
+  for (word = 0; word < 4; word++)
+    a->src.as_u32[word][i] = ip->src_address.as_u32[word];
+
+  for (word = 0; word < 4; word++)
+    a->dst.as_u32[word][i] = ip->dst_address.as_u32[word];
+
+  a->ports.as_ports[i].as_u32 = tcp->ports.src_and_dst;
+}
+
+/* Move entry SRC_I of SRC into entry DST_I of DST.  Every 32-bit word of the
+   source address, then of the destination address, then the port word, is
+   copied and the originating word is zeroed immediately afterwards, leaving
+   the source entry all-zero. */
+static_always_inline void
+ip6_tcp_udp_address_x4_copy_and_invalidate (ip6_tcp_udp_address_x4_t * dst,
+                                            ip6_tcp_udp_address_x4_t * src,
+                                            u32 dst_i, u32 src_i)
+{
+  u32 word;
+
+  for (word = 0; word < 4; word++)
+    {
+      dst->src.as_u32[word][dst_i] = src->src.as_u32[word][src_i];
+      src->src.as_u32[word][src_i] = 0;
+    }
+
+  for (word = 0; word < 4; word++)
+    {
+      dst->dst.as_u32[word][dst_i] = src->dst.as_u32[word][src_i];
+      src->dst.as_u32[word][src_i] = 0;
+    }
+
+  dst->ports.as_ports[dst_i].as_u32 = src->ports.as_ports[src_i].as_u32;
+  src->ports.as_ports[src_i].as_u32 = 0;
+}
+
+/* Zero slot I of A (all address words and the port pair); an all-zero slot
+   is what ip6_tcp_udp_address_x4_is_valid reports as not valid. */
+static_always_inline void
+ip6_tcp_udp_address_x4_invalidate (ip6_tcp_udp_address_x4_t * a, u32 i)
+{
+  u32 w;
+
+  for (w = 0; w < 4; w++)
+    {
+      a->src.as_u32[w][i] = 0;
+      a->dst.as_u32[w][i] = 0;
+    }
+
+  a->ports.as_ports[i].as_u32 = 0;
+}
+
+/* Returns 1 when slot I of A holds anything other than the all-zero entry
+   (any non-zero src word, dst word or port pair), else 0. */
+static_always_inline uword
+ip6_tcp_udp_address_x4_is_valid (ip6_tcp_udp_address_x4_t * a, u32 i)
+{
+  u32 any_bits = a->ports.as_ports[i].as_u32;
+  u32 w;
+
+  for (w = 0; w < 4; w++)
+    any_bits |= a->src.as_u32[w][i] | a->dst.as_u32[w][i];
+
+  return any_bits != 0;
+}
+
+#ifdef TCP_HAVE_VEC128
+/* Vector compare of one key against all four slots of AX4.  Each argument is
+   a key word compared lane-wise against the matching stored word; the
+   per-lane results are ANDed and collapsed to a byte mask by
+   u8x16_compare_byte_mask. */
+static_always_inline uword
+ip6_tcp_udp_address_x4_match_helper (ip6_tcp_udp_address_x4_t * ax4,
+                                     u32x4 src0, u32x4 src1, u32x4 src2, u32x4 src3,
+                                     u32x4 dst0, u32x4 dst1, u32x4 dst2, u32x4 dst3,
+                                     u32x4 ports)
+{
+  u32x4 eq;
+  u32 byte_mask;
+
+  eq = u32x4_is_equal (ports, ax4->ports.as_u32x4);
+
+  eq &= u32x4_is_equal (src0, ax4->src.as_u32x4[0]);
+  eq &= u32x4_is_equal (src1, ax4->src.as_u32x4[1]);
+  eq &= u32x4_is_equal (src2, ax4->src.as_u32x4[2]);
+  eq &= u32x4_is_equal (src3, ax4->src.as_u32x4[3]);
+
+  eq &= u32x4_is_equal (dst0, ax4->dst.as_u32x4[0]);
+  eq &= u32x4_is_equal (dst1, ax4->dst.as_u32x4[1]);
+  eq &= u32x4_is_equal (dst2, ax4->dst.as_u32x4[2]);
+  eq &= u32x4_is_equal (dst3, ax4->dst.as_u32x4[3]);
+
+  /* eq is now all zeros when nothing matched, otherwise it has 32 one bits
+     in the lane of the slot that matched. */
+  byte_mask = u8x16_compare_byte_mask ((u8x16) eq);
+
+  return byte_mask;
+}
+
+/* Look up the (src, dst, ports) key taken from IP/TCP in the four slots of
+   AX4; every key word is splatted across the vector lanes and the helper's
+   mask is reduced by my_first_set. */
+static_always_inline uword
+ip6_tcp_udp_address_x4_match (ip6_tcp_udp_address_x4_t * ax4,
+                              ip6_header_t * ip,
+                              tcp_header_t * tcp)
+{
+  uword match_mask;
+
+  match_mask = ip6_tcp_udp_address_x4_match_helper
+    (ax4,
+     u32x4_splat (ip->src_address.as_u32[0]),
+     u32x4_splat (ip->src_address.as_u32[1]),
+     u32x4_splat (ip->src_address.as_u32[2]),
+     u32x4_splat (ip->src_address.as_u32[3]),
+     u32x4_splat (ip->dst_address.as_u32[0]),
+     u32x4_splat (ip->dst_address.as_u32[1]),
+     u32x4_splat (ip->dst_address.as_u32[2]),
+     u32x4_splat (ip->dst_address.as_u32[3]),
+     u32x4_splat (tcp->ports.src_and_dst));
+
+  return my_first_set (match_mask);
+}
+
+/* Slot index of the first all-zero entry of AX4 (vector build): match an
+   all-zero key and reduce the mask with my_first_set. */
+static_always_inline uword
+ip6_tcp_udp_address_x4_first_empty (ip6_tcp_udp_address_x4_t * ax4)
+{
+  u32x4 z = {0};
+  uword zero_key_mask;
+
+  zero_key_mask = ip6_tcp_udp_address_x4_match_helper (ax4,
+                                                       z, z, z, z,
+                                                       z, z, z, z,
+                                                       z);
+  return my_first_set (zero_key_mask);
+}
+
+/* Mask of the all-zero entries of AX4 (vector build): the all-zero-key match
+   mask passed through my_zero_mask. */
+static_always_inline uword
+ip6_tcp_udp_address_x4_empty_mask (ip6_tcp_udp_address_x4_t * ax4)
+{
+  u32x4 z = {0};
+  uword zero_key_mask;
+
+  zero_key_mask = ip6_tcp_udp_address_x4_match_helper (ax4,
+                                                       z, z, z, z,
+                                                       z, z, z, z,
+                                                       z);
+  return my_zero_mask (zero_key_mask);
+}
+#else /* TCP_HAVE_VEC128 */
+/* Scalar compare of one key against all four slots of AX4.
+
+   SRC0..SRC3 / DST0..DST3 are the four 32-bit words of the key's source and
+   destination addresses; PORTS is the combined src/dst port word.
+
+   Returns a 16-bit mask with 4 bits per slot: bits [4*i, 4*i+3] are all set
+   when slot i equals the key, mirroring the byte mask produced by the
+   vector implementation.
+
+   Addresses are stored word-major, i.e. as_u32[word][slot], exactly as
+   written by ip6_tcp_udp_address_x4_set_from_headers; the slot is therefore
+   the SECOND index. */
+static_always_inline uword
+ip6_tcp_udp_address_x4_match_helper (ip6_tcp_udp_address_x4_t * ax4,
+                                     u32 src0, u32 src1, u32 src2, u32 src3,
+                                     u32 dst0, u32 dst1, u32 dst2, u32 dst3,
+                                     u32 ports)
+{
+  u32 r0, r1, r2, r3;
+
+#define _(i)						\
+  r##i = (src0 == ax4->src.as_u32[0][i]			\
+	  && src1 == ax4->src.as_u32[1][i]		\
+	  && src2 == ax4->src.as_u32[2][i]		\
+	  && src3 == ax4->src.as_u32[3][i]		\
+	  && dst0 == ax4->dst.as_u32[0][i]		\
+	  && dst1 == ax4->dst.as_u32[1][i]		\
+	  && dst2 == ax4->dst.as_u32[2][i]		\
+	  && dst3 == ax4->dst.as_u32[3][i]		\
+	  && ports == ax4->ports.as_ports[i].as_u32)
+
+  _ (0);
+  _ (1);
+  _ (2);
+  _ (3);
+
+#undef _
+
+  return (((r0 ? 0xf : 0x0) << 0)
+	  | ((r1 ? 0xf : 0x0) << 4)
+	  | ((r2 ? 0xf : 0x0) << 8)
+	  | ((r3 ? 0xf : 0x0) << 12));
+}
+
+/* Look up the (src, dst, ports) key taken from IP/TCP in the four slots of
+   AX4 (scalar build) and reduce the helper's mask with my_first_set. */
+static_always_inline uword
+ip6_tcp_udp_address_x4_match (ip6_tcp_udp_address_x4_t * ax4,
+                              ip6_header_t * ip,
+                              tcp_header_t * tcp)
+{
+  uword match_mask;
+
+  match_mask = ip6_tcp_udp_address_x4_match_helper
+    (ax4,
+     ip->src_address.as_u32[0], ip->src_address.as_u32[1],
+     ip->src_address.as_u32[2], ip->src_address.as_u32[3],
+     ip->dst_address.as_u32[0], ip->dst_address.as_u32[1],
+     ip->dst_address.as_u32[2], ip->dst_address.as_u32[3],
+     tcp->ports.src_and_dst);
+
+  return my_first_set (match_mask);
+}
+
+/* Slot index of the first all-zero entry of AX4 (scalar build): match an
+   all-zero key and reduce the mask with my_first_set. */
+static_always_inline uword
+ip6_tcp_udp_address_x4_first_empty (ip6_tcp_udp_address_x4_t * ax4)
+{
+  uword zero_key_mask;
+
+  zero_key_mask = ip6_tcp_udp_address_x4_match_helper (ax4,
+                                                       /* src */ 0, 0, 0, 0,
+                                                       /* dst */ 0, 0, 0, 0,
+                                                       /* ports */ 0);
+  return my_first_set (zero_key_mask);
+}
+
+/* Mask of the all-zero entries of AX4 (scalar build): the all-zero-key match
+   mask passed through my_zero_mask. */
+static_always_inline uword
+ip6_tcp_udp_address_x4_empty_mask (ip6_tcp_udp_address_x4_t * ax4)
+{
+  uword zero_key_mask;
+
+  zero_key_mask = ip6_tcp_udp_address_x4_match_helper (ax4,
+                                                       /* src */ 0, 0, 0, 0,
+                                                       /* dst */ 0, 0, 0, 0,
+                                                       /* ports */ 0);
+  return my_zero_mask (zero_key_mask);
+}
+#endif /* ! TCP_HAVE_VEC128 */
+
+/* format() callback printing one slot of an ip6 address/port group as
+   "src:port -> dst:port".
+   Varargs: ip6_tcp_udp_address_x4_t * group, u32 slot (must be < 4). */
+static u8 * format_ip6_tcp_udp_address_x4 (u8 * s, va_list * va)
+{
+  ip6_tcp_udp_address_x4_t * group = va_arg (*va, ip6_tcp_udp_address_x4_t *);
+  u32 slot = va_arg (*va, u32);
+  ip6_address_t src_addr, dst_addr;
+  u16 src_port, dst_port;
+  u32 w;
+
+  ASSERT (slot < 4);
+
+  /* Addresses are stored word-major (as_u32[word][slot]); gather the four
+     words of this slot into contiguous addresses for format_ip6_address. */
+  for (w = 0; w < 4; w++)
+    {
+      src_addr.as_u32[w] = group->src.as_u32[w][slot];
+      dst_addr.as_u32[w] = group->dst.as_u32[w][slot];
+    }
+
+  src_port = clib_net_to_host_u16 (group->ports.as_ports[slot].src);
+  dst_port = clib_net_to_host_u16 (group->ports.as_ports[slot].dst);
+
+  return format (s, "%U:%d -> %U:%d",
+                 format_ip6_address, &src_addr, src_port,
+                 format_ip6_address, &dst_addr, dst_port);
+}
+
+/* Returns the index (0..3) of the entry of TIME_STAMPS[0..3] with the largest
+   age, where age is (now - time_stamp) in wrapping u32 arithmetic.
+   Ties: within each pair {0,1} and {2,3} the lower index wins; between the
+   two pairs the upper pair {2,3} wins. */
+static_always_inline u32
+find_oldest_timestamp_x4 (u32 * time_stamps, u32 now)
+{
+  u32 age0 = now - time_stamps[0];
+  u32 age1 = now - time_stamps[1];
+  u32 age2 = now - time_stamps[2];
+  u32 age3 = now - time_stamps[3];
+  u32 lo_index, lo_age, hi_index, hi_age;
+
+  /* Oldest of the pair {0, 1}. */
+  if (age1 > age0)
+    {
+      lo_index = 1;
+      lo_age = age1;
+    }
+  else
+    {
+      lo_index = 0;
+      lo_age = age0;
+    }
+
+  /* Oldest of the pair {2, 3}. */
+  if (age3 > age2)
+    {
+      hi_index = 3;
+      hi_age = age3;
+    }
+  else
+    {
+      hi_index = 2;
+      hi_age = age2;
+    }
+
+  return lo_age > hi_age ? lo_index : hi_index;
+}
+
+/* Statistics are usable once at least one sample has been counted. */
+static_always_inline uword
+tcp_round_trip_time_stats_is_valid (tcp_round_trip_time_stats_t * s)
+{
+  return (s->count > 0) ? 1 : 0;
+}
+
+/* Compute mean and standard deviation from the accumulated sums in S.
+   S must hold at least one sample (s->count > 0).
+   On return r[0] is the mean (sum / count) and r[1] is
+   sqrt (sum2 / count - mean^2), clamped to 0. */
+static_always_inline void
+tcp_round_trip_time_stats_compute (tcp_round_trip_time_stats_t * s, f64 * r)
+{
+  f64 ave, var;
+
+  ASSERT (s->count > 0);
+
+  ave = s->sum / s->count;
+  var = s->sum2 / s->count - ave * ave;
+
+  r[0] = ave;
+  /* Floating point rounding can leave the variance of (nearly) identical
+     samples slightly negative; clamp so that sqrt never yields NaN. */
+  r[1] = var > 0 ? sqrt (var) : 0;
+}
+
+/* TCP time stamp option, packed: one option-type byte, one length byte and
+   two 32-bit time stamps (10 bytes in total, which is the length value the
+   packet templates in tcp_lookup_init store). */
+typedef struct {
+ tcp_option_type_t type : 8;
+ u8 length;
+ u32 my_time_stamp, his_time_stamp;
+} __attribute__ ((packed)) tcp_time_stamp_option_t;
+
+/* TCP header followed by the fixed option block used by the SYN and SYN-ACK
+   packet templates: MSS, window scale, three NOP bytes, then the time stamp
+   option. */
+typedef struct {
+ tcp_header_t header;
+
+ struct {
+ /* Maximum segment size option (type, length, 16-bit value). */
+ struct {
+ tcp_option_type_t type : 8;
+ u8 length;
+ u16 value;
+ } mss;
+
+ /* Window scale option (type, length, 8-bit shift count). */
+ struct {
+ tcp_option_type_t type : 8;
+ u8 length;
+ u8 value;
+ } __attribute__ ((packed)) window_scale;
+
+ /* Filled with TCP_OPTION_NOP by tcp_lookup_init. */
+ u8 nops[3];
+
+ tcp_time_stamp_option_t time_stamp;
+ } __attribute__ ((packed)) options;
+} __attribute__ ((packed)) tcp_syn_packet_t;
+
+/* TCP header followed by the fixed option block used by the ACK, FIN-ACK and
+   RST-ACK packet templates: two NOP bytes, then the time stamp option. */
+typedef struct {
+ tcp_header_t header;
+
+ struct {
+ /* Filled with TCP_OPTION_NOP by tcp_lookup_init. */
+ u8 nops[2];
+
+ tcp_time_stamp_option_t time_stamp;
+ } options;
+} __attribute__ ((packed)) tcp_ack_packet_t;
+
+/* IP4 header + SYN-style TCP header/options (template for SYN, SYN-ACK). */
+typedef struct {
+ ip4_header_t ip4;
+ tcp_syn_packet_t tcp;
+} ip4_tcp_syn_packet_t;
+
+/* IP4 header + ACK-style TCP header/options (template for ACK, FIN-ACK,
+   RST-ACK). */
+typedef struct {
+ ip4_header_t ip4;
+ tcp_ack_packet_t tcp;
+} ip4_tcp_ack_packet_t;
+
+/* IP6 header + SYN-style TCP header/options (template for SYN, SYN-ACK). */
+typedef struct {
+ ip6_header_t ip6;
+ tcp_syn_packet_t tcp;
+} ip6_tcp_syn_packet_t;
+
+/* IP6 header + ACK-style TCP header/options (template for ACK, FIN-ACK,
+   RST-ACK). */
+typedef struct {
+ ip6_header_t ip6;
+ tcp_ack_packet_t tcp;
+} ip6_tcp_ack_packet_t;
+
+/* Fill in the address-independent fields of an IP4 header for a TCP packet of
+   N_BYTES total length (IP header included) and compute the header checksum
+   over the result.  Fields not written here keep whatever the caller put in
+   them. */
+static_always_inline void
+ip4_tcp_packet_init (ip4_header_t * ip, u32 n_bytes)
+{
+  /* Version 4, header length 5 words. */
+  ip->ip_version_and_header_length = 0x45;
+  ip->length = clib_host_to_net_u16 (n_bytes);
+  ip->protocol = IP_PROTOCOL_TCP;
+
+  /* Host-wide defaults. */
+  ip->ttl = ip4_main.host_config.ttl;
+  ip->tos = ip4_main.host_config.tos;
+
+  /* DF is set, so no fragment ID is needed. */
+  ip->flags_and_fragment_offset =
+    clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
+
+  /* Must come last: covers every field written above. */
+  ip->checksum = ip4_header_checksum (ip);
+}
+
+/* Fill in the address-independent fields of an IP6 header for a packet of
+   N_BYTES total length (IP6 header included): version, payload length and
+   hop limit.
+   NOTE(review): ip->protocol is not written here, although tcp_lookup_init
+   reads ip6.protocol when building the template checksums -- confirm whether
+   it is set elsewhere before the templates are used. */
+static_always_inline void
+ip6_tcp_packet_init (ip6_header_t * ip, u32 n_bytes)
+{
+  u32 payload_bytes = n_bytes - sizeof (ip[0]);
+
+  ip->hop_limit = ip6_main.host_config.ttl;
+
+  ip->payload_length = clib_host_to_net_u16 (payload_bytes);
+
+  /* Version 6; traffic class and flow label zero. */
+  ip->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
+}
+
+/* Current time in ticks of timer type T: the cpu clock counter shifted right
+   by that timer's log2 (clocks per tick), truncated to 32 bits. */
+static_always_inline u32
+tcp_time_now (tcp_main_t * tm, tcp_timer_type_t t)
+{
+  u64 cpu_clocks;
+
+  ASSERT (t < ARRAY_LEN (tm->log2_clocks_per_tick));
+
+  cpu_clocks = clib_cpu_time_now ();
+  return cpu_clocks >> tm->log2_clocks_per_tick[t];
+}
+
+/* For every timer type choose a power-of-two number of cpu clocks per tick
+   that best approximates the tick period listed in foreach_tcp_timer, and
+   record both the exponent and the resulting tick length in seconds. */
+static void
+tcp_time_init (vlib_main_t * vm, tcp_main_t * tm)
+{
+  /* Tick period in seconds for each timer type, in foreach_tcp_timer
+     order. */
+  static f64 t[] = {
+#define _(f,r) r,
+    foreach_tcp_timer
+#undef _
+  };
+  /* Natural log of 2, used to turn log () into a base-2 logarithm.  Named
+     ln2 so that it does not shadow the libm function log2. */
+  f64 ln2 = .69314718055994530941;
+  int i;
+
+  for (i = 0; i < ARRAY_LEN (tm->log2_clocks_per_tick); i++)
+    {
+      tm->log2_clocks_per_tick[i] =
+	flt_round_nearest (log (t[i] / vm->clib_time.seconds_per_clock) / ln2);
+
+      /* Shift a 64-bit one: with multi-GHz clocks and tick periods around a
+	 second the exponent reaches 31 or more, where shifting a plain int
+	 is undefined. */
+      tm->secs_per_tick[i] = vm->clib_time.seconds_per_clock
+	* (f64) (1ULL << tm->log2_clocks_per_tick[i]);
+    }
+}
+
+tcp_main_t tcp_main;
+
+/* Next-node indices of the tcp lookup nodes.  Used to index .next_nodes in
+   the ip4/ip6 lookup node registrations and stored as the .next member of
+   the disposition table entries. */
+typedef enum {
+ TCP_LOOKUP_NEXT_DROP,
+ TCP_LOOKUP_NEXT_PUNT,
+ TCP_LOOKUP_NEXT_LISTEN_SYN,
+ TCP_LOOKUP_NEXT_LISTEN_ACK,
+ TCP_LOOKUP_NEXT_CONNECT_SYN_ACK,
+ TCP_LOOKUP_NEXT_ESTABLISHED,
+ /* Number of next nodes; must stay last. */
+ TCP_LOOKUP_N_NEXT,
+} tcp_lookup_next_t;
+
+/* X-macro listing every tcp error/counter as _ (SYMBOL, "description").
+   Expanded below into the tcp_error_t enum (TCP_ERROR_<SYMBOL>) and, with
+   the same ordering, into the tcp_error_strings table. */
+#define foreach_tcp_error \
+ _ (NONE, "no error") \
+ _ (LOOKUP_DROPS, "lookup drops") \
+ _ (LISTEN_RESPONSES, "listen responses sent") \
+ _ (CONNECTS_SENT, "connects sent") \
+ _ (LISTENS_ESTABLISHED, "listens connected") \
+ _ (UNEXPECTED_SEQ_NUMBER, "unexpected sequence number drops") \
+ _ (UNEXPECTED_ACK_NUMBER, "unexpected acknowledgment number drops") \
+ _ (CONNECTS_ESTABLISHED, "connects established") \
+ _ (NO_LISTENER_FOR_PORT, "no listener for port") \
+ _ (WRONG_LOCAL_ADDRESS_FOR_PORT, "wrong local address for port") \
+ _ (ACKS_SENT, "acks sent for established connections") \
+ _ (NO_DATA, "acks with no data") \
+ _ (FINS_RECEIVED, "fins received") \
+ _ (SEGMENT_AFTER_FIN, "segments dropped after fin received") \
+ _ (CONNECTIONS_CLOSED, "connections closed")
+
+/* Error codes, one per foreach_tcp_error entry; TCP_N_ERROR is the count. */
+typedef enum {
+#define _(sym,str) TCP_ERROR_##sym,
+ foreach_tcp_error
+#undef _
+ TCP_N_ERROR,
+} tcp_error_t;
+
+#ifdef TCP_HAVE_VEC128
+/* Build a vector from X via u32x4_set0 and interleave its low half with
+   itself, so that X ends up in the two lowest lanes (the lookup code only
+   reads lanes 0 and 1 of the hash result). */
+static_always_inline u32x4 u32x4_splat_x2 (u32 x)
+{
+  u32x4 lane0_x = u32x4_set0 (x);
+
+  return u32x4_interleave_lo (lane0_x, lane0_x);
+}
+
+/* Build vectors from X and Y via u32x4_set0 and interleave their low halves,
+   X first. */
+static_always_inline u32x4 u32x4_set_x2 (u32 x, u32 y)
+{
+  u32x4 lane0_x = u32x4_set0 (x);
+  u32x4 lane0_y = u32x4_set0 (y);
+
+  return u32x4_interleave_lo (lane0_x, lane0_y);
+}
+
+/* FIXME: extracts 32-bit lane I of vector X through a compiler builtin
+   (__builtin_ia32_vec_ext_v4si), so it is tied to compilers/targets that
+   provide that builtin. */
+#define u32x4_get(x,i) \
+ __builtin_ia32_vec_ext_v4si ((i32x4) (x), (int) (i))
+#else /* TCP_HAVE_VEC128 */
+#endif /* TCP_HAVE_VEC128 */
+
+/* Dispatching on tcp/udp listeners (by dst port)
+   and tcp/udp connections (by src/dst address/port).
+
+   For every packet of FRAME:
+     - hash (src address, dst address, ports) with two independent seeds to
+       get a bucket in the mini-connection table and a bucket in the
+       established-connection table (4 slots per bucket);
+     - search both buckets for the packet's key;
+     - derive the connection state from which table matched, and use the
+       (state, tcp flags) disposition table to choose next node and error;
+     - packets whose destination port has no listener are punted.
+
+   The chosen mini/established connection indices and the listener index are
+   left in the buffer's ip.tcp opaque data for the next node.
+
+   IS_IP6 selects the address family; the two wrappers pass it as a constant.
+   Returns the number of packets processed (frame->n_vectors). */
+static_always_inline uword
+ip46_tcp_lookup (vlib_main_t * vm,
+		 vlib_node_runtime_t * node,
+		 vlib_frame_t * frame,
+		 uword is_ip6)
+{
+  tcp_main_t * tm = &tcp_main;
+  ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
+  uword n_packets = frame->n_vectors;
+  u32 * from, * to_next;
+  u32 n_left_from, n_left_to_next, next, mini_now;
+  vlib_node_runtime_t * error_node = node;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = n_packets;
+  next = node->cached_next_index;
+  mini_now = tcp_time_now (tm, TCP_TIMER_mini_connection);
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  vlib_buffer_t * p0;
+	  ip6_header_t * ip60;
+	  ip4_header_t * ip40;
+	  tcp_header_t * tcp0;
+	  u32 bi0, imin0, iest0, li0;
+	  tcp_connection_state_t state0;
+	  u8 error0, next0;
+	  u8 min_match0, est_match0, is_min_match0, is_est_match0;
+	  u8 min_oldest0, est_first_empty0;
+
+	  /* Speculatively enqueue to the current next frame. */
+	  bi0 = to_next[0] = from[0];
+
+	  from += 1;
+	  n_left_from -= 1;
+	  to_next += 1;
+	  n_left_to_next -= 1;
+
+	  p0 = vlib_get_buffer (vm, bi0);
+
+#ifdef TCP_HAVE_VEC128
+	  {
+	    /* Lane 0 hashes for the mini table, lane 1 for the established
+	       table; both lanes see the same key but different seeds. */
+	    u32x4 a0, b0, c0;
+
+	    a0 = tm->connection_hash_seeds[is_ip6][0].as_u32x4;
+	    b0 = tm->connection_hash_seeds[is_ip6][1].as_u32x4;
+	    c0 = tm->connection_hash_seeds[is_ip6][2].as_u32x4;
+
+	    if (is_ip6)
+	      {
+		ip60 = vlib_buffer_get_current (p0);
+		tcp0 = ip6_next_header (ip60);
+
+		a0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[0]);
+		b0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[1]);
+		c0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[2]);
+
+		hash_v3_mix_u32x (a0, b0, c0);
+
+		a0 ^= u32x4_splat_x2 (ip60->src_address.as_u32[3]);
+		b0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[0]);
+		c0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[1]);
+
+		hash_v3_mix_u32x (a0, b0, c0);
+
+		a0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[2]);
+		b0 ^= u32x4_splat_x2 (ip60->dst_address.as_u32[3]);
+		c0 ^= u32x4_splat_x2 (tcp0->ports.src_and_dst);
+	      }
+	    else
+	      {
+		ip40 = vlib_buffer_get_current (p0);
+		tcp0 = ip4_next_header (ip40);
+
+		a0 ^= u32x4_splat_x2 (ip40->src_address.as_u32);
+		b0 ^= u32x4_splat_x2 (ip40->dst_address.as_u32);
+		c0 ^= u32x4_splat_x2 (tcp0->ports.src_and_dst);
+	      }
+
+	    hash_v3_finalize_u32x (a0, b0, c0);
+
+	    c0 &= tm->connection_hash_masks[is_ip6].as_u32x4;
+
+	    imin0 = u32x4_get0 (c0);
+	    iest0 = u32x4_get (c0, 1);
+	  }
+#else
+	  {
+	    /* Scalar version of the same two-seed hash: the x0 variables hash
+	       for the mini table, the x1 variables for the established
+	       table. */
+	    u32 a00, a01, b00, b01, c00, c01;
+
+	    a00 = tm->connection_hash_seeds[is_ip6][0].as_u32[0];
+	    a01 = tm->connection_hash_seeds[is_ip6][0].as_u32[1];
+	    b00 = tm->connection_hash_seeds[is_ip6][1].as_u32[0];
+	    b01 = tm->connection_hash_seeds[is_ip6][1].as_u32[1];
+	    c00 = tm->connection_hash_seeds[is_ip6][2].as_u32[0];
+	    c01 = tm->connection_hash_seeds[is_ip6][2].as_u32[1];
+
+	    if (is_ip6)
+	      {
+		ip60 = vlib_buffer_get_current (p0);
+		tcp0 = ip6_next_header (ip60);
+
+		a00 ^= ip60->src_address.as_u32[0];
+		a01 ^= ip60->src_address.as_u32[0];
+		b00 ^= ip60->src_address.as_u32[1];
+		b01 ^= ip60->src_address.as_u32[1];
+		c00 ^= ip60->src_address.as_u32[2];
+		c01 ^= ip60->src_address.as_u32[2];
+
+		hash_v3_mix32 (a00, b00, c00);
+		hash_v3_mix32 (a01, b01, c01);
+
+		a00 ^= ip60->src_address.as_u32[3];
+		a01 ^= ip60->src_address.as_u32[3];
+		b00 ^= ip60->dst_address.as_u32[0];
+		b01 ^= ip60->dst_address.as_u32[0];
+		c00 ^= ip60->dst_address.as_u32[1];
+		c01 ^= ip60->dst_address.as_u32[1];
+
+		hash_v3_mix32 (a00, b00, c00);
+		hash_v3_mix32 (a01, b01, c01);
+
+		a00 ^= ip60->dst_address.as_u32[2];
+		a01 ^= ip60->dst_address.as_u32[2];
+		b00 ^= ip60->dst_address.as_u32[3];
+		b01 ^= ip60->dst_address.as_u32[3];
+		c00 ^= tcp0->ports.src_and_dst;
+		c01 ^= tcp0->ports.src_and_dst;
+	      }
+	    else
+	      {
+		ip40 = vlib_buffer_get_current (p0);
+		tcp0 = ip4_next_header (ip40);
+
+		a00 ^= ip40->src_address.as_u32;
+		a01 ^= ip40->src_address.as_u32;
+		b00 ^= ip40->dst_address.as_u32;
+		b01 ^= ip40->dst_address.as_u32;
+		c00 ^= tcp0->ports.src_and_dst;
+		c01 ^= tcp0->ports.src_and_dst;
+	      }
+
+	    hash_v3_finalize32 (a00, b00, c00);
+	    hash_v3_finalize32 (a01, b01, c01);
+
+	    c00 &= tm->connection_hash_masks[is_ip6].as_u32[0];
+	    c01 &= tm->connection_hash_masks[is_ip6].as_u32[1];
+
+	    imin0 = c00;
+	    iest0 = c01;
+	  }
+#endif
+
+	  /* Search both buckets.  The *_match0 / est_first_empty0 values are
+	     slot indices: 0..3 on success, >= 4 when nothing was found. */
+	  if (is_ip6)
+	    {
+	      ip6_tcp_udp_address_x4_and_timestamps_t * mina0;
+	      ip6_tcp_udp_address_x4_t * esta0;
+
+	      mina0 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin0);
+	      esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest0);
+
+	      min_match0 = ip6_tcp_udp_address_x4_match (&mina0->address_x4, ip60, tcp0);
+	      est_match0 = ip6_tcp_udp_address_x4_match (esta0, ip60, tcp0);
+
+	      min_oldest0 = find_oldest_timestamp_x4 (mina0->time_stamps, mini_now);
+	      est_first_empty0 = ip6_tcp_udp_address_x4_first_empty (esta0);
+
+	      /* No established match, established bucket full and no mini
+		 match.  Compare against 4 rather than using '!': a value of
+		 0 means "matched slot 0", not "no match". */
+	      if (PREDICT_FALSE (est_match0 >= 4 && est_first_empty0 >= 4 && min_match0 >= 4))
+		{
+		  /* Lookup in overflow hash. */
+		  ASSERT (0);
+		}
+	    }
+	  else
+	    {
+	      ip4_tcp_udp_address_x4_and_timestamps_t * mina0;
+	      ip4_tcp_udp_address_x4_t * esta0;
+
+	      mina0 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin0);
+	      esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest0);
+
+	      min_match0 = ip4_tcp_udp_address_x4_match (&mina0->address_x4, ip40, tcp0);
+	      est_match0 = ip4_tcp_udp_address_x4_match (esta0, ip40, tcp0);
+
+	      min_oldest0 = find_oldest_timestamp_x4 (mina0->time_stamps, mini_now);
+	      est_first_empty0 = ip4_tcp_udp_address_x4_first_empty (esta0);
+
+	      /* Same overflow condition as the ip6 branch. */
+	      if (PREDICT_FALSE (est_match0 >= 4 && est_first_empty0 >= 4 && min_match0 >= 4))
+		{
+		  /* Lookup in overflow hash. */
+		  ASSERT (0);
+		}
+	    }
+
+	  is_min_match0 = min_match0 < 4;
+	  is_est_match0 = est_match0 < 4;
+
+	  /* Turn bucket indices into connection indices (4 per bucket).
+	     Without a match, the mini index designates the oldest slot and
+	     the established index the first empty slot, i.e. the slots a new
+	     connection would take. */
+	  imin0 = 4 * imin0 + (is_min_match0 ? min_match0 : min_oldest0);
+	  iest0 = 4 * iest0 + (is_est_match0 ? est_match0 : est_first_empty0);
+
+	  /* Should simultaneously not match both in mini and established connection tables. */
+	  ASSERT (! (is_min_match0 && is_est_match0));
+
+	  {
+	    tcp_mini_connection_t * min0;
+	    tcp_connection_t * est0;
+	    tcp_sequence_pair_t * seq_pair0;
+	    u8 flags0;
+
+	    min0 = vec_elt_at_index (tm46->mini_connections, imin0);
+	    est0 = vec_elt_at_index (tm46->established_connections, iest0);
+
+	    if (min_match0 < 4)
+	      {
+		ASSERT (min0->state != TCP_CONNECTION_STATE_unused);
+		ASSERT (min0->state != TCP_CONNECTION_STATE_established);
+	      }
+
+	    seq_pair0 = is_min_match0 ? &min0->sequence_numbers : &est0->sequence_numbers;
+
+	    /* Established match wins; else the mini connection's state; else
+	       unused. */
+	    state0 = is_min_match0 ? min0->state : TCP_CONNECTION_STATE_unused;
+	    state0 = is_est_match0 ? TCP_CONNECTION_STATE_established : state0;
+
+	    vnet_buffer (p0)->ip.tcp.established_connection_index = iest0;
+	    vnet_buffer (p0)->ip.tcp.mini_connection_index = imin0;
+	    vnet_buffer (p0)->ip.tcp.listener_index = li0 = tm->listener_index_by_dst_port[tcp0->ports.dst];
+
+	    flags0 = tcp0->flags & (TCP_FLAG_SYN | TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_FIN);
+
+	    next0 = tm->disposition_by_state_and_flags[state0][flags0].next;
+	    error0 = tm->disposition_by_state_and_flags[state0][flags0].error;
+
+	    /* Listener index 0 is the null listener: nobody listens on this
+	       port. */
+	    next0 = li0 != 0 ? next0 : TCP_LOOKUP_NEXT_PUNT;
+	    error0 = li0 != 0 ? error0 : TCP_ERROR_NO_LISTENER_FOR_PORT;
+	  }
+
+	  p0->error = error_node->errors[error0];
+
+	  /* Speculation failed: undo the enqueue and switch next frames. */
+	  if (PREDICT_FALSE (next0 != next))
+	    {
+	      to_next -= 1;
+	      n_left_to_next += 1;
+
+	      vlib_put_next_frame (vm, node, next, n_left_to_next);
+
+	      next = next0;
+	      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+	      to_next[0] = bi0;
+	      to_next += 1;
+	      n_left_to_next -= 1;
+	    }
+	}
+
+      vlib_put_next_frame (vm, node, next, n_left_to_next);
+    }
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    /* FIXME */ ;
+
+  return frame->n_vectors;
+}
+
+/* Node function of "ip4-tcp-lookup": the shared lookup with is_ip6 == 0. */
+static uword
+ip4_tcp_lookup (vlib_main_t * vm,
+		vlib_node_runtime_t * node,
+		vlib_frame_t * frame)
+{
+  return ip46_tcp_lookup (vm, node, frame, /* is_ip6 */ 0);
+}
+
+/* Node function of "ip6-tcp-lookup": the shared lookup with is_ip6 == 1. */
+static uword
+ip6_tcp_lookup (vlib_main_t * vm,
+		vlib_node_runtime_t * node,
+		vlib_frame_t * frame)
+{
+  return ip46_tcp_lookup (vm, node, frame, /* is_ip6 */ 1);
+}
+
+/* Derive the mini/established hash masks from the configured log2 sizes and
+   make sure both connection vectors have an element at the highest index
+   the masks can produce. */
+static void
+ip46_size_hash_tables (ip46_tcp_main_t * m)
+{
+  m->mini_connection_hash_mask =
+    pow2_mask (m->log2_n_mini_connection_hash_elts);
+  m->established_connection_hash_mask =
+    pow2_mask (m->log2_n_established_connection_hash_elts);
+
+  vec_validate_aligned (m->mini_connections,
+			m->mini_connection_hash_mask,
+			CLIB_CACHE_LINE_BYTES);
+  vec_validate_aligned (m->established_connections,
+			m->established_connection_hash_mask,
+			CLIB_CACHE_LINE_BYTES);
+}
+
+/* Per address family lookup setup: size the connection tables (2^8 entries
+   each), allocate the matching address-hash buckets (one bucket per 4
+   connections) and publish the bucket masks used by the lookup hash. */
+static void
+ip46_tcp_lookup_init (vlib_main_t * vm, tcp_main_t * tm, int is_ip6)
+{
+  ip46_tcp_main_t * m = is_ip6 ? &tm->ip6 : &tm->ip4;
+  uword last_mini_bucket, last_est_bucket;
+
+  m->is_ip6 = is_ip6;
+
+  m->log2_n_mini_connection_hash_elts = 8;
+  m->log2_n_established_connection_hash_elts = 8;
+  ip46_size_hash_tables (m);
+
+  /* Each address-hash element covers 4 connections. */
+  last_mini_bucket = m->mini_connection_hash_mask / 4;
+  last_est_bucket = m->established_connection_hash_mask / 4;
+
+  if (! is_ip6)
+    {
+      vec_validate_aligned (tm->ip4_mini_connection_address_hash,
+			    last_mini_bucket,
+			    CLIB_CACHE_LINE_BYTES);
+      vec_validate_aligned (tm->ip4_established_connection_address_hash,
+			    last_est_bucket,
+			    CLIB_CACHE_LINE_BYTES);
+    }
+  else
+    {
+      vec_validate_aligned (tm->ip6_mini_connection_address_hash,
+			    last_mini_bucket,
+			    CLIB_CACHE_LINE_BYTES);
+      vec_validate_aligned (tm->ip6_established_connection_address_hash,
+			    last_est_bucket,
+			    CLIB_CACHE_LINE_BYTES);
+    }
+
+  /* Word 0 masks the mini hash, word 1 the established hash. */
+  tm->connection_hash_masks[is_ip6].as_u32[0] = last_mini_bucket;
+  tm->connection_hash_masks[is_ip6].as_u32[1] = last_est_bucket;
+}
+
+/* One-time setup of everything the tcp lookup nodes need:
+     - random hash seeds and hash tables for ip4 and ip6;
+     - the null listener (pool index 0) and the dst-port -> listener table;
+     - the (connection state, tcp flags) -> (next node, error) disposition
+       table;
+     - ip4 and ip6 packet templates for SYN, SYN-ACK, ACK, FIN-ACK and
+       RST-ACK, together with their precomputed checksums. */
+static void
+tcp_lookup_init (vlib_main_t * vm, tcp_main_t * tm)
+{
+ int is_ip6;
+
+ /* Initialize hash seeds. */
+ for (is_ip6 = 0; is_ip6 < 2; is_ip6++)
+ {
+ /* 3 seeds (a, b, c) x 2 hashes (mini, established). */
+ u32 * r = clib_random_buffer_get_data (&vm->random_buffer, 3 * 2 * sizeof (r[0]));
+ tm->connection_hash_seeds[is_ip6][0].as_u32[0] = r[0];
+ tm->connection_hash_seeds[is_ip6][0].as_u32[1] = r[1];
+ tm->connection_hash_seeds[is_ip6][1].as_u32[0] = r[2];
+ tm->connection_hash_seeds[is_ip6][1].as_u32[1] = r[3];
+ tm->connection_hash_seeds[is_ip6][2].as_u32[0] = r[4];
+ tm->connection_hash_seeds[is_ip6][2].as_u32[1] = r[5];
+
+ ip46_tcp_lookup_init (vm, tm, is_ip6);
+ }
+
+ {
+ tcp_listener_t * l;
+
+ pool_get_aligned (tm->listener_pool, l, CLIB_CACHE_LINE_BYTES);
+
+ /* Null listener must always have zero index. */
+ ASSERT (l - tm->listener_pool == 0);
+
+ memset (l, 0, sizeof (l[0]));
+
+ /* No adjacencies are valid. */
+ l->valid_local_adjacency_bitmap = 0;
+
+ /* Every one of the 2^16 ports initially maps to the null listener. */
+ vec_validate_init_empty (tm->listener_index_by_dst_port,
+ (1 << 16) - 1,
+ l - tm->listener_pool);
+ }
+
+ /* Initialize disposition table. */
+ {
+ int i, j;
+ /* Default for every (state, flags) combination: drop. */
+ for (i = 0; i < ARRAY_LEN (tm->disposition_by_state_and_flags); i++)
+ for (j = 0; j < ARRAY_LEN (tm->disposition_by_state_and_flags[i]); j++)
+ {
+ tm->disposition_by_state_and_flags[i][j].next = TCP_LOOKUP_NEXT_DROP;
+ tm->disposition_by_state_and_flags[i][j].error = TCP_ERROR_LOOKUP_DROPS;
+ }
+
+#define _(t,f,n,e) \
+do { \
+ tm->disposition_by_state_and_flags[TCP_CONNECTION_STATE_##t][f].next = (n); \
+ tm->disposition_by_state_and_flags[TCP_CONNECTION_STATE_##t][f].error = (e); \
+} while (0)
+
+ /* SYNs for new connections -> tcp-listen. */
+ _ (unused, TCP_FLAG_SYN,
+ TCP_LOOKUP_NEXT_LISTEN_SYN, TCP_ERROR_NONE);
+ /* ACK while waiting for the final handshake ACK -> LISTEN_ACK next. */
+ _ (listen_ack_wait, TCP_FLAG_ACK,
+ TCP_LOOKUP_NEXT_LISTEN_ACK, TCP_ERROR_NONE);
+ /* ACK and FIN-ACK on established connections -> ESTABLISHED next. */
+ _ (established, TCP_FLAG_ACK,
+ TCP_LOOKUP_NEXT_ESTABLISHED, TCP_ERROR_NONE);
+ _ (established, TCP_FLAG_FIN | TCP_FLAG_ACK,
+ TCP_LOOKUP_NEXT_ESTABLISHED, TCP_ERROR_NONE);
+
+#undef _
+ }
+
+ /* IP4 and IP6 packet templates. */
+ {
+ ip4_tcp_syn_packet_t ip4_syn, ip4_syn_ack;
+ ip4_tcp_ack_packet_t ip4_ack, ip4_fin_ack, ip4_rst_ack;
+ ip6_tcp_syn_packet_t ip6_syn, ip6_syn_ack;
+ ip6_tcp_ack_packet_t ip6_ack, ip6_fin_ack, ip6_rst_ack;
+
+ /* Start from all-zero packets: addresses, ports, sequence numbers and
+ every other field not set below stay 0 in the templates. */
+ memset (&ip4_syn, 0, sizeof (ip4_syn));
+ memset (&ip4_syn_ack, 0, sizeof (ip4_syn_ack));
+ memset (&ip4_ack, 0, sizeof (ip4_ack));
+ memset (&ip4_fin_ack, 0, sizeof (ip4_fin_ack));
+ memset (&ip4_rst_ack, 0, sizeof (ip4_rst_ack));
+ memset (&ip6_syn, 0, sizeof (ip6_syn));
+ memset (&ip6_syn_ack, 0, sizeof (ip6_syn_ack));
+ memset (&ip6_ack, 0, sizeof (ip6_ack));
+ memset (&ip6_fin_ack, 0, sizeof (ip6_fin_ack));
+ memset (&ip6_rst_ack, 0, sizeof (ip6_rst_ack));
+
+ ip4_tcp_packet_init (&ip4_syn.ip4, sizeof (ip4_syn));
+ ip4_tcp_packet_init (&ip4_syn_ack.ip4, sizeof (ip4_syn_ack));
+ ip4_tcp_packet_init (&ip4_ack.ip4, sizeof (ip4_ack));
+ ip4_tcp_packet_init (&ip4_fin_ack.ip4, sizeof (ip4_fin_ack));
+ ip4_tcp_packet_init (&ip4_rst_ack.ip4, sizeof (ip4_rst_ack));
+
+ ip6_tcp_packet_init (&ip6_syn.ip6, sizeof (ip6_syn));
+ ip6_tcp_packet_init (&ip6_syn_ack.ip6, sizeof (ip6_syn_ack));
+ ip6_tcp_packet_init (&ip6_ack.ip6, sizeof (ip6_ack));
+ ip6_tcp_packet_init (&ip6_fin_ack.ip6, sizeof (ip6_fin_ack));
+ ip6_tcp_packet_init (&ip6_rst_ack.ip6, sizeof (ip6_rst_ack));
+
+ /* TCP header. */
+ {
+ u8 window_scale = 7;
+ tcp_syn_packet_t * s = &ip4_syn.tcp;
+ tcp_syn_packet_t * sa = &ip4_syn_ack.tcp;
+ tcp_ack_packet_t * a = &ip4_ack.tcp;
+ tcp_ack_packet_t * fa = &ip4_fin_ack.tcp;
+ tcp_ack_packet_t * ra = &ip4_rst_ack.tcp;
+
+ /* Header length in 32-bit words (options included) goes in the upper
+ 4 bits. */
+ s->header.tcp_header_u32s_and_reserved = (sizeof (s[0]) / sizeof (u32)) << 4;
+ a->header.tcp_header_u32s_and_reserved = (sizeof (a[0]) / sizeof (u32)) << 4;
+
+ s->header.flags = TCP_FLAG_SYN;
+ a->header.flags = TCP_FLAG_ACK;
+
+ s->header.window = clib_host_to_net_u16 (32 << (10 - window_scale));
+ a->header.window = s->header.window;
+
+ /* Only type and length of the MSS option are set here; the value stays 0
+ in the template. */
+ s->options.mss.type = TCP_OPTION_MSS;
+ s->options.mss.length = 4;
+
+ s->options.window_scale.type = TCP_OPTION_WINDOW_SCALE;
+ s->options.window_scale.length = 3;
+ s->options.window_scale.value = window_scale;
+
+ s->options.time_stamp.type = TCP_OPTION_TIME_STAMP;
+ s->options.time_stamp.length = 10;
+
+ memset (&s->options.nops, TCP_OPTION_NOP, sizeof (s->options.nops));
+
+ /* SYN-ACK is same as SYN but with ACK flag set. */
+ sa[0] = s[0];
+ sa->header.flags |= TCP_FLAG_ACK;
+
+ a->options.time_stamp.type = TCP_OPTION_TIME_STAMP;
+ a->options.time_stamp.length = 10;
+ memset (&a->options.nops, TCP_OPTION_NOP, sizeof (a->options.nops));
+
+ /* {FIN,RST}-ACK are same as ACK but with {FIN,RST} flag set. */
+ fa[0] = a[0];
+ fa->header.flags |= TCP_FLAG_FIN;
+ ra[0] = a[0];
+ ra->header.flags |= TCP_FLAG_RST;
+
+ /* IP6 TCP headers are identical. */
+ ip6_syn.tcp = s[0];
+ ip6_syn_ack.tcp = sa[0];
+ ip6_ack.tcp = a[0];
+ ip6_fin_ack.tcp = fa[0];
+ ip6_rst_ack.tcp = ra[0];
+
+ /* TCP checksums. */
+ {
+ ip_csum_t sum;
+
+ /* Each checksum starts from the pseudo-header terms that are known now
+ (tcp length and protocol) and then covers the tcp header and options;
+ addresses are still zero in the templates.
+ NOTE(review): presumably the consumer folds in the real addresses,
+ ports and time stamps later -- confirm against the code that uses
+ tcp_checksum_net_byte_order. */
+ sum = clib_host_to_net_u32 (sizeof (ip4_ack.tcp) + (ip4_ack.ip4.protocol << 16));
+ sum = ip_incremental_checksum (sum, &ip4_ack.tcp, sizeof (ip4_ack.tcp));
+ ip4_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
+
+ sum = clib_host_to_net_u32 (sizeof (ip4_fin_ack.tcp) + (ip4_fin_ack.ip4.protocol << 16));
+ sum = ip_incremental_checksum (sum, &ip4_fin_ack.tcp, sizeof (ip4_fin_ack.tcp));
+ ip4_fin_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
+
+ sum = clib_host_to_net_u32 (sizeof (ip4_rst_ack.tcp) + (ip4_rst_ack.ip4.protocol << 16));
+ sum = ip_incremental_checksum (sum, &ip4_rst_ack.tcp, sizeof (ip4_rst_ack.tcp));
+ ip4_rst_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
+
+ sum = clib_host_to_net_u32 (sizeof (ip4_syn.tcp) + (ip4_syn.ip4.protocol << 16));
+ sum = ip_incremental_checksum (sum, &ip4_syn.tcp, sizeof (ip4_syn.tcp));
+ ip4_syn.tcp.header.checksum = ~ ip_csum_fold (sum);
+
+ sum = clib_host_to_net_u32 (sizeof (ip4_syn_ack.tcp) + (ip4_syn_ack.ip4.protocol << 16));
+ sum = ip_incremental_checksum (sum, &ip4_syn_ack.tcp, sizeof (ip4_syn_ack.tcp));
+ ip4_syn_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
+
+ /* NOTE(review): the ip6 headers were zeroed above and ip6_tcp_packet_init
+ does not write ip6.protocol, so the protocol term added below reads as
+ 0 unless it is set somewhere not visible here.  Unlike the ip4 case the
+ protocol is also added outside clib_host_to_net_u32, i.e. without byte
+ swapping -- confirm both before relying on these checksums. */
+ sum = clib_host_to_net_u32 (sizeof (ip6_ack.tcp)) + ip6_ack.ip6.protocol;
+ sum = ip_incremental_checksum (sum, &ip6_ack.tcp, sizeof (ip6_ack.tcp));
+ ip6_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
+
+ sum = clib_host_to_net_u32 (sizeof (ip6_fin_ack.tcp)) + ip6_fin_ack.ip6.protocol;
+ sum = ip_incremental_checksum (sum, &ip6_fin_ack.tcp, sizeof (ip6_fin_ack.tcp));
+ ip6_fin_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
+
+ sum = clib_host_to_net_u32 (sizeof (ip6_rst_ack.tcp)) + ip6_rst_ack.ip6.protocol;
+ sum = ip_incremental_checksum (sum, &ip6_rst_ack.tcp, sizeof (ip6_rst_ack.tcp));
+ ip6_rst_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
+
+ sum = clib_host_to_net_u32 (sizeof (ip6_syn.tcp)) + ip6_syn.ip6.protocol;
+ sum = ip_incremental_checksum (sum, &ip6_syn.tcp, sizeof (ip6_syn.tcp));
+ ip6_syn.tcp.header.checksum = ~ ip_csum_fold (sum);
+
+ sum = clib_host_to_net_u32 (sizeof (ip6_syn_ack.tcp)) + ip6_syn_ack.ip6.protocol;
+ sum = ip_incremental_checksum (sum, &ip6_syn_ack.tcp, sizeof (ip6_syn_ack.tcp));
+ ip6_syn_ack.tcp.header.checksum = ~ ip_csum_fold (sum);
+ }
+ }
+
+ /* Register each ip4 template with vlib and remember its tcp and ip4 header
+ checksums alongside it. */
+#define _(t,x,n) \
+do { \
+ vlib_packet_template_init \
+ (vm, \
+ &tm->ip4.packet_templates[t].vlib, \
+ &x, sizeof (x), \
+ /* alloc chunk size */ VLIB_FRAME_SIZE, \
+ (n)); \
+ tm->ip4.packet_templates[t].tcp_checksum_net_byte_order \
+ = x.tcp.header.checksum; \
+ tm->ip4.packet_templates[t].ip4_checksum_net_byte_order \
+ = x.ip4.checksum; \
+} while (0)
+
+ _ (TCP_PACKET_TEMPLATE_SYN, ip4_syn, "ip4 tcp syn");
+ _ (TCP_PACKET_TEMPLATE_SYN_ACK, ip4_syn_ack, "ip4 tcp syn-ack");
+ _ (TCP_PACKET_TEMPLATE_ACK, ip4_ack, "ip4 tcp ack");
+ _ (TCP_PACKET_TEMPLATE_FIN_ACK, ip4_fin_ack, "ip4 tcp fin-ack");
+ _ (TCP_PACKET_TEMPLATE_RST_ACK, ip4_rst_ack, "ip4 tcp rst-ack");
+
+#undef _
+
+ /* Same for ip6; there is no ip6 header checksum, so the ip4 checksum slot
+ is filled with the placeholder 0xdead. */
+#define _(t,x,n) \
+do { \
+ vlib_packet_template_init \
+ (vm, \
+ &tm->ip6.packet_templates[t].vlib, \
+ &x, sizeof (x), \
+ /* alloc chunk size */ VLIB_FRAME_SIZE, \
+ (n)); \
+ tm->ip6.packet_templates[t].tcp_checksum_net_byte_order \
+ = x.tcp.header.checksum; \
+ tm->ip6.packet_templates[t].ip4_checksum_net_byte_order \
+ = 0xdead; \
+} while (0)
+
+ _ (TCP_PACKET_TEMPLATE_SYN, ip6_syn, "ip6 tcp syn");
+ _ (TCP_PACKET_TEMPLATE_SYN_ACK, ip6_syn_ack, "ip6 tcp syn-ack");
+ _ (TCP_PACKET_TEMPLATE_ACK, ip6_ack, "ip6 tcp ack");
+ _ (TCP_PACKET_TEMPLATE_FIN_ACK, ip6_fin_ack, "ip6 tcp fin-ack");
+ _ (TCP_PACKET_TEMPLATE_RST_ACK, ip6_rst_ack, "ip6 tcp rst-ack");
+
+#undef _
+ }
+}
+
+/* Human readable descriptions of the tcp errors, generated from
+   foreach_tcp_error so that entry i describes error code i of tcp_error_t.
+   Referenced as .error_strings by the lookup node registrations. */
+static char * tcp_error_strings[] = {
+#define _(sym,string) string,
+ foreach_tcp_error
+#undef _
+};
+
+/* Graph node "ip4-tcp-lookup": runs ip4_tcp_lookup on vectors of u32 buffer
+   indices and dispatches to the next nodes named below, indexed by
+   tcp_lookup_next_t.  Error counters come from tcp_error_strings. */
+VLIB_REGISTER_NODE (ip4_tcp_lookup_node,static) = {
+ .function = ip4_tcp_lookup,
+ .name = "ip4-tcp-lookup",
+
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = TCP_LOOKUP_N_NEXT,
+ .next_nodes = {
+ [TCP_LOOKUP_NEXT_DROP] = "error-drop",
+ [TCP_LOOKUP_NEXT_PUNT] = "error-punt",
+ [TCP_LOOKUP_NEXT_LISTEN_SYN] = "ip4-tcp-listen",
+ [TCP_LOOKUP_NEXT_LISTEN_ACK] = "ip4-tcp-establish",
+ [TCP_LOOKUP_NEXT_CONNECT_SYN_ACK] = "ip4-tcp-connect",
+ [TCP_LOOKUP_NEXT_ESTABLISHED] = "ip4-tcp-established",
+ },
+
+ .n_errors = TCP_N_ERROR,
+ .error_strings = tcp_error_strings,
+};
+
+/* Graph node "ip6-tcp-lookup": runs ip6_tcp_lookup on vectors of u32 buffer
+   indices and dispatches to the next nodes named below, indexed by
+   tcp_lookup_next_t.  Error counters come from tcp_error_strings.
+   Every protocol specific next node is the ip6 variant; handing ip6 packets
+   to an ip4 node would make it parse them with the wrong header layout. */
+VLIB_REGISTER_NODE (ip6_tcp_lookup_node,static) = {
+  .function = ip6_tcp_lookup,
+  .name = "ip6-tcp-lookup",
+
+  .vector_size = sizeof (u32),
+
+  .n_next_nodes = TCP_LOOKUP_N_NEXT,
+  .next_nodes = {
+    [TCP_LOOKUP_NEXT_DROP] = "error-drop",
+    [TCP_LOOKUP_NEXT_PUNT] = "error-punt",
+    [TCP_LOOKUP_NEXT_LISTEN_SYN] = "ip6-tcp-listen",
+    [TCP_LOOKUP_NEXT_LISTEN_ACK] = "ip6-tcp-establish",
+    [TCP_LOOKUP_NEXT_CONNECT_SYN_ACK] = "ip6-tcp-connect",
+    [TCP_LOOKUP_NEXT_ESTABLISHED] = "ip6-tcp-established",
+  },
+
+  .n_errors = TCP_N_ERROR,
+  .error_strings = tcp_error_strings,
+};
+
+/* Decode the options of a received SYN into mini connection M.
+
+   Picks up MSS, window scale and the peer's time stamp.  Options absent from
+   the packet take their value from tm->option_decode_mini_connection_template.
+   M->max_segment_size is stored in host byte order,
+   M->time_stamps.his_net_byte_order is kept in network byte order.
+
+   The parser is a fixed, unrolled sequence of at most 8 option steps, so it
+   never loops on malformed input. */
+static_always_inline void
+tcp_options_decode_for_syn (tcp_main_t * tm, tcp_mini_connection_t * m, tcp_header_t * tcp)
+{
+  u8 * o = (void *) (tcp + 1);
+  u32 n_header_bytes = (tcp->tcp_header_u32s_and_reserved >> 4) * sizeof (u32);
+  /* The header length field counts the fixed header as well; only what
+     exceeds it is option data.  Using the full header length here would let
+     the parser run sizeof (tcp[0]) bytes past the options into the payload. */
+  u32 n_bytes = (n_header_bytes > sizeof (tcp[0])
+		 ? n_header_bytes - sizeof (tcp[0])
+		 : 0);
+  u8 * e = o + n_bytes;
+  tcp_mini_connection_t * tmpl = &tm->option_decode_mini_connection_template;
+  tcp_option_type_t t;
+  u8 i, l, * p;
+  u8 * option_decode[16];
+
+  /* Initialize defaults. */
+  option_decode[TCP_OPTION_MSS] = (u8 *) &tmpl->max_segment_size;
+  option_decode[TCP_OPTION_WINDOW_SCALE] = (u8 *) &tmpl->window_scale;
+  option_decode[TCP_OPTION_TIME_STAMP] = (u8 *) &tmpl->time_stamps.his_net_byte_order;
+
+  if (n_bytes > 0)
+    {
+      /* One parse step: remember where the value of option type t starts
+	 (types beyond the table are filed under TCP_OPTION_END), then advance
+	 to the next option unless that would leave the option area. */
+#define _							\
+do {								\
+  t = o[0];							\
+  i = t >= ARRAY_LEN (option_decode) ? TCP_OPTION_END : t;	\
+  option_decode[i] = o + 2;					\
+  /* Skip nop; don't skip end; else length from packet. */	\
+  l = t < 2 ? t : o[1];						\
+  p = o + l;							\
+  o = p < e ? p : o;						\
+} while (0)
+
+      _; _; _;
+      /* Fast path: NOP NOP TIMESTAMP. */
+      if (o >= e) goto done;
+      _; _;
+      if (o >= e) goto done;
+      _; _; _;
+
+#undef _
+
+    done:;
+    }
+
+  m->max_segment_size =
+    clib_net_to_host_u16 (*(u16 *) option_decode[TCP_OPTION_MSS]);
+  m->window_scale = *option_decode[TCP_OPTION_WINDOW_SCALE];
+  m->time_stamps.his_net_byte_order = ((u32 *) option_decode[TCP_OPTION_TIME_STAMP])[0];
+}
+
+/* Decode the time stamp option of a received ACK.
+
+   If HIS_TIME_STAMP is non-null it receives the first time stamp word of the
+   option, left in network byte order.  The return value is the second time
+   stamp word converted to host byte order.  Both are 0 when the packet
+   carries no time stamp option.
+
+   The parser is a fixed, unrolled sequence of at most 8 option steps, so it
+   never loops on malformed input. */
+static_always_inline u32
+tcp_options_decode_for_ack (tcp_main_t * tm, tcp_header_t * tcp,
+			    u32 * his_time_stamp)
+{
+  u8 * o = (void *) (tcp + 1);
+  u32 n_header_bytes = (tcp->tcp_header_u32s_and_reserved >> 4) * sizeof (u32);
+  /* The header length field counts the fixed header as well; only what
+     exceeds it is option data.  Using the full header length here would let
+     the parser run sizeof (tcp[0]) bytes past the options into the payload. */
+  u32 n_bytes = (n_header_bytes > sizeof (tcp[0])
+		 ? n_header_bytes - sizeof (tcp[0])
+		 : 0);
+  u8 * e = o + n_bytes;
+  tcp_option_type_t t;
+  u8 i, l, * p;
+  u8 * option_decode[16];
+  u32 default_time_stamps[2];
+
+  /* Initialize defaults. */
+  default_time_stamps[0] = default_time_stamps[1] = 0;
+  option_decode[TCP_OPTION_TIME_STAMP] = (u8 *) &default_time_stamps;
+
+  if (n_bytes > 0)
+    {
+      /* One parse step: remember where the value of option type t starts
+	 (types beyond the table are filed under TCP_OPTION_END), then advance
+	 to the next option unless that would leave the option area. */
+#define _							\
+do {								\
+  t = o[0];							\
+  i = t >= ARRAY_LEN (option_decode) ? TCP_OPTION_END : t;	\
+  option_decode[i] = o + 2;					\
+  /* Skip nop; don't skip end; else length from packet. */	\
+  l = t < 2 ? t : o[1];						\
+  p = o + l;							\
+  o = p < e ? p : o;						\
+} while (0)
+
+      _; _; _;
+      /* Fast path: NOP NOP TIMESTAMP. */
+      if (o >= e) goto done;
+      _; _;
+      if (o >= e) goto done;
+      _; _; _;
+#undef _
+
+    done:;
+    }
+
+  if (his_time_stamp)
+    his_time_stamp[0] = ((u32 *) option_decode[TCP_OPTION_TIME_STAMP])[0];
+
+  return clib_net_to_host_u32 (((u32 *) option_decode[TCP_OPTION_TIME_STAMP])[1]);
+}
+
+/* Set up the mini connection template that supplies default option
+   values when decoding SYN options: everything zero except the maximum
+   segment size, which is stored in network byte order as 576 - 40. */
+static void
+tcp_options_decode_init (tcp_main_t * tm)
+{
+  tcp_mini_connection_t * tmpl;
+
+  tmpl = &tm->option_decode_mini_connection_template;
+  memset (tmpl, 0, sizeof (*tmpl));
+
+  tmpl->time_stamps.his_net_byte_order = 0;
+  tmpl->window_scale = 0;
+  tmpl->max_segment_size = clib_host_to_net_u16 (576 - 40);
+}
+
+/* Mark buffer BI_TARGET as "related" to buffer B: it inherits B's RX
+   software interface index and trace index, and B's traced flag is or-ed
+   into its flags. */
+always_inline void
+vlib_buffer_copy_shared_fields (vlib_main_t * vm, vlib_buffer_t * b, u32 bi_target)
+{
+  vlib_buffer_t * dst = vlib_get_buffer (vm, bi_target);
+
+  dst->flags |= (b->flags & VLIB_BUFFER_IS_TRACED);
+  dst->trace_index = b->trace_index;
+  vnet_buffer (dst)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_RX];
+}
+
+/* Next-node slots of the tcp-listen graph nodes. */
+typedef enum {
+ /* Where the received segment itself is sent. */
+ TCP_LISTEN_NEXT_DROP,
+ /* Where the generated reply packet is sent. */
+ TCP_LISTEN_NEXT_REPLY,
+ /* Number of next slots. */
+ TCP_LISTEN_N_NEXT,
+} tcp_listen_next_t;
+
+/* Common body of the ip4/ip6 "tcp-listen" graph nodes.
+
+   For every buffer in FRAME:
+   - the received segment is queued on the DROP next, tagged with the
+     TCP_ERROR_LISTEN_RESPONSES counter of the tcp-lookup node;
+   - the addresses and ports of the segment are stored in slot
+     (mini_connection_index % 4) of the mini connection address hash and
+     the slot is time stamped;
+   - the mini connection is put into state listen_ack_wait, its options
+     are decoded from the segment and sequence numbers are chosen (ours
+     from the random buffer, his from the segment plus one);
+   - a reply is built from the SYN_ACK packet template, with addresses and
+     ports swapped and checksums updated incrementally, and queued on the
+     REPLY next.
+
+   IS_IP6 selects the address family and is expected to be a compile time
+   constant supplied by the per family wrappers.
+   Returns the number of buffers in FRAME. */
+static_always_inline uword
+ip46_tcp_listen (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ uword is_ip6)
+{
+ tcp_main_t * tm = &tcp_main;
+ ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
+ uword n_packets = frame->n_vectors;
+ u32 * from, * to_reply, * to_drop, * random_ack_numbers;
+ u32 n_left_from, n_left_to_reply, n_left_to_drop, mini_now, timestamp_now;
+ u16 * fid, * fragment_ids;
+ vlib_node_runtime_t * error_node;
+
+ /* Errors are counted against the tcp-lookup node of the same family. */
+ error_node = vlib_node_get_runtime
+ (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ mini_now = tcp_time_now (tm, TCP_TIMER_mini_connection);
+ timestamp_now = tcp_time_now (tm, TCP_TIMER_timestamp);
+
+ /* One random initial sequence number per packet. */
+ random_ack_numbers = clib_random_buffer_get_data (&vm->random_buffer,
+ n_packets * sizeof (random_ack_numbers[0]));
+ /* Get random fragment IDs for replies. */
+ fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer,
+ n_packets * sizeof (fragment_ids[0]));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, TCP_LISTEN_NEXT_REPLY,
+ to_reply, n_left_to_reply);
+ vlib_get_next_frame (vm, node, TCP_LISTEN_NEXT_DROP,
+ to_drop, n_left_to_drop);
+
+ /* Every input packet consumes one slot in each of the two frames. */
+ while (n_left_from > 0 && n_left_to_reply > 0 && n_left_to_drop > 0)
+ {
+ vlib_buffer_t * p0;
+ ip6_header_t * ip60;
+ ip4_header_t * ip40;
+ tcp_header_t * tcp0;
+ tcp_mini_connection_t * min0;
+ tcp_syn_packet_t * tcp_reply0;
+ ip_csum_t tcp_sum0;
+ u32 bi0, bi_reply0, imin0, my_seq_net0, his_seq_host0, his_seq_net0;
+ u8 i0;
+
+ bi0 = to_drop[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_drop += 1;
+ n_left_to_drop -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+
+ p0->error = error_node->errors[TCP_ERROR_LISTEN_RESPONSES];
+
+ /* Address hash entries hold four connections each. */
+ imin0 = vnet_buffer (p0)->ip.tcp.mini_connection_index;
+ i0 = imin0 % 4;
+
+ if (is_ip6)
+ {
+ ip6_tcp_udp_address_x4_and_timestamps_t * mina0;
+
+ ip60 = vlib_buffer_get_current (p0);
+ tcp0 = ip6_next_header (ip60);
+
+ mina0 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin0 / 4);
+
+ ip6_tcp_udp_address_x4_set_from_headers (&mina0->address_x4,
+ ip60, tcp0, i0);
+ mina0->time_stamps[i0] = mini_now;
+ }
+ else
+ {
+ ip4_tcp_udp_address_x4_and_timestamps_t * mina0;
+
+ ip40 = vlib_buffer_get_current (p0);
+ tcp0 = ip4_next_header (ip40);
+
+ mina0 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin0 / 4);
+
+ ip4_tcp_udp_address_x4_set_from_headers (&mina0->address_x4,
+ ip40, tcp0, i0);
+ mina0->time_stamps[i0] = mini_now;
+ }
+
+ min0 = vec_elt_at_index (tm46->mini_connections, imin0);
+
+ min0->state = TCP_CONNECTION_STATE_listen_ack_wait;
+ min0->time_stamps.ours_host_byte_order = timestamp_now;
+ tcp_options_decode_for_syn (tm, min0, tcp0);
+
+ /* The random word is used as is for the wire (network order) value. */
+ my_seq_net0 = *random_ack_numbers++;
+ his_seq_host0 = 1 + clib_net_to_host_u32 (tcp0->seq_number);
+
+ min0->sequence_numbers.ours = 1 + clib_net_to_host_u32 (my_seq_net0);
+ min0->sequence_numbers.his = his_seq_host0;
+
+ if (is_ip6)
+ {
+ ip6_tcp_syn_packet_t * r0;
+ uword tmp0, i;
+
+ r0 = vlib_packet_template_get_packet
+ (vm,
+ &tm->ip6.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK].vlib,
+ &bi_reply0);
+ tcp_reply0 = &r0->tcp;
+
+ tcp_sum0 = (tm->ip6.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK]
+ .tcp_checksum_net_byte_order);
+
+ /* Reply goes back to the sender: swap source and destination. */
+ for (i = 0; i < ARRAY_LEN (ip60->dst_address.as_uword); i++)
+ {
+ tmp0 = r0->ip6.src_address.as_uword[i] = ip60->dst_address.as_uword[i];
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0);
+
+ tmp0 = r0->ip6.dst_address.as_uword[i] = ip60->src_address.as_uword[i];
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0);
+ }
+ }
+ else
+ {
+ ip4_tcp_syn_packet_t * r0;
+ ip_csum_t ip_sum0;
+ u32 src0, dst0;
+
+ r0 = vlib_packet_template_get_packet
+ (vm,
+ &tm->ip4.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK].vlib,
+ &bi_reply0);
+ tcp_reply0 = &r0->tcp;
+
+ tcp_sum0 = (tm->ip4.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK]
+ .tcp_checksum_net_byte_order);
+ ip_sum0 = (tm->ip4.packet_templates[TCP_PACKET_TEMPLATE_SYN_ACK]
+ .ip4_checksum_net_byte_order);
+
+ /* Reply goes back to the sender: swap source and destination. */
+ src0 = r0->ip4.src_address.as_u32 = ip40->dst_address.as_u32;
+ dst0 = r0->ip4.dst_address.as_u32 = ip40->src_address.as_u32;
+
+ ip_sum0 = ip_csum_add_even (ip_sum0, src0);
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, src0);
+
+ ip_sum0 = ip_csum_add_even (ip_sum0, dst0);
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, dst0);
+
+ r0->ip4.checksum = ip_csum_fold (ip_sum0);
+
+ ASSERT (r0->ip4.checksum == ip4_header_checksum (&r0->ip4));
+ }
+
+ tcp_reply0->header.ports.src = tcp0->ports.dst;
+ tcp_reply0->header.ports.dst = tcp0->ports.src;
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->header.ports.src_and_dst);
+
+ tcp_reply0->header.seq_number = my_seq_net0;
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, my_seq_net0);
+
+ his_seq_net0 = clib_host_to_net_u32 (his_seq_host0);
+ tcp_reply0->header.ack_number = his_seq_net0;
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, his_seq_net0);
+
+ {
+ /* The adjacency index stored in the buffer belongs to the lookup
+    table of the address family being served, so the adjacency must
+    be fetched from that family's lookup main. */
+ ip_lookup_main_t * lm0 = is_ip6 ? &ip6_main.lookup_main : &ip4_main.lookup_main;
+ ip_adjacency_t * adj0 = ip_get_adjacency (lm0, vnet_buffer (p0)->ip.adj_index[VLIB_RX]);
+ /* Largest segment we can accept: L3 packet limit minus IP and TCP headers. */
+ u16 my_mss =
+ (adj0->rewrite_header.max_l3_packet_bytes
+ - (is_ip6 ? sizeof (ip60[0]) : sizeof (ip40[0]))
+ - sizeof (tcp0[0]));
+
+ /* Use the smaller of our limit and the one decoded from his SYN. */
+ my_mss = clib_min (my_mss, min0->max_segment_size);
+ min0->max_segment_size = my_mss;
+
+ tcp_reply0->options.mss.value = clib_host_to_net_u16 (my_mss);
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->options.mss.value);
+ }
+
+ tcp_reply0->options.time_stamp.my_time_stamp = clib_host_to_net_u32 (timestamp_now);
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->options.time_stamp.my_time_stamp);
+
+ /* Echo his time stamp back unchanged (already in network order). */
+ tcp_reply0->options.time_stamp.his_time_stamp = min0->time_stamps.his_net_byte_order;
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, tcp_reply0->options.time_stamp.his_time_stamp);
+
+ tcp_reply0->header.checksum = ip_csum_fold (tcp_sum0);
+
+ vlib_buffer_copy_shared_fields (vm, p0, bi_reply0);
+
+ to_reply[0] = bi_reply0;
+ n_left_to_reply -= 1;
+ to_reply += 1;
+ }
+
+ vlib_put_next_frame (vm, node, TCP_LISTEN_NEXT_REPLY, n_left_to_reply);
+ vlib_put_next_frame (vm, node, TCP_LISTEN_NEXT_DROP, n_left_to_drop);
+ }
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ /* FIXME */ ;
+
+ return frame->n_vectors;
+}
+
+/* IPv4 instance of the listen node function. */
+static uword
+ip4_tcp_listen (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * frame)
+{
+  const uword is_ip6 = 0;
+
+  return ip46_tcp_listen (vm, node, frame, is_ip6);
+}
+
+/* IPv6 instance of the listen node function. */
+static uword
+ip6_tcp_listen (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * frame)
+{
+  const uword is_ip6 = 1;
+
+  return ip46_tcp_listen (vm, node, frame, is_ip6);
+}
+
+/* Registration of the "ip4-tcp-listen" graph node.  The received segment
+   goes to "error-drop"; the reply goes to "ip4-input" when CLIB_DEBUG > 0
+   and to "ip4-lookup" otherwise. */
+VLIB_REGISTER_NODE (ip4_tcp_listen_node,static) = {
+ .function = ip4_tcp_listen,
+ .name = "ip4-tcp-listen",
+
+ /* Frame elements are u32 sized. */
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = TCP_LISTEN_N_NEXT,
+ .next_nodes = {
+ [TCP_LISTEN_NEXT_DROP] = "error-drop",
+ [TCP_LISTEN_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip4-input" : "ip4-lookup",
+ },
+};
+
+/* Registration of the "ip6-tcp-listen" graph node.  The received segment
+   goes to "error-drop"; the reply goes to "ip6-input" when CLIB_DEBUG > 0
+   and to "ip6-lookup" otherwise. */
+VLIB_REGISTER_NODE (ip6_tcp_listen_node,static) = {
+ .function = ip6_tcp_listen,
+ .name = "ip6-tcp-listen",
+
+ /* Frame elements are u32 sized. */
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = TCP_LISTEN_N_NEXT,
+ .next_nodes = {
+ [TCP_LISTEN_NEXT_DROP] = "error-drop",
+ [TCP_LISTEN_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip6-input" : "ip6-lookup",
+ },
+};
+
+/* Next-node slots of the tcp-connect graph nodes. */
+typedef enum {
+ TCP_CONNECT_NEXT_DROP,
+ TCP_CONNECT_NEXT_REPLY,
+ /* Number of next slots. */
+ TCP_CONNECT_N_NEXT,
+} tcp_connect_next_t;
+
+/* Common body of the ip4/ip6 "tcp-connect" graph nodes.
+
+   NOTE(review): this looks like an unfinished stub.  It logs the address
+   of the per family state with clib_warning on every call and hits
+   ASSERT (0) for every packet.  Apart from that each buffer is forwarded
+   to next index 0 and tagged with error index 0 of the tcp-lookup node of
+   the same family.  The IP and TCP header pointers are computed but never
+   read.
+
+   Returns the number of buffers in FRAME. */
+static_always_inline uword
+ip46_tcp_connect (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ uword is_ip6)
+{
+ tcp_main_t * tm = &tcp_main;
+ ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
+ uword n_packets = frame->n_vectors;
+ u32 * from, * to_next;
+ u32 n_left_from, n_left_to_next, next;
+ vlib_node_runtime_t * error_node;
+
+ /* FIXME */
+ clib_warning ("%p", tm46);
+
+ error_node = vlib_node_get_runtime
+ (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ ip6_header_t * ip60;
+ ip4_header_t * ip40;
+ tcp_header_t * tcp0;
+ u32 bi0;
+ u8 error0, next0;
+
+ /* Speculatively enqueue to the current next frame. */
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+
+ if (is_ip6)
+ {
+ ip60 = vlib_buffer_get_current (p0);
+ tcp0 = ip6_next_header (ip60);
+ }
+ else
+ {
+ ip40 = vlib_buffer_get_current (p0);
+ tcp0 = ip4_next_header (ip40);
+ }
+
+ /* No connect processing is implemented here. */
+ ASSERT (0);
+
+ error0 = next0 = 0;
+ p0->error = error_node->errors[error0];
+
+ /* Speculation failed: take the buffer back out of the current frame
+    and put it into the frame of the next it really goes to. */
+ if (PREDICT_FALSE (next0 != next))
+ {
+ to_next -= 1;
+ n_left_to_next += 1;
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ /* FIXME */ ;
+
+ return frame->n_vectors;
+}
+
+/* IPv4 instance of the connect node function. */
+static uword
+ip4_tcp_connect (vlib_main_t * vm,
+                 vlib_node_runtime_t * node,
+                 vlib_frame_t * frame)
+{
+  const uword is_ip6 = 0;
+
+  return ip46_tcp_connect (vm, node, frame, is_ip6);
+}
+
+/* IPv6 instance of the connect node function. */
+static uword
+ip6_tcp_connect (vlib_main_t * vm,
+                 vlib_node_runtime_t * node,
+                 vlib_frame_t * frame)
+{
+  const uword is_ip6 = 1;
+
+  return ip46_tcp_connect (vm, node, frame, is_ip6);
+}
+
+/* Registration of the "ip4-tcp-connect" graph node.  The reply slot leads
+   to "ip4-input" when CLIB_DEBUG > 0 and to "ip4-lookup" otherwise. */
+VLIB_REGISTER_NODE (ip4_tcp_connect_node,static) = {
+ .function = ip4_tcp_connect,
+ .name = "ip4-tcp-connect",
+
+ /* Frame elements are u32 sized. */
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = TCP_CONNECT_N_NEXT,
+ .next_nodes = {
+ [TCP_CONNECT_NEXT_DROP] = "error-drop",
+ [TCP_CONNECT_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip4-input" : "ip4-lookup",
+ },
+};
+
+/* Registration of the "ip6-tcp-connect" graph node.  The reply slot leads
+   to "ip6-input" when CLIB_DEBUG > 0 and to "ip6-lookup" otherwise. */
+VLIB_REGISTER_NODE (ip6_tcp_connect_node,static) = {
+ .function = ip6_tcp_connect,
+ .name = "ip6-tcp-connect",
+
+ /* Frame elements are u32 sized. */
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = TCP_CONNECT_N_NEXT,
+ .next_nodes = {
+ [TCP_CONNECT_NEXT_DROP] = "error-drop",
+ [TCP_CONNECT_NEXT_REPLY] = CLIB_DEBUG > 0 ? "ip6-input" : "ip6-lookup",
+ },
+};
+
+/* Next-node slots of the tcp-establish graph nodes. */
+typedef enum {
+ TCP_ESTABLISH_NEXT_DROP,
+ /* Taken when the mini connection has already been turned into an
+    established connection. */
+ TCP_ESTABLISH_NEXT_ESTABLISHED,
+ /* Number of next slots. */
+ TCP_ESTABLISH_N_NEXT,
+} tcp_establish_next_t;
+
+/* Common body of the ip4/ip6 "tcp-establish" graph nodes: turns a mini
+   connection into an established connection.
+
+   For every buffer in FRAME the mini connection named by the buffer's
+   ip.tcp.mini_connection_index is looked at:
+   - if it is already in state "unused" the buffer is passed on to the
+     ESTABLISHED next with TCP_ERROR_NONE;
+   - otherwise it is marked unused and the segment's sequence and ack
+     numbers must equal the numbers stored in the mini connection; a
+     mismatch drops the buffer with the matching "unexpected" error;
+   - on a match the address hash slot is copied from the mini connection
+     table to the established connection table (and invalidated in the
+     former), the established connection is initialized from the mini
+     connection and the segment, a first round trip time sample is taken,
+     the connection handle is appended to the listener's event_connections
+     and the buffer is dropped, counted as TCP_ERROR_LISTENS_ESTABLISHED.
+
+   After the frame has been processed every listener with queued handles
+   gets its event_function called (if set) with
+   TCP_EVENT_connection_established; the output node of this address
+   family is switched to polling when there were no established
+   connections before.
+
+   Returns the number of buffers in FRAME. */
+static_always_inline uword
+ip46_tcp_establish (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ uword is_ip6)
+{
+ tcp_main_t * tm = &tcp_main;
+ ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
+ uword n_packets = frame->n_vectors;
+ u32 * from, * to_next;
+ u32 n_left_from, n_left_to_next, next, mini_long_long_ago, timestamp_now;
+ vlib_node_runtime_t * error_node;
+
+ /* Errors are counted against the tcp-lookup node of the same family. */
+ error_node = vlib_node_get_runtime
+ (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next = node->cached_next_index;
+ /* Current time with the top bit flipped, i.e. half the counter range
+    away from now.  The shifted constant is unsigned: shifting a signed 1
+    into the sign bit is undefined. */
+ mini_long_long_ago =
+ (tcp_time_now (tm, TCP_TIMER_mini_connection)
+ + ((u32) 1 << (BITS (mini_long_long_ago) - 1)));
+ timestamp_now = tcp_time_now (tm, TCP_TIMER_timestamp);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ ip6_header_t * ip60;
+ ip4_header_t * ip40;
+ tcp_header_t * tcp0;
+ tcp_mini_connection_t * min0;
+ tcp_connection_t * est0;
+ tcp_listener_t * l0;
+ u32 bi0, imin0, iest0;
+ u8 error0, next0, i0, e0;
+
+ /* Speculatively enqueue to the current next frame. */
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+
+ imin0 = vnet_buffer (p0)->ip.tcp.mini_connection_index;
+ iest0 = vnet_buffer (p0)->ip.tcp.established_connection_index;
+
+ /* Address hash entries hold four connections each. */
+ i0 = imin0 % 4;
+ e0 = iest0 % 4;
+
+ min0 = vec_elt_at_index (tm46->mini_connections, imin0);
+ if (PREDICT_FALSE (min0->state == TCP_CONNECTION_STATE_unused))
+ goto already_established0;
+ /* NOTE(review): the mini connection is released before the sequence
+    number checks below, so a mismatching segment also releases it. */
+ min0->state = TCP_CONNECTION_STATE_unused;
+
+ if (is_ip6)
+ {
+ ip60 = vlib_buffer_get_current (p0);
+ tcp0 = ip6_next_header (ip60);
+ }
+ else
+ {
+ ip40 = vlib_buffer_get_current (p0);
+ tcp0 = ip4_next_header (ip40);
+ }
+
+ if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->seq_number)
+ != min0->sequence_numbers.his))
+ goto unexpected_seq_number0;
+ if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->ack_number)
+ != min0->sequence_numbers.ours))
+ goto unexpected_ack_number0;
+
+ /* Move the address tuple from the mini to the established table. */
+ if (is_ip6)
+ {
+ ip6_tcp_udp_address_x4_and_timestamps_t * mina0;
+ ip6_tcp_udp_address_x4_t * esta0;
+
+ mina0 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin0 / 4);
+ esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest0 / 4);
+
+ ip6_tcp_udp_address_x4_copy_and_invalidate (esta0, &mina0->address_x4, e0, i0);
+
+ mina0->time_stamps[i0] = mini_long_long_ago;
+ }
+ else
+ {
+ ip4_tcp_udp_address_x4_and_timestamps_t * mina0;
+ ip4_tcp_udp_address_x4_t * esta0;
+
+ mina0 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin0 / 4);
+ esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest0 / 4);
+
+ ip4_tcp_udp_address_x4_copy_and_invalidate (esta0, &mina0->address_x4, e0, i0);
+
+ mina0->time_stamps[i0] = mini_long_long_ago;
+ }
+
+ est0 = vec_elt_at_index (tm46->established_connections, iest0);
+
+ est0->sequence_numbers = min0->sequence_numbers;
+ /* Segment size net of the options carried by every ack packet. */
+ est0->max_segment_size = (min0->max_segment_size
+ - STRUCT_SIZE_OF (tcp_ack_packet_t, options));
+ est0->his_window_scale = min0->window_scale;
+ est0->his_window = clib_net_to_host_u16 (tcp0->window);
+ est0->time_stamps.ours_host_byte_order = min0->time_stamps.ours_host_byte_order;
+
+ /* Compute first measurement of round trip time. */
+ {
+ u32 t = tcp_options_decode_for_ack (tm, tcp0, &est0->time_stamps.his_net_byte_order);
+ f64 dt = (timestamp_now - t) * tm->secs_per_tick[TCP_TIMER_timestamp];
+ est0->round_trip_time_stats.sum = dt;
+ est0->round_trip_time_stats.sum2 = dt*dt;
+ est0->round_trip_time_stats.count = 1;
+
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "establish ack rtt: %.4e",
+ .format_args = "f8",
+ };
+ struct { f64 dt; } * ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->dt = dt;
+ }
+ }
+
+ est0->my_window_scale = 7;
+ est0->my_window = 256;
+
+ /* Queue the new connection for the listener notification below. */
+ l0 = pool_elt_at_index (tm->listener_pool, vnet_buffer (p0)->ip.tcp.listener_index);
+ vec_add1 (l0->event_connections[is_ip6], tcp_connection_handle_set (iest0, is_ip6));
+
+ next0 = TCP_ESTABLISH_NEXT_DROP;
+ error0 = TCP_ERROR_LISTENS_ESTABLISHED;
+
+ enqueue0:
+ p0->error = error_node->errors[error0];
+ /* Speculation failed: move the buffer to the frame of its real next. */
+ if (PREDICT_FALSE (next0 != next))
+ {
+ to_next -= 1;
+ n_left_to_next += 1;
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ continue;
+
+ already_established0:
+ next0 = TCP_ESTABLISH_NEXT_ESTABLISHED;
+ error0 = TCP_ERROR_NONE;
+ goto enqueue0;
+
+ unexpected_seq_number0:
+ next0 = TCP_ESTABLISH_NEXT_DROP;
+ error0 = TCP_ERROR_UNEXPECTED_SEQ_NUMBER;
+ goto enqueue0;
+
+ unexpected_ack_number0:
+ next0 = TCP_ESTABLISH_NEXT_DROP;
+ error0 = TCP_ERROR_UNEXPECTED_ACK_NUMBER;
+ goto enqueue0;
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ /* FIXME */ ;
+
+ /* Inform listeners of new connections. */
+ {
+ tcp_listener_t * l;
+ uword n;
+ pool_foreach (l, tm->listener_pool, ({
+ if ((n = vec_len (l->event_connections[is_ip6])) > 0)
+ {
+ if (l->event_function)
+ l->event_function (l->event_connections[is_ip6],
+ TCP_EVENT_connection_established);
+ /* First established connection: start polling the output node. */
+ if (tm->n_established_connections[is_ip6] == 0)
+ vlib_node_set_state (vm, tm46->output_node_index, VLIB_NODE_STATE_POLLING);
+ tm->n_established_connections[is_ip6] += n;
+ _vec_len (l->event_connections[is_ip6]) = 0;
+ }
+ }));
+ }
+
+ return frame->n_vectors;
+}
+
+/* IPv4 instance of the establish node function. */
+static uword
+ip4_tcp_establish (vlib_main_t * vm,
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * frame)
+{
+  const uword is_ip6 = 0;
+
+  return ip46_tcp_establish (vm, node, frame, is_ip6);
+}
+
+/* IPv6 instance of the establish node function. */
+static uword
+ip6_tcp_establish (vlib_main_t * vm,
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * frame)
+{
+  const uword is_ip6 = 1;
+
+  return ip46_tcp_establish (vm, node, frame, is_ip6);
+}
+
+/* Registration of the "ip4-tcp-establish" graph node.  Buffers either go
+   to "error-drop" or on to "ip4-tcp-established". */
+VLIB_REGISTER_NODE (ip4_tcp_establish_node,static) = {
+ .function = ip4_tcp_establish,
+ .name = "ip4-tcp-establish",
+
+ /* Frame elements are u32 sized. */
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = TCP_ESTABLISH_N_NEXT,
+ .next_nodes = {
+ [TCP_ESTABLISH_NEXT_DROP] = "error-drop",
+ [TCP_ESTABLISH_NEXT_ESTABLISHED] = "ip4-tcp-established",
+ },
+};
+
+/* Registration of the "ip6-tcp-establish" graph node.  Buffers either go
+   to "error-drop" or on to "ip6-tcp-established". */
+VLIB_REGISTER_NODE (ip6_tcp_establish_node,static) = {
+ .function = ip6_tcp_establish,
+ .name = "ip6-tcp-establish",
+
+ /* Frame elements are u32 sized. */
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = TCP_ESTABLISH_N_NEXT,
+ .next_nodes = {
+ [TCP_ESTABLISH_NEXT_DROP] = "error-drop",
+ [TCP_ESTABLISH_NEXT_ESTABLISHED] = "ip6-tcp-established",
+ },
+};
+
+/* Release one established connection: invalidate slot (IEST0 % 4) of
+   entry (IEST0 / 4) in the established connection address hash of the
+   address family selected by IS_IP6.  VM is not used. */
+static_always_inline void
+tcp_free_connection_x1 (vlib_main_t * vm, tcp_main_t * tm,
+                        tcp_ip_4_or_6_t is_ip6,
+                        u32 iest0)
+{
+  u32 group0 = iest0 / 4;
+  u32 slot0 = iest0 % 4;
+  ip46_tcp_main_t * af_main;
+  tcp_connection_t * conn0;
+
+  if (! is_ip6)
+    {
+      ip4_tcp_udp_address_x4_t * a4;
+
+      af_main = &tm->ip4;
+      a4 = vec_elt_at_index (tm->ip4_established_connection_address_hash, group0);
+      ip4_tcp_udp_address_x4_invalidate (a4, slot0);
+    }
+  else
+    {
+      ip6_tcp_udp_address_x4_t * a6;
+
+      af_main = &tm->ip6;
+      a6 = vec_elt_at_index (tm->ip6_established_connection_address_hash, group0);
+      ip6_tcp_udp_address_x4_invalidate (a6, slot0);
+    }
+
+  /* The connection itself is looked up but not modified here. */
+  conn0 = vec_elt_at_index (af_main->established_connections, iest0);
+}
+
+/* Release two established connections: same as freeing IEST0 and then
+   IEST1 with tcp_free_connection_x1. */
+static_always_inline void
+tcp_free_connection_x2 (vlib_main_t * vm, tcp_main_t * tm,
+                        tcp_ip_4_or_6_t is_ip6,
+                        u32 iest0, u32 iest1)
+{
+  const u32 pair[2] = { iest0, iest1 };
+  uword k;
+
+  for (k = 0; k < ARRAY_LEN (pair); k++)
+    tcp_free_connection_x1 (vm, tm, is_ip6, pair[k]);
+}
+
+/* Common body of the ip4/ip6 "tcp-output" nodes.
+
+   Part one walks all listeners and, for the address family selected by
+   IS_IP6:
+   - delivers queued end-of-file connections through the listener's
+     event_function (TCP_EVENT_fin_received); a listener without an event
+     function gets those connections flagged as closed by the application
+     instead;
+   - delivers queued closed connections (TCP_EVENT_connection_closed),
+     subtracts them from the established connection count, disables the
+     output node when the count reaches zero and frees the connections.
+
+   Part two sends one packet for every connection queued on
+   connections_pending_acks: an ACK, or a FIN+ACK when the application has
+   requested close and no FIN has been sent yet.  Packets are built from
+   the packet templates with checksums updated incrementally.
+
+   FRAME is not used.  Returns the number of packets generated. */
+static_always_inline uword
+ip46_tcp_output (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ tcp_ip_4_or_6_t is_ip6)
+{
+ tcp_main_t * tm = &tcp_main;
+ ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
+ u32 * cis, * to_next, n_left_to_next, n_connections_left;
+ u32 timestamp_now_host_byte_order, timestamp_now_net_byte_order;
+ vlib_node_runtime_t * error_node;
+ const u32 next = 0;
+ uword n_acks;
+
+ /* Inform listeners of connections that saw a FIN or have been closed. */
+ {
+ tcp_listener_t * l;
+ pool_foreach (l, tm->listener_pool, ({
+ /* The event vectors are kept per address family; test the vector of
+    this family, not the array that holds both. */
+ if (vec_len (l->eof_connections[is_ip6]) > 0)
+ {
+ if (l->event_function)
+ l->event_function (l->eof_connections[is_ip6], TCP_EVENT_fin_received);
+ else
+ {
+ /* Nobody to tell: close our side right away. */
+ uword i;
+ for (i = 0; i < vec_len (l->eof_connections[is_ip6]); i++)
+ {
+ tcp_connection_t * c = tcp_get_connection (l->eof_connections[is_ip6][i]);
+ c->flags |= TCP_CONNECTION_FLAG_application_requested_close;
+ }
+ }
+ _vec_len (l->eof_connections[is_ip6]) = 0;
+ }
+
+ if (vec_len (l->close_connections[is_ip6]) > 0)
+ {
+ uword n_left;
+ u32 * cis;
+
+ if (l->event_function)
+ l->event_function (l->close_connections[is_ip6], TCP_EVENT_connection_closed);
+
+ cis = l->close_connections[is_ip6];
+ n_left = vec_len (cis);
+ ASSERT (tm->n_established_connections[is_ip6] >= n_left);
+ tm->n_established_connections[is_ip6] -= n_left;
+ /* Last established connection gone: stop polling this node. */
+ if (tm->n_established_connections[is_ip6] == 0)
+ vlib_node_set_state (vm, tm46->output_node_index, VLIB_NODE_STATE_DISABLED);
+ /* NOTE(review): the vector holds connection handles while the free
+    routines take a connection index - confirm the two agree. */
+ while (n_left >= 2)
+ {
+ tcp_free_connection_x2 (vm, tm, is_ip6, cis[0], cis[1]);
+ n_left -= 2;
+ cis += 2;
+ }
+
+ while (n_left > 0)
+ {
+ tcp_free_connection_x1 (vm, tm, is_ip6, cis[0]);
+ n_left -= 1;
+ cis += 1;
+ }
+
+ _vec_len (l->close_connections[is_ip6]) = 0;
+ }
+ }));
+ }
+
+ n_acks = 0;
+ cis = tm46->connections_pending_acks;
+ n_connections_left = vec_len (cis);
+ if (n_connections_left == 0)
+ return n_acks;
+ /* The pending list is consumed completely below. */
+ _vec_len (tm46->connections_pending_acks) = 0;
+ error_node = vlib_node_get_runtime
+ (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
+
+ timestamp_now_host_byte_order = tcp_time_now (tm, TCP_TIMER_timestamp);
+ timestamp_now_net_byte_order = clib_host_to_net_u32 (timestamp_now_host_byte_order);
+
+ while (n_connections_left > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_connections_left > 0 && n_left_to_next > 0)
+ {
+ tcp_connection_t * est0;
+ tcp_ack_packet_t * tcp0;
+ tcp_udp_ports_t * ports0;
+ ip_csum_t tcp_sum0;
+ tcp_packet_template_type_t template_type0;
+ u32 bi0, iest0, iest_div0, iest_mod0, my_seq_net0, his_seq_net0;
+ u8 is_fin0;
+
+ iest0 = cis[0];
+ cis += 1;
+ /* Address hash entries hold four connections each. */
+ iest_div0 = iest0 / 4;
+ iest_mod0 = iest0 % 4;
+ est0 = vec_elt_at_index (tm46->established_connections, iest0);
+
+ /* Send a FIN along with our ACK if application closed connection. */
+ {
+ u8 is_closed0, fin_sent0;
+
+ is_closed0 = (est0->flags & TCP_CONNECTION_FLAG_application_requested_close) != 0;
+ fin_sent0 = (est0->flags & TCP_CONNECTION_FLAG_fin_sent) != 0;
+
+ is_fin0 = is_closed0 && ! fin_sent0;
+ template_type0 =
+ (is_fin0
+ ? TCP_PACKET_TEMPLATE_FIN_ACK
+ : TCP_PACKET_TEMPLATE_ACK);
+ /* Branch-free: sets the fin_sent flag iff the application closed. */
+ est0->flags |= is_closed0 << LOG2_TCP_CONNECTION_FLAG_fin_sent;
+ }
+
+ if (is_ip6)
+ {
+ ip6_tcp_ack_packet_t * r0;
+ ip6_tcp_udp_address_x4_t * esta0;
+ uword tmp0, i;
+
+ esta0 = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest_div0);
+ r0 = vlib_packet_template_get_packet
+ (vm, &tm->ip6.packet_templates[template_type0].vlib, &bi0);
+ tcp0 = &r0->tcp;
+
+ tcp_sum0 = (tm->ip6.packet_templates[template_type0]
+ .tcp_checksum_net_byte_order);
+
+ /* The hash stores the peer's view; swap source and destination. */
+ for (i = 0; i < ARRAY_LEN (r0->ip6.src_address.as_u32); i++)
+ {
+ tmp0 = r0->ip6.src_address.as_u32[i] = esta0->dst.as_u32[i][iest_mod0];
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0);
+
+ tmp0 = r0->ip6.dst_address.as_u32[i] = esta0->src.as_u32[i][iest_mod0];
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, tmp0);
+ }
+
+ ports0 = &esta0->ports.as_ports[iest_mod0];
+ }
+ else
+ {
+ ip4_tcp_ack_packet_t * r0;
+ ip4_tcp_udp_address_x4_t * esta0;
+ ip_csum_t ip_sum0;
+ u32 src0, dst0;
+
+ esta0 = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest_div0);
+ r0 = vlib_packet_template_get_packet
+ (vm, &tm->ip4.packet_templates[template_type0].vlib, &bi0);
+ tcp0 = &r0->tcp;
+
+ ip_sum0 = (tm->ip4.packet_templates[template_type0]
+ .ip4_checksum_net_byte_order);
+ tcp_sum0 = (tm->ip4.packet_templates[template_type0]
+ .tcp_checksum_net_byte_order);
+
+ /* The hash stores the peer's view; swap source and destination. */
+ src0 = r0->ip4.src_address.as_u32 = esta0->dst.as_ip4_address[iest_mod0].as_u32;
+ dst0 = r0->ip4.dst_address.as_u32 = esta0->src.as_ip4_address[iest_mod0].as_u32;
+
+ ip_sum0 = ip_csum_add_even (ip_sum0, src0);
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, src0);
+
+ ip_sum0 = ip_csum_add_even (ip_sum0, dst0);
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, dst0);
+
+ r0->ip4.checksum = ip_csum_fold (ip_sum0);
+
+ ASSERT (r0->ip4.checksum == ip4_header_checksum (&r0->ip4));
+ ports0 = &esta0->ports.as_ports[iest_mod0];
+ }
+
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, ports0->as_u32);
+ tcp0->header.ports.src = ports0->dst;
+ tcp0->header.ports.dst = ports0->src;
+
+ my_seq_net0 = clib_host_to_net_u32 (est0->sequence_numbers.ours);
+ his_seq_net0 = clib_host_to_net_u32 (est0->sequence_numbers.his);
+
+ /* FIN accounts for 1 sequence number. */
+ est0->sequence_numbers.ours += is_fin0;
+
+ tcp0->header.seq_number = my_seq_net0;
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, my_seq_net0);
+
+ tcp0->header.ack_number = his_seq_net0;
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, his_seq_net0);
+
+ est0->time_stamps.ours_host_byte_order = timestamp_now_host_byte_order;
+ tcp0->options.time_stamp.my_time_stamp = timestamp_now_net_byte_order;
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, timestamp_now_net_byte_order);
+
+ tcp0->options.time_stamp.his_time_stamp = est0->time_stamps.his_net_byte_order;
+ tcp_sum0 = ip_csum_add_even (tcp_sum0, est0->time_stamps.his_net_byte_order);
+
+ tcp0->header.checksum = ip_csum_fold (tcp_sum0);
+
+ est0->flags &= ~TCP_CONNECTION_FLAG_ack_pending;
+
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_connections_left -= 1;
+ n_acks += 1;
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ vlib_error_count (vm, error_node->node_index, TCP_ERROR_ACKS_SENT, n_acks);
+
+ return n_acks;
+}
+
+/* IPv4 instance of the output node function. */
+static uword
+ip4_tcp_output (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * frame)
+{
+  const tcp_ip_4_or_6_t is_ip6 = 0;
+
+  return ip46_tcp_output (vm, node, frame, is_ip6);
+}
+
+/* IPv6 instance of the output node function. */
+static uword
+ip6_tcp_output (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * frame)
+{
+  const tcp_ip_4_or_6_t is_ip6 = 1;
+
+  return ip46_tcp_output (vm, node, frame, is_ip6);
+}
+
+/* Registration of the "ip4-tcp-output" node.  It is an input type node
+   that starts out disabled.  Its single next is "ip4-input" when
+   CLIB_DEBUG > 0 and "ip4-lookup" otherwise. */
+VLIB_REGISTER_NODE (ip4_tcp_output_node,static) = {
+ .function = ip4_tcp_output,
+ .name = "ip4-tcp-output",
+ .state = VLIB_NODE_STATE_DISABLED,
+ .type = VLIB_NODE_TYPE_INPUT,
+
+ /* Frame elements are u32 sized. */
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = CLIB_DEBUG > 0 ? "ip4-input" : "ip4-lookup",
+ },
+};
+
+/* Registration of the "ip6-tcp-output" node.  It is an input type node
+   that starts out disabled.  Its single next is "ip6-input" when
+   CLIB_DEBUG > 0 and "ip6-lookup" otherwise. */
+VLIB_REGISTER_NODE (ip6_tcp_output_node,static) = {
+ .function = ip6_tcp_output,
+ .name = "ip6-tcp-output",
+ .state = VLIB_NODE_STATE_DISABLED,
+ .type = VLIB_NODE_TYPE_INPUT,
+
+ /* Frame elements are u32 sized. */
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = CLIB_DEBUG > 0 ? "ip6-input" : "ip6-lookup",
+ },
+};
+
+/* Account for N_BYTES newly acknowledged by the peer.  Currently only
+   checks that nothing was acknowledged; TM and C are not used. */
+static_always_inline void
+tcp_ack (tcp_main_t * tm, tcp_connection_t * c, u32 n_bytes)
+{
+  ASSERT (0 == n_bytes);
+}
+
+/* Statically known next-node slots of the tcp-established graph nodes. */
+typedef enum {
+ TCP_ESTABLISHED_NEXT_DROP,
+ /* Number of static next slots. */
+ TCP_ESTABLISHED_N_NEXT,
+} tcp_established_next_t;
+
+/* Common body of the ip4/ip6 "tcp-established" graph nodes: processes
+   segments that belong to an established connection.
+
+   For every buffer in FRAME:
+   - the segment's sequence number must equal the one expected from the
+     peer, and the amount it acknowledges must not exceed the
+     connection's n_tx_unacked_bytes; otherwise the buffer is dropped with
+     the matching "unexpected" error;
+   - a FIN or data arriving after a FIN has already been received is
+     dropped with TCP_ERROR_SEGMENT_AFTER_FIN;
+   - otherwise the peer's window, the sequence numbers and the round trip
+     time statistics are updated, an ACK is scheduled when the segment
+     carried data or a FIN, and the listener's eof/close vectors are
+     updated;
+   - segments with data are advanced past the IP and TCP headers and
+     handed to the listener's next_index; everything else goes to DROP.
+
+   Returns the number of buffers in FRAME. */
+static_always_inline uword
+ip46_tcp_established (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ tcp_ip_4_or_6_t is_ip6)
+{
+ tcp_main_t * tm = &tcp_main;
+ ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
+ uword n_packets = frame->n_vectors;
+ u32 * from, * to_next;
+ u32 n_left_from, n_left_to_next, next, timestamp_now;
+ vlib_node_runtime_t * error_node;
+
+ /* Errors are counted against the tcp-lookup node of the same family. */
+ error_node = vlib_node_get_runtime
+ (vm, is_ip6 ? ip6_tcp_lookup_node.index : ip4_tcp_lookup_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next = node->cached_next_index;
+ timestamp_now = tcp_time_now (tm, TCP_TIMER_timestamp);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ ip6_header_t * ip60;
+ ip4_header_t * ip40;
+ tcp_header_t * tcp0;
+ tcp_connection_t * est0;
+ tcp_listener_t * l0;
+ u32 bi0, iest0, n_data_bytes0, his_ack_host0, n_ack0;
+ u8 error0, next0, n_advance_bytes0, is_fin0, send_ack0;
+
+ /* Speculatively enqueue to the current next frame. */
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+
+ /* n_advance_bytes0: IP plus TCP header length.
+    n_data_bytes0: payload bytes following the TCP header. */
+ if (is_ip6)
+ {
+ ip60 = vlib_buffer_get_current (p0);
+ tcp0 = ip6_next_header (ip60);
+ ASSERT (ip60->protocol == IP_PROTOCOL_TCP);
+ n_advance_bytes0 = tcp_header_bytes (tcp0);
+ n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) - n_advance_bytes0;
+ n_advance_bytes0 += sizeof (ip60[0]);
+ }
+ else
+ {
+ ip40 = vlib_buffer_get_current (p0);
+ tcp0 = ip4_next_header (ip40);
+ n_advance_bytes0 = (ip4_header_bytes (ip40)
+ + tcp_header_bytes (tcp0));
+ n_data_bytes0 = clib_net_to_host_u16 (ip40->length) - n_advance_bytes0;
+ }
+
+ iest0 = vnet_buffer (p0)->ip.tcp.established_connection_index;
+ est0 = vec_elt_at_index (tm46->established_connections, iest0);
+
+ /* NOTE(review): error0 keeps this value on the success path too, even
+    when the segment carries data - confirm that is intended. */
+ error0 = TCP_ERROR_NO_DATA;
+ next0 = TCP_ESTABLISHED_NEXT_DROP;
+
+ if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->seq_number)
+ != est0->sequence_numbers.his))
+ goto unexpected_seq_number0;
+ /* Unsigned difference: an ack number behind ours wraps to a huge value
+    and is rejected as well. */
+ if (PREDICT_FALSE (clib_net_to_host_u32 (tcp0->ack_number) - est0->sequence_numbers.ours
+ > est0->n_tx_unacked_bytes))
+ goto unexpected_ack_number0;
+
+ is_fin0 = (tcp0->flags & TCP_FLAG_FIN) != 0;
+
+ if (PREDICT_FALSE ((est0->flags & TCP_CONNECTION_FLAG_fin_received)
+ && (is_fin0 || n_data_bytes0 > 0)))
+ goto already_received_fin0;
+
+ /* Update window. */
+ est0->his_window = clib_net_to_host_u16 (tcp0->window);
+
+ /* Update his sequence number to account for data he's just sent. */
+ est0->sequence_numbers.his += n_data_bytes0 + is_fin0;
+
+ his_ack_host0 = clib_net_to_host_u32 (tcp0->ack_number);
+ n_ack0 = his_ack_host0 - est0->sequence_numbers.ours;
+ tcp_ack (tm, est0, n_ack0);
+ est0->sequence_numbers.ours = his_ack_host0;
+
+ /* Take a round trip time sample when the echoed time stamp differs from
+    the one recorded for this connection. */
+ {
+ u32 t = tcp_options_decode_for_ack (tm, tcp0, &est0->time_stamps.his_net_byte_order);
+ if (t != est0->time_stamps.ours_host_byte_order)
+ {
+ f64 dt = (timestamp_now - t) * tm->secs_per_tick[TCP_TIMER_timestamp];
+ est0->round_trip_time_stats.sum += dt;
+ est0->round_trip_time_stats.sum2 += dt*dt;
+ est0->round_trip_time_stats.count += 1;
+ est0->time_stamps.ours_host_byte_order = t;
+
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "ack rtt: %.4e",
+ .format_args = "f8",
+ };
+ struct { f64 dt; } * ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->dt = dt;
+ }
+ }
+ }
+
+ /* An ACK is owed for data or a FIN unless one is already pending. */
+ send_ack0 = ((est0->flags & TCP_CONNECTION_FLAG_ack_pending) == 0
+ && (n_data_bytes0 > 0 || is_fin0));
+ /* Branch-free conditional append: the index is always appended and the
+    vector is shortened again unless send_ack0 is set. */
+ vec_add1 (tm46->connections_pending_acks, vnet_buffer (p0)->ip.tcp.established_connection_index);
+ _vec_len (tm46->connections_pending_acks) -= ! send_ack0;
+ est0->flags |= send_ack0 << LOG2_TCP_CONNECTION_FLAG_ack_pending;
+
+ est0->flags |= is_fin0 << LOG2_TCP_CONNECTION_FLAG_fin_received;
+
+ l0 = pool_elt_at_index (tm->listener_pool, vnet_buffer (p0)->ip.tcp.listener_index);
+
+ /* Same append-then-trim trick: the handle stays on eof_connections only
+    for a FIN, and on close_connections only when our FIN has been sent.
+    NOTE(review): every later segment on a connection with fin_sent set
+    appends the handle to close_connections again - confirm duplicates
+    are harmless. */
+ {
+ u32 ch0 = tcp_connection_handle_set (iest0, is_ip6);
+
+ vec_add1 (l0->eof_connections[is_ip6], ch0);
+ _vec_len (l0->eof_connections[is_ip6]) -= ! is_fin0;
+
+ vec_add1 (l0->close_connections[is_ip6], ch0);
+ _vec_len (l0->close_connections[is_ip6]) -= !(est0->flags & TCP_CONNECTION_FLAG_fin_sent);
+ }
+
+ /* Segments with payload go to the listener's data node. */
+ next0 = n_data_bytes0 > 0 ? l0->next_index : next0;
+
+ /* Strip the IP and TCP headers. */
+ vlib_buffer_advance (p0, n_advance_bytes0);
+
+ enqueue0:
+ p0->error = error_node->errors[error0];
+ /* Speculation failed: move the buffer to the frame of its real next. */
+ if (PREDICT_FALSE (next0 != next))
+ {
+ to_next -= 1;
+ n_left_to_next += 1;
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ continue;
+
+ unexpected_seq_number0:
+ next0 = TCP_ESTABLISHED_NEXT_DROP;
+ error0 = TCP_ERROR_UNEXPECTED_SEQ_NUMBER;
+ goto enqueue0;
+
+ unexpected_ack_number0:
+ next0 = TCP_ESTABLISHED_NEXT_DROP;
+ error0 = TCP_ERROR_UNEXPECTED_ACK_NUMBER;
+ goto enqueue0;
+
+ already_received_fin0:
+ next0 = TCP_ESTABLISHED_NEXT_DROP;
+ error0 = TCP_ERROR_SEGMENT_AFTER_FIN;
+ goto enqueue0;
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ /* FIXME */ ;
+
+ return frame->n_vectors;
+}
+
+/* IPv4 instance of the established node function. */
+static uword
+ip4_tcp_established (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * frame)
+{
+  const tcp_ip_4_or_6_t is_ip6 = 0;
+
+  return ip46_tcp_established (vm, node, frame, is_ip6);
+}
+
+/* IPv6 instance of the established node function. */
+static uword
+ip6_tcp_established (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * frame)
+{
+  const tcp_ip_4_or_6_t is_ip6 = 1;
+
+  return ip46_tcp_established (vm, node, frame, is_ip6);
+}
+
+VLIB_REGISTER_NODE (ip4_tcp_established_node,static) = {
+ .function = ip4_tcp_established,
+ .name = "ip4-tcp-established",
+
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
+ .next_nodes = {
+ [TCP_ESTABLISHED_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (ip6_tcp_established_node,static) = {
+ .function = ip6_tcp_established,
+ .name = "ip6-tcp-established",
+
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
+ .next_nodes = {
+ [TCP_ESTABLISHED_NEXT_DROP] = "error-drop",
+ },
+};
+
+/* Allocate a listener for registration R; returns its listener pool index. */
+uword
+tcp_register_listener (vlib_main_t * vm,
+                       tcp_listener_registration_t * r)
+{
+  tcp_main_t * tm = &tcp_main;
+  tcp_listener_t * l;
+  clib_error_t * error;
+
+  if ((error = vlib_call_init_function (vm, tcp_udp_lookup_init)))
+    clib_error_report (error);
+
+  pool_get_aligned (tm->listener_pool, l, CLIB_CACHE_LINE_BYTES);
+  memset (l, 0, sizeof (l[0]));
+
+  l->dst_port = r->port;
+  /* NOTE(review): the data node is only added as a next of the ip4 node. */
+  l->next_index = vlib_node_add_next (vm, ip4_tcp_established_node.index, r->data_node_index);
+  l->valid_local_adjacency_bitmap = 0;
+  l->flags = r->flags & (TCP_LISTENER_IP4 | TCP_LISTENER_IP6);
+  /* Keep the registrant's event callback; it used to be left zeroed. */
+  l->event_function = r->event_function;
+
+  tm->listener_index_by_dst_port[clib_host_to_net_u16 (l->dst_port)] = l - tm->listener_pool;
+
+  return l - tm->listener_pool;
+}
+
+static void
+tcp_udp_lookup_ip4_add_del_interface_address (ip4_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length,
+ u32 if_address_index,
+ u32 is_delete)
+{
+ tcp_main_t * tm = &tcp_main;
+
+ tm->ip4.default_valid_local_adjacency_bitmap
+ = clib_bitmap_set (tm->ip4.default_valid_local_adjacency_bitmap,
+ if_address_index,
+ is_delete ? 0 : 1);
+}
+
+static void
+tcp_udp_lookup_ip6_add_del_interface_address (ip6_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length,
+ u32 if_address_index,
+ u32 is_delete)
+{
+ tcp_main_t * tm = &tcp_main;
+
+ tm->ip6.default_valid_local_adjacency_bitmap
+ = clib_bitmap_set (tm->ip6.default_valid_local_adjacency_bitmap,
+ if_address_index,
+ is_delete ? 0 : 1);
+}
+
+static clib_error_t *
+tcp_udp_lookup_init (vlib_main_t * vm)
+{
+ tcp_main_t * tm = &tcp_main;
+ ip4_main_t * im4 = &ip4_main;
+ ip6_main_t * im6 = &ip6_main;
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
+ return error;
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ tcp_time_init (vm, tm);
+
+ {
+ ip4_add_del_interface_address_callback_t cb;
+
+ cb.function = tcp_udp_lookup_ip4_add_del_interface_address;
+ cb.function_opaque = 0;
+ vec_add1 (im4->add_del_interface_address_callbacks, cb);
+ }
+
+ {
+ ip6_add_del_interface_address_callback_t cb;
+
+ cb.function = tcp_udp_lookup_ip6_add_del_interface_address;
+ cb.function_opaque = 0;
+ vec_add1 (im6->add_del_interface_address_callbacks, cb);
+ }
+
+ tm->ip4.output_node_index = ip4_tcp_output_node.index;
+ tm->ip6.output_node_index = ip6_tcp_output_node.index;
+
+ tcp_lookup_init (vm, tm);
+ tcp_options_decode_init (tm);
+
+ tm->tx_buffer_free_list = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX;
+ tm->tx_buffer_free_list_n_buffer_bytes = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (tcp_udp_lookup_init);
+
+/* Format the age, in seconds, of 32-bit tick stamp VALUE of timer TYPE. */
+static u8 * format_tcp_time_stamp (u8 * s, va_list * va)
+{
+  tcp_timer_type_t type = va_arg (*va, tcp_timer_type_t);
+  u32 value = va_arg (*va, u32);
+  vlib_main_t * vm = vlib_get_main();
+  u8 shift = tcp_main.log2_clocks_per_tick[type];
+  /* Subtract in ticks modulo 2^32 (assumes stamps are cpu clocks >> shift),
+     then widen before shifting so no high bits of the clock count are lost. */
+  u32 dticks = (u32) (clib_cpu_time_now () >> shift) - value;
+
+  return format (s, "%.4e sec", vm->clib_time.seconds_per_clock * (f64) ((u64) dticks << shift));
+}
+
+static u8 * format_tcp_connection_state (u8 * s, va_list * va)
+{
+ tcp_connection_state_t st = va_arg (*va, tcp_connection_state_t);
+ char * t = 0;
+ switch (st)
+ {
+#define _(f) case TCP_CONNECTION_STATE_##f: t = #f; break;
+ foreach_tcp_connection_state
+#undef _
+ default: break;
+ }
+ if (t)
+ s = format (s, "%s", t);
+ else
+ s = format (s, "unknown 0x%x", st);
+
+ return s;
+}
+
+static u8 * format_tcp_ip_4_or_6 (u8 * s, va_list * va)
+{
+ tcp_ip_4_or_6_t is_ip6 = va_arg (*va, tcp_ip_4_or_6_t);
+ return format (s, "%s", is_ip6 ? "ip6" : "ip4");
+}
+
+static u8 * format_tcp_mini_connection (u8 * s, va_list * va)
+{
+ tcp_mini_connection_t * c = va_arg (*va, tcp_mini_connection_t *);
+
+ s = format (s, "state %U, window scale %d, mss %d",
+ format_tcp_connection_state, c->state,
+ c->window_scale, c->max_segment_size);
+
+ return s;
+}
+
+/* Format mini (not yet established) ip4 connection number IMIN (u32):
+   its address tuple, the age of its time stamp and its saved state.
+   Entries are stored 4 per address element: IMIN / 4 selects the element
+   and IMIN % 4 the slot inside it. */
+static u8 * format_ip4_tcp_mini_connection (u8 * s, va_list * va)
+{
+  u32 imin = va_arg (*va, u32);
+  u32 imin_div = imin / 4;
+  u32 imin_mod = imin % 4;
+  tcp_main_t * tm = &tcp_main;
+  tcp_mini_connection_t * min;
+  ip4_tcp_udp_address_x4_and_timestamps_t * mina;
+
+  mina = vec_elt_at_index (tm->ip4_mini_connection_address_hash, imin_div);
+  min = vec_elt_at_index (tm->ip4.mini_connections, imin);
+
+  /* Slot index, not element index: time_stamps[] has only 4 entries. */
+  s = format (s, "%U, age %U",
+              format_ip4_tcp_udp_address_x4, &mina->address_x4, imin_mod,
+              format_tcp_time_stamp, TCP_TIMER_mini_connection, mina->time_stamps[imin_mod]);
+
+  return format (s, "%U", format_tcp_mini_connection, min);
+}
+
+/* Format mini (not yet established) ip6 connection number IMIN (u32):
+   its address tuple, the age of its time stamp and its saved state.
+   Entries are stored 4 per address element: IMIN / 4 selects the element
+   and IMIN % 4 the slot inside it. */
+static u8 * format_ip6_tcp_mini_connection (u8 * s, va_list * va)
+{
+  u32 imin = va_arg (*va, u32);
+  u32 imin_div = imin / 4;
+  u32 imin_mod = imin % 4;
+  tcp_main_t * tm = &tcp_main;
+  tcp_mini_connection_t * min;
+  ip6_tcp_udp_address_x4_and_timestamps_t * mina;
+
+  mina = vec_elt_at_index (tm->ip6_mini_connection_address_hash, imin_div);
+  min = vec_elt_at_index (tm->ip6.mini_connections, imin);
+
+  /* Slot index, not element index: time_stamps[] has only 4 entries. */
+  s = format (s, "%U, age %U",
+              format_ip6_tcp_udp_address_x4, &mina->address_x4, imin_mod,
+              format_tcp_time_stamp, TCP_TIMER_mini_connection, mina->time_stamps[imin_mod]);
+
+  return format (s, "%U", format_tcp_mini_connection, min);
+}
+
+static u8 * format_tcp_established_connection (u8 * s, va_list * va)
+{
+ tcp_connection_t * c = va_arg (*va, tcp_connection_t *);
+
+ if (c->flags != 0)
+ {
+ s = format (s, ", flags: ");
+#define _(f) if (c->flags & TCP_CONNECTION_FLAG_##f) s = format (s, "%s, ", #f);
+ foreach_tcp_connection_flag;
+#undef _
+ }
+
+ if (tcp_round_trip_time_stats_is_valid (&c->round_trip_time_stats))
+ {
+ f64 r[2];
+ tcp_round_trip_time_stats_compute (&c->round_trip_time_stats, r);
+ s = format (s, ", rtt %.4e +- %.4e",
+ r[0], r[1]);
+ }
+
+ return s;
+}
+
+static u8 * format_ip4_tcp_established_connection (u8 * s, va_list * va)
+{
+ u32 iest = va_arg (*va, u32);
+ u32 iest_div, iest_mod;
+ tcp_main_t * tm = &tcp_main;
+ tcp_connection_t * est;
+ ip4_tcp_udp_address_x4_t * esta;
+
+ iest_div = iest / 4;
+ iest_mod = iest % 4;
+
+ esta = vec_elt_at_index (tm->ip4_established_connection_address_hash, iest_div);
+ est = vec_elt_at_index (tm->ip4.established_connections, iest);
+
+ s = format (s, "%U%U",
+ format_ip4_tcp_udp_address_x4, esta, iest_mod,
+ format_tcp_established_connection, est);
+
+ return s;
+}
+
+static u8 * format_ip6_tcp_established_connection (u8 * s, va_list * va)
+{
+ u32 iest = va_arg (*va, u32);
+ u32 iest_div, iest_mod;
+ tcp_main_t * tm = &tcp_main;
+ tcp_connection_t * est;
+ ip6_tcp_udp_address_x4_t * esta;
+
+ iest_div = iest / 4;
+ iest_mod = iest % 4;
+
+ esta = vec_elt_at_index (tm->ip6_established_connection_address_hash, iest_div);
+ est = vec_elt_at_index (tm->ip6.established_connections, iest);
+
+ s = format (s, "%U%U",
+ format_ip6_tcp_udp_address_x4, esta, iest_mod,
+ format_tcp_established_connection, est);
+
+ return s;
+}
+
+VLIB_CLI_COMMAND (vlib_cli_show_tcp_command, static) = {
+ .path = "show tcp",
+ .short_help = "Transmission control protocol (TCP) show commands",
+};
+
+/* CLI handler for "show tcp mini-connections": print each valid entry of the
+   ip4 mini connection table (default, or "4") or of the ip6 one ("6"). */
+static clib_error_t *
+show_mini_connections (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  tcp_main_t * tm = &tcp_main;
+  ip46_tcp_main_t * tm46;
+  tcp_ip_4_or_6_t is_ip6 = TCP_IP4;
+  tcp_mini_connection_t * min;
+  ip6_tcp_udp_address_x4_and_timestamps_t * mina6;
+  ip4_tcp_udp_address_x4_and_timestamps_t * mina4;
+  uword i, i0, i1, n_valid;
+
+  if (unformat (input, "4"))
+    is_ip6 = TCP_IP4;
+  if (unformat (input, "6"))
+    is_ip6 = TCP_IP6;
+
+  n_valid = 0;
+  tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
+  for (i = 0; i <= tm46->mini_connection_hash_mask; i++)
+    {
+      i0 = i / 4;
+      i1 = i % 4;
+      min = vec_elt_at_index (tm46->mini_connections, i);
+      /* Formatters fetch the index as u32 (hence the casts) and are per family. */
+      if (is_ip6)
+        {
+          mina6 = vec_elt_at_index (tm->ip6_mini_connection_address_hash, i0);
+          if (ip6_tcp_udp_address_x4_is_valid (&mina6->address_x4, i1))
+            {
+              vlib_cli_output (vm, "%U", format_ip6_tcp_mini_connection, (u32) i);
+              n_valid += 1;
+            }
+        }
+      else
+        {
+          mina4 = vec_elt_at_index (tm->ip4_mini_connection_address_hash, i0);
+          if (ip4_tcp_udp_address_x4_is_valid (&mina4->address_x4, i1))
+            {
+              vlib_cli_output (vm, "%U", format_ip4_tcp_mini_connection, (u32) i);
+              n_valid += 1;
+            }
+        }
+    }
+
+  if (n_valid == 0)
+    vlib_cli_output (vm, "no %U mini tcp connections", format_tcp_ip_4_or_6, is_ip6);
+  return 0;
+}
+
+VLIB_CLI_COMMAND (vlib_cli_show_tcp_mini_connections_command) = {
+ .path = "show tcp mini-connections",
+ .short_help = "Show not-yet established TCP connections",
+ .function = show_mini_connections,
+};
+
+/* CLI handler for "show tcp connections": print each valid entry of the ip4
+   established connection table (default, or "4") or of the ip6 one ("6"). */
+static clib_error_t *
+show_established_connections (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  tcp_main_t * tm = &tcp_main;
+  ip46_tcp_main_t * tm46;
+  tcp_ip_4_or_6_t is_ip6 = TCP_IP4;
+  tcp_connection_t * est;
+  ip6_tcp_udp_address_x4_t * esta6;
+  ip4_tcp_udp_address_x4_t * esta4;
+  uword i, i0, i1, n_valid;
+
+  if (unformat (input, "4"))
+    is_ip6 = TCP_IP4;
+  if (unformat (input, "6"))
+    is_ip6 = TCP_IP6;
+
+  n_valid = 0;
+  tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
+  for (i = 0; i < vec_len (tm46->established_connections); i++)
+    {
+      i0 = i / 4;
+      i1 = i % 4;
+      est = vec_elt_at_index (tm46->established_connections, i);
+      /* The formatters fetch the index with va_arg (*va, u32): pass a u32. */
+      if (is_ip6)
+        {
+          esta6 = vec_elt_at_index (tm->ip6_established_connection_address_hash, i0);
+          if (ip6_tcp_udp_address_x4_is_valid (esta6, i1))
+            {
+              vlib_cli_output (vm, "%U", format_ip6_tcp_established_connection, (u32) i);
+              n_valid += 1;
+            }
+        }
+      else
+        {
+          esta4 = vec_elt_at_index (tm->ip4_established_connection_address_hash, i0);
+          if (ip4_tcp_udp_address_x4_is_valid (esta4, i1))
+            {
+              vlib_cli_output (vm, "%U", format_ip4_tcp_established_connection, (u32) i);
+              n_valid += 1;
+            }
+        }
+    }
+
+  if (n_valid == 0)
+    vlib_cli_output (vm, "no %U established tcp connections", format_tcp_ip_4_or_6, is_ip6);
+  return 0;
+}
+
+VLIB_CLI_COMMAND (vlib_cli_show_tcp_established_connections_command, static) = {
+ .path = "show tcp connections",
+ .short_help = "Show established TCP connections",
+ .function = show_established_connections,
+};
+
+#if 0
+uword
+tcp_write (vlib_main_t * vm, u32 connection_handle, void * data, uword n_data_bytes)
+{
+ tcp_main_t * tm = &tcp_main;
+ tcp_ip_4_or_6_t is_ip6 = tcp_connection_is_ip6 (connection_handle);
+ ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
+ tcp_connection_t * c = vec_elt_at_index (tm46->established_connections, connection_handle / 2);
+ vlib_buffer_t * b;
+ u32 bi, bi_next, bi_start_of_packet;
+ ip_csum_t sum;
+
+ b = 0;
+ bi = c->write_tail_buffer_index;
+ n_bytes_left_tail = 0;
+ if (bi != 0)
+ {
+ b = vlib_get_buffer (vm, bi);
+ n_bytes_left_tail = tm->tx_buffer_free_list_n_buffer_bytes - b->current_length;
+ }
+
+ n_bytes_this_packet = c->write_tail_packet.n_data_bytes;
+ n_bytes_left_packet = c->max_segment_size - n_bytes_this_packet;
+
+ n_data_left = n_data_bytes;
+ sum = c->write_tail_packet.data_ip_checksum;
+
+ while (n_data_left > 0)
+ {
+ u32 n_copy;
+
+ if (n_bytes_left_tail == 0)
+ {
+ if (! vlib_buffer_alloc_from_free_list (vm, &bi_next, 1,
+ tm->tx_buffer_free_list))
+ return n_data_bytes - n_data_left;
+
+ bi_start_of_packet = bi_next;
+ if (b)
+ {
+ b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b->next_buffer = bi_next;
+ bi_start_of_packet = b->opaque[0];
+ }
+ bi = bi_next;
+ b = vlib_get_buffer (vm, bi);
+
+ /* Save away start of packet buffer in opaque. */
+ b->opaque[0] = bi_start_of_packet;
+
+ c->tail_buffer.buffer_index = bi;
+ n_bytes_left_tail = tm->tx_buffer_free_list_n_buffer_bytes;
+ }
+
+ n_copy = n_data_left;
+ n_copy = clib_min (n_copy, n_bytes_left_tail);
+ n_copy = clib_min (n_copy, n_bytes_left_packet);
+
+ sum = ip_csum_and_memcpy (sum, b->data + b->current_length,
+ data, n_copy);
+
+ b->current_length += n_copy;
+ n_bytes_left_tail -= n_copy;
+ n_bytes_left_packet -= n_copy;
+ n_data_left -=- n_copy;
+ n_bytes_this_packet += n_copy;
+
+ if (n_bytes_left_packet == 0)
+ {
+ bi_start_of_packet = b->opaque[0];
+
+ if (c->tail_packet.buffer_index != 0)
+ {
+ vlib_buffer_t * p = vlib_get_buffer (vm, c->tail_packet.buffer_index);
+ tcp_buffer_t * next = vlib_get_buffer_opaque (p);
+ next[0] = c->;
+ }
+ c->tail_packet.buffer_index = bi_start_of_packet;
+ }
+ }
+
+ c->tail_buffer.buffer_index = bi;
+ c->tail_buffer.n_data_bytes = n_bytes_this_packet;
+ c->tail_buffer.data_ip_checksum = ip_csum_fold (sum);
+
+ return 0;
+}
+#endif
diff --git a/vnet/vnet/ip/tcp.h b/vnet/vnet/ip/tcp.h
new file mode 100644
index 00000000000..98d8e34f0d5
--- /dev/null
+++ b/vnet/vnet/ip/tcp.h
@@ -0,0 +1,396 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/tcp.h: tcp protocol
+ *
+ * Copyright (c) 2011 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_tcp_protocol_h
+#define included_tcp_protocol_h
+
+#include <vppinfra/vector.h>
+
+/* No support for e.g. Altivec. */
+#if defined (__SSE2__)
+#define TCP_HAVE_VEC128
+#endif
+
+typedef union {
+ struct {
+ u16 src, dst;
+ };
+ u32 as_u32;
+} tcp_udp_ports_t;
+
+typedef union {
+#ifdef TCP_HAVE_VEC128
+ u32x4 as_u32x4;
+#endif
+ tcp_udp_ports_t as_ports[4];
+} tcp_udp_ports_x4_t;
+
+typedef struct {
+ union {
+#ifdef TCP_HAVE_VEC128
+ u32x4 as_u32x4;
+#endif
+ ip4_address_t as_ip4_address[4];
+ } src, dst;
+ tcp_udp_ports_x4_t ports;
+} ip4_tcp_udp_address_x4_t;
+
+typedef struct {
+ union {
+#ifdef TCP_HAVE_VEC128
+ u32x4 as_u32x4[4];
+#endif
+ u32 as_u32[4][4];
+ } src, dst;
+ tcp_udp_ports_x4_t ports;
+} ip6_tcp_udp_address_x4_t;
+
+typedef struct {
+ u32 his, ours;
+} tcp_sequence_pair_t;
+
+/* Time stamps saved from options. */
+typedef struct {
+ u32 ours_host_byte_order, his_net_byte_order;
+} tcp_time_stamp_pair_t;
+
+typedef struct {
+ ip4_tcp_udp_address_x4_t address_x4;
+ u32 time_stamps[4];
+} ip4_tcp_udp_address_x4_and_timestamps_t;
+
+typedef struct {
+ ip6_tcp_udp_address_x4_t address_x4;
+ u32 time_stamps[4];
+} ip6_tcp_udp_address_x4_and_timestamps_t;
+
+#define foreach_tcp_connection_state \
+ /* unused */ \
+ _ (unused) \
+ /* Sent SYN-ACK waiting for ACK if he ever feels like sending one. */ \
+ _ (listen_ack_wait) \
+ /* Sent SYN waiting for ACK or RST. */ \
+ _ (connecting) \
+ /* Pseudo-type for established connections. */ \
+ _ (established)
+
+typedef enum {
+#define _(f) TCP_CONNECTION_STATE_##f,
+ foreach_tcp_connection_state
+#undef _
+ TCP_N_CONNECTION_STATE,
+} tcp_connection_state_t;
+
+/* Kept small to fight off syn flood attacks. */
+typedef struct {
+ tcp_sequence_pair_t sequence_numbers;
+
+ tcp_time_stamp_pair_t time_stamps;
+
+ /* segment size and window scale (saved from options
+ or set to defaults). */
+ u16 max_segment_size;
+
+ u8 window_scale;
+
+ tcp_connection_state_t state : 8;
+} tcp_mini_connection_t;
+
+typedef struct {
+ /* Sum and sum^2 of measurements.
+ Used to compute average and RMS. */
+ f64 sum, sum2;
+
+ /* Number of measurements. */
+ f64 count;
+} tcp_round_trip_time_stats_t;
+
+typedef struct {
+ u32 first_buffer_index_this_packet;
+
+ u16 data_ip_checksum;
+
+ u16 n_data_bytes;
+} tcp_tx_packet_t;
+
+typedef struct {
+ tcp_sequence_pair_t sequence_numbers;
+
+ tcp_time_stamp_pair_t time_stamps;
+
+ tcp_tx_packet_t head_packet, tx_tail_packet, write_tail_packet;
+
+ u32 write_tail_buffer_index;
+
+ tcp_round_trip_time_stats_t round_trip_time_stats;
+
+ /* Number of un-acknowledged bytes we've sent. */
+ u32 n_tx_unacked_bytes;
+
+ /* segment size and window scale (saved from options
+ or set to defaults). */
+ u16 max_segment_size;
+
+ /* Window from latest received packet. */
+ u16 his_window;
+
+ u16 my_window;
+
+ u8 his_window_scale;
+
+ u8 my_window_scale;
+
+ /* ip4/ip6 tos/ttl to use for packets we send. */
+ u8 tos, ttl;
+
+ u16 flags;
+#define foreach_tcp_connection_flag \
+ _ (ack_pending) \
+ _ (fin_received) \
+ _ (fin_sent) \
+ _ (application_requested_close)
+
+ u8 listener_opaque[128
+ - 1 * sizeof (tcp_sequence_pair_t)
+ - 1 * sizeof (tcp_time_stamp_pair_t)
+ - 3 * sizeof (tcp_tx_packet_t)
+ - 1 * sizeof (tcp_round_trip_time_stats_t)
+ - 2 * sizeof (u32)
+ - 4 * sizeof (u16)
+ - 4 * sizeof (u8)];
+} tcp_connection_t;
+
+typedef enum {
+ TCP_IP4,
+ TCP_IP6,
+ TCP_N_IP46,
+} tcp_ip_4_or_6_t;
+
+typedef enum {
+#define _(f) LOG2_TCP_CONNECTION_FLAG_##f,
+ foreach_tcp_connection_flag
+#undef _
+ N_TCP_CONNECTION_FLAG,
+#define _(f) TCP_CONNECTION_FLAG_##f = 1 << LOG2_TCP_CONNECTION_FLAG_##f,
+ foreach_tcp_connection_flag
+#undef _
+} tcp_connection_flag_t;
+
+typedef enum {
+ TCP_PACKET_TEMPLATE_SYN,
+ TCP_PACKET_TEMPLATE_SYN_ACK,
+ TCP_PACKET_TEMPLATE_ACK,
+ TCP_PACKET_TEMPLATE_FIN_ACK,
+ TCP_PACKET_TEMPLATE_RST_ACK,
+ TCP_N_PACKET_TEMPLATE,
+} tcp_packet_template_type_t;
+
+typedef struct {
+ vlib_packet_template_t vlib;
+
+ /* TCP checksum of template with zeros for all
+ variable fields. Network byte order. */
+ u16 tcp_checksum_net_byte_order;
+
+ /* IP4 checksum. */
+ u16 ip4_checksum_net_byte_order;
+} tcp_packet_template_t;
+
+typedef struct { /* TCP state kept once per address family; tcp_main_t has an ip4 and an ip6 instance. */
+ u8 log2_n_mini_connection_hash_elts;
+ u8 log2_n_established_connection_hash_elts;
+ u8 is_ip6;
+
+ u32 mini_connection_hash_mask; /* show_mini_connections walks mini_connections[0 .. mask] inclusive */
+ u32 established_connection_hash_mask;
+
+ uword * established_connection_overflow_hash;
+
+ tcp_mini_connection_t * mini_connections;
+
+ tcp_connection_t * established_connections;
+
+ /* Vector of established connection indices which need ACKs sent. */
+ u32 * connections_pending_acks;
+
+ /* Default valid_local_adjacency_bitmap for listeners that listen for a port
+ on all interfaces; one bit per interface address index (if_address_index). */
+ uword * default_valid_local_adjacency_bitmap;
+
+ u32 output_node_index; /* ip4/ip6_tcp_output_node.index, set by tcp_udp_lookup_init */
+
+ tcp_packet_template_t packet_templates[TCP_N_PACKET_TEMPLATE];
+} ip46_tcp_main_t;
+
+#define foreach_tcp_event \
+ /* Received a SYN-ACK after sending a SYN to connect. */ \
+ _ (connection_established) \
+ /* Received a reset (RST) after sending a SYN to connect. */ \
+ _ (connect_failed) \
+ /* Received a FIN from an established connection. */ \
+ _ (fin_received) \
+ _ (connection_closed) \
+ /* Received a reset RST from an established connection. */ \
+ _ (reset_received)
+
+typedef enum {
+#define _(f) TCP_EVENT_##f,
+ foreach_tcp_event
+#undef _
+} tcp_event_type_t;
+
+typedef void (tcp_event_function_t)
+ (u32 * connections,
+ tcp_event_type_t event_type);
+
+typedef struct {
+ /* Bitmap indicating which of local (interface) addresses
+ we should listen on for this destination port. */
+ uword * valid_local_adjacency_bitmap;
+
+ /* Destination tcp/udp port to listen for connections. */
+ u16 dst_port;
+
+ u16 next_index; /* vlib_node_add_next result for the registration's data_node_index */
+
+ u32 flags; /* TCP_LISTENER_IP4 / TCP_LISTENER_IP6 bits taken from the registration */
+
+ /* Connection indices to which the event passed to event_function applies. */
+ u32 * event_connections[TCP_N_IP46];
+ u32 * eof_connections[TCP_N_IP46];
+ u32 * close_connections[TCP_N_IP46];
+
+ tcp_event_function_t * event_function;
+} tcp_listener_t;
+
+typedef struct {
+ u8 next, error;
+} tcp_lookup_disposition_t;
+
+#define foreach_tcp_timer \
+ /* Used to rank mini connections. */ \
+ _ (mini_connection, 10e-3) \
+ /* Used for timestamps. */ \
+ _ (timestamp, 1e-6)
+
+typedef enum {
+#define _(f,s) TCP_TIMER_##f,
+ foreach_tcp_timer
+#undef _
+ TCP_N_TIMER,
+} tcp_timer_type_t;
+
+typedef struct {
+ ip46_tcp_main_t ip4, ip6;
+
+ /* Addresses and time stamps of not-yet-established (mini) connections, 4 per element. */
+ ip4_tcp_udp_address_x4_and_timestamps_t * ip4_mini_connection_address_hash;
+ ip6_tcp_udp_address_x4_and_timestamps_t * ip6_mini_connection_address_hash;
+
+ /* Vector of size log2_n_established_connection_hash_elts plus overflow. */
+ ip4_tcp_udp_address_x4_t * ip4_established_connection_address_hash;
+ ip6_tcp_udp_address_x4_t * ip6_established_connection_address_hash;
+
+ /* Jenkins hash seeds for established and mini hash tables. */
+ u32x4_union_t connection_hash_seeds[2][3];
+ u32x4_union_t connection_hash_masks[2];
+
+ /* Pool of listeners. */
+ tcp_listener_t * listener_pool;
+
+ /* Table mapping destination port (after clib_host_to_net_u16) to listener pool index. */
+ u16 * listener_index_by_dst_port;
+
+ tcp_lookup_disposition_t disposition_by_state_and_flags[TCP_N_CONNECTION_STATE][64];
+
+ u8 log2_clocks_per_tick[TCP_N_TIMER]; /* per timer: ticks << this = cpu clocks (see format_tcp_time_stamp) */
+
+ f64 secs_per_tick[TCP_N_TIMER];
+
+ /* Holds pointers to default and per-packet TCP options while
+ parsing a TCP packet's options. */
+ tcp_mini_connection_t option_decode_mini_connection_template;
+
+ /* Count of currently established connections. */
+ u32 n_established_connections[TCP_N_IP46];
+
+ u32 tx_buffer_free_list; /* tcp_udp_lookup_init sets VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX */
+ u32 tx_buffer_free_list_n_buffer_bytes;
+} tcp_main_t;
+
+/* Global TCP main structure. */
+tcp_main_t tcp_main;
+
+typedef struct {
+ /* Listen on this port. */
+ u16 port;
+
+#define TCP_LISTENER_IP4 (1 << 0)
+#define TCP_LISTENER_IP6 (1 << 1)
+ u16 flags;
+
+ /* Next node index for data packets. */
+ u32 data_node_index;
+
+ /* Event function: called on new connections, etc. */
+ tcp_event_function_t * event_function;
+} tcp_listener_registration_t;
+
+uword
+tcp_register_listener (vlib_main_t * vm, tcp_listener_registration_t * r);
+
+always_inline tcp_ip_4_or_6_t
+tcp_connection_is_ip6 (u32 h)
+{ return h & 1; }
+
+always_inline u32 /* handle = 2 * connection index + is_ip6; a u32, not an address family enum */
+tcp_connection_handle_set (u32 iest, tcp_ip_4_or_6_t is_ip6)
+{ return is_ip6 + 2*iest; }
+
+always_inline tcp_connection_t *
+tcp_get_connection (u32 connection_handle)
+{
+ u32 iest = connection_handle / 2;
+ tcp_ip_4_or_6_t is_ip6 = tcp_connection_is_ip6 (connection_handle);
+ tcp_main_t * tm = &tcp_main;
+ ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
+ return vec_elt_at_index (tm46->established_connections, iest);
+}
+
+#endif /* included_tcp_protocol_h */
diff --git a/vnet/vnet/ip/tcp_format.c b/vnet/vnet/ip/tcp_format.c
new file mode 100644
index 00000000000..afc3dd20c49
--- /dev/null
+++ b/vnet/vnet/ip/tcp_format.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/tcp_format.c: tcp formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+static u8 * format_tcp_flags (u8 * s, va_list * args)
+{
+ int flags = va_arg (*args, int);
+
+#define _(f) if (flags & TCP_FLAG_##f) s = format (s, "%s, ", #f);
+ foreach_tcp_flag
+#undef _
+
+ return s;
+}
+
+/* Format TCP header. */
+u8 * format_tcp_header (u8 * s, va_list * args)
+{
+ tcp_header_t * tcp = va_arg (*args, tcp_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ u32 header_bytes;
+ uword indent;
+
+ /* Nothing to do. */
+ if (max_header_bytes < sizeof (tcp[0]))
+ return format (s, "TCP header truncated");
+
+ indent = format_get_indent (s);
+ indent += 2;
+
+ s = format (s, "TCP: %d -> %d",
+ clib_net_to_host_u16 (tcp->ports.src),
+ clib_net_to_host_u16 (tcp->ports.dst));
+
+ s = format (s, "\n%Useq. tx 0x%08x rx 0x%08x",
+ format_white_space, indent,
+ clib_net_to_host_u32 (tcp->seq_number),
+ clib_net_to_host_u32 (tcp->ack_number));
+
+ s = format (s, "\n%Uflags %U, tcp header: %d bytes",
+ format_white_space, indent,
+ format_tcp_flags, tcp->flags,
+ (tcp->tcp_header_u32s_and_reserved >> 4) * sizeof (u32));
+
+ s = format (s, "\n%Uwindow %d, checksum 0x%04x",
+ format_white_space, indent,
+ clib_net_to_host_u16 (tcp->window),
+ clib_net_to_host_u16 (tcp->checksum));
+
+ header_bytes = tcp_header_bytes (tcp);
+
+ /* Format TCP options. */
+#if 0
+ {
+ u8 * o;
+ u8 * option_start = (void *) (tcp + 1);
+ u8 * option_end = (void *) tcp + header_bytes;
+
+ for (o = option_start; o < option_end; )
+ {
+ u32 length = o[1];
+ switch (o[0])
+ {
+ case TCP_OPTION_END:
+ length = 1;
+ o = option_end;
+ break;
+
+ case TCP_OPTION_NOP:
+ length = 1;
+ break;
+
+ }
+ }
+ }
+#endif
+
+ /* Recurse into next protocol layer. */
+ if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+ {
+ ip_main_t * im = &ip_main;
+ tcp_udp_port_info_t * pi;
+
+ pi = ip_get_tcp_udp_port_info (im, tcp->ports.dst);
+
+ if (pi && pi->format_header)
+ s = format (s, "\n%U%U",
+ format_white_space, indent - 2,
+ pi->format_header,
+ /* next protocol header */ (void*) tcp + header_bytes,
+ max_header_bytes - header_bytes);
+ }
+
+ return s;
+}
diff --git a/vnet/vnet/ip/tcp_init.c b/vnet/vnet/ip/tcp_init.c
new file mode 100644
index 00000000000..3e88d87e11e
--- /dev/null
+++ b/vnet/vnet/ip/tcp_init.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/tcp_init.c: tcp initialization
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/format.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/tcp_packet.h>
+
+/* Init function: install format_tcp_header and unformat_pg_tcp_header in the
+   protocol info for IP_PROTOCOL_TCP.  Returns the ip_main_init error, if any. */
+static clib_error_t *
+tcp_init (vlib_main_t * vm)
+{
+  ip_main_t * im = &ip_main;
+  ip_protocol_info_t * pi;
+  clib_error_t * error;
+
+  /* Propagate the failure instead of silently reporting success. */
+  if ((error = vlib_call_init_function (vm, ip_main_init)))
+    return error;
+
+  pi = ip_get_protocol_info (im, IP_PROTOCOL_TCP);
+  pi->format_header = format_tcp_header;
+  pi->unformat_pg_edit = unformat_pg_tcp_header;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (tcp_init);
diff --git a/vnet/vnet/ip/tcp_packet.h b/vnet/vnet/ip/tcp_packet.h
new file mode 100644
index 00000000000..ebb111572a0
--- /dev/null
+++ b/vnet/vnet/ip/tcp_packet.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip4/tcp_packet.h: TCP packet format (see RFC 793)
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_tcp_packet_h
+#define included_tcp_packet_h
+
+/* TCP flags bit 0 first.
+   This is an X-macro list: a user defines _(f), expands foreach_tcp_flag
+   to get one item per flag name, then undefines _ again. */
+#define foreach_tcp_flag \
+ _ (FIN) \
+ _ (SYN) \
+ _ (RST) \
+ _ (PSH) \
+ _ (ACK) \
+ _ (URG) \
+ _ (ECE) \
+ _ (CWR)
+
+enum {
+ /* Bit positions, in list order: TCP_FLAG_BIT_FIN = 0 ... TCP_FLAG_BIT_CWR = 7. */
+#define _(f) TCP_FLAG_BIT_##f,
+ foreach_tcp_flag
+#undef _
+ /* Number of flag bits listed in foreach_tcp_flag. */
+ TCP_N_FLAG_BITS,
+
+ /* Bit masks: TCP_FLAG_<f> = 1 << TCP_FLAG_BIT_<f>. */
+#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f,
+ foreach_tcp_flag
+#undef _
+};
+
+/* Fixed part of the TCP header (no options). */
+typedef struct {
+ /* Source and destination port.
+ The union lets both ports be accessed together as one 32-bit word. */
+ union {
+ struct {
+ u16 src, dst;
+ };
+ u32 src_and_dst;
+ } ports;
+
+ /* Sequence and acknowledgment number. */
+ u32 seq_number, ack_number;
+
+ /* Size of TCP header in 32-bit units plus 4 reserved bits.
+ The header size is in the upper 4 bits (see tcp_header_bytes). */
+ u8 tcp_header_u32s_and_reserved;
+
+ /* See foreach_tcp_flag for the enumeration of TCP flags. */
+ u8 flags;
+
+ /* Current window advertised by sender.
+ This is the number of bytes sender is willing to receive
+ right now. */
+ u16 window;
+
+ /* Checksum of TCP pseudo header and data. */
+ u16 checksum;
+
+ /* Urgent pointer field of the header. */
+ u16 urgent_pointer;
+} tcp_header_t;
+
+/* Length of the TCP header in bytes: the upper four bits of the
+   header-length byte count 32-bit words. */
+always_inline int
+tcp_header_bytes (tcp_header_t * t)
+{
+  u8 n_u32s = t->tcp_header_u32s_and_reserved >> 4;
+
+  return n_u32s * sizeof (u32);
+}
+
+/* TCP options: numeric option kinds as they appear in the option's
+   first byte. */
+typedef enum tcp_option_type {
+ TCP_OPTION_END = 0,
+ TCP_OPTION_NOP = 1,
+ TCP_OPTION_MSS = 2,
+ TCP_OPTION_WINDOW_SCALE = 3,
+ TCP_OPTION_SACK_PERMITTED = 4,
+ TCP_OPTION_SACK_BLOCK = 5,
+ TCP_OPTION_TIME_STAMP = 8,
+} tcp_option_type_t;
+
+/* All except NOP and END have 1 byte length field. */
+typedef struct {
+ /* Option kind, stored in 8 bits. */
+ tcp_option_type_t type : 8;
+
+ /* Length of this option in bytes. */
+ u8 length;
+} tcp_option_with_length_t;
+
+#endif /* included_tcp_packet_h */
+
diff --git a/vnet/vnet/ip/tcp_pg.c b/vnet/vnet/ip/tcp_pg.c
new file mode 100644
index 00000000000..122592d1594
--- /dev/null
+++ b/vnet/vnet/ip/tcp_pg.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/tcp_pg: TCP packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+/*
+ * Packet-generator edit function that fills in the TCP checksum of each
+ * generated packet.
+ *
+ * pg, s: packet-generator main and stream (s is not used here).
+ * g: the TCP edit group; the group immediately before it (g-1) is taken
+ * to be the IP header.
+ * packets, n_packets: buffer indices of the packets to fix up.
+ *
+ * NOTE(review): the IP header is always read as an ip4_header_t and the
+ * TCP length is the IP length minus sizeof (ip4_header_t), so this looks
+ * like it assumes IPv4 without options -- confirm.
+ */
+static void
+tcp_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g,
+ u32 * packets,
+ u32 n_packets)
+{
+ vlib_main_t * vm = pg->vlib_main;
+ u32 ip_offset, tcp_offset;
+
+ /* Byte offsets of the TCP header and of the header in front of it. */
+ tcp_offset = g->start_byte_offset;
+ ip_offset = (g-1)->start_byte_offset;
+
+ while (n_packets >= 1)
+ {
+ vlib_buffer_t * p0;
+ ip4_header_t * ip0;
+ tcp_header_t * tcp0;
+ ip_csum_t sum0;
+ u32 tcp_len0;
+
+ p0 = vlib_get_buffer (vm, packets[0]);
+ n_packets -= 1;
+ packets += 1;
+
+ /* Offsets below are relative to the start of the buffer data. */
+ ASSERT (p0->current_data == 0);
+ ip0 = (void *) (p0->data + ip_offset);
+ tcp0 = (void *) (p0->data + tcp_offset);
+ tcp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]);
+
+ /* Initialize checksum with header. */
+ if (BITS (sum0) == 32)
+ {
+ sum0 = clib_mem_unaligned (&ip0->src_address, u32);
+ sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
+ }
+ else
+ /* NOTE(review): a single 64-bit load starting at src_address;
+ presumably it also picks up dst_address because the two fields
+ are adjacent -- confirm. */
+ sum0 = clib_mem_unaligned (&ip0->src_address, u64);
+
+ /* Add the TCP length and the IP protocol number. */
+ sum0 = ip_csum_with_carry
+ (sum0, clib_host_to_net_u32 (tcp_len0 + (ip0->protocol << 16)));
+
+ /* Invalidate possibly old checksum. */
+ tcp0->checksum = 0;
+
+ /* Sum tcp_len0 bytes of the buffer starting at the TCP header. */
+ sum0 = ip_incremental_checksum_buffer (vm, p0, tcp_offset, tcp_len0, sum0);
+
+ tcp0->checksum = ~ ip_csum_fold (sum0);
+ }
+}
+
+/* Packet-generator view of the TCP header: one pg_edit_t per header
+   field, with each TCP flag as its own edit (<FLAG>_flag). */
+typedef struct {
+ struct { pg_edit_t src, dst; } ports;
+ pg_edit_t seq_number, ack_number;
+ pg_edit_t tcp_header_u32s;
+ /* Expands to FIN_flag, SYN_flag, ... one edit per foreach_tcp_flag entry. */
+#define _(f) pg_edit_t f##_flag;
+ foreach_tcp_flag
+#undef _
+ pg_edit_t window;
+ pg_edit_t checksum;
+ pg_edit_t urgent_pointer;
+} pg_tcp_header_t;
+
+/* Bind every edit in *p to the tcp_header_t field it describes. */
+static inline void
+pg_tcp_header_init (pg_tcp_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the TCP header. */
+#define _(f) pg_edit_init (&p->f, tcp_header_t, f);
+ _ (ports.src);
+ _ (ports.dst);
+ _ (seq_number);
+ _ (ack_number);
+ _ (window);
+ _ (checksum);
+ _ (urgent_pointer);
+#undef _
+
+ /* Initialize bit fields: each flag is a 1-bit field of the flags byte,
+ located at its TCP_FLAG_BIT_<f> position. */
+#define _(f) \
+ pg_edit_init_bitfield (&p->f##_flag, tcp_header_t, \
+ flags, \
+ TCP_FLAG_BIT_##f, 1);
+
+ foreach_tcp_flag
+#undef _
+
+ /* Header length; presumably the arguments are (start bit 4, 4 bits),
+ which would match the >> 4 in tcp_header_bytes -- confirm against
+ pg_edit_init_bitfield. */
+ pg_edit_init_bitfield (&p->tcp_header_u32s, tcp_header_t,
+ tcp_header_u32s_and_reserved,
+ 4, 4);
+}
+
+/*
+ * Packet-generator parser for a TCP header.
+ *
+ * Expects "TCP: <src-port> -> <dst-port>", optionally followed by any
+ * mix of "window <n>", "checksum <n>" and flag names (FIN, SYN, ...).
+ * What follows is handed to the parser registered for the destination
+ * port if there is one, otherwise to the generic payload parser.
+ *
+ * The single va_arg is the pg_stream_t being built.  Returns 1 on
+ * success; on failure frees the edit group it created and returns 0.
+ */
+uword
+unformat_pg_tcp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t * s = va_arg (*args, pg_stream_t *);
+ pg_tcp_header_t * p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (tcp_header_t),
+ &group_index);
+ pg_tcp_header_init (p);
+
+ /* Defaults. */
+ pg_edit_set_fixed (&p->seq_number, 0);
+ pg_edit_set_fixed (&p->ack_number, 0);
+
+ /* Header length defaults to the fixed header, i.e. no options. */
+ pg_edit_set_fixed (&p->tcp_header_u32s, sizeof (tcp_header_t) / sizeof (u32));
+
+ pg_edit_set_fixed (&p->window, 4096);
+ pg_edit_set_fixed (&p->urgent_pointer, 0);
+
+ /* All flags default to clear. */
+#define _(f) pg_edit_set_fixed (&p->f##_flag, 0);
+ foreach_tcp_flag
+#undef _
+
+ /* Left unspecified unless the user supplies "checksum"; in that case
+ the edit function installed below computes it. */
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+
+ if (! unformat (input, "TCP: %U -> %U",
+ unformat_pg_edit,
+ unformat_tcp_udp_port, &p->ports.src,
+ unformat_pg_edit,
+ unformat_tcp_udp_port, &p->ports.dst))
+ goto error;
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "window %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->window))
+ ;
+
+ else if (unformat (input, "checksum %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->checksum))
+ ;
+
+ /* Flags. Expands to one "else if" per flag name, continuing the
+ chain above. */
+#define _(f) else if (unformat (input, #f)) pg_edit_set_fixed (&p->f##_flag, 1);
+ foreach_tcp_flag
+#undef _
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ {
+ ip_main_t * im = &ip_main;
+ u16 dst_port;
+ tcp_udp_port_info_t * pi;
+
+ /* A per-port parser can only be looked up when the destination port
+ is a fixed value. */
+ pi = 0;
+ if (p->ports.dst.type == PG_EDIT_FIXED)
+ {
+ dst_port = pg_edit_get_value (&p->ports.dst, PG_EDIT_LO);
+ pi = ip_get_tcp_udp_port_info (im, dst_port);
+ }
+
+ if (pi && pi->unformat_pg_edit
+ && unformat_user (input, pi->unformat_pg_edit, s))
+ ;
+
+ else if (! unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ /* No explicit checksum given: have the edit function compute it. */
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ /* Fetch the group again by index rather than reusing a pointer
+ obtained before the nested parsers ran. */
+ pg_edit_group_t * g = pg_stream_get_group (s, group_index);
+ g->edit_function = tcp_pg_edit_function;
+ g->edit_function_opaque = 0;
+ }
+
+ return 1;
+ }
+
+ error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
diff --git a/vnet/vnet/ip/udp.h b/vnet/vnet/ip/udp.h
new file mode 100644
index 00000000000..65eef29cb10
--- /dev/null
+++ b/vnet/vnet/ip/udp.h
@@ -0,0 +1,113 @@
+/*
+ * ip/udp.h: udp protocol
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_udp_h
+#define included_udp_h
+
+#include <vnet/vnet.h>
+#include <vnet/ip/udp_packet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/format.h>
+
+/* UDP error codes: one UDP_ERROR_<n> per udp_error (n, s) line in
+   udp_error.def, followed by the count. */
+typedef enum {
+#define udp_error(n,s) UDP_ERROR_##n,
+#include <vnet/ip/udp_error.def>
+#undef udp_error
+ UDP_N_ERROR,
+} udp_error_t;
+
+/* Known IPv4 UDP destination ports as _ (port number, name). */
+#define foreach_udp4_dst_port \
+_ (67, dhcp_to_server) \
+_ (68, dhcp_to_client) \
+_ (500, ikev2) \
+_ (4341, lisp_gpe) \
+_ (4739, ipfix) \
+_ (4789, vxlan) \
+_ (4790, vxlan_gpe) \
+_ (6633, vpath_3)
+
+
+/* Known IPv6 UDP destination ports as _ (port number, name). */
+#define foreach_udp6_dst_port \
+_ (547, dhcpv6_to_server) \
+_ (546, dhcpv6_to_client) \
+_ (6633, vpath6_3)
+
+/* UDP_DST_PORT_<name> = port number, for both the IPv4 and IPv6 lists. */
+typedef enum {
+#define _(n,f) UDP_DST_PORT_##f = n,
+ foreach_udp4_dst_port
+ foreach_udp6_dst_port
+#undef _
+} udp_dst_port_t;
+
+/* UDP6_DST_PORT_<name> = port number, for the IPv6 list only. */
+typedef enum {
+#define _(n,f) UDP6_DST_PORT_##f = n,
+ foreach_udp6_dst_port
+#undef _
+} udp6_dst_port_t;
+
+/* Per-destination-port registration record. */
+typedef struct {
+ /* Name (a c string). */
+ char * name;
+
+ /* UDP destination port in host byte order. */
+ udp_dst_port_t dst_port;
+
+ /* Node which handles this type. */
+ u32 node_index;
+
+ /* Next index for this type. */
+ u32 next_index;
+} udp_dst_port_info_t;
+
+/* Address family selector used to index the per-family arrays in
+   udp_main_t.  UDP_IP4 is 1 so that an is_ip4 flag can be used directly
+   as the index. */
+typedef enum {
+ UDP_IP6 = 0,
+ UDP_IP4, /* the code is full of is_ip4... */
+ N_UDP_AF,
+} udp_af_t;
+
+/* Global UDP state; every array is indexed by udp_af_t. */
+typedef struct {
+ /* Vector of registered destination ports. */
+ udp_dst_port_info_t * dst_port_infos [N_UDP_AF];
+
+ /* Hash tables mapping name/protocol to protocol info index. */
+ uword * dst_port_info_by_name[N_UDP_AF];
+ uword * dst_port_info_by_dst_port[N_UDP_AF];
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+} udp_main_t;
+
+/* Find the registration record for dst_port in the given address family.
+   Returns 0 when the port has no record. */
+always_inline udp_dst_port_info_t *
+udp_get_dst_port_info (udp_main_t * um, udp_dst_port_t dst_port, u8 is_ip4)
+{
+  uword * slot = hash_get (um->dst_port_info_by_dst_port[is_ip4], dst_port);
+
+  if (! slot)
+    return 0;
+
+  return vec_elt_at_index (um->dst_port_infos[is_ip4], slot[0]);
+}
+
+/* Formatters for a UDP header and for a UDP input trace record. */
+format_function_t format_udp_header;
+format_function_t format_udp_rx_trace;
+
+/* Parser for "src-port <n> dst-port <n>". */
+unformat_function_t unformat_udp_header;
+
+/* Deliver packets for dst_port (host byte order) to node_index.
+   is_ip4 selects the IPv4 or the IPv6 UDP input node. */
+void udp_register_dst_port (vlib_main_t * vm,
+ udp_dst_port_t dst_port,
+ u32 node_index, u8 is_ip4);
+
+#endif /* included_udp_h */
+
diff --git a/vnet/vnet/ip/udp_error.def b/vnet/vnet/ip/udp_error.def
new file mode 100644
index 00000000000..46e3bd9ef47
--- /dev/null
+++ b/vnet/vnet/ip/udp_error.def
@@ -0,0 +1,20 @@
+/*
+ * udp_error.def: udp errors
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+udp_error (NONE, "no error")
+udp_error (NO_LISTENER, "no listener for dst port")
+udp_error (LENGTH_ERROR, "UDP packets with length errors")
diff --git a/vnet/vnet/ip/udp_format.c b/vnet/vnet/ip/udp_format.c
new file mode 100644
index 00000000000..dd54095908c
--- /dev/null
+++ b/vnet/vnet/ip/udp_format.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/udp_format.c: udp formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format a UDP header.  When bytes remain after the header and a
+   formatter is registered for the destination port, that formatter is
+   used for the next protocol layer.
+   va_args: udp_header_t * header, u32 max_header_bytes. */
+u8 * format_udp_header (u8 * s, va_list * args)
+{
+  udp_header_t * udp = va_arg (*args, udp_header_t *);
+  u32 max_header_bytes = va_arg (*args, u32);
+  u32 header_bytes = sizeof (udp[0]);
+  ip_main_t * im = &ip_main;
+  tcp_udp_port_info_t * pi;
+  uword indent;
+
+  /* Fewer bytes available than the fixed header needs. */
+  if (max_header_bytes < sizeof (udp[0]))
+    return format (s, "UDP header truncated");
+
+  indent = format_get_indent (s) + 2;
+
+  s = format (s, "UDP: %d -> %d",
+              clib_net_to_host_u16 (udp->src_port),
+              clib_net_to_host_u16 (udp->dst_port));
+
+  s = format (s, "\n%Ulength %d, checksum 0x%04x",
+              format_white_space, indent,
+              clib_net_to_host_u16 (udp->length),
+              clib_net_to_host_u16 (udp->checksum));
+
+  /* Nothing beyond the UDP header to describe. */
+  if (max_header_bytes == 0 || header_bytes >= max_header_bytes)
+    return s;
+
+  /* Recurse into next protocol layer, if one is registered. */
+  pi = ip_get_tcp_udp_port_info (im, udp->dst_port);
+  if (! pi || ! pi->format_header)
+    return s;
+
+  return format (s, "\n%U%U",
+                 format_white_space, indent - 2,
+                 pi->format_header,
+                 /* next protocol header */ (udp + 1),
+                 max_header_bytes - sizeof (udp[0]));
+}
diff --git a/vnet/vnet/ip/udp_init.c b/vnet/vnet/ip/udp_init.c
new file mode 100644
index 00000000000..40ca032923c
--- /dev/null
+++ b/vnet/vnet/ip/udp_init.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/udp_init.c: udp initialization
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/*
+ * Hook UDP into the IP protocol table: install the UDP header formatter
+ * and the packet-generator header parser on the IP_PROTOCOL_UDP entry.
+ *
+ * ip_main_init is run first; if it fails, its error is returned to the
+ * caller rather than being dropped.  Returns 0 on success.
+ */
+clib_error_t *
+udp_init (vlib_main_t * vm)
+{
+  ip_main_t * im = &ip_main;
+  ip_protocol_info_t * pi;
+  clib_error_t * error;
+
+  error = vlib_call_init_function (vm, ip_main_init);
+  if (error)
+    return error;
+
+  pi = ip_get_protocol_info (im, IP_PROTOCOL_UDP);
+  if (pi == 0)
+    return clib_error_return (0, "UDP protocol info AWOL");
+
+  pi->format_header = format_udp_header;
+  pi->unformat_pg_edit = unformat_pg_udp_header;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (udp_init);
diff --git a/vnet/vnet/ip/udp_local.c b/vnet/vnet/ip/udp_local.c
new file mode 100644
index 00000000000..c9355d2a322
--- /dev/null
+++ b/vnet/vnet/ip/udp_local.c
@@ -0,0 +1,508 @@
+/*
+ * node.c: udp packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/udp.h>
+#include <vnet/ip/udp_packet.h>
+#include <vppinfra/sparse_vec.h>
+
+udp_main_t udp_main;
+
+/* Built-in next nodes of the UDP input nodes as _ (symbol, node name). */
+#define foreach_udp_input_next \
+ _ (PUNT, "error-punt") \
+ _ (DROP, "error-drop")
+
+/* UDP_INPUT_NEXT_<symbol> in list order (PUNT = 0, DROP = 1), then the
+   count. */
+typedef enum {
+#define _(s,n) UDP_INPUT_NEXT_##s,
+ foreach_udp_input_next
+#undef _
+ UDP_INPUT_N_NEXT,
+} udp_input_next_t;
+
+/* Trace record: ports copied straight from the header (network byte
+   order; format_udp_rx_trace converts them for display). */
+typedef struct {
+ u16 src_port;
+ u16 dst_port;
+} udp_rx_trace_t;
+
+/* Format one UDP input trace record.
+   va_args: vlib_main_t * (unused), vlib_node_t * (unused),
+   udp_rx_trace_t * record. */
+u8 * format_udp_rx_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  udp_rx_trace_t * t = va_arg (*args, udp_rx_trace_t *);
+  u16 src_port = clib_net_to_host_u16 (t->src_port);
+  u16 dst_port = clib_net_to_host_u16 (t->dst_port);
+
+  return format (s, "UDP: src-port %d dst-port %d", src_port, dst_port);
+}
+
+/* Per-node runtime data of the two UDP input nodes. */
+typedef struct {
+ /* Sparse vector mapping udp dst_port in network byte order
+ to next index. */
+ u16 * next_by_dst_port;
+
+ /* Inverse mapping: next index to index into next_by_dst_port. */
+ u32 * sparse_index_by_next_index;
+} udp_input_runtime_t;
+
+/* Forward declarations; the nodes are registered further down. */
+vlib_node_registration_t udp4_input_node;
+vlib_node_registration_t udp6_input_node;
+
+/*
+ * Shared body of the IPv4 and IPv6 UDP input nodes.
+ *
+ * Each buffer arrives positioned at its IP header.  The function skips
+ * the IP header, looks the UDP destination port up in the node's sparse
+ * vector and enqueues the buffer to the registered next node, leaving it
+ * positioned just past the UDP header.  Buffers too short to hold the IP
+ * and UDP headers are sent to error-drop with UDP_ERROR_LENGTH_ERROR.
+ *
+ * is_ip4 selects which node's runtime data and which IP header size to
+ * use.  Returns the number of buffers in from_frame.
+ */
+always_inline uword
+udp46_input_inline (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * from_frame,
+                    int is_ip4)
+{
+  udp_input_runtime_t * rt = is_ip4 ?
+    (void *) vlib_node_get_runtime_data (vm, udp4_input_node.index)
+    : (void *) vlib_node_get_runtime_data (vm, udp6_input_node.index);
+  __attribute__((unused)) u32 n_left_from, next_index, i_next, * from, * to_next;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+  i_next = vec_elt (rt->sparse_index_by_next_index, next_index);
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      /* Dual loop: two buffers per iteration while prefetching the next
+         two. */
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          u32 bi0, bi1;
+          vlib_buffer_t * b0, * b1;
+          udp_header_t * h0 = 0, * h1 = 0;
+          u32 i0, i1, dst_port0, dst_port1;
+          u32 advance0, advance1;
+          u32 error0, next0, error1, next1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+
+            CLIB_PREFETCH (p2->data, sizeof (h0[0]), LOAD);
+            CLIB_PREFETCH (p3->data, sizeof (h1[0]), LOAD);
+          }
+
+          bi0 = from[0];
+          bi1 = from[1];
+          to_next[0] = bi0;
+          to_next[1] = bi1;
+          from += 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+          n_left_from -= 2;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
+
+          /* ip4/6_local hands us the ip header, not the udp header */
+          if (is_ip4)
+            {
+              advance0 = sizeof(ip4_header_t);
+              advance1 = sizeof(ip4_header_t);
+            }
+          else
+            {
+              advance0 = sizeof(ip6_header_t);
+              advance1 = sizeof(ip6_header_t);
+            }
+
+          /* The buffer must hold the IP header plus a whole UDP header;
+             sizeof (*h0) is the header size, not the pointer size. */
+          if (PREDICT_FALSE(b0->current_length < advance0 + sizeof (*h0)))
+            {
+              error0 = UDP_ERROR_LENGTH_ERROR;
+              next0 = UDP_INPUT_NEXT_DROP;
+            }
+          else
+            {
+              vlib_buffer_advance (b0, advance0);
+              h0 = vlib_buffer_get_current (b0);
+              error0 = next0 = 0;
+            }
+
+          if (PREDICT_FALSE(b1->current_length < advance1 + sizeof (*h1)))
+            {
+              error1 = UDP_ERROR_LENGTH_ERROR;
+              next1 = UDP_INPUT_NEXT_DROP;
+            }
+          else
+            {
+              vlib_buffer_advance (b1, advance1);
+              h1 = vlib_buffer_get_current (b1);
+              error1 = next1 = 0;
+            }
+
+          /* NOTE(review): unlike the single loop below, this path does
+             not compare the UDP length field with the buffer length --
+             confirm whether that is intended. */
+
+          /* Index sparse array with network byte order. */
+          dst_port0 = (error0 == 0) ? h0->dst_port : 0;
+          dst_port1 = (error1 == 0) ? h1->dst_port : 0;
+          sparse_vec_index2 (rt->next_by_dst_port, dst_port0, dst_port1,
+                             &i0, &i1);
+          next0 = (error0 == 0) ? vec_elt(rt->next_by_dst_port, i0) : next0;
+          next1 = (error1 == 0) ? vec_elt(rt->next_by_dst_port, i1) : next1;
+
+          /* Always record an error code, so that a short packet is
+             counted as a length error instead of keeping a stale one. */
+          if (PREDICT_TRUE (error0 == 0))
+            b0->error = node->errors[next0 == SPARSE_VEC_INVALID_INDEX ? UDP_ERROR_NO_LISTENER : UDP_ERROR_NONE];
+          else
+            b0->error = node->errors[error0];
+          if (PREDICT_TRUE (error1 == 0))
+            b1->error = node->errors[next1 == SPARSE_VEC_INVALID_INDEX ? UDP_ERROR_NO_LISTENER : UDP_ERROR_NONE];
+          else
+            b1->error = node->errors[error1];
+
+          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              udp_rx_trace_t *tr = vlib_add_trace (vm, node,
+                                                   b0, sizeof (*tr));
+              /* h0 is only valid when the length check passed. */
+              if (error0 == 0)
+                {
+                  tr->src_port = h0->src_port;
+                  tr->dst_port = h0->dst_port;
+                }
+            }
+          if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              udp_rx_trace_t *tr = vlib_add_trace (vm, node,
+                                                   b1, sizeof (*tr));
+              /* h1 is only valid when the length check passed. */
+              if (error1 == 0)
+                {
+                  tr->src_port = h1->src_port;
+                  tr->dst_port = h1->dst_port;
+                }
+            }
+
+          /* Step past the UDP header for the next node. */
+          vlib_buffer_advance (b0, sizeof (*h0));
+          vlib_buffer_advance (b1, sizeof (*h1));
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1, next0, next1);
+        }
+
+      /* Single loop: whatever the dual loop left over. */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t * b0;
+          udp_header_t * h0 = 0;
+          u32 i0, next0;
+          u32 advance0;
+
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          /* ip4/6_local hands us the ip header, not the udp header */
+          if (is_ip4)
+            advance0 = sizeof(ip4_header_t);
+          else
+            advance0 = sizeof(ip6_header_t);
+
+          /* Too short for the IP header plus a whole UDP header. */
+          if (PREDICT_FALSE(b0->current_length < advance0 + sizeof (*h0)))
+            {
+              b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
+              next0 = UDP_INPUT_NEXT_DROP;
+              goto trace_x1;
+            }
+
+          vlib_buffer_advance (b0, advance0);
+
+          h0 = vlib_buffer_get_current (b0);
+
+          /* The UDP length field must fit in what is left of the buffer. */
+          if (PREDICT_TRUE
+              (clib_net_to_host_u16(h0->length) <= b0->current_length))
+            {
+              i0 = sparse_vec_index (rt->next_by_dst_port, h0->dst_port);
+              next0 = vec_elt(rt->next_by_dst_port, i0);
+
+              b0->error = node->errors [next0 == SPARSE_VEC_INVALID_INDEX ? UDP_ERROR_NO_LISTENER : UDP_ERROR_NONE];
+            }
+          else
+            {
+              b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
+              next0 = UDP_INPUT_NEXT_DROP;
+            }
+
+        trace_x1:
+          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              udp_rx_trace_t *tr = vlib_add_trace (vm, node,
+                                                   b0, sizeof (*tr));
+              /* On a length error h0 may still be 0, so ports are only
+                 recorded for packets that passed the checks. */
+              if (b0->error != node->errors[UDP_ERROR_LENGTH_ERROR])
+                {
+                  tr->src_port = h0->src_port;
+                  tr->dst_port = h0->dst_port;
+                }
+            }
+          vlib_buffer_advance (b0, sizeof (*h0));
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  return from_frame->n_vectors;
+}
+
+/* Error strings, one per udp_error (n, s) line of udp_error.def; the
+   order matches udp_error_t, which is generated from the same file. */
+static char * udp_error_strings[] = {
+#define udp_error(n,s) s,
+#include "udp_error.def"
+#undef udp_error
+};
+
+/* Node function of the IPv4 UDP input node. */
+static uword
+udp4_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return udp46_input_inline (vm, node, from_frame, 1 /* is_ip4 */);
+}
+
+/* Node function of the IPv6 UDP input node. */
+static uword
+udp6_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return udp46_input_inline (vm, node, from_frame, 0 /* is_ip4 */);
+}
+
+
+/* IPv4 UDP input node.  Starts with only the punt and drop next nodes;
+   udp_register_dst_port adds one next node per registered port. */
+VLIB_REGISTER_NODE (udp4_input_node) = {
+ .function = udp4_input,
+ .name = "ip4-udp-lookup",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .runtime_data_bytes = sizeof (udp_input_runtime_t),
+
+ .n_errors = UDP_N_ERROR,
+ .error_strings = udp_error_strings,
+
+ .n_next_nodes = UDP_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [UDP_INPUT_NEXT_##s] = n,
+ foreach_udp_input_next
+#undef _
+ },
+
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_rx_trace,
+ .unformat_buffer = unformat_udp_header,
+};
+
+/* IPv6 UDP input node; identical to the IPv4 one apart from the node
+   function and the name. */
+VLIB_REGISTER_NODE (udp6_input_node) = {
+ .function = udp6_input,
+ .name = "ip6-udp-lookup",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .runtime_data_bytes = sizeof (udp_input_runtime_t),
+
+ .n_errors = UDP_N_ERROR,
+ .error_strings = udp_error_strings,
+
+ .n_next_nodes = UDP_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [UDP_INPUT_NEXT_##s] = n,
+ foreach_udp_input_next
+#undef _
+ },
+
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_rx_trace,
+ .unformat_buffer = unformat_udp_header,
+};
+
+/* Append a registration record for dst_port to the given address family
+   and index it by port number and, when a name is given, by name.  The
+   record starts out with no node and no next index (~0). */
+static void add_dst_port (udp_main_t * um,
+                          udp_dst_port_t dst_port,
+                          char * dst_port_name, u8 is_ip4)
+{
+  udp_dst_port_info_t * info;
+  u32 info_index;
+
+  vec_add2 (um->dst_port_infos[is_ip4], info, 1);
+  info_index = info - um->dst_port_infos[is_ip4];
+
+  info->name = dst_port_name;
+  info->dst_port = dst_port;
+  info->node_index = ~0;
+  info->next_index = ~0;
+
+  hash_set (um->dst_port_info_by_dst_port[is_ip4], dst_port, info_index);
+
+  if (info->name)
+    hash_set_mem (um->dst_port_info_by_name[is_ip4], info->name, info_index);
+}
+
+/*
+ * Route UDP packets with destination port dst_port (host byte order) to
+ * node_index.  is_ip4 selects the IPv4 or the IPv6 UDP input node.
+ *
+ * Makes sure udp_local_init has run, creates a registration record for
+ * the port if none exists, adds node_index as a next node of the input
+ * node and stores the resulting next index in the input node's sparse
+ * vector.
+ */
+void
+udp_register_dst_port (vlib_main_t * vm,
+                       udp_dst_port_t dst_port,
+                       u32 node_index, u8 is_ip4)
+{
+  udp_main_t * um = &udp_main;
+  udp_dst_port_info_t * pi;
+  udp_input_runtime_t * rt;
+  clib_error_t * error;
+  u32 input_node_index;
+  u16 * slot;
+  u32 si;
+
+  error = vlib_call_init_function (vm, udp_local_init);
+  if (error)
+    clib_error_report (error);
+
+  pi = udp_get_dst_port_info (um, dst_port, is_ip4);
+  if (! pi)
+    {
+      /* Port not in the built-in list: add a nameless record. */
+      add_dst_port (um, dst_port, 0, is_ip4);
+      pi = udp_get_dst_port_info (um, dst_port, is_ip4);
+      ASSERT (pi);
+    }
+
+  input_node_index = is_ip4 ? udp4_input_node.index : udp6_input_node.index;
+
+  pi->node_index = node_index;
+  pi->next_index = vlib_node_add_next (vm, input_node_index, node_index);
+
+  /* Setup udp protocol -> next index sparse vector mapping. */
+  rt = vlib_node_get_runtime_data (vm, input_node_index);
+  slot = sparse_vec_validate (rt->next_by_dst_port,
+                              clib_host_to_net_u16 (dst_port));
+  slot[0] = pi->next_index;
+
+  /* Rebuild next index -> sparse index inverse mapping when sparse vector
+     is updated. */
+  vec_validate (rt->sparse_index_by_next_index, pi->next_index);
+  for (si = 1; si < vec_len (rt->next_by_dst_port); si++)
+    rt->sparse_index_by_next_index[rt->next_by_dst_port[si]] = si;
+}
+
+/*
+ * Parse a UDP header of the form "src-port <n> dst-port <n>".
+ *
+ * The single va_arg is a u8 ** byte vector; one zeroed udp_header_t is
+ * appended to it and its two port fields are filled in, in network byte
+ * order.  Returns 1 on success and 0 when the input does not match; in
+ * the latter case the appended (zeroed) header is left on the vector.
+ */
+uword unformat_udp_header (unformat_input_t * input, va_list * args)
+{
+  u8 ** result = va_arg (*args, u8 **);
+  udp_header_t * udp;
+  __attribute__((unused)) int old_length;
+  /* unformat's %d stores an int-sized value, so the targets must be
+     int-sized as well; u16 targets would be overrun. */
+  u32 src_port, dst_port;
+
+  /* Allocate space for the UDP header (and no more than that, so no
+     uninitialized bytes end up in the result). */
+  {
+    void * p;
+
+    old_length = vec_len (*result);
+    vec_add2 (*result, p, sizeof (udp[0]));
+    udp = p;
+  }
+
+  memset (udp, 0, sizeof (udp[0]));
+  if (unformat (input, "src-port %d dst-port %d",
+                &src_port, &dst_port))
+    {
+      udp->src_port = clib_host_to_net_u16 (src_port);
+      udp->dst_port = clib_host_to_net_u16 (dst_port);
+      return 1;
+    }
+  return 0;
+}
+
+/* Install the UDP header formatter and parsers on the given node and on
+   its packet-generator node. */
+static void
+udp_setup_node (vlib_main_t * vm, u32 node_index)
+{
+  vlib_node_t * node = vlib_get_node (vm, node_index);
+  pg_node_t * pg_node = pg_get_node (node_index);
+
+  /* Packet-generator side. */
+  pg_node->unformat_edit = unformat_pg_udp_header;
+
+  /* vlib node side. */
+  node->unformat_buffer = unformat_udp_header;
+  node->format_buffer = format_udp_header;
+}
+
+/* Give one UDP input node an empty dst port -> next index sparse vector
+   and mark the two built-in next indices (drop, punt) as having no slot
+   in it. */
+static void
+udp_input_runtime_init (vlib_main_t * vm, u32 node_index)
+{
+  udp_input_runtime_t * rt = vlib_node_get_runtime_data (vm, node_index);
+
+  rt->next_by_dst_port = sparse_vec_new
+    (/* elt bytes */ sizeof (rt->next_by_dst_port[0]),
+     /* bits in index */ BITS (((udp_header_t *) 0)->dst_port));
+
+  vec_validate (rt->sparse_index_by_next_index, UDP_INPUT_NEXT_DROP);
+  vec_validate (rt->sparse_index_by_next_index, UDP_INPUT_NEXT_PUNT);
+  rt->sparse_index_by_next_index[UDP_INPUT_NEXT_DROP]
+    = SPARSE_VEC_INVALID_INDEX;
+  rt->sparse_index_by_next_index[UDP_INPUT_NEXT_PUNT]
+    = SPARSE_VEC_INVALID_INDEX;
+}
+
+/*
+ * Set up local UDP delivery: create the per-family lookup tables,
+ * prepare both UDP input nodes, record the built-in destination ports
+ * and register the IPv4 input node as the handler for IP_PROTOCOL_UDP.
+ *
+ * An error from udp_init is reported, not returned; this function always
+ * returns 0.
+ */
+clib_error_t * udp_local_init (vlib_main_t * vm)
+{
+  udp_main_t * um = &udp_main;
+  clib_error_t * error;
+  int af;
+
+  error = vlib_call_init_function (vm, udp_init);
+  if (error)
+    clib_error_report (error);
+
+  /* One pair of hash tables per address family. */
+  for (af = 0; af < N_UDP_AF; af++)
+    {
+      um->dst_port_info_by_name[af] = hash_create_string (0, sizeof(uword));
+      um->dst_port_info_by_dst_port[af] = hash_create (0, sizeof(uword));
+    }
+
+  udp_setup_node (vm, udp4_input_node.index);
+  udp_setup_node (vm, udp6_input_node.index);
+
+  /* IPv4: runtime data, then the built-in port list. */
+  udp_input_runtime_init (vm, udp4_input_node.index);
+
+#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 1 /* is_ip4 */);
+  foreach_udp4_dst_port
+#undef _
+
+  /* IPv6: runtime data, then the built-in port list. */
+  udp_input_runtime_init (vm, udp6_input_node.index);
+
+#define _(n,s) add_dst_port (um, UDP_DST_PORT_##s, #s, 0 /* is_ip4 */);
+  foreach_udp6_dst_port
+#undef _
+
+  ip4_register_protocol (IP_PROTOCOL_UDP, udp4_input_node.index);
+  /* Note: ip6 differs from ip4, UDP is hotwired to ip6-udp-lookup */
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (udp_local_init);
diff --git a/vnet/vnet/ip/udp_packet.h b/vnet/vnet/ip/udp_packet.h
new file mode 100644
index 00000000000..21c30c6eb71
--- /dev/null
+++ b/vnet/vnet/ip/udp_packet.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/udp_packet.h: UDP packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_udp_packet_h
+#define included_udp_packet_h
+
+/* UDP header: four consecutive 16-bit fields. */
+typedef struct {
+  /* Source port. */
+  u16 src_port;
+
+  /* Destination port. */
+  u16 dst_port;
+
+  /* Length of UDP header plus payload. */
+  u16 length;
+
+  /* Checksum of UDP pseudo-header and data or
+     zero if checksum is disabled. */
+  u16 checksum;
+} udp_header_t;
+
+#endif /* included_udp_packet_h */
+
diff --git a/vnet/vnet/ip/udp_pg.c b/vnet/vnet/ip/udp_pg.c
new file mode 100644
index 00000000000..a33a56294fb
--- /dev/null
+++ b/vnet/vnet/ip/udp_pg.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/udp_pg: UDP packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/pg/pg.h>
+#include <vnet/ip/ip.h> /* for unformat_tcp_udp_port */
+
+#define UDP_PG_EDIT_LENGTH (1 << 0)
+#define UDP_PG_EDIT_CHECKSUM (1 << 1)
+
+/*
+ * Rewrite the UDP length and/or checksum of each generated packet.
+ *
+ * pg         - packet generator main; only pg->vlib_main is used here.
+ * s          - stream being generated (unused in this function).
+ * g          - edit group of the UDP header.  The group immediately
+ *              before it, (g - 1), is read as the IP4 header group.
+ * packets    - buffer indices of the packets to edit.
+ * n_packets  - number of entries in packets.
+ * flags      - any combination of UDP_PG_EDIT_LENGTH and
+ *              UDP_PG_EDIT_CHECKSUM selecting the fields to rewrite.
+ */
+always_inline void
+udp_pg_edit_function_inline (pg_main_t * pg,
+                             pg_stream_t * s,
+                             pg_edit_group_t * g,
+                             u32 * packets,
+                             u32 n_packets,
+                             u32 flags)
+{
+  vlib_main_t * vm = pg->vlib_main;
+  u32 ip_offset, udp_offset;
+
+  /* Byte offsets of the UDP header and of the header in front of it,
+     both applied to the start of the buffer data below. */
+  udp_offset = g->start_byte_offset;
+  ip_offset = (g-1)->start_byte_offset;
+
+  while (n_packets >= 1)
+    {
+      vlib_buffer_t * p0;
+      ip4_header_t * ip0;
+      udp_header_t * udp0;
+      u32 udp_len0;
+
+      p0 = vlib_get_buffer (vm, packets[0]);
+      n_packets -= 1;
+      packets += 1;
+
+      ip0 = (void *) (p0->data + ip_offset);
+      udp0 = (void *) (p0->data + udp_offset);
+
+      /* UDP length as implied by the IP header: IP total length minus
+         the fixed-size IP4 header.  Used for the checksum below. */
+      udp_len0 = clib_net_to_host_u16 (ip0->length) - sizeof (ip0[0]);
+
+      if (flags & UDP_PG_EDIT_LENGTH)
+        {
+          /* The UDP length field covers the UDP header plus payload only,
+             so measure from the start of the UDP header, not from the
+             start of the IP header.  A 16-bit byte swap is the same
+             operation in both directions, so this stores the value in
+             network order. */
+          udp0->length =
+            clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, p0)
+                                  - udp_offset);
+        }
+
+      /* Initialize checksum with header. */
+      if (flags & UDP_PG_EDIT_CHECKSUM)
+        {
+          ip_csum_t sum0;
+          u16 csum0;
+
+          /* Start with the 64 bits at src_address (presumably the
+             source and destination addresses, which are adjacent
+             in ip4_header_t). */
+          sum0 = clib_mem_unaligned (&ip0->src_address, u64);
+
+          /* Add the remaining pseudo-header words: protocol and length. */
+          sum0 = ip_csum_with_carry
+            (sum0, clib_host_to_net_u32 (udp_len0 + (ip0->protocol << 16)));
+
+          /* Invalidate possibly old checksum. */
+          udp0->checksum = 0;
+
+          sum0 = ip_incremental_checksum_buffer (vm, p0, udp_offset, udp_len0, sum0);
+
+          /* Narrow to 16 bits before testing for zero: applying ~ to a
+             promoted 16-bit value sets the upper bits, so comparing the
+             wide accumulator against 0 would never match. */
+          csum0 = (u16) ~ ip_csum_fold (sum0);
+
+          /* Zero checksum means checksumming disabled. */
+          if (csum0 == 0)
+            csum0 = 0xffff;
+
+          udp0->checksum = csum0;
+        }
+    }
+}
+
+/*
+ * Edit-group callback for UDP headers.
+ *
+ * Dispatches on g->edit_function_opaque so that every supported flag
+ * combination calls udp_pg_edit_function_inline with a constant flags
+ * argument.  Any other value reaches ASSERT (0).
+ */
+static void
+udp_pg_edit_function (pg_main_t * pg,
+                      pg_stream_t * s,
+                      pg_edit_group_t * g,
+                      u32 * packets,
+                      u32 n_packets)
+{
+  switch (g->edit_function_opaque)
+    {
+    case UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH:
+      udp_pg_edit_function_inline (pg, s, g, packets, n_packets,
+                                   UDP_PG_EDIT_CHECKSUM | UDP_PG_EDIT_LENGTH);
+      return;
+
+    case UDP_PG_EDIT_CHECKSUM:
+      udp_pg_edit_function_inline (pg, s, g, packets, n_packets,
+                                   UDP_PG_EDIT_CHECKSUM);
+      return;
+
+    case UDP_PG_EDIT_LENGTH:
+      udp_pg_edit_function_inline (pg, s, g, packets, n_packets,
+                                   UDP_PG_EDIT_LENGTH);
+      return;
+
+    default:
+      break;
+    }
+
+  /* No other flag combination is expected here. */
+  ASSERT (0);
+}
+
+/* One packet-generator edit per udp_header_t field, in the same order. */
+typedef struct {
+  pg_edit_t src_port;
+  pg_edit_t dst_port;
+  pg_edit_t length;
+  pg_edit_t checksum;
+} pg_udp_header_t;
+
+/* Run pg_edit_init on every edit in p, pairing each one with the
+   udp_header_t field of the same name. */
+static inline void
+pg_udp_header_init (pg_udp_header_t * p)
+{
+  pg_edit_init (&p->src_port, udp_header_t, src_port);
+  pg_edit_init (&p->dst_port, udp_header_t, dst_port);
+  pg_edit_init (&p->length, udp_header_t, length);
+  pg_edit_init (&p->checksum, udp_header_t, checksum);
+}
+
+/*
+ * unformat function for a UDP header in a packet-generator stream.
+ *
+ * Accepts "UDP: <src-port> -> <dst-port>", then any number of
+ * "length <n>" / "checksum <n>" options, then either the next protocol
+ * level (when one is registered for a fixed destination port) or a
+ * payload.
+ *
+ * args holds a single pg_stream_t *; the UDP edit group is added to it.
+ * Returns 1 on success.  On failure calls pg_free_edit_group on the
+ * stream and returns 0.
+ */
+uword
+unformat_pg_udp_header (unformat_input_t * input, va_list * args)
+{
+  pg_stream_t * s = va_arg (*args, pg_stream_t *);
+  pg_udp_header_t * p;
+  tcp_udp_port_info_t * pi = 0;
+  ip_main_t * im = &ip_main;
+  u32 group_index;
+  u16 dst_port;
+  uword fixups = 0;
+  int next_level_parsed;
+
+  p = pg_create_edit_group (s, sizeof (p[0]), sizeof (udp_header_t),
+                            &group_index);
+  pg_udp_header_init (p);
+
+  /* Length and checksum stay unspecified unless an option sets them. */
+  p->checksum.type = PG_EDIT_UNSPECIFIED;
+  p->length.type = PG_EDIT_UNSPECIFIED;
+
+  if (! unformat (input, "UDP: %U -> %U",
+                  unformat_pg_edit,
+                    unformat_tcp_udp_port, &p->src_port,
+                  unformat_pg_edit,
+                    unformat_tcp_udp_port, &p->dst_port))
+    goto fail;
+
+  /* Consume options in any order until neither one matches. */
+  while (unformat (input, "length %U",
+                   unformat_pg_edit,
+                     unformat_pg_number, &p->length)
+         || unformat (input, "checksum %U",
+                      unformat_pg_edit,
+                        unformat_pg_number, &p->checksum))
+    ;
+
+  /* Port info can only be looked up for a fixed destination port. */
+  if (p->dst_port.type == PG_EDIT_FIXED)
+    {
+      dst_port = pg_edit_get_value (&p->dst_port, PG_EDIT_LO);
+      pi = ip_get_tcp_udp_port_info (im, dst_port);
+    }
+
+  /* Try the port-specific parser first, then fall back to a payload. */
+  next_level_parsed = (pi && pi->unformat_pg_edit
+                       && unformat_user (input, pi->unformat_pg_edit, s));
+  if (! next_level_parsed
+      && ! unformat_user (input, unformat_pg_payload, s))
+    goto fail;
+
+  /* Look the group up again by index (presumably because parsing the
+     following levels may have moved the edit-group storage). */
+  p = pg_get_edit_group (s, group_index);
+
+  if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+    fixups |= UDP_PG_EDIT_CHECKSUM;
+  if (p->length.type == PG_EDIT_UNSPECIFIED)
+    fixups |= UDP_PG_EDIT_LENGTH;
+
+  /* Install the edit function only when a field was left unspecified. */
+  if (fixups)
+    {
+      pg_edit_group_t * g = pg_stream_get_group (s, group_index);
+      g->edit_function = udp_pg_edit_function;
+      g->edit_function_opaque = fixups;
+    }
+
+  return 1;
+
+ fail:
+  /* Free up any edits we may have added. */
+  pg_free_edit_group (s);
+  return 0;
+}
+