aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/ethernet
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/ethernet')
-rw-r--r-- src/vnet/ethernet/arp.c 2536
-rw-r--r-- src/vnet/ethernet/arp_packet.h 180
-rw-r--r-- src/vnet/ethernet/dir.dox 24
-rw-r--r-- src/vnet/ethernet/error.def 46
-rw-r--r-- src/vnet/ethernet/ethernet.h 577
-rw-r--r-- src/vnet/ethernet/format.c 348
-rw-r--r-- src/vnet/ethernet/init.c 128
-rw-r--r-- src/vnet/ethernet/interface.c 880
-rw-r--r-- src/vnet/ethernet/mac_swap.c 397
-rwxr-xr-x src/vnet/ethernet/node.c 1419
-rw-r--r-- src/vnet/ethernet/p2p_ethernet.api 50
-rw-r--r-- src/vnet/ethernet/p2p_ethernet.c 276
-rw-r--r-- src/vnet/ethernet/p2p_ethernet.h 63
-rw-r--r-- src/vnet/ethernet/p2p_ethernet_api.c 137
-rw-r--r-- src/vnet/ethernet/p2p_ethernet_input.c 262
-rw-r--r-- src/vnet/ethernet/packet.h 152
-rw-r--r-- src/vnet/ethernet/pg.c 183
-rw-r--r-- src/vnet/ethernet/sfp.c 117
-rw-r--r-- src/vnet/ethernet/sfp.h 117
-rw-r--r-- src/vnet/ethernet/types.def 113
20 files changed, 8005 insertions, 0 deletions
diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c
new file mode 100644
index 00000000..52b13e04
--- /dev/null
+++ b/src/vnet/ethernet/arp.c
@@ -0,0 +1,2536 @@
+/*
+ * ethernet/arp.c: IP v4 ARP node
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/l2/l2_input.h>
+#include <vppinfra/mhash.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_entry_src.h>
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/mpls/mpls.h>
+
+/**
+ * @file
+ * @brief IPv4 ARP.
+ *
+ * This file contains code to manage the IPv4 ARP tables (IP Address
+ * to MAC Address lookup).
+ */
+
+
+/*
+ * Forward declaration only; the definition is not in the visible part of
+ * this file.
+ * NOTE(review): presumably queues fp(data) to run on the main thread and
+ * copies data_length bytes of data - confirm against the implementation.
+ */
+void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
+
+/**
+ * @brief Per-interface ARP configuration and state
+ *
+ * One of these exists per software interface, stored in the
+ * ethernet_arp_by_sw_if_index vector of ethernet_arp_main_t.
+ */
+typedef struct ethernet_arp_interface_t_
+{
+ /**
+ * Hash table of ARP entries.
+ * Since this hash table is per-interface, the key is only the IPv4 address
+ * (as a u32). The value is the index of the entry in the global
+ * ip4_entry_pool. The table is created lazily when the first entry is
+ * added, so it may be NULL.
+ */
+ uword *arp_entries;
+} ethernet_arp_interface_t;
+
+/**
+ * @brief One proxy-ARP range.
+ *
+ * The ARP input path converts lo_addr/hi_addr with clib_net_to_host_u32
+ * before comparing, so both are held in network byte order. A request
+ * matches when its target address lies in [lo_addr, hi_addr] (inclusive)
+ * and the receiving interface's FIB index equals fib_index.
+ */
+typedef struct
+{
+ /* First address of the range, network byte order */
+ u32 lo_addr;
+ /* Last address of the range (inclusive), network byte order */
+ u32 hi_addr;
+ /* FIB index the range applies to */
+ u32 fib_index;
+} ethernet_proxy_arp_t;
+
+/**
+ * @brief A registered request to be told about an IPv4 address.
+ *
+ * Used both for one-shot "address resolved" notifications (the
+ * pending_resolutions pool) and for persistent "MAC changed" notifications
+ * (the mac_changes pool). Elements for the same address are chained into a
+ * singly linked list through next_index.
+ */
+typedef struct
+{
+ /* Pool index of the next element for the same address; ~0 ends the list */
+ u32 next_index;
+ /* Process node to signal with vlib_process_signal_event */
+ uword node_index;
+ /* Event type passed to vlib_process_signal_event */
+ uword type_opaque;
+ /* Event data passed to vlib_process_signal_event and to data_callback */
+ uword data;
+ /* Used for arp event notification only */
+ void *data_callback;
+ /* Together with node_index and type_opaque, identifies a registration */
+ u32 pid;
+} pending_resolution_t;
+
+/**
+ * @brief Global ARP state; a single instance (ethernet_arp_main) exists.
+ */
+typedef struct
+{
+ /* Hash tables mapping name to opcode. */
+ uword *opcode_by_name;
+
+ /*
+ * One-shot resolution notifications: hash keyed by IPv4 address (u32)
+ * whose value is the pool index of the head of a pending_resolutions list.
+ */
+ uword *pending_resolutions_by_address;
+ pending_resolution_t *pending_resolutions;
+
+ /*
+ * Mac address change notification: hash keyed by IPv4 address (u32)
+ * whose value is the pool index of the head of a mac_changes list.
+ */
+ uword *mac_changes_by_address;
+ pending_resolution_t *mac_changes;
+
+ /* Pool of all ARP entries, for every interface */
+ ethernet_arp_ip4_entry_t *ip4_entry_pool;
+
+ /*
+ * ARP attack mitigation: when limit_arp_cache_size is non-zero and the
+ * pool holds at least that many entries, an existing entry is evicted
+ * before a new one is learned. arp_delete_rotor is the pool index the
+ * eviction scan last stopped at.
+ */
+ u32 arp_delete_rotor;
+ u32 limit_arp_cache_size;
+
+ /** Per interface state, a vector indexed by sw_if_index */
+ ethernet_arp_interface_t *ethernet_arp_by_sw_if_index;
+
+ /* Proxy arp vector */
+ ethernet_proxy_arp_t *proxy_arps;
+
+ /*
+ * NOTE(review): presumably the process node and event type to signal for
+ * wildcard ARP publishing; not used in the visible part of the file.
+ */
+ uword wc_ip4_arp_publisher_node;
+ uword wc_ip4_arp_publisher_et;
+} ethernet_arp_main_t;
+
+/* The single, file-private instance of the ARP state. */
+static ethernet_arp_main_t ethernet_arp_main;
+
+/**
+ * @brief Argument bundle describing one ARP cache operation.
+ *
+ * NOTE(review): the type name suggests this is shipped to the main thread
+ * through the RPC mechanism; the code that does so is outside the visible
+ * part of the file - confirm.
+ */
+typedef struct
+{
+ /* Interface the operation applies to */
+ u32 sw_if_index;
+ /* IPv4 address / MAC address pair */
+ ethernet_arp_ip4_over_ethernet_address_t a;
+ /* Non-zero: the entry is static and may overwrite an existing static one */
+ int is_static;
+ /* Non-zero: do not install a FIB entry for a newly created ARP entry */
+ int is_no_fib_entry;
+ /*
+ * Bitmask of ETHERNET_ARP_ARGS_* below.
+ * NOTE(review): the handling of these bits is outside the visible part of
+ * the file; their names suggest remove / flush / populate / wildcard
+ * publish operations - confirm.
+ */
+ int flags;
+#define ETHERNET_ARP_ARGS_REMOVE (1<<0)
+#define ETHERNET_ARP_ARGS_FLUSH (1<<1)
+#define ETHERNET_ARP_ARGS_POPULATE (1<<2)
+#define ETHERNET_ARP_ARGS_WC_PUB (1<<3)
+} vnet_arp_set_ip4_over_ethernet_rpc_args_t;
+
+/*
+ * First five bytes of a sender MAC that mark an ARP reply as coming from a
+ * VRRP virtual router. The ARP input path exempts such replies from the
+ * "ARP sender MAC must equal L2 source MAC" check.
+ */
+static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 };
+
+/* Node index for send_garp_na_process */
+u32 send_garp_na_process_node_index;
+
+/* Forward declaration; the definition is later in the file (not visible here). */
+static void
+set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * a);
+
+/**
+ * @brief format() helper: print an ARP hardware type by name.
+ *
+ * Takes one ethernet_arp_hardware_type_t vararg. Known values are printed
+ * as their symbolic name; anything else as "unknown 0x<hex>".
+ */
+static u8 *
+format_ethernet_arp_hardware_type (u8 * s, va_list * va)
+{
+  ethernet_arp_hardware_type_t hw_type =
+    va_arg (*va, ethernet_arp_hardware_type_t);
+  char *name = 0;
+
+  switch (hw_type)
+    {
+      /* one case per entry of the hardware type list */
+#define _(n,f) case n: name = #f; break;
+      foreach_ethernet_arp_hardware_type;
+#undef _
+
+    default:
+      break;
+    }
+
+  if (!name)
+    return format (s, "unknown 0x%x", hw_type);
+
+  return format (s, "%s", name);
+}
+
+/**
+ * @brief format() helper: print an ARP opcode by name.
+ *
+ * Takes one ethernet_arp_opcode_t vararg (host byte order). Known opcodes
+ * are printed as their symbolic name; anything else as "unknown 0x<hex>".
+ */
+static u8 *
+format_ethernet_arp_opcode (u8 * s, va_list * va)
+{
+  ethernet_arp_opcode_t opcode = va_arg (*va, ethernet_arp_opcode_t);
+  char *name = 0;
+
+  switch (opcode)
+    {
+      /* one case per entry of the opcode list */
+#define _(f) case ETHERNET_ARP_OPCODE_##f: name = #f; break;
+      foreach_ethernet_arp_opcode;
+#undef _
+
+    default:
+      break;
+    }
+
+  if (!name)
+    return format (s, "unknown 0x%x", opcode);
+
+  return format (s, "%s", name);
+}
+
+/**
+ * @brief unformat() helper: parse an ARP opcode into host byte order.
+ *
+ * Accepts a hex ("0x...") or decimal number, or a symbolic opcode name
+ * looked up in ethernet_arp_main.opcode_by_name.
+ *
+ * Takes one vararg: int * receiving the opcode.
+ *
+ * @return 1 on success, 0 if nothing parsed or the number does not fit in
+ *         16 bits.
+ */
+static uword
+unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input,
+					      va_list * args)
+{
+  int *result = va_arg (*args, int *);
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  int x, i;
+
+  /* Numeric opcode. */
+  if (unformat (input, "0x%x", &x) || unformat (input, "%d", &x))
+    {
+      /*
+       * The opcode is a 16 bit field. Reject negative values as well as
+       * too-large ones: callers truncate the result to u16, so e.g. -1
+       * would otherwise silently become 0xffff.
+       */
+      if (x < 0 || x >= (1 << 16))
+	return 0;
+      *result = x;
+      return 1;
+    }
+
+  /* Named type. */
+  if (unformat_user (input, unformat_vlib_number_by_name,
+		     am->opcode_by_name, &i))
+    {
+      *result = i;
+      return 1;
+    }
+
+  return 0;
+}
+
+/**
+ * @brief unformat() helper: parse an ARP opcode into network byte order.
+ *
+ * Parses exactly like the host byte order variant, then byte-swaps the
+ * low 16 bits of the result. Takes one vararg: int * receiving the opcode.
+ *
+ * @return 1 on success, 0 if the input did not parse.
+ */
+static uword
+unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input,
+					     va_list * args)
+{
+  int *opcode = va_arg (*args, int *);
+
+  if (unformat_user
+      (input, unformat_ethernet_arp_opcode_host_byte_order, opcode))
+    {
+      *opcode = clib_host_to_net_u16 ((u16) * opcode);
+      return 1;
+    }
+
+  return 0;
+}
+
+/**
+ * @brief format() helper: pretty-print an ARP header.
+ *
+ * Varargs: ethernet_arp_header_t * (the header, network byte order) and a
+ * u32 giving the number of valid bytes at that pointer (0 means "unknown,
+ * do not check").
+ *
+ * Ethernet/IPv4 ARP is printed as MAC and IPv4 addresses; any other
+ * hardware/protocol combination is dumped as hex using the address sizes
+ * carried in the header.
+ */
+static u8 *
+format_ethernet_arp_header (u8 * s, va_list * va)
+{
+  ethernet_arp_header_t *a = va_arg (*va, ethernet_arp_header_t *);
+  u32 max_header_bytes = va_arg (*va, u32);
+  uword indent;
+  u16 l2_type, l3_type;
+
+  if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes)
+    return format (s, "ARP header truncated");
+
+  l2_type = clib_net_to_host_u16 (a->l2_type);
+  l3_type = clib_net_to_host_u16 (a->l3_type);
+
+  indent = format_get_indent (s);
+
+  s = format (s, "%U, type %U/%U, address size %d/%d",
+	      format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode),
+	      format_ethernet_arp_hardware_type, l2_type,
+	      format_ethernet_type, l3_type,
+	      a->n_l2_address_bytes, a->n_l3_address_bytes);
+
+  if (l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet
+      && l3_type == ETHERNET_TYPE_IP4)
+    {
+      s = format (s, "\n%U%U/%U -> %U/%U",
+		  format_white_space, indent,
+		  format_ethernet_address, a->ip4_over_ethernet[0].ethernet,
+		  format_ip4_address, &a->ip4_over_ethernet[0].ip4,
+		  format_ethernet_address, a->ip4_over_ethernet[1].ethernet,
+		  format_ip4_address, &a->ip4_over_ethernet[1].ip4);
+    }
+  else
+    {
+      uword n2 = a->n_l2_address_bytes;
+      uword n3 = a->n_l3_address_bytes;
+      /* bytes from the start of the header to the end of the 4 addresses */
+      uword n_bytes = ((u8 *) a->data - (u8 *) a) + 2 * (n2 + n3);
+
+      /*
+       * The address sizes come straight from the packet. Do not read
+       * beyond the bytes the caller says are valid (e.g. a fixed-size
+       * trace record).
+       */
+      if (max_header_bytes != 0 && n_bytes > max_header_bytes)
+	return format (s, "\n%UARP addresses truncated",
+		       format_white_space, indent);
+
+      s = format (s, "\n%U%U/%U -> %U/%U",
+		  format_white_space, indent,
+		  format_hex_bytes, a->data + 0 * n2 + 0 * n3, n2,
+		  format_hex_bytes, a->data + 1 * n2 + 0 * n3, n3,
+		  format_hex_bytes, a->data + 1 * n2 + 1 * n3, n2,
+		  format_hex_bytes, a->data + 2 * n2 + 1 * n3, n3);
+    }
+
+  return s;
+}
+
+/**
+ * @brief format() helper: print one ARP cache entry as a table row.
+ *
+ * Varargs: vnet_main_t * and ethernet_arp_ip4_entry_t *. A NULL entry
+ * prints the table header instead of a row.
+ *
+ * The flags column shows S (static), D (dynamic) and N (no FIB entry).
+ */
+u8 *
+format_ethernet_arp_ip4_entry (u8 * s, va_list * va)
+{
+  vnet_main_t *vnm = va_arg (*va, vnet_main_t *);
+  ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *);
+  vnet_sw_interface_t *si;
+  u8 *flags = 0;
+
+  if (!e)
+    return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4",
+		   "Flags", "Ethernet", "Interface");
+
+  si = vnet_get_sw_interface (vnm, e->sw_if_index);
+
+  if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
+    flags = format (flags, "S");
+
+  if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
+    flags = format (flags, "D");
+
+  if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY)
+    flags = format (flags, "N");
+
+  /*
+   * format() builds a vector, not a C string. It is printed with %s
+   * below, so it must be NUL terminated first.
+   */
+  if (flags)
+    vec_add1 (flags, 0);
+
+  s = format (s, "%=12U%=16U%=6s%=20U%U",
+	      format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated,
+	      format_ip4_address, &e->ip4_address,
+	      flags ? (char *) flags : "",
+	      format_ethernet_address, e->ethernet_address,
+	      format_vnet_sw_interface_name, vnm, si);
+
+  vec_free (flags);
+  return s;
+}
+
+/**
+ * @brief Trace record for the ARP nodes: the first 64 bytes of the packet.
+ *
+ * The arp-term formatter treats the captured bytes as either an ARP header
+ * or an IPv6 header.
+ */
+typedef struct
+{
+ u8 packet_data[64];
+} ethernet_arp_input_trace_t;
+
+/**
+ * @brief Trace formatter: print the captured bytes as an ARP header.
+ *
+ * Varargs: vlib_main_t *, vlib_node_t * (both unused) and the
+ * ethernet_arp_input_trace_t * to print.
+ */
+static u8 *
+format_ethernet_arp_input_trace (u8 * s, va_list * va)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  ethernet_arp_input_trace_t *trace =
+    va_arg (*va, ethernet_arp_input_trace_t *);
+
+  return format (s, "%U",
+		 format_ethernet_arp_header,
+		 trace->packet_data, sizeof (trace->packet_data));
+}
+
+/**
+ * @brief Trace formatter for arp-term: print an ARP or an IPv6 header.
+ *
+ * Varargs: vlib_main_t *, vlib_node_t * (both unused) and the
+ * ethernet_arp_input_trace_t * to print.
+ *
+ * The saved trace data is either an ARP or an ip6/icmp6 packet:
+ *  - for ARP, the first 16-bit field is the hw type with value 0x0001,
+ *    so the first byte is 0;
+ *  - for ip6, the first nibble has the value 6.
+ */
+static u8 *
+format_arp_term_input_trace (u8 * s, va_list * va)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  ethernet_arp_input_trace_t *trace =
+    va_arg (*va, ethernet_arp_input_trace_t *);
+
+  if (trace->packet_data[0] == 0)
+    s = format (s, "%U", format_ethernet_arp_header,
+		trace->packet_data, sizeof (trace->packet_data));
+  else
+    s = format (s, "%U", format_ip6_header,
+		trace->packet_data, sizeof (trace->packet_data));
+
+  return s;
+}
+
+/**
+ * @brief Send a single ARP request for an adjacency's next hop.
+ *
+ * Nothing is sent when the adjacency's interface is not admin-up, when the
+ * interface has no address covering the next hop, or when no buffer could
+ * be obtained for the request.
+ *
+ * @param adj the neighbour adjacency whose next hop is to be probed
+ */
+static void
+arp_nbr_probe (ip_adjacency_t * adj)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip4_main_t *im = &ip4_main;
+  ip_interface_address_t *ia;
+  ethernet_arp_header_t *h;
+  vnet_hw_interface_t *hi;
+  vnet_sw_interface_t *si;
+  ip4_address_t *src;
+  vlib_buffer_t *b;
+  vlib_main_t *vm;
+  u32 bi = 0;
+
+  vm = vlib_get_main ();
+
+  si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
+
+  if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+    {
+      return;
+    }
+
+  /* the request's source is our address in the next hop's subnet */
+  src =
+    ip4_interface_address_matching_destination (im,
+						&adj->sub_type.nbr.next_hop.
+						ip4,
+						adj->rewrite_header.
+						sw_if_index, &ia);
+  if (!src)
+    {
+      return;
+    }
+
+  h =
+    vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
+				     &bi);
+
+  /* No packet is returned when buffer allocation fails; the probe is
+   * speculative, so just give up. */
+  if (!h)
+    {
+      return;
+    }
+
+  hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
+
+  clib_memcpy (h->ip4_over_ethernet[0].ethernet,
+	       hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
+
+  h->ip4_over_ethernet[0].ip4 = src[0];
+  h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
+
+  b = vlib_get_buffer (vm, bi);
+  vnet_buffer (b)->sw_if_index[VLIB_RX] =
+    vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
+
+  /* Add encapsulation string for software interface (e.g. ethernet header). */
+  vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+  vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+  /* hand the single packet straight to the interface's output node */
+  {
+    vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+    u32 *to_next = vlib_frame_vector_args (f);
+    to_next[0] = bi;
+    f->n_vectors = 1;
+    vlib_put_frame_to_node (vm, hi->output_node_index, f);
+  }
+}
+
+/**
+ * @brief Give an adjacency a complete rewrite built from an ARP entry.
+ *
+ * @param ai adjacency to update
+ * @param e  ARP entry supplying the interface and destination MAC
+ */
+static void
+arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
+{
+  u8 *rewrite;
+
+  rewrite = ethernet_build_rewrite (vnet_get_main (),
+				    e->sw_if_index,
+				    adj_get_link_type (ai),
+				    e->ethernet_address);
+
+  adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE, rewrite);
+}
+
+/**
+ * @brief Put an adjacency back into the incomplete state.
+ *
+ * The adjacency gets a broadcast rewrite of link type ARP on its own
+ * interface.
+ *
+ * @param ai adjacency to update
+ */
+static void
+arp_mk_incomplete (adj_index_t ai)
+{
+  ip_adjacency_t *adj = adj_get (ai);
+  u8 *rewrite;
+
+  rewrite = ethernet_build_rewrite (vnet_get_main (),
+				    adj->rewrite_header.sw_if_index,
+				    VNET_LINK_ARP,
+				    VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST);
+
+  adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_INCOMPLETE, rewrite);
+}
+
+/**
+ * @brief Look up the ARP entry for an address on one interface.
+ *
+ * @param eai  per-interface ARP state to search
+ * @param addr IPv4 address to look for
+ * @return the entry, or NULL when the interface has no table yet or the
+ *         address is not in it
+ */
+static ethernet_arp_ip4_entry_t *
+arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  uword *pool_index;
+
+  /* the per-interface table is only created once an entry is added */
+  if (NULL == eai->arp_entries)
+    return (NULL);
+
+  pool_index = hash_get (eai->arp_entries, addr->as_u32);
+  if (!pool_index)
+    return (NULL);
+
+  return (pool_elt_at_index (am->ip4_entry_pool, pool_index[0]));
+}
+
+/**
+ * @brief Adjacency walk callback: complete the adjacency from an ARP entry.
+ *
+ * @param ai  adjacency being visited
+ * @param ctx the ethernet_arp_ip4_entry_t * to build the rewrite from
+ * @return always ADJ_WALK_RC_CONTINUE
+ */
+static adj_walk_rc_t
+arp_mk_complete_walk (adj_index_t ai, void *ctx)
+{
+  arp_mk_complete (ai, (ethernet_arp_ip4_entry_t *) ctx);
+
+  return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * @brief Adjacency walk callback: make the adjacency incomplete.
+ *
+ * @param ai  adjacency being visited
+ * @param ctx unused
+ * @return always ADJ_WALK_RC_CONTINUE
+ */
+static adj_walk_rc_t
+arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
+{
+  adj_walk_rc_t rc = ADJ_WALK_RC_CONTINUE;
+
+  arp_mk_incomplete (ai);
+
+  return (rc);
+}
+
+/**
+ * @brief Install the rewrite for an IPv4 adjacency on an ethernet interface.
+ *
+ * For ARP/glean adjacencies: if the next hop is already in the ARP cache,
+ * every adjacency for that next hop is completed; otherwise this adjacency
+ * gets an incomplete (broadcast ARP) rewrite and one ARP request is sent.
+ * For multicast adjacencies a partial rewrite is installed that is
+ * completed per packet from the IP destination address.
+ * All other adjacency types are not expected here and assert.
+ *
+ * @param vnm         vnet main
+ * @param sw_if_index interface the adjacency is on
+ * @param ai          index of the adjacency to update
+ */
+void
+arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ip_adjacency_t *adj = adj_get (ai);
+  ethernet_arp_ip4_entry_t *entry;
+
+  vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
+  entry = arp_entry_find (&am->ethernet_arp_by_sw_if_index[sw_if_index],
+			  &adj->sub_type.nbr.next_hop.ip4);
+
+  switch (adj->lookup_next_index)
+    {
+    case IP_LOOKUP_NEXT_ARP:
+    case IP_LOOKUP_NEXT_GLEAN:
+      if (NULL == entry)
+	{
+	  u8 *arp_rewrite;
+
+	  /*
+	   * No matching ARP entry: give the adjacency the rewrite needed
+	   * to send an ARP packet.
+	   */
+	  arp_rewrite = ethernet_build_rewrite
+	    (vnm,
+	     sw_if_index,
+	     VNET_LINK_ARP, VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST);
+	  adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+				  arp_rewrite);
+
+	  /*
+	   * Since the FIB has added this adj for a route, it is likely to
+	   * want to forward traffic soon. Send one speculative ARP request;
+	   * periodic probing is not implemented.
+	   */
+	  arp_nbr_probe (adj);
+	}
+      else
+	{
+	  adj_nbr_walk_nh4 (sw_if_index,
+			    &entry->ip4_address, arp_mk_complete_walk, entry);
+	}
+      break;
+
+    case IP_LOOKUP_NEXT_MCAST:
+      {
+	/*
+	 * Construct a partial rewrite from the known ethernet mcast
+	 * destination MAC.
+	 */
+	u8 *mcast_rewrite;
+	u8 offset;
+
+	mcast_rewrite = ethernet_build_rewrite (vnm,
+						sw_if_index,
+						adj->ia_link,
+						ethernet_ip4_mcast_dst_addr
+						());
+	offset = vec_len (mcast_rewrite) - 2;
+
+	/*
+	 * The remaining fields of the rewrite are completed at switch time
+	 * by copying in bytes of the IP destination address.
+	 * The offset is 2 bytes into the destination MAC address, and 23
+	 * bits are copied from the IP address.
+	 */
+	adj_mcast_update_rewrite (ai, mcast_rewrite, offset, 0x007fffff);
+
+	break;
+      }
+
+    case IP_LOOKUP_NEXT_DROP:
+    case IP_LOOKUP_NEXT_PUNT:
+    case IP_LOOKUP_NEXT_LOCAL:
+    case IP_LOOKUP_NEXT_REWRITE:
+    case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+    case IP_LOOKUP_NEXT_MIDCHAIN:
+    case IP_LOOKUP_NEXT_ICMP_ERROR:
+    case IP_LOOKUP_N_NEXT:
+      /* these adjacency types should never be handed to ARP */
+      ASSERT (0);
+      break;
+    }
+}
+
+/**
+ * @brief Install the /32 host route for an ARP entry.
+ *
+ * Adds an attached path via the entry's own address and interface, sourced
+ * as FIB_SOURCE_ADJ, remembers the resulting FIB entry index in the ARP
+ * entry, and takes a lock on the table.
+ *
+ * @param e         ARP entry to add the route for
+ * @param fib_index table to add the route to
+ */
+static void
+arp_adj_fib_add (ethernet_arp_ip4_entry_t * e, u32 fib_index)
+{
+  fib_node_index_t fei;
+  fib_prefix_t host_pfx = {
+    .fp_len = 32,
+    .fp_proto = FIB_PROTOCOL_IP4,
+    .fp_addr.ip4 = e->ip4_address,
+  };
+
+  fei = fib_table_entry_path_add (fib_index, &host_pfx, FIB_SOURCE_ADJ,
+				  FIB_ENTRY_FLAG_ATTACHED,
+				  DPO_PROTO_IP4, &host_pfx.fp_addr,
+				  e->sw_if_index, ~0, 1, NULL,
+				  FIB_ROUTE_PATH_FLAG_NONE);
+  e->fib_entry_index = fei;
+
+  fib_table_lock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
+}
+
+/**
+ * @brief Add or update the ARP cache entry for an address on an interface,
+ * then notify anyone interested in that address.
+ *
+ * A new entry is created (and, unless is_no_fib_entry is set, a host route
+ * installed) when the address is not yet known on the interface. An
+ * existing entry has its MAC address replaced, except that an update
+ * carrying the MAC already stored changes nothing in the entry. In both the
+ * changed and the unchanged case the waiters are still notified:
+ *  - one-shot resolution waiters are signalled and removed;
+ *  - MAC change subscribers get their data callback invoked and are
+ *    signalled when the callback returns 0.
+ *
+ * @param vnm  vnet main (unused)
+ * @param args interface, address pair and static / no-FIB options
+ * @return 0 on success, -2 when a non-static update targets a static entry
+ */
+static int
+vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
+					 vnet_arp_set_ip4_over_ethernet_rpc_args_t
+					 * args)
+{
+  ethernet_arp_ip4_entry_t *e = 0;
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_arp_ip4_over_ethernet_address_t *a = &args->a;
+  vlib_main_t *vm = vlib_get_main ();
+  int make_new_arp_cache_entry = 1;
+  uword *p;
+  pending_resolution_t *pr, *mc;
+  ethernet_arp_interface_t *arp_int;
+  int is_static = args->is_static;
+  u32 sw_if_index = args->sw_if_index;
+  int is_no_fib_entry = args->is_no_fib_entry;
+
+  vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
+
+  arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+
+  if (NULL != arp_int->arp_entries)
+    {
+      p = hash_get (arp_int->arp_entries, a->ip4.as_u32);
+      if (p)
+	{
+	  e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
+
+	  /* Refuse to over-write static arp. */
+	  if (!is_static && (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC))
+	    return -2;
+	  make_new_arp_cache_entry = 0;
+	}
+    }
+
+  if (make_new_arp_cache_entry)
+    {
+      pool_get (am->ip4_entry_pool, e);
+
+      if (NULL == arp_int->arp_entries)
+	{
+	  arp_int->arp_entries = hash_create (0, sizeof (u32));
+	}
+
+      hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool);
+
+      e->sw_if_index = sw_if_index;
+      e->ip4_address = a->ip4;
+      e->fib_entry_index = FIB_NODE_INDEX_INVALID;
+      /*
+       * The pool element may be recycled and still carry the flags of the
+       * entry that used it before; start from a clean slate.
+       */
+      e->flags = 0;
+      clib_memcpy (e->ethernet_address,
+		   a->ethernet, sizeof (e->ethernet_address));
+
+      if (!is_no_fib_entry)
+	{
+	  arp_adj_fib_add (e,
+			   ip4_fib_table_get_index_for_sw_if_index
+			   (e->sw_if_index));
+	}
+      else
+	{
+	  e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY;
+	}
+    }
+  else
+    {
+      /*
+       * prevent a DoS attack from the data-plane that
+       * spams us with no-op updates to the MAC address
+       */
+      if (0 == memcmp (e->ethernet_address,
+		       a->ethernet, sizeof (e->ethernet_address)))
+	goto check_customers;
+
+      /* Update time stamp and ethernet address. */
+      clib_memcpy (e->ethernet_address, a->ethernet,
+		   sizeof (e->ethernet_address));
+    }
+
+  e->cpu_time_last_updated = clib_cpu_time_now ();
+
+  /* an entry is either static or dynamic, never both */
+  if (is_static)
+    {
+      e->flags &= ~ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
+      e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
+    }
+  else
+    {
+      e->flags &= ~ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
+      e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
+    }
+
+  /* complete every adjacency that uses this next hop */
+  adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e);
+
+check_customers:
+  /* Customer(s) waiting for this address to be resolved? */
+  p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
+  if (p)
+    {
+      u32 next_index;
+      next_index = p[0];
+
+      /* signal and free every waiter on the list; ~0 ends the list */
+      while (next_index != (u32) ~ 0)
+	{
+	  pr = pool_elt_at_index (am->pending_resolutions, next_index);
+	  vlib_process_signal_event (vm, pr->node_index,
+				     pr->type_opaque, pr->data);
+	  next_index = pr->next_index;
+	  pool_put (am->pending_resolutions, pr);
+	}
+
+      hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32);
+    }
+
+  /* Customer(s) requesting ARP event for this address? */
+  p = hash_get (am->mac_changes_by_address, a->ip4.as_u32);
+  if (p)
+    {
+      u32 next_index;
+      next_index = p[0];
+
+      /* subscriptions are persistent, so the list is walked but kept */
+      while (next_index != (u32) ~ 0)
+	{
+	  int (*fp) (u32, u8 *, u32, u32);
+	  int rv = 1;
+	  mc = pool_elt_at_index (am->mac_changes, next_index);
+	  fp = mc->data_callback;
+
+	  /* Call the user's data callback, return 1 to suppress dup events */
+	  if (fp)
+	    rv = (*fp) (mc->data, a->ethernet, sw_if_index, 0);
+
+	  /*
+	   * Signal the resolver process, as long as the user
+	   * says they want to be notified
+	   */
+	  if (rv == 0)
+	    vlib_process_signal_event (vm, mc->node_index,
+				       mc->type_opaque, mc->data);
+	  next_index = mc->next_index;
+	}
+    }
+
+  return 0;
+}
+
+/**
+ * @brief Ask to be signalled once when an IPv4 address gets resolved.
+ *
+ * The registration is put at the head of the address's list of pending
+ * resolutions.
+ *
+ * @param vnm         vnet main (unused)
+ * @param address_arg the ip4_address_t * of interest
+ * @param node_index  process node to signal
+ * @param type_opaque event type to signal with
+ * @param data        event data to signal with
+ */
+void
+vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm,
+					void *address_arg,
+					uword node_index,
+					uword type_opaque, uword data)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ip4_address_t *address = address_arg;
+  pending_resolution_t *waiter;
+  uword *head;
+
+  pool_get (am->pending_resolutions, waiter);
+
+  waiter->node_index = node_index;
+  waiter->type_opaque = type_opaque;
+  waiter->data = data;
+  waiter->data_callback = 0;
+  /* end of list, unless there are waiters already (below) */
+  waiter->next_index = ~0;
+
+  head = hash_get (am->pending_resolutions_by_address, address->as_u32);
+  if (head)
+    {
+      /* Insert new resolution at the head of the list */
+      waiter->next_index = head[0];
+      hash_unset (am->pending_resolutions_by_address, address->as_u32);
+    }
+
+  hash_set (am->pending_resolutions_by_address, address->as_u32,
+	    waiter - am->pending_resolutions);
+}
+
+/**
+ * @brief Add or remove a subscription to MAC changes of an IPv4 address.
+ *
+ * A subscription is identified by (node_index, type_opaque, pid) within
+ * the list kept for the address. New subscriptions go to the end of that
+ * list.
+ *
+ * The list is tracked by pool index rather than by pointer: pool_get may
+ * move the mac_changes pool, and the hash values are uwords, so holding a
+ * u32 pointer into either across the allocation is not safe.
+ *
+ * @param vnm           vnet main (unused)
+ * @param data_callback on add: stored and invoked on each MAC update;
+ *                      on delete: invoked once as (data, 0) so the client
+ *                      can clean up
+ * @param pid           client id, part of the subscription identity
+ * @param address_arg   the ip4_address_t * of interest
+ * @param node_index    process node to signal
+ * @param type_opaque   event type to signal with
+ * @param data          event data to signal with
+ * @param is_add        non-zero to add, zero to delete
+ * @return 0 on success, VNET_API_ERROR_ENTRY_ALREADY_EXISTS when adding a
+ *         duplicate, VNET_API_ERROR_NO_SUCH_ENTRY when deleting a
+ *         subscription that does not exist
+ */
+int
+vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
+				   void *data_callback,
+				   u32 pid,
+				   void *address_arg,
+				   uword node_index,
+				   uword type_opaque, uword data, int is_add)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ip4_address_t *address = address_arg;
+  pending_resolution_t *mc = 0;
+  uword *head;
+  /* element before cur_index in the list; ~0 means cur is the list head */
+  u32 prev_index = ~0;
+  u32 cur_index = ~0;
+
+  /* Try to find an existing entry */
+  head = hash_get (am->mac_changes_by_address, address->as_u32);
+  if (head)
+    cur_index = head[0];
+
+  while (cur_index != (u32) ~ 0)
+    {
+      mc = pool_elt_at_index (am->mac_changes, cur_index);
+      if (mc->node_index == node_index && mc->type_opaque == type_opaque
+	  && mc->pid == pid)
+	break;
+      prev_index = cur_index;
+      cur_index = mc->next_index;
+    }
+
+  int found = cur_index != (u32) ~ 0;
+  if (is_add)
+    {
+      if (found)
+	return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
+
+      /* may move the pool: any pointer into it is stale from here on */
+      pool_get (am->mac_changes, mc);
+      *mc = (pending_resolution_t)
+      {
+      .next_index = ~0,.node_index = node_index,.type_opaque =
+	  type_opaque,.data = data,.data_callback = data_callback,.pid =
+	  pid,};
+
+      /* Insert new resolution at the end of the list */
+      u32 new_idx = mc - am->mac_changes;
+      if (prev_index != (u32) ~ 0)
+	pool_elt_at_index (am->mac_changes, prev_index)->next_index =
+	  new_idx;
+      else
+	hash_set (am->mac_changes_by_address, address->as_u32, new_idx);
+    }
+  else
+    {
+      if (!found)
+	return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+      /* Clients may need to clean up pool entries, too */
+      void (*fp) (u32, u8 *) = data_callback;
+      if (fp)
+	(*fp) (mc->data, 0 /* no new mac addrs */ );
+
+      /* Remove the entry from the list and delete the entry */
+      u32 next_index = mc->next_index;
+      pool_put (am->mac_changes, mc);
+
+      if (prev_index != (u32) ~ 0)
+	pool_elt_at_index (am->mac_changes, prev_index)->next_index =
+	  next_index;
+      else if (next_index != (u32) ~ 0)
+	/* removed the head: the hash now points at its successor */
+	hash_set (am->mac_changes_by_address, address->as_u32, next_index);
+      else
+	/* Remove from hash if we deleted the last entry */
+	hash_unset (am->mac_changes_by_address, address->as_u32);
+    }
+  return 0;
+}
+
+/*
+ * Next nodes of the ARP input node.
+ * Either we drop the packet or we send a reply to the sender.
+ */
+typedef enum
+{
+ ARP_INPUT_NEXT_DROP,
+ ARP_INPUT_NEXT_REPLY_TX,
+ /* number of next nodes, not a next node itself */
+ ARP_INPUT_N_NEXT,
+} arp_input_next_t;
+
+/*
+ * X-macro list of the ARP input counters/errors: _(symbol, description).
+ * The first entry has enum value 0, which the input node also uses as its
+ * "no error so far" value.
+ */
+#define foreach_ethernet_arp_error \
+ _ (replies_sent, "ARP replies sent") \
+ _ (l2_type_not_ethernet, "L2 type not ethernet") \
+ _ (l3_type_not_ip4, "L3 type not IP4") \
+ _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
+ _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
+ _ (l3_src_address_is_local, "IP4 source address matches local interface") \
+ _ (l3_src_address_learned, "ARP request IP4 source address learned") \
+ _ (replies_received, "ARP replies received") \
+ _ (opcode_not_request, "ARP opcode not request") \
+ _ (proxy_arp_replies_sent, "Proxy ARP replies sent") \
+ _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
+ _ (gratuitous_arp, "ARP probe or announcement dropped") \
+ _ (interface_no_table, "Interface is not mapped to an IP table") \
+ _ (interface_not_ip_enabled, "Interface is not IP enabled") \
+
+/* One ETHERNET_ARP_ERROR_<symbol> per entry of foreach_ethernet_arp_error. */
+typedef enum
+{
+#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
+ foreach_ethernet_arp_error
+#undef _
+ /* number of errors, not an error itself */
+ ETHERNET_ARP_N_ERROR,
+} ethernet_arp_input_error_t;
+
+
+/**
+ * @brief Evict one entry from the ARP cache.
+ *
+ * The victim is the next in-use pool element after the position recorded
+ * in arp_delete_rotor; the rotor is advanced to it. When the scan runs off
+ * the end of the pool it is retried once from the start. Does nothing if
+ * the pool is empty.
+ */
+static void
+unset_random_arp_entry (void)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_arp_ip4_over_ethernet_address_t victim;
+  ethernet_arp_ip4_entry_t *entry;
+  u32 pool_index = ~0;
+  int attempt;
+
+  /*
+   * Second attempt: try again from elt 0 (the rotor is ~0 by then);
+   * could happen if an intfc goes down.
+   */
+  for (attempt = 0; attempt < 2; attempt++)
+    {
+      pool_index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor);
+      am->arp_delete_rotor = pool_index;
+      if (pool_index != ~0)
+	break;
+    }
+
+  /* Nothing left in the pool */
+  if (pool_index == ~0)
+    return;
+
+  entry = pool_elt_at_index (am->ip4_entry_pool, pool_index);
+
+  /* copy the key out of the entry; the unset call is given this copy */
+  clib_memcpy (&victim.ethernet, entry->ethernet_address, 6);
+  victim.ip4.as_u32 = entry->ip4_address.as_u32;
+
+  vnet_arp_unset_ip4_over_ethernet (vnet_get_main (), entry->sw_if_index,
+				    &victim);
+}
+
+/**
+ * @brief Is the input interface unnumbered to the connected interface?
+ *
+ * The connected interface is the one on which the subnet covering the ARP
+ * request is configured.
+ *
+ * @param p0                the ARP packet (unused)
+ * @param input_sw_if_index interface the ARP packet arrived on
+ * @param conn_sw_if_index  interface on which the subnet is configured
+ * @return 1 if the input interface is flagged unnumbered and borrows its
+ *         addressing from conn_sw_if_index, 0 otherwise
+ */
+static int
+arp_unnumbered (vlib_buffer_t * p0,
+		u32 input_sw_if_index, u32 conn_sw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_interface_main_t *vim = &vnm->interface_main;
+  vnet_sw_interface_t *input_si = &vim->sw_interfaces[input_sw_if_index];
+
+  if (!(input_si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED))
+    return 0;
+
+  /* unnumbered, but to some other interface: not our case either */
+  if (input_si->unnumbered_sw_if_index != conn_sw_if_index)
+    return 0;
+
+  return 1;
+}
+
+/**
+ * @brief Learn a sender's address pair as a dynamic ARP entry.
+ *
+ * When a cache size limit is configured and reached, one existing entry is
+ * evicted first.
+ *
+ * @param vnm         vnet main
+ * @param am          ARP main
+ * @param sw_if_index interface the sender was seen on
+ * @param addr        the sender's IPv4/MAC address pair
+ * @return always ETHERNET_ARP_ERROR_l3_src_address_learned
+ */
+static u32
+arp_learn (vnet_main_t * vnm,
+	   ethernet_arp_main_t * am, u32 sw_if_index, void *addr)
+{
+  u32 cache_limit = am->limit_arp_cache_size;
+
+  /* a limit of 0 means the cache is unbounded */
+  if (cache_limit && pool_elts (am->ip4_entry_pool) >= cache_limit)
+    {
+      unset_random_arp_entry ();
+    }
+
+  vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, addr, 0, 0);
+
+  return (ETHERNET_ARP_ERROR_l3_src_address_learned);
+}
+
+static uword
+arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im4 = &ip4_main;
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ethernet_arp_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ vnet_hw_interface_t *hw_if0;
+ ethernet_arp_header_t *arp0;
+ ethernet_header_t *eth_rx, *eth_tx;
+ ip4_address_t *if_addr0, proxy_src;
+ u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0;
+ u8 is_request0, dst_is_local0, is_unnum0, is_vrrp_reply0;
+ ethernet_proxy_arp_t *pa;
+ fib_node_index_t dst_fei, src_fei;
+ fib_prefix_t pfx0;
+ fib_entry_flag_t src_flags, dst_flags;
+ u8 *rewrite0, rewrite0_len;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ pa = 0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ arp0 = vlib_buffer_get_current (p0);
+ /* Fill in ethernet header. */
+ eth_rx = ethernet_buffer_get_header (p0);
+
+ is_request0 = arp0->opcode
+ == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
+
+ error0 = ETHERNET_ARP_ERROR_replies_sent;
+
+ error0 =
+ (arp0->l2_type !=
+ clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
+ ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
+ error0 =
+ (arp0->l3_type !=
+ clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
+ ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* not playing the ARP game if the interface is not IPv4 enabled */
+ error0 =
+ (im4->ip_enabled_by_sw_if_index[sw_if_index0] == 0 ?
+ ETHERNET_ARP_ERROR_interface_not_ip_enabled : error0);
+
+ if (error0)
+ goto drop2;
+
+ /* Check that IP address is local and matches incoming interface. */
+ fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
+ if (~0 == fib_index0)
+ {
+ error0 = ETHERNET_ARP_ERROR_interface_no_table;
+ goto drop2;
+
+ }
+ dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
+ &arp0->ip4_over_ethernet[1].ip4,
+ 32);
+ dst_flags = fib_entry_get_flags (dst_fei);
+
+ conn_sw_if_index0 = fib_entry_get_resolving_interface (dst_fei);
+
+ /* Honor unnumbered interface, if any */
+ is_unnum0 = sw_if_index0 != conn_sw_if_index0;
+
+ {
+ /*
+ * we're looking for FIB entries that indicate the source
+ * is attached. There may be more specific non-attached
+ * routes that match the source, but these do not influence
+ * whether we respond to an ARP request, i.e. they do not
+ * influence whether we are the correct way for the sender
+ * to reach us, they only affect how we reach the sender.
+ */
+ fib_entry_t *src_fib_entry;
+ fib_entry_src_t *src;
+ fib_source_t source;
+ fib_prefix_t pfx;
+ int attached;
+ int mask;
+
+ mask = 32;
+ attached = 0;
+
+ do
+ {
+ src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
+ &arp0->
+ ip4_over_ethernet[0].ip4,
+ mask);
+ src_fib_entry = fib_entry_get (src_fei);
+
+ /*
+ * It's possible that the source that provides the
+ * flags we need, or the flags we must not have,
+ * is not the best source, so check them all.
+ */
+ /* *INDENT-OFF* */
+ FOR_EACH_SRC_ADDED(src_fib_entry, src, source,
+ ({
+ src_flags = fib_entry_get_flags_for_source (src_fei, source);
+
+ /* Reject requests/replies with our local interface
+ address. */
+ if (FIB_ENTRY_FLAG_LOCAL & src_flags)
+ {
+ error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
+ /*
+ * When VPP has an interface whose address is also
+ * applied to a TAP interface on the host, then VPP's
+ * TAP interface will be unnumbered to the 'real'
+ * interface and do proxy ARP from the host.
+ * The curious aspect of this setup is that ARP requests
+ * from the host will come from the VPP's own address.
+ * So don't drop immediately here, instead go see if this
+ * is a proxy ARP case.
+ */
+ goto drop1;
+ }
+ /* A Source must also be local to subnet of matching
+ * interface address. */
+ if ((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
+ (FIB_ENTRY_FLAG_CONNECTED & src_flags))
+ {
+ attached = 1;
+ break;
+ }
+ /*
+ * else
+ * The packet was sent from an address that is not
+ * connected nor attached i.e. it is not from an
+ * address that is covered by a link's sub-net,
+ * nor is it a already learned host resp.
+ */
+ }));
+ /* *INDENT-ON* */
+
+ /*
+ * shorter mask lookup for the next iteration.
+ */
+ fib_entry_get_prefix (src_fei, &pfx);
+ mask = pfx.fp_len - 1;
+
+ /*
+ * continue until we hit the default route or we find
+ * the attached we are looking for. The most likely
+ * outcome is we find the attached with the first source
+ * on the first lookup.
+ */
+ }
+ while (!attached &&
+ !fib_entry_is_sourced (src_fei, FIB_SOURCE_DEFAULT_ROUTE));
+
+ if (!attached)
+ {
+ /*
+ * the matching route is a not attached, i.e. it was
+ * added as a result of routing, rather than interface/ARP
+ * configuration. If the matching route is not a host route
+ * (i.e. a /32)
+ */
+ error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
+ goto drop2;
+ }
+ }
+
+ if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags))
+ {
+ error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
+ goto drop1;
+ }
+
+ if (sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
+ {
+ /*
+ * The interface the ARP was received on is not the interface
+ * on which the covering prefix is configured. Maybe this is a
+ * case for unnumbered.
+ */
+ is_unnum0 = 1;
+ }
+
+ dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags);
+ fib_entry_get_prefix (dst_fei, &pfx0);
+ if_addr0 = &pfx0.fp_addr.ip4;
+
+ is_vrrp_reply0 =
+ ((arp0->opcode ==
+ clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
+ &&
+ (!memcmp
+ (arp0->ip4_over_ethernet[0].ethernet, vrrp_prefix,
+ sizeof (vrrp_prefix))));
+
+ /* Trash ARP packets whose ARP-level source addresses do not
+ match their L2-frame-level source addresses, unless it's
+ a reply from a VRRP virtual router */
+ if (memcmp
+ (eth_rx->src_address, arp0->ip4_over_ethernet[0].ethernet,
+ sizeof (eth_rx->src_address)) && !is_vrrp_reply0)
+ {
+ error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
+ goto drop2;
+ }
+
+ /* Learn or update sender's mapping only for replies to addresses
+ * that are local to the subnet */
+ if (arp0->opcode ==
+ clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply) &&
+ dst_is_local0)
+ {
+ error0 = arp_learn (vnm, am, sw_if_index0,
+ &arp0->ip4_over_ethernet[0]);
+ goto drop1;
+ }
+
+ send_reply:
+ /* Send a reply.
+ An adjacency to the sender is not always present,
+ so we use the interface to build us a rewrite string
+ which will contain all the necessary tags. */
+ rewrite0 = ethernet_build_rewrite (vnm, sw_if_index0,
+ VNET_LINK_ARP,
+ eth_rx->src_address);
+ rewrite0_len = vec_len (rewrite0);
+
+ /* Figure out how much to rewind current data from adjacency. */
+ vlib_buffer_advance (p0, -rewrite0_len);
+ eth_tx = vlib_buffer_get_current (p0);
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+ /* Send reply back through input interface */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ next0 = ARP_INPUT_NEXT_REPLY_TX;
+
+ arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
+
+ arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
+
+ clib_memcpy (arp0->ip4_over_ethernet[0].ethernet,
+ hw_if0->hw_address, 6);
+ clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
+ if_addr0->data_u32;
+
+ /* Hardware must be ethernet-like. */
+ ASSERT (vec_len (hw_if0->hw_address) == 6);
+
+ /* the rx and tx ethernet headers will overlap in the case
+ * when we received a tagged VLAN=0 packet, but we are sending
+ * back untagged */
+ clib_memcpy (eth_tx, rewrite0, vec_len (rewrite0));
+ vec_free (rewrite0);
+
+ if (NULL == pa)
+ {
+ if (is_unnum0)
+ {
+ if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
+ goto drop2;
+ }
+ }
+
+ /* We are going to reply to this request, so, in the absence of
+ errors, learn the sender */
+ if (!error0)
+ error0 = arp_learn (vnm, am, sw_if_index0,
+ &arp0->ip4_over_ethernet[1]);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+
+ n_replies_sent += 1;
+ continue;
+
+ drop1:
+ if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
+ (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
+ arp0->ip4_over_ethernet[1].ip4.as_u32))
+ {
+ error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
+ goto drop2;
+ }
+ /* See if proxy arp is configured for the address */
+ if (is_request0)
+ {
+ vnet_sw_interface_t *si;
+ u32 this_addr = clib_net_to_host_u32
+ (arp0->ip4_over_ethernet[1].ip4.as_u32);
+ u32 fib_index0;
+
+ si = vnet_get_sw_interface (vnm, sw_if_index0);
+
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP))
+ goto drop2;
+
+ fib_index0 = vec_elt (im4->fib_index_by_sw_if_index,
+ sw_if_index0);
+
+ vec_foreach (pa, am->proxy_arps)
+ {
+ u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr);
+ u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr);
+
+ /* an ARP request hit in the proxy-arp table? */
+ if ((this_addr >= lo_addr && this_addr <= hi_addr) &&
+ (fib_index0 == pa->fib_index))
+ {
+ proxy_src.as_u32 =
+ arp0->ip4_over_ethernet[1].ip4.data_u32;
+
+ /*
+ * change the interface address to the proxied
+ */
+ if_addr0 = &proxy_src;
+ is_unnum0 = 0;
+ n_proxy_arp_replies_sent++;
+ goto send_reply;
+ }
+ }
+ }
+
+ drop2:
+
+ next0 = ARP_INPUT_NEXT_DROP;
+ p0->error = node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_error_count (vm, node->node_index,
+ ETHERNET_ARP_ERROR_replies_sent,
+ n_replies_sent - n_proxy_arp_replies_sent);
+
+ vlib_error_count (vm, node->node_index,
+ ETHERNET_ARP_ERROR_proxy_arp_replies_sent,
+ n_proxy_arp_replies_sent);
+ return frame->n_vectors;
+}
+
+/* Human-readable strings for the ARP error counters: one string per
+ * (sym, string) pair listed by foreach_ethernet_arp_error, in list order. */
+static char *ethernet_arp_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ethernet_arp_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+/* Graph node "arp-input": runs arp_input() and sends each buffer either to
+ * "error-drop" or, for replies it generates, to "interface-output". */
+VLIB_REGISTER_NODE (arp_input_node, static) =
+{
+ .function = arp_input,
+ .name = "arp-input",
+ .vector_size = sizeof (u32),
+ .n_errors = ETHERNET_ARP_N_ERROR,
+ .error_strings = ethernet_arp_error_strings,
+ .n_next_nodes = ARP_INPUT_N_NEXT,
+ .next_nodes = {
+ [ARP_INPUT_NEXT_DROP] = "error-drop",
+ [ARP_INPUT_NEXT_REPLY_TX] = "interface-output",
+ },
+ .format_buffer = format_ethernet_arp_header,
+ .format_trace = format_ethernet_arp_input_trace,
+};
+/* *INDENT-ON* */
+
+/*
+ * Ordering callback used to sort ARP entries: the primary key is the
+ * interface (as ordered by vnet_sw_interface_compare), the secondary
+ * key is the IPv4 address (as ordered by ip4_address_compare).
+ */
+static int
+ip4_arp_entry_sort (void *a1, void *a2)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ethernet_arp_ip4_entry_t *lhs = a1;
+  ethernet_arp_ip4_entry_t *rhs = a2;
+  int by_interface;
+
+  by_interface =
+    vnet_sw_interface_compare (vnm, lhs->sw_if_index, rhs->sw_if_index);
+  if (by_interface != 0)
+    return by_interface;
+
+  /* same interface: fall back to the address */
+  return ip4_address_compare (&lhs->ip4_address, &rhs->ip4_address);
+}
+
+/*
+ * Build a sorted copy of the ARP entries.
+ *
+ * @param sw_if_index  interface to filter on, or ~0 to take every entry.
+ * @return a vector of entry copies ordered by ip4_arp_entry_sort, or 0
+ *         when nothing matched. The caller frees the vector.
+ */
+ethernet_arp_ip4_entry_t *
+ip4_neighbor_entries (u32 sw_if_index)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_arp_ip4_entry_t *entry;
+  ethernet_arp_ip4_entry_t *snapshot = 0;
+
+  /* *INDENT-OFF* */
+  pool_foreach (entry, am->ip4_entry_pool, ({
+    if (sw_if_index == ~0 || entry->sw_if_index == sw_if_index)
+      {
+        vec_add1 (snapshot, entry[0]);
+      }
+  }));
+  /* *INDENT-ON* */
+
+  if (snapshot != 0)
+    vec_sort_with_function (snapshot, ip4_arp_entry_sort);
+
+  return snapshot;
+}
+
+/*
+ * CLI handler for "show ip arp".
+ *
+ * Prints the ARP entries, optionally restricted to the interface named
+ * on the command line, followed by the configured proxy ARP ranges.
+ * Always returns 0 (no error).
+ */
+static clib_error_t *
+show_ip4_arp (vlib_main_t * vm,
+              unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_arp_ip4_entry_t *entries, *entry;
+  ethernet_proxy_arp_t *range;
+  u32 sw_if_index = ~0;
+
+  /* An interface argument is optional; ~0 means "no filter". */
+  (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
+
+  entries = ip4_neighbor_entries (sw_if_index);
+  if (entries != 0)
+    {
+      /* First call passes a null entry (presumably formats the header). */
+      vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0);
+      vec_foreach (entry, entries)
+      {
+        vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm,
+                         entry);
+      }
+      vec_free (entries);
+    }
+
+  if (vec_len (am->proxy_arps) == 0)
+    return 0;
+
+  vlib_cli_output (vm, "Proxy arps enabled for:");
+  vec_foreach (range, am->proxy_arps)
+  {
+    vlib_cli_output (vm, "Fib_index %d %U - %U ",
+                     range->fib_index,
+                     format_ip4_address, &range->lo_addr,
+                     format_ip4_address, &range->hi_addr);
+  }
+
+  return 0;
+}
+
+/*?
+ * Display all the IPv4 ARP entries.
+ * If an interface name is supplied, only the entries on that interface
+ * are shown. Any configured proxy ARP ranges are listed after the entries.
+ *
+ * @cliexpar
+ * Example of how to display the IPv4 ARP table:
+ * @cliexstart{show ip arp}
+ * Time FIB IP4 Flags Ethernet Interface
+ * 346.3028 0 6.1.1.3 de:ad:be:ef:ba:be GigabitEthernet2/0/0
+ * 3077.4271 0 6.1.1.4 S de:ad:be:ef:ff:ff GigabitEthernet2/0/0
+ * 2998.6409 1 6.2.2.3 de:ad:be:ef:00:01 GigabitEthernet2/0/0
+ * Proxy arps enabled for:
+ * Fib_index 0 6.0.0.1 - 6.0.0.11
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
+ .path = "show ip arp",
+ .function = show_ip4_arp,
+ .short_help = "show ip arp",
+};
+/* *INDENT-ON* */
+
+/* Packet-generator edit descriptors for an ARP header: one pg_edit_t for
+ * each ethernet_arp_header_t field set up by pg_ethernet_arp_header_init. */
+typedef struct
+{
+ pg_edit_t l2_type, l3_type;
+ pg_edit_t n_l2_address_bytes, n_l3_address_bytes;
+ pg_edit_t opcode;
+ /* [0] and [1] mirror the two address pairs of ethernet_arp_header_t */
+ struct
+ {
+ pg_edit_t ethernet;
+ pg_edit_t ip4;
+ } ip4_over_ethernet[2];
+} pg_ethernet_arp_header_t;
+
+/* Bind every edit in *p to the ethernet_arp_header_t field of the same
+ * name via pg_edit_init. */
+static inline void
+pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p)
+{
+ /* Initialize one edit per field of the ARP header. */
+#define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f);
+ _(l2_type);
+ _(l3_type);
+ _(n_l2_address_bytes);
+ _(n_l3_address_bytes);
+ _(opcode);
+ _(ip4_over_ethernet[0].ethernet);
+ _(ip4_over_ethernet[0].ip4);
+ _(ip4_over_ethernet[1].ethernet);
+ _(ip4_over_ethernet[1].ip4);
+#undef _
+}
+
+/*
+ * Packet-generator parser for an ARP header.
+ *
+ * Takes a pg_stream_t * from the argument list, creates an edit group on
+ * it, fixes the hardware type to ethernet, the protocol type to IP4 and
+ * the address lengths to 6 and 4, then parses
+ * "<opcode>: <mac>/<ip4> -> <mac>/<ip4>" from the input.
+ *
+ * Returns 1 on success. On a parse failure the edit group is freed and
+ * 0 is returned.
+ */
+uword
+unformat_pg_arp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_ethernet_arp_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t),
+ &group_index);
+ pg_ethernet_arp_header_init (p);
+
+ /* Defaults. */
+ pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
+ pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4);
+ pg_edit_set_fixed (&p->n_l2_address_bytes, 6);
+ pg_edit_set_fixed (&p->n_l3_address_bytes, 4);
+
+ /* opcode, then sender (index 0) and target (index 1) address pairs */
+ if (!unformat (input, "%U: %U/%U -> %U/%U",
+ unformat_pg_edit,
+ unformat_ethernet_arp_opcode_net_byte_order, &p->opcode,
+ unformat_pg_edit,
+ unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet,
+ unformat_pg_edit,
+ unformat_ip4_address, &p->ip4_over_ethernet[0].ip4,
+ unformat_pg_edit,
+ unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet,
+ unformat_pg_edit,
+ unformat_ip4_address, &p->ip4_over_ethernet[1].ip4))
+ {
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+ }
+ return 1;
+}
+
+clib_error_t *
+ip4_set_arp_limit (u32 arp_limit)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+
+ am->limit_arp_cache_size = arp_limit;
+ return 0;
+}
+
+/**
+ * @brief Control Plane hook to remove an ARP entry
+ *
+ * Packages the request and hands it to the main thread via
+ * vl_api_rpc_call_main_thread; the removal itself is done there.
+ *
+ * @param vnm          unused here
+ * @param sw_if_index  interface the entry belongs to
+ * @param a_arg        points to an ethernet_arp_ip4_over_ethernet_address_t
+ *                     identifying the entry
+ * @return always 0
+ */
+int
+vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
+                                  u32 sw_if_index, void *a_arg)
+{
+  ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+  /* Initialise every field: the whole struct is copied into the RPC
+   * message, so members this request does not use must not be left as
+   * indeterminate stack contents. */
+  vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
+    .sw_if_index = sw_if_index,
+    .flags = ETHERNET_ARP_ARGS_REMOVE,
+  };
+
+  clib_memcpy (&args.a, a, sizeof (*a));
+
+  vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+                               (u8 *) & args, sizeof (args));
+  return 0;
+}
+
+/**
+ * @brief Internally generated event to flush the ARP cache on an
+ * interface state change event.
+ * A flush will remove dynamic ARP entries, and for statics remove the MAC
+ * address from the corresponding adjacencies.
+ *
+ * The request is handed to the main thread via
+ * vl_api_rpc_call_main_thread.
+ *
+ * @param vnm          unused here
+ * @param sw_if_index  interface the entry belongs to
+ * @param a_arg        points to an ethernet_arp_ip4_over_ethernet_address_t
+ *                     identifying the entry
+ * @return always 0
+ */
+static int
+vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm,
+                                  u32 sw_if_index, void *a_arg)
+{
+  ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+  /* Initialise every field: the whole struct is copied into the RPC
+   * message, so members this request does not use must not be left as
+   * indeterminate stack contents. */
+  vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
+    .sw_if_index = sw_if_index,
+    .flags = ETHERNET_ARP_ARGS_FLUSH,
+  };
+
+  clib_memcpy (&args.a, a, sizeof (*a));
+
+  vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+                               (u8 *) & args, sizeof (args));
+  return 0;
+}
+
+/**
+ * @brief Internally generated event to populate the ARP cache on an
+ * interface state change event.
+ * For static entries this will re-source the adjacencies.
+ *
+ * The request is handed to the main thread via
+ * vl_api_rpc_call_main_thread.
+ *
+ * @param vnm          unused here
+ * @param sw_if_index  The interface on which the ARP entries are acted
+ * @param a_arg        points to an ethernet_arp_ip4_over_ethernet_address_t
+ *                     identifying the entry
+ * @return always 0
+ */
+static int
+vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm,
+                                     u32 sw_if_index, void *a_arg)
+{
+  ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
+  /* Initialise every field: the whole struct is copied into the RPC
+   * message, so members this request does not use must not be left as
+   * indeterminate stack contents. */
+  vnet_arp_set_ip4_over_ethernet_rpc_args_t args = {
+    .sw_if_index = sw_if_index,
+    .flags = ETHERNET_ARP_ARGS_POPULATE,
+  };
+
+  clib_memcpy (&args.a, a, sizeof (*a));
+
+  vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+                               (u8 *) & args, sizeof (args));
+  return 0;
+}
+
+/**
+ * @brief Publish a wildcard ARP event
+ *
+ * Hands the reported address pair to the main thread via
+ * vl_api_rpc_call_main_thread, tagged ETHERNET_ARP_ARGS_WC_PUB.
+ *
+ * @param sw_if_index The interface on which the ARP packet was seen
+ * @param a_arg       points to the ethernet_arp_ip4_over_ethernet_address_t
+ *                    to report
+ * @return always 0
+ */
+static int
+vnet_arp_wc_publish (u32 sw_if_index, void *a_arg)
+{
+  ethernet_arp_ip4_over_ethernet_address_t *reported = a_arg;
+  vnet_arp_set_ip4_over_ethernet_rpc_args_t rpc_args = {
+    .sw_if_index = sw_if_index,
+    .a = *reported,
+    .flags = ETHERNET_ARP_ARGS_WC_PUB,
+  };
+
+  vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+                               (u8 *) & rpc_args, sizeof (rpc_args));
+
+  return 0;
+}
+
+/*
+ * Main-thread half of vnet_arp_wc_publish: if a publisher node has been
+ * registered, signal it with a wc_arp_report_t carrying the IPv4
+ * address, interface and MAC taken from args. Does nothing while the
+ * publisher node index is ~0.
+ */
+static void
+vnet_arp_wc_publish_internal (vnet_main_t * vnm,
+                              vnet_arp_set_ip4_over_ethernet_rpc_args_t *
+                              args)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  uword publisher_node = am->wc_ip4_arp_publisher_node;
+  uword publisher_event = am->wc_ip4_arp_publisher_et;
+  wc_arp_report_t *report;
+
+  if ((uword) ~ 0 == publisher_node)
+    return;
+
+  report = vlib_process_signal_event_data (vm, publisher_node,
+                                           publisher_event, 1,
+                                           sizeof *report);
+  report->ip4 = args->a.ip4.as_u32;
+  report->sw_if_index = args->sw_if_index;
+  memcpy (report->mac, args->a.ethernet, sizeof report->mac);
+}
+
+/*
+ * Record which process node, and which event type, wildcard ARP
+ * reports are signalled to. A node index of ~0 disables publishing.
+ */
+void
+wc_arp_set_publisher_node (uword node_index, uword event_type)
+{
+  ethernet_arp_main.wc_ip4_arp_publisher_node = node_index;
+  ethernet_arp_main.wc_ip4_arp_publisher_et = event_type;
+}
+
+/*
+ * arp_add_del_interface_address
+ *
+ * Callback invoked when an interface address is added or deleted.
+ * Additions are ignored. On deletion, a flush request is issued for
+ * every ARP entry on the interface that is covered by the address
+ * being removed.
+ */
+static void
+arp_add_del_interface_address (ip4_main_t * im,
+                               uword opaque,
+                               u32 sw_if_index,
+                               ip4_address_t * address,
+                               u32 address_length,
+                               u32 if_address_index, u32 is_del)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_arp_interface_t *eai;
+  ethernet_arp_ip4_entry_t *entry;
+  hash_pair_t *hp;
+  u32 *covered = 0;
+  u32 *idx;
+
+  /* no ARP state for this interface */
+  if (sw_if_index >= vec_len (am->ethernet_arp_by_sw_if_index))
+    return;
+
+  if (!is_del)
+    return;
+
+  eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+
+  /* Pass 1: remember the pool indices of the covered entries. */
+  /* *INDENT-OFF* */
+  hash_foreach_pair (hp, eai->arp_entries,
+  ({
+    entry = pool_elt_at_index (am->ip4_entry_pool, hp->value[0]);
+    if (ip4_destination_matches_route (im, &entry->ip4_address,
+                                       address, address_length))
+      {
+        vec_add1 (covered, entry - am->ip4_entry_pool);
+      }
+  }));
+  /* *INDENT-ON* */
+
+  /* Pass 2: request a flush for each remembered entry. */
+  vec_foreach (idx, covered)
+  {
+    ethernet_arp_ip4_over_ethernet_address_t victim;
+
+    entry = pool_elt_at_index (am->ip4_entry_pool, idx[0]);
+
+    clib_memcpy (&victim.ethernet, entry->ethernet_address, 6);
+    victim.ip4.as_u32 = entry->ip4_address.as_u32;
+
+    vnet_arp_flush_ip4_over_ethernet (vnet_get_main (),
+                                      entry->sw_if_index, &victim);
+  }
+
+  vec_free (covered);
+}
+
+/*
+ * Remove the /32 adj-fib sourced for ARP entry e from FIB table
+ * fib_index and release the table lock taken for it.
+ *
+ * Does nothing when the entry has no FIB entry
+ * (fib_entry_index == FIB_NODE_INDEX_INVALID).
+ *
+ * @param e          the ARP entry whose adj-fib is removed
+ * @param fib_index  the table to remove it from. This is honoured as
+ *                   given, so a caller re-binding an interface can name
+ *                   the table the interface was previously bound to.
+ */
+void
+arp_adj_fib_remove (ethernet_arp_ip4_entry_t * e, u32 fib_index)
+{
+  if (FIB_NODE_INDEX_INVALID != e->fib_entry_index)
+    {
+      fib_prefix_t pfx = {
+        .fp_len = 32,
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_addr.ip4 = e->ip4_address,
+      };
+
+      /* Use the caller's fib_index; do not re-derive it from the
+       * interface, whose current binding may differ from the table the
+       * adj-fib was installed in. */
+      fib_table_entry_path_remove (fib_index, &pfx,
+                                   FIB_SOURCE_ADJ,
+                                   DPO_PROTO_IP4,
+                                   &pfx.fp_addr,
+                                   e->sw_if_index, ~0, 1,
+                                   FIB_ROUTE_PATH_FLAG_NONE);
+      fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ);
+    }
+}
+
+/*
+ * Callback invoked when the IP table an interface is bound to changes.
+ * For every ARP entry on the interface the adj-fib is removed (passing
+ * old_fib_index) and re-added (passing new_fib_index).
+ */
+static void
+arp_table_bind (ip4_main_t * im,
+                uword opaque,
+                u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_arp_interface_t *eai;
+  ethernet_arp_ip4_entry_t *entry;
+  hash_pair_t *hp;
+
+  /* no ARP state for this interface, nothing to move */
+  if (sw_if_index >= vec_len (am->ethernet_arp_by_sw_if_index))
+    return;
+
+  eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+
+  /* *INDENT-OFF* */
+  hash_foreach_pair (hp, eai->arp_entries,
+  ({
+    entry = pool_elt_at_index (am->ip4_entry_pool, hp->value[0]);
+    arp_adj_fib_remove (entry, old_fib_index);
+    arp_adj_fib_add (entry, new_fib_index);
+  }));
+  /* *INDENT-ON* */
+}
+
+/*
+ * One-time initialisation of the ARP module.
+ *
+ * Runs ethernet_init first (returning its error, if any), registers
+ * arp_input_node for the ARP ethertype, installs the packet-generator
+ * parser, builds the opcode-name hash, sets the default cache limit,
+ * creates the resolution/MAC-change hashes, and registers the
+ * interface-address and table-bind callbacks with ip4_main.
+ * Returns 0 on success.
+ */
+static clib_error_t *
+ethernet_arp_init (vlib_main_t * vm)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ip4_main_t *im = &ip4_main;
+ clib_error_t *error;
+ pg_node_t *pn;
+
+ if ((error = vlib_call_init_function (vm, ethernet_init)))
+ return error;
+
+ ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index);
+
+ /* let the packet generator parse ARP headers for arp-input streams */
+ pn = pg_get_node (arp_input_node.index);
+ pn->unformat_edit = unformat_pg_arp_header;
+
+ /* opcode name -> ETHERNET_ARP_OPCODE_<name> */
+ am->opcode_by_name = hash_create_string (0, sizeof (uword));
+#define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o);
+ foreach_ethernet_arp_opcode;
+#undef _
+
+ /* $$$ configurable */
+ am->limit_arp_cache_size = 50000;
+
+ am->pending_resolutions_by_address = hash_create (0, sizeof (uword));
+ am->mac_changes_by_address = hash_create (0, sizeof (uword));
+ /* ~0 means no wildcard ARP publisher is registered */
+ am->wc_ip4_arp_publisher_node = (uword) ~ 0;
+
+ /* don't trace ARP error packets */
+ {
+ vlib_node_runtime_t *rt =
+ vlib_node_get_runtime (vm, arp_input_node.index);
+
+ /* adds every arp-input error code to the pcap drop-trace filter */
+#define _(a,b) \
+ vnet_pcap_drop_trace_filter_add_del \
+ (rt->errors[ETHERNET_ARP_ERROR_##a], \
+ 1 /* is_add */);
+ foreach_ethernet_arp_error
+#undef _
+ }
+
+ ip4_add_del_interface_address_callback_t cb;
+ cb.function = arp_add_del_interface_address;
+ cb.function_opaque = 0;
+ vec_add1 (im->add_del_interface_address_callbacks, cb);
+
+ ip4_table_bind_callback_t cbt;
+ cbt.function = arp_table_bind;
+ cbt.function_opaque = 0;
+ vec_add1 (im->table_bind_callbacks, cbt);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ethernet_arp_init);
+
+/*
+ * Release an ARP entry: remove its adj-fib from the table the
+ * interface is currently bound to, drop it from the per-interface
+ * hash and return it to the entry pool.
+ */
+static void
+arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  u32 bound_fib_index;
+
+  bound_fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
+  arp_adj_fib_remove (e, bound_fib_index);
+
+  hash_unset (eai->arp_entries, e->ip4_address.as_u32);
+  pool_put (am->ip4_entry_pool, e);
+}
+
+/*
+ * Main-thread handler for a remove request.
+ *
+ * Looks up the entry for args->a.ip4 on args->sw_if_index; if found,
+ * frees it and then walks the matching adjacencies with
+ * arp_mk_incomplete_walk. Does nothing if the interface has no ARP
+ * table or the entry does not exist. Always returns 0.
+ */
+static inline int
+vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
+                                           vnet_arp_set_ip4_over_ethernet_rpc_args_t
+                                           * args)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_arp_ip4_entry_t *e;
+  ethernet_arp_interface_t *eai;
+
+  if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index)
+    return 0;
+
+  eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
+
+  e = arp_entry_find (eai, &args->a.ip4);
+
+  if (NULL != e)
+    {
+      /* arp_entry_free() returns e to the pool, so copy the fields the
+       * adjacency walk needs before releasing the entry. */
+      u32 sw_if_index = e->sw_if_index;
+      ip4_address_t ip4_address = e->ip4_address;
+
+      arp_entry_free (eai, e);
+
+      adj_nbr_walk_nh4 (sw_if_index,
+                        &ip4_address, arp_mk_incomplete_walk, NULL);
+    }
+
+  return 0;
+}
+
+/*
+ * Main-thread handler for a flush request.
+ *
+ * Looks up the entry for args->a.ip4 on args->sw_if_index. If found, the
+ * matching adjacencies are walked with arp_mk_incomplete_walk, and the
+ * entry is then freed if it is flagged dynamic (and not static).
+ * Does nothing if the interface has no ARP table or the entry does not
+ * exist. Always returns 0.
+ */
+static int
+vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
+ vnet_arp_set_ip4_over_ethernet_rpc_args_t
+ * args)
+{
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ ethernet_arp_ip4_entry_t *e;
+ ethernet_arp_interface_t *eai;
+
+ if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index)
+ return 0;
+
+ eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
+
+ e = arp_entry_find (eai, &args->a.ip4);
+
+ if (NULL != e)
+ {
+ adj_nbr_walk_nh4 (e->sw_if_index,
+ &e->ip4_address, arp_mk_incomplete_walk, e);
+
+ /*
+ * The difference between flush and unset, is that an unset
+ * means delete for static and dynamic entries. A flush
+ * means delete only for dynamic. Flushing is what the DP
+ * does in response to interface events. unset is only done
+ * by the control plane.
+ */
+ if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)
+ {
+ /* NOTE(review): this AND keeps only the DYNAMIC bit, so every other
+ * flag on the entry is cleared. If STATIC and DYNAMIC are distinct
+ * bits (not visible here) the entry loses its STATIC flag; confirm
+ * whether "&= ~ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC" was intended. */
+ e->flags &= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
+ }
+ else if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
+ {
+ arp_entry_free (eai, e);
+ }
+ }
+ return (0);
+}
+
+/*
+ * Main-thread handler for a populate request.
+ *
+ * Ensures the per-interface ARP table exists for args->sw_if_index,
+ * then, if an entry for args->a.ip4 is present, walks the matching
+ * adjacencies with arp_mk_complete_walk. Always returns 0.
+ */
+static int
+vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
+                                              vnet_arp_set_ip4_over_ethernet_rpc_args_t
+                                              * args)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_arp_interface_t *eai;
+  ethernet_arp_ip4_entry_t *entry;
+
+  vec_validate (am->ethernet_arp_by_sw_if_index, args->sw_if_index);
+  eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index];
+
+  entry = arp_entry_find (eai, &args->a.ip4);
+  if (entry != NULL)
+    adj_nbr_walk_nh4 (entry->sw_if_index,
+                      &entry->ip4_address, arp_mk_complete_walk, entry);
+
+  return 0;
+}
+
+/*
+ * RPC entry point executed on the main thread (thread index 0).
+ * Dispatches on a->flags, testing REMOVE, FLUSH, POPULATE and WC_PUB
+ * in that order; with none of them set the request is an add/update.
+ */
+static void
+set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t
+                                    * a)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+
+  ASSERT (vlib_get_thread_index () == 0);
+
+  if (a->flags & ETHERNET_ARP_ARGS_REMOVE)
+    {
+      vnet_arp_unset_ip4_over_ethernet_internal (vnm, a);
+      return;
+    }
+  if (a->flags & ETHERNET_ARP_ARGS_FLUSH)
+    {
+      vnet_arp_flush_ip4_over_ethernet_internal (vnm, a);
+      return;
+    }
+  if (a->flags & ETHERNET_ARP_ARGS_POPULATE)
+    {
+      vnet_arp_populate_ip4_over_ethernet_internal (vnm, a);
+      return;
+    }
+  if (a->flags & ETHERNET_ARP_ARGS_WC_PUB)
+    {
+      vnet_arp_wc_publish_internal (vnm, a);
+      return;
+    }
+
+  vnet_arp_set_ip4_over_ethernet_internal (vnm, a);
+}
+
+/**
+ * @brief Invoked when the interface's admin state changes
+ *
+ * Every ARP entry on the interface gets a populate request when the
+ * interface is going admin-up, or a flush request otherwise. Entry
+ * indices are gathered first and the requests issued afterwards.
+ * Always returns 0.
+ */
+static clib_error_t *
+ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
+                                   u32 sw_if_index, u32 flags)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_arp_ip4_entry_t *entry;
+  u32 *affected = 0;
+  u32 *idx;
+
+  /* *INDENT-OFF* */
+  pool_foreach (entry, am->ip4_entry_pool,
+  ({
+    if (entry->sw_if_index == sw_if_index)
+      {
+        vec_add1 (affected, entry - am->ip4_entry_pool);
+      }
+  }));
+  /* *INDENT-ON* */
+
+  vec_foreach (idx, affected)
+  {
+    ethernet_arp_ip4_over_ethernet_address_t target;
+
+    entry = pool_elt_at_index (am->ip4_entry_pool, idx[0]);
+
+    clib_memcpy (&target.ethernet, entry->ethernet_address, 6);
+    target.ip4.as_u32 = entry->ip4_address.as_u32;
+
+    if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+      vnet_arp_populate_ip4_over_ethernet (vnm, entry->sw_if_index, &target);
+    else
+      vnet_arp_flush_ip4_over_ethernet (vnm, entry->sw_if_index, &target);
+  }
+
+  vec_free (affected);
+
+  return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
+
+/*
+ * Add one to both the IPv4 address and the MAC address in *a, treating
+ * each as a multi-byte number whose last byte is least significant.
+ * A carry moves to the preceding byte only when a byte wraps around.
+ */
+static void
+increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
+{
+  int pos;
+  u8 before;
+
+  /* IPv4 address: bytes 3 down to 0 */
+  pos = 3;
+  while (pos >= 0)
+    {
+      before = a->ip4.as_u8[pos]++;
+      if (before < a->ip4.as_u8[pos])
+        break;                  /* no wrap, so no carry to propagate */
+      pos--;
+    }
+
+  /* MAC address: bytes 5 down to 0 */
+  pos = 5;
+  while (pos >= 0)
+    {
+      before = a->ethernet[pos]++;
+      if (before < a->ethernet[pos])
+        break;                  /* no wrap, so no carry to propagate */
+      pos--;
+    }
+}
+
+/*
+ * Control-plane hook to add or update an ARP entry.
+ *
+ * Packages the address pair and the static / no-fib-entry options with
+ * flags == 0 and hands the request to the main thread via
+ * vl_api_rpc_call_main_thread. Always returns 0.
+ */
+int
+vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
+                                u32 sw_if_index, void *a_arg,
+                                int is_static, int is_no_fib_entry)
+{
+  ethernet_arp_ip4_over_ethernet_address_t *addr = a_arg;
+  vnet_arp_set_ip4_over_ethernet_rpc_args_t rpc;
+
+  clib_memcpy (&rpc.a, addr, sizeof (*addr));
+  rpc.flags = 0;
+  rpc.is_no_fib_entry = is_no_fib_entry;
+  rpc.is_static = is_static;
+  rpc.sw_if_index = sw_if_index;
+
+  vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
+                               (u8 *) & rpc, sizeof (rpc));
+
+  return 0;
+}
+
+/*
+ * Add or delete a proxy ARP range [lo_addr, hi_addr] for fib_index.
+ *
+ * A range matches only when lo, hi and fib_index are all equal.
+ * Adding a range that already exists is a no-op returning 0.
+ * Deleting a range that does not exist returns
+ * VNET_API_ERROR_NO_SUCH_ENTRY; every other case returns 0.
+ */
+int
+vnet_proxy_arp_add_del (ip4_address_t * lo_addr,
+                        ip4_address_t * hi_addr, u32 fib_index, int is_del)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_proxy_arp_t *range;
+
+  vec_foreach (range, am->proxy_arps)
+  {
+    u32 at;
+
+    if (range->lo_addr != lo_addr->as_u32
+        || range->hi_addr != hi_addr->as_u32
+        || range->fib_index != fib_index)
+      continue;
+
+    /* Exact match: remove it on delete, otherwise it is already there. */
+    at = range - am->proxy_arps;
+    if (is_del)
+      vec_delete (am->proxy_arps, 1, at);
+    return 0;
+  }
+
+  /* Not in the table. */
+  if (is_del)
+    return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+  vec_add2 (am->proxy_arps, range, 1);
+  range->lo_addr = lo_addr->as_u32;
+  range->hi_addr = hi_addr->as_u32;
+  range->fib_index = fib_index;
+
+  return 0;
+}
+
+/*
+ * Remove any proxy arp entries associated with the
+ * specified fib.
+ *
+ * @param fib_id  the table ID (not the index) of the IPv4 FIB
+ * @return VNET_API_ERROR_NO_SUCH_ENTRY if no IPv4 table has that ID,
+ *         otherwise 0
+ */
+int
+vnet_proxy_arp_fib_reset (u32 fib_id)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ethernet_proxy_arp_t *pa;
+  u32 *entries_to_delete = 0;
+  u32 fib_index;
+  int i;
+
+  fib_index = fib_table_find (FIB_PROTOCOL_IP4, fib_id);
+  if (~0 == fib_index)
+    return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+  /* Collected in ascending index order. */
+  vec_foreach (pa, am->proxy_arps)
+  {
+    if (pa->fib_index == fib_index)
+      {
+        vec_add1 (entries_to_delete, pa - am->proxy_arps);
+      }
+  }
+
+  /*
+   * Delete from the highest index downwards: vec_delete shifts every
+   * later element down by one, so deleting in ascending order would
+   * invalidate the indices that have not been processed yet.
+   */
+  i = (int) vec_len (entries_to_delete);
+  while (i-- > 0)
+    {
+      vec_delete (am->proxy_arps, 1, entries_to_delete[i]);
+    }
+
+  vec_free (entries_to_delete);
+
+  return 0;
+}
+
+/*
+ * CLI handler for "set ip arp".
+ *
+ * Parses, in any order: "<intfc> <ip4> <mac>", "delete"/"del", "static",
+ * "no-fib-entry", "count <n>", "fib-id <id>" and "proxy <lo> - <hi>".
+ *
+ * - With "proxy", the range is added to (or deleted from) the proxy ARP
+ *   table for the selected fib and the command returns.
+ * - Otherwise an interface/address/MAC triple is required; <count>
+ *   entries are added or removed, the IPv4 and MAC addresses being
+ *   incremented after each one. When adding, the CLI process waits for
+ *   the resolution event of each entry before continuing.
+ *
+ * Returns an error for an unknown fib ID or when no address triple was
+ * given, otherwise 0.
+ */
+static clib_error_t *
+ip_arp_add_del_command_fn (vlib_main_t * vm,
+                           unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 sw_if_index;
+  ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr;
+  int addr_valid = 0;
+  int is_del = 0;
+  int count = 1;
+  u32 fib_index = 0;
+  u32 fib_id;
+  int is_static = 0;
+  int is_no_fib_entry = 0;
+  int is_proxy = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
+      if (unformat (input, "%U %U %U",
+                    unformat_vnet_sw_interface, vnm, &sw_if_index,
+                    unformat_ip4_address, &addr.ip4,
+                    unformat_ethernet_address, &addr.ethernet))
+        addr_valid = 1;
+
+      else if (unformat (input, "delete") || unformat (input, "del"))
+        is_del = 1;
+
+      else if (unformat (input, "static"))
+        is_static = 1;
+
+      else if (unformat (input, "no-fib-entry"))
+        is_no_fib_entry = 1;
+
+      else if (unformat (input, "count %d", &count))
+        ;
+
+      else if (unformat (input, "fib-id %d", &fib_id))
+        {
+          fib_index = fib_table_find (FIB_PROTOCOL_IP4, fib_id);
+
+          if (~0 == fib_index)
+            return clib_error_return (0, "fib ID %d doesn't exist\n", fib_id);
+        }
+
+      else if (unformat (input, "proxy %U - %U",
+                         unformat_ip4_address, &lo_addr.ip4,
+                         unformat_ip4_address, &hi_addr.ip4))
+        is_proxy = 1;
+      else
+        break;
+    }
+
+  if (is_proxy)
+    {
+      (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4,
+                                     fib_index, is_del);
+      return 0;
+    }
+
+  if (addr_valid)
+    {
+      int i;
+
+      for (i = 0; i < count; i++)
+        {
+          if (is_del == 0)
+            {
+              uword event_type, *event_data = 0;
+
+              /* Park the debug CLI until the arp entry is installed */
+              vnet_register_ip4_arp_resolution_event
+                (vnm, &addr.ip4, vlib_current_process (vm),
+                 1 /* type */ , 0 /* data */ );
+
+              vnet_arp_set_ip4_over_ethernet
+                (vnm, sw_if_index, &addr, is_static, is_no_fib_entry);
+
+              vlib_process_wait_for_event (vm);
+              event_type = vlib_process_get_events (vm, &event_data);
+              /* event_data is local to this iteration: free it rather
+               * than just resetting its length, or the vector is leaked
+               * once per added entry. */
+              vec_free (event_data);
+              if (event_type != 1)
+                clib_warning ("event type %d unexpected", event_type);
+            }
+          else
+            vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &addr);
+
+          increment_ip4_and_mac_address (&addr);
+        }
+    }
+  else
+    {
+      return clib_error_return (0, "unknown input `%U'",
+                                format_unformat_error, input);
+    }
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/*?
+ * Add or delete IPv4 ARP cache entries.
+ *
+ * @note 'set ip arp' options (e.g. delete, static, no-fib-entry,
+ * 'fib-id <id>', 'count <number>', 'interface ip4_addr mac_addr') can be
+ * added in any order and combination.
+ *
+ * @note When 'proxy <lo-addr> - <hi-addr>' is given, the command only adds
+ * (or, with 'del', deletes) that proxy ARP range; no ARP cache entry is
+ * touched.
+ *
+ * @cliexpar
+ * @parblock
+ * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
+ * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
+ * @cliexcmd{set ip arp GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip arp delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
+ *
+ * To add or delete an IPv4 ARP cache entry to or from a specific fib
+ * table:
+ * @cliexcmd{set ip arp fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip arp fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ *
+ * Add or delete IPv4 static ARP cache entries as follows:
+ * @cliexcmd{set ip arp static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip arp static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ *
+ * For testing / debugging purposes, the 'set ip arp' command can add or
+ * delete multiple entries. Supply the 'count N' parameter:
+ * @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @endparblock
+ ?*/
+VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
+ .path = "set ip arp",
+ .short_help =
+ "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
+ .function = ip_arp_add_del_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "set interface proxy-arp".
+ *
+ * Requires an interface name; "enable"/"on" sets and "disable"/"off"
+ * clears VNET_SW_INTERFACE_FLAG_PROXY_ARP on it. With neither keyword
+ * the flag is cleared. Returns an error when no interface was parsed.
+ */
+static clib_error_t *
+set_int_proxy_arp_command_fn (vlib_main_t * vm,
+                              unformat_input_t *
+                              input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_sw_interface_t *si;
+  u32 sw_if_index;
+  int have_interface = 0;
+  int want_enabled = 0;
+
+  for (;;)
+    {
+      if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+        break;
+
+      if (unformat (input, "%U", unformat_vnet_sw_interface,
+                    vnm, &sw_if_index))
+        have_interface = 1;
+      else if (unformat (input, "enable") || unformat (input, "on"))
+        want_enabled = 1;
+      else if (unformat (input, "disable") || unformat (input, "off"))
+        want_enabled = 0;
+      else
+        break;
+    }
+
+  if (!have_interface)
+    return clib_error_return (0, "unknown input '%U'",
+                              format_unformat_error, input);
+
+  si = vnet_get_sw_interface (vnm, sw_if_index);
+  ASSERT (si);
+
+  if (want_enabled)
+    si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP;
+  else
+    si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP;
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/*?
+ * Enable proxy-arp on an interface. The vpp stack will answer ARP
+ * requests for the indicated address range. Multiple proxy-arp
+ * ranges may be provisioned.
+ *
+ * @note Proxy ARP as a technology is infamous for blackholing traffic.
+ * Also, the underlying implementation has not been performance-tuned.
+ * Avoid creating an unnecessarily large set of ranges.
+ *
+ * @note If neither 'enable' nor 'disable' is given, proxy arp is
+ * disabled on the interface.
+ *
+ * @cliexpar
+ * To enable proxy arp on a range of addresses, use:
+ * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11}
+ * Append 'del' to delete a range of proxy ARP addresses:
+ * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11 del}
+ * You must then specifically enable proxy arp on individual interfaces:
+ * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 enable}
+ * To disable proxy arp on an individual interface:
+ * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable}
+ ?*/
+VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
+ .path = "set interface proxy-arp",
+ .short_help =
+ "set interface proxy-arp <intfc> [enable|disable]",
+ .function = set_int_proxy_arp_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * ARP/ND Termination in a L2 Bridge Domain based on IP4/IP6 to MAC
+ * hash tables mac_by_ip4 and mac_by_ip6 for each BD.
+ *
+ * arp_term_next_t enumerates the statically registered next nodes of the
+ * arp-term-l2bd node.
+ */
+typedef enum
+{
+ ARP_TERM_NEXT_L2_OUTPUT, /* locally generated reply goes to l2-output */
+ ARP_TERM_NEXT_DROP, /* malformed or mismatching ARP is dropped */
+ ARP_TERM_N_NEXT, /* number of next nodes */
+} arp_term_next_t;
+
+/* Next-node table handed to vnet_l2_feature_next() by arp_term_l2bd for
+ * packets that continue down the L2 input feature chain.
+ * NOTE(review): presumably one slot per L2 input feature bit (32) and
+ * filled in at init time elsewhere; confirm. */
+u32 arp_term_next_node_index[32];
+
+/*
+ * Graph node function for ARP/ND termination in an L2 bridge domain.
+ *
+ * For each buffer in the frame:
+ * - buffers with a non-zero split-horizon group go to the next L2 feature;
+ * - an ARP request whose target IPv4 address is found in the bridge
+ *   domain's mac_by_ip4 table is rewritten in place into an ARP reply and
+ *   sent back towards the receiving interface;
+ * - an ARP request failing the header or source-MAC checks is dropped;
+ * - an IPv6 ICMP packet with a specified source address is offered to
+ *   vnet_ip6_nd_term, and sent back out if that returns non-zero;
+ * - everything else goes to the next L2 feature.
+ *
+ * Returns the number of buffers in the frame.
+ */
+static uword
+arp_term_l2bd (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ l2input_main_t *l2im = &l2input_main;
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 n_replies_sent = 0;
+ /* one-entry cache of the last bridge-domain config looked up */
+ u16 last_bd_index = ~0;
+ l2_bridge_domain_t *last_bd_config = 0;
+ l2_input_config_t *cfg0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ethernet_header_t *eth0;
+ ethernet_arp_header_t *arp0;
+ ip6_header_t *iph0;
+ u8 *l3h0;
+ u32 pi0, error0, next0, sw_if_index0;
+ u16 ethertype0;
+ u16 bd_index0;
+ u32 ip0;
+ u8 *macp0;
+ u8 is_vrrp_reply0;
+
+ /* speculatively enqueue to the current next frame */
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ // Terminate only local (SHG == 0) ARP
+ if (vnet_buffer (p0)->l2.shg != 0)
+ goto next_l2_feature;
+
+ /* the ethertype is the last two bytes of the L2 header */
+ eth0 = vlib_buffer_get_current (p0);
+ l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len;
+ ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
+ arp0 = (ethernet_arp_header_t *) l3h0;
+
+ if (PREDICT_FALSE ((ethertype0 != ETHERNET_TYPE_ARP) ||
+ (arp0->opcode !=
+ clib_host_to_net_u16
+ (ETHERNET_ARP_OPCODE_request))))
+ goto check_ip6_nd;
+
+ /* Must be ARP request packet here */
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (p0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ u8 *t0 = vlib_add_trace (vm, node, p0,
+ sizeof (ethernet_arp_input_trace_t));
+ clib_memcpy (t0, l3h0, sizeof (ethernet_arp_input_trace_t));
+ }
+
+ /* replies_sent doubles as "no error" here: the if (error0) test below
+ * only works if ETHERNET_ARP_ERROR_replies_sent is 0 (presumably the
+ * first entry of the error list). */
+ error0 = ETHERNET_ARP_ERROR_replies_sent;
+ error0 =
+ (arp0->l2_type !=
+ clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
+ ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
+ error0 =
+ (arp0->l3_type !=
+ clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
+ ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ if (error0)
+ goto drop;
+
+ /* NOTE(review): only ARP requests reach this point (anything else
+ * branched to check_ip6_nd above), so this opcode == reply test looks
+ * like it can never be true here. */
+ is_vrrp_reply0 =
+ ((arp0->opcode ==
+ clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
+ &&
+ (!memcmp
+ (arp0->ip4_over_ethernet[0].ethernet, vrrp_prefix,
+ sizeof (vrrp_prefix))));
+
+ /* Trash ARP packets whose ARP-level source addresses do not
+ match their L2-frame-level source addresses, unless it's
+ a reply from a VRRP virtual router */
+ if (PREDICT_FALSE
+ (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet,
+ sizeof (eth0->src_address)) && !is_vrrp_reply0))
+ {
+ error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
+ goto drop;
+ }
+
+ /* Check if anyone want ARP request events for L2 BDs */
+ {
+ ethernet_arp_main_t *am = &ethernet_arp_main;
+ if (am->wc_ip4_arp_publisher_node != (uword) ~ 0)
+ vnet_arp_wc_publish (sw_if_index0, &arp0->ip4_over_ethernet[0]);
+ }
+
+ /* lookup BD mac_by_ip4 hash table for MAC entry */
+ ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
+ bd_index0 = vnet_buffer (p0)->l2.bd_index;
+ if (PREDICT_FALSE ((bd_index0 != last_bd_index)
+ || (last_bd_index == (u16) ~ 0)))
+ {
+ last_bd_index = bd_index0;
+ last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0);
+ }
+ macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
+
+ if (PREDICT_FALSE (!macp0))
+ goto next_l2_feature; /* MAC not found */
+
+ /* MAC found, send ARP reply -
+ Convert ARP request packet to ARP reply */
+ arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
+ /* the requester becomes the target; the looked-up MAC/IP the sender */
+ arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
+ arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
+ clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6);
+ clib_memcpy (eth0->dst_address, eth0->src_address, 6);
+ clib_memcpy (eth0->src_address, macp0, 6);
+ n_replies_sent += 1;
+
+ output_response:
+ /* For BVI, need to use l2-fwd node to send ARP reply as
+ l2-output node cannot output packet to BVI properly */
+ cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0);
+ if (PREDICT_FALSE (cfg0->bvi))
+ {
+ /* turn on the FWD feature and clear the RX interface before
+ * continuing down the feature chain */
+ vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
+ goto next_l2_feature;
+ }
+
+ /* Send ARP/ND reply back out input interface through l2-output */
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ next0 = ARP_TERM_NEXT_L2_OUTPUT;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ continue;
+
+ check_ip6_nd:
+ /* IP6 ND event notification or solicitation handling to generate
+ local response instead of flooding */
+ iph0 = (ip6_header_t *) l3h0;
+ if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 &&
+ iph0->protocol == IP_PROTOCOL_ICMP6 &&
+ !ip6_address_is_unspecified
+ (&iph0->src_address)))
+ {
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ if (vnet_ip6_nd_term
+ (vm, node, p0, eth0, iph0, sw_if_index0,
+ vnet_buffer (p0)->l2.bd_index))
+ goto output_response;
+ }
+
+ next_l2_feature:
+ {
+ /* not terminated here: hand over to the next enabled L2 feature */
+ next0 = vnet_l2_feature_next (p0, arp_term_next_node_index,
+ L2INPUT_FEAT_ARP_TERM);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ continue;
+ }
+
+ drop:
+ /* a zero sender IP, or sender IP == target IP, is counted as a
+ * gratuitous ARP instead of the original error */
+ if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
+ (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
+ arp0->ip4_over_ethernet[1].ip4.as_u32))
+ {
+ error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
+ }
+ next0 = ARP_TERM_NEXT_DROP;
+ p0->error = node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_error_count (vm, node->node_index,
+ ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+/*
+ * Graph-node registration for "arp-term-l2bd".
+ * The node function is arp_term_l2bd (); its two statically wired next
+ * nodes are "l2-output" (ARP_TERM_NEXT_L2_OUTPUT) and "error-drop"
+ * (ARP_TERM_NEXT_DROP). Error counters use the ethernet ARP error
+ * strings (ETHERNET_ARP_N_ERROR entries).
+ */
+VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
+ .function = arp_term_l2bd,
+ .name = "arp-term-l2bd",
+ .vector_size = sizeof (u32),
+ .n_errors = ETHERNET_ARP_N_ERROR,
+ .error_strings = ethernet_arp_error_strings,
+ .n_next_nodes = ARP_TERM_N_NEXT,
+ .next_nodes = {
+ [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
+ [ARP_TERM_NEXT_DROP] = "error-drop",
+ },
+ .format_buffer = format_ethernet_arp_header,
+ .format_trace = format_arp_term_input_trace,
+};
+/* *INDENT-ON* */
+
+/**
+ * Init hook for the arp-term-l2bd node.
+ *
+ * Fills arp_term_next_node_index with the feature next-node indexes for
+ * all L2INPUT_N_FEAT L2 input features, relative to arp_term_l2bd_node.
+ *
+ * @param vm  vlib main.
+ * @return always 0 (no error).
+ */
+clib_error_t *
+arp_term_init (vlib_main_t * vm)
+{
+  clib_error_t *error = 0;
+
+  /* Initialize the feature next-node indexes */
+  feat_bitmap_init_next_nodes (vm, arp_term_l2bd_node.index, L2INPUT_N_FEAT,
+			       l2input_get_feat_names (),
+			       arp_term_next_node_index);
+
+  return error;
+}
+
+VLIB_INIT_FUNCTION (arp_term_init);
+
+/**
+ * Re-walk the neighbour adjacencies of one ARP entry.
+ *
+ * Does nothing unless the entry belongs to @c sw_if_index; otherwise
+ * walks the IPv4 next-hop adjacencies for the entry's address with
+ * arp_mk_complete_walk, passing the entry as walk context.
+ *
+ * @param sw_if_index  interface being processed.
+ * @param e            ARP entry to examine.
+ */
+void
+change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e)
+{
+  /* Entries learned on other interfaces are left untouched. */
+  if (e->sw_if_index != sw_if_index)
+    return;
+
+  adj_nbr_walk_nh4 (e->sw_if_index, &e->ip4_address,
+		    arp_mk_complete_walk, e);
+}
+
+/**
+ * Apply change_arp_mac () to every entry in the IPv4 ARP entry pool.
+ *
+ * @param sw_if_index  interface passed through to change_arp_mac ();
+ *                     entries on other interfaces are ignored there.
+ */
+void
+ethernet_arp_change_mac (u32 sw_if_index)
+{
+  ethernet_arp_main_t *arp_main = &ethernet_arp_main;
+  ethernet_arp_ip4_entry_t *entry;
+
+  /* *INDENT-OFF* */
+  pool_foreach (entry, arp_main->ip4_entry_pool,
+  ({
+    change_arp_mac (sw_if_index, entry);
+  }));
+  /* *INDENT-ON* */
+}
+
+/**
+ * Send a gratuitous ARP (GARP) for the first IPv4 address configured on
+ * the software interface of @c hi.
+ *
+ * The packet is an ARP request taken from the ip4 ARP request packet
+ * template, in which both ip4_over_ethernet slots carry the interface's
+ * own MAC/IP pair. It is given a broadcast destination MAC and queued
+ * directly to the hardware interface's output node.
+ *
+ * Nothing is sent when the interface has no IPv4 address, or when no
+ * packet could be obtained from the template.
+ *
+ * @param vm  vlib main used for buffer and frame handling.
+ * @param hi  hardware interface supplying the MAC address, the
+ *            sw_if_index and the output node.
+ */
+void
+send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi)
+{
+  ip4_main_t *i4m = &ip4_main;
+  u32 sw_if_index = hi->sw_if_index;
+  ip4_address_t *ip4_addr = ip4_interface_first_address (i4m, sw_if_index, 0);
+
+  if (ip4_addr)
+    {
+      clib_warning ("Sending GARP for IP4 address %U on sw_if_idex %d",
+		    format_ip4_address, ip4_addr, sw_if_index);
+
+      /* Form GARP packet for output - Gratuitous ARP is an ARP request packet
+         where the interface IP/MAC pair is used for both source and request
+         MAC/IP pairs in the request */
+      u32 bi = 0;
+      ethernet_arp_header_t *h = vlib_packet_template_get_packet
+	(vm, &i4m->ip4_arp_request_packet_template, &bi);
+
+      /* The template returns no packet when a buffer cannot be
+         allocated; give up rather than write through a null pointer. */
+      if (!h)
+	return;
+
+      clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
+		   sizeof (h->ip4_over_ethernet[0].ethernet));
+      clib_memcpy (h->ip4_over_ethernet[1].ethernet, hi->hw_address,
+		   sizeof (h->ip4_over_ethernet[1].ethernet));
+      h->ip4_over_ethernet[0].ip4 = ip4_addr[0];
+      h->ip4_over_ethernet[1].ip4 = ip4_addr[0];
+
+      /* Setup MAC header with ARP Etype and broadcast DMAC.
+         The advance is negative (prepend); cast before negating so the
+         unsigned sizeof result is not negated as a size_t. */
+      vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+      vlib_buffer_advance (b, -(word) sizeof (ethernet_header_t));
+      ethernet_header_t *e = vlib_buffer_get_current (b);
+      e->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
+      clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
+      memset (e->dst_address, 0xff, sizeof (e->dst_address));
+
+      /* Send GARP packet out the specified interface */
+      vnet_buffer (b)->sw_if_index[VLIB_RX] =
+	vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+      vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+      u32 *to_next = vlib_frame_vector_args (f);
+      to_next[0] = bi;
+      f->n_vectors = 1;
+      vlib_put_frame_to_node (vm, hi->output_node_index, f);
+    }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/arp_packet.h b/src/vnet/ethernet/arp_packet.h
new file mode 100644
index 00000000..661f33f9
--- /dev/null
+++ b/src/vnet/ethernet/arp_packet.h
@@ -0,0 +1,180 @@
+/*
+ * ethernet/arp_packet.h: IP v4 ARP packet and entry definitions
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_ethernet_arp_packet_h
+#define included_ethernet_arp_packet_h
+
+/*
+ * X-macro list of ARP hardware types as (numeric value, name) pairs.
+ * The user supplies _(n, f) before expanding it; this header expands it
+ * into the enumerators of ethernet_arp_hardware_type_t.
+ */
+#define foreach_ethernet_arp_hardware_type \
+ _ (0, reserved) \
+ _ (1, ethernet) \
+ _ (2, experimental_ethernet) \
+ _ (3, ax_25) \
+ _ (4, proteon_pronet_token_ring) \
+ _ (5, chaos) \
+ _ (6, ieee_802) \
+ _ (7, arcnet) \
+ _ (8, hyperchannel) \
+ _ (9, lanstar) \
+ _ (10, autonet) \
+ _ (11, localtalk) \
+ _ (12, localnet) \
+ _ (13, ultra_link) \
+ _ (14, smds) \
+ _ (15, frame_relay) \
+ _ (16, atm) \
+ _ (17, hdlc) \
+ _ (18, fibre_channel) \
+ _ (19, atm19) \
+ _ (20, serial_line) \
+ _ (21, atm21) \
+ _ (22, mil_std_188_220) \
+ _ (23, metricom) \
+ _ (24, ieee_1394) \
+ _ (25, mapos) \
+ _ (26, twinaxial) \
+ _ (27, eui_64) \
+ _ (28, hiparp) \
+ _ (29, iso_7816_3) \
+ _ (30, arpsec) \
+ _ (31, ipsec_tunnel) \
+ _ (32, infiniband) \
+ _ (33, cai) \
+ _ (34, wiegand) \
+ _ (35, pure_ip) \
+ _ (36, hw_exp1) \
+ _ (256, hw_exp2)
+
+/*
+ * X-macro list of ARP opcode names. No values are given here: where
+ * this header expands the list into ethernet_arp_opcode_t the
+ * enumerators are numbered consecutively from 0 in the order listed
+ * (reserved = 0, request = 1, reply = 2, ...), so entries must not be
+ * reordered or inserted in the middle.
+ */
+#define foreach_ethernet_arp_opcode \
+ _ (reserved) \
+ _ (request) \
+ _ (reply) \
+ _ (reverse_request) \
+ _ (reverse_reply) \
+ _ (drarp_request) \
+ _ (drarp_reply) \
+ _ (drarp_error) \
+ _ (inarp_request) \
+ _ (inarp_reply) \
+ _ (arp_nak) \
+ _ (mars_request) \
+ _ (mars_multi) \
+ _ (mars_mserv) \
+ _ (mars_join) \
+ _ (mars_leave) \
+ _ (mars_nak) \
+ _ (mars_unserv) \
+ _ (mars_sjoin) \
+ _ (mars_sleave) \
+ _ (mars_grouplist_request) \
+ _ (mars_grouplist_reply) \
+ _ (mars_redirect_map) \
+ _ (mapos_unarp) \
+ _ (exp1) \
+ _ (exp2)
+
+/* ARP hardware types: one ETHERNET_ARP_HARDWARE_TYPE_<name> enumerator,
+   with its explicit numeric value, per entry of
+   foreach_ethernet_arp_hardware_type. */
+typedef enum
+{
+#define _(n,f) ETHERNET_ARP_HARDWARE_TYPE_##f = (n),
+ foreach_ethernet_arp_hardware_type
+#undef _
+} ethernet_arp_hardware_type_t;
+
+/* ARP opcodes: one ETHERNET_ARP_OPCODE_<name> enumerator per entry of
+   foreach_ethernet_arp_opcode, numbered from 0 in list order.
+   ETHERNET_ARP_N_OPCODE follows the last entry and so equals the number
+   of opcodes. Values are in host byte order. */
+typedef enum
+{
+#define _(f) ETHERNET_ARP_OPCODE_##f,
+ foreach_ethernet_arp_opcode
+#undef _
+ ETHERNET_ARP_N_OPCODE,
+} ethernet_arp_opcode_t;
+
+/* Next-index enumeration with a single disposition (drop);
+   IP4_ARP_N_NEXT is the number of next indexes.
+   NOTE(review): presumably used by the ip4 ARP request node - the user
+   is not visible in this header. */
+typedef enum
+{
+ IP4_ARP_NEXT_DROP,
+ IP4_ARP_N_NEXT,
+} ip4_arp_next_t;
+
+/* Error/counter codes numbered from 0 in the order listed.
+   NOTE(review): presumably paired with a string table defined by the
+   node that uses them; keep both in the same order - confirm. */
+typedef enum
+{
+ IP4_ARP_ERROR_DROP,
+ IP4_ARP_ERROR_REQUEST_SENT,
+ IP4_ARP_ERROR_NON_ARP_ADJ,
+ IP4_ARP_ERROR_REPLICATE_DROP,
+ IP4_ARP_ERROR_REPLICATE_FAIL,
+ IP4_ARP_ERROR_NO_SOURCE_ADDRESS,
+} ip4_arp_error_t;
+
+/* One (ethernet address, IPv4 address) pair as laid out in an ARP
+   packet. Packed so the 4-byte IPv4 address follows the 6-byte MAC with
+   no padding. */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 ethernet[6];
+ ip4_address_t ip4;
+}) ethernet_arp_ip4_over_ethernet_address_t;
+/* *INDENT-ON* */
+
+/* ARP packet header followed by the address payload. */
+typedef struct
+{
+ /* Hardware (L2) and protocol (L3) type fields. */
+ u16 l2_type;
+ u16 l3_type;
+ /* Length in bytes of one L2 / one L3 address in the payload. */
+ u8 n_l2_address_bytes;
+ u8 n_l3_address_bytes;
+ /* ARP opcode; arp.c reads and writes it in network byte order
+ (clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_...)). */
+ u16 opcode;
+ union
+ {
+ /* IPv4-over-ethernet payload: arp.c treats [0] as the source (sender)
+ pair and [1] as the target pair. */
+ ethernet_arp_ip4_over_ethernet_address_t ip4_over_ethernet[2];
+
+ /* Others... */
+ u8 data[0];
+ };
+} ethernet_arp_header_t;
+
+/* Bit flags stored in ethernet_arp_ip4_entry_t.flags. The enum is
+   declared packed, so it occupies the smallest integer type that holds
+   its values. */
+typedef enum ethernet_arp_entry_flags_t_
+{
+ ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC = (1 << 0),
+ ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC = (1 << 1),
+ ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY = (1 << 2),
+} __attribute__ ((packed)) ethernet_arp_entry_flags_t;
+
+/* One IPv4 ARP (neighbour) entry: an IPv4 address on an interface and
+   the ethernet address it maps to. */
+typedef struct
+{
+ /* Interface the entry belongs to. */
+ u32 sw_if_index;
+ /* IPv4 address of the neighbour. */
+ ip4_address_t ip4_address;
+
+ /* Ethernet (MAC) address of the neighbour. */
+ u8 ethernet_address[6];
+
+ /* ETHERNET_ARP_IP4_ENTRY_FLAG_* bits. */
+ ethernet_arp_entry_flags_t flags;
+
+ /* NOTE(review): presumably the CPU clock value at the last update of
+ this entry - confirm against the writer in arp.c. */
+ u64 cpu_time_last_updated;
+
+ /**
+ * The index of the adj-fib entry created
+ */
+ fib_node_index_t fib_entry_index;
+} ethernet_arp_ip4_entry_t;
+
+ethernet_arp_ip4_entry_t *ip4_neighbor_entries (u32 sw_if_index);
+u8 *format_ethernet_arp_ip4_entry (u8 * s, va_list * va);
+
+void send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi);
+
+#endif /* included_ethernet_arp_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/dir.dox b/src/vnet/ethernet/dir.dox
new file mode 100644
index 00000000..a55a73c0
--- /dev/null
+++ b/src/vnet/ethernet/dir.dox
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+@dir
+@brief Ethernet ARP and Loopback Code.
+
+This directory contains the source code for ARP and Loopback Interfaces.
+
+*/
+/*? %%clicmd:group_label ARP and Loopback CLI %% ?*/
diff --git a/src/vnet/ethernet/error.def b/src/vnet/ethernet/error.def
new file mode 100644
index 00000000..36679c0c
--- /dev/null
+++ b/src/vnet/ethernet/error.def
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_error.def: ethernet errors
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * X-macro list: ethernet_error (name, disposition, description).
+ * ethernet.h expands it with ethernet_error(n,c,s) -> ETHERNET_ERROR_##n,
+ * so the order below fixes the enum values (NONE = 0) and
+ * ETHERNET_N_ERROR. The second and third arguments are not used by that
+ * expansion; they look like the packet disposition (PUNT/DROP) and the
+ * counter description - confirm against the other users of this file.
+ */
+ethernet_error (NONE, PUNT, "no error")
+ethernet_error (BAD_LLC_LENGTH, DROP, "llc length > packet length")
+ethernet_error (UNKNOWN_TYPE, PUNT, "unknown ethernet type")
+ethernet_error (UNKNOWN_VLAN, DROP, "unknown vlan")
+ethernet_error (L3_MAC_MISMATCH, DROP, "l3 mac mismatch")
+ethernet_error (DOWN, DROP, "subinterface down")
+
diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h
new file mode 100644
index 00000000..a6846b13
--- /dev/null
+++ b/src/vnet/ethernet/ethernet.h
@@ -0,0 +1,577 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet.h: types/functions for ethernet.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ethernet_h
+#define included_ethernet_h
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/feature/feature.h>
+
+/**
+ * Pack a 6-byte MAC address into the low 48 bits of a u64.
+ *
+ * a[0] ends up in bits 47..40 and a[5] in bits 7..0; the upper 16 bits
+ * of the result are zero.
+ *
+ * @param a  pointer to 6 address bytes.
+ * @return the address as a 48-bit integer.
+ */
+always_inline u64
+ethernet_mac_address_u64 (u8 * a)
+{
+  u64 mac = 0;
+  int i;
+
+  /* Shift in one octet at a time, most significant first. */
+  for (i = 0; i < 6; i++)
+    mac = (mac << 8) | (u64) a[i];
+
+  return mac;
+}
+
+/**
+ * Test bit 40 of a MAC address held in a u64.
+ *
+ * With the layout produced by ethernet_mac_address_u64 () bit 40 is the
+ * least significant bit of the first address octet.
+ *
+ * @param a  MAC address as a 48-bit integer.
+ * @return 1 if bit 40 is set, 0 otherwise.
+ */
+static inline int
+ethernet_mac_address_is_multicast_u64 (u64 a)
+{
+  return (int) ((a >> (5 * 8)) & 1);
+}
+
+/*
+ * Return 1 if 'type' is one of the four VLAN ethertypes
+ * (ETHERNET_TYPE_VLAN, ETHERNET_TYPE_DOT1AD, ETHERNET_TYPE_VLAN_9100,
+ * ETHERNET_TYPE_VLAN_9200), otherwise 0.
+ * 'type' must be in the same byte order as the ETHERNET_TYPE_* constants.
+ */
+static_always_inline int
+ethernet_frame_is_tagged (u16 type)
+{
+#if __SSE4_2__
+ /* SIMD path: compare 'type' against all candidate ethertypes at once. */
+ const __m128i ethertype_mask = _mm_set_epi16 (ETHERNET_TYPE_VLAN,
+ ETHERNET_TYPE_DOT1AD,
+ ETHERNET_TYPE_VLAN_9100,
+ ETHERNET_TYPE_VLAN_9200,
+ /* duplicate last one to
+ fill register */
+ ETHERNET_TYPE_VLAN_9200,
+ ETHERNET_TYPE_VLAN_9200,
+ ETHERNET_TYPE_VLAN_9200,
+ ETHERNET_TYPE_VLAN_9200);
+
+ /* Broadcast 'type' to every 16-bit lane; a lane of the compare result
+ is all-ones where it equals the corresponding candidate. */
+ __m128i r = _mm_set1_epi16 (type);
+ r = _mm_cmpeq_epi16 (ethertype_mask, r);
+ /* Any non-zero lane means a match. */
+ return !_mm_test_all_zeros (r, r);
+#else
+ /* Scalar fallback. */
+ if ((type == ETHERNET_TYPE_VLAN) ||
+ (type == ETHERNET_TYPE_DOT1AD) ||
+ (type == ETHERNET_TYPE_VLAN_9100) || (type == ETHERNET_TYPE_VLAN_9200))
+ return 1;
+#endif
+ /* Reached only on the scalar path (the SIMD path returns above). */
+ return 0;
+}
+
+/* Max. sized ethernet/vlan header for parsing: the ethernet header
+   followed by room for two VLAN headers. */
+typedef struct
+{
+ ethernet_header_t ethernet;
+
+ /* Allow up to 2 stacked vlan headers. */
+ ethernet_vlan_header_t vlan[2];
+} ethernet_max_header_t;
+
+/* Forward declaration so the callback type below can name the struct. */
+struct vnet_hw_interface_t;
+/* Ethernet flag change callback: receives the vnet main, the hardware
+   interface and the ETHERNET_INTERFACE_FLAG_* bits, and returns a u32.
+   NOTE(review): the meaning of the return value is defined by the
+   callers/drivers, not by this header - confirm before relying on it. */
+typedef u32 (ethernet_flag_change_function_t)
+ (vnet_main_t * vnm, struct vnet_hw_interface_t * hi, u32 flags);
+
+/* Smallest and largest ethernet packet sizes, in bytes.
+   NOTE(review): whether these include the FCS is not stated here. */
+#define ETHERNET_MIN_PACKET_BYTES 64
+#define ETHERNET_MAX_PACKET_BYTES 9216
+
+/* Ethernet interface instance. The ETHERNET_INTERFACE_FLAG_* macros
+   defined inside the struct are the flag bits passed to the
+   flag_change callback. */
+typedef struct ethernet_interface
+{
+
+ /* Accept all packets (promiscuous mode). */
+#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL (1 << 0)
+ /* True when 'flags' has no bit set other than ACCEPT_ALL, i.e. the
+ request only concerns promiscuous mode (on or off). */
+#define ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC(flags) \
+ (((flags) & ~ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) == 0)
+
+ /* Change MTU on interface from hw interface structure */
+#define ETHERNET_INTERFACE_FLAG_MTU (1 << 1)
+ /* Non-zero when the MTU bit is set in 'flags'. */
+#define ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags) \
+ ((flags) & ETHERNET_INTERFACE_FLAG_MTU)
+
+ /* Callback, e.g. to turn on/off promiscuous mode */
+ ethernet_flag_change_function_t *flag_change;
+
+ /* NOTE(review): presumably the device-class instance number of the
+ driver that registered this interface - confirm. */
+ u32 driver_instance;
+
+ /* Ethernet (MAC) address for this interface. */
+ u8 address[6];
+} ethernet_interface_t;
+
+extern vnet_hw_interface_class_t ethernet_hw_interface_class;
+
+/* Per-ethertype registration record; looked up by type through
+   ethernet_get_type_info (). */
+typedef struct
+{
+ /* Name (a c string). */
+ char *name;
+
+ /* Ethernet type in host byte order. */
+ ethernet_type_t type;
+
+ /* Node which handles this type. */
+ u32 node_index;
+
+ /* Next index for this type. */
+ u32 next_index;
+} ethernet_type_info_t;
+
+/* Ethernet error codes: one ETHERNET_ERROR_<name> per ethernet_error ()
+   line of error.def, numbered from 0 in file order. ETHERNET_N_ERROR is
+   the number of errors. Only the first macro argument is used here. */
+typedef enum
+{
+#define ethernet_error(n,c,s) ETHERNET_ERROR_##n,
+#include <vnet/ethernet/error.def>
+#undef ethernet_error
+ ETHERNET_N_ERROR,
+} ethernet_error_t;
+
+
+// Structs used when parsing packet to find sw_if_index
+
+/* Configuration of one candidate sub-interface in the VLAN lookup
+   tables: the sw_if_index to assign and the SUBINT_CONFIG_* flags that
+   say when it matches. */
+typedef struct
+{
+ u32 sw_if_index;
+ u32 flags;
+ // config entry is-valid flag
+ // exact match flags (valid if packet has 0/1/2/3 tags)
+ // L2 vs L3 forwarding mode
+ // MATCH_n_TAG is bit n, which is what
+ // eth_create_valid_subint_match_flags () relies on (1 << num_tags).
+#define SUBINT_CONFIG_MATCH_0_TAG (1<<0)
+#define SUBINT_CONFIG_MATCH_1_TAG (1<<1)
+#define SUBINT_CONFIG_MATCH_2_TAG (1<<2)
+#define SUBINT_CONFIG_MATCH_3_TAG (1<<3)
+#define SUBINT_CONFIG_VALID (1<<4)
+#define SUBINT_CONFIG_L2 (1<<5)
+#define SUBINT_CONFIG_P2P (1<<6)
+
+} subint_config_t;
+
+/**
+ * Build the match-flag word for a packet carrying @c num_tags VLAN tags:
+ * SUBINT_CONFIG_VALID plus the SUBINT_CONFIG_MATCH_<n>_TAG bit (bit n).
+ *
+ * @param num_tags  number of tags; selects bit (1 << num_tags).
+ * @return the combined flag word.
+ */
+always_inline u32
+eth_create_valid_subint_match_flags (u32 num_tags)
+{
+  u32 tag_match_bit = 1 << num_tags;
+
+  return SUBINT_CONFIG_VALID | tag_match_bit;
+}
+
+
+/* Root of the VLAN parsing tables for one main (hardware) interface:
+   the untagged and default sub-interfaces, plus the vlan_pool indexes of
+   its dot1q and dot1ad VLAN tables. */
+typedef struct
+{
+ subint_config_t untagged_subint;
+ subint_config_t default_subint;
+ u16 dot1q_vlans; // pool id for vlan table
+ u16 dot1ad_vlans; // pool id for vlan table
+} main_intf_t;
+
+/* Entry for one outer VLAN id: the sub-interface for exactly one tag,
+   the sub-interface for "this outer tag, any inner tag", and the
+   qinq_pool index of the table indexed by inner id. */
+typedef struct
+{
+ subint_config_t single_tag_subint;
+ subint_config_t inner_any_subint;
+ u32 qinqs; // pool id for qinq table
+} vlan_intf_t;
+
+/* VLAN table: one vlan_intf_t per outer VLAN id
+   (ETHERNET_N_VLAN entries, indexed by outer id). */
+typedef struct
+{
+ vlan_intf_t vlans[ETHERNET_N_VLAN];
+} vlan_table_t;
+
+/* Entry for one (outer, inner) VLAN id pair: the sub-interface
+   configured for that specific double tag. */
+typedef struct
+{
+ subint_config_t subint;
+} qinq_intf_t;
+
+/* QinQ table: one qinq_intf_t per inner VLAN id
+   (ETHERNET_N_VLAN entries, indexed by inner id). */
+typedef struct
+{
+ qinq_intf_t vlans[ETHERNET_N_VLAN];
+} qinq_table_t;
+
+// Structure mapping to a next index based on ethertype.
+// Common ethertypes are stored explicitly, others are
+// stored in a sparse table.
+typedef struct
+{
+ /* Sparse vector mapping ethernet type in network byte order
+ to next index. */
+ u16 *input_next_by_type;
+ /* NOTE(review): presumably the reverse mapping, from next index to
+ the slot used in the sparse vector - confirm in node.c. */
+ u32 *sparse_index_by_input_next_index;
+
+ /* cached next indexes for common ethertypes */
+ u32 input_next_ip4;
+ u32 input_next_ip6;
+ u32 input_next_mpls;
+} next_by_ethertype_t;
+
+/* Global state of the ethernet layer; the single instance is
+   ethernet_main, also reachable through vnet_get_ethernet_main (). */
+typedef struct
+{
+ vlib_main_t *vlib_main;
+
+ /* next node index for the L3 input node of each ethertype */
+ next_by_ethertype_t l3_next;
+
+ /* next node index for L2 interfaces */
+ u32 l2_next;
+
+ /* flag and next node index for L3 redirect */
+ u32 redirect_l3;
+ u32 redirect_l3_next;
+
+ /* Pool of ethernet interface instances. */
+ ethernet_interface_t *interfaces;
+
+ /* Vector of registered ethertypes; indexed by the values stored in
+ the two hash tables below. */
+ ethernet_type_info_t *type_infos;
+
+ /* Hash tables mapping name/type to type info index. */
+ uword *type_info_by_name, *type_info_by_type;
+
+ // The root of the vlan parsing tables. A vector with one element
+ // for each main interface, indexed by hw_if_index.
+ main_intf_t *main_intfs;
+
+ // Pool of vlan tables
+ vlan_table_t *vlan_pool;
+
+ // Pool of qinq tables;
+ qinq_table_t *qinq_pool;
+
+ /* Set to one to use AB.CD.EF instead of A:B:C:D:E:F as ethernet format. */
+ int format_ethernet_address_16bit;
+
+ /* debug: make sure we don't wipe out an ethernet registration by mistake */
+ u8 next_by_ethertype_register_called;
+
+ /* Feature arc index */
+ u8 output_feature_arc_index;
+
+ /* Allocated loopback instances */
+ uword *bm_loopback_instances;
+} ethernet_main_t;
+
+/* NOTE(review): this is a (tentative) definition, not an extern
+   declaration, so every translation unit that includes this header
+   defines the object. That only links when the toolchain merges common
+   symbols (-fcommon). Consider declaring it extern here and defining it
+   once in a .c file. */
+ethernet_main_t ethernet_main;
+
+/**
+ * Look up the registration record for an ethertype.
+ *
+ * @param em    ethernet main.
+ * @param type  ethertype used as key into type_info_by_type.
+ * @return pointer into em->type_infos, or 0 if the type is not
+ *         registered.
+ */
+always_inline ethernet_type_info_t *
+ethernet_get_type_info (ethernet_main_t * em, ethernet_type_t type)
+{
+  uword *slot = hash_get (em->type_info_by_type, type);
+
+  if (!slot)
+    return 0;
+
+  /* The hash value is the index into the type_infos vector. */
+  return vec_elt_at_index (em->type_infos, slot[0]);
+}
+
+ethernet_interface_t *ethernet_get_interface (ethernet_main_t * em,
+ u32 hw_if_index);
+
+clib_error_t *ethernet_register_interface (vnet_main_t * vnm,
+ u32 dev_class_index,
+ u32 dev_instance,
+ u8 * address,
+ u32 * hw_if_index_return,
+ ethernet_flag_change_function_t
+ flag_change);
+
+void ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index);
+
+/* Register given node index to take input for given ethernet type. */
+void
+ethernet_register_input_type (vlib_main_t * vm,
+ ethernet_type_t type, u32 node_index);
+
+/* Register given node index to take input for packet from L2 interfaces. */
+void ethernet_register_l2_input (vlib_main_t * vm, u32 node_index);
+
+/* Register given node index to take redirected L3 traffic, and enable L3 redirect */
+void ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index);
+
+/* Formats ethernet address X:X:X:X:X:X */
+u8 *format_ethernet_address (u8 * s, va_list * args);
+u8 *format_ethernet_type (u8 * s, va_list * args);
+u8 *format_ethernet_vlan_tci (u8 * s, va_list * va);
+u8 *format_ethernet_header (u8 * s, va_list * args);
+u8 *format_ethernet_header_with_length (u8 * s, va_list * args);
+
+/* Parse ethernet address in either X:X:X:X:X:X unix or X.X.X cisco format. */
+uword unformat_ethernet_address (unformat_input_t * input, va_list * args);
+
+/* Parse ethernet type as 0xXXXX or type name from ethernet/types.def.
+ In either host or network byte order. */
+uword
+unformat_ethernet_type_host_byte_order (unformat_input_t * input,
+ va_list * args);
+uword
+unformat_ethernet_type_net_byte_order (unformat_input_t * input,
+ va_list * args);
+
+/* Parse ethernet header. */
+uword unformat_ethernet_header (unformat_input_t * input, va_list * args);
+
+/* Parse ethernet interface name; return hw_if_index. */
+uword unformat_ethernet_interface (unformat_input_t * input, va_list * args);
+
+uword unformat_pg_ethernet_header (unformat_input_t * input, va_list * args);
+
+/**
+ * Install the ethernet header format/unformat handlers on a node.
+ *
+ * Sets the vlib node's buffer format and unformat functions and the
+ * packet-generator node's edit-unformat function.
+ *
+ * @param vm          vlib main.
+ * @param node_index  index of the node to set up.
+ */
+always_inline void
+ethernet_setup_node (vlib_main_t * vm, u32 node_index)
+{
+  vlib_node_t *node = vlib_get_node (vm, node_index);
+  pg_node_t *pg_node = pg_get_node (node_index);
+
+  node->format_buffer = format_ethernet_header_with_length;
+  node->unformat_buffer = unformat_ethernet_header;
+  pg_node->unformat_edit = unformat_pg_ethernet_header;
+}
+
+/**
+ * Return the ethernet header of a buffer, located at offset
+ * vnet_buffer (b)->l2_hdr_offset from the start of b->data.
+ *
+ * @param b  buffer whose l2_hdr_offset has been set.
+ * @return pointer to the ethernet header inside the buffer data.
+ */
+always_inline ethernet_header_t *
+ethernet_buffer_get_header (vlib_buffer_t * b)
+{
+  void *l2_hdr = b->data + vnet_buffer (b)->l2_hdr_offset;
+
+  return l2_hdr;
+}
+
+/** Returns the number of VLAN headers in the current Ethernet frame in the
+ * buffer. Returns 0, 1, 2 for the known header count. The value 3 indicates
+ * the number of headers is not known.
+ *
+ * The count is the VNET_BUFFER_FLAGS_VLAN_BITS field of b->flags shifted
+ * down by VNET_BUFFER_F_LOG2_VLAN_1_DEEP. 'b' is evaluated once.
+ */
+#define ethernet_buffer_get_vlan_count(b) ( \
+ ((b)->flags & VNET_BUFFER_FLAGS_VLAN_BITS) >> VNET_BUFFER_F_LOG2_VLAN_1_DEEP \
+)
+
+/** Sets the number of VLAN headers in the current Ethernet frame in the
+ * buffer. Values 0, 1, 2 indicate the header count. The value 3 indicates
+ * the number of headers is not known.
+ *
+ * Other bits of b->flags are preserved; 'v' is masked to the VLAN bit
+ * field after shifting. 'b' is evaluated twice, so it must have no side
+ * effects.
+ */
+#define ethernet_buffer_set_vlan_count(b, v) ( \
+ (b)->flags = ((b)->flags & ~VNET_BUFFER_FLAGS_VLAN_BITS) | \
+ (((v) << VNET_BUFFER_F_LOG2_VLAN_1_DEEP) & VNET_BUFFER_FLAGS_VLAN_BITS) \
+)
+
+/** Adjusts the vlan count by the delta in 'v'.
+ *
+ * Reads the current count, adds the signed delta and stores the result
+ * with ethernet_buffer_set_vlan_count (), which masks it to the VLAN bit
+ * field. 'b' is evaluated more than once, so it must have no side
+ * effects.
+ */
+#define ethernet_buffer_adjust_vlan_count(b, v) ( \
+ ethernet_buffer_set_vlan_count(b, \
+ (word)ethernet_buffer_get_vlan_count(b) + (word)(v)) \
+)
+
+/** Adjusts the vlan count by the header size byte delta in 'v'.
+ *
+ * The byte delta is shifted left by (VNET_BUFFER_F_LOG2_VLAN_1_DEEP - 2),
+ * i.e. every 4 bytes count as one VLAN header, and added directly to the
+ * VLAN bit field of b->flags; the sum is masked back to that field.
+ * NOTE(review): assumes sizeof (ethernet_vlan_header_t) == 4 and that 'v'
+ * is a multiple of 4 - confirm. A negative 'v' is left-shifted here,
+ * which is undefined in ISO C; callers passing negative deltas rely on
+ * the compiler's arithmetic-shift behaviour.
+ * 'b' is evaluated three times, so it must have no side effects.
+ */
+#define ethernet_buffer_adjust_vlan_count_by_bytes(b, v) ( \
+ (b)->flags = ((b)->flags & ~VNET_BUFFER_FLAGS_VLAN_BITS) | (( \
+ ((b)->flags & VNET_BUFFER_FLAGS_VLAN_BITS) + \
+ ((v) << (VNET_BUFFER_F_LOG2_VLAN_1_DEEP - 2)) \
+ ) & VNET_BUFFER_FLAGS_VLAN_BITS) \
+)
+
+/**
+ * Determine the size of the Ethernet headers of the current frame in
+ * the buffer. This uses the VLAN depth flags that are set by
+ * ethernet-input. Because these flags are stored in the vlib_buffer_t
+ * "flags" field this count is valid regardless of the node so long as it's
+ * checked downstream of ethernet-input; That is, the value is not stored in
+ * the opaque space.
+ *
+ * The result is sizeof (ethernet_header_t) plus one
+ * sizeof (ethernet_vlan_header_t) per counted VLAN header. Note that a
+ * stored count of 3 ("not known") is still multiplied in as 3 headers.
+ */
+#define ethernet_buffer_header_size(b) ( \
+ ethernet_buffer_get_vlan_count((b)) * sizeof(ethernet_vlan_header_t) + \
+ sizeof(ethernet_header_t) \
+)
+
+ethernet_main_t *ethernet_get_main (vlib_main_t * vm);
+u32 ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags);
+void ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index,
+ u32 l2);
+void ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm,
+ u32 sw_if_index, u32 l2);
+void ethernet_set_rx_redirect (vnet_main_t * vnm, vnet_hw_interface_t * hi,
+ u32 enable);
+
+int
+vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
+ u32 sw_if_index, void *a_arg,
+ int is_static, int is_no_fib_entry);
+
+int
+vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
+ u32 sw_if_index, void *a_arg);
+
+int vnet_proxy_arp_fib_reset (u32 fib_id);
+
+clib_error_t *next_by_ethertype_init (next_by_ethertype_t * l3_next);
+clib_error_t *next_by_ethertype_register (next_by_ethertype_t * l3_next,
+ u32 ethertype, u32 next_index);
+
+int vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address,
+ u8 is_specified, u32 user_instance);
+int vnet_delete_loopback_interface (u32 sw_if_index);
+int vnet_delete_sub_interface (u32 sw_if_index);
+
+// Perform the ethernet subinterface classification table lookups for a
+// packet, given the port's sw_if_index and the fields extracted from its
+// ethernet header. The four output pointers (*hi, *main_intf, *vlan_intf,
+// *qinq_intf) are always written and are the inputs of
+// eth_identify_subint().
+//
+// outer_id and inner_id index arrays of ETHERNET_N_VLAN entries without
+// a range check; callers must pass ids below that bound.
+always_inline void
+eth_vlan_table_lookups (ethernet_main_t * em,
+			vnet_main_t * vnm,
+			u32 port_sw_if_index0,
+			u16 first_ethertype,
+			u16 outer_id,
+			u16 inner_id,
+			vnet_hw_interface_t ** hi,
+			main_intf_t ** main_intf,
+			vlan_intf_t ** vlan_intf, qinq_intf_t ** qinq_intf)
+{
+  vnet_hw_interface_t *hw;
+  main_intf_t *mi;
+  vlan_intf_t *vi;
+  vlan_table_t *vt;
+  qinq_table_t *qt;
+  u32 vt_index;
+
+  // Main interface entry, found through the supporting hw interface.
+  // TODO: Consider if/how to prefetch tables. Also consider
+  // single-entry cache to skip table lookups and identify_subint()
+  // processing.
+  hw = vnet_get_sup_hw_interface (vnm, port_sw_if_index0);
+  *hi = hw;
+  mi = vec_elt_at_index (em->main_intfs, hw->hw_if_index);
+  *main_intf = mi;
+
+  // The vlan and qinq tables are read unconditionally, even when the
+  // packet carries fewer tags; this keeps the lookups branch-free apart
+  // from choosing the dot1ad or dot1q table.
+  if (first_ethertype == ETHERNET_TYPE_DOT1AD)
+    vt_index = mi->dot1ad_vlans;
+  else
+    vt_index = mi->dot1q_vlans;
+
+  vt = vec_elt_at_index (em->vlan_pool, vt_index);
+  vi = &vt->vlans[outer_id];
+  *vlan_intf = vi;
+
+  qt = vec_elt_at_index (em->qinq_pool, vi->qinqs);
+  *qinq_intf = &qt->vlans[inner_id];
+}
+
+
+// Pick the subinterface for a packet from the entries returned by
+// eth_vlan_table_lookups(). Candidates are tried from most to least
+// specific: exact double tag, outer tag with any inner tag, exact single
+// tag, untagged, default. A candidate matches when all bits of
+// match_flags (valid bit plus tag-count bit) are set in its flags.
+//
+// On a match: *new_sw_if_index and *is_l2 are set (error0 is left
+// untouched) and 1 is returned.
+// Without a match: *new_sw_if_index = ~0, *error0 =
+// ETHERNET_ERROR_UNKNOWN_VLAN, *is_l2 = 0 and 0 is returned.
+// The hi and b0 arguments are not used.
+always_inline u32
+eth_identify_subint (vnet_hw_interface_t * hi,
+		     vlib_buffer_t * b0,
+		     u32 match_flags,
+		     main_intf_t * main_intf,
+		     vlan_intf_t * vlan_intf,
+		     qinq_intf_t * qinq_intf,
+		     u32 * new_sw_if_index, u8 * error0, u32 * is_l2)
+{
+  subint_config_t *chosen;
+
+  if ((qinq_intf->subint.flags & match_flags) == match_flags)
+    {
+      // specific double tag
+      chosen = &qinq_intf->subint;
+    }
+  else if ((vlan_intf->inner_any_subint.flags & match_flags) == match_flags)
+    {
+      // specific outer tag, 'any' inner tag
+      chosen = &vlan_intf->inner_any_subint;
+    }
+  else if ((vlan_intf->single_tag_subint.flags & match_flags) == match_flags)
+    {
+      // specific single tag
+      chosen = &vlan_intf->single_tag_subint;
+    }
+  else if ((main_intf->untagged_subint.flags & match_flags) == match_flags)
+    {
+      // untagged interface
+      chosen = &main_intf->untagged_subint;
+    }
+  else if ((main_intf->default_subint.flags & match_flags) == match_flags)
+    {
+      // default interface
+      chosen = &main_intf->default_subint;
+    }
+  else
+    {
+      // No matching subinterface
+      *new_sw_if_index = ~0;
+      *error0 = ETHERNET_ERROR_UNKNOWN_VLAN;
+      *is_l2 = 0;
+      return 0;
+    }
+
+  *new_sw_if_index = chosen->sw_if_index;
+  *is_l2 = chosen->flags & SUBINT_CONFIG_L2;
+  return 1;
+}
+
+// Compare two 6-byte ethernet MAC addresses.
+// Returns 1 if all six bytes are equal, 0 otherwise.
+//
+// The bytes are compared individually instead of through u32 loads at
+// offsets 0 and 2: casting a u8 pointer to u32 * breaks the C aliasing
+// rules and may perform misaligned loads on targets that do not allow
+// them. The differences are OR-ed together without early exit so the
+// loop stays branch-free and easy for the compiler to combine.
+always_inline u32
+eth_mac_equal (u8 * mac1, u8 * mac2)
+{
+  u8 diff = 0;
+  int i;
+
+  for (i = 0; i < 6; i++)
+    diff |= mac1[i] ^ mac2[i];
+
+  return diff == 0;
+}
+
+
+/**
+ * Accessor for the global ethernet state.
+ *
+ * @return the address of ethernet_main.
+ */
+always_inline ethernet_main_t *
+vnet_get_ethernet_main (void)
+{
+  ethernet_main_t *em = &ethernet_main;
+
+  return em;
+}
+
+void vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque, uword data);
+
+
+int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
+ void *data_callback,
+ u32 pid,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque,
+ uword data, int is_add);
+
+void wc_arp_set_publisher_node (uword inode_index, uword event_type);
+
+void ethernet_arp_change_mac (u32 sw_if_index);
+void ethernet_ndp_change_mac (u32 sw_if_index);
+
+void arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai);
+
+void ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai);
+u8 *ethernet_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address);
+const u8 *ethernet_ip4_mcast_dst_addr (void);
+const u8 *ethernet_ip6_mcast_dst_addr (void);
+
+extern vlib_node_registration_t ethernet_input_node;
+
+/* Record holding an interface index, an IPv4 address and a MAC address.
+   NOTE(review): presumably the event payload of the wildcard ARP
+   publisher (see wc_arp_set_publisher_node) - confirm in arp.c,
+   including the byte order of ip4. */
+typedef struct
+{
+ u32 sw_if_index;
+ u32 ip4;
+ u8 mac[6];
+} wc_arp_report_t;
+
+#endif /* included_ethernet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/format.c b/src/vnet/ethernet/format.c
new file mode 100644
index 00000000..5b589998
--- /dev/null
+++ b/src/vnet/ethernet/format.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_format.c: ethernet formatting/parsing.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+
+/*
+ * Format a 6-byte ethernet address.
+ * va_args: u8 * pointing at the six address bytes.
+ * Emits "xxxx.xxxx.xxxx" when ethernet_main.format_ethernet_address_16bit
+ * is set, "xx:xx:xx:xx:xx:xx" otherwise.
+ */
+u8 *
+format_ethernet_address (u8 * s, va_list * args)
+{
+  ethernet_main_t *em = &ethernet_main;
+  u8 *mac = va_arg (*args, u8 *);
+  char *fmt;
+
+  fmt = em->format_ethernet_address_16bit
+    ? "%02x%02x.%02x%02x.%02x%02x" : "%02x:%02x:%02x:%02x:%02x:%02x";
+
+  return format (s, fmt, mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+}
+
+/*
+ * Format an ethernet type.
+ * va_args: u32 ethertype.
+ * Prints the registered name when ethernet_get_type_info() finds one,
+ * otherwise the value as 4-digit hex.
+ */
+u8 *
+format_ethernet_type (u8 * s, va_list * args)
+{
+  ethernet_type_t type = va_arg (*args, u32);
+  ethernet_type_info_t *info = ethernet_get_type_info (&ethernet_main, type);
+
+  if (!info)
+    return format (s, "0x%04x", type);
+
+  return format (s, "%s", info->name);
+}
+
+/*
+ * Format a VLAN tag control word.
+ * va_args: u32 tci.  Bits 0-11 are printed as the vlan id, bit 12 as
+ * " cfi" when set, and bits 13 and up as " priority N" when non-zero.
+ */
+u8 *
+format_ethernet_vlan_tci (u8 * s, va_list * va)
+{
+  u32 tci = va_arg (*va, u32);
+  u32 priority = tci >> 13;
+  u32 cfi_bit = (tci >> 12) & 1;
+
+  s = format (s, "%d", tci & 0xfff);
+  if (priority)
+    s = format (s, " priority %d", priority);
+  if (cfi_bit)
+    s = format (s, " cfi");
+
+  return s;
+}
+
+/*
+ * Format an ethernet header, including any stacked VLAN (802.1q/802.1ad)
+ * tags or an 802.1ah header, optionally followed by the payload.
+ *
+ * va_args:
+ *   pointer to the start of the header
+ *   u32 max_header_bytes: number of bytes available at that pointer.
+ *     0 disables both the truncation check and payload formatting.
+ *
+ * Output starts with "<type>: <src> -> <dst>".
+ */
+u8 *
+format_ethernet_header_with_length (u8 * s, va_list * args)
+{
+  ethernet_pbb_header_packed_t *ph =
+    va_arg (*args, ethernet_pbb_header_packed_t *);
+  ethernet_max_header_t *m = (ethernet_max_header_t *) ph;
+  u32 max_header_bytes = va_arg (*args, u32);
+  ethernet_main_t *em = &ethernet_main;
+  ethernet_header_t *e = &m->ethernet;
+  ethernet_vlan_header_t *v;
+  ethernet_type_t type = clib_net_to_host_u16 (e->type);
+  ethernet_type_t vlan_type[ARRAY_LEN (m->vlan)];
+  u32 n_vlan = 0, i, header_bytes;
+  uword indent;
+
+  /*
+   * Walk the tag stack.  vlan_type[k] records the ethertype that
+   * introduced tag k; 'type' ends up as the ethertype after the last tag.
+   * For 802.1ah 'type' is not advanced, so the loop runs until the
+   * vlan array is exhausted.
+   */
+  while ((type == ETHERNET_TYPE_VLAN || type == ETHERNET_TYPE_DOT1AD
+          || type == ETHERNET_TYPE_DOT1AH) && n_vlan < ARRAY_LEN (m->vlan))
+    {
+      vlan_type[n_vlan] = type;
+      if (type != ETHERNET_TYPE_DOT1AH)
+        {
+          v = m->vlan + n_vlan;
+          type = clib_net_to_host_u16 (v->type);
+        }
+      n_vlan++;
+    }
+
+  header_bytes = sizeof (e[0]) + n_vlan * sizeof (v[0]);
+  if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+    return format (s, "ethernet header truncated");
+
+  indent = format_get_indent (s);
+
+  s = format (s, "%U: %U -> %U",
+              format_ethernet_type, type,
+              format_ethernet_address, e->src_address,
+              format_ethernet_address, e->dst_address);
+
+  if (type != ETHERNET_TYPE_DOT1AH)
+    {
+      for (i = 0; i < n_vlan; i++)
+        {
+          u32 tci = clib_net_to_host_u16 (m->vlan[i].priority_cfi_and_id);
+
+          /* Label each tag by the ethertype that introduced *that* tag. */
+          if (vlan_type[i] == ETHERNET_TYPE_VLAN)
+            s = format (s, " 802.1q vlan %U", format_ethernet_vlan_tci, tci);
+          else
+            s = format (s, " 802.1ad vlan %U", format_ethernet_vlan_tci, tci);
+        }
+
+      /* Hand the remaining bytes to the payload node's formatter, if any. */
+      if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+        {
+          ethernet_type_info_t *ti;
+          vlib_node_t *node = 0;
+
+          ti = ethernet_get_type_info (em, type);
+          if (ti && ti->node_index != ~0)
+            node = vlib_get_node (em->vlib_main, ti->node_index);
+          if (node && node->format_buffer)
+            s = format (s, "\n%U%U",
+                        format_white_space, indent,
+                        node->format_buffer, (void *) m + header_bytes,
+                        max_header_bytes - header_bytes);
+        }
+    }
+  else
+    {
+      s =
+        format (s, " %s b-tag %04X",
+                (clib_net_to_host_u16 (ph->b_type) ==
+                 ETHERNET_TYPE_DOT1AD) ? "802.1ad" : "",
+                clib_net_to_host_u16 (ph->priority_dei_id));
+      s =
+        format (s, " %s i-tag %08X",
+                (clib_net_to_host_u16 (ph->i_type) ==
+                 ETHERNET_TYPE_DOT1AH) ? "802.1ah" : "",
+                clib_net_to_host_u32 (ph->priority_dei_uca_res_sid));
+    }
+
+  return s;
+}
+
+/*
+ * Format an ethernet header when the available length is not known.
+ * va_args: ethernet_max_header_t *.  Delegates to
+ * format_ethernet_header_with_length with a length of 0.
+ */
+u8 *
+format_ethernet_header (u8 * s, va_list * args)
+{
+  ethernet_max_header_t *hdr = va_arg (*args, ethernet_max_header_t *);
+
+  s = format (s, "%U", format_ethernet_header_with_length, hdr, 0);
+  return s;
+}
+
+/*
+ * Parse X:X:X:X:X:X unix style ethernet address.
+ * va_args: u8 * receiving 6 bytes.  Returns 1 on success; on failure
+ * returns 0 and leaves the destination untouched.
+ */
+static uword
+unformat_ethernet_address_unix (unformat_input_t * input, va_list * args)
+{
+  u8 *result = va_arg (*args, u8 *);
+  u32 octet[6];
+  u32 k;
+
+  if (!unformat (input, "%_%x:%x:%x:%x:%x:%x%_",
+                 &octet[0], &octet[1], &octet[2],
+                 &octet[3], &octet[4], &octet[5]))
+    return 0;
+
+  /* Every field must fit in a single byte. */
+  for (k = 0; k < ARRAY_LEN (octet); k++)
+    {
+      if (octet[k] >= (1 << 8))
+        return 0;
+    }
+
+  for (k = 0; k < ARRAY_LEN (octet); k++)
+    result[k] = octet[k];
+
+  return 1;
+}
+
+/*
+ * Parse X.X.X cisco style ethernet address (three 16-bit hex groups).
+ * va_args: u8 * receiving 6 bytes, high byte of each group first.
+ * Returns 1 on success; on failure returns 0 and leaves the destination
+ * untouched.
+ */
+static uword
+unformat_ethernet_address_cisco (unformat_input_t * input, va_list * args)
+{
+  u8 *result = va_arg (*args, u8 *);
+  u32 group[3];
+  u32 k;
+
+  if (!unformat (input, "%_%x.%x.%x%_", &group[0], &group[1], &group[2]))
+    return 0;
+
+  /* Every group must fit in 16 bits. */
+  for (k = 0; k < ARRAY_LEN (group); k++)
+    {
+      if (group[k] >= (1 << 16))
+        return 0;
+    }
+
+  for (k = 0; k < ARRAY_LEN (group); k++)
+    {
+      result[2 * k + 0] = (group[k] >> 8) & 0xff;
+      result[2 * k + 1] = (group[k] >> 0) & 0xff;
+    }
+
+  return 1;
+}
+
+/*
+ * Parse ethernet address; accept either unix (X:X:X:X:X:X) or cisco
+ * (X.X.X) style addresses.  The unix form is tried first.
+ * va_args: u8 * receiving 6 bytes.  Returns 1 on success, 0 otherwise.
+ */
+uword
+unformat_ethernet_address (unformat_input_t * input, va_list * args)
+{
+  u8 *result = va_arg (*args, u8 *);
+
+  if (unformat_user (input, unformat_ethernet_address_unix, result))
+    return 1;
+  if (unformat_user (input, unformat_ethernet_address_cisco, result))
+    return 1;
+
+  return 0;
+}
+
+/*
+ * Parse an ethernet type, given either numerically ("0x..." hex or
+ * decimal) or by registered name.
+ * va_args: u16 * receiving the type in host byte order.
+ * Returns 1 on success.  Returns 0, leaving the destination untouched,
+ * when nothing matches or a numeric value does not fit in 16 bits.
+ */
+uword
+unformat_ethernet_type_host_byte_order (unformat_input_t * input,
+                                        va_list * args)
+{
+  u16 *result = va_arg (*args, u16 *);
+  ethernet_main_t *em = &ethernet_main;
+  int type, i;
+
+  /* Numeric type. */
+  if (unformat (input, "0x%x", &type) || unformat (input, "%d", &type))
+    {
+      /* 'type' is signed: reject negatives as well as values > 0xffff
+         rather than silently truncating them into a u16. */
+      if (type < 0 || type >= (1 << 16))
+        return 0;
+      *result = type;
+      return 1;
+    }
+
+  /* Named type. */
+  if (unformat_user (input, unformat_vlib_number_by_name,
+                     em->type_info_by_name, &i))
+    {
+      ethernet_type_info_t *ti = vec_elt_at_index (em->type_infos, i);
+      *result = ti->type;
+      return 1;
+    }
+
+  return 0;
+}
+
+/*
+ * Same as unformat_ethernet_type_host_byte_order, but the u16 result is
+ * stored in network byte order.  Returns 1 on success, 0 otherwise.
+ */
+uword
+unformat_ethernet_type_net_byte_order (unformat_input_t * input,
+                                       va_list * args)
+{
+  u16 *result = va_arg (*args, u16 *);
+  u16 host_type;
+
+  if (unformat_user (input, unformat_ethernet_type_host_byte_order, result))
+    {
+      host_type = *result;
+      *result = clib_host_to_net_u16 (host_type);
+      return 1;
+    }
+
+  return 0;
+}
+
+/*
+ * Parse "<type>: <src> -> <dst>" followed by zero or more
+ * "vlan <id> [priority <0-7>] [cfi]" clauses and append the resulting
+ * ethernet header bytes to a vector.
+ * va_args: u8 ** vector to append to.
+ * Returns 1 on success, 0 on any parse or range error (nothing is
+ * appended in that case).
+ */
+uword
+unformat_ethernet_header (unformat_input_t * input, va_list * args)
+{
+  u8 **result = va_arg (*args, u8 **);
+  ethernet_max_header_t scratch, *m = &scratch;
+  ethernet_header_t *e = &m->ethernet;
+  u16 type;
+  u32 n_vlan = 0;
+  u32 k, n_bytes;
+  void *dst;
+
+  if (!unformat (input, "%U: %U -> %U",
+                 unformat_ethernet_type_host_byte_order, &type,
+                 unformat_ethernet_address, &e->src_address,
+                 unformat_ethernet_address, &e->dst_address))
+    return 0;
+
+  while (unformat (input, "vlan"))
+    {
+      u32 tci, priority;
+
+      if (!unformat_user (input, unformat_vlib_number, &tci))
+        return 0;
+      if (tci >= ETHERNET_N_VLAN)
+        return 0;
+
+      if (unformat (input, "priority %d", &priority))
+        {
+          if (priority >= 8)
+            return 0;
+          tci |= priority << 13;
+        }
+
+      if (unformat (input, "cfi"))
+        tci |= 1 << 12;
+
+      /* Too many vlans given. */
+      if (n_vlan >= ARRAY_LEN (m->vlan))
+        return 0;
+
+      m->vlan[n_vlan].priority_cfi_and_id = clib_host_to_net_u16 (tci);
+      n_vlan++;
+    }
+
+  /*
+   * The ethernet type field and every tag except the last announce a
+   * following VLAN tag; the last tag (or the ethernet header itself when
+   * there are no tags) carries the parsed payload type.
+   */
+  if (n_vlan == 0)
+    e->type = clib_host_to_net_u16 (type);
+  else
+    e->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+
+  for (k = 0; k < n_vlan; k++)
+    {
+      if (k + 1 < n_vlan)
+        m->vlan[k].type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+      else
+        m->vlan[k].type = clib_host_to_net_u16 (type);
+    }
+
+  /* Add header to result. */
+  n_bytes = sizeof (e[0]) + n_vlan * sizeof (m->vlan[0]);
+  vec_add2 (*result, dst, n_bytes);
+  clib_memcpy (dst, m, n_bytes);
+
+  return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/init.c b/src/vnet/ethernet/init.c
new file mode 100644
index 00000000..2d20adc9
--- /dev/null
+++ b/src/vnet/ethernet/init.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_init.c: ethernet initialization
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h> // for feature registration
+
+/* Global main structure. */
+ethernet_main_t ethernet_main;
+
+/*
+ * Append one ethernet type record to em->type_infos and index it both
+ * by numeric type and by name.  next_index and node_index start out as
+ * ~0.
+ */
+static void
+add_type (ethernet_main_t * em, ethernet_type_t type, char *type_name)
+{
+  ethernet_type_info_t *info;
+  u32 index;
+
+  vec_add2 (em->type_infos, info, 1);
+  index = info - em->type_infos;
+
+  info->type = type;
+  info->name = type_name;
+  info->node_index = ~0;
+  info->next_index = info->node_index;
+
+  hash_set (em->type_info_by_type, type, index);
+  hash_set_mem (em->type_info_by_name, info->name, index);
+}
+
+/* Built-in ethernet-output feature arc definition */
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (ethernet_output, static) =
+{
+ .arc_name = "ethernet-output",
+ .start_nodes = VNET_FEATURES ("adj-l2-midchain"),
+ .arc_index_ptr = &ethernet_main.output_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ethernet_tx_drop, static) =
+{
+ .arc_name = "ethernet-output",
+ .node_name = "error-drop",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
+
+/*
+ * Ethernet init function: runs l2_init, creates the type lookup hashes,
+ * registers every type listed in types.def, then runs llc_init,
+ * ethernet_input_init and vnet_feature_init.  Returns the first error
+ * reported by any of those init functions, or 0.
+ */
+static clib_error_t *
+ethernet_init (vlib_main_t * vm)
+{
+  ethernet_main_t *em = &ethernet_main;
+  clib_error_t *error;
+
+  /*
+   * Set up the L2 path now, or we'll wipe out the L2 ARP
+   * registration set up by ethernet_arp_init.
+   */
+  error = vlib_call_init_function (vm, l2_init);
+  if (error)
+    return error;
+
+  em->vlib_main = vm;
+
+  em->type_info_by_name = hash_create_string (0, sizeof (uword));
+  em->type_info_by_type = hash_create (0, sizeof (uword));
+
+#define ethernet_type(n,s) add_type (em, ETHERNET_TYPE_##s, #s);
+#include "types.def"
+#undef ethernet_type
+
+  error = vlib_call_init_function (vm, llc_init);
+  if (error)
+    return error;
+
+  error = vlib_call_init_function (vm, ethernet_input_init);
+  if (error)
+    return error;
+
+  error = vlib_call_init_function (vm, vnet_feature_init);
+  return error;
+}
+
+VLIB_INIT_FUNCTION (ethernet_init);
+
+/*
+ * Return the global ethernet_main after invoking ethernet_init through
+ * vlib_call_init_function.  Any error from the init call is not
+ * propagated.
+ */
+ethernet_main_t *
+ethernet_get_main (vlib_main_t * vm)
+{
+  ethernet_main_t *em = &ethernet_main;
+
+  vlib_call_init_function (vm, ethernet_init);
+  return em;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
new file mode 100644
index 00000000..3e78a49d
--- /dev/null
+++ b/src/vnet/ethernet/interface.c
@@ -0,0 +1,880 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_interface.c: ethernet interfaces
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/adj/adj.h>
+
+/**
+ * @file
+ * @brief Loopback Interfaces.
+ *
+ * This file contains code to manage loopback interfaces.
+ */
+
+/* Return a pointer to the static 6-byte address 01:00:5e:00:00:00. */
+const u8 *
+ethernet_ip4_mcast_dst_addr (void)
+{
+  static const u8 ip4_mcast_mac[6] = {
+    0x01, 0x00, 0x5e, 0x00, 0x00, 0x00,
+  };
+
+  return ip4_mcast_mac;
+}
+
+/* Return a pointer to the static 6-byte address 33:33:00:00:00:00. */
+const u8 *
+ethernet_ip6_mcast_dst_addr (void)
+{
+  static const u8 ip6_mcast_mac[6] = {
+    0x33, 0x33, 0x00, 0x00, 0x00, 0x00,
+  };
+
+  return ip6_mcast_mac;
+}
+
+/**
+ * @brief build a rewrite string to use for sending packets of type 'link_type'
+ * to 'dst_address'
+ *
+ * The result is a vector holding an ethernet header, followed by one or
+ * two VLAN headers when the (non-P2P) sub-interface is configured with
+ * one_tag or two_tags.
+ *
+ * @param vnm          vnet main
+ * @param sw_if_index  interface the packets will be sent on
+ * @param link_type    IP4, IP6, MPLS or ARP; any other value yields NULL
+ * @param dst_address  6-byte destination MAC, or NULL for broadcast;
+ *                     ignored for P2P interfaces, which use client_mac
+ * @return the rewrite vector, or 0/NULL when the sub-interface encap or
+ *         the link type is not supported
+ */
+u8 *
+ethernet_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address)
+{
+ vnet_sw_interface_t *sub_sw = vnet_get_sw_interface (vnm, sw_if_index);
+ vnet_sw_interface_t *sup_sw = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_interface_t *ei;
+ ethernet_header_t *h;
+ ethernet_type_t type;
+ uword n_bytes = sizeof (h[0]); /* untagged header size; grown below for tags */
+ u8 *rewrite = NULL;
+ u8 is_p2p = 0;
+
+ if (sub_sw->type == VNET_SW_INTERFACE_TYPE_P2P)
+ is_p2p = 1;
+ if (sub_sw != sup_sw) /* only sub-interfaces can carry tags */
+ {
+ if (sub_sw->sub.eth.flags.one_tag)
+ {
+ n_bytes += sizeof (ethernet_vlan_header_t);
+ }
+ else if (sub_sw->sub.eth.flags.two_tags)
+ {
+ n_bytes += 2 * (sizeof (ethernet_vlan_header_t));
+ }
+ else if (PREDICT_FALSE (is_p2p))
+ {
+ n_bytes = sizeof (ethernet_header_t);
+ }
+ if (PREDICT_FALSE (!is_p2p))
+ {
+ // Check for encaps that are not supported for L3 interfaces
+ if (!(sub_sw->sub.eth.flags.exact_match) ||
+ (sub_sw->sub.eth.flags.default_sub) ||
+ (sub_sw->sub.eth.flags.outer_vlan_id_any) ||
+ (sub_sw->sub.eth.flags.inner_vlan_id_any))
+ {
+ return 0;
+ }
+ }
+ else
+ {
+ n_bytes = sizeof (ethernet_header_t); /* P2P: always a plain header, whatever the tag flags said */
+ }
+ }
+
+ switch (link_type) /* map the link type to its ethertype */
+ {
+#define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break
+ _(IP4, IP4);
+ _(IP6, IP6);
+ _(MPLS, MPLS);
+ _(ARP, ARP);
+#undef _
+ default:
+ return NULL;
+ }
+
+ vec_validate (rewrite, n_bytes - 1); /* size the vector to exactly n_bytes */
+ h = (ethernet_header_t *) rewrite;
+ ei = pool_elt_at_index (em->interfaces, hw->hw_instance);
+ clib_memcpy (h->src_address, ei->address, sizeof (h->src_address)); /* source is the parent hw interface's address */
+ if (is_p2p)
+ {
+ clib_memcpy (h->dst_address, sub_sw->p2p.client_mac,
+ sizeof (h->dst_address));
+ }
+ else
+ {
+ if (dst_address)
+ clib_memcpy (h->dst_address, dst_address, sizeof (h->dst_address));
+ else
+ memset (h->dst_address, ~0, sizeof (h->dst_address)); /* broadcast */
+ }
+
+ if (PREDICT_FALSE (!is_p2p) && sub_sw->sub.eth.flags.one_tag)
+ {
+ ethernet_vlan_header_t *outer = (void *) (h + 1);
+
+ h->type = sub_sw->sub.eth.flags.dot1ad ?
+ clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
+ clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+ outer->priority_cfi_and_id =
+ clib_host_to_net_u16 (sub_sw->sub.eth.outer_vlan_id);
+ outer->type = clib_host_to_net_u16 (type); /* single tag carries the payload ethertype */
+
+ }
+ else if (PREDICT_FALSE (!is_p2p) && sub_sw->sub.eth.flags.two_tags)
+ {
+ ethernet_vlan_header_t *outer = (void *) (h + 1);
+ ethernet_vlan_header_t *inner = (void *) (outer + 1);
+
+ h->type = sub_sw->sub.eth.flags.dot1ad ?
+ clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
+ clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+ outer->priority_cfi_and_id =
+ clib_host_to_net_u16 (sub_sw->sub.eth.outer_vlan_id);
+ outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); /* the inner tag is always announced as 802.1q */
+ inner->priority_cfi_and_id =
+ clib_host_to_net_u16 (sub_sw->sub.eth.inner_vlan_id);
+ inner->type = clib_host_to_net_u16 (type);
+
+ }
+ else
+ {
+ h->type = clib_host_to_net_u16 (type); /* untagged (or P2P) header */
+ }
+
+ return (rewrite);
+}
+
+/*
+ * Dispatch an adjacency update: P2P interfaces use
+ * default_update_adjacency; all others are routed by the adjacency's
+ * next-hop protocol to the ARP (IP4) or IP6 handler.  Any other
+ * protocol trips an ASSERT.
+ */
+void
+ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+  ip_adjacency_t *adj = adj_get (ai);
+  vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+
+  if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
+    {
+      default_update_adjacency (vnm, sw_if_index, ai);
+      return;
+    }
+
+  switch (adj->ia_nh_proto)
+    {
+    case FIB_PROTOCOL_IP4:
+      arp_update_adjacency (vnm, sw_if_index, ai);
+      break;
+    case FIB_PROTOCOL_IP6:
+      ip6_ethernet_update_adjacency (vnm, sw_if_index, ai);
+      break;
+    default:
+      ASSERT (0);
+      break;
+    }
+}
+
+/*
+ * MAC address change callback: copy the new address into both the hw
+ * interface's hw_address vector and the ethernet interface record, then
+ * notify the ARP and NDP code.  Always returns NULL.
+ */
+static clib_error_t *
+ethernet_mac_change (vnet_hw_interface_t * hi, char *mac_address)
+{
+  ethernet_main_t *em = &ethernet_main;
+  ethernet_interface_t *ei =
+    pool_elt_at_index (em->interfaces, hi->hw_instance);
+
+  /* Make sure hw_address is at least as long as a MAC address. */
+  vec_validate (hi->hw_address,
+                STRUCT_SIZE_OF (ethernet_header_t, src_address) - 1);
+  clib_memcpy (hi->hw_address, mac_address, vec_len (hi->hw_address));
+  clib_memcpy (ei->address, (u8 *) mac_address, sizeof (ei->address));
+
+  ethernet_arp_change_mac (hi->sw_if_index);
+  ethernet_ndp_change_mac (hi->sw_if_index);
+
+  return NULL;
+}
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = {
+ .name = "Ethernet",
+ .format_address = format_ethernet_address,
+ .format_header = format_ethernet_header_with_length,
+ .unformat_hw_address = unformat_ethernet_address,
+ .unformat_header = unformat_ethernet_header,
+ .build_rewrite = ethernet_build_rewrite,
+ .update_adjacency = ethernet_update_adjacency,
+ .mac_addr_change_function = ethernet_mac_change,
+};
+/* *INDENT-ON* */
+
+/*
+ * Parse a hardware interface and accept it only if it is an ethernet
+ * interface.
+ * va_args: vnet_main_t *, u32 * receiving the hw_if_index.
+ * Returns 1 on success, 0 otherwise (result untouched).
+ */
+uword
+unformat_ethernet_interface (unformat_input_t * input, va_list * args)
+{
+  vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+  u32 *result = va_arg (*args, u32 *);
+  u32 hw_if_index;
+
+  if (!unformat_user (input, unformat_vnet_hw_interface, vnm, &hw_if_index))
+    return 0;
+
+  if (!ethernet_get_interface (&ethernet_main, hw_if_index))
+    return 0;
+
+  *result = hw_if_index;
+  return 1;
+}
+
+/*
+ * Allocate an ethernet interface record and register a matching vnet
+ * hardware interface of class "Ethernet".
+ *
+ * dev_class_index / dev_instance identify the device driver instance,
+ * address is the 6-byte MAC, hw_if_index_return receives the new
+ * hardware interface index, and flag_change is stored as the callback
+ * used by ethernet_set_flags (may be 0).
+ *
+ * Nothing in this function can currently fail, so it always returns 0.
+ */
+clib_error_t *
+ethernet_register_interface (vnet_main_t * vnm,
+                             u32 dev_class_index,
+                             u32 dev_instance,
+                             u8 * address,
+                             u32 * hw_if_index_return,
+                             ethernet_flag_change_function_t flag_change)
+{
+  ethernet_main_t *em = &ethernet_main;
+  ethernet_interface_t *ei;
+  vnet_hw_interface_t *hi;
+  u32 hw_if_index;
+
+  pool_get (em->interfaces, ei);
+  ei->flag_change = flag_change;
+
+  /* The pool index of 'ei' becomes the hw interface's hw_instance. */
+  hw_if_index = vnet_register_interface
+    (vnm,
+     dev_class_index, dev_instance,
+     ethernet_hw_interface_class.index, ei - em->interfaces);
+  *hw_if_index_return = hw_if_index;
+
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+  ethernet_setup_node (vnm->vlib_main, hi->output_node_index);
+
+  hi->min_packet_bytes = hi->min_supported_packet_bytes =
+    ETHERNET_MIN_PACKET_BYTES;
+  hi->max_packet_bytes = hi->max_supported_packet_bytes =
+    ETHERNET_MAX_PACKET_BYTES;
+  hi->per_packet_overhead_bytes =
+    /* preamble */ 8 + /* inter frame gap */ 12;
+
+  /* Default maximum L3 packet size: 9000 bytes in both directions. */
+  hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+
+  clib_memcpy (ei->address, address, sizeof (ei->address));
+  vec_free (hi->hw_address);
+  vec_add (hi->hw_address, address, sizeof (ei->address));
+
+  return 0;
+}
+
+/*
+ * Delete an ethernet hardware interface: release the dot1q and dot1ad
+ * vlan tables (and any qinq tables they reference) belonging to the
+ * interface, delete the vnet hardware interface and return the ethernet
+ * interface record to its pool.
+ *
+ * The indices stored in main_intf and in the vlan table are reset to 0
+ * as they are freed, so stale pool indices are not left behind in the
+ * em->main_intfs slot for this hw_if_index.
+ */
+void
+ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index)
+{
+  ethernet_main_t *em = &ethernet_main;
+  ethernet_interface_t *ei;
+  vnet_hw_interface_t *hi;
+  main_intf_t *main_intf;
+  vlan_table_t *vlan_table;
+  u32 idx;
+
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
+  ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
+
+  /* Delete vlan mapping table for dot1q and dot1ad. */
+  main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
+  if (main_intf->dot1q_vlans)
+    {
+      vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
+      for (idx = 0; idx < ETHERNET_N_VLAN; idx++)
+        {
+          if (vlan_table->vlans[idx].qinqs)
+            {
+              pool_put_index (em->qinq_pool, vlan_table->vlans[idx].qinqs);
+              vlan_table->vlans[idx].qinqs = 0;
+            }
+        }
+      pool_put_index (em->vlan_pool, main_intf->dot1q_vlans);
+      main_intf->dot1q_vlans = 0;
+    }
+  if (main_intf->dot1ad_vlans)
+    {
+      vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
+      for (idx = 0; idx < ETHERNET_N_VLAN; idx++)
+        {
+          if (vlan_table->vlans[idx].qinqs)
+            {
+              pool_put_index (em->qinq_pool, vlan_table->vlans[idx].qinqs);
+              vlan_table->vlans[idx].qinqs = 0;
+            }
+        }
+      pool_put_index (em->vlan_pool, main_intf->dot1ad_vlans);
+      main_intf->dot1ad_vlans = 0;
+    }
+
+  vnet_delete_hw_interface (vnm, hw_if_index);
+  pool_put (em->interfaces, ei);
+}
+
+/*
+ * Forward a flag change to the interface's registered flag_change
+ * callback and return its result.  Returns ~0 when the interface has no
+ * callback.  Asserts that the interface is of the Ethernet hw class.
+ */
+u32
+ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+  ethernet_main_t *em = &ethernet_main;
+  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+  ethernet_interface_t *ei;
+
+  ASSERT (hi->hw_class_index == ethernet_hw_interface_class.index);
+
+  ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
+  if (!ei->flag_change)
+    return (u32) ~ 0;
+
+  return ei->flag_change (vnm, hi, flags);
+}
+
+/*
+ * Echo packets back to ethernet/l2-input.
+ *
+ * TX function of the loopback device class: every buffer in the frame
+ * is handed to the node in next slot
+ * VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT, with its RX interface
+ * set to the TX interface of the first buffer of the batch.  The TX
+ * counter of that interface is incremented once per batch with the
+ * packets and bytes of that batch only.
+ */
+static uword
+simulated_ethernet_interface_tx (vlib_main_t * vm,
+                                 vlib_node_runtime_t * node,
+                                 vlib_frame_t * frame)
+{
+  u32 n_left_from, n_left_to_next, n_copy, *from, *to_next;
+  u32 next_index = VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT;
+  u32 i, next_node_index, bvi_flag, sw_if_index;
+  u32 thread_index = vm->thread_index;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_interface_main_t *im = &vnm->interface_main;
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *loop_node;
+  vlib_buffer_t *b;
+
+  // check tx node index, it is ethernet-input on loopback create
+  // but can be changed to l2-input if loopback is configured as
+  // BVI of a BD (Bridge Domain).
+  loop_node = vec_elt (nm->nodes, node->node_index);
+  next_node_index = loop_node->next_nodes[next_index];
+  bvi_flag = (next_node_index == l2input_node.index) ? 1 : 0;
+
+  n_left_from = frame->n_vectors;
+  from = vlib_frame_args (frame);
+
+  while (n_left_from > 0)
+    {
+      /* Per-batch totals: the counter below is bumped once per batch,
+         so these must not carry over from the previous iteration. */
+      u32 n_pkts = 0, n_bytes = 0;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      n_copy = clib_min (n_left_from, n_left_to_next);
+
+      clib_memcpy (to_next, from, n_copy * sizeof (from[0]));
+      n_left_to_next -= n_copy;
+      n_left_from -= n_copy;
+
+      /* The first buffer's TX interface is used for the whole batch. */
+      b = vlib_get_buffer (vm, from[0]);
+      sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
+
+      for (i = 0; i < n_copy; i++)
+        {
+          b = vlib_get_buffer (vm, from[i]);
+
+          // Set up RX and TX indices as if received from a real driver
+          // unless loopback is used as a BVI. For BVI case, leave TX index
+          // and update l2_len in packet as required for l2 forwarding path
+          vnet_buffer (b)->sw_if_index[VLIB_RX] = sw_if_index;
+          if (bvi_flag)
+            {
+              vnet_update_l2_len (b);
+              vnet_buffer (b)->sw_if_index[VLIB_TX] = L2INPUT_BVI;
+            }
+          else
+            vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+          n_pkts++;
+          n_bytes += vlib_buffer_length_in_chain (vm, b);
+        }
+      from += n_copy;
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+      /* increment TX interface stat */
+      vlib_increment_combined_counter (im->combined_sw_if_counters +
+                                       VNET_INTERFACE_COUNTER_TX,
+                                       thread_index, sw_if_index, n_pkts,
+                                       n_bytes);
+    }
+
+  /* NOTE(review): n_left_from is always 0 here, so this returns 0 rather
+     than frame->n_vectors -- confirm whether that is intended. */
+  return n_left_from;
+}
+
+/* Device name formatter for loopbacks: "loop<dev_instance>". */
+static u8 *
+format_simulated_ethernet_name (u8 * s, va_list * args)
+{
+  u32 instance = va_arg (*args, u32);
+
+  s = format (s, "loop%d", instance);
+  return s;
+}
+
+/*
+ * Admin up/down handler for loopbacks: the hardware link-up flag simply
+ * mirrors the admin-up flag.  Always returns 0.
+ */
+static clib_error_t *
+simulated_ethernet_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
+                                  u32 flags)
+{
+  u32 hw_flags = 0;
+
+  if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+    hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
+
+  vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (ethernet_simulated_device_class) = {
+ .name = "Loopback",
+ .format_device_name = format_simulated_ethernet_name,
+ .tx_function = simulated_ethernet_interface_tx,
+ .admin_up_down_function = simulated_ethernet_admin_up_down,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * Maintain a bitmap of allocated loopback instance numbers.
+ */
+#define LOOPBACK_MAX_INSTANCE (16 * 1024)
+
+/*
+ * Reserve a loopback instance number in em->bm_loopback_instances.
+ *
+ * When is_specified is 0 the first clear bit is taken; otherwise 'want'
+ * is reserved if it is in range and not already in use.
+ * Returns the reserved instance number, or ~0 on failure.
+ */
+static u32
+loopback_instance_alloc (u8 is_specified, u32 want)
+{
+  ethernet_main_t *em = &ethernet_main;
+  u32 instance;
+
+  if (is_specified)
+    {
+      /* Caller-chosen number: must be in range ... */
+      if (want >= LOOPBACK_MAX_INSTANCE)
+        return ~0;
+
+      /* ... and not already in use. */
+      if (clib_bitmap_get (em->bm_loopback_instances, want))
+        return ~0;
+
+      instance = want;
+    }
+  else
+    {
+      /* Dynamically allocated instance number. */
+      instance = clib_bitmap_first_clear (em->bm_loopback_instances);
+      if (instance >= LOOPBACK_MAX_INSTANCE)
+        return ~0;
+    }
+
+  /* Grant allocation request. */
+  em->bm_loopback_instances = clib_bitmap_set (em->bm_loopback_instances,
+                                               instance, 1);
+  return instance;
+}
+
+/*
+ * Release a loopback instance number.
+ * Returns 0 on success, -1 when the number is out of range or was not
+ * allocated.
+ */
+static int
+loopback_instance_free (u32 instance)
+{
+  ethernet_main_t *em = &ethernet_main;
+
+  if (instance >= LOOPBACK_MAX_INSTANCE
+      || clib_bitmap_get (em->bm_loopback_instances, instance) == 0)
+    return -1;
+
+  em->bm_loopback_instances = clib_bitmap_set (em->bm_loopback_instances,
+                                               instance, 0);
+  return 0;
+}
+
+/*
+ * Create a loopback interface.
+ *
+ * sw_if_indexp   receives the new software interface index (set to ~0
+ *                on failure).
+ * mac_address    6-byte MAC to use; an all-zero (or NULL) address
+ *                selects the default de:ad:00:00:<hi>:<lo>, where
+ *                <hi>:<lo> is the 16-bit instance number.
+ * is_specified   non-zero to request 'user_instance' as the instance
+ *                number; zero to pick the first free one.
+ *
+ * Returns 0 on success or VNET_API_ERROR_INVALID_REGISTRATION when no
+ * instance number could be reserved or registration failed.
+ */
+int
+vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address,
+                                u8 is_specified, u32 user_instance)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vlib_main_t *vm = vlib_get_main ();
+  clib_error_t *error;
+  u32 instance;
+  u8 address[6];
+  u32 hw_if_index;
+  vnet_hw_interface_t *hw_if;
+  u32 slot;
+
+  ASSERT (sw_if_indexp);
+
+  *sw_if_indexp = (u32) ~ 0;
+
+  memset (address, 0, sizeof (address));
+
+  /*
+   * Allocate a loopback instance.  Either select one dynamically
+   * or try to use the desired user_instance number.
+   */
+  instance = loopback_instance_alloc (is_specified, user_instance);
+  if (instance == ~0)
+    {
+      return VNET_API_ERROR_INVALID_REGISTRATION;
+    }
+
+  /*
+   * Default MAC address (dead:0000:0000 + instance) is allocated
+   * if zero mac_address is configured. Otherwise, user-configurable MAC
+   * address is programmed on the loopback interface.
+   */
+  if (mac_address && memcmp (address, mac_address, sizeof (address)))
+    clib_memcpy (address, mac_address, sizeof (address));
+  else
+    {
+      address[0] = 0xde;
+      address[1] = 0xad;
+      /* Instance numbers go up to LOOPBACK_MAX_INSTANCE (16K), so use
+         two bytes; otherwise instances 256 apart share a default MAC. */
+      address[4] = (instance >> 8) & 0xff;
+      address[5] = instance & 0xff;
+    }
+
+  error = ethernet_register_interface
+    (vnm,
+     ethernet_simulated_device_class.index, instance, address, &hw_if_index,
+     /* flag change */ 0);
+
+  if (error)
+    {
+      /* Do not leak the instance number reserved above. */
+      loopback_instance_free (instance);
+      clib_error_report (error);
+      return VNET_API_ERROR_INVALID_REGISTRATION;
+    }
+
+  /* Wire the loopback's tx node to ethernet-input in the expected slot. */
+  hw_if = vnet_get_hw_interface (vnm, hw_if_index);
+  slot = vlib_node_add_named_next_with_slot
+    (vm, hw_if->tx_node_index,
+     "ethernet-input", VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+  ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
+
+  {
+    vnet_sw_interface_t *si = vnet_get_hw_sw_interface (vnm, hw_if_index);
+    *sw_if_indexp = si->sw_if_index;
+  }
+
+  return 0;
+}
+
+/*
+ * CLI handler shared by "loopback create-interface" and
+ * "create loopback interface".
+ *
+ * Accepts the optional clauses "mac <mac-addr>" and
+ * "instance <instance>" in any order; parsing stops at the first token
+ * that is neither.  On success the name of the new interface is
+ * printed.
+ */
+static clib_error_t *
+create_simulated_ethernet_interfaces (vlib_main_t * vm,
+                                      unformat_input_t * input,
+                                      vlib_cli_command_t * cmd)
+{
+  int rv;
+  u32 sw_if_index;
+  u8 mac_address[6];
+  u8 is_specified = 0;
+  u32 user_instance = 0;
+
+  memset (mac_address, 0, sizeof (mac_address));
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "mac %U", unformat_ethernet_address, mac_address))
+        ;
+      else if (unformat (input, "instance %d", &user_instance))
+        is_specified = 1;
+      else
+        break;
+    }
+
+  rv = vnet_create_loopback_interface (&sw_if_index, mac_address,
+                                       is_specified, user_instance);
+
+  if (rv)
+    return clib_error_return (0, "vnet_create_loopback_interface failed");
+
+  vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
+                   sw_if_index);
+  return 0;
+}
+
+/*?
+ * Create a loopback interface. Optionally, a MAC Address can be
+ * provided. If not provided, de:ad:00:00:00:<loopId> will be used.
+ *
+ * @cliexpar
+ * The following two command syntaxes are equivalent:
+ * @cliexcmd{loopback create-interface [mac <mac-addr>] [instance <instance>]}
+ * @cliexcmd{create loopback interface [mac <mac-addr>] [instance <instance>]}
+ * Example of how to create a loopback interface:
+ * @cliexcmd{loopback create-interface}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = {
+ .path = "loopback create-interface",
+ .short_help = "loopback create-interface [mac <mac-addr>] [instance <instance>]",
+ .function = create_simulated_ethernet_interfaces,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Create a loopback interface. Optionally, a MAC Address can be
+ * provided. If not provided, de:ad:00:00:00:<loopId> will be used.
+ *
+ * @cliexpar
+ * The following two command syntaxes are equivalent:
+ * @cliexcmd{loopback create-interface [mac <mac-addr>] [instance <instance>]}
+ * @cliexcmd{create loopback interface [mac <mac-addr>] [instance <instance>]}
+ * Example of how to create a loopback interface:
+ * @cliexcmd{create loopback interface}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_loopback_interface_command, static) = {
+ .path = "create loopback interface",
+ .short_help = "create loopback interface [mac <mac-addr>] [instance <instance>]",
+ .function = create_simulated_ethernet_interfaces, /* same handler as "loopback create-interface" */
+};
+/* *INDENT-ON* */
+
+/*
+ * Return the ethernet_interface_t that backs hardware interface
+ * hw_if_index, or 0 when that hardware interface is not of the ethernet
+ * hardware class.
+ */
+ethernet_interface_t *
+ethernet_get_interface (ethernet_main_t * em, u32 hw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+  if (hw->hw_class_index != ethernet_hw_interface_class.index)
+    return 0;
+
+  return pool_elt_at_index (em->interfaces, hw->hw_instance);
+}
+
+/*
+ * Delete the loopback interface identified by sw_if_index.
+ *
+ * Returns 0 on success.  VNET_API_ERROR_INVALID_SW_IF_INDEX is returned
+ * when sw_if_index is not an allocated software interface, or when
+ * loopback_instance_free () rejects the device instance.
+ *
+ * NOTE(review): nothing here verifies that the interface really is a
+ * loopback before its device instance is released -- confirm that
+ * callers guarantee it.
+ */
+int
+vnet_delete_loopback_interface (u32 sw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hw;
+  u32 hw_if_index;
+
+  if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  hw_if_index = vnet_get_sw_interface (vnm, sw_if_index)->hw_if_index;
+  hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+  /* release the instance number first; give up if that is refused */
+  if (loopback_instance_free (hw->dev_instance) < 0)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  ethernet_delete_interface (vnm, hw_if_index);
+  return 0;
+}
+
+/*
+ * Delete the sub-interface (or P2P interface) identified by sw_if_index.
+ *
+ * Removes the interface's (sup_sw_if_index, sub id) entry from
+ * sw_if_index_by_sup_and_sub and then deletes the software interface.
+ *
+ * Returns 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX when the index
+ * is not allocated, and VNET_API_ERROR_INVALID_SUB_SW_IF_INDEX when the
+ * interface is neither of type SUB nor of type P2P.
+ */
+int
+vnet_delete_sub_interface (u32 sw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_interface_main_t *im = &vnm->interface_main;
+  vnet_sw_interface_t *si;
+  u64 sup_and_sub_key;
+
+  if (pool_is_free_index (im->sw_interfaces, sw_if_index))
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  /* looked up once; the original fetched it a second time under a
+   * shadowing declaration */
+  si = vnet_get_sw_interface (vnm, sw_if_index);
+
+  if (si->type != VNET_SW_INTERFACE_TYPE_SUB &&
+      si->type != VNET_SW_INTERFACE_TYPE_P2P)
+    return VNET_API_ERROR_INVALID_SUB_SW_IF_INDEX;
+
+  /* key layout: parent sw_if_index in the high 32 bits, sub id below */
+  sup_and_sub_key = ((u64) (si->sup_sw_if_index) << 32) | (u64) si->sub.id;
+
+  hash_unset_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key);
+  vnet_delete_sw_interface (vnm, sw_if_index);
+
+  return 0;
+}
+
+/*
+ * CLI handler shared by "loopback delete-interface" and
+ * "delete loopback interface".  Expects "intfc <interface>" and removes
+ * that interface through vnet_delete_loopback_interface ().
+ *
+ * Returns 0 on success, or a clib error when no interface was given or
+ * the deletion failed.
+ */
+static clib_error_t *
+delete_simulated_ethernet_interfaces (vlib_main_t * vm,
+                                      unformat_input_t * input,
+                                      vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 sw_if_index = ~0;
+
+  /* consume "intfc <interface>" arguments; stop at anything else */
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat (input, "intfc %U",
+                     unformat_vnet_sw_interface, vnm, &sw_if_index))
+        break;
+    }
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "interface not specified");
+
+  if (vnet_delete_loopback_interface (sw_if_index))
+    return clib_error_return (0, "vnet_delete_loopback_interface failed");
+
+  return 0;
+}
+
+/*
+ * CLI handler for "delete sub-interface <interface>".
+ *
+ * Parses the interface name and hands its sw_if_index to
+ * vnet_delete_sub_interface ().  Returns 0 on success, or a clib error
+ * when no interface could be parsed or the deletion was refused.
+ */
+static clib_error_t *
+delete_sub_interface (vlib_main_t * vm,
+                      unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 sw_if_index = ~0;
+
+  /* consume interface names; stop at the first token that is not one */
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat
+          (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+        break;
+    }
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "interface doesn't exist");
+
+  /* an unallocated index and a failed delete are reported the same way */
+  if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index)
+      || vnet_delete_sub_interface (sw_if_index))
+    return clib_error_return (0, "delete_subinterface_interface failed");
+
+  return 0;
+}
+
+/*?
+ * Delete a loopback interface.
+ *
+ * @cliexpar
+ * The following two command syntaxes are equivalent:
+ * @cliexcmd{loopback delete-interface intfc <interface>}
+ * @cliexcmd{delete loopback interface intfc <interface>}
+ * Example of how to delete a loopback interface:
+ * @cliexcmd{loopback delete-interface intfc loop0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (delete_simulated_ethernet_interface_command, static) = {
+ .path = "loopback delete-interface",
+ .short_help = "loopback delete-interface intfc <interface>",
+ .function = delete_simulated_ethernet_interfaces, /* same handler as "delete loopback interface" */
+};
+/* *INDENT-ON* */
+
+/*?
+ * Delete a loopback interface.
+ *
+ * @cliexpar
+ * The following two command syntaxes are equivalent:
+ * @cliexcmd{loopback delete-interface intfc <interface>}
+ * @cliexcmd{delete loopback interface intfc <interface>}
+ * Example of how to delete a loopback interface:
+ * @cliexcmd{delete loopback interface intfc loop0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (delete_loopback_interface_command, static) = {
+ .path = "delete loopback interface",
+ .short_help = "delete loopback interface intfc <interface>",
+ .function = delete_simulated_ethernet_interfaces, /* same handler as "loopback delete-interface" */
+};
+/* *INDENT-ON* */
+
+/*?
+ * Delete a sub-interface.
+ *
+ * @cliexpar
+ * Example of how to delete a sub-interface:
+ * @cliexcmd{delete sub-interface GigabitEthernet0/8/0.200}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (delete_sub_interface_command, static) = {
+ .path = "delete sub-interface",
+ .short_help = "delete sub-interface <interface>",
+ .function = delete_sub_interface, /* fails unless the interface is of type SUB or P2P */
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/mac_swap.c b/src/vnet/ethernet/mac_swap.c
new file mode 100644
index 00000000..c0fec12e
--- /dev/null
+++ b/src/vnet/ethernet/mac_swap.c
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/error.h>
+#include <vnet/devices/pci/ige.h>
+#include <vnet/devices/pci/ixge.h>
+#include <vnet/devices/pci/ixgev.h>
+/* Global state of the mac-swap node. */
+typedef struct
+{
+ u32 cached_next_index; /* next index resolved for cached_sw_if_index */
+ u32 cached_sw_if_index; /* RX sw_if_index of the most recent lookup; ~0 after init */
+
+ /* Hash table to map sw_if_index to next node index */
+ uword *next_node_index_by_sw_if_index;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} mac_swap_main_t;
+/* Per-packet trace record, filled in after the swap and printed by format_swap_trace. */
+typedef struct
+{
+ u8 src[6]; /* source MAC as it stands after the swap */
+ u8 dst[6]; /* destination MAC as it stands after the swap */
+ u32 sw_if_index; /* RX sw_if_index of the buffer */
+ u32 next_index; /* next index the buffer was enqueued to */
+} swap_trace_t;
+
+/*
+ * Packet trace formatter for the mac-swap node.
+ *
+ * Takes, through the va_list, the vlib main and node pointers (both
+ * unused) followed by a swap_trace_t, and appends the post-swap
+ * addresses, the RX sw_if_index and the chosen next index to s.
+ */
+static u8 *
+format_swap_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  swap_trace_t *trace = va_arg (*args, swap_trace_t *);
+
+  return format (s,
+                 "SWAP: dst now %U src now %U sw_if_index %d next_index %d",
+                 format_ethernet_address, trace->dst,
+                 format_ethernet_address, trace->src,
+                 trace->sw_if_index, trace->next_index);
+}
+/* Driver RX next slots that mac_swap_init redirects to "mac-swap". */
+#define foreach_hw_driver_next \
+ _(IP4) \
+ _(IP6) \
+ _(ETHERNET)
+/* The single global instance of the node's state. */
+mac_swap_main_t mac_swap_main;
+
+static vlib_node_registration_t mac_swap_node;
+/* Counters of this node, one _(symbol, description) entry each. */
+#define foreach_mac_swap_error \
+_(SWAPS, "mac addresses swapped")
+/* MAC_SWAP_ERROR_<symbol> counter indices, followed by their count. */
+typedef enum
+{
+#define _(sym,str) MAC_SWAP_ERROR_##sym,
+ foreach_mac_swap_error
+#undef _
+ MAC_SWAP_N_ERROR,
+} mac_swap_error_t;
+/* Counter descriptions, generated from the same list as mac_swap_error_t. */
+static char *mac_swap_error_strings[] = {
+#define _(sym,string) string,
+ foreach_mac_swap_error
+#undef _
+};
+
+/*
+ * To drop a pkt and increment one of the previous counters:
+ *
+ * set b0->error = node->errors[MAC_SWAP_ERROR_SWAPS];
+ * set next0 to a disposition index bound to "error-drop".
+ *
+ * To manually increment the specific counter MAC_SWAP_ERROR_SWAPS:
+ *
+ * vlib_node_t *n = vlib_get_node (vm, mac_swap_node.index);
+ * u32 node_counter_base_index = n->error_heap_index;
+ * vlib_error_main_t * em = &vm->error_main;
+ * em->counters[node_counter_base_index + MAC_SWAP_ERROR_SWAPS] += 1;
+ *
+ */
+/* Statically registered next nodes; arcs to output nodes are added at run time by mac_swap_node_fn. */
+typedef enum
+{
+ MAC_SWAP_NEXT_DROP, /* bound to "error-drop" in the node registration */
+ MAC_SWAP_N_NEXT,
+} mac_swap_next_t;
+/*
+ * mac-swap node function.
+ *
+ * For every buffer in the frame: reset the buffer so it points at the
+ * MAC header, exchange the source and destination MAC addresses while
+ * keeping the ethertype, and enqueue the buffer to the next index
+ * associated with its RX sw_if_index.  That next index is an arc to the
+ * output node of the interface's hardware interface; it is created on
+ * first use and remembered in next_node_index_by_sw_if_index.
+ *
+ * Returns the number of buffers in the frame.
+ */
+static uword
+mac_swap_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ mac_swap_next_t next_index;
+ mac_swap_main_t *msm = &mac_swap_main;
+ vlib_node_t *n = vlib_get_node (vm, mac_swap_node.index);
+ u32 node_counter_base_index = n->error_heap_index;
+ vlib_error_main_t *em = &vm->error_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ uword *p0, *p1;
+ u64 tmp0a, tmp0b;
+ u64 tmp1a, tmp1b;
+ ethernet_header_t *h0, *h1;
+
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ next0 = msm->cached_next_index;
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ next1 = msm->cached_next_index;
+ /* Cache miss: look up, or create on first sight, the arc to the
+ * output node of the RX interface's hardware interface. */
+ if (PREDICT_FALSE (msm->cached_sw_if_index != sw_if_index0))
+ {
+ p0 =
+ hash_get (msm->next_node_index_by_sw_if_index, sw_if_index0);
+ if (p0 == 0)
+ {
+ vnet_hw_interface_t *hw0;
+
+ hw0 = vnet_get_sup_hw_interface (msm->vnet_main,
+ sw_if_index0);
+
+ next0 = vlib_node_add_next (msm->vlib_main,
+ mac_swap_node.index,
+ hw0->output_node_index);
+ hash_set (msm->next_node_index_by_sw_if_index,
+ sw_if_index0, next0);
+ }
+ else
+ next0 = p0[0];
+ msm->cached_sw_if_index = sw_if_index0;
+ msm->cached_next_index = next0;
+ next1 = next0; /* next1 was read from the cache before this update */
+ }
+ if (PREDICT_FALSE (msm->cached_sw_if_index != sw_if_index1))
+ {
+ p1 =
+ hash_get (msm->next_node_index_by_sw_if_index, sw_if_index1);
+ if (p1 == 0)
+ {
+ vnet_hw_interface_t *hw1;
+
+ hw1 = vnet_get_sup_hw_interface (msm->vnet_main,
+ sw_if_index1);
+
+ next1 = vlib_node_add_next (msm->vlib_main,
+ mac_swap_node.index,
+ hw1->output_node_index);
+ hash_set (msm->next_node_index_by_sw_if_index,
+ sw_if_index1, next1);
+ }
+ else
+ next1 = p1[0];
+ msm->cached_sw_if_index = sw_if_index1;
+ msm->cached_next_index = next1;
+ }
+ /* Both buffers are counted directly in the node's counter slot. */
+ em->counters[node_counter_base_index + MAC_SWAP_ERROR_SWAPS] += 2;
+
+ /* reset buffer so we always point at the MAC hdr */
+ vlib_buffer_reset (b0);
+ vlib_buffer_reset (b1);
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ /* Swap 2 x src and dst mac addresses using 8-byte load/stores.
+ * NOTE(review): each 8-byte access covers 2 bytes beyond the 6-byte
+ * address; presumably ethernet_header_t is laid out dst, src, type,
+ * so the load at src_address also picks up the ethertype -- confirm. */
+ tmp0a = clib_net_to_host_u64 (((u64 *) (h0->dst_address))[0]);
+ tmp1a = clib_net_to_host_u64 (((u64 *) (h1->dst_address))[0]);
+ tmp0b = clib_net_to_host_u64 (((u64 *) (h0->src_address))[0]);
+ tmp1b = clib_net_to_host_u64 (((u64 *) (h1->src_address))[0]);
+ ((u64 *) (h0->dst_address))[0] = clib_host_to_net_u64 (tmp0b);
+ ((u64 *) (h1->dst_address))[0] = clib_host_to_net_u64 (tmp1b);
+ /* Move the ethertype from "b" to "a" */
+ tmp0a &= ~(0xFFFF);
+ tmp1a &= ~(0xFFFF);
+ tmp0a |= tmp0b & 0xFFFF;
+ ((u64 *) (h0->src_address))[0] = clib_host_to_net_u64 (tmp0a);
+ tmp1a |= tmp1b & 0xFFFF;
+ ((u64 *) (h1->src_address))[0] = clib_host_to_net_u64 (tmp1a);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ swap_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ swap_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ clib_memcpy (t->src, h1->src_address, 6);
+ clib_memcpy (t->dst, h1->dst_address, 6);
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 sw_if_index0;
+ uword *p0;
+ u64 tmp0a, tmp0b;
+ ethernet_header_t *h0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ next0 = msm->cached_next_index;
+ /* Same next-index cache handling as in the dual loop above. */
+ if (PREDICT_FALSE (msm->cached_sw_if_index != sw_if_index0))
+ {
+ p0 =
+ hash_get (msm->next_node_index_by_sw_if_index, sw_if_index0);
+ if (p0 == 0)
+ {
+ vnet_hw_interface_t *hw0;
+
+ hw0 = vnet_get_sup_hw_interface (msm->vnet_main,
+ sw_if_index0);
+
+ next0 = vlib_node_add_next (msm->vlib_main,
+ mac_swap_node.index,
+ hw0->output_node_index);
+ hash_set (msm->next_node_index_by_sw_if_index,
+ sw_if_index0, next0);
+ }
+ else
+ next0 = p0[0];
+ msm->cached_sw_if_index = sw_if_index0;
+ msm->cached_next_index = next0;
+ }
+
+ em->counters[node_counter_base_index + MAC_SWAP_ERROR_SWAPS] += 1;
+
+ /* reset buffer so we always point at the MAC hdr */
+ vlib_buffer_reset (b0);
+ h0 = vlib_buffer_get_current (b0);
+
+ /* Exchange src and dst, preserve the ethertype */
+ tmp0a = clib_net_to_host_u64 (((u64 *) (h0->dst_address))[0]);
+ tmp0b = clib_net_to_host_u64 (((u64 *) (h0->src_address))[0]);
+ ((u64 *) (h0->dst_address))[0] = clib_host_to_net_u64 (tmp0b);
+ tmp0a &= ~(0xFFFF);
+ tmp0a |= tmp0b & 0xFFFF;
+ ((u64 *) (h0->src_address))[0] = clib_host_to_net_u64 (tmp0a);
+
+ /* ship it */
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ swap_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ clib_memcpy (t->src, h0->src_address, 6);
+ clib_memcpy (t->dst, h0->dst_address, 6);
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (mac_swap_node,static) = {
+ .function = mac_swap_node_fn,
+ .name = "mac-swap", /* the name mac_swap_init hands to the driver set_next_node calls */
+ .vector_size = sizeof (u32),
+ .format_trace = format_swap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(mac_swap_error_strings),
+ .error_strings = mac_swap_error_strings,
+
+ .n_next_nodes = MAC_SWAP_N_NEXT,
+
+ /* edit / add dispositions here; arcs to interface output nodes are
+ * not listed because mac_swap_node_fn adds them at run time */
+ .next_nodes = {
+ [MAC_SWAP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+/*
+ * Init function of the mac-swap node: creates the sw_if_index -> next
+ * index hash, invalidates the one-entry lookup cache, records the vlib
+ * and vnet main pointers, and redirects driver RX next slots to
+ * "mac-swap".  Always returns 0.
+ */
+clib_error_t *
+mac_swap_init (vlib_main_t * vm)
+{
+ mac_swap_main_t *msm = &mac_swap_main;
+
+ msm->next_node_index_by_sw_if_index = hash_create (0, sizeof (uword));
+ msm->cached_next_index = (u32) ~ 0;
+ msm->cached_sw_if_index = (u32) ~ 0;
+ msm->vlib_main = vm;
+ msm->vnet_main = vnet_get_main ();
+
+ /* Driver RX nodes send pkts here: the IP4, IP6 and ETHERNET RX next
+ * slots of the ixge, ixgev and ige drivers are each set to "mac-swap". */
+#define _(a) ixge_set_next_node (IXGE_RX_NEXT_##a##_INPUT, "mac-swap");
+ foreach_hw_driver_next
+#undef _
+#define _(a) ixgev_set_next_node (IXGEV_RX_NEXT_##a##_INPUT, "mac-swap");
+ foreach_hw_driver_next
+#undef _
+#define _(a) ige_set_next_node (IGE_RX_NEXT_##a##_INPUT, "mac-swap");
+ foreach_hw_driver_next
+#undef _
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (mac_swap_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c
new file mode 100755
index 00000000..f216216d
--- /dev/null
+++ b/src/vnet/ethernet/node.c
@@ -0,0 +1,1419 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_node.c: ethernet packet processing
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/p2p_ethernet.h>
+#include <vppinfra/sparse_vec.h>
+#include <vnet/l2/l2_bvi.h>
+
+/* Static next list; each entry pairs an enum suffix with a string (apparently the next node's name). */
+#define foreach_ethernet_input_next \
+ _ (PUNT, "error-punt") \
+ _ (DROP, "error-drop") \
+ _ (LLC, "llc-input")
+/* ETHERNET_INPUT_NEXT_<symbol> indices in list order, followed by their count. */
+typedef enum
+{
+#define _(s,n) ETHERNET_INPUT_NEXT_##s,
+ foreach_ethernet_input_next
+#undef _
+ ETHERNET_INPUT_N_NEXT,
+} ethernet_input_next_t;
+/* Trace record: 32 bytes of packet data, rendered with format_ethernet_header. */
+typedef struct
+{
+ u8 packet_data[32];
+} ethernet_input_trace_t;
+
+/*
+ * Packet trace formatter for ethernet input.
+ *
+ * Takes, through the va_list, the vlib main and node pointers (both
+ * unused) followed by an ethernet_input_trace_t, and appends the captured
+ * packet bytes to s formatted as an ethernet header.
+ */
+static u8 *
+format_ethernet_input_trace (u8 * s, va_list * va)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  ethernet_input_trace_t *trace = va_arg (*va, ethernet_input_trace_t *);
+
+  return format (s, "%U", format_ethernet_header, trace->packet_data);
+}
+/* Node registration object; its .index is used to fetch the error-counter runtime for the non-ETHERNET variants. */
+vlib_node_registration_t ethernet_input_node;
+/* How a buffer is presented to the shared input code. */
+typedef enum
+{
+ ETHERNET_INPUT_VARIANT_ETHERNET, /* buffer starts at a full ethernet header */
+ ETHERNET_INPUT_VARIANT_ETHERNET_TYPE, /* buffer starts at the 2-byte ethertype (prior node was LLC/SNAP) */
+ ETHERNET_INPUT_VARIANT_NOT_L2, /* full ethernet header, but is_l2 is forced to 0 */
+} ethernet_input_variant_t;
+
+
+/* Parse the ethernet header to extract vlan tags and innermost ethertype.
+ *
+ * Advances b0 past the ethernet header (or past the bare 2-byte
+ * ethertype for ETHERNET_INPUT_VARIANT_ETHERNET_TYPE) and past up to
+ * three VLAN headers.  Outputs, all in host byte order:
+ * *type - ethertype after the parsed tags; with three or more tags
+ * it stays at the value read from the second tag
+ * *orig_type - ethertype found before any VLAN tag
+ * *outer_id - 12-bit id of the first tag, 0 when untagged
+ * *inner_id - 12-bit id of the second tag, 0 when absent
+ * *match_flags - SUBINT_CONFIG_VALID plus the MATCH_n_TAG bit for the
+ * number of tags seen (no MATCH bit for a single tag
+ * whose id is 0)
+ * The tag count, where 3 stands for "three or more", is stored on the
+ * buffer with ethernet_buffer_set_vlan_count ().
+ */
+static_always_inline void
+parse_header (ethernet_input_variant_t variant,
+ vlib_buffer_t * b0,
+ u16 * type,
+ u16 * orig_type,
+ u16 * outer_id, u16 * inner_id, u32 * match_flags)
+{
+ u8 vlan_count;
+
+ if (variant == ETHERNET_INPUT_VARIANT_ETHERNET
+ || variant == ETHERNET_INPUT_VARIANT_NOT_L2)
+ {
+ ethernet_header_t *e0;
+
+ e0 = (void *) (b0->data + b0->current_data);
+
+ vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
+
+ vlib_buffer_advance (b0, sizeof (e0[0]));
+
+ *type = clib_net_to_host_u16 (e0->type);
+ }
+ else if (variant == ETHERNET_INPUT_VARIANT_ETHERNET_TYPE)
+ {
+ // here when prior node was LLC/SNAP processing
+ u16 *e0;
+
+ e0 = (void *) (b0->data + b0->current_data);
+
+ vlib_buffer_advance (b0, sizeof (e0[0]));
+
+ *type = clib_net_to_host_u16 (e0[0]);
+ }
+
+ // save for distinguishing between dot1q and dot1ad later
+ *orig_type = *type;
+
+ // default the tags to 0 (used if there is no corresponding tag)
+ *outer_id = 0;
+ *inner_id = 0;
+
+ *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_0_TAG;
+ vlan_count = 0;
+
+ // check for vlan encaps
+ if (ethernet_frame_is_tagged (*type))
+ {
+ ethernet_vlan_header_t *h0;
+ u16 tag;
+
+ *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG;
+
+ h0 = (void *) (b0->data + b0->current_data);
+
+ tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
+
+ *outer_id = tag & 0xfff;
+ if (0 == *outer_id)
+ *match_flags &= ~SUBINT_CONFIG_MATCH_1_TAG; /* id 0: only SUBINT_CONFIG_VALID stays set */
+
+ *type = clib_net_to_host_u16 (h0->type);
+
+ vlib_buffer_advance (b0, sizeof (h0[0]));
+ vlan_count = 1;
+
+ if (*type == ETHERNET_TYPE_VLAN)
+ {
+ // Double tagged packet
+ *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_2_TAG;
+
+ h0 = (void *) (b0->data + b0->current_data);
+
+ tag = clib_net_to_host_u16 (h0->priority_cfi_and_id);
+
+ *inner_id = tag & 0xfff;
+
+ *type = clib_net_to_host_u16 (h0->type);
+
+ vlib_buffer_advance (b0, sizeof (h0[0]));
+ vlan_count = 2;
+ if (*type == ETHERNET_TYPE_VLAN)
+ {
+ // More than double tagged packet
+ *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG;
+
+ vlib_buffer_advance (b0, sizeof (h0[0])); /* step over the third tag without reading it */
+ vlan_count = 3; // "unknown" number, aka, 3-or-more
+ }
+ }
+ }
+ ethernet_buffer_set_vlan_count (b0, vlan_count);
+}
+
+/*
+ * Work out which subinterface a packet belongs to, from the results of
+ * the vlan table lookups and of the vlan header parsing; the matching
+ * itself is delegated to eth_identify_subint ().
+ *
+ * When a subinterface matched, two further checks may set *error0:
+ *  - on a non-L2 interface, a frame whose destination address is not a
+ *    group address must be addressed to the hardware interface's own
+ *    MAC, otherwise ETHERNET_ERROR_L3_MAC_MISMATCH (needed so that
+ *    promiscuous mode does not forward frames meant for others);
+ *  - a resulting sw_if_index of ~0 yields ETHERNET_ERROR_DOWN.
+ */
+static_always_inline void
+identify_subint (vnet_hw_interface_t * hi,
+                 vlib_buffer_t * b0,
+                 u32 match_flags,
+                 main_intf_t * main_intf,
+                 vlan_intf_t * vlan_intf,
+                 qinq_intf_t * qinq_intf,
+                 u32 * new_sw_if_index, u8 * error0, u32 * is_l2)
+{
+  u32 found;
+
+  found = eth_identify_subint (hi, b0, match_flags,
+                               main_intf, vlan_intf, qinq_intf,
+                               new_sw_if_index, error0, is_l2);
+  if (!found)
+    return;
+
+  /* L3 my-mac filter */
+  if (!(*is_l2))
+    {
+      ethernet_header_t *eth;
+
+      eth = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset);
+
+      if (!(ethernet_address_cast (eth->dst_address))
+          && !eth_mac_equal ((u8 *) eth, hi->hw_address))
+        *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
+    }
+
+  /* a down subinterface overrides whatever error was recorded above */
+  if ((*new_sw_if_index) == ~0)
+    *error0 = ETHERNET_ERROR_DOWN;
+}
+
+/*
+ * Pick the next node for one packet and store it in *next0.
+ *
+ * In order of precedence: a packet that already carries an error is
+ * dropped; an L2 packet goes to em->l2_next with its buffer rewound to
+ * the start of the L2 header; IP4, IP6 and MPLS use their cached next
+ * nodes; with L3 redirect enabled every other type goes to the redirect
+ * node; anything else is looked up in the per-ethertype sparse vector,
+ * and *error0 becomes ETHERNET_ERROR_UNKNOWN_TYPE when it is not found.
+ */
+static_always_inline void
+determine_next_node (ethernet_main_t * em,
+                     ethernet_input_variant_t variant,
+                     u32 is_l20,
+                     u32 type0, vlib_buffer_t * b0, u8 * error0, u8 * next0)
+{
+  if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE))
+    {
+      /* an earlier stage flagged this packet */
+      *next0 = ETHERNET_INPUT_NEXT_DROP;
+      return;
+    }
+
+  if (is_l20)
+    {
+      u32 l2_start = vnet_buffer (b0)->l2_hdr_offset;
+
+      *next0 = em->l2_next;
+      /* note the L2 header length, then rewind so the header is kept */
+      vnet_buffer (b0)->l2.l2_len = b0->current_data - l2_start;
+      ASSERT (vnet_buffer (b0)->l2.l2_len ==
+              ethernet_buffer_header_size (b0));
+      vlib_buffer_advance (b0, -ethernet_buffer_header_size (b0));
+      return;
+    }
+
+  /* the common IP/MPLS ethertypes have dedicated next nodes */
+  if (type0 == ETHERNET_TYPE_IP4)
+    {
+      *next0 = em->l3_next.input_next_ip4;
+      return;
+    }
+
+  if (type0 == ETHERNET_TYPE_IP6)
+    {
+      *next0 = em->l3_next.input_next_ip6;
+      return;
+    }
+
+  if (type0 == ETHERNET_TYPE_MPLS)
+    {
+      *next0 = em->l3_next.input_next_mpls;
+      return;
+    }
+
+  if (em->redirect_l3)
+    {
+      /* with L3 redirect on, the cached next nodes above already point
+       * at the redirect node; send the uncommon types there as well */
+      *next0 = em->redirect_l3_next;
+      return;
+    }
+
+  /* uncommon ethertype: consult the table */
+  {
+    u32 slot = sparse_vec_index (em->l3_next.input_next_by_type, type0);
+
+    *next0 = vec_elt (em->l3_next.input_next_by_type, slot);
+    if (slot == SPARSE_VEC_INVALID_INDEX)
+      *error0 = ETHERNET_ERROR_UNKNOWN_TYPE;
+
+    /* The table holds no LLC values.  Only the plain ethernet variant is
+     * handed to LLC processing for values below 0x600; the other
+     * variants keep the table result instead of being sent there. */
+    if ((type0 < 0x600) && (variant == ETHERNET_INPUT_VARIANT_ETHERNET))
+      *next0 = ETHERNET_INPUT_NEXT_LLC;
+  }
+}
+
+/*
+ * Return non-zero when at least one of the two ethertypes is a VLAN tag
+ * type.  With SSE4.2 both values are compared against the four tag
+ * ethertypes in a single 8 x 16-bit compare; otherwise each is tested
+ * with ethernet_frame_is_tagged ().
+ */
+static_always_inline int
+ethernet_frame_is_any_tagged (u16 type0, u16 type1)
+{
+#if __SSE4_2__
+  /* the four tag ethertypes, listed once per input value */
+  const __m128i tag_types = _mm_set_epi16 (ETHERNET_TYPE_VLAN,
+                                           ETHERNET_TYPE_DOT1AD,
+                                           ETHERNET_TYPE_VLAN_9100,
+                                           ETHERNET_TYPE_VLAN_9200,
+                                           ETHERNET_TYPE_VLAN,
+                                           ETHERNET_TYPE_DOT1AD,
+                                           ETHERNET_TYPE_VLAN_9100,
+                                           ETHERNET_TYPE_VLAN_9200);
+  const __m128i candidates =
+    _mm_set_epi16 (type0, type0, type0, type0, type1, type1, type1, type1);
+  const __m128i hits = _mm_cmpeq_epi16 (tag_types, candidates);
+
+  /* any lane that compared equal means a tag was found */
+  return !_mm_test_all_zeros (hits, hits);
+#else
+  if (ethernet_frame_is_tagged (type0))
+    return 1;
+  return ethernet_frame_is_tagged (type1) != 0;
+#endif
+}
+
+static_always_inline uword
+ethernet_input_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ ethernet_input_variant_t variant)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ethernet_main_t *em = &ethernet_main;
+ vlib_node_runtime_t *error_node;
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 cached_sw_if_index = ~0;
+ u32 cached_is_l2 = 0; /* shut up gcc */
+ vnet_hw_interface_t *hi = NULL; /* used for main interface only */
+
+ if (variant != ETHERNET_INPUT_VARIANT_ETHERNET)
+ error_node = vlib_node_get_runtime (vm, ethernet_input_node.index);
+ else
+ error_node = node;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node,
+ from,
+ n_left_from,
+ sizeof (from[0]),
+ sizeof (ethernet_input_trace_t));
+
+ next_index = node->cached_next_index;
+ stats_sw_if_index = node->runtime_data[0];
+ stats_n_packets = stats_n_bytes = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u8 next0, next1, error0, error1;
+ u16 type0, orig_type0, type1, orig_type1;
+ u16 outer_id0, inner_id0, outer_id1, inner_id1;
+ u32 match_flags0, match_flags1;
+ u32 old_sw_if_index0, new_sw_if_index0, len0, old_sw_if_index1,
+ new_sw_if_index1, len1;
+ vnet_hw_interface_t *hi0, *hi1;
+ main_intf_t *main_intf0, *main_intf1;
+ vlan_intf_t *vlan_intf0, *vlan_intf1;
+ qinq_intf_t *qinq_intf0, *qinq_intf1;
+ u32 is_l20, is_l21;
+ ethernet_header_t *e0, *e1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *b2, *b3;
+
+ b2 = vlib_get_buffer (vm, from[2]);
+ b3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (b2, STORE);
+ vlib_prefetch_buffer_header (b3, STORE);
+
+ CLIB_PREFETCH (b2->data, sizeof (ethernet_header_t), LOAD);
+ CLIB_PREFETCH (b3->data, sizeof (ethernet_header_t), LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ error0 = error1 = ETHERNET_ERROR_NONE;
+ e0 = vlib_buffer_get_current (b0);
+ type0 = clib_net_to_host_u16 (e0->type);
+ e1 = vlib_buffer_get_current (b1);
+ type1 = clib_net_to_host_u16 (e1->type);
+
+ /* Speed-path for the untagged case */
+ if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
+ && !ethernet_frame_is_any_tagged (type0, type1)))
+ {
+ main_intf_t *intf0;
+ subint_config_t *subint0;
+ u32 sw_if_index0, sw_if_index1;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ is_l20 = cached_is_l2;
+
+ /* This is probably wholly unnecessary */
+ if (PREDICT_FALSE (sw_if_index0 != sw_if_index1))
+ goto slowpath;
+
+ /* Now sw_if_index0 == sw_if_index1 */
+ if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
+ {
+ cached_sw_if_index = sw_if_index0;
+ hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
+ subint0 = &intf0->untagged_subint;
+ cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
+ }
+
+ vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
+ vnet_buffer (b1)->l2_hdr_offset = b1->current_data;
+
+ if (PREDICT_TRUE (is_l20 != 0))
+ {
+ next0 = em->l2_next;
+ vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
+ next1 = em->l2_next;
+ vnet_buffer (b1)->l2.l2_len = sizeof (ethernet_header_t);
+ }
+ else
+ {
+ if (!ethernet_address_cast (e0->dst_address) &&
+ (hi->hw_address != 0) &&
+ !eth_mac_equal ((u8 *) e0, hi->hw_address))
+ error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
+ if (!ethernet_address_cast (e1->dst_address) &&
+ (hi->hw_address != 0) &&
+ !eth_mac_equal ((u8 *) e1, hi->hw_address))
+ error1 = ETHERNET_ERROR_L3_MAC_MISMATCH;
+ determine_next_node (em, variant, 0, type0, b0,
+ &error0, &next0);
+ vlib_buffer_advance (b0, sizeof (ethernet_header_t));
+ determine_next_node (em, variant, 0, type1, b1,
+ &error1, &next1);
+ vlib_buffer_advance (b1, sizeof (ethernet_header_t));
+ }
+ goto ship_it01;
+ }
+
+ /* Slow-path for the tagged case */
+ slowpath:
+ parse_header (variant,
+ b0,
+ &type0,
+ &orig_type0, &outer_id0, &inner_id0, &match_flags0);
+
+ parse_header (variant,
+ b1,
+ &type1,
+ &orig_type1, &outer_id1, &inner_id1, &match_flags1);
+
+ old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ old_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ eth_vlan_table_lookups (em,
+ vnm,
+ old_sw_if_index0,
+ orig_type0,
+ outer_id0,
+ inner_id0,
+ &hi0,
+ &main_intf0, &vlan_intf0, &qinq_intf0);
+
+ eth_vlan_table_lookups (em,
+ vnm,
+ old_sw_if_index1,
+ orig_type1,
+ outer_id1,
+ inner_id1,
+ &hi1,
+ &main_intf1, &vlan_intf1, &qinq_intf1);
+
+ identify_subint (hi0,
+ b0,
+ match_flags0,
+ main_intf0,
+ vlan_intf0,
+ qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
+
+ identify_subint (hi1,
+ b1,
+ match_flags1,
+ main_intf1,
+ vlan_intf1,
+ qinq_intf1, &new_sw_if_index1, &error1, &is_l21);
+
+ // Save RX sw_if_index for later nodes
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ error0 !=
+ ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
+ vnet_buffer (b1)->sw_if_index[VLIB_RX] =
+ error1 !=
+ ETHERNET_ERROR_NONE ? old_sw_if_index1 : new_sw_if_index1;
+
+ // Check if there is a stat to take (valid and non-main sw_if_index for pkt 0 or pkt 1)
+ if (((new_sw_if_index0 != ~0)
+ && (new_sw_if_index0 != old_sw_if_index0))
+ || ((new_sw_if_index1 != ~0)
+ && (new_sw_if_index1 != old_sw_if_index1)))
+ {
+
+ len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
+ - vnet_buffer (b0)->l2_hdr_offset;
+ len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data
+ - vnet_buffer (b1)->l2_hdr_offset;
+
+ stats_n_packets += 2;
+ stats_n_bytes += len0 + len1;
+
+ if (PREDICT_FALSE
+ (!(new_sw_if_index0 == stats_sw_if_index
+ && new_sw_if_index1 == stats_sw_if_index)))
+ {
+ stats_n_packets -= 2;
+ stats_n_bytes -= len0 + len1;
+
+ if (new_sw_if_index0 != old_sw_if_index0
+ && new_sw_if_index0 != ~0)
+ vlib_increment_combined_counter (vnm->
+ interface_main.combined_sw_if_counters
+ +
+ VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ new_sw_if_index0, 1,
+ len0);
+ if (new_sw_if_index1 != old_sw_if_index1
+ && new_sw_if_index1 != ~0)
+ vlib_increment_combined_counter (vnm->
+ interface_main.combined_sw_if_counters
+ +
+ VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ new_sw_if_index1, 1,
+ len1);
+
+ if (new_sw_if_index0 == new_sw_if_index1)
+ {
+ if (stats_n_packets > 0)
+ {
+ vlib_increment_combined_counter
+ (vnm->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ stats_sw_if_index,
+ stats_n_packets, stats_n_bytes);
+ stats_n_packets = stats_n_bytes = 0;
+ }
+ stats_sw_if_index = new_sw_if_index0;
+ }
+ }
+ }
+
+ if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
+ is_l20 = is_l21 = 0;
+
+ determine_next_node (em, variant, is_l20, type0, b0, &error0,
+ &next0);
+ determine_next_node (em, variant, is_l21, type1, b1, &error1,
+ &next1);
+
+ ship_it01:
+ b0->error = error_node->errors[error0];
+ b1->error = error_node->errors[error1];
+
+ // verify speculative enqueue
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u8 error0, next0;
+ u16 type0, orig_type0;
+ u16 outer_id0, inner_id0;
+ u32 match_flags0;
+ u32 old_sw_if_index0, new_sw_if_index0, len0;
+ vnet_hw_interface_t *hi0;
+ main_intf_t *main_intf0;
+ vlan_intf_t *vlan_intf0;
+ qinq_intf_t *qinq_intf0;
+ ethernet_header_t *e0;
+ u32 is_l20;
+
+ // Prefetch next iteration
+ if (n_left_from > 1)
+ {
+ vlib_buffer_t *p2;
+
+ p2 = vlib_get_buffer (vm, from[1]);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ error0 = ETHERNET_ERROR_NONE;
+ e0 = vlib_buffer_get_current (b0);
+ type0 = clib_net_to_host_u16 (e0->type);
+
+ /* Speed-path for the untagged case */
+ if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET
+ && !ethernet_frame_is_tagged (type0)))
+ {
+ main_intf_t *intf0;
+ subint_config_t *subint0;
+ u32 sw_if_index0;
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ is_l20 = cached_is_l2;
+
+ if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0))
+ {
+ cached_sw_if_index = sw_if_index0;
+ hi = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
+ subint0 = &intf0->untagged_subint;
+ cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2;
+ }
+
+ vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
+
+ if (PREDICT_TRUE (is_l20 != 0))
+ {
+ next0 = em->l2_next;
+ vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t);
+ }
+ else
+ {
+ if (!ethernet_address_cast (e0->dst_address) &&
+ (hi->hw_address != 0) &&
+ !eth_mac_equal ((u8 *) e0, hi->hw_address))
+ error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
+ determine_next_node (em, variant, 0, type0, b0,
+ &error0, &next0);
+ vlib_buffer_advance (b0, sizeof (ethernet_header_t));
+ }
+ goto ship_it0;
+ }
+
+ /* Slow-path for the tagged case */
+ parse_header (variant,
+ b0,
+ &type0,
+ &orig_type0, &outer_id0, &inner_id0, &match_flags0);
+
+ old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ eth_vlan_table_lookups (em,
+ vnm,
+ old_sw_if_index0,
+ orig_type0,
+ outer_id0,
+ inner_id0,
+ &hi0,
+ &main_intf0, &vlan_intf0, &qinq_intf0);
+
+ identify_subint (hi0,
+ b0,
+ match_flags0,
+ main_intf0,
+ vlan_intf0,
+ qinq_intf0, &new_sw_if_index0, &error0, &is_l20);
+
+ // Save RX sw_if_index for later nodes
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ error0 !=
+ ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0;
+
+ // Increment subinterface stats
+ // Note that interface-level counters have already been incremented
+ // prior to calling this function. Thus only subinterface counters
+ // are incremented here.
+ //
+ // Interface level counters include packets received on the main
+ // interface and all subinterfaces. Subinterface level counters
+ // include only those packets received on that subinterface
+ // Increment stats if the subint is valid and it is not the main intf
+ if ((new_sw_if_index0 != ~0)
+ && (new_sw_if_index0 != old_sw_if_index0))
+ {
+
+ len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data
+ - vnet_buffer (b0)->l2_hdr_offset;
+
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ // Batch stat increments from the same subinterface so counters
+ // don't need to be incremented for every packet.
+ if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+
+ if (new_sw_if_index0 != ~0)
+ vlib_increment_combined_counter
+ (vnm->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index, new_sw_if_index0, 1, len0);
+ if (stats_n_packets > 0)
+ {
+ vlib_increment_combined_counter
+ (vnm->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index,
+ stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ stats_n_packets = stats_n_bytes = 0;
+ }
+ stats_sw_if_index = new_sw_if_index0;
+ }
+ }
+
+ if (variant == ETHERNET_INPUT_VARIANT_NOT_L2)
+ is_l20 = 0;
+
+ determine_next_node (em, variant, is_l20, type0, b0, &error0,
+ &next0);
+
+ ship_it0:
+ b0->error = error_node->errors[error0];
+
+ // verify speculative enqueue
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ // Increment any remaining batched stats
+ if (stats_n_packets > 0)
+ {
+ vlib_increment_combined_counter
+ (vnm->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes);
+ node->runtime_data[0] = stats_sw_if_index;
+ }
+
+ return from_frame->n_vectors;
+}
+
+/*
+ * Node function for "ethernet-input": run the shared input loop with the
+ * plain ETHERNET variant and hand back whatever it reports as processed.
+ */
+static uword
+ethernet_input (vlib_main_t * vm,
+                vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+  uword n_processed;
+
+  n_processed = ethernet_input_inline (vm, node, from_frame,
+                                       ETHERNET_INPUT_VARIANT_ETHERNET);
+  return n_processed;
+}
+
+/*
+ * Node function for "ethernet-input-type": same shared input loop, run
+ * with the ETHERNET_TYPE variant.
+ */
+static uword
+ethernet_input_type (vlib_main_t * vm,
+                     vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+  uword n_processed;
+
+  n_processed = ethernet_input_inline (vm, node, from_frame,
+                                       ETHERNET_INPUT_VARIANT_ETHERNET_TYPE);
+  return n_processed;
+}
+
+/*
+ * Node function for "ethernet-input-not-l2": same shared input loop, run
+ * with the NOT_L2 variant (the loop forces is_l2 to 0 for this variant).
+ */
+static uword
+ethernet_input_not_l2 (vlib_main_t * vm,
+                       vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+  uword n_processed;
+
+  n_processed = ethernet_input_inline (vm, node, from_frame,
+                                       ETHERNET_INPUT_VARIANT_NOT_L2);
+  return n_processed;
+}
+
+
+// Return the subinterface config struct for the given sw_if_index.
+// Also return, via *flags, the match flags appropriate for the
+// configured number of tags.
+/* On error (unsupported or not ethernet) return 0.
+ *
+ * vnm          vnet main, used to resolve the sw and hw interfaces.
+ * sw_if_index  software interface whose config is wanted.
+ * flags        out: SUBINT_CONFIG_* match flags. Not written on the
+ *              non-ethernet path.
+ * unsupported  out: set to 0 for a non-ethernet interface and to 1 for a
+ *              not-yet-implemented tag combination. It is NOT written on
+ *              success, so callers that read it must pre-initialize it.
+ *
+ * Side effects: may grow em->main_intfs and may allocate a p2p subint,
+ * a vlan table or a qinq table on first use. A vlan table can already
+ * have been allocated when one of the "not implemented" paths returns 0.
+ */
+static subint_config_t *
+ethernet_sw_interface_get_config (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 * flags, u32 * unsupported)
+{
+ ethernet_main_t *em = &ethernet_main;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ main_intf_t *main_intf;
+ vlan_table_t *vlan_table;
+ qinq_table_t *qinq_table;
+ subint_config_t *subint = 0;
+
+ hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+ if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index))
+ {
+ *unsupported = 0;
+ goto done; // non-ethernet interface: returns 0 with unsupported == 0
+ }
+
+ // ensure there's an entry for the main intf (shouldn't really be necessary)
+ vec_validate (em->main_intfs, hi->hw_if_index);
+ main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
+
+ // Locate the subint for the given ethernet config
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+
+ if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
+ {
+ p2p_ethernet_main_t *p2pm = &p2p_main;
+ u32 p2pe_sw_if_index =
+ p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac);
+ if (p2pe_sw_if_index == ~0) /* no hash entry for (parent, client mac) yet */
+ {
+ pool_get (p2pm->p2p_subif_pool, subint); /* NOTE(review): element is not initialized here - confirm callers set flags/sw_if_index */
+ si->p2p.pool_index = subint - p2pm->p2p_subif_pool;
+ }
+ else
+ subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index);
+ *flags = SUBINT_CONFIG_P2P;
+ }
+ else if (si->sub.eth.flags.default_sub)
+ {
+ subint = &main_intf->default_subint;
+ *flags = SUBINT_CONFIG_MATCH_0_TAG |
+ SUBINT_CONFIG_MATCH_1_TAG |
+ SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
+ }
+ else if ((si->sub.eth.flags.no_tags) || (si->sub.eth.raw_flags == 0))
+ {
+ // if no flags are set then this is a main interface
+ // so treat as untagged
+ subint = &main_intf->untagged_subint;
+ *flags = SUBINT_CONFIG_MATCH_0_TAG;
+ }
+ else
+ {
+ // one or two tags
+ // first get the vlan table (separate tables for dot1ad and dot1q)
+ if (si->sub.eth.flags.dot1ad)
+ {
+ if (main_intf->dot1ad_vlans == 0) /* 0 is treated as "no table yet"; pool index 0 is reserved by ethernet_input_init */
+ {
+ // Allocate a vlan table from the pool
+ pool_get (em->vlan_pool, vlan_table);
+ main_intf->dot1ad_vlans = vlan_table - em->vlan_pool;
+ }
+ else
+ {
+ // Get ptr to existing vlan table
+ vlan_table =
+ vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
+ }
+ }
+ else
+ { // dot1q
+ if (main_intf->dot1q_vlans == 0) /* same "0 means none" convention as above */
+ {
+ // Allocate a vlan table from the pool
+ pool_get (em->vlan_pool, vlan_table);
+ main_intf->dot1q_vlans = vlan_table - em->vlan_pool;
+ }
+ else
+ {
+ // Get ptr to existing vlan table
+ vlan_table =
+ vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
+ }
+ }
+
+ if (si->sub.eth.flags.one_tag)
+ {
+ *flags = si->sub.eth.flags.exact_match ?
+ SUBINT_CONFIG_MATCH_1_TAG :
+ (SUBINT_CONFIG_MATCH_1_TAG |
+ SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
+
+ if (si->sub.eth.flags.outer_vlan_id_any)
+ {
+ // not implemented yet: subint is still 0 here
+ *unsupported = 1;
+ goto done;
+ }
+ else
+ {
+ // a single vlan, a common case
+ subint =
+ &vlan_table->vlans[si->sub.eth.
+ outer_vlan_id].single_tag_subint;
+ }
+
+ }
+ else
+ {
+ // Two tags
+ *flags = si->sub.eth.flags.exact_match ?
+ SUBINT_CONFIG_MATCH_2_TAG :
+ (SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG);
+
+ if (si->sub.eth.flags.outer_vlan_id_any
+ && si->sub.eth.flags.inner_vlan_id_any)
+ {
+ // not implemented yet: subint is still 0 here
+ *unsupported = 1;
+ goto done;
+ }
+
+ if (si->sub.eth.flags.inner_vlan_id_any)
+ {
+ // a specific outer and "any" inner
+ // don't need a qinq table for this
+ subint =
+ &vlan_table->vlans[si->sub.eth.
+ outer_vlan_id].inner_any_subint;
+ if (si->sub.eth.flags.exact_match) /* recomputes the same *flags value already set for the two-tag case above */
+ {
+ *flags = SUBINT_CONFIG_MATCH_2_TAG;
+ }
+ else
+ {
+ *flags = SUBINT_CONFIG_MATCH_2_TAG |
+ SUBINT_CONFIG_MATCH_3_TAG;
+ }
+ }
+ else
+ {
+ // a specific outer + specific inner vlan id, a common case
+
+ // get the qinq table
+ if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0) /* 0 is treated as "no qinq table yet" */
+ {
+ // Allocate a qinq table from the pool
+ pool_get (em->qinq_pool, qinq_table);
+ vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs =
+ qinq_table - em->qinq_pool;
+ }
+ else
+ {
+ // Get ptr to existing qinq table
+ qinq_table =
+ vec_elt_at_index (em->qinq_pool,
+ vlan_table->vlans[si->sub.
+ eth.outer_vlan_id].
+ qinqs);
+ }
+ subint = &qinq_table->vlans[si->sub.eth.inner_vlan_id].subint;
+ }
+ }
+ }
+
+done:
+ return subint;
+}
+
+/*
+ * Admin up/down callback: record in the matching subinterface config which
+ * sw_if_index it currently serves (the interface itself when admin-up,
+ * ~0 when down).  Interfaces without a config (not ethernet, or a tag
+ * combination that is not implemented) are left alone.  Always returns 0.
+ */
+clib_error_t *
+ethernet_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+  u32 match_flags_unused;
+  u32 unsupported_unused;
+  subint_config_t *cfg;
+
+  cfg = ethernet_sw_interface_get_config (vnm, sw_if_index,
+                                          &match_flags_unused,
+                                          &unsupported_unused);
+
+  if (cfg != 0)
+    {
+      if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+        cfg->sw_if_index = sw_if_index;
+      else
+        cfg->sw_if_index = ~0;
+    }
+
+  return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down);
+
+
+/*
+ * Switch a (sub)interface between L2 and L3 mode.
+ *
+ * l2 != 0 sets SUBINT_CONFIG_L2, l2 == 0 clears it.  For anything that is
+ * not a VNET_SW_INTERFACE_TYPE_SUB interface ("port"), L2 mode also turns
+ * on matching of 0..3 tags, and L3 mode turns matching of 1..3 tags off
+ * again.  Does nothing when no config exists for the interface.
+ */
+void
+ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2)
+{
+  vnet_sw_interface_t *swif = vnet_get_sw_interface (vnm, sw_if_index);
+  int is_port = (swif->type != VNET_SW_INTERFACE_TYPE_SUB);
+  u32 match_flags_unused;
+  u32 unsupported_unused;
+  subint_config_t *cfg;
+
+  cfg = ethernet_sw_interface_get_config (vnm, sw_if_index,
+                                          &match_flags_unused,
+                                          &unsupported_unused);
+
+  /* unimplemented or not ethernet */
+  if (cfg == 0)
+    return;
+
+  /* The config must belong to this interface, or the interface is down. */
+  ASSERT ((cfg->sw_if_index == sw_if_index) || (cfg->sw_if_index == ~0));
+
+  if (!l2)
+    {
+      cfg->flags &= ~SUBINT_CONFIG_L2;
+      if (is_port)
+        cfg->flags &=
+          ~(SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG
+            | SUBINT_CONFIG_MATCH_3_TAG);
+      return;
+    }
+
+  cfg->flags |= SUBINT_CONFIG_L2;
+  if (is_port)
+    cfg->flags |=
+      SUBINT_CONFIG_MATCH_0_TAG | SUBINT_CONFIG_MATCH_1_TAG
+      | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG;
+}
+
+/*
+ * Set or clear SUBINT_CONFIG_L2 on the subinterface config without ever
+ * touching the tag-match flags, whatever kind of interface it is.
+ * Does nothing when no config exists for the interface.
+ */
+void
+ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm,
+                                          u32 sw_if_index, u32 l2)
+{
+  u32 match_flags_unused;
+  u32 unsupported_unused;
+  subint_config_t *cfg;
+
+  cfg = ethernet_sw_interface_get_config (vnm, sw_if_index,
+                                          &match_flags_unused,
+                                          &unsupported_unused);
+
+  /* unimplemented or not ethernet */
+  if (cfg == 0)
+    return;
+
+  /* The config must belong to this interface, or the interface is down. */
+  ASSERT ((cfg->sw_if_index == sw_if_index) || (cfg->sw_if_index == ~0));
+
+  if (!l2)
+    cfg->flags &= ~SUBINT_CONFIG_L2;
+  else
+    cfg->flags |= SUBINT_CONFIG_L2;
+}
+
+/*
+ * Interface add/del callback.
+ *
+ * On create: claim the subinterface config slot for this sw_if_index
+ * (L3 by default, sw_if_index ~0 because interfaces start down), or fail
+ * with "vlan is already in use" when the slot is already valid.
+ * On delete: clear the slot's flags.
+ * Returns an error for tag combinations that are not implemented, and 0
+ * (no error) for non-ethernet interfaces.
+ */
+static clib_error_t *
+ethernet_sw_interface_add_del (vnet_main_t * vnm,
+                               u32 sw_if_index, u32 is_create)
+{
+  u32 unsupported = 0;
+  u32 match_flags;
+  subint_config_t *cfg;
+
+  cfg = ethernet_sw_interface_get_config (vnm, sw_if_index, &match_flags,
+                                          &unsupported);
+
+  if (cfg == 0)
+    {
+      /* either not ethernet (silently ignored) or the NYI case */
+      if (!unsupported)
+        return 0;
+      return clib_error_return (0, "not implemented yet");
+    }
+
+  if (!is_create)
+    {
+      cfg->flags = 0;
+      return 0;
+    }
+
+  if (cfg->flags & SUBINT_CONFIG_VALID)
+    return clib_error_return (0, "vlan is already in use");
+
+  /* config is L3 by default */
+  cfg->flags = SUBINT_CONFIG_VALID | match_flags;
+  cfg->sw_if_index = ~0;        /* interfaces are initially down */
+
+  return 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ethernet_sw_interface_add_del);
+
+static char *ethernet_error_strings[] = { /* one string per ethernet_error() entry of error.def, in file order */
+#define ethernet_error(n,c,s) s,
+#include "error.def"
+#undef ethernet_error
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ethernet_input_node) = {
+ .function = ethernet_input, /* runs the shared input loop with the ETHERNET variant */
+ .name = "ethernet-input",
+ /* Takes a vector of packets; the input loop reads each frame element
+  * as a u32 buffer index. */
+ .vector_size = sizeof (u32),
+ .n_errors = ETHERNET_N_ERROR,
+ .error_strings = ethernet_error_strings, /* table generated from error.def */
+ .n_next_nodes = ETHERNET_INPUT_N_NEXT,
+ .next_nodes = { /* one arc per entry of foreach_ethernet_input_next */
+#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
+ foreach_ethernet_input_next
+#undef _
+ },
+ .format_buffer = format_ethernet_header_with_length,
+ .format_trace = format_ethernet_input_trace,
+ .unformat_buffer = unformat_ethernet_header,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_node, ethernet_input)
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ethernet_input_type_node, static) = {
+ .function = ethernet_input_type, /* shared input loop, ETHERNET_TYPE variant */
+ .name = "ethernet-input-type",
+ /* Takes a vector of packets; the input loop reads each frame element
+  * as a u32 buffer index.  No error strings are registered on this node. */
+ .vector_size = sizeof (u32),
+ .n_next_nodes = ETHERNET_INPUT_N_NEXT,
+ .next_nodes = { /* same next-node table as ethernet-input */
+#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
+ foreach_ethernet_input_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_type_node, ethernet_input_type)
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ethernet_input_not_l2_node, static) = {
+ .function = ethernet_input_not_l2, /* shared input loop, NOT_L2 variant */
+ .name = "ethernet-input-not-l2",
+ /* Takes a vector of packets; the input loop reads each frame element
+  * as a u32 buffer index.  No error strings are registered on this node. */
+ .vector_size = sizeof (u32),
+ .n_next_nodes = ETHERNET_INPUT_N_NEXT,
+ .next_nodes = { /* same next-node table as ethernet-input */
+#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n,
+ foreach_ethernet_input_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+
+/* *INDENT-OFF* */
+VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_not_l2_node,
+ ethernet_input_not_l2)
+/* *INDENT-ON* */
+
+
+/*
+ * Enable or disable RX redirect on a hardware interface.
+ *
+ * When enabled, all packets go to ethernet-input (i.e. untagged ipv4
+ * packets don't go directly to ip4-input); when disabled, the redirect
+ * node is reset to ~0.
+ */
+void
+ethernet_set_rx_redirect (vnet_main_t * vnm,
+                          vnet_hw_interface_t * hi, u32 enable)
+{
+  u32 redirect_node_index = ~0;
+
+  if (enable)
+    redirect_node_index = ethernet_input_node.index;
+
+  vnet_hw_interface_rx_redirect_to_node (vnm, hi->hw_if_index,
+                                         redirect_node_index);
+}
+
+
+/*
+ * Initialization and registration for the next_by_ethernet structure
+ */
+
+/*
+ * Create the ethertype -> next-index sparse vector and mark the DROP and
+ * PUNT next indices as having no sparse index.  Always returns 0.
+ */
+clib_error_t *
+next_by_ethertype_init (next_by_ethertype_t * l3_next)
+{
+  /* One element per ethertype value; index width = width of the
+     ethernet header's type field. */
+  l3_next->input_next_by_type =
+    sparse_vec_new (sizeof (l3_next->input_next_by_type[0]),
+                    BITS (((ethernet_header_t *) 0)->type));
+
+  vec_validate (l3_next->sparse_index_by_input_next_index,
+                ETHERNET_INPUT_NEXT_DROP);
+  vec_validate (l3_next->sparse_index_by_input_next_index,
+                ETHERNET_INPUT_NEXT_PUNT);
+
+  l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_DROP] =
+    SPARSE_VEC_INVALID_INDEX;
+  l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_PUNT] =
+    SPARSE_VEC_INVALID_INDEX;
+
+  /*
+   * Debug images only: initializing after a registration has already
+   * happened would wipe that registration out.  That can occur when
+   * init function ordering constraints are missing.
+   */
+  if (CLIB_DEBUG > 0)
+    ASSERT (ethernet_main.next_by_ethertype_register_called == 0);
+
+  return 0;
+}
+
+/*
+ * Add an ethertype -> next index mapping to the structure.
+ *
+ * l3_next     mapping structure to update.
+ * ethertype   ethertype being registered.
+ * next_index  next-node index packets of that ethertype should take.
+ *
+ * Besides the sparse-vector entry, the inverse (next index -> sparse
+ * index) table is rebuilt and, unless L3 redirect is enabled, the cached
+ * next indices for IP4/IP6/MPLS are refreshed.  Always returns 0.
+ */
+clib_error_t *
+next_by_ethertype_register (next_by_ethertype_t * l3_next,
+                            u32 ethertype, u32 next_index)
+{
+  u32 i;
+  u16 *n;
+  ethernet_main_t *em = &ethernet_main;
+
+  /* Debug images: remember that a registration happened, so a late
+     next_by_ethertype_init() can be caught by its ASSERT. */
+  if (CLIB_DEBUG > 0)
+    em->next_by_ethertype_register_called = 1;
+
+  /* Setup ethernet type -> next index sparse vector mapping. */
+  n = sparse_vec_validate (l3_next->input_next_by_type, ethertype);
+  n[0] = next_index;
+
+  /* Rebuild next index -> sparse index inverse mapping when sparse vector
+     is updated. */
+  vec_validate (l3_next->sparse_index_by_input_next_index, next_index);
+  for (i = 1; i < vec_len (l3_next->input_next_by_type); i++)
+    l3_next->
+      sparse_index_by_input_next_index[l3_next->input_next_by_type[i]] = i;
+
+  /* Do not allow the cached next indices to be updated if L3 redirect
+     is enabled, as it will have overwritten them. */
+  if (!em->redirect_l3)
+    {
+      /* Cache common ethertypes directly */
+      if (ethertype == ETHERNET_TYPE_IP4)
+        {
+          l3_next->input_next_ip4 = next_index;
+        }
+      else if (ethertype == ETHERNET_TYPE_IP6)
+        {
+          l3_next->input_next_ip6 = next_index;
+        }
+      else if (ethertype == ETHERNET_TYPE_MPLS)
+        {
+          l3_next->input_next_mpls = next_index;
+        }
+    }
+  return 0;
+}
+
+
+/*
+ * Init function for the ethernet input nodes: sets up the three input
+ * nodes, initializes the ethertype -> next mapping and pre-allocates the
+ * pools/vector used for vlan parsing.  Always returns 0.
+ */
+static clib_error_t *
+ethernet_input_init (vlib_main_t * vm)
+{
+  ethernet_main_t *em = &ethernet_main;
+  __attribute__ ((unused)) vlan_table_t *reserved_vlan_table;
+  __attribute__ ((unused)) qinq_table_t *reserved_qinq_table;
+
+  ethernet_setup_node (vm, ethernet_input_node.index);
+  ethernet_setup_node (vm, ethernet_input_type_node.index);
+  ethernet_setup_node (vm, ethernet_input_not_l2_node.index);
+
+  next_by_ethertype_init (&em->l3_next);
+
+  /* Pools and vector for vlan parsing; start with room for 10 main
+     interfaces. */
+  vec_validate (em->main_intfs, 10);
+  pool_alloc (em->vlan_pool, 10);
+  pool_alloc (em->qinq_pool, 1);
+
+  /* The first element of each pool (id 0) is taken here and reserved as
+     the "invalid table", so a table index of 0 can mean "none". */
+  pool_get (em->vlan_pool, reserved_vlan_table);
+  pool_get (em->qinq_pool, reserved_qinq_table);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ethernet_input_init);
+
+/*
+ * Register node_index as the handler for an ethertype.
+ *
+ * The node is added as a next of all three ethernet input nodes (the
+ * resulting next indices are asserted to be equal), recorded in the
+ * type info, added to the L3 ethertype mapping and finally announced to
+ * l2bvi via l2bvi_register_input_type().
+ */
+void
+ethernet_register_input_type (vlib_main_t * vm,
+                              ethernet_type_t type, u32 node_index)
+{
+  ethernet_main_t *em = &ethernet_main;
+  ethernet_type_info_t *ti;
+  clib_error_t *init_error;
+  u32 arc;
+
+  /* ethernet_init must have run before type info can be looked up. */
+  init_error = vlib_call_init_function (vm, ethernet_init);
+  if (init_error)
+    clib_error_report (init_error);
+
+  ti = ethernet_get_type_info (em, type);
+  ti->node_index = node_index;
+  ti->next_index =
+    vlib_node_add_next (vm, ethernet_input_node.index, node_index);
+
+  /* Keep the next indices aligned across the sibling input nodes. */
+  arc = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
+  ASSERT (arc == ti->next_index);
+
+  arc = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
+  ASSERT (arc == ti->next_index);
+
+  /* Add the L3 node for this ethertype to the next nodes structure */
+  next_by_ethertype_register (&em->l3_next, type, ti->next_index);
+
+  /* Call the registration functions for other nodes that want a mapping */
+  l2bvi_register_input_type (vm, type, node_index);
+}
+
+/*
+ * Register node_index as the L2 next node of ethernet-input and remember
+ * the resulting next index in em->l2_next.
+ */
+void
+ethernet_register_l2_input (vlib_main_t * vm, u32 node_index)
+{
+  ethernet_main_t *em = &ethernet_main;
+  u32 arc;
+
+  em->l2_next =
+    vlib_node_add_next (vm, ethernet_input_node.index, node_index);
+
+  /*
+   * Even if we never use these arcs, we have to align the next indices
+   * of the sibling input nodes.
+   */
+  arc = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
+  ASSERT (arc == em->l2_next);
+
+  arc = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
+  ASSERT (arc == em->l2_next);
+}
+
+/*
+ * Register a next node for L3 redirect, and enable L3 redirect.
+ *
+ * After this call the cached next indices for IP4, IP6 and MPLS all
+ * point at the redirect node.
+ */
+void
+ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index)
+{
+  ethernet_main_t *em = &ethernet_main;
+  u32 arc;
+
+  em->redirect_l3 = 1;
+  em->redirect_l3_next =
+    vlib_node_add_next (vm, ethernet_input_node.index, node_index);
+
+  /* Change the cached next nodes to the redirect node */
+  em->l3_next.input_next_ip4 = em->redirect_l3_next;
+  em->l3_next.input_next_ip6 = em->redirect_l3_next;
+  em->l3_next.input_next_mpls = em->redirect_l3_next;
+
+  /*
+   * Even if we never use these arcs, we have to align the next indices
+   * of the sibling input nodes.
+   */
+  arc = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index);
+  ASSERT (arc == em->redirect_l3_next);
+
+  arc = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index);
+  ASSERT (arc == em->redirect_l3_next);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/p2p_ethernet.api b/src/vnet/ethernet/p2p_ethernet.api
new file mode 100644
index 00000000..8fb66376
--- /dev/null
+++ b/src/vnet/ethernet/p2p_ethernet.api
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+define p2p_ethernet_add /* request: create a p2p ethernet sub-interface */
+{
+ u32 client_index; /* NOTE(review): presumably identifies the API client - confirm */
+ u32 context; /* NOTE(review): presumably echoed back in the reply - confirm */
+ u32 parent_if_index; /* parent interface; presumably passed as parent_if_index to p2p_ethernet_add_del() - handler not visible here */
+ u32 subif_id; /* sub-interface id to create under the parent */
+ u8 remote_mac[6]; /* MAC address of the remote client */
+};
+
+define p2p_ethernet_add_reply /* reply to p2p_ethernet_add */
+{
+ u32 context; /* NOTE(review): presumably the context of the matching request - confirm */
+ i32 retval; /* NOTE(review): presumably the return value of the add operation - confirm */
+ u32 sw_if_index; /* NOTE(review): presumably the sw_if_index of the new sub-interface - confirm */
+};
+
+define p2p_ethernet_del /* request: delete a p2p ethernet sub-interface */
+{
+ u32 client_index; /* NOTE(review): presumably identifies the API client - confirm */
+ u32 context; /* NOTE(review): presumably echoed back in the reply - confirm */
+ u32 parent_if_index; /* parent interface of the sub-interface to delete */
+ u8 remote_mac[6]; /* MAC address of the remote client; no sub-id is carried on delete */
+};
+
+define p2p_ethernet_del_reply /* reply to p2p_ethernet_del */
+{
+ u32 context; /* NOTE(review): presumably the context of the matching request - confirm */
+ i32 retval; /* NOTE(review): presumably the return value of the delete operation - confirm */
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/vnet/ethernet/p2p_ethernet.c b/src/vnet/ethernet/p2p_ethernet.c
new file mode 100644
index 00000000..cf3c56b5
--- /dev/null
+++ b/src/vnet/ethernet/p2p_ethernet.c
@@ -0,0 +1,276 @@
+/*
+ * p2p_ethernet.c: p2p ethernet
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/bihash_16_8.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/p2p_ethernet.h>
+#include <vnet/l2/l2_input.h>
+
+p2p_ethernet_main_t p2p_main;
+
+/*
+ * Fill in a p2p hash key from the parent interface index and the client
+ * MAC.  Both pad fields are zeroed so that every byte of the key is
+ * defined.
+ */
+static void
+create_p2pe_key (p2p_key_t * p2pe_key, u32 parent_if_index, u8 * client_mac)
+{
+  p2pe_key->pad1 = 0;
+  p2pe_key->pad2 = 0;
+  p2pe_key->hw_if_index = parent_if_index;
+  clib_memcpy (p2pe_key->mac, client_mac, 6);
+}
+
+/*
+ * Look up the p2p ethernet sub-interface registered for the given parent
+ * interface index and client MAC.
+ *
+ * Returns the value stored in the hash (the sub-interface's sw_if_index),
+ * or ~0 when there is no entry.
+ */
+u32
+p2p_ethernet_lookup (u32 parent_if_index, u8 * client_mac)
+{
+  p2p_key_t key;
+  uword *value;
+
+  create_p2pe_key (&key, parent_if_index, client_mac);
+  value = hash_get_mem (p2p_main.p2p_ethernet_by_key, &key);
+
+  if (value == 0)
+    return ~0;
+
+  return value[0];
+}
+
+/*
+ * Create or delete a p2p ethernet sub-interface.
+ *
+ * vm               vlib main, forwarded to set_int_l2_mode().
+ * parent_if_index  parent interface index (used as a hw_if_index).
+ * client_mac       6-byte MAC address of the remote client.
+ * p2pe_subif_id    sub-interface id to create; not used on delete.
+ * is_add           non-zero to create, zero to delete.
+ * p2pe_if_index    optional out: sw_if_index of the created
+ *                  sub-interface; set to ~0 on every other outcome.
+ *
+ * Returns 0 on success, otherwise one of
+ *   VNET_API_ERROR_SUBIF_ALREADY_EXISTS,
+ *   VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED,
+ *   VNET_API_ERROR_SUBIF_CREATE_FAILED,
+ *   VNET_API_ERROR_SUBIF_DOESNT_EXIST,
+ * or whatever vnet_delete_sub_interface() returned.
+ */
+int
+p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
+                      u8 * client_mac, u32 p2pe_subif_id, int is_add,
+                      u32 * p2pe_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  p2p_ethernet_main_t *p2pm = &p2p_main;
+  vnet_interface_main_t *im = &vnm->interface_main;
+  u32 p2pe_sw_if_index;
+
+  p2pe_sw_if_index = p2p_ethernet_lookup (parent_if_index, client_mac);
+
+  if (p2pe_if_index)
+    *p2pe_if_index = ~0;
+
+  if (is_add)
+    {
+      vnet_hw_interface_t *hi;
+      u64 sup_and_sub_key;
+      u64 *kp;
+      p2p_key_t *p_p2pe_key;
+      u32 i;
+
+      /* (parent, client mac) is already bound to a sub-interface */
+      if (p2pe_sw_if_index != ~0)
+        return VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+
+      hi = vnet_get_hw_interface (vnm, parent_if_index);
+      if (hi->bond_info == VNET_HW_INTERFACE_BOND_INFO_SLAVE)
+        return VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED;
+
+      /* The sub id must be unused on this parent. */
+      sup_and_sub_key = ((u64) (hi->sw_if_index) << 32) | (u64) p2pe_subif_id;
+      if (hash_get_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key))
+        {
+          if (CLIB_DEBUG > 0)
+            clib_warning
+              ("p2p ethernet sub-interface on sw_if_index %d with sub id %d already exists\n",
+               hi->sw_if_index, p2pe_subif_id);
+          return VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+        }
+
+      vnet_sw_interface_t template = {
+        .type = VNET_SW_INTERFACE_TYPE_P2P,
+        .flood_class = VNET_FLOOD_CLASS_NORMAL,
+        .sup_sw_if_index = hi->sw_if_index,
+        .sub.id = p2pe_subif_id
+      };
+
+      clib_memcpy (template.p2p.client_mac, client_mac,
+                   sizeof (template.p2p.client_mac));
+
+      if (vnet_create_sw_interface (vnm, &template, &p2pe_sw_if_index))
+        return VNET_API_ERROR_SUBIF_CREATE_FAILED;
+
+      /* Allocate and zero the counters for this interface. */
+      vnet_interface_counter_lock (im);
+
+      for (i = 0; i < vec_len (im->sw_if_counters); i++)
+        {
+          vlib_validate_simple_counter (&im->sw_if_counters[i],
+                                        p2pe_sw_if_index);
+          vlib_zero_simple_counter (&im->sw_if_counters[i],
+                                    p2pe_sw_if_index);
+        }
+
+      for (i = 0; i < vec_len (im->combined_sw_if_counters); i++)
+        {
+          vlib_validate_combined_counter (&im->combined_sw_if_counters[i],
+                                          p2pe_sw_if_index);
+          vlib_zero_combined_counter (&im->combined_sw_if_counters[i],
+                                      p2pe_sw_if_index);
+        }
+
+      vnet_interface_counter_unlock (im);
+
+      /* Register the sub-interface under its parent.  The hash keeps a
+         pointer to the key, so the key needs heap storage. */
+      kp = clib_mem_alloc (sizeof (*kp));
+      *kp = sup_and_sub_key;
+      hash_set (hi->sub_interface_sw_if_index_by_id, p2pe_subif_id,
+                p2pe_sw_if_index);
+      hash_set_mem (im->sw_if_index_by_sup_and_sub, kp, p2pe_sw_if_index);
+
+      /* Same for the (parent, client mac) -> sw_if_index hash; this key
+         is released again on delete. */
+      p_p2pe_key = clib_mem_alloc (sizeof (*p_p2pe_key));
+      create_p2pe_key (p_p2pe_key, parent_if_index, client_mac);
+      hash_set_mem (p2pm->p2p_ethernet_by_key, p_p2pe_key, p2pe_sw_if_index);
+
+      if (p2pe_if_index)
+        *p2pe_if_index = p2pe_sw_if_index;
+
+      /* First p2p sub-interface on this parent: enable the input feature
+         and promiscuous mode. */
+      vec_validate (p2pm->p2p_ethernet_by_sw_if_index, parent_if_index);
+      if (p2pm->p2p_ethernet_by_sw_if_index[parent_if_index] == 0)
+        {
+          vnet_feature_enable_disable ("device-input",
+                                       "p2p-ethernet-input",
+                                       parent_if_index, 1, 0, 0);
+          /* Set promiscuous mode on the l2 interface */
+          ethernet_set_flags (vnm, parent_if_index,
+                              ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
+        }
+      p2pm->p2p_ethernet_by_sw_if_index[parent_if_index]++;
+
+      /* Set the interface mode.  This takes the sw_if_index of the new
+         sub-interface, not its sub id. */
+      set_int_l2_mode (vm, vnm, MODE_L3, p2pe_sw_if_index, 0, 0, 0, 0);
+      return 0;
+    }
+  else
+    {
+      p2p_key_t p2pe_key;
+      hash_pair_t *hp;
+      int rv;
+
+      if (p2pe_sw_if_index == ~0)
+        return VNET_API_ERROR_SUBIF_DOESNT_EXIST;
+
+      rv = vnet_delete_sub_interface (p2pe_sw_if_index);
+      if (rv)
+        return rv;
+
+      /* Last p2p sub-interface on this parent: disable the input feature
+         and promiscuous mode. */
+      vec_validate (p2pm->p2p_ethernet_by_sw_if_index, parent_if_index);
+      if (p2pm->p2p_ethernet_by_sw_if_index[parent_if_index] == 1)
+        {
+          vnet_feature_enable_disable ("device-input",
+                                       "p2p-ethernet-input",
+                                       parent_if_index, 0, 0, 0);
+          /* Disable promiscuous mode on the l2 interface */
+          ethernet_set_flags (vnm, parent_if_index, 0);
+        }
+      p2pm->p2p_ethernet_by_sw_if_index[parent_if_index]--;
+
+      /* Remove p2p_ethernet from the hash map.  A stack key is enough
+         for the lookup; the heap key stored at add time is freed once
+         the entry is gone. */
+      create_p2pe_key (&p2pe_key, parent_if_index, client_mac);
+      hp = hash_get_pair_mem (p2pm->p2p_ethernet_by_key, &p2pe_key);
+      if (hp)
+        {
+          void *stored_key = uword_to_pointer (hp->key, void *);
+
+          hash_unset_mem (p2pm->p2p_ethernet_by_key, &p2pe_key);
+          clib_mem_free (stored_key);
+        }
+      return 0;
+    }
+}
+
+/*
+ * CLI handler for "p2p_ethernet <intfc> <mac-address> [sub-id <id> | del]".
+ *
+ * Parses the parent interface, the client MAC, an optional sub id and the
+ * "del" keyword, then calls p2p_ethernet_add_del().  Returns 0 on success
+ * or a clib error describing what was missing or what went wrong.
+ */
+static clib_error_t *
+vnet_p2p_ethernet_add_del (vlib_main_t * vm, unformat_input_t * input,
+                           vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+
+  int is_add = 1;
+  int remote_mac = 0;
+  u32 hw_if_index = ~0;
+  u32 sub_id = ~0;
+  u8 client_mac[6];
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat
+          (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+        ;
+      else if (unformat (input, "%U", unformat_ethernet_address, client_mac))
+        remote_mac = 1;
+      else if (unformat (input, "sub-id %d", &sub_id))
+        ;
+      else if (unformat (input, "del"))
+        is_add = 0;
+      else
+        break;
+    }
+
+  if (hw_if_index == ~0)
+    return clib_error_return (0, "Please specify parent interface ...");
+  if (!remote_mac)
+    return clib_error_return (0, "Please specify client MAC address ...");
+  if (sub_id == ~0 && is_add)
+    return clib_error_return (0, "Please specify sub-interface id ...");
+
+  /* The callee reports failures as negative VNET_API_ERROR_* codes, so
+     rv is signed and matched against the symbolic names. */
+  rv = p2p_ethernet_add_del (vm, hw_if_index, client_mac, sub_id, is_add, 0);
+  switch (rv)
+    {
+    case 0:
+      return 0;
+    case VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED:
+      return clib_error_return (0,
+                                "not allowed as parent interface belongs to a BondEthernet interface");
+    case VNET_API_ERROR_SUBIF_ALREADY_EXISTS:
+      return clib_error_return (0,
+                                "p2p ethernet for given parent interface and client mac already exists");
+    case VNET_API_ERROR_SUBIF_CREATE_FAILED:
+      return clib_error_return (0,
+                                "couldn't create p2p ethernet subinterface");
+    case VNET_API_ERROR_SUBIF_DOESNT_EXIST:
+      return clib_error_return (0,
+                                "p2p ethernet for given parent interface and client mac doesn't exist");
+    default:
+      /* e.g. a failure passed through from vnet_delete_sub_interface() */
+      return clib_error_return (0, "p2p_ethernet_add_del returned %d", rv);
+    }
+}
+
+/* CLI registration for creating/deleting p2p ethernet sub-interfaces. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) = {
+  .path = "p2p_ethernet ",
+  .function = vnet_p2p_ethernet_add_del,
+  .short_help = "p2p_ethernet <intfc> <mac-address> [sub-id <id> | del]",
+};
+/* *INDENT-ON* */
+
+/*
+ * Init function for p2p ethernet: creates the (parent, client mac) hash
+ * keyed by p2p_key_t and stores the convenience pointers.  Always
+ * returns 0.
+ */
+static clib_error_t *
+p2p_ethernet_init (vlib_main_t * vm)
+{
+  p2p_ethernet_main_t *p2pm = &p2p_main;
+
+  p2pm->p2p_ethernet_by_key =
+    hash_create_mem (0, sizeof (p2p_key_t), sizeof (uword));
+  p2pm->vnet_main = vnet_get_main ();
+  p2pm->vlib_main = vm;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (p2p_ethernet_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/p2p_ethernet.h b/src/vnet/ethernet/p2p_ethernet.h
new file mode 100644
index 00000000..bb1e2896
--- /dev/null
+++ b/src/vnet/ethernet/p2p_ethernet.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_p2p_ethernet_h
+#define included_vnet_p2p_ethernet_h
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+
+
+typedef struct {
+ /**
+ * Hash mapping parent sw_if_index and client mac address to p2p_ethernet sub-interface
+ */
+ uword * p2p_ethernet_by_key; /* created in p2p_ethernet_init with sizeof (p2p_key_t) keys and uword values */
+
+ u32 *p2p_ethernet_by_sw_if_index; /* NOTE(review): presumably indexed by sw_if_index; its users are not visible here - confirm */
+
+ // Pool of p2p subifs;
+ subint_config_t *p2p_subif_pool;
+
+ /* convenience */
+ vlib_main_t * vlib_main; /* stored by p2p_ethernet_init */
+ vnet_main_t * vnet_main; /* stored by p2p_ethernet_init from vnet_get_main () */
+} p2p_ethernet_main_t;
+
+extern p2p_ethernet_main_t p2p_main;
+
+typedef struct
+{ /* per-packet trace record filled by the p2p-ethernet-input node and printed by format_p2p_ethernet_trace */
+ u32 sw_if_index; /* RX sw_if_index the buffer carried on entry to the node */
+ u32 p2pe_sw_if_index; /* sw_if_index returned by p2p_ethernet_lookup and written back as the new RX interface */
+ u8 client_mac[6]; /* NOTE(review): not written by the input node code in p2p_ethernet_input.c - confirm whether it is still needed */
+} p2p_ethernet_trace_t;
+
+/**
+ * @brief Key struct for P2P Ethernet
+ * Key fields: parent sw_if_index and client mac address
+ * all fields in NET byte order
+ */
+
+typedef struct {
+ u8 mac[6]; /* client MAC address */
+ u16 pad1; // padding for u64 mac address
+ u32 hw_if_index; /* parent interface index (the comment above the struct calls it the parent sw_if_index) */
+ u32 pad2; // padding for u64
+} p2p_key_t; /* sizeof (p2p_key_t) is the key size handed to hash_create_mem in p2p_ethernet_init */
+
+u32 p2p_ethernet_lookup (u32 parent_sw_if_index, u8* client_mac); /* callers in the input node treat a result of ~0 as "no p2p sub-interface for this parent/MAC" */
+int p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index, u8 * client_mac, u32 sub_id, int is_add, u32 *p2pe_if_index); /* the CLI maps -1 to "already exists", -2 to "couldn't create" and -3 to "doesn't exist"; p2pe_if_index may be passed as 0, as the CLI and the delete handler do */
+
+#endif /* included_vnet_p2p_ethernet_h */
diff --git a/src/vnet/ethernet/p2p_ethernet_api.c b/src/vnet/ethernet/p2p_ethernet_api.c
new file mode 100644
index 00000000..f2c730b4
--- /dev/null
+++ b/src/vnet/ethernet/p2p_ethernet_api.c
@@ -0,0 +1,137 @@
+/*
+ *------------------------------------------------------------------
+ * p2p_ethernet_api.c - p2p ethernet api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/vnet_msg_enum.h>
+#include <vnet/ethernet/p2p_ethernet.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg /* (VL_API_ id suffix, handler base name) pairs expanded by p2p_ethernet_api_hookup */ \
+_(P2P_ETHERNET_ADD, p2p_ethernet_add) \
+_(P2P_ETHERNET_DEL, p2p_ethernet_del)
+
+/*
+ * Handler for the P2P_ETHERNET_ADD API message.
+ *
+ * Converts the parent interface index and sub-interface id from the wire
+ * byte order, copies the 6-byte remote MAC, asks p2p_ethernet_add_del to
+ * create the sub-interface (is_add = 1) and replies with the resulting
+ * sw_if_index.  The result code of p2p_ethernet_add_del is kept in rv,
+ * which REPLY_MACRO2 picks up.
+ */
+void
+vl_api_p2p_ethernet_add_t_handler (vl_api_p2p_ethernet_add_t * mp)
+{
+  vl_api_p2p_ethernet_add_reply_t *rmp;
+  vlib_main_t *vm = vlib_get_main ();
+  int rv;
+
+  /* htonl on an inbound field performs the same byte swap as ntohl */
+  u32 parent_if_index = htonl (mp->parent_if_index);
+  u32 sub_id = htonl (mp->subif_id);
+  /*
+   * Start from the "invalid" index: the reply below copies this value
+   * unconditionally, and nothing visible here guarantees that
+   * p2p_ethernet_add_del writes it on its failure paths.  Without the
+   * initializer a failed add could return uninitialized stack contents.
+   */
+  u32 p2pe_if_index = ~0;
+  u8 remote_mac[6];
+
+  clib_memcpy (remote_mac, mp->remote_mac, 6);
+  rv =
+    p2p_ethernet_add_del (vm, parent_if_index, remote_mac, sub_id, 1,
+			  &p2pe_if_index);
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2(VL_API_P2P_ETHERNET_ADD_REPLY,
+  ({
+    rmp->sw_if_index = htonl(p2pe_if_index);
+  }));
+  /* *INDENT-ON* */
+}
+
+/*
+ * Handler for the P2P_ETHERNET_DEL API message: removes the p2p ethernet
+ * sub-interface identified by (parent interface, remote MAC) and sends the
+ * plain reply carrying rv.
+ */
+void
+vl_api_p2p_ethernet_del_t_handler (vl_api_p2p_ethernet_del_t * mp)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vl_api_p2p_ethernet_del_reply_t *rmp;
+  u8 mac[6];
+  u32 parent;
+  int rv;
+
+  /* htonl on an inbound field performs the same byte swap as ntohl */
+  parent = htonl (mp->parent_if_index);
+  clib_memcpy (mac, mp->remote_mac, sizeof (mac));
+
+  rv = p2p_ethernet_add_del (vm, parent, mac, ~0 /* sub_id */ ,
+			     0 /* is_add */ , 0 /* no index wanted */ );
+
+  REPLY_MACRO (VL_API_P2P_ETHERNET_DEL_REPLY);
+}
+
+/*
+ * p2p_ethernet_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{ /* registers one "<name>_<crc>" string -> message id entry per message listed in foreach_vl_msg_name_crc_p2p_ethernet */
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_p2p_ethernet;
+#undef _
+}
+
+static clib_error_t *
+p2p_ethernet_api_hookup (vlib_main_t * vm)
+{ /* API init hook: installs the ADD/DEL handlers, then fills the name/crc table; always returns 0 */
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg; /* one vl_msg_api_set_handlers call per entry of foreach_vpe_api_msg */
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (p2p_ethernet_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/p2p_ethernet_input.c b/src/vnet/ethernet/p2p_ethernet_input.c
new file mode 100644
index 00000000..eeff4f06
--- /dev/null
+++ b/src/vnet/ethernet/p2p_ethernet_input.c
@@ -0,0 +1,262 @@
+/*
+ * node.c: p2p ethernet vpp node
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ethernet/p2p_ethernet.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+vlib_node_registration_t p2p_ethernet_input_node;
+
+/*
+ * Trace formatter for the p2p-ethernet-input node.
+ * va_list arguments: vlib_main_t * (unused), vlib_node_t * (unused),
+ * p2p_ethernet_trace_t *.  Appends "P2P ethernet: <rx if> -> <p2p if>" to s.
+ */
+u8 *
+format_p2p_ethernet_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  p2p_ethernet_trace_t *rec = va_arg (*args, p2p_ethernet_trace_t *);
+
+  return format (s, "P2P ethernet: %U -> %U",
+		 format_vnet_sw_if_index_name, &vnet_main, rec->sw_if_index,
+		 format_vnet_sw_if_index_name, &vnet_main,
+		 rec->p2pe_sw_if_index);
+}
+
+#define foreach_p2p_ethernet_error /* (symbol, counter description) list shared by the enum and the string table below */ \
+_(HITS, "P2P ethernet incoming packets processed")
+
+typedef enum
+{ /* one P2PE_ERROR_<sym> per list entry, in list order */
+#define _(sym,str) P2PE_ERROR_##sym,
+ foreach_p2p_ethernet_error
+#undef _
+ P2PE_N_ERROR, /* number of entries in the list */
+} p2p_ethernet_error_t;
+
+static char *p2p_ethernet_error_strings[] = { /* descriptions in the same order as the enum above */
+#define _(sym,string) string,
+ foreach_p2p_ethernet_error
+#undef _
+};
+
+/*
+ * p2p-ethernet-input node function.
+ *
+ * For every buffer in the frame: take the next node from
+ * vnet_feature_next, then look up (RX sw_if_index, source MAC) with
+ * p2p_ethernet_lookup.  On a hit (result != ~0) the buffer's RX
+ * sw_if_index is rewritten to the p2p sub-interface, the combined counter
+ * of that sub-interface is incremented by one packet and the chain length,
+ * and a trace record is added while the trace budget lasts.  Buffers that
+ * miss are forwarded unchanged.
+ *
+ * Returns the number of vectors in the incoming frame.
+ */
+static uword
+p2p_ethernet_input_node_fn (vlib_main_t * vm,
+			    vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 thread_index = vlib_get_thread_index ();
+  u32 n_trace = vlib_get_trace_count (vm, node);
+  u32 n_left_from, *from, *to_next;
+  u32 next_index;
+  u32 n_p2p_ethernet_packets = 0;
+  vlib_combined_counter_main_t *cm =
+    vnet_get_main ()->interface_main.combined_sw_if_counters;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Dual loop: two buffers per iteration */
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+	{
+	  u32 bi0, bi1;
+	  vlib_buffer_t *b0, *b1;
+	  u32 next0 = 0, next1 = 0;
+	  u32 sw_if_index0, sw_if_index1;
+	  ethernet_header_t *en0, *en1;
+	  u32 rx0, rx1;
+
+	  /* speculatively enqueue both buffers to the current next frame */
+	  bi0 = from[0];
+	  bi1 = from[1];
+	  to_next[0] = bi0;
+	  to_next[1] = bi1;
+	  from += 2;
+	  to_next += 2;
+	  n_left_to_next -= 2;
+	  n_left_from -= 2;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+
+	  en0 = vlib_buffer_get_current (b0);
+	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+	  en1 = vlib_buffer_get_current (b1);
+	  sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+	  vnet_feature_next (sw_if_index0, &next0, b0);
+	  vnet_feature_next (sw_if_index1, &next1, b1);
+
+	  rx0 = p2p_ethernet_lookup (sw_if_index0, en0->src_address);
+	  rx1 = p2p_ethernet_lookup (sw_if_index1, en1->src_address);
+
+	  if (rx0 != ~0)
+	    {
+	      /* Send pkt to p2p_ethernet RX interface */
+	      vnet_buffer (b0)->sw_if_index[VLIB_RX] = rx0;
+	      n_p2p_ethernet_packets += 1;
+
+	      if (PREDICT_FALSE (n_trace > 0))
+		{
+		  p2p_ethernet_trace_t *t0;
+		  vlib_trace_buffer (vm, node, next_index, b0,
+				     1 /* follow_chain */ );
+		  vlib_set_trace_count (vm, node, --n_trace);
+		  t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+		  t0->sw_if_index = sw_if_index0;
+		  t0->p2pe_sw_if_index = rx0;
+		}
+
+	      vlib_increment_combined_counter (cm, thread_index, rx0, 1,
+					       vlib_buffer_length_in_chain
+					       (vm, b0));
+	    }
+	  if (rx1 != ~0)
+	    {
+	      /* Send pkt to p2p_ethernet RX interface */
+	      vnet_buffer (b1)->sw_if_index[VLIB_RX] = rx1;
+	      n_p2p_ethernet_packets += 1;
+
+	      if (PREDICT_FALSE (n_trace > 0))
+		{
+		  p2p_ethernet_trace_t *t1;
+		  vlib_trace_buffer (vm, node, next_index, b1,
+				     1 /* follow_chain */ );
+		  vlib_set_trace_count (vm, node, --n_trace);
+		  t1 = vlib_add_trace (vm, node, b1, sizeof (*t1));
+		  t1->sw_if_index = sw_if_index1;
+		  t1->p2pe_sw_if_index = rx1;
+		}
+
+	      vlib_increment_combined_counter (cm, thread_index, rx1, 1,
+					       vlib_buffer_length_in_chain
+					       (vm, b1));
+	    }
+
+	  /*
+	   * verify speculative enqueue, maybe switch current next frame.
+	   * Two buffers were enqueued above, so the two-buffer form of the
+	   * validation macro is required: the one-buffer form only takes
+	   * back the most recently written slot, which is bi1's slot even
+	   * when it is bi0 that has to move to a different next node.
+	   */
+	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, bi1, next0, next1);
+	}
+
+      /* Single loop: remaining buffers, one per iteration */
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0;
+	  vlib_buffer_t *b0;
+	  u32 next0 = 0;
+	  u32 sw_if_index0;
+	  ethernet_header_t *en0;
+	  u32 rx0;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+
+	  en0 = vlib_buffer_get_current (b0);
+	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+	  vnet_feature_next (sw_if_index0, &next0, b0);
+
+	  rx0 = p2p_ethernet_lookup (sw_if_index0, en0->src_address);
+	  if (rx0 != ~0)
+	    {
+	      /* Send pkt to p2p_ethernet RX interface */
+	      vnet_buffer (b0)->sw_if_index[VLIB_RX] = rx0;
+	      n_p2p_ethernet_packets += 1;
+
+	      if (PREDICT_FALSE (n_trace > 0))
+		{
+		  p2p_ethernet_trace_t *t0;
+		  vlib_trace_buffer (vm, node, next_index, b0,
+				     1 /* follow_chain */ );
+		  vlib_set_trace_count (vm, node, --n_trace);
+		  t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+		  t0->sw_if_index = sw_if_index0;
+		  t0->p2pe_sw_if_index = rx0;
+		}
+
+	      vlib_increment_combined_counter (cm, thread_index, rx0, 1,
+					       vlib_buffer_length_in_chain
+					       (vm, b0));
+	    }
+	  else
+	    {
+	      /* no p2p match: keep the node's trace flag set while a trace budget remains */
+	      if (PREDICT_FALSE (n_trace > 0))
+		{
+		  node->flags |= VLIB_NODE_FLAG_TRACE;
+		}
+	    }
+
+	  /* verify speculative enqueue, maybe switch current next frame */
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, next0);
+	}
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, p2p_ethernet_input_node.index,
+			       P2PE_ERROR_HITS, n_p2p_ethernet_packets);
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (p2p_ethernet_input_node) = {
+ .function = p2p_ethernet_input_node_fn,
+ .name = "p2p-ethernet-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_p2p_ethernet_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(p2p_ethernet_error_strings),
+ .error_strings = p2p_ethernet_error_strings,
+
+ .n_next_nodes = 1,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [0] = "error-drop", /* index 0 is the value next0/next1 start with before vnet_feature_next is called */
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (p2p_ethernet_input_node,
+ p2p_ethernet_input_node_fn)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/packet.h b/src/vnet/ethernet/packet.h
new file mode 100644
index 00000000..964cf638
--- /dev/null
+++ b/src/vnet/ethernet/packet.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet/packet.h: ethernet packet format.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ethernet_packet_h
+#define included_ethernet_packet_h
+
+typedef enum
+{ /* one ETHERNET_TYPE_<name> = <value> enumerator per ethernet_type (value, name) line of types.def */
+#define ethernet_type(n,s) ETHERNET_TYPE_##s = n,
+#include <vnet/ethernet/types.def>
+#undef ethernet_type
+} ethernet_type_t;
+
+typedef struct
+{
+ /* Destination address first, then source address. */
+ u8 dst_address[6];
+ u8 src_address[6];
+
+ /* Ethernet type. */
+ u16 type; /* NOTE(review): presumably network byte order - pg.c reads it back through clib_net_to_host_u16; confirm */
+} ethernet_header_t;
+
+#define ETHERNET_ADDRESS_UNICAST 0
+#define ETHERNET_ADDRESS_MULTICAST 1
+
+/*
+ * Return the I/G bit (least significant bit of the first address byte):
+ * 0 for an individual (unicast) address, 1 for a group
+ * (broadcast/multicast) address.
+ */
+always_inline uword
+ethernet_address_cast (u8 * a)
+{
+  return a[0] & 0x01;
+}
+
+/* Return 1 when bit 1 of the first address byte is set, 0 otherwise. */
+always_inline uword
+ethernet_address_is_locally_administered (u8 * a)
+{
+  return (a[0] & 0x02) != 0;
+}
+
+/* Set bit 1 of the first address byte, leaving all other bits untouched. */
+always_inline void
+ethernet_address_set_locally_administered (u8 * a)
+{
+  a[0] = a[0] | 0x02;
+}
+
+/* For VLAN ethernet type. */
+typedef struct
+{
+ /* 3 bit priority, 1 bit CFI and 12 bit vlan id. */
+ u16 priority_cfi_and_id; /* pg.c places the id in bits 0-11, CFI in bit 12 and priority in bits 13-15 */
+
+#define ETHERNET_N_VLAN (1 << 12) /* number of distinct 12-bit vlan ids */
+
+ /* Inner ethernet type. */
+ u16 type;
+} ethernet_vlan_header_t;
+
+
+/* VLAN with ethertype first and vlan id second */
+typedef struct
+{ /* same two fields as ethernet_vlan_header_t, in the opposite order */
+ /* vlan type */
+ u16 type;
+
+ /* 3 bit priority, 1 bit CFI and 12 bit vlan id. */
+ u16 priority_cfi_and_id;
+} ethernet_vlan_header_tv_t;
+
+/* PBB header with B-TAG - backbone VLAN indicator and I-TAG - service encapsulation */
+typedef struct
+{ /* not packed, unlike ethernet_pbb_header_packed_t, which has the same fields */
+ /* Backbone source/destination address. */
+ u8 b_dst_address[6];
+ u8 b_src_address[6];
+
+ /* B-tag */
+ u16 b_type;
+ /* 3 bit priority, 1 bit DEI and 12 bit vlan id */
+ u16 priority_dei_id;
+
+ /* I-tag */
+ u16 i_type;
+ /* 3 bit priority, 1 bit DEI, 1 bit UCA, 3 bit RES and 24 bit I_SID (service identifier) */
+ u32 priority_dei_uca_res_sid; /* NOTE(review): the preceding fields total 18 bytes, so a compiler that aligns u32 to 4 bytes would pad before this field - confirm before overlaying on packet data */
+
+#define ETHERNET_N_PBB (1 << 24) /* number of distinct 24-bit I_SID values */
+} ethernet_pbb_header_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+{ /* same fields as ethernet_pbb_header_t, wrapped in CLIB_PACKED */
+ /* Backbone source/destination address. */
+ u8 b_dst_address[6];
+ u8 b_src_address[6];
+
+ /* B-tag */
+ u16 b_type;
+ /* 3 bit priority, 1 bit DEI and 12 bit vlan id */
+ u16 priority_dei_id;
+
+ /* I-tag */
+ u16 i_type;
+ /* 3 bit priority, 1 bit DEI, 1 bit UCA, 3 bit RES and 24 bit I_SID (service identifier) */
+ u32 priority_dei_uca_res_sid;
+}) ethernet_pbb_header_packed_t;
+/* *INDENT-ON* */
+
+#endif /* included_ethernet_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/pg.c b/src/vnet/ethernet/pg.c
new file mode 100644
index 00000000..67ccfcf5
--- /dev/null
+++ b/src/vnet/ethernet/pg.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_pg.c: packet generator ethernet interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+
+typedef struct
+{ /* packet-generator edits for the three ethernet_header_t fields; bound by pg_ethernet_header_init */
+ pg_edit_t type;
+ pg_edit_t src_address;
+ pg_edit_t dst_address;
+} pg_ethernet_header_t;
+
+/*
+ * Associate each edit in *e with the ethernet_header_t field of the same
+ * name.  The three calls are independent of each other; they are listed
+ * here in the order the fields appear in ethernet_header_t.
+ */
+static inline void
+pg_ethernet_header_init (pg_ethernet_header_t * e)
+{
+  pg_edit_init (&e->dst_address, ethernet_header_t, dst_address);
+  pg_edit_init (&e->src_address, ethernet_header_t, src_address);
+  pg_edit_init (&e->type, ethernet_header_t, type);
+}
+
+typedef struct
+{ /* packet-generator edits for one vlan tag; bound by pg_ethernet_vlan_header_init */
+ pg_edit_t type;
+ pg_edit_t id; /* bits 0-11 of priority_cfi_and_id */
+ pg_edit_t cfi; /* bit 12 of priority_cfi_and_id */
+ pg_edit_t priority; /* bits 13-15 of priority_cfi_and_id */
+} pg_ethernet_vlan_header_t;
+
+/*
+ * Associate the edits in *v with vlan[vlan_index] of ethernet_max_header_t.
+ * vlan_index must be a valid index into that vlan array (checked by ASSERT).
+ * priority_cfi_and_id is split into three bitfield edits:
+ * id = 12 bits from bit 0, cfi = 1 bit at bit 12, priority = 3 bits from
+ * bit 13.
+ */
+static inline void
+pg_ethernet_vlan_header_init (pg_ethernet_vlan_header_t * v, int vlan_index)
+{
+  ASSERT (vlan_index < ARRAY_LEN (((ethernet_max_header_t *) 0)->vlan));
+
+  pg_edit_init_bitfield (&v->priority, ethernet_max_header_t,
+			 vlan[vlan_index].priority_cfi_and_id, 13, 3);
+  pg_edit_init_bitfield (&v->cfi, ethernet_max_header_t,
+			 vlan[vlan_index].priority_cfi_and_id, 12, 1);
+  pg_edit_init_bitfield (&v->id, ethernet_max_header_t,
+			 vlan[vlan_index].priority_cfi_and_id, 0, 12);
+
+  pg_edit_init (&v->type, ethernet_max_header_t, vlan[vlan_index].type);
+}
+
+/*
+ * Packet-generator parser for an ethernet header.
+ *
+ * va_list argument: pg_stream_t *.
+ * Accepted input: "<type>: <src> -> <dst>", then up to two
+ * "vlan <id> [priority <n>] [cfi <n>]" groups, then either the edit syntax
+ * of the node registered for the ethernet type or a generic payload.
+ *
+ * Returns 1 on success.  On any parse failure the edit group created here
+ * is released with pg_free_edit_group and 0 is returned.
+ */
+uword
+unformat_pg_ethernet_header (unformat_input_t * input, va_list * args)
+{
+  pg_stream_t *s = va_arg (*args, pg_stream_t *);
+  pg_ethernet_header_t *e;
+  pg_ethernet_vlan_header_t *v;
+  pg_edit_t *ether_type_edit;
+  u32 n_vlan, error, group_index;
+
+  e = pg_create_edit_group (s, sizeof (e[0]), sizeof (ethernet_header_t),
+			    &group_index);
+  pg_ethernet_header_init (e);
+  error = 1;
+
+  if (!unformat (input, "%U: %U -> %U",
+		 unformat_pg_edit,
+		 unformat_ethernet_type_net_byte_order, &e->type,
+		 unformat_pg_edit,
+		 unformat_ethernet_address, &e->src_address,
+		 unformat_pg_edit,
+		 unformat_ethernet_address, &e->dst_address))
+    goto done;
+
+  n_vlan = 0;
+  while (unformat (input, "vlan"))
+    {
+      /*
+       * Too many vlans given.  Checked before any edits are added so that
+       * a third tag is rejected cleanly; pg_ethernet_vlan_header_init
+       * ASSERTs that its index fits the vlan array of
+       * ethernet_max_header_t, and would be reached with index 2 if this
+       * check came after it.
+       */
+      if (n_vlan >= 2)
+	goto done;
+
+      v = pg_add_edits (s, sizeof (v[0]), sizeof (ethernet_vlan_header_t),
+			group_index);
+      pg_ethernet_vlan_header_init (v, n_vlan);
+
+      if (!unformat_user (input, unformat_pg_edit,
+			  unformat_pg_number, &v->id))
+	goto done;
+
+      /* priority and cfi are optional and default to a fixed 0 */
+      if (!unformat (input, "priority %U", unformat_pg_edit,
+		     unformat_pg_number, &v->priority))
+	pg_edit_set_fixed (&v->priority, 0);
+
+      if (!unformat (input, "cfi %U", unformat_pg_edit,
+		     unformat_pg_number, &v->cfi))
+	pg_edit_set_fixed (&v->cfi, 0);
+
+      n_vlan++;
+    }
+
+  /* Address of e may have changed due to vlan edits being added */
+  e = pg_get_edit_group (s, group_index);
+  v = (void *) (e + 1);
+
+  /* Correct types for vlan packets. */
+  ether_type_edit = &e->type;
+  if (n_vlan > 0)
+    {
+      int i;
+
+      /* the parsed type moves to the innermost tag; outer types become VLAN */
+      ether_type_edit = &v[n_vlan - 1].type;
+      pg_edit_copy_type_and_values (ether_type_edit, &e->type);
+      pg_edit_set_fixed (&e->type, ETHERNET_TYPE_VLAN);
+
+      for (i = 0; i < n_vlan - 1; i++)
+	pg_edit_set_fixed (&v[i].type, ETHERNET_TYPE_VLAN);
+    }
+
+  {
+    ethernet_main_t *em = &ethernet_main;
+    ethernet_type_info_t *ti = 0;
+    pg_node_t *pg_node = 0;
+
+    /* only a fixed ethernet type can select a protocol-specific parser */
+    if (ether_type_edit->type == PG_EDIT_FIXED)
+      {
+	u16 t = *(u16 *) ether_type_edit->values[PG_EDIT_LO];
+	ti = ethernet_get_type_info (em, clib_net_to_host_u16 (t));
+	if (ti && ti->node_index != ~0)
+	  pg_node = pg_get_node (ti->node_index);
+      }
+
+    if (pg_node && pg_node->unformat_edit
+	&& unformat_user (input, pg_node->unformat_edit, s))
+      ;
+    else if (!unformat_user (input, unformat_pg_payload, s))
+      goto done;
+  }
+
+  error = 0;
+
+done:
+  if (error)
+    pg_free_edit_group (s);
+  return error == 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/sfp.c b/src/vnet/ethernet/sfp.c
new file mode 100644
index 00000000..624740e3
--- /dev/null
+++ b/src/vnet/ethernet/sfp.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ethernet/sfp.h>
+
+/*
+ * Format helper: append the l-byte field at v to s, without its trailing
+ * space characters.  va_list arguments: u32 l, u8 *v.
+ * A field that is empty or consists only of spaces appends nothing.
+ */
+static u8 *
+format_space_terminated (u8 * s, va_list * args)
+{
+  u32 l = va_arg (*args, u32);
+  u8 *v = va_arg (*args, u8 *);
+  u32 n = l;
+
+  /*
+   * Shrink the length while the last counted byte is a space.  Working on
+   * an index keeps every access inside [v, v + l) and never forms a
+   * pointer in front of v, which a backwards pointer scan does when the
+   * field is empty or all spaces.
+   */
+  while (n > 0 && v[n - 1] == ' ')
+    n--;
+
+  vec_add (s, v, n);
+  return s;
+}
+
+/*
+ * Format an SFP identifier.  va_list argument: u32 id.
+ * Known identifiers print the name from foreach_sfp_id; anything else
+ * prints "unknown 0x<id>".
+ */
+static u8 *
+format_sfp_id (u8 * s, va_list * args)
+{
+  u32 id = va_arg (*args, u32);
+
+  switch (id)
+    {
+#define _(f) case SFP_ID_##f: return format (s, "%s", #f);
+      foreach_sfp_id
+#undef _
+    default:
+      break;
+    }
+  return format (s, "unknown 0x%x", id);
+}
+
+/*
+ * Format an sfp_compatibility_t value.  va_list argument: u32 c.
+ * Known values print the name from foreach_sfp_compatibility; anything
+ * else prints "unknown 0x<c>".
+ */
+static u8 *
+format_sfp_compatibility (u8 * s, va_list * args)
+{
+  u32 c = va_arg (*args, u32);
+
+  switch (c)
+    {
+#define _(a,b,f) case SFP_COMPATIBILITY_##f: return format (s, "%s", #f);
+      foreach_sfp_compatibility
+#undef _
+    default:
+      break;
+    }
+  return format (s, "unknown 0x%x", c);
+}
+
+/*
+ * Test one compatibility flag of an SFP EEPROM image.
+ * The (byte, bit) position of each sfp_compatibility_t value comes from
+ * foreach_sfp_compatibility.  Returns 1 when that bit is set in
+ * e->compatibility, 0 otherwise.  c must be below SFP_N_COMPATIBILITY
+ * (checked by ASSERT).
+ */
+u32
+sfp_is_comatible (sfp_eeprom_t * e, sfp_compatibility_t c)
+{
+  static struct
+  {
+    u8 byte, bit;
+  } t[] =
+  {
+#define _(a,b,f) { .byte = a, .bit = b, },
+    foreach_sfp_compatibility
+#undef _
+  };
+  u8 flags;
+
+  ASSERT (c < ARRAY_LEN (t));
+  flags = e->compatibility[t[c].byte];
+  return (flags >> t[c].bit) & 1;
+}
+
+/*
+ * Format an SFP EEPROM image.  va_list argument: sfp_eeprom_t *.
+ * Output: the identifier (only when it is not SFP_ID_sfp), the list of
+ * compatibility flags that are set, then vendor name / part number and
+ * revision / serial number / date code on following lines, indented to
+ * the current indent of s.
+ */
+u8 *
+format_sfp_eeprom (u8 * s, va_list * args)
+{
+  sfp_eeprom_t *e = va_arg (*args, sfp_eeprom_t *);
+  uword indent = format_get_indent (s);
+  int i;
+
+  if (e->id != SFP_ID_sfp)
+    s = format (s, "id %U, ", format_sfp_id, e->id);
+
+  s = format (s, "compatibility:");
+  for (i = 0; i < SFP_N_COMPATIBILITY; i++)
+    if (sfp_is_comatible (e, i))
+      s = format (s, " %U", format_sfp_compatibility, i);
+
+  /*
+   * format_space_terminated fetches its length with va_arg (..., u32).
+   * sizeof yields a size_t, so each length is cast to u32 to make the
+   * variadic argument match the type that is read back.
+   */
+  s = format (s, "\n%Uvendor: %U, part %U",
+	      format_white_space, indent,
+	      format_space_terminated, (u32) sizeof (e->vendor_name),
+	      e->vendor_name, format_space_terminated,
+	      (u32) sizeof (e->vendor_part_number), e->vendor_part_number);
+  s =
+    format (s, "\n%Urevision: %U, serial: %U, date code: %U",
+	    format_white_space, indent, format_space_terminated,
+	    (u32) sizeof (e->vendor_revision), e->vendor_revision,
+	    format_space_terminated, (u32) sizeof (e->vendor_serial_number),
+	    e->vendor_serial_number, format_space_terminated,
+	    (u32) sizeof (e->vendor_date_code), e->vendor_date_code);
+
+  return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/sfp.h b/src/vnet/ethernet/sfp.h
new file mode 100644
index 00000000..a1ac7997
--- /dev/null
+++ b/src/vnet/ethernet/sfp.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vnet_optics_sfp_h
+#define included_vnet_optics_sfp_h
+
+#include <vppinfra/format.h>
+
+#define foreach_sfp_id /* identifier names; list order fixes the enum values below */ \
+ _ (unknown) \
+ _ (gbic) \
+ _ (on_motherboard) \
+ _ (sfp)
+
+typedef enum
+{ /* SFP_ID_unknown = 0, SFP_ID_gbic = 1, SFP_ID_on_motherboard = 2, SFP_ID_sfp = 3 */
+#define _(f) SFP_ID_##f,
+ foreach_sfp_id
+#undef _
+} sfp_id_t;
+
+typedef struct
+{ /* byte image of the SFP EEPROM; every member is u8, so member offsets equal EEPROM byte addresses */
+ u8 id; /* byte 0; compared against SFP_ID_sfp by format_sfp_eeprom */
+ u8 extended_id;
+ u8 connector_type;
+ u8 compatibility[8]; /* bytes 3-10; indexed by the byte numbers in foreach_sfp_compatibility */
+ u8 encoding;
+ u8 nominal_bit_rate_100mbits_per_sec;
+ u8 reserved13;
+ u8 link_length[5];
+ u8 reserved19;
+ u8 vendor_name[16]; /* bytes 20-35; printed with trailing spaces removed */
+ u8 reserved36;
+ u8 vendor_oui[3];
+ u8 vendor_part_number[16]; /* bytes 40-55; printed with trailing spaces removed */
+ u8 vendor_revision[4]; /* bytes 56-59; printed with trailing spaces removed */
+ /* 16 bit value network byte order. */
+ u8 laser_wavelength_in_nm[2];
+ u8 reserved62;
+ u8 checksum_0_to_62; /* byte 63; sfp_eeprom_is_valid compares it with the 8-bit sum of bytes 0-62 */
+
+ u8 options[2];
+ u8 max_bit_rate_margin_percent;
+ u8 min_bit_rate_margin_percent;
+ u8 vendor_serial_number[16]; /* bytes 68-83; printed with trailing spaces removed */
+ u8 vendor_date_code[8]; /* bytes 84-91; printed with trailing spaces removed */
+ u8 reserved92[3];
+ u8 checksum_63_to_94; /* byte 95; NOTE(review): no code visible here verifies this checksum */
+ u8 vendor_specific[32];
+ u8 reserved128[384];
+
+ /* Vendor specific data follows. */
+ u8 vendor_specific1[0]; /* zero-length trailing array: adds nothing to sizeof */
+} sfp_eeprom_t;
+
+/*
+ * Verify the first EEPROM checksum: the 8-bit wrapping sum of bytes 0..62
+ * of the image must equal checksum_0_to_62.  Returns 1 on match, else 0.
+ */
+always_inline uword
+sfp_eeprom_is_valid (sfp_eeprom_t * e)
+{
+  u8 *bytes = (u8 *) e;
+  u8 acc = 0;
+  int k = 0;
+
+  while (k < 63)
+    acc += bytes[k++];
+
+  return acc == e->checksum_0_to_62;
+}
+
+/* _ (byte_index, bit_index, name) */
+#define foreach_sfp_compatibility /* byte_index indexes sfp_eeprom_t.compatibility; bit_index selects the bit in that byte */ \
+ _ (0, 4, 10g_base_sr) \
+ _ (0, 5, 10g_base_lr) \
+ _ (1, 2, oc48_long_reach) \
+ _ (1, 1, oc48_intermediate_reach) \
+ _ (1, 0, oc48_short_reach) \
+ _ (2, 6, oc12_long_reach) \
+ _ (2, 5, oc12_intermediate_reach) \
+ _ (2, 4, oc12_short_reach) \
+ _ (2, 2, oc3_long_reach) \
+ _ (2, 1, oc3_intermediate_reach) \
+ _ (2, 0, oc3_short_reach) \
+ _ (3, 3, 1g_base_t) \
+ _ (3, 2, 1g_base_cx) \
+ _ (3, 1, 1g_base_lx) \
+ _ (3, 0, 1g_base_sx)
+
+typedef enum
+{ /* one SFP_COMPATIBILITY_<name> per list entry, numbered in list order from 0 */
+#define _(a,b,f) SFP_COMPATIBILITY_##f,
+ foreach_sfp_compatibility
+#undef _
+ SFP_N_COMPATIBILITY, /* number of entries; loop bound used by format_sfp_eeprom */
+} sfp_compatibility_t;
+
+u32 sfp_is_comatible (sfp_eeprom_t * e, sfp_compatibility_t c); /* non-zero when the EEPROM compatibility bit for c is set */
+
+format_function_t format_sfp_eeprom; /* format argument: sfp_eeprom_t * */
+
+#endif /* included_vnet_optics_sfp_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/types.def b/src/vnet/ethernet/types.def
new file mode 100644
index 00000000..c7a47221
--- /dev/null
+++ b/src/vnet/ethernet/types.def
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Emacs editing mode -*-C-*- Ethernet types. */
+
+/*
+ * ethernet types
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Each entry has the form ethernet_type (value, NAME).  This file does
+ * not define ethernet_type itself, so the includer must define that
+ * macro before including the file.
+ *
+ * Types < 0x600 (1536) are LLC packet lengths.
+ *
+ * NOTE(review): the value 0x600 appears twice below (LLC_LENGTH and
+ * XNS_IDP).  Any consumer keyed on the numeric value will see the two
+ * entries collide -- confirm this is intended.
+ */
+ethernet_type (0x600, LLC_LENGTH)
+
+ethernet_type (0x600, XNS_IDP)
+ethernet_type (0x800, IP4)
+ethernet_type (0x806, ARP)
+ethernet_type (0x0BAD, VINES_IP)
+ethernet_type (0x0BAE, VINES_LOOPBACK)
+ethernet_type (0x0BAF, VINES_ECHO)
+ethernet_type (0x1984, TRAIN)
+ethernet_type (0x2000, CDP)
+ethernet_type (0x2001, CGMP)
+ethernet_type (0x2007, SRP_CONTROL)
+ethernet_type (0x2452, CENTRINO_PROMISC)
+ethernet_type (0x6000, DECNET)
+ethernet_type (0x6001, DECNET_DUMP_LOAD)
+ethernet_type (0x6002, DECNET_REMOTE_CONSOLE)
+ethernet_type (0x6003, DECNET_ROUTE)
+ethernet_type (0x6004, DEC_LAT)
+ethernet_type (0x6005, DEC_DIAGNOSTIC)
+ethernet_type (0x6006, DEC_CUSTOMER)
+ethernet_type (0x6007, DEC_SCA)
+ethernet_type (0x6558, TRANSPARENT_BRIDGING)
+ethernet_type (0x6559, RAW_FRAME_RELAY)
+ethernet_type (0x8035, REVERSE_ARP)
+ethernet_type (0x8038, DEC_LAN_BRIDGE)
+ethernet_type (0x803D, DEC_ETHERNET_ENCRYPTION)
+ethernet_type (0x803F, DEC_LAN_TRAFFIC_MONITOR)
+ethernet_type (0x8041, DEC_LAST)
+ethernet_type (0x809B, APPLETALK)
+ethernet_type (0x80D5, IBM_SNA)
+ethernet_type (0x80F3, APPLETALK_AARP)
+ethernet_type (0x80FF, WELLFLEET_COMPRESSION)
+ethernet_type (0x8100, VLAN)
+ethernet_type (0x8137, IPX)
+ethernet_type (0x814C, SNMP)
+ethernet_type (0x81FD, CABLETRON_ISMP)
+ethernet_type (0x81FF, CABLETRON_ISMP_TBFLOOD)
+ethernet_type (0x86DD, IP6)
+ethernet_type (0x86DF, ATOMIC)
+ethernet_type (0x876B, TCP_IP_COMPRESSION)
+ethernet_type (0x876C, IP_AUTONOMOUS_SYSTEMS)
+ethernet_type (0x876D, SECURE_DATA)
+ethernet_type (0x8808, MAC_CONTROL)
+ethernet_type (0x8809, SLOW_PROTOCOLS)
+ethernet_type (0x880B, PPP)
+ethernet_type (0x8847, MPLS)
+ethernet_type (0x8848, MPLS_UPSTREAM_ASSIGNED)
+ethernet_type (0x8863, PPPOE_DISCOVERY)
+ethernet_type (0x8864, PPPOE_SESSION)
+ethernet_type (0x886D, INTEL_ANS)
+ethernet_type (0x886F, MICROSOFT_NLB_HEARTBEAT)
+ethernet_type (0x8881, CDMA_2000)
+ethernet_type (0x888e, 802_1X_AUTHENTICATION)
+ethernet_type (0x8892, PROFINET)
+ethernet_type (0x889a, HYPERSCSI)
+ethernet_type (0x88a2, AOE)
+ethernet_type (0x88a8, DOT1AD)
+ethernet_type (0x88AE, BRDWALK)
+ethernet_type (0x88B7, 802_OUI_EXTENDED)
+ethernet_type (0x88c7, 802_11I_PRE_AUTHENTICATION)
+ethernet_type (0x88cc, 802_1_LLDP)
+ethernet_type (0x88e7, DOT1AH)
+ethernet_type (0x894f, NSH)
+ethernet_type (0x9000, LOOPBACK)
+ethernet_type (0x9021, RTNET_MAC)
+ethernet_type (0x9022, RTNET_CONFIG)
+ethernet_type (0x9100, VLAN_9100)
+ethernet_type (0x9200, VLAN_9200)
+ethernet_type (0x9999, PGLAN)
+ethernet_type (0xFEFE, SRP_ISIS)
+ethernet_type (0xFFFF, RESERVED)