diff options
Diffstat (limited to 'src/vnet/ethernet')
-rw-r--r-- | src/vnet/ethernet/arp.c | 2536 | ||||
-rw-r--r-- | src/vnet/ethernet/arp_packet.h | 180 | ||||
-rw-r--r-- | src/vnet/ethernet/dir.dox | 24 | ||||
-rw-r--r-- | src/vnet/ethernet/error.def | 46 | ||||
-rw-r--r-- | src/vnet/ethernet/ethernet.h | 577 | ||||
-rw-r--r-- | src/vnet/ethernet/format.c | 348 | ||||
-rw-r--r-- | src/vnet/ethernet/init.c | 128 | ||||
-rw-r--r-- | src/vnet/ethernet/interface.c | 880 | ||||
-rw-r--r-- | src/vnet/ethernet/mac_swap.c | 397 | ||||
-rwxr-xr-x | src/vnet/ethernet/node.c | 1419 | ||||
-rw-r--r-- | src/vnet/ethernet/p2p_ethernet.api | 50 | ||||
-rw-r--r-- | src/vnet/ethernet/p2p_ethernet.c | 276 | ||||
-rw-r--r-- | src/vnet/ethernet/p2p_ethernet.h | 63 | ||||
-rw-r--r-- | src/vnet/ethernet/p2p_ethernet_api.c | 137 | ||||
-rw-r--r-- | src/vnet/ethernet/p2p_ethernet_input.c | 262 | ||||
-rw-r--r-- | src/vnet/ethernet/packet.h | 152 | ||||
-rw-r--r-- | src/vnet/ethernet/pg.c | 183 | ||||
-rw-r--r-- | src/vnet/ethernet/sfp.c | 117 | ||||
-rw-r--r-- | src/vnet/ethernet/sfp.h | 117 | ||||
-rw-r--r-- | src/vnet/ethernet/types.def | 113 |
20 files changed, 8005 insertions, 0 deletions
diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c new file mode 100644 index 00000000..52b13e04 --- /dev/null +++ b/src/vnet/ethernet/arp.c @@ -0,0 +1,2536 @@ +/* + * ethernet/arp.c: IP v4 ARP node + * + * Copyright (c) 2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/ip.h> +#include <vnet/ip/ip6.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/arp_packet.h> +#include <vnet/l2/l2_input.h> +#include <vppinfra/mhash.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/fib_entry_src.h> +#include <vnet/adj/adj_nbr.h> +#include <vnet/adj/adj_mcast.h> +#include <vnet/mpls/mpls.h> + +/** + * @file + * @brief IPv4 ARP. + * + * This file contains code to manage the IPv4 ARP tables (IP Address + * to MAC Address lookup). + */ + + +void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); + +/** + * @brief Per-interface ARP configuration and state + */ +typedef struct ethernet_arp_interface_t_ +{ + /** + * Hash table of ARP entries. + * Since this hash table is per-interface, the key is only the IPv4 address. 
+ */ + uword *arp_entries; +} ethernet_arp_interface_t; + +typedef struct +{ + u32 lo_addr; + u32 hi_addr; + u32 fib_index; +} ethernet_proxy_arp_t; + +typedef struct +{ + u32 next_index; + uword node_index; + uword type_opaque; + uword data; + /* Used for arp event notification only */ + void *data_callback; + u32 pid; +} pending_resolution_t; + +typedef struct +{ + /* Hash tables mapping name to opcode. */ + uword *opcode_by_name; + + /* lite beer "glean" adjacency handling */ + uword *pending_resolutions_by_address; + pending_resolution_t *pending_resolutions; + + /* Mac address change notification */ + uword *mac_changes_by_address; + pending_resolution_t *mac_changes; + + ethernet_arp_ip4_entry_t *ip4_entry_pool; + + /* ARP attack mitigation */ + u32 arp_delete_rotor; + u32 limit_arp_cache_size; + + /** Per interface state */ + ethernet_arp_interface_t *ethernet_arp_by_sw_if_index; + + /* Proxy arp vector */ + ethernet_proxy_arp_t *proxy_arps; + + uword wc_ip4_arp_publisher_node; + uword wc_ip4_arp_publisher_et; +} ethernet_arp_main_t; + +static ethernet_arp_main_t ethernet_arp_main; + +typedef struct +{ + u32 sw_if_index; + ethernet_arp_ip4_over_ethernet_address_t a; + int is_static; + int is_no_fib_entry; + int flags; +#define ETHERNET_ARP_ARGS_REMOVE (1<<0) +#define ETHERNET_ARP_ARGS_FLUSH (1<<1) +#define ETHERNET_ARP_ARGS_POPULATE (1<<2) +#define ETHERNET_ARP_ARGS_WC_PUB (1<<3) +} vnet_arp_set_ip4_over_ethernet_rpc_args_t; + +static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 }; + +/* Node index for send_garp_na_process */ +u32 send_garp_na_process_node_index; + +static void +set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t + * a); + +static u8 * +format_ethernet_arp_hardware_type (u8 * s, va_list * va) +{ + ethernet_arp_hardware_type_t h = va_arg (*va, ethernet_arp_hardware_type_t); + char *t = 0; + switch (h) + { +#define _(n,f) case n: t = #f; break; + foreach_ethernet_arp_hardware_type; +#undef _ + + default: + 
return format (s, "unknown 0x%x", h); + } + + return format (s, "%s", t); +} + +static u8 * +format_ethernet_arp_opcode (u8 * s, va_list * va) +{ + ethernet_arp_opcode_t o = va_arg (*va, ethernet_arp_opcode_t); + char *t = 0; + switch (o) + { +#define _(f) case ETHERNET_ARP_OPCODE_##f: t = #f; break; + foreach_ethernet_arp_opcode; +#undef _ + + default: + return format (s, "unknown 0x%x", o); + } + + return format (s, "%s", t); +} + +static uword +unformat_ethernet_arp_opcode_host_byte_order (unformat_input_t * input, + va_list * args) +{ + int *result = va_arg (*args, int *); + ethernet_arp_main_t *am = &ethernet_arp_main; + int x, i; + + /* Numeric opcode. */ + if (unformat (input, "0x%x", &x) || unformat (input, "%d", &x)) + { + if (x >= (1 << 16)) + return 0; + *result = x; + return 1; + } + + /* Named type. */ + if (unformat_user (input, unformat_vlib_number_by_name, + am->opcode_by_name, &i)) + { + *result = i; + return 1; + } + + return 0; +} + +static uword +unformat_ethernet_arp_opcode_net_byte_order (unformat_input_t * input, + va_list * args) +{ + int *result = va_arg (*args, int *); + if (!unformat_user + (input, unformat_ethernet_arp_opcode_host_byte_order, result)) + return 0; + + *result = clib_host_to_net_u16 ((u16) * result); + return 1; +} + +static u8 * +format_ethernet_arp_header (u8 * s, va_list * va) +{ + ethernet_arp_header_t *a = va_arg (*va, ethernet_arp_header_t *); + u32 max_header_bytes = va_arg (*va, u32); + uword indent; + u16 l2_type, l3_type; + + if (max_header_bytes != 0 && sizeof (a[0]) > max_header_bytes) + return format (s, "ARP header truncated"); + + l2_type = clib_net_to_host_u16 (a->l2_type); + l3_type = clib_net_to_host_u16 (a->l3_type); + + indent = format_get_indent (s); + + s = format (s, "%U, type %U/%U, address size %d/%d", + format_ethernet_arp_opcode, clib_net_to_host_u16 (a->opcode), + format_ethernet_arp_hardware_type, l2_type, + format_ethernet_type, l3_type, + a->n_l2_address_bytes, a->n_l3_address_bytes); + + if 
(l2_type == ETHERNET_ARP_HARDWARE_TYPE_ethernet + && l3_type == ETHERNET_TYPE_IP4) + { + s = format (s, "\n%U%U/%U -> %U/%U", + format_white_space, indent, + format_ethernet_address, a->ip4_over_ethernet[0].ethernet, + format_ip4_address, &a->ip4_over_ethernet[0].ip4, + format_ethernet_address, a->ip4_over_ethernet[1].ethernet, + format_ip4_address, &a->ip4_over_ethernet[1].ip4); + } + else + { + uword n2 = a->n_l2_address_bytes; + uword n3 = a->n_l3_address_bytes; + s = format (s, "\n%U%U/%U -> %U/%U", + format_white_space, indent, + format_hex_bytes, a->data + 0 * n2 + 0 * n3, n2, + format_hex_bytes, a->data + 1 * n2 + 0 * n3, n3, + format_hex_bytes, a->data + 1 * n2 + 1 * n3, n2, + format_hex_bytes, a->data + 2 * n2 + 1 * n3, n3); + } + + return s; +} + +u8 * +format_ethernet_arp_ip4_entry (u8 * s, va_list * va) +{ + vnet_main_t *vnm = va_arg (*va, vnet_main_t *); + ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *); + vnet_sw_interface_t *si; + u8 *flags = 0; + + if (!e) + return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4", + "Flags", "Ethernet", "Interface"); + + si = vnet_get_sw_interface (vnm, e->sw_if_index); + + if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) + flags = format (flags, "S"); + + if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) + flags = format (flags, "D"); + + if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY) + flags = format (flags, "N"); + + s = format (s, "%=12U%=16U%=6s%=20U%U", + format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated, + format_ip4_address, &e->ip4_address, + flags ? 
(char *) flags : "", + format_ethernet_address, e->ethernet_address, + format_vnet_sw_interface_name, vnm, si); + + vec_free (flags); + return s; +} + +typedef struct +{ + u8 packet_data[64]; +} ethernet_arp_input_trace_t; + +static u8 * +format_ethernet_arp_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *); + + s = format (s, "%U", + format_ethernet_arp_header, + t->packet_data, sizeof (t->packet_data)); + + return s; +} + +static u8 * +format_arp_term_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *); + + /* arp-term trace data saved is either arp or ip6/icmp6 packet: + - for arp, the 1st 16-bit field is hw type of value of 0x0001. + - for ip6, the first nibble has value of 6. */ + s = format (s, "%U", t->packet_data[0] == 0 ? + format_ethernet_arp_header : format_ip6_header, + t->packet_data, sizeof (t->packet_data)); + + return s; +} + +static void +arp_nbr_probe (ip_adjacency_t * adj) +{ + vnet_main_t *vnm = vnet_get_main (); + ip4_main_t *im = &ip4_main; + ip_interface_address_t *ia; + ethernet_arp_header_t *h; + vnet_hw_interface_t *hi; + vnet_sw_interface_t *si; + ip4_address_t *src; + vlib_buffer_t *b; + vlib_main_t *vm; + u32 bi = 0; + + vm = vlib_get_main (); + + si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index); + + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + return; + } + + src = + ip4_interface_address_matching_destination (im, + &adj->sub_type.nbr.next_hop. + ip4, + adj->rewrite_header. 
+ sw_if_index, &ia); + if (!src) + { + return; + } + + h = + vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, + &bi); + + hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index); + + clib_memcpy (h->ip4_over_ethernet[0].ethernet, + hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet)); + + h->ip4_over_ethernet[0].ip4 = src[0]; + h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4; + + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index; + + /* Add encapsulation string for software interface (e.g. ethernet header). */ + vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); + vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); + + { + vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 *to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } +} + +static void +arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e) +{ + adj_nbr_update_rewrite + (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE, + ethernet_build_rewrite (vnet_get_main (), + e->sw_if_index, + adj_get_link_type (ai), e->ethernet_address)); +} + +static void +arp_mk_incomplete (adj_index_t ai) +{ + ip_adjacency_t *adj = adj_get (ai); + + adj_nbr_update_rewrite + (ai, + ADJ_NBR_REWRITE_FLAG_INCOMPLETE, + ethernet_build_rewrite (vnet_get_main (), + adj->rewrite_header.sw_if_index, + VNET_LINK_ARP, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST)); +} + +static ethernet_arp_ip4_entry_t * +arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr) +{ + ethernet_arp_main_t *am = &ethernet_arp_main; + ethernet_arp_ip4_entry_t *e = NULL; + uword *p; + + if (NULL != eai->arp_entries) + { + p = hash_get (eai->arp_entries, addr->as_u32); + if (!p) + return (NULL); + + e = pool_elt_at_index (am->ip4_entry_pool, p[0]); + } + + return (e); +} 
+ +static adj_walk_rc_t +arp_mk_complete_walk (adj_index_t ai, void *ctx) +{ + ethernet_arp_ip4_entry_t *e = ctx; + + arp_mk_complete (ai, e); + + return (ADJ_WALK_RC_CONTINUE); +} + +static adj_walk_rc_t +arp_mk_incomplete_walk (adj_index_t ai, void *ctx) +{ + arp_mk_incomplete (ai); + + return (ADJ_WALK_RC_CONTINUE); +} + +void +arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) +{ + ethernet_arp_main_t *am = &ethernet_arp_main; + ethernet_arp_interface_t *arp_int; + ethernet_arp_ip4_entry_t *e; + ip_adjacency_t *adj; + + adj = adj_get (ai); + + vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index); + arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index]; + e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + case IP_LOOKUP_NEXT_GLEAN: + if (NULL != e) + { + adj_nbr_walk_nh4 (sw_if_index, + &e->ip4_address, arp_mk_complete_walk, e); + } + else + { + /* + * no matching ARP entry. + * construct the rewrite required for an ARP packet, and stick + * that in the adj's pipe to smoke. + */ + adj_nbr_update_rewrite + (ai, + ADJ_NBR_REWRITE_FLAG_INCOMPLETE, + ethernet_build_rewrite + (vnm, + sw_if_index, + VNET_LINK_ARP, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST)); + + /* + * since the FIB has added this adj for a route, it makes sense it + * may want to forward traffic sometime soon. Let's send a + * speculative ARP. just one. If we were to do periodically that + * wouldn't be bad either, but that's more code than i'm prepared to + * write at this time for relatively little reward. 
+ */ + arp_nbr_probe (adj); + } + break; + case IP_LOOKUP_NEXT_MCAST: + { + /* + * Construct a partial rewrite from the known ethernet mcast dest MAC + */ + u8 *rewrite; + u8 offset; + + rewrite = ethernet_build_rewrite (vnm, + sw_if_index, + adj->ia_link, + ethernet_ip4_mcast_dst_addr ()); + offset = vec_len (rewrite) - 2; + + /* + * Complete the remaining fields of the adj's rewrite to direct the + * complete of the rewrite at switch time by copying in the IP + * dst address's bytes. + * Offset is 2 bytes into the MAC destination address. And we copy 23 bits + * from the address. + */ + adj_mcast_update_rewrite (ai, rewrite, offset, 0x007fffff); + + break; + } + case IP_LOOKUP_NEXT_DROP: + case IP_LOOKUP_NEXT_PUNT: + case IP_LOOKUP_NEXT_LOCAL: + case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: + case IP_LOOKUP_NEXT_MIDCHAIN: + case IP_LOOKUP_NEXT_ICMP_ERROR: + case IP_LOOKUP_N_NEXT: + ASSERT (0); + break; + } +} + +static void +arp_adj_fib_add (ethernet_arp_ip4_entry_t * e, u32 fib_index) +{ + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = e->ip4_address, + }; + + e->fib_entry_index = + fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + DPO_PROTO_IP4, &pfx.fp_addr, + e->sw_if_index, ~0, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_lock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ); +} + +static int +vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, + vnet_arp_set_ip4_over_ethernet_rpc_args_t + * args) +{ + ethernet_arp_ip4_entry_t *e = 0; + ethernet_arp_main_t *am = &ethernet_arp_main; + ethernet_arp_ip4_over_ethernet_address_t *a = &args->a; + vlib_main_t *vm = vlib_get_main (); + int make_new_arp_cache_entry = 1; + uword *p; + pending_resolution_t *pr, *mc; + ethernet_arp_interface_t *arp_int; + int is_static = args->is_static; + u32 sw_if_index = args->sw_if_index; + int is_no_fib_entry = args->is_no_fib_entry; + + vec_validate 
(am->ethernet_arp_by_sw_if_index, sw_if_index); + + arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index]; + + if (NULL != arp_int->arp_entries) + { + p = hash_get (arp_int->arp_entries, a->ip4.as_u32); + if (p) + { + e = pool_elt_at_index (am->ip4_entry_pool, p[0]); + + /* Refuse to over-write static arp. */ + if (!is_static && (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)) + return -2; + make_new_arp_cache_entry = 0; + } + } + + if (make_new_arp_cache_entry) + { + pool_get (am->ip4_entry_pool, e); + + if (NULL == arp_int->arp_entries) + { + arp_int->arp_entries = hash_create (0, sizeof (u32)); + } + + hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool); + + e->sw_if_index = sw_if_index; + e->ip4_address = a->ip4; + e->fib_entry_index = FIB_NODE_INDEX_INVALID; + clib_memcpy (e->ethernet_address, + a->ethernet, sizeof (e->ethernet_address)); + + if (!is_no_fib_entry) + { + arp_adj_fib_add (e, + ip4_fib_table_get_index_for_sw_if_index + (e->sw_if_index)); + } + else + { + e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY; + } + } + else + { + /* + * prevent a DoS attack from the data-plane that + * spams us with no-op updates to the MAC address + */ + if (0 == memcmp (e->ethernet_address, + a->ethernet, sizeof (e->ethernet_address))) + goto check_customers; + + /* Update time stamp and ethernet address. */ + clib_memcpy (e->ethernet_address, a->ethernet, + sizeof (e->ethernet_address)); + } + + e->cpu_time_last_updated = clib_cpu_time_now (); + if (is_static) + e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC; + else + e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC; + + adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e); + +check_customers: + /* Customer(s) waiting for this address to be resolved? 
*/ + p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32); + if (p) + { + u32 next_index; + next_index = p[0]; + + while (next_index != (u32) ~ 0) + { + pr = pool_elt_at_index (am->pending_resolutions, next_index); + vlib_process_signal_event (vm, pr->node_index, + pr->type_opaque, pr->data); + next_index = pr->next_index; + pool_put (am->pending_resolutions, pr); + } + + hash_unset (am->pending_resolutions_by_address, a->ip4.as_u32); + } + + /* Customer(s) requesting ARP event for this address? */ + p = hash_get (am->mac_changes_by_address, a->ip4.as_u32); + if (p) + { + u32 next_index; + next_index = p[0]; + + while (next_index != (u32) ~ 0) + { + int (*fp) (u32, u8 *, u32, u32); + int rv = 1; + mc = pool_elt_at_index (am->mac_changes, next_index); + fp = mc->data_callback; + + /* Call the user's data callback, return 1 to suppress dup events */ + if (fp) + rv = (*fp) (mc->data, a->ethernet, sw_if_index, 0); + + /* + * Signal the resolver process, as long as the user + * says they want to be notified + */ + if (rv == 0) + vlib_process_signal_event (vm, mc->node_index, + mc->type_opaque, mc->data); + next_index = mc->next_index; + } + } + + return 0; +} + +void +vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm, + void *address_arg, + uword node_index, + uword type_opaque, uword data) +{ + ethernet_arp_main_t *am = &ethernet_arp_main; + ip4_address_t *address = address_arg; + uword *p; + pending_resolution_t *pr; + + pool_get (am->pending_resolutions, pr); + + pr->next_index = ~0; + pr->node_index = node_index; + pr->type_opaque = type_opaque; + pr->data = data; + pr->data_callback = 0; + + p = hash_get (am->pending_resolutions_by_address, address->as_u32); + if (p) + { + /* Insert new resolution at the head of the list */ + pr->next_index = p[0]; + hash_unset (am->pending_resolutions_by_address, address->as_u32); + } + + hash_set (am->pending_resolutions_by_address, address->as_u32, + pr - am->pending_resolutions); +} + +int 
+vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, + void *data_callback, + u32 pid, + void *address_arg, + uword node_index, + uword type_opaque, uword data, int is_add) +{ + ethernet_arp_main_t *am = &ethernet_arp_main; + ip4_address_t *address = address_arg; + + /* Try to find an existing entry */ + u32 *first = (u32 *) hash_get (am->mac_changes_by_address, address->as_u32); + u32 *p = first; + pending_resolution_t *mc; + while (p && *p != ~0) + { + mc = pool_elt_at_index (am->mac_changes, *p); + if (mc->node_index == node_index && mc->type_opaque == type_opaque + && mc->pid == pid) + break; + p = &mc->next_index; + } + + int found = p && *p != ~0; + if (is_add) + { + if (found) + return VNET_API_ERROR_ENTRY_ALREADY_EXISTS; + + pool_get (am->mac_changes, mc); + *mc = (pending_resolution_t) + { + .next_index = ~0,.node_index = node_index,.type_opaque = + type_opaque,.data = data,.data_callback = data_callback,.pid = + pid,}; + + /* Insert new resolution at the end of the list */ + u32 new_idx = mc - am->mac_changes; + if (p) + p[0] = new_idx; + else + hash_set (am->mac_changes_by_address, address->as_u32, new_idx); + } + else + { + if (!found) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* Clients may need to clean up pool entries, too */ + void (*fp) (u32, u8 *) = data_callback; + if (fp) + (*fp) (mc->data, 0 /* no new mac addrs */ ); + + /* Remove the entry from the list and delete the entry */ + *p = mc->next_index; + pool_put (am->mac_changes, mc); + + /* Remove from hash if we deleted the last entry */ + if (*p == ~0 && p == first) + hash_unset (am->mac_changes_by_address, address->as_u32); + } + return 0; +} + +/* Either we drop the packet or we send a reply to the sender. 
*/ +typedef enum +{ + ARP_INPUT_NEXT_DROP, + ARP_INPUT_NEXT_REPLY_TX, + ARP_INPUT_N_NEXT, +} arp_input_next_t; + +#define foreach_ethernet_arp_error \ + _ (replies_sent, "ARP replies sent") \ + _ (l2_type_not_ethernet, "L2 type not ethernet") \ + _ (l3_type_not_ip4, "L3 type not IP4") \ + _ (l3_src_address_not_local, "IP4 source address not local to subnet") \ + _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \ + _ (l3_src_address_is_local, "IP4 source address matches local interface") \ + _ (l3_src_address_learned, "ARP request IP4 source address learned") \ + _ (replies_received, "ARP replies received") \ + _ (opcode_not_request, "ARP opcode not request") \ + _ (proxy_arp_replies_sent, "Proxy ARP replies sent") \ + _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \ + _ (gratuitous_arp, "ARP probe or announcement dropped") \ + _ (interface_no_table, "Interface is not mapped to an IP table") \ + _ (interface_not_ip_enabled, "Interface is not IP enabled") \ + +typedef enum +{ +#define _(sym,string) ETHERNET_ARP_ERROR_##sym, + foreach_ethernet_arp_error +#undef _ + ETHERNET_ARP_N_ERROR, +} ethernet_arp_input_error_t; + + +static void +unset_random_arp_entry (void) +{ + ethernet_arp_main_t *am = &ethernet_arp_main; + ethernet_arp_ip4_entry_t *e; + vnet_main_t *vnm = vnet_get_main (); + ethernet_arp_ip4_over_ethernet_address_t delme; + u32 index; + + index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor); + am->arp_delete_rotor = index; + + /* Try again from elt 0, could happen if an intfc goes down */ + if (index == ~0) + { + index = pool_next_index (am->ip4_entry_pool, am->arp_delete_rotor); + am->arp_delete_rotor = index; + } + + /* Nothing left in the pool */ + if (index == ~0) + return; + + e = pool_elt_at_index (am->ip4_entry_pool, index); + + clib_memcpy (&delme.ethernet, e->ethernet_address, 6); + delme.ip4.as_u32 = e->ip4_address.as_u32; + + vnet_arp_unset_ip4_over_ethernet (vnm, e->sw_if_index, 
&delme); +} + +static int +arp_unnumbered (vlib_buffer_t * p0, + u32 input_sw_if_index, u32 conn_sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *vim = &vnm->interface_main; + vnet_sw_interface_t *si; + + /* verify that the input interface is unnumbered to the connected. + * the connected interface is the interface on which the subnet is + * configured */ + si = &vim->sw_interfaces[input_sw_if_index]; + + if (!(si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED && + (si->unnumbered_sw_if_index == conn_sw_if_index))) + { + /* the input interface is not unnumbered to the interface on which + * the sub-net is configured that covers the ARP request. + * So this is not the case for unnumbered.. */ + return 0; + } + + return !0; +} + +static u32 +arp_learn (vnet_main_t * vnm, + ethernet_arp_main_t * am, u32 sw_if_index, void *addr) +{ + if (am->limit_arp_cache_size && + pool_elts (am->ip4_entry_pool) >= am->limit_arp_cache_size) + unset_random_arp_entry (); + + vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, addr, 0, 0); + return (ETHERNET_ARP_ERROR_l3_src_address_learned); +} + +static uword +arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + ethernet_arp_main_t *am = &ethernet_arp_main; + vnet_main_t *vnm = vnet_get_main (); + ip4_main_t *im4 = &ip4_main; + u32 n_left_from, next_index, *from, *to_next; + u32 n_replies_sent = 0, n_proxy_arp_replies_sent = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (ethernet_arp_input_trace_t)); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *p0; + vnet_hw_interface_t *hw_if0; + ethernet_arp_header_t *arp0; + 
ethernet_header_t *eth_rx, *eth_tx; + ip4_address_t *if_addr0, proxy_src; + u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0; + u8 is_request0, dst_is_local0, is_unnum0, is_vrrp_reply0; + ethernet_proxy_arp_t *pa; + fib_node_index_t dst_fei, src_fei; + fib_prefix_t pfx0; + fib_entry_flag_t src_flags, dst_flags; + u8 *rewrite0, rewrite0_len; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + pa = 0; + + p0 = vlib_get_buffer (vm, pi0); + arp0 = vlib_buffer_get_current (p0); + /* Fill in ethernet header. */ + eth_rx = ethernet_buffer_get_header (p0); + + is_request0 = arp0->opcode + == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request); + + error0 = ETHERNET_ARP_ERROR_replies_sent; + + error0 = + (arp0->l2_type != + clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ? + ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0); + error0 = + (arp0->l3_type != + clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ? + ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0); + + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + /* not playing the ARP game if the interface is not IPv4 enabled */ + error0 = + (im4->ip_enabled_by_sw_if_index[sw_if_index0] == 0 ? + ETHERNET_ARP_ERROR_interface_not_ip_enabled : error0); + + if (error0) + goto drop2; + + /* Check that IP address is local and matches incoming interface. */ + fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + if (~0 == fib_index0) + { + error0 = ETHERNET_ARP_ERROR_interface_no_table; + goto drop2; + + } + dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0), + &arp0->ip4_over_ethernet[1].ip4, + 32); + dst_flags = fib_entry_get_flags (dst_fei); + + conn_sw_if_index0 = fib_entry_get_resolving_interface (dst_fei); + + /* Honor unnumbered interface, if any */ + is_unnum0 = sw_if_index0 != conn_sw_if_index0; + + { + /* + * we're looking for FIB entries that indicate the source + * is attached. 
There may be more specific non-attached + * routes that match the source, but these do not influence + * whether we respond to an ARP request, i.e. they do not + * influence whether we are the correct way for the sender + * to reach us, they only affect how we reach the sender. + */ + fib_entry_t *src_fib_entry; + fib_entry_src_t *src; + fib_source_t source; + fib_prefix_t pfx; + int attached; + int mask; + + mask = 32; + attached = 0; + + do + { + src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0), + &arp0-> + ip4_over_ethernet[0].ip4, + mask); + src_fib_entry = fib_entry_get (src_fei); + + /* + * It's possible that the source that provides the + * flags we need, or the flags we must not have, + * is not the best source, so check them all. + */ + /* *INDENT-OFF* */ + FOR_EACH_SRC_ADDED(src_fib_entry, src, source, + ({ + src_flags = fib_entry_get_flags_for_source (src_fei, source); + + /* Reject requests/replies with our local interface + address. */ + if (FIB_ENTRY_FLAG_LOCAL & src_flags) + { + error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local; + /* + * When VPP has an interface whose address is also + * applied to a TAP interface on the host, then VPP's + * TAP interface will be unnumbered to the 'real' + * interface and do proxy ARP from the host. + * The curious aspect of this setup is that ARP requests + * from the host will come from the VPP's own address. + * So don't drop immediately here, instead go see if this + * is a proxy ARP case. + */ + goto drop1; + } + /* A Source must also be local to subnet of matching + * interface address. */ + if ((FIB_ENTRY_FLAG_ATTACHED & src_flags) || + (FIB_ENTRY_FLAG_CONNECTED & src_flags)) + { + attached = 1; + break; + } + /* + * else + * The packet was sent from an address that is not + * connected nor attached i.e. it is not from an + * address that is covered by a link's sub-net, + * nor is it a already learned host resp. + */ + })); + /* *INDENT-ON* */ + + /* + * shorter mask lookup for the next iteration. 
+ */ + fib_entry_get_prefix (src_fei, &pfx); + mask = pfx.fp_len - 1; + + /* + * continue until we hit the default route or we find + * the attached we are looking for. The most likely + * outcome is we find the attached with the first source + * on the first lookup. + */ + } + while (!attached && + !fib_entry_is_sourced (src_fei, FIB_SOURCE_DEFAULT_ROUTE)); + + if (!attached) + { + /* + * the matching route is not attached, i.e. it was + * added as a result of routing, rather than interface/ARP + * configuration. If the matching route is not a host route + * (i.e. a /32) + */ + error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local; + goto drop2; + } + } + + if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags)) + { + error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local; + goto drop1; + } + + if (sw_if_index0 != fib_entry_get_resolving_interface (src_fei)) + { + /* + * The interface the ARP was received on is not the interface + * on which the covering prefix is configured. Maybe this is a + * case for unnumbered. 
+ */ + is_unnum0 = 1; + } + + dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags); + fib_entry_get_prefix (dst_fei, &pfx0); + if_addr0 = &pfx0.fp_addr.ip4; + + is_vrrp_reply0 = + ((arp0->opcode == + clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)) + && + (!memcmp + (arp0->ip4_over_ethernet[0].ethernet, vrrp_prefix, + sizeof (vrrp_prefix)))); + + /* Trash ARP packets whose ARP-level source addresses do not + match their L2-frame-level source addresses, unless it's + a reply from a VRRP virtual router */ + if (memcmp + (eth_rx->src_address, arp0->ip4_over_ethernet[0].ethernet, + sizeof (eth_rx->src_address)) && !is_vrrp_reply0) + { + error0 = ETHERNET_ARP_ERROR_l2_address_mismatch; + goto drop2; + } + + /* Learn or update sender's mapping only for replies to addresses + * that are local to the subnet */ + if (arp0->opcode == + clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply) && + dst_is_local0) + { + error0 = arp_learn (vnm, am, sw_if_index0, + &arp0->ip4_over_ethernet[0]); + goto drop1; + } + + send_reply: + /* Send a reply. + An adjacency to the sender is not always present, + so we use the interface to build us a rewrite string + which will contain all the necessary tags. */ + rewrite0 = ethernet_build_rewrite (vnm, sw_if_index0, + VNET_LINK_ARP, + eth_rx->src_address); + rewrite0_len = vec_len (rewrite0); + + /* Figure out how much to rewind current data from adjacency. 
*/ + vlib_buffer_advance (p0, -rewrite0_len); + eth_tx = vlib_buffer_get_current (p0); + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; + hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + /* Send reply back through input interface */ + vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; + next0 = ARP_INPUT_NEXT_REPLY_TX; + + arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply); + + arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0]; + + clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, + hw_if0->hw_address, 6); + clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) = + if_addr0->data_u32; + + /* Hardware must be ethernet-like. */ + ASSERT (vec_len (hw_if0->hw_address) == 6); + + /* the rx and tx ethernet headers will overlap in the case + * when we received a tagged VLAN=0 packet, but we are sending + * back untagged */ + clib_memcpy (eth_tx, rewrite0, vec_len (rewrite0)); + vec_free (rewrite0); + + if (NULL == pa) + { + if (is_unnum0) + { + if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0)) + goto drop2; + } + } + + /* We are going to reply to this request, so, in the absence of + errors, learn the sender */ + if (!error0) + error0 = arp_learn (vnm, am, sw_if_index0, + &arp0->ip4_over_ethernet[1]); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); + + n_replies_sent += 1; + continue; + + drop1: + if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 || + (arp0->ip4_over_ethernet[0].ip4.as_u32 == + arp0->ip4_over_ethernet[1].ip4.as_u32)) + { + error0 = ETHERNET_ARP_ERROR_gratuitous_arp; + goto drop2; + } + /* See if proxy arp is configured for the address */ + if (is_request0) + { + vnet_sw_interface_t *si; + u32 this_addr = clib_net_to_host_u32 + (arp0->ip4_over_ethernet[1].ip4.as_u32); + u32 fib_index0; + + si = vnet_get_sw_interface (vnm, sw_if_index0); + + if (!(si->flags & VNET_SW_INTERFACE_FLAG_PROXY_ARP)) + goto drop2; + + fib_index0 = vec_elt 
(im4->fib_index_by_sw_if_index, + sw_if_index0); + + vec_foreach (pa, am->proxy_arps) + { + u32 lo_addr = clib_net_to_host_u32 (pa->lo_addr); + u32 hi_addr = clib_net_to_host_u32 (pa->hi_addr); + + /* an ARP request hit in the proxy-arp table? */ + if ((this_addr >= lo_addr && this_addr <= hi_addr) && + (fib_index0 == pa->fib_index)) + { + proxy_src.as_u32 = + arp0->ip4_over_ethernet[1].ip4.data_u32; + + /* + * change the interface address to the proxied + */ + if_addr0 = &proxy_src; + is_unnum0 = 0; + n_proxy_arp_replies_sent++; + goto send_reply; + } + } + } + + drop2: + + next0 = ARP_INPUT_NEXT_DROP; + p0->error = node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_error_count (vm, node->node_index, + ETHERNET_ARP_ERROR_replies_sent, + n_replies_sent - n_proxy_arp_replies_sent); + + vlib_error_count (vm, node->node_index, + ETHERNET_ARP_ERROR_proxy_arp_replies_sent, + n_proxy_arp_replies_sent); + return frame->n_vectors; +} + +static char *ethernet_arp_error_strings[] = { +#define _(sym,string) string, + foreach_ethernet_arp_error +#undef _ +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (arp_input_node, static) = +{ + .function = arp_input, + .name = "arp-input", + .vector_size = sizeof (u32), + .n_errors = ETHERNET_ARP_N_ERROR, + .error_strings = ethernet_arp_error_strings, + .n_next_nodes = ARP_INPUT_N_NEXT, + .next_nodes = { + [ARP_INPUT_NEXT_DROP] = "error-drop", + [ARP_INPUT_NEXT_REPLY_TX] = "interface-output", + }, + .format_buffer = format_ethernet_arp_header, + .format_trace = format_ethernet_arp_input_trace, +}; +/* *INDENT-ON* */ + +static int +ip4_arp_entry_sort (void *a1, void *a2) +{ + ethernet_arp_ip4_entry_t *e1 = a1; + ethernet_arp_ip4_entry_t *e2 = a2; + + int cmp; + vnet_main_t *vnm = vnet_get_main (); + + cmp = vnet_sw_interface_compare (vnm, e1->sw_if_index, e2->sw_if_index); + if (!cmp) + cmp = 
ip4_address_compare (&e1->ip4_address, &e2->ip4_address); + return cmp; +} + +ethernet_arp_ip4_entry_t * +ip4_neighbor_entries (u32 sw_if_index) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *n, *ns = 0; + + /* *INDENT-OFF* */ + pool_foreach (n, am->ip4_entry_pool, ({ + if (sw_if_index != ~0 && n->sw_if_index != sw_if_index) + continue; + vec_add1 (ns, n[0]); + })); + /* *INDENT-ON* */ + + if (ns) + vec_sort_with_function (ns, ip4_arp_entry_sort); + return ns; +} + +static clib_error_t * +show_ip4_arp (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *e, *es; + ethernet_proxy_arp_t *pa; + clib_error_t *error = 0; + u32 sw_if_index; + + /* Filter entries by interface if given. */ + sw_if_index = ~0; + (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index); + + es = ip4_neighbor_entries (sw_if_index); + if (es) + { + vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0); + vec_foreach (e, es) + { + vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e); + } + vec_free (es); + } + + if (vec_len (am->proxy_arps)) + { + vlib_cli_output (vm, "Proxy arps enabled for:"); + vec_foreach (pa, am->proxy_arps) + { + vlib_cli_output (vm, "Fib_index %d %U - %U ", + pa->fib_index, + format_ip4_address, &pa->lo_addr, + format_ip4_address, &pa->hi_addr); + } + } + + return error; +} + +/*? + * Display all the IPv4 ARP entries. 
+ * + * @cliexpar + * Example of how to display the IPv4 ARP table: + * @cliexstart{show ip arp} + * Time FIB IP4 Flags Ethernet Interface + * 346.3028 0 6.1.1.3 de:ad:be:ef:ba:be GigabitEthernet2/0/0 + * 3077.4271 0 6.1.1.4 S de:ad:be:ef:ff:ff GigabitEthernet2/0/0 + * 2998.6409 1 6.2.2.3 de:ad:be:ef:00:01 GigabitEthernet2/0/0 + * Proxy arps enabled for: + * Fib_index 0 6.0.0.1 - 6.0.0.11 + * @cliexend + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_ip4_arp_command, static) = { + .path = "show ip arp", + .function = show_ip4_arp, + .short_help = "show ip arp", +}; +/* *INDENT-ON* */ + +typedef struct +{ + pg_edit_t l2_type, l3_type; + pg_edit_t n_l2_address_bytes, n_l3_address_bytes; + pg_edit_t opcode; + struct + { + pg_edit_t ethernet; + pg_edit_t ip4; + } ip4_over_ethernet[2]; +} pg_ethernet_arp_header_t; + +static inline void +pg_ethernet_arp_header_init (pg_ethernet_arp_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, ethernet_arp_header_t, f); + _(l2_type); + _(l3_type); + _(n_l2_address_bytes); + _(n_l3_address_bytes); + _(opcode); + _(ip4_over_ethernet[0].ethernet); + _(ip4_over_ethernet[0].ip4); + _(ip4_over_ethernet[1].ethernet); + _(ip4_over_ethernet[1].ip4); +#undef _ +} + +uword +unformat_pg_arp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t *s = va_arg (*args, pg_stream_t *); + pg_ethernet_arp_header_t *p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ethernet_arp_header_t), + &group_index); + pg_ethernet_arp_header_init (p); + + /* Defaults. 
*/ + pg_edit_set_fixed (&p->l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet); + pg_edit_set_fixed (&p->l3_type, ETHERNET_TYPE_IP4); + pg_edit_set_fixed (&p->n_l2_address_bytes, 6); + pg_edit_set_fixed (&p->n_l3_address_bytes, 4); + + if (!unformat (input, "%U: %U/%U -> %U/%U", + unformat_pg_edit, + unformat_ethernet_arp_opcode_net_byte_order, &p->opcode, + unformat_pg_edit, + unformat_ethernet_address, &p->ip4_over_ethernet[0].ethernet, + unformat_pg_edit, + unformat_ip4_address, &p->ip4_over_ethernet[0].ip4, + unformat_pg_edit, + unformat_ethernet_address, &p->ip4_over_ethernet[1].ethernet, + unformat_pg_edit, + unformat_ip4_address, &p->ip4_over_ethernet[1].ip4)) + { + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; + } + return 1; +} + +clib_error_t * +ip4_set_arp_limit (u32 arp_limit) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + + am->limit_arp_cache_size = arp_limit; + return 0; +} + +/** + * @brief Control Plane hook to remove an ARP entry + */ +int +vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, void *a_arg) +{ + ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args; + + args.sw_if_index = sw_if_index; + args.flags = ETHERNET_ARP_ARGS_REMOVE; + clib_memcpy (&args.a, a, sizeof (*a)); + + vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, + (u8 *) & args, sizeof (args)); + return 0; +} + +/** + * @brief Internally generated event to flush the ARP cache on an + * interface state change event. + * A flush will remove dynamic ARP entries, and for statics remove the MAC + * address from the corresponding adjacencies. 
+ */ +static int +vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, void *a_arg) +{ + ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args; + + args.sw_if_index = sw_if_index; + args.flags = ETHERNET_ARP_ARGS_FLUSH; + clib_memcpy (&args.a, a, sizeof (*a)); + + vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, + (u8 *) & args, sizeof (args)); + return 0; +} + +/** + * @brief Internally generated event to populate the ARP cache on an + * interface state change event. + * For static entries this will re-source the adjacencies. + * + * @param sw_if_index The interface on which the ARP entires are acted + */ +static int +vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, void *a_arg) +{ + ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args; + + args.sw_if_index = sw_if_index; + args.flags = ETHERNET_ARP_ARGS_POPULATE; + clib_memcpy (&args.a, a, sizeof (*a)); + + vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, + (u8 *) & args, sizeof (args)); + return 0; +} + +/** + * @brief publish wildcard arp event + * @param sw_if_index The interface on which the ARP entires are acted + */ +static int +vnet_arp_wc_publish (u32 sw_if_index, void *a_arg) +{ + ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args = { + .flags = ETHERNET_ARP_ARGS_WC_PUB, + .sw_if_index = sw_if_index, + .a = *a + }; + + vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, + (u8 *) & args, sizeof (args)); + return 0; +} + +static void +vnet_arp_wc_publish_internal (vnet_main_t * vnm, + vnet_arp_set_ip4_over_ethernet_rpc_args_t * + args) +{ + vlib_main_t *vm = vlib_get_main (); + ethernet_arp_main_t *am = ðernet_arp_main; + uword ni = am->wc_ip4_arp_publisher_node; + uword et = am->wc_ip4_arp_publisher_et; + + if (ni == (uword) ~ 0) + return; + wc_arp_report_t *r = + 
vlib_process_signal_event_data (vm, ni, et, 1, sizeof *r); + r->ip4 = args->a.ip4.as_u32; + r->sw_if_index = args->sw_if_index; + memcpy (r->mac, args->a.ethernet, sizeof r->mac); +} + +void +wc_arp_set_publisher_node (uword node_index, uword event_type) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + am->wc_ip4_arp_publisher_node = node_index; + am->wc_ip4_arp_publisher_et = event_type; +} + +/* + * arp_add_del_interface_address + * + * callback when an interface address is added or deleted + */ +static void +arp_add_del_interface_address (ip4_main_t * im, + uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, u32 is_del) +{ + /* + * Flush the ARP cache of all entries covered by the address + * that is being removed. + */ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *e; + + if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index) + return; + + if (is_del) + { + ethernet_arp_interface_t *eai; + u32 i, *to_delete = 0; + hash_pair_t *pair; + + eai = &am->ethernet_arp_by_sw_if_index[sw_if_index]; + + /* *INDENT-OFF* */ + hash_foreach_pair (pair, eai->arp_entries, + ({ + e = pool_elt_at_index(am->ip4_entry_pool, + pair->value[0]); + if (ip4_destination_matches_route (im, &e->ip4_address, + address, address_length)) + { + vec_add1 (to_delete, e - am->ip4_entry_pool); + } + })); + /* *INDENT-ON* */ + + for (i = 0; i < vec_len (to_delete); i++) + { + ethernet_arp_ip4_over_ethernet_address_t delme; + e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]); + + clib_memcpy (&delme.ethernet, e->ethernet_address, 6); + delme.ip4.as_u32 = e->ip4_address.as_u32; + + vnet_arp_flush_ip4_over_ethernet (vnet_get_main (), + e->sw_if_index, &delme); + } + + vec_free (to_delete); + } +} + +void +arp_adj_fib_remove (ethernet_arp_ip4_entry_t * e, u32 fib_index) +{ + if (FIB_NODE_INDEX_INVALID != e->fib_entry_index) + { + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + 
.fp_addr.ip4 = e->ip4_address, + }; + u32 fib_index; + + fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); + + fib_table_entry_path_remove (fib_index, &pfx, + FIB_SOURCE_ADJ, + DPO_PROTO_IP4, + &pfx.fp_addr, + e->sw_if_index, ~0, 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ); + } +} + +static void +arp_table_bind (ip4_main_t * im, + uword opaque, + u32 sw_if_index, u32 new_fib_index, u32 old_fib_index) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_interface_t *eai; + ethernet_arp_ip4_entry_t *e; + hash_pair_t *pair; + + /* + * the IP table that the interface is bound to has changed. + * reinstall all the adj fibs. + */ + + if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index) + return; + + eai = &am->ethernet_arp_by_sw_if_index[sw_if_index]; + + /* *INDENT-OFF* */ + hash_foreach_pair (pair, eai->arp_entries, + ({ + e = pool_elt_at_index(am->ip4_entry_pool, + pair->value[0]); + /* + * remove the adj-fib from the old table and add to the new + */ + arp_adj_fib_remove(e, old_fib_index); + arp_adj_fib_add(e, new_fib_index); + })); + /* *INDENT-ON* */ + +} + +static clib_error_t * +ethernet_arp_init (vlib_main_t * vm) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ip4_main_t *im = &ip4_main; + clib_error_t *error; + pg_node_t *pn; + + if ((error = vlib_call_init_function (vm, ethernet_init))) + return error; + + ethernet_register_input_type (vm, ETHERNET_TYPE_ARP, arp_input_node.index); + + pn = pg_get_node (arp_input_node.index); + pn->unformat_edit = unformat_pg_arp_header; + + am->opcode_by_name = hash_create_string (0, sizeof (uword)); +#define _(o) hash_set_mem (am->opcode_by_name, #o, ETHERNET_ARP_OPCODE_##o); + foreach_ethernet_arp_opcode; +#undef _ + + /* $$$ configurable */ + am->limit_arp_cache_size = 50000; + + am->pending_resolutions_by_address = hash_create (0, sizeof (uword)); + am->mac_changes_by_address = hash_create (0, sizeof (uword)); + 
am->wc_ip4_arp_publisher_node = (uword) ~ 0; + + /* don't trace ARP error packets */ + { + vlib_node_runtime_t *rt = + vlib_node_get_runtime (vm, arp_input_node.index); + +#define _(a,b) \ + vnet_pcap_drop_trace_filter_add_del \ + (rt->errors[ETHERNET_ARP_ERROR_##a], \ + 1 /* is_add */); + foreach_ethernet_arp_error +#undef _ + } + + ip4_add_del_interface_address_callback_t cb; + cb.function = arp_add_del_interface_address; + cb.function_opaque = 0; + vec_add1 (im->add_del_interface_address_callbacks, cb); + + ip4_table_bind_callback_t cbt; + cbt.function = arp_table_bind; + cbt.function_opaque = 0; + vec_add1 (im->table_bind_callbacks, cbt); + + return 0; +} + +VLIB_INIT_FUNCTION (ethernet_arp_init); + +static void +arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + + arp_adj_fib_remove (e, + ip4_fib_table_get_index_for_sw_if_index + (e->sw_if_index)); + hash_unset (eai->arp_entries, e->ip4_address.as_u32); + pool_put (am->ip4_entry_pool, e); +} + +static inline int +vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm, + vnet_arp_set_ip4_over_ethernet_rpc_args_t + * args) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *e; + ethernet_arp_interface_t *eai; + + if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index) + return 0; + + eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index]; + + e = arp_entry_find (eai, &args->a.ip4); + + if (NULL != e) + { + arp_entry_free (eai, e); + + adj_nbr_walk_nh4 (e->sw_if_index, + &e->ip4_address, arp_mk_incomplete_walk, NULL); + } + + return 0; +} + +static int +vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm, + vnet_arp_set_ip4_over_ethernet_rpc_args_t + * args) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *e; + ethernet_arp_interface_t *eai; + + if (vec_len (am->ethernet_arp_by_sw_if_index) <= args->sw_if_index) + return 0; + + eai = 
&am->ethernet_arp_by_sw_if_index[args->sw_if_index]; + + e = arp_entry_find (eai, &args->a.ip4); + + if (NULL != e) + { + adj_nbr_walk_nh4 (e->sw_if_index, + &e->ip4_address, arp_mk_incomplete_walk, e); + + /* + * The difference between flush and unset, is that an unset + * means delete for static and dynamic entries. A flush + * means delete only for dynamic. Flushing is what the DP + * does in response to interface events. unset is only done + * by the control plane. + */ + if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) + { + e->flags &= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC; + } + else if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) + { + arp_entry_free (eai, e); + } + } + return (0); +} + +static int +vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm, + vnet_arp_set_ip4_over_ethernet_rpc_args_t + * args) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *e; + ethernet_arp_interface_t *eai; + + vec_validate (am->ethernet_arp_by_sw_if_index, args->sw_if_index); + eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index]; + + e = arp_entry_find (eai, &args->a.ip4); + + if (NULL != e) + { + adj_nbr_walk_nh4 (e->sw_if_index, + &e->ip4_address, arp_mk_complete_walk, e); + } + return (0); +} + +static void +set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t + * a) +{ + vnet_main_t *vm = vnet_get_main (); + ASSERT (vlib_get_thread_index () == 0); + + if (a->flags & ETHERNET_ARP_ARGS_REMOVE) + vnet_arp_unset_ip4_over_ethernet_internal (vm, a); + else if (a->flags & ETHERNET_ARP_ARGS_FLUSH) + vnet_arp_flush_ip4_over_ethernet_internal (vm, a); + else if (a->flags & ETHERNET_ARP_ARGS_POPULATE) + vnet_arp_populate_ip4_over_ethernet_internal (vm, a); + else if (a->flags & ETHERNET_ARP_ARGS_WC_PUB) + vnet_arp_wc_publish_internal (vm, a); + else + vnet_arp_set_ip4_over_ethernet_internal (vm, a); +} + +/** + * @brief Invoked when the interface's admin state changes + */ +static clib_error_t * 
+ethernet_arp_sw_interface_up_down (vnet_main_t * vnm, + u32 sw_if_index, u32 flags) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *e; + u32 i, *to_delete = 0; + + /* *INDENT-OFF* */ + pool_foreach (e, am->ip4_entry_pool, + ({ + if (e->sw_if_index == sw_if_index) + vec_add1 (to_delete, + e - am->ip4_entry_pool); + })); + /* *INDENT-ON* */ + + for (i = 0; i < vec_len (to_delete); i++) + { + ethernet_arp_ip4_over_ethernet_address_t delme; + e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]); + + clib_memcpy (&delme.ethernet, e->ethernet_address, 6); + delme.ip4.as_u32 = e->ip4_address.as_u32; + + if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + { + vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index, &delme); + } + else + { + vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index, &delme); + } + + } + vec_free (to_delete); + + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down); + +static void +increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a) +{ + u8 old; + int i; + + for (i = 3; i >= 0; i--) + { + old = a->ip4.as_u8[i]; + a->ip4.as_u8[i] += 1; + if (old < a->ip4.as_u8[i]) + break; + } + + for (i = 5; i >= 0; i--) + { + old = a->ethernet[i]; + a->ethernet[i] += 1; + if (old < a->ethernet[i]) + break; + } +} + +int +vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, void *a_arg, + int is_static, int is_no_fib_entry) +{ + ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args; + + args.sw_if_index = sw_if_index; + args.is_static = is_static; + args.is_no_fib_entry = is_no_fib_entry; + args.flags = 0; + clib_memcpy (&args.a, a, sizeof (*a)); + + vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, + (u8 *) & args, sizeof (args)); + return 0; +} + +int +vnet_proxy_arp_add_del (ip4_address_t * lo_addr, + ip4_address_t * hi_addr, u32 fib_index, int is_del) +{ + ethernet_arp_main_t *am = 
ðernet_arp_main; + ethernet_proxy_arp_t *pa; + u32 found_at_index = ~0; + + vec_foreach (pa, am->proxy_arps) + { + if (pa->lo_addr == lo_addr->as_u32 + && pa->hi_addr == hi_addr->as_u32 && pa->fib_index == fib_index) + { + found_at_index = pa - am->proxy_arps; + break; + } + } + + if (found_at_index != ~0) + { + /* Delete, otherwise it's already in the table */ + if (is_del) + vec_delete (am->proxy_arps, 1, found_at_index); + return 0; + } + /* delete, no such entry */ + if (is_del) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* add, not in table */ + vec_add2 (am->proxy_arps, pa, 1); + pa->lo_addr = lo_addr->as_u32; + pa->hi_addr = hi_addr->as_u32; + pa->fib_index = fib_index; + return 0; +} + +/* + * Remove any proxy arp entries asdociated with the + * specificed fib. + */ +int +vnet_proxy_arp_fib_reset (u32 fib_id) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_proxy_arp_t *pa; + u32 *entries_to_delete = 0; + u32 fib_index; + int i; + + fib_index = fib_table_find (FIB_PROTOCOL_IP4, fib_id); + if (~0 == fib_index) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + vec_foreach (pa, am->proxy_arps) + { + if (pa->fib_index == fib_index) + { + vec_add1 (entries_to_delete, pa - am->proxy_arps); + } + } + + for (i = 0; i < vec_len (entries_to_delete); i++) + { + vec_delete (am->proxy_arps, 1, entries_to_delete[i]); + } + + vec_free (entries_to_delete); + + return 0; +} + +static clib_error_t * +ip_arp_add_del_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + u32 sw_if_index; + ethernet_arp_ip4_over_ethernet_address_t lo_addr, hi_addr, addr; + int addr_valid = 0; + int is_del = 0; + int count = 1; + u32 fib_index = 0; + u32 fib_id; + int is_static = 0; + int is_no_fib_entry = 0; + int is_proxy = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... 
*/ + if (unformat (input, "%U %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip4_address, &addr.ip4, + unformat_ethernet_address, &addr.ethernet)) + addr_valid = 1; + + else if (unformat (input, "delete") || unformat (input, "del")) + is_del = 1; + + else if (unformat (input, "static")) + is_static = 1; + + else if (unformat (input, "no-fib-entry")) + is_no_fib_entry = 1; + + else if (unformat (input, "count %d", &count)) + ; + + else if (unformat (input, "fib-id %d", &fib_id)) + { + fib_index = fib_table_find (FIB_PROTOCOL_IP4, fib_id); + + if (~0 == fib_index) + return clib_error_return (0, "fib ID %d doesn't exist\n", fib_id); + } + + else if (unformat (input, "proxy %U - %U", + unformat_ip4_address, &lo_addr.ip4, + unformat_ip4_address, &hi_addr.ip4)) + is_proxy = 1; + else + break; + } + + if (is_proxy) + { + (void) vnet_proxy_arp_add_del (&lo_addr.ip4, &hi_addr.ip4, + fib_index, is_del); + return 0; + } + + if (addr_valid) + { + int i; + + for (i = 0; i < count; i++) + { + if (is_del == 0) + { + uword event_type, *event_data = 0; + + /* Park the debug CLI until the arp entry is installed */ + vnet_register_ip4_arp_resolution_event + (vnm, &addr.ip4, vlib_current_process (vm), + 1 /* type */ , 0 /* data */ ); + + vnet_arp_set_ip4_over_ethernet + (vnm, sw_if_index, &addr, is_static, is_no_fib_entry); + + vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + if (event_type != 1) + clib_warning ("event type %d unexpected", event_type); + } + else + vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &addr); + + increment_ip4_and_mac_address (&addr); + } + } + else + { + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + return 0; +} + +/* *INDENT-OFF* */ +/*? + * Add or delete IPv4 ARP cache entries. + * + * @note 'set ip arp' options (e.g. 
delete, static, 'fib-id <id>', + * 'count <number>', 'interface ip4_addr mac_addr') can be added in + * any order and combination. + * + * @cliexpar + * @parblock + * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in + * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format. + * @cliexcmd{set ip arp GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} + * @cliexcmd{set ip arp delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be} + * + * To add or delete an IPv4 ARP cache entry to or from a specific fib + * table: + * @cliexcmd{set ip arp fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} + * @cliexcmd{set ip arp fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} + * + * Add or delete IPv4 static ARP cache entries as follows: + * @cliexcmd{set ip arp static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} + * @cliexcmd{set ip arp static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} + * + * For testing / debugging purposes, the 'set ip arp' command can add or + * delete multiple entries. 
Supply the 'count N' parameter: + * @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe} + * @endparblock + ?*/ +VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = { + .path = "set ip arp", + .short_help = + "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]", + .function = ip_arp_add_del_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_int_proxy_arp_command_fn (vlib_main_t * vm, + unformat_input_t * + input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + u32 sw_if_index; + vnet_sw_interface_t *si; + int enable = 0; + int intfc_set = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + intfc_set = 1; + else if (unformat (input, "enable") || unformat (input, "on")) + enable = 1; + else if (unformat (input, "disable") || unformat (input, "off")) + enable = 0; + else + break; + } + + if (intfc_set == 0) + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + + si = vnet_get_sw_interface (vnm, sw_if_index); + ASSERT (si); + if (enable) + si->flags |= VNET_SW_INTERFACE_FLAG_PROXY_ARP; + else + si->flags &= ~VNET_SW_INTERFACE_FLAG_PROXY_ARP; + + return 0; +} + +/* *INDENT-OFF* */ +/*? + * Enable proxy-arp on an interface. The vpp stack will answer ARP + * requests for the indicated address range. Multiple proxy-arp + * ranges may be provisioned. + * + * @note Proxy ARP as a technology is infamous for blackholing traffic. + * Also, the underlying implementation has not been performance-tuned. + * Avoid creating an unnecessarily large set of ranges. 
+ * + * @cliexpar + * To enable proxy arp on a range of addresses, use: + * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11} + * Append 'del' to delete a range of proxy ARP addresses: + * @cliexcmd{set ip arp proxy 6.0.0.1 - 6.0.0.11 del} + * You must then specifically enable proxy arp on individual interfaces: + * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 enable} + * To disable proxy arp on an individual interface: + * @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable} + ?*/ +VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = { + .path = "set interface proxy-arp", + .short_help = + "set interface proxy-arp <intfc> [enable|disable]", + .function = set_int_proxy_arp_command_fn, +}; +/* *INDENT-ON* */ + + +/* + * ARP/ND Termination in a L2 Bridge Domain based on IP4/IP6 to MAC + * hash tables mac_by_ip4 and mac_by_ip6 for each BD. + */ +typedef enum +{ + ARP_TERM_NEXT_L2_OUTPUT, + ARP_TERM_NEXT_DROP, + ARP_TERM_N_NEXT, +} arp_term_next_t; + +u32 arp_term_next_node_index[32]; + +static uword +arp_term_l2bd (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + l2input_main_t *l2im = &l2input_main; + u32 n_left_from, next_index, *from, *to_next; + u32 n_replies_sent = 0; + u16 last_bd_index = ~0; + l2_bridge_domain_t *last_bd_config = 0; + l2_input_config_t *cfg0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *p0; + ethernet_header_t *eth0; + ethernet_arp_header_t *arp0; + ip6_header_t *iph0; + u8 *l3h0; + u32 pi0, error0, next0, sw_if_index0; + u16 ethertype0; + u16 bd_index0; + u32 ip0; + u8 *macp0; + u8 is_vrrp_reply0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer 
(vm, pi0); + // Terminate only local (SHG == 0) ARP + if (vnet_buffer (p0)->l2.shg != 0) + goto next_l2_feature; + + eth0 = vlib_buffer_get_current (p0); + l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len; + ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2)); + arp0 = (ethernet_arp_header_t *) l3h0; + + if (PREDICT_FALSE ((ethertype0 != ETHERNET_TYPE_ARP) || + (arp0->opcode != + clib_host_to_net_u16 + (ETHERNET_ARP_OPCODE_request)))) + goto check_ip6_nd; + + /* Must be ARP request packet here */ + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (p0->flags & VLIB_BUFFER_IS_TRACED))) + { + u8 *t0 = vlib_add_trace (vm, node, p0, + sizeof (ethernet_arp_input_trace_t)); + clib_memcpy (t0, l3h0, sizeof (ethernet_arp_input_trace_t)); + } + + error0 = ETHERNET_ARP_ERROR_replies_sent; + error0 = + (arp0->l2_type != + clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) + ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0); + error0 = + (arp0->l3_type != + clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ? 
+ ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0); + + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + if (error0) + goto drop; + + is_vrrp_reply0 = + ((arp0->opcode == + clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)) + && + (!memcmp + (arp0->ip4_over_ethernet[0].ethernet, vrrp_prefix, + sizeof (vrrp_prefix)))); + + /* Trash ARP packets whose ARP-level source addresses do not + match their L2-frame-level source addresses, unless it's + a reply from a VRRP virtual router */ + if (PREDICT_FALSE + (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet, + sizeof (eth0->src_address)) && !is_vrrp_reply0)) + { + error0 = ETHERNET_ARP_ERROR_l2_address_mismatch; + goto drop; + } + + /* Check if anyone want ARP request events for L2 BDs */ + { + ethernet_arp_main_t *am = ðernet_arp_main; + if (am->wc_ip4_arp_publisher_node != (uword) ~ 0) + vnet_arp_wc_publish (sw_if_index0, &arp0->ip4_over_ethernet[0]); + } + + /* lookup BD mac_by_ip4 hash table for MAC entry */ + ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32; + bd_index0 = vnet_buffer (p0)->l2.bd_index; + if (PREDICT_FALSE ((bd_index0 != last_bd_index) + || (last_bd_index == (u16) ~ 0))) + { + last_bd_index = bd_index0; + last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0); + } + macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0); + + if (PREDICT_FALSE (!macp0)) + goto next_l2_feature; /* MAC not found */ + + /* MAC found, send ARP reply - + Convert ARP request packet to ARP reply */ + arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply); + arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0]; + arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0; + clib_memcpy (arp0->ip4_over_ethernet[0].ethernet, macp0, 6); + clib_memcpy (eth0->dst_address, eth0->src_address, 6); + clib_memcpy (eth0->src_address, macp0, 6); + n_replies_sent += 1; + + output_response: + /* For BVI, need to use l2-fwd node to send ARP reply as + l2-output node cannot output packet to BVI properly */ + cfg0 = 
vec_elt_at_index (l2im->configs, sw_if_index0); + if (PREDICT_FALSE (cfg0->bvi)) + { + vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD; + vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0; + goto next_l2_feature; + } + + /* Send ARP/ND reply back out input interface through l2-output */ + vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; + next0 = ARP_TERM_NEXT_L2_OUTPUT; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + continue; + + check_ip6_nd: + /* IP6 ND event notification or solicitation handling to generate + local response instead of flooding */ + iph0 = (ip6_header_t *) l3h0; + if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 && + iph0->protocol == IP_PROTOCOL_ICMP6 && + !ip6_address_is_unspecified + (&iph0->src_address))) + { + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + if (vnet_ip6_nd_term + (vm, node, p0, eth0, iph0, sw_if_index0, + vnet_buffer (p0)->l2.bd_index)) + goto output_response; + } + + next_l2_feature: + { + next0 = vnet_l2_feature_next (p0, arp_term_next_node_index, + L2INPUT_FEAT_ARP_TERM); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + continue; + } + + drop: + if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 || + (arp0->ip4_over_ethernet[0].ip4.as_u32 == + arp0->ip4_over_ethernet[1].ip4.as_u32)) + { + error0 = ETHERNET_ARP_ERROR_gratuitous_arp; + } + next0 = ARP_TERM_NEXT_DROP; + p0->error = node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_error_count (vm, node->node_index, + ETHERNET_ARP_ERROR_replies_sent, n_replies_sent); + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = { + .function = arp_term_l2bd, + .name = "arp-term-l2bd", + .vector_size = sizeof (u32), + .n_errors = ETHERNET_ARP_N_ERROR, + 
.error_strings = ethernet_arp_error_strings, + .n_next_nodes = ARP_TERM_N_NEXT, + .next_nodes = { + [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output", + [ARP_TERM_NEXT_DROP] = "error-drop", + }, + .format_buffer = format_ethernet_arp_header, + .format_trace = format_arp_term_input_trace, +}; +/* *INDENT-ON* */ + +clib_error_t * +arp_term_init (vlib_main_t * vm) +{ + // Initialize the feature next-node indexes + feat_bitmap_init_next_nodes (vm, + arp_term_l2bd_node.index, + L2INPUT_N_FEAT, + l2input_get_feat_names (), + arp_term_next_node_index); + return 0; +} + +VLIB_INIT_FUNCTION (arp_term_init); + +void +change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e) +{ + if (e->sw_if_index == sw_if_index) + { + adj_nbr_walk_nh4 (e->sw_if_index, + &e->ip4_address, arp_mk_complete_walk, e); + } +} + +void +ethernet_arp_change_mac (u32 sw_if_index) +{ + ethernet_arp_main_t *am = &ethernet_arp_main; + ethernet_arp_ip4_entry_t *e; + + /* *INDENT-OFF* */ + pool_foreach (e, am->ip4_entry_pool, + ({ + change_arp_mac (sw_if_index, e); + })); + /* *INDENT-ON* */ +} + +void +send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi) +{ + ip4_main_t *i4m = &ip4_main; + u32 sw_if_index = hi->sw_if_index; + ip4_address_t *ip4_addr = ip4_interface_first_address (i4m, sw_if_index, 0); + + if (ip4_addr) + { + clib_warning ("Sending GARP for IP4 address %U on sw_if_idex %d", + format_ip4_address, ip4_addr, sw_if_index); + + /* Form GARP packet for output - Gratuitous ARP is an ARP request packet + where the interface IP/MAC pair is used for both source and request + MAC/IP pairs in the request */ + u32 bi = 0; + ethernet_arp_header_t *h = vlib_packet_template_get_packet + (vm, &i4m->ip4_arp_request_packet_template, &bi); + clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, + sizeof (h->ip4_over_ethernet[0].ethernet)); + clib_memcpy (h->ip4_over_ethernet[1].ethernet, hi->hw_address, + sizeof (h->ip4_over_ethernet[1].ethernet)); + h->ip4_over_ethernet[0].ip4 = ip4_addr[0]; + 
h->ip4_over_ethernet[1].ip4 = ip4_addr[0]; + + /* Setup MAC header with ARP Etype and broadcast DMAC */ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_buffer_advance (b, -sizeof (ethernet_header_t)); + ethernet_header_t *e = vlib_buffer_get_current (b); + e->type = clib_host_to_net_u16 (ETHERNET_TYPE_ARP); + clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address)); + memset (e->dst_address, 0xff, sizeof (e->dst_address)); + + /* Send GARP packet out the specified interface */ + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 *to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/arp_packet.h b/src/vnet/ethernet/arp_packet.h new file mode 100644 index 00000000..661f33f9 --- /dev/null +++ b/src/vnet/ethernet/arp_packet.h @@ -0,0 +1,180 @@ +/* + * ethernet/arp.c: IP v4 ARP node + * + * Copyright (c) 2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_ethernet_arp_packet_h +#define included_ethernet_arp_packet_h + +#define foreach_ethernet_arp_hardware_type \ + _ (0, reserved) \ + _ (1, ethernet) \ + _ (2, experimental_ethernet) \ + _ (3, ax_25) \ + _ (4, proteon_pronet_token_ring) \ + _ (5, chaos) \ + _ (6, ieee_802) \ + _ (7, arcnet) \ + _ (8, hyperchannel) \ + _ (9, lanstar) \ + _ (10, autonet) \ + _ (11, localtalk) \ + _ (12, localnet) \ + _ (13, ultra_link) \ + _ (14, smds) \ + _ (15, frame_relay) \ + _ (16, atm) \ + _ (17, hdlc) \ + _ (18, fibre_channel) \ + _ (19, atm19) \ + _ (20, serial_line) \ + _ (21, atm21) \ + _ (22, mil_std_188_220) \ + _ (23, metricom) \ + _ (24, ieee_1394) \ + _ (25, mapos) \ + _ (26, twinaxial) \ + _ (27, eui_64) \ + _ (28, hiparp) \ + _ (29, iso_7816_3) \ + _ (30, arpsec) \ + _ (31, ipsec_tunnel) \ + _ (32, infiniband) \ + _ (33, cai) \ + _ (34, wiegand) \ + _ (35, pure_ip) \ + _ (36, hw_exp1) \ + _ (256, hw_exp2) + +#define foreach_ethernet_arp_opcode \ + _ (reserved) \ + _ (request) \ + _ (reply) \ + _ (reverse_request) \ + _ (reverse_reply) \ + _ (drarp_request) \ + _ (drarp_reply) \ + _ (drarp_error) \ + _ (inarp_request) \ + _ (inarp_reply) \ + _ (arp_nak) \ + _ (mars_request) \ + _ (mars_multi) \ + _ (mars_mserv) \ + _ (mars_join) \ + _ (mars_leave) \ + _ (mars_nak) \ + _ (mars_unserv) \ + _ (mars_sjoin) \ + _ (mars_sleave) \ + _ (mars_grouplist_request) \ + _ (mars_grouplist_reply) \ + _ (mars_redirect_map) \ + _ (mapos_unarp) \ + _ (exp1) \ + _ (exp2) + +typedef enum +{ +#define _(n,f) ETHERNET_ARP_HARDWARE_TYPE_##f = (n), + foreach_ethernet_arp_hardware_type +#undef _ +} ethernet_arp_hardware_type_t; + +typedef enum +{ +#define _(f) ETHERNET_ARP_OPCODE_##f, + foreach_ethernet_arp_opcode +#undef _ + ETHERNET_ARP_N_OPCODE, +} ethernet_arp_opcode_t; + +typedef enum +{ + IP4_ARP_NEXT_DROP, + IP4_ARP_N_NEXT, +} ip4_arp_next_t; + +typedef enum +{ + IP4_ARP_ERROR_DROP, + IP4_ARP_ERROR_REQUEST_SENT, + IP4_ARP_ERROR_NON_ARP_ADJ, + 
IP4_ARP_ERROR_REPLICATE_DROP, + IP4_ARP_ERROR_REPLICATE_FAIL, + IP4_ARP_ERROR_NO_SOURCE_ADDRESS, +} ip4_arp_error_t; + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + u8 ethernet[6]; + ip4_address_t ip4; +}) ethernet_arp_ip4_over_ethernet_address_t; +/* *INDENT-ON* */ + +typedef struct +{ + u16 l2_type; + u16 l3_type; + u8 n_l2_address_bytes; + u8 n_l3_address_bytes; + u16 opcode; + union + { + ethernet_arp_ip4_over_ethernet_address_t ip4_over_ethernet[2]; + + /* Others... */ + u8 data[0]; + }; +} ethernet_arp_header_t; + +typedef enum ethernet_arp_entry_flags_t_ +{ + ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC = (1 << 0), + ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC = (1 << 1), + ETHERNET_ARP_IP4_ENTRY_FLAG_NO_FIB_ENTRY = (1 << 2), +} __attribute__ ((packed)) ethernet_arp_entry_flags_t; + +typedef struct +{ + u32 sw_if_index; + ip4_address_t ip4_address; + + u8 ethernet_address[6]; + + ethernet_arp_entry_flags_t flags; + + u64 cpu_time_last_updated; + + /** + * The index of the adj-fib entry created + */ + fib_node_index_t fib_entry_index; +} ethernet_arp_ip4_entry_t; + +ethernet_arp_ip4_entry_t *ip4_neighbor_entries (u32 sw_if_index); +u8 *format_ethernet_arp_ip4_entry (u8 * s, va_list * va); + +void send_ip4_garp (vlib_main_t * vm, vnet_hw_interface_t * hi); + +#endif /* included_ethernet_arp_packet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/dir.dox b/src/vnet/ethernet/dir.dox new file mode 100644 index 00000000..a55a73c0 --- /dev/null +++ b/src/vnet/ethernet/dir.dox @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2013 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** +@dir +@brief Ethernet ARP and Loopback Code. + +This directory contains the source code for ARP and Loopback Interfaces. + +*/ +/*? %%clicmd:group_label ARP and Loopback CLI %% ?*/ diff --git a/src/vnet/ethernet/error.def b/src/vnet/ethernet/error.def new file mode 100644 index 00000000..36679c0c --- /dev/null +++ b/src/vnet/ethernet/error.def @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ethernet_error.def: ethernet errors + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +ethernet_error (NONE, PUNT, "no error") +ethernet_error (BAD_LLC_LENGTH, DROP, "llc length > packet length") +ethernet_error (UNKNOWN_TYPE, PUNT, "unknown ethernet type") +ethernet_error (UNKNOWN_VLAN, DROP, "unknown vlan") +ethernet_error (L3_MAC_MISMATCH, DROP, "l3 mac mismatch") +ethernet_error (DOWN, DROP, "subinterface down") + diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h new file mode 100644 index 00000000..a6846b13 --- /dev/null +++ b/src/vnet/ethernet/ethernet.h @@ -0,0 +1,577 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ethernet.h: types/functions for ethernet. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_ethernet_h +#define included_ethernet_h + +#include <vnet/vnet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/pg/pg.h> +#include <vnet/feature/feature.h> + +always_inline u64 +ethernet_mac_address_u64 (u8 * a) +{ + return (((u64) a[0] << (u64) (5 * 8)) + | ((u64) a[1] << (u64) (4 * 8)) + | ((u64) a[2] << (u64) (3 * 8)) + | ((u64) a[3] << (u64) (2 * 8)) + | ((u64) a[4] << (u64) (1 * 8)) | ((u64) a[5] << (u64) (0 * 8))); +} + +static inline int +ethernet_mac_address_is_multicast_u64 (u64 a) +{ + return (a & (1ULL << (5 * 8))) != 0; +} + +static_always_inline int +ethernet_frame_is_tagged (u16 type) +{ +#if __SSE4_2__ + const __m128i ethertype_mask = _mm_set_epi16 (ETHERNET_TYPE_VLAN, + ETHERNET_TYPE_DOT1AD, + ETHERNET_TYPE_VLAN_9100, + ETHERNET_TYPE_VLAN_9200, + /* duplicate last one to + fill register */ + ETHERNET_TYPE_VLAN_9200, + ETHERNET_TYPE_VLAN_9200, + ETHERNET_TYPE_VLAN_9200, + ETHERNET_TYPE_VLAN_9200); + + __m128i r = _mm_set1_epi16 (type); + r = _mm_cmpeq_epi16 (ethertype_mask, r); + return !_mm_test_all_zeros (r, r); +#else + if ((type == ETHERNET_TYPE_VLAN) || + (type == ETHERNET_TYPE_DOT1AD) || + (type == ETHERNET_TYPE_VLAN_9100) || (type == ETHERNET_TYPE_VLAN_9200)) + return 1; +#endif + return 0; +} + +/* Max. sized ethernet/vlan header for parsing. */ +typedef struct +{ + ethernet_header_t ethernet; + + /* Allow up to 2 stacked vlan headers. */ + ethernet_vlan_header_t vlan[2]; +} ethernet_max_header_t; + +struct vnet_hw_interface_t; +/* Ethernet flag change callback. */ +typedef u32 (ethernet_flag_change_function_t) + (vnet_main_t * vnm, struct vnet_hw_interface_t * hi, u32 flags); + +#define ETHERNET_MIN_PACKET_BYTES 64 +#define ETHERNET_MAX_PACKET_BYTES 9216 + +/* Ethernet interface instance. */ +typedef struct ethernet_interface +{ + + /* Accept all packets (promiscuous mode). 
*/ +#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL (1 << 0) +#define ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC(flags) \ + (((flags) & ~ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) == 0) + + /* Change MTU on interface from hw interface structure */ +#define ETHERNET_INTERFACE_FLAG_MTU (1 << 1) +#define ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags) \ + ((flags) & ETHERNET_INTERFACE_FLAG_MTU) + + /* Callback, e.g. to turn on/off promiscuous mode */ + ethernet_flag_change_function_t *flag_change; + + u32 driver_instance; + + /* Ethernet (MAC) address for this interface. */ + u8 address[6]; +} ethernet_interface_t; + +extern vnet_hw_interface_class_t ethernet_hw_interface_class; + +typedef struct +{ + /* Name (a c string). */ + char *name; + + /* Ethernet type in host byte order. */ + ethernet_type_t type; + + /* Node which handles this type. */ + u32 node_index; + + /* Next index for this type. */ + u32 next_index; +} ethernet_type_info_t; + +typedef enum +{ +#define ethernet_error(n,c,s) ETHERNET_ERROR_##n, +#include <vnet/ethernet/error.def> +#undef ethernet_error + ETHERNET_N_ERROR, +} ethernet_error_t; + + +// Structs used when parsing packet to find sw_if_index + +typedef struct +{ + u32 sw_if_index; + u32 flags; + // config entry is-valid flag + // exact match flags (valid if packet has 0/1/2/3 tags) + // L2 vs L3 forwarding mode +#define SUBINT_CONFIG_MATCH_0_TAG (1<<0) +#define SUBINT_CONFIG_MATCH_1_TAG (1<<1) +#define SUBINT_CONFIG_MATCH_2_TAG (1<<2) +#define SUBINT_CONFIG_MATCH_3_TAG (1<<3) +#define SUBINT_CONFIG_VALID (1<<4) +#define SUBINT_CONFIG_L2 (1<<5) +#define SUBINT_CONFIG_P2P (1<<6) + +} subint_config_t; + +always_inline u32 +eth_create_valid_subint_match_flags (u32 num_tags) +{ + return SUBINT_CONFIG_VALID | (1 << num_tags); +} + + +typedef struct +{ + subint_config_t untagged_subint; + subint_config_t default_subint; + u16 dot1q_vlans; // pool id for vlan table + u16 dot1ad_vlans; // pool id for vlan table +} main_intf_t; + +typedef struct +{ + subint_config_t 
single_tag_subint; + subint_config_t inner_any_subint; + u32 qinqs; // pool id for qinq table +} vlan_intf_t; + +typedef struct +{ + vlan_intf_t vlans[ETHERNET_N_VLAN]; +} vlan_table_t; + +typedef struct +{ + subint_config_t subint; +} qinq_intf_t; + +typedef struct +{ + qinq_intf_t vlans[ETHERNET_N_VLAN]; +} qinq_table_t; + +// Structure mapping to a next index based on ethertype. +// Common ethertypes are stored explicitly, others are +// stored in a sparse table. +typedef struct +{ + /* Sparse vector mapping ethernet type in network byte order + to next index. */ + u16 *input_next_by_type; + u32 *sparse_index_by_input_next_index; + + /* cached next indexes for common ethertypes */ + u32 input_next_ip4; + u32 input_next_ip6; + u32 input_next_mpls; +} next_by_ethertype_t; + +typedef struct +{ + vlib_main_t *vlib_main; + + /* next node index for the L3 input node of each ethertype */ + next_by_ethertype_t l3_next; + + /* next node index for L2 interfaces */ + u32 l2_next; + + /* flag and next node index for L3 redirect */ + u32 redirect_l3; + u32 redirect_l3_next; + + /* Pool of ethernet interface instances. */ + ethernet_interface_t *interfaces; + + ethernet_type_info_t *type_infos; + + /* Hash tables mapping name/type to type info index. */ + uword *type_info_by_name, *type_info_by_type; + + // The root of the vlan parsing tables. A vector with one element + // for each main interface, indexed by hw_if_index. + main_intf_t *main_intfs; + + // Pool of vlan tables + vlan_table_t *vlan_pool; + + // Pool of qinq tables; + qinq_table_t *qinq_pool; + + /* Set to one to use AB.CD.EF instead of A:B:C:D:E:F as ethernet format. 
*/ + int format_ethernet_address_16bit; + + /* debug: make sure we don't wipe out an ethernet registration by mistake */ + u8 next_by_ethertype_register_called; + + /* Feature arc index */ + u8 output_feature_arc_index; + + /* Allocated loopback instances */ + uword *bm_loopback_instances; +} ethernet_main_t; + +ethernet_main_t ethernet_main; + +always_inline ethernet_type_info_t * +ethernet_get_type_info (ethernet_main_t * em, ethernet_type_t type) +{ + uword *p = hash_get (em->type_info_by_type, type); + return p ? vec_elt_at_index (em->type_infos, p[0]) : 0; +} + +ethernet_interface_t *ethernet_get_interface (ethernet_main_t * em, + u32 hw_if_index); + +clib_error_t *ethernet_register_interface (vnet_main_t * vnm, + u32 dev_class_index, + u32 dev_instance, + u8 * address, + u32 * hw_if_index_return, + ethernet_flag_change_function_t + flag_change); + +void ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index); + +/* Register given node index to take input for given ethernet type. */ +void +ethernet_register_input_type (vlib_main_t * vm, + ethernet_type_t type, u32 node_index); + +/* Register given node index to take input for packet from L2 interfaces. */ +void ethernet_register_l2_input (vlib_main_t * vm, u32 node_index); + +/* Register given node index to take redirected L3 traffic, and enable L3 redirect */ +void ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index); + +/* Formats ethernet address X:X:X:X:X:X */ +u8 *format_ethernet_address (u8 * s, va_list * args); +u8 *format_ethernet_type (u8 * s, va_list * args); +u8 *format_ethernet_vlan_tci (u8 * s, va_list * va); +u8 *format_ethernet_header (u8 * s, va_list * args); +u8 *format_ethernet_header_with_length (u8 * s, va_list * args); + +/* Parse ethernet address in either X:X:X:X:X:X unix or X.X.X cisco format. */ +uword unformat_ethernet_address (unformat_input_t * input, va_list * args); + +/* Parse ethernet type as 0xXXXX or type name from ethernet/types.def. 
+ In either host or network byte order. */ +uword +unformat_ethernet_type_host_byte_order (unformat_input_t * input, + va_list * args); +uword +unformat_ethernet_type_net_byte_order (unformat_input_t * input, + va_list * args); + +/* Parse ethernet header. */ +uword unformat_ethernet_header (unformat_input_t * input, va_list * args); + +/* Parse ethernet interface name; return hw_if_index. */ +uword unformat_ethernet_interface (unformat_input_t * input, va_list * args); + +uword unformat_pg_ethernet_header (unformat_input_t * input, va_list * args); + +always_inline void +ethernet_setup_node (vlib_main_t * vm, u32 node_index) +{ + vlib_node_t *n = vlib_get_node (vm, node_index); + pg_node_t *pn = pg_get_node (node_index); + + n->format_buffer = format_ethernet_header_with_length; + n->unformat_buffer = unformat_ethernet_header; + pn->unformat_edit = unformat_pg_ethernet_header; +} + +always_inline ethernet_header_t * +ethernet_buffer_get_header (vlib_buffer_t * b) +{ + return (void *) (b->data + vnet_buffer (b)->l2_hdr_offset); +} + +/** Returns the number of VLAN headers in the current Ethernet frame in the + * buffer. Returns 0, 1, 2 for the known header count. The value 3 indicates + * the number of headers is not known. + */ +#define ethernet_buffer_get_vlan_count(b) ( \ + ((b)->flags & VNET_BUFFER_FLAGS_VLAN_BITS) >> VNET_BUFFER_F_LOG2_VLAN_1_DEEP \ +) + +/** Sets the number of VLAN headers in the current Ethernet frame in the + * buffer. Values 0, 1, 2 indicate the header count. The value 3 indicates + * the number of headers is not known. 
+ */ +#define ethernet_buffer_set_vlan_count(b, v) ( \ + (b)->flags = ((b)->flags & ~VNET_BUFFER_FLAGS_VLAN_BITS) | \ + (((v) << VNET_BUFFER_F_LOG2_VLAN_1_DEEP) & VNET_BUFFER_FLAGS_VLAN_BITS) \ +) + +/** Adjusts the vlan count by the delta in 'v' */ +#define ethernet_buffer_adjust_vlan_count(b, v) ( \ + ethernet_buffer_set_vlan_count(b, \ + (word)ethernet_buffer_get_vlan_count(b) + (word)(v)) \ +) + +/** Adjusts the vlan count by the header size byte delta in 'v' */ +#define ethernet_buffer_adjust_vlan_count_by_bytes(b, v) ( \ + (b)->flags = ((b)->flags & ~VNET_BUFFER_FLAGS_VLAN_BITS) | (( \ + ((b)->flags & VNET_BUFFER_FLAGS_VLAN_BITS) + \ + ((v) << (VNET_BUFFER_F_LOG2_VLAN_1_DEEP - 2)) \ + ) & VNET_BUFFER_FLAGS_VLAN_BITS) \ +) + +/** + * Determine the size of the Ethernet headers of the current frame in + * the buffer. This uses the VLAN depth flags that are set by + * ethernet-input. Because these flags are stored in the vlib_buffer_t + * "flags" field this count is valid regardless of the node so long as it's + * checked downstream of ethernet-input; That is, the value is not stored in + * the opaque space. 
+ */ +#define ethernet_buffer_header_size(b) ( \ + ethernet_buffer_get_vlan_count((b)) * sizeof(ethernet_vlan_header_t) + \ + sizeof(ethernet_header_t) \ +) + +ethernet_main_t *ethernet_get_main (vlib_main_t * vm); +u32 ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags); +void ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, + u32 l2); +void ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm, + u32 sw_if_index, u32 l2); +void ethernet_set_rx_redirect (vnet_main_t * vnm, vnet_hw_interface_t * hi, + u32 enable); + +int +vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, void *a_arg, + int is_static, int is_no_fib_entry); + +int +vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, void *a_arg); + +int vnet_proxy_arp_fib_reset (u32 fib_id); + +clib_error_t *next_by_ethertype_init (next_by_ethertype_t * l3_next); +clib_error_t *next_by_ethertype_register (next_by_ethertype_t * l3_next, + u32 ethertype, u32 next_index); + +int vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address, + u8 is_specified, u32 user_instance); +int vnet_delete_loopback_interface (u32 sw_if_index); +int vnet_delete_sub_interface (u32 sw_if_index); + +// Perform ethernet subinterface classification table lookups given +// the ports's sw_if_index and fields extracted from the ethernet header. +// The resulting tables are used by identify_subint(). +always_inline void +eth_vlan_table_lookups (ethernet_main_t * em, + vnet_main_t * vnm, + u32 port_sw_if_index0, + u16 first_ethertype, + u16 outer_id, + u16 inner_id, + vnet_hw_interface_t ** hi, + main_intf_t ** main_intf, + vlan_intf_t ** vlan_intf, qinq_intf_t ** qinq_intf) +{ + vlan_table_t *vlan_table; + qinq_table_t *qinq_table; + u32 vlan_table_id; + + // Read the main, vlan, and qinq interface table entries + // TODO: Consider if/how to prefetch tables. 
Also consider + // single-entry cache to skip table lookups and identify_subint() + // processing. + *hi = vnet_get_sup_hw_interface (vnm, port_sw_if_index0); + *main_intf = vec_elt_at_index (em->main_intfs, (*hi)->hw_if_index); + + // Always read the vlan and qinq tables, even if there are not that + // many tags on the packet. This makes the lookups and comparisons + // easier (and less branchy). + vlan_table_id = (first_ethertype == ETHERNET_TYPE_DOT1AD) ? + (*main_intf)->dot1ad_vlans : (*main_intf)->dot1q_vlans; + vlan_table = vec_elt_at_index (em->vlan_pool, vlan_table_id); + *vlan_intf = &vlan_table->vlans[outer_id]; + + qinq_table = vec_elt_at_index (em->qinq_pool, (*vlan_intf)->qinqs); + *qinq_intf = &qinq_table->vlans[inner_id]; +} + + +// Determine the subinterface for this packet, given the result of the +// vlan table lookups and vlan header parsing. Check the most specific +// matches first. +// Returns 1 if a matching subinterface was found, otherwise returns 0. +always_inline u32 +eth_identify_subint (vnet_hw_interface_t * hi, + vlib_buffer_t * b0, + u32 match_flags, + main_intf_t * main_intf, + vlan_intf_t * vlan_intf, + qinq_intf_t * qinq_intf, + u32 * new_sw_if_index, u8 * error0, u32 * is_l2) +{ + subint_config_t *subint; + + // Each comparison is checking both the valid flag and the number of tags + // (incorporating exact-match/non-exact-match). 
+ + // check for specific double tag + subint = &qinq_intf->subint; + if ((subint->flags & match_flags) == match_flags) + goto matched; + + // check for specific outer and 'any' inner + subint = &vlan_intf->inner_any_subint; + if ((subint->flags & match_flags) == match_flags) + goto matched; + + // check for specific single tag + subint = &vlan_intf->single_tag_subint; + if ((subint->flags & match_flags) == match_flags) + goto matched; + + // check for untagged interface + subint = &main_intf->untagged_subint; + if ((subint->flags & match_flags) == match_flags) + goto matched; + + // check for default interface + subint = &main_intf->default_subint; + if ((subint->flags & match_flags) == match_flags) + goto matched; + + // No matching subinterface + *new_sw_if_index = ~0; + *error0 = ETHERNET_ERROR_UNKNOWN_VLAN; + *is_l2 = 0; + return 0; + +matched: + *new_sw_if_index = subint->sw_if_index; + *is_l2 = subint->flags & SUBINT_CONFIG_L2; + return 1; +} + +// Compare two ethernet macs. Return 1 if they are the same, 0 if different +always_inline u32 +eth_mac_equal (u8 * mac1, u8 * mac2) +{ + return (*((u32 *) (mac1 + 0)) == *((u32 *) (mac2 + 0)) && + *((u32 *) (mac1 + 2)) == *((u32 *) (mac2 + 2))); +} + + +always_inline ethernet_main_t * +vnet_get_ethernet_main (void) +{ + return &ethernet_main; +} + +void vnet_register_ip4_arp_resolution_event (vnet_main_t * vnm, + void *address_arg, + uword node_index, + uword type_opaque, uword data); + + +int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, + void *data_callback, + u32 pid, + void *address_arg, + uword node_index, + uword type_opaque, + uword data, int is_add); + +void wc_arp_set_publisher_node (uword inode_index, uword event_type); + +void ethernet_arp_change_mac (u32 sw_if_index); +void ethernet_ndp_change_mac (u32 sw_if_index); + +void arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai); + +void ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai); +u8 *ethernet_build_rewrite 
(vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, const void *dst_address); +const u8 *ethernet_ip4_mcast_dst_addr (void); +const u8 *ethernet_ip6_mcast_dst_addr (void); + +extern vlib_node_registration_t ethernet_input_node; + +typedef struct +{ + u32 sw_if_index; + u32 ip4; + u8 mac[6]; +} wc_arp_report_t; + +#endif /* included_ethernet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/format.c b/src/vnet/ethernet/format.c new file mode 100644 index 00000000..5b589998 --- /dev/null +++ b/src/vnet/ethernet/format.c @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ethernet_format.c: ethernet formatting/parsing. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> + +u8 * +format_ethernet_address (u8 * s, va_list * args) +{ + ethernet_main_t *em = &ethernet_main; + u8 *a = va_arg (*args, u8 *); + + if (em->format_ethernet_address_16bit) + return format (s, "%02x%02x.%02x%02x.%02x%02x", + a[0], a[1], a[2], a[3], a[4], a[5]); + else + return format (s, "%02x:%02x:%02x:%02x:%02x:%02x", + a[0], a[1], a[2], a[3], a[4], a[5]); +} + +u8 * +format_ethernet_type (u8 * s, va_list * args) +{ + ethernet_type_t type = va_arg (*args, u32); + ethernet_main_t *em = &ethernet_main; + ethernet_type_info_t *t = ethernet_get_type_info (em, type); + + if (t) + s = format (s, "%s", t->name); + else + s = format (s, "0x%04x", type); + + return s; +} + +u8 * +format_ethernet_vlan_tci (u8 * s, va_list * va) +{ + u32 vlan_tci = va_arg (*va, u32); + + u32 vid = (vlan_tci & 0xfff); + u32 cfi = (vlan_tci >> 12) & 1; + u32 pri = (vlan_tci >> 13); + + s = format (s, "%d", vid); + if (pri != 0) + s = format (s, " priority %d", pri); + if (cfi != 0) + s = format (s, " cfi"); + + return s; +} + +u8 * +format_ethernet_header_with_length (u8 * s, va_list * args) +{ + ethernet_pbb_header_packed_t *ph = + va_arg (*args, ethernet_pbb_header_packed_t *); + ethernet_max_header_t *m = (ethernet_max_header_t *) ph; + u32 max_header_bytes = va_arg (*args, u32); + ethernet_main_t *em = &ethernet_main; + ethernet_header_t *e = &m->ethernet; + ethernet_vlan_header_t *v; + ethernet_type_t type = clib_net_to_host_u16 (e->type); + 
ethernet_type_t vlan_type[ARRAY_LEN (m->vlan)]; + u32 n_vlan = 0, i, header_bytes; + uword indent; + + while ((type == ETHERNET_TYPE_VLAN || type == ETHERNET_TYPE_DOT1AD + || type == ETHERNET_TYPE_DOT1AH) && n_vlan < ARRAY_LEN (m->vlan)) + { + vlan_type[n_vlan] = type; + if (type != ETHERNET_TYPE_DOT1AH) + { + v = m->vlan + n_vlan; + type = clib_net_to_host_u16 (v->type); + } + n_vlan++; + } + + header_bytes = sizeof (e[0]) + n_vlan * sizeof (v[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "ethernet header truncated"); + + indent = format_get_indent (s); + + s = format (s, "%U: %U -> %U", + format_ethernet_type, type, + format_ethernet_address, e->src_address, + format_ethernet_address, e->dst_address); + + if (type != ETHERNET_TYPE_DOT1AH) + { + for (i = 0; i < n_vlan; i++) + { + u32 v = clib_net_to_host_u16 (m->vlan[i].priority_cfi_and_id); + if (*vlan_type == ETHERNET_TYPE_VLAN) + s = format (s, " 802.1q vlan %U", format_ethernet_vlan_tci, v); + else + s = format (s, " 802.1ad vlan %U", format_ethernet_vlan_tci, v); + } + + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ethernet_type_info_t *ti; + vlib_node_t *node = 0; + + ti = ethernet_get_type_info (em, type); + if (ti && ti->node_index != ~0) + node = vlib_get_node (em->vlib_main, ti->node_index); + if (node && node->format_buffer) + s = format (s, "\n%U%U", + format_white_space, indent, + node->format_buffer, (void *) m + header_bytes, + max_header_bytes - header_bytes); + } + } + else + { + s = + format (s, " %s b-tag %04X", + (clib_net_to_host_u16 (ph->b_type) == + ETHERNET_TYPE_DOT1AD) ? "802.1ad" : "", + clib_net_to_host_u16 (ph->priority_dei_id)); + s = + format (s, " %s i-tag %08X", + (clib_net_to_host_u16 (ph->i_type) == + ETHERNET_TYPE_DOT1AH) ? 
"802.1ah" : "", + clib_net_to_host_u32 (ph->priority_dei_uca_res_sid)); + } + + return s; +} + +u8 * +format_ethernet_header (u8 * s, va_list * args) +{ + ethernet_max_header_t *m = va_arg (*args, ethernet_max_header_t *); + return format (s, "%U", format_ethernet_header_with_length, m, 0); +} + +/* Parse X:X:X:X:X:X unix style ethernet address. */ +static uword +unformat_ethernet_address_unix (unformat_input_t * input, va_list * args) +{ + u8 *result = va_arg (*args, u8 *); + u32 i, a[6]; + + if (!unformat (input, "%_%x:%x:%x:%x:%x:%x%_", + &a[0], &a[1], &a[2], &a[3], &a[4], &a[5])) + return 0; + + /* Check range. */ + for (i = 0; i < ARRAY_LEN (a); i++) + if (a[i] >= (1 << 8)) + return 0; + + for (i = 0; i < ARRAY_LEN (a); i++) + result[i] = a[i]; + + return 1; +} + +/* Parse X.X.X cisco style ethernet address. */ +static uword +unformat_ethernet_address_cisco (unformat_input_t * input, va_list * args) +{ + u8 *result = va_arg (*args, u8 *); + u32 i, a[3]; + + if (!unformat (input, "%_%x.%x.%x%_", &a[0], &a[1], &a[2])) + return 0; + + /* Check range. */ + for (i = 0; i < ARRAY_LEN (a); i++) + if (a[i] >= (1 << 16)) + return 0; + + result[0] = (a[0] >> 8) & 0xff; + result[1] = (a[0] >> 0) & 0xff; + result[2] = (a[1] >> 8) & 0xff; + result[3] = (a[1] >> 0) & 0xff; + result[4] = (a[2] >> 8) & 0xff; + result[5] = (a[2] >> 0) & 0xff; + + return 1; +} + +/* Parse ethernet address; accept either unix or style addresses. */ +uword +unformat_ethernet_address (unformat_input_t * input, va_list * args) +{ + u8 *result = va_arg (*args, u8 *); + return (unformat_user (input, unformat_ethernet_address_unix, result) + || unformat_user (input, unformat_ethernet_address_cisco, result)); +} + +/* Returns ethernet type as an int in host byte order. */ +uword +unformat_ethernet_type_host_byte_order (unformat_input_t * input, + va_list * args) +{ + u16 *result = va_arg (*args, u16 *); + ethernet_main_t *em = &ethernet_main; + int type, i; + + /* Numeric type. 
*/ + if (unformat (input, "0x%x", &type) || unformat (input, "%d", &type)) + { + if (type >= (1 << 16)) + return 0; + *result = type; + return 1; + } + + /* Named type. */ + if (unformat_user (input, unformat_vlib_number_by_name, + em->type_info_by_name, &i)) + { + ethernet_type_info_t *ti = vec_elt_at_index (em->type_infos, i); + *result = ti->type; + return 1; + } + + return 0; +} + +uword +unformat_ethernet_type_net_byte_order (unformat_input_t * input, + va_list * args) +{ + u16 *result = va_arg (*args, u16 *); + if (!unformat_user (input, unformat_ethernet_type_host_byte_order, result)) + return 0; + + *result = clib_host_to_net_u16 ((u16) * result); + return 1; +} + +uword +unformat_ethernet_header (unformat_input_t * input, va_list * args) +{ + u8 **result = va_arg (*args, u8 **); + ethernet_max_header_t _m, *m = &_m; + ethernet_header_t *e = &m->ethernet; + u16 type; + u32 n_vlan; + + if (!unformat (input, "%U: %U -> %U", + unformat_ethernet_type_host_byte_order, &type, + unformat_ethernet_address, &e->src_address, + unformat_ethernet_address, &e->dst_address)) + return 0; + + n_vlan = 0; + while (unformat (input, "vlan")) + { + u32 id, priority; + + if (!unformat_user (input, unformat_vlib_number, &id) + || id >= ETHERNET_N_VLAN) + return 0; + + if (unformat (input, "priority %d", &priority)) + { + if (priority >= 8) + return 0; + id |= priority << 13; + } + + if (unformat (input, "cfi")) + id |= 1 << 12; + + /* Too many vlans given. */ + if (n_vlan >= ARRAY_LEN (m->vlan)) + return 0; + + m->vlan[n_vlan].priority_cfi_and_id = clib_host_to_net_u16 (id); + n_vlan++; + } + + if (n_vlan == 0) + e->type = clib_host_to_net_u16 (type); + else + { + int i; + + e->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); + for (i = 0; i < n_vlan - 1; i++) + m->vlan[i].type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); + m->vlan[n_vlan - 1].type = clib_host_to_net_u16 (type); + } + + /* Add header to result. 
*/
+  {
+    void *p;
+    /* Only the base header plus the n_vlan tags actually parsed are copied. */
+    u32 n_bytes = sizeof (e[0]) + n_vlan * sizeof (m->vlan[0]);
+
+    vec_add2 (*result, p, n_bytes);
+    clib_memcpy (p, m, n_bytes);
+  }
+
+  return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/init.c b/src/vnet/ethernet/init.c
new file mode 100644
index 00000000..2d20adc9
--- /dev/null
+++ b/src/vnet/ethernet/init.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ethernet_init.c: ethernet initialization
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip.h> // for feature registration + +/* Global main structure. */ +ethernet_main_t ethernet_main; + +static void +add_type (ethernet_main_t * em, ethernet_type_t type, char *type_name) +{ + ethernet_type_info_t *ti; + u32 i; + + vec_add2 (em->type_infos, ti, 1); + i = ti - em->type_infos; + + ti->name = type_name; + ti->type = type; + ti->next_index = ti->node_index = ~0; + + hash_set (em->type_info_by_type, type, i); + hash_set_mem (em->type_info_by_name, ti->name, i); +} + +/* Built-in ip4 tx feature path definition */ +/* *INDENT-OFF* */ +VNET_FEATURE_ARC_INIT (ethernet_output, static) = +{ + .arc_name = "ethernet-output", + .start_nodes = VNET_FEATURES ("adj-l2-midchain"), + .arc_index_ptr = ðernet_main.output_feature_arc_index, +}; + +VNET_FEATURE_INIT (ethernet_tx_drop, static) = +{ + .arc_name = "ethernet-output", + .node_name = "error-drop", + .runs_before = 0, /* not before any other features */ +}; +/* *INDENT-ON* */ + +static clib_error_t * +ethernet_init (vlib_main_t * vm) +{ + ethernet_main_t *em = ðernet_main; + clib_error_t *error; + + /* + * Set up the L2 path now, or we'll wipe out the L2 ARP + * registration set up by ethernet_arp_init. 
+ */ + if ((error = vlib_call_init_function (vm, l2_init))) + return error; + + em->vlib_main = vm; + + em->type_info_by_name = hash_create_string (0, sizeof (uword)); + em->type_info_by_type = hash_create (0, sizeof (uword)); + +#define ethernet_type(n,s) add_type (em, ETHERNET_TYPE_##s, #s); +#include "types.def" +#undef ethernet_type + + if ((error = vlib_call_init_function (vm, llc_init))) + return error; + if ((error = vlib_call_init_function (vm, ethernet_input_init))) + return error; + if ((error = vlib_call_init_function (vm, vnet_feature_init))) + return error; + + return 0; +} + +VLIB_INIT_FUNCTION (ethernet_init); + +ethernet_main_t * +ethernet_get_main (vlib_main_t * vm) +{ + vlib_call_init_function (vm, ethernet_init); + return ðernet_main; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c new file mode 100644 index 00000000..3e78a49d --- /dev/null +++ b/src/vnet/ethernet/interface.c @@ -0,0 +1,880 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ethernet_interface.c: ethernet interfaces + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/l2/l2_input.h> +#include <vnet/adj/adj.h> + +/** + * @file + * @brief Loopback Interfaces. + * + * This file contains code to manage loopback interfaces. 
+ */ + +const u8 * +ethernet_ip4_mcast_dst_addr (void) +{ + const static u8 ethernet_mcast_dst_mac[] = { + 0x1, 0x0, 0x5e, 0x0, 0x0, 0x0, + }; + + return (ethernet_mcast_dst_mac); +} + +const u8 * +ethernet_ip6_mcast_dst_addr (void) +{ + const static u8 ethernet_mcast_dst_mac[] = { + 0x33, 0x33, 0x00, 0x0, 0x0, 0x0, + }; + + return (ethernet_mcast_dst_mac); +} + +/** + * @brief build a rewrite string to use for sending packets of type 'link_type' + * to 'dst_address' + */ +u8 * +ethernet_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, const void *dst_address) +{ + vnet_sw_interface_t *sub_sw = vnet_get_sw_interface (vnm, sw_if_index); + vnet_sw_interface_t *sup_sw = vnet_get_sup_sw_interface (vnm, sw_if_index); + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + ethernet_main_t *em = ðernet_main; + ethernet_interface_t *ei; + ethernet_header_t *h; + ethernet_type_t type; + uword n_bytes = sizeof (h[0]); + u8 *rewrite = NULL; + u8 is_p2p = 0; + + if (sub_sw->type == VNET_SW_INTERFACE_TYPE_P2P) + is_p2p = 1; + if (sub_sw != sup_sw) + { + if (sub_sw->sub.eth.flags.one_tag) + { + n_bytes += sizeof (ethernet_vlan_header_t); + } + else if (sub_sw->sub.eth.flags.two_tags) + { + n_bytes += 2 * (sizeof (ethernet_vlan_header_t)); + } + else if (PREDICT_FALSE (is_p2p)) + { + n_bytes = sizeof (ethernet_header_t); + } + if (PREDICT_FALSE (!is_p2p)) + { + // Check for encaps that are not supported for L3 interfaces + if (!(sub_sw->sub.eth.flags.exact_match) || + (sub_sw->sub.eth.flags.default_sub) || + (sub_sw->sub.eth.flags.outer_vlan_id_any) || + (sub_sw->sub.eth.flags.inner_vlan_id_any)) + { + return 0; + } + } + else + { + n_bytes = sizeof (ethernet_header_t); + } + } + + switch (link_type) + { +#define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break + _(IP4, IP4); + _(IP6, IP6); + _(MPLS, MPLS); + _(ARP, ARP); +#undef _ + default: + return NULL; + } + + vec_validate (rewrite, n_bytes - 1); + h = 
(ethernet_header_t *) rewrite;
+  /* Source MAC is always the address of the underlying hw interface. */
+  ei = pool_elt_at_index (em->interfaces, hw->hw_instance);
+  clib_memcpy (h->src_address, ei->address, sizeof (h->src_address));
+  if (is_p2p)
+    {
+      /* P2P sub-interfaces always send to their configured client MAC. */
+      clib_memcpy (h->dst_address, sub_sw->p2p.client_mac,
+                   sizeof (h->dst_address));
+    }
+  else
+    {
+      if (dst_address)
+        clib_memcpy (h->dst_address, dst_address, sizeof (h->dst_address));
+      else
+        memset (h->dst_address, ~0, sizeof (h->dst_address));   /* broadcast */
+    }
+
+  /* Single tag: the VLAN header sits directly after the ethernet header. */
+  if (PREDICT_FALSE (!is_p2p) && sub_sw->sub.eth.flags.one_tag)
+    {
+      ethernet_vlan_header_t *outer = (void *) (h + 1);
+
+      h->type = sub_sw->sub.eth.flags.dot1ad ?
+        clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
+        clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+      outer->priority_cfi_and_id =
+        clib_host_to_net_u16 (sub_sw->sub.eth.outer_vlan_id);
+      outer->type = clib_host_to_net_u16 (type);
+
+    }
+  /* Two tags: outer header, then inner header. */
+  else if (PREDICT_FALSE (!is_p2p) && sub_sw->sub.eth.flags.two_tags)
+    {
+      ethernet_vlan_header_t *outer = (void *) (h + 1);
+      ethernet_vlan_header_t *inner = (void *) (outer + 1);
+
+      h->type = sub_sw->sub.eth.flags.dot1ad ?
+        clib_host_to_net_u16 (ETHERNET_TYPE_DOT1AD) :
+        clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+      outer->priority_cfi_and_id =
+        clib_host_to_net_u16 (sub_sw->sub.eth.outer_vlan_id);
+      outer->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+      inner->priority_cfi_and_id =
+        clib_host_to_net_u16 (sub_sw->sub.eth.inner_vlan_id);
+      inner->type = clib_host_to_net_u16 (type);
+
+    }
+  else
+    {
+      h->type = clib_host_to_net_u16 (type);
+    }
+
+  return (rewrite);
+}
+
+/*
+ * Dispatch an adjacency update: P2P interfaces use the default handler,
+ * otherwise the ARP (IPv4) or IPv6 handler is chosen from the
+ * adjacency's next-hop protocol. Any other protocol trips the ASSERT.
+ */
+void
+ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+  ip_adjacency_t *adj;
+
+  adj = adj_get (ai);
+
+  vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+  if (si->type == VNET_SW_INTERFACE_TYPE_P2P)
+    {
+      default_update_adjacency (vnm, sw_if_index, ai);
+    }
+  else if (FIB_PROTOCOL_IP4 == adj->ia_nh_proto)
+    {
+      arp_update_adjacency (vnm, sw_if_index, ai);
+    }
+  else if (FIB_PROTOCOL_IP6 == adj->ia_nh_proto)
+    {
+      ip6_ethernet_update_adjacency (vnm, sw_if_index, ai);
+    }
+  else
+    {
+      ASSERT (0);
+    }
+}
+
+/*
+ * MAC address change handler: copies the new address into both the hw
+ * interface and the ethernet interface, then notifies ARP and NDP.
+ */
+static clib_error_t *
+ethernet_mac_change (vnet_hw_interface_t * hi, char *mac_address)
+{
+  ethernet_interface_t *ei;
+  ethernet_main_t *em;
+
+  em = &ethernet_main;
+  ei = pool_elt_at_index (em->interfaces, hi->hw_instance);
+
+  /* Make sure hw_address has room for a full MAC before copying into it. */
+  vec_validate (hi->hw_address,
+                STRUCT_SIZE_OF (ethernet_header_t, src_address) - 1);
+  clib_memcpy (hi->hw_address, mac_address, vec_len (hi->hw_address));
+
+  clib_memcpy (ei->address, (u8 *) mac_address, sizeof (ei->address));
+  ethernet_arp_change_mac (hi->sw_if_index);
+  ethernet_ndp_change_mac (hi->sw_if_index);
+
+  return (NULL);
+}
+
+/* *INDENT-OFF* */
+VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = {
+  .name = "Ethernet",
+  .format_address = format_ethernet_address,
+  .format_header = format_ethernet_header_with_length,
+  .unformat_hw_address = unformat_ethernet_address,
+  .unformat_header = unformat_ethernet_header,
+  .build_rewrite = ethernet_build_rewrite,
+  .update_adjacency = ethernet_update_adjacency,
+
.mac_addr_change_function = ethernet_mac_change,
+};
+/* *INDENT-ON* */
+
+/*
+ * Parse a hardware interface and accept it only when
+ * ethernet_get_interface recognises it as ethernet.
+ * Arguments: vnet_main_t *, then u32 * receiving the hw_if_index.
+ */
+uword
+unformat_ethernet_interface (unformat_input_t * input, va_list * args)
+{
+  vnet_main_t *vnm = va_arg (*args, vnet_main_t *);
+  u32 *result = va_arg (*args, u32 *);
+  u32 hw_if_index;
+  ethernet_main_t *em = &ethernet_main;
+  ethernet_interface_t *eif;
+
+  if (!unformat_user (input, unformat_vnet_hw_interface, vnm, &hw_if_index))
+    return 0;
+
+  eif = ethernet_get_interface (em, hw_if_index);
+  if (eif)
+    {
+      *result = hw_if_index;
+      return 1;
+    }
+  return 0;
+}
+
+/*
+ * Allocate an ethernet_interface_t, register a hw interface of the
+ * ethernet class for it and initialise packet size limits and MAC.
+ * The new hw_if_index is stored through hw_if_index_return.
+ */
+clib_error_t *
+ethernet_register_interface (vnet_main_t * vnm,
+                             u32 dev_class_index,
+                             u32 dev_instance,
+                             u8 * address,
+                             u32 * hw_if_index_return,
+                             ethernet_flag_change_function_t flag_change)
+{
+  ethernet_main_t *em = &ethernet_main;
+  ethernet_interface_t *ei;
+  vnet_hw_interface_t *hi;
+  clib_error_t *error = 0;
+  u32 hw_if_index;
+
+  pool_get (em->interfaces, ei);
+  ei->flag_change = flag_change;
+
+  /* The pool index of ei becomes the hw interface's hw_instance. */
+  hw_if_index = vnet_register_interface
+    (vnm,
+     dev_class_index, dev_instance,
+     ethernet_hw_interface_class.index, ei - em->interfaces);
+  *hw_if_index_return = hw_if_index;
+
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+  ethernet_setup_node (vnm->vlib_main, hi->output_node_index);
+
+  hi->min_packet_bytes = hi->min_supported_packet_bytes =
+    ETHERNET_MIN_PACKET_BYTES;
+  hi->max_packet_bytes = hi->max_supported_packet_bytes =
+    ETHERNET_MAX_PACKET_BYTES;
+  hi->per_packet_overhead_bytes =
+    /* preamble */ 8 + /* inter frame gap */ 12;
+
+  /* Standard default ethernet MTU.
*/ + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000; + + clib_memcpy (ei->address, address, sizeof (ei->address)); + vec_free (hi->hw_address); + vec_add (hi->hw_address, address, sizeof (ei->address)); + + if (error) + { + pool_put (em->interfaces, ei); + return error; + } + return error; +} + +void +ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index) +{ + ethernet_main_t *em = ðernet_main; + ethernet_interface_t *ei; + vnet_hw_interface_t *hi; + main_intf_t *main_intf; + vlan_table_t *vlan_table; + u32 idx; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + ei = pool_elt_at_index (em->interfaces, hi->hw_instance); + + /* Delete vlan mapping table for dot1q and dot1ad. */ + main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index); + if (main_intf->dot1q_vlans) + { + vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans); + for (idx = 0; idx < ETHERNET_N_VLAN; idx++) + { + if (vlan_table->vlans[idx].qinqs) + { + pool_put_index (em->qinq_pool, vlan_table->vlans[idx].qinqs); + } + } + pool_put_index (em->vlan_pool, main_intf->dot1q_vlans); + } + if (main_intf->dot1ad_vlans) + { + vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans); + for (idx = 0; idx < ETHERNET_N_VLAN; idx++) + { + if (vlan_table->vlans[idx].qinqs) + { + pool_put_index (em->qinq_pool, vlan_table->vlans[idx].qinqs); + } + } + pool_put_index (em->vlan_pool, main_intf->dot1ad_vlans); + } + + vnet_delete_hw_interface (vnm, hw_if_index); + pool_put (em->interfaces, ei); +} + +u32 +ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + ethernet_main_t *em = ðernet_main; + vnet_hw_interface_t *hi; + ethernet_interface_t *ei; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + + ASSERT (hi->hw_class_index == ethernet_hw_interface_class.index); + + ei = pool_elt_at_index (em->interfaces, hi->hw_instance); + if (ei->flag_change) + return ei->flag_change (vnm, hi, flags); + return (u32) ~ 0; +} + +/* Echo packets 
back to ethernet/l2-input. */ +static uword +simulated_ethernet_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, n_left_to_next, n_copy, *from, *to_next; + u32 next_index = VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT; + u32 i, next_node_index, bvi_flag, sw_if_index; + u32 n_pkts = 0, n_bytes = 0; + u32 thread_index = vm->thread_index; + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + vlib_node_main_t *nm = &vm->node_main; + vlib_node_t *loop_node; + vlib_buffer_t *b; + + // check tx node index, it is ethernet-input on loopback create + // but can be changed to l2-input if loopback is configured as + // BVI of a BD (Bridge Domain). + loop_node = vec_elt (nm->nodes, node->node_index); + next_node_index = loop_node->next_nodes[next_index]; + bvi_flag = (next_node_index == l2input_node.index) ? 1 : 0; + + n_left_from = frame->n_vectors; + from = vlib_frame_args (frame); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + n_copy = clib_min (n_left_from, n_left_to_next); + + clib_memcpy (to_next, from, n_copy * sizeof (from[0])); + n_left_to_next -= n_copy; + n_left_from -= n_copy; + i = 0; + b = vlib_get_buffer (vm, from[i]); + sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX]; + while (1) + { + // Set up RX and TX indices as if received from a real driver + // unless loopback is used as a BVI. 
For BVI case, leave TX index + // and update l2_len in packet as required for l2 forwarding path + vnet_buffer (b)->sw_if_index[VLIB_RX] = sw_if_index; + if (bvi_flag) + { + vnet_update_l2_len (b); + vnet_buffer (b)->sw_if_index[VLIB_TX] = L2INPUT_BVI; + } + else + vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0; + + i++; + n_pkts++; + n_bytes += vlib_buffer_length_in_chain (vm, b); + + if (i < n_copy) + b = vlib_get_buffer (vm, from[i]); + else + break; + } + from += n_copy; + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + + /* increment TX interface stat */ + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_TX, + thread_index, sw_if_index, n_pkts, + n_bytes); + } + + return n_left_from; +} + +static u8 * +format_simulated_ethernet_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "loop%d", dev_instance); +} + +static clib_error_t * +simulated_ethernet_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, + u32 flags) +{ + u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? + VNET_HW_INTERFACE_FLAG_LINK_UP : 0; + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + return 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (ethernet_simulated_device_class) = { + .name = "Loopback", + .format_device_name = format_simulated_ethernet_name, + .tx_function = simulated_ethernet_interface_tx, + .admin_up_down_function = simulated_ethernet_admin_up_down, +}; +/* *INDENT-ON* */ + + +/* + * Maintain a bitmap of allocated loopback instance numbers. + */ +#define LOOPBACK_MAX_INSTANCE (16 * 1024) + +static u32 +loopback_instance_alloc (u8 is_specified, u32 want) +{ + ethernet_main_t *em = ðernet_main; + + /* + * Check for dynamically allocaetd instance number. 
+ */ + if (!is_specified) + { + u32 bit; + + bit = clib_bitmap_first_clear (em->bm_loopback_instances); + if (bit >= LOOPBACK_MAX_INSTANCE) + { + return ~0; + } + em->bm_loopback_instances = clib_bitmap_set (em->bm_loopback_instances, + bit, 1); + return bit; + } + + /* + * In range? + */ + if (want >= LOOPBACK_MAX_INSTANCE) + { + return ~0; + } + + /* + * Already in use? + */ + if (clib_bitmap_get (em->bm_loopback_instances, want)) + { + return ~0; + } + + /* + * Grant allocation request. + */ + em->bm_loopback_instances = clib_bitmap_set (em->bm_loopback_instances, + want, 1); + + return want; +} + +static int +loopback_instance_free (u32 instance) +{ + ethernet_main_t *em = ðernet_main; + + if (instance >= LOOPBACK_MAX_INSTANCE) + { + return -1; + } + + if (clib_bitmap_get (em->bm_loopback_instances, instance) == 0) + { + return -1; + } + + em->bm_loopback_instances = clib_bitmap_set (em->bm_loopback_instances, + instance, 0); + return 0; +} + +int +vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address, + u8 is_specified, u32 user_instance) +{ + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + clib_error_t *error; + u32 instance; + u8 address[6]; + u32 hw_if_index; + vnet_hw_interface_t *hw_if; + u32 slot; + int rv = 0; + + ASSERT (sw_if_indexp); + + *sw_if_indexp = (u32) ~ 0; + + memset (address, 0, sizeof (address)); + + /* + * Allocate a loopback instance. Either select on dynamically + * or try to use the desired user_instance number. + */ + instance = loopback_instance_alloc (is_specified, user_instance); + if (instance == ~0) + { + return VNET_API_ERROR_INVALID_REGISTRATION; + } + + /* + * Default MAC address (dead:0000:0000 + instance) is allocated + * if zero mac_address is configured. Otherwise, user-configurable MAC + * address is programmed on the loopback interface. 
+ */ + if (memcmp (address, mac_address, sizeof (address))) + clib_memcpy (address, mac_address, sizeof (address)); + else + { + address[0] = 0xde; + address[1] = 0xad; + address[5] = instance; + } + + error = ethernet_register_interface + (vnm, + ethernet_simulated_device_class.index, instance, address, &hw_if_index, + /* flag change */ 0); + + if (error) + { + rv = VNET_API_ERROR_INVALID_REGISTRATION; + clib_error_report (error); + return rv; + } + + hw_if = vnet_get_hw_interface (vnm, hw_if_index); + slot = vlib_node_add_named_next_with_slot + (vm, hw_if->tx_node_index, + "ethernet-input", VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); + ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT); + + { + vnet_sw_interface_t *si = vnet_get_hw_sw_interface (vnm, hw_if_index); + *sw_if_indexp = si->sw_if_index; + } + + return 0; +} + +static clib_error_t * +create_simulated_ethernet_interfaces (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int rv; + u32 sw_if_index; + u8 mac_address[6]; + u8 is_specified = 0; + u32 user_instance = 0; + + memset (mac_address, 0, sizeof (mac_address)); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "mac %U", unformat_ethernet_address, mac_address)) + ; + if (unformat (input, "instance %d", &user_instance)) + is_specified = 1; + else + break; + } + + rv = vnet_create_loopback_interface (&sw_if_index, mac_address, + is_specified, user_instance); + + if (rv) + return clib_error_return (0, "vnet_create_loopback_interface failed"); + + vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), + sw_if_index); + return 0; +} + +/*? + * Create a loopback interface. Optionally, a MAC Address can be + * provided. If not provided, de:ad:00:00:00:<loopId> will be used. 
+ *
+ * @cliexpar
+ * The following two command syntaxes are equivalent:
+ * @cliexcmd{loopback create-interface [mac <mac-addr>] [instance <instance>]}
+ * @cliexcmd{create loopback interface [mac <mac-addr>] [instance <instance>]}
+ * Example of how to create a loopback interface:
+ * @cliexcmd{loopback create-interface}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = {
+  .path = "loopback create-interface",
+  .short_help = "loopback create-interface [mac <mac-addr>] [instance <instance>]",
+  .function = create_simulated_ethernet_interfaces,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Create a loopback interface. Optionally, a MAC Address can be
+ * provided. If not provided, de:ad:00:00:00:<loopId> will be used.
+ *
+ * @cliexpar
+ * The following two command syntaxes are equivalent:
+ * @cliexcmd{loopback create-interface [mac <mac-addr>] [instance <instance>]}
+ * @cliexcmd{create loopback interface [mac <mac-addr>] [instance <instance>]}
+ * Example of how to create a loopback interface:
+ * @cliexcmd{create loopback interface}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (create_loopback_interface_command, static) = {
+  .path = "create loopback interface",
+  .short_help = "create loopback interface [mac <mac-addr>] [instance <instance>]",
+  .function = create_simulated_ethernet_interfaces,
+};
+/* *INDENT-ON* */
+
+/*
+ * Return the ethernet_interface_t behind hw_if_index, or 0 when the
+ * hardware interface is not of the ethernet hw class.
+ */
+ethernet_interface_t *
+ethernet_get_interface (ethernet_main_t * em, u32 hw_if_index)
+{
+  vnet_hw_interface_t *i =
+    vnet_get_hw_interface (vnet_get_main (), hw_if_index);
+  return (i->hw_class_index ==
+          ethernet_hw_interface_class.
+          index ?
pool_elt_at_index (em->interfaces, i->hw_instance) : 0);
+}
+
+/*
+ * Delete the loopback interface identified by sw_if_index.
+ * Returns 0 on success, or VNET_API_ERROR_INVALID_SW_IF_INDEX when the
+ * index is free, does not belong to a loopback device, or its instance
+ * number is not currently allocated.
+ */
+int
+vnet_delete_loopback_interface (u32 sw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_sw_interface_t *si;
+  u32 hw_if_index;
+  vnet_hw_interface_t *hw;
+  u32 instance;
+
+  if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  si = vnet_get_sw_interface (vnm, sw_if_index);
+  hw_if_index = si->hw_if_index;
+  hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+  /*
+   * Only loopback devices own a slot in the loopback instance bitmap.
+   * Without this check another device whose dev_instance happens to
+   * match an allocated loopback number would free that number and be
+   * deleted here.
+   */
+  if (hw->dev_class_index != ethernet_simulated_device_class.index)
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  instance = hw->dev_instance;
+
+  if (loopback_instance_free (instance) < 0)
+    {
+      return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+    }
+
+  ethernet_delete_interface (vnm, hw_if_index);
+
+  return 0;
+}
+
+/*
+ * Delete a sub-interface (SUB or P2P type).
+ * Returns 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX for a free
+ * index, VNET_API_ERROR_INVALID_SUB_SW_IF_INDEX for any other type.
+ */
+int
+vnet_delete_sub_interface (u32 sw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  int rv = 0;
+
+  if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+
+  vnet_interface_main_t *im = &vnm->interface_main;
+  vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+
+  if (si->type == VNET_SW_INTERFACE_TYPE_SUB ||
+      si->type == VNET_SW_INTERFACE_TYPE_P2P)
+    {
+      /* Key layout: parent sw_if_index in the high 32 bits, sub id below. */
+      u64 sup_and_sub_key =
+        ((u64) (si->sup_sw_if_index) << 32) | (u64) si->sub.id;
+
+      hash_unset_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key);
+      vnet_delete_sw_interface (vnm, sw_if_index);
+    }
+  else
+    {
+      rv = VNET_API_ERROR_INVALID_SUB_SW_IF_INDEX;
+    }
+  return rv;
+}
+
+/*
+ * CLI handler shared by both "delete loopback" command paths.
+ * Expects "intfc <interface>".
+ */
+static clib_error_t *
+delete_simulated_ethernet_interfaces (vlib_main_t * vm,
+                                      unformat_input_t * input,
+                                      vlib_cli_command_t * cmd)
+{
+  int rv;
+  u32 sw_if_index = ~0;
+  vnet_main_t *vnm = vnet_get_main ();
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "intfc %U",
+                    unformat_vnet_sw_interface, vnm, &sw_if_index))
+        ;
+      else
+        break;
+    }
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "interface not specified");
+
+  rv =
vnet_delete_loopback_interface (sw_if_index);
+
+  if (rv)
+    return clib_error_return (0, "vnet_delete_loopback_interface failed");
+
+  return 0;
+}
+
+/*
+ * CLI handler for "delete sub-interface <interface>".
+ * Any non-zero result from vnet_delete_sub_interface, or a free
+ * sw_if_index, is reported as a single generic error.
+ */
+static clib_error_t *
+delete_sub_interface (vlib_main_t * vm,
+                      unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  int rv = 0;
+  u32 sw_if_index = ~0;
+  vnet_main_t *vnm = vnet_get_main ();
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat
+          (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+        ;
+      else
+        break;
+    }
+  /* sw_if_index is still ~0 when no interface name was parsed. */
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "interface doesn't exist");
+
+  if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+    rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+  else
+    rv = vnet_delete_sub_interface (sw_if_index);
+  if (rv)
+    return clib_error_return (0, "delete_subinterface_interface failed");
+  return 0;
+}
+
+/*?
+ * Delete a loopback interface.
+ *
+ * @cliexpar
+ * The following two command syntaxes are equivalent:
+ * @cliexcmd{loopback delete-interface intfc <interface>}
+ * @cliexcmd{delete loopback interface intfc <interface>}
+ * Example of how to delete a loopback interface:
+ * @cliexcmd{loopback delete-interface intfc loop0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (delete_simulated_ethernet_interface_command, static) = {
+  .path = "loopback delete-interface",
+  .short_help = "loopback delete-interface intfc <interface>",
+  .function = delete_simulated_ethernet_interfaces,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Delete a loopback interface.
+ *
+ * @cliexpar
+ * The following two command syntaxes are equivalent:
+ * @cliexcmd{loopback delete-interface intfc <interface>}
+ * @cliexcmd{delete loopback interface intfc <interface>}
+ * Example of how to delete a loopback interface:
+ * @cliexcmd{delete loopback interface intfc loop0}
+?*/
+/* Alternate CLI path bound to the same handler function. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (delete_loopback_interface_command, static) = {
+  .path = "delete loopback interface",
+  .short_help = "delete loopback interface intfc <interface>",
+  .function = delete_simulated_ethernet_interfaces,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Delete a sub-interface.
+ *
+ * @cliexpar
+ * Example of how to delete a sub-interface:
+ * @cliexcmd{delete sub-interface GigabitEthernet0/8/0.200}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (delete_sub_interface_command, static) = {
+  .path = "delete sub-interface",
+  .short_help = "delete sub-interface <interface>",
+  .function = delete_sub_interface,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ethernet/mac_swap.c b/src/vnet/ethernet/mac_swap.c
new file mode 100644
index 00000000..c0fec12e
--- /dev/null
+++ b/src/vnet/ethernet/mac_swap.c
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vppinfra/error.h> +#include <vnet/devices/pci/ige.h> +#include <vnet/devices/pci/ixge.h> +#include <vnet/devices/pci/ixgev.h> + +typedef struct +{ + u32 cached_next_index; + u32 cached_sw_if_index; + + /* Hash table to map sw_if_index to next node index */ + uword *next_node_index_by_sw_if_index; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} mac_swap_main_t; + +typedef struct +{ + u8 src[6]; + u8 dst[6]; + u32 sw_if_index; + u32 next_index; +} swap_trace_t; + +/* packet trace format function */ +static u8 * +format_swap_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + swap_trace_t *t = va_arg (*args, swap_trace_t *); + + s = format (s, "SWAP: dst now %U src now %U sw_if_index %d next_index %d", + format_ethernet_address, t->dst, + format_ethernet_address, t->src, t->sw_if_index, t->next_index); + return s; +} + +#define foreach_hw_driver_next \ + _(IP4) \ + _(IP6) \ + _(ETHERNET) + +mac_swap_main_t mac_swap_main; + +static vlib_node_registration_t mac_swap_node; + +#define foreach_mac_swap_error \ +_(SWAPS, "mac addresses swapped") + +typedef enum +{ +#define _(sym,str) MAC_SWAP_ERROR_##sym, + foreach_mac_swap_error +#undef _ + MAC_SWAP_N_ERROR, +} mac_swap_error_t; + +static char *mac_swap_error_strings[] = { +#define _(sym,string) string, + foreach_mac_swap_error +#undef _ +}; + +/* + * To drop a pkt and increment one of the previous counters: + * + * set b0->error = error_node->errors[RANDOM_ERROR_SAMPLE]; + * set next0 to a disposition index bound to "error-drop". 
/*
 * mac-swap node dispatch function.
 *
 * For every buffer in 'frame': look up (or lazily create) the next-node
 * arc leading to the output node of the hardware interface that supports
 * the buffer's RX sw_if_index, rewind the buffer to the start of its data,
 * exchange the ethernet source and destination addresses in place, bump
 * the MAC_SWAP_ERROR_SWAPS counter and enqueue the buffer on that arc.
 *
 * The most recent sw_if_index -> next index mapping is cached in
 * mac_swap_main; misses fall back to the hash table and, if the interface
 * has not been seen before, to vlib_node_add_next().
 *
 * Returns the number of buffers in the frame.
 */
static uword
mac_swap_node_fn (vlib_main_t * vm,
                  vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next;
  mac_swap_next_t next_index;
  mac_swap_main_t *msm = &mac_swap_main;
  vlib_node_t *n = vlib_get_node (vm, mac_swap_node.index);
  /* base of this node's counters within the global error counter array */
  u32 node_counter_base_index = n->error_heap_index;
  vlib_error_main_t *em = &vm->error_main;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /*
       * Dual loop: handles two buffers per iteration and needs two more
       * available in 'from' for the prefetch of the following iteration.
       */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          u32 bi0, bi1;
          vlib_buffer_t *b0, *b1;
          u32 next0, next1;
          u32 sw_if_index0, sw_if_index1;
          uword *p0, *p1;
          u64 tmp0a, tmp0b;
          u64 tmp1a, tmp1b;
          ethernet_header_t *h0, *h1;


          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            /* packet data is prefetched for STORE: it is rewritten below */
            CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
            CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
          }

          /* speculatively enqueue both buffers to the current next frame */
          to_next[0] = bi0 = from[0];
          to_next[1] = bi1 = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          /* start from the cached mapping; corrected below on a miss */
          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          next0 = msm->cached_next_index;
          sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
          next1 = msm->cached_next_index;

          if (PREDICT_FALSE (msm->cached_sw_if_index != sw_if_index0))
            {
              p0 =
                hash_get (msm->next_node_index_by_sw_if_index, sw_if_index0);
              if (p0 == 0)
                {
                  /* first packet from this interface: create the arc */
                  vnet_hw_interface_t *hw0;

                  hw0 = vnet_get_sup_hw_interface (msm->vnet_main,
                                                   sw_if_index0);

                  next0 = vlib_node_add_next (msm->vlib_main,
                                              mac_swap_node.index,
                                              hw0->output_node_index);
                  hash_set (msm->next_node_index_by_sw_if_index,
                            sw_if_index0, next0);
                }
              else
                next0 = p0[0];
              msm->cached_sw_if_index = sw_if_index0;
              msm->cached_next_index = next0;
              /*
               * The cache now describes packet 0's interface; give packet 1
               * the same next index in case it shares that interface. The
               * check below overrides it otherwise.
               */
              next1 = next0;
            }
          /* must run after the block above: it compares against the cache
             as updated for packet 0 */
          if (PREDICT_FALSE (msm->cached_sw_if_index != sw_if_index1))
            {
              p1 =
                hash_get (msm->next_node_index_by_sw_if_index, sw_if_index1);
              if (p1 == 0)
                {
                  vnet_hw_interface_t *hw1;

                  hw1 = vnet_get_sup_hw_interface (msm->vnet_main,
                                                   sw_if_index1);

                  next1 = vlib_node_add_next (msm->vlib_main,
                                              mac_swap_node.index,
                                              hw1->output_node_index);
                  hash_set (msm->next_node_index_by_sw_if_index,
                            sw_if_index1, next1);
                }
              else
                next1 = p1[0];
              msm->cached_sw_if_index = sw_if_index1;
              msm->cached_next_index = next1;
            }

          em->counters[node_counter_base_index + MAC_SWAP_ERROR_SWAPS] += 2;

          /* reset buffer so we always point at the MAC hdr */
          vlib_buffer_reset (b0);
          vlib_buffer_reset (b1);
          h0 = vlib_buffer_get_current (b0);
          h1 = vlib_buffer_get_current (b1);

          /*
           * Swap 2 x src and dst mac addresses using 8-byte load/stores.
           *
           * NOTE(review): relies on ethernet_header_t being laid out as
           * dst[6], src[6], type - confirm against ethernet/packet.h.
           * Under that layout, in host order:
           *   tmpNa = old dst (upper 48 bits) | first 2 bytes of old src
           *   tmpNb = old src (upper 48 bits) | ethertype
           * The first store puts old src into dst and spills the ethertype
           * over the first two bytes of src; the second store rewrites src
           * and the type field with old dst + ethertype.
           */
          tmp0a = clib_net_to_host_u64 (((u64 *) (h0->dst_address))[0]);
          tmp1a = clib_net_to_host_u64 (((u64 *) (h1->dst_address))[0]);
          tmp0b = clib_net_to_host_u64 (((u64 *) (h0->src_address))[0]);
          tmp1b = clib_net_to_host_u64 (((u64 *) (h1->src_address))[0]);
          ((u64 *) (h0->dst_address))[0] = clib_host_to_net_u64 (tmp0b);
          ((u64 *) (h1->dst_address))[0] = clib_host_to_net_u64 (tmp1b);
          /* Move the ethertype from "b" to "a" */
          tmp0a &= ~(0xFFFF);
          tmp1a &= ~(0xFFFF);
          tmp0a |= tmp0b & 0xFFFF;
          ((u64 *) (h0->src_address))[0] = clib_host_to_net_u64 (tmp0a);
          tmp1a |= tmp1b & 0xFFFF;
          ((u64 *) (h1->src_address))[0] = clib_host_to_net_u64 (tmp1a);

          /* record the post-swap addresses for traced buffers */
          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
            {
              if (b0->flags & VLIB_BUFFER_IS_TRACED)
                {
                  swap_trace_t *t =
                    vlib_add_trace (vm, node, b0, sizeof (*t));
                  clib_memcpy (t->src, h0->src_address, 6);
                  clib_memcpy (t->dst, h0->dst_address, 6);
                  t->sw_if_index = sw_if_index0;
                  t->next_index = next0;
                }
              if (b1->flags & VLIB_BUFFER_IS_TRACED)
                {
                  swap_trace_t *t =
                    vlib_add_trace (vm, node, b1, sizeof (*t));
                  clib_memcpy (t->src, h1->src_address, 6);
                  clib_memcpy (t->dst, h1->dst_address, 6);
                  t->sw_if_index = sw_if_index1;
                  t->next_index = next1;
                }
            }

          /* fix up the speculative enqueue if next0/next1 differ from
             next_index */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
        }

      /* Single loop: same processing, one buffer at a time. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t *b0;
          u32 next0;
          u32 sw_if_index0;
          uword *p0;
          u64 tmp0a, tmp0b;
          ethernet_header_t *h0;

          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);

          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          next0 = msm->cached_next_index;

          if (PREDICT_FALSE (msm->cached_sw_if_index != sw_if_index0))
            {
              p0 =
                hash_get (msm->next_node_index_by_sw_if_index, sw_if_index0);
              if (p0 == 0)
                {
                  /* first packet from this interface: create the arc */
                  vnet_hw_interface_t *hw0;

                  hw0 = vnet_get_sup_hw_interface (msm->vnet_main,
                                                   sw_if_index0);

                  next0 = vlib_node_add_next (msm->vlib_main,
                                              mac_swap_node.index,
                                              hw0->output_node_index);
                  hash_set (msm->next_node_index_by_sw_if_index,
                            sw_if_index0, next0);
                }
              else
                next0 = p0[0];
              msm->cached_sw_if_index = sw_if_index0;
              msm->cached_next_index = next0;
            }

          em->counters[node_counter_base_index + MAC_SWAP_ERROR_SWAPS] += 1;

          /* reset buffer so we always point at the MAC hdr */
          vlib_buffer_reset (b0);
          h0 = vlib_buffer_get_current (b0);

          /* Exchange src and dst, preserve the ethertype
             (same two overlapping 8-byte stores as in the dual loop) */
          tmp0a = clib_net_to_host_u64 (((u64 *) (h0->dst_address))[0]);
          tmp0b = clib_net_to_host_u64 (((u64 *) (h0->src_address))[0]);
          ((u64 *) (h0->dst_address))[0] = clib_host_to_net_u64 (tmp0b);
          tmp0a &= ~(0xFFFF);
          tmp0a |= tmp0b & 0xFFFF;
          ((u64 *) (h0->src_address))[0] = clib_host_to_net_u64 (tmp0a);

          /* trace (if enabled for this buffer), then ship it */
          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              swap_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
              clib_memcpy (t->src, h0->src_address, 6);
              clib_memcpy (t->dst, h0->dst_address, 6);
              t->sw_if_index = sw_if_index0;
              t->next_index = next0;
            }

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
"error-drop", + }, +}; +/* *INDENT-ON* */ + +clib_error_t * +mac_swap_init (vlib_main_t * vm) +{ + mac_swap_main_t *msm = &mac_swap_main; + + msm->next_node_index_by_sw_if_index = hash_create (0, sizeof (uword)); + msm->cached_next_index = (u32) ~ 0; + msm->cached_sw_if_index = (u32) ~ 0; + msm->vlib_main = vm; + msm->vnet_main = vnet_get_main (); + + /* Driver RX nodes send pkts here... */ +#define _(a) ixge_set_next_node (IXGE_RX_NEXT_##a##_INPUT, "mac-swap"); + foreach_hw_driver_next +#undef _ +#define _(a) ixgev_set_next_node (IXGEV_RX_NEXT_##a##_INPUT, "mac-swap"); + foreach_hw_driver_next +#undef _ +#define _(a) ige_set_next_node (IGE_RX_NEXT_##a##_INPUT, "mac-swap"); + foreach_hw_driver_next +#undef _ + return 0; +} + +VLIB_INIT_FUNCTION (mac_swap_init); + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c new file mode 100755 index 00000000..f216216d --- /dev/null +++ b/src/vnet/ethernet/node.c @@ -0,0 +1,1419 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ethernet_node.c: ethernet packet processing + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/p2p_ethernet.h> +#include <vppinfra/sparse_vec.h> +#include <vnet/l2/l2_bvi.h> + + +#define foreach_ethernet_input_next \ + _ (PUNT, "error-punt") \ + _ (DROP, "error-drop") \ + _ (LLC, "llc-input") + +typedef enum +{ +#define _(s,n) ETHERNET_INPUT_NEXT_##s, + foreach_ethernet_input_next +#undef _ + ETHERNET_INPUT_N_NEXT, +} ethernet_input_next_t; + +typedef struct +{ + u8 packet_data[32]; +} ethernet_input_trace_t; + +static u8 * +format_ethernet_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ethernet_input_trace_t *t = va_arg (*va, ethernet_input_trace_t *); + + s = format (s, "%U", format_ethernet_header, t->packet_data); + + return s; +} + +vlib_node_registration_t ethernet_input_node; + +typedef enum +{ + ETHERNET_INPUT_VARIANT_ETHERNET, + ETHERNET_INPUT_VARIANT_ETHERNET_TYPE, + ETHERNET_INPUT_VARIANT_NOT_L2, +} ethernet_input_variant_t; + + +// Parse the ethernet header to extract vlan tags and innermost ethertype +static_always_inline void +parse_header (ethernet_input_variant_t variant, + vlib_buffer_t * b0, + u16 * type, + u16 * orig_type, + u16 * outer_id, u16 * inner_id, u32 * match_flags) +{ + u8 vlan_count; + + if (variant == ETHERNET_INPUT_VARIANT_ETHERNET + || variant == ETHERNET_INPUT_VARIANT_NOT_L2) + { + ethernet_header_t *e0; + + e0 = (void *) (b0->data + b0->current_data); + + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; + + vlib_buffer_advance (b0, sizeof (e0[0])); + + *type = clib_net_to_host_u16 (e0->type); + } + else if (variant == ETHERNET_INPUT_VARIANT_ETHERNET_TYPE) + { + // here when prior node was LLC/SNAP processing + u16 *e0; + + e0 = (void *) (b0->data + b0->current_data); + + vlib_buffer_advance (b0, sizeof (e0[0])); + + *type = clib_net_to_host_u16 (e0[0]); + } + + // save for 
distinguishing between dot1q and dot1ad later + *orig_type = *type; + + // default the tags to 0 (used if there is no corresponding tag) + *outer_id = 0; + *inner_id = 0; + + *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_0_TAG; + vlan_count = 0; + + // check for vlan encaps + if (ethernet_frame_is_tagged (*type)) + { + ethernet_vlan_header_t *h0; + u16 tag; + + *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_1_TAG; + + h0 = (void *) (b0->data + b0->current_data); + + tag = clib_net_to_host_u16 (h0->priority_cfi_and_id); + + *outer_id = tag & 0xfff; + if (0 == *outer_id) + *match_flags &= ~SUBINT_CONFIG_MATCH_1_TAG; + + *type = clib_net_to_host_u16 (h0->type); + + vlib_buffer_advance (b0, sizeof (h0[0])); + vlan_count = 1; + + if (*type == ETHERNET_TYPE_VLAN) + { + // Double tagged packet + *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_2_TAG; + + h0 = (void *) (b0->data + b0->current_data); + + tag = clib_net_to_host_u16 (h0->priority_cfi_and_id); + + *inner_id = tag & 0xfff; + + *type = clib_net_to_host_u16 (h0->type); + + vlib_buffer_advance (b0, sizeof (h0[0])); + vlan_count = 2; + if (*type == ETHERNET_TYPE_VLAN) + { + // More than double tagged packet + *match_flags = SUBINT_CONFIG_VALID | SUBINT_CONFIG_MATCH_3_TAG; + + vlib_buffer_advance (b0, sizeof (h0[0])); + vlan_count = 3; // "unknown" number, aka, 3-or-more + } + } + } + ethernet_buffer_set_vlan_count (b0, vlan_count); +} + +// Determine the subinterface for this packet, given the result of the +// vlan table lookups and vlan header parsing. Check the most specific +// matches first. 
+static_always_inline void +identify_subint (vnet_hw_interface_t * hi, + vlib_buffer_t * b0, + u32 match_flags, + main_intf_t * main_intf, + vlan_intf_t * vlan_intf, + qinq_intf_t * qinq_intf, + u32 * new_sw_if_index, u8 * error0, u32 * is_l2) +{ + u32 matched; + + matched = eth_identify_subint (hi, b0, match_flags, + main_intf, vlan_intf, qinq_intf, + new_sw_if_index, error0, is_l2); + + if (matched) + { + + // Perform L3 my-mac filter + // A unicast packet arriving on an L3 interface must have a dmac matching the interface mac. + // This is required for promiscuous mode, else we will forward packets we aren't supposed to. + if (!(*is_l2)) + { + ethernet_header_t *e0; + e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset); + + if (!(ethernet_address_cast (e0->dst_address))) + { + if (!eth_mac_equal ((u8 *) e0, hi->hw_address)) + { + *error0 = ETHERNET_ERROR_L3_MAC_MISMATCH; + } + } + } + + // Check for down subinterface + *error0 = (*new_sw_if_index) != ~0 ? (*error0) : ETHERNET_ERROR_DOWN; + } +} + +static_always_inline void +determine_next_node (ethernet_main_t * em, + ethernet_input_variant_t variant, + u32 is_l20, + u32 type0, vlib_buffer_t * b0, u8 * error0, u8 * next0) +{ + if (PREDICT_FALSE (*error0 != ETHERNET_ERROR_NONE)) + { + // some error occurred + *next0 = ETHERNET_INPUT_NEXT_DROP; + } + else if (is_l20) + { + *next0 = em->l2_next; + // record the L2 len and reset the buffer so the L2 header is preserved + u32 eth_start = vnet_buffer (b0)->l2_hdr_offset; + vnet_buffer (b0)->l2.l2_len = b0->current_data - eth_start; + ASSERT (vnet_buffer (b0)->l2.l2_len == + ethernet_buffer_header_size (b0)); + vlib_buffer_advance (b0, -ethernet_buffer_header_size (b0)); + + // check for common IP/MPLS ethertypes + } + else if (type0 == ETHERNET_TYPE_IP4) + { + *next0 = em->l3_next.input_next_ip4; + } + else if (type0 == ETHERNET_TYPE_IP6) + { + *next0 = em->l3_next.input_next_ip6; + } + else if (type0 == ETHERNET_TYPE_MPLS) + { + *next0 = 
em->l3_next.input_next_mpls; + + } + else if (em->redirect_l3) + { + // L3 Redirect is on, the cached common next nodes will be + // pointing to the redirect node, catch the uncommon types here + *next0 = em->redirect_l3_next; + } + else + { + // uncommon ethertype, check table + u32 i0; + i0 = sparse_vec_index (em->l3_next.input_next_by_type, type0); + *next0 = vec_elt (em->l3_next.input_next_by_type, i0); + *error0 = + i0 == + SPARSE_VEC_INVALID_INDEX ? ETHERNET_ERROR_UNKNOWN_TYPE : *error0; + + // The table is not populated with LLC values, so check that now. + // If variant is variant_ethernet then we came from LLC processing. Don't + // go back there; drop instead using by keeping the drop/bad table result. + if ((type0 < 0x600) && (variant == ETHERNET_INPUT_VARIANT_ETHERNET)) + { + *next0 = ETHERNET_INPUT_NEXT_LLC; + } + } +} + +static_always_inline int +ethernet_frame_is_any_tagged (u16 type0, u16 type1) +{ +#if __SSE4_2__ + const __m128i ethertype_mask = _mm_set_epi16 (ETHERNET_TYPE_VLAN, + ETHERNET_TYPE_DOT1AD, + ETHERNET_TYPE_VLAN_9100, + ETHERNET_TYPE_VLAN_9200, + /* duplicate for type1 */ + ETHERNET_TYPE_VLAN, + ETHERNET_TYPE_DOT1AD, + ETHERNET_TYPE_VLAN_9100, + ETHERNET_TYPE_VLAN_9200); + + __m128i r = + _mm_set_epi16 (type0, type0, type0, type0, type1, type1, type1, type1); + r = _mm_cmpeq_epi16 (ethertype_mask, r); + return !_mm_test_all_zeros (r, r); +#else + return ethernet_frame_is_tagged (type0) || ethernet_frame_is_tagged (type1); +#endif +} + +static_always_inline uword +ethernet_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + ethernet_input_variant_t variant) +{ + vnet_main_t *vnm = vnet_get_main (); + ethernet_main_t *em = ðernet_main; + vlib_node_runtime_t *error_node; + u32 n_left_from, next_index, *from, *to_next; + u32 stats_sw_if_index, stats_n_packets, stats_n_bytes; + u32 thread_index = vlib_get_thread_index (); + u32 cached_sw_if_index = ~0; + u32 cached_is_l2 = 0; /* shut up gcc */ + 
vnet_hw_interface_t *hi = NULL; /* used for main interface only */ + + if (variant != ETHERNET_INPUT_VARIANT_ETHERNET) + error_node = vlib_node_get_runtime (vm, ethernet_input_node.index); + else + error_node = node; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, + from, + n_left_from, + sizeof (from[0]), + sizeof (ethernet_input_trace_t)); + + next_index = node->cached_next_index; + stats_sw_if_index = node->runtime_data[0]; + stats_n_packets = stats_n_bytes = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u8 next0, next1, error0, error1; + u16 type0, orig_type0, type1, orig_type1; + u16 outer_id0, inner_id0, outer_id1, inner_id1; + u32 match_flags0, match_flags1; + u32 old_sw_if_index0, new_sw_if_index0, len0, old_sw_if_index1, + new_sw_if_index1, len1; + vnet_hw_interface_t *hi0, *hi1; + main_intf_t *main_intf0, *main_intf1; + vlan_intf_t *vlan_intf0, *vlan_intf1; + qinq_intf_t *qinq_intf0, *qinq_intf1; + u32 is_l20, is_l21; + ethernet_header_t *e0, *e1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *b2, *b3; + + b2 = vlib_get_buffer (vm, from[2]); + b3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (b2, STORE); + vlib_prefetch_buffer_header (b3, STORE); + + CLIB_PREFETCH (b2->data, sizeof (ethernet_header_t), LOAD); + CLIB_PREFETCH (b3->data, sizeof (ethernet_header_t), LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + error0 = error1 = ETHERNET_ERROR_NONE; + e0 = vlib_buffer_get_current (b0); + type0 = clib_net_to_host_u16 (e0->type); + e1 = vlib_buffer_get_current (b1); + type1 = clib_net_to_host_u16 (e1->type); + + /* Speed-path for the untagged case */ + if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET + && !ethernet_frame_is_any_tagged (type0, type1))) + { + main_intf_t *intf0; + subint_config_t *subint0; + u32 sw_if_index0, sw_if_index1; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + is_l20 = cached_is_l2; + + /* This is probably wholly unnecessary */ + if (PREDICT_FALSE (sw_if_index0 != sw_if_index1)) + goto slowpath; + + /* Now sw_if_index0 == sw_if_index1 */ + if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0)) + { + cached_sw_if_index = sw_if_index0; + hi = vnet_get_sup_hw_interface (vnm, sw_if_index0); + intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index); + subint0 = &intf0->untagged_subint; + cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2; + } + + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; + vnet_buffer (b1)->l2_hdr_offset = b1->current_data; + + if (PREDICT_TRUE (is_l20 != 0)) + { + next0 = em->l2_next; + vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t); + next1 = em->l2_next; + vnet_buffer (b1)->l2.l2_len = sizeof (ethernet_header_t); + } + else + { + if (!ethernet_address_cast (e0->dst_address) && + 
(hi->hw_address != 0) && + !eth_mac_equal ((u8 *) e0, hi->hw_address)) + error0 = ETHERNET_ERROR_L3_MAC_MISMATCH; + if (!ethernet_address_cast (e1->dst_address) && + (hi->hw_address != 0) && + !eth_mac_equal ((u8 *) e1, hi->hw_address)) + error1 = ETHERNET_ERROR_L3_MAC_MISMATCH; + determine_next_node (em, variant, 0, type0, b0, + &error0, &next0); + vlib_buffer_advance (b0, sizeof (ethernet_header_t)); + determine_next_node (em, variant, 0, type1, b1, + &error1, &next1); + vlib_buffer_advance (b1, sizeof (ethernet_header_t)); + } + goto ship_it01; + } + + /* Slow-path for the tagged case */ + slowpath: + parse_header (variant, + b0, + &type0, + &orig_type0, &outer_id0, &inner_id0, &match_flags0); + + parse_header (variant, + b1, + &type1, + &orig_type1, &outer_id1, &inner_id1, &match_flags1); + + old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + old_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + eth_vlan_table_lookups (em, + vnm, + old_sw_if_index0, + orig_type0, + outer_id0, + inner_id0, + &hi0, + &main_intf0, &vlan_intf0, &qinq_intf0); + + eth_vlan_table_lookups (em, + vnm, + old_sw_if_index1, + orig_type1, + outer_id1, + inner_id1, + &hi1, + &main_intf1, &vlan_intf1, &qinq_intf1); + + identify_subint (hi0, + b0, + match_flags0, + main_intf0, + vlan_intf0, + qinq_intf0, &new_sw_if_index0, &error0, &is_l20); + + identify_subint (hi1, + b1, + match_flags1, + main_intf1, + vlan_intf1, + qinq_intf1, &new_sw_if_index1, &error1, &is_l21); + + // Save RX sw_if_index for later nodes + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + error0 != + ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0; + vnet_buffer (b1)->sw_if_index[VLIB_RX] = + error1 != + ETHERNET_ERROR_NONE ? 
old_sw_if_index1 : new_sw_if_index1; + + // Check if there is a stat to take (valid and non-main sw_if_index for pkt 0 or pkt 1) + if (((new_sw_if_index0 != ~0) + && (new_sw_if_index0 != old_sw_if_index0)) + || ((new_sw_if_index1 != ~0) + && (new_sw_if_index1 != old_sw_if_index1))) + { + + len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data + - vnet_buffer (b0)->l2_hdr_offset; + len1 = vlib_buffer_length_in_chain (vm, b1) + b1->current_data + - vnet_buffer (b1)->l2_hdr_offset; + + stats_n_packets += 2; + stats_n_bytes += len0 + len1; + + if (PREDICT_FALSE + (!(new_sw_if_index0 == stats_sw_if_index + && new_sw_if_index1 == stats_sw_if_index))) + { + stats_n_packets -= 2; + stats_n_bytes -= len0 + len1; + + if (new_sw_if_index0 != old_sw_if_index0 + && new_sw_if_index0 != ~0) + vlib_increment_combined_counter (vnm-> + interface_main.combined_sw_if_counters + + + VNET_INTERFACE_COUNTER_RX, + thread_index, + new_sw_if_index0, 1, + len0); + if (new_sw_if_index1 != old_sw_if_index1 + && new_sw_if_index1 != ~0) + vlib_increment_combined_counter (vnm-> + interface_main.combined_sw_if_counters + + + VNET_INTERFACE_COUNTER_RX, + thread_index, + new_sw_if_index1, 1, + len1); + + if (new_sw_if_index0 == new_sw_if_index1) + { + if (stats_n_packets > 0) + { + vlib_increment_combined_counter + (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + thread_index, + stats_sw_if_index, + stats_n_packets, stats_n_bytes); + stats_n_packets = stats_n_bytes = 0; + } + stats_sw_if_index = new_sw_if_index0; + } + } + } + + if (variant == ETHERNET_INPUT_VARIANT_NOT_L2) + is_l20 = is_l21 = 0; + + determine_next_node (em, variant, is_l20, type0, b0, &error0, + &next0); + determine_next_node (em, variant, is_l21, type1, b1, &error1, + &next1); + + ship_it01: + b0->error = error_node->errors[error0]; + b1->error = error_node->errors[error1]; + + // verify speculative enqueue + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, 
bi0, bi1, next0, + next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u8 error0, next0; + u16 type0, orig_type0; + u16 outer_id0, inner_id0; + u32 match_flags0; + u32 old_sw_if_index0, new_sw_if_index0, len0; + vnet_hw_interface_t *hi0; + main_intf_t *main_intf0; + vlan_intf_t *vlan_intf0; + qinq_intf_t *qinq_intf0; + ethernet_header_t *e0; + u32 is_l20; + + // Prefetch next iteration + if (n_left_from > 1) + { + vlib_buffer_t *p2; + + p2 = vlib_get_buffer (vm, from[1]); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + error0 = ETHERNET_ERROR_NONE; + e0 = vlib_buffer_get_current (b0); + type0 = clib_net_to_host_u16 (e0->type); + + /* Speed-path for the untagged case */ + if (PREDICT_TRUE (variant == ETHERNET_INPUT_VARIANT_ETHERNET + && !ethernet_frame_is_tagged (type0))) + { + main_intf_t *intf0; + subint_config_t *subint0; + u32 sw_if_index0; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + is_l20 = cached_is_l2; + + if (PREDICT_FALSE (cached_sw_if_index != sw_if_index0)) + { + cached_sw_if_index = sw_if_index0; + hi = vnet_get_sup_hw_interface (vnm, sw_if_index0); + intf0 = vec_elt_at_index (em->main_intfs, hi->hw_if_index); + subint0 = &intf0->untagged_subint; + cached_is_l2 = is_l20 = subint0->flags & SUBINT_CONFIG_L2; + } + + vnet_buffer (b0)->l2_hdr_offset = b0->current_data; + + if (PREDICT_TRUE (is_l20 != 0)) + { + next0 = em->l2_next; + vnet_buffer (b0)->l2.l2_len = sizeof (ethernet_header_t); + } + else + { + if (!ethernet_address_cast (e0->dst_address) && + (hi->hw_address != 0) && + !eth_mac_equal ((u8 *) e0, hi->hw_address)) + error0 = ETHERNET_ERROR_L3_MAC_MISMATCH; + determine_next_node (em, variant, 0, type0, b0, + &error0, &next0); + vlib_buffer_advance (b0, sizeof (ethernet_header_t)); 
+ } + goto ship_it0; + } + + /* Slow-path for the tagged case */ + parse_header (variant, + b0, + &type0, + &orig_type0, &outer_id0, &inner_id0, &match_flags0); + + old_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + eth_vlan_table_lookups (em, + vnm, + old_sw_if_index0, + orig_type0, + outer_id0, + inner_id0, + &hi0, + &main_intf0, &vlan_intf0, &qinq_intf0); + + identify_subint (hi0, + b0, + match_flags0, + main_intf0, + vlan_intf0, + qinq_intf0, &new_sw_if_index0, &error0, &is_l20); + + // Save RX sw_if_index for later nodes + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + error0 != + ETHERNET_ERROR_NONE ? old_sw_if_index0 : new_sw_if_index0; + + // Increment subinterface stats + // Note that interface-level counters have already been incremented + // prior to calling this function. Thus only subinterface counters + // are incremented here. + // + // Interface level counters include packets received on the main + // interface and all subinterfaces. Subinterface level counters + // include only those packets received on that subinterface + // Increment stats if the subint is valid and it is not the main intf + if ((new_sw_if_index0 != ~0) + && (new_sw_if_index0 != old_sw_if_index0)) + { + + len0 = vlib_buffer_length_in_chain (vm, b0) + b0->current_data + - vnet_buffer (b0)->l2_hdr_offset; + + stats_n_packets += 1; + stats_n_bytes += len0; + + // Batch stat increments from the same subinterface so counters + // don't need to be incremented for every packet. 
+ if (PREDICT_FALSE (new_sw_if_index0 != stats_sw_if_index)) + { + stats_n_packets -= 1; + stats_n_bytes -= len0; + + if (new_sw_if_index0 != ~0) + vlib_increment_combined_counter + (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + thread_index, new_sw_if_index0, 1, len0); + if (stats_n_packets > 0) + { + vlib_increment_combined_counter + (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + thread_index, + stats_sw_if_index, stats_n_packets, stats_n_bytes); + stats_n_packets = stats_n_bytes = 0; + } + stats_sw_if_index = new_sw_if_index0; + } + } + + if (variant == ETHERNET_INPUT_VARIANT_NOT_L2) + is_l20 = 0; + + determine_next_node (em, variant, is_l20, type0, b0, &error0, + &next0); + + ship_it0: + b0->error = error_node->errors[error0]; + + // verify speculative enqueue + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + // Increment any remaining batched stats + if (stats_n_packets > 0) + { + vlib_increment_combined_counter + (vnm->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + thread_index, stats_sw_if_index, stats_n_packets, stats_n_bytes); + node->runtime_data[0] = stats_sw_if_index; + } + + return from_frame->n_vectors; +} + +static uword +ethernet_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return ethernet_input_inline (vm, node, from_frame, + ETHERNET_INPUT_VARIANT_ETHERNET); +} + +static uword +ethernet_input_type (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return ethernet_input_inline (vm, node, from_frame, + ETHERNET_INPUT_VARIANT_ETHERNET_TYPE); +} + +static uword +ethernet_input_not_l2 (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return ethernet_input_inline (vm, node, from_frame, + ETHERNET_INPUT_VARIANT_NOT_L2); +} + + +// Return the 
subinterface config struct for the given sw_if_index +// Also return via parameter the appropriate match flags for the +// configured number of tags. +// On error (unsupported or not ethernet) return 0. +static subint_config_t * +ethernet_sw_interface_get_config (vnet_main_t * vnm, + u32 sw_if_index, + u32 * flags, u32 * unsupported) +{ + ethernet_main_t *em = ðernet_main; + vnet_hw_interface_t *hi; + vnet_sw_interface_t *si; + main_intf_t *main_intf; + vlan_table_t *vlan_table; + qinq_table_t *qinq_table; + subint_config_t *subint = 0; + + hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + + if (!hi || (hi->hw_class_index != ethernet_hw_interface_class.index)) + { + *unsupported = 0; + goto done; // non-ethernet interface + } + + // ensure there's an entry for the main intf (shouldn't really be necessary) + vec_validate (em->main_intfs, hi->hw_if_index); + main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index); + + // Locate the subint for the given ethernet config + si = vnet_get_sw_interface (vnm, sw_if_index); + + if (si->type == VNET_SW_INTERFACE_TYPE_P2P) + { + p2p_ethernet_main_t *p2pm = &p2p_main; + u32 p2pe_sw_if_index = + p2p_ethernet_lookup (hi->hw_if_index, si->p2p.client_mac); + if (p2pe_sw_if_index == ~0) + { + pool_get (p2pm->p2p_subif_pool, subint); + si->p2p.pool_index = subint - p2pm->p2p_subif_pool; + } + else + subint = vec_elt_at_index (p2pm->p2p_subif_pool, si->p2p.pool_index); + *flags = SUBINT_CONFIG_P2P; + } + else if (si->sub.eth.flags.default_sub) + { + subint = &main_intf->default_subint; + *flags = SUBINT_CONFIG_MATCH_0_TAG | + SUBINT_CONFIG_MATCH_1_TAG | + SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG; + } + else if ((si->sub.eth.flags.no_tags) || (si->sub.eth.raw_flags == 0)) + { + // if no flags are set then this is a main interface + // so treat as untagged + subint = &main_intf->untagged_subint; + *flags = SUBINT_CONFIG_MATCH_0_TAG; + } + else + { + // one or two tags + // first get the vlan table + if 
(si->sub.eth.flags.dot1ad) + { + if (main_intf->dot1ad_vlans == 0) + { + // Allocate a vlan table from the pool + pool_get (em->vlan_pool, vlan_table); + main_intf->dot1ad_vlans = vlan_table - em->vlan_pool; + } + else + { + // Get ptr to existing vlan table + vlan_table = + vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans); + } + } + else + { // dot1q + if (main_intf->dot1q_vlans == 0) + { + // Allocate a vlan table from the pool + pool_get (em->vlan_pool, vlan_table); + main_intf->dot1q_vlans = vlan_table - em->vlan_pool; + } + else + { + // Get ptr to existing vlan table + vlan_table = + vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans); + } + } + + if (si->sub.eth.flags.one_tag) + { + *flags = si->sub.eth.flags.exact_match ? + SUBINT_CONFIG_MATCH_1_TAG : + (SUBINT_CONFIG_MATCH_1_TAG | + SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG); + + if (si->sub.eth.flags.outer_vlan_id_any) + { + // not implemented yet + *unsupported = 1; + goto done; + } + else + { + // a single vlan, a common case + subint = + &vlan_table->vlans[si->sub.eth. + outer_vlan_id].single_tag_subint; + } + + } + else + { + // Two tags + *flags = si->sub.eth.flags.exact_match ? + SUBINT_CONFIG_MATCH_2_TAG : + (SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG); + + if (si->sub.eth.flags.outer_vlan_id_any + && si->sub.eth.flags.inner_vlan_id_any) + { + // not implemented yet + *unsupported = 1; + goto done; + } + + if (si->sub.eth.flags.inner_vlan_id_any) + { + // a specific outer and "any" inner + // don't need a qinq table for this + subint = + &vlan_table->vlans[si->sub.eth. 
+ outer_vlan_id].inner_any_subint; + if (si->sub.eth.flags.exact_match) + { + *flags = SUBINT_CONFIG_MATCH_2_TAG; + } + else + { + *flags = SUBINT_CONFIG_MATCH_2_TAG | + SUBINT_CONFIG_MATCH_3_TAG; + } + } + else + { + // a specific outer + specifc innner vlan id, a common case + + // get the qinq table + if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0) + { + // Allocate a qinq table from the pool + pool_get (em->qinq_pool, qinq_table); + vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs = + qinq_table - em->qinq_pool; + } + else + { + // Get ptr to existing qinq table + qinq_table = + vec_elt_at_index (em->qinq_pool, + vlan_table->vlans[si->sub. + eth.outer_vlan_id]. + qinqs); + } + subint = &qinq_table->vlans[si->sub.eth.inner_vlan_id].subint; + } + } + } + +done: + return subint; +} + +clib_error_t * +ethernet_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) +{ + subint_config_t *subint; + u32 dummy_flags; + u32 dummy_unsup; + clib_error_t *error = 0; + + // Find the config for this subinterface + subint = + ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags, + &dummy_unsup); + + if (subint == 0) + { + // not implemented yet or not ethernet + goto done; + } + + subint->sw_if_index = + ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 
sw_if_index : ~0); + +done: + return error; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_sw_interface_up_down); + + +// Set the L2/L3 mode for the subinterface +void +ethernet_sw_interface_set_l2_mode (vnet_main_t * vnm, u32 sw_if_index, u32 l2) +{ + subint_config_t *subint; + u32 dummy_flags; + u32 dummy_unsup; + int is_port; + vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, sw_if_index); + + is_port = !(sw->type == VNET_SW_INTERFACE_TYPE_SUB); + + // Find the config for this subinterface + subint = + ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags, + &dummy_unsup); + + if (subint == 0) + { + // unimplemented or not ethernet + goto done; + } + + // Double check that the config we found is for our interface (or the interface is down) + ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0)); + + if (l2) + { + subint->flags |= SUBINT_CONFIG_L2; + if (is_port) + subint->flags |= + SUBINT_CONFIG_MATCH_0_TAG | SUBINT_CONFIG_MATCH_1_TAG + | SUBINT_CONFIG_MATCH_2_TAG | SUBINT_CONFIG_MATCH_3_TAG; + } + else + { + subint->flags &= ~SUBINT_CONFIG_L2; + if (is_port) + subint->flags &= + ~(SUBINT_CONFIG_MATCH_1_TAG | SUBINT_CONFIG_MATCH_2_TAG + | SUBINT_CONFIG_MATCH_3_TAG); + } + +done: + return; +} + +/* + * Set the L2/L3 mode for the subinterface regardless of port + */ +void +ethernet_sw_interface_set_l2_mode_noport (vnet_main_t * vnm, + u32 sw_if_index, u32 l2) +{ + subint_config_t *subint; + u32 dummy_flags; + u32 dummy_unsup; + + /* Find the config for this subinterface */ + subint = + ethernet_sw_interface_get_config (vnm, sw_if_index, &dummy_flags, + &dummy_unsup); + + if (subint == 0) + { + /* unimplemented or not ethernet */ + goto done; + } + + /* + * Double check that the config we found is for our interface (or the + * interface is down) + */ + ASSERT ((subint->sw_if_index == sw_if_index) | (subint->sw_if_index == ~0)); + + if (l2) + { + subint->flags |= SUBINT_CONFIG_L2; + } + else + { + subint->flags &= 
~SUBINT_CONFIG_L2; + } + +done: + return; +} + +static clib_error_t * +ethernet_sw_interface_add_del (vnet_main_t * vnm, + u32 sw_if_index, u32 is_create) +{ + clib_error_t *error = 0; + subint_config_t *subint; + u32 match_flags; + u32 unsupported = 0; + + // Find the config for this subinterface + subint = + ethernet_sw_interface_get_config (vnm, sw_if_index, &match_flags, + &unsupported); + + if (subint == 0) + { + // not implemented yet or not ethernet + if (unsupported) + { + // this is the NYI case + error = clib_error_return (0, "not implemented yet"); + } + goto done; + } + + if (!is_create) + { + subint->flags = 0; + return error; + } + + // Initialize the subint + if (subint->flags & SUBINT_CONFIG_VALID) + { + // Error vlan already in use + error = clib_error_return (0, "vlan is already in use"); + } + else + { + // Note that config is L3 by defaulty + subint->flags = SUBINT_CONFIG_VALID | match_flags; + subint->sw_if_index = ~0; // because interfaces are initially down + } + +done: + return error; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ethernet_sw_interface_add_del); + +static char *ethernet_error_strings[] = { +#define ethernet_error(n,c,s) s, +#include "error.def" +#undef ethernet_error +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ethernet_input_node) = { + .function = ethernet_input, + .name = "ethernet-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = ETHERNET_N_ERROR, + .error_strings = ethernet_error_strings, + .n_next_nodes = ETHERNET_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n, + foreach_ethernet_input_next +#undef _ + }, + .format_buffer = format_ethernet_header_with_length, + .format_trace = format_ethernet_input_trace, + .unformat_buffer = unformat_ethernet_header, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_node, ethernet_input) +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ethernet_input_type_node, static) = { + .function = ethernet_input_type, + .name = "ethernet-input-type", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_next_nodes = ETHERNET_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n, + foreach_ethernet_input_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_type_node, ethernet_input_type) +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ethernet_input_not_l2_node, static) = { + .function = ethernet_input_not_l2, + .name = "ethernet-input-not-l2", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_next_nodes = ETHERNET_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [ETHERNET_INPUT_NEXT_##s] = n, + foreach_ethernet_input_next +#undef _ + }, +}; +/* *INDENT-ON* */ + + +/* *INDENT-OFF* */ +VLIB_NODE_FUNCTION_MULTIARCH (ethernet_input_not_l2_node, + ethernet_input_not_l2) +/* *INDENT-ON* */ + + +void +ethernet_set_rx_redirect (vnet_main_t * vnm, + vnet_hw_interface_t * hi, u32 enable) +{ + // Insure all packets go to ethernet-input (i.e. untagged ipv4 packets + // don't go directly to ip4-input) + vnet_hw_interface_rx_redirect_to_node + (vnm, hi->hw_if_index, enable ? 
ethernet_input_node.index : ~0); +} + + +/* + * Initialization and registration for the next_by_ethernet structure + */ + +clib_error_t * +next_by_ethertype_init (next_by_ethertype_t * l3_next) +{ + l3_next->input_next_by_type = sparse_vec_new + ( /* elt bytes */ sizeof (l3_next->input_next_by_type[0]), + /* bits in index */ BITS (((ethernet_header_t *) 0)->type)); + + vec_validate (l3_next->sparse_index_by_input_next_index, + ETHERNET_INPUT_NEXT_DROP); + vec_validate (l3_next->sparse_index_by_input_next_index, + ETHERNET_INPUT_NEXT_PUNT); + l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_DROP] = + SPARSE_VEC_INVALID_INDEX; + l3_next->sparse_index_by_input_next_index[ETHERNET_INPUT_NEXT_PUNT] = + SPARSE_VEC_INVALID_INDEX; + + /* + * Make sure we don't wipe out an ethernet registration by mistake + * Can happen if init function ordering constraints are missing. + */ + if (CLIB_DEBUG > 0) + { + ethernet_main_t *em = &ethernet_main; + ASSERT (em->next_by_ethertype_register_called == 0); + } + + return 0; +} + +// Add an ethertype -> next index mapping to the structure +clib_error_t * +next_by_ethertype_register (next_by_ethertype_t * l3_next, + u32 ethertype, u32 next_index) +{ + u32 i; + u16 *n; + ethernet_main_t *em = &ethernet_main; + + // Record that a registration happened so next_by_ethertype_init can + // assert it is not being run afterwards (debug images only) + if (CLIB_DEBUG > 0) + { + em->next_by_ethertype_register_called = 1; + } + + /* Setup ethernet type -> next index sparse vector mapping. */ + n = sparse_vec_validate (l3_next->input_next_by_type, ethertype); + n[0] = next_index; + + /* Rebuild next index -> sparse index inverse mapping when sparse vector + is updated. 
*/ + vec_validate (l3_next->sparse_index_by_input_next_index, next_index); + for (i = 1; i < vec_len (l3_next->input_next_by_type); i++) + l3_next-> + sparse_index_by_input_next_index[l3_next->input_next_by_type[i]] = i; + + // do not allow the cached next index's to be updated if L3 + // redirect is enabled, as it will have overwritten them + if (!em->redirect_l3) + { + // Cache common ethertypes directly + if (ethertype == ETHERNET_TYPE_IP4) + { + l3_next->input_next_ip4 = next_index; + } + else if (ethertype == ETHERNET_TYPE_IP6) + { + l3_next->input_next_ip6 = next_index; + } + else if (ethertype == ETHERNET_TYPE_MPLS) + { + l3_next->input_next_mpls = next_index; + } + } + return 0; +} + + +static clib_error_t * +ethernet_input_init (vlib_main_t * vm) +{ + ethernet_main_t *em = ðernet_main; + __attribute__ ((unused)) vlan_table_t *invalid_vlan_table; + __attribute__ ((unused)) qinq_table_t *invalid_qinq_table; + + ethernet_setup_node (vm, ethernet_input_node.index); + ethernet_setup_node (vm, ethernet_input_type_node.index); + ethernet_setup_node (vm, ethernet_input_not_l2_node.index); + + next_by_ethertype_init (&em->l3_next); + + // Initialize pools and vector for vlan parsing + vec_validate (em->main_intfs, 10); // 10 main interfaces + pool_alloc (em->vlan_pool, 10); + pool_alloc (em->qinq_pool, 1); + + // The first vlan pool will always be reserved for an invalid table + pool_get (em->vlan_pool, invalid_vlan_table); // first id = 0 + // The first qinq pool will always be reserved for an invalid table + pool_get (em->qinq_pool, invalid_qinq_table); // first id = 0 + + return 0; +} + +VLIB_INIT_FUNCTION (ethernet_input_init); + +void +ethernet_register_input_type (vlib_main_t * vm, + ethernet_type_t type, u32 node_index) +{ + ethernet_main_t *em = ðernet_main; + ethernet_type_info_t *ti; + u32 i; + + { + clib_error_t *error = vlib_call_init_function (vm, ethernet_init); + if (error) + clib_error_report (error); + } + + ti = ethernet_get_type_info (em, type); 
+ ti->node_index = node_index; + ti->next_index = vlib_node_add_next (vm, + ethernet_input_node.index, node_index); + i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index); + ASSERT (i == ti->next_index); + + i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index); + ASSERT (i == ti->next_index); + + // Add the L3 node for this ethertype to the next nodes structure + next_by_ethertype_register (&em->l3_next, type, ti->next_index); + + // Call the registration functions for other nodes that want a mapping + l2bvi_register_input_type (vm, type, node_index); +} + +void +ethernet_register_l2_input (vlib_main_t * vm, u32 node_index) +{ + ethernet_main_t *em = &ethernet_main; + u32 i; + + em->l2_next = + vlib_node_add_next (vm, ethernet_input_node.index, node_index); + + /* + * Even if we never use these arcs, we have to align the next indices... + */ + i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index); + + ASSERT (i == em->l2_next); + + i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index); + ASSERT (i == em->l2_next); +} + +// Register a next node for L3 redirect, and enable L3 redirect +void +ethernet_register_l3_redirect (vlib_main_t * vm, u32 node_index) +{ + ethernet_main_t *em = &ethernet_main; + u32 i; + + em->redirect_l3 = 1; + em->redirect_l3_next = vlib_node_add_next (vm, + ethernet_input_node.index, + node_index); + /* + * Change the cached next nodes to the redirect node + */ + em->l3_next.input_next_ip4 = em->redirect_l3_next; + em->l3_next.input_next_ip6 = em->redirect_l3_next; + em->l3_next.input_next_mpls = em->redirect_l3_next; + + /* + * Even if we never use these arcs, we have to align the next indices... 
+ */ + i = vlib_node_add_next (vm, ethernet_input_type_node.index, node_index); + + ASSERT (i == em->redirect_l3_next); + + i = vlib_node_add_next (vm, ethernet_input_not_l2_node.index, node_index); + + ASSERT (i == em->redirect_l3_next); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/p2p_ethernet.api b/src/vnet/ethernet/p2p_ethernet.api new file mode 100644 index 00000000..8fb66376 --- /dev/null +++ b/src/vnet/ethernet/p2p_ethernet.api @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +define p2p_ethernet_add +{ + u32 client_index; + u32 context; + u32 parent_if_index; + u32 subif_id; + u8 remote_mac[6]; +}; + +define p2p_ethernet_add_reply +{ + u32 context; + i32 retval; + u32 sw_if_index; +}; + +define p2p_ethernet_del +{ + u32 client_index; + u32 context; + u32 parent_if_index; + u8 remote_mac[6]; +}; + +define p2p_ethernet_del_reply +{ + u32 context; + i32 retval; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */
\ No newline at end of file diff --git a/src/vnet/ethernet/p2p_ethernet.c b/src/vnet/ethernet/p2p_ethernet.c new file mode 100644 index 00000000..cf3c56b5 --- /dev/null +++ b/src/vnet/ethernet/p2p_ethernet.c @@ -0,0 +1,276 @@ +/* + * p2p_ethernet.c: p2p ethernet + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vppinfra/bihash_16_8.h> +#include <vnet/vnet.h> +#include <vnet/ethernet/p2p_ethernet.h> +#include <vnet/l2/l2_input.h> + +p2p_ethernet_main_t p2p_main; + +static void +create_p2pe_key (p2p_key_t * p2pe_key, u32 parent_if_index, u8 * client_mac) +{ + clib_memcpy (p2pe_key->mac, client_mac, 6); + p2pe_key->pad1 = 0; + p2pe_key->hw_if_index = parent_if_index; + p2pe_key->pad2 = 0; +} + +u32 +p2p_ethernet_lookup (u32 parent_if_index, u8 * client_mac) +{ + p2p_ethernet_main_t *p2pm = &p2p_main; + p2p_key_t p2pe_key; + uword *p; + + create_p2pe_key (&p2pe_key, parent_if_index, client_mac); + p = hash_get_mem (p2pm->p2p_ethernet_by_key, &p2pe_key); + if (p) + return p[0]; + + return ~0; +} + +int +p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index, + u8 * client_mac, u32 p2pe_subif_id, int is_add, + u32 * p2pe_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + p2p_ethernet_main_t *p2pm = &p2p_main; + vnet_interface_main_t *im = &vnm->interface_main; + + u32 p2pe_sw_if_index = ~0; + p2pe_sw_if_index = p2p_ethernet_lookup (parent_if_index, client_mac); + + if (p2pe_if_index) + 
*p2pe_if_index = ~0; + + if (is_add) + { + if (p2pe_sw_if_index == ~0) + { + vnet_hw_interface_t *hi; + + hi = vnet_get_hw_interface (vnm, parent_if_index); + if (hi->bond_info == VNET_HW_INTERFACE_BOND_INFO_SLAVE) + return VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED; + + u64 sup_and_sub_key = + ((u64) (hi->sw_if_index) << 32) | (u64) p2pe_subif_id; + uword *p; + p = hash_get_mem (im->sw_if_index_by_sup_and_sub, &sup_and_sub_key); + if (p) + { + if (CLIB_DEBUG > 0) + clib_warning + ("p2p ethernet sub-interface on sw_if_index %d with sub id %d already exists\n", + hi->sw_if_index, p2pe_subif_id); + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + } + vnet_sw_interface_t template = { + .type = VNET_SW_INTERFACE_TYPE_P2P, + .flood_class = VNET_FLOOD_CLASS_NORMAL, + .sup_sw_if_index = hi->sw_if_index, + .sub.id = p2pe_subif_id + }; + + clib_memcpy (template.p2p.client_mac, client_mac, + sizeof (template.p2p.client_mac)); + + if (vnet_create_sw_interface (vnm, &template, &p2pe_sw_if_index)) + return VNET_API_ERROR_SUBIF_CREATE_FAILED; + + /* Allocate counters for this interface. 
*/ + { + u32 i; + + vnet_interface_counter_lock (im); + + for (i = 0; i < vec_len (im->sw_if_counters); i++) + { + vlib_validate_simple_counter (&im->sw_if_counters[i], + p2pe_sw_if_index); + vlib_zero_simple_counter (&im->sw_if_counters[i], + p2pe_sw_if_index); + } + + for (i = 0; i < vec_len (im->combined_sw_if_counters); i++) + { + vlib_validate_combined_counter (&im->combined_sw_if_counters + [i], p2pe_sw_if_index); + vlib_zero_combined_counter (&im->combined_sw_if_counters[i], + p2pe_sw_if_index); + } + + vnet_interface_counter_unlock (im); + } + + vnet_interface_main_t *im = &vnm->interface_main; + sup_and_sub_key = + ((u64) (hi->sw_if_index) << 32) | (u64) p2pe_subif_id; + u64 *kp = clib_mem_alloc (sizeof (*kp)); + + *kp = sup_and_sub_key; + hash_set (hi->sub_interface_sw_if_index_by_id, p2pe_subif_id, + p2pe_sw_if_index); + hash_set_mem (im->sw_if_index_by_sup_and_sub, kp, p2pe_sw_if_index); + + p2p_key_t *p_p2pe_key; + p_p2pe_key = clib_mem_alloc (sizeof (*p_p2pe_key)); + create_p2pe_key (p_p2pe_key, parent_if_index, client_mac); + hash_set_mem (p2pm->p2p_ethernet_by_key, p_p2pe_key, + p2pe_sw_if_index); + + if (p2pe_if_index) + *p2pe_if_index = p2pe_sw_if_index; + + vec_validate (p2pm->p2p_ethernet_by_sw_if_index, parent_if_index); + if (p2pm->p2p_ethernet_by_sw_if_index[parent_if_index] == 0) + { + vnet_feature_enable_disable ("device-input", + "p2p-ethernet-input", + parent_if_index, 1, 0, 0); + /* Set promiscuous mode on the l2 interface */ + ethernet_set_flags (vnm, parent_if_index, + ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); + + } + p2pm->p2p_ethernet_by_sw_if_index[parent_if_index]++; + /* set the interface mode */ + set_int_l2_mode (vm, vnm, MODE_L3, p2pe_subif_id, 0, 0, 0, 0); + return 0; + } + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + } + else + { + if (p2pe_sw_if_index == ~0) + return VNET_API_ERROR_SUBIF_DOESNT_EXIST; + else + { + int rv = 0; + rv = vnet_delete_sub_interface (p2pe_sw_if_index); + if (!rv) + { + vec_validate 
(p2pm->p2p_ethernet_by_sw_if_index, + parent_if_index); + if (p2pm->p2p_ethernet_by_sw_if_index[parent_if_index] == 1) + { + vnet_feature_enable_disable ("device-input", + "p2p-ethernet-input", + parent_if_index, 0, 0, 0); + /* Disable promiscuous mode on the l2 interface */ + ethernet_set_flags (vnm, parent_if_index, 0); + } + p2pm->p2p_ethernet_by_sw_if_index[parent_if_index]--; + + /* Remove p2p_ethernet from hash map */ + p2p_key_t *p_p2pe_key; + p_p2pe_key = clib_mem_alloc (sizeof (*p_p2pe_key)); + create_p2pe_key (p_p2pe_key, parent_if_index, client_mac); + hash_unset_mem (p2pm->p2p_ethernet_by_key, p_p2pe_key); + } + return rv; + } + } +} + +static clib_error_t * +vnet_p2p_ethernet_add_del (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + + int is_add = 1; + int remote_mac = 0; + u32 hw_if_index = ~0; + u32 sub_id = ~0; + u8 client_mac[6]; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index)) + ; + else if (unformat (input, "%U", unformat_ethernet_address, &client_mac)) + remote_mac = 1; + else if (unformat (input, "sub-id %d", &sub_id)) + ; + else if (unformat (input, "del")) + is_add = 0; + else + break; + } + + if (hw_if_index == ~0) + return clib_error_return (0, "Please specify parent interface ..."); + if (!remote_mac) + return clib_error_return (0, "Please specify client MAC address ..."); + if (sub_id == ~0 && is_add) + return clib_error_return (0, "Please specify sub-interface id ..."); + + u32 rv; + rv = p2p_ethernet_add_del (vm, hw_if_index, client_mac, sub_id, is_add, 0); + switch (rv) + { + case VNET_API_ERROR_BOND_SLAVE_NOT_ALLOWED: + return clib_error_return (0, + "not allowed as parent interface belongs to a BondEthernet interface"); + case -1: + return clib_error_return (0, + "p2p ethernet for given parent interface and client mac already exists"); + case -2: + return 
clib_error_return (0, + "couldn't create p2p ethernet subinterface"); + case -3: + return clib_error_return (0, + "p2p ethernet for given parent interface and client mac doesn't exist"); + default: + break; + } + return 0; +} + +VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) = +{ +.path = "p2p_ethernet ",.function = vnet_p2p_ethernet_add_del,.short_help = + "p2p_ethernet <intfc> <mac-address> [sub-id <id> | del]",}; + +static clib_error_t * +p2p_ethernet_init (vlib_main_t * vm) +{ + p2p_ethernet_main_t *p2pm = &p2p_main; + + p2pm->vlib_main = vm; + p2pm->vnet_main = vnet_get_main (); + p2pm->p2p_ethernet_by_key = + hash_create_mem (0, sizeof (p2p_key_t), sizeof (uword)); + + return 0; +} + +VLIB_INIT_FUNCTION (p2p_ethernet_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/p2p_ethernet.h b/src/vnet/ethernet/p2p_ethernet.h new file mode 100644 index 00000000..bb1e2896 --- /dev/null +++ b/src/vnet/ethernet/p2p_ethernet.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_p2p_ethernet_h +#define included_vnet_p2p_ethernet_h + +#include <vnet/vnet.h> +#include <vnet/ethernet/ethernet.h> + + +typedef struct { + /** + * Hash mapping parent sw_if_index and client mac address to p2p_ethernet sub-interface + */ + uword * p2p_ethernet_by_key; + + u32 *p2p_ethernet_by_sw_if_index; + + // Pool of p2p subifs; + subint_config_t *p2p_subif_pool; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} p2p_ethernet_main_t; + +extern p2p_ethernet_main_t p2p_main; + +typedef struct +{ + u32 sw_if_index; + u32 p2pe_sw_if_index; + u8 client_mac[6]; +} p2p_ethernet_trace_t; + +/** + * @brief Key struct for P2P Ethernet + * Key fields: parent sw_if_index and client mac address + * all fields in NET byte order + */ + +typedef struct { + u8 mac[6]; + u16 pad1; // padding for u64 mac address + u32 hw_if_index; + u32 pad2; // padding for u64 +} p2p_key_t; + +u32 p2p_ethernet_lookup (u32 parent_sw_if_index, u8* client_mac); +int p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index, u8 * client_mac, u32 sub_id, int is_add, u32 *p2pe_if_index); + +#endif /* included_vnet_p2p_ethernet_h */ diff --git a/src/vnet/ethernet/p2p_ethernet_api.c b/src/vnet/ethernet/p2p_ethernet_api.c new file mode 100644 index 00000000..f2c730b4 --- /dev/null +++ b/src/vnet/ethernet/p2p_ethernet_api.c @@ -0,0 +1,137 @@ +/* + *------------------------------------------------------------------ + * p2p_ethernet_api.c - p2p ethernet api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/vnet_msg_enum.h> +#include <vnet/ethernet/p2p_ethernet.h> + +#define vl_typedefs /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <vnet/vnet_all_api_h.h> +#undef vl_printfun + +#include <vlibapi/api_helper_macros.h> + +#define foreach_vpe_api_msg \ +_(P2P_ETHERNET_ADD, p2p_ethernet_add) \ +_(P2P_ETHERNET_DEL, p2p_ethernet_del) + +void +vl_api_p2p_ethernet_add_t_handler (vl_api_p2p_ethernet_add_t * mp) +{ + vl_api_p2p_ethernet_add_reply_t *rmp; + vlib_main_t *vm = vlib_get_main (); + int rv; + + u32 parent_if_index = htonl (mp->parent_if_index); + u32 sub_id = htonl (mp->subif_id); + u32 p2pe_if_index; + u8 remote_mac[6]; + + clib_memcpy (remote_mac, mp->remote_mac, 6); + rv = + p2p_ethernet_add_del (vm, parent_if_index, remote_mac, sub_id, 1, + &p2pe_if_index); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_P2P_ETHERNET_ADD_REPLY, + ({ + rmp->sw_if_index = htonl(p2pe_if_index); + })); + /* *INDENT-ON* */ +} + +void +vl_api_p2p_ethernet_del_t_handler (vl_api_p2p_ethernet_del_t * mp) +{ + vl_api_p2p_ethernet_del_reply_t *rmp; + vlib_main_t *vm = vlib_get_main (); + int rv; + + u32 parent_if_index = htonl 
(mp->parent_if_index); + u8 remote_mac[6]; + + clib_memcpy (remote_mac, mp->remote_mac, 6); + rv = p2p_ethernet_add_del (vm, parent_if_index, remote_mac, ~0, 0, 0); + + REPLY_MACRO (VL_API_P2P_ETHERNET_DEL_REPLY); +} + +/* + * p2p_ethernet_api_hookup + * Add vpe's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. + * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process() + */ +#define vl_msg_name_crc_list +#include <vnet/vnet_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_p2p_ethernet; +#undef _ +} + +static clib_error_t * +p2p_ethernet_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + + return 0; +} + +VLIB_API_INIT_FUNCTION (p2p_ethernet_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/p2p_ethernet_input.c b/src/vnet/ethernet/p2p_ethernet_input.c new file mode 100644 index 00000000..eeff4f06 --- /dev/null +++ b/src/vnet/ethernet/p2p_ethernet_input.c @@ -0,0 +1,262 @@ +/* + * node.c: p2p ethernet vpp node + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vppinfra/error.h> + +#include <vnet/ethernet/p2p_ethernet.h> + +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +vlib_node_registration_t p2p_ethernet_input_node; + +/* packet trace format function */ +u8 * +format_p2p_ethernet_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + p2p_ethernet_trace_t *t = va_arg (*args, p2p_ethernet_trace_t *); + + vnet_main_t *vnm = &vnet_main; + s = format (s, "P2P ethernet: %U -> %U", + format_vnet_sw_if_index_name, vnm, t->sw_if_index, + format_vnet_sw_if_index_name, vnm, t->p2pe_sw_if_index); + + return s; +} + +#define foreach_p2p_ethernet_error \ +_(HITS, "P2P ethernet incoming packets processed") + +typedef enum +{ +#define _(sym,str) P2PE_ERROR_##sym, + foreach_p2p_ethernet_error +#undef _ + P2PE_N_ERROR, +} p2p_ethernet_error_t; + +static char *p2p_ethernet_error_strings[] = { +#define _(sym,string) string, + foreach_p2p_ethernet_error +#undef _ +}; + +static uword +p2p_ethernet_input_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 thread_index = vlib_get_thread_index (); + u32 n_trace = vlib_get_trace_count (vm, node); + u32 n_left_from, *from, *to_next; + u32 next_index; + u32 n_p2p_ethernet_packets = 0; + vlib_combined_counter_main_t *cm = + vnet_get_main ()->interface_main.combined_sw_if_counters; + + from = vlib_frame_vector_args (frame); + n_left_from = 
frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0 = 0, next1 = 0; + u32 sw_if_index0, sw_if_index1; + ethernet_header_t *en0, *en1; + u32 rx0, rx1; + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + en0 = vlib_buffer_get_current (b0); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + en1 = vlib_buffer_get_current (b1); + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + vnet_feature_next (sw_if_index0, &next0, b0); + vnet_feature_next (sw_if_index1, &next1, b1); + + rx0 = p2p_ethernet_lookup (sw_if_index0, en0->src_address); + rx1 = p2p_ethernet_lookup (sw_if_index1, en1->src_address); + + if (rx0 != ~0) + { + /* Send pkt to p2p_ethernet RX interface */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = rx0; + n_p2p_ethernet_packets += 1; + + if (PREDICT_FALSE (n_trace > 0)) + { + p2p_ethernet_trace_t *t0; + vlib_trace_buffer (vm, node, next_index, b0, + 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + t0->sw_if_index = sw_if_index0; + t0->p2pe_sw_if_index = rx0; + } + + vlib_increment_combined_counter (cm, thread_index, rx0, 1, + vlib_buffer_length_in_chain + (vm, b0)); + } + if (rx1 != ~0) + { + /* Send pkt to p2p_ethernet RX interface */ + vnet_buffer (b1)->sw_if_index[VLIB_RX] = rx1; + n_p2p_ethernet_packets += 1; + + if (PREDICT_FALSE (n_trace > 0)) + { + p2p_ethernet_trace_t *t1; + vlib_trace_buffer (vm, node, next_index, b1, + 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t1 = vlib_add_trace (vm, node, b1, sizeof (*t1)); + t1->sw_if_index = 
sw_if_index1; + t1->p2pe_sw_if_index = rx1; + } + + vlib_increment_combined_counter (cm, thread_index, rx1, 1, + vlib_buffer_length_in_chain + (vm, b1)); + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi1, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = 0; + u32 sw_if_index0; + ethernet_header_t *en0; + u32 rx0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + en0 = vlib_buffer_get_current (b0); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + vnet_feature_next (sw_if_index0, &next0, b0); + + rx0 = p2p_ethernet_lookup (sw_if_index0, en0->src_address); + if (rx0 != ~0) + { + /* Send pkt to p2p_ethernet RX interface */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = rx0; + n_p2p_ethernet_packets += 1; + + if (PREDICT_FALSE (n_trace > 0)) + { + p2p_ethernet_trace_t *t0; + vlib_trace_buffer (vm, node, next_index, b0, + 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + t0->sw_if_index = sw_if_index0; + t0->p2pe_sw_if_index = rx0; + } + + vlib_increment_combined_counter (cm, thread_index, rx0, 1, + vlib_buffer_length_in_chain + (vm, b0)); + } + else + { + if (PREDICT_FALSE (n_trace > 0)) + { + node->flags |= VLIB_NODE_FLAG_TRACE; + } + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, p2p_ethernet_input_node.index, + P2PE_ERROR_HITS, n_p2p_ethernet_packets); + return frame->n_vectors; +} + +/* 
*INDENT-OFF* */ +/* Graph-node registration for "p2p-ethernet-input": an internal node that runs p2p_ethernet_input_node_fn over vectors of u32-sized elements, formats traces with format_p2p_ethernet_trace and has a single next node. */ VLIB_REGISTER_NODE (p2p_ethernet_input_node) = { + .function = p2p_ethernet_input_node_fn, + .name = "p2p-ethernet-input", + .vector_size = sizeof (u32), + .format_trace = format_p2p_ethernet_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(p2p_ethernet_error_strings), + .error_strings = p2p_ethernet_error_strings, + + .n_next_nodes = 1, + + /* Next-node dispositions; index 0 is the value next0/next1 are initialised to in the node function. Edit / add dispositions here. */ + .next_nodes = { + [0] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (p2p_ethernet_input_node, + p2p_ethernet_input_node_fn) +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/packet.h b/src/vnet/ethernet/packet.h new file mode 100644 index 00000000..964cf638 --- /dev/null +++ b/src/vnet/ethernet/packet.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ethernet/packet.h: ethernet packet format. 
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ethernet_packet_h +#define included_ethernet_packet_h + +/* ETHERNET_TYPE_<name> = <value> enumerators, generated by expanding every ethernet_type (n, s) entry of types.def. */ typedef enum +{ +#define ethernet_type(n,s) ETHERNET_TYPE_##s = n, +#include <vnet/ethernet/types.def> +#undef ethernet_type +} ethernet_type_t; + +typedef struct +{ + /* Destination address first, then source address (declaration order below). */ + u8 dst_address[6]; + u8 src_address[6]; + + /* Ethernet type; see ethernet_type_t. NOTE(review): presumably held in network byte order - confirm against callers. */ + u16 type; +} ethernet_header_t; + +#define ETHERNET_ADDRESS_UNICAST 0 +#define ETHERNET_ADDRESS_MULTICAST 1 + +/* I/G bit (bit 0 of the first address byte): returns 0 for an individual (unicast) address and 1 for a group (broadcast/multicast) address, matching the ETHERNET_ADDRESS_UNICAST / ETHERNET_ADDRESS_MULTICAST values above. */ +always_inline uword +ethernet_address_cast (u8 * a) +{ + return (a[0] >> 0) & 1; +} + +/* Returns bit 1 of the first address byte (1 when the locally-administered bit is set, else 0). */ always_inline uword +ethernet_address_is_locally_administered (u8 * a) +{ + return (a[0] >> 1) & 1; +} + +/* Sets bit 1 of the first address byte in place; all other bits are left untouched. */ always_inline void +ethernet_address_set_locally_administered (u8 * a) +{ + a[0] |= 1 << 1; +} + +/* For VLAN ethernet type. 
*/ +typedef struct +{ + /* 3 bit priority, 1 bit CFI and 12 bit vlan id. */ + u16 priority_cfi_and_id; + +/* Number of distinct values of a 12 bit vlan id. */ #define ETHERNET_N_VLAN (1 << 12) + + /* Inner ethernet type. */ + u16 type; +} ethernet_vlan_header_t; + + +/* VLAN tag laid out with the ethertype first and the priority/CFI/vlan id word second (the reverse member order of ethernet_vlan_header_t). */ +typedef struct +{ + /* vlan type */ + u16 type; + + /* 3 bit priority, 1 bit CFI and 12 bit vlan id. */ + u16 priority_cfi_and_id; +} ethernet_vlan_header_tv_t; + +/* PBB header with B-TAG - backbone VLAN indicator and I-TAG - service encapsulation. NOTE(review): this variant is not packed; the u32 I-tag word follows 18 bytes of members, so a compiler may insert padding before it - confirm before overlaying it on packet data. */ +typedef struct +{ + /* Backbone destination address first, then backbone source address. */ + u8 b_dst_address[6]; + u8 b_src_address[6]; + + /* B-tag */ + u16 b_type; + /* 3 bit priority, 1 bit DEI and 12 bit vlan id */ + u16 priority_dei_id; + + /* I-tag */ + u16 i_type; + /* 3 bit priority, 1 bit DEI, 1 bit UCA, 3 bit RES and 24 bit I_SID (service identifier) */ + u32 priority_dei_uca_res_sid; + +/* Number of distinct values of a 24 bit I_SID. */ #define ETHERNET_N_PBB (1 << 24) +} ethernet_pbb_header_t; + +/* *INDENT-OFF* */ +/* Same members, in the same order, as ethernet_pbb_header_t, but declared through CLIB_PACKED. */ typedef CLIB_PACKED (struct +{ + /* Backbone destination address first, then backbone source address. */ + u8 b_dst_address[6]; + u8 b_src_address[6]; + + /* B-tag */ + u16 b_type; + /* 3 bit priority, 1 bit DEI and 12 bit vlan id */ + u16 priority_dei_id; + + /* I-tag */ + u16 i_type; + /* 3 bit priority, 1 bit DEI, 1 bit UCA, 3 bit RES and 24 bit I_SID (service identifier) */ + u32 priority_dei_uca_res_sid; +}) ethernet_pbb_header_packed_t; +/* *INDENT-ON* */ + +#endif /* included_ethernet_packet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/pg.c b/src/vnet/ethernet/pg.c new file mode 100644 index 00000000..67ccfcf5 --- /dev/null +++ b/src/vnet/ethernet/pg.c @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ethernet_pg.c: packet generator ethernet interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> + +typedef struct +{ + pg_edit_t type; + pg_edit_t src_address; + pg_edit_t dst_address; +} pg_ethernet_header_t; + +static inline void +pg_ethernet_header_init (pg_ethernet_header_t * e) +{ + pg_edit_init (&e->type, ethernet_header_t, type); + pg_edit_init (&e->src_address, ethernet_header_t, src_address); + pg_edit_init (&e->dst_address, ethernet_header_t, dst_address); +} + +typedef struct +{ + pg_edit_t type; + pg_edit_t id; + pg_edit_t cfi; + pg_edit_t priority; +} pg_ethernet_vlan_header_t; + +static inline void +pg_ethernet_vlan_header_init (pg_ethernet_vlan_header_t * v, int vlan_index) +{ + ASSERT (vlan_index < ARRAY_LEN (((ethernet_max_header_t *) 0)->vlan)); + pg_edit_init (&v->type, ethernet_max_header_t, vlan[vlan_index].type); + + pg_edit_init_bitfield (&v->id, ethernet_max_header_t, + vlan[vlan_index].priority_cfi_and_id, 0, 12); + pg_edit_init_bitfield (&v->cfi, ethernet_max_header_t, + vlan[vlan_index].priority_cfi_and_id, 12, 1); + pg_edit_init_bitfield (&v->priority, ethernet_max_header_t, + vlan[vlan_index].priority_cfi_and_id, 13, 3); +} + +uword +unformat_pg_ethernet_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t *s = va_arg (*args, pg_stream_t *); + pg_ethernet_header_t *e; + pg_ethernet_vlan_header_t *v; + pg_edit_t *ether_type_edit; + u32 n_vlan, error, group_index; + + e = pg_create_edit_group (s, sizeof (e[0]), sizeof (ethernet_header_t), + &group_index); + pg_ethernet_header_init (e); + error = 1; + + if (!unformat (input, "%U: %U -> %U", + unformat_pg_edit, + unformat_ethernet_type_net_byte_order, &e->type, + unformat_pg_edit, + unformat_ethernet_address, &e->src_address, + unformat_pg_edit, + unformat_ethernet_address, &e->dst_address)) + goto done; + + n_vlan = 0; + while (unformat (input, "vlan")) + { + v = pg_add_edits (s, sizeof (v[0]), sizeof (ethernet_vlan_header_t), + group_index); + pg_ethernet_vlan_header_init (v, 
n_vlan); + + if (!unformat_user (input, unformat_pg_edit, + unformat_pg_number, &v->id)) + goto done; + + if (!unformat (input, "priority %U", unformat_pg_edit, + unformat_pg_number, &v->priority)) + pg_edit_set_fixed (&v->priority, 0); + + if (!unformat (input, "cfi %U", unformat_pg_edit, + unformat_pg_number, &v->cfi)) + pg_edit_set_fixed (&v->cfi, 0); + + /* Too many vlans given. */ + if (n_vlan >= 2) + goto done; + + n_vlan++; + } + + /* Address of e may have changed due to vlan edits being added */ + e = pg_get_edit_group (s, group_index); + v = (void *) (e + 1); + + /* Correct types for vlan packets. */ + ether_type_edit = &e->type; + if (n_vlan > 0) + { + int i; + + ether_type_edit = &v[n_vlan - 1].type; + pg_edit_copy_type_and_values (ether_type_edit, &e->type); + pg_edit_set_fixed (&e->type, ETHERNET_TYPE_VLAN); + + for (i = 0; i < n_vlan - 1; i++) + pg_edit_set_fixed (&v[i].type, ETHERNET_TYPE_VLAN); + } + + { + ethernet_main_t *em = ðernet_main; + ethernet_type_info_t *ti = 0; + pg_node_t *pg_node = 0; + + if (ether_type_edit->type == PG_EDIT_FIXED) + { + u16 t = *(u16 *) ether_type_edit->values[PG_EDIT_LO]; + ti = ethernet_get_type_info (em, clib_net_to_host_u16 (t)); + if (ti && ti->node_index != ~0) + pg_node = pg_get_node (ti->node_index); + } + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + else if (!unformat_user (input, unformat_pg_payload, s)) + goto done; + } + + error = 0; + +done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/sfp.c b/src/vnet/ethernet/sfp.c new file mode 100644 index 00000000..624740e3 --- /dev/null +++ b/src/vnet/ethernet/sfp.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ethernet/sfp.h> + +static u8 * +format_space_terminated (u8 * s, va_list * args) +{ + u32 l = va_arg (*args, u32); + u8 *v = va_arg (*args, u8 *); + u8 *p; + + for (p = v + l - 1; p >= v && p[0] == ' '; p--) + ; + vec_add (s, v, clib_min (p - v + 1, l)); + return s; +} + +static u8 * +format_sfp_id (u8 * s, va_list * args) +{ + u32 id = va_arg (*args, u32); + char *t = 0; + switch (id) + { +#define _(f) case SFP_ID_##f: t = #f; break; + foreach_sfp_id +#undef _ + default: + return format (s, "unknown 0x%x", id); + } + return format (s, "%s", t); +} + +static u8 * +format_sfp_compatibility (u8 * s, va_list * args) +{ + u32 c = va_arg (*args, u32); + char *t = 0; + switch (c) + { +#define _(a,b,f) case SFP_COMPATIBILITY_##f: t = #f; break; + foreach_sfp_compatibility +#undef _ + default: + return format (s, "unknown 0x%x", c); + } + return format (s, "%s", t); +} + +u32 +sfp_is_comatible (sfp_eeprom_t * e, sfp_compatibility_t c) +{ + static struct + { + u8 byte, bit; + } t[] = + { +#define _(a,b,f) { .byte = a, .bit = b, }, + foreach_sfp_compatibility +#undef _ + }; + + ASSERT (c < ARRAY_LEN (t)); + return (e->compatibility[t[c].byte] & (1 << t[c].bit)) != 0; +} + +u8 * +format_sfp_eeprom (u8 * s, va_list * args) +{ + sfp_eeprom_t *e = va_arg (*args, sfp_eeprom_t *); + uword indent = format_get_indent (s); + int i; + + if (e->id != SFP_ID_sfp) + s = format (s, "id %U, ", format_sfp_id, 
e->id); + + s = format (s, "compatibility:"); + for (i = 0; i < SFP_N_COMPATIBILITY; i++) + if (sfp_is_comatible (e, i)) + s = format (s, " %U", format_sfp_compatibility, i); + + s = format (s, "\n%Uvendor: %U, part %U", + format_white_space, indent, + format_space_terminated, sizeof (e->vendor_name), + e->vendor_name, format_space_terminated, + sizeof (e->vendor_part_number), e->vendor_part_number); + s = + format (s, "\n%Urevision: %U, serial: %U, date code: %U", + format_white_space, indent, format_space_terminated, + sizeof (e->vendor_revision), e->vendor_revision, + format_space_terminated, sizeof (e->vendor_serial_number), + e->vendor_serial_number, format_space_terminated, + sizeof (e->vendor_date_code), e->vendor_date_code); + + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/sfp.h b/src/vnet/ethernet/sfp.h new file mode 100644 index 00000000..a1ac7997 --- /dev/null +++ b/src/vnet/ethernet/sfp.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_vnet_optics_sfp_h +#define included_vnet_optics_sfp_h + +#include <vppinfra/format.h> + +#define foreach_sfp_id \ + _ (unknown) \ + _ (gbic) \ + _ (on_motherboard) \ + _ (sfp) + +/* SFP_ID_<name> enumerators, numbered from 0 in foreach_sfp_id order. */ typedef enum +{ +#define _(f) SFP_ID_##f, + foreach_sfp_id +#undef _ +} sfp_id_t; + +/* Byte-for-byte image of an SFP EEPROM: every member is a u8 or u8 array, and the numeric suffix of each reservedNN / checksum member equals its byte offset in this struct. */ typedef struct +{ + u8 id; + u8 extended_id; + u8 connector_type; + u8 compatibility[8]; + u8 encoding; + u8 nominal_bit_rate_100mbits_per_sec; + u8 reserved13; + u8 link_length[5]; + u8 reserved19; + u8 vendor_name[16]; + u8 reserved36; + u8 vendor_oui[3]; + u8 vendor_part_number[16]; + u8 vendor_revision[4]; + /* 16 bit value network byte order. */ + u8 laser_wavelength_in_nm[2]; + u8 reserved62; + u8 checksum_0_to_62; + + u8 options[2]; + u8 max_bit_rate_margin_percent; + u8 min_bit_rate_margin_percent; + u8 vendor_serial_number[16]; + u8 vendor_date_code[8]; + u8 reserved92[3]; + u8 checksum_63_to_94; + u8 vendor_specific[32]; + u8 reserved128[384]; + + /* Vendor specific data follows; declared as a zero-length array, so it adds nothing to sizeof (sfp_eeprom_t). */ + u8 vendor_specific1[0]; +} sfp_eeprom_t; + +/* Returns non-zero when the 8-bit (wrapping) sum of the first 63 bytes of *e equals e->checksum_0_to_62. checksum_63_to_94 is not examined here. */ always_inline uword +sfp_eeprom_is_valid (sfp_eeprom_t * e) +{ + int i; + u8 sum = 0; + for (i = 0; i < 63; i++) + sum += ((u8 *) e)[i]; + return sum == e->checksum_0_to_62; +} + +/* _ (byte_index, bit_index, name): byte_index selects an element of sfp_eeprom_t.compatibility[] and bit_index a bit within that byte. */ +#define foreach_sfp_compatibility \ + _ (0, 4, 10g_base_sr) \ + _ (0, 5, 10g_base_lr) \ + _ (1, 2, oc48_long_reach) \ + _ (1, 1, oc48_intermediate_reach) \ + _ (1, 0, oc48_short_reach) \ + _ (2, 6, oc12_long_reach) \ + _ (2, 5, oc12_intermediate_reach) \ + _ (2, 4, oc12_short_reach) \ + _ (2, 2, oc3_long_reach) \ + _ (2, 1, oc3_intermediate_reach) \ + _ (2, 0, oc3_short_reach) \ + _ (3, 3, 1g_base_t) \ + _ (3, 2, 1g_base_cx) \ + _ (3, 1, 1g_base_lx) \ + _ (3, 0, 1g_base_sx) + +/* SFP_COMPATIBILITY_<name> enumerators in foreach_sfp_compatibility order; SFP_N_COMPATIBILITY is the number of entries. */ typedef enum +{ +#define _(a,b,f) SFP_COMPATIBILITY_##f, + foreach_sfp_compatibility +#undef _ + SFP_N_COMPATIBILITY, +} sfp_compatibility_t; + +u32 sfp_is_comatible (sfp_eeprom_t * e, sfp_compatibility_t c); + +format_function_t format_sfp_eeprom; + +#endif /* 
included_vnet_optics_sfp_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ethernet/types.def b/src/vnet/ethernet/types.def new file mode 100644 index 00000000..c7a47221 --- /dev/null +++ b/src/vnet/ethernet/types.def @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Emacs editing mode -*-C-*- Ethernet types. */ + +/* + * ethernet types + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* One ethernet_type (value, name) entry per ethernet type; the including file defines the ethernet_type macro. Values below 0x600 (1536) in the type field are LLC packet lengths rather than ethernet types. LLC_LENGTH and XNS_IDP are both listed as 0x600 below. */ +ethernet_type (0x600, LLC_LENGTH) + +ethernet_type (0x600, XNS_IDP) +ethernet_type (0x800, IP4) +ethernet_type (0x806, ARP) +ethernet_type (0x0BAD, VINES_IP) +ethernet_type (0x0BAE, VINES_LOOPBACK) +ethernet_type (0x0BAF, VINES_ECHO) +ethernet_type (0x1984, TRAIN) +ethernet_type (0x2000, CDP) +ethernet_type (0x2001, CGMP) +ethernet_type (0x2007, SRP_CONTROL) +ethernet_type (0x2452, CENTRINO_PROMISC) +ethernet_type (0x6000, DECNET) +ethernet_type (0x6001, DECNET_DUMP_LOAD) +ethernet_type (0x6002, DECNET_REMOTE_CONSOLE) +ethernet_type (0x6003, DECNET_ROUTE) +ethernet_type (0x6004, DEC_LAT) +ethernet_type (0x6005, DEC_DIAGNOSTIC) +ethernet_type (0x6006, DEC_CUSTOMER) +ethernet_type (0x6007, DEC_SCA) +ethernet_type (0x6558, TRANSPARENT_BRIDGING) +ethernet_type (0x6559, RAW_FRAME_RELAY) +ethernet_type (0x8035, REVERSE_ARP) +ethernet_type (0x8038, DEC_LAN_BRIDGE) +ethernet_type (0x803D, DEC_ETHERNET_ENCRYPTION) +ethernet_type (0x803F, DEC_LAN_TRAFFIC_MONITOR) +ethernet_type (0x8041, DEC_LAST) +ethernet_type (0x809B, APPLETALK) +ethernet_type (0x80D5, IBM_SNA) +ethernet_type (0x80F3, APPLETALK_AARP) +ethernet_type (0x80FF, WELLFLEET_COMPRESSION) +ethernet_type (0x8100, VLAN) +ethernet_type (0x8137, IPX) +ethernet_type (0x814C, SNMP) +ethernet_type (0x81FD, CABLETRON_ISMP) +ethernet_type (0x81FF, CABLETRON_ISMP_TBFLOOD) +ethernet_type (0x86DD, IP6) +ethernet_type (0x86DF, ATOMIC) +ethernet_type (0x876B, TCP_IP_COMPRESSION) +ethernet_type (0x876C, IP_AUTONOMOUS_SYSTEMS) +ethernet_type (0x876D, SECURE_DATA) +ethernet_type (0x8808, MAC_CONTROL) +ethernet_type (0x8809, SLOW_PROTOCOLS) +ethernet_type (0x880B, PPP) +ethernet_type (0x8847, MPLS) 
+ethernet_type (0x8848, MPLS_UPSTREAM_ASSIGNED) +ethernet_type (0x8863, PPPOE_DISCOVERY) +ethernet_type (0x8864, PPPOE_SESSION) +ethernet_type (0x886D, INTEL_ANS) +ethernet_type (0x886F, MICROSOFT_NLB_HEARTBEAT) +ethernet_type (0x8881, CDMA_2000) +ethernet_type (0x888e, 802_1X_AUTHENTICATION) +ethernet_type (0x8892, PROFINET) +ethernet_type (0x889a, HYPERSCSI) +ethernet_type (0x88a2, AOE) +ethernet_type (0x88a8, DOT1AD) +ethernet_type (0x88AE, BRDWALK) +ethernet_type (0x88B7, 802_OUI_EXTENDED) +ethernet_type (0x88c7, 802_11I_PRE_AUTHENTICATION) +ethernet_type (0x88cc, 802_1_LLDP) +ethernet_type (0x88e7, DOT1AH) +ethernet_type (0x894f, NSH) +ethernet_type (0x9000, LOOPBACK) +ethernet_type (0x9021, RTNET_MAC) +ethernet_type (0x9022, RTNET_CONFIG) +ethernet_type (0x9100, VLAN_9100) +ethernet_type (0x9200, VLAN_9200) +ethernet_type (0x9999, PGLAN) +ethernet_type (0xFEFE, SRP_ISIS) +ethernet_type (0xFFFF, RESERVED) |