From 1edfba9a6394128ee5fad2b413e9e0a05972ef48 Mon Sep 17 00:00:00 2001 From: John Lo Date: Sat, 27 Aug 2016 01:11:57 -0400 Subject: VPP-358: Add IPv6 ND Event Notification and Termination Add IPv6 equivalent of IPv4 ARP event notification which covers address resolution for L3 and MAC/IP binding in L2 BD and ARP termination in BD. For IPv6, ICMP6 neighbor solicitation and advertisement packets are utilized instead of ARP request and response packets for IPv4. Change-Id: I0088fa173e4480de297c8053ea2fcd0821322815 Signed-off-by: John Lo --- vnet/vnet/ethernet/arp.c | 93 +++++++++++----- vnet/vnet/ip/ip6.h | 20 ++++ vnet/vnet/ip/ip6_neighbor.c | 251 +++++++++++++++++++++++++++++++++++++++--- vnet/vnet/l2/l2_bd.c | 71 +++++++++--- vnet/vnet/l2/l2_bd.h | 2 +- vnet/vnet/l2/l2_input.c | 6 +- vpp-api-test/vat/api_format.c | 54 +++++++++ vpp/vpp-api/api.c | 236 ++++++++++++++++++++++++++++++++++----- vpp/vpp-api/custom_dump.c | 15 +++ vpp/vpp-api/vpe.api | 48 ++++++++ 10 files changed, 707 insertions(+), 89 deletions(-) diff --git a/vnet/vnet/ethernet/arp.c b/vnet/vnet/ethernet/arp.c index c0b06e0f7b1..d0ed1dcb3c1 100644 --- a/vnet/vnet/ethernet/arp.c +++ b/vnet/vnet/ethernet/arp.c @@ -16,6 +16,7 @@ */ #include +#include #include #include #include @@ -267,6 +268,23 @@ format_ethernet_arp_input_trace (u8 * s, va_list * va) return s; } +static u8 * +format_arp_term_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *); + + /* arp-term trace data saved is either arp or ip6/icmp6 packet: + - for arp, the 1st 16-bit field is hw type of value of 0x0001. + - for ip6, the first nibble has value of 6. */ + s = format (s, "%U", t->packet_data[0] == 0 ? 
+ format_ethernet_arp_header : format_ip6_header, + t->packet_data, sizeof (t->packet_data)); + + return s; +} + clib_error_t * ethernet_arp_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) @@ -1825,8 +1843,8 @@ VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = { /* - * ARP Termination in a L2 Bridge Domain based on an - * IP4 to MAC hash table mac_by_ip4 for each BD. + * ARP/ND Termination in a L2 Bridge Domain based on IP4/IP6 to MAC + * hash tables mac_by_ip4 and mac_by_ip6 for each BD. */ typedef enum { @@ -1863,6 +1881,7 @@ arp_term_l2bd (vlib_main_t * vm, vlib_buffer_t *p0; ethernet_header_t *eth0; ethernet_arp_header_t *arp0; + ip6_header_t *iph0; u8 *l3h0; u32 pi0, error0, next0, sw_if_index0; u16 ethertype0; @@ -1883,6 +1902,13 @@ arp_term_l2bd (vlib_main_t * vm, ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2)); arp0 = (ethernet_arp_header_t *) l3h0; + if (PREDICT_FALSE ((ethertype0 != ETHERNET_TYPE_ARP) || + (arp0->opcode != + clib_host_to_net_u16 + (ETHERNET_ARP_OPCODE_request)))) + goto check_ip6_nd; + + /* Must be ARP request packet here */ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && (p0->flags & VLIB_BUFFER_IS_TRACED))) { @@ -1891,12 +1917,6 @@ arp_term_l2bd (vlib_main_t * vm, clib_memcpy (t0, l3h0, sizeof (ethernet_arp_input_trace_t)); } - if (PREDICT_FALSE ((ethertype0 != ETHERNET_TYPE_ARP) || - (arp0->opcode != - clib_host_to_net_u16 - (ETHERNET_ARP_OPCODE_request)))) - goto next_l2_feature; - error0 = ETHERNET_ARP_ERROR_replies_sent; error0 = (arp0->l2_type != @@ -1912,8 +1932,8 @@ arp_term_l2bd (vlib_main_t * vm, if (error0) goto drop; - // Trash ARP packets whose ARP-level source addresses do not - // match their L2-frame-level source addresses */ + /* Trash ARP packets whose ARP-level source addresses do not + match their L2-frame-level source addresses */ if (PREDICT_FALSE (memcmp (eth0->src_address, arp0->ip4_over_ethernet[0].ethernet, @@ -1923,7 +1943,7 @@ arp_term_l2bd (vlib_main_t * vm, 
goto drop; } - // Check if anyone want ARP request events for L2 BDs + /* Check if anyone want ARP request events for L2 BDs */ { pending_resolution_t *mc; ethernet_arp_main_t *am = &ethernet_arp_main; @@ -1937,13 +1957,13 @@ arp_term_l2bd (vlib_main_t * vm, int rv = 1; mc = pool_elt_at_index (am->mac_changes, next_index); fp = mc->data_callback; - // Call the callback, return 1 to suppress dup events */ + /* Call the callback, return 1 to suppress dup events */ if (fp) rv = (*fp) (mc->data, arp0->ip4_over_ethernet[0].ethernet, sw_if_index0, arp0->ip4_over_ethernet[0].ip4.as_u32); - // Signal the resolver process + /* Signal the resolver process */ if (rv == 0) vlib_process_signal_event (vm, mc->node_index, mc->type_opaque, mc->data); @@ -1952,7 +1972,7 @@ arp_term_l2bd (vlib_main_t * vm, } } - // lookup BD mac_by_ip4 hash table for MAC entry + /* lookup BD mac_by_ip4 hash table for MAC entry */ ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32; bd_index0 = vnet_buffer (p0)->l2.bd_index; if (PREDICT_FALSE ((bd_index0 != last_bd_index) @@ -1964,10 +1984,10 @@ arp_term_l2bd (vlib_main_t * vm, macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0); if (PREDICT_FALSE (!macp0)) - goto next_l2_feature; // MAC not found + goto next_l2_feature; /* MAC not found */ - // MAC found, send ARP reply - - // Convert ARP request packet to ARP reply + /* MAC found, send ARP reply - + Convert ARP request packet to ARP reply */ arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply); arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0]; arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0; @@ -1976,8 +1996,9 @@ arp_term_l2bd (vlib_main_t * vm, clib_memcpy (eth0->src_address, macp0, 6); n_replies_sent += 1; - // For BVI, need to use l2-fwd node to send ARP reply as - // l2-output node cannot output packet to BVI properly + output_response: + /* For BVI, need to use l2-fwd node to send ARP reply as + l2-output node cannot output packet to BVI properly */ cfg0 = vec_elt_at_index 
(l2im->configs, sw_if_index0); if (PREDICT_FALSE (cfg0->bvi)) { @@ -1986,19 +2007,37 @@ arp_term_l2bd (vlib_main_t * vm, goto next_l2_feature; } - // Send ARP reply back out input interface through l2-output + /* Send ARP/ND reply back out input interface through l2-output */ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; next0 = ARP_TERM_NEXT_L2_OUTPUT; - // Note that output to VXLAN tunnel will fail due to SHG which - // is probably desireable since ARP termination is not intended - // for ARP requests from other hosts. If output to VXLAN tunnel is - // required, however, can just clear the SHG in packet as follows: - // vnet_buffer(p0)->l2.shg = 0; - + /* Note that output to VXLAN tunnel will fail due to SHG which + is probably desireable since ARP termination is not intended + for ARP requests from other hosts. If output to VXLAN tunnel is + required, however, can just clear the SHG in packet as follows: + vnet_buffer(p0)->l2.shg = 0; */ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, pi0, next0); continue; + check_ip6_nd: + /* IP6 ND event notification or solicitation handling to generate + local response instead of flooding */ + iph0 = (ip6_header_t *) l3h0; + if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 && + iph0->protocol == IP_PROTOCOL_ICMP6 && + !ip6_address_is_link_local_unicast + (&iph0->src_address) + && + !ip6_address_is_unspecified + (&iph0->src_address))) + { + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + if (vnet_ip6_nd_term (vm, node, p0, eth0, iph0, sw_if_index0, + vnet_buffer (p0)->l2.bd_index, + vnet_buffer (p0)->l2.shg)) + goto output_response; + } + next_l2_feature: { u32 feature_bitmap0 = @@ -2046,7 +2085,7 @@ VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = { [ARP_TERM_NEXT_DROP] = "error-drop", }, .format_buffer = format_ethernet_arp_header, - .format_trace = format_ethernet_arp_input_trace, + .format_trace = format_arp_term_input_trace, }; /* *INDENT-ON* */ diff --git 
a/vnet/vnet/ip/ip6.h b/vnet/vnet/ip/ip6.h index b43e2dac791..f5f3de84676 100644 --- a/vnet/vnet/ip/ip6.h +++ b/vnet/vnet/ip/ip6.h @@ -41,6 +41,8 @@ #define included_ip_ip6_h #include +#include +#include #include #include #include @@ -533,6 +535,24 @@ void vnet_register_ip6_neighbor_resolution_event(vnet_main_t * vnm, uword type_opaque, uword data); +int vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm, + void * data_callback, + u32 pid, + void * address_arg, + uword node_index, + uword type_opaque, + uword data, + int is_add); + +int vnet_ip6_nd_term (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_buffer_t * p0, + ethernet_header_t * eth, + ip6_header_t * ip, + u32 sw_if_index, + u16 bd_index, + u8 shg); + int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, u32 table_index); extern vlib_node_registration_t ip6_lookup_node; diff --git a/vnet/vnet/ip/ip6_neighbor.c b/vnet/vnet/ip/ip6_neighbor.c index 1dd09c11052..a35f58a3039 100644 --- a/vnet/vnet/ip/ip6_neighbor.c +++ b/vnet/vnet/ip/ip6_neighbor.c @@ -169,6 +169,9 @@ typedef struct { uword node_index; uword type_opaque; uword data; + /* Used for nd event notification only */ + void * data_callback; + u32 pid; } pending_resolution_t; @@ -180,6 +183,10 @@ typedef struct { mhash_t pending_resolutions_by_address; pending_resolution_t * pending_resolutions; + /* Mac address change notification */ + mhash_t mac_changes_by_address; + pending_resolution_t * mac_changes; + u32 * neighbor_input_next_index_by_hw_if_index; ip6_neighbor_t * neighbor_pool; @@ -197,6 +204,7 @@ typedef struct { } ip6_neighbor_main_t; static ip6_neighbor_main_t ip6_neighbor_main; +static ip6_address_t ip6a_zero; /* ip6 address 0 */ static u8 * format_ip6_neighbor_ip6_entry (u8 * s, va_list * va) { @@ -341,7 +349,7 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, u32 next_index; u32 adj_index; ip_adjacency_t *existing_adj; - pending_resolution_t * pr; + pending_resolution_t * pr, * mc; #if DPDK > 0 if 
(os_get_cpu_number()) @@ -442,24 +450,51 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, /* Customer(s) waiting for this address to be resolved? */ p = mhash_get (&nm->pending_resolutions_by_address, a); - if (p == 0) - goto out; - - next_index = p[0]; + if (p) + { + next_index = p[0]; - while (next_index != (u32)~0) + while (next_index != (u32)~0) + { + pr = pool_elt_at_index (nm->pending_resolutions, next_index); + vlib_process_signal_event (vm, pr->node_index, + pr->type_opaque, + pr->data); + next_index = pr->next_index; + pool_put (nm->pending_resolutions, pr); + } + + mhash_unset (&nm->pending_resolutions_by_address, a, 0); + } + + /* Customer(s) requesting ND event for this address? */ + p = mhash_get (&nm->mac_changes_by_address, a); + if (p) { - pr = pool_elt_at_index (nm->pending_resolutions, next_index); - vlib_process_signal_event (vm, pr->node_index, - pr->type_opaque, - pr->data); - next_index = pr->next_index; - pool_put (nm->pending_resolutions, pr); + next_index = p[0]; + + while (next_index != (u32)~0) + { + int (*fp)(u32, u8 *, u32, ip6_address_t *); + int rv = 1; + mc = pool_elt_at_index (nm->mac_changes, next_index); + fp = mc->data_callback; + + /* Call the user's data callback, return 1 to suppress dup events */ + if (fp) + rv = (*fp)(mc->data, link_layer_address, sw_if_index, &ip6a_zero); + /* + * Signal the resolver process, as long as the user + * says they want to be notified + */ + if (rv == 0) + vlib_process_signal_event (vm, mc->node_index, + mc->type_opaque, + mc->data); + next_index = mc->next_index; + } } - mhash_unset (&nm->pending_resolutions_by_address, a, 0); - -out: vlib_worker_thread_barrier_release(vm); return 0; } @@ -3327,6 +3362,10 @@ static clib_error_t * ip6_neighbor_init (vlib_main_t * vm) /* value size */ sizeof (uword), /* key size */ sizeof (ip6_address_t)); + mhash_init (&nm->mac_changes_by_address, + /* value size */ sizeof (uword), + /* key size */ sizeof (ip6_address_t)); + /* default, configurable */ 
nm->limit_neighbor_cache_size = 50000; @@ -3374,3 +3413,185 @@ void vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm, pr - nm->pending_resolutions, 0 /* old value */); } +int vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm, + void * data_callback, + u32 pid, + void * address_arg, + uword node_index, + uword type_opaque, + uword data, + int is_add) +{ + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + ip6_address_t * address = address_arg; + uword * p; + pending_resolution_t * mc; + void (*fp)(u32, u8 *) = data_callback; + + if (is_add) + { + pool_get (nm->mac_changes, mc); + + mc->next_index = ~0; + mc->node_index = node_index; + mc->type_opaque = type_opaque; + mc->data = data; + mc->data_callback = data_callback; + mc->pid = pid; + + p = mhash_get (&nm->mac_changes_by_address, address); + if (p) + { + /* Insert new resolution at the head of the list */ + mc->next_index = p[0]; + mhash_unset (&nm->mac_changes_by_address, address, 0); + } + + mhash_set (&nm->mac_changes_by_address, address, + mc - nm->mac_changes, 0); + return 0; + } + else + { + u32 index; + pending_resolution_t * mc_last = 0; + + p = mhash_get (&nm->mac_changes_by_address, address); + if (p == 0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + index = p[0]; + + while (index != (u32)~0) + { + mc = pool_elt_at_index (nm->mac_changes, index); + if (mc->node_index == node_index && + mc->type_opaque == type_opaque && + mc->pid == pid) + { + /* Clients may need to clean up pool entries, too */ + if (fp) + (*fp)(mc->data, 0 /* no new mac addrs */); + if (index == p[0]) + { + mhash_unset (&nm->mac_changes_by_address, address, 0); + if (mc->next_index != ~0) + mhash_set (&nm->mac_changes_by_address, address, + mc->next_index, 0); + pool_put (nm->mac_changes, mc); + return 0; + } + else + { + ASSERT(mc_last); + mc_last->next_index = mc->next_index; + pool_put (nm->mac_changes, mc); + return 0; + } + } + mc_last = mc; + index = mc->next_index; + } + + return VNET_API_ERROR_NO_SUCH_ENTRY; + } +} 
+ +int vnet_ip6_nd_term (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_buffer_t * p0, + ethernet_header_t * eth, + ip6_header_t * ip, + u32 sw_if_index, + u16 bd_index, + u8 shg) +{ + ip6_neighbor_main_t * nm = &ip6_neighbor_main; + icmp6_neighbor_solicitation_or_advertisement_header_t * ndh; + pending_resolution_t * mc; + uword *p; + + ndh = ip6_next_header (ip); + if (ndh->icmp.type != ICMP6_neighbor_solicitation && + ndh->icmp.type != ICMP6_neighbor_advertisement) + return 0; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (p0->flags & VLIB_BUFFER_IS_TRACED))) + { + u8 *t0 = vlib_add_trace (vm, node, p0, + sizeof (icmp6_input_trace_t)); + clib_memcpy (t0, ip, sizeof (icmp6_input_trace_t)); + } + + /* Check if anyone want ND events for L2 BDs */ + p = mhash_get (&nm->mac_changes_by_address, &ip6a_zero); + if (p && shg == 0) + { /* Only SHG 0 interface which is more likely local */ + u32 next_index = p[0]; + while (next_index != (u32)~0) + { + int (*fp)(u32, u8 *, u32, ip6_address_t *); + int rv = 1; + mc = pool_elt_at_index (nm->mac_changes, next_index); + fp = mc->data_callback; + /* Call the callback, return 1 to suppress dup events */ + if (fp) rv = (*fp)(mc->data, + eth->src_address, + sw_if_index, + &ip->src_address); + /* Signal the resolver process */ + if (rv == 0) + vlib_process_signal_event (vm, mc->node_index, + mc->type_opaque, + mc->data); + next_index = mc->next_index; + } + } + + /* Check if MAC entry exsist for solicited target IP */ + if (ndh->icmp.type == ICMP6_neighbor_solicitation) + { + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t * opt; + l2_bridge_domain_t *bd_config; + u8 * macp; + + opt = (void *) (ndh + 1); + if ((opt->header.type != + ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address) || + (opt->header.n_data_u64s != 1)) + return 0; /* source link layer address option not present */ + + bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index); + macp = (u8 *) hash_get_mem 
(bd_config->mac_by_ip6, &ndh->target_address); + if (macp) + { /* found ip-mac entry, generate eighbor advertisement response */ + int bogus_length; + vlib_node_runtime_t * error_node = + vlib_node_get_runtime (vm, ip6_icmp_input_node.index); + ip->dst_address = ip->src_address; + ip->src_address = ndh->target_address; + ip->hop_limit = 255; + opt->header.type = + ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address; + clib_memcpy (opt->ethernet_address, macp, 6); + ndh->icmp.type = ICMP6_neighbor_advertisement; + ndh->advertisement_flags = clib_host_to_net_u32 + (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED | + ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE); + ndh->icmp.checksum = 0; + ndh->icmp.checksum = ip6_tcp_udp_icmp_compute_checksum(vm, p0, ip, + &bogus_length); + clib_memcpy(eth->dst_address, eth->src_address, 6); + clib_memcpy(eth->src_address, macp, 6); + vlib_error_count (vm, error_node->node_index, + ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX, 1); + return 1; + } + } + + return 0; + +} + + diff --git a/vnet/vnet/l2/l2_bd.c b/vnet/vnet/l2/l2_bd.c index a872453a98b..490a08f2d03 100644 --- a/vnet/vnet/l2/l2_bd.c +++ b/vnet/vnet/l2/l2_bd.c @@ -47,6 +47,8 @@ bd_validate (l2_bridge_domain_t * bd_config) bd_config->bvi_sw_if_index = ~0; bd_config->members = 0; bd_config->mac_by_ip4 = 0; + bd_config->mac_by_ip6 = hash_create_mem (0, sizeof (ip6_address_t), + sizeof (uword)); } } @@ -512,11 +514,42 @@ bd_add_del_ip_mac (u32 bd_index, ASSERT (sizeof (uword) == sizeof (u64)); /* make sure uword is 8 bytes */ - mac16[3] = 0; // Clear last 2 unsed bytes of the 8-byte MAC address + mac16[3] = 0; /* Clear last 2 unsed bytes of the 8-byte MAC address */ if (is_ip6) { - /* not yet implemented */ - return 1; + ip6_address_t *ip6_addr_key; + hash_pair_t *hp; + old_mac = (u64 *) hash_get_mem (bd_cfg->mac_by_ip6, ip_addr); + if (is_add) + { + if (old_mac == 0) + { /* new entry - allocate and craete ip6 address key */ + ip6_addr_key = clib_mem_alloc (sizeof (ip6_address_t)); + 
clib_memcpy (ip6_addr_key, ip_addr, sizeof (ip6_address_t)); + } + else if (*old_mac == new_mac) + { /* same mac entry already exist for ip6 address */ + return 0; + } + else + { /* updat mac for ip6 address */ + hp = hash_get_pair (bd_cfg->mac_by_ip6, ip_addr); + ip6_addr_key = (ip6_address_t *) hp->key; + } + hash_set_mem (bd_cfg->mac_by_ip6, ip6_addr_key, new_mac); + } + else + { + if (old_mac && (*old_mac == new_mac)) + { + hp = hash_get_pair (bd_cfg->mac_by_ip6, ip_addr); + ip6_addr_key = (ip6_address_t *) hp->key; + hash_unset_mem (bd_cfg->mac_by_ip6, ip_addr); + clib_mem_free (ip6_addr_key); + } + else + return 1; + } } else { @@ -524,26 +557,19 @@ bd_add_del_ip_mac (u32 bd_index, old_mac = (u64 *) hash_get (bd_cfg->mac_by_ip4, ip4_addr.as_u32); if (is_add) { - /* mac entry already exist? */ if (old_mac && (*old_mac == new_mac)) - return 0; + return 0; /* mac entry already exist */ hash_set (bd_cfg->mac_by_ip4, ip4_addr.as_u32, new_mac); } else { - /* Mac entry match? */ if (old_mac && (*old_mac == new_mac)) - { - /* clear entry */ - hash_unset (bd_cfg->mac_by_ip4, ip4_addr.as_u32); - } + hash_unset (bd_cfg->mac_by_ip4, ip4_addr.as_u32); else - { - return 1; - } + return 1; } - return 0; } + return 0; } /** @@ -610,8 +636,9 @@ bd_arp_entry (vlib_main_t * vm, { error = clib_error_return (0, "MAC %s for IP %U and MAC %U failed", is_add ? "add" : "del", - format_ip4_address, ip_addr, - format_ethernet_address, mac_addr); + is_ip6 ? 
+ format_ip4_address : format_ip6_address, + ip_addr, format_ethernet_address, mac_addr); } done: @@ -780,17 +807,25 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) (bd_config->feature_bitmap & L2INPUT_FEAT_ARP_TERM)) { u32 ip4_addr; + ip6_address_t *ip6_addr; u64 mac_addr; vlib_cli_output (vm, - "\n IP4 to MAC table for ARP Termination"); + "\n IP4/IP6 to MAC table for ARP Termination"); /* *INDENT-OFF* */ hash_foreach (ip4_addr, mac_addr, bd_config->mac_by_ip4, ({ - vlib_cli_output (vm, "%=20U => %=20U", + vlib_cli_output (vm, "%=40U => %=20U", format_ip4_address, &ip4_addr, format_ethernet_address, &mac_addr); })); + + hash_foreach_mem (ip6_addr, mac_addr, bd_config->mac_by_ip6, + ({ + vlib_cli_output (vm, "%=40U => %=20U", + format_ip6_address, ip6_addr, + format_ethernet_address, &mac_addr); + })); /* *INDENT-ON* */ } } diff --git a/vnet/vnet/l2/l2_bd.h b/vnet/vnet/l2/l2_bd.h index 82453eea153..2d7853ebead 100644 --- a/vnet/vnet/l2/l2_bd.h +++ b/vnet/vnet/l2/l2_bd.h @@ -70,7 +70,7 @@ typedef struct /* Vector of members in the replication group */ l2_flood_member_t *members; - /* hash ip4/ip6 -> mac for arp termination */ + /* hash ip4/ip6 -> mac for arp/nd termination */ uword *mac_by_ip4; uword *mac_by_ip6; diff --git a/vnet/vnet/l2/l2_input.c b/vnet/vnet/l2/l2_input.c index 9607031fcda..f337b78d3e6 100644 --- a/vnet/vnet/l2/l2_input.c +++ b/vnet/vnet/l2/l2_input.c @@ -197,10 +197,14 @@ classify_and_dispatch (vlib_main_t * vm, { u32 *dsthi = (u32 *) & h0->dst_address[0]; u32 *dstlo = (u32 *) & h0->dst_address[2]; + protocol = ((ip6_header_t *) l3h0)->protocol; /* Disable bridge forwarding (flooding will execute instead if not xconnect) */ feat_mask &= ~(L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD); - if (ethertype != ETHERNET_TYPE_ARP) /* Disable ARP-term for non-ARP packet */ + + /* Disable ARP-term for non-ARP and non-ICMP6 packet */ + if (ethertype != ETHERNET_TYPE_ARP && + (ethertype != ETHERNET_TYPE_IP6 || protocol != 
IP_PROTOCOL_ICMP6)) feat_mask &= ~(L2INPUT_FEAT_ARP_TERM); /* dest mac is multicast or broadcast */ diff --git a/vpp-api-test/vat/api_format.c b/vpp-api-test/vat/api_format.c index c1a472a3c8a..0926dfb0a20 100644 --- a/vpp-api-test/vat/api_format.c +++ b/vpp-api-test/vat/api_format.c @@ -1134,6 +1134,21 @@ vl_api_ip4_arp_event_t_handler_json (vl_api_ip4_arp_event_t * mp) /* JSON output not supported */ } +static void +vl_api_ip6_nd_event_t_handler (vl_api_ip6_nd_event_t * mp) +{ + vat_main_t *vam = &vat_main; + errmsg ("ip6 nd event: address %U new mac %U sw_if_index %d\n", + format_ip6_address, mp->address, + format_ethernet_address, mp->new_mac, mp->sw_if_index); +} + +static void +vl_api_ip6_nd_event_t_handler_json (vl_api_ip6_nd_event_t * mp) +{ + /* JSON output not supported */ +} + /* * Special-case: build the bridge domain table, maintain * the next bd id vbl. @@ -3341,6 +3356,7 @@ _(l2_interface_vlan_tag_rewrite_reply) \ _(modify_vhost_user_if_reply) \ _(delete_vhost_user_if_reply) \ _(want_ip4_arp_events_reply) \ +_(want_ip6_nd_events_reply) \ _(input_acl_set_interface_reply) \ _(ipsec_spd_add_del_reply) \ _(ipsec_interface_add_del_spd_reply) \ @@ -3519,6 +3535,8 @@ _(VXLAN_GPE_TUNNEL_DETAILS, vxlan_gpe_tunnel_details) \ _(INTERFACE_NAME_RENUMBER_REPLY, interface_name_renumber_reply) \ _(WANT_IP4_ARP_EVENTS_REPLY, want_ip4_arp_events_reply) \ _(IP4_ARP_EVENT, ip4_arp_event) \ +_(WANT_IP6_ND_EVENTS_REPLY, want_ip6_nd_events_reply) \ +_(IP6_ND_EVENT, ip6_nd_event) \ _(INPUT_ACL_SET_INTERFACE_REPLY, input_acl_set_interface_reply) \ _(IP_ADDRESS_DETAILS, ip_address_details) \ _(IP_DETAILS, ip_details) \ @@ -10212,6 +10230,41 @@ api_want_ip4_arp_events (vat_main_t * vam) W; } +static int +api_want_ip6_nd_events (vat_main_t * vam) +{ + unformat_input_t *line_input = vam->input; + vl_api_want_ip6_nd_events_t *mp; + f64 timeout; + ip6_address_t address; + int address_set = 0; + u32 enable_disable = 1; + + while (unformat_check_input (line_input) != 
UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "address %U", unformat_ip6_address, &address)) + address_set = 1; + else if (unformat (line_input, "del")) + enable_disable = 0; + else + break; + } + + if (address_set == 0) + { + errmsg ("missing addresses\n"); + return -99; + } + + M (WANT_IP6_ND_EVENTS, want_ip6_nd_events); + mp->enable_disable = enable_disable; + mp->pid = getpid (); + clib_memcpy (mp->address, &address, sizeof (ip6_address_t)); + + S; + W; +} + static int api_input_acl_set_interface (vat_main_t * vam) { @@ -15704,6 +15757,7 @@ _(input_acl_set_interface, \ " | sw_if_index [ip4-table ] [ip6-table ]\n" \ " [l2-table ] [del]") \ _(want_ip4_arp_events, "address [del]") \ +_(want_ip6_nd_events, "address [del]") \ _(ip_address_dump, "(ipv4 | ipv6) ( | sw_if_index )") \ _(ip_dump, "ipv4 | ipv6") \ _(ipsec_spd_add_del, "spd_id [del]") \ diff --git a/vpp/vpp-api/api.c b/vpp/vpp-api/api.c index 8d54ba75d99..009881842d4 100644 --- a/vpp/vpp-api/api.c +++ b/vpp/vpp-api/api.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -301,6 +302,7 @@ _(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \ _(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \ _(INTERFACE_NAME_RENUMBER, interface_name_renumber) \ _(WANT_IP4_ARP_EVENTS, want_ip4_arp_events) \ +_(WANT_IP6_ND_EVENTS, want_ip6_nd_events) \ _(INPUT_ACL_SET_INTERFACE, input_acl_set_interface) \ _(IPSEC_SPD_ADD_DEL, ipsec_spd_add_del) \ _(IPSEC_INTERFACE_ADD_DEL_SPD, ipsec_interface_add_del_spd) \ @@ -425,12 +427,15 @@ typedef struct /* notifications happen really early in the game */ u8 link_state_process_up; - /* ip4 pending route adds */ + /* ip4 and ip6 pending route adds */ pending_route_t *pending_routes; /* ip4 arp event registration pool */ vl_api_ip4_arp_event_t *arp_events; + /* ip6 nd event registration pool */ + vl_api_ip6_nd_event_t *nd_events; + /* convenience */ vlib_main_t *vlib_main; vnet_main_t *vnet_main; @@ -447,6 +452,7 @@ static void 
send_sw_interface_flags_deleted (vpe_api_main_t * am, u32 sw_if_index); static int arp_change_delete_callback (u32 pool_index, u8 * notused); +static int nd_change_delete_callback (u32 pool_index, u8 * notused); /* Clean up all registrations belonging to the indicated client */ @@ -629,17 +635,25 @@ reply: \ REPLY_MACRO (VL_API_WANT_##UCA##_REPLY); \ } +/* *INDENT-OFF* */ pub_sub_handler (interface_events, INTERFACE_EVENTS) pub_sub_handler (oam_events, OAM_EVENTS) +/* *INDENT-ON* */ + #define RESOLUTION_EVENT 1 #define RESOLUTION_PENDING_EVENT 2 #define IP4_ARP_EVENT 3 - static int ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp); - static int ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp); - static int mpls_ethernet_add_del_tunnel_2_t_handler - (vl_api_mpls_ethernet_add_del_tunnel_2_t * mp); +#define IP6_ND_EVENT 4 + +static int ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp); + +static int ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp); + +static int mpls_ethernet_add_del_tunnel_2_t_handler + (vl_api_mpls_ethernet_add_del_tunnel_2_t * mp); - void handle_ip4_arp_event (u32 pool_index) +void +handle_ip4_arp_event (u32 pool_index) { vpe_api_main_t *vam = &vpe_api_main; vnet_main_t *vnm = vam->vnet_main; @@ -687,6 +701,55 @@ pub_sub_handler (oam_events, OAM_EVENTS) } } +void +handle_ip6_nd_event (u32 pool_index) +{ + vpe_api_main_t *vam = &vpe_api_main; + vnet_main_t *vnm = vam->vnet_main; + vlib_main_t *vm = vam->vlib_main; + vl_api_ip6_nd_event_t *event; + vl_api_ip6_nd_event_t *mp; + unix_shared_memory_queue_t *q; + + /* Client can cancel, die, etc. 
*/ + if (pool_is_free_index (vam->nd_events, pool_index)) + return; + + event = pool_elt_at_index (vam->nd_events, pool_index); + + q = vl_api_client_index_to_input_queue (event->client_index); + if (!q) + { + (void) vnet_add_del_ip6_nd_change_event + (vnm, nd_change_delete_callback, + event->pid, &event->address, + vpe_resolver_process_node.index, IP6_ND_EVENT, + ~0 /* pool index, notused */ , 0 /* is_add */ ); + return; + } + + if (q->cursize < q->maxsize) + { + mp = vl_msg_api_alloc (sizeof (*mp)); + clib_memcpy (mp, event, sizeof (*mp)); + vl_msg_api_send_shmem (q, (u8 *) & mp); + } + else + { + static f64 last_time; + /* + * Throttle syslog msgs. + * It's pretty tempting to just revoke the registration... + */ + if (vlib_time_now (vm) > last_time + 10.0) + { + clib_warning ("ip6 nd event for %U to pid %d: queue stuffed!", + format_ip6_address, &event->address, event->pid); + last_time = vlib_time_now (vm); + } + } +} + static uword resolver_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) @@ -774,6 +837,11 @@ resolver_process (vlib_main_t * vm, handle_ip4_arp_event (event_data[i]); break; + case IP6_ND_EVENT: + for (i = 0; i < vec_len (event_data); i++) + handle_ip6_nd_event (event_data[i]); + break; + case ~0: /* timeout, retry pending resolutions */ /* *INDENT-OFF* */ pool_foreach (pr, vam->pending_routes, @@ -6130,29 +6198,68 @@ arp_change_data_callback (u32 pool_index, u8 * new_mac, return 1; event = pool_elt_at_index (am->arp_events, pool_index); + /* *INDENT-OFF* */ if (memcmp (&event->new_mac, new_mac, sizeof (event->new_mac))) { clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac)); } else { /* same mac */ - if ((sw_if_index == event->sw_if_index) && ((address == 0) || - /* for BD case, also check IP address with 10 sec timeout */ - ((address == event->address) - && - ((now - - arp_event_last_time) < - 10.0)))) + if (sw_if_index == event->sw_if_index && + (!event->mac_ip || + /* for BD case, also check IP address with 10 
sec timeout */ + (address == event->address && + (now - arp_event_last_time) < 10.0))) return 1; } + /* *INDENT-ON* */ arp_event_last_time = now; event->sw_if_index = sw_if_index; - if (address) + if (event->mac_ip) event->address = address; return 0; } +static int +nd_change_data_callback (u32 pool_index, u8 * new_mac, + u32 sw_if_index, ip6_address_t * address) +{ + vpe_api_main_t *am = &vpe_api_main; + vlib_main_t *vm = am->vlib_main; + vl_api_ip6_nd_event_t *event; + static f64 nd_event_last_time; + f64 now = vlib_time_now (vm); + + if (pool_is_free_index (am->nd_events, pool_index)) + return 1; + + event = pool_elt_at_index (am->nd_events, pool_index); + + /* *INDENT-OFF* */ + if (memcmp (&event->new_mac, new_mac, sizeof (event->new_mac))) + { + clib_memcpy (event->new_mac, new_mac, sizeof (event->new_mac)); + } + else + { /* same mac */ + if (sw_if_index == event->sw_if_index && + (!event->mac_ip || + /* for BD case, also check IP address with 10 sec timeout */ + (ip6_address_is_equal (address, + (ip6_address_t *) event->address) && + (now - nd_event_last_time) < 10.0))) + return 1; + } + /* *INDENT-ON* */ + + nd_event_last_time = now; + event->sw_if_index = sw_if_index; + if (event->mac_ip) + clib_memcpy (event->address, address, sizeof (event->address)); + return 0; +} + static int arp_change_delete_callback (u32 pool_index, u8 * notused) { @@ -6165,6 +6272,18 @@ arp_change_delete_callback (u32 pool_index, u8 * notused) return 0; } +static int +nd_change_delete_callback (u32 pool_index, u8 * notused) +{ + vpe_api_main_t *am = &vpe_api_main; + + if (pool_is_free_index (am->nd_events, pool_index)) + return 1; + + pool_put_index (am->nd_events, pool_index); + return 0; +} + static void vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) { @@ -6184,6 +6303,8 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) event->context = mp->context; event->address = mp->address; event->pid = mp->pid; + if (mp->address == 0) + 
event->mac_ip = 1; rv = vnet_add_del_ip4_arp_change_event (vnm, arp_change_data_callback, @@ -6202,6 +6323,45 @@ vl_api_want_ip4_arp_events_t_handler (vl_api_want_ip4_arp_events_t * mp) REPLY_MACRO (VL_API_WANT_IP4_ARP_EVENTS_REPLY); } +static void +vl_api_want_ip6_nd_events_t_handler (vl_api_want_ip6_nd_events_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + vnet_main_t *vnm = vnet_get_main (); + vl_api_want_ip6_nd_events_reply_t *rmp; + vl_api_ip6_nd_event_t *event; + int rv; + + if (mp->enable_disable) + { + pool_get (am->nd_events, event); + memset (event, 0, sizeof (*event)); + + event->_vl_msg_id = ntohs (VL_API_IP6_ND_EVENT); + event->client_index = mp->client_index; + event->context = mp->context; + clib_memcpy (event->address, mp->address, 16); + event->pid = mp->pid; + if (ip6_address_is_zero ((ip6_address_t *) mp->address)) + event->mac_ip = 1; + + rv = vnet_add_del_ip6_nd_change_event + (vnm, nd_change_data_callback, + mp->pid, mp->address /* addr, in net byte order */ , + vpe_resolver_process_node.index, + IP6_ND_EVENT, event - am->nd_events, 1 /* is_add */ ); + } + else + { + rv = vnet_add_del_ip6_nd_change_event + (vnm, nd_change_delete_callback, + mp->pid, mp->address /* addr, in net byte order */ , + vpe_resolver_process_node.index, + IP6_ND_EVENT, ~0 /* pool index */ , 0 /* is_add */ ); + } + REPLY_MACRO (VL_API_WANT_IP6_ND_EVENTS_REPLY); +} + static void vl_api_input_acl_set_interface_t_handler (vl_api_input_acl_set_interface_t * mp) { @@ -8434,28 +8594,50 @@ format_arp_event (u8 * s, va_list * args) { vl_api_ip4_arp_event_t *event = va_arg (*args, vl_api_ip4_arp_event_t *); - s = format (s, "pid %d: %U", event->pid, - format_ip4_address, &event->address); + s = format (s, "pid %d: ", event->pid); + if (event->mac_ip) + s = format (s, "bd mac/ip4 binding events"); + else + s = format (s, "resolution for %U", format_ip4_address, &event->address); + return s; +} + +static u8 * +format_nd_event (u8 * s, va_list * args) +{ + vl_api_ip6_nd_event_t 
*event = va_arg (*args, vl_api_ip6_nd_event_t *); + + s = format (s, "pid %d: ", event->pid); + if (event->mac_ip) + s = format (s, "bd mac/ip6 binding events"); + else + s = format (s, "resolution for %U", format_ip6_address, event->address); return s; } static clib_error_t * -show_ip4_arp_events_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) +show_ip_arp_nd_events_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) { vpe_api_main_t *am = &vpe_api_main; - vl_api_ip4_arp_event_t *event; + vl_api_ip4_arp_event_t *arp_event; + vl_api_ip6_nd_event_t *nd_event; - if (pool_elts (am->arp_events) == 0) + if ((pool_elts (am->arp_events) == 0) && (pool_elts (am->nd_events) == 0)) { - vlib_cli_output (vm, "No active arp event registrations"); + vlib_cli_output (vm, "No active arp or nd event registrations"); return 0; } /* *INDENT-OFF* */ - pool_foreach (event, am->arp_events, + pool_foreach (arp_event, am->arp_events, + ({ + vlib_cli_output (vm, "%U", format_arp_event, arp_event); + })); + + pool_foreach (nd_event, am->nd_events, ({ - vlib_cli_output (vm, "%U", format_arp_event, event); + vlib_cli_output (vm, "%U", format_nd_event, nd_event); })); /* *INDENT-ON* */ @@ -8463,10 +8645,10 @@ show_ip4_arp_events_fn (vlib_main_t * vm, } /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_ip4_arp_events, static) = { - .path = "show arp event registrations", - .function = show_ip4_arp_events_fn, - .short_help = "Show arp event registrations", +VLIB_CLI_COMMAND (show_ip_arp_nd_events, static) = { + .path = "show arp-nd-event registrations", + .function = show_ip_arp_nd_events_fn, + .short_help = "Show ip4 arp and ip6 nd event registrations", }; /* *INDENT-ON* */ diff --git a/vpp/vpp-api/custom_dump.c b/vpp/vpp-api/custom_dump.c index 87331eb166c..913ad1c6e6c 100644 --- a/vpp/vpp-api/custom_dump.c +++ b/vpp/vpp-api/custom_dump.c @@ -1708,6 +1708,20 @@ static void *vl_api_want_ip4_arp_events_t_print FINISH; } +static void 
*vl_api_want_ip6_nd_events_t_print + (vl_api_want_ip6_nd_events_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: want_ip6_nd_events "); + s = format (s, "pid %d address %U ", mp->pid, + format_ip6_address, mp->address); + if (mp->enable_disable == 0) + s = format (s, "del "); + + FINISH; +} + static void *vl_api_input_acl_set_interface_t_print (vl_api_input_acl_set_interface_t * mp, void *handle) { @@ -2688,6 +2702,7 @@ _(VXLAN_GPE_ADD_DEL_TUNNEL, vxlan_gpe_add_del_tunnel) \ _(VXLAN_GPE_TUNNEL_DUMP, vxlan_gpe_tunnel_dump) \ _(INTERFACE_NAME_RENUMBER, interface_name_renumber) \ _(WANT_IP4_ARP_EVENTS, want_ip4_arp_events) \ +_(WANT_IP6_ND_EVENTS, want_ip6_nd_events) \ _(INPUT_ACL_SET_INTERFACE, input_acl_set_interface) \ _(IP_ADDRESS_DUMP, ip_address_dump) \ _(IP_DUMP, ip_dump) \ diff --git a/vpp/vpp-api/vpe.api b/vpp/vpp-api/vpe.api index 386ff1639ee..0bfa2fa21ac 100644 --- a/vpp/vpp-api/vpe.api +++ b/vpp/vpp-api/vpe.api @@ -3071,6 +3071,7 @@ define want_ip4_arp_events_reply @param pid - client pid registered to receive notification @param sw_if_index - interface which received ARP packet @param new_mac - the new mac address + @param mac_ip - 0: resolution event, 1: mac/ip binding in bd */ define ip4_arp_event { @@ -3080,8 +3081,55 @@ define ip4_arp_event u32 pid; u32 sw_if_index; u8 new_mac[6]; + u8 mac_ip; }; +/** \brief Register for ip6 nd resolution events + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param enable_disable - 1 => register for events, 0 => cancel registration + @param pid - sender's pid + @param address - the exact ip6 address of interest +*/ +define want_ip6_nd_events +{ + u32 client_index; + u32 context; + u8 enable_disable; + u32 pid; + u8 address[16]; +}; + +/** \brief Reply for ip6 nd resolution events registration + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define want_ip6_nd_events_reply +{ 
+ u32 context; + i32 retval; +}; + +/** \brief Tell client about an ip6 nd resolution or mac/ip event + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param pid - client pid registered to receive notification + @param sw_if_index - interface which received ND packet + @param address - the exact ip6 address of interest + @param new_mac - the new mac address + @param mac_ip - 0: resolution event, 1: mac/ip binding in bd +*/ +define ip6_nd_event +{ + u32 client_index; + u32 context; + u32 pid; + u32 sw_if_index; + u8 address[16]; + u8 new_mac[6]; + u8 mac_ip; +}; + /** \brief L2 bridge domain add or delete request @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request -- cgit 1.2.3-korg