summaryrefslogtreecommitdiffstats
path: root/vnet
diff options
context:
space:
mode:
authorNeale Ranns <nranns@cisco.com>2016-10-08 13:03:40 +0100
committerDave Barach <openvpp@barachs.net>2016-10-14 13:50:39 +0000
commitb80c536e34b610ca77cd84448754e4bd9c46cf68 (patch)
treed7a868cdd657a3a54ff9eef76bfe3e7e4678e6d3 /vnet
parent3ae1a91430a341cd9ca96023e4fb619efe7cac7e (diff)
FIB2.0: Adjacency complete pull model (VPP-487)
Change the adjacency completion model to pull not push. A complete adjacency has a rewrite string, an incomplete one does not. The re-write string for a peer comes either from a discovery protocol (i.e. ARP/ND) or can be directly derived from the link type (e.g. GRE tunnels). Which method is used is interface-type specific. For each packet type sent on a link to a peer there is a corresponding adjacency. For example, if there is a peer 10.0.0.1 on Eth0 and we need to send to it IPv4 and MPLS packets, there will be two adjacencies; one for the IPv4 and one for the MPLS packets. The adjacencies are thus distinguished by the packets they carry; this is known as the adjacency's 'link-type'. It is not an L3 packet type, since the adjacency can have a link type of Ethernet (for L2 over GRE). The discovery protocols are not aware of all the link types required - only the FIB is. The FIB will create adjacencies as and when they are required, and it is thus desirable to 'pull' from the discovery protocol the re-write required. The alternative (that we have now) is that the discovery protocol pushes (i.e. creates) adjacencies for each link type - this creates more adjacencies than we need. To pull, FIB now requests the interface-type to 'complete' the adjacency. The interface can then delegate to the discovery protocol (on ethernet links) or directly build the re-write (e.g. on GRE). Change-Id: I61451789ae03f26b1012d8d6524007b769b6c6ee Signed-off-by: Neale Ranns <nranns@cisco.com>
Diffstat (limited to 'vnet')
-rw-r--r--vnet/vnet/adj/adj.c197
-rw-r--r--vnet/vnet/adj/adj.h22
-rw-r--r--vnet/vnet/adj/adj_glean.c3
-rw-r--r--vnet/vnet/adj/adj_glean.h5
-rw-r--r--vnet/vnet/adj/adj_internal.h26
-rw-r--r--vnet/vnet/adj/adj_l2.c3
-rw-r--r--vnet/vnet/adj/adj_midchain.c93
-rw-r--r--vnet/vnet/adj/adj_nbr.c618
-rw-r--r--vnet/vnet/adj/adj_nbr.h60
-rw-r--r--vnet/vnet/adj/adj_rewrite.c6
-rw-r--r--vnet/vnet/adj/adj_types.h15
-rw-r--r--vnet/vnet/dhcp/client.c22
-rw-r--r--vnet/vnet/ethernet/arp.c553
-rw-r--r--vnet/vnet/ethernet/ethernet.h7
-rw-r--r--vnet/vnet/ethernet/interface.c72
-rw-r--r--vnet/vnet/fib/fib_entry.c44
-rw-r--r--vnet/vnet/fib/fib_path.c14
-rw-r--r--vnet/vnet/fib/fib_test.c29
-rw-r--r--vnet/vnet/fib/fib_types.h14
-rw-r--r--vnet/vnet/gre/gre.c243
-rw-r--r--vnet/vnet/gre/gre.h17
-rw-r--r--vnet/vnet/gre/interface.c281
-rw-r--r--vnet/vnet/hdlc/hdlc.c33
-rw-r--r--vnet/vnet/interface.c59
-rw-r--r--vnet/vnet/interface.h69
-rw-r--r--vnet/vnet/interface_funcs.h16
-rw-r--r--vnet/vnet/ip/format.h1
-rw-r--r--vnet/vnet/ip/ip4_forward.c9
-rw-r--r--vnet/vnet/ip/ip6.h4
-rw-r--r--vnet/vnet/ip/ip6_forward.c13
-rw-r--r--vnet/vnet/ip/ip6_neighbor.c307
-rw-r--r--vnet/vnet/ipsec/ipsec_if.c27
-rw-r--r--vnet/vnet/l2tp/l2tp.c18
-rw-r--r--vnet/vnet/lisp-cp/lisp_types.c32
-rw-r--r--vnet/vnet/lisp-cp/lisp_types.h2
-rw-r--r--vnet/vnet/lisp-gpe/interface.c12
-rw-r--r--vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c194
-rw-r--r--vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h14
-rw-r--r--vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c20
-rw-r--r--vnet/vnet/llc/llc.c27
-rw-r--r--vnet/vnet/mpls/interface.c52
-rw-r--r--vnet/vnet/mpls/mpls.h3
-rw-r--r--vnet/vnet/mpls/mpls_output.c2
-rw-r--r--vnet/vnet/pg/stream.c22
-rw-r--r--vnet/vnet/ppp/ppp.c31
-rw-r--r--vnet/vnet/replication.c6
-rw-r--r--vnet/vnet/rewrite.c80
-rw-r--r--vnet/vnet/rewrite.h33
-rw-r--r--vnet/vnet/srp/interface.c34
-rw-r--r--vnet/vnet/unix/tapcli.c1
-rw-r--r--vnet/vnet/unix/tuntap.c1
-rw-r--r--vnet/vnet/vxlan-gpe/vxlan_gpe.c14
-rw-r--r--vnet/vnet/vxlan/vxlan.c12
53 files changed, 1968 insertions, 1524 deletions
diff --git a/vnet/vnet/adj/adj.c b/vnet/vnet/adj/adj.c
index 8f9d96ef..0bdecc6a 100644
--- a/vnet/vnet/adj/adj.c
+++ b/vnet/vnet/adj/adj.c
@@ -92,43 +92,50 @@ adj_index_is_special (adj_index_t adj_index)
u8 *
format_ip_adjacency (u8 * s, va_list * args)
{
- vnet_main_t * vnm = va_arg (*args, vnet_main_t *);
- u32 adj_index = va_arg (*args, u32);
- format_ip_adjacency_flags_t fiaf = va_arg (*args, format_ip_adjacency_flags_t);
- ip_adjacency_t * adj = adj_get(adj_index);
+ format_ip_adjacency_flags_t fiaf;
+ ip_adjacency_t * adj;
+ u32 adj_index;
+
+ adj_index = va_arg (*args, u32);
+ fiaf = va_arg (*args, format_ip_adjacency_flags_t);
+ adj = adj_get(adj_index);
- switch (adj->lookup_next_index)
- {
- case IP_LOOKUP_NEXT_REWRITE:
- s = format (s, "%U", format_adj_nbr, adj_index, 0);
- break;
- case IP_LOOKUP_NEXT_ARP:
- s = format (s, "%U", format_adj_nbr_incomplete, adj_index, 0);
- break;
- case IP_LOOKUP_NEXT_GLEAN:
- s = format (s, " %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm,
- adj->rewrite_header.sw_if_index));
- break;
-
- case IP_LOOKUP_NEXT_MIDCHAIN:
- s = format (s, "%U", format_adj_midchain, adj_index, 2);
- break;
- default:
- break;
- }
- s = format (s, " index:%d", adj_index);
-
- if (fiaf & FORMAT_IP_ADJACENCY_DETAIL)
- {
- s = format (s, " locks:%d", adj->ia_node.fn_locks);
- s = format(s, "\nchildren:\n ");
- s = fib_node_children_format(adj->ia_node.fn_children, s);
- }
-
- return s;
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_REWRITE:
+ s = format (s, "%U", format_adj_nbr, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_ARP:
+ s = format (s, "%U", format_adj_nbr_incomplete, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_GLEAN:
+ s = format (s, "%U", format_adj_glean, adj_index, 0);
+ break;
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ s = format (s, "%U", format_adj_midchain, adj_index, 2);
+ break;
+ default:
+ break;
+ }
+
+ if (fiaf & FORMAT_IP_ADJACENCY_DETAIL)
+ {
+ s = format (s, "\n locks:%d", adj->ia_node.fn_locks);
+ s = format (s, " node:[%d]:%U",
+ adj->rewrite_header.node_index,
+ format_vlib_node_name, vlib_get_main(),
+ adj->rewrite_header.node_index);
+ s = format (s, " next:[%d]:%U",
+ adj->rewrite_header.next_index,
+ format_vlib_next_node_name,
+ vlib_get_main(),
+ adj->rewrite_header.node_index,
+ adj->rewrite_header.next_index);
+ s = format(s, "\n children:\n ");
+ s = fib_node_children_format(adj->ia_node.fn_children, s);
+ }
+
+ return s;
}
/*
@@ -139,9 +146,13 @@ format_ip_adjacency (u8 * s, va_list * args)
static void
adj_last_lock_gone (ip_adjacency_t *adj)
{
+ vlib_main_t * vm = vlib_get_main();
+
ASSERT(0 == fib_node_list_get_size(adj->ia_node.fn_children));
ADJ_DBG(adj, "last-lock-gone");
+ vlib_worker_thread_barrier_sync (vm);
+
switch (adj->lookup_next_index)
{
case IP_LOOKUP_NEXT_MIDCHAIN:
@@ -168,6 +179,8 @@ adj_last_lock_gone (ip_adjacency_t *adj)
break;
}
+ vlib_worker_thread_barrier_release(vm);
+
fib_node_deinit(&adj->ia_node);
pool_put(adj_pool, adj);
}
@@ -239,6 +252,49 @@ adj_child_remove (adj_index_t adj_index,
sibling_index);
}
+/**
+ * @brief Return the link type of the adjacency
+ */
+vnet_link_t
+adj_get_link_type (adj_index_t ai)
+{
+ const ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+
+ return (adj->ia_link);
+}
+
+/**
+ * @brief Return the sw interface index of the adjacency.
+ */
+u32
+adj_get_sw_if_index (adj_index_t ai)
+{
+ const ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+
+ return (adj->rewrite_header.sw_if_index);
+}
+
+/**
+ * @brief Return the rewrite string of the adjacency
+ */
+const u8*
+adj_get_rewrite (adj_index_t ai)
+{
+ vnet_rewrite_header_t *rw;
+ ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+ rw = &adj->rewrite_header;
+
+ ASSERT (rw->data_bytes != 0xfefe);
+
+ return (rw->data - rw->data_bytes);
+}
+
static fib_node_t *
adj_get_node (fib_node_index_t index)
{
@@ -289,7 +345,7 @@ adj_module_init (vlib_main_t * vm)
adj_midchain_module_init();
/*
- * 4 special adjs for v4 and v6 resp.
+ * one special adj to reserve index 0
*/
special_v4_miss_adj_with_index_zero = adj_alloc(FIB_PROTOCOL_IP4);
@@ -298,10 +354,73 @@ adj_module_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (adj_module_init);
+static clib_error_t *
+adj_show (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ adj_index_t ai = ADJ_INDEX_INVALID;
+ u32 sw_if_index = ~0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &ai))
+ ;
+ else if (unformat (input, "%U",
+ unformat_vnet_sw_interface, vnet_get_main(),
+ &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (ADJ_INDEX_INVALID != ai)
+ {
+ vlib_cli_output (vm, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_DETAIL);
+ }
+ else
+ {
+ /* *INDENT-OFF* */
+ pool_foreach_index(ai, adj_pool,
+ ({
+ if (~0 != sw_if_index &&
+ sw_if_index == adj_get_sw_if_index(ai))
+ {
+ vlib_cli_output (vm, "[@%d] %U",
+ ai,
+ format_ip_adjacency, ai,
+ FORMAT_IP_ADJACENCY_NONE);
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ return 0;
+}
+
+/*?
+ * Show all adjacencies.
+ * @cliexpar
+ * @cliexstart{sh adj}
+ * [@0]
+ * [@1] glean: loop0
+ * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (adj_show_command, static) = {
+ .path = "show adj",
+ .short_help = "show adj [<adj_index>] [interface]",
+ .function = adj_show,
+};
+
/*
* DEPRECATED: DO NOT USE
- *
- * Create new block of given number of contiguous adjacencies.
*/
ip_adjacency_t *
ip_add_adjacency (ip_lookup_main_t * lm,
diff --git a/vnet/vnet/adj/adj.h b/vnet/vnet/adj/adj.h
index 002dab35..e85625db 100644
--- a/vnet/vnet/adj/adj.h
+++ b/vnet/vnet/adj/adj.h
@@ -76,6 +76,28 @@ extern void adj_child_remove(adj_index_t adj_index,
u32 sibling_index);
/**
+ * @brief Walk the Adjacencies on a given interface
+ */
+extern void adj_walk (u32 sw_if_index,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Return the link type of the adjacency
+ */
+extern vnet_link_t adj_get_link_type (adj_index_t ai);
+
+/**
+ * @brief Return the sw interface index of the adjacency.
+ */
+extern u32 adj_get_sw_if_index (adj_index_t ai);
+
+/**
+ * @brief Return the rewrite string of the adjacency
+ */
+extern const u8* adj_get_rewrite (adj_index_t ai);
+
+/**
* @brief
* The global adjacnecy pool. Exposed for fast/inline data-plane access
*/
diff --git a/vnet/vnet/adj/adj_glean.c b/vnet/vnet/adj/adj_glean.c
index f5d18101..290af1fd 100644
--- a/vnet/vnet/adj/adj_glean.c
+++ b/vnet/vnet/adj/adj_glean.c
@@ -188,7 +188,8 @@ format_adj_glean (u8* s, va_list *ap)
vnet_main_t * vnm = vnet_get_main();
ip_adjacency_t * adj = adj_get(index);
- return (format(s, " glean: %U",
+ return (format(s, "%U-glean: %U",
+ format_fib_protocol, adj->ia_nh_proto,
format_vnet_sw_interface_name,
vnm,
vnet_get_sw_interface(vnm,
diff --git a/vnet/vnet/adj/adj_glean.h b/vnet/vnet/adj/adj_glean.h
index ce3534ec..640bd2f9 100644
--- a/vnet/vnet/adj/adj_glean.h
+++ b/vnet/vnet/adj/adj_glean.h
@@ -48,6 +48,11 @@ extern adj_index_t adj_glean_add_or_lock(fib_protocol_t proto,
const ip46_address_t *nh_addr);
/**
+ * @brief Format/display a glean adjacency.
+ */
+extern u8* format_adj_glean(u8* s, va_list *ap);
+
+/**
* @brief
* Module initialisation
*/
diff --git a/vnet/vnet/adj/adj_internal.h b/vnet/vnet/adj/adj_internal.h
index 25a477ad..f882bff8 100644
--- a/vnet/vnet/adj/adj_internal.h
+++ b/vnet/vnet/adj/adj_internal.h
@@ -41,21 +41,21 @@
#define ADJ_DBG(_e, _fmt, _args...)
#endif
-static inline vlib_node_registration_t*
+static inline u32
adj_get_rewrite_node (fib_link_t linkt)
{
switch (linkt) {
case FIB_LINK_IP4:
- return (&ip4_rewrite_node);
+ return (ip4_rewrite_node.index);
case FIB_LINK_IP6:
- return (&ip6_rewrite_node);
+ return (ip6_rewrite_node.index);
case FIB_LINK_MPLS:
- return (&mpls_output_node);
+ return (mpls_output_node.index);
case FIB_LINK_ETHERNET:
- return (&adj_l2_rewrite_node);
+ return (adj_l2_rewrite_node.index);
}
ASSERT(0);
- return (NULL);
+ return (0);
}
static inline vnet_l3_packet_type_t
@@ -75,17 +75,17 @@ adj_fib_link_2_vnet (fib_link_t linkt)
return (0);
}
-static inline vnet_l3_packet_type_t
+static inline vnet_link_t
adj_fib_proto_2_nd (fib_protocol_t fp)
{
switch (fp)
{
case FIB_PROTOCOL_IP4:
- return (VNET_L3_PACKET_TYPE_ARP);
+ return (VNET_LINK_ARP);
case FIB_PROTOCOL_IP6:
- return (VNET_L3_PACKET_TYPE_IP6);
+ return (VNET_LINK_IP6);
case FIB_PROTOCOL_MPLS:
- return (VNET_L3_PACKET_TYPE_MPLS_UNICAST);
+ return (VNET_LINK_MPLS);
}
return (0);
}
@@ -100,6 +100,12 @@ adj_get_index (ip_adjacency_t *adj)
return (adj - adj_pool);
}
+extern void adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
+ adj_nbr_rewrite_flag_t flags,
+ u32 complete_next_index,
+ u32 next_index,
+ u8 *rewrite);
+
extern ip_adjacency_t * adj_alloc(fib_protocol_t proto);
extern void adj_nbr_remove(fib_protocol_t nh_proto,
diff --git a/vnet/vnet/adj/adj_l2.c b/vnet/vnet/adj/adj_l2.c
index cf0f0444..2bb28a20 100644
--- a/vnet/vnet/adj/adj_l2.c
+++ b/vnet/vnet/adj/adj_l2.c
@@ -32,11 +32,10 @@ format_adj_l2_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
adj_l2_trace_t * t = va_arg (*args, adj_l2_trace_t *);
- vnet_main_t * vnm = vnet_get_main();
s = format (s, "adj-idx %d : %U",
t->adj_index,
- format_ip_adjacency, vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE);
+ format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE);
return s;
}
diff --git a/vnet/vnet/adj/adj_midchain.c b/vnet/vnet/adj/adj_midchain.c
index c40d4e8b..f42e3a90 100644
--- a/vnet/vnet/adj/adj_midchain.c
+++ b/vnet/vnet/adj/adj_midchain.c
@@ -130,7 +130,7 @@ format_adj_midchain_tx_trace (u8 * s, va_list * args)
adj_midchain_tx_trace_t *tr = va_arg (*args, adj_midchain_tx_trace_t*);
s = format(s, "adj-midchain:[%d]:%U", tr->ai,
- format_ip_adjacency, vnet_get_main(), tr->ai,
+ format_ip_adjacency, tr->ai,
FORMAT_IP_ADJACENCY_NONE);
return (s);
@@ -294,7 +294,17 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
ASSERT(ADJ_INDEX_INVALID != adj_index);
adj = adj_get(adj_index);
- adj->lookup_next_index = IP_LOOKUP_NEXT_MIDCHAIN;
+
+ /*
+ * one time only update. since we don't support changing the tunnel
+ * src,dst, this is all we need.
+ */
+ ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_ARP);
+ /*
+ * tunnels can always provide a rewrite.
+ */
+ ASSERT(NULL != rewrite);
+
adj->sub_type.midchain.fixup_func = fixup;
cm = adj_midchain_get_cofing_for_link_type(adj);
@@ -334,69 +344,26 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
cm->config_index_by_sw_if_index[adj->rewrite_header.sw_if_index] = ci;
- if (NULL != rewrite)
- {
- /*
- * new rewrite provided.
- * use a dummy rewrite header to get the interface to print into.
- */
- ip_adjacency_t dummy;
- dpo_id_t tmp = DPO_NULL;
-
- vnet_rewrite_for_tunnel(vnet_get_main(),
- adj->rewrite_header.sw_if_index,
- adj_get_midchain_node(adj->ia_link),
- adj->sub_type.midchain.tx_function_node,
- &dummy.rewrite_header,
- rewrite,
- vec_len(rewrite));
-
- /*
- * this is an update of an existing rewrite.
- * packets are in flight. we'll need to briefly stack on the drop DPO
- * whilst the rewrite is written, so any packets that see the partial update
- * are binned.
- */
- if (!dpo_id_is_valid(&adj->sub_type.midchain.next_dpo))
- {
- /*
- * not stacked yet. stack on the drop
- */
- dpo_stack(DPO_ADJACENCY_MIDCHAIN,
- fib_link_to_dpo_proto(adj->ia_link),
- &adj->sub_type.midchain.next_dpo,
- drop_dpo_get(fib_link_to_dpo_proto(adj->ia_link)));
- }
-
- dpo_copy(&tmp, &adj->sub_type.midchain.next_dpo);
- dpo_stack(DPO_ADJACENCY_MIDCHAIN,
- fib_link_to_dpo_proto(adj->ia_link),
- &adj->sub_type.midchain.next_dpo,
- drop_dpo_get(fib_link_to_dpo_proto(adj->ia_link)));
-
- CLIB_MEMORY_BARRIER();
- clib_memcpy(&adj->rewrite_header,
- &dummy.rewrite_header,
- VLIB_BUFFER_PRE_DATA_SIZE);
-
- CLIB_MEMORY_BARRIER();
+ /*
+ * stack the midchain on the drop so it's ready to forward in the adj-midchain-tx.
+ * The graph arc used/created here is from the midchain-tx node to the
+ * child's registered node. This is because post adj processing the next
+ * node are any output features, then the midchain-tx. from there we
+ * need to get to the stacked child's node.
+ */
+ dpo_stack_from_node(adj->sub_type.midchain.tx_function_node,
+ &adj->sub_type.midchain.next_dpo,
+ drop_dpo_get(fib_link_to_dpo_proto(adj->ia_link)));
- /*
- * The graph arc used/created here is from the midchain-tx node to the
- * child's registered node. This is because post adj processing the next
- * node are any output features, then the midchain-tx. from there we
- * need to get to the stacked child's node.
- */
- dpo_stack_from_node(adj->sub_type.midchain.tx_function_node,
- &adj->sub_type.midchain.next_dpo,
- &tmp);
- dpo_reset(&tmp);
- }
- else
- {
- ASSERT(0);
- }
+ /*
+ * update the rewrite with the workers paused.
+ */
+ adj_nbr_update_rewrite_internal(adj,
+ IP_LOOKUP_NEXT_MIDCHAIN,
+ adj_get_midchain_node(adj->ia_link),
+ adj->sub_type.midchain.tx_function_node,
+ rewrite);
/*
* time for walkies fido.
diff --git a/vnet/vnet/adj/adj_nbr.c b/vnet/vnet/adj/adj_nbr.c
index 23e40a68..5351520e 100644
--- a/vnet/vnet/adj/adj_nbr.c
+++ b/vnet/vnet/adj/adj_nbr.c
@@ -115,152 +115,19 @@ adj_nbr_find (fib_protocol_t nh_proto,
}
}
-static inline vlib_node_registration_t*
+static inline u32
adj_get_nd_node (fib_protocol_t proto)
{
switch (proto) {
case FIB_PROTOCOL_IP4:
- return (&ip4_arp_node);
+ return (ip4_arp_node.index);
case FIB_PROTOCOL_IP6:
- return (&ip6_discover_neighbor_node);
+ return (ip6_discover_neighbor_node.index);
case FIB_PROTOCOL_MPLS:
break;
}
ASSERT(0);
- return (NULL);
-}
-
-static void
-adj_ip4_nbr_probe (ip_adjacency_t *adj)
-{
- vnet_main_t * vnm = vnet_get_main();
- ip4_main_t * im = &ip4_main;
- ip_interface_address_t * ia;
- ethernet_arp_header_t * h;
- vnet_hw_interface_t * hi;
- vnet_sw_interface_t * si;
- ip4_address_t * src;
- vlib_buffer_t * b;
- vlib_main_t * vm;
- u32 bi = 0;
-
- vm = vlib_get_main();
-
- si = vnet_get_sw_interface (vnm,
- adj->rewrite_header.sw_if_index);
-
- if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
- {
- return;
- }
-
- src =
- ip4_interface_address_matching_destination(im,
- &adj->sub_type.nbr.next_hop.ip4,
- adj->rewrite_header.sw_if_index,
- &ia);
- if (! src)
- {
- return;
- }
-
- h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
-
- hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
-
- clib_memcpy (h->ip4_over_ethernet[0].ethernet,
- hi->hw_address,
- sizeof (h->ip4_over_ethernet[0].ethernet));
-
- h->ip4_over_ethernet[0].ip4 = src[0];
- h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
-
- b = vlib_get_buffer (vm, bi);
- vnet_buffer (b)->sw_if_index[VLIB_RX] =
- vnet_buffer (b)->sw_if_index[VLIB_TX] =
- adj->rewrite_header.sw_if_index;
-
- /* Add encapsulation string for software interface (e.g. ethernet header). */
- vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
- vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
-
- {
- vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
- u32 * to_next = vlib_frame_vector_args (f);
- to_next[0] = bi;
- f->n_vectors = 1;
- vlib_put_frame_to_node (vm, hi->output_node_index, f);
- }
-}
-
-static void
-adj_ip6_nbr_probe (ip_adjacency_t *adj)
-{
- icmp6_neighbor_solicitation_header_t * h;
- vnet_main_t * vnm = vnet_get_main();
- ip6_main_t * im = &ip6_main;
- ip_interface_address_t * ia;
- ip6_address_t * dst, *src;
- vnet_hw_interface_t * hi;
- vnet_sw_interface_t * si;
- vlib_buffer_t * b;
- int bogus_length;
- vlib_main_t * vm;
- u32 bi = 0;
-
- vm = vlib_get_main();
-
- si = vnet_get_sw_interface(vnm, adj->rewrite_header.sw_if_index);
- dst = &adj->sub_type.nbr.next_hop.ip6;
-
- if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
- {
- return;
- }
- src = ip6_interface_address_matching_destination(im, dst,
- adj->rewrite_header.sw_if_index,
- &ia);
- if (! src)
- {
- return;
- }
-
- h = vlib_packet_template_get_packet(vm,
- &im->discover_neighbor_packet_template,
- &bi);
-
- hi = vnet_get_sup_hw_interface(vnm, adj->rewrite_header.sw_if_index);
-
- h->ip.dst_address.as_u8[13] = dst->as_u8[13];
- h->ip.dst_address.as_u8[14] = dst->as_u8[14];
- h->ip.dst_address.as_u8[15] = dst->as_u8[15];
- h->ip.src_address = src[0];
- h->neighbor.target_address = dst[0];
-
- clib_memcpy (h->link_layer_option.ethernet_address,
- hi->hw_address,
- vec_len(hi->hw_address));
-
- h->neighbor.icmp.checksum =
- ip6_tcp_udp_icmp_compute_checksum(vm, 0, &h->ip, &bogus_length);
- ASSERT(bogus_length == 0);
-
- b = vlib_get_buffer (vm, bi);
- vnet_buffer (b)->sw_if_index[VLIB_RX] =
- vnet_buffer (b)->sw_if_index[VLIB_TX] =
- adj->rewrite_header.sw_if_index;
-
- /* Add encapsulation string for software interface (e.g. ethernet header). */
- vnet_rewrite_one_header(adj[0], h, sizeof (ethernet_header_t));
- vlib_buffer_advance(b, -adj->rewrite_header.data_bytes);
-
- {
- vlib_frame_t * f = vlib_get_frame_to_node(vm, hi->output_node_index);
- u32 * to_next = vlib_frame_vector_args(f);
- to_next[0] = bi;
- f->n_vectors = 1;
- vlib_put_frame_to_node(vm, hi->output_node_index, f);
- }
+ return (ip4_arp_node.index);
}
static ip_adjacency_t*
@@ -285,6 +152,7 @@ adj_nbr_alloc (fib_protocol_t nh_proto,
adj->sub_type.nbr.next_hop = *nh_addr;
adj->ia_link = link_type;
adj->ia_nh_proto = nh_proto;
+ adj->rewrite_header.sw_if_index = sw_if_index;
memset(&adj->sub_type.midchain.next_dpo, 0,
sizeof(adj->sub_type.midchain.next_dpo));
@@ -300,8 +168,6 @@ adj_nbr_alloc (fib_protocol_t nh_proto,
* - the Next-hops protocol (i.e. v4 or v6)
* - the address of the next-hop
* - the interface the next-hop is reachable through
- * - fib_index; this is broken. i will fix it.
- * the adj lookup currently occurs in the FIB.
*/
adj_index_t
adj_nbr_add_or_lock (fib_protocol_t nh_proto,
@@ -316,55 +182,31 @@ adj_nbr_add_or_lock (fib_protocol_t nh_proto,
if (ADJ_INDEX_INVALID == adj_index)
{
+ vnet_main_t *vnm;
+
+ vnm = vnet_get_main();
adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
+ adj_index = adj_get_index(adj);
+ adj_lock(adj_index);
+
+ vnet_rewrite_init(vnm, sw_if_index,
+ adj_get_nd_node(nh_proto),
+ vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
+ &adj->rewrite_header);
/*
- * If there is no next-hop, this is the 'auto-adj' used on p2p
- * links instead of a glean.
+ * we need a rewrite where the destination IP address is converted
+ * to the appropriate link-layer address. This is interface specific.
+ * So ask the interface to do it.
*/
- if (ip46_address_is_zero(nh_addr))
- {
- adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
-
- vnet_rewrite_for_sw_interface(vnet_get_main(),
- adj_fib_link_2_vnet(link_type),
- sw_if_index,
- adj_get_rewrite_node(link_type)->index,
- VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
- &adj->rewrite_header,
- sizeof (adj->rewrite_data));
- }
- else
- {
- vnet_rewrite_for_sw_interface(vnet_get_main(),
- adj_fib_proto_2_nd(nh_proto),
- sw_if_index,
- adj_get_nd_node(nh_proto)->index,
- VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
- &adj->rewrite_header,
- sizeof (adj->rewrite_data));
-
- switch (nh_proto)
- {
- case FIB_PROTOCOL_IP4:
- adj_ip4_nbr_probe(adj);
- break;
- case FIB_PROTOCOL_IP6:
- adj_ip6_nbr_probe(adj);
- break;
- case FIB_PROTOCOL_MPLS:
- break;
- }
- }
+ vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, adj_index);
}
else
{
- adj = adj_get(adj_index);
+ adj_lock(adj_index);
}
- adj_lock(adj_get_index(adj));
-
- return (adj_get_index(adj));
+ return (adj_index);
}
adj_index_t
@@ -390,7 +232,9 @@ adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto,
}
adj_lock(adj_get_index(adj));
- adj_nbr_update_rewrite(adj_get_index(adj), rewrite);
+ adj_nbr_update_rewrite(adj_get_index(adj),
+ ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ rewrite);
return (adj_get_index(adj));
}
@@ -404,86 +248,134 @@ adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto,
*/
void
adj_nbr_update_rewrite (adj_index_t adj_index,
+ adj_nbr_rewrite_flag_t flags,
u8 *rewrite)
{
ip_adjacency_t *adj;
+ u32 old_next;
ASSERT(ADJ_INDEX_INVALID != adj_index);
adj = adj_get(adj_index);
+ old_next = adj->lookup_next_index;
- if (NULL != rewrite)
+ if (flags & ADJ_NBR_REWRITE_FLAG_COMPLETE)
{
/*
- * new rewrite provided.
- * use a dummy rewrite header to get the interface to print into.
- */
- ip_adjacency_t dummy;
-
- vnet_rewrite_for_sw_interface(vnet_get_main(),
- adj_fib_link_2_vnet(adj->ia_link),
- adj->rewrite_header.sw_if_index,
- adj_get_rewrite_node(adj->ia_link)->index,
- rewrite,
- &dummy.rewrite_header,
- sizeof (dummy.rewrite_data));
-
- if (IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index)
- {
- /*
- * this is an update of an existing rewrite.
- * we can't just paste in the new rewrite as that is not atomic.
- * So we briefly swap the ADJ to ARP type, paste, then swap back.
- */
- adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
- CLIB_MEMORY_BARRIER();
- }
- /*
- * else
- * this is the first time the rewrite is added.
- * paste it on then swap the next type.
+ * update the adj's rewrite string and build the arc
+ * from the rewrite node to the interface's TX node
*/
- clib_memcpy(&adj->rewrite_header,
- &dummy.rewrite_header,
- VLIB_BUFFER_PRE_DATA_SIZE);
-
- adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+ adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_REWRITE,
+ adj_get_rewrite_node(adj->ia_link),
+ vnet_tx_node_index_for_sw_interface(
+ vnet_get_main(),
+ adj->rewrite_header.sw_if_index),
+ rewrite);
}
else
{
+ adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_ARP,
+ adj_get_nd_node(adj->ia_nh_proto),
+ vnet_tx_node_index_for_sw_interface(
+ vnet_get_main(),
+ adj->rewrite_header.sw_if_index),
+ rewrite);
+ }
+
+ if (old_next != adj->lookup_next_index)
+ {
/*
- * clear the rewrite.
+ * time for walkies fido.
+ * The link type MPLS Adj never has children. So if it is this adj
+ * that is updated, we need to walk from its IP sibling.
*/
- adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
- CLIB_MEMORY_BARRIER();
+ if (FIB_LINK_MPLS == adj->ia_link)
+ {
+ adj_index = adj_nbr_find(adj->ia_nh_proto,
+ fib_proto_to_link(adj->ia_nh_proto),
+ &adj->sub_type.nbr.next_hop,
+ adj->rewrite_header.sw_if_index);
+
+ ASSERT(ADJ_INDEX_INVALID != adj_index);
+ }
- adj->rewrite_header.data_bytes = 0;
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
+ /*
+ * This walk only needs to go back one level, but there is no control
+ * here. the first receiving fib_entry_t will quash the walk
+ */
+ };
+
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_index, &bw_ctx);
}
+}
+
+/**
+ * adj_nbr_update_rewrite_internal
+ *
+ * Update the adjacency's rewrite string. A NULL string implies the
+ * rewrite is reset (i.e. when ARP/ND entry is gone).
+ * NB: the adj being updated may be handling traffic in the DP.
+ */
+void
+adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
+ u32 adj_next_index,
+ u32 this_node,
+ u32 next_node,
+ u8 *rewrite)
+{
+ vlib_main_t * vm = vlib_get_main();
/*
- * time for walkies fido.
- * The link type MPLS Adj never has children. So if it is this adj
- * that is updated, we need to walk from its IP sibling.
+ * Updating a rewrite string is not atomic;
+ * - the rewrite string is too long to write in one instruction
+ * - when swapping from incomplete to complete, we also need to update
+ * the VLIB graph next-index.
+ * ideally we would only want to suspend forwarding via this adj whilst we
+ * do this, but we do not have that level of granularity - it's suspend all
+ * worker threads or nothing.
+ * The other choices are:
+ * - to mark the adj down and back walk so child load-balances drop this adj
+ * from the set.
+ * - update the next_node index of this adj to point to error-drop
+ * both of which will mean for MAC change we will drop for this adj
+ * which is not acceptable.
+ * So the pause all threads is preferable. We don't update MAC addresses often
+ * so it's no big deal.
*/
- if (FIB_LINK_MPLS == adj->ia_link)
- {
- adj_index = adj_nbr_find(adj->ia_nh_proto,
- fib_proto_to_link(adj->ia_nh_proto),
- &adj->sub_type.nbr.next_hop,
- adj->rewrite_header.sw_if_index);
+ vlib_worker_thread_barrier_sync(vm);
- ASSERT(ADJ_INDEX_INVALID != adj_index);
- }
+ adj->lookup_next_index = adj_next_index;
- fib_node_back_walk_ctx_t bw_ctx = {
- .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
+ if (NULL != rewrite)
+ {
/*
- * This walk only needs to go back one level, but there is no control here.
- * the first receiving fib_entry_t will quash the walk
+ * new rewrite provided.
+ * fill in the adj's rewrite string, and build the VLIB graph arc.
*/
- };
+ vnet_rewrite_set_data_internal(&adj->rewrite_header,
+ sizeof(adj->rewrite_data),
+ rewrite,
+ vec_len(rewrite));
- fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_index, &bw_ctx);
+ adj->rewrite_header.node_index = this_node;
+ adj->rewrite_header.next_index = vlib_node_add_next (vlib_get_main(),
+ this_node,
+ next_node);
+
+ vec_free(rewrite);
+ }
+ else
+ {
+ vnet_rewrite_clear_data_internal(&adj->rewrite_header,
+ sizeof(adj->rewrite_data));
+ }
+
+ /*
+ * done with the rewrite update - let the workers loose.
+ */
+ vlib_worker_thread_barrier_release(vm);
}
typedef struct adj_db_count_ctx_t_ {
@@ -524,6 +416,152 @@ adj_nbr_db_size (void)
}
/**
+ * @brief Context for a walk of the adjacency neighbour DB
+ */
+typedef struct adj_walk_ctx_t_
+{
+ adj_walk_cb_t awc_cb;
+ void *awc_ctx;
+} adj_walk_ctx_t;
+
+static void
+adj_nbr_walk_cb (BVT(clib_bihash_kv) * kvp,
+ void *arg)
+{
+ adj_walk_ctx_t *ctx = arg;
+
+ // FIXME: can't stop early...
+ ctx->awc_cb(kvp->value, ctx->awc_ctx);
+}
+
+void
+adj_nbr_walk (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
+ return;
+
+ adj_walk_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[adj_nh_proto][sw_if_index],
+ adj_nbr_walk_cb,
+ &awc);
+}
+
+/**
+ * @brief Context for a walk of the adjacency neighbour DB filtered by next-hop
+ */
+typedef struct adj_walk_nh_ctx_t_
+{
+ adj_walk_cb_t awc_cb;
+ void *awc_ctx;
+ const ip46_address_t *awc_nh;
+} adj_walk_nh_ctx_t;
+
+static void
+adj_nbr_walk_nh_cb (BVT(clib_bihash_kv) * kvp,
+ void *arg)
+{
+ ip_adjacency_t *adj;
+ adj_walk_nh_ctx_t *ctx = arg;
+
+ adj = adj_get(kvp->value);
+
+ if (!ip46_address_cmp(&adj->sub_type.nbr.next_hop, ctx->awc_nh))
+ ctx->awc_cb(kvp->value, ctx->awc_ctx);
+}
+
+/**
+ * @brief Walk adjacencies on a link with a given v4 next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh4 (u32 sw_if_index,
+ const ip4_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP4, sw_if_index))
+ return;
+
+ ip46_address_t nh = {
+ .ip4 = *addr,
+ };
+
+ adj_walk_nh_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ .awc_nh = &nh,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[FIB_PROTOCOL_IP4][sw_if_index],
+ adj_nbr_walk_nh_cb,
+ &awc);
+}
+
+/**
+ * @brief Walk adjacencies on a link with a given v6 next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh6 (u32 sw_if_index,
+ const ip6_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP6, sw_if_index))
+ return;
+
+ ip46_address_t nh = {
+ .ip6 = *addr,
+ };
+
+ adj_walk_nh_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ .awc_nh = &nh,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[FIB_PROTOCOL_IP6][sw_if_index],
+ adj_nbr_walk_nh_cb,
+ &awc);
+}
+
+/**
+ * @brief Walk adjacencies on a link with a given next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ const ip46_address_t *nh,
+ adj_walk_cb_t cb,
+ void *ctx)
+{
+ if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
+ return;
+
+ adj_walk_nh_ctx_t awc = {
+ .awc_ctx = ctx,
+ .awc_cb = cb,
+ .awc_nh = nh,
+ };
+
+ BV(clib_bihash_foreach_key_value_pair) (
+ adj_nbr_tables[adj_nh_proto][sw_if_index],
+ adj_nbr_walk_nh_cb,
+ &awc);
+}
+
+/**
* Context for the state change walk of the DB
*/
typedef struct adj_nbr_interface_state_change_ctx_t_
@@ -534,8 +572,8 @@ typedef struct adj_nbr_interface_state_change_ctx_t_
int flags;
} adj_nbr_interface_state_change_ctx_t;
-static void
-adj_nbr_interface_state_change_one (BVT(clib_bihash_kv) * kvp,
+static adj_walk_rc_t
+adj_nbr_interface_state_change_one (adj_index_t ai,
void *arg)
{
/*
@@ -550,7 +588,9 @@ adj_nbr_interface_state_change_one (BVT(clib_bihash_kv) * kvp,
FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
};
- fib_walk_sync(FIB_NODE_TYPE_ADJ, kvp->value, &bw_ctx);
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+ return (ADJ_WALK_RC_CONTINUE);
}
static clib_error_t *
@@ -565,17 +605,13 @@ adj_nbr_interface_state_change (vnet_main_t * vnm,
*/
for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
{
- if (!ADJ_NBR_ITF_OK(proto, sw_if_index))
- continue;
-
adj_nbr_interface_state_change_ctx_t ctx = {
.flags = flags,
};
- BV(clib_bihash_foreach_key_value_pair) (
- adj_nbr_tables[proto][sw_if_index],
- adj_nbr_interface_state_change_one,
- &ctx);
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_interface_state_change_one,
+ &ctx);
}
return (NULL);
@@ -583,8 +619,8 @@ adj_nbr_interface_state_change (vnet_main_t * vnm,
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_nbr_interface_state_change);
-static void
-adj_nbr_interface_delete_one (BVT(clib_bihash_kv) * kvp,
+static adj_walk_rc_t
+adj_nbr_interface_delete_one (adj_index_t ai,
void *arg)
{
/*
@@ -595,7 +631,9 @@ adj_nbr_interface_delete_one (BVT(clib_bihash_kv) * kvp,
.fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
};
- fib_walk_sync(FIB_NODE_TYPE_ADJ, kvp->value, &bw_ctx);
+ fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+ return (ADJ_WALK_RC_CONTINUE);
}
/**
@@ -630,13 +668,9 @@ adj_nbr_interface_add_del (vnet_main_t * vnm,
for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
{
- if (!ADJ_NBR_ITF_OK(proto, sw_if_index))
- continue;
-
- BV(clib_bihash_foreach_key_value_pair) (
- adj_nbr_tables[proto][sw_if_index],
- adj_nbr_interface_delete_one,
- NULL);
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_interface_delete_one,
+ NULL);
}
return (NULL);
@@ -646,15 +680,16 @@ adj_nbr_interface_add_del (vnet_main_t * vnm,
VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_nbr_interface_add_del);
-static void
-adj_nbr_show_one (BVT(clib_bihash_kv) * kvp,
+static adj_walk_rc_t
+adj_nbr_show_one (adj_index_t ai,
void *arg)
{
vlib_cli_output (arg, "[@%d] %U",
- kvp->value,
- format_ip_adjacency,
- vnet_get_main(), kvp->value,
+ ai,
+ format_ip_adjacency, ai,
FORMAT_IP_ADJACENCY_NONE);
+
+ return (ADJ_WALK_RC_CONTINUE);
}
static clib_error_t *
@@ -663,11 +698,16 @@ adj_nbr_show (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
adj_index_t ai = ADJ_INDEX_INVALID;
+ u32 sw_if_index = ~0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "%d", &ai))
;
+ else if (unformat (input, "%U",
+ unformat_vnet_sw_interface, vnet_get_main(),
+ &sw_if_index))
+ ;
else
break;
}
@@ -676,28 +716,31 @@ adj_nbr_show (vlib_main_t * vm,
{
vlib_cli_output (vm, "[@%d] %U",
ai,
-
- format_ip_adjacency,
- vnet_get_main(), ai,
+ format_ip_adjacency, ai,
FORMAT_IP_ADJACENCY_DETAIL);
}
- else
+ else if (~0 != sw_if_index)
{
fib_protocol_t proto;
for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
{
- u32 sw_if_index;
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_show_one,
+ vm);
+ }
+ }
+ else
+ {
+ fib_protocol_t proto;
+ for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
+ {
vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
{
- if (!ADJ_NBR_ITF_OK(proto, sw_if_index))
- continue;
-
- BV(clib_bihash_foreach_key_value_pair) (
- adj_nbr_tables[proto][sw_if_index],
- adj_nbr_show_one,
- vm);
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_show_one,
+ vm);
}
}
}
@@ -705,12 +748,37 @@ adj_nbr_show (vlib_main_t * vm,
return 0;
}
+/*?
+ * Show all neighbour adjacencies.
+ * @cliexpar
+ * @cliexstart{sh adj nbr}
+ * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
+ * @cliexend
+ ?*/
VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
.path = "show adj nbr",
- .short_help = "show adj nbr [<adj_index>] [sw_if_index <index>]",
+ .short_help = "show adj nbr [<adj_index>] [interface]",
.function = adj_nbr_show,
};
+/**
+ * Map a FIB protocol onto the ip46 address type used when formatting a
+ * next-hop. Every protocol other than IPv6 maps to IP46_TYPE_IP4.
+ */
+static ip46_type_t
+adj_proto_to_46 (fib_protocol_t proto)
+{
+    if (FIB_PROTOCOL_IP6 == proto)
+    {
+        return (IP46_TYPE_IP6);
+    }
+
+    /* FIB_PROTOCOL_IP4 and any other protocol value */
+    return (IP46_TYPE_IP4);
+}
+
u8*
format_adj_nbr_incomplete (u8* s, va_list *ap)
{
@@ -721,7 +789,8 @@ format_adj_nbr_incomplete (u8* s, va_list *ap)
s = format (s, "arp-%U", format_fib_link, adj->ia_link);
s = format (s, ": via %U",
- format_ip46_address, &adj->sub_type.nbr.next_hop, IP46_TYPE_ANY);
+ format_ip46_address, &adj->sub_type.nbr.next_hop,
+ adj_proto_to_46(adj->ia_nh_proto));
s = format (s, " %U",
format_vnet_sw_interface_name,
vnm,
@@ -741,7 +810,8 @@ format_adj_nbr (u8* s, va_list *ap)
s = format (s, "%U", format_fib_link, adj->ia_link);
s = format (s, " via %U ",
- format_ip46_address, &adj->sub_type.nbr.next_hop, IP46_TYPE_ANY);
+ format_ip46_address, &adj->sub_type.nbr.next_hop,
+ adj_proto_to_46(adj->ia_nh_proto));
s = format (s, "%U",
format_vnet_rewrite,
vnm->vlib_main, &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
diff --git a/vnet/vnet/adj/adj_nbr.h b/vnet/vnet/adj/adj_nbr.h
index 331423bd..39663b60 100644
--- a/vnet/vnet/adj/adj_nbr.h
+++ b/vnet/vnet/adj/adj_nbr.h
@@ -75,6 +75,28 @@ extern adj_index_t adj_nbr_add_or_lock_w_rewrite(fib_protocol_t nh_proto,
const ip46_address_t *nh_addr,
u32 sw_if_index,
u8 *rewrite);
+/**
+ * @brief When adding a rewrite to an adjacency these are flags that
+ * apply to that rewrite
+ */
+typedef enum adj_nbr_rewrite_flag_t_
+{
+ /**
+ * No flags set. This is the first enumerator, so its value is zero.
+ */
+ ADJ_NBR_REWRITE_FLAG_NONE,
+
+ /**
+ * An indication that the rewrite is incomplete, i.e. that it describes the
+ * ARP/ND rewrite when probing.
+ * Defined as the same value as ADJ_NBR_REWRITE_FLAG_NONE: incomplete is
+ * expressed as the absence of the COMPLETE bit, not as a bit of its own.
+ */
+ ADJ_NBR_REWRITE_FLAG_INCOMPLETE = ADJ_NBR_REWRITE_FLAG_NONE,
+
+ /**
+ * An indication that the rewrite is complete, i.e. that it fully describes
+ * the link-layer addressing for the destination.
+ * The opposite of this is an incomplete rewrite that describes the ARP/ND
+ * rewrite when probing.
+ */
+ ADJ_NBR_REWRITE_FLAG_COMPLETE = (1 << 0),
+} adj_nbr_rewrite_flag_t;
/**
* @brief
@@ -87,6 +109,7 @@ extern adj_index_t adj_nbr_add_or_lock_w_rewrite(fib_protocol_t nh_proto,
* The new rewrite
*/
extern void adj_nbr_update_rewrite(adj_index_t adj_index,
+ adj_nbr_rewrite_flag_t flags,
u8 *rewrite);
/**
@@ -102,6 +125,43 @@ extern u8* format_adj_nbr_incomplete(u8* s, va_list *ap);
extern u8* format_adj_nbr(u8* s, va_list *ap);
/**
+ * @brief Walk the neighbour Adjacencies on a given interface
+ */
+extern void adj_nbr_walk (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ adj_walk_cb_t cb,
+ void *ctx);
+/**
+ * @brief Walk the neighbour Adjacencies on a given interface with a given next-hop
+ */
+void
+adj_nbr_walk_nh (u32 sw_if_index,
+ fib_protocol_t adj_nh_proto,
+ const ip46_address_t *nh,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Walk adjacencies on a link with a given v4 next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh4 (u32 sw_if_index,
+ const ip4_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
+ * @brief Walk adjacencies on a link with a given v6 next-hop.
+ * that is visit the adjacencies with different link types.
+ */
+void
+adj_nbr_walk_nh6 (u32 sw_if_index,
+ const ip6_address_t *addr,
+ adj_walk_cb_t cb,
+ void *ctx);
+
+/**
* @brief
* Module initialisation
*/
diff --git a/vnet/vnet/adj/adj_rewrite.c b/vnet/vnet/adj/adj_rewrite.c
index eb93f6a4..046fff44 100644
--- a/vnet/vnet/adj/adj_rewrite.c
+++ b/vnet/vnet/adj/adj_rewrite.c
@@ -32,15 +32,17 @@ adj_rewrite_add_and_lock (fib_protocol_t nh_proto,
adj = adj_alloc(nh_proto);
adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+ memset(&adj->sub_type.nbr.next_hop, 0, sizeof(adj->sub_type.nbr.next_hop));
adj->ia_link = link_type;
+ adj->ia_nh_proto = nh_proto;
adj->rewrite_header.sw_if_index = sw_if_index;
ASSERT(NULL != rewrite);
vnet_rewrite_for_sw_interface(vnet_get_main(),
- adj_fib_link_2_vnet(link_type),
+ link_type,
adj->rewrite_header.sw_if_index,
- adj_get_rewrite_node(link_type)->index,
+ adj_get_rewrite_node(link_type),
rewrite,
&adj->rewrite_header,
sizeof (adj->rewrite_data));
diff --git a/vnet/vnet/adj/adj_types.h b/vnet/vnet/adj/adj_types.h
index a7234663..cf90c084 100644
--- a/vnet/vnet/adj/adj_types.h
+++ b/vnet/vnet/adj/adj_types.h
@@ -35,4 +35,19 @@ typedef u32 adj_index_t;
*/
#define ADJ_INDEX_INVALID ((u32)~0)
+/**
+ * @brief return codes from an adjacency walker callback function
+ *
+ * NOTE(review): presumably STOP asks the walker to visit no further
+ * adjacencies and CONTINUE asks it to carry on; confirm against each walker
+ * implementation, since a walker may discard the callback's return value.
+ */
+typedef enum adj_walk_rc_t_
+{
+ ADJ_WALK_RC_STOP,
+ ADJ_WALK_RC_CONTINUE,
+} adj_walk_rc_t;
+
+/**
+ * @brief Call back function when walking adjacencies
+ *
+ * @param ai  index of the adjacency being visited
+ * @param ctx opaque pointer supplied by the code that started the walk
+ * @return an adj_walk_rc_t value
+ */
+typedef adj_walk_rc_t (*adj_walk_cb_t)(adj_index_t ai,
+ void *ctx);
+
#endif
diff --git a/vnet/vnet/dhcp/client.c b/vnet/vnet/dhcp/client.c
index ffe6e8da..f555f19e 100644
--- a/vnet/vnet/dhcp/client.c
+++ b/vnet/vnet/dhcp/client.c
@@ -44,23 +44,15 @@ dhcp_client_release_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
c->subnet_mask_width, 1 /*is_del*/);
}
-static void set_l2_rewrite (dhcp_client_main_t * dcm, dhcp_client_t * c)
+static void
+set_l2_rewrite (dhcp_client_main_t * dcm, dhcp_client_t * c)
{
- vnet_main_t * vnm = dcm->vnet_main;
- vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, c->sw_if_index);
- vnet_hw_interface_class_t * hc =
- vnet_get_hw_interface_class (vnm, hw->hw_class_index);
- u32 n_rw;
-
/* Acquire the L2 rewrite string for the indicated sw_if_index */
- vec_validate (c->l2_rewrite, 32);
- ASSERT (hc->set_rewrite);
- n_rw = hc->set_rewrite (dcm->vnet_main, c->sw_if_index,
- VNET_L3_PACKET_TYPE_IP4,
- 0 /* broadcast */, c->l2_rewrite,
- vec_len(c->l2_rewrite));
-
- _vec_len (c->l2_rewrite) = n_rw;
+ c->l2_rewrite = vnet_build_rewrite_for_sw_interface(
+ dcm->vnet_main,
+ c->sw_if_index,
+ VNET_LINK_IP4,
+ 0 /* broadcast */);
}
/*
diff --git a/vnet/vnet/ethernet/arp.c b/vnet/vnet/ethernet/arp.c
index 645ff86c..eeaac4d3 100644
--- a/vnet/vnet/ethernet/arp.c
+++ b/vnet/vnet/ethernet/arp.c
@@ -22,7 +22,7 @@
#include <vnet/l2/l2_input.h>
#include <vppinfra/mhash.h>
#include <vnet/fib/ip4_fib.h>
-#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_nbr.h>
#include <vnet/mpls/mpls.h>
/**
@@ -48,33 +48,23 @@ typedef struct
#define ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC (1 << 1)
u64 cpu_time_last_updated;
- adj_index_t adj_index[FIB_LINK_NUM];
-} ethernet_arp_ip4_entry_t;
-/**
- * @brief administrative and operational state falgs on an interface
- */
-typedef enum ethernet_arp_interface_flags_t_
-{
- ETHERNET_ARP_INTERFACE_UP = (0 << 1),
- ETHERNET_ARP_INTERFACE_MPLS_ENABLE = (1 << 0),
-} ethernet_arp_interface_flags_t;
+ /**
+ * The index of the adj-fib entry created
+ */
+ fib_node_index_t fib_entry_index;
+} ethernet_arp_ip4_entry_t;
/**
* @brief Per-interface ARP configuration and state
*/
typedef struct ethernet_arp_interface_t_
{
- /**
- * Hash table of ARP entries.
- * Since this hash table is per-interface, the key is only the IPv4 address.
- */
+ /**
+ * Hash table of ARP entries.
+ * Since this hash table is per-interface, the key is only the IPv4 address.
+ */
uword *arp_entries;
-
- /**
- * Flags for administrative and operational state
- */
- ethernet_arp_interface_flags_t flags;
} ethernet_arp_interface_t;
typedef struct
@@ -123,14 +113,6 @@ typedef struct
static ethernet_arp_main_t ethernet_arp_main;
-
-typedef enum arp_ether_type_t_
-{
- ARP_ETHER_TYPE_IP4 = (1 << 0),
- ARP_ETHER_TYPE_MPLS = (1 << 1),
-} arp_ether_type_t;
-#define ARP_ETHER_TYPE_BOTH (ARP_ETHER_TYPE_MPLS | ARP_ETHER_TYPE_IP4)
-
typedef struct
{
u32 sw_if_index;
@@ -140,7 +122,6 @@ typedef struct
#define ETHERNET_ARP_ARGS_REMOVE (1<<0)
#define ETHERNET_ARP_ARGS_FLUSH (1<<1)
#define ETHERNET_ARP_ARGS_POPULATE (1<<2)
- arp_ether_type_t ether_type;
} vnet_arp_set_ip4_over_ethernet_rpc_args_t;
static void
@@ -339,68 +320,170 @@ format_arp_term_input_trace (u8 * s, va_list * va)
}
+/**
+ * Send one ARP request for the next-hop of a neighbour adjacency.
+ *
+ * The request is built from the ip4 ARP request packet template, sourced
+ * from the interface address that matches the adjacency's next-hop,
+ * prefixed with the adjacency's current rewrite and handed to the output
+ * node of the interface's hardware interface.
+ * Nothing is sent when the interface is not admin-up, when no interface
+ * address matches the next-hop, or when no packet could be obtained from
+ * the template.
+ *
+ * @param adj the adjacency whose next-hop is probed
+ */
static void
-arp_mk_complete (ethernet_arp_interface_t * eai,
- ethernet_arp_ip4_entry_t * e, arp_ether_type_t et)
+arp_nbr_probe (ip_adjacency_t * adj)
{
- fib_prefix_t pfx = {
- .fp_len = 32,
- .fp_proto = FIB_PROTOCOL_IP4,
- .fp_addr = {
- .ip4 = e->ip4_address,
- },
- };
- u32 fib_index;
+ vnet_main_t *vnm = vnet_get_main ();
+ ip4_main_t *im = &ip4_main;
+ ip_interface_address_t *ia;
+ ethernet_arp_header_t *h;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ ip4_address_t *src;
+ vlib_buffer_t *b;
+ vlib_main_t *vm;
+ u32 bi = 0;
- fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
+ vm = vlib_get_main ();
- if (et & ARP_ETHER_TYPE_IP4)
+ si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
+
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
{
- if (ADJ_INDEX_INVALID == e->adj_index[FIB_LINK_IP4])
- {
- e->adj_index[FIB_LINK_IP4] =
- adj_nbr_add_or_lock_w_rewrite (FIB_PROTOCOL_IP4,
- FIB_LINK_IP4,
- &pfx.fp_addr,
- e->sw_if_index,
- e->ethernet_address);
- ASSERT (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4]);
-
- fib_table_entry_update_one_path (fib_index,
- &pfx,
- FIB_SOURCE_ADJ,
- FIB_ENTRY_FLAG_ATTACHED,
- FIB_PROTOCOL_IP4,
- &pfx.fp_addr,
- e->sw_if_index,
- ~0,
- 1,
- MPLS_LABEL_INVALID,
- FIB_ROUTE_PATH_FLAG_NONE);
- }
- else
- {
- adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4],
- e->ethernet_address);
- }
+ return;
}
- if ((et & ARP_ETHER_TYPE_MPLS) &&
- eai->flags & ETHERNET_ARP_INTERFACE_MPLS_ENABLE)
+
+ src =
+ ip4_interface_address_matching_destination (im,
+ &adj->sub_type.nbr.next_hop.
+ ip4,
+ adj->rewrite_header.
+ sw_if_index, &ia);
+ if (!src)
{
- if (ADJ_INDEX_INVALID == e->adj_index[FIB_LINK_MPLS])
- {
- e->adj_index[FIB_LINK_MPLS] =
- adj_nbr_add_or_lock_w_rewrite (FIB_PROTOCOL_IP4,
- FIB_LINK_MPLS,
- &pfx.fp_addr,
- e->sw_if_index,
- e->ethernet_address);
- ASSERT (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS]);
- }
- else
- {
- adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS],
- e->ethernet_address);
- }
+ return;
+ }
+
+ h =
+ vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
+ &bi);
+ /*
+ * h is written through below, so never proceed with a NULL packet.
+ * NOTE(review): the template getter is expected to return NULL when no
+ * buffer can be allocated - confirm against the vlib buffer API.
+ */
+ if (!h)
+ {
+ return;
+ }
+
+ hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
+
+ clib_memcpy (h->ip4_over_ethernet[0].ethernet,
+ hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
+
+ h->ip4_over_ethernet[0].ip4 = src[0];
+ h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4;
+
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
+
+ /* Add encapsulation string for software interface (e.g. ethernet header). */
+ vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+ vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+ {
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+ u32 *to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node (vm, hi->output_node_index, f);
+ }
+}
+
+/**
+ * Give adjacency @c ai a complete rewrite: an Ethernet rewrite for the
+ * adjacency's own link type, built from the interface and MAC address
+ * recorded in ARP entry @c e.
+ */
+static void
+arp_mk_complete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+
+  adj_nbr_update_rewrite (ai,
+                          ADJ_NBR_REWRITE_FLAG_COMPLETE,
+                          ethernet_build_rewrite (vnm,
+                                                  e->sw_if_index,
+                                                  adj_get_link_type (ai),
+                                                  e->ethernet_address));
+}
+
+/**
+ * Revert adjacency @c ai to an incomplete rewrite: the broadcast Ethernet
+ * rewrite for link type ARP on the interface recorded in ARP entry @c e.
+ */
+static void
+arp_mk_incomplete (adj_index_t ai, ethernet_arp_ip4_entry_t * e)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+
+  adj_nbr_update_rewrite (ai,
+                          ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+                          ethernet_build_rewrite (vnm,
+                                                  e->sw_if_index,
+                                                  VNET_LINK_ARP,
+                                                  VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+}
+
+/**
+ * Look up the ARP entry for an IPv4 address in one interface's ARP state.
+ *
+ * @param eai  per-interface ARP state holding the address-keyed hash
+ * @param addr IPv4 address to look up
+ * @return the matching entry from the ip4 entry pool, or NULL when the
+ *         interface has no hash yet or the address is not in it
+ */
+static ethernet_arp_ip4_entry_t *
+arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  uword *slot;
+
+  /* the hash is only created when the first entry is added */
+  if (NULL == eai->arp_entries)
+    return (NULL);
+
+  slot = hash_get (eai->arp_entries, addr->as_u32);
+  if (!slot)
+    return (NULL);
+
+  /* the hash value is the entry's index in the pool */
+  return (pool_elt_at_index (am->ip4_entry_pool, slot[0]));
+}
+
+/**
+ * Adjacency walk callback: complete adjacency @c ai from the ARP entry
+ * passed as @c ctx, then ask for the walk to continue.
+ */
+static adj_walk_rc_t
+arp_mk_complete_walk (adj_index_t ai, void *ctx)
+{
+  /* ctx is the ethernet_arp_ip4_entry_t handed to the walk by its caller */
+  arp_mk_complete (ai, (ethernet_arp_ip4_entry_t *) ctx);
+
+  return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * Adjacency walk callback: revert adjacency @c ai to incomplete using the
+ * ARP entry passed as @c ctx, then ask for the walk to continue.
+ */
+static adj_walk_rc_t
+arp_mk_incomplete_walk (adj_index_t ai, void *ctx)
+{
+  /* ctx is the ethernet_arp_ip4_entry_t handed to the walk by its caller */
+  arp_mk_incomplete (ai, (ethernet_arp_ip4_entry_t *) ctx);
+
+  return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * Supply the rewrite for adjacency @c ai on interface @c sw_if_index.
+ *
+ * When an ARP entry exists for the adjacency's next-hop, every adjacency on
+ * the interface with that next-hop is completed from the entry. Otherwise
+ * the adjacency gets the incomplete (broadcast ARP) rewrite and a single
+ * ARP request is sent for the next-hop.
+ *
+ * @param vnm         vnet main, used to build the incomplete rewrite
+ * @param sw_if_index interface the adjacency is on
+ * @param ai          index of the adjacency to update
+ */
+void
+arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+  ethernet_arp_main_t *am = &ethernet_arp_main;
+  ip_adjacency_t *adj = adj_get (ai);
+  ethernet_arp_interface_t *eai;
+  ethernet_arp_ip4_entry_t *entry;
+
+  /* make sure per-interface ARP state exists for this interface */
+  vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index);
+  eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
+
+  entry = arp_entry_find (eai, &adj->sub_type.nbr.next_hop.ip4);
+
+  if (NULL != entry)
+    {
+      adj_nbr_walk_nh4 (sw_if_index,
+                        &entry->ip4_address, arp_mk_complete_walk, entry);
+      return;
+    }
+
+  /*
+   * no matching ARP entry.
+   * construct the rewrite required for an ARP packet, and stick
+   * that in the adj's pipe to smoke.
+   */
+  adj_nbr_update_rewrite (ai,
+                          ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+                          ethernet_build_rewrite (vnm,
+                                                  sw_if_index,
+                                                  VNET_LINK_ARP,
+                                                  VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+
+  /*
+   * since the FIB has added this adj for a route, it makes sense it may
+   * want to forward traffic sometime soon. Let's send a speculative ARP.
+   * just one. Sending periodically wouldn't be bad either, but that's
+   * more code than is justified at this time for relatively little reward.
+   */
+  arp_nbr_probe (adj);
+}
@@ -417,7 +500,6 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
uword *p;
pending_resolution_t *pr, *mc;
ethernet_arp_interface_t *arp_int;
- fib_link_t link;
int is_static = args->is_static;
u32 sw_if_index = args->sw_if_index;
@@ -441,23 +523,43 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
if (make_new_arp_cache_entry)
{
+ fib_prefix_t pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = a->ip4,
+ }
+ ,
+ };
+ u32 fib_index;
+
pool_get (am->ip4_entry_pool, e);
if (NULL == arp_int->arp_entries)
{
arp_int->arp_entries = hash_create (0, sizeof (u32));
- if (mpls_sw_interface_is_enabled (sw_if_index))
- arp_int->flags |= ETHERNET_ARP_INTERFACE_MPLS_ENABLE;
}
hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool);
e->sw_if_index = sw_if_index;
e->ip4_address = a->ip4;
- FOR_EACH_FIB_LINK (link)
- {
- e->adj_index[link] = ADJ_INDEX_INVALID;
- }
+ clib_memcpy (e->ethernet_address,
+ a->ethernet, sizeof (e->ethernet_address));
+
+ fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
+ e->fib_entry_index =
+ fib_table_entry_update_one_path (fib_index,
+ &pfx,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_ATTACHED,
+ FIB_PROTOCOL_IP4,
+ &pfx.fp_addr,
+ e->sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
}
else
{
@@ -468,18 +570,19 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm,
if (0 == memcmp (e->ethernet_address,
a->ethernet, sizeof (e->ethernet_address)))
return -1;
+
+ /* Update time stamp and ethernet address. */
+ clib_memcpy (e->ethernet_address, a->ethernet,
+ sizeof (e->ethernet_address));
}
- /* Update time stamp and ethernet address. */
- clib_memcpy (e->ethernet_address, a->ethernet,
- sizeof (e->ethernet_address));
e->cpu_time_last_updated = clib_cpu_time_now ();
if (is_static)
e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC;
else
e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC;
- arp_mk_complete (arp_int, e, ARP_ETHER_TYPE_BOTH);
+ adj_nbr_walk_nh4 (sw_if_index, &e->ip4_address, arp_mk_complete_walk, e);
/* Customer(s) waiting for this address to be resolved? */
p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32);
@@ -1334,7 +1437,6 @@ vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
args.sw_if_index = sw_if_index;
args.flags = ETHERNET_ARP_ARGS_REMOVE;
- args.ether_type = ARP_ETHER_TYPE_IP4;
clib_memcpy (&args.a, a, sizeof (*a));
vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
@@ -1350,15 +1452,13 @@ vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm,
*/
static int
vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm,
- u32 sw_if_index,
- arp_ether_type_t et, void *a_arg)
+ u32 sw_if_index, void *a_arg)
{
ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
args.sw_if_index = sw_if_index;
args.flags = ETHERNET_ARP_ARGS_FLUSH;
- args.ether_type = et;
clib_memcpy (&args.a, a, sizeof (*a));
vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
@@ -1372,19 +1472,16 @@ vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm,
* For static entries this will re-source the adjacencies.
*
* @param sw_if_index The interface on which the ARP entires are acted
- * @param et The ether type of those ARP entries.
*/
static int
vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm,
- u32 sw_if_index,
- arp_ether_type_t et, void *a_arg)
+ u32 sw_if_index, void *a_arg)
{
ethernet_arp_ip4_over_ethernet_address_t *a = a_arg;
vnet_arp_set_ip4_over_ethernet_rpc_args_t args;
args.sw_if_index = sw_if_index;
args.flags = ETHERNET_ARP_ARGS_POPULATE;
- args.ether_type = et;
clib_memcpy (&args.a, a, sizeof (*a));
vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
@@ -1423,22 +1520,18 @@ arp_add_del_interface_address (ip4_main_t * im,
eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
- hash_foreach_pair (pair, eai->arp_entries, (
- {
- e =
- pool_elt_at_index
- (am->ip4_entry_pool,
- pair->value[0]);
- if
- (ip4_destination_matches_route
- (im, &e->ip4_address,
- address, address_length))
- {
- vec_add1 (to_delete,
- e -
- am->ip4_entry_pool);}
- }
- ));
+ /* *INDENT-OFF* */
+ hash_foreach_pair (pair, eai->arp_entries,
+ ({
+ e = pool_elt_at_index(am->ip4_entry_pool,
+ pair->value[0]);
+ if (ip4_destination_matches_route (im, &e->ip4_address,
+ address, address_length))
+ {
+ vec_add1 (to_delete, e - am->ip4_entry_pool);
+ }
+ }));
+ /* *INDENT-ON* */
for (i = 0; i < vec_len (to_delete); i++)
{
@@ -1449,62 +1542,13 @@ arp_add_del_interface_address (ip4_main_t * im,
delme.ip4.as_u32 = e->ip4_address.as_u32;
vnet_arp_flush_ip4_over_ethernet (vnet_get_main (),
- e->sw_if_index,
- ARP_ETHER_TYPE_BOTH, &delme);
+ e->sw_if_index, &delme);
}
vec_free (to_delete);
}
}
-static void
-ethernet_arp_sw_interface_mpls_state_change (u32 sw_if_index, u32 is_enable)
-{
- ethernet_arp_main_t *am = &ethernet_arp_main;
- ethernet_arp_ip4_entry_t *e;
- ethernet_arp_interface_t *eai;
- u32 i, *to_update = 0;
- hash_pair_t *pair;
-
- if (vec_len (am->ethernet_arp_by_sw_if_index) < sw_if_index)
- return;
-
- eai = &am->ethernet_arp_by_sw_if_index[sw_if_index];
-
- if (is_enable)
- eai->flags |= ETHERNET_ARP_INTERFACE_MPLS_ENABLE;
- else
- eai->flags &= ~ETHERNET_ARP_INTERFACE_MPLS_ENABLE;
-
- hash_foreach_pair (pair, eai->arp_entries, (
- {
- vec_add1 (to_update,
- pair->value[0]);
- }
- ));
-
- for (i = 0; i < vec_len (to_update); i++)
- {
- ethernet_arp_ip4_over_ethernet_address_t updateme;
- e = pool_elt_at_index (am->ip4_entry_pool, to_update[i]);
-
- clib_memcpy (&updateme.ethernet, e->ethernet_address, 6);
- updateme.ip4.as_u32 = e->ip4_address.as_u32;
-
- if (is_enable)
- {
- vnet_arp_populate_ip4_over_ethernet (vnet_get_main (),
- e->sw_if_index,
- ARP_ETHER_TYPE_MPLS,
- &updateme);
- }
- else
- continue;
-
- }
- vec_free (to_update);
-}
-
static clib_error_t *
ethernet_arp_init (vlib_main_t * vm)
{
@@ -1550,92 +1594,21 @@ ethernet_arp_init (vlib_main_t * vm)
cb.function_opaque = 0;
vec_add1 (im->add_del_interface_address_callbacks, cb);
- vec_add1 (mpls_main.mpls_interface_state_change_callbacks,
- ethernet_arp_sw_interface_mpls_state_change);
-
return 0;
}
VLIB_INIT_FUNCTION (ethernet_arp_init);
static void
-arp_mk_incomplete (ethernet_arp_interface_t * eai,
- ethernet_arp_ip4_entry_t * e, arp_ether_type_t et)
-{
- fib_prefix_t pfx = {
- .fp_len = 32,
- .fp_proto = FIB_PROTOCOL_IP4,
- .fp_addr = {
- .ip4 = e->ip4_address,
- },
- };
- u32 fib_index;
-
- fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index);
-
- if ((ARP_ETHER_TYPE_IP4 & et) &&
- (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4]))
- {
- /*
- * revert the adj this ARP entry sourced to incomplete
- */
- adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4], NULL);
-
- /*
- * remove the FIB erntry the ARP entry sourced
- */
- fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_ADJ);
-
- /*
- * Unlock the adj now that the ARP entry is no longer a source
- */
- adj_unlock (e->adj_index[FIB_LINK_IP4]);
- e->adj_index[FIB_LINK_IP4] = ADJ_INDEX_INVALID;
- }
- if ((ARP_ETHER_TYPE_MPLS & et) &&
- (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS]))
- {
- /*
- * revert the adj this ARP entry sourced to incomplete
- */
- adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS], NULL);
-
- /*
- * Unlock the adj now that the ARP entry is no longer a source
- */
- adj_unlock (e->adj_index[FIB_LINK_MPLS]);
- e->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID;
- }
-}
-
-static void
arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e)
{
ethernet_arp_main_t *am = &ethernet_arp_main;
+ fib_table_entry_delete_index (e->fib_entry_index, FIB_SOURCE_ADJ);
hash_unset (eai->arp_entries, e->ip4_address.as_u32);
pool_put (am->ip4_entry_pool, e);
}
-static ethernet_arp_ip4_entry_t *
-arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr)
-{
- ethernet_arp_main_t *am = &ethernet_arp_main;
- ethernet_arp_ip4_entry_t *e = NULL;
- uword *p;
-
- if (NULL != eai->arp_entries)
- {
- p = hash_get (eai->arp_entries, addr->as_u32);
- if (!p)
- return (NULL);
-
- e = pool_elt_at_index (am->ip4_entry_pool, p[0]);
- }
-
- return (e);
-}
-
static inline int
vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
vnet_arp_set_ip4_over_ethernet_rpc_args_t
@@ -1651,7 +1624,8 @@ vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm,
if (NULL != e)
{
- arp_mk_incomplete (eai, e, ARP_ETHER_TYPE_BOTH);
+ adj_nbr_walk_nh4 (e->sw_if_index,
+ &e->ip4_address, arp_mk_incomplete_walk, e);
arp_entry_free (eai, e);
}
@@ -1673,7 +1647,8 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
if (NULL != e)
{
- arp_mk_incomplete (eai, e, args->ether_type);
+ adj_nbr_walk_nh4 (e->sw_if_index,
+ &e->ip4_address, arp_mk_incomplete_walk, e);
/*
* The difference between flush and unset, is that an unset
@@ -1682,8 +1657,7 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm,
* does in response to interface events. unset is only done
* by the control plane.
*/
- if ((e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) &&
- (args->ether_type & ARP_ETHER_TYPE_IP4))
+ if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC)
{
arp_entry_free (eai, e);
}
@@ -1706,7 +1680,8 @@ vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm,
if (NULL != e)
{
- arp_mk_complete (eai, e, args->ether_type);
+ adj_nbr_walk_nh4 (e->sw_if_index,
+ &e->ip4_address, arp_mk_complete_walk, e);
}
return (0);
}
@@ -1743,9 +1718,8 @@ ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
pool_foreach (e, am->ip4_entry_pool,
({
if (e->sw_if_index == sw_if_index)
- {
- vec_add1 (to_delete, e - am->ip4_entry_pool);
- }
+ vec_add1 (to_delete,
+ e - am->ip4_entry_pool);
}));
/* *INDENT-ON* */
@@ -1759,25 +1733,21 @@ ethernet_arp_sw_interface_up_down (vnet_main_t * vnm,
if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
{
- vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index,
- ARP_ETHER_TYPE_BOTH, &delme);
+ vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
}
else
{
- vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index,
- ARP_ETHER_TYPE_BOTH, &delme);
+ vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index, &delme);
}
}
vec_free (to_delete);
-
return 0;
}
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down);
-
static void
increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a)
{
@@ -1811,7 +1781,6 @@ vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm,
args.sw_if_index = sw_if_index;
args.is_static = is_static;
args.flags = 0;
- args.ether_type = ARP_ETHER_TYPE_IP4;
clib_memcpy (&args.a, a, sizeof (*a));
vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback,
@@ -1990,7 +1959,7 @@ ip_arp_add_del_command_fn (vlib_main_t * vm,
return 0;
}
-
+/* *INDENT-OFF* */
/*?
* Add or delete IPv4 ARP cache entries.
*
@@ -2019,19 +1988,18 @@ ip_arp_add_del_command_fn (vlib_main_t * vm,
* @cliexcmd{set ip arp count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_arp_add_del_command, static) = {
.path = "set ip arp",
.short_help =
- "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
+ "set ip arp [del] <intfc> <ip-address> <mac-address> [static] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
.function = ip_arp_add_del_command_fn,
};
/* *INDENT-ON* */
static clib_error_t *
set_int_proxy_arp_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+ unformat_input_t *
+ input, vlib_cli_command_t * cmd)
{
vnet_main_t *vnm = vnet_get_main ();
u32 sw_if_index;
@@ -2066,7 +2034,7 @@ set_int_proxy_arp_command_fn (vlib_main_t * vm,
return 0;
}
-
+/* *INDENT-OFF* */
/*?
* Enable proxy-arp on an interface. The vpp stack will answer ARP
* requests for the indicated address range. Multiple proxy-arp
@@ -2086,11 +2054,10 @@ set_int_proxy_arp_command_fn (vlib_main_t * vm,
* To disable proxy arp on an individual interface:
* @cliexcmd{set interface proxy-arp GigabitEthernet0/8/0 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
.path = "set interface proxy-arp",
.short_help =
- "set interface proxy-arp <intfc> [enable|disable]",
+ "set interface proxy-arp <intfc> [enable|disable]",
.function = set_int_proxy_arp_command_fn,
};
/* *INDENT-ON* */
@@ -2174,8 +2141,8 @@ arp_term_l2bd (vlib_main_t * vm,
error0 = ETHERNET_ARP_ERROR_replies_sent;
error0 =
(arp0->l2_type !=
- clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
- ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
+ clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
+ ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
error0 =
(arp0->l3_type !=
clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
@@ -2269,8 +2236,9 @@ arp_term_l2bd (vlib_main_t * vm,
for ARP requests from other hosts. If output to VXLAN tunnel is
required, however, can just clear the SHG in packet as follows:
vnet_buffer(p0)->l2.shg = 0; */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, pi0, next0);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
continue;
check_ip6_nd:
@@ -2283,9 +2251,9 @@ arp_term_l2bd (vlib_main_t * vm,
(&iph0->src_address)))
{
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
- if (vnet_ip6_nd_term (vm, node, p0, eth0, iph0, sw_if_index0,
- vnet_buffer (p0)->l2.bd_index,
- vnet_buffer (p0)->l2.shg))
+ if (vnet_ip6_nd_term
+ (vm, node, p0, eth0, iph0, sw_if_index0,
+ vnet_buffer (p0)->l2.bd_index, vnet_buffer (p0)->l2.shg))
goto output_response;
}
@@ -2294,10 +2262,12 @@ arp_term_l2bd (vlib_main_t * vm,
u32 feature_bitmap0 =
vnet_buffer (p0)->l2.feature_bitmap & ~L2INPUT_FEAT_ARP_TERM;
vnet_buffer (p0)->l2.feature_bitmap = feature_bitmap0;
- next0 = feat_bitmap_get_next_node_index (arp_term_next_node_index,
- feature_bitmap0);
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, pi0, next0);
+ next0 =
+ feat_bitmap_get_next_node_index (arp_term_next_node_index,
+ feature_bitmap0);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
continue;
}
@@ -2311,8 +2281,9 @@ arp_term_l2bd (vlib_main_t * vm,
next0 = ARP_TERM_NEXT_DROP;
p0->error = node->errors[error0];
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, pi0, next0);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -2342,7 +2313,8 @@ VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
clib_error_t *
arp_term_init (vlib_main_t * vm)
-{ // Initialize the feature next-node indexes
+{
+ // Initialize the feature next-node indexes
feat_bitmap_init_next_nodes (vm,
arp_term_l2bd_node.index,
L2INPUT_N_FEAT,
@@ -2358,21 +2330,8 @@ change_arp_mac (u32 sw_if_index, ethernet_arp_ip4_entry_t * e)
{
if (e->sw_if_index == sw_if_index)
{
-
- if (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4])
- {
- // the update rewrite function takes the dst mac (which is not changing)
- // the new source mac will be retrieved from the interface
- // when the full rewrite is constructed.
- adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4],
- e->ethernet_address);
- }
- if (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS])
- {
- adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS],
- e->ethernet_address);
- }
-
+ adj_nbr_walk_nh4 (e->sw_if_index,
+ &e->ip4_address, arp_mk_complete_walk, e);
}
}
@@ -2384,9 +2343,9 @@ ethernet_arp_change_mac (vnet_main_t * vnm, u32 sw_if_index)
/* *INDENT-OFF* */
pool_foreach (e, am->ip4_entry_pool,
- ({
- change_arp_mac (sw_if_index, e);
- }));
+ ({
+ change_arp_mac (sw_if_index, e);
+ }));
/* *INDENT-ON* */
}
diff --git a/vnet/vnet/ethernet/ethernet.h b/vnet/vnet/ethernet/ethernet.h
index 973ed58c..34ddb82b 100644
--- a/vnet/vnet/ethernet/ethernet.h
+++ b/vnet/vnet/ethernet/ethernet.h
@@ -562,6 +562,13 @@ int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm,
void ethernet_arp_change_mac (vnet_main_t * vnm, u32 sw_if_index);
+void arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai);
+
+void ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai);
+u8 *ethernet_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address);
+
extern vlib_node_registration_t ethernet_input_node;
#endif /* included_ethernet_h */
diff --git a/vnet/vnet/ethernet/interface.c b/vnet/vnet/ethernet/interface.c
index 43f1cd4a..45d215d3 100644
--- a/vnet/vnet/ethernet/interface.c
+++ b/vnet/vnet/ethernet/interface.c
@@ -42,9 +42,7 @@
#include <vnet/pg/pg.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/l2/l2_input.h>
-#include <vnet/srp/srp.h>
-#include <vnet/lisp-gpe/lisp_gpe.h>
-#include <vnet/devices/af_packet/af_packet.h>
+#include <vnet/adj/adj.h>
/**
* @file
@@ -53,32 +51,24 @@
* This file contains code to manage loopback interfaces.
*/
-int
-vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index)
-{
- // FIXME - use flags on the HW itf
- vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
- return (!(hw->hw_class_index == ethernet_hw_interface_class.index ||
- hw->hw_class_index == af_packet_device_class.index ||
- hw->hw_class_index == lisp_gpe_hw_class.index ||
- hw->hw_class_index == srp_hw_interface_class.index));
-}
-
-static uword
-ethernet_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void *dst_address,
- void *rewrite, uword max_rewrite_bytes)
+/**
+ * @brief build a rewrite string to use for sending packets of type 'link_type'
+ * to 'dst_address'
+ */
+u8 *
+ethernet_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address)
{
vnet_sw_interface_t *sub_sw = vnet_get_sw_interface (vnm, sw_if_index);
vnet_sw_interface_t *sup_sw = vnet_get_sup_sw_interface (vnm, sw_if_index);
vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
ethernet_main_t *em = &ethernet_main;
ethernet_interface_t *ei;
- ethernet_header_t *h = rewrite;
+ ethernet_header_t *h;
ethernet_type_t type;
uword n_bytes = sizeof (h[0]);
+ u8 *rewrite = NULL;
if (sub_sw != sup_sw)
{
@@ -100,22 +90,20 @@ ethernet_set_rewrite (vnet_main_t * vnm,
}
}
- if (n_bytes > max_rewrite_bytes)
- return 0;
-
- switch (l3_type)
+ switch (link_type)
{
-#define _(a,b) case VNET_L3_PACKET_TYPE_##a: type = ETHERNET_TYPE_##b; break
+#define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break
_(IP4, IP4);
_(IP6, IP6);
- _(MPLS_UNICAST, MPLS_UNICAST);
- _(MPLS_MULTICAST, MPLS_MULTICAST);
+ _(MPLS, MPLS_UNICAST);
_(ARP, ARP);
#undef _
default:
- return 0;
+ return NULL;
}
+ vec_validate (rewrite, n_bytes - 1);
+ h = (ethernet_header_t *) rewrite;
ei = pool_elt_at_index (em->interfaces, hw->hw_instance);
clib_memcpy (h->src_address, ei->address, sizeof (h->src_address));
if (dst_address)
@@ -156,7 +144,28 @@ ethernet_set_rewrite (vnet_main_t * vnm,
h->type = clib_host_to_net_u16 (type);
}
- return n_bytes;
+ return (rewrite);
+}
+
+void
+ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+ ip_adjacency_t *adj;
+
+ adj = adj_get (ai);
+
+ if (FIB_PROTOCOL_IP4 == adj->ia_nh_proto)
+ {
+ arp_update_adjacency (vnm, sw_if_index, ai);
+ }
+ else if (FIB_PROTOCOL_IP6 == adj->ia_nh_proto)
+ {
+ ip6_ethernet_update_adjacency (vnm, sw_if_index, ai);
+ }
+ else
+ {
+ ASSERT (0);
+ }
}
/* *INDENT-OFF* */
@@ -166,7 +175,8 @@ VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = {
.format_header = format_ethernet_header_with_length,
.unformat_hw_address = unformat_ethernet_address,
.unformat_header = unformat_ethernet_header,
- .set_rewrite = ethernet_set_rewrite,
+ .build_rewrite = ethernet_build_rewrite,
+ .update_adjacency = ethernet_update_adjacency,
};
/* *INDENT-ON* */
diff --git a/vnet/vnet/fib/fib_entry.c b/vnet/vnet/fib/fib_entry.c
index 5429da29..404f0f40 100644
--- a/vnet/vnet/fib/fib_entry.c
+++ b/vnet/vnet/fib/fib_entry.c
@@ -402,35 +402,21 @@ fib_entry_back_walk_notify (fib_node_t *node,
fib_entry_get_index(fib_entry)));
}
- if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason)
- {
- /*
- * ADJ updates (complete<->incomplete) do not need to propagate to
- * recursive entries.
- * The only reason its needed as far back as here, is that the adj
- * and the incomplete adj are a different DPO type, so the LBs need
- * to re-stack.
- */
- return (FIB_NODE_BACK_WALK_CONTINUE);
- }
- else
- {
- /*
- * all other walk types can be reclassifed to a re-evaluate to
- * all recursive dependents.
- * By reclassifying we ensure that should any of these walk types meet
- * they can be merged.
- */
- ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
-
- /*
- * propagate the backwalk further if we haven't already reached the
- * maximum depth.
- */
- fib_walk_sync(FIB_NODE_TYPE_ENTRY,
- fib_entry_get_index(fib_entry),
- ctx);
- }
+ /*
+ * all other walk types can be reclassified to a re-evaluate to
+ * all recursive dependents.
+ * By reclassifying we ensure that should any of these walk types meet
+ * they can be merged.
+ */
+ ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
+
+ /*
+ * propagate the backwalk further if we haven't already reached the
+ * maximum depth.
+ */
+ fib_walk_sync(FIB_NODE_TYPE_ENTRY,
+ fib_entry_get_index(fib_entry),
+ ctx);
return (FIB_NODE_BACK_WALK_CONTINUE);
}
diff --git a/vnet/vnet/fib/fib_path.c b/vnet/vnet/fib/fib_path.c
index bea17218..ba42e6be 100644
--- a/vnet/vnet/fib/fib_path.c
+++ b/vnet/vnet/fib/fib_path.c
@@ -757,6 +757,20 @@ fib_path_back_walk_notify (fib_node_t *node,
fib_path_proto_to_chain_type(path->fp_nh_proto),
&path->fp_dpo);
}
+ if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason)
+ {
+ /*
+ * ADJ updates (complete<->incomplete) do not need to propagate to
+ * recursive entries.
+ * The only reason it's needed as far back as here, is that the adj
+ * and the incomplete adj are a different DPO type, so the LBs need
+ * to re-stack.
+ * If this walk was quashed in the fib_entry, then any non-fib_path
+ * children (like tunnels that collapse out the LB when they stack)
+ * would not see the update.
+ */
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+ }
break;
case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
/*
diff --git a/vnet/vnet/fib/fib_test.c b/vnet/vnet/fib/fib_test.c
index 7729209d..1e459cf1 100644
--- a/vnet/vnet/fib/fib_test.c
+++ b/vnet/vnet/fib/fib_test.c
@@ -222,6 +222,19 @@ fib_test_urpf_is_equal (fib_node_index_t fei,
return (1);
}
+static u8*
+fib_test_build_rewrite (u8 *eth_addr)
+{
+ u8* rewrite = NULL;
+
+ vec_validate(rewrite, 13);
+
+ memcpy(rewrite, eth_addr, 6);
+ memcpy(rewrite+6, eth_addr, 6);
+
+ return (rewrite);
+}
+
static void
fib_test_v4 (void)
{
@@ -523,6 +536,7 @@ fib_test_v4 (void)
u8 eth_addr[] = {
0xde, 0xde, 0xde, 0xba, 0xba, 0xba,
};
+
ip46_address_t nh_12_12_12_12 = {
.ip4.as_u32 = clib_host_to_net_u32(0x0c0c0c0c),
};
@@ -561,7 +575,8 @@ fib_test_v4 (void)
&adj->sub_type.nbr.next_hop)),
"adj nbr next-hop ok");
- adj_nbr_update_rewrite(ai_01, eth_addr);
+ adj_nbr_update_rewrite(ai_01, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ fib_test_build_rewrite(eth_addr));
FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
"adj is complete");
FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_1_s_32.fp_addr,
@@ -589,7 +604,8 @@ fib_test_v4 (void)
FIB_TEST((0 == ip46_address_cmp(&nh_12_12_12_12,
&adj->sub_type.nbr.next_hop)),
"adj nbr next-hop ok");
- adj_nbr_update_rewrite(ai_12_12_12_12, eth_addr);
+ adj_nbr_update_rewrite(ai_12_12_12_12, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ fib_test_build_rewrite(eth_addr));
FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
"adj is complete");
@@ -636,7 +652,8 @@ fib_test_v4 (void)
&adj->sub_type.nbr.next_hop)),
"adj nbr next-hop ok");
- adj_nbr_update_rewrite(ai_02, eth_addr);
+ adj_nbr_update_rewrite(ai_02, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ fib_test_build_rewrite(eth_addr));
FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
"adj is complete");
FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_2_s_32.fp_addr,
@@ -3113,7 +3130,8 @@ fib_test_v6 (void)
&adj->sub_type.nbr.next_hop)),
"adj nbr next-hop ok");
- adj_nbr_update_rewrite(ai_01, eth_addr);
+ adj_nbr_update_rewrite(ai_01, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ fib_test_build_rewrite(eth_addr));
FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
"adj is complete");
FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_2_s_128.fp_addr,
@@ -3150,7 +3168,8 @@ fib_test_v6 (void)
&adj->sub_type.nbr.next_hop)),
"adj nbr next-hop ok");
- adj_nbr_update_rewrite(ai_02, eth_addr);
+ adj_nbr_update_rewrite(ai_02, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ fib_test_build_rewrite(eth_addr));
FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index),
"adj is complete");
FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_3_s_128.fp_addr,
diff --git a/vnet/vnet/fib/fib_types.h b/vnet/vnet/fib/fib_types.h
index 4d656565..2f23527e 100644
--- a/vnet/vnet/fib/fib_types.h
+++ b/vnet/vnet/fib/fib_types.h
@@ -73,21 +73,17 @@ typedef enum fib_protocol_t_ {
* Link Type. This maps directly into the ethertype.
*/
typedef enum fib_link_t_ {
-#if CLIB_DEBUG > 0
- FIB_LINK_IP4 = 1,
-#else
- FIB_LINK_IP4 = 0,
-#endif
- FIB_LINK_IP6,
- FIB_LINK_ETHERNET,
- FIB_LINK_MPLS,
+ FIB_LINK_IP4 = VNET_LINK_IP4,
+ FIB_LINK_IP6 = VNET_LINK_IP6,
+ FIB_LINK_MPLS = VNET_LINK_MPLS,
+ FIB_LINK_ETHERNET = VNET_LINK_ETHERNET,
} __attribute__ ((packed)) fib_link_t;
/**
* Definition outside of enum so it does not need to be included in non-defaulted
* switch statements
*/
-#define FIB_LINK_NUM (FIB_LINK_MPLS+1)
+#define FIB_LINK_NUM (FIB_LINK_ETHERNET+1)
#define FIB_LINKS { \
[FIB_LINK_ETHERNET] = "ethernet", \
diff --git a/vnet/vnet/gre/gre.c b/vnet/vnet/gre/gre.c
index aa6fca0f..a4b3f9fc 100644
--- a/vnet/vnet/gre/gre.c
+++ b/vnet/vnet/gre/gre.c
@@ -17,7 +17,7 @@
#include <vnet/vnet.h>
#include <vnet/gre/gre.h>
-#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_midchain.h>
gre_main_t gre_main;
@@ -162,133 +162,95 @@ unformat_gre_header (unformat_input_t * input, va_list * args)
return 1;
}
-static uword gre_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void * dst_address,
- void * rewrite,
- uword max_rewrite_bytes)
+static int
+gre_proto_from_vnet_link (vnet_link_t link)
{
- /*
- * Conundrum: packets from tun/tap destined for the tunnel
- * actually have this rewrite applied. Transit packets do not.
- * To make the two cases equivalent, don't generate a
- * rewrite here, build the entire header in the fast path.
- */
- return 0;
-
-#ifdef THINGS_WORKED_AS_ONE_MIGHT_LIKE
- ip4_and_gre_header_t * h = rewrite;
- gre_protocol_t protocol;
-
- if (max_rewrite_bytes < sizeof (h[0]))
- return 0;
-
- switch (l3_type) {
-#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = GRE_PROTOCOL_##b; break
- _ (IP4, ip4);
- _ (IP6, ip6);
-#undef _
- default:
- return 0;
- }
-
- memset (h, 0, sizeof (*h));
- h->ip4.ip_version_and_header_length = 0x45;
- h->ip4.ttl = 64;
- h->ip4.protocol = IP_PROTOCOL_GRE;
- h->gre.protocol = clib_host_to_net_u16 (protocol);
-
- return sizeof (h[0]);
-#endif
+ switch (link)
+ {
+ case VNET_LINK_IP4:
+ return (GRE_PROTOCOL_ip4);
+ case VNET_LINK_IP6:
+ return (GRE_PROTOCOL_ip6);
+ case VNET_LINK_MPLS:
+ return (GRE_PROTOCOL_mpls_unicast);
+ case VNET_LINK_ETHERNET:
+ return (GRE_PROTOCOL_teb);
+ case VNET_LINK_ARP:
+ return (GRE_PROTOCOL_arp);
+ }
+ ASSERT(0);
+ return (GRE_PROTOCOL_ip4);
}
-static uword
-gre_interface_tx (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+static u8*
+gre_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_address)
{
gre_main_t * gm = &gre_main;
- u32 next_index;
- u32 * from, * to_next, n_left_from, n_left_to_next;
- vnet_interface_output_runtime_t * rd = (void *) node->runtime_data;
- gre_tunnel_t *t = pool_elt_at_index (gm->tunnels, rd->dev_instance);
-
- /* Vector of buffer / pkt indices we're supposed to process */
- from = vlib_frame_vector_args (frame);
-
- /* Number of buffers / pkts */
- n_left_from = frame->n_vectors;
-
- /* Speculatively send the first buffer to the last disposition we used */
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- /* set up to enqueue to our disposition with index = next_index */
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ ip4_and_gre_header_t * h;
+ u8* rewrite = NULL;
+ gre_tunnel_t *t;
+ u32 ti;
- /*
- * FIXME DUAL LOOP
- */
+ ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0, adj_index0, next0;
- const ip_adjacency_t * adj0;
- const dpo_id_t *dpo0;
- ip4_header_t * ip0;
- vlib_buffer_t * b0;
+ if (~0 == ti)
+ /* not one of ours */
+ return (0);
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
+ t = pool_elt_at_index(gm->tunnels, ti);
- b0 = vlib_get_buffer(vm, bi0);
- ip0 = vlib_buffer_get_current (b0);
+ vec_validate(rewrite, sizeof(*h)-1);
+ h = (ip4_and_gre_header_t*)rewrite;
+ h->gre.protocol = clib_host_to_net_u16(gre_proto_from_vnet_link(link_type));
- /* Fixup the checksum and len fields in the GRE tunnel encap
- * that was applied at the midchain node */
- ip0->length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
- ip0->checksum = ip4_header_checksum (ip0);
+ h->ip4.ip_version_and_header_length = 0x45;
+ h->ip4.ttl = 254;
+ h->ip4.protocol = IP_PROTOCOL_GRE;
+ /* fixup ip4 header length and checksum after-the-fact */
+ h->ip4.src_address.as_u32 = t->tunnel_src.as_u32;
+ h->ip4.dst_address.as_u32 = t->tunnel_dst.as_u32;
+ h->ip4.checksum = ip4_header_checksum (&h->ip4);
- /* Follow the DPO on which the midchain is stacked */
- adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
- adj0 = adj_get(adj_index0);
- dpo0 = &adj0->sub_type.midchain.next_dpo;
- next0 = dpo0->dpoi_next_node;
- vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ return (rewrite);
+}
- if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- gre_tx_trace_t *tr = vlib_add_trace (vm, node,
- b0, sizeof (*tr));
- tr->tunnel_id = t - gm->tunnels;
- tr->length = ip0->length;
- tr->src.as_u32 = ip0->src_address.as_u32;
- tr->dst.as_u32 = ip0->dst_address.as_u32;
- }
+void
+gre_fixup (vlib_main_t *vm,
+ ip_adjacency_t *adj,
+ vlib_buffer_t *b0)
+{
+ ip4_header_t * ip0;
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
+ ip0 = vlib_buffer_get_current (b0);
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
+ /* Fixup the checksum and len fields in the GRE tunnel encap
+ * that was applied at the midchain node */
+ ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ ip0->checksum = ip4_header_checksum (ip0);
+}
- vlib_node_increment_counter (vm, gre_input_node.index,
- GRE_ERROR_PKTS_ENCAP, frame->n_vectors);
+void
+gre_update_adj (vnet_main_t * vnm,
+ u32 sw_if_index,
+ adj_index_t ai)
+{
+ adj_nbr_midchain_update_rewrite (ai, gre_fixup,
+ ADJ_MIDCHAIN_FLAG_NONE,
+ gre_build_rewrite(vnm, sw_if_index,
+ adj_get_link_type(ai),
+ NULL));
- return frame->n_vectors;
+ gre_tunnel_stack(ai);
}
+/**
+ * @brief TX function. Only called for L2. L3 traffic uses the adj-midchains
+ */
static uword
-gre_l2_interface_tx (vlib_main_t * vm,
+gre_interface_tx (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
@@ -330,7 +292,7 @@ gre_l2_interface_tx (vlib_main_t * vm,
b0 = vlib_get_buffer(vm, bi0);
- vnet_buffer(b0)->ip.adj_index[VLIB_TX] = gt->adj_index[FIB_LINK_ETHERNET];
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = gt->l2_adj_index;
if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -356,38 +318,6 @@ gre_l2_interface_tx (vlib_main_t * vm,
return frame->n_vectors;
}
-static clib_error_t *
-gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
-{
- gre_main_t * gm = &gre_main;
- vnet_hw_interface_t * hi;
- gre_tunnel_t *t;
- u32 ti;
-
- hi = vnet_get_hw_interface (vnm, hw_if_index);
-
- if (NULL == gm->tunnel_index_by_sw_if_index ||
- hi->sw_if_index >= vec_len(gm->tunnel_index_by_sw_if_index))
- return (NULL);
-
- ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index];
-
- if (~0 == ti)
- /* not one of ours */
- return (NULL);
-
- t = pool_elt_at_index(gm->tunnels, ti);
-
- if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
- vnet_hw_interface_set_flags (vnm, hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP);
- else
- vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */);
-
- gre_tunnel_stack(t);
-
- return /* no error */ 0;
-}
-
static u8 * format_gre_tunnel_name (u8 * s, va_list * args)
{
u32 dev_instance = va_arg (*args, u32);
@@ -403,15 +333,6 @@ static u8 * format_gre_device (u8 * s, va_list * args)
return s;
}
-static u8 * format_gre_l2_device (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- CLIB_UNUSED (int verbose) = va_arg (*args, int);
-
- s = format (s, "GRE L2-tunnel: id %d\n", dev_instance);
- return s;
-}
-
VNET_DEVICE_CLASS (gre_device_class) = {
.name = "GRE tunnel device",
.format_device_name = format_gre_tunnel_name,
@@ -427,27 +348,13 @@ VNET_DEVICE_CLASS (gre_device_class) = {
VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_device_class,
gre_interface_tx)
-VNET_DEVICE_CLASS (gre_l2_device_class) = {
- .name = "GRE L2 tunnel device",
- .format_device_name = format_gre_tunnel_name,
- .format_device = format_gre_l2_device,
- .format_tx_trace = format_gre_tx_trace,
- .tx_function = gre_l2_interface_tx,
- .admin_up_down_function = gre_interface_admin_up_down,
-#ifdef SOON
- .clear counter = 0;
-#endif
-};
-
-VLIB_DEVICE_TX_FUNCTION_MULTIARCH (gre_l2_device_class,
- gre_l2_interface_tx)
-
-
VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = {
.name = "GRE",
.format_header = format_gre_header_with_length,
.unformat_header = unformat_gre_header,
- .set_rewrite = gre_set_rewrite,
+ .build_rewrite = gre_build_rewrite,
+ .update_adjacency = gre_update_adj,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
static void add_protocol (gre_main_t * gm,
diff --git a/vnet/vnet/gre/gre.h b/vnet/vnet/gre/gre.h
index d1a6f319..a0ee9ad2 100644
--- a/vnet/vnet/gre/gre.h
+++ b/vnet/vnet/gre/gre.h
@@ -86,14 +86,14 @@ typedef struct {
u32 sibling_index;
/**
- * The index of the midchain adjacency created for this tunnel
+ * on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain
*/
- adj_index_t adj_index[FIB_LINK_NUM];
+ u32 l2_tx_arc;
/**
- * on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain
+ * an L2 tunnel always requires an L2 midchain. cache here for DP.
*/
- u32 l2_tx_arc;
+ adj_index_t l2_adj_index;
} gre_tunnel_t;
typedef struct {
@@ -142,7 +142,14 @@ gre_register_input_type (vlib_main_t * vm,
gre_protocol_t protocol,
u32 node_index);
-extern void gre_tunnel_stack (gre_tunnel_t *gt);
+extern clib_error_t * gre_interface_admin_up_down (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags);
+
+extern void gre_tunnel_stack (adj_index_t ai);
+extern void gre_update_adj (vnet_main_t * vnm,
+ u32 sw_if_index,
+ adj_index_t ai);
format_function_t format_gre_protocol;
format_function_t format_gre_header;
diff --git a/vnet/vnet/gre/interface.c b/vnet/vnet/gre/interface.c
index 0550c0bd..397a0427 100644
--- a/vnet/vnet/gre/interface.c
+++ b/vnet/vnet/gre/interface.c
@@ -21,6 +21,7 @@
#include <vnet/ip/format.h>
#include <vnet/fib/ip4_fib.h>
#include <vnet/adj/adj_midchain.h>
+#include <vnet/adj/adj_nbr.h>
#include <vnet/mpls/mpls.h>
static inline u64
@@ -36,7 +37,6 @@ static u8 *
format_gre_tunnel (u8 * s, va_list * args)
{
gre_tunnel_t * t = va_arg (*args, gre_tunnel_t *);
- int detail = va_arg (*args, int);
gre_main_t * gm = &gre_main;
s = format (s,
@@ -46,14 +46,6 @@ format_gre_tunnel (u8 * s, va_list * args)
format_ip4_address, &t->tunnel_dst,
(t->teb ? "teb" : "ip"),
t->outer_fib_index);
- if (detail)
- {
- s = format (s, "\n fib-entry:%d adj-ip4:%d adj-ip6:%d adj-mpls:%d",
- t->fib_entry_index,
- t->adj_index[FIB_LINK_IP4],
- t->adj_index[FIB_LINK_IP6],
- t->adj_index[FIB_LINK_MPLS]);
- }
return s;
}
@@ -113,32 +105,68 @@ gre_tunnel_from_fib_node (fib_node_t *node)
* 'stack' (resolve the recursion for) the tunnel's midchain adjacency
*/
void
-gre_tunnel_stack (gre_tunnel_t *gt)
+gre_tunnel_stack (adj_index_t ai)
{
- fib_link_t linkt;
+ gre_main_t * gm = &gre_main;
+ ip_adjacency_t *adj;
+ gre_tunnel_t *gt;
+ u32 sw_if_index;
+
+ adj = adj_get(ai);
+ sw_if_index = adj->rewrite_header.sw_if_index;
+
+ if ((vec_len(gm->tunnel_index_by_sw_if_index) <= sw_if_index) ||
+ (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
+ return; /* sw_if_index is beyond the index vector, or has no tunnel entry */
+
+ gt = pool_elt_at_index(gm->tunnels,
+ gm->tunnel_index_by_sw_if_index[sw_if_index]);
/*
* find the adjacency that is contributed by the FIB entry
* that this tunnel resovles via, and use it as the next adj
* in the midchain
*/
- FOR_EACH_FIB_LINK(linkt)
+ if (vnet_hw_interface_get_flags(vnet_get_main(),
+ gt->hw_if_index) &
+ VNET_HW_INTERFACE_FLAG_LINK_UP)
{
- if (ADJ_INDEX_INVALID != gt->adj_index[linkt])
- {
- if (vnet_hw_interface_get_flags(vnet_get_main(),
- gt->hw_if_index) &
- VNET_HW_INTERFACE_FLAG_LINK_UP)
- {
- adj_nbr_midchain_stack(
- gt->adj_index[linkt],
- fib_entry_contribute_ip_forwarding(gt->fib_entry_index));
- }
- else
- {
- adj_nbr_midchain_unstack(gt->adj_index[linkt]);
- }
- }
+ adj_nbr_midchain_stack(
+ ai,
+ fib_entry_contribute_ip_forwarding(gt->fib_entry_index));
+ }
+ else
+ {
+ adj_nbr_midchain_unstack(ai);
+ }
+}
+
+/**
+ * @brief Call back when restacking all adjacencies on a GRE interface
+ */
+static adj_walk_rc_t
+gre_adj_walk_cb (adj_index_t ai,
+ void *ctx)
+{
+ gre_tunnel_stack(ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static void
+gre_tunnel_restack (gre_tunnel_t *gt)
+{
+ fib_protocol_t proto;
+
+ /*
+ * walk all the adjacencies on the GRE interface and restack them
+ */
+ FOR_EACH_FIB_IP_PROTOCOL(proto)
+ {
+ adj_nbr_walk(gt->sw_if_index,
+ proto,
+ gre_adj_walk_cb,
+ NULL);
}
}
@@ -147,9 +175,9 @@ gre_tunnel_stack (gre_tunnel_t *gt)
*/
static fib_node_back_walk_rc_t
gre_tunnel_back_walk (fib_node_t *node,
- fib_node_back_walk_ctx_t *ctx)
+ fib_node_back_walk_ctx_t *ctx)
{
- gre_tunnel_stack(gre_tunnel_from_fib_node(node));
+ gre_tunnel_restack(gre_tunnel_from_fib_node(node));
return (FIB_NODE_BACK_WALK_CONTINUE);
}
@@ -192,63 +220,6 @@ const static fib_node_vft_t gre_vft = {
.fnv_back_walk = gre_tunnel_back_walk,
};
-static int
-gre_proto_from_fib_link (fib_link_t link)
-{
- switch (link)
- {
- case FIB_LINK_IP4:
- return (GRE_PROTOCOL_ip4);
- case FIB_LINK_IP6:
- return (GRE_PROTOCOL_ip6);
- case FIB_LINK_MPLS:
- return (GRE_PROTOCOL_mpls_unicast);
- case FIB_LINK_ETHERNET:
- return (GRE_PROTOCOL_teb);
- }
- ASSERT(0);
- return (GRE_PROTOCOL_ip4);
-}
-
-static u8 *
-gre_rewrite (gre_tunnel_t * t,
- fib_link_t link)
-{
- ip4_and_gre_header_t * h0;
- u8 * rewrite_data = 0;
-
- vec_validate_init_empty (rewrite_data, sizeof (*h0) - 1, 0);
-
- h0 = (ip4_and_gre_header_t *) rewrite_data;
-
- h0->gre.protocol = clib_host_to_net_u16(gre_proto_from_fib_link(link));
-
- h0->ip4.ip_version_and_header_length = 0x45;
- h0->ip4.ttl = 254;
- h0->ip4.protocol = IP_PROTOCOL_GRE;
- /* $$$ fixup ip4 header length and checksum after-the-fact */
- h0->ip4.src_address.as_u32 = t->tunnel_src.as_u32;
- h0->ip4.dst_address.as_u32 = t->tunnel_dst.as_u32;
- h0->ip4.checksum = ip4_header_checksum (&h0->ip4);
-
- return (rewrite_data);
-}
-
-static void
-gre_fixup (vlib_main_t *vm,
- ip_adjacency_t *adj,
- vlib_buffer_t *b0)
-{
- ip4_header_t * ip0;
-
- ip0 = vlib_buffer_get_current (b0);
-
- /* Fixup the checksum and len fields in the GRE tunnel encap
- * that was applied at the midchain node */
- ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
- ip0->checksum = ip4_header_checksum (ip0);
-}
-
static int
vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
u32 * sw_if_indexp)
@@ -262,8 +233,6 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
u32 outer_fib_index;
u8 address[6];
clib_error_t *error;
- fib_link_t linkt;
- u8 *rewrite;
outer_fib_index = ip4_fib_index_from_table_id(a->outer_fib_id);
@@ -278,10 +247,6 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES);
memset (t, 0, sizeof (*t));
fib_node_init(&t->node, FIB_NODE_TYPE_GRE_TUNNEL);
- FOR_EACH_FIB_LINK(linkt)
- {
- t->adj_index[linkt] = ADJ_INDEX_INVALID;
- }
if (vec_len (gm->free_gre_tunnel_hw_if_indices) > 0) {
vnet_interface_main_t * im = &vnm->interface_main;
@@ -321,10 +286,11 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
address[3] = 0xd0;
address[4] = t - gm->tunnels;
- error = ethernet_register_interface
- (vnm,
- gre_l2_device_class.index, t - gm->tunnels, address, &hw_if_index,
- 0);
+ error = ethernet_register_interface(vnm,
+ gre_device_class.index,
+ t - gm->tunnels, address,
+ &hw_if_index,
+ 0);
if (error)
{
@@ -337,10 +303,11 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
hi->tx_node_index,
"adj-l2-midchain");
} else {
- hw_if_index = vnet_register_interface
- (vnm, gre_device_class.index, t - gm->tunnels,
- gre_hw_interface_class.index,
- t - gm->tunnels);
+ hw_if_index = vnet_register_interface(vnm,
+ gre_device_class.index,
+ t - gm->tunnels,
+ gre_hw_interface_class.index,
+ t - gm->tunnels);
}
hi = vnet_get_hw_interface (vnm, hw_if_index);
sw_if_index = hi->sw_if_index;
@@ -395,48 +362,18 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
FIB_NODE_TYPE_GRE_TUNNEL,
t - gm->tunnels);
- /*
- * create and update the midchain adj this tunnel sources.
- * We could be smarter here and trigger this on an interface proto enable,
- * like we do for MPLS.
- */
+ clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
+ clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst));
+
if (t->teb)
{
- t->adj_index[FIB_LINK_ETHERNET] = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
- FIB_LINK_ETHERNET,
- &zero_addr,
- sw_if_index);
-
- rewrite = gre_rewrite(t, FIB_LINK_ETHERNET);
- adj_nbr_midchain_update_rewrite(t->adj_index[FIB_LINK_ETHERNET],
- gre_fixup,
- ADJ_MIDCHAIN_FLAG_NO_COUNT,
- rewrite);
- vec_free(rewrite);
- }
- else
- {
- FOR_EACH_FIB_IP_LINK (linkt)
- {
- t->adj_index[linkt] = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
- linkt,
- &zero_addr,
- sw_if_index);
-
- rewrite = gre_rewrite(t, linkt);
- adj_nbr_midchain_update_rewrite(t->adj_index[linkt],
- gre_fixup,
- ADJ_MIDCHAIN_FLAG_NONE,
- rewrite);
- vec_free(rewrite);
- }
- }
-
- t->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID;
+ t->l2_adj_index = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+ FIB_LINK_ETHERNET,
+ &zero_addr,
+ sw_if_index);
- clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
- clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst));
- gre_tunnel_stack(t);
+ gre_update_adj(vnm, t->sw_if_index, t->l2_adj_index);
+ }
if (sw_if_indexp)
*sw_if_indexp = sw_if_index;
@@ -451,7 +388,6 @@ vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t *a,
gre_main_t * gm = &gre_main;
vnet_main_t * vnm = gm->vnet_main;
gre_tunnel_t * t;
- fib_link_t linkt;
u32 sw_if_index;
t = gre_tunnel_db_find(&a->src, &a->dst, a->outer_fib_id);
@@ -472,11 +408,6 @@ vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t *a,
fib_table_entry_delete_index(t->fib_entry_index,
FIB_SOURCE_RR);
- FOR_EACH_FIB_LINK(linkt)
- {
- adj_unlock(t->adj_index[linkt]);
- }
-
gre_tunnel_db_remove(t);
fib_node_deinit(&t->node);
pool_put (gm->tunnels, t);
@@ -497,43 +428,36 @@ vnet_gre_add_del_tunnel (vnet_gre_add_del_tunnel_args_t *a,
return (vnet_gre_tunnel_delete(a, sw_if_indexp));
}
-static void
-gre_sw_interface_mpls_state_change (u32 sw_if_index,
- u32 is_enable)
+clib_error_t *
+gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
{
- gre_main_t *gm = &gre_main;
+ gre_main_t * gm = &gre_main;
+ vnet_hw_interface_t * hi;
gre_tunnel_t *t;
- u8 *rewrite;
+ u32 ti;
- if ((vec_len(gm->tunnel_index_by_sw_if_index) < sw_if_index) ||
- (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
- return;
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
- t = pool_elt_at_index(gm->tunnels,
- gm->tunnel_index_by_sw_if_index[sw_if_index]);
+ if (NULL == gm->tunnel_index_by_sw_if_index ||
+ hi->sw_if_index >= vec_len(gm->tunnel_index_by_sw_if_index))
+ return (NULL);
- if (is_enable)
- {
- t->adj_index[FIB_LINK_MPLS] =
- adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
- FIB_LINK_MPLS,
- &zero_addr,
- sw_if_index);
-
- rewrite = gre_rewrite(t, FIB_LINK_MPLS);
- adj_nbr_midchain_update_rewrite(t->adj_index[FIB_LINK_MPLS],
- gre_fixup,
- ADJ_MIDCHAIN_FLAG_NONE,
- rewrite);
- vec_free(rewrite);
- }
+ ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index];
+
+ if (~0 == ti)
+ /* not one of ours */
+ return (NULL);
+
+ t = pool_elt_at_index(gm->tunnels, ti);
+
+ if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ vnet_hw_interface_set_flags (vnm, hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP);
else
- {
- adj_unlock(t->adj_index[FIB_LINK_MPLS]);
- t->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID;
- }
+ vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */);
- gre_tunnel_stack(t);
+ gre_tunnel_restack(t);
+
+ return /* no error */ 0;
}
static clib_error_t *
@@ -637,14 +561,14 @@ show_gre_tunnel_command_fn (vlib_main_t * vm,
{
pool_foreach (t, gm->tunnels,
({
- vlib_cli_output (vm, "%U", format_gre_tunnel, t, 0);
+ vlib_cli_output (vm, "%U", format_gre_tunnel, t);
}));
}
else
{
t = pool_elt_at_index(gm->tunnels, ti);
- vlib_cli_output (vm, "%U", format_gre_tunnel, t, 1);
+ vlib_cli_output (vm, "%U", format_gre_tunnel, t);
}
return 0;
@@ -658,9 +582,6 @@ VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = {
/* force inclusion from application's main.c */
clib_error_t *gre_interface_init (vlib_main_t *vm)
{
- vec_add1(mpls_main.mpls_interface_state_change_callbacks,
- gre_sw_interface_mpls_state_change);
-
fib_node_register_type(FIB_NODE_TYPE_GRE_TUNNEL, &gre_vft);
return 0;
diff --git a/vnet/vnet/hdlc/hdlc.c b/vnet/vnet/hdlc/hdlc.c
index 9997ddc5..174085ac 100644
--- a/vnet/vnet/hdlc/hdlc.c
+++ b/vnet/vnet/hdlc/hdlc.c
@@ -167,42 +167,41 @@ unformat_hdlc_header (unformat_input_t * input, va_list * args)
return 1;
}
-static uword hdlc_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void * dst_address,
- void * rewrite,
- uword max_rewrite_bytes)
+static u8*
+hdlc_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_address)
{
- hdlc_header_t * h = rewrite;
+ hdlc_header_t * h;
+ u8* rewrite = NULL;
hdlc_protocol_t protocol;
- if (max_rewrite_bytes < sizeof (h[0]))
- return 0;
-
- switch (l3_type) {
-#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = HDLC_PROTOCOL_##b; break
+ switch (link_type) {
+#define _(a,b) case VNET_LINK_##a: protocol = HDLC_PROTOCOL_##b; break
_ (IP4, ip4);
_ (IP6, ip6);
- _ (MPLS_UNICAST, mpls_unicast);
- _ (MPLS_MULTICAST, mpls_multicast);
+ _ (MPLS, mpls_unicast);
#undef _
default:
- return 0;
+ return (NULL);
}
+ vec_validate(rewrite, sizeof(*h)-1);
+ h = (hdlc_header_t *)rewrite;
h->address = 0x0f;
h->control = 0x00;
h->protocol = clib_host_to_net_u16 (protocol);
- return sizeof (h[0]);
+ return (rewrite);
}
VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = {
.name = "HDLC",
.format_header = format_hdlc_header_with_length,
.unformat_header = unformat_hdlc_header,
- .set_rewrite = hdlc_set_rewrite,
+ .build_rewrite = hdlc_build_rewrite,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
static void add_protocol (hdlc_main_t * pm,
diff --git a/vnet/vnet/interface.c b/vnet/vnet/interface.c
index 08db6832..941ab170 100644
--- a/vnet/vnet/interface.c
+++ b/vnet/vnet/interface.c
@@ -40,6 +40,7 @@
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
#include <vnet/fib/ip6_fib.h>
+#include <vnet/adj/adj.h>
#define VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE (1 << 0)
#define VNET_INTERFACE_SET_FLAGS_HELPER_WANT_REDISTRIBUTE (1 << 1)
@@ -1044,6 +1045,16 @@ vnet_hw_interface_compare (vnet_main_t * vnm,
return (word) h0->hw_instance - (word) h1->hw_instance;
}
+int
+vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ vnet_hw_interface_class_t *hc =
+ vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+
+ return (hc->flags & VNET_HW_INTERFACE_CLASS_FLAG_P2P);
+}
+
clib_error_t *
vnet_interface_init (vlib_main_t * vm)
{
@@ -1120,6 +1131,12 @@ vnet_interface_init (vlib_main_t * vm)
{
c->index = vec_len (im->hw_interface_classes);
hash_set_mem (im->hw_interface_class_by_name, c->name, c->index);
+
+ if (NULL == c->build_rewrite)
+ c->build_rewrite = default_build_rewrite;
+ if (NULL == c->update_adjacency)
+ c->update_adjacency = default_update_adjacency;
+
vec_add1 (im->hw_interface_classes, c[0]);
c = c->next_class_registration;
}
@@ -1287,6 +1304,48 @@ vnet_hw_interface_change_mac_address (vnet_main_t * vnm, u32 hw_if_index,
(vnm, hw_if_index, mac_address);
}
+vnet_l3_packet_type_t
+vnet_link_to_l3_proto (vnet_link_t link)
+{
+ switch (link)
+ {
+ case VNET_LINK_IP4:
+ return (VNET_L3_PACKET_TYPE_IP4);
+ case VNET_LINK_IP6:
+ return (VNET_L3_PACKET_TYPE_IP6);
+ case VNET_LINK_MPLS:
+ return (VNET_L3_PACKET_TYPE_MPLS_UNICAST);
+ case VNET_LINK_ARP:
+ return (VNET_L3_PACKET_TYPE_ARP);
+ case VNET_LINK_ETHERNET:
+ ASSERT (0);
+ break;
+ }
+ ASSERT (0);
+ return (0);
+}
+
+u8 *
+default_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address)
+{
+ return (NULL);
+}
+
+void
+default_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+ u8 *rewrite;
+
+ rewrite = vnet_build_rewrite_for_sw_interface (vnm, sw_if_index,
+ adj_get_link_type (ai),
+ NULL);
+
+ adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE, rewrite);
+}
+
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/vnet/vnet/interface.h b/vnet/vnet/interface.h
index 245b86f5..c5a79472 100644
--- a/vnet/vnet/interface.h
+++ b/vnet/vnet/interface.h
@@ -41,10 +41,12 @@
#define included_vnet_interface_h
#include <vnet/unix/pcap.h>
+#include <vnet/l3_types.h>
struct vnet_main_t;
struct vnet_hw_interface_t;
struct vnet_sw_interface_t;
+struct ip46_address_t;
/* Interface up/down callback. */
typedef clib_error_t *(vnet_interface_function_t)
@@ -196,6 +198,39 @@ __VA_ARGS__ vnet_device_class_t x
{ dev.tx_function = fn ## _multiarch_select(); }
#endif
+/**
+ * Link Type: A description of the protocol of packets on the link.
+ * On an ethernet link this maps directly into the ethertype. On a GRE tunnel
+ * it maps to the GRE-proto, etc. for other link types.
+ */
+typedef enum vnet_link_t_
+{
+#if CLIB_DEBUG > 0
+ VNET_LINK_IP4 = 1,
+#else
+ VNET_LINK_IP4 = 0,
+#endif
+ VNET_LINK_IP6,
+ VNET_LINK_MPLS,
+ VNET_LINK_ETHERNET,
+ VNET_LINK_ARP,
+} __attribute__ ((packed)) vnet_link_t;
+
+/**
+ * @brief Convert a link type to an Ethertype
+ */
+extern vnet_l3_packet_type_t vnet_link_to_l3_proto (vnet_link_t link);
+
+/**
+ * @brief Attributes assignable to a HW interface Class.
+ */
+typedef enum vnet_hw_interface_class_flags_t_
+{
+ /**
+ * @brief a point-to-point interface
+ */
+ VNET_HW_INTERFACE_CLASS_FLAG_P2P = (1 << 0),
+} vnet_hw_interface_class_flags_t;
/* Layer-2 (e.g. Ethernet) interface class. */
typedef struct _vnet_hw_interface_class
@@ -206,6 +241,9 @@ typedef struct _vnet_hw_interface_class
/* Class name (e.g. "Ethernet"). */
char *name;
+ /* Flags */
+ vnet_hw_interface_class_flags_t flags;
+
/* Function to call when hardware interface is added/deleted. */
vnet_interface_function_t *interface_add_del_function;
@@ -233,13 +271,16 @@ typedef struct _vnet_hw_interface_class
/* Parser for packet header for e.g. rewrite string. */
unformat_function_t *unformat_header;
- /* Forms adjacency for given l3 packet type and destination address.
- Returns number of bytes in adjacency. */
- uword (*set_rewrite) (struct vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_packet_type,
- void *dst_address,
- void *rewrite, uword max_rewrite_bytes);
+ /* Builds a rewrite string for the interface to the destination
+ * for the payload/link type. */
+ u8 *(*build_rewrite) (struct vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_hw_address);
+
+ /* Update an adjacency added by FIB (as opposed to via the
+ * neighbour resolution protocol). */
+ void (*update_adjacency) (struct vnet_main_t * vnm,
+ u32 sw_if_index, u32 adj_index);
uword (*is_valid_class_for_interface) (struct vnet_main_t * vnm,
u32 hw_if_index,
@@ -255,6 +296,20 @@ typedef struct _vnet_hw_interface_class
} vnet_hw_interface_class_t;
+/**
+ * @brief Return a complete, zero-length (aka dummy) rewrite
+ */
+extern u8 *default_build_rewrite (struct vnet_main_t *vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_hw_address);
+
+/**
+ * @brief Default adjacency update function
+ */
+extern void default_update_adjacency (struct vnet_main_t *vnm,
+ u32 sw_if_index, u32 adj_index);
+
#define VNET_HW_INTERFACE_CLASS(x,...) \
__VA_ARGS__ vnet_hw_interface_class_t x; \
static void __vnet_add_hw_interface_class_registration_##x (void) \
diff --git a/vnet/vnet/interface_funcs.h b/vnet/vnet/interface_funcs.h
index f603a03e..424e1764 100644
--- a/vnet/vnet/interface_funcs.h
+++ b/vnet/vnet/interface_funcs.h
@@ -129,6 +129,22 @@ vnet_hw_interface_get_flags (vnet_main_t * vnm, u32 hw_if_index)
}
always_inline uword
+vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index,
+ vlib_rx_or_tx_t dir)
+{
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ return hw->max_l3_packet_bytes[dir];
+}
+
+always_inline uword
+vnet_sw_interface_get_mtu (vnet_main_t * vnm, u32 sw_if_index,
+ vlib_rx_or_tx_t dir)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ return (hw->max_l3_packet_bytes[dir]);
+}
+
+always_inline uword
vnet_hw_interface_is_link_up (vnet_main_t * vnm, u32 hw_if_index)
{
return (vnet_hw_interface_get_flags (vnm, hw_if_index) &
diff --git a/vnet/vnet/ip/format.h b/vnet/vnet/ip/format.h
index 0d0eb6c9..8a76a61f 100644
--- a/vnet/vnet/ip/format.h
+++ b/vnet/vnet/ip/format.h
@@ -51,6 +51,7 @@ unformat_function_t unformat_tcp_udp_port;
typedef enum format_ip_adjacency_flags_t_
{
FORMAT_IP_ADJACENCY_NONE,
+ FORMAT_IP_ADJACENCY_BRIEF = FORMAT_IP_ADJACENCY_NONE,
FORMAT_IP_ADJACENCY_DETAIL = (1 << 0),
} format_ip_adjacency_flags_t;
diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c
index 21973453..b0390b8a 100644
--- a/vnet/vnet/ip/ip4_forward.c
+++ b/vnet/vnet/ip/ip4_forward.c
@@ -1118,7 +1118,7 @@ static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
t->fib_index, t->dpo_index, format_ip_adjacency,
- vnm, t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
+ t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
t->flow_hash);
s = format (s, "\n%U%U",
format_white_space, indent,
@@ -1890,6 +1890,13 @@ ip4_arp_inline (vlib_main_t * vm,
p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
+ /*
+ * the adj has been updated to a rewrite but the node the DPO that got
+ * us here hasn't - yet. no big deal. we'll drop while we wait.
+ */
+ if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
+ continue;
+
if (drop0)
continue;
diff --git a/vnet/vnet/ip/ip6.h b/vnet/vnet/ip/ip6.h
index ab0e650b..78546120 100644
--- a/vnet/vnet/ip/ip6.h
+++ b/vnet/vnet/ip/ip6.h
@@ -375,6 +375,10 @@ void ip6_register_protocol (u32 protocol, u32 node_index);
serialize_function_t serialize_vnet_ip6_main, unserialize_vnet_ip6_main;
+void ip6_ethernet_update_adjacency (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 ai);
+
int
vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
u32 sw_if_index,
diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c
index d48ccad6..2487af94 100644
--- a/vnet/vnet/ip/ip6_forward.c
+++ b/vnet/vnet/ip/ip6_forward.c
@@ -916,7 +916,7 @@ static u8 * format_ip6_rewrite_trace (u8 * s, va_list * args)
s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
t->fib_index, t->adj_index, format_ip_adjacency,
- vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
+ t->adj_index, FORMAT_IP_ADJACENCY_NONE,
t->flow_hash);
s = format (s, "\n%U%U",
format_white_space, indent,
@@ -1605,6 +1605,13 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
if (drop0)
continue;
+ /*
+ * the adj has been updated to a rewrite but the node the DPO that got
+ * us here hasn't - yet. no big deal. we'll drop while we wait.
+ */
+ if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
+ continue;
+
{
u32 bi0 = 0;
icmp6_neighbor_solicitation_header_t * h0;
@@ -2167,10 +2174,6 @@ VLIB_REGISTER_NODE (ip6_midchain_node) = {
.format_trace = format_ip6_forward_next_trace,
.sibling_of = "ip6-rewrite",
-
- .next_nodes = {
- [IP6_REWRITE_NEXT_DROP] = "error-drop",
- },
};
VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain)
diff --git a/vnet/vnet/ip/ip6_neighbor.c b/vnet/vnet/ip/ip6_neighbor.c
index e042385d..3aef2326 100644
--- a/vnet/vnet/ip/ip6_neighbor.c
+++ b/vnet/vnet/ip/ip6_neighbor.c
@@ -51,7 +51,7 @@ typedef struct {
#define IP6_NEIGHBOR_FLAG_STATIC (1 << 0)
#define IP6_NEIGHBOR_FLAG_DYNAMIC (2 << 0)
u64 cpu_time_last_updated;
- adj_index_t adj_index;
+ fib_node_index_t fib_entry_index;
} ip6_neighbor_t;
/* advertised prefix option */
@@ -267,6 +267,7 @@ ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm,
{
n = pool_elt_at_index (nm->neighbor_pool, to_delete[i]);
mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
+ fib_table_entry_delete_index (n->fib_entry_index, FIB_SOURCE_ADJ);
pool_put (nm->neighbor_pool, n);
}
@@ -342,48 +343,182 @@ static void set_unset_ip6_neighbor_rpc
#endif
static void
-ip6_nd_mk_complete (ip6_neighbor_t * nbr)
+ip6_nbr_probe (ip_adjacency_t *adj)
{
- fib_prefix_t pfx = {
- .fp_len = 128,
- .fp_proto = FIB_PROTOCOL_IP6,
- .fp_addr = {
- .ip6 = nbr->key.ip6_address,
- },
- };
- ip6_main_t *im;
- u32 fib_index;
+ icmp6_neighbor_solicitation_header_t * h;
+ vnet_main_t * vnm = vnet_get_main();
+ ip6_main_t * im = &ip6_main;
+ ip_interface_address_t * ia;
+ ip6_address_t * dst, *src;
+ vnet_hw_interface_t * hi;
+ vnet_sw_interface_t * si;
+ vlib_buffer_t * b;
+ int bogus_length;
+ vlib_main_t * vm;
+ u32 bi = 0;
+
+ vm = vlib_get_main();
- im = &ip6_main;
- fib_index = im->fib_index_by_sw_if_index[nbr->key.sw_if_index];
+ si = vnet_get_sw_interface(vnm, adj->rewrite_header.sw_if_index);
+ dst = &adj->sub_type.nbr.next_hop.ip6;
- /* only once please */
- if (ADJ_INDEX_INVALID == nbr->adj_index)
+ if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
{
- nbr->adj_index =
- adj_nbr_add_or_lock_w_rewrite(FIB_PROTOCOL_IP6,
- FIB_LINK_IP6,
- &pfx.fp_addr,
- nbr->key.sw_if_index,
- nbr->link_layer_address);
- ASSERT(ADJ_INDEX_INVALID != nbr->adj_index);
-
- fib_table_entry_update_one_path(fib_index,
- &pfx,
- FIB_SOURCE_ADJ,
- FIB_ENTRY_FLAG_NONE,
- FIB_PROTOCOL_IP6,
- &pfx.fp_addr,
- nbr->key.sw_if_index,
- ~0,
- 1,
- MPLS_LABEL_INVALID,
- FIB_ROUTE_PATH_FLAG_NONE);
+ return;
+ }
+ src = ip6_interface_address_matching_destination(im, dst,
+ adj->rewrite_header.sw_if_index,
+ &ia);
+ if (! src)
+ {
+ return;
+ }
+
+ h = vlib_packet_template_get_packet(vm,
+ &im->discover_neighbor_packet_template,
+ &bi);
+
+ hi = vnet_get_sup_hw_interface(vnm, adj->rewrite_header.sw_if_index);
+
+ h->ip.dst_address.as_u8[13] = dst->as_u8[13];
+ h->ip.dst_address.as_u8[14] = dst->as_u8[14];
+ h->ip.dst_address.as_u8[15] = dst->as_u8[15];
+ h->ip.src_address = src[0];
+ h->neighbor.target_address = dst[0];
+
+ clib_memcpy (h->link_layer_option.ethernet_address,
+ hi->hw_address,
+ vec_len(hi->hw_address));
+
+ h->neighbor.icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum(vm, 0, &h->ip, &bogus_length);
+ ASSERT(bogus_length == 0);
+
+ b = vlib_get_buffer (vm, bi);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] =
+ vnet_buffer (b)->sw_if_index[VLIB_TX] =
+ adj->rewrite_header.sw_if_index;
+
+ /* Add encapsulation string for software interface (e.g. ethernet header). */
+ vnet_rewrite_one_header(adj[0], h, sizeof (ethernet_header_t));
+ vlib_buffer_advance(b, -adj->rewrite_header.data_bytes);
+
+ {
+ vlib_frame_t * f = vlib_get_frame_to_node(vm, hi->output_node_index);
+ u32 * to_next = vlib_frame_vector_args(f);
+ to_next[0] = bi;
+ f->n_vectors = 1;
+ vlib_put_frame_to_node(vm, hi->output_node_index, f);
+ }
+}
+
+static void
+ip6_nd_mk_complete (adj_index_t ai, ip6_neighbor_t * nbr)
+{
+ adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
+ ethernet_build_rewrite (vnet_get_main (),
+ nbr->key.sw_if_index,
+ adj_get_link_type(ai),
+ nbr->link_layer_address));
+}
+
+static void
+ip6_nd_mk_incomplete (adj_index_t ai, ip6_neighbor_t * nbr)
+{
+ adj_nbr_update_rewrite (
+ ai,
+ ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+ ethernet_build_rewrite (vnet_get_main (),
+ nbr->key.sw_if_index,
+ adj_get_link_type(ai),
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+}
+
+#define IP6_NBR_MK_KEY(k, sw_if_index, addr) \
+{ \
+ k.sw_if_index = sw_if_index; \
+ k.ip6_address = *addr; \
+ k.pad = 0; \
+}
+
+static ip6_neighbor_t *
+ip6_nd_find (u32 sw_if_index,
+ const ip6_address_t * addr)
+{
+ ip6_neighbor_main_t * nm = &ip6_neighbor_main;
+ ip6_neighbor_t * n = NULL;
+ ip6_neighbor_key_t k;
+ uword *p;
+
+ IP6_NBR_MK_KEY(k, sw_if_index, addr);
+
+ p = mhash_get (&nm->neighbor_index_by_key, &k);
+ if (p) {
+ n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+ }
+
+ return (n);
+}
+
+static adj_walk_rc_t
+ip6_nd_mk_complete_walk (adj_index_t ai, void *ctx)
+{
+ ip6_neighbor_t *nbr = ctx;
+
+ ip6_nd_mk_complete (ai, nbr);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static adj_walk_rc_t
+ip6_nd_mk_incomplete_walk (adj_index_t ai, void *ctx)
+{
+ ip6_neighbor_t *nbr = ctx;
+
+ ip6_nd_mk_incomplete (ai, nbr);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+void
+ip6_ethernet_update_adjacency (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 ai)
+{
+ ip6_neighbor_t *nbr;
+ ip_adjacency_t *adj;
+
+ adj = adj_get (ai);
+
+ nbr = ip6_nd_find (sw_if_index, &adj->sub_type.nbr.next_hop.ip6);
+
+ if (NULL != nbr)
+ {
+ adj_nbr_walk_nh6 (sw_if_index, &nbr->key.ip6_address,
+ ip6_nd_mk_complete_walk, nbr);
}
else
{
- adj_nbr_update_rewrite(nbr->adj_index,
- nbr->link_layer_address);
+ /*
+ * no matching ND entry.
+ * construct the rewrite required for an ND packet, and stick
+ * that in the adj's pipe to smoke.
+ */
+ adj_nbr_update_rewrite (ai,
+ ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+ ethernet_build_rewrite (vnm,
+ sw_if_index,
+ VNET_LINK_IP6,
+ VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+
+ /*
+ * since the FIB has added this adj for a route, it makes sense it may
+ * want to forward traffic sometime soon. Let's send a speculative ND.
+ * just one. If we were to do it periodically that wouldn't be bad either,
+ * but that's more code than i'm prepared to write at this time for
+ * relatively little reward.
+ */
+ ip6_nbr_probe (adj);
}
}
@@ -416,8 +551,6 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
k.ip6_address = a[0];
k.pad = 0;
- vlib_worker_thread_barrier_sync (vm);
-
p = mhash_get (&nm->neighbor_index_by_key, &k);
if (p) {
n = pool_elt_at_index (nm->neighbor_pool, p[0]);
@@ -429,11 +562,40 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
}
if (make_new_nd_cache_entry) {
+ fib_prefix_t pfx = {
+ .fp_len = 128,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_addr = {
+ .ip6 = k.ip6_address,
+ },
+ };
+ u32 fib_index;
+
pool_get (nm->neighbor_pool, n);
mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool,
/* old value */ 0);
n->key = k;
- n->adj_index = ADJ_INDEX_INVALID;
+
+ clib_memcpy (n->link_layer_address,
+ link_layer_address,
+ n_bytes_link_layer_address);
+
+ /*
+ * create the adj-fib. the entry in the FIB table for and to the peer.
+ */
+ fib_index = ip6_main.fib_index_by_sw_if_index[n->key.sw_if_index];
+ n->fib_entry_index =
+ fib_table_entry_update_one_path(fib_index,
+ &pfx,
+ FIB_SOURCE_ADJ,
+ FIB_ENTRY_FLAG_NONE,
+ FIB_PROTOCOL_IP6,
+ &pfx.fp_addr,
+ n->key.sw_if_index,
+ ~0,
+ 1,
+ MPLS_LABEL_INVALID,
+ FIB_ROUTE_PATH_FLAG_NONE);
}
else
{
@@ -445,20 +607,22 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
link_layer_address,
n_bytes_link_layer_address))
return -1;
- }
- /* Update time stamp and ethernet address. */
- clib_memcpy (n->link_layer_address,
- link_layer_address,
- n_bytes_link_layer_address);
+ clib_memcpy (n->link_layer_address,
+ link_layer_address,
+ n_bytes_link_layer_address);
+ }
+ /* Update time stamp and flags. */
n->cpu_time_last_updated = clib_cpu_time_now ();
if (is_static)
n->flags |= IP6_NEIGHBOR_FLAG_STATIC;
else
n->flags |= IP6_NEIGHBOR_FLAG_DYNAMIC;
- ip6_nd_mk_complete(n);
+ adj_nbr_walk_nh6 (sw_if_index,
+ &n->key.ip6_address,
+ ip6_nd_mk_complete_walk, n);
/* Customer(s) waiting for this address to be resolved? */
p = mhash_get (&nm->pending_resolutions_by_address, a);
@@ -507,44 +671,9 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
}
}
- vlib_worker_thread_barrier_release(vm);
return 0;
}
-static void
-ip6_nd_mk_incomplete (ip6_neighbor_t *nbr)
-{
- fib_prefix_t pfx = {
- .fp_len = 128,
- .fp_proto = FIB_PROTOCOL_IP6,
- .fp_addr = {
- .ip6 = nbr->key.ip6_address,
- },
- };
- u32 fib_index;
- ip6_main_t *im;
-
- im = &ip6_main;
- fib_index = im->fib_index_by_sw_if_index[nbr->key.sw_if_index];
-
- /*
- * revert the adj this ND entry sourced to incomplete
- */
- adj_nbr_update_rewrite(nbr->adj_index,
- NULL);
-
- /*
- * remove the FIB entry the ND entry sourced
- */
- fib_table_entry_delete(fib_index, &pfx, FIB_SOURCE_ADJ);
-
- /*
- * Unlock the adj now that the ARP entry is no longer a source
- */
- adj_unlock(nbr->adj_index);
- nbr->adj_index = ADJ_INDEX_INVALID;
-}
-
int
vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
u32 sw_if_index,
@@ -571,8 +700,6 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
k.ip6_address = a[0];
k.pad = 0;
- vlib_worker_thread_barrier_sync (vm);
-
p = mhash_get (&nm->neighbor_index_by_key, &k);
if (p == 0)
{
@@ -582,12 +709,16 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
n = pool_elt_at_index (nm->neighbor_pool, p[0]);
- ip6_nd_mk_incomplete(n);
+ adj_nbr_walk_nh6 (sw_if_index,
+ &n->key.ip6_address,
+ ip6_nd_mk_incomplete_walk,
+ n);
+
mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
+ fib_table_entry_delete_index (n->fib_entry_index, FIB_SOURCE_ADJ);
pool_put (nm->neighbor_pool, n);
out:
- vlib_worker_thread_barrier_release(vm);
return rv;
}
@@ -3725,11 +3856,9 @@ ethernet_ndp_change_mac (vlib_main_t * vm, u32 sw_if_index)
pool_foreach (n, nm->neighbor_pool, ({
if (n->key.sw_if_index == sw_if_index)
{
- if (ADJ_INDEX_INVALID != n->adj_index)
- {
- adj_nbr_update_rewrite(n->adj_index,
- n->link_layer_address);
- }
+ adj_nbr_walk_nh6 (sw_if_index,
+ &n->key.ip6_address,
+ ip6_nd_mk_complete_walk, n);
}
}));
/* *INDENT-ON* */
diff --git a/vnet/vnet/ipsec/ipsec_if.c b/vnet/vnet/ipsec/ipsec_if.c
index 13901efe..77d5d19a 100644
--- a/vnet/vnet/ipsec/ipsec_if.c
+++ b/vnet/vnet/ipsec/ipsec_if.c
@@ -38,24 +38,23 @@ dummy_interface_tx (vlib_main_t * vm,
return frame->n_vectors;
}
+/* *INDENT-OFF* */
VNET_DEVICE_CLASS (ipsec_device_class, static) =
{
-.name = "IPSec",.format_device_name = format_ipsec_name,.format_tx_trace =
- format_ipsec_if_output_trace,.tx_function = dummy_interface_tx,};
-
-static uword
-dummy_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void *dst_address, void *rewrite, uword max_rewrite_bytes)
-{
- return 0;
-}
-
+ .name = "IPSec",
+ .format_device_name = format_ipsec_name,
+ .format_tx_trace = format_ipsec_if_output_trace,
+ .tx_function = dummy_interface_tx,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (ipsec_hw_class) =
{
-.name = "IPSec",.set_rewrite = dummy_set_rewrite,};
-
+ .name = "IPSec",
+ .build_rewrite = default_build_rewrite,
+};
+/* *INDENT-ON* */
static int
ipsec_add_del_tunnel_if_internal (vnet_main_t * vnm,
diff --git a/vnet/vnet/l2tp/l2tp.c b/vnet/vnet/l2tp/l2tp.c
index a043483e..13854e2f 100644
--- a/vnet/vnet/l2tp/l2tp.c
+++ b/vnet/vnet/l2tp/l2tp.c
@@ -271,21 +271,6 @@ VNET_DEVICE_CLASS (l2tpv3_device_class,static) = {
};
/* *INDENT-ON* */
-static uword
-dummy_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void *dst_address, void *rewrite, uword max_rewrite_bytes)
-{
- /*
- * Conundrum: packets from tun/tap destined for the tunnel
- * actually have this rewrite applied. Transit packets do not.
- * To make the two cases equivalent, don't generate a
- * rewrite here, build the entire header in the fast path.
- */
- return 0;
-}
-
static u8 *
format_l2tp_header_with_length (u8 * s, va_list * args)
{
@@ -298,7 +283,8 @@ format_l2tp_header_with_length (u8 * s, va_list * args)
VNET_HW_INTERFACE_CLASS (l2tpv3_hw_class) = {
.name = "L2TPV3",
.format_header = format_l2tp_header_with_length,
- .set_rewrite = dummy_set_rewrite,
+ .build_rewrite = default_build_rewrite,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
/* *INDENT-ON* */
diff --git a/vnet/vnet/lisp-cp/lisp_types.c b/vnet/vnet/lisp-cp/lisp_types.c
index 1f527d6e..04b8462e 100644
--- a/vnet/vnet/lisp-cp/lisp_types.c
+++ b/vnet/vnet/lisp-cp/lisp_types.c
@@ -698,7 +698,17 @@ ip_address_cmp (const ip_address_t * ip1, const ip_address_t * ip2)
void
ip_address_copy (ip_address_t * dst, const ip_address_t * src)
{
- clib_memcpy (dst, src, sizeof (ip_address_t));
+ if (IP4 == ip_addr_version (src))
+ {
+ /* don't copy any garbage from the union */
+ memset (dst, 0, sizeof (*dst));
+ dst->ip.v4 = src->ip.v4;
+ dst->version = IP4;
+ }
+ else
+ {
+ clib_memcpy (dst, src, sizeof (ip_address_t));
+ }
}
void
@@ -714,6 +724,26 @@ ip_address_set (ip_address_t * dst, const void *src, u8 version)
ip_addr_version (dst) = version;
}
+void
+ip_address_to_46 (const ip_address_t * addr,
+ ip46_address_t * a, fib_protocol_t * proto)
+{
+ *proto = (IP4 == ip_addr_version (addr) ?
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
+ switch (*proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ ip46_address_set_ip4 (a, &addr->ip.v4);
+ break;
+ case FIB_PROTOCOL_IP6:
+ a->ip6 = addr->ip.v6;
+ break;
+ default:
+ ASSERT (0);
+ break;
+ }
+}
+
static void
ip_prefix_normalize_ip4 (ip4_address_t * ip4, u8 preflen)
{
diff --git a/vnet/vnet/lisp-cp/lisp_types.h b/vnet/vnet/lisp-cp/lisp_types.h
index b37315ed..dd7a53ee 100644
--- a/vnet/vnet/lisp-cp/lisp_types.h
+++ b/vnet/vnet/lisp-cp/lisp_types.h
@@ -186,6 +186,8 @@ u16 ip_address_size_to_write (ip_address_t * a);
u16 ip_address_iana_afi (ip_address_t * a);
u8 ip_address_max_len (u8 ver);
u32 ip_address_put (u8 * b, ip_address_t * a);
+void ip_address_to_46 (const ip_address_t * addr,
+ ip46_address_t * a, fib_protocol_t * proto);
/* LISP AFI codes */
typedef enum
diff --git a/vnet/vnet/lisp-gpe/interface.c b/vnet/vnet/lisp-gpe/interface.c
index 0b4f7ed9..15d203ce 100644
--- a/vnet/vnet/lisp-gpe/interface.c
+++ b/vnet/vnet/lisp-gpe/interface.c
@@ -28,6 +28,7 @@
#include <vnet/lisp-gpe/lisp_gpe.h>
#include <vnet/lisp-gpe/lisp_gpe_fwd_entry.h>
#include <vnet/lisp-gpe/lisp_gpe_tenant.h>
+#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
#include <vnet/adj/adj.h>
#include <vnet/fib/fib_table.h>
#include <vnet/fib/ip4_fib.h>
@@ -167,13 +168,6 @@ VNET_DEVICE_CLASS (lisp_gpe_device_class) = {
};
/* *INDENT-ON* */
-static uword
-dummy_set_rewrite (vnet_main_t * vnm, u32 sw_if_index, u32 l3_type,
- void *dst_address, void *rewrite, uword max_rewrite_bytes)
-{
- return 0;
-}
-
u8 *
format_lisp_gpe_header_with_length (u8 * s, va_list * args)
{
@@ -200,7 +194,9 @@ format_lisp_gpe_header_with_length (u8 * s, va_list * args)
VNET_HW_INTERFACE_CLASS (lisp_gpe_hw_class) = {
.name = "LISP_GPE",
.format_header = format_lisp_gpe_header_with_length,
- .set_rewrite = dummy_set_rewrite,
+ /* build_rewrite asserts for LISP-GPE; rewrites are installed via update_adjacency */
+ .build_rewrite = lisp_gpe_build_rewrite,
+ .update_adjacency = lisp_gpe_update_adjacency,
};
/* *INDENT-ON* */
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c
index d042f116..93a6d013 100644
--- a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c
+++ b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c
@@ -18,7 +18,8 @@
*
*/
-#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/lisp-cp/lisp_types.h>
#include <vnet/lisp-gpe/lisp_gpe_sub_interface.h>
#include <vnet/lisp-gpe/lisp_gpe_adjacency.h>
#include <vnet/lisp-gpe/lisp_gpe_tunnel.h>
@@ -106,30 +107,99 @@ lisp_gpe_adj_get_fib_chain_type (const lisp_gpe_adjacency_t * ladj)
return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4);
}
+static void
+ip46_address_to_ip_address (const ip46_address_t * a, ip_address_t * b)
+{
+ if (ip46_address_is_ip4 (a))
+ {
+ memset (b, 0, sizeof (*b));
+ ip_address_set (b, &a->ip4, IP4);
+ }
+ else
+ {
+ ip_address_set (b, &a->ip6, IP6);
+ }
+}
+
/**
* @brief Stack the tunnel's midchain on the IP forwarding chain of the via
*/
static void
-lisp_gpe_adj_stack (lisp_gpe_adjacency_t * ladj)
+lisp_gpe_adj_stack_one (lisp_gpe_adjacency_t * ladj, adj_index_t ai)
{
const lisp_gpe_tunnel_t *lgt;
dpo_id_t tmp = DPO_NULL;
- fib_link_t linkt;
lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
fib_entry_contribute_forwarding (lgt->fib_entry_index,
lisp_gpe_adj_get_fib_chain_type (ladj),
&tmp);
- FOR_EACH_FIB_LINK (linkt)
- {
- if (FIB_LINK_MPLS == linkt)
- continue;
- adj_nbr_midchain_stack (ladj->adjs[linkt], &tmp);
- }
+ if (DPO_LOAD_BALANCE == tmp.dpoi_type)
+ {
+ /*
+ * post LISP rewrite we will load-balance. However, the LISP encap
+ * is always the same for this adjacency/tunnel and hence the IP/UDP src,dst
+ * hash is always the same result too. So we do that hash now and
+ * stack on the choice.
+ * If the choice is an incomplete adj then we will need a poke when
+ * it becomes complete. This happens since the adj update walk propagates
+ * as far as recursive paths.
+ */
+ const dpo_id_t *choice;
+ load_balance_t *lb;
+ int hash;
+
+ lb = load_balance_get (tmp.dpoi_index);
+
+ if (IP4 == ip_addr_version (&ladj->remote_rloc))
+ {
+ hash = ip4_compute_flow_hash ((ip4_header_t *) adj_get_rewrite (ai),
+ lb->lb_hash_config);
+ }
+ else
+ {
+ hash = ip6_compute_flow_hash ((ip6_header_t *) adj_get_rewrite (ai),
+ lb->lb_hash_config);
+ }
+
+ choice =
+ load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
+ dpo_copy (&tmp, choice);
+ }
+
+ adj_nbr_midchain_stack (ai, &tmp);
dpo_reset (&tmp);
}
+/**
+ * @brief Call back when restacking all adjacencies on a LISP-GPE interface
+ */
+static adj_walk_rc_t
+lisp_gpe_adj_walk_cb (adj_index_t ai, void *ctx)
+{
+ lisp_gpe_adjacency_t *ladj = ctx;
+
+ lisp_gpe_adj_stack_one (ladj, ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static void
+lisp_gpe_adj_stack (lisp_gpe_adjacency_t * ladj)
+{
+ fib_protocol_t nh_proto;
+ ip46_address_t nh;
+
+ ip_address_to_46 (&ladj->remote_rloc, &nh, &nh_proto);
+
+ /*
+ * walk all the adjacencies on the lisp interface and restack them
+ */
+ adj_nbr_walk_nh (ladj->sw_if_index,
+ nh_proto, &nh, lisp_gpe_adj_walk_cb, ladj);
+}
+
static lisp_gpe_next_protocol_e
lisp_gpe_adj_proto_from_fib_link_type (fib_link_t linkt)
{
@@ -157,10 +227,59 @@ lisp_gpe_fixup (vlib_main_t * vm, ip_adjacency_t * adj, vlib_buffer_t * b)
ip_udp_fixup_one (vm, b, is_v4_packet (vlib_buffer_get_current (b)));
}
+/**
+ * @brief The LISP-GPE interface registered function to update, i.e.
+ * provide a rewrite string for, an adjacency.
+ */
+void
+lisp_gpe_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
+{
+ const lisp_gpe_tunnel_t *lgt;
+ lisp_gpe_adjacency_t *ladj;
+ ip_adjacency_t *adj;
+ ip_address_t rloc;
+ vnet_link_t linkt;
+ index_t lai;
+
+ adj = adj_get (ai);
+ ip46_address_to_ip_address (&adj->sub_type.nbr.next_hop, &rloc);
+
+ /*
+ * find the existing LISP adjacency; it is expected to already exist
+ */
+ lai = lisp_adj_find (&rloc, sw_if_index);
+
+ ASSERT (INDEX_INVALID != lai);
+
+ ladj = pool_elt_at_index (lisp_adj_pool, lai);
+ lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
+ linkt = adj_get_link_type (ai);
+
+ adj_nbr_midchain_update_rewrite
+ (ai, lisp_gpe_fixup,
+ (VNET_LINK_ETHERNET == linkt ?
+ ADJ_MIDCHAIN_FLAG_NO_COUNT :
+ ADJ_MIDCHAIN_FLAG_NONE),
+ lisp_gpe_tunnel_build_rewrite
+ (lgt, ladj, lisp_gpe_adj_proto_from_fib_link_type (linkt)));
+
+ lisp_gpe_adj_stack_one (ladj, ai);
+}
+
+u8 *
+lisp_gpe_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address)
+{
+ ASSERT (0);
+ return (NULL);
+}
+
index_t
lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair,
u32 overlay_table_id, u32 vni)
{
+ const lisp_gpe_sub_interface_t *l3s;
const lisp_gpe_tunnel_t *lgt;
lisp_gpe_adjacency_t *ladj;
index_t lai, l3si;
@@ -171,29 +290,24 @@ lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair,
l3si = lisp_gpe_sub_interface_find_or_create_and_lock (&pair->lcl_loc,
overlay_table_id,
vni);
+ l3s = lisp_gpe_sub_interface_get (l3si);
/*
* find an existing or create a new adj
*/
- lai = lisp_adj_find (&pair->rmt_loc, l3si);
+ lai = lisp_adj_find (&pair->rmt_loc, l3s->sw_if_index);
if (INDEX_INVALID == lai)
{
- const lisp_gpe_sub_interface_t *l3s;
- u8 *rewrite = NULL;
- fib_link_t linkt;
- fib_prefix_t nh;
pool_get (lisp_adj_pool, ladj);
memset (ladj, 0, sizeof (*ladj));
lai = (ladj - lisp_adj_pool);
- ladj->remote_rloc = pair->rmt_loc;
+ ip_address_copy (&ladj->remote_rloc, &pair->rmt_loc);
ladj->vni = vni;
/* transfer the lock to the adj */
ladj->lisp_l3_sub_index = l3si;
-
- l3s = lisp_gpe_sub_interface_get (l3si);
ladj->sw_if_index = l3s->sw_if_index;
/* if vni is non-default */
@@ -219,38 +333,8 @@ lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair,
ladj->fib_entry_child_index = fib_entry_child_add (lgt->fib_entry_index,
FIB_NODE_TYPE_LISP_ADJ,
lai);
- ip_address_to_fib_prefix (&pair->rmt_loc, &nh);
-
- /*
- * construct and stack the FIB midchain adjacencies
- */
- FOR_EACH_FIB_LINK (linkt)
- {
- if (FIB_LINK_MPLS == linkt)
- continue;
-
- ladj->adjs[linkt] = adj_nbr_add_or_lock (nh.fp_proto,
- linkt,
- &nh.fp_addr,
- ladj->sw_if_index);
-
- rewrite =
- lisp_gpe_tunnel_build_rewrite (lgt, ladj,
- lisp_gpe_adj_proto_from_fib_link_type
- (linkt));
- adj_nbr_midchain_update_rewrite (ladj->adjs[linkt],
- lisp_gpe_fixup,
- (FIB_LINK_ETHERNET == linkt ?
- ADJ_MIDCHAIN_FLAG_NO_COUNT :
- ADJ_MIDCHAIN_FLAG_NONE), rewrite);
-
- vec_free (rewrite);
- }
-
- lisp_gpe_adj_stack (ladj);
-
- lisp_adj_insert (&ladj->remote_rloc, ladj->lisp_l3_sub_index, lai);
+ lisp_adj_insert (&ladj->remote_rloc, ladj->sw_if_index, lai);
}
else
{
@@ -278,15 +362,21 @@ lisp_gpe_adjacency_from_fib_node (const fib_node_t * node)
static void
lisp_gpe_adjacency_last_lock_gone (lisp_gpe_adjacency_t * ladj)
{
+ const lisp_gpe_tunnel_t *lgt;
+
/*
* no children so we are not counting locks. no-op.
* at least not counting
*/
- lisp_adj_remove (&ladj->remote_rloc, ladj->lisp_l3_sub_index);
+ lisp_adj_remove (&ladj->remote_rloc, ladj->sw_if_index);
/*
* unlock the resources this adj holds
*/
+ lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
+
+ fib_entry_child_remove (lgt->fib_entry_index, ladj->fib_entry_child_index);
+
lisp_gpe_tunnel_unlock (ladj->tunnel_index);
lisp_gpe_sub_interface_unlock (ladj->lisp_l3_sub_index);
@@ -375,9 +465,9 @@ format_lisp_gpe_adjacency (u8 * s, va_list * args)
s = format (s, " %U\n",
format_lisp_gpe_tunnel,
lisp_gpe_tunnel_get (ladj->tunnel_index));
- s = format (s, " FIB adjacencies: IPV4:%d IPv6:%d L2:%d\n",
- ladj->adjs[FIB_LINK_IP4],
- ladj->adjs[FIB_LINK_IP6], ladj->adjs[FIB_LINK_ETHERNET]);
+ /* s = format (s, " FIB adjacencies: IPV4:%d IPv6:%d L2:%d\n", */
+ /* ladj->adjs[FIB_LINK_IP4], */
+ /* ladj->adjs[FIB_LINK_IP6], ladj->adjs[FIB_LINK_ETHERNET]); */
}
else
{
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h
index f6a66cdd..adc3acae 100644
--- a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h
+++ b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h
@@ -80,12 +80,6 @@ typedef struct lisp_gpe_adjacency_t_
u32 tunnel_index;
/**
- * Per-link-type FIB adjacencies contributed.
- * These will be used as a result of a FIB lookup.
- */
- adj_index_t adjs[FIB_LINK_NUM];
-
- /**
* This adjacency is a child of the FIB entry to reach the RLOC.
* This is so when the reachability of that RLOC changes, we can restack
* the FIB adjacnecies.
@@ -112,6 +106,14 @@ extern void lisp_gpe_adjacency_unlock (index_t l3si);
extern const lisp_gpe_adjacency_t *lisp_gpe_adjacency_get (index_t l3si);
+extern void lisp_gpe_update_adjacency (vnet_main_t * vnm,
+ u32 sw_if_index, adj_index_t ai);
+extern u8 *lisp_gpe_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_address);
+
+
/**
* @brief Flags for displaying the adjacency
*/
diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
index 3bd83dca..75db97d9 100644
--- a/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
+++ b/vnet/vnet/lisp-gpe/lisp_gpe_fwd_entry.c
@@ -190,26 +190,6 @@ ip_src_fib_add_route_w_dpo (u32 src_fib_index,
}
}
-static void
-ip_address_to_46 (const ip_address_t * addr,
- ip46_address_t * a, fib_protocol_t * proto)
-{
- *proto = (IP4 == ip_addr_version (addr) ?
- FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
- switch (*proto)
- {
- case FIB_PROTOCOL_IP4:
- a->ip4 = addr->ip.v4;
- break;
- case FIB_PROTOCOL_IP6:
- a->ip6 = addr->ip.v6;
- break;
- default:
- ASSERT (0);
- break;
- }
-}
-
static fib_route_path_t *
lisp_gpe_mk_fib_paths (const lisp_fwd_path_t * paths)
{
diff --git a/vnet/vnet/llc/llc.c b/vnet/vnet/llc/llc.c
index 0496e8d4..975207b6 100644
--- a/vnet/vnet/llc/llc.c
+++ b/vnet/vnet/llc/llc.c
@@ -155,31 +155,30 @@ unformat_llc_header (unformat_input_t * input, va_list * args)
return 1;
}
-static uword
-llc_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void *dst_address, void *rewrite, uword max_rewrite_bytes)
+static u8 *
+llc_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address)
{
- llc_header_t *h = rewrite;
+ llc_header_t *h;
+ u8 *rewrite = NULL;
llc_protocol_t protocol;
- if (max_rewrite_bytes < sizeof (h[0]))
- return 0;
-
- switch (l3_type)
+ switch (link_type)
{
-#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = LLC_PROTOCOL_##b; break
+#define _(a,b) case VNET_LINK_##a: protocol = LLC_PROTOCOL_##b; break
_(IP4, ip4);
#undef _
default:
- return 0;
+ return (NULL);
}
+ vec_validate (rewrite, sizeof (*h) - 1);
+ h = (llc_header_t *) rewrite;
h->src_sap = h->dst_sap = protocol;
h->control = 0x3;
- return sizeof (h[0]);
+ return (rewrite);
}
/* *INDENT-OFF* */
@@ -187,7 +186,7 @@ VNET_HW_INTERFACE_CLASS (llc_hw_interface_class) = {
.name = "LLC",
.format_header = format_llc_header_with_length,
.unformat_header = unformat_llc_header,
- .set_rewrite = llc_set_rewrite,
+ .build_rewrite = llc_build_rewrite,
};
/* *INDENT-ON* */
diff --git a/vnet/vnet/mpls/interface.c b/vnet/vnet/mpls/interface.c
index dd40fc29..553d7fb5 100644
--- a/vnet/vnet/mpls/interface.c
+++ b/vnet/vnet/mpls/interface.c
@@ -23,22 +23,6 @@
#include <vnet/adj/adj_midchain.h>
#include <vnet/dpo/classify_dpo.h>
-static uword mpls_gre_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void * dst_address,
- void * rewrite,
- uword max_rewrite_bytes)
-{
- /*
- * Conundrum: packets from tun/tap destined for the tunnel
- * actually have this rewrite applied. Transit packets do not.
- * To make the two cases equivalent, don't generate a
- * rewrite here, build the entire header in the fast path.
- */
- return 0;
-}
-
/* manually added to the interface output node */
#define MPLS_GRE_OUTPUT_NEXT_POST_REWRITE 1
@@ -271,26 +255,10 @@ VNET_HW_INTERFACE_CLASS (mpls_gre_hw_interface_class) = {
#if 0
.unformat_header = unformat_mpls_gre_header,
#endif
- .set_rewrite = mpls_gre_set_rewrite,
+ .build_rewrite = default_build_rewrite,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-
-static uword mpls_eth_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void * dst_address,
- void * rewrite,
- uword max_rewrite_bytes)
-{
- /*
- * Conundrum: packets from tun/tap destined for the tunnel
- * actually have this rewrite applied. Transit packets do not.
- * To make the two cases equivalent, don't generate a
- * rewrite here, build the entire header in the fast path.
- */
- return 0;
-}
-
/* manually added to the interface output node */
#define MPLS_ETH_OUTPUT_NEXT_OUTPUT 1
@@ -525,7 +493,8 @@ VNET_HW_INTERFACE_CLASS (mpls_eth_hw_interface_class) = {
#if 0
.unformat_header = unformat_mpls_eth_header,
#endif
- .set_rewrite = mpls_eth_set_rewrite,
+ .build_rewrite = default_build_rewrite,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
/**
@@ -609,7 +578,6 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm,
u32 sw_if_index,
u8 is_enable)
{
- mpls_interface_state_change_callback_t *callback;
vlib_main_t * vm = vlib_get_main();
ip_config_main_t * cm = &mm->feature_config_mains[VNET_IP_RX_UNICAST_FEAT];
vnet_config_main_t * vcm = &cm->config_main;
@@ -660,14 +628,6 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm,
/* # bytes of config data */ 0);
cm->config_index_by_sw_if_index[sw_if_index] = ci;
-
- /*
- * notify all interested clients of the change of state.
- */
- vec_foreach(callback, mm->mpls_interface_state_change_callbacks)
- {
- (*callback)(sw_if_index, is_enable);
- }
}
static mpls_gre_tunnel_t *
@@ -1578,7 +1538,7 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst,
vnet_rewrite_for_sw_interface
(vnm,
- VNET_L3_PACKET_TYPE_MPLS_UNICAST,
+ VNET_LINK_MPLS,
tx_sw_if_index,
ip4_rewrite_node.index,
tp->tunnel_dst,
@@ -1763,7 +1723,7 @@ int vnet_mpls_policy_tunnel_add_rewrite (mpls_main_t * mm,
/* Build L2 encap */
vnet_rewrite_for_sw_interface
(mm->vnet_main,
- VNET_L3_PACKET_TYPE_MPLS_UNICAST,
+ VNET_LINK_MPLS,
t->tx_sw_if_index,
mpls_policy_encap_node.index,
t->tunnel_dst,
diff --git a/vnet/vnet/mpls/mpls.h b/vnet/vnet/mpls/mpls.h
index e4737709..35755333 100644
--- a/vnet/vnet/mpls/mpls.h
+++ b/vnet/vnet/mpls/mpls.h
@@ -163,9 +163,6 @@ typedef struct {
/* IP4 enabled count by software interface */
u8 * mpls_enabled_by_sw_if_index;
- /* Functions to call when MPLS state on an interface changes. */
- mpls_interface_state_change_callback_t * mpls_interface_state_change_callbacks;
-
/* convenience */
vlib_main_t * vlib_main;
vnet_main_t * vnet_main;
diff --git a/vnet/vnet/mpls/mpls_output.c b/vnet/vnet/mpls/mpls_output.c
index 932fcb8d..299e1dd2 100644
--- a/vnet/vnet/mpls/mpls_output.c
+++ b/vnet/vnet/mpls/mpls_output.c
@@ -39,7 +39,7 @@ format_mpls_output_trace (u8 * s, va_list * args)
s = format (s, "adj-idx %d : %U flow hash: 0x%08x",
t->adj_index,
- format_ip_adjacency, vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
+ format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
t->flow_hash);
s = format (s, "\n%U%U",
format_white_space, indent,
diff --git a/vnet/vnet/pg/stream.c b/vnet/vnet/pg/stream.c
index b66fb742..20f54a74 100644
--- a/vnet/vnet/pg/stream.c
+++ b/vnet/vnet/pg/stream.c
@@ -121,25 +121,25 @@ VNET_DEVICE_CLASS (pg_dev_class) = {
};
/* *INDENT-ON* */
-static uword
-pg_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void *dst_address, void *rewrite, uword max_rewrite_bytes)
+static u8 *
+pg_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_address)
{
- u16 *h = rewrite;
+ u8 *rewrite = NULL;
+ u16 *h;
- if (max_rewrite_bytes < sizeof (h[0]))
- return 0;
+ vec_validate (rewrite, sizeof (*h) - 1);
+ h = (u16 *) rewrite;
+ h[0] = clib_host_to_net_u16 (vnet_link_to_l3_proto (link_type));
- h[0] = clib_host_to_net_u16 (l3_type);
- return sizeof (h[0]);
+ return (rewrite);
}
/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (pg_interface_class,static) = {
.name = "Packet generator",
- .set_rewrite = pg_set_rewrite,
+ .build_rewrite = pg_build_rewrite,
};
/* *INDENT-ON* */
diff --git a/vnet/vnet/ppp/ppp.c b/vnet/vnet/ppp/ppp.c
index 427fd7b7..a0eefbad 100644
--- a/vnet/vnet/ppp/ppp.c
+++ b/vnet/vnet/ppp/ppp.c
@@ -168,35 +168,33 @@ unformat_ppp_header (unformat_input_t * input, va_list * args)
return 1;
}
-static uword
-ppp_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void *dst_address, void *rewrite, uword max_rewrite_bytes)
+static u8 *
+ppp_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type, const void *dst_hw_address)
{
- ppp_header_t *h = rewrite;
+ ppp_header_t *h;
+ u8 *rewrite = NULL;
ppp_protocol_t protocol;
- if (max_rewrite_bytes < sizeof (h[0]))
- return 0;
-
- switch (l3_type)
+ switch (link_type)
{
-#define _(a,b) case VNET_L3_PACKET_TYPE_##a: protocol = PPP_PROTOCOL_##b; break
+#define _(a,b) case VNET_LINK_##a: protocol = PPP_PROTOCOL_##b; break
_(IP4, ip4);
_(IP6, ip6);
- _(MPLS_UNICAST, mpls_unicast);
- _(MPLS_MULTICAST, mpls_multicast);
+ _(MPLS, mpls_unicast);
#undef _
default:
- return 0;
+ return (NULL);
}
+ vec_validate (rewrite, sizeof (*h) - 1);
+ h = (ppp_header_t *) rewrite;
h->address = 0xff;
h->control = 0x03;
h->protocol = clib_host_to_net_u16 (protocol);
- return sizeof (h[0]);
+ return (rewrite);
}
/* *INDENT-OFF* */
@@ -204,7 +202,8 @@ VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = {
.name = "PPP",
.format_header = format_ppp_header_with_length,
.unformat_header = unformat_ppp_header,
- .set_rewrite = ppp_set_rewrite,
+ .build_rewrite = ppp_build_rewrite,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
/* *INDENT-ON* */
diff --git a/vnet/vnet/replication.c b/vnet/vnet/replication.c
index 571be7d8..561c86cd 100644
--- a/vnet/vnet/replication.c
+++ b/vnet/vnet/replication.c
@@ -225,6 +225,12 @@ replication_recycle_callback (vlib_main_t * vm, vlib_buffer_free_list_t * fl)
/* Mark that this buffer was just recycled */
b0->flags |= VLIB_BUFFER_IS_RECYCLED;
+#if (CLIB_DEBUG > 0)
+#if DPDK == 0
+ vlib_buffer_set_known_state (vm, bi0, VLIB_BUFFER_KNOWN_ALLOCATED);
+#endif
+#endif
+
/* If buffer is traced, mark frame as traced */
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
f->flags |= VLIB_FRAME_TRACE;
diff --git a/vnet/vnet/rewrite.c b/vnet/vnet/rewrite.c
index 42d0688a..53d548bc 100644
--- a/vnet/vnet/rewrite.c
+++ b/vnet/vnet/rewrite.c
@@ -184,38 +184,51 @@ done:
return error == 0;
}
+u32
+vnet_tx_node_index_for_sw_interface (vnet_main_t * vnm, u32 sw_if_index)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ return (hw->output_node_index);
+}
+
+void
+vnet_rewrite_init (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 this_node, u32 next_node, vnet_rewrite_header_t * rw)
+{
+ rw->sw_if_index = sw_if_index;
+ rw->node_index = this_node;
+ rw->next_index = vlib_node_add_next (vnm->vlib_main, this_node, next_node);
+ rw->max_l3_packet_bytes =
+ vnet_sw_interface_get_mtu (vnm, sw_if_index, VLIB_TX);
+}
+
void
vnet_rewrite_for_sw_interface (vnet_main_t * vnm,
- vnet_l3_packet_type_t packet_type,
+ vnet_link_t link_type,
u32 sw_if_index,
u32 node_index,
void *dst_address,
vnet_rewrite_header_t * rw,
u32 max_rewrite_bytes)
{
+
vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
vnet_hw_interface_class_t *hc =
vnet_get_hw_interface_class (vnm, hw->hw_class_index);
- static u8 *rw_tmp = 0;
- uword n_rw_tmp;
+ u8 *rewrite = NULL;
- rw->sw_if_index = sw_if_index;
- rw->node_index = node_index;
- rw->next_index =
- vlib_node_add_next (vnm->vlib_main, node_index, hw->output_node_index);
- rw->max_l3_packet_bytes = hw->max_l3_packet_bytes[VLIB_TX];
-
- ASSERT (max_rewrite_bytes > 0);
- vec_reset_length (rw_tmp);
- vec_validate (rw_tmp, max_rewrite_bytes - 1);
-
- ASSERT (hc->set_rewrite);
- n_rw_tmp =
- hc->set_rewrite (vnm, sw_if_index, packet_type, dst_address, rw_tmp,
- max_rewrite_bytes);
-
- ASSERT (n_rw_tmp < max_rewrite_bytes);
- vnet_rewrite_set_data_internal (rw, max_rewrite_bytes, rw_tmp, n_rw_tmp);
+ vnet_rewrite_init (vnm, sw_if_index, node_index,
+ vnet_tx_node_index_for_sw_interface (vnm, sw_if_index),
+ rw);
+
+ ASSERT (hc->build_rewrite);
+ rewrite = hc->build_rewrite (vnm, sw_if_index, link_type, dst_address);
+
+ ASSERT (vec_len (rewrite) < max_rewrite_bytes);
+ vnet_rewrite_set_data_internal (rw, max_rewrite_bytes, rewrite,
+ vec_len (rewrite));
+ vec_free (rewrite);
}
void
@@ -280,6 +293,33 @@ unserialize_vnet_rewrite (serialize_main_t * m, va_list * va)
rw->data_bytes);
}
+u8 *
+vnet_build_rewrite_for_sw_interface (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_address)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ vnet_hw_interface_class_t *hc =
+ vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+
+ ASSERT (hc->build_rewrite);
+ return (hc->build_rewrite (vnm, sw_if_index, link_type, dst_address));
+}
+
+
+void
+vnet_update_adjacency_for_sw_interface (vnet_main_t * vnm,
+ u32 sw_if_index, u32 ai)
+{
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ vnet_hw_interface_class_t *hc =
+ vnet_get_hw_interface_class (vnm, hw->hw_class_index);
+
+ ASSERT (hc->update_adjacency);
+ hc->update_adjacency (vnm, sw_if_index, ai);
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/vnet/vnet/rewrite.h b/vnet/vnet/rewrite.h
index fb800da7..00c1efbd 100644
--- a/vnet/vnet/rewrite.h
+++ b/vnet/vnet/rewrite.h
@@ -89,6 +89,16 @@ struct { \
}
always_inline void
+vnet_rewrite_clear_data_internal (vnet_rewrite_header_t * rw, int max_size)
+{
+ /* Sanity check values carefully for this memset operation */
+ ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE));
+
+ rw->data_bytes = 0;
+ memset (rw->data, 0xfe, max_size);
+}
+
+always_inline void
vnet_rewrite_set_data_internal (vnet_rewrite_header_t * rw,
int max_size, void *data, int data_bytes)
{
@@ -252,20 +262,29 @@ _vnet_rewrite_two_headers (vnet_rewrite_header_t * h0,
(most_likely_size))
#define VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST ((void *) 0)
+/** Deprecated */
void vnet_rewrite_for_sw_interface (struct vnet_main_t *vnm,
- vnet_l3_packet_type_t packet_type,
+ vnet_link_t packet_type,
u32 sw_if_index,
u32 node_index,
void *dst_address,
vnet_rewrite_header_t * rw,
u32 max_rewrite_bytes);
-void vnet_rewrite_for_tunnel (struct vnet_main_t *vnm,
- u32 tx_sw_if_index,
- u32 rewrite_node_index,
- u32 post_rewrite_node_index,
- vnet_rewrite_header_t * rw,
- u8 * rewrite_data, u32 rewrite_length);
+u32 vnet_tx_node_index_for_sw_interface (struct vnet_main_t *vnm,
+ u32 sw_if_index);
+
+void vnet_rewrite_init (struct vnet_main_t *vnm,
+ u32 sw_if_index,
+ u32 this_node,
+ u32 next_node, vnet_rewrite_header_t * rw);
+
+u8 *vnet_build_rewrite_for_sw_interface (struct vnet_main_t *vnm,
+ u32 sw_if_index,
+ vnet_link_t packet_type,
+ const void *dst_address);
+void vnet_update_adjacency_for_sw_interface (struct vnet_main_t *vnm,
+ u32 sw_if_index, u32 ai);
/* Parser for unformat header & rewrite string. */
unformat_function_t unformat_vnet_rewrite;
diff --git a/vnet/vnet/srp/interface.c b/vnet/vnet/srp/interface.c
index 46c091d4..d427cc3c 100644
--- a/vnet/vnet/srp/interface.c
+++ b/vnet/vnet/srp/interface.c
@@ -41,34 +41,33 @@
#include <vnet/pg/pg.h>
#include <vnet/srp/srp.h>
-static uword srp_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void * dst_address,
- void * rewrite,
- uword max_rewrite_bytes)
+static u8*
+srp_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void * dst_address)
{
vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
srp_main_t * sm = &srp_main;
- srp_and_ethernet_header_t * h = rewrite;
+ srp_and_ethernet_header_t * h;
+ u8* rewrite = NULL;
u16 type;
uword n_bytes = sizeof (h[0]);
- if (n_bytes > max_rewrite_bytes)
- return 0;
-
- switch (l3_type) {
-#define _(a,b) case VNET_L3_PACKET_TYPE_##a: type = ETHERNET_TYPE_##b; break
+ switch (link_type) {
+#define _(a,b) case VNET_LINK_##a: type = ETHERNET_TYPE_##b; break
_ (IP4, IP4);
_ (IP6, IP6);
- _ (MPLS_UNICAST, MPLS_UNICAST);
- _ (MPLS_MULTICAST, MPLS_MULTICAST);
+ _ (MPLS, MPLS_UNICAST);
_ (ARP, ARP);
#undef _
default:
- return 0;
+ return (NULL);
}
+ vec_validate(rewrite, n_bytes-1);
+ h = (srp_and_ethernet_header_t *)rewrite;
+
clib_memcpy (h->ethernet.src_address, hw->hw_address, sizeof (h->ethernet.src_address));
if (dst_address)
clib_memcpy (h->ethernet.dst_address, dst_address, sizeof (h->ethernet.dst_address));
@@ -82,7 +81,7 @@ static uword srp_set_rewrite (vnet_main_t * vnm,
h->srp.ttl = sm->default_data_ttl;
srp_header_compute_parity (&h->srp);
- return n_bytes;
+ return (rewrite);
}
static void srp_register_interface_helper (u32 * hw_if_indices_by_side, u32 redistribute);
@@ -293,7 +292,8 @@ VNET_HW_INTERFACE_CLASS (srp_hw_interface_class) = {
.format_device = format_srp_device,
.unformat_hw_address = unformat_ethernet_address,
.unformat_header = unformat_srp_header,
- .set_rewrite = srp_set_rewrite,
+ .build_rewrite = srp_build_rewrite,
+ .update_adjacency = ethernet_update_adjacency,
.is_valid_class_for_interface = srp_is_valid_class_for_interface,
.hw_class_change = srp_interface_hw_class_change,
};
diff --git a/vnet/vnet/unix/tapcli.c b/vnet/vnet/unix/tapcli.c
index 0be68a9e..8a5d47dd 100644
--- a/vnet/vnet/unix/tapcli.c
+++ b/vnet/vnet/unix/tapcli.c
@@ -585,6 +585,7 @@ tapcli_nopunt_frame (vlib_main_t * vm,
VNET_HW_INTERFACE_CLASS (tapcli_interface_class,static) = {
.name = "tapcli",
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
/**
diff --git a/vnet/vnet/unix/tuntap.c b/vnet/vnet/unix/tuntap.c
index 48d5dc23..89fd1dcf 100644
--- a/vnet/vnet/unix/tuntap.c
+++ b/vnet/vnet/unix/tuntap.c
@@ -946,6 +946,7 @@ tuntap_nopunt_frame (vlib_main_t * vm,
VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = {
.name = "tuntap",
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
/**
diff --git a/vnet/vnet/vxlan-gpe/vxlan_gpe.c b/vnet/vnet/vxlan-gpe/vxlan_gpe.c
index fae481c3..979864e9 100644
--- a/vnet/vnet/vxlan-gpe/vxlan_gpe.c
+++ b/vnet/vnet/vxlan-gpe/vxlan_gpe.c
@@ -121,17 +121,6 @@ VNET_DEVICE_CLASS (vxlan_gpe_device_class,static) = {
.admin_up_down_function = vxlan_gpe_interface_admin_up_down,
};
-static uword dummy_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void * dst_address,
- void * rewrite,
- uword max_rewrite_bytes)
-{
- return 0;
-}
-
-
/**
* @brief Formatting function for tracing VXLAN GPE with length
*
@@ -151,7 +140,8 @@ static u8 * format_vxlan_gpe_header_with_length (u8 * s, va_list * args)
VNET_HW_INTERFACE_CLASS (vxlan_gpe_hw_class) = {
.name = "VXLAN_GPE",
.format_header = format_vxlan_gpe_header_with_length,
- .set_rewrite = dummy_set_rewrite,
+ .build_rewrite = default_build_rewrite,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
diff --git a/vnet/vnet/vxlan/vxlan.c b/vnet/vnet/vxlan/vxlan.c
index da359a8d..5b521dbe 100644
--- a/vnet/vnet/vxlan/vxlan.c
+++ b/vnet/vnet/vxlan/vxlan.c
@@ -103,16 +103,6 @@ VNET_DEVICE_CLASS (vxlan_device_class,static) = {
.admin_up_down_function = vxlan_interface_admin_up_down,
};
-static uword dummy_set_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- u32 l3_type,
- void * dst_address,
- void * rewrite,
- uword max_rewrite_bytes)
-{
- return 0;
-}
-
static u8 * format_vxlan_header_with_length (u8 * s, va_list * args)
{
u32 dev_instance = va_arg (*args, u32);
@@ -123,7 +113,7 @@ static u8 * format_vxlan_header_with_length (u8 * s, va_list * args)
VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = {
.name = "VXLAN",
.format_header = format_vxlan_header_with_length,
- .set_rewrite = dummy_set_rewrite,
+ .build_rewrite = default_build_rewrite,
};
#define foreach_copy_field \