From fa5d19829759cef45cc34efe844d9471f5a1fc61 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Mon, 20 Feb 2017 14:19:51 -0800 Subject: Adjacency layout change and move to vnet/adj Change-Id: I03195a86c69f84a301051c6b3ab64456bbf28645 Signed-off-by: Neale Ranns --- src/vnet/adj/adj.c | 3 +- src/vnet/adj/adj.h | 220 +++++++++++++++++++++++++++- src/vnet/adj/adj_midchain.c | 25 ++-- src/vnet/adj/adj_midchain.h | 22 +-- src/vnet/adj/adj_nbr.c | 8 +- src/vnet/adj/rewrite.c | 234 +++++++++++++++++++++++++++++ src/vnet/adj/rewrite.h | 350 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 824 insertions(+), 38 deletions(-) create mode 100644 src/vnet/adj/rewrite.c create mode 100644 src/vnet/adj/rewrite.h (limited to 'src/vnet/adj') diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index c1d036a0e8b..7cf9e9d081d 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -48,7 +48,7 @@ adj_alloc (fib_protocol_t proto) { ip_adjacency_t *adj; - pool_get(adj_pool, adj); + pool_get_aligned(adj_pool, adj, CLIB_CACHE_LINE_BYTES); adj_poison(adj); @@ -58,7 +58,6 @@ adj_alloc (fib_protocol_t proto) adj_get_index(adj)); adj->rewrite_header.sw_if_index = ~0; - adj->n_adj = 1; adj->lookup_next_index = 0; fib_node_init(&adj->ia_node, diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h index 271fdbc6114..af7730f7086 100644 --- a/src/vnet/adj/adj.h +++ b/src/vnet/adj/adj.h @@ -24,7 +24,6 @@ * address in the ARP packet. * UNSHARED. Only one per-interface. * - midchain: a nighbour adj on a virtual/tunnel interface. - * - rewrite: an adj with no key, but with a rewrite string. * * The API to create and update the adjacency is very sub-type specific. This * is intentional as it encourages the user to carefully consider which adjacency @@ -42,10 +41,227 @@ #ifndef __ADJ_H__ #define __ADJ_H__ -#include #include #include #include +#include + +/** @brief Common (IP4/IP6) next index stored in adjacency. */ +typedef enum +{ + /** Adjacency to drop this packet. 
*/ + IP_LOOKUP_NEXT_DROP, + /** Adjacency to punt this packet. */ + IP_LOOKUP_NEXT_PUNT, + + /** This packet is for one of our own IP addresses. */ + IP_LOOKUP_NEXT_LOCAL, + + /** This packet matches an "incomplete adjacency" and packets + need to be passed to ARP to find rewrite string for + this destination. */ + IP_LOOKUP_NEXT_ARP, + + /** This packet matches an "interface route" and packets + need to be passed to ARP to find rewrite string for + this destination. */ + IP_LOOKUP_NEXT_GLEAN, + + /** This packet is to be rewritten and forwarded to the next + processing node. This is typically the output interface but + might be another node for further output processing. */ + IP_LOOKUP_NEXT_REWRITE, + + /** This packets follow a mid-chain adjacency */ + IP_LOOKUP_NEXT_MIDCHAIN, + + /** This packets needs to go to ICMP error */ + IP_LOOKUP_NEXT_ICMP_ERROR, + + /** Multicast Adjacency. */ + IP_LOOKUP_NEXT_MCAST, + + IP_LOOKUP_N_NEXT, +} __attribute__ ((packed)) ip_lookup_next_t; + +typedef enum +{ + IP4_LOOKUP_N_NEXT = IP_LOOKUP_N_NEXT, +} ip4_lookup_next_t; + +typedef enum +{ + /* Hop-by-hop header handling */ + IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT, + IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP, + IP6_LOOKUP_NEXT_POP_HOP_BY_HOP, + IP6_LOOKUP_N_NEXT, +} ip6_lookup_next_t; + +#define IP4_LOOKUP_NEXT_NODES { \ + [IP_LOOKUP_NEXT_DROP] = "ip4-drop", \ + [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", \ + [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \ + [IP_LOOKUP_NEXT_ARP] = "ip4-arp", \ + [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \ + [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite", \ + [IP_LOOKUP_NEXT_MCAST] = "ip4-rewrite-mcast", \ + [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \ + [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \ +} + +#define IP6_LOOKUP_NEXT_NODES { \ + [IP_LOOKUP_NEXT_DROP] = "ip6-drop", \ + [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", \ + [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", \ + [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", \ + [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean", \ + 
[IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \ + [IP_LOOKUP_NEXT_MCAST] = "ip6-rewrite-mcast", \ + [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \ + [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \ + [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \ + [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \ + [IP6_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", \ +} + +/** + * Forward declaration + */ +struct ip_adjacency_t_; + +/** + * @brief A function type for post-rewrite fixups on midchain adjacency + */ +typedef void (*adj_midchain_fixup_t) (vlib_main_t * vm, + struct ip_adjacency_t_ * adj, + vlib_buffer_t * b0); + +/** + * @brief Flags on an IP adjacency + */ +typedef enum ip_adjacency_flags_t_ +{ + ADJ_FLAG_NONE = 0, + + /** + * Currently a sync walk is active. Used to prevent re-entrant walking + */ + ADJ_FLAG_SYNC_WALK_ACTIVE = (1 << 0), + + /** + * Packets TX through the midchain do not increment the interface + * counters. This should be used when the adj is associated with an L2 + * interface and that L2 interface is in a bridge domain. In that case + * the packet will have traversed the interface's TX node, and hence have + * been counted, before it traverses this midchain + */ + ADJ_FLAG_MIDCHAIN_NO_COUNT = (1 << 1), +} __attribute__ ((packed)) adj_flags_t; + +/** + * @brief IP unicast adjacency. + * @note cache aligned. + * + * An adjacency is a representation of a peer on a particular link. + */ +typedef struct ip_adjacency_t_ +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /** + * Linkage into the FIB node graph. First member since this type + * has 8 byte alignment requirements. + */ + fib_node_t ia_node; + + /** + * Next hop after ip4-lookup. + * This is not accessed in the rewrite nodes. + * 1-bytes + */ + ip_lookup_next_t lookup_next_index; + + /** + * link/ether-type + * 1 bytes + */ + vnet_link_t ia_link; + + /** + * The protocol of the neighbor/peer. i.e.
the protocol with + * which to interpret the 'next-hop' attributes of the sub-types. + * 1-bytes + */ + fib_protocol_t ia_nh_proto; + + /** + * Flags on the adjacency + * 1-bytes + */ + adj_flags_t ia_flags; + + union + { + /** + * IP_LOOKUP_NEXT_ARP/IP_LOOKUP_NEXT_REWRITE + * + * neighbour adjacency sub-type; + */ + struct + { + ip46_address_t next_hop; + } nbr; + /** + * IP_LOOKUP_NEXT_MIDCHAIN + * + * A nbr adj that is also recursive. Think tunnels. + * A nbr adj can transition to be of type MIDCHAIN + * so be sure to leave the two structs with the next_hop + * fields aligned. + */ + struct + { + /** + * The recursive next-hop. + * This field MUST be at the same memory location as + * sub_type.nbr.next_hop + */ + ip46_address_t next_hop; + /** + * The next DPO to use + */ + dpo_id_t next_dpo; + /** + * A function to perform the post-rewrite fixup + */ + adj_midchain_fixup_t fixup_func; + } midchain; + /** + * IP_LOOKUP_NEXT_GLEAN + * + * Glean the address to ARP for from the packet's destination. + * Technically these aren't adjacencies, i.e. they are not a + * representation of a peer. One day we might untangle this coupling + * and use a new Glean DPO.
+ */ + struct + { + ip46_address_t receive_addr; + } glean; + } sub_type; + + CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); + + /* Rewrite in second/third cache lines */ + vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE); +} ip_adjacency_t; + +STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline0) == 0), + "IP adjacency cachline 0 is not offset"); +STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline1) == + CLIB_CACHE_LINE_BYTES), + "IP adjacency cachline 1 is more than one cachline size offset"); /** * @brief diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c index 55b5e44bc43..e8087f08d21 100644 --- a/src/vnet/adj/adj_midchain.c +++ b/src/vnet/adj/adj_midchain.c @@ -384,6 +384,14 @@ adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj) return (arc); } +static u32 +adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj) +{ + return ((adj->ia_flags & ADJ_FLAG_MIDCHAIN_NO_COUNT) ? + adj_midchain_tx_no_count_node.index : + adj_midchain_tx_node.index); +} + /** * adj_nbr_midchain_update_rewrite * @@ -394,12 +402,12 @@ adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj) void adj_nbr_midchain_update_rewrite (adj_index_t adj_index, adj_midchain_fixup_t fixup, - adj_midchain_flag_t flags, + adj_flags_t flags, u8 *rewrite) { + u32 feature_index, tx_node; ip_adjacency_t *adj; u8 arc_index; - u32 feature_index; ASSERT(ADJ_INDEX_INVALID != adj_index); @@ -416,15 +424,14 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, ASSERT(NULL != rewrite); adj->sub_type.midchain.fixup_func = fixup; + adj->ia_flags |= flags; arc_index = adj_midchain_get_feature_arc_index_for_link_type (adj); - feature_index = (flags & ADJ_MIDCHAIN_FLAG_NO_COUNT) ? + feature_index = (flags & ADJ_FLAG_MIDCHAIN_NO_COUNT) ? adj_midchain_tx_no_count_feature_node[adj->ia_link] : adj_midchain_tx_feature_node[adj->ia_link]; - adj->sub_type.midchain.tx_function_node = (flags & ADJ_MIDCHAIN_FLAG_NO_COUNT) ? 
- adj_midchain_tx_no_count_node.index : - adj_midchain_tx_node.index; + tx_node = adj_nbr_midchain_get_tx_node(adj); vnet_feature_enable_disable_with_index (arc_index, feature_index, adj->rewrite_header.sw_if_index, @@ -437,7 +444,7 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, * node are any output features, then the midchain-tx. from there we * need to get to the stacked child's node. */ - dpo_stack_from_node(adj->sub_type.midchain.tx_function_node, + dpo_stack_from_node(tx_node, &adj->sub_type.midchain.next_dpo, drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link))); @@ -447,7 +454,7 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index, adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MIDCHAIN, adj_get_midchain_node(adj->ia_link), - adj->sub_type.midchain.tx_function_node, + tx_node, rewrite); } @@ -491,7 +498,7 @@ adj_nbr_midchain_stack (adj_index_t adj_index, ASSERT(IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index); - dpo_stack_from_node(adj->sub_type.midchain.tx_function_node, + dpo_stack_from_node(adj_nbr_midchain_get_tx_node(adj), &adj->sub_type.midchain.next_dpo, next); } diff --git a/src/vnet/adj/adj_midchain.h b/src/vnet/adj/adj_midchain.h index ae414aea6dc..27ca1d3398d 100644 --- a/src/vnet/adj/adj_midchain.h +++ b/src/vnet/adj/adj_midchain.h @@ -24,26 +24,6 @@ #include -/** - * @brief Flags controlling the midchain adjacency - */ -typedef enum adj_midchain_flag_t_ -{ - /** - * No flags - */ - ADJ_MIDCHAIN_FLAG_NONE = 0, - - /** - * Packets TX through the midchain do not increment the interface - * counters. This should be used when the adj is associated with an L2 - * interface and that L2 interface is in a bridege domain. 
In that case - * the packet will have traversed the interface's TX node, and hence have - * been counted, before it traverses ths midchain - */ - ADJ_MIDCHAIN_FLAG_NO_COUNT = (1 << 0), -} adj_midchain_flag_t; - /** * @brief * Convert an existing neighbour adjacency into a midchain @@ -60,7 +40,7 @@ typedef enum adj_midchain_flag_t_ */ extern void adj_nbr_midchain_update_rewrite(adj_index_t adj_index, adj_midchain_fixup_t fixup, - adj_midchain_flag_t flags, + adj_flags_t flags, u8 *rewrite); /** diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c index 072abd0300d..ddacb030f1d 100644 --- a/src/vnet/adj/adj_nbr.c +++ b/src/vnet/adj/adj_nbr.c @@ -333,7 +333,7 @@ adj_nbr_update_rewrite (adj_index_t adj_index, */ void adj_nbr_update_rewrite_internal (ip_adjacency_t *adj, - u32 adj_next_index, + ip_lookup_next_t adj_next_index, u32 this_node, u32 next_node, u8 *rewrite) @@ -367,7 +367,7 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj, if (ADJ_INDEX_INVALID != walk_ai) { walk_adj = adj_get(walk_ai); - if (IP_ADJ_SYNC_WALK_ACTIVE & walk_adj->ia_flags) + if (ADJ_FLAG_SYNC_WALK_ACTIVE & walk_adj->ia_flags) { do_walk = 0; } @@ -376,7 +376,7 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj, /* * Prevent re-entrant walk of the same adj */ - walk_adj->ia_flags |= IP_ADJ_SYNC_WALK_ACTIVE; + walk_adj->ia_flags |= ADJ_FLAG_SYNC_WALK_ACTIVE; do_walk = 1; } } @@ -502,7 +502,7 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj, */ if (do_walk) { - walk_adj->ia_flags &= ~IP_ADJ_SYNC_WALK_ACTIVE; + walk_adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE; } adj_unlock(adj_get_index(adj)); diff --git a/src/vnet/adj/rewrite.c b/src/vnet/adj/rewrite.c new file mode 100644 index 00000000000..47fb74df01b --- /dev/null +++ b/src/vnet/adj/rewrite.c @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * rewrite.c: packet rewrite + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +void +vnet_rewrite_copy_slow_path (vnet_rewrite_data_t * p0, + vnet_rewrite_data_t * rw0, + word n_left, uword most_likely_size) +{ + uword n_done = + round_pow2 (most_likely_size, sizeof (rw0[0])) / sizeof (rw0[0]); + + p0 -= n_done; + rw0 -= n_done; + + /* As we enter the cleanup loop, p0 and rw0 point to the last chunk written + by the fast path. 
Hence, the constant 1, which the + vnet_rewrite_copy_one macro renders as p0[-1] = rw0[-1]. */ + + while (n_left > 0) + { + vnet_rewrite_copy_one (p0, rw0, 1); + p0--; + rw0--; + n_left--; + } +} + +u8 * +format_vnet_rewrite (u8 * s, va_list * args) +{ + vnet_rewrite_header_t *rw = va_arg (*args, vnet_rewrite_header_t *); + u32 max_data_bytes = va_arg (*args, u32); + CLIB_UNUSED (uword indent) = va_arg (*args, u32); + vnet_main_t *vnm = vnet_get_main (); + + if (rw->sw_if_index != ~0) + { + vnet_sw_interface_t *si; + si = vnet_get_sw_interface_safe (vnm, rw->sw_if_index); + if (NULL != si) + s = format (s, "%U: ", format_vnet_sw_interface_name, vnm, si); + else + s = format (s, "DELETED:%d", rw->sw_if_index); + } + + /* Format rewrite string. */ + if (rw->data_bytes > 0) + + s = format (s, "%U", + format_hex_bytes, + rw->data + max_data_bytes - rw->data_bytes, rw->data_bytes); + + return s; +} + +u32 +vnet_tx_node_index_for_sw_interface (vnet_main_t * vnm, u32 sw_if_index) +{ + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + return (hw->output_node_index); +} + +void +vnet_rewrite_init (vnet_main_t * vnm, + u32 sw_if_index, + u32 this_node, u32 next_node, vnet_rewrite_header_t * rw) +{ + rw->sw_if_index = sw_if_index; + rw->next_index = vlib_node_add_next (vnm->vlib_main, this_node, next_node); + rw->max_l3_packet_bytes = + vnet_sw_interface_get_mtu (vnm, sw_if_index, VLIB_TX); +} + +void +vnet_rewrite_for_sw_interface (vnet_main_t * vnm, + vnet_link_t link_type, + u32 sw_if_index, + u32 node_index, + void *dst_address, + vnet_rewrite_header_t * rw, + u32 max_rewrite_bytes) +{ + + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + vnet_hw_interface_class_t *hc = + vnet_get_hw_interface_class (vnm, hw->hw_class_index); + u8 *rewrite = NULL; + + vnet_rewrite_init (vnm, sw_if_index, node_index, + vnet_tx_node_index_for_sw_interface (vnm, sw_if_index), + rw); + + ASSERT (hc->build_rewrite); + rewrite = hc->build_rewrite 
(vnm, sw_if_index, link_type, dst_address); + + ASSERT (vec_len (rewrite) < max_rewrite_bytes); + vnet_rewrite_set_data_internal (rw, max_rewrite_bytes, rewrite, + vec_len (rewrite)); + vec_free (rewrite); +} + +void +vnet_rewrite_for_tunnel (vnet_main_t * vnm, + u32 tx_sw_if_index, + u32 rewrite_node_index, + u32 post_rewrite_node_index, + vnet_rewrite_header_t * rw, + u8 * rewrite_data, u32 rewrite_length) +{ + ip_adjacency_t *adj = 0; + /* + * Installed into vnet_buffer(b)->sw_if_index[VLIB_TX] e.g. + * by ip4_rewrite_inline. If the post-rewrite node injects into + * ipX-forward, this will be interpreted as a FIB number. + */ + rw->sw_if_index = tx_sw_if_index; + rw->next_index = vlib_node_add_next (vnm->vlib_main, rewrite_node_index, + post_rewrite_node_index); + rw->max_l3_packet_bytes = (u16) ~ 0; /* we can't know at this point */ + + ASSERT (rewrite_length < sizeof (adj->rewrite_data)); + /* Leave room for ethernet + VLAN tag */ + vnet_rewrite_set_data_internal (rw, sizeof (adj->rewrite_data), + rewrite_data, rewrite_length); +} + +void +serialize_vnet_rewrite (serialize_main_t * m, va_list * va) +{ + vnet_rewrite_header_t *rw = va_arg (*va, vnet_rewrite_header_t *); + u32 max_data_bytes = va_arg (*va, u32); + u8 *p; + + serialize_integer (m, rw->sw_if_index, sizeof (rw->sw_if_index)); + serialize_integer (m, rw->data_bytes, sizeof (rw->data_bytes)); + serialize_integer (m, rw->max_l3_packet_bytes, + sizeof (rw->max_l3_packet_bytes)); + p = serialize_get (m, rw->data_bytes); + clib_memcpy (p, vnet_rewrite_get_data_internal (rw, max_data_bytes), + rw->data_bytes); +} + +void +unserialize_vnet_rewrite (serialize_main_t * m, va_list * va) +{ + vnet_rewrite_header_t *rw = va_arg (*va, vnet_rewrite_header_t *); + u32 max_data_bytes = va_arg (*va, u32); + u8 *p; + + /* It is up to user to fill these in. 
*/ + rw->next_index = ~0; + + unserialize_integer (m, &rw->sw_if_index, sizeof (rw->sw_if_index)); + unserialize_integer (m, &rw->data_bytes, sizeof (rw->data_bytes)); + unserialize_integer (m, &rw->max_l3_packet_bytes, + sizeof (rw->max_l3_packet_bytes)); + p = unserialize_get (m, rw->data_bytes); + clib_memcpy (vnet_rewrite_get_data_internal (rw, max_data_bytes), p, + rw->data_bytes); +} + +u8 * +vnet_build_rewrite_for_sw_interface (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, + const void *dst_address) +{ + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + vnet_hw_interface_class_t *hc = + vnet_get_hw_interface_class (vnm, hw->hw_class_index); + + ASSERT (hc->build_rewrite); + return (hc->build_rewrite (vnm, sw_if_index, link_type, dst_address)); +} + + +void +vnet_update_adjacency_for_sw_interface (vnet_main_t * vnm, + u32 sw_if_index, u32 ai) +{ + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + vnet_hw_interface_class_t *hc = + vnet_get_hw_interface_class (vnm, hw->hw_class_index); + + ASSERT (hc->update_adjacency); + hc->update_adjacency (vnm, sw_if_index, ai); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/adj/rewrite.h b/src/vnet/adj/rewrite.h new file mode 100644 index 00000000000..1dea72f5ed8 --- /dev/null +++ b/src/vnet/adj/rewrite.h @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * rewrite.h: packet rewrite + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vnet_rewrite_h +#define included_vnet_rewrite_h + +#include +#include + +/* Consider using vector types for speed? */ +typedef uword vnet_rewrite_data_t; + +/** + * Flags associated with the rewrite/adjacency + */ +typedef enum vnet_rewrite_flags_t_ +{ + /** + * This adjacency/interface has output features configured + */ + VNET_REWRITE_HAS_FEATURES = (1 << 0), +} __attribute__ ((packed)) vnet_rewrite_flags_t; + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + /* Interface to mark re-written packets with. */ + u32 sw_if_index; + + /* Next node to feed after packet rewrite is done. */ + u16 next_index; + + /* Number of bytes in rewrite data. 
*/ + u16 data_bytes; + + /* Max packet size layer 3 (MTU) for output interface. + Used for MTU check after packet rewrite. */ + u16 max_l3_packet_bytes; + + /* Data-plane flags on the adjacency/rewrite */ + vnet_rewrite_flags_t flags; + + /* When dynamically writing a multicast destination L2 address + * this is the offset from the IP address at which to write in the + * IP->MAC address translation. + */ + u8 dst_mcast_offset; + + /* The mask to apply to the lower 4 bytes of the IP address before ORing + * into the destination MAC address */ + u32 dst_mcast_mask; + + /* Rewrite string starting at end and going backwards. */ + u8 data[0]; +}) vnet_rewrite_header_t; +/* *INDENT-ON* */ + +/** + * At 16 bytes of rewrite header we have enough space left for an IPv6 + * (40 bytes) + LISP-GPE (8 bytes) in the cache line + */ +STATIC_ASSERT (sizeof (vnet_rewrite_header_t) <= 16, + "Rewrite header too big"); + +/* + Helper macro for declaring rewrite string w/ given max-size. + + Typical usage: + typedef struct { + // + int a, b; + + // Total adjacency is 64 bytes.
+ vnet_declare_rewrite(64 - 2*sizeof(int)) rw; + } my_adjacency_t; +*/ +#define vnet_declare_rewrite(total_bytes) \ +struct { \ + vnet_rewrite_header_t rewrite_header; \ + \ + u8 rewrite_data[(total_bytes) - sizeof (vnet_rewrite_header_t)]; \ +} + +always_inline void +vnet_rewrite_clear_data_internal (vnet_rewrite_header_t * rw, int max_size) +{ + /* Sanity check values carefully for this memset operation */ + ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE)); + + rw->data_bytes = 0; + memset (rw->data, 0xfe, max_size); +} + +always_inline void +vnet_rewrite_set_data_internal (vnet_rewrite_header_t * rw, + int max_size, void *data, int data_bytes) +{ + /* Sanity check values carefully for this memset operation */ + ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE)); + ASSERT ((data_bytes >= 0) && (data_bytes < max_size)); + + rw->data_bytes = data_bytes; + clib_memcpy (rw->data + max_size - data_bytes, data, data_bytes); + memset (rw->data, 0xfe, max_size - data_bytes); +} + +#define vnet_rewrite_set_data(rw,data,data_bytes) \ + vnet_rewrite_set_data_internal (&((rw).rewrite_header), \ + sizeof ((rw).rewrite_data), \ + (data), \ + (data_bytes)) + +always_inline void * +vnet_rewrite_get_data_internal (vnet_rewrite_header_t * rw, int max_size) +{ + ASSERT (rw->data_bytes <= max_size); + return rw->data + max_size - rw->data_bytes; +} + +#define vnet_rewrite_get_data(rw) \ + vnet_rewrite_get_data_internal (&((rw).rewrite_header), sizeof ((rw).rewrite_data)) + +always_inline void +vnet_rewrite_copy_one (vnet_rewrite_data_t * p0, vnet_rewrite_data_t * rw0, + int i) +{ + p0[-i] = rw0[-i]; +} + +void vnet_rewrite_copy_slow_path (vnet_rewrite_data_t * p0, + vnet_rewrite_data_t * rw0, + word n_left, uword most_likely_size); + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + u64 a; + u32 b; + u16 c; +}) eh_copy_t; +/* *INDENT-ON* */ + +always_inline void +_vnet_rewrite_one_header (vnet_rewrite_header_t * h0, + void *packet0, int max_size,
int most_likely_size) +{ + vnet_rewrite_data_t *p0 = packet0; + vnet_rewrite_data_t *rw0 = (vnet_rewrite_data_t *) (h0->data + max_size); + word n_left0; + + /* 0xfefe => poisoned adjacency => crash */ + ASSERT (h0->data_bytes != 0xfefe); + + if (PREDICT_TRUE (h0->data_bytes == sizeof (eh_copy_t))) + { + eh_copy_t *s, *d; + s = (eh_copy_t *) (h0->data + max_size - sizeof (eh_copy_t)); + d = (eh_copy_t *) (((u8 *) packet0) - sizeof (eh_copy_t)); + clib_memcpy (d, s, sizeof (eh_copy_t)); + return; + } + + +#define _(i) \ + do { \ + if (most_likely_size > ((i)-1)*sizeof (vnet_rewrite_data_t)) \ + vnet_rewrite_copy_one (p0, rw0, (i)); \ + } while (0) + + _(4); + _(3); + _(2); + _(1); + +#undef _ + + n_left0 = (int) + (((int) h0->data_bytes - most_likely_size) + (sizeof (rw0[0]) - 1)) + / (int) sizeof (rw0[0]); + if (PREDICT_FALSE (n_left0 > 0)) + vnet_rewrite_copy_slow_path (p0, rw0, n_left0, most_likely_size); +} + +always_inline void +_vnet_rewrite_two_headers (vnet_rewrite_header_t * h0, + vnet_rewrite_header_t * h1, + void *packet0, + void *packet1, int max_size, int most_likely_size) +{ + vnet_rewrite_data_t *p0 = packet0; + vnet_rewrite_data_t *p1 = packet1; + vnet_rewrite_data_t *rw0 = (vnet_rewrite_data_t *) (h0->data + max_size); + vnet_rewrite_data_t *rw1 = (vnet_rewrite_data_t *) (h1->data + max_size); + word n_left0, n_left1; + int slow_path; + + /* 0xfefe => poisoned adjacency => crash */ + ASSERT (h0->data_bytes != 0xfefe); + ASSERT (h1->data_bytes != 0xfefe); + + /* Arithmetic calculation: bytes0 == bytes1 == 14 */ + slow_path = h0->data_bytes ^ h1->data_bytes; + slow_path += h0->data_bytes ^ sizeof (eh_copy_t); + + if (PREDICT_TRUE (slow_path == 0)) + { + eh_copy_t *s0, *d0, *s1, *d1; + s0 = (eh_copy_t *) (h0->data + max_size - sizeof (eh_copy_t)); + d0 = (eh_copy_t *) (((u8 *) packet0) - sizeof (eh_copy_t)); + clib_memcpy (d0, s0, sizeof (eh_copy_t)); + s1 = (eh_copy_t *) (h1->data + max_size - sizeof (eh_copy_t)); + d1 = (eh_copy_t *) (((u8 *) 
packet1) - sizeof (eh_copy_t)); + clib_memcpy (d1, s1, sizeof (eh_copy_t)); + return; + } + +#define _(i) \ + do { \ + if (most_likely_size > ((i)-1)*sizeof (vnet_rewrite_data_t)) \ + { \ + vnet_rewrite_copy_one (p0, rw0, (i)); \ + vnet_rewrite_copy_one (p1, rw1, (i)); \ + } \ + } while (0) + + _(4); + _(3); + _(2); + _(1); + +#undef _ + + n_left0 = (int) + (((int) h0->data_bytes - most_likely_size) + (sizeof (rw0[0]) - 1)) + / (int) sizeof (rw0[0]); + n_left1 = (int) + (((int) h1->data_bytes - most_likely_size) + (sizeof (rw1[0]) - 1)) + / (int) sizeof (rw1[0]); + + if (PREDICT_FALSE (n_left0 > 0 || n_left1 > 0)) + { + vnet_rewrite_copy_slow_path (p0, rw0, n_left0, most_likely_size); + vnet_rewrite_copy_slow_path (p1, rw1, n_left1, most_likely_size); + } +} + +#define vnet_rewrite_one_header(rw0,p0,most_likely_size) \ + _vnet_rewrite_one_header (&((rw0).rewrite_header), (p0), \ + sizeof ((rw0).rewrite_data), \ + (most_likely_size)) + +#define vnet_rewrite_two_headers(rw0,rw1,p0,p1,most_likely_size) \ + _vnet_rewrite_two_headers (&((rw0).rewrite_header), &((rw1).rewrite_header), \ + (p0), (p1), \ + sizeof ((rw0).rewrite_data), \ + (most_likely_size)) + +always_inline void +_vnet_fixup_one_header (vnet_rewrite_header_t * h0, + u8 * addr, u32 addr_len, u8 * packet0) +{ + if (PREDICT_TRUE (h0->dst_mcast_mask)) + { + /* location to write to in the packet */ + u8 *p0 = packet0 - h0->dst_mcast_offset; + u32 *p1 = (u32 *) p0; + /* location to copy from in the L3 dest address */ + u32 *a0 = (u32 *) (addr + addr_len - sizeof (h0->dst_mcast_mask)); + + *p1 |= (*a0 & h0->dst_mcast_mask); + } +} + +#define vnet_fixup_one_header(rw0,addr,p0) \ + _vnet_fixup_one_header (&((rw0).rewrite_header), \ + (u8*)(addr), sizeof((*addr)), \ + (u8*)(p0)) + +#define VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST ((void *) 0) +/** Deprecated */ +void vnet_rewrite_for_sw_interface (struct vnet_main_t *vnm, + vnet_link_t packet_type, + u32 sw_if_index, + u32 node_index, + void *dst_address, + 
vnet_rewrite_header_t * rw, + u32 max_rewrite_bytes); + +u32 vnet_tx_node_index_for_sw_interface (struct vnet_main_t *vnm, + u32 sw_if_index); + +void vnet_rewrite_init (struct vnet_main_t *vnm, + u32 sw_if_index, + u32 this_node, + u32 next_node, vnet_rewrite_header_t * rw); + +u8 *vnet_build_rewrite_for_sw_interface (struct vnet_main_t *vnm, + u32 sw_if_index, + vnet_link_t packet_type, + const void *dst_address); +void vnet_update_adjacency_for_sw_interface (struct vnet_main_t *vnm, + u32 sw_if_index, u32 ai); + +format_function_t format_vnet_rewrite; + +serialize_function_t serialize_vnet_rewrite, unserialize_vnet_rewrite; + +#endif /* included_vnet_rewrite_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg