diff options
author | Neale Ranns <nranns@cisco.com> | 2017-02-02 02:18:18 -0800 |
---|---|---|
committer | Damjan Marion <dmarion.lists@gmail.com> | 2017-02-02 14:27:13 +0000 |
commit | a9374df5f351d25e968f5f90a827796203cbafdd (patch) | |
tree | 7876e45e388ebc739722777c1cadcf666dda6439 /src/vnet/sr/sr_replicate.c | |
parent | ed0e49c51841e84c72a5bb2c6d538ee779b734d4 (diff) |
Fix SR multicast post mfib commit
1 - use the SR policy to construct the replicate DPO. Each bucket therein is a SR tunnel.
2 - install a special mfib entry that links via this replicate
3 - forwarding is now mfib-lookup -> replicate -> sr_rewrite (per-tunnel)
no need for a separate sr_replicate node.
4 - Stack the sr tunnel on the forwarding DPO of the first-hop FIB entry.
no need for a second lookup post SR encap.
5 - fix some path-list lock leaks in the MFIB entry.
Change-Id: I20de96ea4c4be4fae252625bde159d9c435c8315
Signed-off-by: Neale Ranns <nranns@cisco.com>
Diffstat (limited to 'src/vnet/sr/sr_replicate.c')
-rw-r--r-- | src/vnet/sr/sr_replicate.c | 491 |
1 files changed, 0 insertions, 491 deletions
diff --git a/src/vnet/sr/sr_replicate.c b/src/vnet/sr/sr_replicate.c deleted file mode 100644 index fa5a68c31c2..00000000000 --- a/src/vnet/sr/sr_replicate.c +++ /dev/null @@ -1,491 +0,0 @@ -/* - * sr_replicate.c: ipv6 segment routing replicator for multicast - * - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * @file - * @brief Functions for replicating packets across SR tunnels. - * - * Leverages rte_pktmbuf_clone() so there is no memcpy for - * invariant parts of the packet. - * - * @note Currently requires DPDK -*/ - -#if DPDK > 0 /* Cannot run replicate without DPDK */ -#include <vlib/vlib.h> -#include <vnet/vnet.h> -#include <vnet/pg/pg.h> -#include <vnet/sr/sr.h> -#include <vnet/devices/dpdk/dpdk.h> -#include <vnet/devices/dpdk/dpdk_priv.h> -#include <vnet/ip/ip.h> -#include <vnet/fib/ip6_fib.h> - -#include <vppinfra/hash.h> -#include <vppinfra/error.h> -#include <vppinfra/elog.h> - -/** - * @brief sr_replicate state. - * -*/ -typedef struct -{ - /* convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; -} sr_replicate_main_t; - -sr_replicate_main_t sr_replicate_main; - -/** - * @brief Information to display in packet trace. - * -*/ -typedef struct -{ - ip6_address_t src, dst; - u16 length; - u32 next_index; - u32 tunnel_index; - u8 sr[256]; -} sr_replicate_trace_t; - -/** - * @brief packet trace format function. - * - * @param *s u8 used for string output - * @param *args va_list structured input to va_arg to output @ref sr_replicate_trace_t - * @return *s u8 - formatted trace output -*/ -static u8 * -format_sr_replicate_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - sr_replicate_trace_t *t = va_arg (*args, sr_replicate_trace_t *); - ip6_sr_main_t *sm = &sr_main; - ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index); - ip6_fib_t *rx_fib, *tx_fib; - - rx_fib = ip6_fib_get (tun->rx_fib_index); - tx_fib = ip6_fib_get (tun->tx_fib_index); - - s = format - (s, "SR-REPLICATE: next %s ip6 src %U dst %U len %u\n" - " rx-fib-id %d tx-fib-id %d\n%U", - "ip6-lookup", - format_ip6_address, &t->src, - format_ip6_address, &t->dst, t->length, - rx_fib->table_id, tx_fib->table_id, - format_ip6_sr_header, t->sr, 0 /* print_hmac */ ); - return s; - -} - -#define foreach_sr_replicate_error \ -_(REPLICATED, "sr packets replicated") \ -_(NO_BUFFERS, "error allocating buffers for replicas") \ -_(NO_REPLICAS, "no replicas were needed") \ -_(NO_BUFFER_DROPS, "sr no buffer drops") - -/** - * @brief Struct for SR replicate errors - */ -typedef enum -{ -#define _(sym,str) SR_REPLICATE_ERROR_##sym, - foreach_sr_replicate_error -#undef _ - SR_REPLICATE_N_ERROR, -} sr_replicate_error_t; - -/** - * @brief Error strings for SR replicate - */ -static char *sr_replicate_error_strings[] = { -#define _(sym,string) string, - foreach_sr_replicate_error -#undef _ -}; - -/** - * @brief Defines next-nodes for packet processing. - * -*/ -typedef enum -{ - SR_REPLICATE_NEXT_IP6_LOOKUP, - SR_REPLICATE_N_NEXT, -} sr_replicate_next_t; - -/** - * @brief Single loop packet replicator. - * - * @node sr-replicate - * @param vm vlib_main_t - * @return frame->n_vectors uword -*/ -static uword -sr_replicate_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - dpdk_main_t *dm = &dpdk_main; - u32 n_left_from, *from, *to_next; - sr_replicate_next_t next_index; - int pkts_replicated = 0; - ip6_sr_main_t *sm = &sr_main; - int no_buffer_drops = 0; - vlib_buffer_free_list_t *fl; - unsigned socket_id = rte_socket_id (); - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0, hdr_bi0; - vlib_buffer_t *b0, *orig_b0; - struct rte_mbuf *orig_mb0 = 0, *hdr_mb0 = 0, *clone0 = 0; - struct rte_mbuf **hdr_vec = 0, **rte_mbuf_vec = 0; - ip6_sr_policy_t *pol0 = 0; - ip6_sr_tunnel_t *t0 = 0; - ip6_sr_header_t *hdr_sr0 = 0; - ip6_header_t *ip0 = 0, *hdr_ip0 = 0; - int num_replicas = 0; - int i; - u32 len_bytes = sizeof (ip6_header_t); - u8 next_hdr, ip_next_hdr = IPPROTO_IPV6_ROUTE; - - bi0 = from[0]; - - b0 = vlib_get_buffer (vm, bi0); - orig_b0 = b0; - - pol0 = pool_elt_at_index (sm->policies, - vnet_buffer (b0)->ip.save_protocol); - - ip0 = vlib_buffer_get_current (b0); - /* Skip forward to the punch-in point */ - vlib_buffer_advance (b0, sizeof (*ip0)); - next_hdr = ip0->protocol; - - /* HBH must immediately follow ipv6 header */ - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - u32 ext_hdr_len = 0; - ext_hdr_len = ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); - len_bytes += ext_hdr_len; - next_hdr = ext_hdr->next_hdr; - ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; - ip_next_hdr = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS; - /* Skip forward to the punch-in point */ - vlib_buffer_advance (b0, ext_hdr_len); - - } - - orig_mb0 = rte_mbuf_from_vlib_buffer (b0); - - i16 delta0 = vlib_buffer_length_in_chain (vm, orig_b0) - - (i16) orig_mb0->pkt_len; - - u16 new_data_len0 = (u16) ((i16) orig_mb0->data_len + delta0); - u16 new_pkt_len0 = (u16) ((i16) orig_mb0->pkt_len + delta0); - - orig_mb0->data_len = new_data_len0; - orig_mb0->pkt_len = new_pkt_len0; - orig_mb0->data_off += (u16) (b0->current_data); - - /* - Before entering loop determine if we can allocate: - - all the new HEADER RTE_MBUFs and assign them to a vector - - all the clones - - if successful, then iterate over vectors of resources - - */ - num_replicas = vec_len (pol0->tunnel_indices); - - if (PREDICT_FALSE (num_replicas == 0)) - { - b0->error = node->errors[SR_REPLICATE_ERROR_NO_REPLICAS]; - goto do_trace0; - } - - vec_reset_length (hdr_vec); - vec_reset_length (rte_mbuf_vec); - - for (i = 0; i < num_replicas; i++) - { - uint8_t nb_seg; - struct rte_mbuf *clone0i; - vlib_buffer_t *clone0_c, *clone_b0; - - t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]); - hdr_mb0 = rte_pktmbuf_alloc (dm->pktmbuf_pools[socket_id]); - - if (i < (num_replicas - 1)) - { - /* Not the last tunnel to process */ - clone0 = rte_pktmbuf_clone - (orig_mb0, dm->pktmbuf_pools[socket_id]); - if (clone0 == 0) - goto clone_fail; - nb_seg = 0; - clone0i = clone0; - clone0_c = NULL; - while ((clone0->nb_segs >= 1) && (nb_seg < clone0->nb_segs)) - { - - clone_b0 = vlib_buffer_from_rte_mbuf (clone0i); - vlib_buffer_init_for_free_list (clone_b0, fl); - - ASSERT ((clone_b0->flags & VLIB_BUFFER_NEXT_PRESENT) == - 0); - ASSERT (clone_b0->current_data == 0); - - clone_b0->current_data = - (clone0i->buf_addr + clone0i->data_off) - - (void *) clone_b0->data; - - clone_b0->current_length = clone0i->data_len; - if (PREDICT_FALSE (clone0_c != NULL)) - { - clone0_c->flags |= VLIB_BUFFER_NEXT_PRESENT; - clone0_c->next_buffer = - vlib_get_buffer_index (vm, clone_b0); - } - clone0_c = clone_b0; - clone0i = clone0i->next; - nb_seg++; - } - } - else - /* First tunnel to process, use original MB */ - clone0 = orig_mb0; - - - if (PREDICT_FALSE (!clone0 || !hdr_mb0)) - { - clone_fail: - b0->error = node->errors[SR_REPLICATE_ERROR_NO_BUFFERS]; - - vec_foreach_index (i, rte_mbuf_vec) - { - rte_pktmbuf_free (rte_mbuf_vec[i]); - } - vec_free (rte_mbuf_vec); - - vec_foreach_index (i, hdr_vec) - { - rte_pktmbuf_free (hdr_vec[i]); - } - vec_free (hdr_vec); - - goto do_trace0; - } - - vec_add1 (hdr_vec, hdr_mb0); - vec_add1 (rte_mbuf_vec, clone0); - - } - - for (i = 0; i < num_replicas; i++) - { - vlib_buffer_t *hdr_b0; - u16 new_l0 = 0; - - t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]); - /* Our replicas */ - hdr_mb0 = hdr_vec[i]; - clone0 = rte_mbuf_vec[i]; - - hdr_mb0->data_len = len_bytes + vec_len (t0->rewrite); - hdr_mb0->pkt_len = hdr_mb0->data_len + - vlib_buffer_length_in_chain (vm, orig_b0); - - hdr_b0 = vlib_buffer_from_rte_mbuf (hdr_mb0); - - vlib_buffer_init_for_free_list (hdr_b0, fl); - - memcpy (hdr_b0->data, ip0, len_bytes); - memcpy (hdr_b0->data + len_bytes, t0->rewrite, - vec_len (t0->rewrite)); - - hdr_b0->current_data = 0; - hdr_b0->current_length = len_bytes + vec_len (t0->rewrite); - hdr_b0->flags = orig_b0->flags | VLIB_BUFFER_NEXT_PRESENT; - hdr_b0->trace_index = orig_b0->trace_index; - vnet_buffer (hdr_b0)->l2_classify.opaque_index = 0; - - hdr_b0->total_length_not_including_first_buffer = - hdr_mb0->pkt_len - hdr_b0->current_length; - vnet_buffer (hdr_b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index; - - hdr_ip0 = (ip6_header_t *) hdr_b0->data; - new_l0 = clib_net_to_host_u16 (ip0->payload_length) + - vec_len (t0->rewrite); - hdr_ip0->payload_length = clib_host_to_net_u16 (new_l0); - hdr_sr0 = (ip6_sr_header_t *) ((u8 *) hdr_ip0 + len_bytes); - /* $$$ tune */ - clib_memcpy (hdr_sr0, t0->rewrite, vec_len (t0->rewrite)); - hdr_sr0->protocol = next_hdr; - hdr_ip0->protocol = ip_next_hdr; - - /* Copy dst address into the DA slot in the segment list */ - clib_memcpy (hdr_sr0->segments, ip0->dst_address.as_u64, - sizeof (ip6_address_t)); - - /* Rewrite the ip6 dst address */ - hdr_ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0]; - hdr_ip0->dst_address.as_u64[1] = t0->first_hop.as_u64[1]; - - sr_fix_hmac (sm, hdr_ip0, hdr_sr0); - - /* prepend new header to invariant piece */ - hdr_mb0->next = clone0; - hdr_b0->next_buffer = - vlib_get_buffer_index (vm, - vlib_buffer_from_rte_mbuf (clone0)); - - /* update header's fields */ - hdr_mb0->pkt_len = - (uint16_t) (hdr_mb0->data_len + clone0->pkt_len); - hdr_mb0->nb_segs = (uint8_t) (clone0->nb_segs + 1); - - /* copy metadata from source packet */ - hdr_mb0->port = clone0->port; - hdr_mb0->vlan_tci = clone0->vlan_tci; - hdr_mb0->vlan_tci_outer = clone0->vlan_tci_outer; - hdr_mb0->tx_offload = clone0->tx_offload; - hdr_mb0->hash = clone0->hash; - - hdr_mb0->ol_flags = clone0->ol_flags & ~(IND_ATTACHED_MBUF); - - __rte_mbuf_sanity_check (hdr_mb0, 1); - - hdr_bi0 = vlib_get_buffer_index (vm, hdr_b0); - - to_next[0] = hdr_bi0; - to_next += 1; - n_left_to_next -= 1; - - if (n_left_to_next == 0) - { - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - - } - pkts_replicated++; - } - - from += 1; - n_left_from -= 1; - - do_trace0: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_replicate_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->tunnel_index = t0 - sm->tunnels; - tr->length = 0; - if (hdr_ip0) - { - memcpy (tr->src.as_u8, hdr_ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - memcpy (tr->dst.as_u8, hdr_ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - if (hdr_ip0->payload_length) - tr->length = clib_net_to_host_u16 - (hdr_ip0->payload_length); - } - tr->next_index = next_index; - if (hdr_sr0) - memcpy (tr->sr, hdr_sr0, sizeof (tr->sr)); - } - - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - vlib_node_increment_counter (vm, sr_replicate_node.index, - SR_REPLICATE_ERROR_REPLICATED, - pkts_replicated); - - vlib_node_increment_counter (vm, sr_replicate_node.index, - SR_REPLICATE_ERROR_NO_BUFFER_DROPS, - no_buffer_drops); - - return frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_replicate_node) = { - .function = sr_replicate_node_fn, - .name = "sr-replicate", - .vector_size = sizeof (u32), - .format_trace = format_sr_replicate_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(sr_replicate_error_strings), - .error_strings = sr_replicate_error_strings, - - .n_next_nodes = SR_REPLICATE_N_NEXT, - - .next_nodes = { - [SR_REPLICATE_NEXT_IP6_LOOKUP] = "ip6-lookup", - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (sr_replicate_node, sr_replicate_node_fn) -/* *INDENT-ON* */ - -clib_error_t * -sr_replicate_init (vlib_main_t * vm) -{ - sr_replicate_main_t *msm = &sr_replicate_main; - - msm->vlib_main = vm; - msm->vnet_main = vnet_get_main (); - - return 0; -} - -VLIB_INIT_FUNCTION (sr_replicate_init); - -#endif /* DPDK */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ |