diff options
author | John Lo <loj@cisco.com> | 2018-02-13 17:15:23 -0500 |
---|---|---|
committer | Neale Ranns <nranns@cisco.com> | 2018-02-15 11:07:56 +0000 |
commit | a43ccaefc3bd50c03c90f7c3bee02eac9709df56 (patch) | |
tree | bd2820c08864b8c3094cc6f6dddb279926c625cc /src/vnet/gre/interface.c | |
parent | 5fda7a3925be145f0c326d0aecc36d883cbcb2ee (diff) |
Optimize GRE Tunnel and add support for ERSPAN encap
Change GRE tunnel to use the interface type where the same encap
node is used as output node for all GRE tunnels, instead of having
dedicated output and tx node for each tunnel. This allows for more
efficient tunnel creation and deletion at scale, tested with thousands
of GRE tunnels.
Add support for ERSPAN encap as another tunnel type, in addition
to the existing L3 and TEB types. The GRE ERSPAN encap supported
is type 2, thus the GRE encap needs to include a sequence number, and a
GRE-ERSPAN tunnel can be created with a user-specified ERSPAN session ID.
The GRE tunnel lookup hash key is updated to include tunnel type
and session ID, in addition to SIP/DIP and FIB index.
Thus, GRE-ERSPAN tunnel can be created, with the appropriate
session ID, to be used as output interface for SPAN config to
send mirrored packets.
Change interface naming so that all GRE tunnels, irrespective of
tunnel type, use "greN" where N is the instance number. Removed
interface reuse on tunnel creation and deletion to enable unified
tunnel interface naming.
Add support of user specified instance on GRE tunnel creation.
Thus, N in the "greN" interface name can optionally be specified
by user via CLI/API.
Optimize GRE tunnel encap DPO stacking to bypass the load-balance DPO
node, since packets output on a GRE tunnel always belong to the same
flow after the 5-tuple hash.
Change-Id: Ifa83915744a1a88045c998604777cc3583f4da52
Signed-off-by: John Lo <loj@cisco.com>
Diffstat (limited to 'src/vnet/gre/interface.c')
-rw-r--r-- | src/vnet/gre/interface.c | 330 |
1 files changed, 182 insertions, 148 deletions
diff --git a/src/vnet/gre/interface.c b/src/vnet/gre/interface.c index ce9685d55a3..97c4f1643d9 100644 --- a/src/vnet/gre/interface.c +++ b/src/vnet/gre/interface.c @@ -31,35 +31,41 @@ static u8 * format_gre_tunnel (u8 * s, va_list * args) { gre_tunnel_t *t = va_arg (*args, gre_tunnel_t *); - gre_main_t *gm = &gre_main; - s = format (s, "[%d] src %U dst %U fib-idx %d sw-if-idx %d ", - t - gm->tunnels, + s = format (s, "[%d] instance %d src %U dst %U fib-idx %d sw-if-idx %d ", + t->dev_instance, t->user_instance, format_ip46_address, &t->tunnel_src, IP46_TYPE_ANY, format_ip46_address, &t->tunnel_dst.fp_addr, IP46_TYPE_ANY, t->outer_fib_index, t->sw_if_index); - s = format (s, "payload %s", gre_tunnel_type_names[t->type]); + s = format (s, "payload %s ", gre_tunnel_type_names[t->type]); + + if (t->type == GRE_TUNNEL_TYPE_ERSPAN) + s = format (s, "session %d ", t->session_id); + + if (t->type != GRE_TUNNEL_TYPE_L3) + s = format (s, "l2-adj-idx %d ", t->l2_adj_index); return s; } static gre_tunnel_t * -gre_tunnel_db_find (const ip46_address_t * src, - const ip46_address_t * dst, - u32 out_fib_index, u8 is_ipv6, gre_tunnel_key_t * key) +gre_tunnel_db_find (const vnet_gre_add_del_tunnel_args_t * a, + u32 outer_fib_index, gre_tunnel_key_t * key) { gre_main_t *gm = &gre_main; uword *p; - if (!is_ipv6) + if (!a->is_ipv6) { - gre_mk_key4 (&src->ip4, &dst->ip4, out_fib_index, &key->gtk_v4); + gre_mk_key4 (a->src.ip4, a->dst.ip4, outer_fib_index, + a->tunnel_type, a->session_id, &key->gtk_v4); p = hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4); } else { - gre_mk_key6 (&src->ip6, &dst->ip6, out_fib_index, &key->gtk_v6); + gre_mk_key6 (&a->src.ip6, &a->dst.ip6, outer_fib_index, + a->tunnel_type, a->session_id, &key->gtk_v6); p = hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6); } @@ -79,11 +85,11 @@ gre_tunnel_db_add (gre_tunnel_t * t, gre_tunnel_key_t * key) if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6) { - hash_set_mem (gm->tunnel_by_key6, &t->key->gtk_v6, t - gm->tunnels); 
+ hash_set_mem (gm->tunnel_by_key6, &t->key->gtk_v6, t->dev_instance); } else { - hash_set_mem (gm->tunnel_by_key4, &t->key->gtk_v4, t - gm->tunnels); + hash_set_mem (gm->tunnel_by_key4, &t->key->gtk_v4, t->dev_instance); } } @@ -136,23 +142,48 @@ gre_tunnel_stack (adj_index_t ai) gt = pool_elt_at_index (gm->tunnels, gm->tunnel_index_by_sw_if_index[sw_if_index]); - /* - * find the adjacency that is contributed by the FIB entry - * that this tunnel resovles via, and use it as the next adj - * in the midchain - */ - if (vnet_hw_interface_get_flags (vnet_get_main (), - gt->hw_if_index) & - VNET_HW_INTERFACE_FLAG_LINK_UP) + if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) & + VNET_HW_INTERFACE_FLAG_LINK_UP) == 0) { - adj_nbr_midchain_stack (ai, - fib_entry_contribute_ip_forwarding - (gt->fib_entry_index)); + adj_nbr_midchain_unstack (ai); + return; } - else + + dpo_id_t tmp = DPO_INVALID; + fib_forward_chain_type_t fib_fwd = (FIB_PROTOCOL_IP6 == adj->ia_nh_proto) ? + FIB_FORW_CHAIN_TYPE_UNICAST_IP6 : FIB_FORW_CHAIN_TYPE_UNICAST_IP4; + + fib_entry_contribute_forwarding (gt->fib_entry_index, fib_fwd, &tmp); + if (DPO_LOAD_BALANCE == tmp.dpoi_type) { - adj_nbr_midchain_unstack (ai); + /* + * post GRE rewrite we will load-balance. However, the GRE encap + * is always the same for this adjacency/tunnel and hence the IP/GRE + * src,dst hash is always the same result too. So we do that hash now and + * stack on the choice. + * If the choice is an incomplete adj then we will need a poke when + * it becomes complete. This happens since the adj update walk propagates + * as far a recursive paths. 
+ */ + const dpo_id_t *choice; + load_balance_t *lb; + int hash; + + lb = load_balance_get (tmp.dpoi_index); + + if (fib_fwd == FIB_FORW_CHAIN_TYPE_UNICAST_IP4) + hash = ip4_compute_flow_hash ((ip4_header_t *) adj_get_rewrite (ai), + lb->lb_hash_config); + else + hash = ip6_compute_flow_hash ((ip6_header_t *) adj_get_rewrite (ai), + lb->lb_hash_config); + choice = + load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1); + dpo_copy (&tmp, choice); } + + adj_nbr_midchain_stack (ai, &tmp); + dpo_reset (&tmp); } /** @@ -230,7 +261,8 @@ const static fib_node_vft_t gre_vft = { }; static int -vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp) +vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, + u32 outer_fib_index, u32 * sw_if_indexp) { gre_main_t *gm = &gre_main; vnet_main_t *vnm = gm->vnet_main; @@ -239,112 +271,67 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp) gre_tunnel_t *t; vnet_hw_interface_t *hi; u32 hw_if_index, sw_if_index; - u32 outer_fib_index; - u8 address[6]; clib_error_t *error; u8 is_ipv6 = a->is_ipv6; gre_tunnel_key_t key; - if (!is_ipv6) - outer_fib_index = ip4_fib_index_from_table_id (a->outer_fib_id); - else - outer_fib_index = ip6_fib_index_from_table_id (a->outer_fib_id); - - if (~0 == outer_fib_index) - return VNET_API_ERROR_NO_SUCH_FIB; - - t = - gre_tunnel_db_find (&a->src, &a->dst, outer_fib_index, a->is_ipv6, &key); - + t = gre_tunnel_db_find (a, outer_fib_index, &key); if (NULL != t) - return VNET_API_ERROR_INVALID_VALUE; + return VNET_API_ERROR_IF_ALREADY_EXISTS; pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES); memset (t, 0, sizeof (*t)); - fib_node_init (&t->node, FIB_NODE_TYPE_GRE_TUNNEL); - if (a->teb) - t->type = GRE_TUNNEL_TYPE_TEB; - else - t->type = GRE_TUNNEL_TYPE_L3; - - if (vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) > 0) + /* Reconcile the real dev_instance and a possible requested instance */ + u32 t_idx = t - gm->tunnels; /* tunnel index 
(or instance) */ + u32 u_idx = a->instance; /* user specified instance */ + if (u_idx == ~0) + u_idx = t_idx; + if (hash_get (gm->instance_used, u_idx)) { - vnet_interface_main_t *im = &vnm->interface_main; - - hw_if_index = gm->free_gre_tunnel_hw_if_indices[t->type] - [vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) - 1]; - _vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) -= 1; - - hi = vnet_get_hw_interface (vnm, hw_if_index); - hi->dev_instance = t - gm->tunnels; - hi->hw_instance = hi->dev_instance; - - /* clear old stats of freed tunnel before reuse */ - sw_if_index = hi->sw_if_index; - vnet_interface_counter_lock (im); - vlib_zero_combined_counter - (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX], - sw_if_index); - vlib_zero_combined_counter (&im->combined_sw_if_counters - [VNET_INTERFACE_COUNTER_RX], sw_if_index); - vlib_zero_simple_counter (&im->sw_if_counters - [VNET_INTERFACE_COUNTER_DROP], sw_if_index); - vnet_interface_counter_unlock (im); - if (GRE_TUNNEL_TYPE_TEB == t->type) - { - t->l2_tx_arc = vlib_node_add_named_next (vlib_get_main (), - hi->tx_node_index, - "adj-l2-midchain"); - } + pool_put (gm->tunnels, t); + return VNET_API_ERROR_INSTANCE_IN_USE; } + hash_set (gm->instance_used, u_idx, 1); + + t->dev_instance = t_idx; /* actual */ + t->user_instance = u_idx; /* name */ + fib_node_init (&t->node, FIB_NODE_TYPE_GRE_TUNNEL); + + t->type = a->tunnel_type; + if (t->type == GRE_TUNNEL_TYPE_ERSPAN) + t->session_id = a->session_id; + + if (t->type == GRE_TUNNEL_TYPE_L3) + hw_if_index = vnet_register_interface (vnm, gre_device_class.index, t_idx, + gre_hw_interface_class.index, + t_idx); else { - if (GRE_TUNNEL_TYPE_TEB == t->type) - { - /* Default MAC address (d00b:eed0:0000 + sw_if_index) */ - memset (address, 0, sizeof (address)); - address[0] = 0xd0; - address[1] = 0x0b; - address[2] = 0xee; - address[3] = 0xd0; - address[4] = t - gm->tunnels; - - error = ethernet_register_interface (vnm, - gre_device_teb_class.index, - t - 
gm->tunnels, address, - &hw_if_index, 0); - - if (error) - { - clib_error_report (error); - return VNET_API_ERROR_INVALID_REGISTRATION; - } - hi = vnet_get_hw_interface (vnm, hw_if_index); - - t->l2_tx_arc = vlib_node_add_named_next (vlib_get_main (), - hi->tx_node_index, - "adj-l2-midchain"); - } - else + /* Default MAC address (d00b:eed0:0000 + sw_if_index) */ + u8 address[6] = { 0xd0, 0x0b, 0xee, 0xd0, (u8) t_idx >> 8, (u8) t_idx }; + error = ethernet_register_interface (vnm, gre_device_class.index, t_idx, + address, &hw_if_index, 0); + if (error) { - hw_if_index = vnet_register_interface (vnm, - gre_device_class.index, - t - gm->tunnels, - gre_hw_interface_class.index, - t - gm->tunnels); + clib_error_report (error); + return VNET_API_ERROR_INVALID_REGISTRATION; } - hi = vnet_get_hw_interface (vnm, hw_if_index); - sw_if_index = hi->sw_if_index; } + /* Set GRE tunnel interface output node (not used for L3 payload) */ + vnet_set_interface_output_node (vnm, hw_if_index, gre_encap_node.index); + + hi = vnet_get_hw_interface (vnm, hw_if_index); + sw_if_index = hi->sw_if_index; + t->hw_if_index = hw_if_index; t->outer_fib_index = outer_fib_index; t->sw_if_index = sw_if_index; t->l2_adj_index = ADJ_INDEX_INVALID; vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0); - gm->tunnel_index_by_sw_if_index[sw_if_index] = t - gm->tunnels; + gm->tunnel_index_by_sw_if_index[sw_if_index] = t_idx; if (!is_ipv6) { @@ -378,20 +365,37 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp) t->tunnel_dst.fp_addr = a->dst; gre_tunnel_db_add (t, &key); + if (t->type == GRE_TUNNEL_TYPE_ERSPAN) + { + gre_sn_key_t skey; + gre_sn_t *gre_sn; - t->fib_entry_index = - fib_table_entry_special_add (outer_fib_index, - &t->tunnel_dst, - FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE); - t->sibling_index = - fib_entry_child_add (t->fib_entry_index, - FIB_NODE_TYPE_GRE_TUNNEL, t - gm->tunnels); + gre_mk_sn_key (t, &skey); + gre_sn = (gre_sn_t *) hash_get_mem 
(gm->seq_num_by_key, &skey); + if (gre_sn != NULL) + { + gre_sn->ref_count++; + t->gre_sn = gre_sn; + } + else + { + gre_sn = clib_mem_alloc (sizeof (gre_sn_t)); + gre_sn->seq_num = 0; + gre_sn->ref_count = 1; + t->gre_sn = gre_sn; + hash_set_mem_alloc (&gm->seq_num_by_key, &skey, (uword) gre_sn); + } + } + + t->fib_entry_index = fib_table_entry_special_add + (outer_fib_index, &t->tunnel_dst, FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE); + t->sibling_index = fib_entry_child_add + (t->fib_entry_index, FIB_NODE_TYPE_GRE_TUNNEL, t_idx); - if (GRE_TUNNEL_TYPE_TEB == t->type) + if (t->type != GRE_TUNNEL_TYPE_L3) { - t->l2_adj_index = adj_nbr_add_or_lock (t->tunnel_dst.fp_proto, - VNET_LINK_ETHERNET, - &zero_addr, sw_if_index); + t->l2_adj_index = adj_nbr_add_or_lock + (t->tunnel_dst.fp_proto, VNET_LINK_ETHERNET, &zero_addr, sw_if_index); gre_update_adj (vnm, t->sw_if_index, t->l2_adj_index); } @@ -403,38 +407,29 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp) static int vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t * a, - u32 * sw_if_indexp) + u32 outer_fib_index, u32 * sw_if_indexp) { gre_main_t *gm = &gre_main; vnet_main_t *vnm = gm->vnet_main; gre_tunnel_t *t; gre_tunnel_key_t key; u32 sw_if_index; - u32 outer_fib_index; - - if (!a->is_ipv6) - outer_fib_index = ip4_fib_index_from_table_id (a->outer_fib_id); - else - outer_fib_index = ip6_fib_index_from_table_id (a->outer_fib_id); - - if (~0 == outer_fib_index) - return VNET_API_ERROR_NO_SUCH_FIB; - - t = - gre_tunnel_db_find (&a->src, &a->dst, outer_fib_index, a->is_ipv6, &key); + t = gre_tunnel_db_find (a, outer_fib_index, &key); if (NULL == t) return VNET_API_ERROR_NO_SUCH_ENTRY; sw_if_index = t->sw_if_index; vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ ); + /* make sure tunnel is removed from l2 bd or xconnect */ set_int_l2_mode (gm->vlib_main, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0); - vec_add1 (gm->free_gre_tunnel_hw_if_indices[t->type], t->hw_if_index); 
gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0; - if (GRE_TUNNEL_TYPE_TEB == t->type) - adj_unlock (t->l2_adj_index); + if (t->type == GRE_TUNNEL_TYPE_L3) + vnet_delete_hw_interface (vnm, t->hw_if_index); + else + ethernet_delete_interface (vnm, t->hw_if_index); if (t->l2_adj_index != ADJ_INDEX_INVALID) adj_unlock (t->l2_adj_index); @@ -442,6 +437,16 @@ vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t * a, fib_entry_child_remove (t->fib_entry_index, t->sibling_index); fib_table_entry_delete_index (t->fib_entry_index, FIB_SOURCE_RR); + ASSERT ((t->type != GRE_TUNNEL_TYPE_ERSPAN) || (t->gre_sn != NULL)); + if ((t->type == GRE_TUNNEL_TYPE_ERSPAN) && (t->gre_sn->ref_count-- == 1)) + { + gre_sn_key_t skey; + gre_mk_sn_key (t, &skey); + hash_unset_mem_free (&gm->seq_num_by_key, &skey); + clib_mem_free (t->gre_sn); + } + + hash_unset (gm->instance_used, t->user_instance); gre_tunnel_db_remove (t); fib_node_deinit (&t->node); pool_put (gm->tunnels, t); @@ -456,10 +461,23 @@ int vnet_gre_add_del_tunnel (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp) { + u32 outer_fib_index; + + if (!a->is_ipv6) + outer_fib_index = ip4_fib_index_from_table_id (a->outer_fib_id); + else + outer_fib_index = ip6_fib_index_from_table_id (a->outer_fib_id); + + if (~0 == outer_fib_index) + return VNET_API_ERROR_NO_SUCH_FIB; + + if (a->session_id > GTK_SESSION_ID_MAX) + return VNET_API_ERROR_INVALID_SESSION_ID; + if (a->is_add) - return (vnet_gre_tunnel_add (a, sw_if_indexp)); + return (vnet_gre_tunnel_add (a, outer_fib_index, sw_if_indexp)); else - return (vnet_gre_tunnel_delete (a, sw_if_indexp)); + return (vnet_gre_tunnel_delete (a, outer_fib_index, sw_if_indexp)); } clib_error_t * @@ -503,8 +521,10 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, unformat_input_t _line_input, *line_input = &_line_input; vnet_gre_add_del_tunnel_args_t _a, *a = &_a; ip46_address_t src, dst; + u32 instance = ~0; u32 outer_fib_id = 0; - u8 teb = 0; + gre_tunnel_type_t t_type = GRE_TUNNEL_TYPE_L3; 
+ u32 session_id = 0; int rv; u32 num_m_args = 0; u8 is_add = 1; @@ -521,6 +541,8 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, { if (unformat (line_input, "del")) is_add = 0; + else if (unformat (line_input, "instance %d", &instance)) + ; else if (unformat (line_input, "src %U", unformat_ip4_address, &src.ip4)) { @@ -548,7 +570,9 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "outer-fib-id %d", &outer_fib_id)) ; else if (unformat (line_input, "teb")) - teb = 1; + t_type = GRE_TUNNEL_TYPE_TEB; + else if (unformat (line_input, "erspan %d", &session_id)) + t_type = GRE_TUNNEL_TYPE_ERSPAN; else { error = clib_error_return (0, "unknown input `%U'", @@ -582,9 +606,12 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, } memset (a, 0, sizeof (*a)); + a->is_add = is_add; a->outer_fib_id = outer_fib_id; - a->teb = teb; + a->tunnel_type = t_type; + a->session_id = session_id; a->is_ipv6 = ipv6_set; + a->instance = instance; if (!ipv6_set) { clib_memcpy (&a->src.ip4, &src.ip4, sizeof (src.ip4)); @@ -596,10 +623,7 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, clib_memcpy (&a->dst.ip6, &dst.ip6, sizeof (dst.ip6)); } - if (is_add) - rv = vnet_gre_tunnel_add (a, &sw_if_index); - else - rv = vnet_gre_tunnel_delete (a, &sw_if_index); + rv = vnet_gre_add_del_tunnel (a, &sw_if_index); switch (rv) { @@ -607,13 +631,23 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index); break; - case VNET_API_ERROR_INVALID_VALUE: + case VNET_API_ERROR_IF_ALREADY_EXISTS: error = clib_error_return (0, "GRE tunnel already exists..."); goto done; case VNET_API_ERROR_NO_SUCH_FIB: error = clib_error_return (0, "outer fib ID %d doesn't exist\n", outer_fib_id); goto done; + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = clib_error_return (0, "GRE tunnel doesn't exist"); + goto done; + case VNET_API_ERROR_INVALID_SESSION_ID: + error = clib_error_return (0, "session ID %d out of 
range\n", + session_id); + goto done; + case VNET_API_ERROR_INSTANCE_IN_USE: + error = clib_error_return (0, "Instance is in use"); + goto done; default: error = clib_error_return (0, "vnet_gre_add_del_tunnel returned %d", rv); @@ -629,8 +663,8 @@ done: /* *INDENT-OFF* */ VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = { .path = "create gre tunnel", - .short_help = "create gre tunnel src <addr> dst <addr> " - "[outer-fib-id <fib>] [teb] [del]", + .short_help = "create gre tunnel src <addr> dst <addr> [instance <n>] " + "[outer-fib-id <fib>] [teb | erspan <session-id>] [del]", .function = create_gre_tunnel_command_fn, }; /* *INDENT-ON* */ |