From 2ba92e32e0197f676dd905e5edcb4ff3e1bec241 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Mon, 21 Aug 2017 07:05:03 -0700 Subject: NAT: Rename snat plugin to nat (VPP-955) Change-Id: I30a7e3da7a4efc6038a91e27b48045d4b07e2764 Signed-off-by: Matus Fabian --- src/plugins/nat/nat.c | 2842 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2842 insertions(+) create mode 100644 src/plugins/nat/nat.c (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c new file mode 100644 index 00000000..ac39be95 --- /dev/null +++ b/src/plugins/nat/nat.c @@ -0,0 +1,2842 @@ +/* + * snat.c - simple nat plugin + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +snat_main_t snat_main; + + +/* Hook up input features */ +VNET_FEATURE_INIT (ip4_snat_in2out, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-in2out", + .runs_before = VNET_FEATURES ("nat44-out2in"), +}; +VNET_FEATURE_INIT (ip4_snat_out2in, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-out2in", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; +VNET_FEATURE_INIT (ip4_snat_det_in2out, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-det-in2out", + .runs_before = VNET_FEATURES ("nat44-det-out2in"), +}; +VNET_FEATURE_INIT (ip4_snat_det_out2in, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-det-out2in", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; +VNET_FEATURE_INIT (ip4_snat_in2out_worker_handoff, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-in2out-worker-handoff", + .runs_before = VNET_FEATURES ("nat44-out2in-worker-handoff"), +}; +VNET_FEATURE_INIT (ip4_snat_out2in_worker_handoff, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-out2in-worker-handoff", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; +VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-in2out-fast", + .runs_before = VNET_FEATURES ("nat44-out2in-fast"), +}; +VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-out2in-fast", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; +VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = { + .arc_name = "ip4-unicast", + .node_name = "nat44-hairpin-dst", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; + +/* Hook up output features */ +VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = { + .arc_name = "ip4-output", + .node_name = "nat44-in2out-output", + .runs_before = VNET_FEATURES ("interface-output"), +}; +VNET_FEATURE_INIT 
(ip4_snat_in2out_output_worker_handoff, static) = { + .arc_name = "ip4-output", + .node_name = "nat44-in2out-output-worker-handoff", + .runs_before = VNET_FEATURES ("interface-output"), +}; +VNET_FEATURE_INIT (ip4_snat_hairpin_src, static) = { + .arc_name = "ip4-output", + .node_name = "nat44-hairpin-src", + .runs_before = VNET_FEATURES ("interface-output"), +}; + + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Network Address Translation", +}; +/* *INDENT-ON* */ + +/** + * @brief Add/del NAT address to FIB. + * + * Add the external NAT address to the FIB as receive entries. This ensures + * that VPP will reply to ARP for this address and we don't need to enable + * proxy ARP on the outside interface. + * + * @param addr IPv4 address. + * @param plen address prefix length + * @param sw_if_index Interface. + * @param is_add If 0 delete, otherwise add. + */ +void +snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index, + int is_add) +{ + fib_prefix_t prefix = { + .fp_len = p_len, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = addr->as_u32, + }, + }; + u32 fib_index = ip4_fib_table_get_index_for_sw_if_index(sw_if_index); + + if (is_add) + fib_table_entry_update_one_path(fib_index, + &prefix, + FIB_SOURCE_PLUGIN_HI, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL | + FIB_ENTRY_FLAG_EXCLUSIVE), + DPO_PROTO_IP4, + NULL, + sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + else + fib_table_entry_delete(fib_index, + &prefix, + FIB_SOURCE_PLUGIN_HI); +} + +void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id) +{ + snat_address_t * ap; + snat_interface_t *i; + + if (vrf_id != ~0) + sm->vrf_mode = 1; + + /* Check if address already exists */ + vec_foreach (ap, sm->addresses) + { + if (ap->addr.as_u32 == addr->as_u32) + return; + } + + vec_add2 (sm->addresses, ap, 1); + ap->addr = *addr; + if (vrf_id != ~0) + ap->fib_index = + 
fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id); + else + ap->fib_index = ~0; +#define _(N, i, n, s) \ + clib_bitmap_alloc (ap->busy_##n##_port_bitmap, 65535); + foreach_snat_protocol +#undef _ + + /* Add external address to FIB */ + pool_foreach (i, sm->interfaces, + ({ + if (i->is_inside) + continue; + + snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1); + break; + })); + pool_foreach (i, sm->output_feature_interfaces, + ({ + if (i->is_inside) + continue; + + snat_add_del_addr_to_fib(addr, 32, i->sw_if_index, 1); + break; + })); +} + +static int is_snat_address_used_in_static_mapping (snat_main_t *sm, + ip4_address_t addr) +{ + snat_static_mapping_t *m; + pool_foreach (m, sm->static_mappings, + ({ + if (m->external_addr.as_u32 == addr.as_u32) + return 1; + })); + + return 0; +} + +void increment_v4_address (ip4_address_t * a) +{ + u32 v; + + v = clib_net_to_host_u32(a->as_u32) + 1; + a->as_u32 = clib_host_to_net_u32(v); +} + +static void +snat_add_static_mapping_when_resolved (snat_main_t * sm, + ip4_address_t l_addr, + u16 l_port, + u32 sw_if_index, + u16 e_port, + u32 vrf_id, + snat_protocol_t proto, + int addr_only, + int is_add) +{ + snat_static_map_resolve_t *rp; + + vec_add2 (sm->to_resolve, rp, 1); + rp->l_addr.as_u32 = l_addr.as_u32; + rp->l_port = l_port; + rp->sw_if_index = sw_if_index; + rp->e_port = e_port; + rp->vrf_id = vrf_id; + rp->proto = proto; + rp->addr_only = addr_only; + rp->is_add = is_add; +} + +/** + * @brief Add static mapping. + * + * Create static mapping between local addr+port and external addr+port. + * + * @param l_addr Local IPv4 address. + * @param e_addr External IPv4 address. + * @param l_port Local port number. + * @param e_port External port number. + * @param vrf_id VRF ID. + * @param addr_only If 0 address port and pair mapping, otherwise address only. + * @param sw_if_index External port instead of specific IP address. + * @param is_add If 0 delete static mapping, otherwise add. 
+ * + * @returns + */ +int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, + u16 l_port, u16 e_port, u32 vrf_id, int addr_only, + u32 sw_if_index, snat_protocol_t proto, int is_add) +{ + snat_main_t * sm = &snat_main; + snat_static_mapping_t *m; + snat_session_key_t m_key; + clib_bihash_kv_8_8_t kv, value; + snat_address_t *a = 0; + u32 fib_index = ~0; + uword * p; + snat_interface_t *interface; + int i; + + /* If the external address is a specific interface address */ + if (sw_if_index != ~0) + { + ip4_address_t * first_int_addr; + + /* Might be already set... */ + first_int_addr = ip4_interface_first_address + (sm->ip4_main, sw_if_index, 0 /* just want the address*/); + + /* DHCP resolution required? */ + if (first_int_addr == 0) + { + snat_add_static_mapping_when_resolved + (sm, l_addr, l_port, sw_if_index, e_port, vrf_id, proto, + addr_only, is_add); + return 0; + } + else + e_addr.as_u32 = first_int_addr->as_u32; + } + + m_key.addr = e_addr; + m_key.port = addr_only ? 0 : e_port; + m_key.protocol = addr_only ? 
0 : proto; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + m = 0; + else + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (is_add) + { + if (m) + return VNET_API_ERROR_VALUE_EXIST; + + /* Convert VRF id to FIB index */ + if (vrf_id != ~0) + { + p = hash_get (sm->ip4_main->fib_index_by_table_id, vrf_id); + if (!p) + return VNET_API_ERROR_NO_SUCH_FIB; + fib_index = p[0]; + } + /* If not specified use inside VRF id from SNAT plugin startup config */ + else + { + fib_index = sm->inside_fib_index; + vrf_id = sm->inside_vrf_id; + } + + /* Find external address in allocated addresses and reserve port for + address and port pair mapping when dynamic translations enabled */ + if (!addr_only && !(sm->static_mapping_only)) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + /* External port must be unused */ + switch (proto) + { +#define _(N, j, n, s) \ + case SNAT_PROTOCOL_##N: \ + if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, e_port)) \ + return VNET_API_ERROR_INVALID_VALUE; \ + clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 1); \ + if (e_port > 1024) \ + a->busy_##n##_ports++; \ + break; + foreach_snat_protocol +#undef _ + default: + clib_warning("unknown_protocol"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + break; + } + } + /* External address must be allocated */ + if (!a) + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + pool_get (sm->static_mappings, m); + memset (m, 0, sizeof (*m)); + m->local_addr = l_addr; + m->external_addr = e_addr; + m->addr_only = addr_only; + m->vrf_id = vrf_id; + m->fib_index = fib_index; + if (!addr_only) + { + m->local_port = l_port; + m->external_port = e_port; + m->proto = proto; + } + + m_key.addr = m->local_addr; + m_key.port = m->local_port; + m_key.protocol = m->proto; + m_key.fib_index = m->fib_index; + 
kv.key = m_key.as_u64; + kv.value = m - sm->static_mappings; + clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1); + + m_key.addr = m->external_addr; + m_key.port = m->external_port; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + kv.value = m - sm->static_mappings; + clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1); + + /* Assign worker */ + if (sm->workers) + { + snat_user_key_t w_key0; + snat_worker_key_t w_key1; + + w_key0.addr = m->local_addr; + w_key0.fib_index = m->fib_index; + kv.key = w_key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) + { + kv.value = sm->first_worker_index + + sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + + clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1); + } + else + { + kv.value = value.value; + } + + w_key1.addr = m->external_addr; + w_key1.port = clib_host_to_net_u16 (m->external_port); + w_key1.fib_index = sm->outside_fib_index; + kv.key = w_key1.as_u64; + clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv, 1); + } + } + else + { + if (!m) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* Free external address port */ + if (!addr_only && !(sm->static_mapping_only)) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + switch (proto) + { +#define _(N, j, n, s) \ + case SNAT_PROTOCOL_##N: \ + clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 0); \ + if (e_port > 1024) \ + a->busy_##n##_ports--; \ + break; + foreach_snat_protocol +#undef _ + default: + clib_warning("unknown_protocol"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + break; + } + } + } + + m_key.addr = m->local_addr; + m_key.port = m->local_port; + m_key.protocol = m->proto; + m_key.fib_index = m->fib_index; + kv.key = m_key.as_u64; + clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0); + + m_key.addr = m->external_addr; + m_key.port = m->external_port; + m_key.fib_index = 
sm->outside_fib_index; + kv.key = m_key.as_u64; + clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 0); + + /* Delete session(s) for static mapping if exist */ + if (!(sm->static_mapping_only) || + (sm->static_mapping_only && sm->static_mapping_connection_tracking)) + { + snat_user_key_t u_key; + snat_user_t *u; + dlist_elt_t * head, * elt; + u32 elt_index, head_index, del_elt_index; + u32 ses_index; + u64 user_index; + snat_session_t * s; + snat_main_per_thread_data_t *tsm; + + u_key.addr = m->local_addr; + u_key.fib_index = m->fib_index; + kv.key = u_key.as_u64; + if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + user_index = value.value; + if (!clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) + tsm = vec_elt_at_index (sm->per_thread_data, value.value); + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + u = pool_elt_at_index (tsm->users, user_index); + if (u->nstaticsessions) + { + head_index = u->sessions_per_user_list_head_index; + head = pool_elt_at_index (tsm->list_pool, head_index); + elt_index = head->next; + elt = pool_elt_at_index (tsm->list_pool, elt_index); + ses_index = elt->value; + while (ses_index != ~0) + { + s = pool_elt_at_index (tsm->sessions, ses_index); + del_elt_index = elt_index; + elt_index = elt->next; + elt = pool_elt_at_index (tsm->list_pool, elt_index); + ses_index = elt->value; + + if (!addr_only) + { + if ((s->out2in.addr.as_u32 != e_addr.as_u32) && + (clib_net_to_host_u16 (s->out2in.port) != e_port)) + continue; + } + + if (snat_is_unk_proto_session (s)) + { + clib_bihash_kv_16_8_t up_kv; + snat_unk_proto_ses_key_t up_key; + up_key.l_addr = s->in2out.addr; + up_key.r_addr = s->ext_host_addr; + up_key.fib_index = s->in2out.fib_index; + up_key.proto = s->in2out.port; + up_key.rsvd[0] = up_key.rsvd[1] = up_key.rsvd[2] = 0; + up_kv.key[0] = up_key.as_u64[0]; + up_kv.key[1] = up_key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, + &up_kv, 0)) + clib_warning 
("in2out key del failed"); + + up_key.l_addr = s->out2in.addr; + up_key.fib_index = s->out2in.fib_index; + up_kv.key[0] = up_key.as_u64[0]; + up_kv.key[1] = up_key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, + &up_kv, 0)) + clib_warning ("out2in key del failed"); + + goto delete; + } + /* log NAT event */ + snat_ipfix_logging_nat44_ses_delete(s->in2out.addr.as_u32, + s->out2in.addr.as_u32, + s->in2out.protocol, + s->in2out.port, + s->out2in.port, + s->in2out.fib_index); + + value.key = s->in2out.as_u64; + if (clib_bihash_add_del_8_8 (&sm->in2out, &value, 0)) + clib_warning ("in2out key del failed"); + value.key = s->out2in.as_u64; + if (clib_bihash_add_del_8_8 (&sm->out2in, &value, 0)) + clib_warning ("out2in key del failed"); +delete: + pool_put (tsm->sessions, s); + + clib_dlist_remove (tsm->list_pool, del_elt_index); + pool_put_index (tsm->list_pool, del_elt_index); + u->nstaticsessions--; + + if (!addr_only) + break; + } + if (addr_only) + { + pool_put (tsm->users, u); + clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 0); + } + } + } + } + + /* Delete static mapping from pool */ + pool_put (sm->static_mappings, m); + } + + if (!addr_only) + return 0; + + /* Add/delete external address to FIB */ + pool_foreach (interface, sm->interfaces, + ({ + if (interface->is_inside) + continue; + + snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add); + break; + })); + pool_foreach (interface, sm->output_feature_interfaces, + ({ + if (interface->is_inside) + continue; + + snat_add_del_addr_to_fib(&e_addr, 32, interface->sw_if_index, is_add); + break; + })); + + return 0; +} + +int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) +{ + snat_address_t *a = 0; + snat_session_t *ses; + u32 *ses_to_be_removed = 0, *ses_index; + clib_bihash_kv_8_8_t kv, value; + snat_user_key_t user_key; + snat_user_t *u; + snat_main_per_thread_data_t *tsm; + snat_static_mapping_t *m; + snat_interface_t *interface; + int i; + + /* Find SNAT 
address */ + for (i=0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == addr.as_u32) + { + a = sm->addresses + i; + break; + } + } + if (!a) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + if (delete_sm) + { + pool_foreach (m, sm->static_mappings, + ({ + if (m->external_addr.as_u32 == addr.as_u32) + (void) snat_add_static_mapping (m->local_addr, m->external_addr, + m->local_port, m->external_port, + m->vrf_id, m->addr_only, ~0, + m->proto, 0); + })); + } + else + { + /* Check if address is used in some static mapping */ + if (is_snat_address_used_in_static_mapping(sm, addr)) + { + clib_warning ("address used in static mapping"); + return VNET_API_ERROR_UNSPECIFIED; + } + } + + if (a->fib_index != ~0) + fib_table_unlock(a->fib_index, FIB_PROTOCOL_IP4); + + /* Delete sessions using address */ + if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports) + { + vec_foreach (tsm, sm->per_thread_data) + { + pool_foreach (ses, tsm->sessions, ({ + if (ses->out2in.addr.as_u32 == addr.as_u32) + { + if (snat_is_unk_proto_session (ses)) + { + clib_bihash_kv_16_8_t up_kv; + snat_unk_proto_ses_key_t up_key; + up_key.l_addr = ses->in2out.addr; + up_key.r_addr = ses->ext_host_addr; + up_key.fib_index = ses->in2out.fib_index; + up_key.proto = ses->in2out.port; + up_key.rsvd[0] = up_key.rsvd[1] = up_key.rsvd[2] = 0; + up_kv.key[0] = up_key.as_u64[0]; + up_kv.key[1] = up_key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, + &up_kv, 0)) + clib_warning ("in2out key del failed"); + + up_key.l_addr = ses->out2in.addr; + up_key.fib_index = ses->out2in.fib_index; + up_kv.key[0] = up_key.as_u64[0]; + up_kv.key[1] = up_key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, + &up_kv, 0)) + clib_warning ("out2in key del failed"); + } + else + { + /* log NAT event */ + snat_ipfix_logging_nat44_ses_delete(ses->in2out.addr.as_u32, + ses->out2in.addr.as_u32, + ses->in2out.protocol, + ses->in2out.port, + ses->out2in.port, + 
ses->in2out.fib_index); + kv.key = ses->in2out.as_u64; + clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0); + kv.key = ses->out2in.as_u64; + clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0); + } + vec_add1 (ses_to_be_removed, ses - tsm->sessions); + clib_dlist_remove (tsm->list_pool, ses->per_user_index); + user_key.addr = ses->in2out.addr; + user_key.fib_index = ses->in2out.fib_index; + kv.key = user_key.as_u64; + if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + u = pool_elt_at_index (tsm->users, value.value); + u->nsessions--; + } + } + })); + + vec_foreach (ses_index, ses_to_be_removed) + pool_put_index (tsm->sessions, ses_index[0]); + + vec_free (ses_to_be_removed); + } + } + + vec_del1 (sm->addresses, i); + + /* Delete external address from FIB */ + pool_foreach (interface, sm->interfaces, + ({ + if (interface->is_inside) + continue; + + snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0); + break; + })); + pool_foreach (interface, sm->output_feature_interfaces, + ({ + if (interface->is_inside) + continue; + + snat_add_del_addr_to_fib(&addr, 32, interface->sw_if_index, 0); + break; + })); + + return 0; +} + +int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del) +{ + snat_main_t *sm = &snat_main; + snat_interface_t *i; + const char * feature_name; + snat_address_t * ap; + snat_static_mapping_t * m; + snat_det_map_t * dm; + + if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)) + feature_name = is_inside ? "nat44-in2out-fast" : "nat44-out2in-fast"; + else + { + if (sm->num_workers > 1 && !sm->deterministic) + feature_name = is_inside ? "nat44-in2out-worker-handoff" : "nat44-out2in-worker-handoff"; + else if (sm->deterministic) + feature_name = is_inside ? "nat44-det-in2out" : "nat44-det-out2in"; + else + feature_name = is_inside ? 
"nat44-in2out" : "nat44-out2in"; + } + + vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, + !is_del, 0, 0); + + if (sm->fq_in2out_index == ~0 && !sm->deterministic && sm->num_workers > 1) + sm->fq_in2out_index = vlib_frame_queue_main_init (sm->in2out_node_index, 0); + + if (sm->fq_out2in_index == ~0 && !sm->deterministic && sm->num_workers > 1) + sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index, 0); + + pool_foreach (i, sm->interfaces, + ({ + if (i->sw_if_index == sw_if_index) + { + if (is_del) + pool_put (sm->interfaces, i); + else + return VNET_API_ERROR_VALUE_EXIST; + + goto fib; + } + })); + + if (is_del) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + pool_get (sm->interfaces, i); + i->sw_if_index = sw_if_index; + i->is_inside = is_inside; + + /* Add/delete external addresses to FIB */ +fib: + if (is_inside) + return 0; + + vec_foreach (ap, sm->addresses) + snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del); + + pool_foreach (m, sm->static_mappings, + ({ + if (!(m->addr_only)) + continue; + + snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del); + })); + + pool_foreach (dm, sm->det_maps, + ({ + snat_add_del_addr_to_fib(&dm->out_addr, dm->out_plen, sw_if_index, !is_del); + })); + + return 0; +} + +int snat_interface_add_del_output_feature (u32 sw_if_index, + u8 is_inside, + int is_del) +{ + snat_main_t *sm = &snat_main; + snat_interface_t *i; + snat_address_t * ap; + snat_static_mapping_t * m; + + if (sm->deterministic || + (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))) + return VNET_API_ERROR_UNSUPPORTED; + + if (is_inside) + { + vnet_feature_enable_disable ("ip4-unicast", "nat44-hairpin-dst", + sw_if_index, !is_del, 0, 0); + vnet_feature_enable_disable ("ip4-output", "nat44-hairpin-src", + sw_if_index, !is_del, 0, 0); + goto fq; + } + + if (sm->num_workers > 1) + { + vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in-worker-handoff", + sw_if_index, !is_del, 0, 
0); + vnet_feature_enable_disable ("ip4-output", + "nat44-in2out-output-worker-handoff", + sw_if_index, !is_del, 0, 0); + } + else + { + vnet_feature_enable_disable ("ip4-unicast", "nat44-out2in", sw_if_index, + !is_del, 0, 0); + vnet_feature_enable_disable ("ip4-output", "nat44-in2out-output", + sw_if_index, !is_del, 0, 0); + } + +fq: + if (sm->fq_in2out_output_index == ~0 && sm->num_workers > 1) + sm->fq_in2out_output_index = + vlib_frame_queue_main_init (sm->in2out_output_node_index, 0); + + if (sm->fq_out2in_index == ~0 && sm->num_workers > 1) + sm->fq_out2in_index = vlib_frame_queue_main_init (sm->out2in_node_index, 0); + + pool_foreach (i, sm->output_feature_interfaces, + ({ + if (i->sw_if_index == sw_if_index) + { + if (is_del) + pool_put (sm->output_feature_interfaces, i); + else + return VNET_API_ERROR_VALUE_EXIST; + + goto fib; + } + })); + + if (is_del) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + pool_get (sm->output_feature_interfaces, i); + i->sw_if_index = sw_if_index; + i->is_inside = is_inside; + + /* Add/delete external addresses to FIB */ +fib: + if (is_inside) + return 0; + + vec_foreach (ap, sm->addresses) + snat_add_del_addr_to_fib(&ap->addr, 32, sw_if_index, !is_del); + + pool_foreach (m, sm->static_mappings, + ({ + if (!(m->addr_only)) + continue; + + snat_add_del_addr_to_fib(&m->external_addr, 32, sw_if_index, !is_del); + })); + + return 0; +} + +int snat_set_workers (uword * bitmap) +{ + snat_main_t *sm = &snat_main; + int i, j = 0; + + if (sm->num_workers < 2) + return VNET_API_ERROR_FEATURE_DISABLED; + + if (clib_bitmap_last_set (bitmap) >= sm->num_workers) + return VNET_API_ERROR_INVALID_WORKER; + + vec_free (sm->workers); + clib_bitmap_foreach (i, bitmap, + ({ + vec_add1(sm->workers, i); + sm->per_thread_data[i].snat_thread_index = j; + j++; + })); + + sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers); + sm->num_snat_thread = _vec_len (sm->workers); + + return 0; +} + + +static void +snat_ip4_add_del_interface_address_cb 
(ip4_main_t * im, + uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, + u32 is_delete); + +static clib_error_t * snat_init (vlib_main_t * vm) +{ + snat_main_t * sm = &snat_main; + clib_error_t * error = 0; + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + uword *p; + vlib_thread_registration_t *tr; + vlib_thread_main_t *tm = vlib_get_thread_main (); + uword *bitmap = 0; + u32 i; + ip4_add_del_interface_address_callback_t cb4; + + sm->vlib_main = vm; + sm->vnet_main = vnet_get_main(); + sm->ip4_main = im; + sm->ip4_lookup_main = lm; + sm->api_main = &api_main; + sm->first_worker_index = 0; + sm->next_worker = 0; + sm->num_workers = 0; + sm->num_snat_thread = 1; + sm->workers = 0; + sm->port_per_thread = 0xffff - 1024; + sm->fq_in2out_index = ~0; + sm->fq_out2in_index = ~0; + sm->udp_timeout = SNAT_UDP_TIMEOUT; + sm->tcp_established_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT; + sm->tcp_transitory_timeout = SNAT_TCP_TRANSITORY_TIMEOUT; + sm->icmp_timeout = SNAT_ICMP_TIMEOUT; + + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + if (p) + { + tr = (vlib_thread_registration_t *) p[0]; + if (tr) + { + sm->num_workers = tr->count; + sm->first_worker_index = tr->first_index; + } + } + + vec_validate (sm->per_thread_data, tm->n_vlib_mains - 1); + + /* Use all available workers by default */ + if (sm->num_workers > 1) + { + for (i=0; i < sm->num_workers; i++) + bitmap = clib_bitmap_set (bitmap, i, 1); + snat_set_workers(bitmap); + clib_bitmap_free (bitmap); + } + else + { + sm->per_thread_data[0].snat_thread_index = 0; + } + + error = snat_api_init(vm, sm); + if (error) + return error; + + /* Set up the interface address add/del callback */ + cb4.function = snat_ip4_add_del_interface_address_cb; + cb4.function_opaque = 0; + + vec_add1 (im->add_del_interface_address_callbacks, cb4); + + /* Init IPFIX logging */ + snat_ipfix_logging_init(vm); + + error = nat64_init(vm); + + return 
error; +} + +VLIB_INIT_FUNCTION (snat_init); + +void snat_free_outside_address_and_port (snat_main_t * sm, + snat_session_key_t * k, + u32 address_index) +{ + snat_address_t *a; + u16 port_host_byte_order = clib_net_to_host_u16 (k->port); + + ASSERT (address_index < vec_len (sm->addresses)); + + a = sm->addresses + address_index; + + switch (k->protocol) + { +#define _(N, i, n, s) \ + case SNAT_PROTOCOL_##N: \ + ASSERT (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, \ + port_host_byte_order) == 1); \ + clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \ + port_host_byte_order, 0); \ + a->busy_##n##_ports--; \ + break; + foreach_snat_protocol +#undef _ + default: + clib_warning("unknown_protocol"); + return; + } +} + +/** + * @brief Match NAT44 static mapping. + * + * @param sm NAT main. + * @param match Address and port to match. + * @param mapping External or local address and port of the matched mapping. + * @param by_external If 0 match by local address otherwise match by external + * address. + * @param is_addr_only If matched mapping is address only + * + * @returns 0 if match found otherwise 1. 
+ */ +int snat_static_mapping_match (snat_main_t * sm, + snat_session_key_t match, + snat_session_key_t * mapping, + u8 by_external, + u8 *is_addr_only) +{ + clib_bihash_kv_8_8_t kv, value; + snat_static_mapping_t *m; + snat_session_key_t m_key; + clib_bihash_8_8_t *mapping_hash = &sm->static_mapping_by_local; + + if (by_external) + mapping_hash = &sm->static_mapping_by_external; + + m_key.addr = match.addr; + m_key.port = clib_net_to_host_u16 (match.port); + m_key.protocol = match.protocol; + m_key.fib_index = match.fib_index; + + kv.key = m_key.as_u64; + + if (clib_bihash_search_8_8 (mapping_hash, &kv, &value)) + { + /* Try address only mapping */ + m_key.port = 0; + m_key.protocol = 0; + kv.key = m_key.as_u64; + if (clib_bihash_search_8_8 (mapping_hash, &kv, &value)) + return 1; + } + + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (by_external) + { + mapping->addr = m->local_addr; + /* Address only mapping doesn't change port */ + mapping->port = m->addr_only ? match.port + : clib_host_to_net_u16 (m->local_port); + mapping->fib_index = m->fib_index; + } + else + { + mapping->addr = m->external_addr; + /* Address only mapping doesn't change port */ + mapping->port = m->addr_only ? 
match.port + : clib_host_to_net_u16 (m->external_port); + mapping->fib_index = sm->outside_fib_index; + } + + if (PREDICT_FALSE(is_addr_only != 0)) + *is_addr_only = m->addr_only; + + return 0; +} + +static_always_inline u16 +snat_random_port (snat_main_t * sm, u16 min, u16 max) +{ + return min + random_u32 (&sm->random_seed) / + (random_u32_max() / (max - min + 1) + 1); +} + +int snat_alloc_outside_address_and_port (snat_main_t * sm, + u32 fib_index, + u32 thread_index, + snat_session_key_t * k, + u32 * address_indexp) +{ + int i; + snat_address_t *a; + u32 portnum; + + for (i = 0; i < vec_len (sm->addresses); i++) + { + a = sm->addresses + i; + if (sm->vrf_mode && a->fib_index != ~0 && a->fib_index != fib_index) + continue; + switch (k->protocol) + { +#define _(N, j, n, s) \ + case SNAT_PROTOCOL_##N: \ + if (a->busy_##n##_ports < (sm->port_per_thread * sm->num_snat_thread)) \ + { \ + while (1) \ + { \ + portnum = (sm->port_per_thread * \ + sm->per_thread_data[thread_index].snat_thread_index) + \ + snat_random_port(sm, 0, sm->port_per_thread) + 1024; \ + if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \ + continue; \ + clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \ + a->busy_##n##_ports++; \ + k->addr = a->addr; \ + k->port = clib_host_to_net_u16(portnum); \ + *address_indexp = i; \ + return 0; \ + } \ + } \ + break; + foreach_snat_protocol +#undef _ + default: + clib_warning("unknown protocol"); + return 1; + } + + } + /* Totally out of translations to use... 
*/ + snat_ipfix_logging_addresses_exhausted(0); + return 1; +} + + +static clib_error_t * +add_address_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + snat_main_t * sm = &snat_main; + ip4_address_t start_addr, end_addr, this_addr; + u32 start_host_order, end_host_order; + u32 vrf_id = ~0; + int i, count; + int is_add = 1; + int rv = 0; + clib_error_t *error = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U - %U", + unformat_ip4_address, &start_addr, + unformat_ip4_address, &end_addr)) + ; + else if (unformat (line_input, "tenant-vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "%U", unformat_ip4_address, &start_addr)) + end_addr = start_addr; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (sm->static_mapping_only) + { + error = clib_error_return (0, "static mapping only mode"); + goto done; + } + + start_host_order = clib_host_to_net_u32 (start_addr.as_u32); + end_host_order = clib_host_to_net_u32 (end_addr.as_u32); + + if (end_host_order < start_host_order) + { + error = clib_error_return (0, "end address less than start address"); + goto done; + } + + count = (end_host_order - start_host_order) + 1; + + if (count > 1024) + clib_warning ("%U - %U, %d addresses...", + format_ip4_address, &start_addr, + format_ip4_address, &end_addr, + count); + + this_addr = start_addr; + + for (i = 0; i < count; i++) + { + if (is_add) + snat_add_address (sm, &this_addr, vrf_id); + else + rv = snat_del_address (sm, this_addr, 0); + + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_ENTRY: + error = clib_error_return (0, "S-NAT address not exist."); + goto done; + case 
VNET_API_ERROR_UNSPECIFIED: + error = clib_error_return (0, "S-NAT address used in static mapping."); + goto done; + default: + break; + } + + increment_v4_address (&this_addr); + } + +done: + unformat_free (line_input); + + return error; +} + +VLIB_CLI_COMMAND (add_address_command, static) = { + .path = "nat44 add address", + .short_help = "nat44 add addresses [- ] " + "[tenant-vrf ] [del]", + .function = add_address_command_fn, +}; + +static clib_error_t * +snat_feature_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 * inside_sw_if_indices = 0; + u32 * outside_sw_if_indices = 0; + u8 is_output_feature = 0; + int is_del = 0; + int i; + + sw_if_index = ~0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "in %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + vec_add1 (inside_sw_if_indices, sw_if_index); + else if (unformat (line_input, "out %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + vec_add1 (outside_sw_if_indices, sw_if_index); + else if (unformat (line_input, "output-feature")) + is_output_feature = 1; + else if (unformat (line_input, "del")) + is_del = 1; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (vec_len (inside_sw_if_indices)) + { + for (i = 0; i < vec_len(inside_sw_if_indices); i++) + { + sw_if_index = inside_sw_if_indices[i]; + if (is_output_feature) + { + if (snat_interface_add_del_output_feature (sw_if_index, 1, is_del)) + { + error = clib_error_return (0, "%s %U failed", + is_del ? 
"del" : "add", + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, + sw_if_index)); + goto done; + } + } + else + { + if (snat_interface_add_del (sw_if_index, 1, is_del)) + { + error = clib_error_return (0, "%s %U failed", + is_del ? "del" : "add", + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, + sw_if_index)); + goto done; + } + } + } + } + + if (vec_len (outside_sw_if_indices)) + { + for (i = 0; i < vec_len(outside_sw_if_indices); i++) + { + sw_if_index = outside_sw_if_indices[i]; + if (is_output_feature) + { + if (snat_interface_add_del_output_feature (sw_if_index, 0, is_del)) + { + error = clib_error_return (0, "%s %U failed", + is_del ? "del" : "add", + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, + sw_if_index)); + goto done; + } + } + else + { + if (snat_interface_add_del (sw_if_index, 0, is_del)) + { + error = clib_error_return (0, "%s %U failed", + is_del ? "del" : "add", + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, + sw_if_index)); + goto done; + } + } + } + } + +done: + unformat_free (line_input); + vec_free (inside_sw_if_indices); + vec_free (outside_sw_if_indices); + + return error; +} + +VLIB_CLI_COMMAND (set_interface_snat_command, static) = { + .path = "set interface nat44", + .function = snat_feature_command_fn, + .short_help = "set interface nat44 in out [output-feature] " + "[del]", +}; + +uword +unformat_snat_protocol (unformat_input_t * input, va_list * args) +{ + u32 *r = va_arg (*args, u32 *); + + if (0); +#define _(N, i, n, s) else if (unformat (input, s)) *r = SNAT_PROTOCOL_##N; + foreach_snat_protocol +#undef _ + else + return 0; + return 1; +} + +u8 * +format_snat_protocol (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u8 *t = 0; + + switch (i) + { +#define _(N, j, n, str) case SNAT_PROTOCOL_##N: t = (u8 *) str; break; + foreach_snat_protocol +#undef _ + default: + s = format (s, "unknown"); + return s; + } + s = format (s, "%s", t); + return s; 
+} + +static clib_error_t * +add_static_mapping_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t * error = 0; + ip4_address_t l_addr, e_addr; + u32 l_port = 0, e_port = 0, vrf_id = ~0; + int is_add = 1; + int addr_only = 1; + u32 sw_if_index = ~0; + vnet_main_t * vnm = vnet_get_main(); + int rv; + snat_protocol_t proto; + u8 proto_set = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "local %U %u", unformat_ip4_address, &l_addr, + &l_port)) + addr_only = 0; + else if (unformat (line_input, "local %U", unformat_ip4_address, &l_addr)) + ; + else if (unformat (line_input, "external %U %u", unformat_ip4_address, + &e_addr, &e_port)) + addr_only = 0; + else if (unformat (line_input, "external %U", unformat_ip4_address, + &e_addr)) + ; + else if (unformat (line_input, "external %U %u", + unformat_vnet_sw_interface, vnm, &sw_if_index, + &e_port)) + addr_only = 0; + + else if (unformat (line_input, "external %U", + unformat_vnet_sw_interface, vnm, &sw_if_index)) + ; + else if (unformat (line_input, "vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "%U", unformat_snat_protocol, &proto)) + proto_set = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (!addr_only && !proto_set) + { + error = clib_error_return (0, "missing protocol"); + goto done; + } + + rv = snat_add_static_mapping(l_addr, e_addr, (u16) l_port, (u16) e_port, + vrf_id, addr_only, sw_if_index, proto, is_add); + + switch (rv) + { + case VNET_API_ERROR_INVALID_VALUE: + error = clib_error_return (0, "External port already in use."); + goto done; + case VNET_API_ERROR_NO_SUCH_ENTRY: + if (is_add) 
+ error = clib_error_return (0, "External addres must be allocated."); + else + error = clib_error_return (0, "Mapping not exist."); + goto done; + case VNET_API_ERROR_NO_SUCH_FIB: + error = clib_error_return (0, "No such VRF id."); + goto done; + case VNET_API_ERROR_VALUE_EXIST: + error = clib_error_return (0, "Mapping already exist."); + goto done; + default: + break; + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * @cliexpar + * @cliexstart{snat add static mapping} + * Static mapping allows hosts on the external network to initiate connection + * to to the local network host. + * To create static mapping between local host address 10.0.0.3 port 6303 and + * external address 4.4.4.4 port 3606 for TCP protocol use: + * vpp# nat44 add static mapping local tcp 10.0.0.3 6303 external 4.4.4.4 3606 + * If not runnig "static mapping only" NAT plugin mode use before: + * vpp# nat44 add address 4.4.4.4 + * To create static mapping between local and external address use: + * vpp# nat44 add static mapping local 10.0.0.3 external 4.4.4.4 + * @cliexend +?*/ +VLIB_CLI_COMMAND (add_static_mapping_command, static) = { + .path = "nat44 add static mapping", + .function = add_static_mapping_command_fn, + .short_help = + "nat44 add static mapping local tcp|udp|icmp [] external [] [vrf ] [del]", +}; + +static clib_error_t * +set_workers_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + uword *bitmap = 0; + int rv = 0; + clib_error_t *error = 0; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_bitmap_list, &bitmap)) + ; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (bitmap == 0) + { + error = clib_error_return (0, "List of workers must be specified."); + goto done; + } + + rv = snat_set_workers(bitmap); + + clib_bitmap_free (bitmap); + + switch (rv) + { + case VNET_API_ERROR_INVALID_WORKER: + error = clib_error_return (0, "Invalid worker(s)."); + goto done; + case VNET_API_ERROR_FEATURE_DISABLED: + error = clib_error_return (0, + "Supported only if 2 or more workes available."); + goto done; + default: + break; + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * @cliexpar + * @cliexstart{set snat workers} + * Set NAT workers if 2 or more workers available, use: + * vpp# set snat workers 0-2,5 + * @cliexend +?*/ +VLIB_CLI_COMMAND (set_workers_command, static) = { + .path = "set nat workers", + .function = set_workers_command_fn, + .short_help = + "set nat workers ", +}; + +static clib_error_t * +snat_ipfix_logging_enable_disable_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 domain_id = 0; + u32 src_port = 0; + u8 enable = 1; + int rv = 0; + clib_error_t *error = 0; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "domain %d", &domain_id)) + ; + else if (unformat (line_input, "src-port %d", &src_port)) + ; + else if (unformat (line_input, "disable")) + enable = 0; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + rv = snat_ipfix_logging_enable_disable (enable, domain_id, (u16) src_port); + + if (rv) + { + error = clib_error_return (0, "ipfix logging enable failed"); + goto done; + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * @cliexpar + * @cliexstart{snat ipfix logging} + * To enable NAT IPFIX logging use: + * vpp# nat ipfix logging + * To set IPFIX exporter use: + * vpp# set ipfix exporter collector 10.10.10.3 src 10.10.10.1 + * @cliexend +?*/ +VLIB_CLI_COMMAND (snat_ipfix_logging_enable_disable_command, static) = { + .path = "nat ipfix logging", + .function = snat_ipfix_logging_enable_disable_command_fn, + .short_help = "nat ipfix logging [domain ] [src-port ] [disable]", +}; + +static u32 +snat_get_worker_in2out_cb (ip4_header_t * ip0, u32 rx_fib_index0) +{ + snat_main_t *sm = &snat_main; + snat_user_key_t key0; + clib_bihash_kv_8_8_t kv0, value0; + u32 next_worker_index = 0; + + key0.addr = ip0->src_address; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + /* Ever heard of of the "user" before? 
*/ + if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0)) + { + /* No, assign next available worker (RR) */ + next_worker_index = sm->first_worker_index; + if (vec_len (sm->workers)) + { + next_worker_index += + sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; + } + + /* add non-traslated packets worker lookup */ + kv0.value = next_worker_index; + clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); + } + else + next_worker_index = value0.value; + + return next_worker_index; +} + +static u32 +snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0) +{ + snat_main_t *sm = &snat_main; + snat_worker_key_t key0; + clib_bihash_kv_8_8_t kv0, value0; + udp_header_t * udp0; + u32 next_worker_index = 0; + + udp0 = ip4_next_header (ip0); + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.fib_index = rx_fib_index0; + + if (PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_ICMP)) + { + icmp46_header_t * icmp0 = (icmp46_header_t *) udp0; + icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1); + key0.port = echo0->identifier; + } + + kv0.key = key0.as_u64; + + /* Ever heard of of the "user" before? 
*/ + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + { + key0.port = 0; + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + { + /* No, assign next available worker (RR) */ + next_worker_index = sm->first_worker_index; + if (vec_len (sm->workers)) + { + next_worker_index += + sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; + } + } + else + { + /* Static mapping without port */ + next_worker_index = value0.value; + } + + /* Add to translated packets worker lookup */ + kv0.value = next_worker_index; + clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); + } + else + next_worker_index = value0.value; + + return next_worker_index; +} + +static clib_error_t * +snat_config (vlib_main_t * vm, unformat_input_t * input) +{ + snat_main_t * sm = &snat_main; + u32 translation_buckets = 1024; + u32 translation_memory_size = 128<<20; + u32 user_buckets = 128; + u32 user_memory_size = 64<<20; + u32 max_translations_per_user = 100; + u32 outside_vrf_id = 0; + u32 inside_vrf_id = 0; + u32 static_mapping_buckets = 1024; + u32 static_mapping_memory_size = 64<<20; + u8 static_mapping_only = 0; + u8 static_mapping_connection_tracking = 0; + + sm->deterministic = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "translation hash buckets %d", &translation_buckets)) + ; + else if (unformat (input, "translation hash memory %d", + &translation_memory_size)); + else if (unformat (input, "user hash buckets %d", &user_buckets)) + ; + else if (unformat (input, "user hash memory %d", + &user_memory_size)) + ; + else if (unformat (input, "max translations per user %d", + &max_translations_per_user)) + ; + else if (unformat (input, "outside VRF id %d", + &outside_vrf_id)) + ; + else if (unformat (input, "inside VRF id %d", + &inside_vrf_id)) + ; + else if (unformat (input, "static mapping only")) + { + static_mapping_only = 1; + if (unformat (input, "connection tracking")) + 
static_mapping_connection_tracking = 1; + } + else if (unformat (input, "deterministic")) + sm->deterministic = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + + /* for show commands, etc. */ + sm->translation_buckets = translation_buckets; + sm->translation_memory_size = translation_memory_size; + sm->user_buckets = user_buckets; + sm->user_memory_size = user_memory_size; + sm->max_translations_per_user = max_translations_per_user; + sm->outside_vrf_id = outside_vrf_id; + sm->outside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + outside_vrf_id); + sm->inside_vrf_id = inside_vrf_id; + sm->inside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + inside_vrf_id); + sm->static_mapping_only = static_mapping_only; + sm->static_mapping_connection_tracking = static_mapping_connection_tracking; + + if (sm->deterministic) + { + sm->in2out_node_index = snat_det_in2out_node.index; + sm->in2out_output_node_index = ~0; + sm->out2in_node_index = snat_det_out2in_node.index; + sm->icmp_match_in2out_cb = icmp_match_in2out_det; + sm->icmp_match_out2in_cb = icmp_match_out2in_det; + } + else + { + sm->worker_in2out_cb = snat_get_worker_in2out_cb; + sm->worker_out2in_cb = snat_get_worker_out2in_cb; + sm->in2out_node_index = snat_in2out_node.index; + sm->in2out_output_node_index = snat_in2out_output_node.index; + sm->out2in_node_index = snat_out2in_node.index; + if (!static_mapping_only || + (static_mapping_only && static_mapping_connection_tracking)) + { + sm->icmp_match_in2out_cb = icmp_match_in2out_slow; + sm->icmp_match_out2in_cb = icmp_match_out2in_slow; + + clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets, + user_memory_size); + + clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", user_buckets, + user_memory_size); + + clib_bihash_init_8_8 (&sm->in2out, "in2out", translation_buckets, + translation_memory_size); + + clib_bihash_init_8_8 (&sm->out2in, "out2in", 
translation_buckets, + translation_memory_size); + + clib_bihash_init_8_8 (&sm->user_hash, "users", user_buckets, + user_memory_size); + + clib_bihash_init_16_8 (&sm->in2out_unk_proto, "in2out-unk-proto", + translation_buckets, translation_memory_size); + + clib_bihash_init_16_8 (&sm->out2in_unk_proto, "out2in-unk-proto", + translation_buckets, translation_memory_size); + } + else + { + sm->icmp_match_in2out_cb = icmp_match_in2out_fast; + sm->icmp_match_out2in_cb = icmp_match_out2in_fast; + } + clib_bihash_init_8_8 (&sm->static_mapping_by_local, + "static_mapping_by_local", static_mapping_buckets, + static_mapping_memory_size); + + clib_bihash_init_8_8 (&sm->static_mapping_by_external, + "static_mapping_by_external", static_mapping_buckets, + static_mapping_memory_size); + } + + return 0; +} + +VLIB_CONFIG_FUNCTION (snat_config, "nat"); + +u8 * format_snat_session_state (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + u8 *t = 0; + + switch (i) + { +#define _(v, N, str) case SNAT_SESSION_##N: t = (u8 *) str; break; + foreach_snat_session_state +#undef _ + default: + t = format (t, "unknown"); + } + s = format (s, "%s", t); + return s; +} + +u8 * format_snat_key (u8 * s, va_list * args) +{ + snat_session_key_t * key = va_arg (*args, snat_session_key_t *); + char * protocol_string = "unknown"; + static char *protocol_strings[] = { + "UDP", + "TCP", + "ICMP", + }; + + if (key->protocol < ARRAY_LEN(protocol_strings)) + protocol_string = protocol_strings[key->protocol]; + + s = format (s, "%U proto %s port %d fib %d", + format_ip4_address, &key->addr, protocol_string, + clib_net_to_host_u16 (key->port), key->fib_index); + return s; +} + +u8 * format_snat_session (u8 * s, va_list * args) +{ + snat_main_t * sm __attribute__((unused)) = va_arg (*args, snat_main_t *); + snat_session_t * sess = va_arg (*args, snat_session_t *); + + if (snat_is_unk_proto_session (sess)) + { + s = format (s, " i2o %U proto %u fib %u\n", + format_ip4_address, &sess->in2out.addr, 
sess->in2out.port, + sess->in2out.fib_index); + s = format (s, " o2i %U proto %u fib %u\n", + format_ip4_address, &sess->out2in.addr, sess->out2in.port, + sess->out2in.fib_index); + } + else + { + s = format (s, " i2o %U\n", format_snat_key, &sess->in2out); + s = format (s, " o2i %U\n", format_snat_key, &sess->out2in); + } + s = format (s, " last heard %.2f\n", sess->last_heard); + s = format (s, " total pkts %d, total bytes %lld\n", + sess->total_pkts, sess->total_bytes); + if (snat_is_session_static (sess)) + s = format (s, " static translation\n"); + else + s = format (s, " dynamic translation\n"); + + return s; +} + +u8 * format_snat_user (u8 * s, va_list * args) +{ + snat_main_per_thread_data_t * sm = va_arg (*args, snat_main_per_thread_data_t *); + snat_user_t * u = va_arg (*args, snat_user_t *); + int verbose = va_arg (*args, int); + dlist_elt_t * head, * elt; + u32 elt_index, head_index; + u32 session_index; + snat_session_t * sess; + + s = format (s, "%U: %d dynamic translations, %d static translations\n", + format_ip4_address, &u->addr, u->nsessions, u->nstaticsessions); + + if (verbose == 0) + return s; + + if (u->nsessions || u->nstaticsessions) + { + head_index = u->sessions_per_user_list_head_index; + head = pool_elt_at_index (sm->list_pool, head_index); + + elt_index = head->next; + elt = pool_elt_at_index (sm->list_pool, elt_index); + session_index = elt->value; + + while (session_index != ~0) + { + sess = pool_elt_at_index (sm->sessions, session_index); + + s = format (s, " %U\n", format_snat_session, sm, sess); + + elt_index = elt->next; + elt = pool_elt_at_index (sm->list_pool, elt_index); + session_index = elt->value; + } + } + + return s; +} + +u8 * format_snat_static_mapping (u8 * s, va_list * args) +{ + snat_static_mapping_t *m = va_arg (*args, snat_static_mapping_t *); + + if (m->addr_only) + s = format (s, "local %U external %U vrf %d", + format_ip4_address, &m->local_addr, + format_ip4_address, &m->external_addr, + m->vrf_id); + else + s = 
format (s, "%U local %U:%d external %U:%d vrf %d", + format_snat_protocol, m->proto, + format_ip4_address, &m->local_addr, m->local_port, + format_ip4_address, &m->external_addr, m->external_port, + m->vrf_id); + + return s; +} + +u8 * format_snat_static_map_to_resolve (u8 * s, va_list * args) +{ + snat_static_map_resolve_t *m = va_arg (*args, snat_static_map_resolve_t *); + vnet_main_t *vnm = vnet_get_main(); + + if (m->addr_only) + s = format (s, "local %U external %U vrf %d", + format_ip4_address, &m->l_addr, + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, m->sw_if_index), + m->vrf_id); + else + s = format (s, "%U local %U:%d external %U:%d vrf %d", + format_snat_protocol, m->proto, + format_ip4_address, &m->l_addr, m->l_port, + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, m->sw_if_index), m->e_port, + m->vrf_id); + + return s; +} + +u8 * format_det_map_ses (u8 * s, va_list * args) +{ + snat_det_map_t * det_map = va_arg (*args, snat_det_map_t *); + ip4_address_t in_addr, out_addr; + u32 in_offset, out_offset; + snat_det_session_t * ses = va_arg (*args, snat_det_session_t *); + u32 * i = va_arg (*args, u32 *); + + u32 user_index = *i / SNAT_DET_SES_PER_USER; + in_addr.as_u32 = clib_host_to_net_u32 ( + clib_net_to_host_u32(det_map->in_addr.as_u32) + user_index); + in_offset = clib_net_to_host_u32(in_addr.as_u32) - + clib_net_to_host_u32(det_map->in_addr.as_u32); + out_offset = in_offset / det_map->sharing_ratio; + out_addr.as_u32 = clib_host_to_net_u32( + clib_net_to_host_u32(det_map->out_addr.as_u32) + out_offset); + s = format (s, "in %U:%d out %U:%d external host %U:%d state: %U expire: %d\n", + format_ip4_address, &in_addr, + clib_net_to_host_u16 (ses->in_port), + format_ip4_address, &out_addr, + clib_net_to_host_u16 (ses->out.out_port), + format_ip4_address, &ses->out.ext_host_addr, + clib_net_to_host_u16 (ses->out.ext_host_port), + format_snat_session_state, ses->state, + ses->expire); + + return s; +} + +static 
clib_error_t * +show_snat_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int verbose = 0; + snat_main_t * sm = &snat_main; + snat_user_t * u; + snat_static_mapping_t *m; + snat_interface_t *i; + snat_address_t * ap; + vnet_main_t *vnm = vnet_get_main(); + snat_main_per_thread_data_t *tsm; + u32 users_num = 0, sessions_num = 0, *worker, *sw_if_index; + uword j = 0; + snat_static_map_resolve_t *rp; + snat_det_map_t * dm; + snat_det_session_t * ses; + + if (unformat (input, "detail")) + verbose = 1; + else if (unformat (input, "verbose")) + verbose = 2; + + if (sm->static_mapping_only) + { + if (sm->static_mapping_connection_tracking) + vlib_cli_output (vm, "NAT plugin mode: static mapping only connection " + "tracking"); + else + vlib_cli_output (vm, "NAT plugin mode: static mapping only"); + } + else if (sm->deterministic) + { + vlib_cli_output (vm, "NAT plugin mode: deterministic mapping"); + } + else + { + vlib_cli_output (vm, "NAT plugin mode: dynamic translations enabled"); + } + + if (verbose > 0) + { + pool_foreach (i, sm->interfaces, + ({ + vlib_cli_output (vm, "%U %s", format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, i->sw_if_index), + i->is_inside ? "in" : "out"); + })); + + pool_foreach (i, sm->output_feature_interfaces, + ({ + vlib_cli_output (vm, "%U output-feature %s", + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, i->sw_if_index), + i->is_inside ? 
"in" : "out"); + })); + + if (vec_len (sm->auto_add_sw_if_indices)) + { + vlib_cli_output (vm, "NAT44 pool addresses interfaces:"); + vec_foreach (sw_if_index, sm->auto_add_sw_if_indices) + { + vlib_cli_output (vm, "%U", format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, *sw_if_index)); + } + } + + vec_foreach (ap, sm->addresses) + { + vlib_cli_output (vm, "%U", format_ip4_address, &ap->addr); + if (ap->fib_index != ~0) + vlib_cli_output (vm, " tenant VRF: %u", + ip4_fib_get(ap->fib_index)->table_id); + else + vlib_cli_output (vm, " tenant VRF independent"); +#define _(N, i, n, s) \ + vlib_cli_output (vm, " %d busy %s ports", ap->busy_##n##_ports, s); + foreach_snat_protocol +#undef _ + } + } + + if (sm->num_workers > 1) + { + vlib_cli_output (vm, "%d workers", vec_len (sm->workers)); + if (verbose > 0) + { + vec_foreach (worker, sm->workers) + { + vlib_worker_thread_t *w = + vlib_worker_threads + *worker + sm->first_worker_index; + vlib_cli_output (vm, " %s", w->name); + } + } + } + + if (sm->deterministic) + { + vlib_cli_output (vm, "udp timeout: %dsec", sm->udp_timeout); + vlib_cli_output (vm, "tcp-established timeout: %dsec", + sm->tcp_established_timeout); + vlib_cli_output (vm, "tcp-transitory timeout: %dsec", + sm->tcp_transitory_timeout); + vlib_cli_output (vm, "icmp timeout: %dsec", sm->icmp_timeout); + vlib_cli_output (vm, "%d deterministic mappings", + pool_elts (sm->det_maps)); + if (verbose > 0) + { + pool_foreach (dm, sm->det_maps, + ({ + vlib_cli_output (vm, "in %U/%d out %U/%d\n", + format_ip4_address, &dm->in_addr, dm->in_plen, + format_ip4_address, &dm->out_addr, dm->out_plen); + vlib_cli_output (vm, " outside address sharing ratio: %d\n", + dm->sharing_ratio); + vlib_cli_output (vm, " number of ports per inside host: %d\n", + dm->ports_per_host); + vlib_cli_output (vm, " sessions number: %d\n", dm->ses_num); + if (verbose > 1) + { + vec_foreach_index (j, dm->sessions) + { + ses = vec_elt_at_index (dm->sessions, j); + if 
(ses->in_port) + vlib_cli_output (vm, " %U", format_det_map_ses, dm, ses, + &j); + } + } + })); + } + } + else + { + if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)) + { + vlib_cli_output (vm, "%d static mappings", + pool_elts (sm->static_mappings)); + + if (verbose > 0) + { + pool_foreach (m, sm->static_mappings, + ({ + vlib_cli_output (vm, "%U", format_snat_static_mapping, m); + })); + } + } + else + { + vec_foreach (tsm, sm->per_thread_data) + { + users_num += pool_elts (tsm->users); + sessions_num += pool_elts (tsm->sessions); + } + + vlib_cli_output (vm, "%d users, %d outside addresses, %d active sessions," + " %d static mappings", + users_num, + vec_len (sm->addresses), + sessions_num, + pool_elts (sm->static_mappings)); + + if (verbose > 0) + { + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->in2out, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out, + verbose - 1); + vec_foreach_index (j, sm->per_thread_data) + { + tsm = vec_elt_at_index (sm->per_thread_data, j); + + if (pool_elts (tsm->users) == 0) + continue; + + vlib_worker_thread_t *w = vlib_worker_threads + j; + vlib_cli_output (vm, "Thread %d (%s at lcore %u):", j, w->name, + w->lcore_id); + vlib_cli_output (vm, " %d list pool elements", + pool_elts (tsm->list_pool)); + + pool_foreach (u, tsm->users, + ({ + vlib_cli_output (vm, " %U", format_snat_user, tsm, u, + verbose - 1); + })); + } + + if (pool_elts (sm->static_mappings)) + { + vlib_cli_output (vm, "static mappings:"); + pool_foreach (m, sm->static_mappings, + ({ + vlib_cli_output (vm, "%U", format_snat_static_mapping, m); + })); + for (j = 0; j < vec_len (sm->to_resolve); j++) + { + rp = sm->to_resolve + j; + vlib_cli_output (vm, "%U", + format_snat_static_map_to_resolve, rp); + } + } + } + } + } + return 0; +} + 
+VLIB_CLI_COMMAND (show_snat_command, static) = { + .path = "show nat44", + .short_help = "show nat44", + .function = show_snat_command_fn, +}; + + +static void +snat_ip4_add_del_interface_address_cb (ip4_main_t * im, + uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, + u32 is_delete) +{ + snat_main_t *sm = &snat_main; + snat_static_map_resolve_t *rp; + u32 *indices_to_delete = 0; + int i, j; + int rv; + + for (i = 0; i < vec_len(sm->auto_add_sw_if_indices); i++) + { + if (sw_if_index == sm->auto_add_sw_if_indices[i]) + { + if (!is_delete) + { + /* Don't trip over lease renewal, static config */ + for (j = 0; j < vec_len(sm->addresses); j++) + if (sm->addresses[j].addr.as_u32 == address->as_u32) + return; + + snat_add_address (sm, address, ~0); + /* Scan static map resolution vector */ + for (j = 0; j < vec_len (sm->to_resolve); j++) + { + rp = sm->to_resolve + j; + /* On this interface? */ + if (rp->sw_if_index == sw_if_index) + { + /* Add the static mapping */ + rv = snat_add_static_mapping (rp->l_addr, + address[0], + rp->l_port, + rp->e_port, + rp->vrf_id, + rp->addr_only, + ~0 /* sw_if_index */, + rp->proto, + rp->is_add); + if (rv) + clib_warning ("snat_add_static_mapping returned %d", + rv); + vec_add1 (indices_to_delete, j); + } + } + /* If we resolved any of the outstanding static mappings */ + if (vec_len(indices_to_delete)) + { + /* Delete them */ + for (j = vec_len(indices_to_delete)-1; j >= 0; j--) + vec_delete(sm->to_resolve, 1, j); + vec_free(indices_to_delete); + } + return; + } + else + { + (void) snat_del_address(sm, address[0], 1); + return; + } + } + } +} + + +int snat_add_interface_address (snat_main_t *sm, u32 sw_if_index, int is_del) +{ + ip4_main_t * ip4_main = sm->ip4_main; + ip4_address_t * first_int_addr; + snat_static_map_resolve_t *rp; + u32 *indices_to_delete = 0; + int i, j; + + first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, + 0 /* just want the address*/); 
+ + for (i = 0; i < vec_len(sm->auto_add_sw_if_indices); i++) + { + if (sm->auto_add_sw_if_indices[i] == sw_if_index) + { + if (is_del) + { + /* if have address remove it */ + if (first_int_addr) + (void) snat_del_address (sm, first_int_addr[0], 1); + else + { + for (j = 0; j < vec_len (sm->to_resolve); j++) + { + rp = sm->to_resolve + j; + if (rp->sw_if_index == sw_if_index) + vec_add1 (indices_to_delete, j); + } + if (vec_len(indices_to_delete)) + { + for (j = vec_len(indices_to_delete)-1; j >= 0; j--) + vec_del1(sm->to_resolve, j); + vec_free(indices_to_delete); + } + } + vec_del1(sm->auto_add_sw_if_indices, i); + } + else + return VNET_API_ERROR_VALUE_EXIST; + + return 0; + } + } + + if (is_del) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* add to the auto-address list */ + vec_add1(sm->auto_add_sw_if_indices, sw_if_index); + + /* If the address is already bound - or static - add it now */ + if (first_int_addr) + snat_add_address (sm, first_int_addr, ~0); + + return 0; +} + +static clib_error_t * +snat_add_interface_address_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + unformat_input_t _line_input, *line_input = &_line_input; + u32 sw_if_index; + int rv; + int is_del = 0; + clib_error_t *error = 0; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_vnet_sw_interface, + sm->vnet_main, &sw_if_index)) + ; + else if (unformat (line_input, "del")) + is_del = 1; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + rv = snat_add_interface_address (sm, sw_if_index, is_del); + + switch (rv) + { + case 0: + break; + + default: + error = clib_error_return (0, "snat_add_interface_address returned %d", + rv); + goto done; + } + +done: + unformat_free (line_input); + + return error; +} + +VLIB_CLI_COMMAND (snat_add_interface_address_command, static) = { + .path = "nat44 add interface address", + .short_help = "nat44 add interface address [del]", + .function = snat_add_interface_address_command_fn, +}; + +static clib_error_t * +snat_det_map_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t in_addr, out_addr; + u32 in_plen, out_plen; + int is_add = 1, rv; + clib_error_t *error = 0; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "in %U/%u", unformat_ip4_address, &in_addr, &in_plen)) + ; + else if (unformat (line_input, "out %U/%u", unformat_ip4_address, &out_addr, &out_plen)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + unformat_free (line_input); + + rv = snat_det_add_map(sm, &in_addr, (u8) in_plen, &out_addr, (u8)out_plen, + is_add); + + if (rv) + { + error = clib_error_return (0, "snat_det_add_map return %d", rv); + goto done; + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * @cliexpar + * @cliexstart{snat deterministic add} + * Create bijective mapping of inside address to outside address and port range + * pairs, with the purpose of enabling deterministic NAT to reduce logging in + * CGN deployments. + * To create deterministic mapping between inside network 10.0.0.0/18 and + * outside network 1.1.1.0/30 use: + * # vpp# nat44 deterministic add in 10.0.0.0/18 out 1.1.1.0/30 + * @cliexend +?*/ +VLIB_CLI_COMMAND (snat_det_map_command, static) = { + .path = "nat44 deterministic add", + .short_help = "nat44 deterministic add in / out / [del]", + .function = snat_det_map_command_fn, +}; + +static clib_error_t * +snat_det_forward_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t in_addr, out_addr; + u16 lo_port; + snat_det_map_t * dm; + clib_error_t *error = 0; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_ip4_address, &in_addr)) + ; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + unformat_free (line_input); + + dm = snat_det_map_by_user(sm, &in_addr); + if (!dm) + vlib_cli_output (vm, "no match"); + else + { + snat_det_forward (dm, &in_addr, &out_addr, &lo_port); + vlib_cli_output (vm, "%U:<%d-%d>", format_ip4_address, &out_addr, + lo_port, lo_port + dm->ports_per_host - 1); + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * @cliexpar + * @cliexstart{snat deterministic forward} + * Return outside address and port range from inside address for deterministic + * NAT. + * To obtain outside address and port of inside host use: + * vpp# nat44 deterministic forward 10.0.0.2 + * 1.1.1.0:<1054-1068> + * @cliexend +?*/ +VLIB_CLI_COMMAND (snat_det_forward_command, static) = { + .path = "nat44 deterministic forward", + .short_help = "nat44 deterministic forward ", + .function = snat_det_forward_command_fn, +}; + +static clib_error_t * +snat_det_reverse_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + snat_main_t *sm = &snat_main; + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t in_addr, out_addr; + u32 out_port; + snat_det_map_t * dm; + clib_error_t *error = 0; + + /* Get a line of input. 
*/
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  /* Expect a single "<outside address>:<outside port>" token. */
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%U:%d", unformat_ip4_address, &out_addr, &out_port))
+        ;
+      else
+        {
+          error = clib_error_return (0, "unknown input '%U'",
+                                     format_unformat_error, line_input);
+          /* Bail out as the sibling CLI handlers do: this branch consumes
+           * nothing from line_input, so another pass of the loop would see
+           * the same token again and overwrite the error every time. */
+          goto done;
+        }
+    }
+
+  unformat_free (line_input);
+
+  if (out_port < 1024 || out_port > 65535)
+    {
+      error = clib_error_return (0, "wrong port, must be <1024-65535>");
+      goto done;
+    }
+
+  /* Look up the deterministic mapping owning this outside address and
+   * print the inside address it translates back to. */
+  dm = snat_det_map_by_out(sm, &out_addr);
+  if (!dm)
+    vlib_cli_output (vm, "no match");
+  else
+    {
+      snat_det_reverse (dm, &out_addr, (u16) out_port, &in_addr);
+      vlib_cli_output (vm, "%U", format_ip4_address, &in_addr);
+    }
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{snat deterministic reverse}
+ * Return inside address from outside address and port for deterministic NAT.
+ * To obtain inside host address from outside address and port use:
+ * #vpp nat44 deterministic reverse 1.1.1.1:1276
+ * 10.0.16.16
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (snat_det_reverse_command, static) = {
+    .path = "nat44 deterministic reverse",
+    .short_help = "nat44 deterministic reverse :",
+    .function = snat_det_reverse_command_fn,
+};
+
+static clib_error_t *
+set_timeout_command_fn (vlib_main_t * vm,
+                        unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+  snat_main_t *sm = &snat_main;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = 0;
+
+  /* Get a line of input. 
*/
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  /* Each keyword writes straight into the matching timeout field of
+   * snat_main; "reset" restores the compiled-in defaults. */
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "udp %u", &sm->udp_timeout))
+        ;
+      else if (unformat (line_input, "tcp-established %u",
+               &sm->tcp_established_timeout))
+        ;
+      else if (unformat (line_input, "tcp-transitory %u",
+               &sm->tcp_transitory_timeout))
+        ;
+      else if (unformat (line_input, "icmp %u", &sm->icmp_timeout))
+        ;
+      else if (unformat (line_input, "reset"))
+        {
+          sm->udp_timeout = SNAT_UDP_TIMEOUT;
+          sm->tcp_established_timeout = SNAT_TCP_ESTABLISHED_TIMEOUT;
+          sm->tcp_transitory_timeout = SNAT_TCP_TRANSITORY_TIMEOUT;
+          sm->icmp_timeout = SNAT_ICMP_TIMEOUT;
+        }
+      else
+        {
+          error = clib_error_return (0, "unknown input '%U'",
+                                     format_unformat_error, line_input);
+          goto done;
+        }
+    }
+
+  unformat_free (line_input);
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{set snat deterministic timeout}
+ * Set values of timeouts for deterministic NAT (in seconds), use:
+ * vpp# set nat44 deterministic timeout udp 120 tcp-established 7500
+ * tcp-transitory 250 icmp 90
+ * To reset default values use:
+ * vpp# set nat44 deterministic timeout reset
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (set_timeout_command, static) = {
+  .path = "set nat44 deterministic timeout",
+  .function = set_timeout_command_fn,
+  .short_help =
+    "set nat44 deterministic timeout [udp | tcp-established "
+    "tcp-transitory | icmp | reset]",
+};
+
+static clib_error_t *
+snat_det_close_session_out_fn (vlib_main_t *vm,
+                               unformat_input_t * input,
+                               vlib_cli_command_t * cmd)
+{
+  snat_main_t *sm = &snat_main;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  ip4_address_t out_addr, ext_addr, in_addr;
+  /* u32, not u16: both ports are filled through "%d" conversions below, and
+   * this file parses "%d" into a u32 elsewhere (out_port in
+   * snat_det_reverse_command_fn); a u16 target is too small for that store. */
+  u32 out_port, ext_port;
+  snat_det_map_t * dm;
+  snat_det_session_t * ses;
+  snat_det_out_key_t key;
+  clib_error_t *error = 0;
+
+  /* Get a line of input. 
*/
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  /* Expect "<outside addr>:<outside port> <external addr>:<external port>". */
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%U:%d %U:%d",
+                    unformat_ip4_address, &out_addr, &out_port,
+                    unformat_ip4_address, &ext_addr, &ext_port))
+        ;
+      else
+        {
+          error = clib_error_return (0, "unknown input '%U'",
+                                     format_unformat_error, line_input);
+          goto done;
+        }
+    }
+
+  unformat_free (line_input);
+
+  dm = snat_det_map_by_out(sm, &out_addr);
+  if (!dm)
+    vlib_cli_output (vm, "no match");
+  else
+    {
+      /* Ports are narrowed explicitly to 16 bits before use.
+       * NOTE(review): ext_addr is handed to snat_det_reverse() where the
+       * reverse command passes the outside address, and out_addr is stored
+       * as key.ext_host_addr -- the two look swapped; confirm against
+       * snat_det_reverse() and snat_det_out_key_t before changing. */
+      snat_det_reverse(dm, &ext_addr, (u16) out_port, &in_addr);
+      key.ext_host_addr = out_addr;
+      key.ext_host_port = ntohs((u16) ext_port);
+      key.out_port = ntohs((u16) out_port);
+      ses = snat_det_get_ses_by_out(dm, &out_addr, key.as_u64);
+      if (!ses)
+        vlib_cli_output (vm, "no match");
+      else
+       snat_det_ses_close(dm, ses);
+    }
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{snat deterministic close session out}
+ * Close session using outside ip address and port
+ * and external ip address and port, use:
+ * vpp# nat44 deterministic close session out 1.1.1.1:1276 2.2.2.2:2387
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (snat_det_close_sesion_out_command, static) = {
+  .path = "nat44 deterministic close session out",
+  .short_help = "nat44 deterministic close session out "
+                ": :",
+  .function = snat_det_close_session_out_fn,
+};
+
+static clib_error_t *
+snat_det_close_session_in_fn (vlib_main_t *vm,
+                              unformat_input_t * input,
+                              vlib_cli_command_t * cmd)
+{
+  snat_main_t *sm = &snat_main;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  ip4_address_t in_addr, ext_addr;
+  /* u32, not u16: both ports are filled through "%d" conversions below, and
+   * this file parses "%d" into a u32 elsewhere (out_port in
+   * snat_det_reverse_command_fn); a u16 target is too small for that store. */
+  u32 in_port, ext_port;
+  snat_det_map_t * dm;
+  snat_det_session_t * ses;
+  snat_det_out_key_t key;
+  clib_error_t *error = 0;
+
+  /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U:%d %U:%d", + unformat_ip4_address, &in_addr, &in_port, + unformat_ip4_address, &ext_addr, &ext_port)) + ; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + unformat_free (line_input); + + dm = snat_det_map_by_user (sm, &in_addr); + if (!dm) + vlib_cli_output (vm, "no match"); + else + { + key.ext_host_addr = ext_addr; + key.ext_host_port = ntohs (ext_port); + ses = snat_det_find_ses_by_in (dm, &in_addr, ntohs(in_port), key); + if (!ses) + vlib_cli_output (vm, "no match"); + else + snat_det_ses_close(dm, ses); + } + +done: + unformat_free(line_input); + + return error; +} + +/*? + * @cliexpar + * @cliexstart{snat deterministic close_session_in} + * Close session using inside ip address and port + * and external ip address and port, use: + * vpp# nat44 deterministic close session in 3.3.3.3:3487 2.2.2.2:2387 + * @cliexend +?*/ +VLIB_CLI_COMMAND (snat_det_close_session_in_command, static) = { + .path = "nat44 deterministic close session in", + .short_help = "nat44 deterministic close session in " + ": :", + .function = snat_det_close_session_in_fn, +}; -- cgit 1.2.3-korg From 2bd3f8a525d5767ddcc19342167f78ad3c7d5997 Mon Sep 17 00:00:00 2001 From: Hongjun Ni Date: Tue, 29 Aug 2017 20:39:42 +0800 Subject: Fix some issue of comment and help for nat feature Change-Id: I9233e427b5f8874492fefb2bfe7c1ab329e366b2 Signed-off-by: Hongjun Ni --- src/plugins/nat/nat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index ac39be95..f9ecb943 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -1246,7 +1246,7 @@ done: VLIB_CLI_COMMAND (add_address_command, static) = { .path = "nat44 add address", - 
.short_help = "nat44 add addresses [- ] " + .short_help = "nat44 add address [- ] " "[tenant-vrf ] [del]", .function = add_address_command_fn, }; @@ -1503,7 +1503,7 @@ done: * to to the local network host. * To create static mapping between local host address 10.0.0.3 port 6303 and * external address 4.4.4.4 port 3606 for TCP protocol use: - * vpp# nat44 add static mapping local tcp 10.0.0.3 6303 external 4.4.4.4 3606 + * vpp# nat44 add static mapping tcp local 10.0.0.3 6303 external 4.4.4.4 3606 * If not runnig "static mapping only" NAT plugin mode use before: * vpp# nat44 add address 4.4.4.4 * To create static mapping between local and external address use: @@ -1514,7 +1514,7 @@ VLIB_CLI_COMMAND (add_static_mapping_command, static) = { .path = "nat44 add static mapping", .function = add_static_mapping_command_fn, .short_help = - "nat44 add static mapping local tcp|udp|icmp [] external [] [vrf ] [del]", + "nat44 add static mapping tcp|udp|icmp local [] external [] [vrf ] [del]", }; static clib_error_t * -- cgit 1.2.3-korg From 704018cf117b6667f08b09d6db5fbec105bf6d57 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Mon, 4 Sep 2017 02:17:18 -0700 Subject: NAT: Destination NAT44 with load-balancing (VPP-954) added load-balancing static mappings with unequal load support Change-Id: Ie505e41f24d46f812b94dd28bdafe3dc170a6060 Signed-off-by: Matus Fabian --- src/plugins/nat/in2out.c | 253 ++++++++++++++++++++++++--- src/plugins/nat/nat.api | 33 ++++ src/plugins/nat/nat.c | 430 ++++++++++++++++++++++++++++++++++++++++++---- src/plugins/nat/nat.h | 23 ++- src/plugins/nat/nat_api.c | 137 ++++++++++++++- src/plugins/nat/out2in.c | 205 ++++++++++++++++++++-- test/test_nat.py | 101 +++++++++++ test/vpp_papi_provider.py | 30 ++++ 8 files changed, 1142 insertions(+), 70 deletions(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index bb186393..c51d4fb4 100644 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ 
-314,23 +314,25 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, if (snat_is_unk_proto_session (s)) { clib_bihash_kv_16_8_t up_kv; - snat_unk_proto_ses_key_t key; + nat_ed_ses_key_t key; /* Remove from lookup tables */ key.l_addr = s->in2out.addr; key.r_addr = s->ext_host_addr; key.fib_index = s->in2out.fib_index; key.proto = s->in2out.port; + key.rsvd = 0; + key.l_port = 0; up_kv.key[0] = key.as_u64[0]; up_kv.key[1] = key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &up_kv, 0)) + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0)) clib_warning ("in2out key del failed"); key.l_addr = s->out2in.addr; key.fib_index = s->out2in.fib_index; up_kv.key[0] = key.as_u64[0]; up_kv.key[1] = key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &up_kv, 0)) + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0)) clib_warning ("out2in key del failed"); } else @@ -1033,7 +1035,7 @@ snat_hairpinning_unknown_proto (snat_main_t *sm, u32 old_addr, new_addr = 0, ti = 0; clib_bihash_kv_8_8_t kv, value; clib_bihash_kv_16_8_t s_kv, s_value; - snat_unk_proto_ses_key_t key; + nat_ed_ses_key_t key; snat_session_key_t m_key; snat_worker_key_t w_key; snat_static_mapping_t *m; @@ -1045,10 +1047,11 @@ snat_hairpinning_unknown_proto (snat_main_t *sm, key.r_addr.as_u32 = ip->src_address.as_u32; key.fib_index = sm->outside_fib_index; key.proto = ip->protocol; - key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0; + key.rsvd = 0; + key.l_port = 0; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value)) + if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) { m_key.addr = ip->dst_address; m_key.fib_index = sm->outside_fib_index; @@ -1110,7 +1113,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; u32 elt_index, head_index, ses_index, oldest_index; snat_session_t * s; - snat_unk_proto_ses_key_t key; + 
nat_ed_ses_key_t key; u32 address_index = ~0; int i; u8 is_sm = 0; @@ -1121,11 +1124,12 @@ snat_in2out_unknown_proto (snat_main_t *sm, key.r_addr = ip->dst_address; key.fib_index = rx_fib_index; key.proto = ip->protocol; - key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0; + key.rsvd = 0; + key.l_port = 0; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (!clib_bihash_search_16_8 (&sm->in2out_unk_proto, &s_kv, &s_value)) + if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value)) { s = pool_elt_at_index (tsm->sessions, s_value.value); new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32; @@ -1202,7 +1206,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, key.l_addr.as_u32 = new_addr; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value)) + if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) break; goto create_ses; @@ -1215,7 +1219,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, key.l_addr.as_u32 = sm->addresses[i].addr.as_u32; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value)) + if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) { new_addr = ip->src_address.as_u32 = key.l_addr.as_u32; address_index = i; @@ -1259,14 +1263,14 @@ create_ses: key.proto = s->in2out.port; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 0)) + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 0)) clib_warning ("in2out key del failed"); key.l_addr = s->out2in.addr; key.fib_index = s->out2in.fib_index; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 0)) + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 0)) clib_warning ("out2in key del failed"); } else @@ -1333,14 +1337,14 @@ create_ses: s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; 
s_kv.value = s - tsm->sessions; - if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1)) + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1)) clib_warning ("in2out key add failed"); key.l_addr.as_u32 = new_addr; key.fib_index = sm->outside_fib_index; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1)) + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) clib_warning ("out2in key add failed"); } @@ -1366,6 +1370,153 @@ create_ses: vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index; } +static void +snat_in2out_lb (snat_main_t *sm, + vlib_buffer_t * b, + ip4_header_t * ip, + u32 rx_fib_index, + u32 thread_index, + f64 now, + vlib_main_t * vm) +{ + nat_ed_ses_key_t key; + clib_bihash_kv_16_8_t s_kv, s_value; + udp_header_t *udp = ip4_next_header (ip); + tcp_header_t *tcp = (tcp_header_t *) udp; + snat_session_t *s = 0; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + u32 old_addr, new_addr; + u16 new_port, old_port; + ip_csum_t sum; + u32 proto = ip_proto_to_snat_proto (ip->protocol); + snat_session_key_t e_key, l_key; + clib_bihash_kv_8_8_t kv, value; + snat_user_key_t u_key; + snat_user_t *u; + dlist_elt_t *head, *elt; + + old_addr = ip->src_address.as_u32; + + key.l_addr = ip->src_address; + key.r_addr = ip->dst_address; + key.fib_index = rx_fib_index; + key.proto = ip->protocol; + key.rsvd = 0; + key.l_port = udp->src_port; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + + if (!clib_bihash_search_16_8 (&sm->in2out_ed, &s_kv, &s_value)) + { + s = pool_elt_at_index (tsm->sessions, s_value.value); + } + else + { + l_key.addr = ip->src_address; + l_key.port = udp->src_port; + l_key.protocol = proto; + l_key.fib_index = rx_fib_index; + if (snat_static_mapping_match(sm, l_key, &e_key, 0, 0)) + return; + + u_key.addr = ip->src_address; + u_key.fib_index = rx_fib_index; + kv.key = u_key.as_u64; + + /* Ever heard of the "user" = 
src ip4 address before? */ + if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + /* no, make a new one */ + pool_get (tsm->users, u); + memset (u, 0, sizeof (*u)); + u->addr = ip->src_address; + u->fib_index = rx_fib_index; + + pool_get (tsm->list_pool, head); + u->sessions_per_user_list_head_index = head - tsm->list_pool; + + clib_dlist_init (tsm->list_pool, + u->sessions_per_user_list_head_index); + + kv.value = u - tsm->users; + + /* add user */ + if (clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1)) + clib_warning ("user key add failed"); + } + else + { + u = pool_elt_at_index (tsm->users, value.value); + } + + /* Create a new session */ + pool_get (tsm->sessions, s); + memset (s, 0, sizeof (*s)); + + s->ext_host_addr.as_u32 = ip->dst_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING; + s->outside_address_index = ~0; + s->in2out = l_key; + s->out2in = e_key; + u->nstaticsessions++; + + /* Create list elts */ + pool_get (tsm->list_pool, elt); + clib_dlist_init (tsm->list_pool, elt - tsm->list_pool); + elt->value = s - tsm->sessions; + s->per_user_index = elt - tsm->list_pool; + s->per_user_list_head_index = u->sessions_per_user_list_head_index; + clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index, + s->per_user_index); + + /* Add to lookup tables */ + s_kv.value = s - tsm->sessions; + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1)) + clib_warning ("in2out-ed key add failed"); + + key.l_addr = e_key.addr; + key.fib_index = e_key.fib_index; + key.l_port = e_key.port; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) + clib_warning ("out2in-ed key add failed"); + } + + new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32; + + /* Update IP checksum */ + sum = ip->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address); + ip->checksum = ip_csum_fold (sum); + + if 
(PREDICT_TRUE(proto == SNAT_PROTOCOL_TCP)) + { + old_port = tcp->src_port; + tcp->src_port = s->out2in.port; + new_port = tcp->src_port; + + sum = tcp->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address); + sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length); + tcp->checksum = ip_csum_fold(sum); + } + else + { + udp->src_port = s->out2in.port; + udp->checksum = 0; + } + + if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0) + vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + + /* Accounting */ + s->last_heard = now; + s->total_pkts++; + s->total_bytes += vlib_buffer_length_in_chain (vm, b); +} + static inline uword snat_in2out_node_fn_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1521,8 +1672,28 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, - value0.value); + { + if (PREDICT_FALSE (value0.value == ~0ULL)) + { + if (is_slow_path) + { + snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, + now, vm); + goto trace00; + } + else + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace00; + } + } + else + { + s0 = pool_elt_at_index ( + sm->per_thread_data[thread_index].sessions, + value0.value); + } + } old_addr0 = ip0->src_address.as_u32; ip0->src_address = s0->out2in.addr; @@ -1672,8 +1843,28 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, } } else - s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, - value1.value); + { + if (PREDICT_FALSE (value1.value == ~0ULL)) + { + if (is_slow_path) + { + snat_in2out_lb(sm, b1, ip1, rx_fib_index1, thread_index, + now, vm); + goto trace01; + } + else + { + next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace01; + } + } + else + { + s1 = pool_elt_at_index ( + sm->per_thread_data[thread_index].sessions, + value1.value); + } + } old_addr1 = ip1->src_address.as_u32; ip1->src_address = s1->out2in.addr; @@ -1860,8 +2051,28 @@ snat_in2out_node_fn_inline (vlib_main_t * 
vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, - value0.value); + { + if (PREDICT_FALSE (value0.value == ~0ULL)) + { + if (is_slow_path) + { + snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, + now, vm); + goto trace0; + } + else + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace0; + } + } + else + { + s0 = pool_elt_at_index ( + sm->per_thread_data[thread_index].sessions, + value0.value); + } + } old_addr0 = ip0->src_address.as_u32; ip0->src_address = s0->out2in.addr; diff --git a/src/plugins/nat/nat.api b/src/plugins/nat/nat.api index 7245cb07..d7a4a9ef 100644 --- a/src/plugins/nat/nat.api +++ b/src/plugins/nat/nat.api @@ -1025,6 +1025,39 @@ define nat44_user_session_details { u32 total_pkts; }; +typeonly manual_endian define nat44_lb_addr_port { + u8 addr[4]; + u16 port; + u8 probability; +}; + +autoreply manual_endian define nat44_add_del_lb_static_mapping { + u32 client_index; + u32 context; + u8 is_add; + u8 external_addr[4]; + u16 external_port; + u8 protocol; + u32 vrf_id; + u8 local_num; + vl_api_nat44_lb_addr_port_t locals[local_num]; +}; + +define nat44_lb_static_mapping_dump { + u32 client_index; + u32 context; +}; + +manual_endian define nat44_lb_static_mapping_details { + u32 context; + u8 external_addr[4]; + u16 external_port; + u8 protocol; + u32 vrf_id; + u8 local_num; + vl_api_nat44_lb_addr_port_t locals[local_num]; +}; + /* * Deterministic NAT (CGN) APIs */ diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index f9ecb943..fabd0bc2 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -506,15 +506,16 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, if (snat_is_unk_proto_session (s)) { clib_bihash_kv_16_8_t up_kv; - snat_unk_proto_ses_key_t up_key; + nat_ed_ses_key_t up_key; up_key.l_addr = s->in2out.addr; up_key.r_addr = s->ext_host_addr; up_key.fib_index = s->in2out.fib_index; up_key.proto = s->in2out.port; - up_key.rsvd[0] = up_key.rsvd[1] = 
up_key.rsvd[2] = 0; + up_key.rsvd = 0; + up_key.l_port = 0; up_kv.key[0] = up_key.as_u64[0]; up_kv.key[1] = up_key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0)) clib_warning ("in2out key del failed"); @@ -522,7 +523,7 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, up_key.fib_index = s->out2in.fib_index; up_kv.key[0] = up_key.as_u64[0]; up_kv.key[1] = up_key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0)) clib_warning ("out2in key del failed"); @@ -589,6 +590,243 @@ delete: return 0; } +int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, + snat_protocol_t proto, u32 vrf_id, + nat44_lb_addr_port_t *locals, u8 is_add) +{ + snat_main_t * sm = &snat_main; + snat_static_mapping_t *m; + snat_session_key_t m_key; + clib_bihash_kv_8_8_t kv, value; + u32 fib_index; + snat_address_t *a = 0; + int i; + nat44_lb_addr_port_t *local; + snat_user_key_t w_key0; + snat_worker_key_t w_key1; + u32 worker_index = 0; + + m_key.addr = e_addr; + m_key.port = e_port; + m_key.protocol = proto; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + m = 0; + else + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (is_add) + { + if (m) + return VNET_API_ERROR_VALUE_EXIST; + + if (vec_len (locals) < 2) + return VNET_API_ERROR_INVALID_VALUE; + + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + vrf_id); + + /* Find external address in allocated addresses and reserve port for + address and port pair mapping when dynamic translations enabled */ + if (!sm->static_mapping_only) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + /* External port must be unused */ + switch (proto) + { 
+#define _(N, j, n, s) \ + case SNAT_PROTOCOL_##N: \ + if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, e_port)) \ + return VNET_API_ERROR_INVALID_VALUE; \ + clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 1); \ + if (e_port > 1024) \ + a->busy_##n##_ports++; \ + break; + foreach_snat_protocol +#undef _ + default: + clib_warning("unknown_protocol"); + return VNET_API_ERROR_INVALID_VALUE_2; + } + break; + } + } + /* External address must be allocated */ + if (!a) + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + pool_get (sm->static_mappings, m); + memset (m, 0, sizeof (*m)); + m->external_addr = e_addr; + m->addr_only = 0; + m->vrf_id = vrf_id; + m->fib_index = fib_index; + m->external_port = e_port; + m->proto = proto; + + m_key.addr = m->external_addr; + m_key.port = m->external_port; + m_key.protocol = m->proto; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + kv.value = m - sm->static_mappings; + if (clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1)) + { + clib_warning ("static_mapping_by_external key add failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + m_key.port = clib_host_to_net_u16 (m->external_port); + kv.key = m_key.as_u64; + kv.value = ~0ULL; + if (clib_bihash_add_del_8_8(&sm->out2in, &kv, 1)) + { + clib_warning ("static_mapping_by_local key add failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + + m_key.fib_index = m->fib_index; + + /* Assign worker */ + if (sm->workers) + { + w_key0.addr = locals[0].addr; + w_key0.fib_index = fib_index; + kv.key = w_key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) + worker_index = sm->first_worker_index + + sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + else + worker_index = value.value; + + w_key1.addr = m->external_addr; + w_key1.port = clib_host_to_net_u16 (m->external_port); + w_key1.fib_index = sm->outside_fib_index; + kv.key = w_key1.as_u64; + kv.value = worker_index; + if (clib_bihash_add_del_8_8 
(&sm->worker_by_out, &kv, 1))
+            {
+              clib_warning ("worker-by-out add key failed");
+              return VNET_API_ERROR_UNSPECIFIED;
+            }
+        }
+
+      /* Register every local (backend) address:port of the mapping. */
+      for (i = 0; i < vec_len (locals); i++)
+        {
+          m_key.addr = locals[i].addr;
+          m_key.port = locals[i].port;
+          kv.key = m_key.as_u64;
+          kv.value = m - sm->static_mappings;
+          clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1);
+          /* prefix is the running sum of probabilities up to and including
+           * entry i.  The first entry has no predecessor, so it must start
+           * the sum itself instead of reading locals[-1]. */
+          locals[i].prefix = (i == 0) ? locals[i].probability :
+            (locals[i - 1].prefix + locals[i].probability);
+          vec_add1 (m->locals, locals[i]);
+          m_key.port = clib_host_to_net_u16 (locals[i].port);
+          kv.key = m_key.as_u64;
+          kv.value = ~0ULL;
+          if (clib_bihash_add_del_8_8(&sm->in2out, &kv, 1))
+            {
+              clib_warning ("in2out key add failed");
+              return VNET_API_ERROR_UNSPECIFIED;
+            }
+          /* Assign worker */
+          if (sm->workers)
+            {
+              w_key0.addr = locals[i].addr;
+              w_key0.fib_index = fib_index;
+              kv.key = w_key0.as_u64;
+              kv.value = worker_index;
+              if (clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1))
+                {
+                  clib_warning ("worker-by-in key add failed");
+                  return VNET_API_ERROR_UNSPECIFIED;
+                }
+            }
+        }
+    }
+  else
+    {
+      if (!m)
+        return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+      fib_table_unlock (m->fib_index, FIB_PROTOCOL_IP4);
+
+      /* Free external address port */
+      if (!sm->static_mapping_only)
+        {
+          for (i = 0; i < vec_len (sm->addresses); i++)
+            {
+              if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
+                {
+                  a = sm->addresses + i;
+                  switch (proto)
+                    {
+#define _(N, j, n, s) \
+                    case SNAT_PROTOCOL_##N: \
+                      clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 0); \
+                      if (e_port > 1024) \
+                        a->busy_##n##_ports--; \
+                      break;
+                      foreach_snat_protocol
+#undef _
+                    default:
+                      clib_warning("unknown_protocol");
+                      return VNET_API_ERROR_INVALID_VALUE_2;
+                    }
+                  break;
+                }
+            }
+        }
+
+      m_key.addr = m->external_addr;
+      m_key.port = m->external_port;
+      m_key.protocol = m->proto;
+      m_key.fib_index = sm->outside_fib_index;
+      kv.key = m_key.as_u64;
+      if (clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 0))
+        {
+          clib_warning ("static_mapping_by_external key del 
failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + m_key.port = clib_host_to_net_u16 (m->external_port); + kv.key = m_key.as_u64; + if (clib_bihash_add_del_8_8(&sm->out2in, &kv, 0)) + { + clib_warning ("outi2in key del failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + + vec_foreach (local, m->locals) + { + m_key.addr = local->addr; + m_key.port = local->port; + m_key.fib_index = m->fib_index; + kv.key = m_key.as_u64; + if (clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0)) + { + clib_warning ("static_mapping_by_local key del failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + m_key.port = clib_host_to_net_u16 (local->port); + kv.key = m_key.as_u64; + if (clib_bihash_add_del_8_8(&sm->in2out, &kv, 0)) + { + clib_warning ("in2out key del failed"); + return VNET_API_ERROR_UNSPECIFIED; + } + } + + pool_put (sm->static_mappings, m); + } + + return 0; +} + int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) { snat_address_t *a = 0; @@ -649,15 +887,16 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) if (snat_is_unk_proto_session (ses)) { clib_bihash_kv_16_8_t up_kv; - snat_unk_proto_ses_key_t up_key; + nat_ed_ses_key_t up_key; up_key.l_addr = ses->in2out.addr; up_key.r_addr = ses->ext_host_addr; up_key.fib_index = ses->in2out.fib_index; up_key.proto = ses->in2out.port; - up_key.rsvd[0] = up_key.rsvd[1] = up_key.rsvd[2] = 0; + up_key.rsvd = 0; + up_key.l_port = 0; up_kv.key[0] = up_key.as_u64[0]; up_kv.key[1] = up_key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &up_kv, 0)) clib_warning ("in2out key del failed"); @@ -665,7 +904,7 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) up_key.fib_index = ses->out2in.fib_index; up_kv.key[0] = up_key.as_u64[0]; up_kv.key[1] = up_key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &up_kv, 0)) clib_warning 
("out2in key del failed");
             }
@@ -1048,6 +1287,7 @@ int snat_static_mapping_match (snat_main_t * sm,
   snat_static_mapping_t *m;
   snat_session_key_t m_key;
   clib_bihash_8_8_t *mapping_hash = &sm->static_mapping_by_local;
+  u32 rand, lo = 0, hi, mid;
   if (by_external)
     mapping_hash = &sm->static_mapping_by_external;
@@ -1073,11 +1313,29 @@ int snat_static_mapping_match (snat_main_t * sm,
   if (by_external)
     {
-      mapping->addr = m->local_addr;
-      /* Address only mapping doesn't change port */
-      mapping->port = m->addr_only ? match.port
-        : clib_host_to_net_u16 (m->local_port);
+      if (vec_len (m->locals))
+        {
+          hi = vec_len (m->locals) - 1;
+          rand = 1 + (random_u32 (&sm->random_seed) % m->locals[hi].prefix);
+          while (lo < hi)
+            {
+              mid = ((hi - lo) >> 1) + lo; /* midpoint of [lo, hi]; the old (hi - 1) form could stay at hi and never terminate */
+              (rand > m->locals[mid].prefix) ? (lo = mid + 1) : (hi = mid);
+            }
+          if (!(m->locals[lo].prefix >= rand))
+            return 1;
+          mapping->addr = m->locals[lo].addr;
+          mapping->port = clib_host_to_net_u16 (m->locals[lo].port);
+        }
+      else
+        {
+          mapping->addr = m->local_addr;
+          /* Address only mapping doesn't change port */
+          mapping->port = m->addr_only ? match.port
+            : clib_host_to_net_u16 (m->local_port);
+        }
       mapping->fib_index = m->fib_index;
+      mapping->protocol = m->proto;
     }
   else
     {
@@ -1517,6 +1775,101 @@ VLIB_CLI_COMMAND (add_static_mapping_command, static) = {
     "nat44 add static mapping tcp|udp|icmp local [] external [] [vrf ] [del]",
 };
+static clib_error_t *
+add_lb_static_mapping_command_fn (vlib_main_t * vm,
+                                  unformat_input_t * input,
+                                  vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t * error = 0;
+  ip4_address_t l_addr, e_addr;
+  u32 l_port = 0, e_port = 0, vrf_id = 0, probability = 0;
+  int is_add = 1;
+  int rv;
+  snat_protocol_t proto;
+  u8 proto_set = 0;
+  nat44_lb_addr_port_t *locals = 0, local;
+
+  /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "local %U:%u probability %u", + unformat_ip4_address, &l_addr, &l_port, &probability)) + { + memset (&local, 0, sizeof (local)); + local.addr = l_addr; + local.port = (u16) l_port; + local.probability = (u8) probability; + vec_add1 (locals, local); + } + else if (unformat (line_input, "external %U:%u", unformat_ip4_address, + &e_addr, &e_port)) + ; + else if (unformat (line_input, "vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "protocol %U", unformat_snat_protocol, + &proto)) + proto_set = 1; + else if (unformat (line_input, "del")) + is_add = 0; + else + { + error = clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (vec_len (locals) < 2) + { + error = clib_error_return (0, "at least two local must be set"); + goto done; + } + + if (!proto_set) + { + error = clib_error_return (0, "missing protocol"); + goto done; + } + + rv = nat44_add_del_lb_static_mapping (e_addr, (u16) e_port, proto, vrf_id, + locals, is_add); + + switch (rv) + { + case VNET_API_ERROR_INVALID_VALUE: + error = clib_error_return (0, "External port already in use."); + goto done; + case VNET_API_ERROR_NO_SUCH_ENTRY: + if (is_add) + error = clib_error_return (0, "External addres must be allocated."); + else + error = clib_error_return (0, "Mapping not exist."); + goto done; + case VNET_API_ERROR_VALUE_EXIST: + error = clib_error_return (0, "Mapping already exist."); + goto done; + default: + break; + } + +done: + unformat_free (line_input); + vec_free (locals); + + return error; +} + +VLIB_CLI_COMMAND (add_lb_static_mapping_command, static) = { + .path = "nat44 add load-balancing static mapping", + .function = add_lb_static_mapping_command_fn, + .short_help = + "nat44 add load-balancing static mapping protocol tcp|udp external : local : probability 
[vrf ] [del]", +}; + static clib_error_t * set_workers_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -1839,10 +2192,10 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) clib_bihash_init_8_8 (&sm->user_hash, "users", user_buckets, user_memory_size); - clib_bihash_init_16_8 (&sm->in2out_unk_proto, "in2out-unk-proto", + clib_bihash_init_16_8 (&sm->in2out_ed, "in2out-ed", translation_buckets, translation_memory_size); - clib_bihash_init_16_8 (&sm->out2in_unk_proto, "out2in-unk-proto", + clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed", translation_buckets, translation_memory_size); } else @@ -1884,18 +2237,10 @@ u8 * format_snat_session_state (u8 * s, va_list * args) u8 * format_snat_key (u8 * s, va_list * args) { snat_session_key_t * key = va_arg (*args, snat_session_key_t *); - char * protocol_string = "unknown"; - static char *protocol_strings[] = { - "UDP", - "TCP", - "ICMP", - }; - - if (key->protocol < ARRAY_LEN(protocol_strings)) - protocol_string = protocol_strings[key->protocol]; - s = format (s, "%U proto %s port %d fib %d", - format_ip4_address, &key->addr, protocol_string, + s = format (s, "%U proto %U port %d fib %d", + format_ip4_address, &key->addr, + format_snat_protocol, key->protocol, clib_net_to_host_u16 (key->port), key->fib_index); return s; } @@ -1919,6 +2264,9 @@ u8 * format_snat_session (u8 * s, va_list * args) s = format (s, " i2o %U\n", format_snat_key, &sess->in2out); s = format (s, " o2i %U\n", format_snat_key, &sess->out2in); } + if (sess->ext_host_addr.as_u32) + s = format (s, " external host %U\n", + format_ip4_address, &sess->ext_host_addr); s = format (s, " last heard %.2f\n", sess->last_heard); s = format (s, " total pkts %d, total bytes %lld\n", sess->total_pkts, sess->total_bytes); @@ -1926,6 +2274,8 @@ u8 * format_snat_session (u8 * s, va_list * args) s = format (s, " static translation\n"); else s = format (s, " dynamic translation\n"); + if (sess->flags & SNAT_SESSION_FLAG_LOAD_BALANCING) + s = format (s,
" load-balancing\n"); return s; } @@ -1973,6 +2323,7 @@ u8 * format_snat_user (u8 * s, va_list * args) u8 * format_snat_static_mapping (u8 * s, va_list * args) { snat_static_mapping_t *m = va_arg (*args, snat_static_mapping_t *); + nat44_lb_addr_port_t *local; if (m->addr_only) s = format (s, "local %U external %U vrf %d", @@ -1980,12 +2331,25 @@ u8 * format_snat_static_mapping (u8 * s, va_list * args) format_ip4_address, &m->external_addr, m->vrf_id); else - s = format (s, "%U local %U:%d external %U:%d vrf %d", - format_snat_protocol, m->proto, - format_ip4_address, &m->local_addr, m->local_port, - format_ip4_address, &m->external_addr, m->external_port, - m->vrf_id); - + { + if (vec_len (m->locals)) + { + s = format (s, "%U vrf %d external %U:%d", + format_snat_protocol, m->proto, + m->vrf_id, + format_ip4_address, &m->external_addr, m->external_port); + vec_foreach (local, m->locals) + s = format (s, "\n local %U:%d probability %d%%", + format_ip4_address, &local->addr, local->port, + local->probability); /* "%%" emits a literal percent sign after the probability */ + } + else + s = format (s, "%U local %U:%d external %U:%d vrf %d", + format_snat_protocol, m->proto, + format_ip4_address, &m->local_addr, m->local_port, + format_ip4_address, &m->external_addr, m->external_port, + m->vrf_id); + } return s; } @@ -2208,6 +2572,10 @@ show_snat_command_fn (vlib_main_t * vm, verbose - 1); vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in, verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->in2out_ed, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->out2in_ed, + verbose - 1); vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in, verbose - 1); vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out, diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index 04c466dc..8935144d 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -62,12 +62,13 @@ typedef struct { ip4_address_t l_addr; ip4_address_t r_addr; u32 fib_index; + u16 l_port; u8 proto; - u8
rsvd[3]; + u8 rsvd; }; u64 as_u64[2]; }; -} snat_unk_proto_ses_key_t; +} nat_ed_ses_key_t; typedef struct { union @@ -139,6 +140,7 @@ typedef enum { #define SNAT_SESSION_FLAG_STATIC_MAPPING 1 #define SNAT_SESSION_FLAG_UNKNOWN_PROTO 2 +#define SNAT_SESSION_FLAG_LOAD_BALANCING 4 typedef CLIB_PACKED(struct { snat_session_key_t out2in; /* 0-15 */ @@ -205,6 +207,13 @@ typedef struct { snat_det_session_t * sessions; } snat_det_map_t; +typedef struct { + ip4_address_t addr; + u16 port; + u8 probability; + u8 prefix; +} nat44_lb_addr_port_t; + typedef struct { ip4_address_t local_addr; ip4_address_t external_addr; @@ -214,6 +223,7 @@ typedef struct { u32 vrf_id; u32 fib_index; snat_protocol_t proto; + nat44_lb_addr_port_t *locals; } snat_static_mapping_t; typedef struct { @@ -264,9 +274,9 @@ typedef struct snat_main_s { clib_bihash_8_8_t out2in; clib_bihash_8_8_t in2out; - /* Unknown protocol sessions lookup tables */ - clib_bihash_16_8_t out2in_unk_proto; - clib_bihash_16_8_t in2out_unk_proto; + /* Endpoint address dependent sessions lookup tables */ + clib_bihash_16_8_t out2in_ed; + clib_bihash_16_8_t in2out_ed; /* Find-a-user => src address lookup */ clib_bihash_8_8_t user_hash; @@ -496,6 +506,9 @@ int snat_interface_add_del_output_feature(u32 sw_if_index, u8 is_inside, int snat_add_interface_address(snat_main_t *sm, u32 sw_if_index, int is_del); uword unformat_snat_protocol(unformat_input_t * input, va_list * args); u8 * format_snat_protocol(u8 * s, va_list * args); +int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, + snat_protocol_t proto, u32 vrf_id, + nat44_lb_addr_port_t *locals, u8 is_add); static_always_inline u8 icmp_is_error_message (icmp46_header_t * icmp) diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c index 0a2141f2..fa20f2cc 100644 --- a/src/plugins/nat/nat_api.c +++ b/src/plugins/nat/nat_api.c @@ -27,6 +27,10 @@ #include #include +#define vl_api_nat44_lb_addr_port_t_endian vl_noop_handler +#define 
vl_api_nat44_add_del_lb_static_mapping_t_endian vl_noop_handler +#define vl_api_nat44_nat44_lb_static_mapping_details_t_endian vl_noop_handler + /* define message structures */ #define vl_typedefs #include @@ -465,7 +469,8 @@ static void /* *INDENT-OFF* */ pool_foreach (m, sm->static_mappings, ({ - send_snat_static_mapping_details (m, q, mp->context); + if (!vec_len(m->locals)) + send_snat_static_mapping_details (m, q, mp->context); })); /* *INDENT-ON* */ @@ -1888,7 +1893,8 @@ vl_api_nat44_static_mapping_dump_t_handler (vl_api_nat44_static_mapping_dump_t /* *INDENT-OFF* */ pool_foreach (m, sm->static_mappings, ({ - send_nat44_static_mapping_details (m, q, mp->context); + if (!vec_len(m->locals)) + send_nat44_static_mapping_details (m, q, mp->context); })); /* *INDENT-ON* */ @@ -2136,6 +2142,131 @@ vl_api_nat44_user_session_dump_t_print (vl_api_nat44_user_session_dump_t * mp, FINISH; } +static nat44_lb_addr_port_t * +unformat_nat44_lb_addr_port (vl_api_nat44_lb_addr_port_t * addr_port_pairs, + u8 addr_port_pair_num) +{ + u8 i; + nat44_lb_addr_port_t *lb_addr_port_pairs = 0, lb_addr_port; + vl_api_nat44_lb_addr_port_t *ap; + + for (i = 0; i < addr_port_pair_num; i++) + { + ap = &addr_port_pairs[i]; + memset (&lb_addr_port, 0, sizeof (lb_addr_port)); + clib_memcpy (&lb_addr_port.addr, ap->addr, 4); + lb_addr_port.port = clib_net_to_host_u16 (ap->port); + lb_addr_port.probability = ap->probability; + vec_add1 (lb_addr_port_pairs, lb_addr_port); + } + + return lb_addr_port_pairs; +} + +static void + vl_api_nat44_add_del_lb_static_mapping_t_handler + (vl_api_nat44_add_del_lb_static_mapping_t * mp) +{ + snat_main_t *sm = &snat_main; + vl_api_nat44_add_del_lb_static_mapping_reply_t *rmp; + int rv = 0; + nat44_lb_addr_port_t *locals = 0; + ip4_address_t e_addr; + snat_protocol_t proto; + + locals = unformat_nat44_lb_addr_port (mp->locals, mp->local_num); + clib_memcpy (&e_addr, mp->external_addr, 4); + proto = ip_proto_to_snat_proto (mp->protocol); + + rv = + 
nat44_add_del_lb_static_mapping (e_addr, + clib_net_to_host_u16 (mp->external_port), + proto, clib_net_to_host_u32 (mp->vrf_id), + locals, mp->is_add); + + vec_free (locals); + + REPLY_MACRO (VL_API_NAT44_ADD_DEL_LB_STATIC_MAPPING_REPLY); +} + +static void *vl_api_nat44_add_del_lb_static_mapping_t_print + (vl_api_nat44_add_del_lb_static_mapping_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: nat44_add_del_lb_static_mapping "); + s = format (s, "is_add %d\n", mp->is_add); + + FINISH; +} + +static void +send_nat44_lb_static_mapping_details (snat_static_mapping_t * m, + unix_shared_memory_queue_t * q, + u32 context) +{ + vl_api_nat44_lb_static_mapping_details_t *rmp; + snat_main_t *sm = &snat_main; + nat44_lb_addr_port_t *ap; + vl_api_nat44_lb_addr_port_t *locals; + + rmp = + vl_msg_api_alloc (sizeof (*rmp) + + (vec_len (m->locals) * sizeof (nat44_lb_addr_port_t))); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = + ntohs (VL_API_NAT44_LB_STATIC_MAPPING_DETAILS + sm->msg_id_base); + + clib_memcpy (rmp->external_addr, &(m->external_addr), 4); + rmp->external_port = ntohs (m->external_port); + rmp->protocol = snat_proto_to_ip_proto (m->proto); + rmp->vrf_id = ntohl (m->vrf_id); + rmp->context = context; + + locals = (vl_api_nat44_lb_addr_port_t *) rmp->locals; + vec_foreach (ap, m->locals) + { + clib_memcpy (locals->addr, &(ap->addr), 4); + locals->port = htons (ap->port); + locals->probability = ap->probability; + locals++; + rmp->local_num++; + } + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void + vl_api_nat44_lb_static_mapping_dump_t_handler + (vl_api_nat44_lb_static_mapping_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + snat_main_t *sm = &snat_main; + snat_static_mapping_t *m; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* *INDENT-OFF* */ + pool_foreach (m, sm->static_mappings, + ({ + if (vec_len(m->locals)) + send_nat44_lb_static_mapping_details (m, q, mp->context); + })); + /* 
*INDENT-ON* */ +} + +static void *vl_api_nat44_lb_static_mapping_dump_t_print + (vl_api_nat44_lb_static_mapping_dump_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: nat44_lb_static_mapping_dump "); + + FINISH; +} + /*******************************/ /*** Deterministic NAT (CGN) ***/ /*******************************/ @@ -3159,6 +3290,8 @@ _(NAT44_INTERFACE_ADD_DEL_OUTPUT_FEATURE, \ nat44_interface_add_del_output_feature) \ _(NAT44_INTERFACE_OUTPUT_FEATURE_DUMP, \ nat44_interface_output_feature_dump) \ +_(NAT44_ADD_DEL_LB_STATIC_MAPPING, nat44_add_del_lb_static_mapping) \ +_(NAT44_LB_STATIC_MAPPING_DUMP, nat44_lb_static_mapping_dump) \ _(NAT_DET_ADD_DEL_MAP, nat_det_add_del_map) \ _(NAT_DET_FORWARD, nat_det_forward) \ _(NAT_DET_REVERSE, nat_det_reverse) \ diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index 67950066..55a750e4 100644 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -630,7 +630,7 @@ snat_out2in_unknown_proto (snat_main_t *sm, snat_session_key_t m_key; u32 old_addr, new_addr; ip_csum_t sum; - snat_unk_proto_ses_key_t key; + nat_ed_ses_key_t key; snat_session_t * s; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; snat_user_key_t u_key; @@ -643,11 +643,12 @@ snat_out2in_unknown_proto (snat_main_t *sm, key.r_addr = ip->src_address; key.fib_index = rx_fib_index; key.proto = ip->protocol; - key.rsvd[0] = key.rsvd[1] = key.rsvd[2] = 0; + key.rsvd = 0; + key.l_port = 0; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (!clib_bihash_search_16_8 (&sm->out2in_unk_proto, &s_kv, &s_value)) + if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) { s = pool_elt_at_index (tsm->sessions, s_value.value); new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32; @@ -721,14 +722,14 @@ snat_out2in_unknown_proto (snat_main_t *sm, /* Add to lookup tables */ s_kv.value = s - tsm->sessions; - if (clib_bihash_add_del_16_8 (&sm->out2in_unk_proto, &s_kv, 1)) + if 
(clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) clib_warning ("out2in key add failed"); key.l_addr = ip->dst_address; key.fib_index = m->fib_index; s_kv.key[0] = key.as_u64[0]; s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_add_del_16_8 (&sm->in2out_unk_proto, &s_kv, 1)) + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1)) clib_warning ("in2out key add failed"); } @@ -749,6 +750,152 @@ snat_out2in_unknown_proto (snat_main_t *sm, s->per_user_index); } +static void +snat_out2in_lb (snat_main_t *sm, + vlib_buffer_t * b, + ip4_header_t * ip, + u32 rx_fib_index, + u32 thread_index, + f64 now, + vlib_main_t * vm) +{ + nat_ed_ses_key_t key; + clib_bihash_kv_16_8_t s_kv, s_value; + udp_header_t *udp = ip4_next_header (ip); + tcp_header_t *tcp = (tcp_header_t *) udp; + snat_session_t *s = 0; + snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; + snat_session_key_t e_key, l_key; + clib_bihash_kv_8_8_t kv, value; + u32 old_addr, new_addr; + u32 proto = ip_proto_to_snat_proto (ip->protocol); + u16 new_port, old_port; + ip_csum_t sum; + snat_user_key_t u_key; + snat_user_t *u; + dlist_elt_t *head, *elt; + + old_addr = ip->dst_address.as_u32; + + key.l_addr = ip->dst_address; + key.r_addr = ip->src_address; + key.fib_index = rx_fib_index; + key.proto = ip->protocol; + key.rsvd = 0; + key.l_port = udp->dst_port; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + + if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) + { + s = pool_elt_at_index (tsm->sessions, s_value.value); + } + else + { + e_key.addr = ip->dst_address; + e_key.port = udp->dst_port; + e_key.protocol = proto; + e_key.fib_index = rx_fib_index; + if (snat_static_mapping_match(sm, e_key, &l_key, 1, 0)) + return; + + u_key.addr = l_key.addr; + u_key.fib_index = l_key.fib_index; + kv.key = u_key.as_u64; + + /* Ever heard of the "user" = src ip4 address before? 
*/ + if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + /* no, make a new one */ + pool_get (tsm->users, u); + memset (u, 0, sizeof (*u)); + u->addr = l_key.addr; + u->fib_index = l_key.fib_index; + + pool_get (tsm->list_pool, head); + u->sessions_per_user_list_head_index = head - tsm->list_pool; + + clib_dlist_init (tsm->list_pool, + u->sessions_per_user_list_head_index); + + kv.value = u - tsm->users; + + /* add user */ + if (clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1)) + clib_warning ("user key add failed"); + } + else + { + u = pool_elt_at_index (tsm->users, value.value); + } + + /* Create a new session */ + pool_get (tsm->sessions, s); + memset (s, 0, sizeof (*s)); + + s->ext_host_addr.as_u32 = ip->src_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING; + s->outside_address_index = ~0; + s->out2in = e_key; + s->in2out = l_key; + u->nstaticsessions++; + + /* Create list elts */ + pool_get (tsm->list_pool, elt); + clib_dlist_init (tsm->list_pool, elt - tsm->list_pool); + elt->value = s - tsm->sessions; + s->per_user_index = elt - tsm->list_pool; + s->per_user_list_head_index = u->sessions_per_user_list_head_index; + clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index, + s->per_user_index); + + /* Add to lookup tables */ + s_kv.value = s - tsm->sessions; + if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) + clib_warning ("out2in-ed key add failed"); + + key.l_addr = l_key.addr; + key.fib_index = l_key.fib_index; + key.l_port = l_key.port; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; + if (clib_bihash_add_del_16_8 (&sm->in2out_ed, &s_kv, 1)) + clib_warning ("in2out-ed key add failed"); + } + + new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32; + + /* Update IP checksum */ + sum = ip->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); + ip->checksum = ip_csum_fold (sum); + + if (PREDICT_TRUE(proto == 
SNAT_PROTOCOL_TCP)) + { + old_port = tcp->dst_port; + tcp->dst_port = s->in2out.port; + new_port = tcp->dst_port; + + sum = tcp->checksum; + sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); + sum = ip_csum_update (sum, old_port, new_port, ip4_header_t, length); + tcp->checksum = ip_csum_fold(sum); + } + else + { + udp->dst_port = s->in2out.port; + udp->checksum = 0; + } + + vnet_buffer(b)->sw_if_index[VLIB_TX] = s->in2out.fib_index; + + /* Accounting */ + s->last_heard = now; + s->total_pkts++; + s->total_bytes += vlib_buffer_length_in_chain (vm, b); +} + static uword snat_out2in_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -894,8 +1041,20 @@ snat_out2in_node_fn (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, - value0.value); + { + if (PREDICT_FALSE (value0.value == ~0ULL)) + { + snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, + vm); + goto trace0; + } + else + { + s0 = pool_elt_at_index ( + sm->per_thread_data[thread_index].sessions, + value0.value); + } + } old_addr0 = ip0->dst_address.as_u32; ip0->dst_address = s0->in2out.addr; @@ -1033,8 +1192,20 @@ snat_out2in_node_fn (vlib_main_t * vm, } } else - s1 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, - value1.value); + { + if (PREDICT_FALSE (value1.value == ~0ULL)) + { + snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index, now, + vm); + goto trace1; + } + else + { + s1 = pool_elt_at_index ( + sm->per_thread_data[thread_index].sessions, + value1.value); + } + } old_addr1 = ip1->dst_address.as_u32; ip1->dst_address = s1->in2out.addr; @@ -1209,8 +1380,20 @@ snat_out2in_node_fn (vlib_main_t * vm, } } else - s0 = pool_elt_at_index (sm->per_thread_data[thread_index].sessions, - value0.value); + { + if (PREDICT_FALSE (value0.value == ~0ULL)) + { + snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, + vm); + goto trace00; + } + else + { + s0 = pool_elt_at_index ( + 
sm->per_thread_data[thread_index].sessions, + value0.value); + } + } old_addr0 = ip0->dst_address.as_u32; ip0->dst_address = s0->in2out.addr; diff --git a/test/test_nat.py b/test/test_nat.py index 0d622b08..de07019f 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -15,6 +15,7 @@ from scapy.packet import bind_layers from util import ppp from ipfix import IPFIX, Set, Template, Data, IPFIXDecoder from time import sleep +from util import ip4_range class MethodHolder(VppTestCase): @@ -633,6 +634,15 @@ class TestNAT44(MethodHolder): protocol=sm.protocol, is_add=0) + lb_static_mappings = self.vapi.nat44_lb_static_mapping_dump() + for lb_sm in lb_static_mappings: + self.vapi.nat44_add_del_lb_static_mapping( + lb_sm.external_addr, + lb_sm.external_port, + lb_sm.protocol, + lb_sm.vrf_id, + is_add=0) + adresses = self.vapi.nat44_address_dump() for addr in adresses: self.vapi.nat44_add_del_address_range(addr.ip_address, @@ -1037,6 +1047,97 @@ class TestNAT44(MethodHolder): self.pg_start() self.pg3.assert_nothing_captured() + def test_static_lb(self): + """ NAT44 local service load balancing """ + external_addr_n = socket.inet_pton(socket.AF_INET, self.nat_addr) + external_port = 80 + local_port = 8080 + server1 = self.pg0.remote_hosts[0] + server2 = self.pg0.remote_hosts[1] + + locals = [{'addr': server1.ip4n, + 'port': local_port, + 'probability': 70}, + {'addr': server2.ip4n, + 'port': local_port, + 'probability': 30}] + + self.nat44_add_address(self.nat_addr) + self.vapi.nat44_add_del_lb_static_mapping(external_addr_n, + external_port, + IP_PROTOS.tcp, + local_num=len(locals), + locals=locals) + self.vapi.nat44_interface_add_del_feature(self.pg0.sw_if_index) + self.vapi.nat44_interface_add_del_feature(self.pg1.sw_if_index, + is_inside=0) + + # from client to service + p = (Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac) / + IP(src=self.pg1.remote_ip4, dst=self.nat_addr) / + TCP(sport=12345, dport=external_port)) + self.pg1.add_stream(p) + 
self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg0.get_capture(1) + p = capture[0] + server = None + try: + ip = p[IP] + tcp = p[TCP] + self.assertIn(ip.dst, [server1.ip4, server2.ip4]) + if ip.dst == server1.ip4: + server = server1 + else: + server = server2 + self.assertEqual(tcp.dport, local_port) + self.check_tcp_checksum(p) + self.check_ip_checksum(p) + except: + self.logger.error(ppp("Unexpected or invalid packet:", p)) + raise + + # from service back to client + p = (Ether(src=server.mac, dst=self.pg0.local_mac) / + IP(src=server.ip4, dst=self.pg1.remote_ip4) / + TCP(sport=local_port, dport=12345)) + self.pg0.add_stream(p) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg1.get_capture(1) + p = capture[0] + try: + ip = p[IP] + tcp = p[TCP] + self.assertEqual(ip.src, self.nat_addr) + self.assertEqual(tcp.sport, external_port) + self.check_tcp_checksum(p) + self.check_ip_checksum(p) + except: + self.logger.error(ppp("Unexpected or invalid packet:", p)) + raise + + # multiple clients + server1_n = 0 + server2_n = 0 + clients = ip4_range(self.pg1.remote_ip4, 10, 20) + pkts = [] + for client in clients: + p = (Ether(src=self.pg1.remote_mac, dst=self.pg1.local_mac) / + IP(src=client, dst=self.nat_addr) / + TCP(sport=12345, dport=external_port)) + pkts.append(p) + self.pg1.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + capture = self.pg0.get_capture(len(pkts)) + for p in capture: + if p[IP].dst == server1.ip4: + server1_n += 1 + else: + server2_n += 1 + self.assertTrue(server1_n > server2_n) + def test_multiple_inside_interfaces(self): """ NAT44 multiple non-overlapping address space inside interfaces """ diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 61db4d6b..03238b9d 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -1238,6 +1238,36 @@ class VppPapiProvider(object): """ return self.api(self.papi.nat44_user_dump, {}) 
+ def nat44_add_del_lb_static_mapping( + self, + external_addr, + external_port, + protocol, + vrf_id=0, + local_num=0, + locals=None, + is_add=1): + """Add/delete NAT44 load balancing static mapping + + :param is_add - 1 if add, 0 if delete + """ + return self.api( + self.papi.nat44_add_del_lb_static_mapping, + {'is_add': is_add, + 'external_addr': external_addr, + 'external_port': external_port, + 'protocol': protocol, + 'vrf_id': vrf_id, + 'local_num': local_num, + 'locals': locals}) + + def nat44_lb_static_mapping_dump(self): + """Dump NAT44 load balancing static mappings + + :return: Dictionary of NAT44 load balancing static mapping + """ + return self.api(self.papi.nat44_lb_static_mapping_dump, {}) + def nat_det_add_del_map( self, in_addr, -- cgit 1.2.3-korg From 7d6412e66d7bef15e964935845ed30c03d8b12b7 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Thu, 7 Sep 2017 03:50:49 -0700 Subject: NAT: fixed bug in snat_get_worker_out2in_cb (VPP-832) Change-Id: Id9c977634a1259865e3403ba0d90aecaca85207d Signed-off-by: Matus Fabian --- src/plugins/nat/nat.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index fabd0bc2..aa7ef10a 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -2081,6 +2081,8 @@ snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0) } /* Add to translated packets worker lookup */ + key0.port = udp0->dst_port; + kv0.key = key0.as_u64; kv0.value = next_worker_index; clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); } -- cgit 1.2.3-korg From 1500254bee11355bbd69cc1dd9705be4f002f2bd Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Sun, 10 Sep 2017 04:39:11 -0700 Subject: FIB table add/delete API part 2; - this adds the code to create an IP and MPLS table via the API. - but the enforcement that the table must be created before it is used is still missing, this is so that CSIT can pass. 
Change-Id: Id124d884ade6cb7da947225200e3bb193454c555 Signed-off-by: Neale Ranns --- src/plugins/nat/nat.c | 17 +- src/plugins/nat/nat64.c | 13 +- src/vnet/classify/vnet_classify.c | 16 +- src/vnet/dhcp/dhcp4_proxy_node.c | 9 +- src/vnet/dhcp/dhcp6_proxy_node.c | 9 +- src/vnet/dhcp/dhcp_proxy.c | 19 ++- src/vnet/dpo/lookup_dpo.c | 20 ++- src/vnet/dpo/mpls_label_dpo.c | 12 +- src/vnet/ethernet/arp.c | 127 +++++++++++---- src/vnet/fib/fib_api.h | 1 - src/vnet/fib/fib_entry.c | 15 +- src/vnet/fib/fib_entry.h | 1 + src/vnet/fib/fib_entry_src_mpls.c | 7 +- src/vnet/fib/fib_table.c | 43 +++-- src/vnet/fib/fib_table.h | 32 +++- src/vnet/fib/fib_test.c | 27 ++-- src/vnet/fib/ip4_fib.c | 41 +++-- src/vnet/fib/ip4_fib.h | 5 +- src/vnet/fib/ip6_fib.c | 41 +++-- src/vnet/fib/ip6_fib.h | 5 +- src/vnet/fib/mpls_fib.c | 16 +- src/vnet/fib/mpls_fib.h | 5 +- src/vnet/interface_api.c | 177 ++++++++++++++++---- src/vnet/ip/ip.h | 7 + src/vnet/ip/ip4.h | 13 ++ src/vnet/ip/ip4_forward.c | 101 +----------- src/vnet/ip/ip4_source_and_port_range_check.c | 11 +- src/vnet/ip/ip6.h | 13 ++ src/vnet/ip/ip6_forward.c | 103 +----------- src/vnet/ip/ip6_neighbor.c | 108 +++++++++---- src/vnet/ip/ip_api.c | 122 +++++++++++--- src/vnet/ip/lookup.c | 225 ++++++++++++++++++++++++++ src/vnet/lisp-gpe/interface.c | 11 +- src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c | 9 +- src/vnet/lisp-gpe/lisp_gpe_sub_interface.c | 11 +- src/vnet/mfib/ip4_mfib.c | 12 +- src/vnet/mfib/ip4_mfib.h | 5 +- src/vnet/mfib/ip6_mfib.c | 12 +- src/vnet/mfib/ip6_mfib.h | 5 +- src/vnet/mfib/mfib_entry.c | 11 ++ src/vnet/mfib/mfib_entry.h | 2 + src/vnet/mfib/mfib_table.c | 88 ++++++++-- src/vnet/mfib/mfib_table.h | 29 +++- src/vnet/mfib/mfib_test.c | 11 +- src/vnet/mfib/mfib_types.h | 8 +- src/vnet/mpls/interface.c | 26 ++- src/vnet/mpls/mpls.c | 76 ++++++++- src/vnet/mpls/mpls.h | 16 +- src/vnet/mpls/mpls_api.c | 66 ++++++-- src/vnet/srv6/sr_policy_rewrite.c | 6 +- src/vnet/srv6/sr_steering.c | 6 +- src/vpp/api/api.c | 5 +- 
src/vpp/api/custom_dump.c | 3 - test/test_dhcp.py | 24 ++- test/test_gre.py | 8 +- test/test_ip4.py | 11 +- test/test_ip4_vrf_multi_instance.py | 4 +- test/test_ip6.py | 7 +- test/test_ip6_vrf_multi_instance.py | 4 +- test/test_ip_mcast.py | 98 ++++++++++- test/test_mpls.py | 48 +++++- test/test_nat.py | 13 ++ test/test_neighbor.py | 66 +++++++- test/vpp_ip_route.py | 73 +++++++++ test/vpp_papi_provider.py | 46 ++++-- 65 files changed, 1643 insertions(+), 538 deletions(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index aa7ef10a..8aecac6d 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -167,7 +167,8 @@ void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id) ap->addr = *addr; if (vrf_id != ~0) ap->fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + FIB_SOURCE_PLUGIN_HI); else ap->fib_index = ~0; #define _(N, i, n, s) \ @@ -625,7 +626,8 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, return VNET_API_ERROR_INVALID_VALUE; fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - vrf_id); + vrf_id, + FIB_SOURCE_PLUGIN_HI); /* Find external address in allocated addresses and reserve port for address and port pair mapping when dynamic translations enabled */ @@ -754,7 +756,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, if (!m) return VNET_API_ERROR_NO_SUCH_ENTRY; - fib_table_unlock (m->fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock (m->fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_PLUGIN_HI); /* Free external address port */ if (!sm->static_mapping_only) @@ -874,7 +876,8 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) } if (a->fib_index != ~0) - fib_table_unlock(a->fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock(a->fib_index, FIB_PROTOCOL_IP4, + FIB_SOURCE_PLUGIN_HI); /* Delete sessions using address */ if 
(a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports) @@ -2151,10 +2154,12 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) sm->max_translations_per_user = max_translations_per_user; sm->outside_vrf_id = outside_vrf_id; sm->outside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - outside_vrf_id); + outside_vrf_id, + FIB_SOURCE_PLUGIN_HI); sm->inside_vrf_id = inside_vrf_id; sm->inside_fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - inside_vrf_id); + inside_vrf_id, + FIB_SOURCE_PLUGIN_HI); sm->static_mapping_only = static_mapping_only; sm->static_mapping_connection_tracking = static_mapping_connection_tracking; diff --git a/src/plugins/nat/nat64.c b/src/plugins/nat/nat64.c index b04901fa..bfcfa9b3 100644 --- a/src/plugins/nat/nat64.c +++ b/src/plugins/nat/nat64.c @@ -107,7 +107,8 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) a->fib_index = 0; if (vrf_id != ~0) a->fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + FIB_SOURCE_PLUGIN_HI); #define _(N, i, n, s) \ clib_bitmap_alloc (a->busy_##n##_port_bitmap, 65535); foreach_snat_protocol @@ -119,7 +120,8 @@ nat64_add_del_pool_addr (ip4_address_t * addr, u32 vrf_id, u8 is_add) return VNET_API_ERROR_NO_SUCH_ENTRY; if (a->fib_index) - fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6); + fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, + FIB_SOURCE_PLUGIN_HI); #define _(N, id, n, s) \ clib_bitmap_free (a->busy_##n##_port_bitmap); @@ -353,8 +355,8 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr, { nat64_main_t *nm = &nat64_main; nat64_db_bib_entry_t *bibe; - u32 fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id); + u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + FIB_SOURCE_PLUGIN_HI); snat_protocol_t p = ip_proto_to_snat_proto (proto); ip46_address_t addr; int i; @@ -644,7 +646,8 @@ 
nat64_add_del_prefix (ip6_address_t * prefix, u8 plen, u32 vrf_id, u8 is_add) { vec_add2 (nm->pref64, p, 1); p->fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, vrf_id, + FIB_SOURCE_PLUGIN_HI); p->vrf_id = vrf_id; } diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c index 879fba3c..57d86748 100644 --- a/src/vnet/classify/vnet_classify.c +++ b/src/vnet/classify/vnet_classify.c @@ -368,10 +368,10 @@ vnet_classify_entry_claim_resource (vnet_classify_entry_t *e) switch (e->action) { case CLASSIFY_ACTION_SET_IP4_FIB_INDEX: - fib_table_lock (e->metadata, FIB_PROTOCOL_IP4); + fib_table_lock (e->metadata, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY); break; case CLASSIFY_ACTION_SET_IP6_FIB_INDEX: - fib_table_lock (e->metadata, FIB_PROTOCOL_IP6); + fib_table_lock (e->metadata, FIB_PROTOCOL_IP6, FIB_SOURCE_CLASSIFY); break; } } @@ -382,10 +382,10 @@ vnet_classify_entry_release_resource (vnet_classify_entry_t *e) switch (e->action) { case CLASSIFY_ACTION_SET_IP4_FIB_INDEX: - fib_table_unlock (e->metadata, FIB_PROTOCOL_IP4); + fib_table_unlock (e->metadata, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY); break; case CLASSIFY_ACTION_SET_IP6_FIB_INDEX: - fib_table_unlock (e->metadata, FIB_PROTOCOL_IP6); + fib_table_unlock (e->metadata, FIB_PROTOCOL_IP6, FIB_SOURCE_CLASSIFY); break; } } @@ -2096,9 +2096,13 @@ int vnet_classify_add_del_session (vnet_classify_main_t * cm, e->flags = 0; e->action = action; if (e->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX) - e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, metadata); + e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + metadata, + FIB_SOURCE_CLASSIFY); else if (e->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX) - e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, metadata); + e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, + metadata, + FIB_SOURCE_CLASSIFY); else 
e->metadata = 0; diff --git a/src/vnet/dhcp/dhcp4_proxy_node.c b/src/vnet/dhcp/dhcp4_proxy_node.c index 1b59cdea..339a7885 100644 --- a/src/vnet/dhcp/dhcp4_proxy_node.c +++ b/src/vnet/dhcp/dhcp4_proxy_node.c @@ -785,7 +785,8 @@ dhcp4_proxy_set_server (ip46_address_t *addr, return VNET_API_ERROR_INVALID_SRC_ADDRESS; rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, - rx_table_id); + rx_table_id, + FIB_SOURCE_DHCP); if (is_del) { @@ -795,7 +796,7 @@ dhcp4_proxy_set_server (ip46_address_t *addr, fib_table_entry_special_remove(rx_fib_index, &all_1s, FIB_SOURCE_DHCP); - fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP); } } else @@ -808,10 +809,10 @@ dhcp4_proxy_set_server (ip46_address_t *addr, &all_1s, FIB_SOURCE_DHCP, FIB_ENTRY_FLAG_LOCAL); - fib_table_lock (rx_fib_index, FIB_PROTOCOL_IP4); + fib_table_lock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP); } } - fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock (rx_fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_DHCP); return (rc); } diff --git a/src/vnet/dhcp/dhcp6_proxy_node.c b/src/vnet/dhcp/dhcp6_proxy_node.c index 9c2f5220..ce7a8fca 100644 --- a/src/vnet/dhcp/dhcp6_proxy_node.c +++ b/src/vnet/dhcp/dhcp6_proxy_node.c @@ -841,7 +841,8 @@ dhcp6_proxy_set_server (ip46_address_t *addr, return VNET_API_ERROR_INVALID_SRC_ADDRESS; rx_fib_index = mfib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, - rx_table_id); + rx_table_id, + MFIB_SOURCE_DHCP); if (is_del) { @@ -851,7 +852,7 @@ dhcp6_proxy_set_server (ip46_address_t *addr, mfib_table_entry_delete(rx_fib_index, &all_dhcp_servers, MFIB_SOURCE_DHCP); - mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); + mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP); } } else @@ -885,11 +886,11 @@ dhcp6_proxy_set_server (ip46_address_t *addr, MFIB_SOURCE_DHCP, MFIB_RPF_ID_NONE, MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF); - mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6); + 
mfib_table_lock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP); } } - mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6); + mfib_table_unlock(rx_fib_index, FIB_PROTOCOL_IP6, MFIB_SOURCE_DHCP); return (rc); } diff --git a/src/vnet/dhcp/dhcp_proxy.c b/src/vnet/dhcp/dhcp_proxy.c index ba7f354e..1784906b 100644 --- a/src/vnet/dhcp/dhcp_proxy.c +++ b/src/vnet/dhcp/dhcp_proxy.c @@ -29,9 +29,9 @@ dhcp_proxy_rx_table_lock (fib_protocol_t proto, u32 fib_index) { if (FIB_PROTOCOL_IP4 == proto) - fib_table_lock(fib_index, proto); + fib_table_lock(fib_index, proto, FIB_SOURCE_DHCP); else - mfib_table_lock(fib_index, proto); + mfib_table_lock(fib_index, proto, MFIB_SOURCE_DHCP); } static void @@ -39,9 +39,9 @@ dhcp_proxy_rx_table_unlock (fib_protocol_t proto, u32 fib_index) { if (FIB_PROTOCOL_IP4 == proto) - fib_table_unlock(fib_index, proto); + fib_table_unlock(fib_index, proto, FIB_SOURCE_DHCP); else - mfib_table_unlock(fib_index, proto); + mfib_table_unlock(fib_index, proto, MFIB_SOURCE_DHCP); } u32 @@ -169,7 +169,7 @@ dhcp_proxy_server_del (fib_protocol_t proto, if (~0 != index) { server = &proxy->dhcp_servers[index]; - fib_table_unlock (server->server_fib_index, proto); + fib_table_unlock (server->server_fib_index, proto, FIB_SOURCE_DHCP); vec_del1(proxy->dhcp_servers, index); @@ -228,7 +228,8 @@ dhcp_proxy_server_add (fib_protocol_t proto, dhcp_server_t server = { .dhcp_server = *addr, .server_fib_index = fib_table_find_or_create_and_lock(proto, - server_table_id), + server_table_id, + FIB_SOURCE_DHCP), }; vec_add1(proxy->dhcp_servers, server); @@ -297,9 +298,11 @@ int dhcp_proxy_set_vss (fib_protocol_t proto, int rc = 0; if (proto == FIB_PROTOCOL_IP4) - rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id); + rx_fib_index = fib_table_find_or_create_and_lock(proto, tbl_id, + FIB_SOURCE_DHCP); else - rx_fib_index = mfib_table_find_or_create_and_lock(proto, tbl_id); + rx_fib_index = mfib_table_find_or_create_and_lock(proto, tbl_id, + MFIB_SOURCE_DHCP); v = 
dhcp_get_vss_info(dm, rx_fib_index, proto); if (NULL != v) diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c index 26363a2f..af189eda 100644 --- a/src/vnet/dpo/lookup_dpo.c +++ b/src/vnet/dpo/lookup_dpo.c @@ -135,11 +135,15 @@ lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index, { if (LOOKUP_UNICAST == cast) { - fib_table_lock(fib_index, dpo_proto_to_fib(proto)); + fib_table_lock(fib_index, + dpo_proto_to_fib(proto), + FIB_SOURCE_RR); } else { - mfib_table_lock(fib_index, dpo_proto_to_fib(proto)); + mfib_table_lock(fib_index, + dpo_proto_to_fib(proto), + MFIB_SOURCE_RR); } } lookup_dpo_add_or_lock_i(fib_index, proto, cast, input, table_config, dpo); @@ -161,13 +165,15 @@ lookup_dpo_add_or_lock_w_table_id (u32 table_id, { fib_index = fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), - table_id); + table_id, + FIB_SOURCE_RR); } else { fib_index = mfib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), - table_id); + table_id, + MFIB_SOURCE_RR); } } @@ -238,12 +244,14 @@ lookup_dpo_unlock (dpo_id_t *dpo) if (LOOKUP_UNICAST == lkd->lkd_cast) { fib_table_unlock(lkd->lkd_fib_index, - dpo_proto_to_fib(lkd->lkd_proto)); + dpo_proto_to_fib(lkd->lkd_proto), + FIB_SOURCE_RR); } else { mfib_table_unlock(lkd->lkd_fib_index, - dpo_proto_to_fib(lkd->lkd_proto)); + dpo_proto_to_fib(lkd->lkd_proto), + MFIB_SOURCE_RR); } } pool_put(lookup_dpo_pool, lkd); diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c index b178a902..2a6e7dd5 100644 --- a/src/vnet/dpo/mpls_label_dpo.c +++ b/src/vnet/dpo/mpls_label_dpo.c @@ -105,10 +105,18 @@ format_mpls_label_dpo (u8 *s, va_list *args) mpls_label_dpo_t *mld; u32 ii; - mld = mpls_label_dpo_get(index); - s = format(s, "mpls-label:[%d]:", index); + if (pool_is_free_index(mpls_label_dpo_pool, index)) + { + /* + * the packet trace can be printed after the DPO has been deleted + */ + return (s); + } + + mld = mpls_label_dpo_get(index); + for (ii = 0; ii < mld->mld_n_labels; ii++) { 
hdr.label_exp_s_ttl = diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index c84ff47b..08e91373 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -522,6 +522,24 @@ arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) } } +static void +arp_adj_fib_add (ethernet_arp_ip4_entry_t * e, uint32_t fib_index) +{ + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = e->ip4_address, + }; + + e->fib_entry_index = + fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + DPO_PROTO_IP4, &pfx.fp_addr, + e->sw_if_index, ~0, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_lock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ); +} + int vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, vnet_arp_set_ip4_over_ethernet_rpc_args_t @@ -576,21 +594,9 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, if (!is_no_fib_entry) { - fib_prefix_t pfx = { - .fp_len = 32, - .fp_proto = FIB_PROTOCOL_IP4, - .fp_addr.ip4 = a->ip4, - }; - u32 fib_index; - - fib_index = - ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); - e->fib_entry_index = - fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, - FIB_ENTRY_FLAG_ATTACHED, - DPO_PROTO_IP4, &pfx.fp_addr, - e->sw_if_index, ~0, 1, NULL, - FIB_ROUTE_PATH_FLAG_NONE); + arp_adj_fib_add (e, + ip4_fib_table_get_index_for_sw_if_index + (e->sw_if_index)); } else { @@ -1561,6 +1567,65 @@ arp_add_del_interface_address (ip4_main_t * im, } } +void +arp_adj_fib_remove (ethernet_arp_ip4_entry_t * e, uint32_t fib_index) +{ + if (FIB_NODE_INDEX_INVALID != e->fib_entry_index) + { + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = e->ip4_address, + }; + u32 fib_index; + + fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); + + fib_table_entry_path_remove (fib_index, &pfx, + FIB_SOURCE_ADJ, + DPO_PROTO_IP4, + &pfx.fp_addr, + e->sw_if_index, ~0, 1, + 
FIB_ROUTE_PATH_FLAG_NONE); + fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_ADJ); + } +} + +static void +arp_table_bind (ip4_main_t * im, + uword opaque, + u32 sw_if_index, u32 new_fib_index, u32 old_fib_index) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_interface_t *eai; + ethernet_arp_ip4_entry_t *e; + hash_pair_t *pair; + + /* + * the IP table that the interface is bound to has changed. + * reinstall all the adj fibs. + */ + + if (vec_len (am->ethernet_arp_by_sw_if_index) <= sw_if_index) + return; + + eai = &am->ethernet_arp_by_sw_if_index[sw_if_index]; + + /* *INDENT-OFF* */ + hash_foreach_pair (pair, eai->arp_entries, + ({ + e = pool_elt_at_index(am->ip4_entry_pool, + pair->value[0]); + /* + * remove the adj-fib from the old table and add to the new + */ + arp_adj_fib_remove(e, old_fib_index); + arp_adj_fib_add(e, new_fib_index); + })); + /* *INDENT-ON* */ + +} + static clib_error_t * ethernet_arp_init (vlib_main_t * vm) { @@ -1606,6 +1671,11 @@ ethernet_arp_init (vlib_main_t * vm) cb.function_opaque = 0; vec_add1 (im->add_del_interface_address_callbacks, cb); + ip4_table_bind_callback_t cbt; + cbt.function = arp_table_bind; + cbt.function_opaque = 0; + vec_add1 (im->table_bind_callbacks, cbt); + return 0; } @@ -1616,24 +1686,9 @@ arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e) { ethernet_arp_main_t *am = ðernet_arp_main; - if (FIB_NODE_INDEX_INVALID != e->fib_entry_index) - { - fib_prefix_t pfx = { - .fp_len = 32, - .fp_proto = FIB_PROTOCOL_IP4, - .fp_addr.ip4 = e->ip4_address, - }; - u32 fib_index; - - fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); - - fib_table_entry_path_remove (fib_index, &pfx, - FIB_SOURCE_ADJ, - DPO_PROTO_IP4, - &pfx.fp_addr, - e->sw_if_index, ~0, 1, - FIB_ROUTE_PATH_FLAG_NONE); - } + arp_adj_fib_remove (e, + ip4_fib_table_get_index_for_sw_if_index + (e->sw_if_index)); hash_unset (eai->arp_entries, e->ip4_address.as_u32); pool_put (am->ip4_entry_pool, e); 
} @@ -1693,7 +1748,11 @@ vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm, * does in response to interface events. unset is only done * by the control plane. */ - if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) + if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) + { + e->flags &= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC; + } + else if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) { arp_entry_free (eai, e); } diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h index d07d6cae..f5a107ca 100644 --- a/src/vnet/fib/fib_api.h +++ b/src/vnet/fib/fib_api.h @@ -23,7 +23,6 @@ add_del_route_check (fib_protocol_t table_proto, u32 next_hop_sw_if_index, dpo_proto_t next_hop_table_proto, u32 next_hop_table_id, - u8 create_missing_tables, u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index); diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c index 2027f2be..4cb6cf60 100644 --- a/src/vnet/fib/fib_entry.c +++ b/src/vnet/fib/fib_entry.c @@ -89,6 +89,17 @@ fib_entry_get_default_chain_type (const fib_entry_t *fib_entry) return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); } +u8 * +format_fib_source (u8 * s, va_list * args) +{ + fib_source_t source = va_arg (*args, int); + + s = format (s, "\n src:%s ", + fib_source_names[source]); + + return (s); +} + u8 * format_fib_entry (u8 * s, va_list * args) { @@ -114,8 +125,8 @@ format_fib_entry (u8 * s, va_list * args) FOR_EACH_SRC_ADDED(fib_entry, src, source, ({ - s = format (s, "\n src:%s ", - fib_source_names[source]); + s = format (s, "\n src:%U ", + format_fib_source, source); s = fib_entry_src_format(fib_entry, source, s); s = format (s, " refs:%d ", src->fes_ref_count); if (FIB_ENTRY_FLAG_NONE != src->fes_entry_flags) { diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index 93b8016d..2f6e37fe 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -431,6 +431,7 @@ typedef struct fib_entry_t_ { #define FIB_ENTRY_FORMAT_DETAIL2 (0x2) extern u8 *format_fib_entry (u8 * s, va_list 
* args); +extern u8 *format_fib_source (u8 * s, va_list * args); extern fib_node_index_t fib_entry_create_special(u32 fib_index, const fib_prefix_t *prefix, diff --git a/src/vnet/fib/fib_entry_src_mpls.c b/src/vnet/fib/fib_entry_src_mpls.c index a616458f..6fdd5c0a 100644 --- a/src/vnet/fib/fib_entry_src_mpls.c +++ b/src/vnet/fib/fib_entry_src_mpls.c @@ -94,7 +94,9 @@ fib_entry_src_mpls_set_data (fib_entry_src_t *src, fib_table_entry_delete_index(src->mpls.fesm_lfes[eos], FIB_SOURCE_SPECIAL); } - fib_table_unlock(MPLS_FIB_DEFAULT_TABLE_ID, FIB_PROTOCOL_MPLS); + fib_table_unlock(MPLS_FIB_DEFAULT_TABLE_ID, + FIB_PROTOCOL_MPLS, + FIB_SOURCE_MPLS); src->mpls.fesm_label = label; } else @@ -113,7 +115,8 @@ fib_entry_src_mpls_set_data (fib_entry_src_t *src, { fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS, - MPLS_FIB_DEFAULT_TABLE_ID); + MPLS_FIB_DEFAULT_TABLE_ID, + FIB_SOURCE_MPLS); } else { diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 6b6cc5cb..75d15628 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -1039,7 +1039,8 @@ fib_table_find (fib_protocol_t proto, u32 fib_table_find_or_create_and_lock (fib_protocol_t proto, - u32 table_id) + u32 table_id, + fib_source_t src) { fib_table_t *fib_table; fib_node_index_t fi; @@ -1047,13 +1048,13 @@ fib_table_find_or_create_and_lock (fib_protocol_t proto, switch (proto) { case FIB_PROTOCOL_IP4: - fi = ip4_fib_table_find_or_create_and_lock(table_id); + fi = ip4_fib_table_find_or_create_and_lock(table_id, src); break; case FIB_PROTOCOL_IP6: - fi = ip6_fib_table_find_or_create_and_lock(table_id); + fi = ip6_fib_table_find_or_create_and_lock(table_id, src); break; case FIB_PROTOCOL_MPLS: - fi = mpls_fib_table_find_or_create_and_lock(table_id); + fi = mpls_fib_table_find_or_create_and_lock(table_id, src); break; default: return (~0); @@ -1070,6 +1071,7 @@ fib_table_find_or_create_and_lock (fib_protocol_t proto, u32 fib_table_create_and_lock (fib_protocol_t proto, + 
fib_source_t src, const char *const fmt, ...) { @@ -1082,13 +1084,13 @@ fib_table_create_and_lock (fib_protocol_t proto, switch (proto) { case FIB_PROTOCOL_IP4: - fi = ip4_fib_table_create_and_lock(); + fi = ip4_fib_table_create_and_lock(src); break; case FIB_PROTOCOL_IP6: - fi = ip6_fib_table_create_and_lock(); + fi = ip6_fib_table_create_and_lock(src); break; case FIB_PROTOCOL_MPLS: - fi = mpls_fib_table_create_and_lock(); + fi = mpls_fib_table_create_and_lock(src); break; default: return (~0); @@ -1143,26 +1145,43 @@ fib_table_walk (u32 fib_index, void fib_table_unlock (u32 fib_index, - fib_protocol_t proto) + fib_protocol_t proto, + fib_source_t source) { fib_table_t *fib_table; fib_table = fib_table_get(fib_index, proto); - fib_table->ft_locks--; + fib_table->ft_locks[source]--; + fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]--; - if (0 == fib_table->ft_locks) + if (0 == fib_table->ft_locks[source]) { + /* + * The source no longer needs the table. flush any routes + * from it just in case + */ + fib_table_flush(fib_index, proto, source); + } + + if (0 == fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]) + { + /* + * no more locak from any source - kill it + */ fib_table_destroy(fib_table); } } + void fib_table_lock (u32 fib_index, - fib_protocol_t proto) + fib_protocol_t proto, + fib_source_t source) { fib_table_t *fib_table; fib_table = fib_table_get(fib_index, proto); - fib_table->ft_locks++; + fib_table->ft_locks[source]++; + fib_table->ft_locks[FIB_TABLE_TOTAL_LOCKS]++; } u32 diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h index 579740e9..6b7011b3 100644 --- a/src/vnet/fib/fib_table.h +++ b/src/vnet/fib/fib_table.h @@ -22,6 +22,12 @@ #include #include +/** + * Keep a lock per-source and a total + */ +#define FIB_TABLE_N_LOCKS (FIB_SOURCE_MAX+1) +#define FIB_TABLE_TOTAL_LOCKS FIB_SOURCE_MAX + /** * @brief * A protocol Independent FIB table @@ -34,9 +40,9 @@ typedef struct fib_table_t_ fib_protocol_t ft_proto; /** - * number of locks on the table + * 
per-source number of locks on the table */ - u16 ft_locks; + u16 ft_locks[FIB_TABLE_N_LOCKS]; /** * Table ID (hash key) for this FIB. @@ -628,9 +634,13 @@ extern u32 fib_table_find(fib_protocol_t proto, u32 table_id); * * @return fib_index * The index of the FIB + * + * @param source + * The ID of the client/source. */ extern u32 fib_table_find_or_create_and_lock(fib_protocol_t proto, - u32 table_id); + u32 table_id, + fib_source_t source); /** * @brief @@ -643,10 +653,14 @@ extern u32 fib_table_find_or_create_and_lock(fib_protocol_t proto, * @param fmt * A string to describe the table * + * @param source + * The ID of the client/source. + * * @return fib_index * The index of the FIB */ extern u32 fib_table_create_and_lock(fib_protocol_t proto, + fib_source_t source, const char *const fmt, ...); @@ -704,9 +718,13 @@ extern void fib_table_set_flow_hash_config(u32 fib_index, * * @paran proto * The protocol of the FIB (and thus the entries therein) + * + * @param source + * The ID of the client/source. */ extern void fib_table_unlock(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + fib_source_t source); /** * @brief @@ -718,9 +736,13 @@ extern void fib_table_unlock(u32 fib_index, * * @paran proto * The protocol of the FIB (and thus the entries therein) + * + * @param source + * The ID of the client/source. 
*/ extern void fib_table_lock(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + fib_source_t source); /** * @brief diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index 6867cca8..572d7f0d 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -739,7 +739,8 @@ fib_test_v4 (void) lb_count = pool_elts(load_balance_pool); /* Find or create FIB table 11 */ - fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11); + fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11, + FIB_SOURCE_API); for (ii = 0; ii < 4; ii++) { @@ -4150,7 +4151,7 @@ fib_test_v4 (void) FIB_SOURCE_INTERFACE)), "NO INterface Source'd prefixes"); - fib_table_unlock(fib_index, FIB_PROTOCOL_IP4); + fib_table_unlock(fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_API); FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); @@ -4201,7 +4202,8 @@ fib_test_v6 (void) dpo_drop = drop_dpo_get(DPO_PROTO_IP6); /* Find or create FIB table 11 */ - fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 11); + fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 11, + FIB_SOURCE_API); for (ii = 0; ii < 4; ii++) { @@ -5025,7 +5027,7 @@ fib_test_v6 (void) /* * now remove the VRF */ - fib_table_unlock(fib_index, FIB_PROTOCOL_IP6); + fib_table_unlock(fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_API); FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); @@ -5157,7 +5159,9 @@ fib_test_ae (void) */ u32 import_fib_index1; - import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11); + import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, + 11, + FIB_SOURCE_CLI); /* * Add an attached route in the import FIB @@ -5233,7 +5237,8 @@ fib_test_ae (void) */ u32 import_fib_index2; - import_fib_index2 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 12); + import_fib_index2 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 12, + 
FIB_SOURCE_CLI); /* * Add an attached route in the import FIB @@ -5595,8 +5600,8 @@ fib_test_ae (void) &local_pfx, FIB_SOURCE_API); - fib_table_unlock(import_fib_index1, FIB_PROTOCOL_IP4); - fib_table_unlock(import_fib_index2, FIB_PROTOCOL_IP4); + fib_table_unlock(import_fib_index1, FIB_PROTOCOL_IP4, FIB_SOURCE_CLI); + fib_table_unlock(import_fib_index2, FIB_PROTOCOL_IP4, FIB_SOURCE_CLI); FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", adj_nbr_db_size()); @@ -8168,9 +8173,10 @@ lfib_test (void) /* * MPLS enable an interface so we get the MPLS table created */ + mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API); mpls_sw_interface_enable_disable(&mpls_main, tm->hw[0]->sw_if_index, - 1); + 1, 1); ip46_address_t nh_10_10_10_1 = { .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01), @@ -8662,7 +8668,8 @@ lfib_test (void) */ mpls_sw_interface_enable_disable(&mpls_main, tm->hw[0]->sw_if_index, - 0); + 0, 1); + mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API); FIB_TEST(lb_count == pool_elts(load_balance_pool), "Load-balance resources freed %d of %d", diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index d563bafd..865e2dd5 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -101,7 +101,8 @@ static const ip4_fib_table_special_prefix_t ip4_specials[] = { static u32 -ip4_create_fib_with_table_id (u32 table_id) +ip4_create_fib_with_table_id (u32 table_id, + fib_source_t src) { fib_table_t *fib_table; ip4_fib_t *v4_fib; @@ -128,7 +129,7 @@ ip4_create_fib_with_table_id (u32 table_id) v4_fib->fwd_classify_table_index = ~0; v4_fib->rev_classify_table_index = ~0; - fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4); + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4, src); ip4_mtrie_init(&v4_fib->mtrie); @@ -198,23 +199,24 @@ ip4_fib_table_destroy (u32 fib_index) u32 -ip4_fib_table_find_or_create_and_lock (u32 table_id) +ip4_fib_table_find_or_create_and_lock (u32 table_id, + fib_source_t src) { u32 index; index = 
ip4_fib_index_from_table_id(table_id); if (~0 == index) - return ip4_create_fib_with_table_id(table_id); + return ip4_create_fib_with_table_id(table_id, src); - fib_table_lock(index, FIB_PROTOCOL_IP4); + fib_table_lock(index, FIB_PROTOCOL_IP4, src); return (index); } u32 -ip4_fib_table_create_and_lock (void) +ip4_fib_table_create_and_lock (fib_source_t src) { - return (ip4_create_fib_with_table_id(~0)); + return (ip4_create_fib_with_table_id(~0, src)); } u32 @@ -525,17 +527,32 @@ ip4_show_fib (vlib_main_t * vm, pool_foreach (fib_table, im4->fibs, ({ ip4_fib_t *fib = pool_elt_at_index(im4->v4_fibs, fib_table->ft_index); + fib_source_t source; + u8 *s = NULL; if (table_id >= 0 && table_id != (int)fib->table_id) continue; if (fib_index != ~0 && fib_index != (int)fib->index) continue; - vlib_cli_output (vm, "%U, fib_index:%d, flow hash:[%U] locks:%d", - format_fib_table_name, fib->index, FIB_PROTOCOL_IP4, - fib->index, - format_ip_flow_hash_config, fib_table->ft_flow_hash_config, - fib_table->ft_locks); + s = format(s, "%U, fib_index:%d, flow hash:[%U] locks:[", + format_fib_table_name, fib->index, + FIB_PROTOCOL_IP4, + fib->index, + format_ip_flow_hash_config, + fib_table->ft_flow_hash_config); + FOR_EACH_FIB_SOURCE(source) + { + if (0 != fib_table->ft_locks[source]) + { + s = format(s, "%U:%d, ", + format_fib_source, source, + fib_table->ft_locks[source]); + } + } + s = format (s, "]"); + vlib_cli_output (vm, "%V", s); + vec_free(s); /* Show summary? */ if (! verbose) diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h index 006163b4..495b45cc 100644 --- a/src/vnet/fib/ip4_fib.h +++ b/src/vnet/fib/ip4_fib.h @@ -127,8 +127,9 @@ ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst) * @returns A pointer to the retrieved or created fib. 
* */ -extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id); -extern u32 ip4_fib_table_create_and_lock(void); +extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id, + fib_source_t src); +extern u32 ip4_fib_table_create_and_lock(fib_source_t src); static inline diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index 8fde6f9f..3ddb8453 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -50,7 +50,8 @@ vnet_ip6_fib_init (u32 fib_index) } static u32 -create_fib_with_table_id (u32 table_id) +create_fib_with_table_id (u32 table_id, + fib_source_t src) { fib_table_t *fib_table; ip6_fib_t *v6_fib; @@ -77,29 +78,30 @@ create_fib_with_table_id (u32 table_id) fib_table->ft_flow_hash_config = IP_FLOW_HASH_DEFAULT; vnet_ip6_fib_init(fib_table->ft_index); - fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6); + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6, src); return (fib_table->ft_index); } u32 -ip6_fib_table_find_or_create_and_lock (u32 table_id) +ip6_fib_table_find_or_create_and_lock (u32 table_id, + fib_source_t src) { uword * p; p = hash_get (ip6_main.fib_index_by_table_id, table_id); if (NULL == p) - return create_fib_with_table_id(table_id); + return create_fib_with_table_id(table_id, src); - fib_table_lock(p[0], FIB_PROTOCOL_IP6); + fib_table_lock(p[0], FIB_PROTOCOL_IP6, src); return (p[0]); } u32 -ip6_fib_table_create_and_lock (void) +ip6_fib_table_create_and_lock (fib_source_t src) { - return (create_fib_with_table_id(~0)); + return (create_fib_with_table_id(~0, src)); } void @@ -588,16 +590,33 @@ ip6_show_fib (vlib_main_t * vm, pool_foreach (fib_table, im6->fibs, ({ + fib_source_t source; + u8 *s = NULL; + fib = pool_elt_at_index(im6->v6_fibs, fib_table->ft_index); if (table_id >= 0 && table_id != (int)fib->table_id) continue; if (fib_index != ~0 && fib_index != (int)fib->index) continue; - vlib_cli_output (vm, "%s, fib_index:%d, flow hash:[%U] locks:%d", - fib_table->ft_desc, fib->index, - 
format_ip_flow_hash_config, fib_table->ft_flow_hash_config, - fib_table->ft_locks); + s = format(s, "%U, fib_index:%d, flow hash:[%U] locks:[", + format_fib_table_name, fib->index, + FIB_PROTOCOL_IP6, + fib->index, + format_ip_flow_hash_config, + fib_table->ft_flow_hash_config); + FOR_EACH_FIB_SOURCE(source) + { + if (0 != fib_table->ft_locks[source]) + { + s = format(s, "%U:%d, ", + format_fib_source, source, + fib_table->ft_locks[source]); + } + } + s = format (s, "]"); + vlib_cli_output (vm, "%V", s); + vec_free(s); /* Show summary? */ if (! verbose) diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h index aad8305c..9728eecc 100644 --- a/src/vnet/fib/ip6_fib.h +++ b/src/vnet/fib/ip6_fib.h @@ -144,8 +144,9 @@ ip6_src_lookup_for_packet (ip6_main_t * im, * \returns A pointer to the retrieved or created fib. * */ -extern u32 ip6_fib_table_find_or_create_and_lock(u32 table_id); -extern u32 ip6_fib_table_create_and_lock(void); +extern u32 ip6_fib_table_find_or_create_and_lock(u32 table_id, + fib_source_t src); +extern u32 ip6_fib_table_create_and_lock(fib_source_t src); static inline ip6_fib_t * ip6_fib_get (fib_node_index_t index) diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c index ca6271fe..4eeef7ab 100644 --- a/src/vnet/fib/mpls_fib.c +++ b/src/vnet/fib/mpls_fib.c @@ -83,7 +83,8 @@ mpls_fib_index_from_table_id (u32 table_id) } static u32 -mpls_fib_create_with_table_id (u32 table_id) +mpls_fib_create_with_table_id (u32 table_id, + fib_source_t src) { dpo_id_t dpo = DPO_INVALID; fib_table_t *fib_table; @@ -107,7 +108,7 @@ mpls_fib_create_with_table_id (u32 table_id) fib_table->ft_table_id = table_id; fib_table->ft_flow_hash_config = MPLS_FLOW_HASH_DEFAULT; - fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS); + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS, src); if (INDEX_INVALID == mpls_fib_drop_dpo_index) { @@ -220,22 +221,23 @@ mpls_fib_create_with_table_id (u32 table_id) } u32 -mpls_fib_table_find_or_create_and_lock (u32 
table_id) +mpls_fib_table_find_or_create_and_lock (u32 table_id, + fib_source_t src) { u32 index; index = mpls_fib_index_from_table_id(table_id); if (~0 == index) - return mpls_fib_create_with_table_id(table_id); + return mpls_fib_create_with_table_id(table_id, src); - fib_table_lock(index, FIB_PROTOCOL_MPLS); + fib_table_lock(index, FIB_PROTOCOL_MPLS, src); return (index); } u32 -mpls_fib_table_create_and_lock (void) +mpls_fib_table_create_and_lock (fib_source_t src) { - return (mpls_fib_create_with_table_id(~0)); + return (mpls_fib_create_with_table_id(~0, src)); } void diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h index dfb8b7fc..29cd1d20 100644 --- a/src/vnet/fib/mpls_fib.h +++ b/src/vnet/fib/mpls_fib.h @@ -59,8 +59,9 @@ mpls_fib_get (fib_node_index_t index) return (pool_elt_at_index(mpls_main.mpls_fibs, index)); } -extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id); -extern u32 mpls_fib_table_create_and_lock(void); +extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id, + fib_source_t src); +extern u32 mpls_fib_table_create_and_lock(fib_source_t src); // extern mpls_fib_t * mpls_fib_find(u32 table_id); extern u32 mpls_fib_index_from_table_id(u32 table_id); diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 113728cd..419fef94 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -320,68 +320,189 @@ stats_dsunlock (void) static void vl_api_sw_interface_set_table_t_handler (vl_api_sw_interface_set_table_t * mp) { - int rv = 0; - u32 table_id = ntohl (mp->vrf_id); - u32 sw_if_index = ntohl (mp->sw_if_index); vl_api_sw_interface_set_table_reply_t *rmp; - CLIB_UNUSED (ip_interface_address_t * ia); - u32 fib_index; + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 table_id = ntohl (mp->vrf_id); + int rv = 0; VALIDATE_SW_IF_INDEX (mp); stats_dslock_with_hint (1 /* release hint */ , 4 /* tag */ ); if (mp->is_ipv6) + rv = ip_table_bind (FIB_PROTOCOL_IP6, sw_if_index, table_id, 1); + else + rv 
= ip_table_bind (FIB_PROTOCOL_IP4, sw_if_index, table_id, 1); + + stats_dsunlock (); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_TABLE_REPLY); +} + +int +ip_table_bind (fib_protocol_t fproto, + uint32_t sw_if_index, uint32_t table_id, u8 is_api) +{ + CLIB_UNUSED (ip_interface_address_t * ia); + u32 fib_index, mfib_index; + fib_source_t src; + mfib_source_t msrc; + + if (is_api) + { + src = FIB_SOURCE_API; + msrc = MFIB_SOURCE_API; + } + else + { + src = FIB_SOURCE_CLI; + msrc = MFIB_SOURCE_CLI; + } + + /* + * This is temporary whilst I do the song and dance with the CSIT version + */ + if (0 != table_id) { + fib_index = fib_table_find_or_create_and_lock (fproto, table_id, src); + mfib_index = + mfib_table_find_or_create_and_lock (fproto, table_id, msrc); + } + else + { + fib_index = 0; + mfib_index = 0; + } + + /* + * This if table does not exist = error is what we want in the end. + */ + /* fib_index = fib_table_find (fproto, table_id); */ + /* mfib_index = mfib_table_find (fproto, table_id); */ + + /* if (~0 == fib_index || ~0 == mfib_index) */ + /* { */ + /* return (VNET_API_ERROR_NO_SUCH_FIB); */ + /* } */ + + if (FIB_PROTOCOL_IP6 == fproto) + { + /* + * If the interface already has in IP address, then a change int + * VRF is not allowed. The IP address applied must first be removed. + * We do not do that automatically here, since VPP has no knowledge + * of whether thoses subnets are valid in the destination VRF. 
+ */ /* *INDENT-OFF* */ foreach_ip_interface_address (&ip6_main.lookup_main, ia, sw_if_index, 1 /* honor unnumbered */ , ({ - rv = VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE; - goto done; + return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE); })); /* *INDENT-ON* */ - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - table_id); vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); - ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - table_id); vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index); - ip6_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index; + + /* + * tell those that are interested that the binding is changing. + */ + ip6_table_bind_callback_t *cb; + vec_foreach (cb, ip6_main.table_bind_callbacks) + cb->function (&ip6_main, cb->function_opaque, + sw_if_index, + fib_index, + ip6_main.fib_index_by_sw_if_index[sw_if_index]); + + if (0 == table_id) + { + /* reset back to default */ + if (0 != ip6_main.fib_index_by_sw_if_index[sw_if_index]) + fib_table_unlock (ip6_main.fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_IP6, src); + if (0 != ip6_main.mfib_index_by_sw_if_index[sw_if_index]) + mfib_table_unlock (ip6_main.mfib_index_by_sw_if_index + [sw_if_index], FIB_PROTOCOL_IP6, msrc); + + } + else + { + /* we need to lock the table now it's inuse */ + fib_table_lock (fib_index, FIB_PROTOCOL_IP6, src); + mfib_table_lock (mfib_index, FIB_PROTOCOL_IP6, msrc); + } + + ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + ip6_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index; } else { + /* + * If the interface already has in IP address, then a change int + * VRF is not allowed. The IP address applied must first be removed. + * We do not do that automatically here, since VPP has no knowledge + * of whether thoses subnets are valid in the destination VRF. 
+ */ /* *INDENT-OFF* */ foreach_ip_interface_address (&ip4_main.lookup_main, ia, sw_if_index, 1 /* honor unnumbered */ , ({ - rv = VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE; - goto done; + return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE); })); /* *INDENT-ON* */ - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - table_id); vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); - ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, - table_id); vec_validate (ip4_main.mfib_index_by_sw_if_index, sw_if_index); - ip4_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index; - } -done: - stats_dsunlock (); + /* + * tell those that are interested that the binding is changing. + */ + ip4_table_bind_callback_t *cb; + vec_foreach (cb, ip4_main.table_bind_callbacks) + cb->function (&ip4_main, cb->function_opaque, + sw_if_index, + fib_index, + ip4_main.fib_index_by_sw_if_index[sw_if_index]); + + if (0 == table_id) + { + /* reset back to default */ + if (0 != ip4_main.fib_index_by_sw_if_index[sw_if_index]) + fib_table_unlock (ip4_main.fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_IP4, src); + if (0 != ip4_main.mfib_index_by_sw_if_index[sw_if_index]) + mfib_table_unlock (ip4_main.mfib_index_by_sw_if_index + [sw_if_index], FIB_PROTOCOL_IP4, msrc); - BAD_SW_IF_INDEX_LABEL; + } + else + { + /* we need to lock the table now it's inuse */ + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + table_id, src); - REPLY_MACRO (VL_API_SW_INTERFACE_SET_TABLE_REPLY); + mfib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + table_id, msrc); + } + + ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + ip4_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index; + } + + /* + * Temporary. 
undo the locks from the find and create at the start + */ + if (0 != table_id) + { + fib_table_unlock (fib_index, fproto, src); + mfib_table_unlock (mfib_index, fproto, msrc); + } + + return (0); } static void diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h index 70b4ccd8..7aae73ff 100644 --- a/src/vnet/ip/ip.h +++ b/src/vnet/ip/ip.h @@ -184,6 +184,13 @@ void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index); extern vlib_node_registration_t ip4_inacl_node; extern vlib_node_registration_t ip6_inacl_node; +void ip_table_create (fib_protocol_t fproto, uint32_t table_id, u8 is_api); + +void ip_table_delete (fib_protocol_t fproto, uint32_t table_id, u8 is_api); + +int ip_table_bind (fib_protocol_t fproto, + uint32_t sw_if_index, uint32_t table_id, u8 is_api); + #endif /* included_ip_main_h */ /* diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index 8f9a8e27..decb840b 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -72,6 +72,16 @@ typedef struct uword function_opaque; } ip4_add_del_interface_address_callback_t; +typedef void (ip4_table_bind_function_t) + (struct ip4_main_t * im, + uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index); + +typedef struct +{ + ip4_table_bind_function_t *function; + uword function_opaque; +} ip4_table_bind_callback_t; + /** * @brief IPv4 main type. * @@ -117,6 +127,9 @@ typedef struct ip4_main_t ip4_add_del_interface_address_callback_t * add_del_interface_address_callbacks; + /** Functions to call when interface to table binding changes. */ + ip4_table_bind_callback_t *table_bind_callbacks; + /** Template used to generate IP4 ARP packets.
*/ vlib_packet_template_t ip4_arp_request_packet_template; diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 2d48e8a9..ec4287bb 100755 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1198,8 +1198,10 @@ ip4_lookup_init (vlib_main_t * vm) ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0); /* Create FIB with index 0 and table id of 0. */ - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0); - mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0, + FIB_SOURCE_DEFAULT_ROUTE); + mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0, + MFIB_SOURCE_DEFAULT_ROUTE); { pg_node_t *pn; @@ -2794,101 +2796,6 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = { VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain); /* *INDENT-ON */ -static clib_error_t * -add_del_interface_table (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - vnet_main_t *vnm = vnet_get_main (); - ip_interface_address_t *ia; - clib_error_t *error = 0; - u32 sw_if_index, table_id; - - sw_if_index = ~0; - - if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) - { - error = clib_error_return (0, "unknown interface `%U'", - format_unformat_error, input); - goto done; - } - - if (unformat (input, "%d", &table_id)) - ; - else - { - error = clib_error_return (0, "expected table id `%U'", - format_unformat_error, input); - goto done; - } - - /* - * If the interface already has in IP address, then a change int - * VRF is not allowed. The IP address applied must first be removed. - * We do not do that automatically here, since VPP has no knowledge - * of whether thoses subnets are valid in the destination VRF. 
- */ - /* *INDENT-OFF* */ - foreach_ip_interface_address (&ip4_main.lookup_main, - ia, sw_if_index, - 1 /* honor unnumbered */, - ({ - ip4_address_t * a; - - a = ip_interface_address_get_address (&ip4_main.lookup_main, ia); - error = clib_error_return (0, "interface %U has address %U", - format_vnet_sw_if_index_name, vnm, - sw_if_index, - format_ip4_address, a); - goto done; - })); - /* *INDENT-ON* */ - -{ - ip4_main_t *im = &ip4_main; - u32 fib_index; - - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); - - vec_validate (im->fib_index_by_sw_if_index, sw_if_index); - im->fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); - vec_validate (im->mfib_index_by_sw_if_index, sw_if_index); - im->mfib_index_by_sw_if_index[sw_if_index] = fib_index; -} - -done: -return error; -} - -/*? - * Place the indicated interface into the supplied IPv4 FIB table (also known - * as a VRF). If the FIB table does not exist, this command creates it. To - * display the current IPv4 FIB table, use the command 'show ip fib'. - * FIB table will only be displayed if a route has been added to the table, or - * an IP Address is assigned to an interface in the table (which adds a route - * automatically). - * - * @note IP addresses added after setting the interface IP table are added to - * the indicated FIB table. If an IP address is added prior to changing the - * table then this is an error. The control plane must remove these addresses - * first and then change the table. VPP will not automatically move the - * addresses from the old to the new table as it does not know the validity - * of such a change. 
- * - * @cliexpar - * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id): - * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2} - ?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = -{ - .path = "set interface ip table", - .function = add_del_interface_table, - .short_help = "set interface ip table ", -}; -/* *INDENT-ON* */ - int ip4_lookup_validate (ip4_address_t * a, u32 fib_index0) { diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c index ae836a11..9aa880ae 100644 --- a/src/vnet/ip/ip4_source_and_port_range_check.c +++ b/src/vnet/ip/ip4_source_and_port_range_check.c @@ -1126,6 +1126,14 @@ ip6_source_and_port_range_check_add_del (ip6_address_t * address, u16 * low_ports, u16 * high_ports, int is_add) { + uint32_t fib_index; + + fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id); + + ASSERT (~0 != fib_index); + + fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY); + return 0; } @@ -1138,7 +1146,8 @@ ip4_source_and_port_range_check_add_del (ip4_address_t * address, { u32 fib_index; - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + FIB_SOURCE_CLASSIFY); if (is_add == 0) { diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index fa922725..8aef53a9 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -103,6 +103,16 @@ typedef struct uword function_opaque; } ip6_add_del_interface_address_callback_t; +typedef void (ip6_table_bind_function_t) + (struct ip6_main_t * im, + uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index); + +typedef struct +{ + ip6_table_bind_function_t *function; + uword function_opaque; +} ip6_table_bind_callback_t; + /** * Enumeration of the FIB table instance types */ @@ -183,6 +193,9 @@ typedef struct ip6_main_t ip6_add_del_interface_address_callback_t * 
add_del_interface_address_callbacks; + /** Functions to call when interface to table binding changes. */ + ip6_table_bind_callback_t *table_bind_callbacks; + /* Template used to generate IP6 neighbor solicitation packets. */ vlib_packet_template_t discover_neighbor_packet_template; diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 5832bd0b..1002f6b6 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -2999,8 +2999,10 @@ ip6_lookup_init (vlib_main_t * vm) im->lookup_table_nbuckets, im->lookup_table_size); /* Create FIB with index 0 and table id of 0. */ - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0); - mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0, + FIB_SOURCE_DEFAULT_ROUTE); + mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0, + MFIB_SOURCE_DEFAULT_ROUTE); { pg_node_t *pn; @@ -3045,103 +3047,6 @@ ip6_lookup_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (ip6_lookup_init); -static clib_error_t * -add_del_ip6_interface_table (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vnet_main_t *vnm = vnet_get_main (); - ip_interface_address_t *ia; - clib_error_t *error = 0; - u32 sw_if_index, table_id; - - sw_if_index = ~0; - - if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) - { - error = clib_error_return (0, "unknown interface `%U'", - format_unformat_error, input); - goto done; - } - - if (unformat (input, "%d", &table_id)) - ; - else - { - error = clib_error_return (0, "expected table id `%U'", - format_unformat_error, input); - goto done; - } - - /* - * If the interface already has in IP address, then a change int - * VRF is not allowed. The IP address applied must first be removed. - * We do not do that automatically here, since VPP has no knowledge - * of whether thoses subnets are valid in the destination VRF.
- */ - /* *INDENT-OFF* */ - foreach_ip_interface_address (&ip6_main.lookup_main, - ia, sw_if_index, - 1 /* honor unnumbered */, - ({ - ip4_address_t * a; - - a = ip_interface_address_get_address (&ip6_main.lookup_main, ia); - error = clib_error_return (0, "interface %U has address %U", - format_vnet_sw_if_index_name, vnm, - sw_if_index, - format_ip6_address, a); - goto done; - })); - /* *INDENT-ON* */ - - { - u32 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - table_id); - - vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); - ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, - table_id); - - vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index); - ip6_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index; - } - - -done: - return error; -} - -/*? - * Place the indicated interface into the supplied IPv6 FIB table (also known - * as a VRF). If the FIB table does not exist, this command creates it. To - * display the current IPv6 FIB table, use the command 'show ip6 fib'. - * FIB table will only be displayed if a route has been added to the table, or - * an IP Address is assigned to an interface in the table (which adds a route - * automatically). - * - * @note IP addresses added after setting the interface IP table are added to - * the indicated FIB table. If an IP address is added prior to changing the - * table then this is an error. The control plane must remove these addresses - * first and then change the table. VPP will not automatically move the - * addresses from the old to the new table as it does not know the validity - * of such a change. 
- * - * @cliexpar - * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id): - * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2} - ?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = -{ - .path = "set interface ip6 table", - .function = add_del_ip6_interface_table, - .short_help = "set interface ip6 table " -}; -/* *INDENT-ON* */ - void ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip, u8 * mac) diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 62cf23ac..56f33ac8 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -250,6 +250,26 @@ format_ip6_neighbor_ip6_entry (u8 * s, va_list * va) return s; } +static void +ip6_neighbor_adj_fib_remove (ip6_neighbor_t * n, uint32_t fib_index) +{ + if (FIB_NODE_INDEX_INVALID != n->fib_entry_index) + { + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = n->key.ip6_address, + }; + fib_table_entry_path_remove (fib_index, + &pfx, + FIB_SOURCE_ADJ, + DPO_PROTO_IP6, + &pfx.fp_addr, + n->key.sw_if_index, ~0, + 1, FIB_ROUTE_PATH_FLAG_NONE); + } +} + static clib_error_t * ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) @@ -273,22 +293,10 @@ ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm, { n = pool_elt_at_index (nm->neighbor_pool, to_delete[i]); mhash_unset (&nm->neighbor_index_by_key, &n->key, 0); - if (FIB_NODE_INDEX_INVALID != n->fib_entry_index) - { - fib_prefix_t pfx = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr.ip6 = n->key.ip6_address, - }; - fib_table_entry_path_remove - (ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index), - &pfx, - FIB_SOURCE_ADJ, - DPO_PROTO_IP6, - &pfx.fp_addr, - n->key.sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); - pool_put (nm->neighbor_pool, n); - } + ip6_neighbor_adj_fib_remove (n, + ip6_fib_table_get_index_for_sw_if_index + (n->key.sw_if_index)); + pool_put 
(nm->neighbor_pool, n); } vec_free (to_delete); } @@ -579,6 +587,24 @@ ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) } } + +static void +ip6_neighbor_adj_fib_add (ip6_neighbor_t * n, uint32_t fib_index) +{ + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = n->key.ip6_address, + }; + + n->fib_entry_index = + fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + DPO_PROTO_IP6, &pfx.fp_addr, + n->key.sw_if_index, ~0, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); +} + int vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, u32 sw_if_index, @@ -633,21 +659,9 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, */ if (!is_no_fib_entry) { - fib_prefix_t pfx = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr.ip6 = k.ip6_address, - }; - u32 fib_index; - - fib_index = - ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index); - n->fib_entry_index = - fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, - FIB_ENTRY_FLAG_ATTACHED, - DPO_PROTO_IP6, &pfx.fp_addr, - n->key.sw_if_index, ~0, 1, NULL, - FIB_ROUTE_PATH_FLAG_NONE); + ip6_neighbor_adj_fib_add (n, + ip6_fib_table_get_index_for_sw_if_index + (n->key.sw_if_index)); } else { @@ -3843,6 +3857,33 @@ ip6_set_neighbor_limit (u32 neighbor_limit) return 0; } +static void +ip6_neighbor_table_bind (ip6_main_t * im, + uword opaque, + u32 sw_if_index, + u32 new_fib_index, u32 old_fib_index) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_neighbor_t *n = NULL; + u32 i, *to_re_add = 0; + + /* *INDENT-OFF* */ + pool_foreach (n, nm->neighbor_pool, + ({ + if (n->key.sw_if_index == sw_if_index) + vec_add1 (to_re_add, n - nm->neighbor_pool); + })); + /* *INDENT-ON* */ + + for (i = 0; i < vec_len (to_re_add); i++) + { + n = pool_elt_at_index (nm->neighbor_pool, to_re_add[i]); + ip6_neighbor_adj_fib_remove (n, old_fib_index); + ip6_neighbor_adj_fib_add (n, new_fib_index); + } + vec_free (to_re_add); +} + static 
clib_error_t * ip6_neighbor_init (vlib_main_t * vm) { @@ -3874,6 +3915,11 @@ ip6_neighbor_init (vlib_main_t * vm) cb.function_opaque = 0; vec_add1 (im->add_del_interface_address_callbacks, cb); + ip6_table_bind_callback_t cbt; + cbt.function = ip6_neighbor_table_bind; + cbt.function_opaque = 0; + vec_add1 (im->table_bind_callbacks, cbt); + mhash_init (&nm->pending_resolutions_by_address, /* value size */ sizeof (uword), /* key size */ sizeof (ip6_address_t)); diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index bba65ab4..384ec3e0 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -699,12 +699,58 @@ vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp, REPLY_MACRO (VL_API_IP_NEIGHBOR_ADD_DEL_REPLY); } +void +ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api) +{ + u32 fib_index, mfib_index; + + /* + * ignore action on the default table - this is always present + * and cannot be added nor deleted from the API + */ + if (0 != table_id) + { + /* + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + * The FIB index for unicast and multicast is not necessarily the + * same, since internal VPP systesm (like LISP and SR) create + * their own unicast tables. + */ + fib_index = fib_table_find (fproto, table_id); + mfib_index = mfib_table_find (fproto, table_id); + + if (~0 != fib_index) + { + fib_table_unlock (fib_index, fproto, + (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI)); + } + if (~0 != mfib_index) + { + mfib_table_unlock (mfib_index, fproto, + (is_api ? MFIB_SOURCE_API : MFIB_SOURCE_CLI)); + } + } +} + void vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp) { vl_api_ip_table_add_del_reply_t *rmp; + fib_protocol_t fproto = (mp->is_ipv6 ? 
FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4); + u32 table_id = ntohl (mp->table_id); int rv = 0; + if (mp->is_add) + { + ip_table_create (fproto, table_id, 1); + } + else + { + ip_table_delete (fproto, table_id, 1); + } + REPLY_MACRO (VL_API_IP_TABLE_ADD_DEL_REPLY); } @@ -866,18 +912,21 @@ add_del_route_check (fib_protocol_t table_proto, u32 next_hop_sw_if_index, dpo_proto_t next_hop_table_proto, u32 next_hop_table_id, - u8 create_missing_tables, u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index) { vnet_main_t *vnm = vnet_get_main (); + /* Temporaray whilst I do the CSIT dance */ + u8 create_missing_tables = 1; + *fib_index = fib_table_find (table_proto, ntohl (table_id)); if (~0 == *fib_index) { if (create_missing_tables) { *fib_index = fib_table_find_or_create_and_lock (table_proto, - ntohl (table_id)); + ntohl (table_id), + FIB_SOURCE_API); } else { @@ -918,12 +967,14 @@ add_del_route_check (fib_protocol_t table_proto, *next_hop_fib_index = mfib_table_find_or_create_and_lock (fib_nh_proto, ntohl - (next_hop_table_id)); + (next_hop_table_id), + MFIB_SOURCE_API); else *next_hop_fib_index = fib_table_find_or_create_and_lock (fib_nh_proto, ntohl - (next_hop_table_id)); + (next_hop_table_id), + FIB_SOURCE_API); } else { @@ -948,8 +999,7 @@ ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->next_hop_sw_if_index, DPO_PROTO_IP4, mp->next_hop_table_id, - mp->create_vrf_if_needed, 0, - &fib_index, &next_hop_fib_index); + 0, &fib_index, &next_hop_fib_index); if (0 != rv) return (rv); @@ -1008,8 +1058,7 @@ ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) mp->next_hop_sw_if_index, DPO_PROTO_IP6, mp->next_hop_table_id, - mp->create_vrf_if_needed, 0, - &fib_index, &next_hop_fib_index); + 0, &fib_index, &next_hop_fib_index); if (0 != rv) return (rv); @@ -1074,27 +1123,57 @@ vl_api_ip_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) REPLY_MACRO (VL_API_IP_ADD_DEL_ROUTE_REPLY); } +void +ip_table_create (fib_protocol_t fproto, u32 table_id, u8 
is_api) +{ + u32 fib_index, mfib_index; + + /* + * ignore action on the default table - this is always present + * and cannot be added nor deleted from the API + */ + if (0 != table_id) + { + /* + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + * The FIB index for unicast and multicast is not necessarily the + * same, since internal VPP systesm (like LISP and SR) create + * their own unicast tables. + */ + fib_index = fib_table_find (fproto, table_id); + mfib_index = mfib_table_find (fproto, table_id); + + if (~0 == fib_index) + { + fib_table_find_or_create_and_lock (fproto, table_id, + (is_api ? + FIB_SOURCE_API : + FIB_SOURCE_CLI)); + } + if (~0 == mfib_index) + { + mfib_table_find_or_create_and_lock (fproto, table_id, + (is_api ? + MFIB_SOURCE_API : + MFIB_SOURCE_CLI)); + } + } +} + static int add_del_mroute_check (fib_protocol_t table_proto, u32 table_id, - u32 next_hop_sw_if_index, - u8 is_local, u8 create_missing_tables, u32 * fib_index) + u32 next_hop_sw_if_index, u8 is_local, u32 * fib_index) { vnet_main_t *vnm = vnet_get_main (); *fib_index = mfib_table_find (table_proto, ntohl (table_id)); if (~0 == *fib_index) { - if (create_missing_tables) - { - *fib_index = mfib_table_find_or_create_and_lock (table_proto, - ntohl (table_id)); - } - else - { - /* No such VRF, and we weren't asked to create one */ - return VNET_API_ERROR_NO_SUCH_FIB; - } + /* No such table */ + return VNET_API_ERROR_NO_SUCH_FIB; } if (~0 != ntohl (next_hop_sw_if_index)) @@ -1163,8 +1242,7 @@ api_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp) rv = add_del_mroute_check (fproto, mp->table_id, mp->next_hop_sw_if_index, - mp->is_local, - mp->create_vrf_if_needed, &fib_index); + mp->is_local, &fib_index); if (0 != rv) return (rv); diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 5537bb04..667c6791 100755 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ -687,6 +687,78 @@ done: 
return error; } +clib_error_t * +vnet_ip_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, + vlib_cli_command_t * cmd, fib_protocol_t fproto) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; + u32 table_id, is_add; + + is_add = 1; + table_id = ~0; + + /* Get a line of input. */ + if (!unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "add")) + is_add = 1; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 == table_id) + { + error = clib_error_return (0, "No table id"); + goto done; + } + else if (0 == table_id) + { + error = clib_error_return (0, "Can't change the default table"); + goto done; + } + else + { + if (is_add) + { + ip_table_create (fproto, table_id, 0); + } + else + { + ip_table_delete (fproto, table_id, 0); + } + } + +done: + unformat_free (line_input); + return error; +} + +clib_error_t * +vnet_ip4_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP4)); +} + +clib_error_t * +vnet_ip6_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6)); +} + /* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = { .path = "ip", @@ -749,6 +821,159 @@ VLIB_CLI_COMMAND (ip_route_command, static) = { .function = vnet_ip_route_cmd, .is_mp_safe = 1, }; + +/* *INDENT-ON* */ +/*? + * This command is used to add or delete IPv4 Tables. All + * Tables must be explicitly added before that can be used. 
Creating a + * table will add both unicast and multicast FIBs + * + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip4_table_command, static) = { + .path = "ip table", + .short_help = "ip table [add|del] ", + .function = vnet_ip4_table_cmd, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +/* *INDENT-ON* */ +/*? + * This command is used to add or delete IPv6 Tables. All + * Tables must be explicitly added before they can be used. Creating a + * table will add both unicast and multicast FIBs + * + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_table_command, static) = { + .path = "ip6 table", + .short_help = "ip6 table [add|del] ", + .function = vnet_ip6_table_cmd, + .is_mp_safe = 1, +}; + +static clib_error_t * +ip_table_bind_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd, + fib_protocol_t fproto) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index, table_id; + int rv; + + sw_if_index = ~0; + + if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (unformat (input, "%d", &table_id)) + ; + else + { + error = clib_error_return (0, "expected table id `%U'", + format_unformat_error, input); + goto done; + } + + rv = ip_table_bind (fproto, sw_if_index, table_id, 0); + + if (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE == rv) + { + error = clib_error_return (0, "IP addresses are still present on %U", + format_vnet_sw_if_index_name, + vnet_get_main(), + sw_if_index); + } + else if (VNET_API_ERROR_NO_SUCH_FIB == rv) + { + error = clib_error_return (0, "no such table %d", table_id); + } + else if (0 != rv) + { + error = clib_error_return (0, "unknown error"); + } + + done: + return error; +} + +static clib_error_t * +ip4_table_bind_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + return (ip_table_bind_cmd (vm , input, cmd, FIB_PROTOCOL_IP4)); +} + 
+static clib_error_t * +ip6_table_bind_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + return (ip_table_bind_cmd (vm , input, cmd, FIB_PROTOCOL_IP6)); +} + +/*? + * Place the indicated interface into the supplied IPv4 FIB table (also known + * as a VRF). If the FIB table does not exist, this command creates it. To + * display the current IPv4 FIB table, use the command 'show ip fib'. + * FIB table will only be displayed if a route has been added to the table, or + * an IP Address is assigned to an interface in the table (which adds a route + * automatically). + * + * @note IP addresses added after setting the interface IP table are added to + * the indicated FIB table. If an IP address is added prior to changing the + * table then this is an error. The control plane must remove these addresses + * first and then change the table. VPP will not automatically move the + * addresses from the old to the new table as it does not know the validity + * of such a change. + * + * @cliexpar + * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id): + * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2} + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = +{ + .path = "set interface ip table", + .function = ip4_table_bind_cmd, + .short_help = "set interface ip table ", +}; +/* *INDENT-ON* */ + +/*? + * Place the indicated interface into the supplied IPv6 FIB table (also known + * as a VRF). If the FIB table does not exist, this command creates it. To + * display the current IPv6 FIB table, use the command 'show ip6 fib'. + * FIB table will only be displayed if a route has been added to the table, or + * an IP Address is assigned to an interface in the table (which adds a route + * automatically). + * + * @note IP addresses added after setting the interface IP table are added to + * the indicated FIB table. 
If an IP address is added prior to changing the + * table then this is an error. The control plane must remove these addresses + * first and then change the table. VPP will not automatically move the + * addresses from the old to the new table as it does not know the validity + * of such a change. + * + * @cliexpar + * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id): + * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2} + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = +{ + .path = "set interface ip6 table", + .function = ip6_table_bind_cmd, + .short_help = "set interface ip6 table " +}; /* *INDENT-ON* */ clib_error_t * diff --git a/src/vnet/lisp-gpe/interface.c b/src/vnet/lisp-gpe/interface.c index e832c23f..a0c05e85 100644 --- a/src/vnet/lisp-gpe/interface.c +++ b/src/vnet/lisp-gpe/interface.c @@ -505,12 +505,14 @@ lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id) { fib_node_index_t fib_index; - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id, + FIB_SOURCE_LISP); vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; ip4_sw_interface_enable_disable (sw_if_index, 1); - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id, + FIB_SOURCE_LISP); vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; ip6_sw_interface_enable_disable (sw_if_index, 1); @@ -530,7 +532,7 @@ lisp_gpe_tenant_del_default_routes (u32 table_id) fib_index = fib_table_find (prefix.fp_proto, table_id); fib_table_entry_special_remove (fib_index, &prefix, FIB_SOURCE_LISP); - fib_table_unlock (fib_index, prefix.fp_proto); + fib_table_unlock (fib_index, prefix.fp_proto, 
FIB_SOURCE_LISP); } } @@ -549,7 +551,8 @@ lisp_gpe_tenant_add_default_routes (u32 table_id) /* * Add a deafult route that results in a control plane punt DPO */ - fib_index = fib_table_find_or_create_and_lock (prefix.fp_proto, table_id); + fib_index = fib_table_find_or_create_and_lock (prefix.fp_proto, table_id, + FIB_SOURCE_LISP); fib_table_entry_special_dpo_add (fib_index, &prefix, FIB_SOURCE_LISP, FIB_ENTRY_FLAG_EXCLUSIVE, lisp_cp_dpo_get (fib_proto_to_dpo diff --git a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c index d7d3cb86..0a8dc039 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c +++ b/src/vnet/lisp-gpe/lisp_gpe_fwd_entry.c @@ -66,6 +66,7 @@ ip_dst_fib_add_route (u32 dst_fib_index, const ip_prefix_t * dst_prefix) /* create a new src FIB. */ src_fib_index = fib_table_create_and_lock (dst_fib_prefix.fp_proto, + FIB_SOURCE_LISP, "LISP-src for [%d,%U]", dst_fib_index, format_fib_prefix, &dst_fib_prefix); @@ -180,7 +181,8 @@ ip_src_dst_fib_del_route (u32 src_fib_index, */ fib_table_entry_special_remove (dst_fib_index, &dst_fib_prefix, FIB_SOURCE_LISP); - fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto); + fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto, + FIB_SOURCE_LISP); } } @@ -544,7 +546,8 @@ add_ip_fwd_entry (lisp_gpe_main_t * lgm, lfe->tenant = lisp_gpe_tenant_find_or_create (lfe->key->vni); lfe->eid_table_id = a->table_id; lfe->eid_fib_index = fib_table_find_or_create_and_lock (fproto, - lfe->eid_table_id); + lfe->eid_table_id, + FIB_SOURCE_LISP); lfe->is_src_dst = a->is_src_dst; if (LISP_GPE_FWD_ENTRY_TYPE_NEGATIVE != lfe->type) @@ -578,7 +581,7 @@ del_ip_fwd_entry_i (lisp_gpe_main_t * lgm, lisp_gpe_fwd_entry_t * lfe) fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ? 
FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); - fib_table_unlock (lfe->eid_fib_index, fproto); + fib_table_unlock (lfe->eid_fib_index, fproto, FIB_SOURCE_LISP); hash_unset_mem (lgm->lisp_gpe_fwd_entries, lfe->key); clib_mem_free (lfe->key); diff --git a/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c b/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c index b234d9dc..26664f53 100644 --- a/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c +++ b/src/vnet/lisp-gpe/lisp_gpe_sub_interface.c @@ -89,13 +89,15 @@ lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id) { fib_node_index_t fib_index; - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id, + FIB_SOURCE_LISP); ASSERT (FIB_NODE_INDEX_INVALID != fib_index); vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id, + FIB_SOURCE_LISP); ASSERT (FIB_NODE_INDEX_INVALID != fib_index); vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); @@ -105,9 +107,13 @@ lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id) static void lisp_gpe_sub_interface_unset_table (u32 sw_if_index, u32 table_id) { + fib_table_unlock (ip4_main.fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_IP4, FIB_SOURCE_LISP); ip4_main.fib_index_by_sw_if_index[sw_if_index] = 0; ip4_sw_interface_enable_disable (sw_if_index, 0); + fib_table_unlock (ip6_main.fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_IP6, FIB_SOURCE_LISP); ip6_main.fib_index_by_sw_if_index[sw_if_index] = 0; ip6_sw_interface_enable_disable (sw_if_index, 0); } @@ -185,6 +191,7 @@ lisp_gpe_sub_interface_unlock (index_t l3si) l3s = lisp_gpe_sub_interface_get_i (l3si); + ASSERT (0 != l3s->locks); l3s->locks--; if (0 == l3s->locks) diff --git 
a/src/vnet/mfib/ip4_mfib.c b/src/vnet/mfib/ip4_mfib.c index 1849a3a4..b2482580 100644 --- a/src/vnet/mfib/ip4_mfib.c +++ b/src/vnet/mfib/ip4_mfib.c @@ -33,7 +33,8 @@ static const mfib_prefix_t ip4_specials[] = { }; static u32 -ip4_create_mfib_with_table_id (u32 table_id) +ip4_create_mfib_with_table_id (u32 table_id, + mfib_source_t src) { mfib_table_t *mfib_table; @@ -53,7 +54,7 @@ ip4_create_mfib_with_table_id (u32 table_id) mfib_table->v4.table_id = table_id; - mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP4); + mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP4, src); /* * add the special entries into the new FIB @@ -113,14 +114,15 @@ ip4_mfib_table_destroy (ip4_mfib_t *mfib) } u32 -ip4_mfib_table_find_or_create_and_lock (u32 table_id) +ip4_mfib_table_find_or_create_and_lock (u32 table_id, + mfib_source_t src) { u32 index; index = ip4_mfib_index_from_table_id(table_id); if (~0 == index) - return ip4_create_mfib_with_table_id(table_id); - mfib_table_lock(index, FIB_PROTOCOL_IP4); + return ip4_create_mfib_with_table_id(table_id, src); + mfib_table_lock(index, FIB_PROTOCOL_IP4, src); return (index); } diff --git a/src/vnet/mfib/ip4_mfib.h b/src/vnet/mfib/ip4_mfib.h index ea682651..e31fb744 100644 --- a/src/vnet/mfib/ip4_mfib.h +++ b/src/vnet/mfib/ip4_mfib.h @@ -72,8 +72,9 @@ ip4_mfib_get (u32 index) * @returns A pointer to the retrieved or created fib. 
* */ -extern u32 ip4_mfib_table_find_or_create_and_lock(u32 table_id); -extern u32 ip4_mfib_table_create_and_lock(void); +extern u32 ip4_mfib_table_find_or_create_and_lock(u32 table_id, + mfib_source_t src); +extern u32 ip4_mfib_table_create_and_lock(mfib_source_t src); static inline u32 ip4_mfib_index_from_table_id (u32 table_id) diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c index 5e48e919..e4861330 100644 --- a/src/vnet/mfib/ip6_mfib.c +++ b/src/vnet/mfib/ip6_mfib.c @@ -151,7 +151,8 @@ static const ip6_mfib_special_t ip6_mfib_specials[] = static u32 -ip6_create_mfib_with_table_id (u32 table_id) +ip6_create_mfib_with_table_id (u32 table_id, + mfib_source_t src) { mfib_table_t *mfib_table; mfib_prefix_t pfx = { @@ -182,7 +183,7 @@ ip6_create_mfib_with_table_id (u32 table_id) mfib_table->v6.table_id = table_id; - mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP6); + mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP6, src); mfib_table->v6.rhead = clib_mem_alloc_aligned (sizeof(*mfib_table->v6.rhead), @@ -297,14 +298,15 @@ ip6_mfib_interface_enable_disable (u32 sw_if_index, int is_enable) } u32 -ip6_mfib_table_find_or_create_and_lock (u32 table_id) +ip6_mfib_table_find_or_create_and_lock (u32 table_id, + mfib_source_t src) { u32 index; index = ip6_mfib_index_from_table_id(table_id); if (~0 == index) - return ip6_create_mfib_with_table_id(table_id); - mfib_table_lock(index, FIB_PROTOCOL_IP6); + return ip6_create_mfib_with_table_id(table_id, src); + mfib_table_lock(index, FIB_PROTOCOL_IP6, src); return (index); } diff --git a/src/vnet/mfib/ip6_mfib.h b/src/vnet/mfib/ip6_mfib.h index adaa7ec2..ea81b553 100644 --- a/src/vnet/mfib/ip6_mfib.h +++ b/src/vnet/mfib/ip6_mfib.h @@ -79,8 +79,9 @@ ip6_mfib_get (u32 index) * @returns A pointer to the retrieved or created fib. 
* */ -extern u32 ip6_mfib_table_find_or_create_and_lock(u32 table_id); -extern u32 ip6_mfib_table_create_and_lock(void); +extern u32 ip6_mfib_table_find_or_create_and_lock(u32 table_id, + mfib_source_t src); +extern u32 ip6_mfib_table_create_and_lock(mfib_source_t src); static inline diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c index 804e10ab..2302b9a1 100644 --- a/src/vnet/mfib/mfib_entry.c +++ b/src/vnet/mfib/mfib_entry.c @@ -334,6 +334,17 @@ mfib_entry_get_best_src (const mfib_entry_t *mfib_entry) return (bsrc); } +int +mfib_entry_is_sourced (fib_node_index_t mfib_entry_index, + mfib_source_t source) +{ + mfib_entry_t *mfib_entry; + + mfib_entry = mfib_entry_get(mfib_entry_index); + + return (NULL != mfib_entry_src_find(mfib_entry, source, NULL)); +} + static void mfib_entry_src_flush (mfib_entry_src_t *msrc) { diff --git a/src/vnet/mfib/mfib_entry.h b/src/vnet/mfib/mfib_entry.h index d4377878..96ee49f7 100644 --- a/src/vnet/mfib/mfib_entry.h +++ b/src/vnet/mfib/mfib_entry.h @@ -130,6 +130,8 @@ extern void mfib_entry_unlock(fib_node_index_t fib_entry_index); extern void mfib_entry_get_prefix(fib_node_index_t fib_entry_index, mfib_prefix_t *pfx); extern u32 mfib_entry_get_fib_index(fib_node_index_t fib_entry_index); +extern int mfib_entry_is_sourced(fib_node_index_t fib_entry_index, + mfib_source_t source); extern void mfib_entry_contribute_forwarding( fib_node_index_t mfib_entry_index, diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c index 7ffe8941..e5550adc 100644 --- a/src/vnet/mfib/mfib_table.c +++ b/src/vnet/mfib/mfib_table.c @@ -424,7 +424,8 @@ mfib_table_find (fib_protocol_t proto, u32 mfib_table_find_or_create_and_lock (fib_protocol_t proto, - u32 table_id) + u32 table_id, + mfib_source_t src) { mfib_table_t *mfib_table; fib_node_index_t fi; @@ -432,10 +433,10 @@ mfib_table_find_or_create_and_lock (fib_protocol_t proto, switch (proto) { case FIB_PROTOCOL_IP4: - fi = 
ip4_mfib_table_find_or_create_and_lock(table_id); + fi = ip4_mfib_table_find_or_create_and_lock(table_id, src); break; case FIB_PROTOCOL_IP6: - fi = ip6_mfib_table_find_or_create_and_lock(table_id); + fi = ip6_mfib_table_find_or_create_and_lock(table_id, src); break; case FIB_PROTOCOL_MPLS: default: @@ -451,6 +452,59 @@ mfib_table_find_or_create_and_lock (fib_protocol_t proto, return (fi); } +/** + * @brief Table flush context. Store the indicies of matching FIB entries + * that need to be removed. + */ +typedef struct mfib_table_flush_ctx_t_ +{ + /** + * The list of entries to flush + */ + fib_node_index_t *mftf_entries; + + /** + * The source we are flushing + */ + mfib_source_t mftf_source; +} mfib_table_flush_ctx_t; + +static int +mfib_table_flush_cb (fib_node_index_t mfib_entry_index, + void *arg) +{ + mfib_table_flush_ctx_t *ctx = arg; + + if (mfib_entry_is_sourced(mfib_entry_index, ctx->mftf_source)) + { + vec_add1(ctx->mftf_entries, mfib_entry_index); + } + return (1); +} + +void +mfib_table_flush (u32 mfib_index, + fib_protocol_t proto, + mfib_source_t source) +{ + fib_node_index_t *mfib_entry_index; + mfib_table_flush_ctx_t ctx = { + .mftf_entries = NULL, + .mftf_source = source, + }; + + mfib_table_walk(mfib_index, proto, + mfib_table_flush_cb, + &ctx); + + vec_foreach(mfib_entry_index, ctx.mftf_entries) + { + mfib_table_entry_delete_index(*mfib_entry_index, source); + } + + vec_free(ctx.mftf_entries); +} + static void mfib_table_destroy (mfib_table_t *mfib_table) { @@ -472,27 +526,43 @@ mfib_table_destroy (mfib_table_t *mfib_table) void mfib_table_unlock (u32 fib_index, - fib_protocol_t proto) + fib_protocol_t proto, + mfib_source_t source) { mfib_table_t *mfib_table; mfib_table = mfib_table_get(fib_index, proto); - mfib_table->mft_locks--; + mfib_table->mft_locks[source]--; + mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]--; + + if (0 == mfib_table->mft_locks[source]) + { + /* + * The source no longer needs the table. 
flush any routes + * from it just in case + */ + mfib_table_flush(fib_index, proto, source); + } - if (0 == mfib_table->mft_locks) + if (0 == mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]) { - mfib_table_destroy(mfib_table); + /* + * no more locak from any source - kill it + */ + mfib_table_destroy(mfib_table); } } void mfib_table_lock (u32 fib_index, - fib_protocol_t proto) + fib_protocol_t proto, + mfib_source_t source) { mfib_table_t *mfib_table; mfib_table = mfib_table_get(fib_index, proto); - mfib_table->mft_locks++; + mfib_table->mft_locks[source]++; + mfib_table->mft_locks[MFIB_TABLE_TOTAL_LOCKS]++; } void diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h index 83aa04ef..c6b0b097 100644 --- a/src/vnet/mfib/mfib_table.h +++ b/src/vnet/mfib/mfib_table.h @@ -22,6 +22,12 @@ #include +/** + * Keep a lock per-source and a total + */ +#define MFIB_TABLE_N_LOCKS (MFIB_N_SOURCES+1) +#define MFIB_TABLE_TOTAL_LOCKS MFIB_N_SOURCES + /** * @brief * A protocol Independent IP multicast FIB table @@ -47,7 +53,7 @@ typedef struct mfib_table_t_ /** * number of locks on the table */ - u16 mft_locks; + u16 mft_locks[MFIB_TABLE_N_LOCKS]; /** * Table ID (hash key) for this FIB. @@ -259,7 +265,8 @@ extern fib_node_index_t mfib_table_entry_special_add(u32 fib_index, * the source to flush */ extern void mfib_table_flush(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + mfib_source_t source); /** * @brief @@ -307,9 +314,13 @@ extern u32 mfib_table_find(fib_protocol_t proto, u32 table_id); * * @return fib_index * The index of the FIB + * + * @param source + * The ID of the client/source. */ extern u32 mfib_table_find_or_create_and_lock(fib_protocol_t proto, - u32 table_id); + u32 table_id, + mfib_source_t source); /** @@ -321,9 +332,13 @@ extern u32 mfib_table_find_or_create_and_lock(fib_protocol_t proto, * * @paran proto * The protocol of the FIB (and thus the entries therein) + * + * @param source + * The ID of the client/source. 
*/ extern void mfib_table_unlock(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + mfib_source_t source); /** * @brief @@ -335,9 +350,13 @@ extern void mfib_table_unlock(u32 fib_index, * * @paran proto * The protocol of the FIB (and thus the entries therein) + * + * @param source + * The ID of the client/source. */ extern void mfib_table_lock(u32 fib_index, - fib_protocol_t proto); + fib_protocol_t proto, + mfib_source_t source); /** * @brief diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c index 57787eca..3055844d 100644 --- a/src/vnet/mfib/mfib_test.c +++ b/src/vnet/mfib/mfib_test.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -366,7 +367,7 @@ mfib_test_i (fib_protocol_t PROTO, MFIB_TEST(3 == adj_mcast_db_size(), "3 MCAST adjs"); /* Find or create FIB table 11 */ - fib_index = mfib_table_find_or_create_and_lock(PROTO, 11); + fib_index = mfib_table_find_or_create_and_lock(PROTO, 11, MFIB_SOURCE_API); mfib_prefix_t pfx_dft = { .fp_len = 0, @@ -1113,9 +1114,10 @@ mfib_test_i (fib_protocol_t PROTO, /* * MPLS enable an interface so we get the MPLS table created */ + mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API); mpls_sw_interface_enable_disable(&mpls_main, tm->hw[0]->sw_if_index, - 1); + 1, 0); lfei = fib_table_entry_update_one_path(0, // default MPLS Table &pfx_3500, @@ -1192,7 +1194,7 @@ mfib_test_i (fib_protocol_t PROTO, /* * Unlock the table - it's the last lock so should be gone thereafter */ - mfib_table_unlock(fib_index, PROTO); + mfib_table_unlock(fib_index, PROTO, MFIB_SOURCE_API); MFIB_TEST((FIB_NODE_INDEX_INVALID == mfib_table_find(PROTO, fib_index)), @@ -1207,7 +1209,8 @@ mfib_test_i (fib_protocol_t PROTO, */ mpls_sw_interface_enable_disable(&mpls_main, tm->hw[0]->sw_if_index, - 0); + 0, 0); + mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API); /* * test we've leaked no resources diff --git a/src/vnet/mfib/mfib_types.h b/src/vnet/mfib/mfib_types.h index 
863fad16..50aede04 100644 --- a/src/vnet/mfib/mfib_types.h +++ b/src/vnet/mfib/mfib_types.h @@ -166,9 +166,10 @@ typedef enum mfib_source_t_ MFIB_SOURCE_VXLAN, MFIB_SOURCE_DHCP, MFIB_SOURCE_SRv6, - MFIB_SOURCE_DEFAULT_ROUTE, MFIB_SOURCE_GTPU, MFIB_SOURCE_VXLAN_GPE, + MFIB_SOURCE_RR, + MFIB_SOURCE_DEFAULT_ROUTE, } mfib_source_t; #define MFIB_SOURCE_NAMES { \ @@ -178,11 +179,14 @@ typedef enum mfib_source_t_ [MFIB_SOURCE_DHCP] = "DHCP", \ [MFIB_SOURCE_VXLAN] = "VXLAN", \ [MFIB_SOURCE_SRv6] = "SRv6", \ - [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \ [MFIB_SOURCE_GTPU] = "GTPU", \ [MFIB_SOURCE_VXLAN_GPE] = "VXLAN-GPE", \ + [MFIB_SOURCE_RR] = "Recursive-resolution", \ + [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \ } +#define MFIB_N_SOURCES (MFIB_SOURCE_DEFAULT_ROUTE) + /** * \brief Compare two prefixes for equality */ diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c index a085aaa2..d7c8e7d3 100644 --- a/src/vnet/mpls/interface.c +++ b/src/vnet/mpls/interface.c @@ -35,25 +35,33 @@ mpls_sw_interface_is_enabled (u32 sw_if_index) return (mm->mpls_enabled_by_sw_if_index[sw_if_index]); } -void +int mpls_sw_interface_enable_disable (mpls_main_t * mm, u32 sw_if_index, - u8 is_enable) + u8 is_enable, + u8 is_api) { fib_node_index_t lfib_index; vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0); + lfib_index = fib_table_find(FIB_PROTOCOL_MPLS, + MPLS_FIB_DEFAULT_TABLE_ID); + + if (~0 == lfib_index) + return VNET_API_ERROR_NO_SUCH_FIB; + /* * enable/disable only on the 1<->0 transition */ if (is_enable) { if (1 != ++mm->mpls_enabled_by_sw_if_index[sw_if_index]) - return; + return (0); + + fib_table_lock(lfib_index, FIB_PROTOCOL_MPLS, + (is_api? 
FIB_SOURCE_API: FIB_SOURCE_CLI)); - lfib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS, - MPLS_FIB_DEFAULT_TABLE_ID); vec_validate(mm->fib_index_by_sw_if_index, 0); mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index; } @@ -61,15 +69,17 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm, { ASSERT(mm->mpls_enabled_by_sw_if_index[sw_if_index] > 0); if (0 != --mm->mpls_enabled_by_sw_if_index[sw_if_index]) - return; + return (0); fib_table_unlock(mm->fib_index_by_sw_if_index[sw_if_index], - FIB_PROTOCOL_MPLS); + FIB_PROTOCOL_MPLS, + (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI)); } vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled", sw_if_index, !is_enable, 0, 0); + return (0); } static clib_error_t * @@ -101,7 +111,7 @@ mpls_interface_enable_disable (vlib_main_t * vm, goto done; } - mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable); + mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable, 0); done: return error; diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c index 5021ac23..7bdfd8c7 100644 --- a/src/vnet/mpls/mpls.c +++ b/src/vnet/mpls/mpls.c @@ -536,6 +536,78 @@ VLIB_CLI_COMMAND (mpls_local_label_command, static) = { .short_help = "Create/Delete MPL local labels", }; +clib_error_t * +vnet_mpls_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, + vlib_cli_command_t * cmdo) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; + u32 table_id, is_add; + + is_add = 1; + table_id = ~0; + + /* Get a line of input. 
*/ + if (!unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "add")) + is_add = 1; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 == table_id) + { + error = clib_error_return (0, "No table id"); + goto done; + } + else if (0 == table_id) + { + error = clib_error_return (0, "Can't change the default table"); + goto done; + } + else + { + if (is_add) + { + mpls_table_create (table_id, 0); + } + else + { + mpls_table_delete (table_id, 0); + } + } + + done: + unformat_free (line_input); + return error; +} + +/* *INDENT-ON* */ +/*? + * This command is used to add or delete MPLS Tables. All + * Tables must be explicitly added before that can be used, + * Including the default table. + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_table_command, static) = { + .path = "mpla table", + .short_help = "mpls table [add|del] ", + .function = vnet_mpls_table_cmd, + .is_mp_safe = 1, +}; + int mpls_fib_reset_labels (u32 fib_id) { @@ -546,12 +618,8 @@ mpls_fib_reset_labels (u32 fib_id) static clib_error_t * mpls_init (vlib_main_t * vm) { - mpls_main_t * mm = &mpls_main; clib_error_t * error; - mm->vlib_main = vm; - mm->vnet_main = vnet_get_main(); - if ((error = vlib_call_init_function (vm, ip_main_init))) return error; diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h index b0125e60..31cb1746 100644 --- a/src/vnet/mpls/mpls.h +++ b/src/vnet/mpls/mpls.h @@ -56,10 +56,6 @@ typedef struct { /* IP4 enabled count by software interface */ u8 * mpls_enabled_by_sw_if_index; - - /* convenience */ - vlib_main_t * vlib_main; - vnet_main_t * vnet_main; } mpls_main_t; extern mpls_main_t mpls_main; @@ -77,8 +73,6 @@ extern vlib_node_registration_t mpls_midchain_node; /* Parse mpls protocol as 0xXXXX or protocol name. 
In either host or network byte order. */ -unformat_function_t unformat_mpls_protocol_host_byte_order; -unformat_function_t unformat_mpls_protocol_net_byte_order; unformat_function_t unformat_mpls_label_net_byte_order; unformat_function_t unformat_mpls_unicast_label; @@ -86,9 +80,10 @@ unformat_function_t unformat_mpls_unicast_label; unformat_function_t unformat_mpls_header; unformat_function_t unformat_pg_mpls_header; -void mpls_sw_interface_enable_disable (mpls_main_t * mm, - u32 sw_if_index, - u8 is_enable); +int mpls_sw_interface_enable_disable (mpls_main_t * mm, + u32 sw_if_index, + u8 is_enable, + u8 is_api); u8 mpls_sw_interface_is_enabled (u32 sw_if_index); @@ -103,4 +98,7 @@ mpls_fib_index_cmp(void * a1, void * a2); int mpls_label_cmp(void * a1, void * a2); +void mpls_table_create(uint32_t table_id, u8 is_api); +void mpls_table_delete(uint32_t table_id, u8 is_api); + #endif /* included_vnet_mpls_h */ diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index a44b1a25..38f5b014 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -58,6 +58,29 @@ _(MPLS_FIB_DUMP, mpls_fib_dump) extern void stats_dslock_with_hint (int hint, int tag); extern void stats_dsunlock (void); +void +mpls_table_delete (u32 table_id, u8 is_api) +{ + u32 fib_index; + + /* + * The MPLS defult table must also be explicitly created via the API. + * So in contrast to IP, it gets no special treatment here. + * + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + */ + fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id); + + if (~0 != fib_index) + { + fib_table_unlock (fib_index, + FIB_PROTOCOL_MPLS, + (is_api ? 
FIB_SOURCE_API : FIB_SOURCE_CLI)); + } +} + void vl_api_mpls_table_add_del_t_handler (vl_api_mpls_table_add_del_t * mp) { @@ -68,6 +91,13 @@ vl_api_mpls_table_add_del_t_handler (vl_api_mpls_table_add_del_t * mp) vnm = vnet_get_main (); vnm->api_errno = 0; + if (mp->mt_is_add) + mpls_table_create (ntohl (mp->mt_table_id), 1); + else + mpls_table_delete (ntohl (mp->mt_table_id), 1); + + rv = (rv == 0) ? vnm->api_errno : rv; + REPLY_MACRO (VL_API_MPLS_TABLE_ADD_DEL_REPLY); } @@ -82,14 +112,7 @@ mpls_ip_bind_unbind_handler (vnet_main_t * vnm, if (~0 == mpls_fib_index) { - if (mp->mb_create_table_if_needed) - { - mpls_fib_index = - fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS, - ntohl (mp->mb_mpls_table_id)); - } - else - return VNET_API_ERROR_NO_SUCH_FIB; + return VNET_API_ERROR_NO_SUCH_FIB; } ip_fib_index = fib_table_find ((mp->mb_is_ip4 ? @@ -170,7 +193,6 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, mp->mr_next_hop_sw_if_index, pfx.fp_payload_proto, mp->mr_next_hop_table_id, - mp->mr_create_table_if_needed, mp->mr_is_rpf_id, &fib_index, &next_hop_fib_index); @@ -235,6 +257,32 @@ vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp) REPLY_MACRO (VL_API_MPLS_ROUTE_ADD_DEL_REPLY); } +void +mpls_table_create (u32 table_id, u8 is_api) +{ + u32 fib_index; + + /* + * The MPLS defult table must also be explicitly created via the API. + * So in contrast to IP, it gets no special treatment here. + */ + + /* + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + */ + fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id); + + if (~0 == fib_index) + { + fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS, + table_id, + (is_api ? 
+ FIB_SOURCE_API : FIB_SOURCE_CLI)); + } +} + static void vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) { diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c index f427bbf3..2f90993a 100755 --- a/src/vnet/srv6/sr_policy_rewrite.c +++ b/src/vnet/srv6/sr_policy_rewrite.c @@ -595,8 +595,10 @@ sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments, if (sm->fib_table_ip6 == (u32) ~ 0) { sm->fib_table_ip6 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, + FIB_SOURCE_SR, "SRv6 steering of IP6 prefixes through BSIDs"); sm->fib_table_ip4 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, + FIB_SOURCE_SR, "SRv6 steering of IP4 prefixes through BSIDs"); } @@ -684,8 +686,8 @@ sr_policy_del (ip6_address_t * bsid, u32 index) /* If FIB empty unlock it */ if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies)) { - fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6); - fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6); + fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6, FIB_SOURCE_SR); + fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6, FIB_SOURCE_SR); sm->fib_table_ip6 = (u32) ~ 0; sm->fib_table_ip4 = (u32) ~ 0; } diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c index 57fe21f6..cf4e81ab 100755 --- a/src/vnet/srv6/sr_steering.c +++ b/src/vnet/srv6/sr_steering.c @@ -159,8 +159,10 @@ sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index, /* If no more SR policies or steering policies */ if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies)) { - fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6); - fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6); + fib_table_unlock (sm->fib_table_ip6, + FIB_PROTOCOL_IP6, FIB_SOURCE_SR); + fib_table_unlock (sm->fib_table_ip4, + FIB_PROTOCOL_IP6, FIB_SOURCE_SR); sm->fib_table_ip6 = (u32) ~ 0; sm->fib_table_ip4 = (u32) ~ 0; } diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index f9c3129c..044ddb5b 
100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -699,8 +699,9 @@ static void VALIDATE_SW_IF_INDEX (mp); - mpls_sw_interface_enable_disable (&mpls_main, - ntohl (mp->sw_if_index), mp->enable); + rv = mpls_sw_interface_enable_disable (&mpls_main, + ntohl (mp->sw_if_index), + mp->enable, 1); BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_SW_INTERFACE_SET_MPLS_ENABLE_REPLY); diff --git a/src/vpp/api/custom_dump.c b/src/vpp/api/custom_dump.c index 1353fe28..be74b83a 100644 --- a/src/vpp/api/custom_dump.c +++ b/src/vpp/api/custom_dump.c @@ -560,9 +560,6 @@ static void *vl_api_ip_add_del_route_t_print if (mp->table_id != 0) s = format (s, "vrf %d ", ntohl (mp->table_id)); - if (mp->create_vrf_if_needed) - s = format (s, "create-vrf "); - if (mp->next_hop_weight != 1) s = format (s, "weight %d ", mp->next_hop_weight); diff --git a/test/test_dhcp.py b/test/test_dhcp.py index 6fc29182..fe97f6c9 100644 --- a/test/test_dhcp.py +++ b/test/test_dhcp.py @@ -6,7 +6,7 @@ import struct from framework import VppTestCase, VppTestRunner from vpp_neighbor import VppNeighbor -from vpp_ip_route import find_route +from vpp_ip_route import find_route, VppIpTable from util import mk_ll_addr from scapy.layers.l2 import Ether, getmacbyip, ARP @@ -34,9 +34,19 @@ class TestDHCP(VppTestCase): # create 3 pg interfaces self.create_pg_interfaces(range(4)) + self.tables = [] # pg0 and 1 are IP configured in VRF 0 and 1. 
# pg2 and 3 are non IP-configured in VRF 0 and 1 + table_id = 0 + for table_id in range(1, 4): + tbl4 = VppIpTable(self, table_id) + tbl4.add_vpp_config() + self.tables.append(tbl4) + tbl6 = VppIpTable(self, table_id, is_ip6=1) + tbl6.add_vpp_config() + self.tables.append(tbl6) + table_id = 0 for i in self.pg_interfaces[:2]: i.admin_up() @@ -56,11 +66,15 @@ class TestDHCP(VppTestCase): table_id += 1 def tearDown(self): - super(TestDHCP, self).tearDown() - for i in self.pg_interfaces: + for i in self.pg_interfaces[:2]: i.unconfig_ip4() i.unconfig_ip6() + + for i in self.pg_interfaces: + i.set_table_ip4(0) + i.set_table_ip6(0) i.admin_down() + super(TestDHCP, self).tearDown() def send_and_assert_no_replies(self, intf, pkts, remark): intf.add_stream(pkts) @@ -667,6 +681,8 @@ class TestDHCP(VppTestCase): "DHCP cleanup VRF 0") self.send_and_assert_no_replies(self.pg3, pkts_disc_vrf1, "DHCP cleanup VRF 1") + self.pg2.unconfig_ip4() + self.pg3.unconfig_ip4() def test_dhcp6_proxy(self): """ DHCPv6 Proxy""" @@ -1045,6 +1061,8 @@ class TestDHCP(VppTestCase): server_table_id=0, is_ipv6=1, is_add=0) + self.pg2.unconfig_ip6() + self.pg3.unconfig_ip6() def test_dhcp_client(self): """ DHCP Client""" diff --git a/test/test_gre.py b/test/test_gre.py index 1afc44fb..9046b05f 100644 --- a/test/test_gre.py +++ b/test/test_gre.py @@ -6,7 +6,7 @@ from logging import * from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppDot1QSubint from vpp_gre_interface import VppGreInterface, VppGre6Interface -from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto +from vpp_ip_route import VppIpRoute, VppRoutePath, DpoProto, VppIpTable from vpp_papi_provider import L2_VTR_OP from scapy.packet import Raw @@ -30,6 +30,9 @@ class TestGRE(VppTestCase): # create 3 pg interfaces - set one in a non-default table. 
self.create_pg_interfaces(range(3)) + + self.tbl = VppIpTable(self, 1) + self.tbl.add_vpp_config() self.pg1.set_table_ip4(1) for i in self.pg_interfaces: @@ -43,11 +46,12 @@ class TestGRE(VppTestCase): self.pg2.resolve_ndp() def tearDown(self): - super(TestGRE, self).tearDown() for i in self.pg_interfaces: i.unconfig_ip4() i.unconfig_ip6() i.admin_down() + self.pg1.set_table_ip4(0) + super(TestGRE, self).tearDown() def create_stream_ip4(self, src_if, src_ip, dst_ip): pkts = [] diff --git a/test/test_ip4.py b/test/test_ip4.py index 7a7098c3..55d16735 100644 --- a/test/test_ip4.py +++ b/test/test_ip4.py @@ -6,7 +6,8 @@ import unittest from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppSubInterface, VppDot1QSubint, VppDot1ADSubint from vpp_ip_route import VppIpRoute, VppRoutePath, VppIpMRoute, \ - VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind + VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind, \ + VppMplsTable from scapy.packet import Raw from scapy.layers.l2 import Ether, Dot1Q, ARP @@ -774,6 +775,8 @@ class TestIPLoadBalance(VppTestCase): super(TestIPLoadBalance, self).setUp() self.create_pg_interfaces(range(5)) + mpls_tbl = VppMplsTable(self, 0) + mpls_tbl.add_vpp_config() for i in self.pg_interfaces: i.admin_up() @@ -782,11 +785,11 @@ class TestIPLoadBalance(VppTestCase): i.enable_mpls() def tearDown(self): - super(TestIPLoadBalance, self).tearDown() for i in self.pg_interfaces: i.disable_mpls() i.unconfig_ip4() i.admin_down() + super(TestIPLoadBalance, self).tearDown() def send_and_expect_load_balancing(self, input, pkts, outputs): input.add_stream(pkts) @@ -966,6 +969,8 @@ class TestIPVlan0(VppTestCase): super(TestIPVlan0, self).setUp() self.create_pg_interfaces(range(2)) + mpls_tbl = VppMplsTable(self, 0) + mpls_tbl.add_vpp_config() for i in self.pg_interfaces: i.admin_up() @@ -974,11 +979,11 @@ class TestIPVlan0(VppTestCase): i.enable_mpls() def tearDown(self): - super(TestIPVlan0, self).tearDown() 
for i in self.pg_interfaces: i.disable_mpls() i.unconfig_ip4() i.admin_down() + super(TestIPVlan0, self).tearDown() def send_and_expect(self, input, pkts, output): input.add_stream(pkts) diff --git a/test/test_ip4_vrf_multi_instance.py b/test/test_ip4_vrf_multi_instance.py index b73ac948..5a8d6760 100644 --- a/test/test_ip4_vrf_multi_instance.py +++ b/test/test_ip4_vrf_multi_instance.py @@ -172,9 +172,10 @@ class TestIp4VrfMultiInst(VppTestCase): pg_if = self.pg_if_by_vrf_id[vrf_id][0] dest_addr = pg_if.remote_hosts[0].ip4n dest_addr_len = 24 + self.vapi.ip_table_add_del(vrf_id, is_add=1) self.vapi.ip_add_del_route( dest_addr, dest_addr_len, pg_if.local_ip4n, - table_id=vrf_id, create_vrf_if_needed=1, is_multipath=1) + table_id=vrf_id, is_multipath=1) self.logger.info("IPv4 VRF ID %d created" % vrf_id) if vrf_id not in self.vrf_list: self.vrf_list.append(vrf_id) @@ -216,6 +217,7 @@ class TestIp4VrfMultiInst(VppTestCase): self.logger.info("IPv4 VRF ID %d reset" % vrf_id) self.logger.debug(self.vapi.ppcli("show ip fib")) self.logger.debug(self.vapi.ppcli("show ip arp")) + self.vapi.ip_table_add_del(vrf_id, is_add=0) def create_stream(self, src_if, packet_sizes): """ diff --git a/test/test_ip6.py b/test/test_ip6.py index 285ce181..aad3713c 100644 --- a/test/test_ip6.py +++ b/test/test_ip6.py @@ -8,7 +8,7 @@ from vpp_sub_interface import VppSubInterface, VppDot1QSubint from vpp_pg_interface import is_ipv6_misc from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, VppIpMRoute, \ VppMRoutePath, MRouteItfFlags, MRouteEntryFlags, VppMplsIpBind, \ - VppMplsRoute, DpoProto + VppMplsRoute, DpoProto, VppMplsTable from vpp_neighbor import find_nbr, VppNeighbor from scapy.packet import Raw @@ -1260,6 +1260,9 @@ class TestIP6LoadBalance(VppTestCase): self.create_pg_interfaces(range(5)) + mpls_tbl = VppMplsTable(self, 0) + mpls_tbl.add_vpp_config() + for i in self.pg_interfaces: i.admin_up() i.config_ip6() @@ -1267,11 +1270,11 @@ class TestIP6LoadBalance(VppTestCase): 
i.enable_mpls() def tearDown(self): - super(TestIP6LoadBalance, self).tearDown() for i in self.pg_interfaces: i.unconfig_ip6() i.admin_down() i.disable_mpls() + super(TestIP6LoadBalance, self).tearDown() def send_and_expect_load_balancing(self, input, pkts, outputs): input.add_stream(pkts) diff --git a/test/test_ip6_vrf_multi_instance.py b/test/test_ip6_vrf_multi_instance.py index af80b5ba..769cb2e5 100644 --- a/test/test_ip6_vrf_multi_instance.py +++ b/test/test_ip6_vrf_multi_instance.py @@ -187,9 +187,10 @@ class TestIP6VrfMultiInst(VppTestCase): pg_if = self.pg_if_by_vrf_id[vrf_id][0] dest_addr = pg_if.remote_hosts[0].ip6n dest_addr_len = 64 + self.vapi.ip_table_add_del(vrf_id, is_add=1, is_ipv6=1) self.vapi.ip_add_del_route( dest_addr, dest_addr_len, pg_if.local_ip6n, is_ipv6=1, - table_id=vrf_id, create_vrf_if_needed=1, is_multipath=1) + table_id=vrf_id, is_multipath=1) self.logger.info("IPv6 VRF ID %d created" % vrf_id) if vrf_id not in self.vrf_list: self.vrf_list.append(vrf_id) @@ -232,6 +233,7 @@ class TestIP6VrfMultiInst(VppTestCase): self.logger.info("IPv6 VRF ID %d reset" % vrf_id) self.logger.debug(self.vapi.ppcli("show ip6 fib")) self.logger.debug(self.vapi.ppcli("show ip6 neighbors")) + self.vapi.ip_table_add_del(vrf_id, is_add=0, is_ipv6=1) def create_stream(self, src_if, packet_sizes): """ diff --git a/test/test_ip_mcast.py b/test/test_ip_mcast.py index 276555d6..7cad683c 100644 --- a/test/test_ip_mcast.py +++ b/test/test_ip_mcast.py @@ -5,7 +5,7 @@ import unittest from framework import VppTestCase, VppTestRunner from vpp_sub_interface import VppSubInterface, VppDot1QSubint, VppDot1ADSubint from vpp_ip_route import VppIpMRoute, VppMRoutePath, VppMFibSignal, \ - MRouteItfFlags, MRouteEntryFlags + MRouteItfFlags, MRouteEntryFlags, VppIpTable from scapy.packet import Raw from scapy.layers.l2 import Ether @@ -44,16 +44,37 @@ class TestIPMcast(VppTestCase): super(TestIPMcast, self).setUp() # create 8 pg interfaces - self.create_pg_interfaces(range(8)) + 
self.create_pg_interfaces(range(9)) # setup interfaces - for i in self.pg_interfaces: + for i in self.pg_interfaces[:8]: i.admin_up() i.config_ip4() i.config_ip6() i.resolve_arp() i.resolve_ndp() + # one more in a vrf + tbl4 = VppIpTable(self, 10) + tbl4.add_vpp_config() + self.pg8.set_table_ip4(10) + self.pg8.config_ip4() + + tbl6 = VppIpTable(self, 10, is_ip6=1) + tbl6.add_vpp_config() + self.pg8.set_table_ip6(10) + self.pg8.config_ip6() + + def tearDown(self): + for i in self.pg_interfaces: + i.unconfig_ip4() + i.unconfig_ip6() + i.admin_down() + + self.pg8.set_table_ip4(0) + self.pg8.set_table_ip6(0) + super(TestIPMcast, self).tearDown() + def create_stream_ip4(self, src_if, src_ip, dst_ip, payload_size=0): pkts = [] # default to small packet sizes @@ -663,6 +684,77 @@ class TestIPMcast(VppTestCase): # route_232_1_1_1.remove_vpp_config() + def test_ip_mcast_vrf(self): + """ IP Multicast Replication in non-default table""" + + # + # An (S,G). + # one accepting interface, pg0, 2 forwarding interfaces + # + route_1_1_1_1_232_1_1_1 = VppIpMRoute( + self, + "1.1.1.1", + "232.1.1.1", 64, + MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE, + [VppMRoutePath(self.pg8.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT), + VppMRoutePath(self.pg1.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD), + VppMRoutePath(self.pg2.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)], + table_id=10) + route_1_1_1_1_232_1_1_1.add_vpp_config() + + # + # a stream that matches the route for (1.1.1.1,232.1.1.1) + # small packets + # + self.vapi.cli("clear trace") + tx = self.create_stream_ip4(self.pg8, "1.1.1.1", "232.1.1.1") + self.pg8.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # We expect replications on Pg1 & 2 + self.verify_capture_ip4(self.pg1, tx) + self.verify_capture_ip4(self.pg2, tx) + + def test_ip6_mcast_vrf(self): + """ IPv6 Multicast Replication in non-default table""" + + # + # An (S,G). 
+ # one accepting interface, pg0, 2 forwarding interfaces + # + route_2001_ff01_1 = VppIpMRoute( + self, + "2001::1", + "ff01::1", 256, + MRouteEntryFlags.MFIB_ENTRY_FLAG_NONE, + [VppMRoutePath(self.pg8.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_ACCEPT), + VppMRoutePath(self.pg1.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD), + VppMRoutePath(self.pg2.sw_if_index, + MRouteItfFlags.MFIB_ITF_FLAG_FORWARD)], + table_id=10, + is_ip6=1) + route_2001_ff01_1.add_vpp_config() + + # + # a stream that matches the route for (2001::1, ff00::1) + # + self.vapi.cli("clear trace") + tx = self.create_stream_ip6(self.pg8, "2001::1", "ff01::1") + self.pg8.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + # We expect replications on Pg1, 2, + self.verify_capture_ip6(self.pg1, tx) + self.verify_capture_ip6(self.pg2, tx) if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/test_mpls.py b/test/test_mpls.py index b2226a74..460a32d1 100644 --- a/test/test_mpls.py +++ b/test/test_mpls.py @@ -6,7 +6,7 @@ import socket from framework import VppTestCase, VppTestRunner from vpp_ip_route import VppIpRoute, VppRoutePath, VppMplsRoute, \ VppMplsIpBind, VppIpMRoute, VppMRoutePath, \ - MRouteItfFlags, MRouteEntryFlags, DpoProto + MRouteItfFlags, MRouteEntryFlags, DpoProto, VppIpTable, VppMplsTable from vpp_mpls_tunnel_interface import VppMPLSTunnelInterface from scapy.packet import Raw @@ -60,9 +60,23 @@ class TestMPLS(VppTestCase): # setup both interfaces # assign them different tables. 
table_id = 0 + self.tables = [] + + tbl = VppMplsTable(self, 0) + tbl.add_vpp_config() + self.tables.append(tbl) for i in self.pg_interfaces: i.admin_up() + + if table_id != 0: + tbl = VppIpTable(self, table_id) + tbl.add_vpp_config() + self.tables.append(tbl) + tbl = VppIpTable(self, table_id, is_ip6=1) + tbl.add_vpp_config() + self.tables.append(tbl) + i.set_table_ip4(table_id) i.set_table_ip6(table_id) i.config_ip4() @@ -73,12 +87,15 @@ class TestMPLS(VppTestCase): table_id += 1 def tearDown(self): - super(TestMPLS, self).tearDown() for i in self.pg_interfaces: i.unconfig_ip4() i.unconfig_ip6() i.ip6_disable() + i.set_table_ip4(0) + i.set_table_ip6(0) + i.disable_mpls() i.admin_down() + super(TestMPLS, self).tearDown() # the default of 64 matches the IP packet TTL default def create_stream_labelled_ip4( @@ -1092,6 +1109,9 @@ class TestMPLSDisabled(VppTestCase): # create 2 pg interfaces self.create_pg_interfaces(range(2)) + self.tbl = VppMplsTable(self, 0) + self.tbl.add_vpp_config() + # PG0 is MPLS enalbed self.pg0.admin_up() self.pg0.config_ip4() @@ -1102,11 +1122,13 @@ class TestMPLSDisabled(VppTestCase): self.pg1.admin_up() def tearDown(self): - super(TestMPLSDisabled, self).tearDown() for i in self.pg_interfaces: i.unconfig_ip4() i.admin_down() + self.pg0.disable_mpls() + super(TestMPLSDisabled, self).tearDown() + def send_and_assert_no_replies(self, intf, pkts, remark): intf.add_stream(pkts) self.pg_enable_capture(self.pg_interfaces) @@ -1174,6 +1196,13 @@ class TestMPLSPIC(VppTestCase): # create 2 pg interfaces self.create_pg_interfaces(range(4)) + mpls_tbl = VppMplsTable(self, 0) + mpls_tbl.add_vpp_config() + tbl4 = VppIpTable(self, 1) + tbl4.add_vpp_config() + tbl6 = VppIpTable(self, 1, is_ip6=1) + tbl6.add_vpp_config() + # core links self.pg0.admin_up() self.pg0.config_ip4() @@ -1201,14 +1230,15 @@ class TestMPLSPIC(VppTestCase): self.pg3.resolve_ndp() def tearDown(self): - super(TestMPLSPIC, self).tearDown() self.pg0.disable_mpls() + 
self.pg1.disable_mpls() for i in self.pg_interfaces: i.unconfig_ip4() i.unconfig_ip6() i.set_table_ip4(0) i.set_table_ip6(0) i.admin_down() + super(TestMPLSPIC, self).tearDown() def test_mpls_ibgp_pic(self): """ MPLS iBGP PIC edge convergence @@ -1534,24 +1564,30 @@ class TestMPLSL2(VppTestCase): # create 2 pg interfaces self.create_pg_interfaces(range(2)) + # create the default MPLS table + self.tables = [] + tbl = VppMplsTable(self, 0) + tbl.add_vpp_config() + self.tables.append(tbl) + # use pg0 as the core facing interface self.pg0.admin_up() self.pg0.config_ip4() self.pg0.resolve_arp() self.pg0.enable_mpls() - # use the other 2 for customer facg L2 links + # use the other 2 for customer facing L2 links for i in self.pg_interfaces[1:]: i.admin_up() def tearDown(self): - super(TestMPLSL2, self).tearDown() for i in self.pg_interfaces[1:]: i.admin_down() self.pg0.disable_mpls() self.pg0.unconfig_ip4() self.pg0.admin_down() + super(TestMPLSL2, self).tearDown() def verify_capture_tunneled_ethernet(self, capture, sent, mpls_labels, ttl=255, top=None): diff --git a/test/test_nat.py b/test/test_nat.py index 1f2d17ab..73e9e217 100644 --- a/test/test_nat.py +++ b/test/test_nat.py @@ -549,6 +549,8 @@ class TestNAT44(MethodHolder): cls.pg0.configure_ipv4_neighbors() cls.overlapping_interfaces = list(list(cls.pg_interfaces[4:7])) + cls.vapi.ip_table_add_del(10, is_add=1) + cls.vapi.ip_table_add_del(20, is_add=1) cls.pg4._local_ip4 = "172.16.255.1" cls.pg4._local_ip4n = socket.inet_pton(socket.AF_INET, i.local_ip4) @@ -1797,6 +1799,8 @@ class TestNAT44(MethodHolder): self.pg0.unconfig_ip4() self.pg1.unconfig_ip4() + self.vapi.ip_table_add_del(vrf_id1, is_add=1) + self.vapi.ip_table_add_del(vrf_id2, is_add=1) self.pg0.set_table_ip4(vrf_id1) self.pg1.set_table_ip4(vrf_id2) self.pg0.config_ip4() @@ -1825,6 +1829,13 @@ class TestNAT44(MethodHolder): capture = self.pg2.get_capture(len(pkts)) self.verify_capture_out(capture, nat_ip2) + self.pg0.unconfig_ip4() + 
self.pg1.unconfig_ip4() + self.pg0.set_table_ip4(0) + self.pg1.set_table_ip4(0) + self.vapi.ip_table_add_del(vrf_id1, is_add=0) + self.vapi.ip_table_add_del(vrf_id2, is_add=0) + def test_vrf_feature_independent(self): """ NAT44 tenant VRF independent address pool mode """ @@ -3042,6 +3053,8 @@ class TestNAT64(MethodHolder): cls.ip6_interfaces.append(cls.pg_interfaces[2]) cls.ip4_interfaces = list(cls.pg_interfaces[1:2]) + cls.vapi.ip_table_add_del(cls.vrf1_id, is_add=1, is_ipv6=1) + cls.pg_interfaces[2].set_table_ip6(cls.vrf1_id) cls.pg0.generate_remote_hosts(2) diff --git a/test/test_neighbor.py b/test/test_neighbor.py index 1c7cc267..68dde2fb 100644 --- a/test/test_neighbor.py +++ b/test/test_neighbor.py @@ -5,7 +5,8 @@ from socket import AF_INET, AF_INET6, inet_pton from framework import VppTestCase, VppTestRunner from vpp_neighbor import VppNeighbor, find_nbr -from vpp_ip_route import VppIpRoute, VppRoutePath, find_route +from vpp_ip_route import VppIpRoute, VppRoutePath, find_route, \ + VppIpTable from scapy.packet import Raw from scapy.layers.l2 import Ether, ARP, Dot1Q @@ -39,11 +40,13 @@ class ARPTestCase(VppTestCase): self.pg1.config_ip6() # pg3 in a different VRF + self.tbl = VppIpTable(self, 1) + self.tbl.add_vpp_config() + self.pg3.set_table_ip4(1) self.pg3.config_ip4() def tearDown(self): - super(ARPTestCase, self).tearDown() self.pg0.unconfig_ip4() self.pg0.unconfig_ip6() @@ -51,10 +54,13 @@ class ARPTestCase(VppTestCase): self.pg1.unconfig_ip6() self.pg3.unconfig_ip4() + self.pg3.set_table_ip4(0) for i in self.pg_interfaces: i.admin_down() + super(ARPTestCase, self).tearDown() + def verify_arp_req(self, rx, smac, sip, dip): ether = rx[Ether] self.assertEqual(ether.dst, "ff:ff:ff:ff:ff:ff") @@ -1080,6 +1086,62 @@ class ARPTestCase(VppTestCase): self.pg0.remote_ip4, self.pg1.remote_hosts[1].ip4) + def test_arp_static(self): + """ ARP Static""" + self.pg2.generate_remote_hosts(3) + + # + # Add a static ARP entry + # + static_arp = VppNeighbor(self, + 
self.pg2.sw_if_index, + self.pg2.remote_hosts[1].mac, + self.pg2.remote_hosts[1].ip4, + is_static=1) + static_arp.add_vpp_config() + + # + # Add the connected prefix to the interface + # + self.pg2.config_ip4() + + # + # We should now find the adj-fib + # + self.assertTrue(find_nbr(self, + self.pg2.sw_if_index, + self.pg2.remote_hosts[1].ip4, + is_static=1)) + self.assertTrue(find_route(self, + self.pg2.remote_hosts[1].ip4, + 32)) + + # + # remove the connected + # + self.pg2.unconfig_ip4() + + # + # put the interface into table 1 + # + self.pg2.set_table_ip4(1) + + # + # configure the same connected and expect to find the + # adj fib in the new table + # + self.pg2.config_ip4() + self.assertTrue(find_route(self, + self.pg2.remote_hosts[1].ip4, + 32, + table_id=1)) + + # + # clean-up + # + self.pg2.unconfig_ip4() + self.pg2.set_table_ip4(0) + if __name__ == '__main__': unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_ip_route.py b/test/vpp_ip_route.py index 2c489e3c..b7993793 100644 --- a/test/vpp_ip_route.py +++ b/test/vpp_ip_route.py @@ -54,6 +54,46 @@ def find_route(test, ip_addr, len, table_id=0, inet=AF_INET): return False +class VppIpTable(VppObject): + + def __init__(self, + test, + table_id, + is_ip6=0): + self._test = test + self.table_id = table_id + self.is_ip6 = is_ip6 + + def add_vpp_config(self): + self._test.vapi.ip_table_add_del( + self.table_id, + is_ipv6=self.is_ip6, + is_add=1) + self._test.registry.register(self, self._test.logger) + + def remove_vpp_config(self): + self._test.vapi.ip_table_add_del( + self.table_id, + is_ipv6=self.is_ip6, + is_add=0) + + def query_vpp_config(self): + # find the default route + return find_route(self._test, + "::" if self.is_ip6 else "0.0.0.0", + 0, + self.table_id, + inet=AF_INET6 if self.is_ip6 == 1 else AF_INET) + + def __str__(self): + return self.object_id() + + def object_id(self): + return ("table-%s-%d" % + ("v6" if self.is_ip6 == 1 else "v4", + self.table_id)) + + class VppRoutePath(object): 
def __init__( @@ -391,6 +431,39 @@ class VppMplsIpBind(VppObject): self.dest_addr_len)) +class VppMplsTable(VppObject): + + def __init__(self, + test, + table_id): + self._test = test + self.table_id = table_id + + def add_vpp_config(self): + self._test.vapi.mpls_table_add_del( + self.table_id, + is_add=1) + self._test.registry.register(self, self._test.logger) + + def remove_vpp_config(self): + self._test.vapi.mpls_table_add_del( + self.table_id, + is_add=0) + + def query_vpp_config(self): + # find the default route + dump = self._test.vapi.mpls_fib_dump() + if len(dump): + return True + return False + + def __str__(self): + return self.object_id() + + def object_id(self): + return ("table-mpls-%d" % (self.table_id)) + + class VppMplsRoute(VppObject): """ MPLS Route/LSP diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index b70da026..519aff80 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -652,6 +652,24 @@ class VppPapiProvider(object): return self.api(self.papi.delete_loopback, {'sw_if_index': sw_if_index, }) + def ip_table_add_del(self, + table_id, + is_add=1, + is_ipv6=0): + """ + + :param table_id + :param is_add: (Default value = 1) + :param is_ipv6: (Default value = 0) + + """ + + return self.api( + self.papi.ip_table_add_del, + {'table_id': table_id, + 'is_add': is_add, + 'is_ipv6': is_ipv6}) + def ip_add_del_route( self, dst_address, @@ -664,7 +682,6 @@ class VppPapiProvider(object): next_hop_n_out_labels=0, next_hop_out_label_stack=[], next_hop_via_label=MPLS_LABEL_INVALID, - create_vrf_if_needed=0, is_resolve_host=0, is_resolve_attached=0, classify_table_index=0xFFFFFFFF, @@ -687,7 +704,6 @@ class VppPapiProvider(object): :param vrf_id: (Default value = 0) :param lookup_in_vrf: (Default value = 0) :param classify_table_index: (Default value = 0xFFFFFFFF) - :param create_vrf_if_needed: (Default value = 0) :param is_add: (Default value = 1) :param is_drop: (Default value = 0) :param is_ipv6: (Default value = 0) 
@@ -707,7 +723,6 @@ class VppPapiProvider(object): 'table_id': table_id, 'classify_table_index': classify_table_index, 'next_hop_table_id': next_hop_table_id, - 'create_vrf_if_needed': create_vrf_if_needed, 'is_add': is_add, 'is_drop': is_drop, 'is_unreach': is_unreach, @@ -912,6 +927,22 @@ class VppPapiProvider(object): def mpls_fib_dump(self): return self.api(self.papi.mpls_fib_dump, {}) + def mpls_table_add_del( + self, + table_id, + is_add=1): + """ + + :param table_id + :param is_add: (Default value = 1) + + """ + + return self.api( + self.papi.mpls_table_add_del, + {'mt_table_id': table_id, + 'mt_is_add': is_add}) + def mpls_route_add_del( self, label, @@ -925,7 +956,6 @@ class VppPapiProvider(object): next_hop_n_out_labels=0, next_hop_out_label_stack=[], next_hop_via_label=MPLS_LABEL_INVALID, - create_vrf_if_needed=0, is_resolve_host=0, is_resolve_attached=0, is_interface_rx=0, @@ -947,7 +977,6 @@ class VppPapiProvider(object): :param vrf_id: (Default value = 0) :param lookup_in_vrf: (Default value = 0) :param classify_table_index: (Default value = 0xFFFFFFFF) - :param create_vrf_if_needed: (Default value = 0) :param is_add: (Default value = 1) :param is_drop: (Default value = 0) :param is_ipv6: (Default value = 0) @@ -968,7 +997,6 @@ class VppPapiProvider(object): 'mr_eos': eos, 'mr_table_id': table_id, 'mr_classify_table_index': classify_table_index, - 'mr_create_table_if_needed': create_vrf_if_needed, 'mr_is_add': is_add, 'mr_is_classify': is_classify, 'mr_is_multipath': is_multipath, @@ -994,7 +1022,6 @@ class VppPapiProvider(object): table_id=0, ip_table_id=0, is_ip4=1, - create_vrf_if_needed=0, is_bind=1): """ """ @@ -1003,7 +1030,6 @@ class VppPapiProvider(object): {'mb_mpls_table_id': table_id, 'mb_label': label, 'mb_ip_table_id': ip_table_id, - 'mb_create_table_if_needed': create_vrf_if_needed, 'mb_is_bind': is_bind, 'mb_is_ip4': is_ip4, 'mb_address_length': dst_address_length, @@ -1020,7 +1046,6 @@ class VppPapiProvider(object): 
next_hop_n_out_labels=0, next_hop_out_label_stack=[], next_hop_via_label=MPLS_LABEL_INVALID, - create_vrf_if_needed=0, is_add=1, l2_only=0, is_multicast=0): @@ -1034,7 +1059,6 @@ class VppPapiProvider(object): :param vrf_id: (Default value = 0) :param lookup_in_vrf: (Default value = 0) :param classify_table_index: (Default value = 0xFFFFFFFF) - :param create_vrf_if_needed: (Default value = 0) :param is_add: (Default value = 1) :param is_drop: (Default value = 0) :param is_ipv6: (Default value = 0) @@ -1844,7 +1868,6 @@ class VppPapiProvider(object): i_flags, rpf_id=0, table_id=0, - create_vrf_if_needed=0, is_add=1, is_ipv6=0, is_local=0): @@ -1857,7 +1880,6 @@ class VppPapiProvider(object): 'itf_flags': i_flags, 'table_id': table_id, 'rpf_id': rpf_id, - 'create_vrf_if_needed': create_vrf_if_needed, 'is_add': is_add, 'is_ipv6': is_ipv6, 'is_local': is_local, -- cgit 1.2.3-korg From e8efd7d16fb3b6e7da752657220f3ed9807d4118 Mon Sep 17 00:00:00 2001 From: flyingeagle23 Date: Mon, 11 Sep 2017 16:02:40 +0800 Subject: nat lb mapping command existed array out of bounds(VPP-979) Change-Id: Id1dce0c2eebd4097bc17842f050453b76a94eb70 Signed-off-by: flyingeagle23 --- src/plugins/nat/nat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 8aecac6d..b13c8571 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -726,7 +726,8 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, kv.key = m_key.as_u64; kv.value = m - sm->static_mappings; clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1); - locals[i].prefix = locals[i - 1].prefix + locals[i].probability; + locals[i].prefix = (i == 0) ? 
locals[i].probability :\ + (locals[i - 1].prefix + locals[i].probability); vec_add1 (m->locals, locals[i]); m_key.port = clib_host_to_net_u16 (locals[i].port); kv.key = m_key.as_u64; -- cgit 1.2.3-korg From 6a58f4a89660b1b8ce37139ec1b65acaad8795ed Mon Sep 17 00:00:00 2001 From: flyingeagle23 Date: Tue, 12 Sep 2017 15:10:46 +0800 Subject: Fix issue in nat Bisearch algorithm (VPP-980) Change-Id: I39b1d1ec6fb9a10e2a0e67d36b01c0bf2522d07f Signed-off-by: flyingeagle23 --- src/plugins/nat/nat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index b13c8571..faf75fcb 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -1323,7 +1323,7 @@ int snat_static_mapping_match (snat_main_t * sm, rand = 1 + (random_u32 (&sm->random_seed) % m->locals[hi].prefix); while (lo < hi) { - mid = ((hi - 1) >> 1) + lo; + mid = ((hi - lo) >> 1) + lo; (rand > m->locals[mid].prefix) ? (lo = mid + 1) : (hi = mid); } if (!(m->locals[lo].prefix >= rand)) -- cgit 1.2.3-korg From 624b8d9807ac449c4077df4d2d4f40313597a224 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Tue, 12 Sep 2017 04:15:30 -0700 Subject: NAT: fixed bug in snat_alloc_outside_address_and_port (VPP-981) Change-Id: I6c5eccd4193c44604da3fd27c108defe71b38a4b Signed-off-by: Matus Fabian --- src/plugins/nat/in2out.c | 4 ++-- src/plugins/nat/nat.c | 30 ++++++++++++++++++++++++------ src/plugins/nat/nat.h | 4 +++- 3 files changed, 29 insertions(+), 9 deletions(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index efb3856b..d0a13237 100644 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -354,7 +354,7 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, s->in2out.fib_index); snat_free_outside_address_and_port - (sm, &s->out2in, s->outside_address_index); + (sm, thread_index, &s->out2in, s->outside_address_index); } s->outside_address_index = ~0; @@ 
-1283,7 +1283,7 @@ create_ses: s->out2in.port, s->in2out.fib_index); - snat_free_outside_address_and_port (sm, &s->out2in, + snat_free_outside_address_and_port (sm, thread_index, &s->out2in, s->outside_address_index); /* Remove in2out, out2in keys */ diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index faf75fcb..b98b8347 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -152,6 +152,7 @@ void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id) { snat_address_t * ap; snat_interface_t *i; + vlib_thread_main_t *tm = vlib_get_thread_main (); if (vrf_id != ~0) sm->vrf_mode = 1; @@ -172,7 +173,9 @@ void snat_add_address (snat_main_t *sm, ip4_address_t *addr, u32 vrf_id) else ap->fib_index = ~0; #define _(N, i, n, s) \ - clib_bitmap_alloc (ap->busy_##n##_port_bitmap, 65535); + clib_bitmap_alloc (ap->busy_##n##_port_bitmap, 65535); \ + ap->busy_##n##_ports = 0; \ + vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0); foreach_snat_protocol #undef _ @@ -339,7 +342,10 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, return VNET_API_ERROR_INVALID_VALUE; \ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 1); \ if (e_port > 1024) \ - a->busy_##n##_ports++; \ + { \ + a->busy_##n##_ports++; \ + a->busy_##n##_ports_per_thread[e_port / sm->port_per_thread]++; \ + } \ break; foreach_snat_protocol #undef _ @@ -432,7 +438,10 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, case SNAT_PROTOCOL_##N: \ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 0); \ if (e_port > 1024) \ - a->busy_##n##_ports--; \ + { \ + a->busy_##n##_ports--; \ + a->busy_##n##_ports_per_thread[e_port / sm->port_per_thread]--; \ + } \ break; foreach_snat_protocol #undef _ @@ -647,7 +656,10 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, return VNET_API_ERROR_INVALID_VALUE; \ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 1); \ if 
(e_port > 1024) \ - a->busy_##n##_ports++; \ + { \ + a->busy_##n##_ports++; \ + a->busy_##n##_ports_per_thread[e_port / sm->port_per_thread]++; \ + } \ break; foreach_snat_protocol #undef _ @@ -773,7 +785,10 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, case SNAT_PROTOCOL_##N: \ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, e_port, 0); \ if (e_port > 1024) \ - a->busy_##n##_ports--; \ + { \ + a->busy_##n##_ports--; \ + a->busy_##n##_ports_per_thread[e_port / sm->port_per_thread]--; \ + } \ break; foreach_snat_protocol #undef _ @@ -1241,6 +1256,7 @@ static clib_error_t * snat_init (vlib_main_t * vm) VLIB_INIT_FUNCTION (snat_init); void snat_free_outside_address_and_port (snat_main_t * sm, + u32 thread_index, snat_session_key_t * k, u32 address_index) { @@ -1260,6 +1276,7 @@ void snat_free_outside_address_and_port (snat_main_t * sm, clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, \ port_host_byte_order, 0); \ a->busy_##n##_ports--; \ + a->busy_##n##_ports_per_thread[thread_index]--; \ break; foreach_snat_protocol #undef _ @@ -1382,7 +1399,7 @@ int snat_alloc_outside_address_and_port (snat_main_t * sm, { #define _(N, j, n, s) \ case SNAT_PROTOCOL_##N: \ - if (a->busy_##n##_ports < (sm->port_per_thread * sm->num_snat_thread)) \ + if (a->busy_##n##_ports_per_thread[thread_index] < sm->port_per_thread) \ { \ while (1) \ { \ @@ -1392,6 +1409,7 @@ int snat_alloc_outside_address_and_port (snat_main_t * sm, if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \ continue; \ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \ + a->busy_##n##_ports_per_thread[thread_index]++; \ a->busy_##n##_ports++; \ k->addr = a->addr; \ k->port = clib_host_to_net_u16(portnum); \ diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index 8935144d..d1ba5d55 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -182,7 +182,8 @@ typedef struct { ip4_address_t addr; u32 fib_index; #define _(N, i, n, s) \ - u32 
busy_##n##_ports; \ + u16 busy_##n##_ports; \ + u16 * busy_##n##_ports_per_thread; \ uword * busy_##n##_port_bitmap; foreach_snat_protocol #undef _ @@ -389,6 +390,7 @@ extern vlib_node_registration_t snat_hairpin_dst_node; extern vlib_node_registration_t snat_hairpin_src_node; void snat_free_outside_address_and_port (snat_main_t * sm, + u32 thread_index, snat_session_key_t * k, u32 address_index); -- cgit 1.2.3-korg From 01ffc0c1a3f8c5c15dc40cbe4560bdb3c2484152 Mon Sep 17 00:00:00 2001 From: flyingeagle23 Date: Wed, 13 Sep 2017 19:03:56 +0800 Subject: nat lb: del lb static mapping existed vec mem leak(VPP-983) Change-Id: Id7306b432a33bb0fa594b6949e65ca528d8c0916 Signed-off-by: flyingeagle23 --- src/plugins/nat/nat.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index b98b8347..5189416e 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -838,6 +838,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, return VNET_API_ERROR_UNSPECIFIED; } } + vec_free(m->locals); pool_put (sm->static_mappings, m); } -- cgit 1.2.3-korg From daf5162f10b386f004a04466fdea12f792b0c98b Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Wed, 13 Sep 2017 23:00:53 -0700 Subject: NAT: fixed bug in snat_alloc_outside_address_and_port (VPP-981) generated random ports overlap between threads Change-Id: I7a13785e3f98b87e475426b0bd7f6bf2d9c1336c Signed-off-by: Matus Fabian --- src/plugins/nat/nat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 5189416e..36b72664 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -1406,7 +1406,7 @@ int snat_alloc_outside_address_and_port (snat_main_t * sm, { \ portnum = (sm->port_per_thread * \ sm->per_thread_data[thread_index].snat_thread_index) + \ - snat_random_port(sm, 0, sm->port_per_thread) + 1024; \ + 
snat_random_port(sm, 1, sm->port_per_thread) + 1024; \ if (clib_bitmap_get_no_check (a->busy_##n##_port_bitmap, portnum)) \ continue; \ clib_bitmap_set_no_check (a->busy_##n##_port_bitmap, portnum, 1); \ -- cgit 1.2.3-korg From 092b3cd59f17d5c3ebe167d8729273838afbe2cb Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Tue, 19 Sep 2017 05:42:38 -0700 Subject: NAT: move session and user lookup tables to per thread data (VPP-986) Change-Id: I41a51bb36e31e05c76fef0b34fe006afbee27729 Signed-off-by: Matus Fabian --- src/plugins/nat/in2out.c | 118 ++++++++++++++++++++++++++-------------------- src/plugins/nat/nat.c | 93 +++++++++++++++++++++--------------- src/plugins/nat/nat.h | 14 +++--- src/plugins/nat/nat_api.c | 4 +- src/plugins/nat/out2in.c | 32 ++++++++----- 5 files changed, 151 insertions(+), 110 deletions(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index 9196ccbb..8b658302 100755 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -191,7 +191,7 @@ snat_not_translate_fast (snat_main_t * sm, vlib_node_runtime_t *node, static inline int snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node, u32 sw_if_index0, ip4_header_t * ip0, u32 proto0, - u32 rx_fib_index0) + u32 rx_fib_index0, u32 thread_index) { udp_header_t * udp0 = ip4_next_header (ip0); snat_session_key_t key0, sm0; @@ -205,7 +205,8 @@ snat_not_translate (snat_main_t * sm, vlib_node_runtime_t *node, /* NAT packet aimed at external address if */ /* has active sessions */ - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0, + &value0)) { /* or is static mappings */ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0)) @@ -256,7 +257,8 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, kv0.key = user_key.as_u64; /* Ever heard of the "user" = src ip4 address before? 
*/ - if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash, + &kv0, &value0)) { /* no, make a new one */ pool_get (sm->per_thread_data[thread_index].users, u); @@ -275,7 +277,8 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, kv0.value = u - sm->per_thread_data[thread_index].users; /* add user */ - clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); + clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash, + &kv0, 1 /* is_add */); } else { @@ -339,10 +342,12 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, { /* Remove in2out, out2in keys */ kv0.key = s->in2out.as_u64; - if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */)) + if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, + &kv0, 0 /* is_add */)) clib_warning ("in2out key delete failed"); kv0.key = s->out2in.as_u64; - if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */)) + if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, + &kv0, 0 /* is_add */)) clib_warning ("out2in key delete failed"); /* log NAT event */ @@ -431,13 +436,15 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, /* Add to translation hashes */ kv0.key = s->in2out.as_u64; kv0.value = s - sm->per_thread_data[thread_index].sessions; - if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) + if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0, + 1 /* is_add */)) clib_warning ("in2out key add failed"); kv0.key = s->out2in.as_u64; kv0.value = s - sm->per_thread_data[thread_index].sessions; - if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) + if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0, + 1 /* is_add */)) clib_warning ("out2in key add failed"); /* Add to translated packets worker lookup */ @@ -554,10 +561,11 @@ u32 icmp_match_in2out_slow(snat_main_t *sm, 
vlib_node_runtime_t *node, kv0.key = key0.as_u64; - if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0, + &value0)) { if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, ip0, - IP_PROTOCOL_ICMP, rx_fib_index0) && + IP_PROTOCOL_ICMP, rx_fib_index0, thread_index) && vnet_buffer(b0)->sw_if_index[VLIB_TX] == ~0)) { dont_translate = 1; @@ -841,8 +849,22 @@ snat_hairpinning (snat_main_t *sm, key0.fib_index = sm->outside_fib_index; kv0.key = key0.as_u64; + if (sm->num_workers > 1) + { + k0.addr = ip0->dst_address; + k0.port = udp0->dst_port; + k0.fib_index = sm->outside_fib_index; + kv0.key = k0.as_u64; + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + return; + else + ti = value0.value; + } + else + ti = sm->num_workers; + /* Check if destination is in active sessions */ - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0)) { /* or static mappings */ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0)) @@ -855,19 +877,6 @@ snat_hairpinning (snat_main_t *sm, else { si = value0.value; - if (sm->num_workers > 1) - { - k0.addr = ip0->dst_address; - k0.port = udp0->dst_port; - k0.fib_index = sm->outside_fib_index; - kv0.key = k0.as_u64; - if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) - ASSERT(0); - else - ti = value0.value; - } - else - ti = sm->num_workers; s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); new_dst_addr0 = s0->in2out.addr.as_u32; @@ -940,8 +949,23 @@ snat_icmp_hairpinning (snat_main_t *sm, key0.fib_index = sm->outside_fib_index; kv0.key = key0.as_u64; + if (sm->num_workers > 1) + { + k0.addr = ip0->dst_address; + k0.port = icmp_id0; + k0.fib_index = sm->outside_fib_index; + kv0.key = k0.as_u64; + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + return; + else + ti = value0.value; + } + else + ti = sm->num_workers; + /* 
Check if destination is in active sessions */ - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, + &value0)) { /* or static mappings */ if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0)) @@ -953,19 +977,6 @@ snat_icmp_hairpinning (snat_main_t *sm, else { si = value0.value; - if (sm->num_workers > 1) - { - k0.addr = ip0->dst_address; - k0.port = icmp_id0; - k0.fib_index = sm->outside_fib_index; - kv0.key = k0.as_u64; - if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) - ASSERT(0); - else - ti = value0.value; - } - else - ti = sm->num_workers; s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); new_dst_addr0 = s0->in2out.addr.as_u32; @@ -1141,7 +1152,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, kv.key = u_key.as_u64; /* Ever heard of the "user" = src ip4 address before? */ - if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { /* no, make a new one */ pool_get (tsm->users, u); @@ -1158,7 +1169,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, kv.value = u - tsm->users; /* add user */ - clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1); + clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1); } else { @@ -1183,7 +1194,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, else { /* Choose same out address as for TCP/UDP session to same destination */ - if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { head_index = u->sessions_per_user_list_head_index; head = pool_elt_at_index (tsm->list_pool, head_index); @@ -1288,10 +1299,12 @@ create_ses: /* Remove in2out, out2in keys */ kv.key = s->in2out.as_u64; - if (clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0)) + if (clib_bihash_add_del_8_8 ( + &sm->per_thread_data[thread_index].in2out, &kv, 0)) clib_warning ("in2out key del failed"); kv.key = s->out2in.as_u64; - if (clib_bihash_add_del_8_8 
(&sm->out2in, &kv, 0)) + if (clib_bihash_add_del_8_8 ( + &sm->per_thread_data[thread_index].out2in, &kv, 0)) clib_warning ("out2in key del failed"); } } @@ -1424,7 +1437,7 @@ snat_in2out_lb (snat_main_t *sm, kv.key = u_key.as_u64; /* Ever heard of the "user" = src ip4 address before? */ - if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { /* no, make a new one */ pool_get (tsm->users, u); @@ -1441,7 +1454,7 @@ snat_in2out_lb (snat_main_t *sm, kv.value = u - tsm->users; /* add user */ - if (clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1)) + if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1)) clib_warning ("user key add failed"); } else @@ -1653,12 +1666,13 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, kv0.key = key0.as_u64; - if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0)) + if (PREDICT_FALSE (clib_bihash_search_8_8 ( + &sm->per_thread_data[thread_index].in2out, &kv0, &value0) != 0)) { if (is_slow_path) { if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, - ip0, proto0, rx_fib_index0)) && !is_output_feature) + ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature) goto trace00; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, @@ -1824,12 +1838,13 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, kv1.key = key1.as_u64; - if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0)) + if (PREDICT_FALSE(clib_bihash_search_8_8 ( + &sm->per_thread_data[thread_index].in2out, &kv1, &value1) != 0)) { if (is_slow_path) { if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index1, - ip1, proto1, rx_fib_index1)) && !is_output_feature) + ip1, proto1, rx_fib_index1, thread_index)) && !is_output_feature) goto trace01; next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1, @@ -2031,12 +2046,13 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, kv0.key = key0.as_u64; - if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0)) + if 
(clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].in2out, + &kv0, &value0)) { if (is_slow_path) { if (PREDICT_FALSE(snat_not_translate(sm, node, sw_if_index0, - ip0, proto0, rx_fib_index0)) && !is_output_feature) + ip0, proto0, rx_fib_index0, thread_index)) && !is_output_feature) goto trace0; next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 36b72664..876b6aad 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -483,13 +483,13 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, u_key.addr = m->local_addr; u_key.fib_index = m->fib_index; kv.key = u_key.as_u64; - if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (!clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) + tsm = vec_elt_at_index (sm->per_thread_data, value.value); + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { user_index = value.value; - if (!clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) - tsm = vec_elt_at_index (sm->per_thread_data, value.value); - else - tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); u = pool_elt_at_index (tsm->users, user_index); if (u->nstaticsessions) { @@ -548,10 +548,10 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, s->in2out.fib_index); value.key = s->in2out.as_u64; - if (clib_bihash_add_del_8_8 (&sm->in2out, &value, 0)) + if (clib_bihash_add_del_8_8 (&tsm->in2out, &value, 0)) clib_warning ("in2out key del failed"); value.key = s->out2in.as_u64; - if (clib_bihash_add_del_8_8 (&sm->out2in, &value, 0)) + if (clib_bihash_add_del_8_8 (&tsm->out2in, &value, 0)) clib_warning ("out2in key del failed"); delete: pool_put (tsm->sessions, s); @@ -566,7 +566,7 @@ delete: if (addr_only) { pool_put (tsm->users, u); - clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 0); + clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 0); } } 
} @@ -615,6 +615,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, snat_user_key_t w_key0; snat_worker_key_t w_key1; u32 worker_index = 0; + snat_main_per_thread_data_t *tsm; m_key.addr = e_addr; m_key.port = e_port; @@ -695,16 +696,6 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, clib_warning ("static_mapping_by_external key add failed"); return VNET_API_ERROR_UNSPECIFIED; } - m_key.port = clib_host_to_net_u16 (m->external_port); - kv.key = m_key.as_u64; - kv.value = ~0ULL; - if (clib_bihash_add_del_8_8(&sm->out2in, &kv, 1)) - { - clib_warning ("static_mapping_by_local key add failed"); - return VNET_API_ERROR_UNSPECIFIED; - } - - m_key.fib_index = m->fib_index; /* Assign worker */ if (sm->workers) @@ -729,8 +720,21 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, clib_warning ("worker-by-out add key failed"); return VNET_API_ERROR_UNSPECIFIED; } + tsm = vec_elt_at_index (sm->per_thread_data, worker_index); + } + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + + m_key.port = clib_host_to_net_u16 (m->external_port); + kv.key = m_key.as_u64; + kv.value = ~0ULL; + if (clib_bihash_add_del_8_8(&tsm->out2in, &kv, 1)) + { + clib_warning ("static_mapping_by_local key add failed"); + return VNET_API_ERROR_UNSPECIFIED; } + m_key.fib_index = m->fib_index; for (i = 0; i < vec_len (locals); i++) { m_key.addr = locals[i].addr; @@ -744,7 +748,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, m_key.port = clib_host_to_net_u16 (locals[i].port); kv.key = m_key.as_u64; kv.value = ~0ULL; - if (clib_bihash_add_del_8_8(&sm->in2out, &kv, 1)) + if (clib_bihash_add_del_8_8(&tsm->in2out, &kv, 1)) { clib_warning ("in2out key add failed"); return VNET_API_ERROR_UNSPECIFIED; @@ -801,6 +805,15 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, } } + w_key1.addr = m->external_addr; + w_key1.port = clib_host_to_net_u16 (m->external_port); + 
w_key1.fib_index = sm->outside_fib_index; + kv.key = w_key1.as_u64; + if (!clib_bihash_search_8_8 (&sm->worker_by_out, &kv, &value)) + tsm = vec_elt_at_index (sm->per_thread_data, value.value); + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + m_key.addr = m->external_addr; m_key.port = m->external_port; m_key.protocol = m->proto; @@ -813,7 +826,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, } m_key.port = clib_host_to_net_u16 (m->external_port); kv.key = m_key.as_u64; - if (clib_bihash_add_del_8_8(&sm->out2in, &kv, 0)) + if (clib_bihash_add_del_8_8(&tsm->out2in, &kv, 0)) { clib_warning ("outi2in key del failed"); return VNET_API_ERROR_UNSPECIFIED; @@ -832,7 +845,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, } m_key.port = clib_host_to_net_u16 (local->port); kv.key = m_key.as_u64; - if (clib_bihash_add_del_8_8(&sm->in2out, &kv, 0)) + if (clib_bihash_add_del_8_8(&tsm->in2out, &kv, 0)) { clib_warning ("in2out key del failed"); return VNET_API_ERROR_UNSPECIFIED; @@ -938,16 +951,16 @@ int snat_del_address (snat_main_t *sm, ip4_address_t addr, u8 delete_sm) ses->out2in.port, ses->in2out.fib_index); kv.key = ses->in2out.as_u64; - clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0); + clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 0); kv.key = ses->out2in.as_u64; - clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0); + clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 0); } vec_add1 (ses_to_be_removed, ses - tsm->sessions); clib_dlist_remove (tsm->list_pool, ses->per_user_index); user_key.addr = ses->in2out.addr; user_key.fib_index = ses->in2out.fib_index; kv.key = user_key.as_u64; - if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { u = pool_elt_at_index (tsm->users, value.value); u->nsessions--; @@ -2130,6 +2143,7 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) u32 static_mapping_memory_size = 64<<20; u8 static_mapping_only = 
0; u8 static_mapping_connection_tracking = 0; + snat_main_per_thread_data_t *tsm; sm->deterministic = 0; @@ -2204,21 +2218,24 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) sm->icmp_match_in2out_cb = icmp_match_in2out_slow; sm->icmp_match_out2in_cb = icmp_match_out2in_slow; - clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets, - user_memory_size); - - clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", user_buckets, - user_memory_size); + vec_foreach (tsm, sm->per_thread_data) + { + clib_bihash_init_8_8 (&tsm->in2out, "in2out", translation_buckets, + translation_memory_size); - clib_bihash_init_8_8 (&sm->in2out, "in2out", translation_buckets, - translation_memory_size); + clib_bihash_init_8_8 (&tsm->out2in, "out2in", translation_buckets, + translation_memory_size); - clib_bihash_init_8_8 (&sm->out2in, "out2in", translation_buckets, - translation_memory_size); + clib_bihash_init_8_8 (&tsm->user_hash, "users", user_buckets, + user_memory_size); + } - clib_bihash_init_8_8 (&sm->user_hash, "users", user_buckets, + clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets, user_memory_size); + clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", + translation_buckets, translation_memory_size); + clib_bihash_init_16_8 (&sm->in2out_ed, "in2out-ed", translation_buckets, translation_memory_size); @@ -2595,10 +2612,6 @@ show_snat_command_fn (vlib_main_t * vm, if (verbose > 0) { - vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->in2out, - verbose - 1); - vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in, - verbose - 1); vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->in2out_ed, verbose - 1); vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->out2in_ed, @@ -2617,6 +2630,10 @@ show_snat_command_fn (vlib_main_t * vm, vlib_worker_thread_t *w = vlib_worker_threads + j; vlib_cli_output (vm, "Thread %d (%s at lcore %u):", j, w->name, w->lcore_id); + vlib_cli_output (vm, " %U", format_bihash_8_8, &tsm->in2out, + 
verbose - 1); + vlib_cli_output (vm, " %U", format_bihash_8_8, &tsm->out2in, + verbose - 1); vlib_cli_output (vm, " %d list pool elements", pool_elts (tsm->list_pool)); diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index d1ba5d55..f970821b 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -244,6 +244,13 @@ typedef struct { } snat_static_map_resolve_t; typedef struct { + /* Main lookup tables */ + clib_bihash_8_8_t out2in; + clib_bihash_8_8_t in2out; + + /* Find-a-user => src address lookup */ + clib_bihash_8_8_t user_hash; + /* User pool */ snat_user_t * users; @@ -271,17 +278,10 @@ typedef u32 snat_icmp_match_function_t (struct snat_main_s *sm, typedef u32 (snat_get_worker_function_t) (ip4_header_t * ip, u32 rx_fib_index); typedef struct snat_main_s { - /* Main lookup tables */ - clib_bihash_8_8_t out2in; - clib_bihash_8_8_t in2out; - /* Endpoint address dependent sessions lookup tables */ clib_bihash_16_8_t out2in_ed; clib_bihash_16_8_t in2out_ed; - /* Find-a-user => src address lookup */ - clib_bihash_8_8_t user_hash; - /* Non-translated packets worker lookup => src address + VRF */ clib_bihash_8_8_t worker_by_in; diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c index fa20f2cc..50b4a9ae 100644 --- a/src/plugins/nat/nat_api.c +++ b/src/plugins/nat/nat_api.c @@ -870,7 +870,7 @@ static void tsm = vec_elt_at_index (sm->per_thread_data, value.value); else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); - if (clib_bihash_search_8_8 (&sm->user_hash, &key, &value)) + if (clib_bihash_search_8_8 (&tsm->user_hash, &key, &value)) return; u = pool_elt_at_index (tsm->users, value.value); if (!u->nsessions && !u->nstaticsessions) @@ -2105,7 +2105,7 @@ vl_api_nat44_user_session_dump_t_handler (vl_api_nat44_user_session_dump_t * tsm = vec_elt_at_index (sm->per_thread_data, value.value); else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); - if (clib_bihash_search_8_8 (&sm->user_hash, &key, &value)) + if 
(clib_bihash_search_8_8 (&tsm->user_hash, &key, &value)) return; u = pool_elt_at_index (tsm->users, value.value); if (!u->nsessions && !u->nstaticsessions) diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index 52adc8a8..6472e7ff 100755 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -146,7 +146,8 @@ create_session_for_static_mapping (snat_main_t *sm, kv0.key = user_key.as_u64; /* Ever heard of the "user" = inside ip4 address before? */ - if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].user_hash, + &kv0, &value0)) { /* no, make a new one */ pool_get (sm->per_thread_data[thread_index].users, u); @@ -166,7 +167,8 @@ create_session_for_static_mapping (snat_main_t *sm, kv0.value = u - sm->per_thread_data[thread_index].users; /* add user */ - clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); + clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash, + &kv0, 1 /* is_add */); /* add non-traslated packets worker lookup */ kv0.value = thread_index; @@ -211,13 +213,15 @@ create_session_for_static_mapping (snat_main_t *sm, /* Add to translation hashes */ kv0.key = s->in2out.as_u64; kv0.value = s - sm->per_thread_data[thread_index].sessions; - if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) + if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].in2out, &kv0, + 1 /* is_add */)) clib_warning ("in2out key add failed"); kv0.key = s->out2in.as_u64; kv0.value = s - sm->per_thread_data[thread_index].sessions; - if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) + if (clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0, + 1 /* is_add */)) clib_warning ("out2in key add failed"); /* log NAT event */ @@ -325,7 +329,8 @@ u32 icmp_match_out2in_slow(snat_main_t *sm, vlib_node_runtime_t *node, kv0.key = key0.as_u64; - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + if 
(clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, &kv0, + &value0)) { /* Try to match static mapping by external address and port, destination address and port in packet */ @@ -672,7 +677,7 @@ snat_out2in_unknown_proto (snat_main_t *sm, kv.key = u_key.as_u64; /* Ever heard of the "user" = src ip4 address before? */ - if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { /* no, make a new one */ pool_get (tsm->users, u); @@ -689,7 +694,7 @@ snat_out2in_unknown_proto (snat_main_t *sm, kv.value = u - tsm->users; /* add user */ - clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1); + clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1); } else { @@ -804,7 +809,7 @@ snat_out2in_lb (snat_main_t *sm, kv.key = u_key.as_u64; /* Ever heard of the "user" = src ip4 address before? */ - if (clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + if (clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) { /* no, make a new one */ pool_get (tsm->users, u); @@ -821,7 +826,7 @@ snat_out2in_lb (snat_main_t *sm, kv.value = u - tsm->users; /* add user */ - if (clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 1)) + if (clib_bihash_add_del_8_8 (&tsm->user_hash, &kv, 1)) clib_warning ("user key add failed"); } else @@ -1013,7 +1018,8 @@ snat_out2in_node_fn (vlib_main_t * vm, kv0.key = key0.as_u64; - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, + &kv0, &value0)) { /* Try to match static mapping by external address and port, destination address and port in packet */ @@ -1164,7 +1170,8 @@ snat_out2in_node_fn (vlib_main_t * vm, kv1.key = key1.as_u64; - if (clib_bihash_search_8_8 (&sm->out2in, &kv1, &value1)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, + &kv1, &value1)) { /* Try to match static mapping by external address and port, destination address and port in packet */ @@ -1351,7 +1358,8 @@ 
snat_out2in_node_fn (vlib_main_t * vm, kv0.key = key0.as_u64; - if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + if (clib_bihash_search_8_8 (&sm->per_thread_data[thread_index].out2in, + &kv0, &value0)) { /* Try to match static mapping by external address and port, destination address and port in packet */ -- cgit 1.2.3-korg From ed3c160983d302909dee5223675a2b356d306c81 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Thu, 21 Sep 2017 05:07:12 -0700 Subject: NAT: remove worker_by_out lookup hash table (VPP-989) Change-Id: Ibcd2cf22348ae5a72770a8f8ad25cbe8df7fd390 Signed-off-by: Matus Fabian --- src/plugins/nat/in2out.c | 79 ++++++---------------- src/plugins/nat/nat.c | 170 +++++++++++++++++++++++++++-------------------- src/plugins/nat/nat.h | 4 +- 3 files changed, 120 insertions(+), 133 deletions(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index 8b658302..dfe10303 100755 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -241,7 +241,6 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, u32 address_index = ~0; u32 outside_fib_index; uword * p; - snat_worker_key_t worker_by_out_key; p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id); if (! 
p) @@ -447,14 +446,6 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, 1 /* is_add */)) clib_warning ("out2in key add failed"); - /* Add to translated packets worker lookup */ - worker_by_out_key.addr = s->out2in.addr; - worker_by_out_key.port = s->out2in.port; - worker_by_out_key.fib_index = s->out2in.fib_index; - kv0.key = worker_by_out_key.as_u64; - kv0.value = thread_index; - clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); - /* log NAT event */ snat_ipfix_logging_nat44_ses_create(s->in2out.addr.as_u32, s->out2in.addr.as_u32, @@ -836,7 +827,6 @@ snat_hairpinning (snat_main_t *sm, u32 proto0) { snat_session_key_t key0, sm0; - snat_worker_key_t k0; snat_session_t * s0; clib_bihash_kv_8_8_t kv0, value0; ip_csum_t sum0; @@ -849,39 +839,30 @@ snat_hairpinning (snat_main_t *sm, key0.fib_index = sm->outside_fib_index; kv0.key = key0.as_u64; - if (sm->num_workers > 1) + /* Check if destination is static mappings */ + if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0)) { - k0.addr = ip0->dst_address; - k0.port = udp0->dst_port; - k0.fib_index = sm->outside_fib_index; - kv0.key = k0.as_u64; - if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) - return; - else - ti = value0.value; + new_dst_addr0 = sm0.addr.as_u32; + new_dst_port0 = sm0.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; } + /* or active session */ else - ti = sm->num_workers; - - /* Check if destination is in active sessions */ - if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0)) { - /* or static mappings */ - if (!snat_static_mapping_match(sm, key0, &sm0, 1, 0)) + if (sm->num_workers > 1) + ti = (clib_net_to_host_u16 (udp0->dst_port) - 1024) / sm->port_per_thread; + else + ti = sm->num_workers; + + if (!clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, &value0)) { - new_dst_addr0 = sm0.addr.as_u32; - new_dst_port0 = sm0.port; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; - } - } - else - { - si = value0.value; + 
si = value0.value; - s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); - new_dst_addr0 = s0->in2out.addr.as_u32; - new_dst_port0 = s0->in2out.port; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); + new_dst_addr0 = s0->in2out.addr.as_u32; + new_dst_port0 = s0->in2out.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + } } /* Destination is behind the same NAT, use internal address and port */ @@ -934,7 +915,6 @@ snat_icmp_hairpinning (snat_main_t *sm, { snat_session_key_t key0, sm0; clib_bihash_kv_8_8_t kv0, value0; - snat_worker_key_t k0; u32 new_dst_addr0 = 0, old_dst_addr0, si, ti = 0; ip_csum_t sum0; snat_session_t *s0; @@ -950,16 +930,7 @@ snat_icmp_hairpinning (snat_main_t *sm, kv0.key = key0.as_u64; if (sm->num_workers > 1) - { - k0.addr = ip0->dst_address; - k0.port = icmp_id0; - k0.fib_index = sm->outside_fib_index; - kv0.key = k0.as_u64; - if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) - return; - else - ti = value0.value; - } + ti = (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread; else ti = sm->num_workers; @@ -1048,7 +1019,6 @@ snat_hairpinning_unknown_proto (snat_main_t *sm, clib_bihash_kv_16_8_t s_kv, s_value; nat_ed_ses_key_t key; snat_session_key_t m_key; - snat_worker_key_t w_key; snat_static_mapping_t *m; ip_csum_t sum; snat_session_t *s; @@ -1080,16 +1050,7 @@ snat_hairpinning_unknown_proto (snat_main_t *sm, else { if (sm->num_workers > 1) - { - w_key.addr = ip->dst_address; - w_key.port = 0; - w_key.fib_index = sm->outside_fib_index; - kv.key = w_key.as_u64; - if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv, &value)) - return; - else - ti = value.value; - } + ti = sm->worker_out2in_cb (ip, sm->outside_fib_index); else ti = sm->num_workers; diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 876b6aad..5f3b006e 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -344,7 
+344,7 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, if (e_port > 1024) \ { \ a->busy_##n##_ports++; \ - a->busy_##n##_ports_per_thread[e_port / sm->port_per_thread]++; \ + a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]++; \ } \ break; foreach_snat_protocol @@ -394,7 +394,6 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, if (sm->workers) { snat_user_key_t w_key0; - snat_worker_key_t w_key1; w_key0.addr = m->local_addr; w_key0.fib_index = m->fib_index; @@ -412,11 +411,7 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, kv.value = value.value; } - w_key1.addr = m->external_addr; - w_key1.port = clib_host_to_net_u16 (m->external_port); - w_key1.fib_index = sm->outside_fib_index; - kv.key = w_key1.as_u64; - clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv, 1); + m->worker_index = kv.value; } } else @@ -440,7 +435,7 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, if (e_port > 1024) \ { \ a->busy_##n##_ports--; \ - a->busy_##n##_ports_per_thread[e_port / sm->port_per_thread]--; \ + a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]--; \ } \ break; foreach_snat_protocol @@ -613,7 +608,6 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, int i; nat44_lb_addr_port_t *local; snat_user_key_t w_key0; - snat_worker_key_t w_key1; u32 worker_index = 0; snat_main_per_thread_data_t *tsm; @@ -659,7 +653,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, if (e_port > 1024) \ { \ a->busy_##n##_ports++; \ - a->busy_##n##_ports_per_thread[e_port / sm->port_per_thread]++; \ + a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]++; \ } \ break; foreach_snat_protocol @@ -710,17 +704,8 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, else worker_index = value.value; - w_key1.addr = m->external_addr; - w_key1.port = clib_host_to_net_u16 (m->external_port); 
- w_key1.fib_index = sm->outside_fib_index; - kv.key = w_key1.as_u64; - kv.value = worker_index; - if (clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv, 1)) - { - clib_warning ("worker-by-out add key failed"); - return VNET_API_ERROR_UNSPECIFIED; - } tsm = vec_elt_at_index (sm->per_thread_data, worker_index); + m->worker_index = worker_index; } else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); @@ -791,7 +776,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, if (e_port > 1024) \ { \ a->busy_##n##_ports--; \ - a->busy_##n##_ports_per_thread[e_port / sm->port_per_thread]--; \ + a->busy_##n##_ports_per_thread[(e_port - 1024) / sm->port_per_thread]--; \ } \ break; foreach_snat_protocol @@ -805,15 +790,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, } } - w_key1.addr = m->external_addr; - w_key1.port = clib_host_to_net_u16 (m->external_port); - w_key1.fib_index = sm->outside_fib_index; - kv.key = w_key1.as_u64; - if (!clib_bihash_search_8_8 (&sm->worker_by_out, &kv, &value)) - tsm = vec_elt_at_index (sm->per_thread_data, value.value); - else - tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); - + tsm = vec_elt_at_index (sm->per_thread_data, m->worker_index); m_key.addr = m->external_addr; m_key.port = m->external_port; m_key.protocol = m->proto; @@ -2074,58 +2051,114 @@ static u32 snat_get_worker_out2in_cb (ip4_header_t * ip0, u32 rx_fib_index0) { snat_main_t *sm = &snat_main; - snat_worker_key_t key0; - clib_bihash_kv_8_8_t kv0, value0; - udp_header_t * udp0; - u32 next_worker_index = 0; - - udp0 = ip4_next_header (ip0); - - key0.addr = ip0->dst_address; - key0.port = udp0->dst_port; - key0.fib_index = rx_fib_index0; + udp_header_t *udp; + u16 port; + snat_session_key_t m_key; + clib_bihash_kv_8_8_t kv, value; + snat_static_mapping_t *m; + nat_ed_ses_key_t key; + clib_bihash_kv_16_8_t s_kv, s_value; + snat_main_per_thread_data_t *tsm; + snat_session_t *s; + int i; + u32 proto; - if 
(PREDICT_FALSE(ip0->protocol == IP_PROTOCOL_ICMP)) + /* first try static mappings without port */ + if (PREDICT_FALSE (pool_elts (sm->static_mappings))) { - icmp46_header_t * icmp0 = (icmp46_header_t *) udp0; - icmp_echo_header_t *echo0 = (icmp_echo_header_t *)(icmp0+1); - key0.port = echo0->identifier; + m_key.addr = ip0->dst_address; + m_key.port = 0; + m_key.protocol = 0; + m_key.fib_index = rx_fib_index0; + kv.key = m_key.as_u64; + if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + { + m = pool_elt_at_index (sm->static_mappings, value.value); + return m->worker_index; + } } - kv0.key = key0.as_u64; + proto = ip_proto_to_snat_proto (ip0->protocol); + udp = ip4_next_header (ip0); + port = udp->dst_port; - /* Ever heard of of the "user" before? */ - if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + /* unknown protocol */ + if (PREDICT_FALSE (proto == ~0)) { - key0.port = 0; - kv0.key = key0.as_u64; + key.l_addr = ip0->dst_address; + key.r_addr = ip0->src_address; + key.fib_index = rx_fib_index0; + key.proto = ip0->protocol; + key.rsvd = 0; + key.l_port = 0; + s_kv.key[0] = key.as_u64[0]; + s_kv.key[1] = key.as_u64[1]; - if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) { - /* No, assign next available worker (RR) */ - next_worker_index = sm->first_worker_index; - if (vec_len (sm->workers)) + for (i = 0; i < _vec_len (sm->per_thread_data); i++) { - next_worker_index += - sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; + tsm = vec_elt_at_index (sm->per_thread_data, i); + if (!pool_is_free_index(tsm->sessions, s_value.value)) + { + s = pool_elt_at_index (tsm->sessions, s_value.value); + if (s->out2in.addr.as_u32 == ip0->dst_address.as_u32 && + s->out2in.port == ip0->protocol && + snat_is_unk_proto_session (s)) + return i; + } } - } + } + + /* if no session use current thread */ + return vlib_get_thread_index (); + } + + if (PREDICT_FALSE 
(ip0->protocol == IP_PROTOCOL_ICMP)) + { + icmp46_header_t * icmp = (icmp46_header_t *) udp; + icmp_echo_header_t *echo = (icmp_echo_header_t *)(icmp + 1); + if (!icmp_is_error_message (icmp)) + port = echo->identifier; else { - /* Static mapping without port */ - next_worker_index = value0.value; + ip4_header_t *inner_ip = (ip4_header_t *)(echo + 1); + proto = ip_proto_to_snat_proto (inner_ip->protocol); + void *l4_header = ip4_next_header (inner_ip); + switch (proto) + { + case SNAT_PROTOCOL_ICMP: + icmp = (icmp46_header_t*)l4_header; + echo = (icmp_echo_header_t *)(icmp + 1); + port = echo->identifier; + break; + case SNAT_PROTOCOL_UDP: + case SNAT_PROTOCOL_TCP: + port = ((tcp_udp_header_t*)l4_header)->src_port; + break; + default: + return vlib_get_thread_index (); + } } + } - /* Add to translated packets worker lookup */ - key0.port = udp0->dst_port; - kv0.key = key0.as_u64; - kv0.value = next_worker_index; - clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); + /* try static mappings with port */ + if (PREDICT_FALSE (pool_elts (sm->static_mappings))) + { + m_key.addr = ip0->dst_address; + m_key.port = clib_net_to_host_u16 (port); + m_key.protocol = proto; + m_key.fib_index = rx_fib_index0; + kv.key = m_key.as_u64; + if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + { + m = pool_elt_at_index (sm->static_mappings, value.value); + return m->worker_index; + } } - else - next_worker_index = value0.value; - return next_worker_index; + /* worker by outside port */ + return (u32) ((clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread); } static clib_error_t * @@ -2233,9 +2266,6 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets, user_memory_size); - clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", - translation_buckets, translation_memory_size); - clib_bihash_init_16_8 (&sm->in2out_ed, "in2out-ed", translation_buckets, translation_memory_size); 
@@ -2618,8 +2648,6 @@ show_snat_command_fn (vlib_main_t * vm, verbose - 1); vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in, verbose - 1); - vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out, - verbose - 1); vec_foreach_index (j, sm->per_thread_data) { tsm = vec_elt_at_index (sm->per_thread_data, j); diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index f970821b..20e45952 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -224,6 +224,7 @@ typedef struct { u32 vrf_id; u32 fib_index; snat_protocol_t proto; + u32 worker_index; nat44_lb_addr_port_t *locals; } snat_static_mapping_t; @@ -285,9 +286,6 @@ typedef struct snat_main_s { /* Non-translated packets worker lookup => src address + VRF */ clib_bihash_8_8_t worker_by_in; - /* Translated packets worker lookup => IP address + port number */ - clib_bihash_8_8_t worker_by_out; - snat_icmp_match_function_t * icmp_match_in2out_cb; snat_icmp_match_function_t * icmp_match_out2in_cb; -- cgit 1.2.3-korg From 41fef50d5db5e7deb3cfd901c3108abbc4406813 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Fri, 22 Sep 2017 02:43:05 -0700 Subject: NAT: session number limitation to avoid running out of memory crash (VPP-984) Change-Id: I7f18f8c4ba609d96950dc1f833feb967d4a099b7 Signed-off-by: Matus Fabian --- src/plugins/nat/in2out.c | 69 ++++++++++++++++++++++++++++++++----------- src/plugins/nat/nat.c | 2 ++ src/plugins/nat/nat.h | 10 +++++++ src/plugins/nat/out2in.c | 77 +++++++++++++++++++++++++++++++++++------------- 4 files changed, 120 insertions(+), 38 deletions(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/in2out.c b/src/plugins/nat/in2out.c index dfe10303..dbbc67f9 100755 --- a/src/plugins/nat/in2out.c +++ b/src/plugins/nat/in2out.c @@ -101,7 +101,8 @@ _(IN2OUT_PACKETS, "Good in2out packets processed") \ _(OUT_OF_PORTS, "Out of ports") \ _(BAD_OUTSIDE_FIB, "Outside VRF ID not found") \ _(BAD_ICMP_TYPE, "unsupported ICMP type") \ -_(NO_TRANSLATION, 
"No translation") +_(NO_TRANSLATION, "No translation") \ +_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded") typedef enum { #define _(sym,str) SNAT_IN2OUT_ERROR_##sym, @@ -242,6 +243,12 @@ static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, u32 outside_fib_index; uword * p; + if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index))) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED]; + return SNAT_IN2OUT_NEXT_DROP; + } + p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id); if (! p) { @@ -1064,14 +1071,15 @@ snat_hairpinning_unknown_proto (snat_main_t *sm, ip->checksum = ip_csum_fold (sum); } -static void +static snat_session_t * snat_in2out_unknown_proto (snat_main_t *sm, vlib_buffer_t * b, ip4_header_t * ip, u32 rx_fib_index, u32 thread_index, f64 now, - vlib_main_t * vm) + vlib_main_t * vm, + vlib_node_runtime_t * node) { clib_bihash_kv_8_8_t kv, value; clib_bihash_kv_16_8_t s_kv, s_value; @@ -1108,6 +1116,12 @@ snat_in2out_unknown_proto (snat_main_t *sm, } else { + if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index))) + { + b->error = node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED]; + return 0; + } + u_key.addr = ip->src_address; u_key.fib_index = rx_fib_index; kv.key = u_key.as_u64; @@ -1198,7 +1212,7 @@ snat_in2out_unknown_proto (snat_main_t *sm, goto create_ses; } } - return; + return 0; } create_ses: @@ -1342,6 +1356,8 @@ create_ses: if (vnet_buffer(b)->sw_if_index[VLIB_TX] == ~0) vnet_buffer(b)->sw_if_index[VLIB_TX] = sm->outside_fib_index; + + return s; } static snat_session_t * @@ -1351,7 +1367,8 @@ snat_in2out_lb (snat_main_t *sm, u32 rx_fib_index, u32 thread_index, f64 now, - vlib_main_t * vm) + vlib_main_t * vm, + vlib_node_runtime_t * node) { nat_ed_ses_key_t key; clib_bihash_kv_16_8_t s_kv, s_value; @@ -1386,6 +1403,12 @@ snat_in2out_lb (snat_main_t *sm, } else { + if (PREDICT_FALSE (maximum_sessions_exceeded (sm, thread_index))) + { + b->error = 
node->errors[SNAT_IN2OUT_ERROR_MAX_SESSIONS_EXCEEDED]; + return 0; + } + l_key.addr = ip->src_address; l_key.port = udp->src_port; l_key.protocol = proto; @@ -1598,8 +1621,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (PREDICT_FALSE (proto0 == ~0)) { - snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, - thread_index, now, vm); + s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, + thread_index, now, vm, node); + if (!s0) + next0 = SNAT_IN2OUT_NEXT_DROP; goto trace00; } @@ -1653,8 +1678,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_slow_path) { - s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, - now, vm); + s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0, + thread_index, now, vm, node); + if (!s0) + next0 = SNAT_IN2OUT_NEXT_DROP; goto trace00; } else @@ -1770,8 +1797,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (PREDICT_FALSE (proto1 == ~0)) { - snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1, - thread_index, now, vm); + s1 = snat_in2out_unknown_proto (sm, b1, ip1, rx_fib_index1, + thread_index, now, vm, node); + if (!s1) + next1 = SNAT_IN2OUT_NEXT_DROP; goto trace01; } @@ -1825,8 +1854,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_slow_path) { - s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1, thread_index, - now, vm); + s1 = snat_in2out_lb(sm, b1, ip1, rx_fib_index1, + thread_index, now, vm, node); + if (!s1) + next1 = SNAT_IN2OUT_NEXT_DROP; goto trace01; } else @@ -1978,8 +2009,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (PREDICT_FALSE (proto0 == ~0)) { - snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, - thread_index, now, vm); + s0 = snat_in2out_unknown_proto (sm, b0, ip0, rx_fib_index0, + thread_index, now, vm, node); + if (!s0) + next0 = SNAT_IN2OUT_NEXT_DROP; goto trace0; } @@ -2034,8 +2067,10 @@ snat_in2out_node_fn_inline (vlib_main_t * vm, { if (is_slow_path) { - s0 = snat_in2out_lb(sm, b0, ip0, rx_fib_index0, thread_index, - now, vm); + s0 = 
snat_in2out_lb(sm, b0, ip0, rx_fib_index0, + thread_index, now, vm, node); + if (!s0) + next0 = SNAT_IN2OUT_NEXT_DROP; goto trace0; } else diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 5f3b006e..612085fc 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -2216,6 +2216,8 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) /* for show commands, etc. */ sm->translation_buckets = translation_buckets; sm->translation_memory_size = translation_memory_size; + /* do not exceed load factor 10 */ + sm->max_translations = 10 * translation_buckets; sm->user_buckets = user_buckets; sm->user_memory_size = user_memory_size; sm->max_translations_per_user = max_translations_per_user; diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index 20e45952..d34ff07b 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -345,6 +345,7 @@ typedef struct snat_main_s { u8 deterministic; u32 translation_buckets; u32 translation_memory_size; + u32 max_translations; u32 user_buckets; u32 user_memory_size; u32 max_translations_per_user; @@ -551,4 +552,13 @@ is_interface_addr(snat_main_t *sm, vlib_node_runtime_t *node, u32 sw_if_index0, return 0; } +always_inline u8 +maximum_sessions_exceeded (snat_main_t *sm, u32 thread_index) +{ + if (pool_elts (sm->per_thread_data[thread_index].sessions) >= sm->max_translations) + return 1; + + return 0; +} + #endif /* __included_nat_h__ */ diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index 6472e7ff..e5426c1a 100755 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -87,7 +87,8 @@ vlib_node_registration_t snat_det_out2in_node; _(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ _(OUT2IN_PACKETS, "Good out2in packets processed") \ _(BAD_ICMP_TYPE, "unsupported ICMP type") \ -_(NO_TRANSLATION, "No translation") +_(NO_TRANSLATION, "No translation") \ +_(MAX_SESSIONS_EXCEEDED, "Maximum sessions exceeded") typedef enum { #define _(sym,str) SNAT_OUT2IN_ERROR_##sym, @@ -139,6 
+140,12 @@ create_session_for_static_mapping (snat_main_t *sm, dlist_elt_t * per_user_list_head_elt; ip4_header_t *ip0; + if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index))) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED]; + return 0; + } + ip0 = vlib_buffer_get_current (b0); user_key.addr = in2out.addr; @@ -620,14 +627,15 @@ static inline u32 icmp_out2in_slow_path (snat_main_t *sm, return next0; } -static void +static snat_session_t * snat_out2in_unknown_proto (snat_main_t *sm, vlib_buffer_t * b, ip4_header_t * ip, u32 rx_fib_index, u32 thread_index, f64 now, - vlib_main_t * vm) + vlib_main_t * vm, + vlib_node_runtime_t * node) { clib_bihash_kv_8_8_t kv, value; clib_bihash_kv_16_8_t s_kv, s_value; @@ -660,13 +668,22 @@ snat_out2in_unknown_proto (snat_main_t *sm, } else { + if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index))) + { + b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED]; + return 0; + } + m_key.addr = ip->dst_address; m_key.port = 0; m_key.protocol = 0; m_key.fib_index = rx_fib_index; kv.key = m_key.as_u64; if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) - return; + { + b->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + return 0; + } m = pool_elt_at_index (sm->static_mappings, value.value); @@ -753,6 +770,8 @@ snat_out2in_unknown_proto (snat_main_t *sm, clib_dlist_remove (tsm->list_pool, s->per_user_index); clib_dlist_addtail (tsm->list_pool, s->per_user_list_head_index, s->per_user_index); + + return s; } static snat_session_t * @@ -762,7 +781,8 @@ snat_out2in_lb (snat_main_t *sm, u32 rx_fib_index, u32 thread_index, f64 now, - vlib_main_t * vm) + vlib_main_t * vm, + vlib_node_runtime_t * node) { nat_ed_ses_key_t key; clib_bihash_kv_16_8_t s_kv, s_value; @@ -797,6 +817,12 @@ snat_out2in_lb (snat_main_t *sm, } else { + if (PREDICT_FALSE (maximum_sessions_exceeded(sm, thread_index))) + { + b->error = node->errors[SNAT_OUT2IN_ERROR_MAX_SESSIONS_EXCEEDED]; + 
return 0; + } + e_key.addr = ip->dst_address; e_key.port = udp->dst_port; e_key.protocol = proto; @@ -998,8 +1024,10 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == ~0)) { - snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0, - thread_index, now, vm); + s0 = snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0, + thread_index, now, vm, node); + if (!s0) + next0 = SNAT_OUT2IN_NEXT_DROP; goto trace0; } @@ -1042,7 +1070,6 @@ snat_out2in_node_fn (vlib_main_t * vm, thread_index); if (!s0) { - b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; next0 = SNAT_OUT2IN_NEXT_DROP; goto trace0; } @@ -1051,8 +1078,10 @@ snat_out2in_node_fn (vlib_main_t * vm, { if (PREDICT_FALSE (value0.value == ~0ULL)) { - s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, - vm); + s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, + now, vm, node); + if (!s0) + next0 = SNAT_OUT2IN_NEXT_DROP; goto trace0; } else @@ -1150,8 +1179,10 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (proto1 == ~0)) { - snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1, - thread_index, now, vm); + s1 = snat_out2in_unknown_proto(sm, b1, ip1, rx_fib_index1, + thread_index, now, vm, node); + if (!s1) + next1 = SNAT_OUT2IN_NEXT_DROP; goto trace1; } @@ -1194,7 +1225,6 @@ snat_out2in_node_fn (vlib_main_t * vm, thread_index); if (!s1) { - b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; next1 = SNAT_OUT2IN_NEXT_DROP; goto trace1; } @@ -1203,8 +1233,10 @@ snat_out2in_node_fn (vlib_main_t * vm, { if (PREDICT_FALSE (value1.value == ~0ULL)) { - s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index, now, - vm); + s1 = snat_out2in_lb(sm, b1, ip1, rx_fib_index1, thread_index, + now, vm, node); + if (!s1) + next1 = SNAT_OUT2IN_NEXT_DROP; goto trace1; } else @@ -1328,8 +1360,10 @@ snat_out2in_node_fn (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == ~0)) { - snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0, - thread_index, now, vm); + s0 = 
snat_out2in_unknown_proto(sm, b0, ip0, rx_fib_index0, + thread_index, now, vm, node); + if (!s0) + next0 = SNAT_OUT2IN_NEXT_DROP; goto trace00; } @@ -1383,8 +1417,7 @@ snat_out2in_node_fn (vlib_main_t * vm, thread_index); if (!s0) { - b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; - next0 = SNAT_OUT2IN_NEXT_DROP; + next0 = SNAT_OUT2IN_NEXT_DROP; goto trace00; } } @@ -1392,8 +1425,10 @@ snat_out2in_node_fn (vlib_main_t * vm, { if (PREDICT_FALSE (value0.value == ~0ULL)) { - s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, now, - vm); + s0 = snat_out2in_lb(sm, b0, ip0, rx_fib_index0, thread_index, + now, vm, node); + if (!s0) + next0 = SNAT_OUT2IN_NEXT_DROP; goto trace00; } else -- cgit 1.2.3-korg From fc4510bbdb85c5bd6ff4fc69996e78d568254091 Mon Sep 17 00:00:00 2001 From: Aequitas Date: Sat, 23 Sep 2017 12:58:49 +0800 Subject: Memory overwritten when using unformat %u (VPP-987) Change-Id: I7d8f807fb502d61688aa1dee25fa4edcbeb32f41 Signed-off-by: Aequitas --- src/plugins/nat/nat.c | 14 +++++++------- src/plugins/nat/nat64_cli.c | 8 ++++---- src/plugins/nat/nat_test.c | 16 ++++++++-------- 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 612085fc..94416255 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -3151,7 +3151,7 @@ snat_det_close_session_out_fn (vlib_main_t *vm, snat_main_t *sm = &snat_main; unformat_input_t _line_input, *line_input = &_line_input; ip4_address_t out_addr, ext_addr, in_addr; - u16 out_port, ext_port; + u32 out_port, ext_port; snat_det_map_t * dm; snat_det_session_t * ses; snat_det_out_key_t key; @@ -3182,10 +3182,10 @@ snat_det_close_session_out_fn (vlib_main_t *vm, vlib_cli_output (vm, "no match"); else { - snat_det_reverse(dm, &ext_addr, out_port, &in_addr); + snat_det_reverse(dm, &ext_addr, (u16)out_port, &in_addr); key.ext_host_addr = out_addr; - key.ext_host_port = ntohs(ext_port); - key.out_port 
= ntohs(out_port); + key.ext_host_port = ntohs((u16)ext_port); + key.out_port = ntohs((u16)out_port); ses = snat_det_get_ses_by_out(dm, &out_addr, key.as_u64); if (!ses) vlib_cli_output (vm, "no match"); @@ -3222,7 +3222,7 @@ snat_det_close_session_in_fn (vlib_main_t *vm, snat_main_t *sm = &snat_main; unformat_input_t _line_input, *line_input = &_line_input; ip4_address_t in_addr, ext_addr; - u16 in_port, ext_port; + u32 in_port, ext_port; snat_det_map_t * dm; snat_det_session_t * ses; snat_det_out_key_t key; @@ -3254,8 +3254,8 @@ snat_det_close_session_in_fn (vlib_main_t *vm, else { key.ext_host_addr = ext_addr; - key.ext_host_port = ntohs (ext_port); - ses = snat_det_find_ses_by_in (dm, &in_addr, ntohs(in_port), key); + key.ext_host_port = ntohs ((u16)ext_port); + ses = snat_det_find_ses_by_in (dm, &in_addr, ntohs((u16)in_port), key); if (!ses) vlib_cli_output (vm, "no match"); else diff --git a/src/plugins/nat/nat64_cli.c b/src/plugins/nat/nat64_cli.c index 88900387..f3645bbb 100644 --- a/src/plugins/nat/nat64_cli.c +++ b/src/plugins/nat/nat64_cli.c @@ -301,8 +301,8 @@ nat64_add_del_static_bib_command_fn (vlib_main_t * u8 is_add = 1; ip6_address_t in_addr; ip4_address_t out_addr; - u16 in_port = 0; - u16 out_port = 0; + u32 in_port = 0; + u32 out_port = 0; u32 vrf_id = 0, protocol; snat_protocol_t proto = 0; u8 p = 0; @@ -362,8 +362,8 @@ nat64_add_del_static_bib_command_fn (vlib_main_t * } rv = - nat64_add_del_static_bib_entry (&in_addr, &out_addr, in_port, out_port, p, - vrf_id, is_add); + nat64_add_del_static_bib_entry (&in_addr, &out_addr, (u16) in_port, + (u16) out_port, p, vrf_id, is_add); switch (rv) { diff --git a/src/plugins/nat/nat_test.c b/src/plugins/nat/nat_test.c index b653b77e..e0b04940 100644 --- a/src/plugins/nat/nat_test.c +++ b/src/plugins/nat/nat_test.c @@ -846,7 +846,7 @@ static int api_snat_det_reverse (vat_main_t * vam) unformat_input_t * i = vam->input; vl_api_snat_det_reverse_t * mp; ip4_address_t out_addr; - u16 out_port; + u32 out_port; 
int ret; if (unformat (i, "%U %d", unformat_ip4_address, &out_addr, &out_port)) @@ -859,7 +859,7 @@ static int api_snat_det_reverse (vat_main_t * vam) M(SNAT_DET_REVERSE, mp); clib_memcpy(mp->out_addr, &out_addr, 4); - mp->out_port = htons(out_port); + mp->out_port = htons((u16)out_port); S(mp); W(ret); @@ -981,7 +981,7 @@ static int api_snat_det_close_session_out (vat_main_t * vam) unformat_input_t * i = vam->input; vl_api_snat_det_close_session_out_t * mp; ip4_address_t out_addr, ext_addr; - u16 out_port, ext_port; + u32 out_port, ext_port; int ret; if (unformat (i, "%U:%d %U:%d", @@ -996,9 +996,9 @@ static int api_snat_det_close_session_out (vat_main_t * vam) M(SNAT_DET_CLOSE_SESSION_OUT, mp); clib_memcpy(mp->out_addr, &out_addr, 4); - mp->out_port = ntohs(out_port); + mp->out_port = ntohs((u16)out_port); clib_memcpy(mp->ext_addr, &ext_addr, 4); - mp->ext_port = ntohs(ext_port); + mp->ext_port = ntohs((u16)ext_port); S(mp); W (ret); @@ -1010,7 +1010,7 @@ static int api_snat_det_close_session_in (vat_main_t * vam) unformat_input_t * i = vam->input; vl_api_snat_det_close_session_in_t * mp; ip4_address_t in_addr, ext_addr; - u16 in_port, ext_port; + u32 in_port, ext_port; int ret; if (unformat (i, "%U:%d %U:%d", @@ -1025,9 +1025,9 @@ static int api_snat_det_close_session_in (vat_main_t * vam) M(SNAT_DET_CLOSE_SESSION_IN, mp); clib_memcpy(mp->in_addr, &in_addr, 4); - mp->in_port = ntohs(in_port); + mp->in_port = ntohs((u16)in_port); clib_memcpy(mp->ext_addr, &ext_addr, 4); - mp->ext_port = ntohs(ext_port); + mp->ext_port = ntohs((u16)ext_port); S(mp); W (ret); -- cgit 1.2.3-korg From 7865b5c8a4f11731d8c6d4627e505afe990989b4 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Tue, 26 Sep 2017 01:23:01 -0700 Subject: NAT: remove worker_by_in lookup hash table (VPP-992) Change-Id: I3873d3e411bf93cac82e73a0b8e3b22563aaf217 Signed-off-by: Matus Fabian --- src/plugins/nat/nat.c | 91 ++++++++++------------------------------------- src/plugins/nat/nat.h | 16 --------- 
src/plugins/nat/nat_api.c | 16 ++++++--- src/plugins/nat/out2in.c | 4 --- 4 files changed, 30 insertions(+), 97 deletions(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 94416255..c2f9586c 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -390,28 +390,12 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, kv.value = m - sm->static_mappings; clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1); - /* Assign worker */ if (sm->workers) { - snat_user_key_t w_key0; - - w_key0.addr = m->local_addr; - w_key0.fib_index = m->fib_index; - kv.key = w_key0.as_u64; - - if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) - { - kv.value = sm->first_worker_index + - sm->workers[sm->next_worker++ % vec_len (sm->workers)]; - - clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1); - } - else - { - kv.value = value.value; - } - - m->worker_index = kv.value; + ip4_header_t ip = { + .src_address = m->local_addr, + }; + m->worker_index = sm->worker_in2out_cb (&ip, m->fib_index); } } else @@ -478,8 +462,8 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, u_key.addr = m->local_addr; u_key.fib_index = m->fib_index; kv.key = u_key.as_u64; - if (!clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) - tsm = vec_elt_at_index (sm->per_thread_data, value.value); + if (sm->num_workers) + tsm = vec_elt_at_index (sm->per_thread_data, m->worker_index); else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); if (!clib_bihash_search_8_8 (&tsm->user_hash, &kv, &value)) @@ -607,7 +591,6 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, snat_address_t *a = 0; int i; nat44_lb_addr_port_t *local; - snat_user_key_t w_key0; u32 worker_index = 0; snat_main_per_thread_data_t *tsm; @@ -694,16 +677,8 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, /* Assign worker */ if (sm->workers) { - w_key0.addr = 
locals[0].addr; - w_key0.fib_index = fib_index; - kv.key = w_key0.as_u64; - - if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) - worker_index = sm->first_worker_index + - sm->workers[sm->next_worker++ % vec_len (sm->workers)]; - else - worker_index = value.value; - + worker_index = sm->first_worker_index + + sm->workers[sm->next_worker++ % vec_len (sm->workers)]; tsm = vec_elt_at_index (sm->per_thread_data, worker_index); m->worker_index = worker_index; } @@ -730,6 +705,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, locals[i].prefix = (i == 0) ? locals[i].probability :\ (locals[i - 1].prefix + locals[i].probability); vec_add1 (m->locals, locals[i]); + m_key.port = clib_host_to_net_u16 (locals[i].port); kv.key = m_key.as_u64; kv.value = ~0ULL; @@ -738,19 +714,6 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, clib_warning ("in2out key add failed"); return VNET_API_ERROR_UNSPECIFIED; } - /* Assign worker */ - if (sm->workers) - { - w_key0.addr = locals[i].addr; - w_key0.fib_index = fib_index; - kv.key = w_key0.as_u64; - kv.value = worker_index; - if (clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1)) - { - clib_warning ("worker-by-in key add failed"); - return VNET_API_ERROR_UNSPECIFIED; - } - } } } else @@ -801,6 +764,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, clib_warning ("static_mapping_by_external key del failed"); return VNET_API_ERROR_UNSPECIFIED; } + m_key.port = clib_host_to_net_u16 (m->external_port); kv.key = m_key.as_u64; if (clib_bihash_add_del_8_8(&tsm->out2in, &kv, 0)) @@ -820,6 +784,7 @@ int nat44_add_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port, clib_warning ("static_mapping_by_local key del failed"); return VNET_API_ERROR_UNSPECIFIED; } + m_key.port = clib_host_to_net_u16 (local->port); kv.key = m_key.as_u64; if (clib_bihash_add_del_8_8(&tsm->in2out, &kv, 0)) @@ -2017,32 +1982,17 @@ static u32 snat_get_worker_in2out_cb (ip4_header_t * ip0, 
u32 rx_fib_index0) { snat_main_t *sm = &snat_main; - snat_user_key_t key0; - clib_bihash_kv_8_8_t kv0, value0; u32 next_worker_index = 0; + u32 hash; - key0.addr = ip0->src_address; - key0.fib_index = rx_fib_index0; - - kv0.key = key0.as_u64; + next_worker_index = sm->first_worker_index; + hash = ip0->src_address.as_u32 + (ip0->src_address.as_u32 >> 8) + + (ip0->src_address.as_u32 >> 16) + (ip0->src_address.as_u32 >>24); - /* Ever heard of of the "user" before? */ - if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0)) - { - /* No, assign next available worker (RR) */ - next_worker_index = sm->first_worker_index; - if (vec_len (sm->workers)) - { - next_worker_index += - sm->workers[sm->next_worker++ % _vec_len (sm->workers)]; - } - - /* add non-traslated packets worker lookup */ - kv0.value = next_worker_index; - clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); - } + if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers)))) + next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)]; else - next_worker_index = value0.value; + next_worker_index += sm->workers[hash % _vec_len (sm->workers)]; return next_worker_index; } @@ -2265,9 +2215,6 @@ snat_config (vlib_main_t * vm, unformat_input_t * input) user_memory_size); } - clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets, - user_memory_size); - clib_bihash_init_16_8 (&sm->in2out_ed, "in2out-ed", translation_buckets, translation_memory_size); @@ -2648,8 +2595,6 @@ show_snat_command_fn (vlib_main_t * vm, verbose - 1); vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->out2in_ed, verbose - 1); - vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in, - verbose - 1); vec_foreach_index (j, sm->per_thread_data) { tsm = vec_elt_at_index (sm->per_thread_data, j); diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index d34ff07b..e467fde7 100644 --- a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -95,19 +95,6 @@ typedef struct { }; } snat_user_key_t; -typedef 
struct { - union - { - struct - { - ip4_address_t addr; - u16 port; - u16 fib_index; - }; - u64 as_u64; - }; -} snat_worker_key_t; - #define foreach_snat_protocol \ _(UDP, 0, udp, "udp") \ @@ -283,9 +270,6 @@ typedef struct snat_main_s { clib_bihash_16_8_t out2in_ed; clib_bihash_16_8_t in2out_ed; - /* Non-translated packets worker lookup => src address + VRF */ - clib_bihash_8_8_t worker_by_in; - snat_icmp_match_function_t * icmp_match_in2out_cb; snat_icmp_match_function_t * icmp_match_out2in_cb; diff --git a/src/plugins/nat/nat_api.c b/src/plugins/nat/nat_api.c index 50b4a9ae..b56b4436 100644 --- a/src/plugins/nat/nat_api.c +++ b/src/plugins/nat/nat_api.c @@ -856,6 +856,7 @@ static void snat_user_t *u; u32 session_index, head_index, elt_index; dlist_elt_t *head, *elt; + ip4_header_t ip; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) @@ -864,10 +865,13 @@ static void return; clib_memcpy (&ukey.addr, mp->ip_address, 4); + ip.src_address.as_u32 = ukey.addr.as_u32; ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id)); key.key = ukey.as_u64; - if (!clib_bihash_search_8_8 (&sm->worker_by_in, &key, &value)) - tsm = vec_elt_at_index (sm->per_thread_data, value.value); + if (sm->num_workers) + tsm = + vec_elt_at_index (sm->per_thread_data, + sm->worker_in2out_cb (&ip, ukey.fib_index)); else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); if (clib_bihash_search_8_8 (&tsm->user_hash, &key, &value)) @@ -2093,16 +2097,20 @@ vl_api_nat44_user_session_dump_t_handler (vl_api_nat44_user_session_dump_t * snat_user_t *u; u32 session_index, head_index, elt_index; dlist_elt_t *head, *elt; + ip4_header_t ip; q = vl_api_client_index_to_input_queue (mp->client_index); if (q == 0) return; clib_memcpy (&ukey.addr, mp->ip_address, 4); + ip.src_address.as_u32 = ukey.addr.as_u32; ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id)); key.key = ukey.as_u64; - if (!clib_bihash_search_8_8 (&sm->worker_by_in, &key, &value)) 
- tsm = vec_elt_at_index (sm->per_thread_data, value.value); + if (sm->num_workers) + tsm = + vec_elt_at_index (sm->per_thread_data, + sm->worker_in2out_cb (&ip, ukey.fib_index)); else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); if (clib_bihash_search_8_8 (&tsm->user_hash, &key, &value)) diff --git a/src/plugins/nat/out2in.c b/src/plugins/nat/out2in.c index e5426c1a..802c3312 100755 --- a/src/plugins/nat/out2in.c +++ b/src/plugins/nat/out2in.c @@ -176,10 +176,6 @@ create_session_for_static_mapping (snat_main_t *sm, /* add user */ clib_bihash_add_del_8_8 (&sm->per_thread_data[thread_index].user_hash, &kv0, 1 /* is_add */); - - /* add non-traslated packets worker lookup */ - kv0.value = thread_index; - clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); } else { -- cgit 1.2.3-korg From ec0452133ac225d8c639ffaa37f9ee63aa99a5d8 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Thu, 12 Oct 2017 04:17:45 -0700 Subject: NAT: fix delete of sessions for 1:1 NAT if 1 worker (VPP-1023) Change-Id: I2446c646de7f227f9438dd7ef93a455ba5af0102 Signed-off-by: Matus Fabian --- src/plugins/nat/nat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/plugins/nat/nat.c') diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index c2f9586c..9bdb0351 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -462,7 +462,7 @@ int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, u_key.addr = m->local_addr; u_key.fib_index = m->fib_index; kv.key = u_key.as_u64; - if (sm->num_workers) + if (sm->num_workers > 1) tsm = vec_elt_at_index (sm->per_thread_data, m->worker_index); else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); -- cgit 1.2.3-korg