/* * Copyright (c) 2018 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * @file * @brief NAT plugin client-IP based session affinity for load-balancing */ #include #include #include nat_affinity_main_t nat_affinity_main; #define AFFINITY_HASH_BUCKETS 65536 #define AFFINITY_HASH_MEMORY (2 << 25) u8 * format_affinity_kvp (u8 * s, va_list * args) { clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *); nat_affinity_key_t k; k.as_u64[0] = v->key[0]; k.as_u64[1] = v->key[1]; s = format (s, "client %U backend %U:%d proto %U index %llu", format_ip4_address, &k.client_addr, format_ip4_address, &k.service_addr, clib_net_to_host_u16 (k.service_port), format_ip_protocol, k.proto); return s; } void nat_affinity_enable () { nat_affinity_main_t *nam = &nat_affinity_main; vlib_thread_main_t *tm = vlib_get_thread_main (); if (tm->n_vlib_mains > 1) clib_spinlock_init (&nam->affinity_lock); clib_bihash_init_16_8 (&nam->affinity_hash, "nat-affinity", AFFINITY_HASH_BUCKETS, AFFINITY_HASH_MEMORY); clib_bihash_set_kvp_format_fn_16_8 (&nam->affinity_hash, format_affinity_kvp); } void nat_affinity_disable () { nat_affinity_main_t *nam = &nat_affinity_main; vlib_thread_main_t *tm = vlib_get_thread_main (); if (tm->n_vlib_mains > 1) clib_spinlock_free (&nam->affinity_lock); clib_bihash_free_16_8 (&nam->affinity_hash); } clib_error_t * nat_affinity_init (vlib_main_t * vm) { nat_affinity_main_t *nam = &nat_affinity_main; nam->vlib_main = vm; return 0; } static_always_inline void make_affinity_kv (clib_bihash_kv_16_8_t * kv, ip4_address_t client_addr, ip4_address_t service_addr, u8 proto, u16 service_port) { nat_affinity_key_t *key = (nat_affinity_key_t *) kv->key; key->client_addr = client_addr; key->service_addr = service_addr; key->proto = proto; key->service_port = service_port; kv->value = ~0ULL; } u32 nat_affinity_get_per_service_list_head_index (void) { nat_affinity_main_t *nam = &nat_affinity_main; dlist_elt_t *head_elt; clib_spinlock_lock_if_init (&nam->affinity_lock); pool_get (nam->list_pool, head_elt); clib_dlist_init (nam->list_pool, head_elt - nam->list_pool); clib_spinlock_unlock_if_init (&nam->affinity_lock); return head_elt - nam->list_pool; } void nat_affinity_flush_service (u32 affinity_per_service_list_head_index) { snat_main_t *sm = &snat_main; nat_affinity_main_t *nam = &nat_affinity_main; u32 elt_index; dlist_elt_t *elt; nat_affinity_t *a; clib_bihash_kv_16_8_t kv; clib_spinlock_lock_if_init (&nam->affinity_lock); while ((elt_index = clib_dlist_remove_head (nam->list_pool, affinity_per_service_list_head_index)) != ~0) { elt = pool_elt_at_index (nam->list_pool, elt_index); a = pool_elt_at_index (nam->affinity_pool, elt->value); kv.key[0] = a->key.as_u64[0]; kv.key[1] = a->key.as_u64[1]; pool_put_index (nam->affinity_pool, elt->value); if (clib_bihash_add_del_16_8 (&nam->affinity_hash, &kv, 0)) nat_elog_warn (sm, "affinity key del failed"); pool_put_index (nam->list_pool, elt_index); } pool_put_index (nam->list_pool, affinity_per_service_list_head_index); clib_spinlock_unlock_if_init (&nam->affinity_lock); } int nat_affinity_find_and_lock (vlib_main_t *vm, ip4_address_t client_addr, ip4_address_t service_addr, u8 proto, u16 service_port, u8 *backend_index) { snat_main_t *sm = &snat_main; nat_affinity_main_t *nam = &nat_affinity_main; clib_bihash_kv_16_8_t kv, value; nat_affinity_t *a; int rv = 0; make_affinity_kv (&kv, client_addr, service_addr, proto, service_port); clib_spinlock_lock_if_init (&nam->affinity_lock); if (clib_bihash_search_16_8 (&nam->affinity_hash, &kv, &value)) { rv = 1; goto unlock; } a = pool_elt_at_index (nam->affinity_pool, value.value); /* if already expired delete */ if (a->ref_cnt == 0) { if (a->expire < vlib_time_now (vm)) { clib_dlist_remove (nam->list_pool, a->per_service_index); pool_put_index (nam->list_pool, a->per_service_index); pool_put_index (nam->affinity_pool, value.value); if (clib_bihash_add_del_16_8 (&nam->affinity_hash, &kv, 0)) nat_elog_warn (sm, "affinity key del failed"); rv = 1; goto unlock; } } a->ref_cnt++; *backend_index = a->backend_index; unlock: clib_spinlock_unlock_if_init (&nam->affinity_lock); return rv; } static int affinity_is_expired_cb (clib_bihash_kv_16_8_t * kv, void *arg) { snat_main_t *sm = &snat_main; nat_affinity_main_t *nam = &nat_affinity_main; nat_affinity_t *a; a = pool_elt_at_index (nam->affinity_pool, kv->value); if (a->ref_cnt == 0) { if (a->expire < vlib_time_now (nam->vlib_main)) { clib_dlist_remove (nam->list_pool, a->per_service_index); pool_put_index (nam->list_pool, a->per_service_index); pool_put_index (nam->affinity_pool, kv->value); if (clib_bihash_add_del_16_8 (&nam->affinity_hash, kv, 0)) nat_elog_warn (sm, "affinity key del failed"); return 1; } } return 0; } int nat_affinity_create_and_lock (ip4_address_t client_addr, ip4_address_t service_addr, u8 proto, u16 service_port, u8 backend_index, u32 sticky_time, u32 affinity_per_service_list_head_index) { snat_main_t *sm = &snat_main; nat_affinity_main_t *nam = &nat_affinity_main; clib_bihash_kv_16_8_t kv, value; nat_affinity_t *a; dlist_elt_t *list_elt; int rv = 0; make_affinity_kv (&kv, client_addr, service_addr, proto, service_port); clib_spinlock_lock_if_init (&nam->affinity_lock); if (!clib_bihash_search_16_8 (&nam->affinity_hash, &kv, &value)) { rv = 1; nat_elog_notice (sm, "affinity key already exist"); goto unlock; } pool_get (nam->affinity_pool, a); kv.value = a - nam->affinity_pool; rv = clib_bihash_add_or_overwrite_stale_16_8 (&nam->affinity_hash, &kv, affinity_is_expired_cb, NULL); if (rv) { nat_elog_notice (sm, "affinity key add failed"); pool_put (nam->affinity_pool, a); goto unlock; } pool_get (nam->list_pool, list_elt); clib_dlist_init (nam->list_pool, list_elt - nam->list_pool); list_elt->value = a - nam->affinity_pool; a->per_service_index = list_elt - nam->list_pool; a->backend_index = backend_index; a->ref_cnt = 1; a->sticky_time = sticky_time; a->key.as_u64[0] = kv.key[0]; a->key.as_u64[1] = kv.key[1]; clib_dlist_addtail (nam->list_pool, affinity_per_service_list_head_index, a->per_service_index); unlock: clib_spinlock_unlock_if_init (&nam->affinity_lock); return rv; } void nat_affinity_unlock (ip4_address_t client_addr, ip4_address_t service_addr, u8 proto, u16 service_port) { nat_affinity_main_t *nam = &nat_affinity_main; clib_bihash_kv_16_8_t kv, value; nat_affinity_t *a; make_affinity_kv (&kv, client_addr, service_addr, proto, service_port); clib_spinlock_lock_if_init (&nam->affinity_lock); if (clib_bihash_search_16_8 (&nam->affinity_hash, &kv, &value)) goto unlock; a = pool_elt_at_index (nam->affinity_pool, value.value); a->ref_cnt--; if (a->ref_cnt == 0) a->expire = (u64) a->sticky_time + vlib_time_now (nam->vlib_main); unlock: clib_spinlock_unlock_if_init (&nam->affinity_lock); } /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */