/* Copyright (c) 2012 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include /* * skip-list implementation * * Good news / bad news. As balanced binary tree schemes go, * this one seems pretty fast and is reasonably simple. There's a very * limited amount that can be done to mitigate sdram read latency. * * Each active clib_slist_elt_t is on from 1 to N lists. Each active element * is always on the "level-0" list. Since most elements are *only* on * level 0, we keep the level 0 (and level 1) in the element. For those * elements on more than two lists, we switch to a vector. Hence, the * "n" union in slib_slist_elt_t. * * The low-order bit of elt->n.next0[0] is 1 for inlined next indices, * 0 for vector indices (since the allocator always aligns to at least * a 4-byte boundary). We can only represent 2e9 items, but since the * practical performance limit is O(1e7), it doesn't matter. * * We create a "head" element which (by construction) is always * lexically lighter than any other element. This makes a large number * of irritating special cases go away. * * User code is in charge of comparing a supplied key with * the key component of a user pool element. The user tells this code * to add or delete (opaque key, 32-bit integer) pairs to the skip-list. * * The algorithm adds new elements to one or more lists. * For levels greater than zero, the probability of a new element landing on * a list is branching_factor**N. Branching_factor = 0.2 seems to work * OK, yielding about 50 compares per search at O(1e7) items. */ clib_error_t * clib_slist_init (clib_slist_t * sp, f64 branching_factor, clib_slist_key_compare_function_t compare, format_function_t format_user_element) { clib_slist_elt_t *head; memset (sp, 0, sizeof (sp[0])); sp->branching_factor = branching_factor; sp->format_user_element = format_user_element; sp->compare = compare; sp->seed = 0xdeaddabe; pool_get (sp->elts, head); vec_add1 (head->n.nexts, (u32) ~ 0); head->user_pool_index = (u32) ~ 0; vec_validate (sp->path, 1); vec_validate (sp->occupancy, 0); return 0; } /* * slist_search_internal */ static inline clib_slist_search_result_t slist_search_internal (clib_slist_t * sp, void *key, int need_full_path) { int level, comp_result; clib_slist_elt_t *search_elt, *head_elt; sp->ncompares = 0; /* * index 0 is the magic listhead element which is * lexically lighter than / to the left of every element */ search_elt = head_elt = pool_elt_at_index (sp->elts, 0); /* * Initial negotiating position, only the head_elt is * lighter than the supplied key */ memset (sp->path, 0, vec_len (head_elt->n.nexts) * sizeof (u32)); /* Walk the fastest lane first */ level = vec_len (head_elt->n.nexts) - 1; _vec_len (sp->path) = level + 1; while (1) { u32 next_index_this_level; clib_slist_elt_t *prefetch_elt; /* * Prefetching the next element at this level makes a measurable * difference, but doesn't fix the dependent read stall problem */ prefetch_elt = sp->elts + clib_slist_get_next_at_level (search_elt, level); CLIB_PREFETCH (prefetch_elt, CLIB_CACHE_LINE_BYTES, READ); /* Compare the key with the current element */ comp_result = (search_elt == head_elt) ? 1 : sp->compare (key, search_elt->user_pool_index); sp->ncompares++; /* key "lighter" than this element */ if (comp_result < 0) { /* * Back up to previous item on this list * and search the next finer-grained list * starting there. */ search_elt = pool_elt_at_index (sp->elts, sp->path[level]); next_list: if (level > 0) { level--; continue; } else { return CLIB_SLIST_NO_MATCH; } } /* Match */ if (comp_result == 0) { /* * If we're trying to delete an element, we need to * track down all of the elements which point at it. * Otherwise, don't bother with it */ if (need_full_path && level > 0) { search_elt = pool_elt_at_index (sp->elts, sp->path[level]); level--; continue; } level = vec_len (head_elt->n.nexts); sp->path[level] = search_elt - sp->elts; _vec_len (sp->path) = level + 1; return CLIB_SLIST_MATCH; } /* * comp_result positive, key is to the right of * this element */ sp->path[level] = search_elt - sp->elts; /* Out of list at this level? */ next_index_this_level = clib_slist_get_next_at_level (search_elt, level); if (next_index_this_level == (u32) ~ 0) goto next_list; /* No, try the next element */ search_elt = pool_elt_at_index (sp->elts, next_index_this_level); } return 0; /* notreached */ } u32 clib_slist_search (clib_slist_t * sp, void *key, u32 * ncompares) { clib_slist_search_result_t rv; rv = slist_search_internal (sp, key, 0 /* dont need full path */ ); if (rv == CLIB_SLIST_MATCH) { clib_slist_elt_t *elt; elt = pool_elt_at_index (sp->elts, sp->path[vec_len (sp->path) - 1]); if (ncompares) *ncompares = sp->ncompares; return elt->user_pool_index; } return (u32) ~ 0; } void clib_slist_add (clib_slist_t * sp, void *key, u32 user_pool_index) { clib_slist_elt_t *new_elt; clib_slist_search_result_t search_result; int level; search_result = slist_search_internal (sp, key, 0 /* don't need full path */ ); /* Special case: key exists, just replace user_pool_index */ if (PREDICT_FALSE (search_result == CLIB_SLIST_MATCH)) { clib_slist_elt_t *elt; elt = pool_elt_at_index (sp->elts, sp->path[0]); elt->user_pool_index = user_pool_index; return; } pool_get (sp->elts, new_elt); new_elt->n.nexts = 0; new_elt->user_pool_index = user_pool_index; /* sp->path lists elements to the left of key, by level */ for (level = 0; level < vec_len (sp->path); level++) { clib_slist_elt_t *prev_elt_this_level; u32 prev_elt_next_index_this_level; /* Add to list at the current level */ prev_elt_this_level = pool_elt_at_index (sp->elts, sp->path[level]); prev_elt_next_index_this_level = clib_slist_get_next_at_level (prev_elt_this_level, level); clib_slist_set_next_at_level (new_elt, prev_elt_next_index_this_level, level); clib_slist_set_next_at_level (prev_elt_this_level, new_elt - sp->elts, level); sp->occupancy[level]++; /* Randomly add to the next-higher level */ if (random_f64 (&sp->seed) > sp->branching_factor) break; } { /* Time to add a new ply? */ clib_slist_elt_t *head_elt = pool_elt_at_index (sp->elts, 0); int top_level = vec_len (head_elt->n.nexts) - 1; if (((f64) sp->occupancy[top_level]) * sp->branching_factor > 1.0) { vec_add1 (sp->occupancy, 0); vec_add1 (head_elt->n.nexts, (u32) ~ 0); /* full match case returns n+1 items */ vec_validate (sp->path, vec_len (head_elt->n.nexts)); } } } clib_slist_search_result_t clib_slist_del (clib_slist_t * sp, void *key) { clib_slist_search_result_t search_result; clib_slist_elt_t *del_elt; int level; search_result = slist_search_internal (sp, key, 1 /* need full path */ ); if (PREDICT_FALSE (search_result == CLIB_SLIST_NO_MATCH)) return search_result; del_elt = pool_elt_at_index (sp->elts, sp->path[vec_len (sp->path) - 1]); ASSERT (vec_len (sp->path) > 1); for (level = 0; level < vec_len (sp->path) - 1; level++) { clib_slist_elt_t *path_elt; u32 path_elt_next_index; path_elt = pool_elt_at_index (sp->elts, sp->path[level]); path_elt_next_index = clib_slist_get_next_at_level (path_elt, level); /* Splice the item out of the list if it's adjacent to the victim */ if (path_elt_next_index == del_elt - sp->elts) { sp->occupancy[level]--; path_elt_next_index = clib_slist_get_next_at_level (del_elt, level); clib_slist_set_next_at_level (path_elt, path_elt_next_index, level); } } /* If this element is on more than two lists it has a vector of nexts */ if (!(del_elt->n.next0[0] & 1)) vec_free (del_elt->n.nexts); pool_put (sp->elts, del_elt); return CLIB_SLIST_MATCH; } u8 * format_slist (u8 * s, va_list * args) { clib_slist_t *sl = va_arg (*args, clib_slist_t *); int verbose = va_arg (*args, int); int i; clib_slist_elt_t *head_elt, *elt; s = format (s, "slist 0x%x, %u items, branching_factor %.2f\n", sl, sl->occupancy ? sl->occupancy[0] : 0, sl->branching_factor); if (pool_elts (sl->elts) == 0) return s; head_elt = pool_elt_at_index (sl->elts, 0); for (i = 0; i < vec_len (head_elt->n.nexts); i++) { s = format (s, "level %d: %d elts\n", i, sl->occupancy ? sl->occupancy[i] : 0); if (verbose && head_elt->n.nexts[i] != (u32) ~ 0) { elt = pool_elt_at_index (sl->elts, head_elt->n.nexts[i]); while (elt) { u32 next_index; s = format (s, "%U(%d) ", sl->format_user_element, elt->user_pool_index, elt - sl->elts); next_index = clib_slist_get_next_at_level (elt, i); ASSERT (next_index != 0x7fffffff); if (next_index == (u32) ~ 0) break; else elt = pool_elt_at_index (sl->elts, next_index); } } s = format (s, "\n"); } return s; } /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */