2 Copyright (c) 2012 Cisco and/or its affiliates.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <vppinfra/slist.h>
20 * skip-list implementation
22 * Good news / bad news. As balanced binary tree schemes go,
23 * this one seems pretty fast and is reasonably simple. There's a very
24 * limited amount that can be done to mitigate sdram read latency.
26 * Each active clib_slist_elt_t is on from 1 to N lists. Each active element
27 * is always on the "level-0" list. Since most elements are *only* on
28 * level 0, we keep the level 0 (and level 1) in the element. For those
29 * elements on more than two lists, we switch to a vector. Hence, the
30 * "n" union in slib_slist_elt_t.
32 * The low-order bit of elt->n.next0[0] is 1 for inlined next indices,
33 * 0 for vector indices (since the allocator always aligns to at least
34 * a 4-byte boundary). We can only represent 2e9 items, but since the
35 * practical performance limit is O(1e7), it doesn't matter.
37 * We create a "head" element which (by construction) is always
38 * lexically lighter than any other element. This makes a large number
39 * of irritating special cases go away.
41 * User code is in charge of comparing a supplied key with
42 * the key component of a user pool element. The user tells this code
43 * to add or delete (opaque key, 32-bit integer) pairs to the skip-list.
45 * The algorithm adds new elements to one or more lists.
46 * For levels greater than zero, the probability of a new element landing on
47 * a list is branching_factor**N. Branching_factor = 0.2 seems to work
48 * OK, yielding about 50 compares per search at O(1e7) items.
52 clib_slist_init (clib_slist_t * sp, f64 branching_factor,
53 clib_slist_key_compare_function_t compare,
54 format_function_t format_user_element)
56 clib_slist_elt_t *head;
57 clib_memset (sp, 0, sizeof (sp[0]));
58 sp->branching_factor = branching_factor;
59 sp->format_user_element = format_user_element;
60 sp->compare = compare;
61 sp->seed = 0xdeaddabe;
62 pool_get (sp->elts, head);
63 vec_add1 (head->n.nexts, (u32) ~ 0);
64 head->user_pool_index = (u32) ~ 0;
65 vec_validate (sp->path, 1);
66 vec_validate (sp->occupancy, 0);
72 * slist_search_internal
74 static inline clib_slist_search_result_t
75 slist_search_internal (clib_slist_t * sp, void *key, int need_full_path)
77 int level, comp_result;
78 clib_slist_elt_t *search_elt, *head_elt;
82 * index 0 is the magic listhead element which is
83 * lexically lighter than / to the left of every element
85 search_elt = head_elt = pool_elt_at_index (sp->elts, 0);
88 * Initial negotiating position, only the head_elt is
89 * lighter than the supplied key
91 clib_memset (sp->path, 0, vec_len (head_elt->n.nexts) * sizeof (u32));
93 /* Walk the fastest lane first */
94 level = vec_len (head_elt->n.nexts) - 1;
95 _vec_len (sp->path) = level + 1;
99 u32 next_index_this_level;
100 clib_slist_elt_t *prefetch_elt;
103 * Prefetching the next element at this level makes a measurable
104 * difference, but doesn't fix the dependent read stall problem
106 prefetch_elt = sp->elts +
107 clib_slist_get_next_at_level (search_elt, level);
109 CLIB_PREFETCH (prefetch_elt, CLIB_CACHE_LINE_BYTES, READ);
111 /* Compare the key with the current element */
112 comp_result = (search_elt == head_elt) ? 1 :
113 sp->compare (key, search_elt->user_pool_index);
116 /* key "lighter" than this element */
120 * Back up to previous item on this list
121 * and search the next finer-grained list
124 search_elt = pool_elt_at_index (sp->elts, sp->path[level]);
133 return CLIB_SLIST_NO_MATCH;
137 if (comp_result == 0)
140 * If we're trying to delete an element, we need to
141 * track down all of the elements which point at it.
142 * Otherwise, don't bother with it
144 if (need_full_path && level > 0)
146 search_elt = pool_elt_at_index (sp->elts, sp->path[level]);
150 level = vec_len (head_elt->n.nexts);
151 sp->path[level] = search_elt - sp->elts;
152 _vec_len (sp->path) = level + 1;
153 return CLIB_SLIST_MATCH;
156 * comp_result positive, key is to the right of
159 sp->path[level] = search_elt - sp->elts;
161 /* Out of list at this level? */
162 next_index_this_level =
163 clib_slist_get_next_at_level (search_elt, level);
164 if (next_index_this_level == (u32) ~ 0)
167 /* No, try the next element */
168 search_elt = pool_elt_at_index (sp->elts, next_index_this_level);
170 return 0; /* notreached */
174 clib_slist_search (clib_slist_t * sp, void *key, u32 * ncompares)
176 clib_slist_search_result_t rv;
178 rv = slist_search_internal (sp, key, 0 /* dont need full path */ );
179 if (rv == CLIB_SLIST_MATCH)
181 clib_slist_elt_t *elt;
182 elt = pool_elt_at_index (sp->elts, sp->path[vec_len (sp->path) - 1]);
184 *ncompares = sp->ncompares;
185 return elt->user_pool_index;
191 clib_slist_add (clib_slist_t * sp, void *key, u32 user_pool_index)
193 clib_slist_elt_t *new_elt;
194 clib_slist_search_result_t search_result;
197 search_result = slist_search_internal (sp, key,
198 0 /* don't need full path */ );
200 /* Special case: key exists, just replace user_pool_index */
201 if (PREDICT_FALSE (search_result == CLIB_SLIST_MATCH))
203 clib_slist_elt_t *elt;
204 elt = pool_elt_at_index (sp->elts, sp->path[0]);
205 elt->user_pool_index = user_pool_index;
209 pool_get (sp->elts, new_elt);
210 new_elt->n.nexts = 0;
211 new_elt->user_pool_index = user_pool_index;
213 /* sp->path lists elements to the left of key, by level */
214 for (level = 0; level < vec_len (sp->path); level++)
216 clib_slist_elt_t *prev_elt_this_level;
217 u32 prev_elt_next_index_this_level;
219 /* Add to list at the current level */
220 prev_elt_this_level = pool_elt_at_index (sp->elts, sp->path[level]);
221 prev_elt_next_index_this_level = clib_slist_get_next_at_level
222 (prev_elt_this_level, level);
224 clib_slist_set_next_at_level (new_elt, prev_elt_next_index_this_level,
227 clib_slist_set_next_at_level (prev_elt_this_level, new_elt - sp->elts,
229 sp->occupancy[level]++;
231 /* Randomly add to the next-higher level */
232 if (random_f64 (&sp->seed) > sp->branching_factor)
236 /* Time to add a new ply? */
237 clib_slist_elt_t *head_elt = pool_elt_at_index (sp->elts, 0);
238 int top_level = vec_len (head_elt->n.nexts) - 1;
239 if (((f64) sp->occupancy[top_level]) * sp->branching_factor > 1.0)
241 vec_add1 (sp->occupancy, 0);
242 vec_add1 (head_elt->n.nexts, (u32) ~ 0);
243 /* full match case returns n+1 items */
244 vec_validate (sp->path, vec_len (head_elt->n.nexts));
249 clib_slist_search_result_t
250 clib_slist_del (clib_slist_t * sp, void *key)
252 clib_slist_search_result_t search_result;
253 clib_slist_elt_t *del_elt;
256 search_result = slist_search_internal (sp, key, 1 /* need full path */ );
258 if (PREDICT_FALSE (search_result == CLIB_SLIST_NO_MATCH))
259 return search_result;
261 del_elt = pool_elt_at_index (sp->elts, sp->path[vec_len (sp->path) - 1]);
262 ASSERT (vec_len (sp->path) > 1);
264 for (level = 0; level < vec_len (sp->path) - 1; level++)
266 clib_slist_elt_t *path_elt;
267 u32 path_elt_next_index;
269 path_elt = pool_elt_at_index (sp->elts, sp->path[level]);
270 path_elt_next_index = clib_slist_get_next_at_level (path_elt, level);
272 /* Splice the item out of the list if it's adjacent to the victim */
273 if (path_elt_next_index == del_elt - sp->elts)
275 sp->occupancy[level]--;
276 path_elt_next_index = clib_slist_get_next_at_level (del_elt, level);
277 clib_slist_set_next_at_level (path_elt, path_elt_next_index, level);
281 /* If this element is on more than two lists it has a vector of nexts */
282 if (!(del_elt->n.next0[0] & 1))
283 vec_free (del_elt->n.nexts);
284 pool_put (sp->elts, del_elt);
285 return CLIB_SLIST_MATCH;
289 format_slist (u8 * s, va_list * args)
291 clib_slist_t *sl = va_arg (*args, clib_slist_t *);
292 int verbose = va_arg (*args, int);
294 clib_slist_elt_t *head_elt, *elt;
296 s = format (s, "slist 0x%x, %u items, branching_factor %.2f\n", sl,
297 sl->occupancy ? sl->occupancy[0] : 0, sl->branching_factor);
299 if (pool_elts (sl->elts) == 0)
302 head_elt = pool_elt_at_index (sl->elts, 0);
304 for (i = 0; i < vec_len (head_elt->n.nexts); i++)
306 s = format (s, "level %d: %d elts\n", i,
307 sl->occupancy ? sl->occupancy[i] : 0);
309 if (verbose && head_elt->n.nexts[i] != (u32) ~ 0)
311 elt = pool_elt_at_index (sl->elts, head_elt->n.nexts[i]);
315 s = format (s, "%U(%d) ", sl->format_user_element,
316 elt->user_pool_index, elt - sl->elts);
317 next_index = clib_slist_get_next_at_level (elt, i);
318 ASSERT (next_index != 0x7fffffff);
319 if (next_index == (u32) ~ 0)
322 elt = pool_elt_at_index (sl->elts, next_index);
325 s = format (s, "\n");
331 * fd.io coding-style-patch-verification: ON
334 * eval: (c-set-style "gnu")