src/vlib/buffer_funcs.c

   1 /* SPDX-License-Identifier: Apache-2.0
   2  * Copyright(c) 2021 Cisco Systems, Inc.
   3  */
   4
   5 #include <vppinfra/clib.h>
   6 #include <vlib/vlib.h>
   7 #include <vppinfra/vector/mask_compare.h>
   8 #include <vppinfra/vector/compress.h>
   9
  10 static_always_inline u32
  11 enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, u64 *used_elt_bmp,
  12              u16 next_index, u32 *buffers, u16 *nexts, u32 n_buffers,
  13              u32 n_left, u32 *tmp)
  14 {
  15   u64 match_bmp[VLIB_FRAME_SIZE / 64];
  16   vlib_frame_t *f;
  17   u32 n_extracted, n_free;
  18   u32 *to;
  19
  20   f = vlib_get_next_frame_internal (vm, node, next_index, 0);
  21
  22   n_free = VLIB_FRAME_SIZE - f->n_vectors;
  23
  24   /* if frame contains enough space for worst case scenario, we can avoid
  25    * use of tmp */
  26   if (n_free >= n_left)
  27     to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
  28   else
  29     to = tmp;
  30
  31   clib_mask_compare_u16 (next_index, nexts, match_bmp, n_buffers);
  32
  33   n_extracted = clib_compress_u32 (to, buffers, match_bmp, n_buffers);
  34
  35   for (int i = 0; i < ARRAY_LEN (match_bmp); i++)
  36     used_elt_bmp[i] |= match_bmp[i];
  37
  38   if (to != tmp)
  39     {
  40       /* indices already written to frame, just close it */
  41       vlib_put_next_frame (vm, node, next_index, n_free - n_extracted);
  42     }
  43   else if (n_free >= n_extracted)
  44     {
  45       /* enough space in the existing frame */
  46       to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
  47       vlib_buffer_copy_indices (to, tmp, n_extracted);
  48       vlib_put_next_frame (vm, node, next_index, n_free - n_extracted);
  49     }
  50   else
  51     {
  52       /* full frame */
  53       to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
  54       vlib_buffer_copy_indices (to, tmp, n_free);
  55       vlib_put_next_frame (vm, node, next_index, 0);
  56
  57       /* second frame */
  58       u32 n_2nd_frame = n_extracted - n_free;
  59       f = vlib_get_next_frame_internal (vm, node, next_index, 1);
  60       to = vlib_frame_vector_args (f);
  61       vlib_buffer_copy_indices (to, tmp + n_free, n_2nd_frame);
  62       vlib_put_next_frame (vm, node, next_index,
  63                            VLIB_FRAME_SIZE - n_2nd_frame);
  64     }
  65
  66   return n_left - n_extracted;
  67 }
  68
  69 void __clib_section (".vlib_buffer_enqueue_to_next_fn")
  70 CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_fn)
  71 (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts,
  72  uword count)
  73 {
  74   u32 tmp[VLIB_FRAME_SIZE];
  75   u32 n_left;
  76   u16 next_index;
  77
  78   while (count >= VLIB_FRAME_SIZE)
  79     {
  80       u64 used_elt_bmp[VLIB_FRAME_SIZE / 64] = {};
  81       n_left = VLIB_FRAME_SIZE;
  82       u32 off = 0;
  83
  84       next_index = nexts[0];
  85       n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers, nexts,
  86                             VLIB_FRAME_SIZE, n_left, tmp);
  87
  88       while (n_left)
  89         {
  90           while (PREDICT_FALSE (used_elt_bmp[off] == ~0))
  91             {
  92               off++;
  93               ASSERT (off < ARRAY_LEN (used_elt_bmp));
  94             }
  95
  96           next_index =
  97             nexts[off * 64 + count_trailing_zeros (~used_elt_bmp[off])];
  98           n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers,
  99                                 nexts, VLIB_FRAME_SIZE, n_left, tmp);
 100         }
 101
 102       buffers += VLIB_FRAME_SIZE;
 103       nexts += VLIB_FRAME_SIZE;
 104       count -= VLIB_FRAME_SIZE;
 105     }
 106
 107   if (count)
 108     {
 109       u64 used_elt_bmp[VLIB_FRAME_SIZE / 64] = {};
 110       next_index = nexts[0];
 111       n_left = count;
 112       u32 off = 0;
 113
 114       n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers, nexts,
 115                             count, n_left, tmp);
 116
 117       while (n_left)
 118         {
 119           while (PREDICT_FALSE (used_elt_bmp[off] == ~0))
 120             {
 121               off++;
 122               ASSERT (off < ARRAY_LEN (used_elt_bmp));
 123             }
 124
 125           next_index =
 126             nexts[off * 64 + count_trailing_zeros (~used_elt_bmp[off])];
 127           n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers,
 128                                 nexts, count, n_left, tmp);
 129         }
 130     }
 131 }
 132
 133 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_next_fn);
 134
 135 void __clib_section (".vlib_buffer_enqueue_to_single_next_fn")
 136 CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_fn)
 137 (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 next_index,
 138  u32 count)
 139 {
 140   u32 *to_next, n_left_to_next, n_enq;
 141
 142   vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 143
 144   if (PREDICT_TRUE (n_left_to_next >= count))
 145     {
 146       vlib_buffer_copy_indices (to_next, buffers, count);
 147       n_left_to_next -= count;
 148       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
 149       return;
 150     }
 151
 152   n_enq = n_left_to_next;
 153 next:
 154   vlib_buffer_copy_indices (to_next, buffers, n_enq);
 155   n_left_to_next -= n_enq;
 156
 157   if (PREDICT_FALSE (count > n_enq))
 158     {
 159       count -= n_enq;
 160       buffers += n_enq;
 161
 162       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
 163       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 164       n_enq = clib_min (n_left_to_next, count);
 165       goto next;
 166     }
 167   vlib_put_next_frame (vm, node, next_index, n_left_to_next);
 168 }
 169 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_single_next_fn);
 170
/* Reserve the next element of the frame queue fqm->vlib_frame_queues[index]
 * by atomically advancing its tail.
 *
 * Returns a pointer to the reserved element, or 0 when the ring is full and
 * dont_wait is non-zero. With dont_wait == 0 the function spins, calling
 * vlib_worker_thread_barrier_check (), until the ring has room. */
static inline vlib_frame_queue_elt_t *
vlib_get_frame_queue_elt (vlib_frame_queue_main_t *fqm, u32 index,
                          int dont_wait)
{
  vlib_frame_queue_t *fq;
  u64 nelts, tail, new_tail;

  fq = fqm->vlib_frame_queues[index];
  ASSERT (fq);
  nelts = fq->nelts;

retry:
  /* snapshot the tail and compute the slot we would like to claim */
  tail = __atomic_load_n (&fq->tail, __ATOMIC_ACQUIRE);
  new_tail = tail + 1;

  /* ring is full when the candidate tail is nelts or more ahead of head */
  if (new_tail >= fq->head + nelts)
    {
      if (dont_wait)
        return 0;

      /* Wait until a ring slot is available */
      while (new_tail >= fq->head + nelts)
        vlib_worker_thread_barrier_check ();
    }

  /* claim the slot; if tail no longer holds the value loaded above the
   * exchange fails and the whole sequence restarts from a fresh load */
  if (!__atomic_compare_exchange_n (&fq->tail, &tail, new_tail, 0 /* weak */,
                                    __ATOMIC_RELAXED, __ATOMIC_RELAXED))
    goto retry;

  /* NOTE(review): the index is masked with (nelts - 1), which assumes nelts
   * is a power of two - confirm where the queue is allocated */
  return fq->elts + (new_tail & (nelts - 1));
}
 202
 203 static_always_inline u32
 204 vlib_buffer_enqueue_to_thread_inline (vlib_main_t *vm,
 205                                       vlib_node_runtime_t *node,
 206                                       vlib_frame_queue_main_t *fqm,
 207                                       u32 *buffer_indices, u16 *thread_indices,
 208                                       u32 n_packets, int drop_on_congestion)
 209 {
 210   u32 drop_list[VLIB_FRAME_SIZE], n_drop = 0;
 211   u64 used_elts[VLIB_FRAME_SIZE / 64] = {};
 212   u64 mask[VLIB_FRAME_SIZE / 64];
 213   vlib_frame_queue_elt_t *hf = 0;
 214   u16 thread_index;
 215   u32 n_comp, off = 0, n_left = n_packets;
 216
 217   thread_index = thread_indices[0];
 218
 219 more:
 220   clib_mask_compare_u16 (thread_index, thread_indices, mask, n_packets);
 221   hf = vlib_get_frame_queue_elt (fqm, thread_index, drop_on_congestion);
 222
 223   n_comp = clib_compress_u32 (hf ? hf->buffer_index : drop_list + n_drop,
 224                               buffer_indices, mask, n_packets);
 225
 226   if (hf)
 227     {
 228       if (node->flags & VLIB_NODE_FLAG_TRACE)
 229         hf->maybe_trace = 1;
 230       hf->n_vectors = n_comp;
 231       __atomic_store_n (&hf->valid, 1, __ATOMIC_RELEASE);
 232       vlib_get_main_by_index (thread_index)->check_frame_queues = 1;
 233     }
 234   else
 235     n_drop += n_comp;
 236
 237   n_left -= n_comp;
 238
 239   if (n_left)
 240     {
 241       for (int i = 0; i < ARRAY_LEN (used_elts); i++)
 242         used_elts[i] |= mask[i];
 243
 244       while (PREDICT_FALSE (used_elts[off] == ~0))
 245         {
 246           off++;
 247           ASSERT (off < ARRAY_LEN (used_elts));
 248         }
 249
 250       thread_index =
 251         thread_indices[off * 64 + count_trailing_zeros (~used_elts[off])];
 252       goto more;
 253     }
 254
 255   if (drop_on_congestion && n_drop)
 256     vlib_buffer_free (vm, drop_list, n_drop);
 257
 258   return n_packets - n_drop;
 259 }
 260
 261 u32 __clib_section (".vlib_buffer_enqueue_to_thread_fn")
 262 CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
 263 (vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index,
 264  u32 *buffer_indices, u16 *thread_indices, u32 n_packets,
 265  int drop_on_congestion)
 266 {
 267   vlib_thread_main_t *tm = vlib_get_thread_main ();
 268   vlib_frame_queue_main_t *fqm;
 269   u32 n_enq = 0;
 270
 271   fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
 272
 273   while (n_packets >= VLIB_FRAME_SIZE)
 274     {
 275       n_enq += vlib_buffer_enqueue_to_thread_inline (
 276         vm, node, fqm, buffer_indices, thread_indices, VLIB_FRAME_SIZE,
 277         drop_on_congestion);
 278       buffer_indices += VLIB_FRAME_SIZE;
 279       thread_indices += VLIB_FRAME_SIZE;
 280       n_packets -= VLIB_FRAME_SIZE;
 281     }
 282
 283   if (n_packets == 0)
 284     return n_enq;
 285
 286   n_enq += vlib_buffer_enqueue_to_thread_inline (vm, node, fqm, buffer_indices,
 287                                                  thread_indices, n_packets,
 288                                                  drop_on_congestion);
 289
 290   return n_enq;
 291 }
 292
 293 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_fn);
 294
 295 u32 __clib_section (".vlib_frame_queue_dequeue_fn")
 296 CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
 297 (vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
 298 {
 299   u32 thread_id = vm->thread_index;
 300   vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
 301   u32 mask = fq->nelts - 1;
 302   vlib_frame_queue_elt_t *elt;
 303   u32 n_free, n_copy, *from, *to = 0, processed = 0, vectors = 0;
 304   vlib_frame_t *f = 0;
 305
 306   ASSERT (fq);
 307   ASSERT (vm == vlib_global_main.vlib_mains[thread_id]);
 308
 309   if (PREDICT_FALSE (fqm->node_index == ~0))
 310     return 0;
 311   /*
 312    * Gather trace data for frame queues
 313    */
 314   if (PREDICT_FALSE (fq->trace))
 315     {
 316       frame_queue_trace_t *fqt;
 317       frame_queue_nelt_counter_t *fqh;
 318       u32 elix;
 319
 320       fqt = &fqm->frame_queue_traces[thread_id];
 321
 322       fqt->nelts = fq->nelts;
 323       fqt->head = fq->head;
 324       fqt->tail = fq->tail;
 325       fqt->threshold = fq->vector_threshold;
 326       fqt->n_in_use = fqt->tail - fqt->head;
 327       if (fqt->n_in_use >= fqt->nelts)
 328         {
 329           // if beyond max then use max
 330           fqt->n_in_use = fqt->nelts - 1;
 331         }
 332
 333       /* Record the number of elements in use in the histogram */
 334       fqh = &fqm->frame_queue_histogram[thread_id];
 335       fqh->count[fqt->n_in_use]++;
 336
 337       /* Record a snapshot of the elements in use */
 338       for (elix = 0; elix < fqt->nelts; elix++)
 339         {
 340           elt = fq->elts + ((fq->head + 1 + elix) & (mask));
 341           if (1 || elt->valid)
 342             {
 343               fqt->n_vectors[elix] = elt->n_vectors;
 344             }
 345         }
 346       fqt->written = 1;
 347     }
 348
 349   while (1)
 350     {
 351       if (fq->head == fq->tail)
 352         break;
 353
 354       elt = fq->elts + ((fq->head + 1) & mask);
 355
 356       if (!__atomic_load_n (&elt->valid, __ATOMIC_ACQUIRE))
 357         break;
 358
 359       from = elt->buffer_index + elt->offset;
 360
 361       ASSERT (elt->offset + elt->n_vectors <= VLIB_FRAME_SIZE);
 362
 363       if (f == 0)
 364         {
 365           f = vlib_get_frame_to_node (vm, fqm->node_index);
 366           to = vlib_frame_vector_args (f);
 367           n_free = VLIB_FRAME_SIZE;
 368         }
 369
 370       if (elt->maybe_trace)
 371         f->frame_flags |= VLIB_NODE_FLAG_TRACE;
 372
 373       n_copy = clib_min (n_free, elt->n_vectors);
 374
 375       vlib_buffer_copy_indices (to, from, n_copy);
 376       to += n_copy;
 377       n_free -= n_copy;
 378       vectors += n_copy;
 379
 380       if (n_free == 0)
 381         {
 382           f->n_vectors = VLIB_FRAME_SIZE;
 383           vlib_put_frame_to_node (vm, fqm->node_index, f);
 384           f = 0;
 385         }
 386
 387       if (n_copy < elt->n_vectors)
 388         {
 389           /* not empty - leave it on the ring */
 390           elt->n_vectors -= n_copy;
 391           elt->offset += n_copy;
 392         }
 393       else
 394         {
 395           /* empty - reset and bump head */
 396           u32 sz = STRUCT_OFFSET_OF (vlib_frame_queue_elt_t, end_of_reset);
 397           clib_memset (elt, 0, sz);
 398           __atomic_store_n (&fq->head, fq->head + 1, __ATOMIC_RELEASE);
 399           processed++;
 400         }
 401
 402       /* Limit the number of packets pushed into the graph */
 403       if (vectors >= fq->vector_threshold)
 404         break;
 405     }
 406
 407   if (f)
 408     {
 409       f->n_vectors = VLIB_FRAME_SIZE - n_free;
 410       vlib_put_frame_to_node (vm, fqm->node_index, f);
 411     }
 412
 413   return processed;
 414 }
 415
 416 CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_fn);
 417
 418 #ifndef CLIB_MARCH_VARIANT
 419 vlib_buffer_func_main_t vlib_buffer_func_main;
 420
 421 static clib_error_t *
 422 vlib_buffer_funcs_init (vlib_main_t *vm)
 423 {
 424   vlib_buffer_func_main_t *bfm = &vlib_buffer_func_main;
 425   bfm->buffer_enqueue_to_next_fn =
 426     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_next_fn);
 427   bfm->buffer_enqueue_to_single_next_fn =
 428     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn);
 429   bfm->buffer_enqueue_to_thread_fn =
 430     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn);
 431   bfm->frame_queue_dequeue_fn =
 432     CLIB_MARCH_FN_POINTER (vlib_frame_queue_dequeue_fn);
 433   return 0;
 434 }
 435
 436 VLIB_INIT_FUNCTION (vlib_buffer_funcs_init);
 437 #endif