/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * buffer_node.h: VLIB buffer handling node helper macros/inlines
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#ifndef included_vlib_buffer_node_h
#define included_vlib_buffer_node_h

/** \file
    vlib buffer/node functions
*/
/** \brief Finish enqueueing two buffers forward in the graph.
 Standard dual loop boilerplate element. This is a MACRO,
 with MULTIPLE SIDE EFFECTS. In the ideal case,
 <code>next_index == next0 == next1</code>,
 which means that the speculative enqueue at the top of the dual loop
 has correctly dealt with both packets. In that case, the macro does
 nothing at all. (A usage sketch follows the single-buffer macro below.)

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param next_index speculated next index used for both packets
 @param to_next speculated vector pointer used for both packets
 @param n_left_to_next number of slots left in speculated vector
 @param bi0 first buffer index
 @param bi1 second buffer index
 @param next0 actual next index to be used for the first packet
 @param next1 actual next index to be used for the second packet

 @return @c next_index -- speculative next index to be used for future packets
 @return @c to_next -- speculative frame to be used for future packets
 @return @c n_left_to_next -- number of slots left in speculative frame
*/
#define vlib_validate_buffer_enqueue_x2(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,next0,next1) \
do {                                                                    \
  int enqueue_code = (next0 != next_index) + 2*(next1 != next_index);  \
                                                                        \
  if (PREDICT_FALSE (enqueue_code != 0))                                \
    {                                                                   \
      switch (enqueue_code)                                             \
        {                                                               \
        case 1:                                                         \
          /* A B A */                                                   \
          to_next[-2] = bi1;                                            \
          to_next -= 1;                                                 \
          n_left_to_next += 1;                                          \
          vlib_set_next_frame_buffer (vm, node, next0, bi0);            \
          break;                                                        \
        case 2:                                                         \
          /* A A B */                                                   \
          to_next -= 1;                                                 \
          n_left_to_next += 1;                                          \
          vlib_set_next_frame_buffer (vm, node, next1, bi1);            \
          break;                                                        \
        case 3:                                                         \
          /* A B B or A B C */                                          \
          to_next -= 2;                                                 \
          n_left_to_next += 2;                                          \
          vlib_set_next_frame_buffer (vm, node, next0, bi0);            \
          vlib_set_next_frame_buffer (vm, node, next1, bi1);            \
          if (next0 == next1)                                           \
            {                                                           \
              vlib_put_next_frame (vm, node, next_index,                \
                                   n_left_to_next);                     \
              next_index = next1;                                       \
              vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
            }                                                           \
        }                                                               \
    }                                                                   \
} while (0)
/** \brief Finish enqueueing four buffers forward in the graph.
 Standard quad loop boilerplate element. This is a MACRO,
 with MULTIPLE SIDE EFFECTS. In the ideal case,
 <code>next_index == next0 == next1 == next2 == next3</code>,
 which means that the speculative enqueue at the top of the quad loop
 has correctly dealt with all four packets. In that case, the macro does
 nothing at all. (A usage sketch follows the single-buffer macro below.)

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param next_index speculated next index used for all four packets
 @param to_next speculated vector pointer used for all four packets
 @param n_left_to_next number of slots left in speculated vector
 @param bi0 first buffer index
 @param bi1 second buffer index
 @param bi2 third buffer index
 @param bi3 fourth buffer index
 @param next0 actual next index to be used for the first packet
 @param next1 actual next index to be used for the second packet
 @param next2 actual next index to be used for the third packet
 @param next3 actual next index to be used for the fourth packet

 @return @c next_index -- speculative next index to be used for future packets
 @return @c to_next -- speculative frame to be used for future packets
 @return @c n_left_to_next -- number of slots left in speculative frame
*/
#define vlib_validate_buffer_enqueue_x4(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,bi2,bi3,next0,next1,next2,next3) \
do {                                                                    \
  /* After the fact: check the [speculative] enqueue to "next" */      \
  u32 fix_speculation = (next_index ^ next0) | (next_index ^ next1)    \
    | (next_index ^ next2) | (next_index ^ next3);                     \
  if (PREDICT_FALSE (fix_speculation))                                  \
    {                                                                   \
      /* Undo the whole speculative enqueue */                          \
      to_next -= 4;                                                     \
      n_left_to_next += 4;                                              \
      /* If bi0 belongs to "next", send it there */                     \
      if (next_index == next0)                                          \
        {                                                               \
          to_next[0] = bi0;                                             \
          to_next++;                                                    \
          n_left_to_next--;                                             \
        }                                                               \
      else /* send it where it needs to go */                           \
        vlib_set_next_frame_buffer (vm, node, next0, bi0);              \
      if (next_index == next1)                                          \
        {                                                               \
          to_next[0] = bi1;                                             \
          to_next++;                                                    \
          n_left_to_next--;                                             \
        }                                                               \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next1, bi1);              \
      if (next_index == next2)                                          \
        {                                                               \
          to_next[0] = bi2;                                             \
          to_next++;                                                    \
          n_left_to_next--;                                             \
        }                                                               \
      else                                                              \
        vlib_set_next_frame_buffer (vm, node, next2, bi2);              \
      if (next_index == next3)                                          \
        {                                                               \
          to_next[0] = bi3;                                             \
          to_next++;                                                    \
          n_left_to_next--;                                             \
        }                                                               \
      else                                                              \
        {                                                               \
          vlib_set_next_frame_buffer (vm, node, next3, bi3);            \
          /* Change speculation: last 2 packets went to the same node */ \
          if (next2 == next3)                                           \
            {                                                           \
              vlib_put_next_frame (vm, node, next_index, n_left_to_next); \
              next_index = next3;                                       \
              vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
            }                                                           \
        }                                                               \
    }                                                                   \
} while (0)
/** \brief Finish enqueueing one buffer forward in the graph.
 Standard single loop boilerplate element. This is a MACRO,
 with MULTIPLE SIDE EFFECTS. In the ideal case,
 <code>next_index == next0</code>,
 which means that the speculative enqueue at the top of the single loop
 has correctly dealt with the packet in hand. In that case, the macro does
 nothing at all.

 @param vm vlib_main_t pointer, varies by thread
 @param node current node vlib_node_runtime_t pointer
 @param next_index speculated next index used for the packet
 @param to_next speculated vector pointer used for the packet
 @param n_left_to_next number of slots left in speculated vector
 @param bi0 buffer index
 @param next0 actual next index to be used for the packet

 @return @c next_index -- speculative next index to be used for future packets
 @return @c to_next -- speculative frame to be used for future packets
 @return @c n_left_to_next -- number of slots left in speculative frame
*/
#define vlib_validate_buffer_enqueue_x1(vm,node,next_index,to_next,n_left_to_next,bi0,next0) \
do {                                                                    \
  if (PREDICT_FALSE (next0 != next_index))                              \
    {                                                                   \
      vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);  \
      next_index = next0;                                               \
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
                                                                        \
      to_next[0] = bi0;                                                 \
      to_next += 1;                                                     \
      n_left_to_next -= 1;                                              \
    }                                                                   \
} while (0)
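
/*
 * Illustrative usage sketch (added by the editor, not part of the original
 * header): a minimal dual/single loop dispatch function of the shape these
 * macros are designed for.  The node, its two next indices (0 and 1) and
 * the length-based classification are hypothetical placeholders; a real
 * node registers its next nodes with VLIB_REGISTER_NODE and does real work
 * on each buffer.
 */
static_always_inline uword
example_dual_loop_node_fn_sketch (vlib_main_t * vm,
                                  vlib_node_runtime_t * node,
                                  vlib_frame_t * frame)
{
  u32 n_left_from, *from, *to_next, next_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      /* Speculate: packets go to the same next node as last time. */
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from >= 2 && n_left_to_next >= 2)
        {
          vlib_buffer_t *b0, *b1;
          u32 bi0, bi1, next0, next1;

          /* Speculatively enqueue both buffers to next_index... */
          bi0 = to_next[0] = from[0];
          bi1 = to_next[1] = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          /* Hypothetical classification: next 0 for short packets. */
          next0 = b0->current_length < 64 ? 0 : 1;
          next1 = b1->current_length < 64 ? 0 : 1;

          /* ...then repair the speculation if either guess was wrong. */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
        }

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *b0;
          u32 bi0, next0;

          bi0 = to_next[0] = from[0];
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          next0 = b0->current_length < 64 ? 0 : 1;

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}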
/** \brief Generic dual/single loop node dispatch helper.
    Calls the supplied callbacks to pick a next node for each buffer and
    uses the speculative enqueue macros above to build next frames. */
always_inline uword
generic_buffer_node_inline (vlib_main_t * vm,
                            vlib_node_runtime_t * node,
                            vlib_frame_t * frame,
                            uword sizeof_trace,
                            void *opaque1,
                            uword opaque2,
                            void (*two_buffers) (vlib_main_t * vm,
                                                 void *opaque1,
                                                 uword opaque2,
                                                 vlib_buffer_t * b0,
                                                 vlib_buffer_t * b1,
                                                 u32 * next0, u32 * next1),
                            void (*one_buffer) (vlib_main_t * vm,
                                                void *opaque1, uword opaque2,
                                                vlib_buffer_t * b0,
                                                u32 * next0))
{
  u32 n_left_from, *from, *to_next;
  u32 next_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
                                   /* stride */ 1, sizeof_trace);

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t *p0, *p1;
          u32 pi0, next0;
          u32 pi1, next1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, 64, LOAD);
            CLIB_PREFETCH (p3->data, 64, LOAD);
          }

          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          two_buffers (vm, opaque1, opaque2, p0, p1, &next0, &next1);

          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, pi1, next0, next1);
        }

      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *p0;
          u32 pi0, next0;

          pi0 = from[0];
          to_next[0] = pi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          p0 = vlib_get_buffer (vm, pi0);

          one_buffer (vm, opaque1, opaque2, p0, &next0);

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
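
/*
 * Illustrative usage sketch (added by the editor, not part of the original
 * header): wrapping generic_buffer_node_inline with per-buffer callbacks.
 * The callback names, the "send everything to next index 0" policy and the
 * zero sizeof_trace are hypothetical; a real node passes the size of its
 * trace record and derives next0/next1 from the packet contents.
 */
static inline void
example_one_buffer_cb_sketch (vlib_main_t * vm, void *opaque1, uword opaque2,
                              vlib_buffer_t * b0, u32 * next0)
{
  next0[0] = 0;                 /* hypothetical: single next node */
}

static inline void
example_two_buffers_cb_sketch (vlib_main_t * vm, void *opaque1,
                               uword opaque2, vlib_buffer_t * b0,
                               vlib_buffer_t * b1, u32 * next0, u32 * next1)
{
  next0[0] = 0;
  next1[0] = 0;
}

static_always_inline uword
example_generic_node_fn_sketch (vlib_main_t * vm, vlib_node_runtime_t * node,
                                vlib_frame_t * frame)
{
  return generic_buffer_node_inline (vm, node, frame,
                                     /* sizeof_trace */ 0,
                                     /* opaque1 */ 0, /* opaque2 */ 0,
                                     example_two_buffers_cb_sketch,
                                     example_one_buffer_cb_sketch);
}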
/** \brief Enqueue a vector of buffers to their per-packet next nodes.
    Takes parallel arrays of buffer indices and next indices and copies
    runs of buffers bound for the same next node in bulk. */
static_always_inline void
vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node,
                             u32 * buffers, u16 * nexts, uword count)
{
  u32 *to_next, n_left_to_next, max;
  u16 next_index;

  next_index = nexts[0];
  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
  max = clib_min (n_left_to_next, count);

  while (count)
    {
      u32 n_enqueued;
      if ((nexts[0] != next_index) || n_left_to_next == 0)
        {
          vlib_put_next_frame (vm, node, next_index, n_left_to_next);
          next_index = nexts[0];
          vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
          max = clib_min (n_left_to_next, count);
        }
#if defined(CLIB_HAVE_VEC512)
      u16x32 next32 = u16x32_load_unaligned (nexts);
      next32 = (next32 == u16x32_splat (next32[0]));
      u64 bitmap = u16x32_msb_mask (next32);
      n_enqueued = count_trailing_zeros (~bitmap);
#elif defined(CLIB_HAVE_VEC256)
      u16x16 next16 = u16x16_load_unaligned (nexts);
      next16 = (next16 == u16x16_splat (next16[0]));
      u64 bitmap = u8x32_msb_mask ((u8x32) next16);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
      u16x8 next8 = u16x8_load_unaligned (nexts);
      next8 = (next8 == u16x8_splat (next8[0]));
      u64 bitmap = u8x16_msb_mask ((u8x16) next8);
      n_enqueued = count_trailing_zeros (~bitmap) / 2;
#else
      u16 x = 0;
      x |= next_index ^ nexts[1];
      x |= next_index ^ nexts[2];
      x |= next_index ^ nexts[3];
      n_enqueued = (x == 0) ? 4 : 1;
#endif

      if (PREDICT_FALSE (n_enqueued > max))
        n_enqueued = max;

#ifdef CLIB_HAVE_VEC512
      if (n_enqueued >= 32)
        {
          clib_memcpy (to_next, buffers, 32 * sizeof (u32));
          nexts += 32;
          to_next += 32;
          buffers += 32;
          n_left_to_next -= 32;
          count -= 32;
          max -= 32;
          continue;
        }
#endif

#ifdef CLIB_HAVE_VEC256
      if (n_enqueued >= 16)
        {
          clib_memcpy (to_next, buffers, 16 * sizeof (u32));
          nexts += 16;
          to_next += 16;
          buffers += 16;
          n_left_to_next -= 16;
          count -= 16;
          max -= 16;
          continue;
        }
#endif

#ifdef CLIB_HAVE_VEC128
      if (n_enqueued >= 8)
        {
          clib_memcpy (to_next, buffers, 8 * sizeof (u32));
          nexts += 8;
          to_next += 8;
          buffers += 8;
          n_left_to_next -= 8;
          count -= 8;
          max -= 8;
          continue;
        }
#endif

      if (n_enqueued >= 4)
        {
          clib_memcpy (to_next, buffers, 4 * sizeof (u32));
          nexts += 4;
          to_next += 4;
          buffers += 4;
          n_left_to_next -= 4;
          count -= 4;
          max -= 4;
          continue;
        }

      /* copy one buffer index */
      to_next[0] = buffers[0];

      /* next */
      nexts += 1;
      to_next += 1;
      buffers += 1;
      n_left_to_next -= 1;
      count -= 1;
      max -= 1;
    }
  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
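
/*
 * Illustrative usage sketch (added by the editor, not part of the original
 * header): the "classify first, enqueue once" style that
 * vlib_buffer_enqueue_to_next supports.  The node fills a per-frame nexts[]
 * array and hands the whole vector over in one call; the length-based
 * classification and the two next indices are hypothetical.
 */
static_always_inline uword
example_enqueue_to_next_node_fn_sketch (vlib_main_t * vm,
                                        vlib_node_runtime_t * node,
                                        vlib_frame_t * frame)
{
  u32 *buffers = vlib_frame_vector_args (frame);
  u32 *from = buffers;
  u32 n_left = frame->n_vectors;
  u16 nexts[VLIB_FRAME_SIZE], *next = nexts;

  while (n_left > 0)
    {
      vlib_buffer_t *b0 = vlib_get_buffer (vm, from[0]);

      /* Hypothetical classification: next 0 for short packets. */
      next[0] = b0->current_length < 64 ? 0 : 1;

      from += 1;
      next += 1;
      n_left -= 1;
    }

  /* One call replaces the per-packet speculative enqueue macros. */
  vlib_buffer_enqueue_to_next (vm, node, buffers, nexts, frame->n_vectors);
  return frame->n_vectors;
}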
/** \brief Hand a vector of buffers off to other threads via frame queues.
    Buffers destined for a congested thread are dropped (and freed) when
    drop_on_congestion is set.  Returns the number of packets enqueued. */
static_always_inline u32
vlib_buffer_enqueue_to_thread (vlib_main_t * vm, u32 frame_queue_index,
                               u32 * buffer_indices, u16 * thread_indices,
                               u32 n_packets, int drop_on_congestion)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  vlib_frame_queue_main_t *fqm;
  vlib_frame_queue_per_thread_data_t *ptd;
  u32 n_left = n_packets;
  u32 drop_list[VLIB_FRAME_SIZE], *dbi = drop_list, n_drop = 0;
  vlib_frame_queue_elt_t *hf = 0;
  u32 n_left_to_next_thread = 0, *to_next_thread = 0;
  u32 next_thread_index, current_thread_index = ~0;
  int i;

  fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
  ptd = vec_elt_at_index (fqm->per_thread_data, vm->thread_index);

  while (n_left)
    {
      next_thread_index = thread_indices[0];

      if (next_thread_index != current_thread_index)
        {
          if (drop_on_congestion &&
              is_vlib_frame_queue_congested
              (frame_queue_index, next_thread_index, fqm->queue_hi_thresh,
               ptd->congested_handoff_queue_by_thread_index))
            {
              dbi[0] = buffer_indices[0];
              dbi++;
              n_drop++;
              goto next;
            }

          if (hf)
            hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

          hf = vlib_get_worker_handoff_queue_elt (frame_queue_index,
                                                  next_thread_index,
                                                  ptd->handoff_queue_elt_by_thread_index);

          n_left_to_next_thread = VLIB_FRAME_SIZE - hf->n_vectors;
          to_next_thread = &hf->buffer_index[hf->n_vectors];
          current_thread_index = next_thread_index;
        }

      to_next_thread[0] = buffer_indices[0];
      to_next_thread++;
      n_left_to_next_thread--;

      if (n_left_to_next_thread == 0)
        {
          hf->n_vectors = VLIB_FRAME_SIZE;
          vlib_put_frame_queue_elt (hf);
          current_thread_index = ~0;
          ptd->handoff_queue_elt_by_thread_index[next_thread_index] = 0;
          hf = 0;
        }

      /* next */
    next:
      thread_indices += 1;
      buffer_indices += 1;
      n_left -= 1;
    }

  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_thread;

  /* Ship frames to the thread nodes */
  for (i = 0; i < vec_len (ptd->handoff_queue_elt_by_thread_index); i++)
    {
      if (ptd->handoff_queue_elt_by_thread_index[i])
        {
          hf = ptd->handoff_queue_elt_by_thread_index[i];
          /*
           * It works better to let the handoff node
           * rate-adapt, always ship the handoff queue element.
           */
          if (1 || hf->n_vectors == hf->last_n_vectors)
            {
              vlib_put_frame_queue_elt (hf);
              ptd->handoff_queue_elt_by_thread_index[i] = 0;
            }
          else
            hf->last_n_vectors = hf->n_vectors;
        }
      ptd->congested_handoff_queue_by_thread_index[i] =
        (vlib_frame_queue_t *) (~0);
    }

  if (drop_on_congestion && n_drop)
    vlib_buffer_free (vm, drop_list, n_drop);

  return n_packets - n_drop;
}
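
/*
 * Illustrative usage sketch (added by the editor, not part of the original
 * header): a worker handoff path built on vlib_buffer_enqueue_to_thread.
 * The spreading rule and the drop-on-congestion choice are hypothetical;
 * a real node obtains frame_queue_index from vlib_frame_queue_main_init
 * and usually picks the target thread from a flow hash.
 */
static_always_inline u32
example_handoff_sketch (vlib_main_t * vm, vlib_frame_t * frame,
                        u32 frame_queue_index)
{
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u32 *buffer_indices = vlib_frame_vector_args (frame);
  u32 *from = buffer_indices;
  u32 n_packets = frame->n_vectors, n_left = n_packets;
  u32 n_workers = tm->n_vlib_mains - 1;
  u16 thread_indices[VLIB_FRAME_SIZE], *ti = thread_indices;

  while (n_left > 0)
    {
      /* Hypothetical spreading rule: spread by buffer index over workers,
         falling back to thread 0 when no workers are configured. */
      ti[0] = n_workers ? 1 + (from[0] % n_workers) : 0;
      from += 1;
      ti += 1;
      n_left -= 1;
    }

  /* Returns the number actually handed off; congested packets are freed. */
  return vlib_buffer_enqueue_to_thread (vm, frame_queue_index,
                                        buffer_indices, thread_indices,
                                        n_packets,
                                        /* drop_on_congestion */ 1);
}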
#endif /* included_vlib_buffer_node_h */
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */