src/svm/message_queue.c

   1 /*
   2  * Copyright (c) 2018 Cisco and/or its affiliates.
   3  * Licensed under the Apache License, Version 2.0 (the "License");
   4  * you may not use this file except in compliance with the License.
   5  * You may obtain a copy of the License at:
   6  *
   7  *     http://www.apache.org/licenses/LICENSE-2.0
   8  *
   9  * Unless required by applicable law or agreed to in writing, software
  10  * distributed under the License is distributed on an "AS IS" BASIS,
  11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  * See the License for the specific language governing permissions and
  13  * limitations under the License.
  14  */
  15
  16 #include <svm/message_queue.h>
  17 #include <vppinfra/mem.h>
  18 #include <vppinfra/format.h>
  19 #include <vppinfra/time.h>
  20 #include <sys/eventfd.h>
  21 #include <sys/socket.h>
  22
  23 static inline svm_msg_q_ring_t *
  24 svm_msg_q_ring_inline (svm_msg_q_t * mq, u32 ring_index)
  25 {
  26   return vec_elt_at_index (mq->rings, ring_index);
  27 }
  28
  29 svm_msg_q_ring_t *
  30 svm_msg_q_ring (svm_msg_q_t * mq, u32 ring_index)
  31 {
  32   return svm_msg_q_ring_inline (mq, ring_index);
  33 }
  34
  35 static inline void *
  36 svm_msg_q_ring_data (svm_msg_q_ring_t * ring, u32 elt_index)
  37 {
  38   ASSERT (elt_index < ring->nitems);
  39   return (ring->shr->data + elt_index * ring->elsize);
  40 }
  41
  42 static void
  43 svm_msg_q_init_mutex (svm_msg_q_shared_queue_t *sq)
  44 {
  45   pthread_mutexattr_t attr;
  46   pthread_condattr_t cattr;
  47
  48   clib_memset (&attr, 0, sizeof (attr));
  49   clib_memset (&cattr, 0, sizeof (cattr));
  50
  51   if (pthread_mutexattr_init (&attr))
  52     clib_unix_warning ("mutexattr_init");
  53   if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
  54     clib_unix_warning ("pthread_mutexattr_setpshared");
  55   if (pthread_mutexattr_setrobust (&attr, PTHREAD_MUTEX_ROBUST))
  56     clib_unix_warning ("setrobust");
  57   if (pthread_mutex_init (&sq->mutex, &attr))
  58     clib_unix_warning ("mutex_init");
  59   if (pthread_mutexattr_destroy (&attr))
  60     clib_unix_warning ("mutexattr_destroy");
  61   if (pthread_condattr_init (&cattr))
  62     clib_unix_warning ("condattr_init");
  63   if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
  64     clib_unix_warning ("condattr_setpshared");
  65   if (pthread_cond_init (&sq->condvar, &cattr))
  66     clib_unix_warning ("cond_init1");
  67   if (pthread_condattr_destroy (&cattr))
  68     clib_unix_warning ("cond_init2");
  69 }
  70
  71 svm_msg_q_shared_t *
  72 svm_msg_q_init (void *base, svm_msg_q_cfg_t *cfg)
  73 {
  74   svm_msg_q_ring_shared_t *ring;
  75   svm_msg_q_shared_queue_t *sq;
  76   svm_msg_q_shared_t *smq;
  77   u32 q_sz, offset;
  78   int i;
  79
  80   q_sz = sizeof (*sq) + cfg->q_nitems * sizeof (svm_msg_q_msg_t);
  81
  82   smq = (svm_msg_q_shared_t *) base;
  83   sq = smq->q;
  84   clib_memset (sq, 0, sizeof (*sq));
  85   sq->elsize = sizeof (svm_msg_q_msg_t);
  86   sq->maxsize = cfg->q_nitems;
  87   smq->n_rings = cfg->n_rings;
  88   ring = (void *) ((u8 *) smq->q + q_sz);
  89   for (i = 0; i < cfg->n_rings; i++)
  90     {
  91       ring->elsize = cfg->ring_cfgs[i].elsize;
  92       ring->nitems = cfg->ring_cfgs[i].nitems;
  93       ring->cursize = ring->head = ring->tail = 0;
  94       offset = sizeof (*ring) + ring->nitems * ring->elsize;
  95       ring = (void *) ((u8 *) ring + offset);
  96     }
  97
  98   svm_msg_q_init_mutex (sq);
  99
 100   return smq;
 101 }
 102
 103 uword
 104 svm_msg_q_size_to_alloc (svm_msg_q_cfg_t *cfg)
 105 {
 106   svm_msg_q_ring_cfg_t *ring_cfg;
 107   uword rings_sz = 0, mq_sz;
 108   u32 q_sz;
 109   int i;
 110
 111   ASSERT (cfg);
 112
 113   rings_sz = sizeof (svm_msg_q_ring_shared_t) * cfg->n_rings;
 114   for (i = 0; i < cfg->n_rings; i++)
 115     {
 116       if (cfg->ring_cfgs[i].data)
 117         continue;
 118       ring_cfg = &cfg->ring_cfgs[i];
 119       rings_sz += (uword) ring_cfg->nitems * ring_cfg->elsize;
 120     }
 121
 122   q_sz = sizeof (svm_msg_q_shared_queue_t) +
 123          cfg->q_nitems * sizeof (svm_msg_q_msg_t);
 124   mq_sz = sizeof (svm_msg_q_shared_t) + q_sz + rings_sz;
 125
 126   return mq_sz;
 127 }
 128
 129 svm_msg_q_shared_t *
 130 svm_msg_q_alloc (svm_msg_q_cfg_t *cfg)
 131 {
 132   uword mq_sz;
 133   u8 *base;
 134
 135   mq_sz = svm_msg_q_size_to_alloc (cfg);
 136   base = clib_mem_alloc_aligned (mq_sz, CLIB_CACHE_LINE_BYTES);
 137   if (!base)
 138     return 0;
 139
 140   return svm_msg_q_init (base, cfg);
 141 }
 142
 143 void
 144 svm_msg_q_attach (svm_msg_q_t *mq, void *smq_base)
 145 {
 146   svm_msg_q_ring_shared_t *ring;
 147   svm_msg_q_shared_t *smq;
 148   u32 i, n_rings, q_sz, offset;
 149
 150   smq = (svm_msg_q_shared_t *) smq_base;
 151   mq->q.shr = smq->q;
 152   mq->q.evtfd = -1;
 153   n_rings = smq->n_rings;
 154   vec_validate (mq->rings, n_rings - 1);
 155   q_sz = sizeof (svm_msg_q_shared_queue_t) +
 156          mq->q.shr->maxsize * sizeof (svm_msg_q_msg_t);
 157   ring = (void *) ((u8 *) smq->q + q_sz);
 158   for (i = 0; i < n_rings; i++)
 159     {
 160       mq->rings[i].nitems = ring->nitems;
 161       mq->rings[i].elsize = ring->elsize;
 162       mq->rings[i].shr = ring;
 163       offset = sizeof (*ring) + ring->nitems * ring->elsize;
 164       ring = (void *) ((u8 *) ring + offset);
 165     }
 166   clib_spinlock_init (&mq->q.lock);
 167 }
 168
 169 void
 170 svm_msg_q_free (svm_msg_q_t * mq)
 171 {
 172   clib_mem_free (mq->q.shr);
 173   clib_spinlock_free (&mq->q.lock);
 174   clib_mem_free (mq);
 175 }
 176
 177 static void
 178 svm_msg_q_send_signal (svm_msg_q_t *mq, u8 is_consumer)
 179 {
 180   if (mq->q.evtfd == -1)
 181     {
 182       if (is_consumer)
 183         {
 184           int rv = pthread_mutex_lock (&mq->q.shr->mutex);
 185           if (PREDICT_FALSE (rv == EOWNERDEAD))
 186             {
 187               rv = pthread_mutex_consistent (&mq->q.shr->mutex);
 188               return;
 189             }
 190         }
 191
 192       (void) pthread_cond_broadcast (&mq->q.shr->condvar);
 193
 194       if (is_consumer)
 195         pthread_mutex_unlock (&mq->q.shr->mutex);
 196     }
 197   else
 198     {
 199       int __clib_unused rv;
 200       u64 data = 1;
 201
 202       if (mq->q.evtfd < 0)
 203         return;
 204
 205       rv = write (mq->q.evtfd, &data, sizeof (data));
 206       if (PREDICT_FALSE (rv < 0))
 207         clib_unix_warning ("signal write on %d returned %d", mq->q.evtfd, rv);
 208     }
 209 }
 210
 211 svm_msg_q_msg_t
 212 svm_msg_q_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index)
 213 {
 214   svm_msg_q_ring_shared_t *sr;
 215   svm_msg_q_ring_t *ring;
 216   svm_msg_q_msg_t msg;
 217
 218   ring = svm_msg_q_ring_inline (mq, ring_index);
 219   sr = ring->shr;
 220
 221   ASSERT (sr->cursize < ring->nitems);
 222   msg.ring_index = ring - mq->rings;
 223   msg.elt_index = sr->tail;
 224   sr->tail = (sr->tail + 1) % ring->nitems;
 225   clib_atomic_fetch_add_rel (&sr->cursize, 1);
 226   return msg;
 227 }
 228
 229 int
 230 svm_msg_q_lock_and_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index,
 231                                      u8 noblock, svm_msg_q_msg_t * msg)
 232 {
 233   if (noblock)
 234     {
 235       if (svm_msg_q_try_lock (mq))
 236         return -1;
 237       if (PREDICT_FALSE (svm_msg_q_is_full (mq)
 238                          || svm_msg_q_ring_is_full (mq, ring_index)))
 239         {
 240           svm_msg_q_unlock (mq);
 241           return -2;
 242         }
 243       *msg = svm_msg_q_alloc_msg_w_ring (mq, ring_index);
 244     }
 245   else
 246     {
 247       svm_msg_q_lock (mq);
 248       while (svm_msg_q_is_full (mq)
 249              || svm_msg_q_ring_is_full (mq, ring_index))
 250         svm_msg_q_wait (mq, SVM_MQ_WAIT_FULL);
 251       *msg = svm_msg_q_alloc_msg_w_ring (mq, ring_index);
 252     }
 253   return 0;
 254 }
 255
 256 svm_msg_q_msg_t
 257 svm_msg_q_alloc_msg (svm_msg_q_t * mq, u32 nbytes)
 258 {
 259   svm_msg_q_msg_t msg = {.as_u64 = ~0 };
 260   svm_msg_q_ring_shared_t *sr;
 261   svm_msg_q_ring_t *ring;
 262
 263   vec_foreach (ring, mq->rings)
 264   {
 265     sr = ring->shr;
 266     if (ring->elsize < nbytes || sr->cursize == ring->nitems)
 267       continue;
 268     msg.ring_index = ring - mq->rings;
 269     msg.elt_index = sr->tail;
 270     sr->tail = (sr->tail + 1) % ring->nitems;
 271     clib_atomic_fetch_add_relax (&sr->cursize, 1);
 272     break;
 273   }
 274   return msg;
 275 }
 276
 277 void *
 278 svm_msg_q_msg_data (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
 279 {
 280   svm_msg_q_ring_t *ring = svm_msg_q_ring_inline (mq, msg->ring_index);
 281   return svm_msg_q_ring_data (ring, msg->elt_index);
 282 }
 283
 284 void
 285 svm_msg_q_free_msg (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
 286 {
 287   svm_msg_q_ring_shared_t *sr;
 288   svm_msg_q_ring_t *ring;
 289   u32 need_signal;
 290
 291   ASSERT (vec_len (mq->rings) > msg->ring_index);
 292   ring = svm_msg_q_ring_inline (mq, msg->ring_index);
 293   sr = ring->shr;
 294   if (msg->elt_index == sr->head)
 295     {
 296       sr->head = (sr->head + 1) % ring->nitems;
 297     }
 298   else
 299     {
 300       clib_warning ("message out of order: elt %u head %u ring %u",
 301                     msg->elt_index, sr->head, msg->ring_index);
 302       /* for now, expect messages to be processed in order */
 303       ASSERT (0);
 304     }
 305
 306   need_signal = clib_atomic_load_relax_n (&sr->cursize) == ring->nitems;
 307   clib_atomic_fetch_sub_relax (&sr->cursize, 1);
 308
 309   if (PREDICT_FALSE (need_signal))
 310     svm_msg_q_send_signal (mq, 1 /* is consumer */);
 311 }
 312
 313 static int
 314 svm_msq_q_msg_is_valid (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
 315 {
 316   u32 dist1, dist2, tail, head;
 317   svm_msg_q_ring_shared_t *sr;
 318   svm_msg_q_ring_t *ring;
 319
 320   if (vec_len (mq->rings) <= msg->ring_index)
 321     return 0;
 322
 323   ring = svm_msg_q_ring_inline (mq, msg->ring_index);
 324   sr = ring->shr;
 325   tail = sr->tail;
 326   head = sr->head;
 327
 328   dist1 = ((ring->nitems + msg->elt_index) - head) % ring->nitems;
 329   if (tail == head)
 330     dist2 = (sr->cursize == 0) ? 0 : ring->nitems;
 331   else
 332     dist2 = ((ring->nitems + tail) - head) % ring->nitems;
 333   return (dist1 < dist2);
 334 }
 335
 336 static void
 337 svm_msg_q_add_raw (svm_msg_q_t *mq, u8 *elem)
 338 {
 339   svm_msg_q_shared_queue_t *sq = mq->q.shr;
 340   i8 *tailp;
 341   u32 sz;
 342
 343   tailp = (i8 *) (&sq->data[0] + sq->elsize * sq->tail);
 344   clib_memcpy_fast (tailp, elem, sq->elsize);
 345
 346   sq->tail = (sq->tail + 1) % sq->maxsize;
 347
 348   sz = clib_atomic_fetch_add_rel (&sq->cursize, 1);
 349   if (!sz)
 350     svm_msg_q_send_signal (mq, 0 /* is consumer */);
 351 }
 352
 353 int
 354 svm_msg_q_add (svm_msg_q_t * mq, svm_msg_q_msg_t * msg, int nowait)
 355 {
 356   ASSERT (svm_msq_q_msg_is_valid (mq, msg));
 357
 358   if (nowait)
 359     {
 360       /* zero on success */
 361       if (svm_msg_q_try_lock (mq))
 362         {
 363           return (-1);
 364         }
 365     }
 366   else
 367     svm_msg_q_lock (mq);
 368
 369   if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
 370     {
 371       if (nowait)
 372         return (-2);
 373       while (svm_msg_q_is_full (mq))
 374         svm_msg_q_wait (mq, SVM_MQ_WAIT_FULL);
 375     }
 376
 377   svm_msg_q_add_raw (mq, (u8 *) msg);
 378
 379   svm_msg_q_unlock (mq);
 380
 381   return 0;
 382 }
 383
 384 void
 385 svm_msg_q_add_and_unlock (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
 386 {
 387   ASSERT (svm_msq_q_msg_is_valid (mq, msg));
 388   svm_msg_q_add_raw (mq, (u8 *) msg);
 389   svm_msg_q_unlock (mq);
 390 }
 391
 392 int
 393 svm_msg_q_sub_raw (svm_msg_q_t *mq, svm_msg_q_msg_t *elem)
 394 {
 395   svm_msg_q_shared_queue_t *sq = mq->q.shr;
 396   i8 *headp;
 397   u32 sz;
 398
 399   ASSERT (!svm_msg_q_is_empty (mq));
 400
 401   headp = (i8 *) (&sq->data[0] + sq->elsize * sq->head);
 402   clib_memcpy_fast (elem, headp, sq->elsize);
 403
 404   sq->head = (sq->head + 1) % sq->maxsize;
 405
 406   sz = clib_atomic_fetch_sub_relax (&sq->cursize, 1);
 407   if (PREDICT_FALSE (sz == sq->maxsize))
 408     svm_msg_q_send_signal (mq, 1 /* is consumer */);
 409
 410   return 0;
 411 }
 412
 413 int
 414 svm_msg_q_sub_raw_batch (svm_msg_q_t *mq, svm_msg_q_msg_t *msg_buf, u32 n_msgs)
 415 {
 416   svm_msg_q_shared_queue_t *sq = mq->q.shr;
 417   u32 sz, to_deq;
 418   i8 *headp;
 419
 420   sz = svm_msg_q_size (mq);
 421   ASSERT (sz);
 422   to_deq = clib_min (sz, n_msgs);
 423
 424   headp = (i8 *) (&sq->data[0] + sq->elsize * sq->head);
 425
 426   if (sq->head + to_deq < sq->maxsize)
 427     {
 428       clib_memcpy_fast (msg_buf, headp, sq->elsize * to_deq);
 429       sq->head += to_deq;
 430     }
 431   else
 432     {
 433       u32 first_batch = sq->maxsize - sq->head;
 434       clib_memcpy_fast (msg_buf, headp, sq->elsize * first_batch);
 435       clib_memcpy_fast (msg_buf + first_batch, sq->data,
 436                         sq->elsize * (to_deq - first_batch));
 437       sq->head = (sq->head + to_deq) % sq->maxsize;
 438     }
 439
 440   clib_atomic_fetch_sub_relax (&sq->cursize, to_deq);
 441   if (PREDICT_FALSE (sz == sq->maxsize))
 442     svm_msg_q_send_signal (mq, 1 /* is consumer */);
 443
 444   return to_deq;
 445 }
 446
 447 int
 448 svm_msg_q_sub (svm_msg_q_t *mq, svm_msg_q_msg_t *msg,
 449                svm_q_conditional_wait_t cond, u32 time)
 450 {
 451   int rc = 0;
 452
 453   if (svm_msg_q_is_empty (mq))
 454     {
 455       if (cond == SVM_Q_NOWAIT)
 456         {
 457           return (-2);
 458         }
 459       else if (cond == SVM_Q_TIMEDWAIT)
 460         {
 461           if ((rc = svm_msg_q_timedwait (mq, time)))
 462             return rc;
 463         }
 464       else
 465         {
 466           svm_msg_q_wait (mq, SVM_MQ_WAIT_EMPTY);
 467         }
 468     }
 469
 470   svm_msg_q_sub_raw (mq, msg);
 471
 472   return 0;
 473 }
 474
 475 void
 476 svm_msg_q_set_eventfd (svm_msg_q_t *mq, int fd)
 477 {
 478   mq->q.evtfd = fd;
 479 }
 480
 481 int
 482 svm_msg_q_alloc_eventfd (svm_msg_q_t *mq)
 483 {
 484   int fd;
 485   if ((fd = eventfd (0, EFD_NONBLOCK)) < 0)
 486     return -1;
 487   svm_msg_q_set_eventfd (mq, fd);
 488   return 0;
 489 }
 490
 491 int
 492 svm_msg_q_wait (svm_msg_q_t *mq, svm_msg_q_wait_type_t type)
 493 {
 494   u8 (*fn) (svm_msg_q_t *);
 495   int rv;
 496
 497   fn = (type == SVM_MQ_WAIT_EMPTY) ? svm_msg_q_is_empty : svm_msg_q_is_full;
 498
 499   if (mq->q.evtfd == -1)
 500     {
 501       rv = pthread_mutex_lock (&mq->q.shr->mutex);
 502       if (PREDICT_FALSE (rv == EOWNERDEAD))
 503         {
 504           rv = pthread_mutex_consistent (&mq->q.shr->mutex);
 505           return rv;
 506         }
 507
 508       while (fn (mq))
 509         pthread_cond_wait (&mq->q.shr->condvar, &mq->q.shr->mutex);
 510
 511       pthread_mutex_unlock (&mq->q.shr->mutex);
 512     }
 513   else
 514     {
 515       u64 buf;
 516
 517       while (fn (mq))
 518         {
 519           while ((rv = read (mq->q.evtfd, &buf, sizeof (buf))) < 0)
 520             {
 521               if (errno != EAGAIN)
 522                 {
 523                   clib_unix_warning ("read error");
 524                   return rv;
 525                 }
 526             }
 527         }
 528     }
 529
 530   return 0;
 531 }
 532
 533 int
 534 svm_msg_q_timedwait (svm_msg_q_t *mq, double timeout)
 535 {
 536   if (mq->q.evtfd == -1)
 537     {
 538       svm_msg_q_shared_queue_t *sq = mq->q.shr;
 539       struct timespec ts;
 540       u32 sz;
 541       int rv;
 542
 543       rv = pthread_mutex_lock (&sq->mutex);
 544       if (PREDICT_FALSE (rv == EOWNERDEAD))
 545         {
 546           rv = pthread_mutex_consistent (&sq->mutex);
 547           return rv;
 548         }
 549
 550       /* check if we're still in a signalable state after grabbing lock */
 551       sz = svm_msg_q_size (mq);
 552       if (sz != 0 && sz != sq->maxsize)
 553         {
 554           pthread_mutex_unlock (&sq->mutex);
 555           return 0;
 556         }
 557
 558       ts.tv_sec = unix_time_now () + (u32) timeout;
 559       ts.tv_nsec = (timeout - (u32) timeout) * 1e9;
 560       rv = pthread_cond_timedwait (&sq->condvar, &sq->mutex, &ts);
 561
 562       pthread_mutex_unlock (&sq->mutex);
 563       return rv;
 564     }
 565   else
 566     {
 567       struct timeval tv;
 568       u64 buf;
 569       int rv;
 570
 571       tv.tv_sec = (u64) timeout;
 572       tv.tv_usec = ((u64) timeout - (u64) timeout) * 1e9;
 573       setsockopt (mq->q.evtfd, SOL_SOCKET, SO_RCVTIMEO, (const char *) &tv,
 574                   sizeof tv);
 575
 576       rv = read (mq->q.evtfd, &buf, sizeof (buf));
 577       if (rv < 0)
 578         clib_warning ("read %u", errno);
 579
 580       return rv < 0 ? errno : 0;
 581     }
 582 }
 583
 584 u8 *
 585 format_svm_msg_q (u8 * s, va_list * args)
 586 {
 587   svm_msg_q_t *mq = va_arg (*args, svm_msg_q_t *);
 588   s = format (s, " [Q:%d/%d]", mq->q.shr->cursize, mq->q.shr->maxsize);
 589   for (u32 i = 0; i < vec_len (mq->rings); i++)
 590     {
 591       s = format (s, " [R%d:%d/%d]", i, mq->rings[i].shr->cursize,
 592                   mq->rings[i].nitems);
 593     }
 594   return s;
 595 }
 596
 597 /*
 598  * fd.io coding-style-patch-verification: ON
 599  *
 600  * Local Variables:
 601  * eval: (c-set-style "gnu")
 602  * End:
 603  */