src/svm/message_queue.c

   1 /*
   2  * Copyright (c) 2018 Cisco and/or its affiliates.
   3  * Licensed under the Apache License, Version 2.0 (the "License");
   4  * you may not use this file except in compliance with the License.
   5  * You may obtain a copy of the License at:
   6  *
   7  *     http://www.apache.org/licenses/LICENSE-2.0
   8  *
   9  * Unless required by applicable law or agreed to in writing, software
  10  * distributed under the License is distributed on an "AS IS" BASIS,
  11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  * See the License for the specific language governing permissions and
  13  * limitations under the License.
  14  */
  15
  16 #include <svm/message_queue.h>
  17 #include <vppinfra/mem.h>
  18 #include <vppinfra/format.h>
  19 #include <vppinfra/time.h>
  20 #include <sys/eventfd.h>
  21 #include <sys/socket.h>
  22
  23 static inline svm_msg_q_ring_t *
  24 svm_msg_q_ring_inline (svm_msg_q_t * mq, u32 ring_index)
  25 {
  26   return vec_elt_at_index (mq->rings, ring_index);
  27 }
  28
  29 svm_msg_q_ring_t *
  30 svm_msg_q_ring (svm_msg_q_t * mq, u32 ring_index)
  31 {
  32   return svm_msg_q_ring_inline (mq, ring_index);
  33 }
  34
  35 static inline void *
  36 svm_msg_q_ring_data (svm_msg_q_ring_t * ring, u32 elt_index)
  37 {
  38   ASSERT (elt_index < ring->nitems);
  39   return (ring->shr->data + elt_index * ring->elsize);
  40 }
  41
  42 static void
  43 svm_msg_q_init_mutex (svm_msg_q_shared_queue_t *sq)
  44 {
  45   pthread_mutexattr_t attr;
  46   pthread_condattr_t cattr;
  47
  48   clib_memset (&attr, 0, sizeof (attr));
  49   clib_memset (&cattr, 0, sizeof (cattr));
  50
  51   if (pthread_mutexattr_init (&attr))
  52     clib_unix_warning ("mutexattr_init");
  53   if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
  54     clib_unix_warning ("pthread_mutexattr_setpshared");
  55   if (pthread_mutexattr_setrobust (&attr, PTHREAD_MUTEX_ROBUST))
  56     clib_unix_warning ("setrobust");
  57   if (pthread_mutex_init (&sq->mutex, &attr))
  58     clib_unix_warning ("mutex_init");
  59   if (pthread_mutexattr_destroy (&attr))
  60     clib_unix_warning ("mutexattr_destroy");
  61   if (pthread_condattr_init (&cattr))
  62     clib_unix_warning ("condattr_init");
  63   if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
  64     clib_unix_warning ("condattr_setpshared");
  65   if (pthread_cond_init (&sq->condvar, &cattr))
  66     clib_unix_warning ("cond_init1");
  67   if (pthread_condattr_destroy (&cattr))
  68     clib_unix_warning ("cond_init2");
  69 }
  70
  71 svm_msg_q_shared_t *
  72 svm_msg_q_init (void *base, svm_msg_q_cfg_t *cfg)
  73 {
  74   svm_msg_q_ring_shared_t *ring;
  75   svm_msg_q_shared_queue_t *sq;
  76   svm_msg_q_shared_t *smq;
  77   u32 q_sz, offset;
  78   int i;
  79
  80   q_sz = sizeof (*sq) + cfg->q_nitems * sizeof (svm_msg_q_msg_t);
  81
  82   smq = (svm_msg_q_shared_t *) base;
  83   sq = smq->q;
  84   clib_memset (sq, 0, sizeof (*sq));
  85   sq->elsize = sizeof (svm_msg_q_msg_t);
  86   sq->maxsize = cfg->q_nitems;
  87   smq->n_rings = cfg->n_rings;
  88   ring = (void *) ((u8 *) smq->q + q_sz);
  89   for (i = 0; i < cfg->n_rings; i++)
  90     {
  91       ring->elsize = cfg->ring_cfgs[i].elsize;
  92       ring->nitems = cfg->ring_cfgs[i].nitems;
  93       ring->cursize = ring->head = ring->tail = 0;
  94       offset = sizeof (*ring) + ring->nitems * ring->elsize;
  95       ring = (void *) ((u8 *) ring + offset);
  96     }
  97
  98   svm_msg_q_init_mutex (sq);
  99
 100   return smq;
 101 }
 102
 103 uword
 104 svm_msg_q_size_to_alloc (svm_msg_q_cfg_t *cfg)
 105 {
 106   svm_msg_q_ring_cfg_t *ring_cfg;
 107   uword rings_sz = 0, mq_sz;
 108   u32 q_sz;
 109   int i;
 110
 111   ASSERT (cfg);
 112
 113   rings_sz = sizeof (svm_msg_q_ring_shared_t) * cfg->n_rings;
 114   for (i = 0; i < cfg->n_rings; i++)
 115     {
 116       if (cfg->ring_cfgs[i].data)
 117         continue;
 118       ring_cfg = &cfg->ring_cfgs[i];
 119       rings_sz += (uword) ring_cfg->nitems * ring_cfg->elsize;
 120     }
 121
 122   q_sz = sizeof (svm_msg_q_shared_queue_t) +
 123          cfg->q_nitems * sizeof (svm_msg_q_msg_t);
 124   mq_sz = sizeof (svm_msg_q_shared_t) + q_sz + rings_sz;
 125
 126   return mq_sz;
 127 }
 128
 129 svm_msg_q_shared_t *
 130 svm_msg_q_alloc (svm_msg_q_cfg_t *cfg)
 131 {
 132   uword mq_sz;
 133   u8 *base;
 134
 135   mq_sz = svm_msg_q_size_to_alloc (cfg);
 136   base = clib_mem_alloc_aligned (mq_sz, CLIB_CACHE_LINE_BYTES);
 137   if (!base)
 138     return 0;
 139
 140   return svm_msg_q_init (base, cfg);
 141 }
 142
 143 void
 144 svm_msg_q_attach (svm_msg_q_t *mq, void *smq_base)
 145 {
 146   svm_msg_q_ring_shared_t *ring;
 147   svm_msg_q_shared_t *smq;
 148   u32 i, n_rings, q_sz, offset;
 149
 150   smq = (svm_msg_q_shared_t *) smq_base;
 151   mq->q.shr = smq->q;
 152   mq->q.evtfd = -1;
 153   n_rings = smq->n_rings;
 154   vec_validate (mq->rings, n_rings - 1);
 155   q_sz = sizeof (svm_msg_q_shared_queue_t) +
 156          mq->q.shr->maxsize * sizeof (svm_msg_q_msg_t);
 157   ring = (void *) ((u8 *) smq->q + q_sz);
 158   for (i = 0; i < n_rings; i++)
 159     {
 160       mq->rings[i].nitems = ring->nitems;
 161       mq->rings[i].elsize = ring->elsize;
 162       mq->rings[i].shr = ring;
 163       offset = sizeof (*ring) + ring->nitems * ring->elsize;
 164       ring = (void *) ((u8 *) ring + offset);
 165     }
 166   clib_spinlock_init (&mq->q.lock);
 167 }
 168
 169 void
 170 svm_msg_q_cleanup (svm_msg_q_t *mq)
 171 {
 172   vec_free (mq->rings);
 173   clib_spinlock_free (&mq->q.lock);
 174   if (mq->q.evtfd != -1)
 175     close (mq->q.evtfd);
 176 }
 177
 178 void
 179 svm_msg_q_free (svm_msg_q_t * mq)
 180 {
 181   svm_msg_q_cleanup (mq);
 182   clib_mem_free (mq->q.shr);
 183   clib_mem_free (mq);
 184 }
 185
 186 static void
 187 svm_msg_q_send_signal (svm_msg_q_t *mq, u8 is_consumer)
 188 {
 189   if (mq->q.evtfd == -1)
 190     {
 191       if (is_consumer)
 192         {
 193           int rv = pthread_mutex_lock (&mq->q.shr->mutex);
 194           if (PREDICT_FALSE (rv == EOWNERDEAD))
 195             {
 196               rv = pthread_mutex_consistent (&mq->q.shr->mutex);
 197               return;
 198             }
 199         }
 200
 201       (void) pthread_cond_broadcast (&mq->q.shr->condvar);
 202
 203       if (is_consumer)
 204         pthread_mutex_unlock (&mq->q.shr->mutex);
 205     }
 206   else
 207     {
 208       int __clib_unused rv;
 209       u64 data = 1;
 210
 211       if (mq->q.evtfd < 0)
 212         return;
 213
 214       rv = write (mq->q.evtfd, &data, sizeof (data));
 215       if (PREDICT_FALSE (rv < 0))
 216         clib_unix_warning ("signal write on %d returned %d", mq->q.evtfd, rv);
 217     }
 218 }
 219
 220 svm_msg_q_msg_t
 221 svm_msg_q_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index)
 222 {
 223   svm_msg_q_ring_shared_t *sr;
 224   svm_msg_q_ring_t *ring;
 225   svm_msg_q_msg_t msg;
 226
 227   ring = svm_msg_q_ring_inline (mq, ring_index);
 228   sr = ring->shr;
 229
 230   ASSERT (sr->cursize < ring->nitems);
 231   msg.ring_index = ring - mq->rings;
 232   msg.elt_index = sr->tail;
 233   sr->tail = (sr->tail + 1) % ring->nitems;
 234   clib_atomic_fetch_add_rel (&sr->cursize, 1);
 235   return msg;
 236 }
 237
 238 int
 239 svm_msg_q_lock_and_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index,
 240                                      u8 noblock, svm_msg_q_msg_t * msg)
 241 {
 242   if (noblock)
 243     {
 244       if (svm_msg_q_try_lock (mq))
 245         return -1;
 246       if (PREDICT_FALSE (svm_msg_q_is_full (mq)
 247                          || svm_msg_q_ring_is_full (mq, ring_index)))
 248         {
 249           svm_msg_q_unlock (mq);
 250           return -2;
 251         }
 252       *msg = svm_msg_q_alloc_msg_w_ring (mq, ring_index);
 253     }
 254   else
 255     {
 256       svm_msg_q_lock (mq);
 257       while (svm_msg_q_is_full (mq)
 258              || svm_msg_q_ring_is_full (mq, ring_index))
 259         svm_msg_q_wait_prod (mq);
 260       *msg = svm_msg_q_alloc_msg_w_ring (mq, ring_index);
 261     }
 262   return 0;
 263 }
 264
 265 svm_msg_q_msg_t
 266 svm_msg_q_alloc_msg (svm_msg_q_t * mq, u32 nbytes)
 267 {
 268   svm_msg_q_msg_t msg = {.as_u64 = ~0 };
 269   svm_msg_q_ring_shared_t *sr;
 270   svm_msg_q_ring_t *ring;
 271
 272   vec_foreach (ring, mq->rings)
 273   {
 274     sr = ring->shr;
 275     if (ring->elsize < nbytes || sr->cursize == ring->nitems)
 276       continue;
 277     msg.ring_index = ring - mq->rings;
 278     msg.elt_index = sr->tail;
 279     sr->tail = (sr->tail + 1) % ring->nitems;
 280     clib_atomic_fetch_add_relax (&sr->cursize, 1);
 281     break;
 282   }
 283   return msg;
 284 }
 285
 286 void *
 287 svm_msg_q_msg_data (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
 288 {
 289   svm_msg_q_ring_t *ring = svm_msg_q_ring_inline (mq, msg->ring_index);
 290   return svm_msg_q_ring_data (ring, msg->elt_index);
 291 }
 292
 293 void
 294 svm_msg_q_free_msg (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
 295 {
 296   svm_msg_q_ring_shared_t *sr;
 297   svm_msg_q_ring_t *ring;
 298   u32 need_signal;
 299
 300   ASSERT (vec_len (mq->rings) > msg->ring_index);
 301   ring = svm_msg_q_ring_inline (mq, msg->ring_index);
 302   sr = ring->shr;
 303   if (msg->elt_index == sr->head)
 304     {
 305       sr->head = (sr->head + 1) % ring->nitems;
 306     }
 307   else
 308     {
 309       clib_warning ("message out of order: elt %u head %u ring %u",
 310                     msg->elt_index, sr->head, msg->ring_index);
 311       /* for now, expect messages to be processed in order */
 312       ASSERT (0);
 313     }
 314
 315   need_signal = clib_atomic_load_relax_n (&sr->cursize) == ring->nitems;
 316   clib_atomic_fetch_sub_relax (&sr->cursize, 1);
 317
 318   if (PREDICT_FALSE (need_signal))
 319     svm_msg_q_send_signal (mq, 1 /* is consumer */);
 320 }
 321
 322 static int
 323 svm_msq_q_msg_is_valid (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
 324 {
 325   u32 dist1, dist2, tail, head;
 326   svm_msg_q_ring_shared_t *sr;
 327   svm_msg_q_ring_t *ring;
 328
 329   if (vec_len (mq->rings) <= msg->ring_index)
 330     return 0;
 331
 332   ring = svm_msg_q_ring_inline (mq, msg->ring_index);
 333   sr = ring->shr;
 334   tail = sr->tail;
 335   head = sr->head;
 336
 337   dist1 = ((ring->nitems + msg->elt_index) - head) % ring->nitems;
 338   if (tail == head)
 339     dist2 = (sr->cursize == 0) ? 0 : ring->nitems;
 340   else
 341     dist2 = ((ring->nitems + tail) - head) % ring->nitems;
 342   return (dist1 < dist2);
 343 }
 344
 345 static void
 346 svm_msg_q_add_raw (svm_msg_q_t *mq, u8 *elem)
 347 {
 348   svm_msg_q_shared_queue_t *sq = mq->q.shr;
 349   i8 *tailp;
 350   u32 sz;
 351
 352   tailp = (i8 *) (&sq->data[0] + sq->elsize * sq->tail);
 353   clib_memcpy_fast (tailp, elem, sq->elsize);
 354
 355   sq->tail = (sq->tail + 1) % sq->maxsize;
 356
 357   sz = clib_atomic_fetch_add_rel (&sq->cursize, 1);
 358   if (!sz)
 359     svm_msg_q_send_signal (mq, 0 /* is consumer */);
 360 }
 361
 362 int
 363 svm_msg_q_add (svm_msg_q_t * mq, svm_msg_q_msg_t * msg, int nowait)
 364 {
 365   ASSERT (svm_msq_q_msg_is_valid (mq, msg));
 366
 367   if (nowait)
 368     {
 369       /* zero on success */
 370       if (svm_msg_q_try_lock (mq))
 371         {
 372           return (-1);
 373         }
 374     }
 375   else
 376     svm_msg_q_lock (mq);
 377
 378   if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
 379     {
 380       if (nowait)
 381         return (-2);
 382       while (svm_msg_q_is_full (mq))
 383         svm_msg_q_wait_prod (mq);
 384     }
 385
 386   svm_msg_q_add_raw (mq, (u8 *) msg);
 387
 388   svm_msg_q_unlock (mq);
 389
 390   return 0;
 391 }
 392
 393 void
 394 svm_msg_q_add_and_unlock (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
 395 {
 396   ASSERT (svm_msq_q_msg_is_valid (mq, msg));
 397   svm_msg_q_add_raw (mq, (u8 *) msg);
 398   svm_msg_q_unlock (mq);
 399 }
 400
 401 int
 402 svm_msg_q_sub_raw (svm_msg_q_t *mq, svm_msg_q_msg_t *elem)
 403 {
 404   svm_msg_q_shared_queue_t *sq = mq->q.shr;
 405   i8 *headp;
 406   u32 sz;
 407
 408   ASSERT (!svm_msg_q_is_empty (mq));
 409
 410   headp = (i8 *) (&sq->data[0] + sq->elsize * sq->head);
 411   clib_memcpy_fast (elem, headp, sq->elsize);
 412
 413   sq->head = (sq->head + 1) % sq->maxsize;
 414
 415   sz = clib_atomic_fetch_sub_relax (&sq->cursize, 1);
 416   if (PREDICT_FALSE (sz == sq->maxsize))
 417     svm_msg_q_send_signal (mq, 1 /* is consumer */);
 418
 419   return 0;
 420 }
 421
 422 int
 423 svm_msg_q_sub_raw_batch (svm_msg_q_t *mq, svm_msg_q_msg_t *msg_buf, u32 n_msgs)
 424 {
 425   svm_msg_q_shared_queue_t *sq = mq->q.shr;
 426   u32 sz, to_deq;
 427   i8 *headp;
 428
 429   sz = svm_msg_q_size (mq);
 430   ASSERT (sz);
 431   to_deq = clib_min (sz, n_msgs);
 432
 433   headp = (i8 *) (&sq->data[0] + sq->elsize * sq->head);
 434
 435   if (sq->head + to_deq < sq->maxsize)
 436     {
 437       clib_memcpy_fast (msg_buf, headp, sq->elsize * to_deq);
 438       sq->head += to_deq;
 439     }
 440   else
 441     {
 442       u32 first_batch = sq->maxsize - sq->head;
 443       clib_memcpy_fast (msg_buf, headp, sq->elsize * first_batch);
 444       clib_memcpy_fast (msg_buf + first_batch, sq->data,
 445                         sq->elsize * (to_deq - first_batch));
 446       sq->head = (sq->head + to_deq) % sq->maxsize;
 447     }
 448
 449   clib_atomic_fetch_sub_relax (&sq->cursize, to_deq);
 450   if (PREDICT_FALSE (sz == sq->maxsize))
 451     svm_msg_q_send_signal (mq, 1 /* is consumer */);
 452
 453   return to_deq;
 454 }
 455
 456 int
 457 svm_msg_q_sub (svm_msg_q_t *mq, svm_msg_q_msg_t *msg,
 458                svm_q_conditional_wait_t cond, u32 time)
 459 {
 460   int rc = 0;
 461
 462   if (svm_msg_q_is_empty (mq))
 463     {
 464       if (cond == SVM_Q_NOWAIT)
 465         {
 466           return (-2);
 467         }
 468       else if (cond == SVM_Q_TIMEDWAIT)
 469         {
 470           if ((rc = svm_msg_q_timedwait (mq, time)))
 471             return rc;
 472         }
 473       else
 474         {
 475           svm_msg_q_wait (mq, SVM_MQ_WAIT_EMPTY);
 476         }
 477     }
 478
 479   svm_msg_q_sub_raw (mq, msg);
 480
 481   return 0;
 482 }
 483
 484 void
 485 svm_msg_q_set_eventfd (svm_msg_q_t *mq, int fd)
 486 {
 487   mq->q.evtfd = fd;
 488 }
 489
 490 int
 491 svm_msg_q_alloc_eventfd (svm_msg_q_t *mq)
 492 {
 493   int fd;
 494   if ((fd = eventfd (0, 0)) < 0)
 495     return -1;
 496   svm_msg_q_set_eventfd (mq, fd);
 497   return 0;
 498 }
 499
 500 int
 501 svm_msg_q_wait (svm_msg_q_t *mq, svm_msg_q_wait_type_t type)
 502 {
 503   u8 (*fn) (svm_msg_q_t *);
 504   int rv;
 505
 506   fn = (type == SVM_MQ_WAIT_EMPTY) ? svm_msg_q_is_empty : svm_msg_q_is_full;
 507
 508   if (mq->q.evtfd == -1)
 509     {
 510       rv = pthread_mutex_lock (&mq->q.shr->mutex);
 511       if (PREDICT_FALSE (rv == EOWNERDEAD))
 512         {
 513           rv = pthread_mutex_consistent (&mq->q.shr->mutex);
 514           return rv;
 515         }
 516
 517       while (fn (mq))
 518         pthread_cond_wait (&mq->q.shr->condvar, &mq->q.shr->mutex);
 519
 520       pthread_mutex_unlock (&mq->q.shr->mutex);
 521     }
 522   else
 523     {
 524       u64 buf;
 525
 526       while (fn (mq))
 527         {
 528           while ((rv = read (mq->q.evtfd, &buf, sizeof (buf))) < 0)
 529             {
 530               if (errno != EAGAIN)
 531                 {
 532                   clib_unix_warning ("read error");
 533                   return rv;
 534                 }
 535             }
 536         }
 537     }
 538
 539   return 0;
 540 }
 541
 542 int
 543 svm_msg_q_wait_prod (svm_msg_q_t *mq)
 544 {
 545   if (mq->q.evtfd == -1)
 546     {
 547       while (svm_msg_q_is_full (mq))
 548         pthread_cond_wait (&mq->q.shr->condvar, &mq->q.shr->mutex);
 549     }
 550   else
 551     {
 552       u64 buf;
 553       int rv;
 554
 555       while (svm_msg_q_is_full (mq))
 556         {
 557           while ((rv = read (mq->q.evtfd, &buf, sizeof (buf))) < 0)
 558             {
 559               if (errno != EAGAIN)
 560                 {
 561                   clib_unix_warning ("read error");
 562                   return rv;
 563                 }
 564             }
 565         }
 566     }
 567
 568   return 0;
 569 }
 570
 571 int
 572 svm_msg_q_timedwait (svm_msg_q_t *mq, double timeout)
 573 {
 574   if (mq->q.evtfd == -1)
 575     {
 576       svm_msg_q_shared_queue_t *sq = mq->q.shr;
 577       struct timespec ts;
 578       u32 sz;
 579       int rv;
 580
 581       rv = pthread_mutex_lock (&sq->mutex);
 582       if (PREDICT_FALSE (rv == EOWNERDEAD))
 583         {
 584           rv = pthread_mutex_consistent (&sq->mutex);
 585           return rv;
 586         }
 587
 588       /* check if we're still in a signalable state after grabbing lock */
 589       sz = svm_msg_q_size (mq);
 590       if (sz != 0 && sz != sq->maxsize)
 591         {
 592           pthread_mutex_unlock (&sq->mutex);
 593           return 0;
 594         }
 595
 596       ts.tv_sec = unix_time_now () + (u32) timeout;
 597       ts.tv_nsec = (timeout - (u32) timeout) * 1e9;
 598       rv = pthread_cond_timedwait (&sq->condvar, &sq->mutex, &ts);
 599
 600       pthread_mutex_unlock (&sq->mutex);
 601       return rv;
 602     }
 603   else
 604     {
 605       struct timeval tv;
 606       u64 buf;
 607       int rv;
 608
 609       tv.tv_sec = (u64) timeout;
 610       tv.tv_usec = ((u64) timeout - (u64) timeout) * 1e9;
 611       rv = setsockopt (mq->q.evtfd, SOL_SOCKET, SO_RCVTIMEO,
 612                        (const char *) &tv, sizeof tv);
 613       if (rv < 0)
 614         {
 615           clib_unix_warning ("setsockopt");
 616           return -1;
 617         }
 618
 619       rv = read (mq->q.evtfd, &buf, sizeof (buf));
 620       if (rv < 0)
 621         clib_warning ("read %u", errno);
 622
 623       return rv < 0 ? errno : 0;
 624     }
 625 }
 626
 627 u8 *
 628 format_svm_msg_q (u8 * s, va_list * args)
 629 {
 630   svm_msg_q_t *mq = va_arg (*args, svm_msg_q_t *);
 631   s = format (s, " [Q:%d/%d]", mq->q.shr->cursize, mq->q.shr->maxsize);
 632   for (u32 i = 0; i < vec_len (mq->rings); i++)
 633     {
 634       s = format (s, " [R%d:%d/%d]", i, mq->rings[i].shr->cursize,
 635                   mq->rings[i].nitems);
 636     }
 637   return s;
 638 }
 639
 640 /*
 641  * fd.io coding-style-patch-verification: ON
 642  *
 643  * Local Variables:
 644  * eval: (c-set-style "gnu")
 645  * End:
 646  */