src/svm/message_queue.c

   1 /*
   2  * Copyright (c) 2018 Cisco and/or its affiliates.
   3  * Licensed under the Apache License, Version 2.0 (the "License");
   4  * you may not use this file except in compliance with the License.
   5  * You may obtain a copy of the License at:
   6  *
   7  *     http://www.apache.org/licenses/LICENSE-2.0
   8  *
   9  * Unless required by applicable law or agreed to in writing, software
  10  * distributed under the License is distributed on an "AS IS" BASIS,
  11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  * See the License for the specific language governing permissions and
  13  * limitations under the License.
  14  */
  15
  16 #include <svm/message_queue.h>
  17 #include <vppinfra/mem.h>
  18 #include <vppinfra/format.h>
  19 #include <vppinfra/time.h>
  20 #include <sys/eventfd.h>
  21 #include <sys/socket.h>
  22
  23 static inline svm_msg_q_ring_t *
  24 svm_msg_q_ring_inline (svm_msg_q_t * mq, u32 ring_index)
  25 {
  26   return vec_elt_at_index (mq->rings, ring_index);
  27 }
  28
  29 svm_msg_q_ring_t *
  30 svm_msg_q_ring (svm_msg_q_t * mq, u32 ring_index)
  31 {
  32   return svm_msg_q_ring_inline (mq, ring_index);
  33 }
  34
  35 static inline void *
  36 svm_msg_q_ring_data (svm_msg_q_ring_t * ring, u32 elt_index)
  37 {
  38   ASSERT (elt_index < ring->nitems);
  39   return (ring->shr->data + elt_index * ring->elsize);
  40 }
  41
  42 static void
  43 svm_msg_q_init_mutex (svm_msg_q_shared_queue_t *sq)
  44 {
  45   pthread_mutexattr_t attr;
  46   pthread_condattr_t cattr;
  47
  48   clib_memset (&attr, 0, sizeof (attr));
  49   clib_memset (&cattr, 0, sizeof (cattr));
  50
  51   if (pthread_mutexattr_init (&attr))
  52     clib_unix_warning ("mutexattr_init");
  53   if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
  54     clib_unix_warning ("pthread_mutexattr_setpshared");
  55   if (pthread_mutexattr_setrobust (&attr, PTHREAD_MUTEX_ROBUST))
  56     clib_unix_warning ("setrobust");
  57   if (pthread_mutex_init (&sq->mutex, &attr))
  58     clib_unix_warning ("mutex_init");
  59   if (pthread_mutexattr_destroy (&attr))
  60     clib_unix_warning ("mutexattr_destroy");
  61   if (pthread_condattr_init (&cattr))
  62     clib_unix_warning ("condattr_init");
  63   if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
  64     clib_unix_warning ("condattr_setpshared");
  65   if (pthread_cond_init (&sq->condvar, &cattr))
  66     clib_unix_warning ("cond_init1");
  67   if (pthread_condattr_destroy (&cattr))
  68     clib_unix_warning ("cond_init2");
  69 }
  70
  71 svm_msg_q_shared_t *
  72 svm_msg_q_init (void *base, svm_msg_q_cfg_t *cfg)
  73 {
  74   svm_msg_q_ring_shared_t *ring;
  75   svm_msg_q_shared_queue_t *sq;
  76   svm_msg_q_shared_t *smq;
  77   u32 q_sz, offset;
  78   int i;
  79
  80   q_sz = sizeof (*sq) + cfg->q_nitems * sizeof (svm_msg_q_msg_t);
  81
  82   smq = (svm_msg_q_shared_t *) base;
  83   sq = smq->q;
  84   clib_memset (sq, 0, sizeof (*sq));
  85   sq->elsize = sizeof (svm_msg_q_msg_t);
  86   sq->maxsize = cfg->q_nitems;
  87   smq->n_rings = cfg->n_rings;
  88   ring = (void *) ((u8 *) smq->q + q_sz);
  89   for (i = 0; i < cfg->n_rings; i++)
  90     {
  91       ring->elsize = cfg->ring_cfgs[i].elsize;
  92       ring->nitems = cfg->ring_cfgs[i].nitems;
  93       ring->cursize = ring->head = ring->tail = 0;
  94       offset = sizeof (*ring) + ring->nitems * ring->elsize;
  95       ring = (void *) ((u8 *) ring + offset);
  96     }
  97
  98   svm_msg_q_init_mutex (sq);
  99
 100   return smq;
 101 }
 102
 103 uword
 104 svm_msg_q_size_to_alloc (svm_msg_q_cfg_t *cfg)
 105 {
 106   svm_msg_q_ring_cfg_t *ring_cfg;
 107   uword rings_sz = 0, mq_sz;
 108   u32 q_sz;
 109   int i;
 110
 111   ASSERT (cfg);
 112
 113   rings_sz = sizeof (svm_msg_q_ring_shared_t) * cfg->n_rings;
 114   for (i = 0; i < cfg->n_rings; i++)
 115     {
 116       if (cfg->ring_cfgs[i].data)
 117         continue;
 118       ring_cfg = &cfg->ring_cfgs[i];
 119       rings_sz += (uword) ring_cfg->nitems * ring_cfg->elsize;
 120     }
 121
 122   q_sz = sizeof (svm_msg_q_shared_queue_t) +
 123          cfg->q_nitems * sizeof (svm_msg_q_msg_t);
 124   mq_sz = sizeof (svm_msg_q_shared_t) + q_sz + rings_sz;
 125
 126   return mq_sz;
 127 }
 128
 129 svm_msg_q_shared_t *
 130 svm_msg_q_alloc (svm_msg_q_cfg_t *cfg)
 131 {
 132   uword mq_sz;
 133   u8 *base;
 134
 135   mq_sz = svm_msg_q_size_to_alloc (cfg);
 136   base = clib_mem_alloc_aligned (mq_sz, CLIB_CACHE_LINE_BYTES);
 137   if (!base)
 138     return 0;
 139
 140   return svm_msg_q_init (base, cfg);
 141 }
 142
 143 void
 144 svm_msg_q_attach (svm_msg_q_t *mq, void *smq_base)
 145 {
 146   svm_msg_q_ring_shared_t *ring;
 147   svm_msg_q_shared_t *smq;
 148   u32 i, n_rings, q_sz, offset;
 149
 150   smq = (svm_msg_q_shared_t *) smq_base;
 151   mq->q.shr = smq->q;
 152   mq->q.evtfd = -1;
 153   n_rings = smq->n_rings;
 154   vec_validate (mq->rings, n_rings - 1);
 155   q_sz = sizeof (svm_msg_q_shared_queue_t) +
 156          mq->q.shr->maxsize * sizeof (svm_msg_q_msg_t);
 157   ring = (void *) ((u8 *) smq->q + q_sz);
 158   for (i = 0; i < n_rings; i++)
 159     {
 160       mq->rings[i].nitems = ring->nitems;
 161       mq->rings[i].elsize = ring->elsize;
 162       mq->rings[i].shr = ring;
 163       offset = sizeof (*ring) + ring->nitems * ring->elsize;
 164       ring = (void *) ((u8 *) ring + offset);
 165     }
 166 }
 167
 168 void
 169 svm_msg_q_free (svm_msg_q_t * mq)
 170 {
 171   clib_mem_free (mq->q.shr);
 172   clib_mem_free (mq);
 173 }
 174
 175 static void
 176 svm_msg_q_send_signal (svm_msg_q_t *mq)
 177 {
 178   if (mq->q.evtfd == -1)
 179     {
 180       (void) pthread_cond_broadcast (&mq->q.shr->condvar);
 181     }
 182   else
 183     {
 184       int __clib_unused rv;
 185       u64 data = 1;
 186
 187       if (mq->q.evtfd < 0)
 188         return;
 189
 190       rv = write (mq->q.evtfd, &data, sizeof (data));
 191       if (PREDICT_FALSE (rv < 0))
 192         clib_unix_warning ("signal write on %d returned %d", mq->q.evtfd, rv);
 193     }
 194 }
 195
 196 svm_msg_q_msg_t
 197 svm_msg_q_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index)
 198 {
 199   svm_msg_q_ring_shared_t *sr;
 200   svm_msg_q_ring_t *ring;
 201   svm_msg_q_msg_t msg;
 202
 203   ring = svm_msg_q_ring_inline (mq, ring_index);
 204   sr = ring->shr;
 205
 206   ASSERT (sr->cursize < ring->nitems);
 207   msg.ring_index = ring - mq->rings;
 208   msg.elt_index = sr->tail;
 209   sr->tail = (sr->tail + 1) % ring->nitems;
 210   clib_atomic_fetch_add_rel (&sr->cursize, 1);
 211   return msg;
 212 }
 213
 214 int
 215 svm_msg_q_lock_and_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index,
 216                                      u8 noblock, svm_msg_q_msg_t * msg)
 217 {
 218   if (noblock)
 219     {
 220       if (svm_msg_q_try_lock (mq))
 221         return -1;
 222       if (PREDICT_FALSE (svm_msg_q_is_full (mq)
 223                          || svm_msg_q_ring_is_full (mq, ring_index)))
 224         {
 225           svm_msg_q_unlock (mq);
 226           return -2;
 227         }
 228       *msg = svm_msg_q_alloc_msg_w_ring (mq, ring_index);
 229     }
 230   else
 231     {
 232       svm_msg_q_lock (mq);
 233       while (svm_msg_q_is_full (mq)
 234              || svm_msg_q_ring_is_full (mq, ring_index))
 235         svm_msg_q_wait (mq);
 236       *msg = svm_msg_q_alloc_msg_w_ring (mq, ring_index);
 237     }
 238   return 0;
 239 }
 240
 241 svm_msg_q_msg_t
 242 svm_msg_q_alloc_msg (svm_msg_q_t * mq, u32 nbytes)
 243 {
 244   svm_msg_q_msg_t msg = {.as_u64 = ~0 };
 245   svm_msg_q_ring_shared_t *sr;
 246   svm_msg_q_ring_t *ring;
 247
 248   vec_foreach (ring, mq->rings)
 249   {
 250     sr = ring->shr;
 251     if (ring->elsize < nbytes || sr->cursize == ring->nitems)
 252       continue;
 253     msg.ring_index = ring - mq->rings;
 254     msg.elt_index = sr->tail;
 255     sr->tail = (sr->tail + 1) % ring->nitems;
 256     clib_atomic_fetch_add_rel (&sr->cursize, 1);
 257     break;
 258   }
 259   return msg;
 260 }
 261
 262 void *
 263 svm_msg_q_msg_data (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
 264 {
 265   svm_msg_q_ring_t *ring = svm_msg_q_ring_inline (mq, msg->ring_index);
 266   return svm_msg_q_ring_data (ring, msg->elt_index);
 267 }
 268
 269 void
 270 svm_msg_q_free_msg (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
 271 {
 272   svm_msg_q_ring_shared_t *sr;
 273   svm_msg_q_ring_t *ring;
 274   int need_signal;
 275
 276   ASSERT (vec_len (mq->rings) > msg->ring_index);
 277   ring = svm_msg_q_ring_inline (mq, msg->ring_index);
 278   sr = ring->shr;
 279   if (msg->elt_index == sr->head)
 280     {
 281       sr->head = (sr->head + 1) % ring->nitems;
 282     }
 283   else
 284     {
 285       clib_warning ("message out of order");
 286       /* for now, expect messages to be processed in order */
 287       ASSERT (0);
 288     }
 289
 290   need_signal = sr->cursize == ring->nitems;
 291   clib_atomic_fetch_sub_rel (&sr->cursize, 1);
 292
 293   if (PREDICT_FALSE (need_signal))
 294     svm_msg_q_send_signal (mq);
 295 }
 296
 297 static int
 298 svm_msq_q_msg_is_valid (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
 299 {
 300   u32 dist1, dist2, tail, head;
 301   svm_msg_q_ring_shared_t *sr;
 302   svm_msg_q_ring_t *ring;
 303
 304   if (vec_len (mq->rings) <= msg->ring_index)
 305     return 0;
 306
 307   ring = svm_msg_q_ring_inline (mq, msg->ring_index);
 308   sr = ring->shr;
 309   tail = sr->tail;
 310   head = sr->head;
 311
 312   dist1 = ((ring->nitems + msg->elt_index) - head) % ring->nitems;
 313   if (tail == head)
 314     dist2 = (sr->cursize == 0) ? 0 : ring->nitems;
 315   else
 316     dist2 = ((ring->nitems + tail) - head) % ring->nitems;
 317   return (dist1 < dist2);
 318 }
 319
 320 static void
 321 svm_msg_q_add_raw (svm_msg_q_t *mq, u8 *elem)
 322 {
 323   svm_msg_q_shared_queue_t *sq = mq->q.shr;
 324   i8 *tailp;
 325   u32 sz;
 326
 327   tailp = (i8 *) (&sq->data[0] + sq->elsize * sq->tail);
 328   clib_memcpy_fast (tailp, elem, sq->elsize);
 329
 330   sq->tail = (sq->tail + 1) % sq->maxsize;
 331
 332   sz = clib_atomic_fetch_add_rel (&sq->cursize, 1);
 333   if (!sz)
 334     svm_msg_q_send_signal (mq);
 335 }
 336
 337 int
 338 svm_msg_q_add (svm_msg_q_t * mq, svm_msg_q_msg_t * msg, int nowait)
 339 {
 340   ASSERT (svm_msq_q_msg_is_valid (mq, msg));
 341
 342   if (nowait)
 343     {
 344       /* zero on success */
 345       if (svm_msg_q_try_lock (mq))
 346         {
 347           return (-1);
 348         }
 349     }
 350   else
 351     svm_msg_q_lock (mq);
 352
 353   if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
 354     {
 355       if (nowait)
 356         return (-2);
 357       while (svm_msg_q_is_full (mq))
 358         svm_msg_q_wait (mq);
 359     }
 360
 361   svm_msg_q_add_raw (mq, (u8 *) msg);
 362
 363   svm_msg_q_unlock (mq);
 364
 365   return 0;
 366 }
 367
 368 void
 369 svm_msg_q_add_and_unlock (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
 370 {
 371   ASSERT (svm_msq_q_msg_is_valid (mq, msg));
 372   svm_msg_q_add_raw (mq, (u8 *) msg);
 373   svm_msg_q_unlock (mq);
 374 }
 375
 376 static int
 377 svm_msg_q_sub_raw (svm_msg_q_t *mq, u8 *elem)
 378 {
 379   svm_msg_q_shared_queue_t *sq = mq->q.shr;
 380   i8 *headp;
 381   u32 sz;
 382
 383   ASSERT (!svm_msg_q_is_empty (mq));
 384
 385   headp = (i8 *) (&sq->data[0] + sq->elsize * sq->head);
 386   clib_memcpy_fast (elem, headp, sq->elsize);
 387
 388   sq->head = (sq->head + 1) % sq->maxsize;
 389
 390   sz = clib_atomic_fetch_sub_rel (&sq->cursize, 1);
 391   if (PREDICT_FALSE (sz == sq->maxsize))
 392     svm_msg_q_send_signal (mq);
 393
 394   return 0;
 395 }
 396
 397 int
 398 svm_msg_q_sub (svm_msg_q_t * mq, svm_msg_q_msg_t * msg,
 399                svm_q_conditional_wait_t cond, u32 time)
 400 {
 401   int rc = 0;
 402
 403   if (cond == SVM_Q_NOWAIT)
 404     {
 405       /* zero on success */
 406       if (svm_msg_q_try_lock (mq))
 407         {
 408           return (-1);
 409         }
 410     }
 411   else
 412     svm_msg_q_lock (mq);
 413
 414   if (PREDICT_FALSE (svm_msg_q_is_empty (mq)))
 415     {
 416       if (cond == SVM_Q_NOWAIT)
 417         {
 418           svm_msg_q_unlock (mq);
 419           return (-2);
 420         }
 421       else if (cond == SVM_Q_TIMEDWAIT)
 422         {
 423           while (svm_msg_q_is_empty (mq) && rc == 0)
 424             rc = svm_msg_q_timedwait (mq, time);
 425
 426           if (rc == ETIMEDOUT)
 427             {
 428               svm_msg_q_unlock (mq);
 429               return ETIMEDOUT;
 430             }
 431         }
 432       else
 433         {
 434           while (svm_msg_q_is_empty (mq))
 435             svm_msg_q_wait (mq);
 436         }
 437     }
 438
 439   svm_msg_q_sub_raw (mq, (u8 *) msg);
 440
 441   svm_msg_q_unlock (mq);
 442
 443   return 0;
 444 }
 445
 446 void
 447 svm_msg_q_sub_w_lock (svm_msg_q_t *mq, svm_msg_q_msg_t *msg)
 448 {
 449   svm_msg_q_sub_raw (mq, (u8 *) msg);
 450 }
 451
 452 void
 453 svm_msg_q_set_eventfd (svm_msg_q_t *mq, int fd)
 454 {
 455   mq->q.evtfd = fd;
 456 }
 457
 458 int
 459 svm_msg_q_alloc_eventfd (svm_msg_q_t *mq)
 460 {
 461   int fd;
 462   if ((fd = eventfd (0, EFD_NONBLOCK)) < 0)
 463     return -1;
 464   svm_msg_q_set_eventfd (mq, fd);
 465   return 0;
 466 }
 467
 468 void
 469 svm_msg_q_wait (svm_msg_q_t *mq)
 470 {
 471   if (mq->q.evtfd == -1)
 472     {
 473       pthread_cond_wait (&mq->q.shr->condvar, &mq->q.shr->mutex);
 474     }
 475   else
 476     {
 477       u64 buf;
 478       int rv;
 479
 480       svm_msg_q_unlock (mq);
 481       while ((rv = read (mq->q.evtfd, &buf, sizeof (buf))) < 0)
 482         {
 483           if (errno != EAGAIN)
 484             {
 485               clib_unix_warning ("read error");
 486               return;
 487             }
 488         }
 489       svm_msg_q_lock (mq);
 490     }
 491 }
 492
 493 int
 494 svm_msg_q_timedwait (svm_msg_q_t *mq, double timeout)
 495 {
 496   if (mq->q.evtfd == -1)
 497     {
 498       struct timespec ts;
 499       ts.tv_sec = unix_time_now () + (u32) timeout;
 500       ts.tv_nsec = (timeout - (u32) timeout) * 1e9;
 501       return pthread_cond_timedwait (&mq->q.shr->condvar, &mq->q.shr->mutex,
 502                                      &ts);
 503     }
 504   else
 505     {
 506       struct timeval tv;
 507       u64 buf;
 508       int rv;
 509
 510       tv.tv_sec = (u64) timeout;
 511       tv.tv_usec = ((u64) timeout - (u64) timeout) * 1e9;
 512       setsockopt (mq->q.evtfd, SOL_SOCKET, SO_RCVTIMEO, (const char *) &tv,
 513                   sizeof tv);
 514
 515       svm_msg_q_unlock (mq);
 516       rv = read (mq->q.evtfd, &buf, sizeof (buf));
 517       if (rv < 0)
 518         clib_warning ("read %u", errno);
 519       svm_msg_q_lock (mq);
 520
 521       return rv < 0 ? errno : 0;
 522     }
 523 }
 524
 525 u8 *
 526 format_svm_msg_q (u8 * s, va_list * args)
 527 {
 528   svm_msg_q_t *mq = va_arg (*args, svm_msg_q_t *);
 529   s = format (s, " [Q:%d/%d]", mq->q.shr->cursize, mq->q.shr->maxsize);
 530   for (u32 i = 0; i < vec_len (mq->rings); i++)
 531     {
 532       s = format (s, " [R%d:%d/%d]", i, mq->rings[i].shr->cursize,
 533                   mq->rings[i].nitems);
 534     }
 535   return s;
 536 }
 537
 538 /*
 539  * fd.io coding-style-patch-verification: ON
 540  *
 541  * Local Variables:
 542  * eval: (c-set-style "gnu")
 543  * End:
 544  */