/*
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

16 #include <svm/message_queue.h>
17 #include <vppinfra/mem.h>
18 #include <vppinfra/format.h>
19 #include <vppinfra/time.h>
20 #include <sys/eventfd.h>
21 #include <sys/socket.h>
23 static inline svm_msg_q_ring_t *
24 svm_msg_q_ring_inline (svm_msg_q_t * mq, u32 ring_index)
26 return vec_elt_at_index (mq->rings, ring_index);
30 svm_msg_q_ring (svm_msg_q_t * mq, u32 ring_index)
32 return svm_msg_q_ring_inline (mq, ring_index);
36 svm_msg_q_ring_data (svm_msg_q_ring_t * ring, u32 elt_index)
38 ASSERT (elt_index < ring->nitems);
39 return (ring->shr->data + elt_index * ring->elsize);
43 svm_msg_q_init_mutex (svm_msg_q_shared_queue_t *sq)
45 pthread_mutexattr_t attr;
46 pthread_condattr_t cattr;
48 clib_memset (&attr, 0, sizeof (attr));
49 clib_memset (&cattr, 0, sizeof (cattr));
51 if (pthread_mutexattr_init (&attr))
52 clib_unix_warning ("mutexattr_init");
53 if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
54 clib_unix_warning ("pthread_mutexattr_setpshared");
55 if (pthread_mutexattr_setrobust (&attr, PTHREAD_MUTEX_ROBUST))
56 clib_unix_warning ("setrobust");
57 if (pthread_mutex_init (&sq->mutex, &attr))
58 clib_unix_warning ("mutex_init");
59 if (pthread_mutexattr_destroy (&attr))
60 clib_unix_warning ("mutexattr_destroy");
61 if (pthread_condattr_init (&cattr))
62 clib_unix_warning ("condattr_init");
63 if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
64 clib_unix_warning ("condattr_setpshared");
65 if (pthread_cond_init (&sq->condvar, &cattr))
66 clib_unix_warning ("cond_init1");
67 if (pthread_condattr_destroy (&cattr))
68 clib_unix_warning ("cond_init2");
72 svm_msg_q_init (void *base, svm_msg_q_cfg_t *cfg)
74 svm_msg_q_ring_shared_t *ring;
75 svm_msg_q_shared_queue_t *sq;
76 svm_msg_q_shared_t *smq;
80 q_sz = sizeof (*sq) + cfg->q_nitems * sizeof (svm_msg_q_msg_t);
82 smq = (svm_msg_q_shared_t *) base;
84 clib_memset (sq, 0, sizeof (*sq));
85 sq->elsize = sizeof (svm_msg_q_msg_t);
86 sq->maxsize = cfg->q_nitems;
87 smq->n_rings = cfg->n_rings;
88 ring = (void *) ((u8 *) smq->q + q_sz);
89 for (i = 0; i < cfg->n_rings; i++)
91 ring->elsize = cfg->ring_cfgs[i].elsize;
92 ring->nitems = cfg->ring_cfgs[i].nitems;
93 ring->cursize = ring->head = ring->tail = 0;
94 offset = sizeof (*ring) + ring->nitems * ring->elsize;
95 ring = (void *) ((u8 *) ring + offset);
98 svm_msg_q_init_mutex (sq);
104 svm_msg_q_size_to_alloc (svm_msg_q_cfg_t *cfg)
106 svm_msg_q_ring_cfg_t *ring_cfg;
107 uword rings_sz = 0, mq_sz;
113 rings_sz = sizeof (svm_msg_q_ring_shared_t) * cfg->n_rings;
114 for (i = 0; i < cfg->n_rings; i++)
116 if (cfg->ring_cfgs[i].data)
118 ring_cfg = &cfg->ring_cfgs[i];
119 rings_sz += (uword) ring_cfg->nitems * ring_cfg->elsize;
122 q_sz = sizeof (svm_msg_q_shared_queue_t) +
123 cfg->q_nitems * sizeof (svm_msg_q_msg_t);
124 mq_sz = sizeof (svm_msg_q_shared_t) + q_sz + rings_sz;
130 svm_msg_q_alloc (svm_msg_q_cfg_t *cfg)
135 mq_sz = svm_msg_q_size_to_alloc (cfg);
136 base = clib_mem_alloc_aligned (mq_sz, CLIB_CACHE_LINE_BYTES);
140 return svm_msg_q_init (base, cfg);
144 svm_msg_q_attach (svm_msg_q_t *mq, void *smq_base)
146 svm_msg_q_ring_shared_t *ring;
147 svm_msg_q_shared_t *smq;
148 u32 i, n_rings, q_sz, offset;
150 smq = (svm_msg_q_shared_t *) smq_base;
153 n_rings = smq->n_rings;
154 vec_validate (mq->rings, n_rings - 1);
155 q_sz = sizeof (svm_msg_q_shared_queue_t) +
156 mq->q.shr->maxsize * sizeof (svm_msg_q_msg_t);
157 ring = (void *) ((u8 *) smq->q + q_sz);
158 for (i = 0; i < n_rings; i++)
160 mq->rings[i].nitems = ring->nitems;
161 mq->rings[i].elsize = ring->elsize;
162 mq->rings[i].shr = ring;
163 offset = sizeof (*ring) + ring->nitems * ring->elsize;
164 ring = (void *) ((u8 *) ring + offset);
166 clib_spinlock_init (&mq->q.lock);
170 svm_msg_q_cleanup (svm_msg_q_t *mq)
172 vec_free (mq->rings);
173 clib_spinlock_free (&mq->q.lock);
174 if (mq->q.evtfd != -1)
179 svm_msg_q_free (svm_msg_q_t * mq)
181 svm_msg_q_cleanup (mq);
182 clib_mem_free (mq->q.shr);
187 svm_msg_q_send_signal (svm_msg_q_t *mq, u8 is_consumer)
189 if (mq->q.evtfd == -1)
193 int rv = pthread_mutex_lock (&mq->q.shr->mutex);
194 if (PREDICT_FALSE (rv == EOWNERDEAD))
196 rv = pthread_mutex_consistent (&mq->q.shr->mutex);
201 (void) pthread_cond_broadcast (&mq->q.shr->condvar);
204 pthread_mutex_unlock (&mq->q.shr->mutex);
208 int __clib_unused rv;
214 rv = write (mq->q.evtfd, &data, sizeof (data));
215 if (PREDICT_FALSE (rv < 0))
216 clib_unix_warning ("signal write on %d returned %d", mq->q.evtfd, rv);
221 svm_msg_q_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index)
223 svm_msg_q_ring_shared_t *sr;
224 svm_msg_q_ring_t *ring;
227 ring = svm_msg_q_ring_inline (mq, ring_index);
230 ASSERT (sr->cursize < ring->nitems);
231 msg.ring_index = ring - mq->rings;
232 msg.elt_index = sr->tail;
233 sr->tail = (sr->tail + 1) % ring->nitems;
234 clib_atomic_fetch_add_rel (&sr->cursize, 1);
239 svm_msg_q_lock_and_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index,
240 u8 noblock, svm_msg_q_msg_t * msg)
244 if (svm_msg_q_try_lock (mq))
246 if (PREDICT_FALSE (svm_msg_q_or_ring_is_full (mq, ring_index)))
248 svm_msg_q_unlock (mq);
251 *msg = svm_msg_q_alloc_msg_w_ring (mq, ring_index);
256 while (svm_msg_q_or_ring_is_full (mq, ring_index))
257 svm_msg_q_or_ring_wait_prod (mq, ring_index);
258 *msg = svm_msg_q_alloc_msg_w_ring (mq, ring_index);
264 svm_msg_q_alloc_msg (svm_msg_q_t * mq, u32 nbytes)
266 svm_msg_q_msg_t msg = {.as_u64 = ~0 };
267 svm_msg_q_ring_shared_t *sr;
268 svm_msg_q_ring_t *ring;
270 vec_foreach (ring, mq->rings)
273 if (ring->elsize < nbytes || sr->cursize == ring->nitems)
275 msg.ring_index = ring - mq->rings;
276 msg.elt_index = sr->tail;
277 sr->tail = (sr->tail + 1) % ring->nitems;
278 clib_atomic_fetch_add_relax (&sr->cursize, 1);
285 svm_msg_q_msg_data (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
287 svm_msg_q_ring_t *ring = svm_msg_q_ring_inline (mq, msg->ring_index);
288 return svm_msg_q_ring_data (ring, msg->elt_index);
292 svm_msg_q_free_msg (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
294 svm_msg_q_ring_shared_t *sr;
295 svm_msg_q_ring_t *ring;
298 ASSERT (vec_len (mq->rings) > msg->ring_index);
299 ring = svm_msg_q_ring_inline (mq, msg->ring_index);
301 if (msg->elt_index == sr->head)
303 sr->head = (sr->head + 1) % ring->nitems;
307 clib_warning ("message out of order: elt %u head %u ring %u",
308 msg->elt_index, sr->head, msg->ring_index);
309 /* for now, expect messages to be processed in order */
313 need_signal = clib_atomic_load_relax_n (&sr->cursize) == ring->nitems;
314 clib_atomic_fetch_sub_relax (&sr->cursize, 1);
316 if (PREDICT_FALSE (need_signal))
317 svm_msg_q_send_signal (mq, 1 /* is consumer */);
321 svm_msq_q_msg_is_valid (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
323 u32 dist1, dist2, tail, head;
324 svm_msg_q_ring_shared_t *sr;
325 svm_msg_q_ring_t *ring;
327 if (vec_len (mq->rings) <= msg->ring_index)
330 ring = svm_msg_q_ring_inline (mq, msg->ring_index);
335 dist1 = ((ring->nitems + msg->elt_index) - head) % ring->nitems;
337 dist2 = (sr->cursize == 0) ? 0 : ring->nitems;
339 dist2 = ((ring->nitems + tail) - head) % ring->nitems;
340 return (dist1 < dist2);
344 svm_msg_q_add_raw (svm_msg_q_t *mq, svm_msg_q_msg_t *msg)
346 svm_msg_q_shared_queue_t *sq = mq->q.shr;
350 tailp = (i8 *) (&sq->data[0] + sq->elsize * sq->tail);
351 clib_memcpy_fast (tailp, msg, sq->elsize);
353 sq->tail = (sq->tail + 1) % sq->maxsize;
355 sz = clib_atomic_fetch_add_rel (&sq->cursize, 1);
357 svm_msg_q_send_signal (mq, 0 /* is consumer */);
361 svm_msg_q_add (svm_msg_q_t * mq, svm_msg_q_msg_t * msg, int nowait)
363 ASSERT (svm_msq_q_msg_is_valid (mq, msg));
367 /* zero on success */
368 if (svm_msg_q_try_lock (mq))
376 if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
380 while (svm_msg_q_is_full (mq))
381 svm_msg_q_wait_prod (mq);
384 svm_msg_q_add_raw (mq, msg);
386 svm_msg_q_unlock (mq);
392 svm_msg_q_add_and_unlock (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
394 ASSERT (svm_msq_q_msg_is_valid (mq, msg));
395 svm_msg_q_add_raw (mq, msg);
396 svm_msg_q_unlock (mq);
400 svm_msg_q_sub_raw (svm_msg_q_t *mq, svm_msg_q_msg_t *elem)
402 svm_msg_q_shared_queue_t *sq = mq->q.shr;
406 ASSERT (!svm_msg_q_is_empty (mq));
408 headp = (i8 *) (&sq->data[0] + sq->elsize * sq->head);
409 clib_memcpy_fast (elem, headp, sq->elsize);
411 sq->head = (sq->head + 1) % sq->maxsize;
413 sz = clib_atomic_fetch_sub_relax (&sq->cursize, 1);
414 if (PREDICT_FALSE (sz == sq->maxsize))
415 svm_msg_q_send_signal (mq, 1 /* is consumer */);
421 svm_msg_q_sub_raw_batch (svm_msg_q_t *mq, svm_msg_q_msg_t *msg_buf, u32 n_msgs)
423 svm_msg_q_shared_queue_t *sq = mq->q.shr;
427 sz = svm_msg_q_size (mq);
429 to_deq = clib_min (sz, n_msgs);
431 headp = (i8 *) (&sq->data[0] + sq->elsize * sq->head);
433 if (sq->head + to_deq < sq->maxsize)
435 clib_memcpy_fast (msg_buf, headp, sq->elsize * to_deq);
440 u32 first_batch = sq->maxsize - sq->head;
441 clib_memcpy_fast (msg_buf, headp, sq->elsize * first_batch);
442 clib_memcpy_fast (msg_buf + first_batch, sq->data,
443 sq->elsize * (to_deq - first_batch));
444 sq->head = (sq->head + to_deq) % sq->maxsize;
447 clib_atomic_fetch_sub_relax (&sq->cursize, to_deq);
448 if (PREDICT_FALSE (sz == sq->maxsize))
449 svm_msg_q_send_signal (mq, 1 /* is consumer */);
455 svm_msg_q_sub (svm_msg_q_t *mq, svm_msg_q_msg_t *msg,
456 svm_q_conditional_wait_t cond, u32 time)
460 if (svm_msg_q_is_empty (mq))
462 if (cond == SVM_Q_NOWAIT)
466 else if (cond == SVM_Q_TIMEDWAIT)
468 if ((rc = svm_msg_q_timedwait (mq, time)))
473 svm_msg_q_wait (mq, SVM_MQ_WAIT_EMPTY);
477 svm_msg_q_sub_raw (mq, msg);
483 svm_msg_q_set_eventfd (svm_msg_q_t *mq, int fd)
489 svm_msg_q_alloc_eventfd (svm_msg_q_t *mq)
492 if ((fd = eventfd (0, 0)) < 0)
494 svm_msg_q_set_eventfd (mq, fd);
499 svm_msg_q_wait (svm_msg_q_t *mq, svm_msg_q_wait_type_t type)
501 u8 (*fn) (svm_msg_q_t *);
504 fn = (type == SVM_MQ_WAIT_EMPTY) ? svm_msg_q_is_empty : svm_msg_q_is_full;
506 if (mq->q.evtfd == -1)
508 rv = pthread_mutex_lock (&mq->q.shr->mutex);
509 if (PREDICT_FALSE (rv == EOWNERDEAD))
511 rv = pthread_mutex_consistent (&mq->q.shr->mutex);
516 pthread_cond_wait (&mq->q.shr->condvar, &mq->q.shr->mutex);
518 pthread_mutex_unlock (&mq->q.shr->mutex);
526 while ((rv = read (mq->q.evtfd, &buf, sizeof (buf))) < 0)
530 clib_unix_warning ("read error");
541 svm_msg_q_wait_prod (svm_msg_q_t *mq)
543 if (mq->q.evtfd == -1)
545 while (svm_msg_q_is_full (mq))
546 pthread_cond_wait (&mq->q.shr->condvar, &mq->q.shr->mutex);
553 while (svm_msg_q_is_full (mq))
555 while ((rv = read (mq->q.evtfd, &buf, sizeof (buf))) < 0)
559 clib_unix_warning ("read error");
570 svm_msg_q_or_ring_wait_prod (svm_msg_q_t *mq, u32 ring_index)
572 if (mq->q.evtfd == -1)
574 while (svm_msg_q_or_ring_is_full (mq, ring_index))
575 pthread_cond_wait (&mq->q.shr->condvar, &mq->q.shr->mutex);
582 while (svm_msg_q_or_ring_is_full (mq, ring_index))
584 while ((rv = read (mq->q.evtfd, &buf, sizeof (buf))) < 0)
588 clib_unix_warning ("read error");
599 svm_msg_q_timedwait (svm_msg_q_t *mq, double timeout)
601 if (mq->q.evtfd == -1)
603 svm_msg_q_shared_queue_t *sq = mq->q.shr;
608 rv = pthread_mutex_lock (&sq->mutex);
609 if (PREDICT_FALSE (rv == EOWNERDEAD))
611 rv = pthread_mutex_consistent (&sq->mutex);
615 /* check if we're still in a signalable state after grabbing lock */
616 sz = svm_msg_q_size (mq);
617 if (sz != 0 && sz != sq->maxsize)
619 pthread_mutex_unlock (&sq->mutex);
623 ts.tv_sec = unix_time_now () + (u32) timeout;
624 ts.tv_nsec = (timeout - (u32) timeout) * 1e9;
625 rv = pthread_cond_timedwait (&sq->condvar, &sq->mutex, &ts);
627 pthread_mutex_unlock (&sq->mutex);
636 tv.tv_sec = (u64) timeout;
637 tv.tv_usec = ((u64) timeout - (u64) timeout) * 1e9;
638 rv = setsockopt (mq->q.evtfd, SOL_SOCKET, SO_RCVTIMEO,
639 (const char *) &tv, sizeof tv);
642 clib_unix_warning ("setsockopt");
646 rv = read (mq->q.evtfd, &buf, sizeof (buf));
648 clib_warning ("read %u", errno);
650 return rv < 0 ? errno : 0;
655 format_svm_msg_q (u8 * s, va_list * args)
657 svm_msg_q_t *mq = va_arg (*args, svm_msg_q_t *);
658 s = format (s, " [Q:%d/%d]", mq->q.shr->cursize, mq->q.shr->maxsize);
659 for (u32 i = 0; i < vec_len (mq->rings); i++)
661 s = format (s, " [R%d:%d/%d]", i, mq->rings[i].shr->cursize,
662 mq->rings[i].nitems);
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */