*/
#include <svm/svm_fifo.h>
+#include <vppinfra/cpu.h>
-#define offset_lt(_a, _b) ((i32)((_a)-(_b)) < 0)
-#define offset_leq(_a, _b) ((i32)((_a)-(_b)) <= 0)
-#define offset_gt(_a, _b) ((i32)((_a)-(_b)) > 0)
-#define offset_geq(_a, _b) ((i32)((_a)-(_b)) >= 0)
+/**
+ * Strict "comes before" test for two fifo positions.
+ *
+ * The positions are ranked by the value ooo_segment_distance_from_tail ()
+ * reports for each of them, not by their raw numeric value.
+ *
+ * @param f fifo both positions belong to
+ * @param a first position
+ * @param b second position
+ * @return non-zero if a ranks strictly below b, 0 otherwise
+ */
+static inline u8
+position_lt (svm_fifo_t * f, u32 a, u32 b)
+{
+  return (ooo_segment_distance_from_tail (f, b)
+          > ooo_segment_distance_from_tail (f, a));
+}
+
+/**
+ * "Comes before or coincides with" test for two fifo positions.
+ *
+ * The positions are ranked by the value ooo_segment_distance_from_tail ()
+ * reports for each of them, not by their raw numeric value.
+ *
+ * @param f fifo both positions belong to
+ * @param a first position
+ * @param b second position
+ * @return non-zero if a ranks below or equal to b, 0 otherwise
+ */
+static inline u8
+position_leq (svm_fifo_t * f, u32 a, u32 b)
+{
+  return (ooo_segment_distance_from_tail (f, b)
+          >= ooo_segment_distance_from_tail (f, a));
+}
+
+/**
+ * Strict "comes after" test for two fifo positions.
+ *
+ * The positions are ranked by the value ooo_segment_distance_from_tail ()
+ * reports for each of them, not by their raw numeric value.
+ *
+ * @param f fifo both positions belong to
+ * @param a first position
+ * @param b second position
+ * @return non-zero if a ranks strictly above b, 0 otherwise
+ */
+static inline u8
+position_gt (svm_fifo_t * f, u32 a, u32 b)
+{
+  return (ooo_segment_distance_from_tail (f, b)
+          < ooo_segment_distance_from_tail (f, a));
+}
+
+/**
+ * Gap between two fifo positions.
+ *
+ * Computed as the difference of the values ooo_segment_distance_from_tail ()
+ * reports for posa and posb. The subtraction is unsigned, so the result
+ * wraps when posb ranks above posa.
+ *
+ * @param f fifo both positions belong to
+ * @param posa position the gap is measured to
+ * @param posb position the gap is measured from
+ * @return distance (posa) - distance (posb), as a u32
+ */
+static inline u32
+position_diff (svm_fifo_t * f, u32 posa, u32 posb)
+{
+  u32 dist_a = ooo_segment_distance_from_tail (f, posa);
+  u32 dist_b = ooo_segment_distance_from_tail (f, posb);
+
+  return dist_a - dist_b;
+}
+
+/**
+ * Position just past the last byte of an out-of-order segment.
+ *
+ * The segment's start plus its length is reduced modulo f->nitems, so a
+ * segment that runs past the end of the fifo yields a wrapped position.
+ *
+ * @param f fifo the segment belongs to
+ * @param s segment to inspect
+ * @return (start + length) modulo f->nitems
+ */
+static inline u32
+ooo_segment_end_pos (svm_fifo_t * f, ooo_segment_t * s)
+{
+  return (s->length + s->start) % f->nitems;
+}
u8 *
format_ooo_segment (u8 * s, va_list * args)
s = format (s, "cursize %u nitems %u has_event %d\n",
f->cursize, f->nitems, f->has_event);
- s = format (s, "head %d tail %d\n", f->head, f->tail);
+ s = format (s, " head %d tail %d\n", f->head, f->tail);
if (verbose > 1)
s = format
- (s, "server session %d thread %d client session %d thread %d\n",
+ (s, " server session %d thread %d client session %d thread %d\n",
f->master_session_index, f->master_thread_index,
f->client_session_index, f->client_thread_index);
if (verbose)
{
- s = format (s, "ooo pool %d active elts\n",
+ s = format (s, " ooo pool %d active elts\n",
pool_elts (f->ooo_segments));
- s = format (s, "%U", format_ooo_list, f);
+ if (svm_fifo_has_ooo_data (f))
+ s = format (s, " %U", format_ooo_list, f);
}
return s;
}
if (f == 0)
return 0;
- memset (f, 0, sizeof (*f) + data_size_in_bytes);
+ memset (f, 0, sizeof (*f));
f->nitems = data_size_in_bytes;
f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX;
ooo_segment_add (svm_fifo_t * f, u32 offset, u32 length)
{
ooo_segment_t *s, *new_s, *prev, *next, *it;
- u32 new_index, end_offset, s_sof, s_eof, s_index;
+ u32 new_index, s_end_pos, s_index;
+ u32 normalized_position, normalized_end_position;
+
+ normalized_position = (f->tail + offset) % f->nitems;
+ normalized_end_position = (f->tail + offset + length) % f->nitems;
- end_offset = offset + length;
+ f->ooos_newest = OOO_SEGMENT_INVALID_INDEX;
if (f->ooos_list_head == OOO_SEGMENT_INVALID_INDEX)
{
- s = ooo_segment_new (f, offset, length);
+ s = ooo_segment_new (f, normalized_position, length);
f->ooos_list_head = s - f->ooo_segments;
f->ooos_newest = f->ooos_list_head;
return;
/* Find first segment that starts after new segment */
s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head);
while (s->next != OOO_SEGMENT_INVALID_INDEX
- && offset_leq (ooo_segment_offset (f, s), offset))
+ && position_lt (f, s->start, normalized_position))
s = pool_elt_at_index (f->ooo_segments, s->next);
/* If we have a previous and we overlap it, use it as starting point */
prev = ooo_segment_get_prev (f, s);
- if (prev && offset_leq (offset, ooo_segment_end_offset (f, prev)))
+ if (prev
+ && position_leq (f, normalized_position, ooo_segment_end_pos (f, prev)))
{
s = prev;
- prev = ooo_segment_get_prev (f, s);
- s_sof = ooo_segment_offset (f, s);
- s_eof = ooo_segment_end_offset (f, s);
- goto merge;
+ s_end_pos = ooo_segment_end_pos (f, s);
+
+ /* Widen the previous segment at both ends here: the new data may extend
+ * past it on either side, and the merge tests further down handle only
+ * one end */
+ if (position_lt (f, normalized_position, s->start))
+ {
+ s->start = normalized_position;
+ s->length = position_diff (f, s_end_pos, s->start);
+ }
+ if (position_gt (f, normalized_end_position, s_end_pos))
+ {
+ s->length = position_diff (f, normalized_end_position, s->start);
+ }
+ goto check_tail;
}
s_index = s - f->ooo_segments;
- s_sof = ooo_segment_offset (f, s);
- s_eof = ooo_segment_end_offset (f, s);
+ s_end_pos = ooo_segment_end_pos (f, s);
/* No overlap, add before current segment */
- if (offset_lt (end_offset, s_sof))
+ if (position_lt (f, normalized_end_position, s->start))
{
- new_s = ooo_segment_new (f, offset, length);
+ new_s = ooo_segment_new (f, normalized_position, length);
new_index = new_s - f->ooo_segments;
/* Pool might've moved, get segment again */
f->ooos_list_head = new_index;
}
- new_s->next = s - f->ooo_segments;
+ new_s->next = s_index;
s->prev = new_index;
f->ooos_newest = new_index;
return;
}
/* No overlap, add after current segment */
- else if (offset_gt (offset, s_eof))
+ else if (position_gt (f, normalized_position, s_end_pos))
{
- new_s = ooo_segment_new (f, offset, length);
+ new_s = ooo_segment_new (f, normalized_position, length);
new_index = new_s - f->ooo_segments;
/* Pool might've moved, get segment again */
s = pool_elt_at_index (f->ooo_segments, s_index);
- if (s->next != OOO_SEGMENT_INVALID_INDEX)
- {
- new_s->next = s->next;
- next = pool_elt_at_index (f->ooo_segments, new_s->next);
- next->prev = new_index;
- }
+ ASSERT (s->next == OOO_SEGMENT_INVALID_INDEX);
- new_s->prev = s - f->ooo_segments;
+ new_s->prev = s_index;
s->next = new_index;
f->ooos_newest = new_index;
* Merge needed
*/
-merge:
-
/* Merge at head */
- if (offset_lt (offset, s_sof))
+ if (position_lt (f, normalized_position, s->start))
{
- s->start = offset;
- s->length = s_eof - ooo_segment_offset (f, s);
+ s->start = normalized_position;
+ s->length = position_diff (f, s_end_pos, s->start);
}
- /* Last but overlapping previous */
- else if (offset_gt (end_offset, s_eof))
+ /* Overlapping tail */
+ else if (position_gt (f, normalized_end_position, s_end_pos))
{
- s->length = end_offset - ooo_segment_offset (f, s);
+ s->length = position_diff (f, normalized_end_position, s->start);
}
/* New segment completely covered by current one */
else
{
/* Do Nothing */
+ s = 0;
goto done;
}
+check_tail:
/* The new segment's tail may cover multiple smaller ones */
- if (offset_geq (end_offset, s_eof))
+ if (position_gt (f, normalized_end_position, s_end_pos))
{
/* Remove the completely overlapped segments */
it = (s->next != OOO_SEGMENT_INVALID_INDEX) ?
pool_elt_at_index (f->ooo_segments, s->next) : 0;
- while (it && offset_leq (ooo_segment_end_offset (f, it), end_offset))
+ while (it && position_leq (f, ooo_segment_end_pos (f, it),
+ normalized_end_position))
{
next = (it->next != OOO_SEGMENT_INVALID_INDEX) ?
pool_elt_at_index (f->ooo_segments, it->next) : 0;
}
/* If partial overlap with last, merge */
- if (it && offset_leq (ooo_segment_offset (f, it), end_offset))
+ if (it && position_leq (f, it->start, normalized_end_position))
{
- s->length = ooo_segment_end_offset (f, it) -
- ooo_segment_offset (f, s);
+ s->length =
+ position_diff (f, ooo_segment_end_pos (f, it), s->start);
ooo_segment_del (f, it - f->ooo_segments);
}
}
done:
/* Most recently updated segment */
- f->ooos_newest = s - f->ooo_segments;
+ if (s)
+ f->ooos_newest = s - f->ooo_segments;
}
/**
ooo_segment_try_collect (svm_fifo_t * f, u32 n_bytes_enqueued)
{
ooo_segment_t *s;
- u32 index, bytes = 0, diff;
- u32 cursize, norm_start, nitems;
-
- /* current size has not yet been updated */
- cursize = svm_fifo_max_dequeue (f) + n_bytes_enqueued;
- nitems = f->nitems;
+ u32 index, bytes = 0;
+ i32 diff;
s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head);
+ diff = ooo_segment_distance_to_tail (f, s->start);
- norm_start = s->start % nitems;
- diff = (f->nitems + (i32) (f->tail - norm_start)) % nitems;
+ ASSERT (diff != n_bytes_enqueued);
- if (diff > cursize)
+ if (diff > n_bytes_enqueued)
return 0;
/* If last tail update overlaps one/multiple ooo segments, remove them */
- while (0 < diff && diff < cursize)
+ while (0 <= diff && diff < n_bytes_enqueued)
{
index = s - f->ooo_segments;
/* Segment end is beyond the tail. Advance tail and remove segment */
- if (diff < s->length)
+ if (s->length > diff)
{
- f->tail += s->length - diff;
- f->tail %= f->nitems;
bytes = s->length - diff;
+ f->tail += bytes;
+ f->tail %= f->nitems;
ooo_segment_del (f, index);
break;
}
if (s->next != OOO_SEGMENT_INVALID_INDEX)
{
s = pool_elt_at_index (f->ooo_segments, s->next);
- norm_start = s->start % nitems;
- diff = (f->nitems + (i32) (f->tail - norm_start)) % nitems;
+ diff = ooo_segment_distance_to_tail (f, s->start);
ooo_segment_del (f, index);
}
/* End of search */
}
}
- /* If tail is adjacent to an ooo segment, 'consume' it */
- if (diff == 0)
- {
- bytes = ((nitems - cursize) >= s->length) ? s->length :
- nitems - cursize;
-
- f->tail += bytes;
- f->tail %= nitems;
-
- ooo_segment_del (f, s - f->ooo_segments);
- }
-
+ ASSERT (bytes >= 0 && bytes <= f->nitems);
return bytes;
}
/* read cursize, which can only increase while we're working */
cursize = svm_fifo_max_dequeue (f);
+ f->ooos_newest = OOO_SEGMENT_INVALID_INDEX;
if (PREDICT_FALSE (cursize == f->nitems))
return -2; /* fifo stuffed */
}
else
{
+ ASSERT (0);
+
/* Account for a zero-copy enqueue done elsewhere */
ASSERT (max_bytes <= (nitems - cursize));
f->tail += max_bytes;
total_copy_bytes += ooo_segment_try_collect (f, total_copy_bytes);
/* Atomically increase the queue length */
+ ASSERT (cursize + total_copy_bytes <= nitems);
__sync_fetch_and_add (&f->cursize, total_copy_bytes);
return (total_copy_bytes);
}
+/*
+ * Template handed to foreach_march_variant (). Each expansion defines
+ * fn_<arch>, a wrapper around fn compiled with target (tgt); flatten asks
+ * the compiler to inline fn's call tree into the wrapper so the body itself
+ * is built for that target.
+ *
+ * The wrapper returns int, matching both fn and the
+ * int (*) (svm_fifo_t *, u32, u8 *) pointer svm_fifo_enqueue_nowait () calls
+ * it through. Declaring it uword meant calling through an incompatible
+ * function type and relying on the ABI to recover negative error codes.
+ */
+#define SVM_ENQUEUE_CLONE_TEMPLATE(arch, fn, tgt) \
+  int \
+  __attribute__ ((flatten)) \
+  __attribute__ ((target (tgt))) \
+  CLIB_CPU_OPTIMIZED \
+  fn ## _ ## arch (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here) \
+  { return fn (f, max_bytes, copy_from_here); }
+
+/*
+ * Baseline enqueue implementation: hands the request to
+ * svm_fifo_enqueue_internal () and returns its result unchanged. The two
+ * macro invocations that follow use it as the function to clone per march
+ * variant and as the base name of the generated *_multiarch_select ().
+ */
+static int
+svm_fifo_enqueue_nowait_ma (svm_fifo_t * f, u32 max_bytes,
+                            u8 * copy_from_here)
+{
+  int rv = svm_fifo_enqueue_internal (f, max_bytes, copy_from_here);
+
+  return rv;
+}
+
+foreach_march_variant (SVM_ENQUEUE_CLONE_TEMPLATE, svm_fifo_enqueue_nowait_ma);
+CLIB_MULTIARCH_SELECT_FN (svm_fifo_enqueue_nowait_ma);
+
+/*
+ * Public enqueue entry point.
+ *
+ * With CLIB_DEBUG > 0 the baseline svm_fifo_enqueue_nowait_ma () is called
+ * directly. Otherwise the first call obtains an implementation from
+ * svm_fifo_enqueue_nowait_ma_multiarch_select (), keeps the pointer in a
+ * function-local static, and every call goes through that pointer.
+ * Returns whatever the chosen implementation returns.
+ */
int
svm_fifo_enqueue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here)
{
- return svm_fifo_enqueue_internal (f, max_bytes, copy_from_here);
+#if CLIB_DEBUG > 0
+  return svm_fifo_enqueue_nowait_ma (f, max_bytes, copy_from_here);
+#else
+  /* Filled in on first use, reused afterwards */
+  static int (*enqueue_fn) (svm_fifo_t *, u32, u8 *);
+
+  if (PREDICT_FALSE (!enqueue_fn))
+    enqueue_fn = (void *) svm_fifo_enqueue_nowait_ma_multiarch_select ();
+
+  return enqueue_fn (f, max_bytes, copy_from_here);
+#endif
}
/**
u8 * copy_from_here)
{
u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
- u32 cursize, nitems;
- u32 normalized_offset, offset_from_tail;
+ u32 cursize, nitems, normalized_offset;
+ u32 offset_from_tail;
+
+ f->ooos_newest = OOO_SEGMENT_INVALID_INDEX;
/* read cursize, which can only increase while we're working */
cursize = svm_fifo_max_dequeue (f);
nitems = f->nitems;
- normalized_offset = offset % nitems;
+
+ ASSERT (required_bytes < nitems);
+
+ normalized_offset = (f->tail + offset) % nitems;
/* Will this request fit? */
offset_from_tail = (nitems + normalized_offset - f->tail) % nitems;
}
else
{
+ ASSERT (0);
/* Account for a zero-copy dequeue done elsewhere */
ASSERT (max_bytes <= cursize);
f->head += max_bytes;
total_copy_bytes = max_bytes;
}
+ ASSERT (f->head <= nitems);
+ ASSERT (cursize >= total_copy_bytes);
__sync_fetch_and_sub (&f->cursize, total_copy_bytes);
return (total_copy_bytes);
}
-int
-svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
+static int /* baseline dequeue; forwards to svm_fifo_dequeue_internal () */
+svm_fifo_dequeue_nowait_ma (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
{
return svm_fifo_dequeue_internal (f, max_bytes, copy_here);
}
+/*
+ * Template handed to foreach_march_variant (). Each expansion defines
+ * fn_<arch>, a wrapper around fn compiled with target (tgt); flatten asks
+ * the compiler to inline fn's call tree into the wrapper so the body itself
+ * is built for that target.
+ *
+ * The wrapper returns int, matching both fn and the
+ * int (*) (svm_fifo_t *, u32, u8 *) pointer svm_fifo_dequeue_nowait () calls
+ * it through. Declaring it uword meant calling through an incompatible
+ * function type and relying on the ABI to recover negative error codes.
+ */
+#define SVM_FIFO_DEQUEUE_CLONE_TEMPLATE(arch, fn, tgt) \
+  int \
+  __attribute__ ((flatten)) \
+  __attribute__ ((target (tgt))) \
+  CLIB_CPU_OPTIMIZED \
+  fn ## _ ## arch (svm_fifo_t * f, u32 max_bytes, u8 * copy_here) \
+  { return fn (f, max_bytes, copy_here); }
+
+/* Generate the per-variant clones and the *_multiarch_select () helper */
+foreach_march_variant (SVM_FIFO_DEQUEUE_CLONE_TEMPLATE,
+                       svm_fifo_dequeue_nowait_ma);
+CLIB_MULTIARCH_SELECT_FN (svm_fifo_dequeue_nowait_ma);
+
+/*
+ * Public dequeue entry point.
+ *
+ * With CLIB_DEBUG > 0 the baseline svm_fifo_dequeue_nowait_ma () is called
+ * directly. Otherwise the first call obtains an implementation from
+ * svm_fifo_dequeue_nowait_ma_multiarch_select (), keeps the pointer in a
+ * function-local static, and every call goes through that pointer.
+ * Returns whatever the chosen implementation returns.
+ */
int
-svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,
- u8 * copy_here)
+svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
+{
+#if CLIB_DEBUG > 0
+  return svm_fifo_dequeue_nowait_ma (f, max_bytes, copy_here);
+#else
+  /* Filled in on first use, reused afterwards */
+  static int (*dequeue_fn) (svm_fifo_t *, u32, u8 *);
+
+  if (PREDICT_FALSE (!dequeue_fn))
+    dequeue_fn = (void *) svm_fifo_dequeue_nowait_ma_multiarch_select ();
+
+  return dequeue_fn (f, max_bytes, copy_here);
+#endif
+}
+
+static int
+svm_fifo_peek_ma (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,
+ u8 * copy_here)
{
u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
u32 cursize, nitems, real_head;
/* read cursize, which can only increase while we're working */
cursize = svm_fifo_max_dequeue (f);
- if (PREDICT_FALSE (cursize == 0))
+ if (PREDICT_FALSE (cursize < relative_offset))
return -2; /* nothing in the fifo */
nitems = f->nitems;
real_head = real_head >= nitems ? real_head - nitems : real_head;
/* Number of bytes we're going to copy */
- total_copy_bytes = (cursize < max_bytes) ? cursize : max_bytes;
+ total_copy_bytes = (cursize - relative_offset < max_bytes) ?
+ cursize - relative_offset : max_bytes;
if (PREDICT_TRUE (copy_here != 0))
{
return total_copy_bytes;
}
+/*
+ * Template handed to foreach_march_variant (). Each expansion defines
+ * fn_<arch>, a wrapper around fn compiled with target (tgt); flatten asks
+ * the compiler to inline fn's call tree into the wrapper so the body itself
+ * is built for that target.
+ *
+ * The wrapper returns int, matching both fn and the
+ * int (*) (svm_fifo_t *, u32, u32, u8 *) pointer svm_fifo_peek () calls it
+ * through. Declaring it uword meant calling through an incompatible
+ * function type and relying on the ABI to recover negative error codes.
+ */
+#define SVM_FIFO_PEEK_CLONE_TEMPLATE(arch, fn, tgt) \
+  int \
+  __attribute__ ((flatten)) \
+  __attribute__ ((target (tgt))) \
+  CLIB_CPU_OPTIMIZED \
+  fn ## _ ## arch (svm_fifo_t * f, u32 relative_offset, u32 max_bytes, \
+                   u8 * copy_here) \
+  { return fn (f, relative_offset, max_bytes, copy_here); }
+
+/* Generate the per-variant clones and the *_multiarch_select () helper */
+foreach_march_variant (SVM_FIFO_PEEK_CLONE_TEMPLATE, svm_fifo_peek_ma);
+CLIB_MULTIARCH_SELECT_FN (svm_fifo_peek_ma);
+
+/*
+ * Public peek entry point.
+ *
+ * With CLIB_DEBUG > 0 the baseline svm_fifo_peek_ma () is called directly.
+ * Otherwise the first call obtains an implementation from
+ * svm_fifo_peek_ma_multiarch_select (), keeps the pointer in a
+ * function-local static, and every call goes through that pointer.
+ * Returns whatever the chosen implementation returns.
+ */
+int
+svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,
+               u8 * copy_here)
+{
+#if CLIB_DEBUG > 0
+  return svm_fifo_peek_ma (f, relative_offset, max_bytes, copy_here);
+#else
+  /* Filled in on first use, reused afterwards */
+  static int (*peek_fn) (svm_fifo_t *, u32, u32, u8 *);
+
+  if (PREDICT_FALSE (!peek_fn))
+    peek_fn = (void *) svm_fifo_peek_ma_multiarch_select ();
+
+  return peek_fn (f, relative_offset, max_bytes, copy_here);
+#endif
+}
+
int
svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes)
{
f->head = (f->head == nitems) ? 0 : f->head;
}
+ ASSERT (f->head <= nitems);
+ ASSERT (cursize >= total_drop_bytes);
__sync_fetch_and_sub (&f->cursize, total_drop_bytes);
return total_drop_bytes;