*/
#include <svm/svm_fifo.h>
+#include <vppinfra/cpu.h>
static inline u8
position_lt (svm_fifo_t * f, u32 a, u32 b)
{
- return (ooo_segment_distance_to_tail (f, a)
- < ooo_segment_distance_to_tail (f, b));
+  /* a < b iff b's ooo_segment_distance_from_tail () value exceeds a's */
+  return (ooo_segment_distance_from_tail (f, b)
+          > ooo_segment_distance_from_tail (f, a));
}
static inline u8
position_leq (svm_fifo_t * f, u32 a, u32 b)
{
- return (ooo_segment_distance_to_tail (f, a)
- <= ooo_segment_distance_to_tail (f, b));
+  /* a <= b iff b's ooo_segment_distance_from_tail () value is at least a's */
+  return (ooo_segment_distance_from_tail (f, b)
+          >= ooo_segment_distance_from_tail (f, a));
}
static inline u8
position_gt (svm_fifo_t * f, u32 a, u32 b)
{
- return (ooo_segment_distance_to_tail (f, a)
- > ooo_segment_distance_to_tail (f, b));
+  /* a > b iff b's ooo_segment_distance_from_tail () value is below a's */
+  return (ooo_segment_distance_from_tail (f, b)
+          < ooo_segment_distance_from_tail (f, a));
}
static inline u32
position_diff (svm_fifo_t * f, u32 posa, u32 posb)
{
- return ooo_segment_distance_to_tail (f, posa)
- - ooo_segment_distance_to_tail (f, posb);
+  /* Both positions are mapped through ooo_segment_distance_from_tail ()
+   * before subtracting; the result wraps as an unsigned 32-bit value. */
+  u32 dist_a = ooo_segment_distance_from_tail (f, posa);
+  u32 dist_b = ooo_segment_distance_from_tail (f, posb);
+
+  return dist_a - dist_b;
}
static inline u32
if (f == 0)
return 0;
- memset (f, 0, sizeof (*f) + data_size_in_bytes);
+ memset (f, 0, sizeof (*f));
f->nitems = data_size_in_bytes;
f->ooos_list_head = OOO_SEGMENT_INVALID_INDEX;
{
s = prev;
s_end_pos = ooo_segment_end_pos (f, s);
- goto merge;
+
+ /* Check head and tail now: the new segment may be wider than s at both
+ * ends, so the merge tests further down won't work */
+ if (position_lt (f, normalized_position, s->start))
+ {
+ s->start = normalized_position;
+ s->length = position_diff (f, s_end_pos, s->start);
+ }
+ if (position_gt (f, normalized_end_position, s_end_pos))
+ {
+ s->length = position_diff (f, normalized_end_position, s->start);
+ }
+ goto check_tail;
}
s_index = s - f->ooo_segments;
* Merge needed
*/
-merge:
-
/* Merge at head */
if (position_lt (f, normalized_position, s->start))
{
goto done;
}
+check_tail:
/* The new segment's tail may cover multiple smaller ones */
if (position_gt (f, normalized_end_position, s_end_pos))
{
/* If partial overlap with last, merge */
if (it && position_leq (f, it->start, normalized_end_position))
{
- s->length = ooo_segment_end_pos (f, it) - s->start;
+ s->length =
+ position_diff (f, ooo_segment_end_pos (f, it), s->start);
ooo_segment_del (f, it - f->ooo_segments);
}
}
i32 diff;
s = pool_elt_at_index (f->ooo_segments, f->ooos_list_head);
+ diff = ooo_segment_distance_to_tail (f, s->start);
- diff = (f->tail >= s->start) ?
- f->tail - s->start : f->nitems + f->tail - s->start;
+ ASSERT (diff != n_bytes_enqueued);
if (diff > n_bytes_enqueued)
return 0;
if (s->next != OOO_SEGMENT_INVALID_INDEX)
{
s = pool_elt_at_index (f->ooo_segments, s->next);
- diff = (f->tail >= s->start) ?
- f->tail - s->start : f->nitems + f->tail - s->start;
+ diff = ooo_segment_distance_to_tail (f, s->start);
ooo_segment_del (f, index);
}
/* End of search */
}
}
+ ASSERT (bytes >= 0 && bytes <= f->nitems);
return bytes;
}
}
else
{
+ ASSERT (0);
+
/* Account for a zero-copy enqueue done elsewhere */
ASSERT (max_bytes <= (nitems - cursize));
f->tail += max_bytes;
total_copy_bytes += ooo_segment_try_collect (f, total_copy_bytes);
/* Atomically increase the queue length */
+ ASSERT (cursize + total_copy_bytes <= nitems);
__sync_fetch_and_add (&f->cursize, total_copy_bytes);
return (total_copy_bytes);
}
+/*
+ * Emit a clone of fn named fn_<arch>, built with the target attribute tgt
+ * and with its callees flattened into it.
+ *
+ * The clone returns int, matching both fn and the
+ * int (*) (svm_fifo_t *, u32, u8 *) pointer svm_fifo_enqueue_nowait () calls
+ * through.  Declaring it uword would make that indirect call go through an
+ * incompatible function type, which is undefined behaviour.
+ * NOTE(review): assumes the multiarch selector hands back one of these
+ * clones -- confirm against CLIB_MULTIARCH_SELECT_FN.
+ */
+#define SVM_ENQUEUE_CLONE_TEMPLATE(arch, fn, tgt)                       \
+  int                                                                   \
+  __attribute__ ((flatten))                                             \
+  __attribute__ ((target (tgt)))                                        \
+  CLIB_CPU_OPTIMIZED                                                    \
+  fn ## _ ## arch ( svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here) \
+  { return fn (f, max_bytes, copy_from_here);}
+
+/*
+ * Base variant handed to the march-variant clone template: passes its
+ * arguments straight to svm_fifo_enqueue_internal () and returns the result.
+ */
+static int
+svm_fifo_enqueue_nowait_ma (svm_fifo_t * f, u32 max_bytes,
+                            u8 * copy_from_here)
+{
+  int rv;
+
+  rv = svm_fifo_enqueue_internal (f, max_bytes, copy_from_here);
+  return rv;
+}
+
+foreach_march_variant (SVM_ENQUEUE_CLONE_TEMPLATE,
+ svm_fifo_enqueue_nowait_ma);
+CLIB_MULTIARCH_SELECT_FN (svm_fifo_enqueue_nowait_ma);
+
int
svm_fifo_enqueue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here)
{
- return svm_fifo_enqueue_internal (f, max_bytes, copy_from_here);
+#if !(CLIB_DEBUG > 0)
+  /* Variant returned by the selector; looked up on the first call only */
+  static int (*enqueue_fn) (svm_fifo_t *, u32, u8 *);
+
+  if (PREDICT_FALSE (!enqueue_fn))
+    enqueue_fn = (void *) svm_fifo_enqueue_nowait_ma_multiarch_select ();
+
+  return enqueue_fn (f, max_bytes, copy_from_here);
+#else
+  /* CLIB_DEBUG > 0: skip the selector and call the base variant */
+  return svm_fifo_enqueue_nowait_ma (f, max_bytes, copy_from_here);
+#endif
}
/**
cursize = svm_fifo_max_dequeue (f);
nitems = f->nitems;
+ ASSERT (required_bytes < nitems);
+
normalized_offset = (f->tail + offset) % nitems;
/* Will this request fit? */
}
else
{
+ ASSERT (0);
/* Account for a zero-copy dequeue done elsewhere */
ASSERT (max_bytes <= cursize);
f->head += max_bytes;
total_copy_bytes = max_bytes;
}
+ ASSERT (f->head <= nitems);
+ ASSERT (cursize >= total_copy_bytes);
__sync_fetch_and_sub (&f->cursize, total_copy_bytes);
return (total_copy_bytes);
}
-int
-svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
+static int
+svm_fifo_dequeue_nowait_ma (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
{
return svm_fifo_dequeue_internal (f, max_bytes, copy_here);
}
+/*
+ * Emit a clone of fn named fn_<arch>, built with the target attribute tgt
+ * and with its callees flattened into it.
+ *
+ * The clone returns int, matching both fn and the
+ * int (*) (svm_fifo_t *, u32, u8 *) pointer svm_fifo_dequeue_nowait () calls
+ * through.  Declaring it uword would make that indirect call go through an
+ * incompatible function type, which is undefined behaviour.
+ * NOTE(review): assumes the multiarch selector hands back one of these
+ * clones -- confirm against CLIB_MULTIARCH_SELECT_FN.
+ */
+#define SVM_FIFO_DEQUEUE_CLONE_TEMPLATE(arch, fn, tgt)                  \
+  int                                                                   \
+  __attribute__ ((flatten))                                             \
+  __attribute__ ((target (tgt)))                                        \
+  CLIB_CPU_OPTIMIZED                                                    \
+  fn ## _ ## arch ( svm_fifo_t * f, u32 max_bytes,                      \
+                    u8 * copy_here)                                     \
+  { return fn (f, max_bytes, copy_here);}
+
+foreach_march_variant (SVM_FIFO_DEQUEUE_CLONE_TEMPLATE,
+ svm_fifo_dequeue_nowait_ma);
+CLIB_MULTIARCH_SELECT_FN (svm_fifo_dequeue_nowait_ma);
+
int
-svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,
- u8 * copy_here)
+svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here)
+{
+#if !(CLIB_DEBUG > 0)
+  /* Variant returned by the selector; looked up on the first call only */
+  static int (*dequeue_fn) (svm_fifo_t *, u32, u8 *);
+
+  if (PREDICT_FALSE (!dequeue_fn))
+    dequeue_fn = (void *) svm_fifo_dequeue_nowait_ma_multiarch_select ();
+
+  return dequeue_fn (f, max_bytes, copy_here);
+#else
+  /* CLIB_DEBUG > 0: skip the selector and call the base variant */
+  return svm_fifo_dequeue_nowait_ma (f, max_bytes, copy_here);
+#endif
+}
+
+static int
+svm_fifo_peek_ma (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,
+ u8 * copy_here)
{
u32 total_copy_bytes, first_copy_bytes, second_copy_bytes;
u32 cursize, nitems, real_head;
return total_copy_bytes;
}
+/*
+ * Emit a clone of fn named fn_<arch>, built with the target attribute tgt
+ * and with its callees flattened into it.
+ *
+ * The clone returns int, matching both fn and the
+ * int (*) (svm_fifo_t *, u32, u32, u8 *) pointer svm_fifo_peek () calls
+ * through.  Declaring it uword would make that indirect call go through an
+ * incompatible function type, which is undefined behaviour.
+ * NOTE(review): assumes the multiarch selector hands back one of these
+ * clones -- confirm against CLIB_MULTIARCH_SELECT_FN.
+ */
+#define SVM_FIFO_PEEK_CLONE_TEMPLATE(arch, fn, tgt)                     \
+  int                                                                   \
+  __attribute__ ((flatten))                                             \
+  __attribute__ ((target (tgt)))                                        \
+  CLIB_CPU_OPTIMIZED                                                    \
+  fn ## _ ## arch ( svm_fifo_t * f, u32 relative_offset, u32 max_bytes, \
+                    u8 * copy_here)                                     \
+  { return fn (f, relative_offset, max_bytes, copy_here);}
+
+foreach_march_variant (SVM_FIFO_PEEK_CLONE_TEMPLATE, svm_fifo_peek_ma);
+CLIB_MULTIARCH_SELECT_FN (svm_fifo_peek_ma);
+
+int
+svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes,
+               u8 * copy_here)
+{
+#if !(CLIB_DEBUG > 0)
+  /* Variant returned by the selector; looked up on the first call only */
+  static int (*peek_fn) (svm_fifo_t *, u32, u32, u8 *);
+
+  if (PREDICT_FALSE (!peek_fn))
+    peek_fn = (void *) svm_fifo_peek_ma_multiarch_select ();
+
+  return peek_fn (f, relative_offset, max_bytes, copy_here);
+#else
+  /* CLIB_DEBUG > 0: skip the selector and call the base variant */
+  return svm_fifo_peek_ma (f, relative_offset, max_bytes, copy_here);
+#endif
+}
+
int
svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes)
{
f->head = (f->head == nitems) ? 0 : f->head;
}
+ ASSERT (f->head <= nitems);
+ ASSERT (cursize >= total_drop_bytes);
__sync_fetch_and_sub (&f->cursize, total_drop_bytes);
return total_drop_bytes;