svm: check if fifo free list index is valid on alloc
[vpp.git] / src / svm / fifo_segment.c
index 9c332d6..06b7f06 100644 (file)
 
 #include <svm/fifo_segment.h>
 
+/**
+ * Return a pointer to the slice at @c slice_index in the header's slice
+ * array. The index is not range-checked here.
+ */
+static inline fifo_segment_slice_t *
+fsh_slice_get (fifo_segment_header_t * fsh, u32 slice_index)
+{
+  return fsh->slices + slice_index;
+}
+
+static char *fifo_segment_mem_status_strings[] = {	/* one string per item listed by foreach_segment_mem_status */
+#define _(sym,str) str,		/* keep only the string half of each (sym, str) pair */
+  foreach_segment_mem_status
+#undef _
+};
+
+/**
+ * Fifo segment free space
+ *
+ * Asks the underlying memory manager, dlmalloc, how much space is free in
+ * the segment heap. The query walks dlmalloc's internal data structures,
+ * so it should not be called indiscriminately.
+ *
+ * @param fsh          fifo segment header
+ * @return             number of free bytes
+ */
+static uword
+fsh_free_space (fifo_segment_header_t * fsh)
+{
+  struct dlmallinfo heap_info = mspace_mallinfo (fsh->ssvm_sh->heap);
+
+  return heap_info.fordblks;
+}
+
+/**
+ * Subtract @c size bytes from the segment's free byte counter using an
+ * atomic fetch-and-sub.
+ *
+ * @c size is a uword because callers in this file pass uword allocation
+ * sizes; an int parameter would not preserve values of 2 GiB or more.
+ *
+ * @param fsh          fifo segment header
+ * @param size         number of bytes to subtract
+ */
+static inline void
+fsh_free_bytes_sub (fifo_segment_header_t * fsh, uword size)
+{
+  clib_atomic_fetch_sub_rel (&fsh->n_free_bytes, size);
+}
+
+/**
+ * Free bytes usable by callers: the free byte counter minus the reserved
+ * amount, or 0 when the counter does not exceed the reservation.
+ */
+static inline uword
+fsh_n_free_bytes (fifo_segment_header_t * fsh)
+{
+  uword avail = clib_atomic_load_relax_n (&fsh->n_free_bytes);
+
+  if (avail > fsh->n_reserved_bytes)
+    return avail - fsh->n_reserved_bytes;
+  return 0;
+}
+
+/**
+ * Refresh the free byte counter from the heap's current free space.
+ * Relies on fsh_free_space (), which queries the memory manager.
+ */
+static inline void
+fsh_update_free_bytes (fifo_segment_header_t * fsh)
+{
+  uword heap_free = fsh_free_space (fsh);
+
+  clib_atomic_store_rel_n (&fsh->n_free_bytes, heap_free);
+}
+
+/**
+ * Add @c size bytes to the segment's cached byte counter.
+ *
+ * NOTE(review): size is an int while callers in this file pass u32 byte
+ * counts; confirm counts of 2 GiB or more cannot occur.
+ */
+static inline void
+fsh_cached_bytes_add (fifo_segment_header_t * fsh, int size)
+{
+  (void) clib_atomic_fetch_add_rel (&fsh->n_cached_bytes, size);
+}
+
+/**
+ * Subtract @c size bytes from the segment's cached byte counter.
+ *
+ * NOTE(review): size is an int while callers in this file pass u32 byte
+ * counts; confirm counts of 2 GiB or more cannot occur.
+ */
+static inline void
+fsh_cached_bytes_sub (fifo_segment_header_t * fsh, int size)
+{
+  (void) clib_atomic_fetch_sub_rel (&fsh->n_cached_bytes, size);
+}
+
+/**
+ * Read the segment's cached byte counter with a relaxed atomic load.
+ */
+static inline uword
+fsh_n_cached_bytes (fifo_segment_header_t * fsh)
+{
+  return clib_atomic_load_relax_n (&fsh->n_cached_bytes);
+}
+
+/**
+ * Adjust the segment's active fifo counter by @c inc using an atomic
+ * fetch-and-add.
+ */
+static inline void
+fsh_active_fifos_update (fifo_segment_header_t * fsh, int inc)
+{
+  (void) clib_atomic_fetch_add_rel (&fsh->n_active_fifos, inc);
+}
+
+/**
+ * Read the segment's active fifo counter with a relaxed atomic load.
+ */
+static inline u32
+fsh_n_active_fifos (fifo_segment_header_t * fsh)
+{
+  u32 n_active = clib_atomic_load_relax_n (&fsh->n_active_fifos);
+
+  return n_active;
+}
+
+/**
+ * Sum the virtual_mem counters of all slices in the segment. Each counter
+ * is read with a relaxed atomic load.
+ */
+static inline uword
+fsh_virtual_mem (fifo_segment_header_t * fsh)
+{
+  uword sum = 0;
+  int slice_index = 0;
+
+  while (slice_index < fsh->n_slices)
+    {
+      fifo_segment_slice_t *slice = fsh_slice_get (fsh, slice_index);
+
+      sum += clib_atomic_load_relax_n (&slice->virtual_mem);
+      slice_index++;
+    }
+  return sum;
+}
+
+/**
+ * Add @c n_bytes (which may be negative) to the virtual_mem counter of
+ * slice @c slice_index. The update is a plain, non-atomic addition.
+ */
+void
+fsh_virtual_mem_update (fifo_segment_header_t * fsh, u32 slice_index,
+                       int n_bytes)
+{
+  fsh_slice_get (fsh, slice_index)->virtual_mem += n_bytes;
+}
+
+/**
+ * Flag the segment as memory-limited once the free byte counter is at or
+ * below a threshold: the larger of 1% of the segment size and twice the
+ * reserved bytes. When the flag is first set, the free byte counter is
+ * refreshed from the heap. Does nothing if the flag is already set.
+ */
+static void
+fsh_check_mem (fifo_segment_header_t * fsh)
+{
+  uword low_mark;
+
+  if (!(fsh->flags & FIFO_SEGMENT_F_MEM_LIMIT))
+    {
+      low_mark = clib_max (0.01 * fsh->ssvm_sh->ssvm_size,
+                          2 * fsh->n_reserved_bytes);
+      if (fsh->n_free_bytes <= low_mark)
+        {
+          fsh->flags |= FIFO_SEGMENT_F_MEM_LIMIT;
+          fsh_update_free_bytes (fsh);
+        }
+    }
+}
+
 /**
  * Initialize fifo segment shared header
  */
@@ -22,73 +143,54 @@ int
 fifo_segment_init (fifo_segment_t * fs)
 {
   fifo_segment_header_t *fsh;
+  fifo_segment_slice_t *fss;
   ssvm_shared_header_t *sh;
+  u32 max_chunk_sz;
+  uword max_fifo;
   void *oldheap;
+  int i;
 
   sh = fs->ssvm.sh;
   oldheap = ssvm_push_heap (sh);
 
-  fsh = clib_mem_alloc (sizeof (*fsh));
+  /*
+   * Manually align the fifo segment header to sizeof(uword) = 8 bytes.
+   * Long story made short: the "process-private" fifo segment
+   * is allocated from the main heap, not mmapped. dlmalloc
+   * only guarantees 4-byte alignment, and on aarch64
+   * the fsh can end up 4-byte but not 8-byte aligned.
+   * That eventually causes the atomic op in fifo_segment_update_free_bytes
+   * to backfire.
+   */
+  fsh = clib_mem_alloc_aligned (sizeof (*fsh), sizeof (uword));
   clib_memset (fsh, 0, sizeof (*fsh));
   fs->h = sh->opaque[0] = fsh;
+  fs->n_slices = clib_max (fs->n_slices, 1);	/* always at least one slice */
 
-  ssvm_pop_heap (oldheap);
-
-  sh->ready = 1;
-  return (0);
-}
-
-/**
- * Create a fifo segment in process-private memory
- */
-static int
-fifo_segment_create_process_private (fifo_segment_main_t * sm,
-                                    fifo_segment_create_args_t * a)
-{
-  u32 pagesize = clib_mem_get_page_size ();
-  ssvm_shared_header_t *sh;
-  fifo_segment_t *s;
-  u32 rnd_size = 0;
-  u8 *heap;
-
-  pool_get (sm->segments, s);
-  clib_memset (s, 0, sizeof (*s));
+  fsh->ssvm_sh = fs->ssvm.sh;
+  fsh->n_slices = fs->n_slices;
+  max_fifo = clib_min ((fsh_free_space (fsh) - 4096) / 2,	/* NOTE(review): unsigned subtraction wraps if free space is below 4096 - confirm it cannot be */
+                      FIFO_SEGMENT_MAX_FIFO_SIZE);
+  fsh->max_log2_chunk_size = max_log2 (max_fifo);
 
-  rnd_size = (a->segment_size + (pagesize - 1)) & ~pagesize;
+  fsh->slices = clib_mem_alloc (sizeof (*fss) * fs->n_slices);	/* NOTE(review): result is not checked before the memset below */
+  clib_memset (fsh->slices, 0, sizeof (*fss) * fs->n_slices);
+  max_chunk_sz = fsh->max_log2_chunk_size - FIFO_SEGMENT_MIN_LOG2_FIFO_SIZE;	/* used below as the last free-list index */
 
-#if USE_DLMALLOC == 0
-  heap = mheap_alloc (0, rnd_size);
-  if (heap == 0)
+  for (i = 0; i < fs->n_slices; i++)
     {
-      clib_unix_warning ("mheap alloc");
-      pool_put (sm->segments, s);
-      return -1;
+      fss = fsh_slice_get (fsh, i);
+      vec_validate_init_empty (fss->free_chunks, max_chunk_sz, 0);
+      vec_validate_init_empty (fss->num_chunks, max_chunk_sz, 0);
+      clib_spinlock_init (&fss->chunk_lock);
     }
-  {
-    mheap_t *heap_header;
-    heap_header = mheap_header (heap);
-    heap_header->flags |= MHEAP_FLAG_THREAD_SAFE;
-  }
-#else
-  heap = create_mspace (rnd_size, 1 /* locked */ );
-#endif
 
-  s->ssvm.ssvm_size = rnd_size;
-  s->ssvm.i_am_master = 1;
-  s->ssvm.my_pid = getpid ();
-  s->ssvm.name = format (0, "%s%c", a->segment_name, 0);
-  s->ssvm.requested_va = ~0;
-
-  /* Allocate a [sic] shared memory header, in process memory... */
-  sh = clib_mem_alloc_aligned (sizeof (*sh), CLIB_CACHE_LINE_BYTES);
-  s->ssvm.sh = sh;
-
-  clib_memset (sh, 0, sizeof (*sh));
-  sh->heap = heap;
-
-  fifo_segment_init (s);
-  vec_add1 (a->new_segment_indices, s - sm->segments);
+  ssvm_pop_heap (oldheap);
 
+  fsh->n_free_bytes = fsh_free_space (fsh);	/* measured after the header and slice allocations above */
+  fsh->n_cached_bytes = 0;
+  fsh->n_reserved_bytes = clib_min (0.01 * fsh->n_free_bytes, 256 << 10);	/* 1% of free space, capped at 256 << 10 bytes */
+  sh->ready = 1;
   return (0);
 }
 
@@ -98,33 +200,31 @@ fifo_segment_create_process_private (fifo_segment_main_t * sm,
 int
 fifo_segment_create (fifo_segment_main_t * sm, fifo_segment_create_args_t * a)
 {
-  fifo_segment_t *s;
+  fifo_segment_t *fs;
+  uword baseva;
   int rv;
 
-  if (a->segment_type == SSVM_SEGMENT_PRIVATE)
-    return fifo_segment_create_process_private (sm, a);
-
   /* Allocate a fresh segment */
-  pool_get (sm->segments, s);
-  clib_memset (s, 0, sizeof (*s));
+  pool_get_zero (sm->segments, fs);
 
-  s->ssvm.ssvm_size = a->segment_size;
-  s->ssvm.i_am_master = 1;
-  s->ssvm.my_pid = getpid ();
-  s->ssvm.name = format (0, "%s%c", a->segment_name, 0);
-  s->ssvm.requested_va = sm->next_baseva;
+  baseva = a->segment_type == SSVM_SEGMENT_PRIVATE ? ~0ULL : sm->next_baseva;	/* private segments request ~0; all others use next_baseva */
+  fs->ssvm.ssvm_size = a->segment_size;
+  fs->ssvm.i_am_master = 1;
+  fs->ssvm.my_pid = getpid ();
+  fs->ssvm.name = format (0, "%s%c", a->segment_name, 0);
+  fs->ssvm.requested_va = baseva;
 
-  if ((rv = ssvm_master_init (&s->ssvm, a->segment_type)))
+  if ((rv = ssvm_master_init (&fs->ssvm, a->segment_type)))
     {
-      pool_put (sm->segments, s);
+      pool_put (sm->segments, fs);	/* NOTE(review): ssvm.name is not freed here - confirm ssvm_master_init releases it on failure */
       return (rv);
     }
 
   /* Note: requested_va updated due to seg base addr randomization */
-  sm->next_baseva = s->ssvm.sh->ssvm_va + a->segment_size;
+  sm->next_baseva = fs->ssvm.sh->ssvm_va + fs->ssvm.ssvm_size;	/* advance by the size stored in the ssvm descriptor */
 
-  fifo_segment_init (s);
-  vec_add1 (a->new_segment_indices, s - sm->segments);
+  fifo_segment_init (fs);
+  vec_add1 (a->new_segment_indices, fs - sm->segments);	/* report the new segment's pool index to the caller */
   return (0);
 }
 
@@ -134,30 +234,30 @@ fifo_segment_create (fifo_segment_main_t * sm, fifo_segment_create_args_t * a)
 int
 fifo_segment_attach (fifo_segment_main_t * sm, fifo_segment_create_args_t * a)
 {
-  fifo_segment_t *s;
+  fifo_segment_t *fs;
   int rv;
 
-  pool_get_zero (sm->segments, s);
+  pool_get_zero (sm->segments, fs);
 
-  s->ssvm.ssvm_size = a->segment_size;
-  s->ssvm.my_pid = getpid ();
-  s->ssvm.name = format (0, "%s%c", a->segment_name, 0);
-  s->ssvm.requested_va = sm->next_baseva;
+  fs->ssvm.ssvm_size = a->segment_size;
+  fs->ssvm.my_pid = getpid ();
+  fs->ssvm.name = format (0, "%s%c", a->segment_name, 0);
+  fs->ssvm.requested_va = sm->next_baseva;
   if (a->segment_type == SSVM_SEGMENT_MEMFD)
-    s->ssvm.fd = a->memfd_fd;
+    fs->ssvm.fd = a->memfd_fd;
   else
-    s->ssvm.attach_timeout = sm->timeout_in_seconds;
+    fs->ssvm.attach_timeout = sm->timeout_in_seconds;
 
-  if ((rv = ssvm_slave_init (&s->ssvm, a->segment_type)))
+  if ((rv = ssvm_slave_init (&fs->ssvm, a->segment_type)))
     {
-      _vec_len (s) = vec_len (s) - 1;
+      _vec_len (fs) = vec_len (fs) - 1;
       return (rv);
     }
 
   /* Fish the segment header */
-  s->h = s->ssvm.sh->opaque[0];
+  fs->h = fs->ssvm.sh->opaque[0];
 
-  vec_add1 (a->new_segment_indices, s - sm->segments);
+  vec_add1 (a->new_segment_indices, fs - sm->segments);
   return (0);
 }
 
@@ -175,12 +275,6 @@ fifo_segment_index (fifo_segment_main_t * sm, fifo_segment_t * s)
   return s - sm->segments;
 }
 
-void *
-svm_fifo_segment_heap (fifo_segment_t * seg)
-{
-  return seg->ssvm.sh->heap;
-}
-
 fifo_segment_t *
 fifo_segment_get_segment (fifo_segment_main_t * sm, u32 segment_index)
 {
@@ -190,23 +284,8 @@ fifo_segment_get_segment (fifo_segment_main_t * sm, u32 segment_index)
 void
 fifo_segment_info (fifo_segment_t * seg, char **address, size_t * size)
 {
-  if (ssvm_type (&seg->ssvm) == SSVM_SEGMENT_PRIVATE)
-    {
-#if USE_DLMALLOC == 0
-      mheap_t *heap_header;
-
-      *address = pointer_to_uword (seg->ssvm.sh->heap);
-      heap_header = mheap_header (seg->ssvm.sh->heap);
-      *size = heap_header->max_size;
-#else
-      mspace_get_address_and_size (seg->ssvm.sh->heap, address, size);
-#endif
-    }
-  else
-    {
-      *address = (char *) seg->ssvm.sh->ssvm_va;
-      *size = seg->ssvm.ssvm_size;
-    }
+  *address = (char *) seg->ssvm.sh->ssvm_va;	/* base address comes from the shared header, for every segment type */
+  *size = seg->ssvm.ssvm_size;	/* size comes from the local ssvm descriptor */
 }
 
 void
@@ -217,405 +296,851 @@ fifo_segment_main_init (fifo_segment_main_t * sm, u64 baseva,
   sm->timeout_in_seconds = timeout_in_seconds;
 }
 
-static void
-fifo_init_for_segment (fifo_segment_header_t * fsh, svm_fifo_t * f,
-                      u32 size, u32 fl_index)
+static inline u32		/* maps a byte size to its free-list index */
+fs_freelist_for_size (u32 size)
 {
-  f->freelist_index = fl_index;
-  f->default_chunk.start_byte = 0;
-  f->default_chunk.length = size;
-  f->default_chunk.next = f->start_chunk = f->end_chunk = &f->default_chunk;
-  f->head_chunk = f->tail_chunk = f->ooo_enq = f->ooo_deq = f->start_chunk;
-  f->next = fsh->free_fifos[fl_index];
-  fsh->free_fifos[fl_index] = f;
+  if (PREDICT_FALSE (size < FIFO_SEGMENT_MIN_FIFO_SIZE))	/* undersized requests use the first free list */
+    return 0;
+  return max_log2 (size) - FIFO_SEGMENT_MIN_LOG2_FIFO_SIZE;	/* index is max_log2 (size) relative to the minimum log2 size */
 }
 
-static inline int
-fs_free_list_for_size (u32 size)
+/**
+ * Chunk size, in bytes, served by free list @c fl_index:
+ * 2 ^ (fl_index + FIFO_SEGMENT_MIN_LOG2_FIFO_SIZE).
+ *
+ * The shift is done on an unsigned constant so that a shift count of 31
+ * stays well defined for the u32 result.
+ */
+static inline u32
+fs_freelist_index_to_size (u32 fl_index)
 {
-  return max_log2 (size) - max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE);
+  return 1U << (fl_index + FIFO_SEGMENT_MIN_LOG2_FIFO_SIZE);
 }
 
 static inline int
-fs_chunk_size_is_valid (u32 size)
+fs_chunk_size_is_valid (fifo_segment_header_t * fsh, u32 size)
 {
   /*
    * 4K minimum. It's not likely that anything good will happen
    * with a smaller FIFO.
    */
   return size >= FIFO_SEGMENT_MIN_FIFO_SIZE
-    && size <= FIFO_SEGMENT_MAX_FIFO_SIZE;
+    && size <= (1ULL << fsh->max_log2_chunk_size);	/* upper bound is the per-segment limit stored in the header */
 }
 
-static void
-allocate_new_fifo_batch (fifo_segment_header_t * fsh,
-                        u32 data_size_in_bytes, int chunk_size)
+static svm_fifo_t *		/* pops one fifo header and one chunk of free list fl_index; returns 0 if either is missing */
+fs_try_alloc_fifo_freelist (fifo_segment_slice_t * fss, u32 fl_index)
 {
-  u32 size, rounded_data_size;
-  int i, fl_index;
-  u8 *fifos_mem;
+  svm_fifo_chunk_t *c;
   svm_fifo_t *f;
 
-  rounded_data_size = (1 << (max_log2 (data_size_in_bytes)));
-  fl_index = fs_free_list_for_size (data_size_in_bytes);
+  f = fss->free_fifos;
+  c = fss->free_chunks[fl_index];	/* fl_index is not range-checked here */
 
-  /* Calculate space requirement $$$ round-up data_size_in_bytes */
-  size = (sizeof (*f) + rounded_data_size) * chunk_size;
-
-  /* Allocate fifo space. May fail. */
-  fifos_mem = clib_mem_alloc_aligned_at_offset (size, CLIB_CACHE_LINE_BYTES,
-                                               0 /* align_offset */ ,
-                                               0 /* os_out_of_memory */ );
+  if (!f || !c)
+    return 0;
 
-  /* Out of space.. */
-  if (fifos_mem == 0)
-    return;
+  fss->free_fifos = f->next;
+  fss->free_chunks[fl_index] = c->next;
+  c->next = 0;
+  c->start_byte = 0;
+  memset (f, 0, sizeof (*f));	/* wipe the recycled fifo header before reuse */
+  f->start_chunk = c;
+  f->end_chunk = c;
 
-  /* Carve fifo space */
-  for (i = 0; i < chunk_size; i++)
-    {
-      f = (svm_fifo_t *) fifos_mem;
-      fifo_init_for_segment (fsh, f, rounded_data_size, fl_index);
-      fifos_mem += sizeof (*f) + rounded_data_size;
-    }
+  fss->n_fl_chunk_bytes -= fs_freelist_index_to_size (fl_index);	/* the segment-wide cached byte counter is not touched here */
+  return f;
 }
 
-/**
- * Allocate fifo in fifo segment
- */
-svm_fifo_t *
-fifo_segment_alloc_fifo (fifo_segment_t * fs, u32 data_bytes,
-                        fifo_segment_ftype_t ftype)
+/**
+ * Try to assemble at least @c data_bytes worth of chunks from the slice's
+ * free lists.
+ *
+ * Starts one free list below the one matching the request and walks down
+ * to smaller lists as they run empty. If the smallest list is exhausted
+ * before the request is covered, every chunk taken is returned and, as a
+ * last resort, a single chunk from the next larger free list is used if
+ * that list exists and is not empty.
+ *
+ * The slice's free-list byte counter and the segment's cached byte counter
+ * are debited only on success.
+ *
+ * @param fsh          fifo segment header
+ * @param fss          slice to allocate from
+ * @param data_bytes   number of bytes requested
+ * @return             head of the linked list of chunks, or 0 on failure
+ */
+svm_fifo_chunk_t *
+fs_try_alloc_multi_chunk (fifo_segment_header_t * fsh,
+                         fifo_segment_slice_t * fss, u32 data_bytes)
 {
-  fifo_segment_header_t *fsh;
-  ssvm_shared_header_t *sh;
-  svm_fifo_t *f = 0;
-  void *oldheap;
-  int fl_index;
+  u32 fl_index, fl_size, n_alloc = 0, req_bytes = data_bytes;
+  svm_fifo_chunk_t *c, *first = 0, *next;
+
+  fl_index = fs_freelist_for_size (req_bytes);
+  if (fl_index > 0)
+    fl_index -= 1;
 
-  if (!fs_chunk_size_is_valid (data_bytes))
+  fl_size = fs_freelist_index_to_size (fl_index);
+
+  while (req_bytes)
     {
-      clib_warning ("fifo size out of range %d", data_bytes);
-      return 0;
+      c = fss->free_chunks[fl_index];
+      if (c)
+        {
+          fss->free_chunks[fl_index] = c->next;
+          c->next = first;
+          first = c;
+          n_alloc += fl_size;
+          req_bytes -= clib_min (fl_size, req_bytes);
+        }
+      else
+        {
+          /* Failed to allocate with smaller chunks */
+          if (fl_index == 0)
+            {
+              /* Return every chunk taken so far to its free list.
+               * n_fl_chunk_bytes is only debited on success below, so it
+               * must not be credited here. */
+              c = first;
+              while (c)
+                {
+                  fl_index = fs_freelist_for_size (c->length);
+                  next = c->next;
+                  c->next = fss->free_chunks[fl_index];
+                  fss->free_chunks[fl_index] = c;
+                  c = next;
+                }
+              n_alloc = 0;
+              first = 0;
+              req_bytes = data_bytes;
+              /* Last resort: one chunk from the next larger free list,
+               * provided that list exists */
+              fl_index = fs_freelist_for_size (data_bytes) + 1;
+              if (fl_index < vec_len (fss->free_chunks)
+                  && fss->free_chunks[fl_index])
+                {
+                  fl_size = fs_freelist_index_to_size (fl_index);
+                  continue;
+                }
+
+              return 0;
+            }
+          fl_index -= 1;
+          fl_size = fl_size >> 1;
+        }
     }
 
-  fl_index = fs_free_list_for_size (data_bytes);
-
-  sh = fs->ssvm.sh;
-  ssvm_lock_non_recursive (sh, 1);
+  fss->n_fl_chunk_bytes -= n_alloc;
+  fsh_cached_bytes_sub (fsh, n_alloc);
+  return first;
+}
 
-  fsh = fs->h;
-  vec_validate_init_empty (fsh->free_fifos, fl_index, 0);
-  f = fsh->free_fifos[fl_index];
+/**
+ * Try to build a fifo whose data space is assembled from several free-list
+ * chunks.
+ *
+ * The fifo header is taken from the slice's free fifo list or, if that is
+ * empty, allocated on the segment heap. Chunks are then collected starting
+ * one free list below the one matching the request, walking down to
+ * smaller lists as they run empty. If the smallest list is exhausted first,
+ * every chunk taken is returned and a single chunk from the next larger
+ * free list is tried, if that list exists and is not empty. On failure the
+ * fifo header is pushed onto the slice's free fifo list.
+ *
+ * The slice's free-list byte counter and the segment's cached byte counter
+ * are debited only on success.
+ *
+ * @param fsh          fifo segment header
+ * @param fss          slice to allocate from
+ * @param data_bytes   number of bytes requested
+ * @return             fifo with start/end chunks set, or 0 on failure
+ */
+static svm_fifo_t *
+fs_try_alloc_fifo_freelist_multi_chunk (fifo_segment_header_t * fsh,
+                                       fifo_segment_slice_t * fss,
+                                       u32 data_bytes)
+{
+  svm_fifo_chunk_t *c, *first = 0, *last = 0, *next;
+  u32 fl_index, fl_size, n_alloc = 0;
+  svm_fifo_t *f;
 
-  /* Try to allocate batch of fifos */
-  if (PREDICT_FALSE (!f))
+  f = fss->free_fifos;
+  if (!f)
     {
-      oldheap = ssvm_push_heap (sh);
-      allocate_new_fifo_batch (fsh, data_bytes,
-                              FIFO_SEGMENT_ALLOC_BATCH_SIZE);
+      void *oldheap = ssvm_push_heap (fsh->ssvm_sh);
+      f = clib_mem_alloc_aligned (sizeof (*f), CLIB_CACHE_LINE_BYTES);
       ssvm_pop_heap (oldheap);
-      f = fsh->free_fifos[fl_index];
+      if (!f)
+        return 0;
+      memset (f, 0, sizeof (*f));
+      fsh_free_bytes_sub (fsh, sizeof (*f));
     }
-  if (PREDICT_TRUE (f != 0))
+  else
     {
-      fsh->free_fifos[fl_index] = f->next;
-      /* (re)initialize the fifo, as in svm_fifo_create */
-      memset (f, 0, sizeof (*f));
-      svm_fifo_init (f, data_bytes);
-      goto found;
+      fss->free_fifos = f->next;
     }
 
-  /* Failed to allocate batch, try just one fifo. This can also fail,
-   * in which case, create another segment */
-  oldheap = ssvm_push_heap (sh);
-  f = svm_fifo_create (data_bytes);
-  ssvm_pop_heap (oldheap);
-
-  if (PREDICT_FALSE (f == 0))
-    goto done;
+  fl_index = fs_freelist_for_size (data_bytes);
+  if (fl_index > 0)
+    fl_index -= 1;
 
-  f->freelist_index = fl_index;
+  fl_size = fs_freelist_index_to_size (fl_index);
 
-found:
-  /* If rx fifo type add to active fifos list. When cleaning up segment,
-   * we need a list of active sessions that should be disconnected. Since
-   * both rx and tx fifos keep pointers to the session, it's enough to track
-   * only one. */
-  if (ftype == FIFO_SEGMENT_RX_FIFO)
+  while (data_bytes)
     {
-      if (fsh->fifos)
+      c = fss->free_chunks[fl_index];
+      if (c)
         {
-         fsh->fifos->prev = f;
-         f->next = fsh->fifos;
+          fss->free_chunks[fl_index] = c->next;
+          if (!last)
+            last = c;
+          c->next = first;
+          first = c;
+          n_alloc += fl_size;
+          data_bytes -= clib_min (fl_size, data_bytes);
+        }
+      else
+        {
+          /* Failed to allocate with smaller chunks */
+          if (fl_index == 0)
+            {
+              /* Return every chunk taken so far to its free list and
+               * restore the outstanding byte count. n_fl_chunk_bytes is
+               * only debited on success below, so it must not be
+               * credited here. */
+              c = first;
+              while (c)
+                {
+                  fl_index = fs_freelist_for_size (c->length);
+                  fl_size = fs_freelist_index_to_size (fl_index);
+                  next = c->next;
+                  c->next = fss->free_chunks[fl_index];
+                  fss->free_chunks[fl_index] = c;
+                  n_alloc -= fl_size;
+                  data_bytes += fl_size;
+                  c = next;
+                }
+              first = last = 0;
+              /* Last resort: one chunk from the next larger free list,
+               * provided that list exists */
+              fl_index = fs_freelist_for_size (data_bytes) + 1;
+              if (fl_index < vec_len (fss->free_chunks)
+                  && fss->free_chunks[fl_index])
+                {
+                  fl_size = fs_freelist_index_to_size (fl_index);
+                  continue;
+                }
+
+              /* Nothing worked: keep the fifo header for later reuse */
+              f->next = fss->free_fifos;
+              fss->free_fifos = f;
+              return 0;
+            }
+          fl_index -= 1;
+          fl_size = fl_size >> 1;
         }
-      fsh->fifos = f;
-      f->flags |= SVM_FIFO_F_LL_TRACKED;
     }
-  fsh->n_active_fifos++;
 
-done:
-  ssvm_unlock_non_recursive (sh);
-  return (f);
+  f->start_chunk = first;
+  f->end_chunk = last;
+  fss->n_fl_chunk_bytes -= n_alloc;
+  fsh_cached_bytes_sub (fsh, n_alloc);
+  return f;
 }
 
-/**
- * Free fifo allocated in fifo segment
- */
-void
-fifo_segment_free_fifo (fifo_segment_t * fs, svm_fifo_t * f)
+static int			/* returns 0 on success, -1 if the heap allocation fails */
+fsh_try_alloc_chunk_batch (fifo_segment_header_t * fsh,
+                          fifo_segment_slice_t * fss,
+                          u32 fl_index, u32 batch_size)
 {
-  fifo_segment_header_t *fsh;
-  ssvm_shared_header_t *sh;
-  int fl_index;
-
-  ASSERT (f->refcnt > 0);
-
-  if (--f->refcnt > 0)
-    return;
+  u32 rounded_data_size;
+  svm_fifo_chunk_t *c;
+  void *oldheap;
+  uword size;
+  u8 *cmem;
+  int i;
 
-  sh = fs->ssvm.sh;
-  fsh = fs->h;
-  fl_index = f->freelist_index;
+  rounded_data_size = fs_freelist_index_to_size (fl_index);
+  size = (uword) (sizeof (*c) + rounded_data_size) * batch_size;	/* one chunk header plus data per batch entry */
 
-  ASSERT (fl_index < vec_len (fsh->free_fifos));
+  oldheap = ssvm_push_heap (fsh->ssvm_sh);
+  cmem = clib_mem_alloc_aligned_at_offset (size, CLIB_CACHE_LINE_BYTES,
+                                          0 /* align_offset */ ,
+                                          0 /* os_out_of_memory */ );
+  ssvm_pop_heap (oldheap);
 
-  ssvm_lock_non_recursive (sh, 2);
+  /* Out of space.. */
+  if (cmem == 0)
+    return -1;
 
-  /* Remove from active list. Only rx fifos are tracked */
-  if (f->flags & SVM_FIFO_F_LL_TRACKED)
+  /* Carve chunk headers and data, pushing each chunk on the free list */
+  for (i = 0; i < batch_size; i++)
     {
-      if (f->prev)
-       f->prev->next = f->next;
-      else
-       fsh->fifos = f->next;
-      if (f->next)
-       f->next->prev = f->prev;
-      f->flags &= ~SVM_FIFO_F_LL_TRACKED;
+      c = (svm_fifo_chunk_t *) cmem;
+      c->start_byte = 0;
+      c->length = rounded_data_size;
+      c->enq_rb_index = RBTREE_TNIL_INDEX;
+      c->deq_rb_index = RBTREE_TNIL_INDEX;
+      c->next = fss->free_chunks[fl_index];
+      fss->free_chunks[fl_index] = c;
+      cmem += sizeof (*c) + rounded_data_size;
     }
 
-  /* Add to free list */
-  f->next = fsh->free_fifos[fl_index];
-  f->prev = 0;
-  fsh->free_fifos[fl_index] = f;
+  fss->num_chunks[fl_index] += batch_size;
+  fss->n_fl_chunk_bytes += batch_size * rounded_data_size;	/* NOTE(review): u32 * u32 product - confirm it cannot exceed 32 bits */
+  fsh_cached_bytes_add (fsh, batch_size * rounded_data_size);
+  fsh_free_bytes_sub (fsh, size);	/* headers count against free bytes as well as data */
 
-  /* If fifo has more chunks, free them */
-  if (f->flags & SVM_FIFO_F_MULTI_CHUNK)
-    {
-      svm_fifo_chunk_t *cur, *next;
-      void *oldheap;
+  return 0;
+}
 
-      next = f->start_chunk->next;
-      while (next != f->start_chunk)
-       {
-         cur = next;
-         next = next->next;
-         fl_index = fs_free_list_for_size (cur->length);
-         cur->next = fsh->free_chunks[fl_index];
-         fsh->free_chunks[fl_index] = cur;
-       }
-      oldheap = ssvm_push_heap (sh);
-      svm_fifo_free_chunk_lookup (f);
-      ssvm_pop_heap (oldheap);
-    }
+static int			/* returns 0 on success, -1 if the heap allocation fails */
+fs_try_alloc_fifo_batch (fifo_segment_header_t * fsh,
+                        fifo_segment_slice_t * fss,
+                        u32 fl_index, u32 batch_size)
+{
+  u32 hdrs, rounded_data_size;
+  svm_fifo_chunk_t *c;
+  svm_fifo_t *f;
+  void *oldheap;
+  uword size;
+  u8 *fmem;
+  int i;
 
-  /* not allocated on segment heap */
-  svm_fifo_free_ooo_data (f);
+  rounded_data_size = fs_freelist_index_to_size (fl_index);
+  hdrs = sizeof (*f) + sizeof (*c);	/* each entry carries a fifo header and a chunk header */
+  size = (uword) (hdrs + rounded_data_size) * batch_size;
 
-  if (CLIB_DEBUG)
+  oldheap = ssvm_push_heap (fsh->ssvm_sh);
+  fmem = clib_mem_alloc_aligned_at_offset (size, CLIB_CACHE_LINE_BYTES,
+                                          0 /* align_offset */ ,
+                                          0 /* os_out_of_memory */ );
+  ssvm_pop_heap (oldheap);
+
+  /* Out of space.. */
+  if (fmem == 0)
+    return -1;
+
+  /* Carve fifo + chunk space */
+  for (i = 0; i < batch_size; i++)
     {
-      f->master_session_index = ~0;
-      f->master_thread_index = ~0;
+      f = (svm_fifo_t *) fmem;
+      memset (f, 0, sizeof (*f));
+      f->next = fss->free_fifos;	/* fifo header goes on the slice's free fifo list */
+      fss->free_fifos = f;
+      c = (svm_fifo_chunk_t *) (fmem + sizeof (*f));	/* chunk header follows the fifo header directly */
+      c->start_byte = 0;
+      c->length = rounded_data_size;
+      c->enq_rb_index = RBTREE_TNIL_INDEX;
+      c->deq_rb_index = RBTREE_TNIL_INDEX;
+      c->next = fss->free_chunks[fl_index];
+      fss->free_chunks[fl_index] = c;
+      fmem += hdrs + rounded_data_size;
     }
 
-  fsh->n_active_fifos--;
-  ssvm_unlock_non_recursive (sh);
+  fss->num_chunks[fl_index] += batch_size;
+  fss->n_fl_chunk_bytes += batch_size * rounded_data_size;	/* NOTE(review): u32 * u32 product - confirm it cannot exceed 32 bits */
+  fsh_cached_bytes_add (fsh, batch_size * rounded_data_size);
+  fsh_free_bytes_sub (fsh, size);
+
+  return 0;
 }
 
 /**
- * Pre-allocates fifo pairs in fifo segment
+ * Try to allocate new fifo
+ *
+ * Tries the following steps in order:
+ * - grab fifo and chunk from freelists
+ * - batch fifo and chunk allocation
+ * - single fifo allocation
+ * - grab multiple fifo chunks from freelists
  */
-void
-fifo_segment_preallocate_fifo_pairs (fifo_segment_t * fs,
-                                    u32 rx_fifo_size, u32 tx_fifo_size,
-                                    u32 * n_fifo_pairs)
+static svm_fifo_t *
+fs_try_alloc_fifo (fifo_segment_header_t * fsh, fifo_segment_slice_t * fss,
+                  u32 data_bytes)
 {
-  u32 rx_rounded_data_size, tx_rounded_data_size, pair_size;
-  u32 rx_fifos_size, tx_fifos_size, pairs_to_allocate;
-  ssvm_shared_header_t *sh = fs->ssvm.sh;
-  fifo_segment_header_t *fsh = fs->h;
-  int i, rx_fl_index, tx_fl_index;
-  u8 *rx_fifo_mem, *tx_fifo_mem;
-  uword space_available;
-  void *oldheap;
-  svm_fifo_t *f;
+  u32 fifo_sz, fl_index;
+  svm_fifo_t *f = 0;
+  uword n_free_bytes;
+  u32 min_size;
 
-  /* Parameter check */
-  if (rx_fifo_size == 0 || tx_fifo_size == 0 || *n_fifo_pairs == 0)
-    return;
+  min_size = clib_max ((fsh->pct_first_alloc * data_bytes) / 100, 4096);	/* first allocation: pct_first_alloc percent of the request, at least 4096 */
+  fl_index = fs_freelist_for_size (min_size);
 
-  if (!fs_chunk_size_is_valid (rx_fifo_size))
-    {
-      clib_warning ("rx fifo_size out of range %d", rx_fifo_size);
-      return;
-    }
+  if (fl_index >= vec_len (fss->free_chunks))	/* no such free list in this slice; checked before taking the lock */
+    return 0;
 
-  if (!fs_chunk_size_is_valid (tx_fifo_size))
+  clib_spinlock_lock (&fss->chunk_lock);
+
+  if (fss->free_fifos && fss->free_chunks[fl_index])
     {
-      clib_warning ("tx fifo_size out of range %d", tx_fifo_size);
-      return;
+      f = fs_try_alloc_fifo_freelist (fss, fl_index);
+      if (f)
+       {
+         fsh_cached_bytes_sub (fsh, fs_freelist_index_to_size (fl_index));
+         goto done;
+       }
     }
 
-  rx_rounded_data_size = (1 << (max_log2 (rx_fifo_size)));
-  rx_fl_index = fs_free_list_for_size (rx_fifo_size);
-  tx_rounded_data_size = (1 << (max_log2 (tx_fifo_size)));
-  tx_fl_index = fs_free_list_for_size (tx_fifo_size);
-
-  /* Calculate space requirements */
-  pair_size = 2 * sizeof (*f) + rx_rounded_data_size + tx_rounded_data_size;
-#if USE_DLMALLOC == 0
-  space_available = fs->ssvm.ssvm_size - mheap_bytes (sh->heap);
-#else
-  space_available = fs->ssvm.ssvm_size - mspace_usable_size (sh->heap);
-#endif
-
-  pairs_to_allocate = clib_min (space_available / pair_size, *n_fifo_pairs);
-  rx_fifos_size = (sizeof (*f) + rx_rounded_data_size) * pairs_to_allocate;
-  tx_fifos_size = (sizeof (*f) + tx_rounded_data_size) * pairs_to_allocate;
+  fifo_sz = sizeof (svm_fifo_t) + sizeof (svm_fifo_chunk_t);
+  fifo_sz += 1 << max_log2 (min_size);	/* headers plus data rounded up to a power of two */
+  n_free_bytes = fsh_n_free_bytes (fsh);
 
-  vec_validate_init_empty (fsh->free_fifos,
-                          clib_max (rx_fl_index, tx_fl_index), 0);
-
-  oldheap = ssvm_push_heap (sh);
-
-  /* Allocate rx and tx fifo memory. May fail. */
-  rx_fifo_mem = clib_mem_alloc_aligned_at_offset (rx_fifos_size,
-                                                 CLIB_CACHE_LINE_BYTES,
-                                                 0 /* align_offset */ ,
-                                                 0 /* os_out_of_memory */ );
-  tx_fifo_mem = clib_mem_alloc_aligned_at_offset (tx_fifos_size,
-                                                 CLIB_CACHE_LINE_BYTES,
-                                                 0 /* align_offset */ ,
-                                                 0 /* os_out_of_memory */ );
-
-  /* Make sure it worked. Clean up if it didn't... */
-  if (rx_fifo_mem == 0 || tx_fifo_mem == 0)
-    {
-      rx_fifo_mem ? clib_mem_free (rx_fifo_mem) : clib_mem_free (tx_fifo_mem);
-      clib_warning ("fifo preallocation failure: rx size %d tx size %u "
-                   "npairs %d", rx_fifo_size, tx_fifo_size, *n_fifo_pairs);
+  if (fifo_sz * FIFO_SEGMENT_ALLOC_BATCH_SIZE < n_free_bytes)	/* only batch when a whole batch fits in the usable free bytes */
+    {
+      if (!fs_try_alloc_fifo_batch (fsh, fss, fl_index,	/* the batch helper returns 0 on success */
+                                   FIFO_SEGMENT_ALLOC_BATCH_SIZE))
+       {
+         f = fs_try_alloc_fifo_freelist (fss, fl_index);
+         if (f)
+           {
+             fsh_cached_bytes_sub (fsh,
+                                   fs_freelist_index_to_size (fl_index));
+             goto done;
+           }
+       }
+      else
+       {
+         fsh_check_mem (fsh);	/* batch allocation failed: re-check memory state and re-read free bytes */
+         n_free_bytes = fsh_n_free_bytes (fsh);
+       }
+    }
+  if (fifo_sz <= n_free_bytes)
+    {
+      void *oldheap = ssvm_push_heap (fsh->ssvm_sh);
+      f = svm_fifo_alloc (min_size);
       ssvm_pop_heap (oldheap);
-      return;
+      if (f)
+       {
+         clib_atomic_fetch_add_rel (&fss->num_chunks[fl_index], 1);
+         fsh_free_bytes_sub (fsh, fifo_sz);
+         goto done;
+       }
+      fsh_check_mem (fsh);
     }
+  /* All failed, try to allocate min of data bytes and fifo sz */
+  fifo_sz = clib_min (fifo_sz, data_bytes);
+  if (fifo_sz <= fss->n_fl_chunk_bytes)
+    f = fs_try_alloc_fifo_freelist_multi_chunk (fsh, fss, fifo_sz);
 
-  /* Carve rx and tx fifo memory */
-  for (i = 0; i < pairs_to_allocate; i++)
-    {
-      f = (svm_fifo_t *) rx_fifo_mem;
-      fifo_init_for_segment (fsh, f, rx_rounded_data_size, rx_fl_index);
-      rx_fifo_mem += sizeof (*f) + rx_rounded_data_size;
+done:
+  clib_spinlock_unlock (&fss->chunk_lock);
 
-      f = (svm_fifo_t *) tx_fifo_mem;
-      fifo_init_for_segment (fsh, f, tx_rounded_data_size, tx_fl_index);
-      tx_fifo_mem += sizeof (*f) + tx_rounded_data_size;
+  if (f)
+    {
+      f->size = data_bytes;	/* record the requested size, not the size of the first allocation */
+      f->fs_hdr = fsh;
     }
-
-  /* Account for the pairs allocated */
-  *n_fifo_pairs -= pairs_to_allocate;
-  ssvm_pop_heap (oldheap);
+  return f;
 }
 
-int
-fifo_segment_grow_fifo (fifo_segment_t * fs, svm_fifo_t * f, u32 chunk_size)
+svm_fifo_chunk_t *		/* returns a chunk, or a list of chunks, covering chunk_size; 0 on failure */
+fsh_alloc_chunk (fifo_segment_header_t * fsh, u32 slice_index, u32 chunk_size)
 {
-  ssvm_shared_header_t *sh;
+  fifo_segment_slice_t *fss;
   svm_fifo_chunk_t *c;
-  void *oldheap;
   int fl_index;
 
-  if (!fs_chunk_size_is_valid (chunk_size))
-    {
-      clib_warning ("chunk size out of range %d", chunk_size);
-      return 0;
-    }
+  fl_index = fs_freelist_for_size (chunk_size);
+  fss = fsh_slice_get (fsh, slice_index);
 
-  fl_index = fs_free_list_for_size (chunk_size);
+  clib_spinlock_lock (&fss->chunk_lock);
 
-  sh = fs->ssvm.sh;
-  ssvm_lock_non_recursive (sh, 1);
+  ASSERT (vec_len (fss->free_chunks) > fl_index);	/* NOTE(review): the only range check on fl_index - confirm callers bound chunk_size if ASSERT compiles out */
+  c = fss->free_chunks[fl_index];
 
-  vec_validate_init_empty (fs->h->free_chunks, fl_index, 0);
-  c = fs->h->free_chunks[fl_index];
+  if (c)
+    {
+      fss->free_chunks[fl_index] = c->next;
+      c->next = 0;
+      fss->n_fl_chunk_bytes -= fs_freelist_index_to_size (fl_index);
+      fsh_cached_bytes_sub (fsh, fs_freelist_index_to_size (fl_index));
+    }
+  else
+    {
+      void *oldheap;
+      uword n_free;
+      u32 batch;
 
-  oldheap = ssvm_push_heap (sh);
+      chunk_size = fs_freelist_index_to_size (fl_index);	/* round the request up to the free list's chunk size */
+      n_free = fsh_n_free_bytes (fsh);
 
-  if (!c)
-    {
-      c = svm_fifo_chunk_alloc (chunk_size);
-      if (!c)
+      if (chunk_size <= n_free)
        {
+         oldheap = ssvm_push_heap (fsh->ssvm_sh);
+         c = svm_fifo_chunk_alloc (chunk_size);
          ssvm_pop_heap (oldheap);
-         return -1;
+
+         if (c)
+           {
+             clib_atomic_fetch_add_rel (&fss->num_chunks[fl_index], 1);
+             fsh_free_bytes_sub (fsh, chunk_size + sizeof (*c));
+             goto done;
+           }
+
+         fsh_check_mem (fsh);
+         n_free = fsh_n_free_bytes (fsh);
        }
+      if (chunk_size <= fss->n_fl_chunk_bytes)	/* enough bytes sit on the free lists to try a multi-chunk allocation */
+       {
+         c = fs_try_alloc_multi_chunk (fsh, fss, chunk_size);
+         if (c)
+           goto done;
+         batch = n_free / FIFO_SEGMENT_MIN_FIFO_SIZE;
+         if (!batch || fsh_try_alloc_chunk_batch (fsh, fss, 0, batch))
+           {
+             fsh_check_mem (fsh);
+             goto done;
+           }
+       }
+      if (chunk_size <= fss->n_fl_chunk_bytes + n_free)	/* n_free is not re-read after the batch allocation above */
+       {
+         u32 min_size = FIFO_SEGMENT_MIN_FIFO_SIZE;
+
+         batch = (chunk_size - fss->n_fl_chunk_bytes) / min_size;
+         batch = clib_min (batch + 1, n_free / min_size);
+         if (fsh_try_alloc_chunk_batch (fsh, fss, 0, batch))
+           {
+             fsh_check_mem (fsh);
+             goto done;
+           }
+         c = fs_try_alloc_multi_chunk (fsh, fss, chunk_size);
+       }
+    }
+
+done:
+
+  clib_spinlock_unlock (&fss->chunk_lock);
+
+  return c;
+}
+
+static void /* Push every chunk of the chain starting at c onto fss's free lists, under chunk_lock. */
+fsh_slice_collect_chunks (fifo_segment_header_t * fsh,
+                         fifo_segment_slice_t * fss, svm_fifo_chunk_t * c)
+{
+  svm_fifo_chunk_t *next;
+  int fl_index;
+  u32 n_collect = 0; /* bytes returned to the free lists by this call */
+
+  clib_spinlock_lock (&fss->chunk_lock);
+
+  while (c)
+    {
+      next = c->next; /* saved because c->next is overwritten below */
+      fl_index = fs_freelist_for_size (c->length); /* list is picked from the chunk's length */
+      c->next = fss->free_chunks[fl_index];
+      c->enq_rb_index = RBTREE_TNIL_INDEX;
+      c->deq_rb_index = RBTREE_TNIL_INDEX;
+      fss->free_chunks[fl_index] = c;
+      n_collect += fs_freelist_index_to_size (fl_index); /* counts the list's size, not c->length */
+      c = next;
+    }
+
+  fss->n_fl_chunk_bytes += n_collect; /* per-slice counter */
+  fsh_cached_bytes_add (fsh, n_collect); /* segment-wide counter */
+
+  clib_spinlock_unlock (&fss->chunk_lock);
+}
+
+void /* Return the chunk chain starting at c to the free lists of slice slice_index. */
+fsh_collect_chunks (fifo_segment_header_t * fsh, u32 slice_index,
+                   svm_fifo_chunk_t * c)
+{
+  fifo_segment_slice_t *fss;
+  fss = fsh_slice_get (fsh, slice_index); /* slice_index is used unchecked as an index into fsh->slices */
+  fsh_slice_collect_chunks (fsh, fss, c);
+}
+
+/* Insert f at the head of fss's active fifo list; both links of f are rewritten so stale values cannot leak in. */
+static inline void
+fss_fifo_add_active_list (fifo_segment_slice_t * fss, svm_fifo_t * f)
+{
+  f->prev = 0; /* f becomes the head, so it has no predecessor */
+  f->next = fss->fifos; /* old head, or 0 when the list was empty */
+  if (fss->fifos)
+    fss->fifos->prev = f;
+  fss->fifos = f;
+}
+
+static inline void /* Unlink f from fss's active fifo list; does nothing unless f carries SVM_FIFO_F_LL_TRACKED. */
+fss_fifo_del_active_list (fifo_segment_slice_t * fss, svm_fifo_t * f)
+{
+  if (f->flags & SVM_FIFO_F_LL_TRACKED)
+    {
+      if (f->prev)
+       f->prev->next = f->next;
+      else
+       fss->fifos = f->next; /* f was the head of the list */
+      if (f->next)
+       f->next->prev = f->prev;
+    } /* f->next, f->prev and the flag itself are left unchanged here */
+}
+
+/**
+ * Allocate fifo of data_bytes from slice slice_index of the fifo segment. Returns the fifo, or 0 if the size is too large or allocation fails.
+ */
+svm_fifo_t *
+fifo_segment_alloc_fifo_w_slice (fifo_segment_t * fs, u32 slice_index,
+                                u32 data_bytes, fifo_segment_ftype_t ftype)
+{
+  fifo_segment_header_t *fsh = fs->h;
+  fifo_segment_slice_t *fss;
+  svm_fifo_t *f = 0;
+
+  ASSERT (slice_index < fs->n_slices);
+
+  if (PREDICT_FALSE (data_bytes > 1 << fsh->max_log2_chunk_size)) /* NOTE(review): int shift - confirm max_log2_chunk_size stays below 31 */
+    return 0;
+
+  fss = fsh_slice_get (fsh, slice_index);
+  f = fs_try_alloc_fifo (fsh, fss, data_bytes);
+  if (!f)
+    goto done;
+
+  f->slice_index = slice_index; /* read back by fifo_segment_free_fifo to find the slice */
+
+  svm_fifo_init (f, data_bytes);
+
+  /* If rx fifo type add to active fifos list. When cleaning up segment,
+   * we need a list of active sessions that should be disconnected. Since
+   * both rx and tx fifos keep pointers to the session, it's enough to track
+   * only one. */
+  if (ftype == FIFO_SEGMENT_RX_FIFO)
+    {
+      fss_fifo_add_active_list (fss, f);
+      f->flags |= SVM_FIFO_F_LL_TRACKED; /* marks f as linked in the slice's active list */
+
+      svm_fifo_init_ooo_lookup (f, 0 /* ooo enq */ );
     }
   else
     {
-      fs->h->free_chunks[fl_index] = c->next;
+      svm_fifo_init_ooo_lookup (f, 1 /* ooo deq */ );
     }
 
-  svm_fifo_add_chunk (f, c);
+  fsh_active_fifos_update (fsh, 1); /* segment-wide active fifo count, rx and tx alike */
+  fss->virtual_mem += svm_fifo_size (f);
 
-  ssvm_pop_heap (oldheap);
-  ssvm_unlock_non_recursive (sh);
-  return 0;
+done:
+  return (f);
 }
 
 /**
- * Get number of active fifos
+ * Free fifo allocated in fifo segment. Drops one reference; on the last one the chunks and the header go back to the slice's free lists.
  */
-u32
-fifo_segment_num_fifos (fifo_segment_t * fs)
+void
+fifo_segment_free_fifo (fifo_segment_t * fs, svm_fifo_t * f)
 {
-  return fs->h->n_active_fifos;
+  fifo_segment_header_t *fsh = fs->h;
+  fifo_segment_slice_t *fss;
+
+  ASSERT (f->refcnt > 0);
+
+  if (--f->refcnt > 0) /* other references remain: nothing else to do */
+    return;
+
+  fss = fsh_slice_get (fsh, f->slice_index);
+
+  /* Remove from active list. Only rx fifos are tracked */
+  if (f->flags & SVM_FIFO_F_LL_TRACKED)
+    {
+      fss_fifo_del_active_list (fss, f);
+      f->flags &= ~SVM_FIFO_F_LL_TRACKED;
+    }
+
+  /* Free fifo chunks */
+  fsh_slice_collect_chunks (fsh, fss, f->start_chunk);
+
+  f->start_chunk = f->end_chunk = 0; /* chunks now belong to the free lists */
+  f->head_chunk = f->tail_chunk = f->ooo_enq = f->ooo_deq = 0;
+
+  /* not allocated on segment heap */
+  svm_fifo_free_chunk_lookup (f);
+  svm_fifo_free_ooo_data (f);
+
+  if (CLIB_DEBUG) /* debug builds only: invalidate the session back-references */
+    {
+      f->master_session_index = ~0;
+      f->master_thread_index = ~0;
+    }
+
+  fss->virtual_mem -= svm_fifo_size (f);
+
+  /* Add to free list */
+  f->next = fss->free_fifos;
+  f->prev = 0;
+  fss->free_fifos = f;
+
+  fsh_active_fifos_update (fsh, -1);
 }
 
-u32
-fifo_segment_num_free_fifos (fifo_segment_t * fs, u32 fifo_size_in_bytes)
+void /* Remove f's accounting from its slice (virtual_mem, active list, per-size chunk counts); nothing is freed. */
+fifo_segment_detach_fifo (fifo_segment_t * fs, svm_fifo_t * f)
 {
-  u32 count = 0, rounded_data_size, fl_index;
-  fifo_segment_header_t *fsh;
-  ssvm_shared_header_t *sh;
+  fifo_segment_slice_t *fss;
+  svm_fifo_chunk_t *c;
+  u32 fl_index;
+
+  ASSERT (f->refcnt == 1);
+
+  fss = fsh_slice_get (fs->h, f->slice_index);
+  fss->virtual_mem -= svm_fifo_size (f);
+  if (f->flags & SVM_FIFO_F_LL_TRACKED)
+    fss_fifo_del_active_list (fss, f); /* NOTE(review): flag and f->next/f->prev stay set - confirm the attach side copes */
+
+  c = f->start_chunk;
+  while (c) /* one decrement per chunk, in the counter matching the chunk's length */
+    {
+      fl_index = fs_freelist_for_size (c->length);
+      clib_atomic_fetch_sub_rel (&fss->num_chunks[fl_index], 1);
+      c = c->next;
+    }
+}
+
+void /* Account f to slice slice_index: virtual_mem, active list (if tracked) and per-size chunk counts. */
+fifo_segment_attach_fifo (fifo_segment_t * fs, svm_fifo_t * f,
+                         u32 slice_index)
+{
+  fifo_segment_slice_t *fss;
+  svm_fifo_chunk_t *c;
+  u32 fl_index;
+
+  f->slice_index = slice_index; /* f now reports the new slice as its owner */
+  fss = fsh_slice_get (fs->h, f->slice_index);
+  fss->virtual_mem += svm_fifo_size (f);
+  if (f->flags & SVM_FIFO_F_LL_TRACKED)
+    fss_fifo_add_active_list (fss, f);
+
+  c = f->start_chunk;
+  while (c) /* one increment per chunk, in the counter matching the chunk's length */
+    {
+      fl_index = fs_freelist_for_size (c->length);
+      clib_atomic_fetch_add_rel (&fss->num_chunks[fl_index], 1);
+      c = c->next;
+    }
+}
+
+int /* Allocate batch_size fifo headers as one block and push them on the slice's free fifo list. Returns 0, or -1 if allocation fails. */
+fifo_segment_prealloc_fifo_hdrs (fifo_segment_t * fs, u32 slice_index,
+                                u32 batch_size)
+{
+  fifo_segment_header_t *fsh = fs->h;
+  fifo_segment_slice_t *fss;
   svm_fifo_t *f;
+  void *oldheap;
+  uword size;
+  u8 *fmem;
   int i;
 
-  sh = fs->ssvm.sh;
-  fsh = (fifo_segment_header_t *) sh->opaque[0];
+  fss = fsh_slice_get (fsh, slice_index);
+  size = (uword) (sizeof (*f)) * batch_size; /* widened before multiplying */
 
-  /* Count all free fifos? */
-  if (fifo_size_in_bytes == ~0)
+  oldheap = ssvm_push_heap (fsh->ssvm_sh); /* allocate on the segment's heap */
+  fmem = clib_mem_alloc_aligned_at_offset (size, CLIB_CACHE_LINE_BYTES,
+                                          0 /* align_offset */ ,
+                                          0 /* os_out_of_memory */ );
+  ssvm_pop_heap (oldheap);
+
+  /* Out of space.. */
+  if (fmem == 0)
+    return -1;
+
+  /* Carve fifo headers out of the block, one sizeof (*f) apart */
+  for (i = 0; i < batch_size; i++)
     {
-      for (i = 0; i < vec_len (fsh->free_fifos); i++)
-       {
-         f = fsh->free_fifos[i];
-         if (f == 0)
-           continue;
+      f = (svm_fifo_t *) fmem;
+      memset (f, 0, sizeof (*f));
+      f->next = fss->free_fifos;
+      fss->free_fifos = f;
+      fmem += sizeof (*f);
+    }
 
-         while (f)
-           {
-             f = f->next;
-             count++;
-           }
-       }
-      return count;
+  fsh_free_bytes_sub (fsh, size); /* NOTE(review): size is uword but the helper takes int - confirm large batches cannot truncate */
+
+  return 0;
+}
+
+int /* Allocate batch_size chunks for chunk_size as one block and push them on the slice's free list. Returns 0, or -1 on bad size or failed allocation. */
+fifo_segment_prealloc_fifo_chunks (fifo_segment_t * fs, u32 slice_index,
+                                  u32 chunk_size, u32 batch_size)
+{
+  fifo_segment_header_t *fsh = fs->h;
+  u32 rounded_data_size, fl_index;
+  fifo_segment_slice_t *fss;
+  svm_fifo_chunk_t *c;
+  void *oldheap;
+  uword size;
+  u8 *cmem;
+  int i;
+
+  if (!fs_chunk_size_is_valid (fsh, chunk_size))
+    {
+      clib_warning ("chunk size out of range %d", chunk_size);
+      return -1;
     }
 
-  rounded_data_size = (1 << (max_log2 (fifo_size_in_bytes)));
-  fl_index = fs_free_list_for_size (rounded_data_size);
+  fl_index = fs_freelist_for_size (chunk_size);
+  rounded_data_size = fs_freelist_index_to_size (fl_index); /* data size actually given to each chunk */
+  size = (uword) (sizeof (*c) + rounded_data_size) * batch_size; /* header + data per chunk */
 
-  if (fl_index >= vec_len (fsh->free_fifos))
-    return 0;
+  oldheap = ssvm_push_heap (fsh->ssvm_sh); /* allocate on the segment's heap */
+  cmem = clib_mem_alloc_aligned_at_offset (size, CLIB_CACHE_LINE_BYTES,
+                                          0 /* align_offset */ ,
+                                          0 /* os_out_of_memory */ );
+  ssvm_pop_heap (oldheap);
+
+  /* Out of space.. */
+  if (cmem == 0)
+    return -1;
+
+  fss = fsh_slice_get (fsh, slice_index);
+
+  /* Carve chunk header + data space out of the block */
+  for (i = 0; i < batch_size; i++)
+    {
+      c = (svm_fifo_chunk_t *) cmem;
+      c->start_byte = 0;
+      c->length = rounded_data_size;
+      c->next = fss->free_chunks[fl_index]; /* NOTE(review): free list changed without chunk_lock, unlike fsh_alloc_chunk - confirm callers are single-threaded here */
+      fss->free_chunks[fl_index] = c;
+      cmem += sizeof (*c) + rounded_data_size;
+      fsh_cached_bytes_add (fsh, rounded_data_size);
+    }
+
+  fss->num_chunks[fl_index] += batch_size; /* NOTE(review): plain add; other sites use clib_atomic_fetch_add_rel on num_chunks */
+  fss->n_fl_chunk_bytes += batch_size * rounded_data_size;
+  fsh_free_bytes_sub (fsh, size); /* NOTE(review): size is uword but the helper takes int */
+
+  return 0;
+}
+
+/**
+ * Pre-allocates fifo pairs in fifo segment, spread over its slices; *n_fifo_pairs is decremented by the number of pairs attempted.
+ */
+void
+fifo_segment_preallocate_fifo_pairs (fifo_segment_t * fs,
+                                    u32 rx_fifo_size, u32 tx_fifo_size,
+                                    u32 * n_fifo_pairs)
+{
+  u32 rx_rounded_data_size, tx_rounded_data_size, pair_size, pairs_to_alloc;
+  u32 hdrs, pairs_per_slice, alloc_now;
+  fifo_segment_header_t *fsh = fs->h;
+  int rx_fl_index, tx_fl_index, i;
+  fifo_segment_slice_t *fss;
+  uword space_available;
+
+  /* Parameter check */
+  if (rx_fifo_size == 0 || tx_fifo_size == 0 || *n_fifo_pairs == 0)
+    return;
+
+  if (!fs_chunk_size_is_valid (fsh, rx_fifo_size))
+    {
+      clib_warning ("rx fifo_size out of range %d", rx_fifo_size);
+      return;
+    }
+
+  if (!fs_chunk_size_is_valid (fsh, tx_fifo_size))
+    {
+      clib_warning ("tx fifo_size out of range %d", tx_fifo_size);
+      return;
+    }
+
+  rx_rounded_data_size = (1 << (max_log2 (rx_fifo_size))); /* used only for the space estimate below */
+  rx_fl_index = fs_freelist_for_size (rx_fifo_size);
+  tx_rounded_data_size = (1 << (max_log2 (tx_fifo_size))); /* used only for the space estimate below */
+  tx_fl_index = fs_freelist_for_size (tx_fifo_size);
+
+  hdrs = sizeof (svm_fifo_t) + sizeof (svm_fifo_chunk_t); /* per-fifo overhead: fifo header + one chunk header */
+
+  /* Calculate space requirements */
+  pair_size = 2 * hdrs + rx_rounded_data_size + tx_rounded_data_size;
+  space_available = fsh_free_space (fsh); /* queries the allocator directly, not the cached estimate */
+  pairs_to_alloc = space_available / pair_size;
+  pairs_to_alloc = clib_min (pairs_to_alloc, *n_fifo_pairs);
+  pairs_per_slice = pairs_to_alloc / fs->n_slices;
+  pairs_per_slice += pairs_to_alloc % fs->n_slices ? 1 : 0; /* NOTE(review): rounding up lets the slice total exceed pairs_to_alloc - confirm intended */
+
+  if (!pairs_per_slice)
+    return;
+
+  for (i = 0; i < fs->n_slices; i++)
+    {
+      fss = fsh_slice_get (fsh, i);
+      alloc_now = clib_min (pairs_per_slice, *n_fifo_pairs); /* can reach 0 once *n_fifo_pairs is used up */
+      if (fs_try_alloc_fifo_batch (fsh, fss, rx_fl_index, alloc_now))
+       clib_warning ("rx prealloc failed: pairs %u", alloc_now);
+      if (fs_try_alloc_fifo_batch (fsh, fss, tx_fl_index, alloc_now))
+       clib_warning ("tx prealloc failed: pairs %u", alloc_now);
+
+      /* Account for the pairs attempted; a failed batch is still subtracted */
+      *n_fifo_pairs -= alloc_now;
+    }
+}
+
+/**
+ * Get number of active fifos, as reported by fsh_n_active_fifos for the segment header
+ */
+u32
+fifo_segment_num_fifos (fifo_segment_t * fs)
+{
+  return fsh_n_active_fifos (fs->h);
+}
 
-  f = fsh->free_fifos[fl_index];
+static u32
+fs_slice_num_free_fifos (fifo_segment_slice_t * fss)
+{
+  svm_fifo_t *f;
+  u32 count = 0;
+
+  f = fss->free_fifos;
   if (f == 0)
     return 0;
 
@@ -628,21 +1153,34 @@ fifo_segment_num_free_fifos (fifo_segment_t * fs, u32 fifo_size_in_bytes)
 }
 
 u32
-fifo_segment_num_free_chunks (fifo_segment_t * fs, u32 size)
+fifo_segment_num_free_fifos (fifo_segment_t * fs) /* Sum of fs_slice_num_free_fifos over all slices of fs. */
+{
+  fifo_segment_header_t *fsh = fs->h;
+  fifo_segment_slice_t *fss;
+  int slice_index;
+  u32 count = 0;
+
+  for (slice_index = 0; slice_index < fs->n_slices; slice_index++)
+    {
+      fss = fsh_slice_get (fsh, slice_index);
+      count += fs_slice_num_free_fifos (fss);
+    }
+  return count;
+}
+
+static u32
+fs_slice_num_free_chunks (fifo_segment_slice_t * fss, u32 size)
 {
   u32 count = 0, rounded_size, fl_index;
-  fifo_segment_header_t *fsh;
   svm_fifo_chunk_t *c;
   int i;
 
-  fsh = fs->h;
-
   /* Count all free chunks? */
   if (size == ~0)
     {
-      for (i = 0; i < vec_len (fsh->free_chunks); i++)
+      for (i = 0; i < vec_len (fss->free_chunks); i++)
        {
-         c = fsh->free_chunks[i];
+         c = fss->free_chunks[i];
          if (c == 0)
            continue;
 
@@ -656,12 +1194,12 @@ fifo_segment_num_free_chunks (fifo_segment_t * fs, u32 size)
     }
 
   rounded_size = (1 << (max_log2 (size)));
-  fl_index = fs_free_list_for_size (rounded_size);
+  fl_index = fs_freelist_for_size (rounded_size);
 
-  if (fl_index >= vec_len (fsh->free_chunks))
+  if (fl_index >= vec_len (fss->free_chunks))
     return 0;
 
-  c = fsh->free_chunks[fl_index];
+  c = fss->free_chunks[fl_index];
   if (c == 0)
     return 0;
 
@@ -673,16 +1211,141 @@ fifo_segment_num_free_chunks (fifo_segment_t * fs, u32 size)
   return count;
 }
 
+u32 /* Sum of fs_slice_num_free_chunks (fss, size) over all slices; size is passed through unchanged. */
+fifo_segment_num_free_chunks (fifo_segment_t * fs, u32 size)
+{
+  fifo_segment_header_t *fsh = fs->h;
+  fifo_segment_slice_t *fss;
+  int slice_index;
+  u32 count = 0;
+
+  for (slice_index = 0; slice_index < fs->n_slices; slice_index++)
+    {
+      fss = fsh_slice_get (fsh, slice_index);
+      count += fs_slice_num_free_chunks (fss, size);
+    }
+  return count;
+}
+
+void /* Refresh the cached free-byte estimate from the allocator (see fsh_update_free_bytes). */
+fifo_segment_update_free_bytes (fifo_segment_t * fs)
+{
+  fsh_update_free_bytes (fs->h);
+}
+
+uword /* Segment size as recorded in fs->ssvm.ssvm_size. */
+fifo_segment_size (fifo_segment_t * fs)
+{
+  return fs->ssvm.ssvm_size;
+}
+
+u8 /* 1 if FIFO_SEGMENT_F_MEM_LIMIT is set in fsh->flags, else 0. */
+fsh_has_reached_mem_limit (fifo_segment_header_t * fsh)
+{
+  return (fsh->flags & FIFO_SEGMENT_F_MEM_LIMIT) ? 1 : 0;
+}
+
+void /* Clear FIFO_SEGMENT_F_MEM_LIMIT in fsh->flags (plain, non-atomic read-modify-write). */
+fsh_reset_mem_limit (fifo_segment_header_t * fsh)
+{
+  fsh->flags &= ~FIFO_SEGMENT_F_MEM_LIMIT;
+}
+
+uword /* Cached free-byte estimate minus reserved bytes, floored at 0 (see fsh_n_free_bytes). */
+fifo_segment_free_bytes (fifo_segment_t * fs)
+{
+  return fsh_n_free_bytes (fs->h);
+}
+
+uword /* Segment-wide cached-byte counter (see fsh_n_cached_bytes). */
+fifo_segment_cached_bytes (fifo_segment_t * fs)
+{
+  return fsh_n_cached_bytes (fs->h);
+}
+
+uword /* Free bytes plus cached bytes; the two counters are read separately, not as one snapshot. */
+fifo_segment_available_bytes (fifo_segment_t * fs)
+{
+  return fsh_n_free_bytes (fs->h) + fsh_n_cached_bytes (fs->h);
+}
+
+uword /* Sum of the per-slice n_fl_chunk_bytes counters over all slices of fs. */
+fifo_segment_fl_chunk_bytes (fifo_segment_t * fs)
+{
+  fifo_segment_header_t *fsh = fs->h;
+  fifo_segment_slice_t *fss;
+  uword n_bytes = 0;
+  int slice_index;
+
+  for (slice_index = 0; slice_index < fs->n_slices; slice_index++)
+    {
+      fss = fsh_slice_get (fsh, slice_index);
+      n_bytes += fss->n_fl_chunk_bytes; /* read without taking the slice's chunk_lock */
+    }
+
+  return n_bytes;
+}
+
 u8
 fifo_segment_has_fifos (fifo_segment_t * fs)
 {
-  return fs->h->fifos != 0;
+  return (fsh_n_active_fifos (fs->h) != 0); /* based on the active fifo count rather than a list head */
 }
 
 svm_fifo_t *
-fifo_segment_get_fifo_list (fifo_segment_t * fs)
+fifo_segment_get_slice_fifo_list (fifo_segment_t * fs, u32 slice_index) /* Head of the active fifo list of slice slice_index; may be 0. */
+{
+  fifo_segment_header_t *fsh = fs->h;
+  fifo_segment_slice_t *fss;
+
+  fss = fsh_slice_get (fsh, slice_index); /* slice_index is not range-checked here */
+  return fss->fifos;
+}
+
+u8 /* Percentage of the segment size that is neither free nor cached. */
+fifo_segment_get_mem_usage (fifo_segment_t * fs)
 {
-  return fs->h->fifos;
+  uword size, in_use;
+
+  size = fifo_segment_size (fs);
+  in_use =
+    size - fifo_segment_free_bytes (fs) - fifo_segment_cached_bytes (fs); /* NOTE(review): unsigned; wraps if free + cached exceeds size - confirm it cannot */
+  return (in_use * 100) / size; /* NOTE(review): assumes size != 0; result is narrowed to u8 */
+}
+
+fifo_segment_mem_status_t /* Map a usage percentage to a memory pressure status using the header's watermarks. */
+fifo_segment_determine_status (fifo_segment_header_t * fsh, u8 usage)
+{
+  if (!fsh->high_watermark || !fsh->low_watermark) /* either watermark unset: pressure is never reported */
+    return MEMORY_PRESSURE_NO_PRESSURE;
+
+  /* once the no-memory is detected, the status continues
+   * until memory usage gets below the high watermark
+   */
+  if (fsh_has_reached_mem_limit (fsh))
+    {
+      if (usage >= fsh->high_watermark)
+       return MEMORY_PRESSURE_NO_MEMORY;
+      else
+       fsh_reset_mem_limit (fsh); /* side effect: clears the flag in fsh->flags */
+    }
+
+  if (usage >= fsh->high_watermark)
+    return MEMORY_PRESSURE_HIGH_PRESSURE;
+
+  else if (usage >= fsh->low_watermark)
+    return MEMORY_PRESSURE_LOW_PRESSURE;
+
+  return MEMORY_PRESSURE_NO_PRESSURE;
+}
+
+fifo_segment_mem_status_t /* Memory pressure status of fs, computed from its current usage percentage. */
+fifo_segment_get_mem_status (fifo_segment_t * fs)
+{
+  fifo_segment_header_t *fsh = fs->h;
+  u8 usage = fifo_segment_get_mem_usage (fs);
+
+  return fifo_segment_determine_status (fsh, usage); /* may clear the mem-limit flag as a side effect */
 }
 
 u8 *
@@ -709,38 +1372,109 @@ format_fifo_segment_type (u8 * s, va_list * args)
 u8 *
 format_fifo_segment (u8 * s, va_list * args)
 {
-  fifo_segment_t *sp = va_arg (*args, fifo_segment_t *);
+  u32 count, indent, active_fifos, free_fifos;
+  fifo_segment_t *fs = va_arg (*args, fifo_segment_t *); /* a null segment asks for the table header only */
   int verbose __attribute__ ((unused)) = va_arg (*args, int);
-  fifo_segment_header_t *fsh = sp->h;
-  u32 count, indent;
-  svm_fifo_t *f;
+  uword est_chunk_bytes, est_free_seg_bytes, free_chunks;
+  uword chunk_bytes = 0, free_seg_bytes, chunk_size;
+  uword tracked_cached_bytes;
+  uword fifo_hdr = 0, reserved;
+  fifo_segment_header_t *fsh;
+  fifo_segment_slice_t *fss;
+  svm_fifo_chunk_t *c;
+  u32 slice_index;
+  char *address;
+  size_t size;
   int i;
+  uword allocated, in_use, virt;
+  f64 usage;
+  fifo_segment_mem_status_t mem_st;
 
   indent = format_get_indent (s) + 2;
-#if USE_DLMALLOC == 0
-  s = format (s, "%U segment heap: %U\n", format_white_space, indent,
-             format_mheap, svm_fifo_segment_heap (sp), verbose);
-  s = format (s, "%U segment has %u active fifos\n",
-             format_white_space, indent, fifo_segment_num_fifos (sp));
-#endif
-
-  for (i = 0; i < vec_len (fsh->free_fifos); i++)
-    {
-      f = fsh->free_fifos[i];
-      if (f == 0)
-       continue;
-      count = 0;
-      while (f)
+
+  if (fs == 0) /* header row; column widths match the data row below */
+    {
+      s = format (s, "%-15s%15s%15s%15s%15s%15s", "Name", "Type",
+                 "HeapSize (M)", "ActiveFifos", "FreeFifos", "Address");
+      return s;
+    }
+
+  fifo_segment_info (fs, &address, &size);
+  active_fifos = fifo_segment_num_fifos (fs);
+  free_fifos = fifo_segment_num_free_fifos (fs);
+
+  s = format (s, "%-15v%15U%15llu%15u%15u%15llx", ssvm_name (&fs->ssvm),
+             format_fifo_segment_type, fs, size >> 20ULL, active_fifos,
+             free_fifos, address);
+
+  if (!verbose) /* the one-line summary is all that non-verbose output gets */
+    return s;
+
+  fsh = fs->h;
+
+  free_chunks = fifo_segment_num_free_chunks (fs, ~0); /* ~0 asks for chunks of all sizes */
+  if (free_chunks)
+    s =
+      format (s, "\n\n%UFree/Allocated chunks by size:\n", format_white_space,
+             indent + 2);
+  else
+    s = format (s, "\n");
+
+  for (slice_index = 0; slice_index < fs->n_slices; slice_index++)
+    {
+      fss = fsh_slice_get (fsh, slice_index);
+      for (i = 0; i < vec_len (fss->free_chunks); i++)
        {
-         f = f->next;
-         count++;
-       }
+         c = fss->free_chunks[i];
+         if (c == 0 && fss->num_chunks[i] == 0) /* size class never used on this slice */
+           continue;
+         count = 0;
+         while (c) /* walk the free list to count its chunks */
+           {
+             c = c->next;
+             count++;
+           }
+
+         chunk_size = fs_freelist_index_to_size (i);
+         s = format (s, "%U%-5u kB: %u/%u\n", format_white_space, indent + 2,
+                     chunk_size >> 10, count, fss->num_chunks[i]); /* NOTE(review): chunk_size is uword, printed with %u */
 
-      s = format (s, "%U%-5u Kb: %u free",
-                 format_white_space, indent + 2,
-                 1 << (i + max_log2 (FIFO_SEGMENT_MIN_FIFO_SIZE) - 10),
-                 count);
+         chunk_bytes += count * chunk_size; /* bytes found by walking, compared below with the counters */
+       }
     }
+
+  fifo_hdr = free_fifos * sizeof (svm_fifo_t);
+  est_chunk_bytes = fifo_segment_fl_chunk_bytes (fs);
+  est_free_seg_bytes = fifo_segment_free_bytes (fs); /* estimate as it stood before the refresh below */
+  fifo_segment_update_free_bytes (fs); /* side effect: overwrites the cached free-byte estimate */
+  free_seg_bytes = fifo_segment_free_bytes (fs); /* value after the refresh */
+  tracked_cached_bytes = fifo_segment_cached_bytes (fs);
+  allocated = fifo_segment_size (fs);
+  in_use = fifo_segment_size (fs) - est_free_seg_bytes - tracked_cached_bytes; /* uses the pre-refresh estimate */
+  usage = (100.0 * in_use) / allocated;
+  mem_st = fifo_segment_get_mem_status (fs);
+  virt = fsh_virtual_mem (fsh);
+  reserved = fsh->n_reserved_bytes;
+
+  s = format (s, "\n%Useg free bytes: %U (%lu) estimated: %U (%lu) reserved:"
+             " %U (%lu)\n", format_white_space, indent + 2,
+             format_memory_size, free_seg_bytes, free_seg_bytes,
+             format_memory_size, est_free_seg_bytes, est_free_seg_bytes,
+             format_memory_size, reserved, reserved);
+  s = format (s, "%Uchunk free bytes: %U (%lu) estimated: %U (%lu) tracked:"
+             " %U (%lu)\n", format_white_space, indent + 2,
+             format_memory_size, chunk_bytes, chunk_bytes,
+             format_memory_size, est_chunk_bytes, est_chunk_bytes,
+             format_memory_size, tracked_cached_bytes, tracked_cached_bytes);
+  s = format (s, "%Ufifo active: %u hdr free bytes: %U (%u) \n",
+             format_white_space, indent + 2, fsh->n_active_fifos,
+             format_memory_size, fifo_hdr, fifo_hdr); /* NOTE(review): fifo_hdr is uword, printed with %u (others use %lu) */
+  s = format (s, "%Usegment usage: %.2f%% (%U / %U) virt: %U status: %s\n",
+             format_white_space, indent + 2, usage, format_memory_size,
+             in_use, format_memory_size, allocated, format_memory_size, virt,
+             fifo_segment_mem_status_strings[mem_st]); /* mem_st indexes the table built from foreach_segment_mem_status */
+  s = format (s, "\n");
+
   return s;
 }