fib: fix crash while adding intf-rx routes

[vpp.git] / src / plugins / memif / device.c
diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c

index 8726603..a7b6a98 100644 (file)
--- a/src/plugins/memif/device.c
+++ b/src/plugins/memif/device.c
@@ -40,7 +40,7 @@ typedef enum
      MEMIF_TX_N_ERROR,
  } memif_tx_func_error_t;
  
-static vl_counter_t memif_tx_func_error_counters[] = {
+static vlib_error_desc_t memif_tx_func_error_counters[] = {
  #define _(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
    foreach_memif_tx_func_error
  #undef _
@@ -97,14 +97,12 @@ memif_add_copy_op (memif_per_thread_data_t * ptd, void *data, u32 len,
  }
  
  static_always_inline uword
-memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
-                          vlib_frame_t * frame, memif_if_t * mif,
-                          memif_ring_type_t type, memif_queue_t * mq,
-                          memif_per_thread_data_t * ptd)
+memif_interface_tx_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                          u32 *buffers, memif_if_t *mif,
+                          memif_ring_type_t type, memif_queue_t *mq,
+                          memif_per_thread_data_t *ptd, u32 n_left)
  {
    memif_ring_t *ring;
-  u32 *buffers = vlib_frame_vector_args (frame);
-  u32 n_left = frame->n_vectors;
    u32 n_copy_op;
    u16 ring_size, mask, slot, free_slots;
    int n_retries = 5;
@@ -112,6 +110,7 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
    memif_copy_op_t *co;
    memif_region_index_t last_region = ~0;
    void *last_region_shm = 0;
+  u16 head, tail;
  
    ring = mq->ring;
    ring_size = 1 << mq->log2_ring_size;
@@ -119,14 +118,20 @@ memif_interface_tx_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
  
  retry:
  
-  free_slots = ring->tail - mq->last_tail;
-  mq->last_tail += free_slots;
-  slot = (type == MEMIF_RING_S2M) ? ring->head : ring->tail;
-
    if (type == MEMIF_RING_S2M)
-    free_slots = ring_size - ring->head + mq->last_tail;
+    {
+      slot = head = ring->head;
+      tail = __atomic_load_n (&ring->tail, __ATOMIC_ACQUIRE);
+      mq->last_tail += tail - mq->last_tail;
+      free_slots = ring_size - head + mq->last_tail;
+    }
    else
-    free_slots = ring->head - ring->tail;
+    {
+      slot = tail = ring->tail;
+      head = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE);
+      mq->last_tail += tail - mq->last_tail;
+      free_slots = head - tail;
+    }
  
    while (n_left && free_slots)
      {
@@ -138,8 +143,7 @@ retry:
        u32 saved_ptd_buffers_len = _vec_len (ptd->buffers);
        u16 saved_slot = slot;
  
-      CLIB_PREFETCH (&ring->desc[(slot + 8) & mask], CLIB_CACHE_LINE_BYTES,
-                    LOAD);
+      clib_prefetch_load (&ring->desc[(slot + 8) & mask]);
  
        d0 = &ring->desc[slot & mask];
        if (PREDICT_FALSE (last_region != d0->region))
@@ -172,6 +176,7 @@ retry:
                 {
                   slot++;
                   free_slots--;
+                 d0->length = dst_off;
                   d0->flags = MEMIF_DESC_FLAG_NEXT;
                   d0 = &ring->desc[slot & mask];
                   dst_off = 0;
@@ -189,8 +194,8 @@ retry:
               else
                 {
                   /* we need to rollback vectors before bailing out */
-                 _vec_len (ptd->buffers) = saved_ptd_buffers_len;
-                 _vec_len (ptd->copy_ops) = saved_ptd_copy_ops_len;
+                 vec_set_len (ptd->buffers, saved_ptd_buffers_len);
+                 vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len);
                   vlib_error_count (vm, node->node_index,
                                     MEMIF_TX_ERROR_ROLLBACK, 1);
                   slot = saved_slot;
@@ -229,10 +234,10 @@ no_free_slots:
    co = ptd->copy_ops;
    while (n_copy_op >= 8)
      {
-      CLIB_PREFETCH (co[4].data, CLIB_CACHE_LINE_BYTES, LOAD);
-      CLIB_PREFETCH (co[5].data, CLIB_CACHE_LINE_BYTES, LOAD);
-      CLIB_PREFETCH (co[6].data, CLIB_CACHE_LINE_BYTES, LOAD);
-      CLIB_PREFETCH (co[7].data, CLIB_CACHE_LINE_BYTES, LOAD);
+      clib_prefetch_load (co[4].data);
+      clib_prefetch_load (co[5].data);
+      clib_prefetch_load (co[6].data);
+      clib_prefetch_load (co[7].data);
  
        b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
        b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]);
@@ -263,52 +268,35 @@ no_free_slots:
    vec_reset_length (ptd->copy_ops);
    vec_reset_length (ptd->buffers);
  
-  CLIB_MEMORY_STORE_BARRIER ();
    if (type == MEMIF_RING_S2M)
-    ring->head = slot;
+    __atomic_store_n (&ring->head, slot, __ATOMIC_RELEASE);
    else
-    ring->tail = slot;
+    __atomic_store_n (&ring->tail, slot, __ATOMIC_RELEASE);
  
    if (n_left && n_retries--)
      goto retry;
  
-  clib_spinlock_unlock_if_init (&mif->lockp);
-
-  if (n_left)
-    {
-      vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS,
-                       n_left);
-    }
-
-  if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1)
-    {
-      u64 b = 1;
-      CLIB_UNUSED (int r) = write (mq->int_fd, &b, sizeof (b));
-      mq->int_count++;
-    }
-
-  vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
-
-  return frame->n_vectors;
+  return n_left;
  }
  
  static_always_inline uword
-memif_interface_tx_zc_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
-                             vlib_frame_t * frame, memif_if_t * mif,
-                             memif_queue_t * mq,
-                             memif_per_thread_data_t * ptd)
+memif_interface_tx_zc_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                             u32 *buffers, memif_if_t *mif, memif_queue_t *mq,
+                             memif_per_thread_data_t *ptd, u32 n_left)
  {
    memif_ring_t *ring = mq->ring;
-  u32 *buffers = vlib_frame_vector_args (frame);
-  u32 n_left = frame->n_vectors;
    u16 slot, free_slots, n_free;
    u16 ring_size = 1 << mq->log2_ring_size;
    u16 mask = ring_size - 1;
    int n_retries = 5;
    vlib_buffer_t *b0;
+  u16 head, tail;
  
  retry:
-  n_free = ring->tail - mq->last_tail;
+  tail = __atomic_load_n (&ring->tail, __ATOMIC_ACQUIRE);
+  slot = head = ring->head;
+
+  n_free = tail - mq->last_tail;
    if (n_free >= 16)
      {
        vlib_buffer_free_from_ring_no_next (vm, mq->buffers,
@@ -317,8 +305,7 @@ retry:
        mq->last_tail += n_free;
      }
  
-  slot = ring->head;
-  free_slots = ring_size - ring->head + mq->last_tail;
+  free_slots = ring_size - head + mq->last_tail;
  
    while (n_left && free_slots)
      {
@@ -327,8 +314,7 @@ retry:
        memif_desc_t *d0;
        u32 bi0;
  
-      CLIB_PREFETCH (&ring->desc[(slot + 8) & mask], CLIB_CACHE_LINE_BYTES,
-                    STORE);
+      clib_prefetch_store (&ring->desc[(slot + 8) & mask]);
  
        if (PREDICT_TRUE (n_left >= 4))
         vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);
@@ -375,29 +361,276 @@ retry:
      }
  no_free_slots:
  
-  CLIB_MEMORY_STORE_BARRIER ();
-  ring->head = slot;
+  __atomic_store_n (&ring->head, slot, __ATOMIC_RELEASE);
  
    if (n_left && n_retries--)
      goto retry;
  
-  clib_spinlock_unlock_if_init (&mif->lockp);
+  return n_left;
+}
  
-  if (n_left)
+CLIB_MARCH_FN (memif_tx_dma_completion_cb, void, vlib_main_t *vm,
+              vlib_dma_batch_t *b)
+{
+  memif_main_t *mm = &memif_main;
+  memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16);
+  memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, b->cookie & 0xffff);
+  memif_dma_info_t *dma_info = mq->dma_info + mq->dma_info_head;
+  memif_per_thread_data_t *ptd = &dma_info->data;
+
+  vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers));
+
+  dma_info->finished = 1;
+  vec_reset_length (ptd->buffers);
+  vec_reset_length (ptd->copy_ops);
+
+  __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE);
+
+  mq->dma_info_head++;
+  if (mq->dma_info_head == mq->dma_info_size)
+    mq->dma_info_head = 0;
+  mq->dma_info_full = 0;
+}
+
+#ifndef CLIB_MARCH_VARIANT
+void
+memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b)
+{
+  return CLIB_MARCH_FN_SELECT (memif_tx_dma_completion_cb) (vm, b);
+}
+#endif
+
+static_always_inline uword
+memif_interface_tx_dma_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                              u32 *buffers, memif_if_t *mif,
+                              memif_ring_type_t type, memif_queue_t *mq,
+                              u32 n_left)
+{
+  memif_ring_t *ring;
+  u32 n_copy_op;
+  u16 ring_size, mask, slot, free_slots;
+  int n_retries = 5, fallback = 0;
+  vlib_buffer_t *b0, *b1, *b2, *b3;
+  memif_copy_op_t *co;
+  memif_region_index_t last_region = ~0;
+  void *last_region_shm = 0;
+  u16 head, tail;
+  memif_dma_info_t *dma_info;
+  memif_per_thread_data_t *ptd;
+  memif_main_t *mm = &memif_main;
+  u16 mif_id = mif - mm->interfaces;
+
+  ring = mq->ring;
+  ring_size = 1 << mq->log2_ring_size;
+  mask = ring_size - 1;
+
+  dma_info = mq->dma_info + mq->dma_info_tail;
+  ptd = &dma_info->data;
+
+  /* do software fallback if dma info ring is full */
+  u16 dma_mask = mq->dma_info_size - 1;
+  if ((((mq->dma_info_tail + 1) & dma_mask) == mq->dma_info_head) ||
+      ((mq->dma_info_head == dma_mask) && (mq->dma_info_tail == 0)))
      {
-      vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS,
-                       n_left);
-      vlib_buffer_free (vm, buffers, n_left);
+      if (!mq->dma_info_full)
+       mq->dma_info_full = 1;
+      else
+       fallback = 1;
      }
  
-  if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1)
+  vlib_dma_batch_t *b = NULL;
+  if (PREDICT_TRUE (!fallback))
+    b = vlib_dma_batch_new (vm, mif->dma_tx_config);
+  if (!b)
+    return n_left;
+
+retry:
+
+  slot = tail = mq->dma_tail;
+  head = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE);
+  mq->last_tail += tail - mq->last_tail;
+  free_slots = head - mq->dma_tail;
+
+  while (n_left && free_slots)
      {
-      u64 b = 1;
-      CLIB_UNUSED (int r) = write (mq->int_fd, &b, sizeof (b));
-      mq->int_count++;
+      memif_desc_t *d0;
+      void *mb0;
+      i32 src_off;
+      u32 bi0, dst_off, src_left, dst_left, bytes_to_copy;
+      u32 saved_ptd_copy_ops_len = _vec_len (ptd->copy_ops);
+      u32 saved_ptd_buffers_len = _vec_len (ptd->buffers);
+      u16 saved_slot = slot;
+
+      clib_prefetch_load (&ring->desc[(slot + 8) & mask]);
+
+      d0 = &ring->desc[slot & mask];
+      if (PREDICT_FALSE (last_region != d0->region))
+       {
+         last_region_shm = mif->regions[d0->region].shm;
+         last_region = d0->region;
+       }
+      mb0 = last_region_shm + d0->offset;
+
+      dst_off = 0;
+
+      /* slave is the producer, so it should be able to reset buffer length */
+      dst_left = d0->length;
+
+      if (PREDICT_TRUE (n_left >= 4))
+       vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);
+      bi0 = buffers[0];
+
+    next_in_chain:
+
+      b0 = vlib_get_buffer (vm, bi0);
+      src_off = b0->current_data;
+      src_left = b0->current_length;
+
+      while (src_left)
+       {
+         if (PREDICT_FALSE (dst_left == 0))
+           {
+             if (free_slots)
+               {
+                 d0->length = dst_off;
+                 d0->flags = MEMIF_DESC_FLAG_NEXT;
+                 d0 = &ring->desc[slot & mask];
+                 dst_off = 0;
+                 dst_left = (type == MEMIF_RING_S2M) ? mif->run.buffer_size :
+                                                             d0->length;
+
+                 if (PREDICT_FALSE (last_region != d0->region))
+                   {
+                     last_region_shm = mif->regions[d0->region].shm;
+                     last_region = d0->region;
+                   }
+                 mb0 = last_region_shm + d0->offset;
+               }
+             else
+               {
+                 /* we need to rollback vectors before bailing out */
+                 vec_set_len (ptd->buffers, saved_ptd_buffers_len);
+                 vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len);
+                 vlib_error_count (vm, node->node_index,
+                                   MEMIF_TX_ERROR_ROLLBACK, 1);
+                 slot = saved_slot;
+                 goto no_free_slots;
+               }
+           }
+         bytes_to_copy = clib_min (src_left, dst_left);
+         memif_add_copy_op (ptd, mb0 + dst_off, bytes_to_copy, src_off,
+                            vec_len (ptd->buffers));
+         src_off += bytes_to_copy;
+         dst_off += bytes_to_copy;
+         src_left -= bytes_to_copy;
+         dst_left -= bytes_to_copy;
+       }
+
+      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT))
+       {
+         slot++;
+         free_slots--;
+         bi0 = b0->next_buffer;
+         goto next_in_chain;
+       }
+
+      vec_add1_aligned (ptd->buffers, buffers[0], CLIB_CACHE_LINE_BYTES);
+      d0->length = dst_off;
+      d0->flags = 0;
+
+      free_slots -= 1;
+      slot += 1;
+
+      buffers++;
+      n_left--;
+    }
+no_free_slots:
+
+  /* copy data */
+  n_copy_op = vec_len (ptd->copy_ops);
+  co = ptd->copy_ops;
+  while (n_copy_op >= 8)
+    {
+      clib_prefetch_load (co[4].data);
+      clib_prefetch_load (co[5].data);
+      clib_prefetch_load (co[6].data);
+      clib_prefetch_load (co[7].data);
+
+      b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
+      b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]);
+      b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]);
+      b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]);
+
+      if (PREDICT_TRUE (!fallback))
+       {
+         vlib_dma_batch_add (vm, b, co[0].data,
+                             b0->data + co[0].buffer_offset, co[0].data_len);
+         vlib_dma_batch_add (vm, b, co[1].data,
+                             b1->data + co[1].buffer_offset, co[1].data_len);
+         vlib_dma_batch_add (vm, b, co[2].data,
+                             b2->data + co[2].buffer_offset, co[2].data_len);
+         vlib_dma_batch_add (vm, b, co[3].data,
+                             b3->data + co[3].buffer_offset, co[3].data_len);
+       }
+      else
+       {
+         clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+                           co[0].data_len);
+         clib_memcpy_fast (co[1].data, b1->data + co[1].buffer_offset,
+                           co[1].data_len);
+         clib_memcpy_fast (co[2].data, b2->data + co[2].buffer_offset,
+                           co[2].data_len);
+         clib_memcpy_fast (co[3].data, b3->data + co[3].buffer_offset,
+                           co[3].data_len);
+       }
+
+      co += 4;
+      n_copy_op -= 4;
+    }
+  while (n_copy_op)
+    {
+      b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
+      if (PREDICT_TRUE (!fallback))
+       vlib_dma_batch_add (vm, b, co[0].data, b0->data + co[0].buffer_offset,
+                           co[0].data_len);
+      else
+       clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+                         co[0].data_len);
+      co += 1;
+      n_copy_op -= 1;
+    }
+
+  /* save dma info before retry */
+  dma_info->dma_tail = slot;
+  mq->dma_tail = slot;
+  vec_reset_length (ptd->copy_ops);
+
+  if (n_left && n_retries--)
+    goto retry;
+
+  if (PREDICT_TRUE (!fallback))
+    {
+      vlib_dma_batch_set_cookie (vm, b,
+                                ((u64) mif_id << 16) | (mq - mif->tx_queues));
+      vlib_dma_batch_submit (vm, b);
+      dma_info->finished = 0;
+
+      if (b->n_enq)
+       {
+         mq->dma_info_tail++;
+         if (mq->dma_info_tail == mq->dma_info_size)
+           mq->dma_info_tail = 0;
+       }
+    }
+  else if (fallback && dma_info->finished)
+    {
+      /* if dma has been completed, update ring immediately */
+      vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers));
+      vec_reset_length (ptd->buffers);
+      __atomic_store_n (&mq->ring->tail, slot, __ATOMIC_RELEASE);
      }
  
-  return frame->n_vectors;
+  return n_left;
  }
  
  VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
@@ -407,29 +640,63 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
    memif_main_t *nm = &memif_main;
    vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
    memif_if_t *mif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
+  vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
    memif_queue_t *mq;
-  u32 thread_index = vm->thread_index;
+  u32 qid = tf->queue_id;
+  u32 *from, thread_index = vm->thread_index;
    memif_per_thread_data_t *ptd = vec_elt_at_index (memif_main.per_thread_data,
                                                    thread_index);
-  u8 tx_queues = vec_len (mif->tx_queues);
+  uword n_left;
  
-  if (tx_queues < vlib_get_n_threads ())
-    {
-      ASSERT (tx_queues > 0);
-      mq = vec_elt_at_index (mif->tx_queues, thread_index % tx_queues);
-      clib_spinlock_lock_if_init (&mif->lockp);
-    }
-  else
-    mq = vec_elt_at_index (mif->tx_queues, thread_index);
+  ASSERT (vec_len (mif->tx_queues) > qid);
+  mq = vec_elt_at_index (mif->tx_queues, qid);
  
+  if (tf->shared_queue)
+    clib_spinlock_lock (&mq->lockp);
+
+  from = vlib_frame_vector_args (frame);
+  n_left = frame->n_vectors;
    if (mif->flags & MEMIF_IF_FLAG_ZERO_COPY)
-    return memif_interface_tx_zc_inline (vm, node, frame, mif, mq, ptd);
+    n_left =
+      memif_interface_tx_zc_inline (vm, node, from, mif, mq, ptd, n_left);
    else if (mif->flags & MEMIF_IF_FLAG_IS_SLAVE)
-    return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_S2M,
-                                     mq, ptd);
+    n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_S2M,
+                                       mq, ptd, n_left);
    else
-    return memif_interface_tx_inline (vm, node, frame, mif, MEMIF_RING_M2S,
-                                     mq, ptd);
+    {
+      if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0))
+       n_left = memif_interface_tx_dma_inline (vm, node, from, mif,
+                                               MEMIF_RING_M2S, mq, n_left);
+      else
+       n_left = memif_interface_tx_inline (vm, node, from, mif,
+                                           MEMIF_RING_M2S, mq, ptd, n_left);
+    }
+
+  if (tf->shared_queue)
+    clib_spinlock_unlock (&mq->lockp);
+
+  if (n_left)
+    vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS,
+                     n_left);
+
+  if ((mq->ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0 && mq->int_fd > -1)
+    {
+      u64 b = 1;
+      int __clib_unused r = write (mq->int_fd, &b, sizeof (b));
+      mq->int_count++;
+    }
+
+  if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0))
+    {
+      if (n_left)
+       vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);
+    }
+  else if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0)
+    vlib_buffer_free (vm, from, frame->n_vectors);
+  else if (n_left)
+    vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);
+
+  return frame->n_vectors - n_left;
  }
  
  static void
@@ -474,15 +741,6 @@ memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
    return 0;
  }
  
-static clib_error_t *
-memif_subif_add_del_function (vnet_main_t * vnm,
-                             u32 hw_if_index,
-                             struct vnet_sw_interface_t *st, int is_add)
-{
-  /* Nothing for now */
-  return 0;
-}
-
  /* *INDENT-OFF* */
  VNET_DEVICE_CLASS (memif_device_class) = {
    .name = "memif",
@@ -494,7 +752,6 @@ VNET_DEVICE_CLASS (memif_device_class) = {
    .rx_redirect_to_node = memif_set_interface_next_node,
    .clear_counters = memif_clear_hw_interface_counters,
    .admin_up_down_function = memif_interface_admin_up_down,
-  .subif_add_del_function = memif_subif_add_del_function,
    .rx_mode_change_function = memif_interface_rx_mode_change,
  };
  