TAP: Jumbo-frame support. 57/457/3
authorOle Troan <ot@cisco.com>
Wed, 2 Mar 2016 09:01:43 +0000 (10:01 +0100)
committerGerrit Code Review <gerrit@fd.io>
Wed, 2 Mar 2016 22:26:41 +0000 (22:26 +0000)
Change-Id: I3a0726d7645f775738253d0a47ee04d94d138c9a
Signed-off-by: Ole Troan <ot@cisco.com>
vlib/vlib/buffer.c
vlib/vlib/buffer_funcs.h
vlib/vlib/dpdk_buffer.c
vnet/vnet/unix/tapcli.c

index 332b430..c28a0c5 100644 (file)
@@ -1248,6 +1248,14 @@ vlib_buffer_chain_append_data_with_alloc(vlib_main_t *vm,
   return copied;
 }
 
+/*
+ * Fills in the required rte_mbuf fields for chained buffers given a VLIB chain.
+ */
+void vlib_buffer_chain_validate (vlib_main_t * vm, vlib_buffer_t * b_first)
+{
+  return;
+}
+
 static void vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s)
 {
   vlib_main_t * vm;
@@ -1460,4 +1468,3 @@ VLIB_CLI_COMMAND (show_buffers_command, static) = {
   .short_help = "Show packet buffer allocation",
   .function = show_buffers,
 };
-
index 2ea506a..eea417a 100644 (file)
@@ -506,6 +506,7 @@ vlib_buffer_chain_append_data_with_alloc(vlib_main_t *vm,
                              vlib_buffer_t *first,
                              vlib_buffer_t **last,
                              void * data, u16 data_len);
+void vlib_buffer_chain_validate(vlib_main_t *vm, vlib_buffer_t *first);
 
 format_function_t format_vlib_buffer, format_vlib_buffer_and_data, format_vlib_buffer_contents;
 
index 145720d..04a6447 100644 (file)
@@ -910,6 +910,25 @@ vlib_buffer_chain_append_data_with_alloc(vlib_main_t *vm,
   return copied;
 }
 
+/*
+ * Fills in the required rte_mbuf fields for chained buffers given a VLIB chain.
+ */
+void vlib_buffer_chain_validate (vlib_main_t * vm, vlib_buffer_t * b_first)
+{
+  vlib_buffer_t *b = b_first, *prev = b_first;
+  struct rte_mbuf *mb_first = ((struct rte_mbuf *) b) - 1;
+
+  mb_first->pkt_len = mb_first-> data_len = b_first->current_length;
+  while (b->flags & VLIB_BUFFER_NEXT_PRESENT) {
+      b = vlib_get_buffer(vm, b->next_buffer);
+      mb_first->nb_segs++;
+      mb_first->pkt_len += b->current_length;
+      (((struct rte_mbuf *) prev) - 1)->next = (((struct rte_mbuf *) b) - 1);
+      (((struct rte_mbuf *) b) - 1)->data_len = b->current_length;
+      prev = b;
+  }
+}
+
 clib_error_t *
 vlib_buffer_pool_create(vlib_main_t * vm, unsigned num_mbufs,
                         unsigned mbuf_size, unsigned socket_id)
index 44af321..4421ffb 100644 (file)
@@ -139,7 +139,7 @@ tapcli_tx (vlib_main_t * vm,
       /* Use the sup intfc to finesse vlan subifs */
       hw = vnet_get_sup_hw_interface (tm->vnet_main, tx_sw_if_index);
       tx_sw_if_index = hw->sw_if_index;
-          
+
       p = hash_get (tm->tapcli_interface_index_by_sw_if_index, 
                     tx_sw_if_index);
       if (p == 0)
@@ -177,8 +177,7 @@ tapcli_tx (vlib_main_t * vm,
        clib_unix_warning ("writev");
     }
     
-  /* interface output path flattens buffer chains */
-  vlib_buffer_free_no_next (vm, buffers, n_packets);
+  vlib_buffer_free(vm, vlib_frame_vector_args(frame), frame->n_vectors);
     
   return n_packets;
 }
@@ -204,19 +203,14 @@ tapcli_rx (vlib_main_t * vm,
           vlib_frame_t * frame)
 {
   tapcli_main_t * tm = &tapcli_main;
-  vlib_buffer_t * b;
+  vlib_buffer_t *b_first;
   u32 bi;
-#if DPDK == 0
+  vlib_buffer_free_list_t *fl;
   const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
-  u32 free_list_index = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX;
-#else
-  dpdk_main_t * dm = &dpdk_main;
-  const uword buffer_size = MBUF_SIZE;
-  u32 free_list_index = dm->vlib_buffer_free_list_index;
-#endif
   static u32 * ready_interface_indices;
   tapcli_interface_t * ti;
   int i;
+  word n_bytes_in_packet;
 
   vec_reset_length (ready_interface_indices);
 
@@ -228,6 +222,8 @@ tapcli_rx (vlib_main_t * vm,
   if (vec_len (ready_interface_indices) == 0)
     return 1;
 
+  fl = vlib_buffer_get_free_list(vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
   for (i = 0; i < vec_len(ready_interface_indices); i++)
     {
       /* Clear the "interrupt" bit */
@@ -241,28 +237,20 @@ tapcli_rx (vlib_main_t * vm,
       {
         uword n_left = vec_len (tm->rx_buffers);
         uword n_alloc;
-
-        if (n_left < VLIB_FRAME_SIZE / 2)
-          {
-            if (! tm->rx_buffers)
-              vec_alloc (tm->rx_buffers, VLIB_FRAME_SIZE);
-
-            n_alloc = vlib_buffer_alloc_from_free_list 
-              (vm, tm->rx_buffers + n_left, VLIB_FRAME_SIZE - n_left, 
-               free_list_index);
-            _vec_len (tm->rx_buffers) = n_left + n_alloc;
-          }
+        if (n_left < VLIB_FRAME_SIZE / 2) {
+         vec_validate(tm->rx_buffers, VLIB_FRAME_SIZE + n_left - 1);
+         n_alloc = vlib_buffer_alloc(vm, &tm->rx_buffers[n_left], VLIB_FRAME_SIZE);
+         n_left += n_alloc;
+         _vec_len (tm->rx_buffers) = n_left;
+       }
       }
 
       /* Allocate RX buffers from end of rx_buffers.
          Turn them into iovecs to pass to readv. */
       {
         uword i_rx = vec_len (tm->rx_buffers) - 1;
-        vlib_buffer_t * b;
-        word j, n_bytes_left, n_bytes_in_packet;
-#if DPDK == 1
-        u8 out_of_dpdk_buffers = 0;
-#endif
+        vlib_buffer_t * b, *prev = 0;
+        word j, n_bytes_left;
 
         /* We need enough buffers left for an MTU sized packet. */
         if (PREDICT_FALSE(vec_len (tm->rx_buffers) < tm->mtu_buffers))
@@ -277,15 +265,11 @@ tapcli_rx (vlib_main_t * vm,
         for (j = 0; j < tm->mtu_buffers; j++)
           {
             b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - j]);
+           vlib_buffer_init_for_free_list (b, fl);
             tm->iovecs[j].iov_base = b->data;
             tm->iovecs[j].iov_len = buffer_size;
           }
 
-#if DPDK == 1
-        if (PREDICT_FALSE(out_of_dpdk_buffers == 1))
-          continue;
-#endif
-
         n_bytes_left = readv (ti->unix_fd, tm->iovecs, tm->mtu_buffers);
         n_bytes_in_packet = n_bytes_left;
         if (n_bytes_left <= 0)
@@ -296,36 +280,29 @@ tapcli_rx (vlib_main_t * vm,
           }
         
         bi = tm->rx_buffers[i_rx];
-        while (1)
-          {
-            b = vlib_get_buffer (vm, tm->rx_buffers[i_rx]);
+       b = b_first = vlib_get_buffer (vm, tm->rx_buffers[i_rx]);
 
-            b->flags = 0;
-            b->current_data = 0;
-            b->current_length = n_bytes_left < buffer_size 
-              ? n_bytes_left : buffer_size;
+        while (1) {
+         u32 bi;
 
-            n_bytes_left -= buffer_size;
+         vlib_buffer_init_for_free_list(b, fl);
 
-       if (n_bytes_left <= 0)
-          {
-#if DPDK == 1
-              struct rte_mbuf *mb = (struct rte_mbuf *)(b - 1);
-              rte_pktmbuf_data_len (mb) = n_bytes_in_packet;
-              rte_pktmbuf_pkt_len (mb) = n_bytes_in_packet;
-#endif
-            break;
-          }
-        
-            i_rx--;
-            b->flags |= VLIB_BUFFER_NEXT_PRESENT;
-            b->next_buffer = tm->rx_buffers[i_rx];
-#if DPDK == 1
-            ASSERT(0); /* $$$$ fixme */
-            /* ((struct rte_pktmbuf *)(b->mb))->next = 
-               vlib_get_buffer (vm, tm->rx_buffers[i_rx])->mb; */
-#endif
-          }
+         b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
+         n_bytes_left -= buffer_size;
+
+         if (prev) {
+           prev->next_buffer = bi;
+           prev->flags |= VLIB_BUFFER_NEXT_PRESENT;
+         }
+         prev = b;
+
+         /* last segment */
+         if (n_bytes_left <= 0) break;
+
+         i_rx--;
+         bi = tm->rx_buffers[i_rx];
+         b = vlib_get_buffer (vm, bi);
+       }
 
         /* Interface counters for tapcli interface. */
         vlib_increment_combined_counter 
@@ -338,23 +315,27 @@ tapcli_rx (vlib_main_t * vm,
         _vec_len (tm->rx_buffers) = i_rx;
       }
 
-      b = vlib_get_buffer (vm, bi);
+      b_first->total_length_not_including_first_buffer = (n_bytes_in_packet > buffer_size) ? n_bytes_in_packet - buffer_size : 0;
+      b_first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+      /* Ensure mbufs are updated */
+      vlib_buffer_chain_validate(vm, b_first);
 
       /*
        * Turn this on if you run into
        * "bad monkey" contexts, and you want to know exactly
        * which nodes they've visited... See .../vlib/vlib/buffer.h
        */
-      VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b);
+      VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b_first);
 
       {
         u32 next_index;
         uword n_trace = vlib_get_trace_count (vm, node);
 
-        vnet_buffer (b)->sw_if_index[VLIB_RX] = ti->sw_if_index;
-        vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32)~0;
+        vnet_buffer (b_first)->sw_if_index[VLIB_RX] = ti->sw_if_index;
+        vnet_buffer (b_first)->sw_if_index[VLIB_TX] = (u32)~0;
 
-        b->error = node->errors[0];
+        b_first->error = node->errors[0];
 
         {
           next_index = TAPCLI_RX_NEXT_ETHERNET_INPUT;
@@ -370,18 +351,17 @@ tapcli_rx (vlib_main_t * vm,
             next_index = TAPCLI_RX_NEXT_DROP;
         }
 
-
         vlib_set_next_frame_buffer (vm, node, next_index, bi);
         
         if (n_trace > 0)
           {
             vlib_trace_buffer (vm, node, next_index,
-                               b, /* follow_chain */ 1);
+                               b_first, /* follow_chain */ 1);
             vlib_set_trace_count (vm, node, n_trace - 1);
           }
       }
     }
-  
+
   return 1;
 }
 
@@ -433,11 +413,7 @@ static clib_error_t *
 tapcli_config (vlib_main_t * vm, unformat_input_t * input)
 {
   tapcli_main_t *tm = &tapcli_main;
-#if DPDK == 0
   const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
-#else
-  const uword buffer_size = MBUF_SIZE;
-#endif
 
   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
@@ -604,6 +580,7 @@ VNET_DEVICE_CLASS (tapcli_dev_class,static) = {
   .rx_redirect_to_node = tapcli_set_interface_next_node,
   .name_renumber = tap_name_renumber,
   .admin_up_down_function = tapcli_interface_admin_up_down,
+  .no_flatten_output_chains = 1,
 };
 
 int vnet_tap_dump_ifs (tapcli_interface_details_t **out_tapids)
@@ -790,6 +767,7 @@ int vnet_tap_connect (vlib_main_t * vm, u8 * intfc_name, u8 *hwaddr_arg,
   {
     vnet_hw_interface_t * hw;
     hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index);
+    hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] = tm->mtu_bytes - sizeof(ethernet_header_t);
     ti->sw_if_index = hw->sw_if_index;
     if (sw_if_indexp)
       *sw_if_indexp = hw->sw_if_index;
@@ -1004,7 +982,7 @@ tap_connect_command_fn (vlib_main_t * vm,
   clib_error_t * error;
   int user_hwaddr = 0;
   u8 hwaddr[6];
-    
+
   if (tm->is_disabled)
     {
       return clib_error_return (0, "device disabled...");
@@ -1150,6 +1128,7 @@ tap_connect_command_fn (vlib_main_t * vm,
     vnet_hw_interface_t * hw;
     hw = vnet_get_hw_interface (tm->vnet_main, ti->hw_if_index);
     ti->sw_if_index = hw->sw_if_index;
+    hw->max_l3_packet_bytes[VLIB_RX] = hw->max_l3_packet_bytes[VLIB_TX] = tm->mtu_bytes - sizeof(ethernet_header_t);
   }
 
   ti->active = 1;
@@ -1196,5 +1175,3 @@ tapcli_init (vlib_main_t * vm)
 }
 
 VLIB_INIT_FUNCTION (tapcli_init);
-
-