devices: add support for af-packet v2
[vpp.git] / src / vnet / devices / af_packet / device.c
index 1d14c9b..2e9b7a4 100644 (file)
@@ -61,7 +61,12 @@ typedef struct
   u32 buffer_index;
   u32 hw_if_index;
   u16 queue_id;
-  tpacket3_hdr_t tph;
+  u8 is_v2;
+  union
+  {
+    tpacket2_hdr_t tph2;
+    tpacket3_hdr_t tph3;
+  };
   vnet_virtio_net_hdr_t vnet_hdr;
   vlib_buffer_t buffer;
 } af_packet_tx_trace_t;
@@ -91,7 +96,8 @@ format_af_packet_device (u8 * s, va_list * args)
   af_packet_queue_t *rx_queue = 0;
   af_packet_queue_t *tx_queue = 0;
 
-  s = format (s, "Linux PACKET socket interface");
+  s = format (s, "Linux PACKET socket interface %s",
+             (apif->version == TPACKET_V2) ? "v2" : "v3");
   s = format (s, "\n%UFEATURES:", format_white_space, indent);
   if (apif->is_qdisc_bypass_enabled)
     s = format (s, "\n%Uqdisc-bpass-enabled", format_white_space, indent + 2);
@@ -102,17 +108,20 @@ format_af_packet_device (u8 * s, va_list * args)
 
   vec_foreach (rx_queue, apif->rx_queues)
     {
-      u32 rx_block_size = rx_queue->rx_req->tp_block_size;
-      u32 rx_frame_size = rx_queue->rx_req->tp_frame_size;
-      u32 rx_frame_nr = rx_queue->rx_req->tp_frame_nr;
-      u32 rx_block_nr = rx_queue->rx_req->tp_block_nr;
+      u32 rx_block_size = rx_queue->rx_req->req.tp_block_size;
+      u32 rx_frame_size = rx_queue->rx_req->req.tp_frame_size;
+      u32 rx_frame_nr = rx_queue->rx_req->req.tp_frame_nr;
+      u32 rx_block_nr = rx_queue->rx_req->req.tp_block_nr;
 
       s = format (s, "\n%URX Queue %u:", format_white_space, indent,
                  rx_queue->queue_id);
       s = format (s, "\n%Ublock size:%d nr:%d  frame size:%d nr:%d",
                  format_white_space, indent + 2, rx_block_size, rx_block_nr,
                  rx_frame_size, rx_frame_nr);
-      s = format (s, " next block:%d", rx_queue->next_rx_block);
+      if (apif->version == TPACKET_V2)
+       s = format (s, " next frame:%d", rx_queue->next_rx_frame);
+      else
+       s = format (s, " next block:%d", rx_queue->next_rx_block);
       if (rx_queue->is_rx_pending)
        {
          s = format (
@@ -125,15 +134,16 @@ format_af_packet_device (u8 * s, va_list * args)
   vec_foreach (tx_queue, apif->tx_queues)
     {
       clib_spinlock_lock (&tx_queue->lockp);
-      u32 tx_block_sz = tx_queue->tx_req->tp_block_size;
-      u32 tx_frame_sz = tx_queue->tx_req->tp_frame_size;
-      u32 tx_frame_nr = tx_queue->tx_req->tp_frame_nr;
-      u32 tx_block_nr = tx_queue->tx_req->tp_block_nr;
+      u32 tx_block_sz = tx_queue->tx_req->req.tp_block_size;
+      u32 tx_frame_sz = tx_queue->tx_req->req.tp_frame_size;
+      u32 tx_frame_nr = tx_queue->tx_req->req.tp_frame_nr;
+      u32 tx_block_nr = tx_queue->tx_req->req.tp_block_nr;
       int block = 0;
       int n_send_req = 0, n_avail = 0, n_sending = 0, n_tot = 0, n_wrong = 0;
       u8 *tx_block_start = tx_queue->tx_ring[block];
       u32 tx_frame = tx_queue->next_tx_frame;
-      tpacket3_hdr_t *tph;
+      tpacket3_hdr_t *tph3;
+      tpacket2_hdr_t *tph2;
 
       s = format (s, "\n%UTX Queue %u:", format_white_space, indent,
                  tx_queue->queue_id);
@@ -141,22 +151,40 @@ format_af_packet_device (u8 * s, va_list * args)
                  format_white_space, indent + 2, tx_block_sz, tx_block_nr,
                  tx_frame_sz, tx_frame_nr);
       s = format (s, " next frame:%d", tx_queue->next_tx_frame);
-
-      do
-       {
-         tph = (tpacket3_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz);
-         tx_frame = (tx_frame + 1) % tx_frame_nr;
-         if (tph->tp_status == 0)
-           n_avail++;
-         else if (tph->tp_status & TP_STATUS_SEND_REQUEST)
-           n_send_req++;
-         else if (tph->tp_status & TP_STATUS_SENDING)
-           n_sending++;
-         else
-           n_wrong++;
-         n_tot++;
-       }
-      while (tx_frame != tx_queue->next_tx_frame);
+      if (apif->version == TPACKET_V3)
+       do
+         {
+           tph3 =
+             (tpacket3_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz);
+           tx_frame = (tx_frame + 1) % tx_frame_nr;
+           if (tph3->tp_status == 0)
+             n_avail++;
+           else if (tph3->tp_status & TP_STATUS_SEND_REQUEST)
+             n_send_req++;
+           else if (tph3->tp_status & TP_STATUS_SENDING)
+             n_sending++;
+           else
+             n_wrong++;
+           n_tot++;
+         }
+       while (tx_frame != tx_queue->next_tx_frame);
+      else
+       do
+         {
+           tph2 =
+             (tpacket2_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz);
+           tx_frame = (tx_frame + 1) % tx_frame_nr;
+           if (tph2->tp_status == 0)
+             n_avail++;
+           else if (tph2->tp_status & TP_STATUS_SEND_REQUEST)
+             n_send_req++;
+           else if (tph2->tp_status & TP_STATUS_SENDING)
+             n_sending++;
+           else
+             n_wrong++;
+           n_tot++;
+         }
+       while (tx_frame != tx_queue->next_tx_frame);
       s =
        format (s, "\n%Uavailable:%d request:%d sending:%d wrong:%d total:%d",
                format_white_space, indent + 2, n_avail, n_send_req, n_sending,
@@ -177,24 +205,46 @@ format_af_packet_tx_trace (u8 *s, va_list *va)
   s = format (s, "af_packet: hw_if_index %u tx-queue %u", t->hw_if_index,
              t->queue_id);
 
-  s =
-    format (s,
-           "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
-           "\n%Usec 0x%x nsec 0x%x vlan %U"
+  if (t->is_v2)
+    {
+      s = format (
+       s,
+       "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
+       "\n%Usec 0x%x nsec 0x%x vlan %U"
 #ifdef TP_STATUS_VLAN_TPID_VALID
-           " vlan_tpid %u"
+       " vlan_tpid %u"
 #endif
-           ,
-           format_white_space, indent + 2, format_white_space, indent + 4,
-           t->tph.tp_status, t->tph.tp_len, t->tph.tp_snaplen, t->tph.tp_mac,
-           t->tph.tp_net, format_white_space, indent + 4, t->tph.tp_sec,
-           t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.hv1.tp_vlan_tci
+       ,
+       format_white_space, indent + 2, format_white_space, indent + 4,
+       t->tph2.tp_status, t->tph2.tp_len, t->tph2.tp_snaplen, t->tph2.tp_mac,
+       t->tph2.tp_net, format_white_space, indent + 4, t->tph2.tp_sec,
+       t->tph2.tp_nsec, format_ethernet_vlan_tci, t->tph2.tp_vlan_tci
 #ifdef TP_STATUS_VLAN_TPID_VALID
-           ,
-           t->tph.hv1.tp_vlan_tpid
+       ,
+       t->tph2.tp_vlan_tpid
 #endif
-    );
-
+      );
+    }
+  else
+    {
+      s = format (
+       s,
+       "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
+       "\n%Usec 0x%x nsec 0x%x vlan %U"
+#ifdef TP_STATUS_VLAN_TPID_VALID
+       " vlan_tpid %u"
+#endif
+       ,
+       format_white_space, indent + 2, format_white_space, indent + 4,
+       t->tph3.tp_status, t->tph3.tp_len, t->tph3.tp_snaplen, t->tph3.tp_mac,
+       t->tph3.tp_net, format_white_space, indent + 4, t->tph3.tp_sec,
+       t->tph3.tp_nsec, format_ethernet_vlan_tci, t->tph3.hv1.tp_vlan_tci
+#ifdef TP_STATUS_VLAN_TPID_VALID
+       ,
+       t->tph3.hv1.tp_vlan_tpid
+#endif
+      );
+    }
   s = format (s,
              "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u"
              "\n%Ugso_size %u csum_start %u csum_offset %u",
@@ -214,17 +264,23 @@ format_af_packet_tx_trace (u8 *s, va_list *va)
 
 static void
 af_packet_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
-                   vlib_buffer_t *b0, u32 bi, tpacket3_hdr_t *tph,
+                   vlib_buffer_t *b0, u32 bi, void *tph,
                    vnet_virtio_net_hdr_t *vnet_hdr, u32 hw_if_index,
-                   u16 queue_id)
+                   u16 queue_id, u8 is_v2)
 {
   af_packet_tx_trace_t *t;
   t = vlib_add_trace (vm, node, b0, sizeof (t[0]));
   t->hw_if_index = hw_if_index;
   t->queue_id = queue_id;
   t->buffer_index = bi;
+  t->is_v2 = is_v2;
 
-  clib_memcpy_fast (&t->tph, tph, sizeof (*tph));
+  if (is_v2)
+    clib_memcpy_fast (&t->tph2, (tpacket2_hdr_t *) tph,
+                     sizeof (tpacket2_hdr_t));
+  else
+    clib_memcpy_fast (&t->tph3, (tpacket3_hdr_t *) tph,
+                     sizeof (tpacket3_hdr_t));
   clib_memcpy_fast (&t->vnet_hdr, vnet_hdr, sizeof (*vnet_hdr));
   clib_memcpy_fast (&t->buffer, b0, sizeof (*b0) - sizeof (b0->pre_data));
   clib_memcpy_fast (t->buffer.pre_data, vlib_buffer_get_current (b0),
@@ -337,95 +393,177 @@ VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm,
   af_packet_queue_t *tx_queue = vec_elt_at_index (apif->tx_queues, queue_id);
   u32 block = 0, frame_size = 0, frame_num = 0, tx_frame = 0;
   u8 *block_start = 0;
-  tpacket3_hdr_t *tph = 0;
+  tpacket3_hdr_t *tph3 = 0;
+  tpacket2_hdr_t *tph2 = 0;
   u32 frame_not_ready = 0;
   u8 is_cksum_gso_enabled = (apif->is_cksum_gso_enabled == 1) ? 1 : 0;
+  u32 tpacket_align = 0;
+  u8 is_v2 = (apif->version == TPACKET_V2) ? 1 : 0;
 
   if (tf->shared_queue)
     clib_spinlock_lock (&tx_queue->lockp);
 
-  frame_size = tx_queue->tx_req->tp_frame_size;
-  frame_num = tx_queue->tx_req->tp_frame_nr;
+  frame_size = tx_queue->tx_req->req.tp_frame_size;
+  frame_num = tx_queue->tx_req->req.tp_frame_nr;
   block_start = tx_queue->tx_ring[block];
   tx_frame = tx_queue->next_tx_frame;
-
-  while (n_left)
+  if (is_v2)
     {
-      u32 len;
-      vnet_virtio_net_hdr_t *vnet_hdr = 0;
-      u32 offset = 0;
-      vlib_buffer_t *b0 = 0, *b0_first = 0;
-      u32 bi, bi_first;
-
-      bi = bi_first = buffers[0];
-      n_left--;
-      buffers++;
-
-      tph = (tpacket3_hdr_t *) (block_start + tx_frame * frame_size);
-      if (PREDICT_FALSE (tph->tp_status &
-                        (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)))
+      tpacket_align = TPACKET_ALIGN (sizeof (tpacket2_hdr_t));
+      while (n_left)
        {
-         frame_not_ready++;
-         goto next;
-       }
+         u32 len;
+         vnet_virtio_net_hdr_t *vnet_hdr = 0;
+         u32 offset = 0;
+         vlib_buffer_t *b0 = 0, *b0_first = 0;
+         u32 bi, bi_first;
+
+         bi = bi_first = buffers[0];
+         n_left--;
+         buffers++;
+
+         tph2 = (tpacket2_hdr_t *) (block_start + tx_frame * frame_size);
+         if (PREDICT_FALSE (tph2->tp_status &
+                            (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)))
+           {
+             frame_not_ready++;
+             goto nextv2;
+           }
 
-      b0_first = b0 = vlib_get_buffer (vm, bi);
+         b0_first = b0 = vlib_get_buffer (vm, bi);
 
-      if (PREDICT_TRUE (is_cksum_gso_enabled))
-       {
-         vnet_hdr =
-           (vnet_virtio_net_hdr_t *) ((u8 *) tph + TPACKET_ALIGN (sizeof (
-                                                     tpacket3_hdr_t)));
+         if (PREDICT_TRUE (is_cksum_gso_enabled))
+           {
+             vnet_hdr =
+               (vnet_virtio_net_hdr_t *) ((u8 *) tph2 + tpacket_align);
 
-         clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t));
-         offset = sizeof (vnet_virtio_net_hdr_t);
+             clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t));
+             offset = sizeof (vnet_virtio_net_hdr_t);
 
-         if (b0->flags & VNET_BUFFER_F_GSO)
-           fill_gso_offload (b0, vnet_hdr);
-         else if (b0->flags & VNET_BUFFER_F_OFFLOAD)
-           fill_cksum_offload (b0, vnet_hdr);
-       }
-
-      len = b0->current_length;
-      clib_memcpy_fast ((u8 *) tph + TPACKET_ALIGN (sizeof (tpacket3_hdr_t)) +
-                         offset,
-                       vlib_buffer_get_current (b0), len);
-      offset += len;
+             if (b0->flags & VNET_BUFFER_F_GSO)
+               fill_gso_offload (b0, vnet_hdr);
+             else if (b0->flags & VNET_BUFFER_F_OFFLOAD)
+               fill_cksum_offload (b0, vnet_hdr);
+           }
 
-      while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
-       {
-         b0 = vlib_get_buffer (vm, b0->next_buffer);
          len = b0->current_length;
-         clib_memcpy_fast ((u8 *) tph +
-                             TPACKET_ALIGN (sizeof (tpacket3_hdr_t)) + offset,
+         clib_memcpy_fast ((u8 *) tph2 + tpacket_align + offset,
                            vlib_buffer_get_current (b0), len);
          offset += len;
-       }
 
-      tph->tp_len = tph->tp_snaplen = offset;
-      tph->tp_status = TP_STATUS_SEND_REQUEST;
-      n_sent++;
+         while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
+           {
+             b0 = vlib_get_buffer (vm, b0->next_buffer);
+             len = b0->current_length;
+             clib_memcpy_fast ((u8 *) tph2 + tpacket_align + offset,
+                               vlib_buffer_get_current (b0), len);
+             offset += len;
+           }
+
+         tph2->tp_len = tph2->tp_snaplen = offset;
+         tph2->tp_status = TP_STATUS_SEND_REQUEST;
+         n_sent++;
 
-      if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED))
+         if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             if (PREDICT_TRUE (is_cksum_gso_enabled))
+               af_packet_tx_trace (vm, node, b0_first, bi_first, tph2,
+                                   vnet_hdr, apif->hw_if_index, queue_id, 1);
+             else
+               {
+                 vnet_virtio_net_hdr_t vnet_hdr2 = {};
+                 af_packet_tx_trace (vm, node, b0_first, bi_first, tph2,
+                                     &vnet_hdr2, apif->hw_if_index, queue_id,
+                                     1);
+               }
+           }
+         tx_frame = (tx_frame + 1) % frame_num;
+
+       nextv2:
+         /* check if we've exhausted the ring */
+         if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
+           break;
+       }
+    }
+  else
+    {
+      tpacket_align = TPACKET_ALIGN (sizeof (tpacket3_hdr_t));
+
+      while (n_left)
        {
+         u32 len;
+         vnet_virtio_net_hdr_t *vnet_hdr = 0;
+         u32 offset = 0;
+         vlib_buffer_t *b0 = 0, *b0_first = 0;
+         u32 bi, bi_first;
+
+         bi = bi_first = buffers[0];
+         n_left--;
+         buffers++;
+
+         tph3 = (tpacket3_hdr_t *) (block_start + tx_frame * frame_size);
+         if (PREDICT_FALSE (tph3->tp_status &
+                            (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)))
+           {
+             frame_not_ready++;
+             goto nextv3;
+           }
+
+         b0_first = b0 = vlib_get_buffer (vm, bi);
+
          if (PREDICT_TRUE (is_cksum_gso_enabled))
-           af_packet_tx_trace (vm, node, b0_first, bi_first, tph, vnet_hdr,
-                               apif->hw_if_index, queue_id);
-         else
            {
-             vnet_virtio_net_hdr_t vnet_hdr2 = {};
-             af_packet_tx_trace (vm, node, b0_first, bi_first, tph,
-                                 &vnet_hdr2, apif->hw_if_index, queue_id);
+             vnet_hdr =
+               (vnet_virtio_net_hdr_t *) ((u8 *) tph3 + tpacket_align);
+
+             clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t));
+             offset = sizeof (vnet_virtio_net_hdr_t);
+
+             if (b0->flags & VNET_BUFFER_F_GSO)
+               fill_gso_offload (b0, vnet_hdr);
+             else if (b0->flags & VNET_BUFFER_F_OFFLOAD)
+               fill_cksum_offload (b0, vnet_hdr);
            }
-       }
-      tx_frame = (tx_frame + 1) % frame_num;
 
-    next:
-      /* check if we've exhausted the ring */
-      if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
-       break;
-    }
+         len = b0->current_length;
+         clib_memcpy_fast ((u8 *) tph3 + tpacket_align + offset,
+                           vlib_buffer_get_current (b0), len);
+         offset += len;
+
+         while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
+           {
+             b0 = vlib_get_buffer (vm, b0->next_buffer);
+             len = b0->current_length;
+             clib_memcpy_fast ((u8 *) tph3 + tpacket_align + offset,
+                               vlib_buffer_get_current (b0), len);
+             offset += len;
+           }
+
+         tph3->tp_len = tph3->tp_snaplen = offset;
+         tph3->tp_status = TP_STATUS_SEND_REQUEST;
+         n_sent++;
 
+         if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             if (PREDICT_TRUE (is_cksum_gso_enabled))
+               af_packet_tx_trace (vm, node, b0_first, bi_first, tph3,
+                                   vnet_hdr, apif->hw_if_index, queue_id, 0);
+             else
+               {
+                 vnet_virtio_net_hdr_t vnet_hdr2 = {};
+                 af_packet_tx_trace (vm, node, b0_first, bi_first, tph3,
+                                     &vnet_hdr2, apif->hw_if_index, queue_id,
+                                     0);
+               }
+           }
+         tx_frame = (tx_frame + 1) % frame_num;
+
+       nextv3:
+         /* check if we've exhausted the ring */
+         if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
+           break;
+       }
+    }
   CLIB_MEMORY_BARRIER ();
 
   if (PREDICT_TRUE (n_sent || tx_queue->is_tx_pending))