{
vlib_main_t *vm = vlib_get_main ();
vlib_buffer_main_t *bm = vm->buffer_main;
- struct rte_mbuf *first_mb = 0, *new_mb, *pkt_mb, **prev_mb_next = 0;
- u8 nb_segs, nb_segs_left;
- u32 copy_bytes;
+ struct rte_mbuf **mbufs = 0, *s, *d;
+ u8 nb_segs;
unsigned socket_id = rte_socket_id ();
+ int i;
ASSERT (bm->pktmbuf_pools[socket_id]);
- pkt_mb = rte_mbuf_from_vlib_buffer (b);
- nb_segs = pkt_mb->nb_segs;
- for (nb_segs_left = nb_segs; nb_segs_left; nb_segs_left--)
- {
- if (PREDICT_FALSE (pkt_mb == 0))
- {
- clib_warning ("Missing %d mbuf chain segment(s): "
- "(nb_segs = %d, nb_segs_left = %d)!",
- nb_segs - nb_segs_left, nb_segs, nb_segs_left);
- if (first_mb)
- rte_pktmbuf_free (first_mb);
- return NULL;
- }
- new_mb = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]);
- if (PREDICT_FALSE (new_mb == 0))
- {
- if (first_mb)
- rte_pktmbuf_free (first_mb);
- return NULL;
- }
-
- /*
- * Copy packet info into 1st segment.
- */
- if (first_mb == 0)
- {
- first_mb = new_mb;
- rte_pktmbuf_pkt_len (first_mb) = pkt_mb->pkt_len;
- first_mb->nb_segs = pkt_mb->nb_segs;
- first_mb->port = pkt_mb->port;
-#ifdef DAW_FIXME // TX Offload support TBD
- first_mb->vlan_macip = pkt_mb->vlan_macip;
- first_mb->hash = pkt_mb->hash;
- first_mb->ol_flags = pkt_mb->ol_flags
-#endif
- }
- else
- {
- ASSERT (prev_mb_next != 0);
- *prev_mb_next = new_mb;
- }
-
- /*
- * Copy packet segment data into new mbuf segment.
- */
- rte_pktmbuf_data_len (new_mb) = pkt_mb->data_len;
- copy_bytes = pkt_mb->data_len + RTE_PKTMBUF_HEADROOM;
- ASSERT (copy_bytes <= pkt_mb->buf_len);
- clib_memcpy (new_mb->buf_addr, pkt_mb->buf_addr, copy_bytes);
+ s = rte_mbuf_from_vlib_buffer (b);
+ nb_segs = s->nb_segs;
+ vec_validate (mbufs, nb_segs - 1);
- prev_mb_next = &new_mb->next;
- pkt_mb = pkt_mb->next;
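+  /* allocate a destination mbuf for each source segment in one bulk call */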
+ if (rte_pktmbuf_alloc_bulk (bm->pktmbuf_pools[socket_id], mbufs, nb_segs))
+ {
+ vec_free (mbufs);
+ return 0;
}
- ASSERT (pkt_mb == 0);
- __rte_mbuf_sanity_check (first_mb, 1);
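+  /* copy the first segment's metadata and data, including headroom */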
+ d = mbufs[0];
+ d->nb_segs = s->nb_segs;
+ d->data_len = s->data_len;
+ d->pkt_len = s->pkt_len;
+ d->data_off = s->data_off;
+ clib_memcpy (d->buf_addr, s->buf_addr, RTE_PKTMBUF_HEADROOM + s->data_len);
- return first_mb;
-}
-
-struct rte_mbuf *
-dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b)
-{
- vlib_main_t *vm = vlib_get_main ();
- vlib_buffer_main_t *bm = vm->buffer_main;
- struct rte_mbuf *first_mb = 0, *new_mb, *pkt_mb, **prev_mb_next = 0;
- u8 nb_segs, nb_segs_left;
- unsigned socket_id = rte_socket_id ();
-
- ASSERT (bm->pktmbuf_pools[socket_id]);
- pkt_mb = rte_mbuf_from_vlib_buffer (b);
- nb_segs = pkt_mb->nb_segs;
- for (nb_segs_left = nb_segs; nb_segs_left; nb_segs_left--)
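+  /* chain the remaining segments and copy their data */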
+ for (i = 1; i < nb_segs; i++)
{
- if (PREDICT_FALSE (pkt_mb == 0))
- {
- clib_warning ("Missing %d mbuf chain segment(s): "
- "(nb_segs = %d, nb_segs_left = %d)!",
- nb_segs - nb_segs_left, nb_segs, nb_segs_left);
- if (first_mb)
- rte_pktmbuf_free (first_mb);
- return NULL;
- }
- new_mb = rte_pktmbuf_clone (pkt_mb, bm->pktmbuf_pools[socket_id]);
- if (PREDICT_FALSE (new_mb == 0))
- {
- if (first_mb)
- rte_pktmbuf_free (first_mb);
- return NULL;
- }
-
- /*
- * Copy packet info into 1st segment.
- */
- if (first_mb == 0)
- {
- first_mb = new_mb;
- rte_pktmbuf_pkt_len (first_mb) = pkt_mb->pkt_len;
- first_mb->nb_segs = pkt_mb->nb_segs;
- first_mb->port = pkt_mb->port;
-#ifdef DAW_FIXME // TX Offload support TBD
- first_mb->vlan_macip = pkt_mb->vlan_macip;
- first_mb->hash = pkt_mb->hash;
- first_mb->ol_flags = pkt_mb->ol_flags
-#endif
- }
- else
- {
- ASSERT (prev_mb_next != 0);
- *prev_mb_next = new_mb;
- }
-
- /*
- * Copy packet segment data into new mbuf segment.
- */
- rte_pktmbuf_data_len (new_mb) = pkt_mb->data_len;
-
- prev_mb_next = &new_mb->next;
- pkt_mb = pkt_mb->next;
+ d->next = mbufs[i];
+ d = mbufs[i];
+ s = s->next;
+ d->data_len = s->data_len;
+ clib_memcpy (d->buf_addr, s->buf_addr,
+ RTE_PKTMBUF_HEADROOM + s->data_len);
}
- ASSERT (pkt_mb == 0);
- __rte_mbuf_sanity_check (first_mb, 1);
-
- return first_mb;
-
-
+ d = mbufs[0];
+ vec_free (mbufs);
+ return d;
}
static void
sizeof (t0->buffer.pre_data));
}
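+/* Bring the rte_mbuf metadata (segment chain, lengths, data offset) in
+   sync with the vlib_buffer chain before the packet is handed to the PMD. */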
+static_always_inline void
+dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b,
+ int maybe_multiseg)
+{
+ struct rte_mbuf *mb, *first_mb, *last_mb;
+
+  /* buffer is coming from a non-DPDK source, so we need to initialize
+     the rte_mbuf header */
+ if (PREDICT_FALSE ((b->flags & VNET_BUFFER_RTE_MBUF_VALID) == 0))
+ {
+ vlib_buffer_t *b2 = b;
+ last_mb = mb = rte_mbuf_from_vlib_buffer (b2);
+ rte_pktmbuf_reset (mb);
+ while (maybe_multiseg && (b2->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ b2 = vlib_get_buffer (vm, b2->next_buffer);
+ mb = rte_mbuf_from_vlib_buffer (b2);
+ last_mb->next = mb;
+ last_mb = mb;
+ rte_pktmbuf_reset (mb);
+ }
+ }
+
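+  /* refresh mbuf lengths and offsets from the vlib_buffer chain */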
+ first_mb = mb = rte_mbuf_from_vlib_buffer (b);
+ first_mb->nb_segs = 1;
+ mb->data_len = b->current_length;
+ mb->pkt_len = maybe_multiseg ? vlib_buffer_length_in_chain (vm, b) :
+ b->current_length;
+ mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data;
+
+ while (maybe_multiseg && (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ mb = rte_mbuf_from_vlib_buffer (b);
+ mb->data_len = b->current_length;
+ mb->pkt_len = b->current_length;
+ mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data;
+ first_mb->nb_segs++;
+ }
+}
+
/*
 * This function calls the DPDK tx_burst function to transmit the packets
 * on the tx_vector. It manages a lock per-device if the device does not
 * support multiple queues. It returns the number of packets untransmitted
 * on the tx_vector. If all packets are transmitted (the normal case), the
 * function returns 0.
*
- * The tx_burst function may not be able to transmit all packets because the
- * dpdk ring is full. If a flowcontrol callback function has been configured
- * then the function simply returns. If no callback has been configured, the
- * function will retry calling tx_burst with the remaining packets. This will
- * continue until all packets are transmitted or tx_burst indicates no packets
- * could be transmitted. (The caller can drop the remaining packets.)
- *
* The function assumes there is at least one packet on the tx_vector.
*/
static_always_inline
* calls due to a ring wrap.
*/
ASSERT (n_packets < xd->nb_tx_desc);
+ ASSERT (ring->tx_tail == 0);
- /*
- * If there is no flowcontrol callback, there is only temporary buffering
- * on the tx_vector and so the tail should always be 0.
- */
- ASSERT (dm->flowcontrol_callback || ring->tx_tail == 0);
-
- /*
- * If there is a flowcontrol callback, don't retry any incomplete tx_bursts.
- * Apply backpressure instead. If there is no callback, keep retrying until
- * a tx_burst sends no packets. n_retry of 255 essentially means no retry
- * limit.
- */
- n_retry = dm->flowcontrol_callback ? 0 : 255;
-
+ n_retry = 16;
queue_id = vm->cpu_index;
do
queue_id = (queue_id + 1) % xd->tx_q_used;
}
- if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */
+ if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */
{
- if (PREDICT_TRUE (tx_head > tx_tail))
- {
- /* no wrap, transmit in one burst */
- dpdk_device_hqos_per_worker_thread_t *hqos =
- &xd->hqos_wt[vm->cpu_index];
-
- dpdk_hqos_metadata_set (hqos,
- &tx_vector[tx_tail], tx_head - tx_tail);
- rv = rte_ring_sp_enqueue_burst (hqos->swq,
- (void **) &tx_vector[tx_tail],
- (uint16_t) (tx_head - tx_tail));
- }
- else
- {
- /*
- * This can only happen if there is a flowcontrol callback.
- * We need to split the transmit into two calls: one for
- * the packets up to the wrap point, and one to continue
- * at the start of the ring.
- * Transmit pkts up to the wrap point.
- */
- dpdk_device_hqos_per_worker_thread_t *hqos =
- &xd->hqos_wt[vm->cpu_index];
-
- dpdk_hqos_metadata_set (hqos,
- &tx_vector[tx_tail],
- xd->nb_tx_desc - tx_tail);
- rv = rte_ring_sp_enqueue_burst (hqos->swq,
- (void **) &tx_vector[tx_tail],
- (uint16_t) (xd->nb_tx_desc -
- tx_tail));
- /*
- * If we transmitted everything we wanted, then allow 1 retry
- * so we can try to transmit the rest. If we didn't transmit
- * everything, stop now.
- */
- n_retry = (rv == xd->nb_tx_desc - tx_tail) ? 1 : 0;
- }
+ /* no wrap, transmit in one burst */
+ dpdk_device_hqos_per_worker_thread_t *hqos =
+ &xd->hqos_wt[vm->cpu_index];
+
+ dpdk_hqos_metadata_set (hqos,
+ &tx_vector[tx_tail], tx_head - tx_tail);
+ rv = rte_ring_sp_enqueue_burst (hqos->swq,
+ (void **) &tx_vector[tx_tail],
+ (uint16_t) (tx_head - tx_tail));
}
else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD))
{
- if (PREDICT_TRUE (tx_head > tx_tail))
- {
- /* no wrap, transmit in one burst */
- rv = rte_eth_tx_burst (xd->device_index,
- (uint16_t) queue_id,
- &tx_vector[tx_tail],
- (uint16_t) (tx_head - tx_tail));
- }
- else
- {
- /*
- * This can only happen if there is a flowcontrol callback.
- * We need to split the transmit into two calls: one for
- * the packets up to the wrap point, and one to continue
- * at the start of the ring.
- * Transmit pkts up to the wrap point.
- */
- rv = rte_eth_tx_burst (xd->device_index,
- (uint16_t) queue_id,
- &tx_vector[tx_tail],
- (uint16_t) (xd->nb_tx_desc - tx_tail));
-
- /*
- * If we transmitted everything we wanted, then allow 1 retry
- * so we can try to transmit the rest. If we didn't transmit
- * everything, stop now.
- */
- n_retry = (rv == xd->nb_tx_desc - tx_tail) ? 1 : 0;
- }
+ /* no wrap, transmit in one burst */
+ rv = rte_eth_tx_burst (xd->device_index,
+ (uint16_t) queue_id,
+ &tx_vector[tx_tail],
+ (uint16_t) (tx_head - tx_tail));
}
else
{
return n_packets;
}
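+/* prefetch the first cache line of both the rte_mbuf and the vlib_buffer */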
+static_always_inline void
+dpdk_prefetch_buffer_by_index (vlib_main_t * vm, u32 bi)
+{
+ vlib_buffer_t *b;
+ struct rte_mbuf *mb;
+ b = vlib_get_buffer (vm, bi);
+ mb = rte_mbuf_from_vlib_buffer (b);
+ CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
+}
-/*
- * This function transmits any packets on the interface's tx_vector and returns
- * the number of packets untransmitted on the tx_vector. If the tx_vector is
- * empty the function simply returns 0.
- *
- * It is intended to be called by a traffic manager which has flowed-off an
- * interface to see if the interface can be flowed-on again.
- */
-u32
-dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance)
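+/* For buffers marked for recycling, substitute a copy of the packet for
+   transmit and queue the original buffer index on the per-cpu recycle list. */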
+static_always_inline void
+dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp)
{
dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd;
- int queue_id;
- struct rte_mbuf **tx_vector;
- tx_ring_hdr_t *ring;
-
- /* param is dev_instance and not hw_if_index to save another lookup */
- xd = vec_elt_at_index (dm->devices, dev_instance);
+ u32 my_cpu = vm->cpu_index;
+ struct rte_mbuf *mb_new;
- queue_id = vm->cpu_index;
- tx_vector = xd->tx_vectors[queue_id];
+  if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0))
+ return;
- /* If no packets on the ring, don't bother calling tx function */
- ring = vec_header (tx_vector, sizeof (*ring));
- if (ring->tx_head == ring->tx_tail)
+ mb_new = dpdk_replicate_packet_mb (b);
+ if (PREDICT_FALSE (mb_new == 0))
{
- return 0;
+ vlib_error_count (vm, node->node_index,
+ DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
+ b->flags |= VLIB_BUFFER_REPL_FAIL;
}
+ else
+ *mbp = mb_new;
- return tx_burst_vector_internal (vm, xd, tx_vector);
+ vec_add1 (dm->recycle[my_cpu], bi);
}
/*
* node. It first copies packets on the frame to a tx_vector containing the
* rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal
* which calls the dpdk tx_burst function.
- *
- * The tx_vector is treated slightly differently depending on whether or
- * not a flowcontrol callback function has been configured. If there is no
- * callback, the tx_vector is a temporary array of rte_mbuf packet pointers.
- * Its entries are written and consumed before the function exits.
- *
- * If there is a callback then the transmit is being invoked in the presence
- * of a traffic manager. Here the tx_vector is treated like a ring of rte_mbuf
- * pointers. If not all packets can be transmitted, the untransmitted packets
- * stay on the tx_vector until the next call. The callback allows the traffic
- * manager to flow-off dequeues to the interface. The companion function
- * dpdk_interface_tx_vector() allows the traffic manager to detect when
- * it should flow-on the interface again.
*/
static uword
dpdk_interface_tx (vlib_main_t * vm,
u32 n_left;
u32 *from;
struct rte_mbuf **tx_vector;
- int i;
+ u16 i;
+ u16 nb_tx_desc = xd->nb_tx_desc;
int queue_id;
u32 my_cpu;
u32 tx_pkts = 0;
ASSERT (n_packets <= VLIB_FRAME_SIZE);
- if (PREDICT_FALSE (n_on_ring + n_packets > xd->nb_tx_desc))
+ if (PREDICT_FALSE (n_on_ring + n_packets > nb_tx_desc))
{
/*
* Overflowing the ring should never happen.
from = vlib_frame_vector_args (f);
n_left = n_packets;
- i = ring->tx_head % xd->nb_tx_desc;
+ i = ring->tx_head % nb_tx_desc;
- while (n_left >= 4)
+ while (n_left >= 8)
{
- u32 bi0, bi1;
- u32 pi0, pi1;
- struct rte_mbuf *mb0, *mb1;
- struct rte_mbuf *prefmb0, *prefmb1;
- vlib_buffer_t *b0, *b1;
- vlib_buffer_t *pref0, *pref1;
- i16 delta0, delta1;
- u16 new_data_len0, new_data_len1;
- u16 new_pkt_len0, new_pkt_len1;
- u32 any_clone;
-
- pi0 = from[2];
- pi1 = from[3];
- pref0 = vlib_get_buffer (vm, pi0);
- pref1 = vlib_get_buffer (vm, pi1);
-
- prefmb0 = rte_mbuf_from_vlib_buffer (pref0);
- prefmb1 = rte_mbuf_from_vlib_buffer (pref1);
-
- CLIB_PREFETCH (prefmb0, CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (pref0, CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (prefmb1, CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (pref1, CLIB_CACHE_LINE_BYTES, LOAD);
+ u32 bi0, bi1, bi2, bi3;
+ struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 or_flags;
+
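+	  /* prefetch buffers and mbufs four packets ahead */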
+ dpdk_prefetch_buffer_by_index (vm, from[4]);
+ dpdk_prefetch_buffer_by_index (vm, from[5]);
+ dpdk_prefetch_buffer_by_index (vm, from[6]);
+ dpdk_prefetch_buffer_by_index (vm, from[7]);
bi0 = from[0];
bi1 = from[1];
- from += 2;
+ bi2 = from[2];
+ bi3 = from[3];
+ from += 4;
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ or_flags = b0->flags | b1->flags | b2->flags | b3->flags;
+
+ if (or_flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ dpdk_validate_rte_mbuf (vm, b0, 1);
+ dpdk_validate_rte_mbuf (vm, b1, 1);
+ dpdk_validate_rte_mbuf (vm, b2, 1);
+ dpdk_validate_rte_mbuf (vm, b3, 1);
+ }
+ else
+ {
+ dpdk_validate_rte_mbuf (vm, b0, 0);
+ dpdk_validate_rte_mbuf (vm, b1, 0);
+ dpdk_validate_rte_mbuf (vm, b2, 0);
+ dpdk_validate_rte_mbuf (vm, b3, 0);
+ }
mb0 = rte_mbuf_from_vlib_buffer (b0);
mb1 = rte_mbuf_from_vlib_buffer (b1);
+ mb2 = rte_mbuf_from_vlib_buffer (b2);
+ mb3 = rte_mbuf_from_vlib_buffer (b3);
+
+ if (PREDICT_FALSE (or_flags & VLIB_BUFFER_RECYCLE))
+ {
+ dpdk_buffer_recycle (vm, node, b0, bi0, &mb0);
+ dpdk_buffer_recycle (vm, node, b1, bi1, &mb1);
+ dpdk_buffer_recycle (vm, node, b2, bi2, &mb2);
+ dpdk_buffer_recycle (vm, node, b3, bi3, &mb3);
- any_clone = (b0->flags & VLIB_BUFFER_RECYCLE)
- | (b1->flags & VLIB_BUFFER_RECYCLE);
- if (PREDICT_FALSE (any_clone != 0))
+	      /* don't enqueue packets if replication failed, as they must
+	         be sent back for recycling */
+ if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
+ tx_vector[i++ % nb_tx_desc] = mb0;
+ if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0))
+ tx_vector[i++ % nb_tx_desc] = mb1;
+ if (PREDICT_TRUE ((b2->flags & VLIB_BUFFER_REPL_FAIL) == 0))
+ tx_vector[i++ % nb_tx_desc] = mb2;
+ if (PREDICT_TRUE ((b3->flags & VLIB_BUFFER_REPL_FAIL) == 0))
+ tx_vector[i++ % nb_tx_desc] = mb3;
+ }
+ else
{
- if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_RECYCLE) != 0))
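+	      /* use modulo stores only when this quad may wrap the ring end */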
+ if (PREDICT_FALSE (i + 3 >= nb_tx_desc))
{
- struct rte_mbuf *mb0_new = dpdk_replicate_packet_mb (b0);
- if (PREDICT_FALSE (mb0_new == 0))
- {
- vlib_error_count (vm, node->node_index,
- DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
- b0->flags |= VLIB_BUFFER_REPL_FAIL;
- }
- else
- mb0 = mb0_new;
- vec_add1 (dm->recycle[my_cpu], bi0);
+ tx_vector[i++ % nb_tx_desc] = mb0;
+ tx_vector[i++ % nb_tx_desc] = mb1;
+ tx_vector[i++ % nb_tx_desc] = mb2;
+ tx_vector[i++ % nb_tx_desc] = mb3;
+ i %= nb_tx_desc;
}
- if (PREDICT_FALSE ((b1->flags & VLIB_BUFFER_RECYCLE) != 0))
+ else
{
- struct rte_mbuf *mb1_new = dpdk_replicate_packet_mb (b1);
- if (PREDICT_FALSE (mb1_new == 0))
- {
- vlib_error_count (vm, node->node_index,
- DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
- b1->flags |= VLIB_BUFFER_REPL_FAIL;
- }
- else
- mb1 = mb1_new;
- vec_add1 (dm->recycle[my_cpu], bi1);
+ tx_vector[i++] = mb0;
+ tx_vector[i++] = mb1;
+ tx_vector[i++] = mb2;
+ tx_vector[i++] = mb3;
}
}
- delta0 = PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
- vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;
- delta1 = PREDICT_FALSE (b1->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
- vlib_buffer_length_in_chain (vm, b1) - (i16) mb1->pkt_len;
-
- new_data_len0 = (u16) ((i16) mb0->data_len + delta0);
- new_data_len1 = (u16) ((i16) mb1->data_len + delta1);
- new_pkt_len0 = (u16) ((i16) mb0->pkt_len + delta0);
- new_pkt_len1 = (u16) ((i16) mb1->pkt_len + delta1);
-
- b0->current_length = new_data_len0;
- b1->current_length = new_data_len1;
- mb0->data_len = new_data_len0;
- mb1->data_len = new_data_len1;
- mb0->pkt_len = new_pkt_len0;
- mb1->pkt_len = new_pkt_len1;
-
- mb0->data_off = (PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL)) ?
- mb0->data_off : (u16) (RTE_PKTMBUF_HEADROOM + b0->current_data);
- mb1->data_off = (PREDICT_FALSE (b1->flags & VLIB_BUFFER_REPL_FAIL)) ?
- mb1->data_off : (u16) (RTE_PKTMBUF_HEADROOM + b1->current_data);
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
{
dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
if (b1->flags & VLIB_BUFFER_IS_TRACED)
dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1);
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi2, b2);
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi3, b3);
}
- if (PREDICT_TRUE (any_clone == 0))
- {
- tx_vector[i % xd->nb_tx_desc] = mb0;
- i++;
- tx_vector[i % xd->nb_tx_desc] = mb1;
- i++;
- }
- else
- {
- /* cloning was done, need to check for failure */
- if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
- {
- tx_vector[i % xd->nb_tx_desc] = mb0;
- i++;
- }
- if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0))
- {
- tx_vector[i % xd->nb_tx_desc] = mb1;
- i++;
- }
- }
-
- n_left -= 2;
+ n_left -= 4;
}
while (n_left > 0)
{
u32 bi0;
struct rte_mbuf *mb0;
vlib_buffer_t *b0;
- i16 delta0;
- u16 new_data_len0;
- u16 new_pkt_len0;
bi0 = from[0];
from++;
b0 = vlib_get_buffer (vm, bi0);
- mb0 = rte_mbuf_from_vlib_buffer (b0);
- if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_RECYCLE) != 0))
- {
- struct rte_mbuf *mb0_new = dpdk_replicate_packet_mb (b0);
- if (PREDICT_FALSE (mb0_new == 0))
- {
- vlib_error_count (vm, node->node_index,
- DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
- b0->flags |= VLIB_BUFFER_REPL_FAIL;
- }
- else
- mb0 = mb0_new;
- vec_add1 (dm->recycle[my_cpu], bi0);
- }
-
- delta0 = PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
- vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;
-
- new_data_len0 = (u16) ((i16) mb0->data_len + delta0);
- new_pkt_len0 = (u16) ((i16) mb0->pkt_len + delta0);
+ dpdk_validate_rte_mbuf (vm, b0, 1);
- b0->current_length = new_data_len0;
- mb0->data_len = new_data_len0;
- mb0->pkt_len = new_pkt_len0;
- mb0->data_off = (PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL)) ?
- mb0->data_off : (u16) (RTE_PKTMBUF_HEADROOM + b0->current_data);
+ mb0 = rte_mbuf_from_vlib_buffer (b0);
+ dpdk_buffer_recycle (vm, node, b0, bi0, &mb0);
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
if (b0->flags & VLIB_BUFFER_IS_TRACED)
if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
{
- tx_vector[i % xd->nb_tx_desc] = mb0;
+ tx_vector[i % nb_tx_desc] = mb0;
i++;
}
n_left--;
*/
tx_pkts = n_on_ring - n_packets;
- if (PREDICT_FALSE (dm->flowcontrol_callback != 0))
- {
- if (PREDICT_FALSE (n_packets))
- {
- /* Callback may want to enable flowcontrol */
- dm->flowcontrol_callback (vm, xd->vlib_hw_if_index,
- ring->tx_head - ring->tx_tail);
- }
- else
- {
- /* Reset head/tail to avoid unnecessary wrap */
- ring->tx_head = 0;
- ring->tx_tail = 0;
- }
- }
- else
- {
- /* If there is no callback then drop any non-transmitted packets */
- if (PREDICT_FALSE (n_packets))
- {
- vlib_simple_counter_main_t *cm;
- vnet_main_t *vnm = vnet_get_main ();
+ {
+    /* drop any non-transmitted packets */
+ if (PREDICT_FALSE (n_packets))
+ {
+ vlib_simple_counter_main_t *cm;
+ vnet_main_t *vnm = vnet_get_main ();
- cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
- VNET_INTERFACE_COUNTER_TX_ERROR);
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_TX_ERROR);
- vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
- n_packets);
+ vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
+ n_packets);
- vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP,
- n_packets);
+ vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP,
+ n_packets);
- while (n_packets--)
- rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]);
- }
+ while (n_packets--)
+ rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]);
+ }
- /* Reset head/tail to avoid unnecessary wrap */
- ring->tx_head = 0;
- ring->tx_tail = 0;
- }
+ /* Reset head/tail to avoid unnecessary wrap */
+ ring->tx_head = 0;
+ ring->tx_tail = 0;
+ }
/* Recycle replicated buffers */
if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu])))
VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx)
/* *INDENT-ON* */
-void
-dpdk_set_flowcontrol_callback (vlib_main_t * vm,
- dpdk_flowcontrol_callback_t callback)
-{
- dpdk_main.flowcontrol_callback = callback;
-}
-
#define UP_DOWN_FLAG_EVENT 1
-
-u32
-dpdk_get_admin_up_down_in_progress (void)
-{
- return dpdk_main.admin_up_down_in_progress;
-}
-
uword
admin_up_down_process (vlib_main_t * vm,
vlib_node_runtime_t * rt, vlib_frame_t * f)
};
/* *INDENT-ON* */
-/*
- * Asynchronously invoke vnet_sw_interface_set_flags via the admin_up_down
- * process. Useful for avoiding long blocking delays (>150ms) in the dpdk
- * drivers.
- * WARNING: when posting this event, no other interface-related calls should
- * be made (e.g. vnet_create_sw_interface()) while the event is being
- * processed (admin_up_down_in_progress). This is required in order to avoid
- * race conditions in manipulating interface data structures.
- */
-void
-post_sw_interface_set_flags (vlib_main_t * vm, u32 sw_if_index, u32 flags)
-{
- uword *d = vlib_process_signal_event_data
- (vm, admin_up_down_process_node.index,
- UP_DOWN_FLAG_EVENT, 2, sizeof (u32));
- d[0] = sw_if_index;
- d[1] = flags;
-}
-
-/*
- * Return a copy of the DPDK port stats in dest.
- */
-clib_error_t *
-dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats *dest)
-{
- dpdk_main_t *dm = &dpdk_main;
- vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);
-
- if (!dest)
- {
- return clib_error_return (0, "Missing or NULL argument");
- }
- if (!xd)
- {
- return clib_error_return (0,
- "Unable to get DPDK device from HW interface");
- }
-
- dpdk_update_counters (xd, vlib_time_now (dm->vlib_main));
-
- clib_memcpy (dest, &xd->stats, sizeof (xd->stats));
- return (0);
-}
-
-/*
- * Return the number of dpdk mbufs
- */
-u32
-dpdk_num_mbufs (void)
-{
- dpdk_main_t *dm = &dpdk_main;
-
- return dm->conf->num_mbufs;
-}
-
-/*
- * Return the pmd type for a given hardware interface
- */
-dpdk_pmd_t
-dpdk_get_pmd_type (vnet_hw_interface_t * hi)
-{
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd;
-
- assert (hi);
-
- xd = vec_elt_at_index (dm->devices, hi->dev_instance);
-
- assert (xd);
-
- return xd->pmd;
-}
-
-/*
- * Return the cpu socket for a given hardware interface
- */
-i8
-dpdk_get_cpu_socket (vnet_hw_interface_t * hi)
-{
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd;
-
- assert (hi);
-
- xd = vec_elt_at_index (dm->devices, hi->dev_instance);
-
- assert (xd);
-
- return xd->cpu_socket;
-}
-
/*
* fd.io coding-style-patch-verification: ON
*