}
}
-static struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b)
+struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b)
{
vlib_main_t * vm = vlib_get_main();
vlib_buffer_main_t * bm = vm->buffer_main;
rte_pktmbuf_data_len (new_mb) = pkt_mb->data_len;
copy_bytes = pkt_mb->data_len + RTE_PKTMBUF_HEADROOM;
ASSERT(copy_bytes <= pkt_mb->buf_len);
- memcpy(new_mb->buf_addr, pkt_mb->buf_addr, copy_bytes);
+ clib_memcpy(new_mb->buf_addr, pkt_mb->buf_addr, copy_bytes);
prev_mb_next = &new_mb->next;
pkt_mb = pkt_mb->next;
return first_mb;
}
+/*
+ * dpdk_zerocopy_replicate_packet_mb
+ *
+ * Build a new mbuf chain for the packet held in vlib buffer b by calling
+ * rte_pktmbuf_clone() on each segment of the source chain.  New mbufs are
+ * taken from the pktmbuf pool indexed by rte_socket_id().
+ *
+ * Returns the head of the new chain, or NULL when the source chain is
+ * shorter than its nb_segs field claims or when rte_pktmbuf_clone()
+ * returns 0.  Whatever part of the new chain was already built is freed
+ * before NULL is returned.
+ *
+ * NOTE(review): DPDK documents rte_pktmbuf_clone() as cloning a whole
+ * packet; if it already walks the segment chain, calling it once per
+ * segment here would overwrite (and leak) the tail it produced.  Confirm
+ * against the DPDK release in use.
+ */
+struct rte_mbuf * dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b)
+{
+  vlib_main_t * vm = vlib_get_main();
+  vlib_buffer_main_t * bm = vm->buffer_main;
+  struct rte_mbuf * first_mb = 0, * new_mb, * pkt_mb, ** prev_mb_next = 0;
+  u8 nb_segs, nb_segs_left;
+  unsigned socket_id = rte_socket_id();
+
+  ASSERT (bm->pktmbuf_pools[socket_id]);
+  pkt_mb = rte_mbuf_from_vlib_buffer(b);
+  nb_segs = pkt_mb->nb_segs;
+  for (nb_segs_left = nb_segs; nb_segs_left; nb_segs_left--)
+    {
+      /* Source chain ended before nb_segs segments were seen. */
+      if (PREDICT_FALSE(pkt_mb == 0))
+        {
+          clib_warning ("Missing %d mbuf chain segment(s): "
+                        "(nb_segs = %d, nb_segs_left = %d)!",
+                        nb_segs - nb_segs_left, nb_segs, nb_segs_left);
+          if (first_mb)
+            rte_pktmbuf_free(first_mb);
+          return NULL;
+        }
+      new_mb = rte_pktmbuf_clone(pkt_mb, bm->pktmbuf_pools[socket_id]);
+      if (PREDICT_FALSE(new_mb == 0))
+        {
+          if (first_mb)
+            rte_pktmbuf_free(first_mb);
+          return NULL;
+        }
+
+      /*
+       * Copy packet info into 1st segment.
+       */
+      if (first_mb == 0)
+        {
+          first_mb = new_mb;
+          rte_pktmbuf_pkt_len (first_mb) = pkt_mb->pkt_len;
+          first_mb->nb_segs = pkt_mb->nb_segs;
+          first_mb->port = pkt_mb->port;
+#ifdef DAW_FIXME // TX Offload support TBD
+          first_mb->vlan_macip = pkt_mb->vlan_macip;
+          first_mb->hash = pkt_mb->hash;
+          first_mb->ol_flags = pkt_mb->ol_flags;
+#endif
+        }
+      else
+        {
+          /* Link this clone behind the previously created segment. */
+          ASSERT(prev_mb_next != 0);
+          *prev_mb_next = new_mb;
+        }
+
+      /*
+       * Record the segment length on the clone; no packet data is
+       * copied in this function.
+       */
+      rte_pktmbuf_data_len (new_mb) = pkt_mb->data_len;
+
+      prev_mb_next = &new_mb->next;
+      pkt_mb = pkt_mb->next;
+    }
+
+  /* The source chain must hold exactly nb_segs segments. */
+  ASSERT(pkt_mb == 0);
+  __rte_mbuf_sanity_check(first_mb, 1);
+
+  return first_mb;
+}
+
static void
dpdk_tx_trace_buffer (dpdk_main_t * dm,
vlib_node_runtime_t * node,
t0->queue_index = queue_id;
t0->device_index = xd->device_index;
t0->buffer_index = buffer_index;
- memcpy (&t0->mb, mb, sizeof (t0->mb));
- memcpy (&t0->buffer, buffer, sizeof (buffer[0]) - sizeof (buffer->pre_data));
- memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data,
+ clib_memcpy (&t0->mb, mb, sizeof (t0->mb));
+ clib_memcpy (&t0->buffer, buffer, sizeof (buffer[0]) - sizeof (buffer->pre_data));
+ clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data,
sizeof (t0->buffer.pre_data));
}
n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0;
}
}
+#if DPDK_VHOST_USER
else if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER)
{
u32 offset = 0;
queue_id = 0;
while (__sync_lock_test_and_set (xd->lockp[queue_id], 1));
}
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
else {
dpdk_device_and_queue_t * dq;
vec_foreach (dq, dm->devices_by_cpu[vm->cpu_index])
assert (dq);
offset = dq->queue_id * VIRTIO_QNUM;
}
-#endif
if (PREDICT_TRUE(tx_head > tx_tail))
{
int i; u32 bytes = 0;
f64 now = vlib_time_now (vm);
if (vring->int_deadline < now ||
- vring->n_since_last_int > dm->vhost_coalesce_frames)
+ vring->n_since_last_int > dm->conf->vhost_coalesce_frames)
dpdk_vhost_user_send_interrupt(vm, xd, offset + VIRTIO_RXQ);
}
f64 now = vlib_time_now (vm);
if (vring->int_deadline < now ||
- vring->n_since_last_int > dm->vhost_coalesce_frames)
+ vring->n_since_last_int > dm->conf->vhost_coalesce_frames)
dpdk_vhost_user_send_interrupt(vm, xd, offset + VIRTIO_RXQ);
}
if (xd->need_txlock)
*xd->lockp[queue_id] = 0;
}
+#endif
#if RTE_LIBRTE_KNI
else if (xd->dev_type == VNET_DPDK_DEV_KNI)
{
mb0 = rte_mbuf_from_vlib_buffer(b0);
mb1 = rte_mbuf_from_vlib_buffer(b1);
- any_clone = b0->clone_count | b1->clone_count;
+ any_clone = (b0->flags & VLIB_BUFFER_RECYCLE)
+ | (b1->flags & VLIB_BUFFER_RECYCLE);
if (PREDICT_FALSE(any_clone != 0))
{
- if (PREDICT_FALSE(b0->clone_count != 0))
- {
- struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0);
- if (PREDICT_FALSE(mb0_new == 0))
- {
- vlib_error_count (vm, node->node_index,
- DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
- b0->flags |= VLIB_BUFFER_REPL_FAIL;
- }
- else
- mb0 = mb0_new;
- vec_add1 (dm->recycle[my_cpu], bi0);
- }
- if (PREDICT_FALSE(b1->clone_count != 0))
- {
- struct rte_mbuf * mb1_new = dpdk_replicate_packet_mb (b1);
- if (PREDICT_FALSE(mb1_new == 0))
- {
- vlib_error_count (vm, node->node_index,
- DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
- b1->flags |= VLIB_BUFFER_REPL_FAIL;
- }
- else
- mb1 = mb1_new;
- vec_add1 (dm->recycle[my_cpu], bi1);
- }
- }
+ if (PREDICT_FALSE
+ ((b0->flags & VLIB_BUFFER_RECYCLE) != 0))
+ {
+ struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0);
+ if (PREDICT_FALSE(mb0_new == 0))
+ {
+ vlib_error_count (vm, node->node_index,
+ DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
+ b0->flags |= VLIB_BUFFER_REPL_FAIL;
+ }
+ else
+ mb0 = mb0_new;
+ vec_add1 (dm->recycle[my_cpu], bi0);
+ }
+ if (PREDICT_FALSE
+ ((b1->flags & VLIB_BUFFER_RECYCLE) != 0))
+ {
+ struct rte_mbuf * mb1_new = dpdk_replicate_packet_mb (b1);
+ if (PREDICT_FALSE(mb1_new == 0))
+ {
+ vlib_error_count (vm, node->node_index,
+ DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
+ b1->flags |= VLIB_BUFFER_REPL_FAIL;
+ }
+ else
+ mb1 = mb1_new;
+ vec_add1 (dm->recycle[my_cpu], bi1);
+ }
+ }
delta0 = PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
- vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;
+ vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;
delta1 = PREDICT_FALSE(b1->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
- vlib_buffer_length_in_chain (vm, b1) - (i16) mb1->pkt_len;
+ vlib_buffer_length_in_chain (vm, b1) - (i16) mb1->pkt_len;
new_data_len0 = (u16)((i16) mb0->data_len + delta0);
new_data_len1 = (u16)((i16) mb1->data_len + delta1);
mb1->pkt_len = new_pkt_len1;
mb0->data_off = (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL)) ?
- mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data);
+ mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data);
mb1->data_off = (PREDICT_FALSE(b1->flags & VLIB_BUFFER_REPL_FAIL)) ?
- mb1->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b1->current_data);
+ mb1->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b1->current_data);
if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE))
- {
+ {
if (b0->flags & VLIB_BUFFER_IS_TRACED)
- dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
if (b1->flags & VLIB_BUFFER_IS_TRACED)
- dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1);
- }
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1);
+ }
if (PREDICT_TRUE(any_clone == 0))
{
- tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
+ tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
i++;
- tx_vector[i % DPDK_TX_RING_SIZE] = mb1;
+ tx_vector[i % DPDK_TX_RING_SIZE] = mb1;
i++;
}
else
/* cloning was done, need to check for failure */
if (PREDICT_TRUE((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
{
- tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
+ tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
i++;
}
if (PREDICT_TRUE((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0))
{
- tx_vector[i % DPDK_TX_RING_SIZE] = mb1;
+ tx_vector[i % DPDK_TX_RING_SIZE] = mb1;
i++;
}
}
-
+
n_left -= 2;
}
while (n_left > 0)
b0 = vlib_get_buffer (vm, bi0);
mb0 = rte_mbuf_from_vlib_buffer(b0);
- if (PREDICT_FALSE(b0->clone_count != 0))
- {
- struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0);
- if (PREDICT_FALSE(mb0_new == 0))
- {
- vlib_error_count (vm, node->node_index,
- DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
- b0->flags |= VLIB_BUFFER_REPL_FAIL;
- }
- else
- mb0 = mb0_new;
- vec_add1 (dm->recycle[my_cpu], bi0);
- }
+ if (PREDICT_FALSE((b0->flags & VLIB_BUFFER_RECYCLE) != 0))
+ {
+ struct rte_mbuf * mb0_new = dpdk_replicate_packet_mb (b0);
+ if (PREDICT_FALSE(mb0_new == 0))
+ {
+ vlib_error_count (vm, node->node_index,
+ DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
+ b0->flags |= VLIB_BUFFER_REPL_FAIL;
+ }
+ else
+ mb0 = mb0_new;
+ vec_add1 (dm->recycle[my_cpu], bi0);
+ }
delta0 = PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
- vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;
+ vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;
new_data_len0 = (u16)((i16) mb0->data_len + delta0);
new_pkt_len0 = (u16)((i16) mb0->pkt_len + delta0);
mb0->data_len = new_data_len0;
mb0->pkt_len = new_pkt_len0;
mb0->data_off = (PREDICT_FALSE(b0->flags & VLIB_BUFFER_REPL_FAIL)) ?
- mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data);
+ mb0->data_off : (u16)(RTE_PKTMBUF_HEADROOM + b0->current_data);
if (PREDICT_FALSE(node->flags & VLIB_NODE_FLAG_TRACE))
- if (b0->flags & VLIB_BUFFER_IS_TRACED)
- dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
if (PREDICT_TRUE((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
{
- tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
+ tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
i++;
}
n_left--;
vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index, n_packets);
vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP,
- n_packets);
+ n_packets);
while (n_packets--)
rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]);
}
/* Reset head/tail to avoid unnecessary wrap */
- ring->tx_head = 0;
- ring->tx_tail = 0;
+ ring->tx_head = 0;
+ ring->tx_tail = 0;
}
/* Recycle replicated buffers */
static int dpdk_device_renumber (vnet_hw_interface_t * hi,
u32 new_dev_instance)
{
+#if DPDK_VHOST_USER
dpdk_main_t * dm = &dpdk_main;
dpdk_device_t * xd = vec_elt_at_index (dm->devices, hi->dev_instance);
if (!xd || xd->dev_type != VNET_DPDK_DEV_VHOST_USER) {
- clib_warning("cannot renumber non-vhost-user interface (sw_if_index: %d)",
- hi->sw_if_index);
- return 0;
+ clib_warning("cannot renumber non-vhost-user interface (sw_if_index: %d)",
+ hi->sw_if_index);
+ return 0;
}
xd->vu_if_id = new_dev_instance;
+#endif
return 0;
}
*/
dpdk_update_counters (xd, vlib_time_now (dm->vlib_main));
- memcpy (&xd->last_cleared_stats, &xd->stats, sizeof(xd->stats));
- memcpy (xd->last_cleared_xstats, xd->xstats,
- vec_len(xd->last_cleared_xstats) *
- sizeof(xd->last_cleared_xstats[0]));
+ clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof(xd->stats));
+ clib_memcpy (xd->last_cleared_xstats, xd->xstats,
+ vec_len(xd->last_cleared_xstats) *
+ sizeof(xd->last_cleared_xstats[0]));
}
else
{
memset (&xd->last_stats, 0, sizeof (xd->last_stats));
}
+#if DPDK_VHOST_USER
if (PREDICT_FALSE(xd->dev_type == VNET_DPDK_DEV_VHOST_USER)) {
int i;
for (i = 0; i < xd->rx_q_used * VIRTIO_QNUM; i++) {
- xd->vu_intf->vrings[i].packets = 0;
- xd->vu_intf->vrings[i].bytes = 0;
+ xd->vu_intf->vrings[i].packets = 0;
+ xd->vu_intf->vrings[i].bytes = 0;
}
}
+#endif
}
#ifdef RTE_LIBRTE_KNI
return 0;
}
#endif
+#if DPDK_VHOST_USER
if (xd->dev_type == VNET_DPDK_DEV_VHOST_USER)
{
if (is_up)
return 0;
}
+#endif
if (is_up)
*/
if (xd->pmd != VNET_DPDK_PMD_VMXNET3)
rte_eth_dev_stop (xd->device_index);
+
+ /* For bonded interface, stop slave links */
+ if (xd->pmd == VNET_DPDK_PMD_BOND)
+ {
+ u8 slink[16];
+ int nlink = rte_eth_bond_slaves_get(xd->device_index, slink, 16);
+ while (nlink >=1)
+ {
+ u8 dpdk_port = slink[--nlink];
+ rte_eth_dev_stop (dpdk_port);
+ }
+ }
}
if (rv < 0)
dpdk_device_t * xd = vec_elt_at_index (xm->devices, hw->dev_instance);
vnet_sw_interface_t * t = (vnet_sw_interface_t *) st;
int r, vlan_offload;
+ u32 prev_subifs = xd->vlan_subifs;
+ if (is_add) xd->vlan_subifs++;
+ else if (xd->vlan_subifs) xd->vlan_subifs--;
if (xd->dev_type != VNET_DPDK_DEV_ETH)
return 0;
if (t->sub.eth.flags.no_tags == 1)
return 0;
- if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1 ))
+ if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1 )) {
+ xd->vlan_subifs = prev_subifs;
return clib_error_return (0, "unsupported VLAN setup");
-
+ }
vlan_offload = rte_eth_dev_get_vlan_offload(xd->device_index);
vlan_offload |= ETH_VLAN_FILTER_OFFLOAD;
- if ((r = rte_eth_dev_set_vlan_offload(xd->device_index, vlan_offload)))
+ if ((r = rte_eth_dev_set_vlan_offload(xd->device_index, vlan_offload))) {
+ xd->vlan_subifs = prev_subifs;
return clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d",
xd->device_index, r);
+ }
- if ((r = rte_eth_dev_vlan_filter(xd->device_index, t->sub.eth.outer_vlan_id, is_add)))
+ if ((r = rte_eth_dev_vlan_filter(xd->device_index, t->sub.eth.outer_vlan_id, is_add))) {
+ xd->vlan_subifs = prev_subifs;
return clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d",
xd->device_index, r);
+ }
return 0;
}
.name_renumber = dpdk_device_renumber,
};
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class,
+ dpdk_interface_tx)
+
void dpdk_set_flowcontrol_callback (vlib_main_t *vm,
dpdk_flowcontrol_callback_t callback)
{
return dpdk_main.admin_up_down_in_progress;
}
-static uword
+uword
admin_up_down_process (vlib_main_t * vm,
vlib_node_runtime_t * rt,
vlib_frame_t * f)
clib_error_t * error = 0;
uword event_type;
uword *event_data = 0;
- u32 index;
u32 sw_if_index;
u32 flags;
dpdk_main.admin_up_down_in_progress = 1;
- for (index=0; index<vec_len(event_data); index++)
+ switch (event_type) {
+ case UP_DOWN_FLAG_EVENT:
{
- sw_if_index = event_data[index] >> 32;
- flags = (u32) event_data[index];
-
- switch (event_type) {
- case UP_DOWN_FLAG_EVENT:
- error = vnet_sw_interface_set_flags (vnet_get_main(), sw_if_index, flags);
- clib_error_report(error);
- break;
- }
+ if (vec_len(event_data) == 2) {
+ sw_if_index = event_data[0];
+ flags = event_data[1];
+ error = vnet_sw_interface_set_flags (vnet_get_main(), sw_if_index, flags);
+ clib_error_report(error);
+ }
}
+ break;
+ }
vec_reset_length (event_data);
*/
void post_sw_interface_set_flags (vlib_main_t *vm, u32 sw_if_index, u32 flags)
{
- vlib_process_signal_event
+ uword * d = vlib_process_signal_event_data
(vm, admin_up_down_process_node.index,
- UP_DOWN_FLAG_EVENT,
- (((uword)sw_if_index << 32) | flags));
-}
-
-/*
- * Called by the dpdk driver's rte_delay_us() function.
- * Return 0 to have the dpdk do a regular delay loop.
- * Return 1 if to skip the delay loop because we are suspending
- * the calling vlib process instead.
- */
-int rte_delay_us_override (unsigned us) {
- vlib_main_t * vm;
-
- /* Don't bother intercepting for short delays */
- if (us < 10) return 0;
-
- /*
- * Only intercept if we are in a vlib process.
- * If we are called from a vlib worker thread or the vlib main
- * thread then do not intercept. (Must not be called from an
- * independent pthread).
- */
- if (os_get_cpu_number() == 0)
- {
- /*
- * We're in the vlib main thread or a vlib process. Make sure
- * the process is running and we're not still initializing.
- */
- vm = vlib_get_main();
- if (vlib_in_process_context(vm))
- {
- /* Only suspend for the admin_down_process */
- vlib_process_t * proc = vlib_get_current_process(vm);
- if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) ||
- (proc->node_runtime.function != admin_up_down_process))
- return 0;
-
- f64 delay = 1e-6 * us;
- vlib_process_suspend(vm, delay);
- return 1;
- }
- }
- return 0; // no override
+ UP_DOWN_FLAG_EVENT, 2, sizeof(u32));
+ d[0] = sw_if_index;
+ d[1] = flags;
}
/*
dpdk_update_counters (xd, vlib_time_now (dm->vlib_main));
- memcpy(dest, &xd->stats, sizeof(xd->stats));
+ clib_memcpy(dest, &xd->stats, sizeof(xd->stats));
return (0);
}
+
+/*
+ * dpdk_num_mbufs
+ *
+ * Report the num_mbufs value stored in the dpdk_main configuration.
+ */
+u32 dpdk_num_mbufs (void)
+{
+  return dpdk_main.conf->num_mbufs;
+}
+
+/*
+ * dpdk_get_pmd_type
+ *
+ * Look up the dpdk device at index hi->dev_instance in dpdk_main.devices
+ * and return its pmd field.  Both hi and the resulting device pointer
+ * are checked with assert().
+ */
+dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t *hi)
+{
+  dpdk_device_t * dev;
+
+  assert (hi);
+  dev = vec_elt_at_index (dpdk_main.devices, hi->dev_instance);
+  assert (dev);
+
+  return dev->pmd;
+}
+
+/*
+ * dpdk_get_cpu_socket
+ *
+ * Look up the dpdk device at index hi->dev_instance in dpdk_main.devices
+ * and return its cpu_socket field.  Both hi and the resulting device
+ * pointer are checked with assert().
+ */
+i8 dpdk_get_cpu_socket (vnet_hw_interface_t *hi)
+{
+  dpdk_device_t * dev;
+
+  assert (hi);
+  dev = vec_elt_at_index (dpdk_main.devices, hi->dev_instance);
+  assert (dev);
+
+  return dev->cpu_socket;
+}