#include <rte_dev.h>
#include <rte_log.h>
#include <rte_memory.h>
-#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_virtio_net.h>
#include <rte_pci_dev_ids.h>
#include <rte_version.h>
+#include <rte_eth_bond.h>
#include <vnet/unix/pcap.h>
#include <vnet/devices/virtio/vhost-user.h>
#define always_inline static inline __attribute__ ((__always_inline__))
#endif
-#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
-#define NB_MBUF (32<<10)
+#include <vlib/pci/pci.h>
-vnet_device_class_t dpdk_device_class;
-vlib_node_registration_t dpdk_input_node;
-vlib_node_registration_t dpdk_io_input_node;
-vlib_node_registration_t handoff_dispatch_node;
+#define NB_MBUF (16<<10)
+
+extern vnet_device_class_t dpdk_device_class;
+extern vlib_node_registration_t dpdk_input_node;
+extern vlib_node_registration_t handoff_dispatch_node;
typedef enum {
VNET_DPDK_DEV_ETH = 1, /* Standard DPDK PMD driver */
_ ("rte_i40e_pmd", I40E) \
_ ("rte_i40evf_pmd", I40EVF) \
_ ("rte_virtio_pmd", VIRTIO) \
- _ ("rte_vice_pmd", VICE) \
_ ("rte_enic_pmd", ENIC) \
_ ("rte_vmxnet3_pmd", VMXNET3) \
_ ("AF_PACKET PMD", AF_PACKET) \
+ _ ("rte_bond_pmd", BOND) \
_ ("rte_pmd_fm10k", FM10K) \
- _ ("rte_cxgbe_pmd", CXGBE)
+ _ ("rte_cxgbe_pmd", CXGBE) \
+ _ ("rte_dpaa2_dpni", DPAA2)
typedef enum { /* PMD identifiers: NONE, one VNET_DPDK_PMD_<f> per foreach_dpdk_pmd entry, then UNKNOWN */
VNET_DPDK_PMD_NONE,
#define _(s,f) VNET_DPDK_PMD_##f,
foreach_dpdk_pmd /* only the second macro argument (f) is used here; the string s is ignored */
#undef _
-#ifdef NETMAP
- VNET_DPDK_PMD_NETMAP,
-#endif
VNET_DPDK_PMD_UNKNOWN, /* must be last */
} dpdk_pmd_t;
VNET_DPDK_PORT_TYPE_ETH_1G,
VNET_DPDK_PORT_TYPE_ETH_10G,
VNET_DPDK_PORT_TYPE_ETH_40G,
+ VNET_DPDK_PORT_TYPE_ETH_BOND,
VNET_DPDK_PORT_TYPE_ETH_SWITCH,
-#ifdef NETMAP
- VNET_DPDK_PORT_TYPE_NETMAP,
-#endif
VNET_DPDK_PORT_TYPE_AF_PACKET,
VNET_DPDK_PORT_TYPE_UNKNOWN,
} dpdk_port_type_t;
int callfd;
int kickfd;
int errfd;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
int enabled;
-#endif
u32 callfd_idx;
u32 n_since_last_int;
f64 int_deadline;
+ u64 packets;
+ u64 bytes;
} dpdk_vu_vring;
typedef struct { /* Per-interface vring and memory-region bookkeeping (presumably for vhost-user, going by the "vu" prefix - confirm) */
u64 feature_mask;
u32 num_vrings;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
dpdk_vu_vring vrings[VHOST_MAX_QUEUE_PAIRS * 2]; /* sized for two vrings per queue pair */
-#else
- dpdk_vu_vring vrings[2];
-#endif
u64 region_addr[VHOST_MEMORY_MAX_NREGIONS]; /* the three region_* arrays share the same bound, one slot per memory region */
u32 region_fd[VHOST_MEMORY_MAX_NREGIONS];
+ u64 region_offset[VHOST_MEMORY_MAX_NREGIONS]; /* NOTE(review): presumably the offset of each region within its fd mapping - confirm */
} dpdk_vu_intf_t;
typedef void (*dpdk_flowcontrol_callback_t) (vlib_main_t *vm,
/* per-worker destination frame queue */
dpdk_frame_t * frames;
+ /* number of sub-interfaces */
+ u16 vlan_subifs;
+
dpdk_device_type_t dev_type:8;
dpdk_pmd_t pmd:8;
i8 cpu_socket;
struct rte_eth_stats stats;
struct rte_eth_stats last_stats;
+ struct rte_eth_stats last_cleared_stats;
struct rte_eth_xstats * xstats;
+ struct rte_eth_xstats * last_cleared_xstats;
f64 time_last_stats_update;
dpdk_port_type_t port_type;
dpdk_efd_agent_t efd_agent;
+ u8 need_txlock; /* Used by VNET_DPDK_DEV_VHOST_USER */
} dpdk_device_t;
-#define MAX_NELTS 32
-typedef struct {
- CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
- u64 head;
- u64 head_hint;
- u64 tail;
- u32 n_in_use;
- u32 nelts;
- u32 written;
- u32 threshold;
- i32 n_vectors[MAX_NELTS];
-} frame_queue_trace_t;
#define DPDK_TX_RING_SIZE (4 * 1024)
-#define DPDK_STATS_POLL_INTERVAL 10.0
-#define DPDK_LINK_POLL_INTERVAL 3.0
+#define DPDK_STATS_POLL_INTERVAL (10.0) /* in seconds, given that the minimum below is 0.001 == 1 msec */
+#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1 msec */
+
+#define DPDK_LINK_POLL_INTERVAL (3.0) /* in seconds, same unit as the minimum below */
+#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1 msec */
typedef struct {
CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
u16 pad;
} dpdk_efd_t;
+#define foreach_dpdk_device_config_item /* X-macro list of per-device settings; the caller defines _(x) before expanding it */ \
+ _ (num_rx_queues) \
+ _ (num_tx_queues) \
+ _ (num_rx_desc) \
+ _ (num_tx_desc) \
+ _ (rss_fn)
+
+typedef struct { /* Configuration record for one device, identified by its PCI address */
+ vlib_pci_addr_t pci_addr;
+ u8 is_blacklisted;
+ u8 vlan_strip_offload; /* presumably holds one of the DPDK_DEVICE_VLAN_STRIP_* values defined just below - confirm */
+#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0
+#define DPDK_DEVICE_VLAN_STRIP_OFF 1
+#define DPDK_DEVICE_VLAN_STRIP_ON 2
+
+#define _(x) uword x;
+ foreach_dpdk_device_config_item /* expands to one uword member per listed config item */
+#undef _
+ clib_bitmap_t * workers; /* NOTE(review): presumably the set of worker threads assigned to this device - confirm */
+} dpdk_device_config_t;
+
+typedef struct { /* DPDK configuration state; dpdk_main_t refers to it through its `conf` pointer */
+
+ /* Config stuff */
+ u8 ** eal_init_args;
+ u8 * eal_init_args_str;
+ u8 * uio_driver_name;
+ u8 no_multi_seg;
+ u8 enable_tcp_udp_checksum;
+
+ /* Required config parameters */
+ u8 coremask_set_manually;
+ u8 nchannels_set_manually;
+ u32 coremask;
+ u32 nchannels;
+ u32 num_mbufs;
+ u8 num_kni; /* kept as u8: kni_init allows u32, but port_id in the callback fn is only u8 */
+
+ /*
+ * Format interface names as xxxEthernet%d/%d/%d (decimal) instead of
+ * xxxEthernet%x/%x/%x (hexadecimal).
+ */
+ u8 interface_name_format_decimal;
+
+ /* virtio vhost-user switch */
+ u8 use_virtio_vhost;
+
+ /* vhost-user coalescence frames config */
+ u32 vhost_coalesce_frames;
+ f64 vhost_coalesce_time;
+
+ /* per-device config */
+ dpdk_device_config_t default_devconf;
+ dpdk_device_config_t * dev_confs; /* NOTE(review): presumably a vector of per-device entries - confirm */
+ uword * device_config_index_by_pci_addr; /* NOTE(review): presumably a hash from PCI address to an index into dev_confs - confirm */
+
+} dpdk_config_main_t;
+
+dpdk_config_main_t dpdk_config_main; /* NOTE(review): object definition (not `extern`) in a header; every includer gets a tentative definition - confirm this is intended, or declare extern and define once in a .c file */
+
typedef struct {
/* Devices */
/* vlib buffer free list, must be same size as an rte_mbuf */
u32 vlib_buffer_free_list_index;
- /*
- * format interface names ala xxxEthernet%d/%d/%d instead of
- * xxxEthernet%x/%x/%x. For VIRL.
- */
- u8 interface_name_format_decimal;
-
-
/* dpdk worker "threads" */
dpdk_worker_t * workers;
- /* Config stuff */
- u8 ** eal_init_args;
- u8 * eal_init_args_str;
- u8 * eth_if_blacklist;
- u8 * eth_if_whitelist;
- u8 * uio_driver_name;
- u8 no_multi_seg;
-
- /* Required config parameters */
- u8 coremask_set_manually;
- u8 nchannels_set_manually;
- u32 coremask;
- u32 nchannels;
- u32 num_mbufs;
- u32 use_rss;
- u32 max_tx_queues;
- u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */
/* Ethernet input node index */
u32 ethernet_input_node_index;
- /* dpdk i/o thread initialization barrier */
- volatile u32 io_thread_release;
-
/* pcap tracing [only works if (CLIB_DEBUG > 0)] */
int tx_pcap_enable;
pcap_main_t pcap_main;
u32 pcap_sw_if_index;
u32 pcap_pkts_to_capture;
- /* virtio vhost-user switch */
- u8 use_virtio_vhost;
-
- /* vhost-user coalescence frames config */
- u32 vhost_coalesce_frames;
- f64 vhost_coalesce_time;
-
/* hashes */
uword * dpdk_device_by_kni_port_id;
uword * vu_sw_if_index_by_listener_fd;
*/
u8 admin_up_down_in_progress;
- u8 have_io_threads;
+ u8 use_rss;
/* which cpus are running dpdk-input */
int input_cpu_first_index;
int input_cpu_count;
+ /* control interval of dpdk link state and stat polling */
+ f64 link_state_poll_interval;
+ f64 stat_poll_interval;
+
+ /* Sleep for this many MS after each device poll */
+ u32 poll_sleep;
+
/* convenience */
vlib_main_t * vlib_main;
vnet_main_t * vnet_main;
+ dpdk_config_main_t * conf;
} dpdk_main_t;
dpdk_main_t dpdk_main;
DPDK_RX_N_NEXT,
} dpdk_rx_next_t;
+typedef struct { /* TX trace record: buffer/device/queue identifiers plus by-value copies of the rte_mbuf and vlib buffer */
+ u32 buffer_index;
+ u16 device_index;
+ u8 queue_index; /* NOTE(review): u8 here but u16 in dpdk_rx_dma_trace_t - confirm the difference is intentional */
+ struct rte_mbuf mb; /* stored by value, not as a pointer */
+ /* Copy of VLIB buffer; packet data stored in pre_data. */
+ vlib_buffer_t buffer;
+} dpdk_tx_dma_trace_t;
+
+typedef struct { /* RX trace record: buffer/device/queue identifiers, by-value rte_mbuf and vlib buffer copies, and a raw data prefix */
+ u32 buffer_index;
+ u16 device_index;
+ u16 queue_index; /* NOTE(review): u16 here but u8 in dpdk_tx_dma_trace_t - confirm the difference is intentional */
+ struct rte_mbuf mb; /* stored by value, not as a pointer */
+ vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */
+ u8 data[256]; /* First 256 data bytes, used for hexdump */
+} dpdk_rx_dma_trace_t;
+
void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b);
void dpdk_set_next_node (dpdk_rx_next_t, char *);
-typedef void (*dpdk_io_thread_callback_t) (vlib_main_t *vm);
+clib_error_t * dpdk_set_mac_address (vnet_hw_interface_t * hi, char * address);
+
+clib_error_t * dpdk_set_mc_filter (vnet_hw_interface_t * hi,
+ struct ether_addr mc_addr_vec[], int naddr);
-void dpdk_io_thread (vlib_worker_thread_t * w,
- u32 instances,
- u32 instance_id,
- char *worker_name,
- dpdk_io_thread_callback_t callback);
void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd);
clib_error_t * dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd);
u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance);
-vlib_frame_queue_elt_t * vlib_get_handoff_queue_elt (u32 vlib_worker_index);
-
-u32 dpdk_get_handoff_node_index (void);
-
void set_efd_bitmap (u8 *bitmap, u32 value, u32 op);
+struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b);
+struct rte_mbuf * dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b);
+
#define foreach_dpdk_error \
_(NONE, "no error") \
_(RX_PACKET_ERROR, "Rx packet errors") \
vm->error_main.counters[my_n->error_heap_index+counter_index] += count;
}
+int dpdk_set_stat_poll_interval (f64 interval);
+int dpdk_set_link_state_poll_interval (f64 interval);
void dpdk_update_link_state (dpdk_device_t * xd, f64 now);
void dpdk_device_lock_init(dpdk_device_t * xd);
void dpdk_device_lock_free(dpdk_device_t * xd);
u32 dpdk_get_admin_up_down_in_progress (void);
+u32 dpdk_num_mbufs (void);
+
+dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t *hi);
+
+i8 dpdk_get_cpu_socket (vnet_hw_interface_t *hi);
+
+void * dpdk_input_multiarch_select(); /* NOTE(review): `()` declares no prototype in C, so arguments are unchecked; use (void) if the definition takes none - confirm */
+void * dpdk_input_rss_multiarch_select(); /* same non-prototype form as above */
+void * dpdk_input_efd_multiarch_select(); /* same non-prototype form as above */
+
+clib_error_t*
+dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats* dest);
+
+format_function_t format_dpdk_device_name;
+format_function_t format_dpdk_device;
+format_function_t format_dpdk_tx_dma_trace;
+format_function_t format_dpdk_rx_dma_trace;
+format_function_t format_dpdk_rte_mbuf;
+format_function_t format_dpdk_rx_rte_mbuf;
+unformat_function_t unformat_socket_mem;
+clib_error_t * unformat_rss_fn(unformat_input_t * input, uword * rss_fn);
+
+
+static inline void
+dpdk_pmd_constructor_init() /* NOTE(review): empty parameter list is not a prototype in C; (void) would be stricter */
+{
+ /* Add references to DPDK Driver Constructor functions to get the dynamic
+ * loader to pull in the driver library & run the constructors.
+ *
+ * Each _(d) below declares devinitfn_<d> locally and assigns its address
+ * to a volatile function pointer; the function itself is never called
+ * here. The macro body already ends in ';', so the invocations are
+ * written without a trailing semicolon. Every driver reference is
+ * compiled in only when the matching RTE_LIBRTE_* macro is defined.
+ */
+#define _(d) \
+ do { \
+ void devinitfn_ ##d(void); \
+ __attribute__((unused)) void (* volatile pf)(void); \
+ pf = devinitfn_ ##d; \
+ } while(0);
+
+#ifdef RTE_LIBRTE_EM_PMD
+ _(em_pmd_drv)
+#endif
+
+#ifdef RTE_LIBRTE_IGB_PMD
+ _(pmd_igb_drv)
+#endif
+
+#ifdef RTE_LIBRTE_IXGBE_PMD
+ _(rte_ixgbe_driver)
+#endif
+
+#ifdef RTE_LIBRTE_I40E_PMD
+ _(rte_i40e_driver)
+ _(rte_i40evf_driver)
+#endif
+
+#ifdef RTE_LIBRTE_FM10K_PMD
+ _(rte_fm10k_driver)
+#endif
+
+#ifdef RTE_LIBRTE_VIRTIO_PMD
+ _(rte_virtio_driver)
+#endif
+
+#ifdef RTE_LIBRTE_VMXNET3_PMD
+ _(rte_vmxnet3_driver)
+#endif
+
+#ifdef RTE_LIBRTE_VICE_PMD
+ _(rte_vice_driver)
+#endif
+
+#ifdef RTE_LIBRTE_ENIC_PMD
+ _(rte_enic_driver)
+#endif
+
+#ifdef RTE_LIBRTE_PMD_AF_PACKET
+ _(pmd_af_packet_drv)
+#endif
+
+#ifdef RTE_LIBRTE_CXGBE_PMD
+ _(rte_cxgbe_driver)
+#endif
+
+#ifdef RTE_LIBRTE_PMD_BOND
+ _(bond_drv)
+#endif
+
+#ifdef RTE_LIBRTE_DPAA2_PMD
+ _(pmd_dpaa2_drv)
+#endif
+
+#undef _
+
+/*
+ * At the moment, the ThunderX NIC driver doesn't have
+ * an entry point named "devinitfn_rte_xxx_driver".
+ * This second variant of _(d) therefore references the function named
+ * exactly d, without the devinitfn_ prefix.
+ */
+#define _(d) \
+ do { \
+ void d(void); \
+ __attribute__((unused)) void (* volatile pf)(void); \
+ pf = d; \
+ } while(0);
+
+#ifdef RTE_LIBRTE_THUNDERVNIC_PMD
+ _(rte_nicvf_pmd_init)
+#endif
+#undef _
+
+}
+
uword
-dpdk_input_rss (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f);
+admin_up_down_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt,
+ vlib_frame_t * f);
#endif /* __included_dpdk_h__ */