#include <rte_dev.h>
#include <rte_log.h>
#include <rte_memory.h>
-#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_tailq.h>
#include <rte_eal.h>
#include <rte_ring.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
+#ifdef RTE_LIBRTE_KNI
#include <rte_kni.h>
+#endif
#include <rte_virtio_net.h>
#include <rte_pci_dev_ids.h>
#include <rte_version.h>
+#include <rte_eth_bond.h>
#include <vnet/unix/pcap.h>
#include <vnet/devices/virtio/vhost-user.h>
#define always_inline static inline __attribute__ ((__always_inline__))
#endif
-#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define NB_MBUF (32<<10)
-vnet_device_class_t dpdk_device_class;
-vlib_node_registration_t dpdk_input_node;
-vlib_node_registration_t dpdk_io_input_node;
-vlib_node_registration_t handoff_dispatch_node;
+extern vnet_device_class_t dpdk_device_class; /* These four are declarations
+ * only (extern, no initializer): including this header reserves no storage
+ * for them, so each object has to be defined once in some .c file. */
+extern vlib_node_registration_t dpdk_input_node;
+extern vlib_node_registration_t dpdk_io_input_node;
+extern vlib_node_registration_t handoff_dispatch_node;
typedef enum {
VNET_DPDK_DEV_ETH = 1, /* Standard DPDK PMD driver */
_ ("rte_enic_pmd", ENIC) \
_ ("rte_vmxnet3_pmd", VMXNET3) \
_ ("AF_PACKET PMD", AF_PACKET) \
+ _ ("rte_bond_pmd", BOND) \
_ ("rte_pmd_fm10k", FM10K) \
_ ("rte_cxgbe_pmd", CXGBE)
VNET_DPDK_PORT_TYPE_ETH_1G,
VNET_DPDK_PORT_TYPE_ETH_10G,
VNET_DPDK_PORT_TYPE_ETH_40G,
+ VNET_DPDK_PORT_TYPE_ETH_BOND,
VNET_DPDK_PORT_TYPE_ETH_SWITCH,
#ifdef NETMAP
VNET_DPDK_PORT_TYPE_NETMAP,
int callfd;
int kickfd;
int errfd;
+#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
+ int enabled;
+#endif
u32 callfd_idx;
u32 n_since_last_int;
f64 int_deadline;
+ u64 packets;
+ u64 bytes;
} dpdk_vu_vring;
/* vhost-user interface state as visible in this view: a feature mask, the
 * vring table and two arrays with one slot per memory region. */
typedef struct {
u64 feature_mask;
u32 num_vrings; /* NOTE(review): presumably the number of vrings[] entries in use -- confirm */
+#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
+ dpdk_vu_vring vrings[VHOST_MAX_QUEUE_PAIRS * 2]; /* DPDK >= 2.2.0: two vrings per queue pair */
+#else
dpdk_vu_vring vrings[2]; /* older DPDK: exactly two vrings */
+#endif
u64 region_addr[VHOST_MEMORY_MAX_NREGIONS]; /* same length as region_fd[] */
u32 region_fd[VHOST_MEMORY_MAX_NREGIONS];
} dpdk_vu_intf_t;
/* Per-device state as visible in this view: lock pointer table, instance id,
 * ethernet statistics snapshots, port type and EFD agent. */
typedef struct {
CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
- volatile u32 *lockp;
+ volatile u32 **lockp; /* now a pointer to lock pointers rather than a single
+ * lock word. NOTE(review): presumably one lock per TX queue, set up and torn
+ * down by dpdk_device_lock_init / dpdk_device_lock_free -- confirm. */
/* Instance ID */
u32 device_index;
struct rte_eth_stats stats;
struct rte_eth_stats last_stats;
+ struct rte_eth_stats last_cleared_stats; /* NOTE(review): presumably the stats snapshot taken at the last counter clear -- confirm */
struct rte_eth_xstats * xstats;
+ struct rte_eth_xstats * last_cleared_xstats; /* NOTE(review): presumably the xstats counterpart of last_cleared_stats -- confirm */
f64 time_last_stats_update;
dpdk_port_type_t port_type;
dpdk_efd_agent_t efd_agent;
+ u8 need_txlock; /* Used by VNET_DPDK_DEV_VHOST_USER */
} dpdk_device_t;
#define MAX_NELTS 32
i32 n_vectors[MAX_NELTS];
} frame_queue_trace_t;
+typedef struct { /* Bank of MAX_NELTS 64-bit counters. NOTE(review): presumably
+ * a histogram indexed by frame-queue element count (see the
+ * frame_queue_histogram member declared later in this header) -- confirm. */
+ u64 count[MAX_NELTS];
+} frame_queue_nelt_counter_t;
+
#define DPDK_TX_RING_SIZE (4 * 1024)
-#define DPDK_STATS_POLL_INTERVAL 10.0
-#define DPDK_LINK_POLL_INTERVAL 3.0
+#define DPDK_STATS_POLL_INTERVAL (10.0) /* seconds: same unit as the 1msec minimum below */
+#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */
+
+#define DPDK_LINK_POLL_INTERVAL (3.0) /* seconds: same unit as the 1msec minimum below */
+#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */
typedef struct {
CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
/* per-thread recycle lists */
u32 ** recycle;
+ /* buffer flags template, configurable to enable/disable tcp / udp cksum */
+ u32 buffer_flags_template;
+
/* flow control callback. If 0 then flow control is disabled */
dpdk_flowcontrol_callback_t flowcontrol_callback;
/* Config stuff */
u8 ** eal_init_args;
+ u8 * eal_init_args_str;
u8 * eth_if_blacklist;
u8 * eth_if_whitelist;
u8 * uio_driver_name;
u32 nchannels;
u32 num_mbufs;
u32 use_rss;
+ u32 max_tx_queues;
u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */
/* Ethernet input node index */
int input_cpu_first_index;
int input_cpu_count;
+ /* control interval of dpdk link state and stat polling */
+ f64 link_state_poll_interval;
+ f64 stat_poll_interval;
+
+ /* for frame queue tracing */
+ frame_queue_trace_t *frame_queue_traces;
+ frame_queue_nelt_counter_t *frame_queue_histogram;
+
/* convenience */
vlib_main_t * vlib_main;
vnet_main_t * vnet_main;
DPDK_RX_N_NEXT,
} dpdk_rx_next_t;
+typedef struct { /* TX DMA trace record: buffer, device and queue indices plus
+ * by-value copies of the rte_mbuf and vlib buffer structs.
+ * NOTE(review): presumably rendered by format_dpdk_tx_dma_trace -- confirm. */
+ u32 buffer_index;
+ u16 device_index;
+ u8 queue_index; /* NOTE(review): u8 here, but u16 in dpdk_rx_dma_trace_t */
+ struct rte_mbuf mb; /* embedded by value, not a pointer */
+ /* Copy of VLIB buffer; packet data stored in pre_data. */
+ vlib_buffer_t buffer;
+} dpdk_tx_dma_trace_t;
+
+typedef struct { /* RX DMA trace record: buffer, device and queue indices plus
+ * by-value copies of the rte_mbuf and vlib buffer structs.
+ * NOTE(review): presumably rendered by format_dpdk_rx_dma_trace -- confirm. */
+ u32 buffer_index;
+ u16 device_index;
+ u16 queue_index; /* NOTE(review): u16 here, but u8 in dpdk_tx_dma_trace_t */
+ struct rte_mbuf mb; /* embedded by value, not a pointer */
+ vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */
+} dpdk_rx_dma_trace_t;
+
void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b);
void dpdk_set_next_node (dpdk_rx_next_t, char *);
+clib_error_t * dpdk_set_mac_address (vnet_hw_interface_t * hi, char * address);
+
+clib_error_t * dpdk_set_mc_filter (vnet_hw_interface_t * hi,
+ struct ether_addr mc_addr_vec[], int naddr);
+
typedef void (*dpdk_io_thread_callback_t) (vlib_main_t *vm);
void dpdk_io_thread (vlib_worker_thread_t * w,
void set_efd_bitmap (u8 *bitmap, u32 value, u32 op);
+struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b);
+struct rte_mbuf * dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b);
+
#define foreach_dpdk_error \
_(NONE, "no error") \
_(RX_PACKET_ERROR, "Rx packet errors") \
vm->error_main.counters[my_n->error_heap_index+counter_index] += count;
}
+int dpdk_set_stat_poll_interval (f64 interval); /* NOTE(review): interval is
+ * presumably in seconds, like DPDK_STATS_POLL_INTERVAL; whether values below
+ * DPDK_MIN_STATS_POLL_INTERVAL are rejected, and what the int result means,
+ * is not visible in this header -- confirm against the definition. */
+int dpdk_set_link_state_poll_interval (f64 interval); /* NOTE(review): same
+ * open questions, with DPDK_LINK_POLL_INTERVAL and
+ * DPDK_MIN_LINK_POLL_INTERVAL as the related constants. */
void dpdk_update_link_state (dpdk_device_t * xd, f64 now);
+void dpdk_device_lock_init(dpdk_device_t * xd); /* NOTE(review): presumably sets up xd->lockp; pairs with dpdk_device_lock_free -- confirm */
+void dpdk_device_lock_free(dpdk_device_t * xd);
void dpdk_efd_update_counters(dpdk_device_t *xd, u32 n_buffers, u16 enabled);
u32 is_efd_discardable(vlib_thread_main_t *tm,
vlib_buffer_t * b0,
u8 is_server,
u32 * sw_if_index,
u64 feature_mask,
- u8 renumber, u32 custom_dev_instance);
+ u8 renumber, u32 custom_dev_instance,
+ u8 *hwaddr);
int dpdk_vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
const char * sock_filename,
u8 is_server,
u32 dpdk_get_admin_up_down_in_progress (void);
-uword
-dpdk_input_rss (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f);
+u32 dpdk_num_mbufs (void);
+
+int dpdk_io_thread_release (void);
+
+dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t *hi);
+
+i8 dpdk_get_cpu_socket (vnet_hw_interface_t *hi);
+/* NOTE(review): the three multiarch_select declarations below use empty
+ * parentheses, which in C leaves the parameters unspecified instead of
+ * declaring "no parameters"; (void), as used by dpdk_num_mbufs above, would
+ * make them real prototypes. Each returns an untyped void pointer, so the
+ * caller has to know what to convert the result to. */
+void * dpdk_input_multiarch_select();
+void * dpdk_input_rss_multiarch_select();
+void * dpdk_input_efd_multiarch_select();
+
+clib_error_t*
+dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats* dest);
+/* Declarations made through the format_function_t and unformat_function_t
+ * typedefs, both of which are defined outside this view. */
+format_function_t format_dpdk_device_name;
+format_function_t format_dpdk_device;
+format_function_t format_dpdk_tx_dma_trace;
+format_function_t format_dpdk_rx_dma_trace;
+format_function_t format_dpdk_rte_mbuf;
+format_function_t format_dpdk_rx_rte_mbuf;
+unformat_function_t unformat_socket_mem;
+
+/*
+ * dpdk_pmd_constructor_init: reference one entry-point symbol for every DPDK
+ * poll-mode driver that is enabled through the RTE_LIBRTE_ configuration
+ * macros tested below. Nothing is called from here: each macro expansion
+ * only declares the symbol and stores its address in a local function
+ * pointer that is never read. Takes no arguments and returns nothing.
+ */
+static inline void
+dpdk_pmd_constructor_init()
+{
+ /* Add references to DPDK Driver Constructor functions to get the dynamic
+ * loader to pull in the driver library & run the constructors.
+ *
+ * _(d) declares devinitfn_<d> as void (void) and assigns its address to a
+ * pointer marked unused and volatile (presumably so that the reference is
+ * neither warned about nor optimized away -- confirm). The expansion
+ * already ends in ';', which is why the invocations below carry no
+ * trailing semicolon.
+ */
+#define _(d) \
+ do { \
+ void devinitfn_ ##d(void); \
+ __attribute__((unused)) void (* volatile pf)(void); \
+ pf = devinitfn_ ##d; \
+ } while(0);
+
+#ifdef RTE_LIBRTE_EM_PMD
+ _(em_pmd_drv)
+#endif
+
+#ifdef RTE_LIBRTE_IGB_PMD
+ _(pmd_igb_drv)
+#endif
+
+#ifdef RTE_LIBRTE_IXGBE_PMD
+ _(rte_ixgbe_driver)
+#endif
+
+#ifdef RTE_LIBRTE_I40E_PMD
+ _(rte_i40e_driver)
+ _(rte_i40evf_driver)
+#endif
+
+#ifdef RTE_LIBRTE_FM10K_PMD
+ _(rte_fm10k_driver)
+#endif
+
+#ifdef RTE_LIBRTE_VIRTIO_PMD
+ _(rte_virtio_driver)
+#endif
+
+#ifdef RTE_LIBRTE_VMXNET3_PMD
+ _(rte_vmxnet3_driver)
+#endif
+
+#ifdef RTE_LIBRTE_VICE_PMD
+ _(rte_vice_driver)
+#endif
+
+#ifdef RTE_LIBRTE_ENIC_PMD
+ _(rte_enic_driver)
+#endif
+
+#ifdef RTE_LIBRTE_PMD_AF_PACKET
+ _(pmd_af_packet_drv)
+#endif
+
+#ifdef RTE_LIBRTE_CXGBE_PMD
+ _(rte_cxgbe_driver)
+#endif
+
+#ifdef RTE_LIBRTE_PMD_BOND
+ _(bond_drv)
+#endif
+
+#undef _
+
+/*
+ * At the moment, the ThunderX NIC driver doesn't have
+ * an entry point named "devinitfn_rte_xxx_driver",
+ * so this second variant of _(d) references the symbol d itself, without
+ * the devinitfn_ prefix. It is otherwise identical to the macro above.
+ */
+#define _(d) \
+ do { \
+ void d(void); \
+ __attribute__((unused)) void (* volatile pf)(void); \
+ pf = d; \
+ } while(0);
+
+#ifdef RTE_LIBRTE_THUNDERVNIC_PMD
+ _(rte_nicvf_pmd_init)
+#endif
+#undef _
+
+}
#endif /* __included_dpdk_h__ */