Allow DPDK per interface startup config to enable/disable VLAN stripping
[vpp.git] / vnet / vnet / devices / dpdk / dpdk.h
index 48c1772..05f74b8 100644 (file)
@@ -24,7 +24,6 @@
 #include <rte_dev.h>
 #include <rte_log.h>
 #include <rte_memory.h>
-#include <rte_memcpy.h>
 #include <rte_memzone.h>
 #include <rte_tailq.h>
 #include <rte_eal.h>
@@ -51,6 +50,7 @@
 #include <rte_virtio_net.h>
 #include <rte_pci_dev_ids.h>
 #include <rte_version.h>
+#include <rte_eth_bond.h>
 
 #include <vnet/unix/pcap.h>
 #include <vnet/devices/virtio/vhost-user.h>
 #define always_inline static inline __attribute__ ((__always_inline__))
 #endif
 
-#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
-#define NB_MBUF   (32<<10)
+#include <vlib/pci/pci.h>
 
-vnet_device_class_t dpdk_device_class;
-vlib_node_registration_t dpdk_input_node;
-vlib_node_registration_t dpdk_io_input_node;
-vlib_node_registration_t handoff_dispatch_node;
+#define NB_MBUF   (16<<10)
+
+extern vnet_device_class_t dpdk_device_class;
+extern vlib_node_registration_t dpdk_input_node;
+extern vlib_node_registration_t handoff_dispatch_node;
 
 typedef enum {
   VNET_DPDK_DEV_ETH = 1,      /* Standard DPDK PMD driver */
@@ -86,21 +86,19 @@ typedef enum {
   _ ("rte_i40e_pmd", I40E)        \
   _ ("rte_i40evf_pmd", I40EVF)    \
   _ ("rte_virtio_pmd", VIRTIO)    \
-  _ ("rte_vice_pmd", VICE)        \
   _ ("rte_enic_pmd", ENIC)        \
   _ ("rte_vmxnet3_pmd", VMXNET3)  \
   _ ("AF_PACKET PMD", AF_PACKET)  \
+  _ ("rte_bond_pmd", BOND)        \
   _ ("rte_pmd_fm10k", FM10K)      \
-  _ ("rte_cxgbe_pmd", CXGBE)
+  _ ("rte_cxgbe_pmd", CXGBE)      \
+  _ ("rte_dpaa2_dpni", DPAA2)
 
 typedef enum {
   VNET_DPDK_PMD_NONE,
 #define _(s,f) VNET_DPDK_PMD_##f,
   foreach_dpdk_pmd
 #undef _
-#ifdef NETMAP
-  VNET_DPDK_PMD_NETMAP,
-#endif
   VNET_DPDK_PMD_UNKNOWN, /* must be last */
 } dpdk_pmd_t;
 
@@ -108,10 +106,8 @@ typedef enum {
   VNET_DPDK_PORT_TYPE_ETH_1G,
   VNET_DPDK_PORT_TYPE_ETH_10G,
   VNET_DPDK_PORT_TYPE_ETH_40G,
+  VNET_DPDK_PORT_TYPE_ETH_BOND,
   VNET_DPDK_PORT_TYPE_ETH_SWITCH,
-#ifdef NETMAP
-  VNET_DPDK_PORT_TYPE_NETMAP,
-#endif
   VNET_DPDK_PORT_TYPE_AF_PACKET,
   VNET_DPDK_PORT_TYPE_UNKNOWN,
 } dpdk_port_type_t;
@@ -139,12 +135,12 @@ typedef struct {
   int callfd;
   int kickfd;
   int errfd;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
   int enabled;
-#endif
   u32 callfd_idx;
   u32 n_since_last_int;
   f64 int_deadline;
+  u64 packets;
+  u64 bytes;
 } dpdk_vu_vring;
 
 typedef struct {
@@ -159,13 +155,10 @@ typedef struct {
 
   u64 feature_mask;
   u32 num_vrings;
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
   dpdk_vu_vring vrings[VHOST_MAX_QUEUE_PAIRS * 2];
-#else
-  dpdk_vu_vring vrings[2];
-#endif
   u64 region_addr[VHOST_MEMORY_MAX_NREGIONS];
   u32 region_fd[VHOST_MEMORY_MAX_NREGIONS];
+  u64 region_offset[VHOST_MEMORY_MAX_NREGIONS];
 } dpdk_vu_intf_t;
 
 typedef void (*dpdk_flowcontrol_callback_t) (vlib_main_t *vm,
@@ -205,6 +198,9 @@ typedef struct {
   /* per-worker destination frame queue */
   dpdk_frame_t * frames;
 
+  /* number of sub-interfaces */
+  u16 vlan_subifs;
+
   dpdk_device_type_t dev_type:8;
   dpdk_pmd_t pmd:8;
   i8 cpu_socket;
@@ -241,30 +237,24 @@ typedef struct {
 
   struct rte_eth_stats stats;
   struct rte_eth_stats last_stats;
+  struct rte_eth_stats last_cleared_stats;
   struct rte_eth_xstats * xstats;
+  struct rte_eth_xstats * last_cleared_xstats;
   f64 time_last_stats_update;
   dpdk_port_type_t port_type;
 
   dpdk_efd_agent_t efd_agent;
+  u8 need_txlock; /* Used by VNET_DPDK_DEV_VHOST_USER */
 } dpdk_device_t;
 
-#define MAX_NELTS 32
-typedef struct {
-  CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
-  u64 head;
-  u64 head_hint;
-  u64 tail;
-  u32 n_in_use;
-  u32 nelts;
-  u32 written;
-  u32 threshold;
-  i32 n_vectors[MAX_NELTS];
-} frame_queue_trace_t;
 
 #define DPDK_TX_RING_SIZE (4 * 1024)
 
-#define DPDK_STATS_POLL_INTERVAL  10.0
-#define DPDK_LINK_POLL_INTERVAL   3.0
+#define DPDK_STATS_POLL_INTERVAL      (10.0)
+#define DPDK_MIN_STATS_POLL_INTERVAL  (0.001) /* 1msec */
+
+#define DPDK_LINK_POLL_INTERVAL       (3.0)
+#define DPDK_MIN_LINK_POLL_INTERVAL   (0.001) /* 1msec */
 
 typedef struct {
   CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
@@ -294,6 +284,66 @@ typedef struct dpdk_efd_t {
   u16 pad;
 } dpdk_efd_t;
 
+#define foreach_dpdk_device_config_item \
+  _ (num_rx_queues) \
+  _ (num_tx_queues) \
+  _ (num_rx_desc) \
+  _ (num_tx_desc) \
+  _ (rss_fn)
+
+typedef struct { /* Startup configuration record for one DPDK device */
+    vlib_pci_addr_t pci_addr; /* PCI address of the device this record describes */
+    u8 is_blacklisted; /* blacklist flag; NOTE(review): how it is consumed is not visible in this header */
+    u8 vlan_strip_offload; /* holds one of the DPDK_DEVICE_VLAN_STRIP_* values defined just below */
+#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0 /* also the value of a zero-initialised record */
+#define DPDK_DEVICE_VLAN_STRIP_OFF 1
+#define DPDK_DEVICE_VLAN_STRIP_ON  2
+
+#define _(x) uword x; /* turn every foreach_dpdk_device_config_item entry into a uword field of that name */
+    foreach_dpdk_device_config_item
+#undef _
+    clib_bitmap_t * workers; /* bitmap of workers; presumably those assigned to this device — TODO confirm */
+} dpdk_device_config_t;
+
+typedef struct { /* DPDK startup configuration: global settings plus per-device records */
+
+  /* General configuration */
+  u8 ** eal_init_args;
+  u8 * eal_init_args_str;
+  u8 * uio_driver_name;
+  u8 no_multi_seg;
+  u8 enable_tcp_udp_checksum;
+
+  /* Required config parameters */
+  u8 coremask_set_manually;
+  u8 nchannels_set_manually;
+  u32 coremask;
+  u32 nchannels;
+  u32 num_mbufs;
+  u8 num_kni;/* u8 on purpose: kni_init allows u32, but port_id in the callback fn is only u8 */
+
+  /*
+   * When set, format interface names as xxxEthernet%d/%d/%d (decimal)
+   * instead of xxxEthernet%x/%x/%x (hexadecimal).
+   */
+  u8 interface_name_format_decimal;
+
+  /* virtio vhost-user switch */
+  u8 use_virtio_vhost;
+
+  /* vhost-user coalescence frames config */
+  u32 vhost_coalesce_frames;
+  f64 vhost_coalesce_time;
+
+  /* per-device config */
+  dpdk_device_config_t default_devconf; /* presumably used for devices without their own record — TODO confirm */
+  dpdk_device_config_t * dev_confs; /* the per-device records */
+  uword * device_config_index_by_pci_addr; /* presumably maps a PCI address to an index into dev_confs — TODO confirm */
+
+} dpdk_config_main_t;
+
+dpdk_config_main_t dpdk_config_main; /* NOTE(review): this defines the object in a
+                                      * header (no `extern`), so every includer
+                                      * emits a tentative definition; consider
+                                      * `extern` here plus a single definition
+                                      * in one .c file. */
+
 typedef struct {
 
   /* Devices */
@@ -312,40 +362,13 @@ typedef struct {
   /* vlib buffer free list, must be same size as an rte_mbuf */
   u32 vlib_buffer_free_list_index;
 
-  /*
-   * format interface names ala xxxEthernet%d/%d/%d instead of
-   * xxxEthernet%x/%x/%x. For VIRL.
-   */
-  u8 interface_name_format_decimal;
-
-
   /* dpdk worker "threads" */
   dpdk_worker_t * workers;
 
-  /* Config stuff */
-  u8 ** eal_init_args;
-  u8 * eal_init_args_str;
-  u8 * eth_if_blacklist;
-  u8 * eth_if_whitelist;
-  u8 * uio_driver_name;
-  u8 no_multi_seg;
-
-  /* Required config parameters */
-  u8 coremask_set_manually;
-  u8 nchannels_set_manually;
-  u32 coremask;
-  u32 nchannels;
-  u32 num_mbufs;
-  u32 use_rss;
-  u32 max_tx_queues;
-  u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */
 
   /* Ethernet input node index */
   u32 ethernet_input_node_index;
 
-  /* dpdk i/o thread initialization barrier */
-  volatile u32 io_thread_release;
-
   /* pcap tracing [only works if (CLIB_DEBUG > 0)] */
   int tx_pcap_enable;
   pcap_main_t pcap_main;
@@ -353,13 +376,6 @@ typedef struct {
   u32 pcap_sw_if_index;
   u32 pcap_pkts_to_capture;
 
-  /* virtio vhost-user switch */
-  u8 use_virtio_vhost;
-
-  /* vhost-user coalescence frames config */
-  u32 vhost_coalesce_frames;
-  f64 vhost_coalesce_time;
-
   /* hashes */
   uword * dpdk_device_by_kni_port_id;
   uword * vu_sw_if_index_by_listener_fd;
@@ -377,15 +393,23 @@ typedef struct {
    */
   u8 admin_up_down_in_progress;
 
-  u8 have_io_threads;
+  u8 use_rss;
 
   /* which cpus are running dpdk-input */
   int input_cpu_first_index;
   int input_cpu_count;
 
+  /* control interval of dpdk link state and stat polling */
+  f64 link_state_poll_interval;
+  f64 stat_poll_interval;
+
+  /* Sleep for this many MS after each device poll */
+  u32 poll_sleep;
+
   /* convenience */
   vlib_main_t * vlib_main;
   vnet_main_t * vnet_main;
+  dpdk_config_main_t * conf;
 } dpdk_main_t;
 
 dpdk_main_t dpdk_main;
@@ -399,17 +423,33 @@ typedef enum {
   DPDK_RX_N_NEXT,
 } dpdk_rx_next_t;
 
+typedef struct { /* Trace record for the TX DMA path (per the type name) */
+  u32 buffer_index;
+  u16 device_index;
+  u8 queue_index; /* NOTE(review): u8 here, u16 in dpdk_rx_dma_trace_t — confirm the difference is intended */
+  struct rte_mbuf mb; /* rte_mbuf stored by value, not by pointer */
+  /* Copy of VLIB buffer; packet data stored in pre_data. */
+  vlib_buffer_t buffer;
+} dpdk_tx_dma_trace_t;
+
+typedef struct { /* Trace record for the RX DMA path (per the type name) */
+  u32 buffer_index;
+  u16 device_index;
+  u16 queue_index; /* NOTE(review): u16 here, u8 in dpdk_tx_dma_trace_t — confirm the difference is intended */
+  struct rte_mbuf mb; /* rte_mbuf stored by value, not by pointer */
+  vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */
+  u8 data[256];         /* First 256 data bytes, used for hexdump */
+} dpdk_rx_dma_trace_t;
+
 void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b);
 
 void dpdk_set_next_node (dpdk_rx_next_t, char *);
 
-typedef void (*dpdk_io_thread_callback_t) (vlib_main_t *vm);
+clib_error_t * dpdk_set_mac_address (vnet_hw_interface_t * hi, char * address);
+
+clib_error_t * dpdk_set_mc_filter (vnet_hw_interface_t * hi,
+                                   struct ether_addr mc_addr_vec[], int naddr);
 
-void dpdk_io_thread (vlib_worker_thread_t * w,
-                     u32 instances,
-                     u32 instance_id,
-                     char *worker_name,
-                     dpdk_io_thread_callback_t callback);
 void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd);
 
 clib_error_t * dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd);
@@ -419,12 +459,11 @@ void dpdk_set_flowcontrol_callback (vlib_main_t *vm,
 
 u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance);
 
-vlib_frame_queue_elt_t * vlib_get_handoff_queue_elt (u32 vlib_worker_index);
-
-u32 dpdk_get_handoff_node_index (void);
-
 void set_efd_bitmap (u8 *bitmap, u32 value, u32 op);
 
+struct rte_mbuf * dpdk_replicate_packet_mb (vlib_buffer_t * b);
+struct rte_mbuf * dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b);
+
 #define foreach_dpdk_error                                             \
   _(NONE, "no error")                                                  \
   _(RX_PACKET_ERROR, "Rx packet errors")                               \
@@ -458,6 +497,8 @@ void increment_efd_drop_counter (vlib_main_t * vm, u32 counter_index, u32 count)
    vm->error_main.counters[my_n->error_heap_index+counter_index] += count;
 }
 
+int dpdk_set_stat_poll_interval (f64 interval);
+int dpdk_set_link_state_poll_interval (f64 interval);
 void dpdk_update_link_state (dpdk_device_t * xd, f64 now);
 void dpdk_device_lock_init(dpdk_device_t * xd);
 void dpdk_device_lock_free(dpdk_device_t * xd);
@@ -528,7 +569,118 @@ int dpdk_vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
 
 u32 dpdk_get_admin_up_down_in_progress (void);
 
+u32 dpdk_num_mbufs (void);
+
+dpdk_pmd_t dpdk_get_pmd_type (vnet_hw_interface_t *hi);
+
+i8 dpdk_get_cpu_socket (vnet_hw_interface_t *hi);
+
+/*
+ * Multiarch selectors for the dpdk input functions.  Declared with (void)
+ * so they are real prototypes: an empty () leaves the parameter list
+ * unspecified and lets mistaken arguments go undiagnosed.
+ * NOTE(review): presumably each returns a pointer to the CPU-specific
+ * variant of the matching input function — confirm at the definitions.
+ */
+void * dpdk_input_multiarch_select(void);
+void * dpdk_input_rss_multiarch_select(void);
+void * dpdk_input_efd_multiarch_select(void);
+
+clib_error_t*
+dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats* dest);
+
+format_function_t format_dpdk_device_name;
+format_function_t format_dpdk_device;
+format_function_t format_dpdk_tx_dma_trace;
+format_function_t format_dpdk_rx_dma_trace;
+format_function_t format_dpdk_rte_mbuf;
+format_function_t format_dpdk_rx_rte_mbuf;
+unformat_function_t unformat_socket_mem;
+clib_error_t * unformat_rss_fn(unformat_input_t * input, uword * rss_fn);
+
+
+/*
+ * Create a reference to the registration entry point of every PMD that
+ * was enabled when DPDK was built (one RTE_LIBRTE_* guard per driver).
+ *
+ * Nothing is called here: each entry point's address is only stored in a
+ * volatile function pointer, so the symbol is referenced and cannot be
+ * discarded.  Takes no arguments and returns nothing.
+ */
+static inline void
+dpdk_pmd_constructor_init(void)
+{
+  /* Add references to DPDK Driver Constructor functions to get the dynamic
+   * loader to pull in the driver library & run the constructors.
+   */
+  /* The macro ends in ';' on purpose: the invocations below carry none. */
+#define _(d)                                            \
+  do {                                                  \
+    void devinitfn_ ##d(void);                          \
+    __attribute__((unused)) void (* volatile pf)(void); \
+    pf = devinitfn_ ##d;                                \
+  } while(0);
+
+#ifdef RTE_LIBRTE_EM_PMD
+  _(em_pmd_drv)
+#endif
+
+#ifdef RTE_LIBRTE_IGB_PMD
+  _(pmd_igb_drv)
+#endif
+
+#ifdef RTE_LIBRTE_IXGBE_PMD
+  _(rte_ixgbe_driver)
+#endif
+
+#ifdef RTE_LIBRTE_I40E_PMD
+  _(rte_i40e_driver)
+  _(rte_i40evf_driver)
+#endif
+
+#ifdef RTE_LIBRTE_FM10K_PMD
+  _(rte_fm10k_driver)
+#endif
+
+#ifdef RTE_LIBRTE_VIRTIO_PMD
+  _(rte_virtio_driver)
+#endif
+
+#ifdef RTE_LIBRTE_VMXNET3_PMD
+  _(rte_vmxnet3_driver)
+#endif
+
+#ifdef RTE_LIBRTE_VICE_PMD
+  _(rte_vice_driver)
+#endif
+
+#ifdef RTE_LIBRTE_ENIC_PMD
+  _(rte_enic_driver)
+#endif
+
+#ifdef RTE_LIBRTE_PMD_AF_PACKET
+  _(pmd_af_packet_drv)
+#endif
+
+#ifdef RTE_LIBRTE_CXGBE_PMD
+  _(rte_cxgbe_driver)
+#endif
+
+#ifdef RTE_LIBRTE_PMD_BOND
+  _(bond_drv)
+#endif
+
+#ifdef RTE_LIBRTE_DPAA2_PMD
+  _(pmd_dpaa2_drv)
+#endif
+
+#undef _
+
+/*
+ * At the moment, the ThunderX NIC driver doesn't have
+ * an entry point named "devinitfn_rte_xxx_driver", so this variant of
+ * the macro references the given symbol name as-is.
+ */
+#define _(d)                                            \
+  do {                                                  \
+    void d(void);                                       \
+    __attribute__((unused)) void (* volatile pf)(void); \
+    pf = d;                                             \
+  } while(0);
+
+#ifdef RTE_LIBRTE_THUNDERVNIC_PMD
+  _(rte_nicvf_pmd_init)
+#endif
+#undef _
+
+}
+
 uword
-dpdk_input_rss (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f);
+admin_up_down_process (vlib_main_t * vm,
+                       vlib_node_runtime_t * rt,
+                       vlib_frame_t * f);
 
 #endif /* __included_dpdk_h__ */