vhost: Improve data path
[vpp.git] / vnet / vnet / devices / virtio / vhost-user.h
index d6d5573..3083b61 100644 (file)
 #define VHOST_MEMORY_MAX_NREGIONS       8
 #define VHOST_USER_MSG_HDR_SZ           12
 #define VHOST_VRING_MAX_SIZE            32768
-#define VHOST_NET_VRING_IDX_RX          0
-#define VHOST_NET_VRING_IDX_TX          1
-#define VHOST_NET_VRING_NUM             2
+/* Maximum number of vrings per interface: 8 TX + 8 RX. */
+#define VHOST_VRING_MAX_N               16
+/* Vring index for queue `qid`: RX vrings take the even slots (2*qid) and
+ * TX vrings the odd slots (2*qid + 1).  The argument is parenthesized so
+ * that an expression argument such as `q + 1` expands correctly. */
+#define VHOST_VRING_IDX_RX(qid)         (2 * (qid))
+#define VHOST_VRING_IDX_TX(qid)         (2 * (qid) + 1)
 
 #define VIRTQ_DESC_F_NEXT               1
+#define VIRTQ_DESC_F_INDIRECT           4
 #define VHOST_USER_REPLY_MASK       (0x1 << 2)
 
 #define VHOST_USER_PROTOCOL_F_MQ   0
 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD        1
 #define VHOST_VRING_F_LOG 0
 
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
 #define VHOST_USER_F_PROTOCOL_FEATURES  30
-#define VHOST_USER_PROTOCOL_FEATURES   (1ULL << VHOST_USER_PROTOCOL_F_MQ)
+#define VHOST_USER_PROTOCOL_FEATURES   ((1ULL << VHOST_USER_PROTOCOL_F_MQ) |   \
+                                       (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD))
 
 /* If multiqueue is provided by host, then we support it. */
 #define VIRTIO_NET_CTRL_MQ   4
 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
-#endif
+
+#define VRING_USED_F_NO_NOTIFY  1
 
 #define foreach_virtio_net_feature      \
  _ (VIRTIO_NET_F_MRG_RXBUF, 15)         \
- _ (VIRTIO_F_ANY_LAYOUT, 27)            \
- _ (VHOST_F_LOG_ALL, 26)                \
+ _ (VIRTIO_NET_F_CTRL_VQ, 17)           \
  _ (VIRTIO_NET_F_GUEST_ANNOUNCE, 21)    \
- _ (VHOST_USER_F_PROTOCOL_FEATURES, 30)
+ _ (VIRTIO_NET_F_MQ, 22)                \
+ _ (VHOST_F_LOG_ALL, 26)                \
+ _ (VIRTIO_F_ANY_LAYOUT, 27)            \
+ _ (VIRTIO_F_INDIRECT_DESC, 28)         \
+ _ (VHOST_USER_F_PROTOCOL_FEATURES, 30) \
+ _ (VIRTIO_F_VERSION_1, 32) /* (name, number) list expanded via _(); NOTE(review): 32 does not fit an int-width shift - if these numbers are used as bit positions, shift a 64-bit one (1ULL << n) */
 
 
-typedef enum {
+typedef enum
+{
 #define _(f,n) FEAT_##f = (n),
   foreach_virtio_net_feature
 #undef _
 } virtio_net_feature_t;
 
-int vhost_user_create_if(vnet_main_t * vnm, vlib_main_t * vm, 
-    const char * sock_filename, u8 is_server,
-    u32 * sw_if_index, u64 feature_mask,
-    u8 renumber, u32 custom_dev_instance, u8 *hwaddr);
-int vhost_user_modify_if(vnet_main_t * vnm, vlib_main_t * vm,
-    const char * sock_filename, u8 is_server,
-    u32 sw_if_index, u64 feature_mask,
-    u8 renumber, u32 custom_dev_instance);
-int vhost_user_delete_if(vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index);
-
-typedef struct vhost_user_memory_region {
+int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
+                         const char *sock_filename, u8 is_server,
+                         u32 * sw_if_index, u64 feature_mask,
+                         u8 renumber, u32 custom_dev_instance, u8 * hwaddr);
+int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
+                         const char *sock_filename, u8 is_server,
+                         u32 sw_if_index, u64 feature_mask,
+                         u8 renumber, u32 custom_dev_instance);
+int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
+                         u32 sw_if_index);
+
+/* *INDENT-OFF* */
+typedef struct vhost_user_memory_region
+{
   u64 guest_phys_addr;
   u64 memory_size;
   u64 userspace_addr;
   u64 mmap_offset;
-} vhost_user_memory_region_t;
+} __attribute ((packed)) vhost_user_memory_region_t;
 
-typedef struct vhost_user_memory {
+typedef struct vhost_user_memory
+{
   u32 nregions;
   u32 padding;
   vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS];
-} vhost_user_memory_t;
+} __attribute ((packed)) vhost_user_memory_t;
 
-typedef struct {
-  unsigned int index, num;
-} vhost_vring_state_t;
+typedef struct
+{
+  u32 index, num;
+} __attribute ((packed)) vhost_vring_state_t;
 
-typedef struct {
-  unsigned int index, flags;
+typedef struct
+{
+  u32 index, flags;
   u64 desc_user_addr, used_user_addr, avail_user_addr, log_guest_addr;
-} vhost_vring_addr_t;
+} __attribute ((packed)) vhost_vring_addr_t;
 
-typedef struct vhost_user_log {
+typedef struct vhost_user_log
+{
   u64 size;
   u64 offset;
-} vhost_user_log_t;
+} __attribute ((packed)) vhost_user_log_t;
 
-typedef enum vhost_user_req {
+typedef enum vhost_user_req
+{
   VHOST_USER_NONE = 0,
   VHOST_USER_GET_FEATURES = 1,
   VHOST_USER_SET_FEATURES = 2,
@@ -110,37 +125,40 @@ typedef enum vhost_user_req {
   VHOST_USER_SET_VRING_ERR = 14,
   VHOST_USER_GET_PROTOCOL_FEATURES = 15,
   VHOST_USER_SET_PROTOCOL_FEATURES = 16,
-#if RTE_VERSION >= RTE_VERSION_NUM(2, 2, 0, 0)
   VHOST_USER_GET_QUEUE_NUM = 17,
-#endif
   VHOST_USER_SET_VRING_ENABLE = 18,
   VHOST_USER_MAX
 } vhost_user_req_t;
 
 // vring_desc: I/O buffer descriptor (one entry of the descriptor table)
-typedef struct {
+typedef struct
+{
   uint64_t addr;  // packet data buffer address
   uint32_t len;   // packet data buffer size
   uint16_t flags; // VIRTQ_DESC_F_* flags (defined earlier in this header)
   uint16_t next;  // index of the next descriptor in the chain; presumably only meaningful when VIRTQ_DESC_F_NEXT is set
 } __attribute ((packed)) vring_desc_t;
 
-typedef struct {
+typedef struct
+{
   uint16_t flags;
-  uint16_t idx;
+  volatile uint16_t idx;
   uint16_t ring[VHOST_VRING_MAX_SIZE];
 } __attribute ((packed)) vring_avail_t;
 
-typedef struct {
+typedef struct
+{
   uint16_t flags;
   uint16_t idx;
-  struct /* vring_used_elem */ {
-    uint32_t id; 
-    uint32_t len; 
-  } ring[VHOST_VRING_MAX_SIZE];
+  struct /* vring_used_elem */
+    {
+      uint32_t id;
+      uint32_t len;
+    } ring[VHOST_VRING_MAX_SIZE];
 } __attribute ((packed)) vring_used_t;
 
-typedef struct {
+typedef struct
+{
   u8 flags;
   u8 gso_type;
   u16 hdr_len;
@@ -155,107 +173,178 @@ typedef struct  {
 } __attribute ((packed)) virtio_net_hdr_mrg_rxbuf_t;
 
 typedef struct vhost_user_msg {
-    vhost_user_req_t request;
-    u32 flags;
-    u32 size;
-    union {
-        u64 u64;
-        vhost_vring_state_t state;
-        vhost_vring_addr_t addr;
-        vhost_user_memory_t memory;
-        vhost_user_log_t log;
+  vhost_user_req_t request;
+  u32 flags;
+  u32 size;
+  union
+    {
+      u64 u64;
+      vhost_vring_state_t state;
+      vhost_vring_addr_t addr;
+      vhost_user_memory_t memory;
+      vhost_user_log_t log;
     };
 } __attribute ((packed)) vhost_user_msg_t;
+/* *INDENT-ON* */
 
-typedef struct {
-  u32 qsz;
+typedef struct
+{ /* State kept for one virtual ring; vhost_user_intf_t holds an array of these. */
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  u16 qsz; /* ring size - presumably the number of descriptors; confirm */
   u16 last_avail_idx;
   u16 last_used_idx;
+  u16 n_since_last_int;
   vring_desc_t *desc; /* descriptor table */
   vring_avail_t *avail; /* avail ring */
   vring_used_t *used; /* used ring */
-  u64 log_guest_addr;
-  int callfd;
-  int kickfd;
+  f64 int_deadline;
+  u8 started;
+  u8 enabled;
+  u8 log_used;
+  /* Non-runtime fields follow; the mark below puts them in a different cache line. */
+    CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
   int errfd;
-  u32 enabled;
-  u32 log_used;
   u32 callfd_idx;
-  u32 n_since_last_int;
-  f64 int_deadline;
+  u32 kickfd_idx;
+  u64 log_guest_addr;
 } vhost_user_vring_t;
 
-typedef struct {
-  CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
-  volatile u32 * lockp;
+typedef struct
+{ /* Per-interface vhost-user state. */
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
   u32 is_up;
   u32 admin_up;
-  u32 unix_fd;
+  u32 unix_server_index;
   u32 unix_file_index;
-  u32 client_fd;
   char sock_filename[256];
   int sock_errno;
-  u8 sock_is_server;
   u32 hw_if_index, sw_if_index;
-  u8 active;
-  
-  u32 nregions;
+
+  /* Feature negotiation */
   u64 features;
   u64 feature_mask;
   u64 protocol_features;
-  u32 num_vrings;
+
+  /* Memory region information: parallel arrays with one slot per region */
+  u32 nregions; /* presumably the number of slots in use below; confirm */
   vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS];
-  void * region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS];
+  void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS];
+  u64 region_guest_addr_lo[VHOST_MEMORY_MAX_NREGIONS]; /* NOTE(review): looks like the low bound of each region's guest address range - confirm */
+  u64 region_guest_addr_hi[VHOST_MEMORY_MAX_NREGIONS]; /* NOTE(review): looks like the high bound of each region's guest address range - confirm */
   u32 region_mmap_fd[VHOST_MEMORY_MAX_NREGIONS];
-  vhost_user_vring_t vrings[2];
+
+  /* Virtual rings */
+  vhost_user_vring_t vrings[VHOST_VRING_MAX_N]; /* presumably indexed with VHOST_VRING_IDX_RX/TX (qid); confirm */
+  volatile u32 *vring_locks[VHOST_VRING_MAX_N]; /* same length as vrings[]: presumably one lock per vring; confirm */
+
   int virtio_net_hdr_sz;
   int is_any_layout;
-  u32 * d_trace_buffers;
 
-  void * log_base_addr;
+  void *log_base_addr;
   u64 log_size;
+
+  /* Whether to use a spinlock or the per_cpu_tx_qid assignment */
+  u8 use_tx_spinlock;
+  u16 *per_cpu_tx_qid;
+
+  /* Vector of workers for this interface */
+  u32 *workers;
 } vhost_user_intf_t;
 
-typedef struct {
-  u32 ** rx_buffers;
+typedef struct
+{ /* Pairs an interface index with a queue id; vhost_cpu_t keeps these in rx_queues. */
+  u16 vhost_iface_index; /* presumably an index into vhost_user_main_t.vhost_user_interfaces; confirm */
+  u16 qid; /* queue id - presumably the interface queue index, not the vring index; confirm */
+} vhost_iface_and_queue_t;
+
+typedef struct
+{ /* One copy descriptor (destination, source, length); vhost_cpu_t keeps an array of these. */
+  uword dst; /* NOTE(review): looks like a destination address stored as an integer - confirm */
+  uword src; /* NOTE(review): looks like a source address stored as an integer - confirm */
+  u32 len; /* presumably a length in bytes; confirm */
+} vhost_copy_t;
+
+typedef struct
+{
+  u16 qid; /**< The interface queue index (not the virtio vring idx) */
+  u16 device_index; /**< The device index */
+  u32 virtio_ring_flags; /**< Runtime queue flags */
+  u16 first_desc_len; /**< Length of the first data descriptor */
+  virtio_net_hdr_mrg_rxbuf_t hdr; /**< Virtio header */
+} vhost_trace_t;
+
+
+#define VHOST_USER_RX_BUFFERS_N (2 * VLIB_FRAME_SIZE + 2)
+#define VHOST_USER_COPY_ARRAY_N (4 * VLIB_FRAME_SIZE)
+
+typedef struct
+{ /* Per-CPU vhost-user data (vhost_user_main_t.cpus points at these). */
+  vhost_iface_and_queue_t *rx_queues; /* (interface, queue) pairs - presumably the RX queues served by this CPU; confirm */
+  u32 rx_buffers_len; /* presumably the number of valid entries in rx_buffers[]; confirm */
+  u32 rx_buffers[VHOST_USER_RX_BUFFERS_N];
+
+  virtio_net_hdr_mrg_rxbuf_t tx_headers[VLIB_FRAME_SIZE]; /* VLIB_FRAME_SIZE header slots */
+  vhost_copy_t copy[VHOST_USER_COPY_ARRAY_N]; /* copy descriptors, at most VHOST_USER_COPY_ARRAY_N */
+
+  /* This is here so it doesn't end up
+   * using stack or registers. */
+  vhost_trace_t *current_trace;
+} vhost_cpu_t;
+
+typedef struct
+{
   u32 mtu_bytes;
-  vhost_user_intf_t * vhost_user_interfaces;
-  u32 * vhost_user_inactive_interfaces_index;
-  uword * vhost_user_interface_index_by_listener_fd;
-  uword * vhost_user_interface_index_by_sock_fd;
-  uword * vhost_user_interface_index_by_sw_if_index;
-  u32 * show_dev_instance_by_real_dev_instance;
+  vhost_user_intf_t *vhost_user_interfaces;
+  u32 *show_dev_instance_by_real_dev_instance;
   u32 coalesce_frames;
   f64 coalesce_time;
   int dont_dump_vhost_user_memory;
+
+  /** first cpu index */
+  u32 input_cpu_first_index;
+
+  /** total cpu count */
+  u32 input_cpu_count;
+
+  /** Per-CPU data for vhost-user */
+  vhost_cpu_t *cpus;
+
+  /** Pseudo random iterator */
+  u32 random;
 } vhost_user_main_t;
 
-typedef struct {
-    u8 if_name[64];
-    u32 sw_if_index;
-    u32 virtio_net_hdr_sz;
-    u64 features;
-    u8 is_server;
-    u8 sock_filename[256];
-    u32 num_regions;
-    int sock_errno;
+typedef struct
+{
+  u8 if_name[64];
+  u32 sw_if_index;
+  u32 virtio_net_hdr_sz;
+  u64 features;
+  u8 is_server;
+  u8 sock_filename[256];
+  u32 num_regions;
+  int sock_errno;
 } vhost_user_intf_details_t;
 
-int vhost_user_dump_ifs(vnet_main_t * vnm, vlib_main_t * vm,
-        vhost_user_intf_details_t **out_vuids);
+int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
+                        vhost_user_intf_details_t ** out_vuids);
 
 // CLI commands to be used from dpdk
-clib_error_t *
-vhost_user_connect_command_fn (vlib_main_t * vm,
-                 unformat_input_t * input,
-                 vlib_cli_command_t * cmd);
-clib_error_t *
-vhost_user_delete_command_fn (vlib_main_t * vm,
-                 unformat_input_t * input,
-                 vlib_cli_command_t * cmd);
-clib_error_t *
-show_vhost_user_command_fn (vlib_main_t * vm,
-                 unformat_input_t * input,
-                 vlib_cli_command_t * cmd);
+clib_error_t *vhost_user_connect_command_fn (vlib_main_t * vm,
+                                            unformat_input_t * input,
+                                            vlib_cli_command_t * cmd);
+clib_error_t *vhost_user_delete_command_fn (vlib_main_t * vm,
+                                           unformat_input_t * input,
+                                           vlib_cli_command_t * cmd);
+clib_error_t *show_vhost_user_command_fn (vlib_main_t * vm,
+                                         unformat_input_t * input,
+                                         vlib_cli_command_t * cmd);
 
 #endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */