X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Fvmxnet3%2Fvmxnet3.c;h=ff0a7dc706b91c23a347cbcc7d9625982fb92b4f;hb=a57a7005d;hp=e34b3e69a27351b0e2bd187677636f630701861d;hpb=b7b929931a07fbb27b43d5cd105f366c3e29807e;p=vpp.git diff --git a/src/plugins/vmxnet3/vmxnet3.c b/src/plugins/vmxnet3/vmxnet3.c index e34b3e69a27..ff0a7dc706b 100644 --- a/src/plugins/vmxnet3/vmxnet3.c +++ b/src/plugins/vmxnet3/vmxnet3.c @@ -19,7 +19,8 @@ #include #include #include - +#include +#include #include #define PCI_VENDOR_ID_VMWARE 0x15ad @@ -62,14 +63,14 @@ vmxnet3_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, static clib_error_t * vmxnet3_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid, - vnet_hw_interface_rx_mode mode) + vnet_hw_if_rx_mode mode) { vmxnet3_main_t *vmxm = &vmxnet3_main; vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); vmxnet3_device_t *vd = pool_elt_at_index (vmxm->devices, hw->dev_instance); vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid); - if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + if (mode == VNET_HW_IF_RX_MODE_POLLING) rxq->int_mode = 0; else rxq->int_mode = 1; @@ -97,6 +98,35 @@ vmxnet3_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, node_index); } +static void +vmxnet3_clear_hw_interface_counters (u32 instance) +{ + vmxnet3_main_t *vmxm = &vmxnet3_main; + vmxnet3_device_t *vd = pool_elt_at_index (vmxm->devices, instance); + vmxnet3_tx_queue *tx = VMXNET3_TX_START (vd); + vmxnet3_rx_queue *rx = VMXNET3_RX_START (vd); + u16 qid; + + /* + * Set the "last_cleared_stats" to the current stats, so that + * things appear to clear from a display perspective. + */ + vmxnet3_reg_write (vd, 1, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS); + + vec_foreach_index (qid, vd->txqs) + { + vmxnet3_tx_stats *txs = vec_elt_at_index (vd->tx_stats, qid); + clib_memcpy (txs, &tx->stats, sizeof (*txs)); + tx++; + } + vec_foreach_index (qid, vd->rxqs) + { + vmxnet3_rx_stats *rxs = vec_elt_at_index (vd->rx_stats, qid); + clib_memcpy (rxs, &rx->stats, sizeof (*rxs)); + rx++; + } +} + static char *vmxnet3_tx_func_error_strings[] = { #define _(n,s) s, foreach_vmxnet3_tx_func_error @@ -110,6 +140,7 @@ VNET_DEVICE_CLASS (vmxnet3_device_class,) = .format_device = format_vmxnet3_device, .format_device_name = format_vmxnet3_device_name, .admin_up_down_function = vmxnet3_interface_admin_up_down, + .clear_counters = vmxnet3_clear_hw_interface_counters, .rx_mode_change_function = vmxnet3_interface_rx_mode_change, .rx_redirect_to_node = vmxnet3_set_interface_next_node, .tx_function_n_errors = VMXNET3_TX_N_ERROR, @@ -139,37 +170,48 @@ vmxnet3_write_mac (vmxnet3_device_t * vd) static clib_error_t * vmxnet3_provision_driver_shared (vlib_main_t * vm, vmxnet3_device_t * vd) { - vmxnet3_main_t *vmxm = &vmxnet3_main; vmxnet3_shared *shared; - vmxnet3_queues *q; u64 shared_dma; - clib_error_t *error; - u16 qid = 0, rid; - vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid); - vmxnet3_txq_t *txq = vec_elt_at_index (vd->txqs, qid); + u16 qid, rid; + vmxnet3_tx_queue *tx = VMXNET3_TX_START (vd); + vmxnet3_rx_queue *rx = VMXNET3_RX_START (vd); - vd->dma = vlib_physmem_alloc_aligned (vm, vmxm->physmem_region, &error, - sizeof (*vd->dma), 512); - if (error) - return error; + vd->driver_shared = + vlib_physmem_alloc_aligned_on_numa (vm, sizeof (*vd->driver_shared), 512, + vd->numa_node); + if (vd->driver_shared == 0) + return vlib_physmem_last_error (vm); - clib_memset (vd->dma, 0, sizeof (*vd->dma)); + clib_memset (vd->driver_shared, 0, sizeof (*vd->driver_shared)); - q = &vd->dma->queues; - q->tx.cfg.desc_address = vmxnet3_dma_addr (vm, vd, txq->tx_desc); - q->tx.cfg.comp_address = vmxnet3_dma_addr (vm, vd, txq->tx_comp); - q->tx.cfg.num_desc = txq->size; - q->tx.cfg.num_comp = txq->size; - for (rid = 0; rid < VMXNET3_RX_RING_SIZE; rid++) - { - q->rx.cfg.desc_address[rid] = vmxnet3_dma_addr (vm, vd, + vec_foreach_index (qid, vd->txqs) + { + vmxnet3_txq_t *txq = vec_elt_at_index (vd->txqs, qid); + + tx->cfg.desc_address = vmxnet3_dma_addr (vm, vd, txq->tx_desc); + tx->cfg.comp_address = vmxnet3_dma_addr (vm, vd, txq->tx_comp); + tx->cfg.num_desc = txq->size; + tx->cfg.num_comp = txq->size; + tx++; + } + + vec_foreach_index (qid, vd->rxqs) + { + vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid); + + for (rid = 0; rid < VMXNET3_RX_RING_SIZE; rid++) + { + rx->cfg.desc_address[rid] = vmxnet3_dma_addr (vm, vd, rxq->rx_desc[rid]); - q->rx.cfg.num_desc[rid] = rxq->size; - } - q->rx.cfg.comp_address = vmxnet3_dma_addr (vm, vd, rxq->rx_comp); - q->rx.cfg.num_comp = rxq->size; + rx->cfg.num_desc[rid] = rxq->size; + } + rx->cfg.comp_address = vmxnet3_dma_addr (vm, vd, rxq->rx_comp); + rx->cfg.num_comp = rxq->size; + rx->cfg.intr_index = qid; + rx++; + } - shared = &vd->dma->shared; + shared = vd->driver_shared; shared->magic = VMXNET3_SHARED_MAGIC; shared->misc.version = VMXNET3_VERSION_MAGIC; if (sizeof (void *) == 4) @@ -178,17 +220,29 @@ vmxnet3_provision_driver_shared (vlib_main_t * vm, vmxnet3_device_t * vd) shared->misc.guest_info = VMXNET3_GOS_BITS_64; shared->misc.guest_info |= VMXNET3_GOS_TYPE_LINUX; shared->misc.version_support = VMXNET3_VERSION_SELECT; + shared->misc.upt_features = VMXNET3_F_RXCSUM; + if (vd->gso_enable) + shared->misc.upt_features |= VMXNET3_F_LRO; + if (vd->num_rx_queues > 1) + { + shared->misc.upt_features |= VMXNET3_F_RSS; + shared->rss.version = 1; + shared->rss.address = vmxnet3_dma_addr (vm, vd, vd->rss); + shared->rss.length = sizeof (*vd->rss); + } + shared->misc.max_num_rx_sg = 0; shared->misc.upt_version_support = VMXNET3_UPT_VERSION_SELECT; - shared->misc.queue_desc_address = vmxnet3_dma_addr (vm, vd, q); - shared->misc.queue_desc_len = sizeof (*q); + shared->misc.queue_desc_address = vmxnet3_dma_addr (vm, vd, vd->queues); + shared->misc.queue_desc_len = sizeof (*tx) * vd->num_tx_queues + + sizeof (*rx) * vd->num_rx_queues; shared->misc.mtu = VMXNET3_MTU; shared->misc.num_tx_queues = vd->num_tx_queues; shared->misc.num_rx_queues = vd->num_rx_queues; shared->interrupt.num_intrs = vd->num_intrs; - shared->interrupt.event_intr_index = 1; + shared->interrupt.event_intr_index = vd->num_rx_queues; shared->interrupt.control = VMXNET3_IC_DISABLE_ALL; shared->rx_filter.mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST | - VMXNET3_RXMODE_ALL_MULTI; + VMXNET3_RXMODE_ALL_MULTI | VMXNET3_RXMODE_PROMISC; shared_dma = vmxnet3_dma_addr (vm, vd, shared); vmxnet3_reg_write (vd, 1, VMXNET3_REG_DSAL, shared_dma); @@ -201,7 +255,7 @@ static inline void vmxnet3_enable_interrupt (vmxnet3_device_t * vd) { int i; - vmxnet3_shared *shared = &vd->dma->shared; + vmxnet3_shared *shared = vd->driver_shared; shared->interrupt.control &= ~VMXNET3_IC_DISABLE_ALL; for (i = 0; i < vd->num_intrs; i++) @@ -212,7 +266,7 @@ static inline void vmxnet3_disable_interrupt (vmxnet3_device_t * vd) { int i; - vmxnet3_shared *shared = &vd->dma->shared; + vmxnet3_shared *shared = vd->driver_shared; shared->interrupt.control |= VMXNET3_IC_DISABLE_ALL; for (i = 0; i < vd->num_intrs; i++) @@ -222,30 +276,34 @@ vmxnet3_disable_interrupt (vmxnet3_device_t * vd) static clib_error_t * vmxnet3_rxq_init (vlib_main_t * vm, vmxnet3_device_t * vd, u16 qid, u16 qsz) { - vmxnet3_main_t *vmxm = &vmxnet3_main; vmxnet3_rxq_t *rxq; - clib_error_t *error; + vmxnet3_rx_stats *rxs; u16 rid; + vec_validate (vd->rx_stats, qid); + rxs = vec_elt_at_index (vd->rx_stats, qid); + clib_memset (rxs, 0, sizeof (*rxs)); + vec_validate_aligned (vd->rxqs, qid, CLIB_CACHE_LINE_BYTES); rxq = vec_elt_at_index (vd->rxqs, qid); clib_memset (rxq, 0, sizeof (*rxq)); rxq->size = qsz; for (rid = 0; rid < VMXNET3_RX_RING_SIZE; rid++) { - rxq->rx_desc[rid] = - vlib_physmem_alloc_aligned (vm, vmxm->physmem_region, - &error, qsz * sizeof (*rxq->rx_desc[rid]), - 512); - if (error) - return error; + rxq->rx_desc[rid] = vlib_physmem_alloc_aligned_on_numa + (vm, qsz * sizeof (*rxq->rx_desc[rid]), 512, vd->numa_node); + + if (rxq->rx_desc[rid] == 0) + return vlib_physmem_last_error (vm); + clib_memset (rxq->rx_desc[rid], 0, qsz * sizeof (*rxq->rx_desc[rid])); } - rxq->rx_comp = vlib_physmem_alloc_aligned (vm, vmxm->physmem_region, &error, - qsz * sizeof (*rxq->rx_comp), - 512); - if (error) - return error; + rxq->rx_comp = + vlib_physmem_alloc_aligned_on_numa (vm, qsz * sizeof (*rxq->rx_comp), 512, + vd->numa_node); + if (rxq->rx_comp == 0) + return vlib_physmem_last_error (vm); + clib_memset (rxq->rx_comp, 0, qsz * sizeof (*rxq->rx_comp)); for (rid = 0; rid < VMXNET3_RX_RING_SIZE; rid++) { @@ -264,36 +322,37 @@ vmxnet3_rxq_init (vlib_main_t * vm, vmxnet3_device_t * vd, u16 qid, u16 qsz) static clib_error_t * vmxnet3_txq_init (vlib_main_t * vm, vmxnet3_device_t * vd, u16 qid, u16 qsz) { - vmxnet3_main_t *vmxm = &vmxnet3_main; vmxnet3_txq_t *txq; - clib_error_t *error; - - if (qid >= vd->num_tx_queues) - { - qid = qid % vd->num_tx_queues; - txq = vec_elt_at_index (vd->txqs, qid); - if (txq->lock == 0) - clib_spinlock_init (&txq->lock); - vd->flags |= VMXNET3_DEVICE_F_SHARED_TXQ_LOCK; - return 0; - } + vmxnet3_tx_stats *txs; + u32 size; vec_validate_aligned (vd->txqs, qid, CLIB_CACHE_LINE_BYTES); txq = vec_elt_at_index (vd->txqs, qid); clib_memset (txq, 0, sizeof (*txq)); + clib_spinlock_init (&txq->lock); + + vec_validate (vd->tx_stats, qid); + txs = vec_elt_at_index (vd->tx_stats, qid); + clib_memset (txs, 0, sizeof (*txs)); + txq->size = qsz; - txq->tx_desc = vlib_physmem_alloc_aligned (vm, vmxm->physmem_region, &error, - qsz * sizeof (*txq->tx_desc), - 512); - if (error) - return error; - clib_memset (txq->tx_desc, 0, qsz * sizeof (*txq->tx_desc)); - txq->tx_comp = vlib_physmem_alloc_aligned (vm, vmxm->physmem_region, &error, - qsz * sizeof (*txq->tx_comp), - 512); - if (error) - return error; - clib_memset (txq->tx_comp, 0, qsz * sizeof (*txq->tx_comp)); + txq->reg_txprod = qid * 8 + VMXNET3_REG_TXPROD; + + size = qsz * sizeof (*txq->tx_desc); + txq->tx_desc = + vlib_physmem_alloc_aligned_on_numa (vm, size, 512, vd->numa_node); + if (txq->tx_desc == 0) + return vlib_physmem_last_error (vm); + + clib_memset (txq->tx_desc, 0, size); + + size = qsz * sizeof (*txq->tx_comp); + txq->tx_comp = + vlib_physmem_alloc_aligned_on_numa (vm, size, 512, vd->numa_node); + if (txq->tx_comp == 0) + return vlib_physmem_last_error (vm); + + clib_memset (txq->tx_comp, 0, size); vec_validate_aligned (txq->tx_ring.bufs, txq->size, CLIB_CACHE_LINE_BYTES); txq->tx_ring.gen = VMXNET3_TXF_GEN; txq->tx_comp_ring.gen = VMXNET3_TXCF_GEN; @@ -301,25 +360,53 @@ vmxnet3_txq_init (vlib_main_t * vm, vmxnet3_device_t * vd, u16 qid, u16 qsz) return 0; } +static const u8 vmxnet3_rss_key[VMXNET3_RSS_MAX_KEY_SZ] = { + 0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac, + 0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28, + 0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70, + 0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3, + 0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9, +}; + +static clib_error_t * +vmxnet3_rss_init (vlib_main_t * vm, vmxnet3_device_t * vd) +{ + vmxnet3_rss_shared *rss; + size_t size = sizeof (*rss); + u8 i; + + vd->rss = vlib_physmem_alloc_aligned_on_numa (vm, size, 512, vd->numa_node); + if (vd->rss == 0) + return vlib_physmem_last_error (vm); + + clib_memset (vd->rss, 0, size); + rss = vd->rss; + rss->hash_type = + VMXNET3_RSS_HASH_TYPE_IPV4 | VMXNET3_RSS_HASH_TYPE_TCP_IPV4 | + VMXNET3_RSS_HASH_TYPE_IPV6 | VMXNET3_RSS_HASH_TYPE_TCP_IPV6; + rss->hash_func = VMXNET3_RSS_HASH_FUNC_TOEPLITZ; + rss->hash_key_sz = VMXNET3_RSS_MAX_KEY_SZ; + rss->ind_table_sz = VMXNET3_RSS_MAX_IND_TABLE_SZ; + clib_memcpy (rss->hash_key, vmxnet3_rss_key, VMXNET3_RSS_MAX_KEY_SZ); + for (i = 0; i < rss->ind_table_sz; i++) + rss->ind_table[i] = i % vd->num_rx_queues; + + return 0; +} + static clib_error_t * vmxnet3_device_init (vlib_main_t * vm, vmxnet3_device_t * vd, vmxnet3_create_if_args_t * args) { clib_error_t *error = 0; - u32 ret, i; - vmxnet3_main_t *vmxm = &vmxnet3_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - - vd->num_tx_queues = 1; - vd->num_rx_queues = 1; - vd->num_intrs = 2; + u32 ret, i, size; /* Quiesce the device */ vmxnet3_reg_write (vd, 1, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV); ret = vmxnet3_reg_read (vd, 1, VMXNET3_REG_CMD); if (ret != 0) { - error = clib_error_return (0, "error on quisecing device rc (%u)", ret); + error = clib_error_return (0, "error on quiescing device rc (%u)", ret); return error; } @@ -336,14 +423,16 @@ vmxnet3_device_init (vlib_main_t * vm, vmxnet3_device_t * vd, vd->version = count_leading_zeros (ret); vd->version = uword_bits - vd->version; - if (vd->version == 0 || vd->version > 3) + if (vd->version == 0) { error = clib_error_return (0, "unsupported hardware version %u", vd->version); return error; } - vmxnet3_reg_write (vd, 1, VMXNET3_REG_VRRS, 1 << (vd->version - 1)); + /* cap support version to 3 */ + vmxnet3_reg_write (vd, 1, VMXNET3_REG_VRRS, + 1 << (clib_min (3, vd->version) - 1)); ret = vmxnet3_reg_read (vd, 1, VMXNET3_REG_UVRS); if (ret & 1) @@ -354,6 +443,21 @@ vmxnet3_device_init (vlib_main_t * vm, vmxnet3_device_t * vd, return error; } + /* GSO is only supported for version >= 3 */ + if (args->enable_gso) + { + if (vd->version >= 3) + vd->gso_enable = 1; + else + { + error = + clib_error_return (0, + "GSO is not supported because hardware version" + " is %u. It must be >= 3", vd->version); + return error; + } + } + vmxnet3_reg_write (vd, 1, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK); ret = vmxnet3_reg_read (vd, 1, VMXNET3_REG_CMD); if (ret & 1) @@ -362,9 +466,7 @@ vmxnet3_device_init (vlib_main_t * vm, vmxnet3_device_t * vd, vd->link_speed = ret >> 16; } else - { - vd->flags &= ~VMXNET3_DEVICE_F_LINK_UP; - } + vd->flags &= ~VMXNET3_DEVICE_F_LINK_UP; /* Get the mac address */ ret = vmxnet3_reg_read (vd, 1, VMXNET3_REG_MACL); @@ -372,22 +474,31 @@ vmxnet3_device_init (vlib_main_t * vm, vmxnet3_device_t * vd, ret = vmxnet3_reg_read (vd, 1, VMXNET3_REG_MACH); clib_memcpy (vd->mac_addr + 4, &ret, 2); - if (vmxm->physmem_region_alloc == 0) + size = sizeof (vmxnet3_rx_queue) * vd->num_rx_queues + + sizeof (vmxnet3_tx_queue) * vd->num_tx_queues; + + vd->queues = + vlib_physmem_alloc_aligned_on_numa (vm, size, 512, vd->numa_node); + if (vd->queues == 0) + return vlib_physmem_last_error (vm); + + clib_memset (vd->queues, 0, size); + + if (vd->num_rx_queues > 1) { - u32 flags = VLIB_PHYSMEM_F_INIT_MHEAP | VLIB_PHYSMEM_F_HUGETLB; - error = - vlib_physmem_region_alloc (vm, "vmxnet3 descriptors", 4 << 20, 0, - flags, &vmxm->physmem_region); + error = vmxnet3_rss_init (vm, vd); if (error) return error; - vmxm->physmem_region_alloc = 1; } - error = vmxnet3_rxq_init (vm, vd, 0, args->rxq_size); - if (error) - return error; + for (i = 0; i < vd->num_rx_queues; i++) + { + error = vmxnet3_rxq_init (vm, vd, i, args->rxq_size); + if (error) + return error; + } - for (i = 0; i < tm->n_vlib_mains; i++) + for (i = 0; i < vd->num_tx_queues; i++) { error = vmxnet3_txq_init (vm, vd, i, args->txq_size); if (error) @@ -410,38 +521,26 @@ vmxnet3_device_init (vlib_main_t * vm, vmxnet3_device_t * vd, return error; } - /* Disable interrupts */ - vmxnet3_disable_interrupt (vd); - - vec_foreach_index (i, vd->rxqs) - { - vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, i); - - vmxnet3_rxq_refill_ring0 (vm, vd, rxq); - vmxnet3_rxq_refill_ring1 (vm, vd, rxq); - } - vd->flags |= VMXNET3_DEVICE_F_INITIALIZED; - - vmxnet3_enable_interrupt (vd); - return error; } static void -vmxnet3_irq_0_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) +vmxnet3_rxq_irq_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) { vnet_main_t *vnm = vnet_get_main (); vmxnet3_main_t *vmxm = &vmxnet3_main; uword pd = vlib_pci_get_private_data (vm, h); vmxnet3_device_t *vd = pool_elt_at_index (vmxm->devices, pd); u16 qid = line; + vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid); if (vec_len (vd->rxqs) > qid && vd->rxqs[qid].int_mode != 0) - vnet_device_input_set_interrupt_pending (vnm, vd->hw_if_index, qid); + vnet_hw_if_rx_queue_set_int_pending (vnm, rxq->queue_index); } static void -vmxnet3_irq_1_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) +vmxnet3_event_irq_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, + u16 line) { vnet_main_t *vnm = vnet_get_main (); vmxnet3_main_t *vmxm = &vmxnet3_main; @@ -455,6 +554,8 @@ vmxnet3_irq_1_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) { vd->flags |= VMXNET3_DEVICE_F_LINK_UP; vd->link_speed = ret >> 16; + vnet_hw_interface_set_link_speed (vnm, vd->hw_if_index, + vd->link_speed * 1000); vnet_hw_interface_set_flags (vnm, vd->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); } @@ -475,6 +576,24 @@ vmxnet3_queue_size_valid (u16 qsz) return 1; } +static u8 +vmxnet3_tx_queue_num_valid (u16 num) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + + if ((num > VMXNET3_TXQ_MAX) || (num > tm->n_vlib_mains)) + return 0; + return 1; +} + +static u8 +vmxnet3_rx_queue_num_valid (u16 num) +{ + if (num > VMXNET3_RXQ_MAX) + return 0; + return 1; +} + void vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) { @@ -483,7 +602,38 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) vmxnet3_device_t *vd; vlib_pci_dev_handle_t h; clib_error_t *error = 0; + u16 qid; + u32 num_intr; + + if (args->txq_num == 0) + args->txq_num = 1; + if (args->rxq_num == 0) + args->rxq_num = 1; + if (!vmxnet3_rx_queue_num_valid (args->rxq_num)) + { + args->rv = VNET_API_ERROR_INVALID_VALUE; + args->error = + clib_error_return (error, "number of rx queues must be <= %u", + VMXNET3_RXQ_MAX); + vlib_log (VLIB_LOG_LEVEL_ERR, vmxm->log_default, "%U: %s", + format_vlib_pci_addr, &args->addr, + "number of rx queues must be <= %u", VMXNET3_RXQ_MAX); + return; + } + if (!vmxnet3_tx_queue_num_valid (args->txq_num)) + { + args->rv = VNET_API_ERROR_INVALID_VALUE; + args->error = + clib_error_return (error, + "number of tx queues must be <= %u and <= number of " + "CPU's assigned to VPP", VMXNET3_TXQ_MAX); + vlib_log (VLIB_LOG_LEVEL_ERR, vmxm->log_default, "%U: %s", + format_vlib_pci_addr, &args->addr, + "number of tx queues must be <= %u and <= number of " + "CPU's assigned to VPP", VMXNET3_TXQ_MAX); + return; + } if (args->rxq_size == 0) args->rxq_size = VMXNET3_NUM_RX_DESC; if (args->txq_size == 0) @@ -497,68 +647,141 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) clib_error_return (error, "queue size must be <= 4096, >= 64, " "and multiples of 64"); + vlib_log (VLIB_LOG_LEVEL_ERR, vmxm->log_default, "%U: %s", + format_vlib_pci_addr, &args->addr, + "queue size must be <= 4096, >= 64, and multiples of 64"); return; } /* *INDENT-OFF* */ - pool_foreach (vd, vmxm->devices, ({ + pool_foreach (vd, vmxm->devices) { if (vd->pci_addr.as_u32 == args->addr.as_u32) { - args->rv = VNET_API_ERROR_INVALID_VALUE; + args->rv = VNET_API_ERROR_ADDRESS_IN_USE; args->error = - clib_error_return (error, "PCI address in use"); + clib_error_return (error, "%U: %s", format_vlib_pci_addr, + &args->addr, "pci address in use"); + vlib_log (VLIB_LOG_LEVEL_ERR, vmxm->log_default, "%U: %s", + format_vlib_pci_addr, &args->addr, "pci address in use"); return; } - })); + } /* *INDENT-ON* */ - pool_get (vmxm->devices, vd); - vd->dev_instance = vd - vmxm->devices; - vd->per_interface_next_index = ~0; - vd->pci_addr = args->addr; - - if (args->enable_elog) - vd->flags |= VMXNET3_DEVICE_F_ELOG; + if (args->bind) + { + error = vlib_pci_bind_to_uio (vm, &args->addr, (char *) "auto"); + if (error) + { + args->rv = VNET_API_ERROR_INVALID_INTERFACE; + args->error = + clib_error_return (error, "%U: %s", format_vlib_pci_addr, + &args->addr, + "error encountered on binding pci device"); + vlib_log (VLIB_LOG_LEVEL_ERR, vmxm->log_default, "%U: %s", + format_vlib_pci_addr, &args->addr, + "error encountered on binding pci devicee"); + return; + } + } if ((error = vlib_pci_device_open (vm, &args->addr, vmxnet3_pci_device_ids, &h))) { - pool_put (vmxm->devices, vd); args->rv = VNET_API_ERROR_INVALID_INTERFACE; args->error = - clib_error_return (error, "pci-addr %U", format_vlib_pci_addr, - &args->addr); + clib_error_return (error, "%U: %s", format_vlib_pci_addr, + &args->addr, + "error encountered on pci device open"); + vlib_log (VLIB_LOG_LEVEL_ERR, vmxm->log_default, "%U: %s", + format_vlib_pci_addr, &args->addr, + "error encountered on pci device open"); return; } + + /* + * Do not use vmxnet3_log_error prior to this line since the macro + * references vd->pci_dev_handle + */ + pool_get (vmxm->devices, vd); + vd->num_tx_queues = args->txq_num; + vd->num_rx_queues = args->rxq_num; + vd->dev_instance = vd - vmxm->devices; + vd->per_interface_next_index = ~0; + vd->pci_addr = args->addr; + + if (args->enable_elog) + vd->flags |= VMXNET3_DEVICE_F_ELOG; + vd->pci_dev_handle = h; + vd->numa_node = vlib_pci_get_numa_node (vm, h); + vd->num_intrs = vd->num_rx_queues + 1; // +1 for the event interrupt vlib_pci_set_private_data (vm, h, vd->dev_instance); if ((error = vlib_pci_bus_master_enable (vm, h))) - goto error; + { + vmxnet3_log_error (vd, "error encountered on pci bus master enable"); + goto error; + } if ((error = vlib_pci_map_region (vm, h, 0, (void **) &vd->bar[0]))) - goto error; + { + vmxnet3_log_error (vd, "error encountered on pci map region for bar 0"); + goto error; + } if ((error = vlib_pci_map_region (vm, h, 1, (void **) &vd->bar[1]))) - goto error; + { + vmxnet3_log_error (vd, "error encountered on pci map region for bar 1"); + goto error; + } - if ((error = vlib_pci_register_msix_handler (vm, h, 0, 1, - &vmxnet3_irq_0_handler))) - goto error; + num_intr = vlib_pci_get_num_msix_interrupts (vm, h); + if (num_intr < vd->num_rx_queues + 1) + { + vmxnet3_log_error (vd, + "No sufficient interrupt lines (%u) for rx queues", + num_intr); + error = + clib_error_return (0, + "No sufficient interrupt lines (%u) for rx queues", + num_intr); + goto error; + } + if ((error = vlib_pci_register_msix_handler (vm, h, 0, vd->num_rx_queues, + &vmxnet3_rxq_irq_handler))) + { + vmxnet3_log_error (vd, + "error encountered on pci register msix handler 0"); + goto error; + } - if ((error = vlib_pci_register_msix_handler (vm, h, 1, 1, - &vmxnet3_irq_1_handler))) - goto error; + if ((error = vlib_pci_register_msix_handler (vm, h, vd->num_rx_queues, 1, + &vmxnet3_event_irq_handler))) + { + vmxnet3_log_error (vd, + "error encountered on pci register msix handler 1"); + goto error; + } - if ((error = vlib_pci_enable_msix_irq (vm, h, 0, 2))) - goto error; + if ((error = vlib_pci_enable_msix_irq (vm, h, 0, vd->num_rx_queues + 1))) + { + vmxnet3_log_error (vd, "error encountered on pci enable msix irq"); + goto error; + } if ((error = vlib_pci_intr_enable (vm, h))) - goto error; + { + vmxnet3_log_error (vd, "error encountered on pci interrupt enable"); + goto error; + } if ((error = vmxnet3_device_init (vm, vd, args))) - goto error; + { + vmxnet3_log_error (vd, "error encountered on device init"); + goto error; + } /* create interface */ error = ethernet_register_interface (vnm, vmxnet3_device_class.index, @@ -566,17 +789,62 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args) &vd->hw_if_index, vmxnet3_flag_change); if (error) - goto error; + { + vmxnet3_log_error (vd, + "error encountered on ethernet register interface"); + goto error; + } vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, vd->hw_if_index); vd->sw_if_index = sw->sw_if_index; args->sw_if_index = sw->sw_if_index; vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vd->hw_if_index); - hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; - vnet_hw_interface_set_input_node (vnm, vd->hw_if_index, - vmxnet3_input_node.index); - vnet_hw_interface_assign_rx_thread (vnm, vd->hw_if_index, 0, ~0); + hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE; + if (vd->gso_enable) + { + hw->caps |= (VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO | + VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM | + VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM); + } + + vnet_hw_if_set_input_node (vnm, vd->hw_if_index, vmxnet3_input_node.index); + /* Disable interrupts */ + vmxnet3_disable_interrupt (vd); + vec_foreach_index (qid, vd->rxqs) + { + vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid); + u32 qi, fi; + + qi = vnet_hw_if_register_rx_queue (vnm, vd->hw_if_index, qid, + VNET_HW_IF_RXQ_THREAD_ANY); + fi = vlib_pci_get_msix_file_index (vm, vd->pci_dev_handle, qid); + vnet_hw_if_set_rx_queue_file_index (vnm, qi, fi); + rxq->queue_index = qi; + rxq->buffer_pool_index = + vnet_hw_if_get_rx_queue_numa_node (vnm, rxq->queue_index); + vmxnet3_rxq_refill_ring0 (vm, vd, rxq); + vmxnet3_rxq_refill_ring1 (vm, vd, rxq); + } + + vec_foreach_index (qid, vd->txqs) + { + vmxnet3_txq_t *txq = vec_elt_at_index (vd->txqs, qid); + txq->queue_index = + vnet_hw_if_register_tx_queue (vnm, vd->hw_if_index, qid); + } + for (u32 i = 0; i < vlib_get_n_threads (); i++) + { + u32 qi = vd->txqs[i % vd->num_tx_queues].queue_index; + vnet_hw_if_tx_queue_assign_thread (vnm, qi, i); + } + vnet_hw_if_update_runtime_data (vnm, vd->hw_if_index); + + vd->flags |= VMXNET3_DEVICE_F_INITIALIZED; + vmxnet3_enable_interrupt (vd); + + vnet_hw_interface_set_link_speed (vnm, vd->hw_if_index, + vd->link_speed * 1000); if (vd->flags & VMXNET3_DEVICE_F_LINK_UP) vnet_hw_interface_set_flags (vnm, vd->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); @@ -607,7 +875,6 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd) if (vd->hw_if_index) { vnet_hw_interface_set_flags (vnm, vd->hw_if_index, 0); - vnet_hw_interface_unassign_rx_thread (vnm, vd->hw_if_index, 0); ethernet_delete_interface (vnm, vd->hw_if_index); } @@ -629,12 +896,13 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd) vlib_buffer_free_from_ring (vm, ring->bufs, desc_idx, rxq->size, ring->fill); vec_free (ring->bufs); - vlib_physmem_free (vm, vmxm->physmem_region, rxq->rx_desc[rid]); + vlib_physmem_free (vm, rxq->rx_desc[rid]); } - vlib_physmem_free (vm, vmxm->physmem_region, rxq->rx_comp); + vlib_physmem_free (vm, rxq->rx_comp); } /* *INDENT-ON* */ vec_free (vd->rxqs); + vec_free (vd->rx_stats); /* *INDENT-OFF* */ vec_foreach_index (i, vd->txqs) @@ -654,17 +922,21 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd) } clib_spinlock_free (&txq->lock); vec_free (txq->tx_ring.bufs); - vlib_physmem_free (vm, vmxm->physmem_region, txq->tx_desc); - vlib_physmem_free (vm, vmxm->physmem_region, txq->tx_comp); + vlib_physmem_free (vm, txq->tx_desc); + vlib_physmem_free (vm, txq->tx_comp); } /* *INDENT-ON* */ vec_free (vd->txqs); + vec_free (vd->tx_stats); - vlib_physmem_free (vm, vmxm->physmem_region, vd->dma); + vlib_physmem_free (vm, vd->driver_shared); + vlib_physmem_free (vm, vd->queues); + vlib_physmem_free (vm, vd->rss); clib_error_free (vd->error); clib_memset (vd, 0, sizeof (*vd)); pool_put (vmxm->devices, vd); + } /*