drivers/net/enic/enic_main.c (DPDK 18.08)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2008-2017 Cisco Systems, Inc.  All rights reserved.
3  * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
4  */
5
6 #include <stdio.h>
7
8 #include <sys/stat.h>
9 #include <sys/mman.h>
10 #include <fcntl.h>
11 #include <libgen.h>
12
13 #include <rte_pci.h>
14 #include <rte_bus_pci.h>
15 #include <rte_memzone.h>
16 #include <rte_malloc.h>
17 #include <rte_mbuf.h>
18 #include <rte_string_fns.h>
19 #include <rte_ethdev_driver.h>
20
21 #include "enic_compat.h"
22 #include "enic.h"
23 #include "wq_enet_desc.h"
24 #include "rq_enet_desc.h"
25 #include "cq_enet_desc.h"
26 #include "vnic_enet.h"
27 #include "vnic_dev.h"
28 #include "vnic_wq.h"
29 #include "vnic_rq.h"
30 #include "vnic_cq.h"
31 #include "vnic_intr.h"
32 #include "vnic_nic.h"
33
34 static inline int enic_is_sriov_vf(struct enic *enic)
35 {
36         return enic->pdev->id.device_id == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
37 }
38
39 static int is_zero_addr(uint8_t *addr)
40 {
41         return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
42 }
43
44 static int is_mcast_addr(uint8_t *addr)
45 {
46         return addr[0] & 1;
47 }
48
49 static int is_eth_addr_valid(uint8_t *addr)
50 {
51         return !is_mcast_addr(addr) && !is_zero_addr(addr);
52 }
53
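/*
 * Free any mbufs still posted in the RQ's mbuf ring. Called when a queue is
 * torn down or when mbuf allocation for a companion queue fails.
 */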
54 static void
55 enic_rxmbuf_queue_release(__rte_unused struct enic *enic, struct vnic_rq *rq)
56 {
57         uint16_t i;
58
59         if (!rq || !rq->mbuf_ring) {
60                 dev_debug(enic, "Pointer to rq or mbuf_ring is NULL");
61                 return;
62         }
63
64         for (i = 0; i < rq->ring.desc_count; i++) {
65                 if (rq->mbuf_ring[i]) {
66                         rte_pktmbuf_free_seg(rq->mbuf_ring[i]);
67                         rq->mbuf_ring[i] = NULL;
68                 }
69         }
70 }
71
72 static void enic_free_wq_buf(struct rte_mbuf **buf)
73 {
74         struct rte_mbuf *mbuf = *buf;
75
76         rte_pktmbuf_free_seg(mbuf);
77         *buf = NULL;
78 }
79
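/* Log any error status reported by the hardware for each WQ and RQ in use. */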
80 static void enic_log_q_error(struct enic *enic)
81 {
82         unsigned int i;
83         u32 error_status;
84
85         for (i = 0; i < enic->wq_count; i++) {
86                 error_status = vnic_wq_error_status(&enic->wq[i]);
87                 if (error_status)
88                         dev_err(enic, "WQ[%u] error_status %u\n", i,
89                                 error_status);
90         }
91
92         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
93                 if (!enic->rq[i].in_use)
94                         continue;
95                 error_status = vnic_rq_error_status(&enic->rq[i]);
96                 if (error_status)
97                         dev_err(enic, "RQ[%u] error_status %u\n", i,
98                                 error_status);
99         }
100 }
101
102 static void enic_clear_soft_stats(struct enic *enic)
103 {
104         struct enic_soft_stats *soft_stats = &enic->soft_stats;
105         rte_atomic64_clear(&soft_stats->rx_nombuf);
106         rte_atomic64_clear(&soft_stats->rx_packet_errors);
107         rte_atomic64_clear(&soft_stats->tx_oversized);
108 }
109
110 static void enic_init_soft_stats(struct enic *enic)
111 {
112         struct enic_soft_stats *soft_stats = &enic->soft_stats;
113         rte_atomic64_init(&soft_stats->rx_nombuf);
114         rte_atomic64_init(&soft_stats->rx_packet_errors);
115         rte_atomic64_init(&soft_stats->tx_oversized);
116         enic_clear_soft_stats(enic);
117 }
118
119 void enic_dev_stats_clear(struct enic *enic)
120 {
121         if (vnic_dev_stats_clear(enic->vdev))
122                 dev_err(enic, "Error in clearing stats\n");
123         enic_clear_soft_stats(enic);
124 }
125
126 int enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats)
127 {
128         struct vnic_stats *stats;
129         struct enic_soft_stats *soft_stats = &enic->soft_stats;
130         int64_t rx_truncated;
131         uint64_t rx_packet_errors;
132         int ret = vnic_dev_stats_dump(enic->vdev, &stats);
133
134         if (ret) {
135                 dev_err(enic, "Error in getting stats\n");
136                 return ret;
137         }
138
139         /* The number of truncated packets can only be calculated by
140          * subtracting a hardware counter from error packets received by
141          * the driver. Note: this causes transient inaccuracies in the
142          * ipackets count. Also, the lengths of truncated packets are
143          * counted in ibytes even though truncated packets are dropped,
144          * which can make ibytes slightly higher than it should be.
145          */
146         rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors);
147         rx_truncated = rx_packet_errors - stats->rx.rx_errors;
148
149         r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated;
150         r_stats->opackets = stats->tx.tx_frames_ok;
151
152         r_stats->ibytes = stats->rx.rx_bytes_ok;
153         r_stats->obytes = stats->tx.tx_bytes_ok;
154
155         r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop;
156         r_stats->oerrors = stats->tx.tx_errors
157                            + rte_atomic64_read(&soft_stats->tx_oversized);
158
159         r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated;
160
161         r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf);
162         return 0;
163 }
164
165 int enic_del_mac_address(struct enic *enic, int mac_index)
166 {
167         struct rte_eth_dev *eth_dev = enic->rte_dev;
168         uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes;
169
170         return vnic_dev_del_addr(enic->vdev, mac_addr);
171 }
172
173 int enic_set_mac_address(struct enic *enic, uint8_t *mac_addr)
174 {
175         int err;
176
177         if (!is_eth_addr_valid(mac_addr)) {
178                 dev_err(enic, "invalid mac address\n");
179                 return -EINVAL;
180         }
181
182         err = vnic_dev_add_addr(enic->vdev, mac_addr);
183         if (err)
184                 dev_err(enic, "add mac addr failed\n");
185         return err;
186 }
187
188 static void
189 enic_free_rq_buf(struct rte_mbuf **mbuf)
190 {
191         if (*mbuf == NULL)
192                 return;
193
194         rte_pktmbuf_free(*mbuf);
195         *mbuf = NULL;
196 }
197
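/*
 * Program the vNIC resources: initialize each RQ, WQ, and their completion
 * queues, and set up the interrupt resources. Rx queue interrupts are
 * enabled only if the application requested them via intr_conf.rxq.
 */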
198 void enic_init_vnic_resources(struct enic *enic)
199 {
200         unsigned int error_interrupt_enable = 1;
201         unsigned int error_interrupt_offset = 0;
202         unsigned int rxq_interrupt_enable = 0;
203         unsigned int rxq_interrupt_offset = ENICPMD_RXQ_INTR_OFFSET;
204         unsigned int index = 0;
205         unsigned int cq_idx;
206         struct vnic_rq *data_rq;
207
208         if (enic->rte_dev->data->dev_conf.intr_conf.rxq)
209                 rxq_interrupt_enable = 1;
210
211         for (index = 0; index < enic->rq_count; index++) {
212                 cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index));
213
214                 vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)],
215                         cq_idx,
216                         error_interrupt_enable,
217                         error_interrupt_offset);
218
219                 data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)];
220                 if (data_rq->in_use)
221                         vnic_rq_init(data_rq,
222                                      cq_idx,
223                                      error_interrupt_enable,
224                                      error_interrupt_offset);
225
226                 vnic_cq_init(&enic->cq[cq_idx],
227                         0 /* flow_control_enable */,
228                         1 /* color_enable */,
229                         0 /* cq_head */,
230                         0 /* cq_tail */,
231                         1 /* cq_tail_color */,
232                         rxq_interrupt_enable,
233                         1 /* cq_entry_enable */,
234                         0 /* cq_message_enable */,
235                         rxq_interrupt_offset,
236                         0 /* cq_message_addr */);
237                 if (rxq_interrupt_enable)
238                         rxq_interrupt_offset++;
239         }
240
241         for (index = 0; index < enic->wq_count; index++) {
242                 vnic_wq_init(&enic->wq[index],
243                         enic_cq_wq(enic, index),
244                         error_interrupt_enable,
245                         error_interrupt_offset);
246                 /* Compute unsupported ol flags for enic_prep_pkts() */
247                 enic->wq[index].tx_offload_notsup_mask =
248                         PKT_TX_OFFLOAD_MASK ^ enic->tx_offload_mask;
249
250                 cq_idx = enic_cq_wq(enic, index);
251                 vnic_cq_init(&enic->cq[cq_idx],
252                         0 /* flow_control_enable */,
253                         1 /* color_enable */,
254                         0 /* cq_head */,
255                         0 /* cq_tail */,
256                         1 /* cq_tail_color */,
257                         0 /* interrupt_enable */,
258                         0 /* cq_entry_enable */,
259                         1 /* cq_message_enable */,
260                         0 /* interrupt offset */,
261                         (u64)enic->wq[index].cqmsg_rz->iova);
262         }
263
264         for (index = 0; index < enic->intr_count; index++) {
265                 vnic_intr_init(&enic->intr[index],
266                                enic->config.intr_timer_usec,
267                                enic->config.intr_timer_type,
268                                /*mask_on_assertion*/1);
269         }
270 }
271
272
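/*
 * Allocate an mbuf for every descriptor in the RQ ring and write the
 * corresponding buffer addresses into the descriptors. The buffers are not
 * posted to the NIC here; that happens later in enic_start_rq().
 */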
273 static int
274 enic_alloc_rx_queue_mbufs(struct enic *enic, struct vnic_rq *rq)
275 {
276         struct rte_mbuf *mb;
277         struct rq_enet_desc *rqd = rq->ring.descs;
278         unsigned i;
279         dma_addr_t dma_addr;
280         uint32_t max_rx_pkt_len;
281         uint16_t rq_buf_len;
282
283         if (!rq->in_use)
284                 return 0;
285
286         dev_debug(enic, "queue %u, allocating %u rx queue mbufs\n", rq->index,
287                   rq->ring.desc_count);
288
289         /*
290          * If *not* using scatter and the mbuf size is greater than the
291          * requested max packet size (max_rx_pkt_len), then reduce the
292          * posted buffer size to max_rx_pkt_len. HW still receives packets
293          * larger than max_rx_pkt_len, but they will be truncated and
294          * dropped in the rx handler. Not ideal, but better than returning
295          * large packets when the user is not expecting them.
296          */
297         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
298         rq_buf_len = rte_pktmbuf_data_room_size(rq->mp) - RTE_PKTMBUF_HEADROOM;
299         if (max_rx_pkt_len < rq_buf_len && !rq->data_queue_enable)
300                 rq_buf_len = max_rx_pkt_len;
301         for (i = 0; i < rq->ring.desc_count; i++, rqd++) {
302                 mb = rte_mbuf_raw_alloc(rq->mp);
303                 if (mb == NULL) {
304                         dev_err(enic, "RX mbuf alloc failed queue_id=%u\n",
305                         (unsigned)rq->index);
306                         return -ENOMEM;
307                 }
308
309                 mb->data_off = RTE_PKTMBUF_HEADROOM;
310                 dma_addr = (dma_addr_t)(mb->buf_iova
311                            + RTE_PKTMBUF_HEADROOM);
312                 rq_enet_desc_enc(rqd, dma_addr,
313                                 (rq->is_sop ? RQ_ENET_TYPE_ONLY_SOP
314                                 : RQ_ENET_TYPE_NOT_SOP),
315                                 rq_buf_len);
316                 rq->mbuf_ring[i] = mb;
317         }
318         /*
319          * Do not post the buffers to the NIC until we enable the RQ via
320          * enic_start_rq().
321          */
322         rq->need_initial_post = true;
323         /* Initialize fetch index while RQ is disabled */
324         iowrite32(0, &rq->ctrl->fetch_index);
325         return 0;
326 }
327
328 /*
329  * Post the Rx buffers for the first time. enic_alloc_rx_queue_mbufs() has
330  * allocated the buffers and filled the RQ descriptor ring. Just need to push
331  * the post index to the NIC.
332  */
333 static void
334 enic_initial_post_rx(struct enic *enic, struct vnic_rq *rq)
335 {
336         if (!rq->in_use || !rq->need_initial_post)
337                 return;
338
339         /* make sure all prior writes are complete before doing the PIO write */
340         rte_rmb();
341
342         /* Post all but the last buffer to VIC. */
343         rq->posted_index = rq->ring.desc_count - 1;
344
345         rq->rx_nb_hold = 0;
346
347         dev_debug(enic, "port=%u, qidx=%u, Write %u posted idx, %u sw held\n",
348                 enic->port_id, rq->index, rq->posted_index, rq->rx_nb_hold);
349         iowrite32(rq->posted_index, &rq->ctrl->posted_index);
350         rte_rmb();
351         rq->need_initial_post = false;
352 }
353
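/*
 * DMA-coherent allocation helper. Memory comes from an IOVA-contiguous
 * memzone; each allocation is tracked on a list so that
 * enic_free_consistent() can find and release the memzone later.
 */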
354 static void *
355 enic_alloc_consistent(void *priv, size_t size,
356         dma_addr_t *dma_handle, u8 *name)
357 {
358         void *vaddr;
359         const struct rte_memzone *rz;
360         *dma_handle = 0;
361         struct enic *enic = (struct enic *)priv;
362         struct enic_memzone_entry *mze;
363
364         rz = rte_memzone_reserve_aligned((const char *)name, size,
365                         SOCKET_ID_ANY, RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
366         if (!rz) {
367                 pr_err("%s : Failed to allocate memory requested for %s\n",
368                         __func__, name);
369                 return NULL;
370         }
371
372         vaddr = rz->addr;
373         *dma_handle = (dma_addr_t)rz->iova;
374
375         mze = rte_malloc("enic memzone entry",
376                          sizeof(struct enic_memzone_entry), 0);
377
378         if (!mze) {
379                 pr_err("%s : Failed to allocate memory for memzone list\n",
380                        __func__);
381                 rte_memzone_free(rz);
382                 return NULL;
383         }
384
385         mze->rz = rz;
386
387         rte_spinlock_lock(&enic->memzone_list_lock);
388         LIST_INSERT_HEAD(&enic->memzone_list, mze, entries);
389         rte_spinlock_unlock(&enic->memzone_list_lock);
390
391         return vaddr;
392 }
393
394 static void
395 enic_free_consistent(void *priv,
396                      __rte_unused size_t size,
397                      void *vaddr,
398                      dma_addr_t dma_handle)
399 {
400         struct enic_memzone_entry *mze;
401         struct enic *enic = (struct enic *)priv;
402
403         rte_spinlock_lock(&enic->memzone_list_lock);
404         LIST_FOREACH(mze, &enic->memzone_list, entries) {
405                 if (mze->rz->addr == vaddr &&
406                     mze->rz->iova == dma_handle)
407                         break;
408         }
409         if (mze == NULL) {
410                 rte_spinlock_unlock(&enic->memzone_list_lock);
411                 dev_warning(enic,
412                             "Tried to free memory, but couldn't find it in the memzone list\n");
413                 return;
414         }
415         LIST_REMOVE(mze, entries);
416         rte_spinlock_unlock(&enic->memzone_list_lock);
417         rte_memzone_free(mze->rz);
418         rte_free(mze);
419 }
420
421 int enic_link_update(struct enic *enic)
422 {
423         struct rte_eth_dev *eth_dev = enic->rte_dev;
424         struct rte_eth_link link;
425
426         memset(&link, 0, sizeof(link));
427         link.link_status = enic_get_link_status(enic);
428         link.link_duplex = ETH_LINK_FULL_DUPLEX;
429         link.link_speed = vnic_dev_port_speed(enic->vdev);
430
431         return rte_eth_linkstatus_set(eth_dev, &link);
432 }
433
434 static void
435 enic_intr_handler(void *arg)
436 {
437         struct rte_eth_dev *dev = (struct rte_eth_dev *)arg;
438         struct enic *enic = pmd_priv(dev);
439
440         vnic_intr_return_all_credits(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
441
442         enic_link_update(enic);
443         _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
444         enic_log_q_error(enic);
445 }
446
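/*
 * Set up event fds and the interrupt vector table for per-Rx-queue
 * interrupts. Requires MSI-X (e.g. the vfio-pci driver); returns -ENOTSUP
 * otherwise.
 */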
447 static int enic_rxq_intr_init(struct enic *enic)
448 {
449         struct rte_intr_handle *intr_handle;
450         uint32_t rxq_intr_count, i;
451         int err;
452
453         intr_handle = enic->rte_dev->intr_handle;
454         if (!enic->rte_dev->data->dev_conf.intr_conf.rxq)
455                 return 0;
456         /*
457          * Rx queue interrupts only work when we have MSI-X interrupts,
458          * one per queue. Sharing one interrupt is technically
459          * possible with VIC, but it is not worth the complications it brings.
460          */
461         if (!rte_intr_cap_multiple(intr_handle)) {
462                 dev_err(enic, "Rx queue interrupts require MSI-X interrupts"
463                         " (vfio-pci driver)\n");
464                 return -ENOTSUP;
465         }
466         rxq_intr_count = enic->intr_count - ENICPMD_RXQ_INTR_OFFSET;
467         err = rte_intr_efd_enable(intr_handle, rxq_intr_count);
468         if (err) {
469                 dev_err(enic, "Failed to enable event fds for Rx queue"
470                         " interrupts\n");
471                 return err;
472         }
473         intr_handle->intr_vec = rte_zmalloc("enic_intr_vec",
474                                             rxq_intr_count * sizeof(int), 0);
475         if (intr_handle->intr_vec == NULL) {
476                 dev_err(enic, "Failed to allocate intr_vec\n");
477                 return -ENOMEM;
478         }
479         for (i = 0; i < rxq_intr_count; i++)
480                 intr_handle->intr_vec[i] = i + ENICPMD_RXQ_INTR_OFFSET;
481         return 0;
482 }
483
484 static void enic_rxq_intr_deinit(struct enic *enic)
485 {
486         struct rte_intr_handle *intr_handle;
487
488         intr_handle = enic->rte_dev->intr_handle;
489         rte_intr_efd_disable(intr_handle);
490         if (intr_handle->intr_vec != NULL) {
491                 rte_free(intr_handle->intr_vec);
492                 intr_handle->intr_vec = NULL;
493         }
494 }
495
496 static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
497 {
498         struct wq_enet_desc *desc;
499         struct vnic_wq *wq;
500         unsigned int i;
501
502         /*
503          * Fill WQ descriptor fields that never change. Every descriptor is
504          * one packet, so set EOP. Also set CQ_ENTRY every ENIC_WQ_CQ_THRESH
505          * descriptors (i.e. request one completion update every 32 packets).
506          */
507         wq = &enic->wq[queue_idx];
508         desc = (struct wq_enet_desc *)wq->ring.descs;
509         for (i = 0; i < wq->ring.desc_count; i++, desc++) {
510                 desc->header_length_flags = 1 << WQ_ENET_FLAGS_EOP_SHIFT;
511                 if (i % ENIC_WQ_CQ_THRESH == ENIC_WQ_CQ_THRESH - 1)
512                         desc->header_length_flags |=
513                                 (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT);
514         }
515 }
516
517 static void pick_rx_handler(struct enic *enic)
518 {
519         struct rte_eth_dev *eth_dev;
520
521         /* Use the non-scatter, simplified RX handler if possible. */
522         eth_dev = enic->rte_dev;
523         if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
524                 PMD_INIT_LOG(DEBUG, " use the non-scatter Rx handler");
525                 eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
526         } else {
527                 PMD_INIT_LOG(DEBUG, " use the normal Rx handler");
528                 eth_dev->rx_pkt_burst = &enic_recv_pkts;
529         }
530 }
531
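/*
 * Device start: fill the Rx rings with mbufs, pick the Tx and Rx burst
 * handlers, enable all WQs and RQs, and register the error/link-status
 * interrupt handler.
 */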
532 int enic_enable(struct enic *enic)
533 {
534         unsigned int index;
535         int err;
536         struct rte_eth_dev *eth_dev = enic->rte_dev;
537
538         eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
539         eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
540
541         /* vnic notification of link status has already been turned on in
542          * enic_dev_init() which is called during probe time.  Here we are
543          * just turning on interrupt vector 0 if needed.
544          */
545         if (eth_dev->data->dev_conf.intr_conf.lsc)
546                 vnic_dev_notify_set(enic->vdev, 0);
547
548         err = enic_rxq_intr_init(enic);
549         if (err)
550                 return err;
551         if (enic_clsf_init(enic))
552                 dev_warning(enic, "Init of hash table for clsf failed. "
553                         "Flow director feature will not work\n");
554
555         for (index = 0; index < enic->rq_count; index++) {
556                 err = enic_alloc_rx_queue_mbufs(enic,
557                         &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
558                 if (err) {
559                         dev_err(enic, "Failed to alloc sop RX queue mbufs\n");
560                         return err;
561                 }
562                 err = enic_alloc_rx_queue_mbufs(enic,
563                         &enic->rq[enic_rte_rq_idx_to_data_idx(index)]);
564                 if (err) {
565                         /* release the allocated mbufs for the sop rq*/
566                         enic_rxmbuf_queue_release(enic,
567                                 &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]);
568
569                         dev_err(enic, "Failed to alloc data RX queue mbufs\n");
570                         return err;
571                 }
572         }
573
574         /*
575          * Use the simple TX handler if possible. All offloads must be
576          * disabled.
577          */
578         if (eth_dev->data->dev_conf.txmode.offloads == 0) {
579                 PMD_INIT_LOG(DEBUG, " use the simple tx handler");
580                 eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
581                 for (index = 0; index < enic->wq_count; index++)
582                         enic_prep_wq_for_simple_tx(enic, index);
583         } else {
584                 PMD_INIT_LOG(DEBUG, " use the default tx handler");
585                 eth_dev->tx_pkt_burst = &enic_xmit_pkts;
586         }
587
588         pick_rx_handler(enic);
589
590         for (index = 0; index < enic->wq_count; index++)
591                 enic_start_wq(enic, index);
592         for (index = 0; index < enic->rq_count; index++)
593                 enic_start_rq(enic, index);
594
595         vnic_dev_add_addr(enic->vdev, enic->mac_addr);
596
597         vnic_dev_enable_wait(enic->vdev);
598
599         /* Register and enable error interrupt */
600         rte_intr_callback_register(&(enic->pdev->intr_handle),
601                 enic_intr_handler, (void *)enic->rte_dev);
602
603         rte_intr_enable(&(enic->pdev->intr_handle));
604         /* Unmask LSC interrupt */
605         vnic_intr_unmask(&enic->intr[ENICPMD_LSC_INTR_OFFSET]);
606
607         return 0;
608 }
609
610 int enic_alloc_intr_resources(struct enic *enic)
611 {
612         int err;
613         unsigned int i;
614
615         dev_info(enic, "vNIC resources used:  "\
616                 "wq %d rq %d cq %d intr %d\n",
617                 enic->wq_count, enic_vnic_rq_count(enic),
618                 enic->cq_count, enic->intr_count);
619
620         for (i = 0; i < enic->intr_count; i++) {
621                 err = vnic_intr_alloc(enic->vdev, &enic->intr[i], i);
622                 if (err) {
623                         enic_free_vnic_resources(enic);
624                         return err;
625                 }
626         }
627         return 0;
628 }
629
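/*
 * Release everything associated with an Rx queue: cached free mbufs, the
 * mbufs posted in the SOP and data rings, the mbuf rings themselves, and
 * the underlying vNIC RQ and CQ resources.
 */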
630 void enic_free_rq(void *rxq)
631 {
632         struct vnic_rq *rq_sop, *rq_data;
633         struct enic *enic;
634
635         if (rxq == NULL)
636                 return;
637
638         rq_sop = (struct vnic_rq *)rxq;
639         enic = vnic_dev_priv(rq_sop->vdev);
640         rq_data = &enic->rq[rq_sop->data_queue_idx];
641
642         if (rq_sop->free_mbufs) {
643                 struct rte_mbuf **mb;
644                 int i;
645
646                 mb = rq_sop->free_mbufs;
647                 for (i = ENIC_RX_BURST_MAX - rq_sop->num_free_mbufs;
648                      i < ENIC_RX_BURST_MAX; i++)
649                         rte_pktmbuf_free(mb[i]);
650                 rte_free(rq_sop->free_mbufs);
651                 rq_sop->free_mbufs = NULL;
652                 rq_sop->num_free_mbufs = 0;
653         }
654
655         enic_rxmbuf_queue_release(enic, rq_sop);
656         if (rq_data->in_use)
657                 enic_rxmbuf_queue_release(enic, rq_data);
658
659         rte_free(rq_sop->mbuf_ring);
660         if (rq_data->in_use)
661                 rte_free(rq_data->mbuf_ring);
662
663         rq_sop->mbuf_ring = NULL;
664         rq_data->mbuf_ring = NULL;
665
666         vnic_rq_free(rq_sop);
667         if (rq_data->in_use)
668                 vnic_rq_free(rq_data);
669
670         vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]);
671
672         rq_sop->in_use = 0;
673         rq_data->in_use = 0;
674 }
675
676 void enic_start_wq(struct enic *enic, uint16_t queue_idx)
677 {
678         struct rte_eth_dev *eth_dev = enic->rte_dev;
679         vnic_wq_enable(&enic->wq[queue_idx]);
680         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
681 }
682
683 int enic_stop_wq(struct enic *enic, uint16_t queue_idx)
684 {
685         struct rte_eth_dev *eth_dev = enic->rte_dev;
686         int ret;
687
688         ret = vnic_wq_disable(&enic->wq[queue_idx]);
689         if (ret)
690                 return ret;
691
692         eth_dev->data->tx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
693         return 0;
694 }
695
696 void enic_start_rq(struct enic *enic, uint16_t queue_idx)
697 {
698         struct vnic_rq *rq_sop;
699         struct vnic_rq *rq_data;
700         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
701         rq_data = &enic->rq[rq_sop->data_queue_idx];
702         struct rte_eth_dev *eth_dev = enic->rte_dev;
703
704         if (rq_data->in_use) {
705                 vnic_rq_enable(rq_data);
706                 enic_initial_post_rx(enic, rq_data);
707         }
708         rte_mb();
709         vnic_rq_enable(rq_sop);
710         enic_initial_post_rx(enic, rq_sop);
711         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STARTED;
712 }
713
714 int enic_stop_rq(struct enic *enic, uint16_t queue_idx)
715 {
716         int ret1 = 0, ret2 = 0;
717         struct rte_eth_dev *eth_dev = enic->rte_dev;
718         struct vnic_rq *rq_sop;
719         struct vnic_rq *rq_data;
720         rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)];
721         rq_data = &enic->rq[rq_sop->data_queue_idx];
722
723         ret2 = vnic_rq_disable(rq_sop);
724         rte_mb();
725         if (rq_data->in_use)
726                 ret1 = vnic_rq_disable(rq_data);
727
728         if (ret2)
729                 return ret2;
730         else if (ret1)
731                 return ret1;
732
733         eth_dev->data->rx_queue_state[queue_idx] = RTE_ETH_QUEUE_STATE_STOPPED;
734         return 0;
735 }
736
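/*
 * Allocate the SOP (start-of-packet) RQ and, when Rx scatter is enabled,
 * the data RQ for one rte_eth Rx queue, along with the shared CQ and the
 * mbuf rings. Descriptor counts are split between the two RQs and clamped
 * to the limits supported by the adapter.
 */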
737 int enic_alloc_rq(struct enic *enic, uint16_t queue_idx,
738         unsigned int socket_id, struct rte_mempool *mp,
739         uint16_t nb_desc, uint16_t free_thresh)
740 {
741         int rc;
742         uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx);
743         uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx);
744         struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx];
745         struct vnic_rq *rq_data = &enic->rq[data_queue_idx];
746         unsigned int mbuf_size, mbufs_per_pkt;
747         unsigned int nb_sop_desc, nb_data_desc;
748         uint16_t min_sop, max_sop, min_data, max_data;
749         uint32_t max_rx_pkt_len;
750
751         rq_sop->is_sop = 1;
752         rq_sop->data_queue_idx = data_queue_idx;
753         rq_data->is_sop = 0;
754         rq_data->data_queue_idx = 0;
755         rq_sop->socket_id = socket_id;
756         rq_sop->mp = mp;
757         rq_data->socket_id = socket_id;
758         rq_data->mp = mp;
759         rq_sop->in_use = 1;
760         rq_sop->rx_free_thresh = free_thresh;
761         rq_data->rx_free_thresh = free_thresh;
762         dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx,
763                   free_thresh);
764
765         mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
766                                RTE_PKTMBUF_HEADROOM);
767         /* max_rx_pkt_len includes the ethernet header and CRC. */
768         max_rx_pkt_len = enic->rte_dev->data->dev_conf.rxmode.max_rx_pkt_len;
769
770         if (enic->rte_dev->data->dev_conf.rxmode.offloads &
771             DEV_RX_OFFLOAD_SCATTER) {
772                 dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx);
773                 /* ceil((max pkt len)/mbuf_size) */
774                 mbufs_per_pkt = (max_rx_pkt_len + mbuf_size - 1) / mbuf_size;
775         } else {
776                 dev_info(enic, "Scatter rx mode disabled\n");
777                 mbufs_per_pkt = 1;
778                 if (max_rx_pkt_len > mbuf_size) {
779                         dev_warning(enic, "The maximum Rx packet size (%u) is"
780                                     " larger than the mbuf size (%u), and"
781                                     " scatter is disabled. Larger packets will"
782                                     " be truncated.\n",
783                                     max_rx_pkt_len, mbuf_size);
784                 }
785         }
786
787         if (mbufs_per_pkt > 1) {
788                 dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx);
789                 rq_sop->data_queue_enable = 1;
790                 rq_data->in_use = 1;
791                 /*
792                  * HW does not directly support rxmode.max_rx_pkt_len. HW always
793                  * receives packet sizes up to the "max" MTU.
794                  * If not using scatter, we can achieve the effect of dropping
795                  * larger packets by reducing the size of posted buffers.
796                  * See enic_alloc_rx_queue_mbufs().
797                  */
798                 if (max_rx_pkt_len <
799                     enic_mtu_to_max_rx_pktlen(enic->max_mtu)) {
800                         dev_warning(enic, "rxmode.max_rx_pkt_len is ignored"
801                                     " when scatter rx mode is in use.\n");
802                 }
803         } else {
804                 dev_info(enic, "Rq %u Scatter rx mode not being used\n",
805                          queue_idx);
806                 rq_sop->data_queue_enable = 0;
807                 rq_data->in_use = 0;
808         }
809
810         /* number of descriptors has to be a multiple of 32 */
811         nb_sop_desc = (nb_desc / mbufs_per_pkt) & ENIC_ALIGN_DESCS_MASK;
812         nb_data_desc = (nb_desc - nb_sop_desc) & ENIC_ALIGN_DESCS_MASK;
813
814         rq_sop->max_mbufs_per_pkt = mbufs_per_pkt;
815         rq_data->max_mbufs_per_pkt = mbufs_per_pkt;
816
817         if (mbufs_per_pkt > 1) {
818                 min_sop = ENIC_RX_BURST_MAX;
819                 max_sop = ((enic->config.rq_desc_count /
820                             (mbufs_per_pkt - 1)) & ENIC_ALIGN_DESCS_MASK);
821                 min_data = min_sop * (mbufs_per_pkt - 1);
822                 max_data = enic->config.rq_desc_count;
823         } else {
824                 min_sop = ENIC_RX_BURST_MAX;
825                 max_sop = enic->config.rq_desc_count;
826                 min_data = 0;
827                 max_data = 0;
828         }
829
830         if (nb_desc < (min_sop + min_data)) {
831                 dev_warning(enic,
832                             "Number of rx descs too low, adjusting to minimum\n");
833                 nb_sop_desc = min_sop;
834                 nb_data_desc = min_data;
835         } else if (nb_desc > (max_sop + max_data)) {
836                 dev_warning(enic,
837                             "Number of rx_descs too high, adjusting to maximum\n");
838                 nb_sop_desc = max_sop;
839                 nb_data_desc = max_data;
840         }
841         if (mbufs_per_pkt > 1) {
842                 dev_info(enic, "For max packet size %u and mbuf size %u, valid"
843                          " rx descriptor range is %u to %u\n",
844                          max_rx_pkt_len, mbuf_size, min_sop + min_data,
845                          max_sop + max_data);
846         }
847         dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n",
848                  nb_sop_desc + nb_data_desc, nb_sop_desc, nb_data_desc);
849
850         /* Allocate sop queue resources */
851         rc = vnic_rq_alloc(enic->vdev, rq_sop, sop_queue_idx,
852                 nb_sop_desc, sizeof(struct rq_enet_desc));
853         if (rc) {
854                 dev_err(enic, "error in allocation of sop rq\n");
855                 goto err_exit;
856         }
857         nb_sop_desc = rq_sop->ring.desc_count;
858
859         if (rq_data->in_use) {
860                 /* Allocate data queue resources */
861                 rc = vnic_rq_alloc(enic->vdev, rq_data, data_queue_idx,
862                                    nb_data_desc,
863                                    sizeof(struct rq_enet_desc));
864                 if (rc) {
865                         dev_err(enic, "error in allocation of data rq\n");
866                         goto err_free_rq_sop;
867                 }
868                 nb_data_desc = rq_data->ring.desc_count;
869         }
870         rc = vnic_cq_alloc(enic->vdev, &enic->cq[queue_idx], queue_idx,
871                            socket_id, nb_sop_desc + nb_data_desc,
872                            sizeof(struct cq_enet_rq_desc));
873         if (rc) {
874                 dev_err(enic, "error in allocation of cq for rq\n");
875                 goto err_free_rq_data;
876         }
877
878         /* Allocate the mbuf rings */
879         rq_sop->mbuf_ring = (struct rte_mbuf **)
880                 rte_zmalloc_socket("rq->mbuf_ring",
881                                    sizeof(struct rte_mbuf *) * nb_sop_desc,
882                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
883         if (rq_sop->mbuf_ring == NULL)
884                 goto err_free_cq;
885
886         if (rq_data->in_use) {
887                 rq_data->mbuf_ring = (struct rte_mbuf **)
888                         rte_zmalloc_socket("rq->mbuf_ring",
889                                 sizeof(struct rte_mbuf *) * nb_data_desc,
890                                 RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
891                 if (rq_data->mbuf_ring == NULL)
892                         goto err_free_sop_mbuf;
893         }
894
895         rq_sop->free_mbufs = (struct rte_mbuf **)
896                 rte_zmalloc_socket("rq->free_mbufs",
897                                    sizeof(struct rte_mbuf *) *
898                                    ENIC_RX_BURST_MAX,
899                                    RTE_CACHE_LINE_SIZE, rq_sop->socket_id);
900         if (rq_sop->free_mbufs == NULL)
901                 goto err_free_data_mbuf;
902         rq_sop->num_free_mbufs = 0;
903
904         rq_sop->tot_nb_desc = nb_desc; /* squirrel away for MTU update function */
905
906         return 0;
907
908 err_free_data_mbuf:
909         rte_free(rq_data->mbuf_ring);
910 err_free_sop_mbuf:
911         rte_free(rq_sop->mbuf_ring);
912 err_free_cq:
913         /* cleanup on error */
914         vnic_cq_free(&enic->cq[queue_idx]);
915 err_free_rq_data:
916         if (rq_data->in_use)
917                 vnic_rq_free(rq_data);
918 err_free_rq_sop:
919         vnic_rq_free(rq_sop);
920 err_exit:
921         return -ENOMEM;
922 }
923
924 void enic_free_wq(void *txq)
925 {
926         struct vnic_wq *wq;
927         struct enic *enic;
928
929         if (txq == NULL)
930                 return;
931
932         wq = (struct vnic_wq *)txq;
933         enic = vnic_dev_priv(wq->vdev);
934         rte_memzone_free(wq->cqmsg_rz);
935         vnic_wq_free(wq);
936         vnic_cq_free(&enic->cq[enic->rq_count + wq->index]);
937 }
938
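/*
 * Allocate a Tx queue: the vNIC WQ, its completion queue, and the small
 * memzone the NIC writes CQ completion messages into.
 */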
939 int enic_alloc_wq(struct enic *enic, uint16_t queue_idx,
940         unsigned int socket_id, uint16_t nb_desc)
941 {
942         int err;
943         struct vnic_wq *wq = &enic->wq[queue_idx];
944         unsigned int cq_index = enic_cq_wq(enic, queue_idx);
945         char name[NAME_MAX];
946         static int instance;
947
948         wq->socket_id = socket_id;
949         /*
950          * rte_eth_tx_queue_setup() checks min, max, and alignment. So just
951          * print an info message for diagnostics.
952          */
953         dev_info(enic, "TX Queues - effective number of descs:%d\n", nb_desc);
954
955         /* Allocate queue resources */
956         err = vnic_wq_alloc(enic->vdev, &enic->wq[queue_idx], queue_idx,
957                 nb_desc,
958                 sizeof(struct wq_enet_desc));
959         if (err) {
960                 dev_err(enic, "error in allocation of wq\n");
961                 return err;
962         }
963
964         err = vnic_cq_alloc(enic->vdev, &enic->cq[cq_index], cq_index,
965                 socket_id, nb_desc,
966                 sizeof(struct cq_enet_wq_desc));
967         if (err) {
968                 vnic_wq_free(wq);
969                 dev_err(enic, "error in allocation of cq for wq\n");
970         }
971
972         /* set up CQ message */
973         snprintf((char *)name, sizeof(name),
974                  "vnic_cqmsg-%s-%d-%d", enic->bdf_name, queue_idx,
975                 instance++);
976
977         wq->cqmsg_rz = rte_memzone_reserve_aligned((const char *)name,
978                         sizeof(uint32_t), SOCKET_ID_ANY,
979                         RTE_MEMZONE_IOVA_CONTIG, ENIC_ALIGN);
980         if (!wq->cqmsg_rz)
981                 return -ENOMEM;
982
983         return err;
984 }
985
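/*
 * Device stop: mask interrupts, unregister the interrupt handler, disable
 * all WQs and RQs, and clean the rings so that queued mbufs are freed.
 */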
986 int enic_disable(struct enic *enic)
987 {
988         unsigned int i;
989         int err;
990
991         for (i = 0; i < enic->intr_count; i++) {
992                 vnic_intr_mask(&enic->intr[i]);
993                 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */
994         }
995         enic_rxq_intr_deinit(enic);
996         rte_intr_disable(&enic->pdev->intr_handle);
997         rte_intr_callback_unregister(&enic->pdev->intr_handle,
998                                      enic_intr_handler,
999                                      (void *)enic->rte_dev);
1000
1001         vnic_dev_disable(enic->vdev);
1002
1003         enic_clsf_destroy(enic);
1004
1005         if (!enic_is_sriov_vf(enic))
1006                 vnic_dev_del_addr(enic->vdev, enic->mac_addr);
1007
1008         for (i = 0; i < enic->wq_count; i++) {
1009                 err = vnic_wq_disable(&enic->wq[i]);
1010                 if (err)
1011                         return err;
1012         }
1013         for (i = 0; i < enic_vnic_rq_count(enic); i++) {
1014                 if (enic->rq[i].in_use) {
1015                         err = vnic_rq_disable(&enic->rq[i]);
1016                         if (err)
1017                                 return err;
1018                 }
1019         }
1020
1021         /* If we were using interrupts, set the interrupt vector to -1
1022          * to disable interrupts.  We are not disabling link notifications,
1023          * though, as we want the polling of link status to continue working.
1024          */
1025         if (enic->rte_dev->data->dev_conf.intr_conf.lsc)
1026                 vnic_dev_notify_set(enic->vdev, -1);
1027
1028         vnic_dev_set_reset_flag(enic->vdev, 1);
1029
1030         for (i = 0; i < enic->wq_count; i++)
1031                 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf);
1032
1033         for (i = 0; i < enic_vnic_rq_count(enic); i++)
1034                 if (enic->rq[i].in_use)
1035                         vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
1036         for (i = 0; i < enic->cq_count; i++)
1037                 vnic_cq_clean(&enic->cq[i]);
1038         for (i = 0; i < enic->intr_count; i++)
1039                 vnic_intr_clean(&enic->intr[i]);
1040
1041         return 0;
1042 }
1043
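/*
 * Start an asynchronous vNIC operation via 'start' and poll 'finished'
 * until it completes, giving up after roughly two seconds.
 */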
1044 static int enic_dev_wait(struct vnic_dev *vdev,
1045         int (*start)(struct vnic_dev *, int),
1046         int (*finished)(struct vnic_dev *, int *),
1047         int arg)
1048 {
1049         int done;
1050         int err;
1051         int i;
1052
1053         err = start(vdev, arg);
1054         if (err)
1055                 return err;
1056
1057         /* Wait for func to complete...2 seconds max */
1058         for (i = 0; i < 2000; i++) {
1059                 err = finished(vdev, &done);
1060                 if (err)
1061                         return err;
1062                 if (done)
1063                         return 0;
1064                 usleep(1000);
1065         }
1066         return -ETIMEDOUT;
1067 }
1068
1069 static int enic_dev_open(struct enic *enic)
1070 {
1071         int err;
1072         int flags = CMD_OPENF_IG_DESCCACHE;
1073
1074         err = enic_dev_wait(enic->vdev, vnic_dev_open,
1075                 vnic_dev_open_done, flags);
1076         if (err)
1077                 dev_err(enic_get_dev(enic),
1078                         "vNIC device open failed, err %d\n", err);
1079
1080         return err;
1081 }
1082
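/*
 * Copy the RSS hash key into a DMA buffer using the 10-bytes-per-entry
 * layout the NIC expects, and program it into the vNIC. The key is cached
 * in the enic struct so it can be reported back to the application.
 */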
1083 static int enic_set_rsskey(struct enic *enic, uint8_t *user_key)
1084 {
1085         dma_addr_t rss_key_buf_pa;
1086         union vnic_rss_key *rss_key_buf_va = NULL;
1087         int err, i;
1088         u8 name[NAME_MAX];
1089
1090         RTE_ASSERT(user_key != NULL);
1091         snprintf((char *)name, NAME_MAX, "rss_key-%s", enic->bdf_name);
1092         rss_key_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_key),
1093                 &rss_key_buf_pa, name);
1094         if (!rss_key_buf_va)
1095                 return -ENOMEM;
1096
1097         for (i = 0; i < ENIC_RSS_HASH_KEY_SIZE; i++)
1098                 rss_key_buf_va->key[i / 10].b[i % 10] = user_key[i];
1099
1100         err = enic_set_rss_key(enic,
1101                 rss_key_buf_pa,
1102                 sizeof(union vnic_rss_key));
1103
1104         /* Save for later queries */
1105         if (!err) {
1106                 rte_memcpy(&enic->rss_key, rss_key_buf_va,
1107                            sizeof(union vnic_rss_key));
1108         }
1109         enic_free_consistent(enic, sizeof(union vnic_rss_key),
1110                 rss_key_buf_va, rss_key_buf_pa);
1111
1112         return err;
1113 }
1114
1115 int enic_set_rss_reta(struct enic *enic, union vnic_rss_cpu *rss_cpu)
1116 {
1117         dma_addr_t rss_cpu_buf_pa;
1118         union vnic_rss_cpu *rss_cpu_buf_va = NULL;
1119         int err;
1120         u8 name[NAME_MAX];
1121
1122         snprintf((char *)name, NAME_MAX, "rss_cpu-%s", enic->bdf_name);
1123         rss_cpu_buf_va = enic_alloc_consistent(enic, sizeof(union vnic_rss_cpu),
1124                 &rss_cpu_buf_pa, name);
1125         if (!rss_cpu_buf_va)
1126                 return -ENOMEM;
1127
1128         rte_memcpy(rss_cpu_buf_va, rss_cpu, sizeof(union vnic_rss_cpu));
1129
1130         err = enic_set_rss_cpu(enic,
1131                 rss_cpu_buf_pa,
1132                 sizeof(union vnic_rss_cpu));
1133
1134         enic_free_consistent(enic, sizeof(union vnic_rss_cpu),
1135                 rss_cpu_buf_va, rss_cpu_buf_pa);
1136
1137         /* Save for later queries */
1138         if (!err)
1139                 rte_memcpy(&enic->rss_cpu, rss_cpu, sizeof(union vnic_rss_cpu));
1140         return err;
1141 }
1142
1143 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu,
1144         u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable)
1145 {
1146         const u8 tso_ipid_split_en = 0;
1147         int err;
1148
1149         err = enic_set_nic_cfg(enic,
1150                 rss_default_cpu, rss_hash_type,
1151                 rss_hash_bits, rss_base_cpu,
1152                 rss_enable, tso_ipid_split_en,
1153                 enic->ig_vlan_strip_en);
1154
1155         return err;
1156 }
1157
1158 /* Initialize RSS with defaults, called from dev_configure */
1159 int enic_init_rss_nic_cfg(struct enic *enic)
1160 {
1161         static uint8_t default_rss_key[] = {
1162                 85, 67, 83, 97, 119, 101, 115, 111, 109, 101,
1163                 80, 65, 76, 79, 117, 110, 105, 113, 117, 101,
1164                 76, 73, 78, 85, 88, 114, 111, 99, 107, 115,
1165                 69, 78, 73, 67, 105, 115, 99, 111, 111, 108,
1166         };
1167         struct rte_eth_rss_conf rss_conf;
1168         union vnic_rss_cpu rss_cpu;
1169         int ret, i;
1170
1171         rss_conf = enic->rte_dev->data->dev_conf.rx_adv_conf.rss_conf;
1172         /*
1173          * If setting the key for the first time and the user gives us none,
1174          * push the default key to the NIC.
1175          */
1176         if (rss_conf.rss_key == NULL) {
1177                 rss_conf.rss_key = default_rss_key;
1178                 rss_conf.rss_key_len = ENIC_RSS_HASH_KEY_SIZE;
1179         }
1180         ret = enic_set_rss_conf(enic, &rss_conf);
1181         if (ret) {
1182                 dev_err(enic, "Failed to configure RSS\n");
1183                 return ret;
1184         }
1185         if (enic->rss_enable) {
1186                 /* If enabling RSS, use the default reta */
1187                 for (i = 0; i < ENIC_RSS_RETA_SIZE; i++) {
1188                         rss_cpu.cpu[i / 4].b[i % 4] =
1189                                 enic_rte_rq_idx_to_sop_idx(i % enic->rq_count);
1190                 }
1191                 ret = enic_set_rss_reta(enic, &rss_cpu);
1192                 if (ret)
1193                         dev_err(enic, "Failed to set RSS indirection table\n");
1194         }
1195         return ret;
1196 }
1197
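/*
 * Final init step: initialize the soft stats and program the default
 * packet filter (directed, multicast, broadcast, allmulti; no promisc).
 */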
1198 int enic_setup_finish(struct enic *enic)
1199 {
1200         enic_init_soft_stats(enic);
1201
1202         /* Default conf */
1203         vnic_dev_packet_filter(enic->vdev,
1204                 1 /* directed  */,
1205                 1 /* multicast */,
1206                 1 /* broadcast */,
1207                 0 /* promisc   */,
1208                 1 /* allmulti  */);
1209
1210         enic->promisc = 0;
1211         enic->allmulti = 1;
1212
1213         return 0;
1214 }
1215
1216 static int enic_rss_conf_valid(struct enic *enic,
1217                                struct rte_eth_rss_conf *rss_conf)
1218 {
1219         /* RSS is disabled per VIC settings. Ignore rss_conf. */
1220         if (enic->flow_type_rss_offloads == 0)
1221                 return 0;
1222         if (rss_conf->rss_key != NULL &&
1223             rss_conf->rss_key_len != ENIC_RSS_HASH_KEY_SIZE) {
1224                 dev_err(enic, "Given rss_key is %d bytes, it must be %d\n",
1225                         rss_conf->rss_key_len, ENIC_RSS_HASH_KEY_SIZE);
1226                 return -EINVAL;
1227         }
1228         if (rss_conf->rss_hf != 0 &&
1229             (rss_conf->rss_hf & enic->flow_type_rss_offloads) == 0) {
1230                 dev_err(enic, "Given rss_hf contains none of the supported"
1231                         " types\n");
1232                 return -EINVAL;
1233         }
1234         return 0;
1235 }
1236
1237 /* Set hash type and key according to rss_conf */
1238 int enic_set_rss_conf(struct enic *enic, struct rte_eth_rss_conf *rss_conf)
1239 {
1240         struct rte_eth_dev *eth_dev;
1241         uint64_t rss_hf;
1242         u8 rss_hash_type;
1243         u8 rss_enable;
1244         int ret;
1245
1246         RTE_ASSERT(rss_conf != NULL);
1247         ret = enic_rss_conf_valid(enic, rss_conf);
1248         if (ret) {
1249                 dev_err(enic, "RSS configuration (rss_conf) is invalid\n");
1250                 return ret;
1251         }
1252
1253         eth_dev = enic->rte_dev;
1254         rss_hash_type = 0;
1255         rss_hf = rss_conf->rss_hf & enic->flow_type_rss_offloads;
1256         if (enic->rq_count > 1 &&
1257             (eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) &&
1258             rss_hf != 0) {
1259                 rss_enable = 1;
1260                 if (rss_hf & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
1261                               ETH_RSS_NONFRAG_IPV4_OTHER))
1262                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV4;
1263                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1264                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1265                 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP) {
1266                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV4;
1267                         if (enic->udp_rss_weak) {
1268                                 /*
1269                                  * 'TCP' is not a typo. The "weak" version of
1270                                  * UDP RSS requires both the TCP and UDP bits
1271                                  * be set. It does enable TCP RSS as well.
1272                                  */
1273                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV4;
1274                         }
1275                 }
1276                 if (rss_hf & (ETH_RSS_IPV6 | ETH_RSS_IPV6_EX |
1277                               ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER))
1278                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_IPV6;
1279                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX))
1280                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1281                 if (rss_hf & (ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_UDP_EX)) {
1282                         rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_UDP_IPV6;
1283                         if (enic->udp_rss_weak)
1284                                 rss_hash_type |= NIC_CFG_RSS_HASH_TYPE_TCP_IPV6;
1285                 }
1286         } else {
1287                 rss_enable = 0;
1288                 rss_hf = 0;
1289         }
1290
1291         /* Set the hash key if provided */
1292         if (rss_enable && rss_conf->rss_key) {
1293                 ret = enic_set_rsskey(enic, rss_conf->rss_key);
1294                 if (ret) {
1295                         dev_err(enic, "Failed to set RSS key\n");
1296                         return ret;
1297                 }
1298         }
1299
1300         ret = enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, rss_hash_type,
1301                               ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1302                               rss_enable);
1303         if (!ret) {
1304                 enic->rss_hf = rss_hf;
1305                 enic->rss_hash_type = rss_hash_type;
1306                 enic->rss_enable = rss_enable;
1307         } else {
1308                 dev_err(enic, "Failed to update RSS configurations."
1309                         " hash=0x%x\n", rss_hash_type);
1310         }
1311         return ret;
1312 }
1313
1314 int enic_set_vlan_strip(struct enic *enic)
1315 {
1316         /*
1317          * Unfortunately, VLAN strip on/off and RSS on/off are configured
1318          * together. So, re-do niccfg, preserving the current RSS settings.
1319          */
1320         return enic_set_niccfg(enic, ENIC_RSS_DEFAULT_CPU, enic->rss_hash_type,
1321                                ENIC_RSS_HASH_BITS, ENIC_RSS_BASE_CPU,
1322                                enic->rss_enable);
1323 }
1324
1325 void enic_add_packet_filter(struct enic *enic)
1326 {
1327         /* Args -> directed, multicast, broadcast, promisc, allmulti */
1328         vnic_dev_packet_filter(enic->vdev, 1, 1, 1,
1329                 enic->promisc, enic->allmulti);
1330 }
1331
1332 int enic_get_link_status(struct enic *enic)
1333 {
1334         return vnic_dev_link_status(enic->vdev);
1335 }
1336
1337 static void enic_dev_deinit(struct enic *enic)
1338 {
1339         struct rte_eth_dev *eth_dev = enic->rte_dev;
1340
1341         /* stop link status checking */
1342         vnic_dev_notify_unset(enic->vdev);
1343
1344         rte_free(eth_dev->data->mac_addrs);
1345         rte_free(enic->cq);
1346         rte_free(enic->intr);
1347         rte_free(enic->rq);
1348         rte_free(enic->wq);
1349 }
1350
1351
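/*
 * Check that the vNIC provides enough RQs, WQs, CQs, and interrupts for the
 * queue counts requested by the application, and record the counts that
 * will actually be used.
 */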
1352 int enic_set_vnic_res(struct enic *enic)
1353 {
1354         struct rte_eth_dev *eth_dev = enic->rte_dev;
1355         int rc = 0;
1356         unsigned int required_rq, required_wq, required_cq, required_intr;
1357
1358         /* Always use two vNIC RQs per eth_dev RQ, regardless of Rx scatter. */
1359         required_rq = eth_dev->data->nb_rx_queues * 2;
1360         required_wq = eth_dev->data->nb_tx_queues;
1361         required_cq = eth_dev->data->nb_rx_queues + eth_dev->data->nb_tx_queues;
1362         required_intr = 1; /* 1 for LSC even if intr_conf.lsc is 0 */
1363         if (eth_dev->data->dev_conf.intr_conf.rxq) {
1364                 required_intr += eth_dev->data->nb_rx_queues;
1365         }
1366
1367         if (enic->conf_rq_count < required_rq) {
1368                 dev_err(dev, "Not enough Receive queues. Requested:%u which uses %d RQs on VIC, Configured:%u\n",
1369                         eth_dev->data->nb_rx_queues,
1370                         required_rq, enic->conf_rq_count);
1371                 rc = -EINVAL;
1372         }
1373         if (enic->conf_wq_count < required_wq) {
1374                 dev_err(dev, "Not enough Transmit queues. Requested:%u, Configured:%u\n",
1375                         eth_dev->data->nb_tx_queues, enic->conf_wq_count);
1376                 rc = -EINVAL;
1377         }
1378
1379         if (enic->conf_cq_count < required_cq) {
1380                 dev_err(dev, "Not enough Completion queues. Required:%u, Configured:%u\n",
1381                         required_cq, enic->conf_cq_count);
1382                 rc = -EINVAL;
1383         }
1384         if (enic->conf_intr_count < required_intr) {
1385                 dev_err(dev, "Not enough Interrupts to support Rx queue"
1386                         " interrupts. Required:%u, Configured:%u\n",
1387                         required_intr, enic->conf_intr_count);
1388                 rc = -EINVAL;
1389         }
1390
1391         if (rc == 0) {
1392                 enic->rq_count = eth_dev->data->nb_rx_queues;
1393                 enic->wq_count = eth_dev->data->nb_tx_queues;
1394                 enic->cq_count = enic->rq_count + enic->wq_count;
1395                 enic->intr_count = required_intr;
1396         }
1397
1398         return rc;
1399 }
1400
1401 /* Reinitialize an Rx queue's CQ and RQs and repost its mbufs */
1402 static int
1403 enic_reinit_rq(struct enic *enic, unsigned int rq_idx)
1404 {
1405         struct vnic_rq *sop_rq, *data_rq;
1406         unsigned int cq_idx;
1407         int rc = 0;
1408
1409         sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1410         data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)];
1411         cq_idx = rq_idx;
1412
1413         vnic_cq_clean(&enic->cq[cq_idx]);
1414         vnic_cq_init(&enic->cq[cq_idx],
1415                      0 /* flow_control_enable */,
1416                      1 /* color_enable */,
1417                      0 /* cq_head */,
1418                      0 /* cq_tail */,
1419                      1 /* cq_tail_color */,
1420                      0 /* interrupt_enable */,
1421                      1 /* cq_entry_enable */,
1422                      0 /* cq_message_enable */,
1423                      0 /* interrupt offset */,
1424                      0 /* cq_message_addr */);
1425
1426
1427         vnic_rq_init_start(sop_rq, enic_cq_rq(enic,
1428                            enic_rte_rq_idx_to_sop_idx(rq_idx)), 0,
1429                            sop_rq->ring.desc_count - 1, 1, 0);
1430         if (data_rq->in_use) {
1431                 vnic_rq_init_start(data_rq,
1432                                    enic_cq_rq(enic,
1433                                    enic_rte_rq_idx_to_data_idx(rq_idx)), 0,
1434                                    data_rq->ring.desc_count - 1, 1, 0);
1435         }
1436
1437         rc = enic_alloc_rx_queue_mbufs(enic, sop_rq);
1438         if (rc)
1439                 return rc;
1440
1441         if (data_rq->in_use) {
1442                 rc = enic_alloc_rx_queue_mbufs(enic, data_rq);
1443                 if (rc) {
1444                         enic_rxmbuf_queue_release(enic, sop_rq);
1445                         return rc;
1446                 }
1447         }
1448
1449         return 0;
1450 }
1451
1452 /* The Cisco NIC can send and receive packets up to a max packet size
1453  * determined by the NIC type and firmware. There is also an MTU
1454  * configured into the NIC via the CIMC/UCSM management interface
1455  * which can be overridden by this function (up to the max packet size).
1456  * Depending on the network setup, doing so may cause packet drops
1457  * and unexpected behavior.
1458  */
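/*
 * Applications normally reach this through the ethdev API, e.g.
 * rte_eth_dev_set_mtu(port_id, new_mtu), which the PMD's mtu_set callback
 * forwards here.
 */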
1459 int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
1460 {
1461         unsigned int rq_idx;
1462         struct vnic_rq *rq;
1463         int rc = 0;
1464         uint16_t old_mtu;       /* previous setting */
1465         uint16_t config_mtu;    /* Value configured into NIC via CIMC/UCSM */
1466         struct rte_eth_dev *eth_dev = enic->rte_dev;
1467
1468         old_mtu = eth_dev->data->mtu;
1469         config_mtu = enic->config.mtu;
1470
1471         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1472                 return -E_RTE_SECONDARY;
1473
1474         if (new_mtu > enic->max_mtu) {
1475                 dev_err(enic,
1476                         "MTU not updated: requested (%u) greater than max (%u)\n",
1477                         new_mtu, enic->max_mtu);
1478                 return -EINVAL;
1479         }
1480         if (new_mtu < ENIC_MIN_MTU) {
1481                 dev_info(enic,
1482                         "MTU not updated: requested (%u) less than min (%u)\n",
1483                         new_mtu, ENIC_MIN_MTU);
1484                 return -EINVAL;
1485         }
1486         if (new_mtu > config_mtu)
1487                 dev_warning(enic,
1488                         "MTU (%u) is greater than value configured in NIC (%u)\n",
1489                         new_mtu, config_mtu);
1490
1491         /* Update the MTU and maximum packet length */
1492         eth_dev->data->mtu = new_mtu;
1493         eth_dev->data->dev_conf.rxmode.max_rx_pkt_len =
1494                 enic_mtu_to_max_rx_pktlen(new_mtu);
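        /*
         * Note: enic_mtu_to_max_rx_pktlen() accounts for the L2 framing
         * overhead that the NIC MTU value does not include.
         */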
1495
1496         /*
1497          * If the device has not started (enic_enable), nothing to do.
1498          * Later, enic_enable() will set up RQs reflecting the new maximum
1499          * packet length.
1500          */
1501         if (!eth_dev->data->dev_started)
1502                 goto set_mtu_done;
1503
1504         /*
1505          * The device has started, re-do RQs on the fly. In the process, we
1506          * pick up the new maximum packet length.
1507          *
1508          * Some applications rely on the ability to change MTU without stopping
1509          * the device. So keep this behavior for now.
1510          */
1511         rte_spinlock_lock(&enic->mtu_lock);
1512
1513         /* Stop traffic on all RQs */
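        /* Each DPDK Rx queue owns two VIC RQs (SOP + data), hence rq_count * 2 */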
1514         for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) {
1515                 rq = &enic->rq[rq_idx];
1516                 if (rq->is_sop && rq->in_use) {
1517                         rc = enic_stop_rq(enic,
1518                                           enic_sop_rq_idx_to_rte_idx(rq_idx));
1519                         if (rc) {
1520                                 dev_err(enic, "Failed to stop RQ %u\n", rq_idx);
1521                                 goto set_mtu_done;
1522                         }
1523                 }
1524         }
1525
1526         /* replace Rx function with a no-op to avoid getting stale pkts */
1527         eth_dev->rx_pkt_burst = enic_dummy_recv_pkts;
1528         rte_mb();
1529
1530         /* Allow time for threads to exit the real Rx function. */
1531         usleep(100000);
1532
1533         /* now it is safe to reconfigure the RQs */
1534
1535
1536         /* free and reallocate RQs with the new MTU */
1537         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1538                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1539                 if (!rq->in_use)
1540                         continue;
1541
1542                 enic_free_rq(rq);
1543                 rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp,
1544                                    rq->tot_nb_desc, rq->rx_free_thresh);
1545                 if (rc) {
1546                         dev_err(enic,
1547                                 "Fatal MTU alloc error- No traffic will pass\n");
1548                         goto set_mtu_done;
1549                 }
1550
1551                 rc = enic_reinit_rq(enic, rq_idx);
1552                 if (rc) {
1553                         dev_err(enic,
1554                                 "Fatal MTU RQ reinit- No traffic will pass\n");
1555                         goto set_mtu_done;
1556                 }
1557         }
1558
1559         /* put back the real receive function */
1560         rte_mb();
1561         pick_rx_handler(enic);
1562         rte_mb();
1563
1564         /* restart Rx traffic */
1565         for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) {
1566                 rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)];
1567                 if (rq->is_sop && rq->in_use)
1568                         enic_start_rq(enic, rq_idx);
1569         }
1570
1571 set_mtu_done:
1572         dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu);
1573         rte_spinlock_unlock(&enic->mtu_lock);
1574         return rc;
1575 }
1576
1577 static int enic_dev_init(struct enic *enic)
1578 {
1579         int err;
1580         struct rte_eth_dev *eth_dev = enic->rte_dev;
1581
1582         vnic_dev_intr_coal_timer_info_default(enic->vdev);
1583
1584         /* Get vNIC configuration */
1586         err = enic_get_vnic_config(enic);
1587         if (err) {
1588                 dev_err(dev, "Get vNIC configuration failed, aborting\n");
1589                 return err;
1590         }
1591
1592         /* Get available resource counts */
1593         enic_get_res_counts(enic);
1594         if (enic->conf_rq_count == 1) {
1595                 dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n");
1596                 dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n");
1597                 dev_err(enic, "See the ENIC PMD guide for more information.\n");
1598                 return -EINVAL;
1599         }
1600         /* Queue counts may be zeros. rte_zmalloc returns NULL in that case. */
1601         enic->cq = rte_zmalloc("enic_vnic_cq", sizeof(struct vnic_cq) *
1602                                enic->conf_cq_count, 8);
1603         enic->intr = rte_zmalloc("enic_vnic_intr", sizeof(struct vnic_intr) *
1604                                  enic->conf_intr_count, 8);
1605         enic->rq = rte_zmalloc("enic_vnic_rq", sizeof(struct vnic_rq) *
1606                                enic->conf_rq_count, 8);
1607         enic->wq = rte_zmalloc("enic_vnic_wq", sizeof(struct vnic_wq) *
1608                                enic->conf_wq_count, 8);
1609         if (enic->conf_cq_count > 0 && enic->cq == NULL) {
1610                 dev_err(enic, "failed to allocate vnic_cq, aborting.\n");
1611                 return -1;
1612         }
1613         if (enic->conf_intr_count > 0 && enic->intr == NULL) {
1614                 dev_err(enic, "failed to allocate vnic_intr, aborting.\n");
1615                 return -1;
1616         }
1617         if (enic->conf_rq_count > 0 && enic->rq == NULL) {
1618                 dev_err(enic, "failed to allocate vnic_rq, aborting.\n");
1619                 return -1;
1620         }
1621         if (enic->conf_wq_count > 0 && enic->wq == NULL) {
1622                 dev_err(enic, "failed to allocate vnic_wq, aborting.\n");
1623                 return -1;
1624         }
1625
1626         /* Get the supported filters */
1627         enic_fdir_info(enic);
1628
1629         eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN
1630                                                 * ENIC_MAX_MAC_ADDR, 0);
1631         if (!eth_dev->data->mac_addrs) {
1632                 dev_err(enic, "mac addr storage alloc failed, aborting.\n");
1633                 return -1;
1634         }
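        /*
         * Install the MAC reported in the vNIC config as the default
         * (index 0) address.
         */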
1635         ether_addr_copy((struct ether_addr *) enic->mac_addr,
1636                         eth_dev->data->mac_addrs);
1637
1638         vnic_dev_set_reset_flag(enic->vdev, 0);
1639
1640         LIST_INIT(&enic->flows);
1641         rte_spinlock_init(&enic->flows_lock);
1642
1643         /* set up link status checking */
1644         vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
1645
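        /*
         * Overlay (tunnel) offload: if the adapter firmware advertises the
         * VXLAN feature and it has not been disabled through the driver's
         * devargs, enable it and advertise the matching Tx offloads.
         */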
1646         enic->overlay_offload = false;
1647         if (!enic->disable_overlay && enic->vxlan &&
1648             /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
1649             vnic_dev_overlay_offload_ctrl(enic->vdev,
1650                                           OVERLAY_FEATURE_VXLAN,
1651                                           OVERLAY_OFFLOAD_ENABLE) == 0) {
1652                 enic->tx_offload_capa |=
1653                         DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
1654                         DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
1655                         DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
1656                 /*
1657                  * Do not add PKT_TX_OUTER_{IPV4,IPV6} as they are not
1658                  * 'offload' flags (i.e. not part of PKT_TX_OFFLOAD_MASK).
1659                  */
1660                 enic->tx_offload_mask |=
1661                         PKT_TX_OUTER_IP_CKSUM |
1662                         PKT_TX_TUNNEL_MASK;
1663                 enic->overlay_offload = true;
1664                 enic->vxlan_port = ENIC_DEFAULT_VXLAN_PORT;
1665                 dev_info(enic, "Overlay offload is enabled\n");
1666                 /*
1667                  * Reset the vxlan port to the default, as the NIC firmware
1668                  * does not reset it automatically and keeps the old setting.
1669                  */
1670                 if (vnic_dev_overlay_offload_cfg(enic->vdev,
1671                                                  OVERLAY_CFG_VXLAN_PORT_UPDATE,
1672                                                  ENIC_DEFAULT_VXLAN_PORT)) {
1673                         dev_err(enic, "failed to update vxlan port\n");
1674                         return -EINVAL;
1675                 }
1676         }
1677
1678         return 0;
1679
1680 }
1681
1682 int enic_probe(struct enic *enic)
1683 {
1684         struct rte_pci_device *pdev = enic->pdev;
1685         int err = -1;
1686
1687         dev_debug(enic, "Initializing ENIC PMD\n");
1688
1689         /* If this is a secondary process, the hardware is already initialized */
1690         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1691                 return 0;
1692
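        /* The vNIC's resource registers are exposed through PCI BAR 0 */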
1693         enic->bar0.vaddr = (void *)pdev->mem_resource[0].addr;
1694         enic->bar0.len = pdev->mem_resource[0].len;
1695
1696         /* Register vNIC device */
1697         enic->vdev = vnic_dev_register(NULL, enic, enic->pdev, &enic->bar0, 1);
1698         if (!enic->vdev) {
1699                 dev_err(enic, "vNIC registration failed, aborting\n");
1700                 goto err_out;
1701         }
1702
1703         LIST_INIT(&enic->memzone_list);
1704         rte_spinlock_init(&enic->memzone_list_lock);
1705
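        /*
         * These callbacks allocate/free DMA-able memory for the vNIC's rings
         * and control structures from rte memzones tracked on memzone_list.
         */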
1706         vnic_register_cbacks(enic->vdev,
1707                 enic_alloc_consistent,
1708                 enic_free_consistent);
1709
1710         /*
1711          * Allocate the consistent memory for stats upfront so both primary and
1712          * secondary processes can dump stats.
1713          */
1714         err = vnic_dev_alloc_stats_mem(enic->vdev);
1715         if (err) {
1716                 dev_err(enic, "Failed to allocate stats memory, aborting\n");
1717                 goto err_out_unregister;
1718         }
1719         /* Issue device open to get device in known state */
1720         err = enic_dev_open(enic);
1721         if (err) {
1722                 dev_err(enic, "vNIC dev open failed, aborting\n");
1723                 goto err_out_unregister;
1724         }
1725
1726         /* Set ingress vlan rewrite mode before vnic initialization */
1727         dev_debug(enic, "Set ig_vlan_rewrite_mode=%u\n",
1728                   enic->ig_vlan_rewrite_mode);
1729         err = vnic_dev_set_ig_vlan_rewrite_mode(enic->vdev,
1730                 enic->ig_vlan_rewrite_mode);
1731         if (err) {
1732                 dev_err(enic,
1733                         "Failed to set ingress vlan rewrite mode, aborting.\n");
1734                 goto err_out_dev_close;
1735         }
1736
1737         /* Issue device init to initialize the vnic-to-switch link.
1738          * We'll start with carrier off and wait for link UP
1739          * notification later to turn on carrier.  We don't need
1740          * to wait here for the vnic-to-switch link initialization
1741          * to complete; link UP notification is the indication that
1742          * the process is complete.
1743          */
1744
1745         err = vnic_dev_init(enic->vdev, 0);
1746         if (err) {
1747                 dev_err(enic, "vNIC dev init failed, aborting\n");
1748                 goto err_out_dev_close;
1749         }
1750
1751         err = enic_dev_init(enic);
1752         if (err) {
1753                 dev_err(enic, "Device initialization failed, aborting\n");
1754                 goto err_out_dev_close;
1755         }
1756
1757         return 0;
1758
1759 err_out_dev_close:
1760         vnic_dev_close(enic->vdev);
1761 err_out_unregister:
1762         vnic_dev_unregister(enic->vdev);
1763 err_out:
1764         return err;
1765 }
1766
1767 void enic_remove(struct enic *enic)
1768 {
1769         enic_dev_deinit(enic);
1770         vnic_dev_close(enic->vdev);
1771         vnic_dev_unregister(enic->vdev);
1772 }