1 /*
2  * Copyright (c) 2016 QLogic Corporation.
3  * All rights reserved.
4  * www.qlogic.com
5  *
6  * See LICENSE.qede_pmd for copyright and licensing details.
7  */
8
9 #include <rte_net.h>
10 #include "qede_rxtx.h"
11
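/* Allocate one mbuf from the queue's mempool, place it in the software
 * RX ring at the current producer index, program its DMA address into
 * the next RX buffer descriptor and advance the software producer.
 * Returns 0 on success, -ENOMEM if the mempool is exhausted.
 */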
12 static inline int qede_alloc_rx_buffer(struct qede_rx_queue *rxq)
13 {
14         struct rte_mbuf *new_mb = NULL;
15         struct eth_rx_bd *rx_bd;
16         dma_addr_t mapping;
17         uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq);
18
19         new_mb = rte_mbuf_raw_alloc(rxq->mb_pool);
20         if (unlikely(!new_mb)) {
21                 PMD_RX_LOG(ERR, rxq,
22                            "Failed to allocate rx buffer "
23                            "sw_rx_prod %u sw_rx_cons %u mp entries %u free %u",
24                            idx, rxq->sw_rx_cons & NUM_RX_BDS(rxq),
25                            rte_mempool_avail_count(rxq->mb_pool),
26                            rte_mempool_in_use_count(rxq->mb_pool));
27                 return -ENOMEM;
28         }
29         rxq->sw_rx_ring[idx].mbuf = new_mb;
30         rxq->sw_rx_ring[idx].page_offset = 0;
31         mapping = rte_mbuf_data_dma_addr_default(new_mb);
32         /* Advance PROD and get BD pointer */
33         rx_bd = (struct eth_rx_bd *)ecore_chain_produce(&rxq->rx_bd_ring);
34         rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping));
35         rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping));
36         rxq->sw_rx_prod++;
37         return 0;
38 }
39
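/* Free any mbufs still held in the software RX ring. */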
40 static void qede_rx_queue_release_mbufs(struct qede_rx_queue *rxq)
41 {
42         uint16_t i;
43
44         if (rxq->sw_rx_ring != NULL) {
45                 for (i = 0; i < rxq->nb_rx_desc; i++) {
46                         if (rxq->sw_rx_ring[i].mbuf != NULL) {
47                                 rte_pktmbuf_free(rxq->sw_rx_ring[i].mbuf);
48                                 rxq->sw_rx_ring[i].mbuf = NULL;
49                         }
50                 }
51         }
52 }
53
54 void qede_rx_queue_release(void *rx_queue)
55 {
56         struct qede_rx_queue *rxq = rx_queue;
57
58         if (rxq != NULL) {
59                 qede_rx_queue_release_mbufs(rxq);
60                 rte_free(rxq->sw_rx_ring);
61                 rxq->sw_rx_ring = NULL;
62                 rte_free(rxq);
63                 rxq = NULL;
64         }
65 }
66
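/* Free any mbufs still held in the software TX ring. */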
67 static void qede_tx_queue_release_mbufs(struct qede_tx_queue *txq)
68 {
69         unsigned int i;
70
71         PMD_TX_LOG(DEBUG, txq, "releasing %u mbufs", txq->nb_tx_desc);
72
73         if (txq->sw_tx_ring) {
74                 for (i = 0; i < txq->nb_tx_desc; i++) {
75                         if (txq->sw_tx_ring[i].mbuf) {
76                                 rte_pktmbuf_free(txq->sw_tx_ring[i].mbuf);
77                                 txq->sw_tx_ring[i].mbuf = NULL;
78                         }
79                 }
80         }
81 }
82
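/* rx_queue_setup ethdev callback: validates the descriptor count,
 * derives the RX buffer size (forcing scatter-gather mode when the mbuf
 * data room cannot hold max_rx_pkt_len), allocates the software ring and
 * the firmware BD and completion chains, and pre-fills the ring with
 * mbufs.
 */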
83 int
84 qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
85                     uint16_t nb_desc, unsigned int socket_id,
86                     __rte_unused const struct rte_eth_rxconf *rx_conf,
87                     struct rte_mempool *mp)
88 {
89         struct qede_dev *qdev = dev->data->dev_private;
90         struct ecore_dev *edev = &qdev->edev;
91         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
92         struct qede_rx_queue *rxq;
93         uint16_t max_rx_pkt_len;
94         uint16_t bufsz;
95         size_t size;
96         int rc;
97         int i;
98
99         PMD_INIT_FUNC_TRACE(edev);
100
101         /* Note: Ring size/align is controlled by struct rte_eth_desc_lim */
102         if (!rte_is_power_of_2(nb_desc)) {
103                 DP_ERR(edev, "Ring size %u is not power of 2\n",
104                           nb_desc);
105                 return -EINVAL;
106         }
107
108         /* Free memory prior to re-allocation if needed... */
109         if (dev->data->rx_queues[queue_idx] != NULL) {
110                 qede_rx_queue_release(dev->data->rx_queues[queue_idx]);
111                 dev->data->rx_queues[queue_idx] = NULL;
112         }
113
114         /* First allocate the rx queue data structure */
115         rxq = rte_zmalloc_socket("qede_rx_queue", sizeof(struct qede_rx_queue),
116                                  RTE_CACHE_LINE_SIZE, socket_id);
117
118         if (!rxq) {
119                 DP_ERR(edev, "Unable to allocate memory for rxq on socket %u",
120                           socket_id);
121                 return -ENOMEM;
122         }
123
124         rxq->qdev = qdev;
125         rxq->mb_pool = mp;
126         rxq->nb_rx_desc = nb_desc;
127         rxq->queue_id = queue_idx;
128         rxq->port_id = dev->data->port_id;
129         max_rx_pkt_len = (uint16_t)rxmode->max_rx_pkt_len;
130         qdev->mtu = max_rx_pkt_len;
131
132         /* Fix up RX buffer size */
133         bufsz = (uint16_t)rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
134         if ((rxmode->enable_scatter)                    ||
135             (max_rx_pkt_len + QEDE_ETH_OVERHEAD) > bufsz) {
136                 if (!dev->data->scattered_rx) {
137                         DP_INFO(edev, "Forcing scatter-gather mode\n");
138                         dev->data->scattered_rx = 1;
139                 }
140         }
141         if (dev->data->scattered_rx)
142                 rxq->rx_buf_size = bufsz + QEDE_ETH_OVERHEAD;
143         else
144                 rxq->rx_buf_size = qdev->mtu + QEDE_ETH_OVERHEAD;
145         /* Align to cache-line size if needed */
146         rxq->rx_buf_size = QEDE_CEIL_TO_CACHE_LINE_SIZE(rxq->rx_buf_size);
147
148         DP_INFO(edev, "mtu %u mbufsz %u bd_max_bytes %u scatter_mode %d\n",
149                 qdev->mtu, bufsz, rxq->rx_buf_size, dev->data->scattered_rx);
150
151         /* Allocate the parallel driver ring for Rx buffers */
152         size = sizeof(*rxq->sw_rx_ring) * rxq->nb_rx_desc;
153         rxq->sw_rx_ring = rte_zmalloc_socket("sw_rx_ring", size,
154                                              RTE_CACHE_LINE_SIZE, socket_id);
155         if (!rxq->sw_rx_ring) {
156                 DP_NOTICE(edev, false,
157                           "Unable to alloc memory for sw_rx_ring on socket %u\n",
158                           socket_id);
159                 rte_free(rxq);
160                 rxq = NULL;
161                 return -ENOMEM;
162         }
163
164         /* Allocate FW Rx ring  */
165         rc = qdev->ops->common->chain_alloc(edev,
166                                             ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
167                                             ECORE_CHAIN_MODE_NEXT_PTR,
168                                             ECORE_CHAIN_CNT_TYPE_U16,
169                                             rxq->nb_rx_desc,
170                                             sizeof(struct eth_rx_bd),
171                                             &rxq->rx_bd_ring,
172                                             NULL);
173
174         if (rc != ECORE_SUCCESS) {
175                 DP_NOTICE(edev, false,
176                           "Unable to alloc memory for rxbd ring on socket %u\n",
177                           socket_id);
178                 rte_free(rxq->sw_rx_ring);
179                 rxq->sw_rx_ring = NULL;
180                 rte_free(rxq);
181                 rxq = NULL;
182                 return -ENOMEM;
183         }
184
185         /* Allocate FW completion ring */
186         rc = qdev->ops->common->chain_alloc(edev,
187                                             ECORE_CHAIN_USE_TO_CONSUME,
188                                             ECORE_CHAIN_MODE_PBL,
189                                             ECORE_CHAIN_CNT_TYPE_U16,
190                                             rxq->nb_rx_desc,
191                                             sizeof(union eth_rx_cqe),
192                                             &rxq->rx_comp_ring,
193                                             NULL);
194
195         if (rc != ECORE_SUCCESS) {
196                 DP_NOTICE(edev, false,
197                           "Unable to alloc memory for cqe ring on socket %u\n",
198                           socket_id);
199                 /* TBD: Freeing RX BD ring */
200                 rte_free(rxq->sw_rx_ring);
201                 rxq->sw_rx_ring = NULL;
202                 rte_free(rxq);
203                 return -ENOMEM;
204         }
205
206         /* Allocate buffers for the Rx ring */
207         for (i = 0; i < rxq->nb_rx_desc; i++) {
208                 rc = qede_alloc_rx_buffer(rxq);
209                 if (rc) {
210                         DP_NOTICE(edev, false,
211                                   "RX buffer allocation failed at idx=%d\n", i);
212                         goto err4;
213                 }
214         }
215
216         dev->data->rx_queues[queue_idx] = rxq;
217
218         DP_INFO(edev, "rxq %d num_desc %u rx_buf_size=%u socket %u\n",
219                   queue_idx, nb_desc, rxq->rx_buf_size, socket_id);
220
221         return 0;
222 err4:
223         qede_rx_queue_release(rxq);
224         return -ENOMEM;
225 }
226
227 void qede_tx_queue_release(void *tx_queue)
228 {
229         struct qede_tx_queue *txq = tx_queue;
230
231         if (txq != NULL) {
232                 qede_tx_queue_release_mbufs(txq);
233                 if (txq->sw_tx_ring) {
234                         rte_free(txq->sw_tx_ring);
235                         txq->sw_tx_ring = NULL;
236                 }
237                 rte_free(txq);
238         }
239         txq = NULL;
240 }
241
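/* tx_queue_setup ethdev callback: validates the descriptor count,
 * allocates the TX queue structure, the firmware PBL chain and the
 * software ring, and derives tx_free_thresh from the user configuration
 * or the driver default.
 */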
242 int
243 qede_tx_queue_setup(struct rte_eth_dev *dev,
244                     uint16_t queue_idx,
245                     uint16_t nb_desc,
246                     unsigned int socket_id,
247                     const struct rte_eth_txconf *tx_conf)
248 {
249         struct qede_dev *qdev = dev->data->dev_private;
250         struct ecore_dev *edev = &qdev->edev;
251         struct qede_tx_queue *txq;
252         int rc;
253
254         PMD_INIT_FUNC_TRACE(edev);
255
256         if (!rte_is_power_of_2(nb_desc)) {
257                 DP_ERR(edev, "Ring size %u is not power of 2\n",
258                        nb_desc);
259                 return -EINVAL;
260         }
261
262         /* Free memory prior to re-allocation if needed... */
263         if (dev->data->tx_queues[queue_idx] != NULL) {
264                 qede_tx_queue_release(dev->data->tx_queues[queue_idx]);
265                 dev->data->tx_queues[queue_idx] = NULL;
266         }
267
268         txq = rte_zmalloc_socket("qede_tx_queue", sizeof(struct qede_tx_queue),
269                                  RTE_CACHE_LINE_SIZE, socket_id);
270
271         if (txq == NULL) {
272                 DP_ERR(edev,
273                        "Unable to allocate memory for txq on socket %u",
274                        socket_id);
275                 return -ENOMEM;
276         }
277
278         txq->nb_tx_desc = nb_desc;
279         txq->qdev = qdev;
280         txq->port_id = dev->data->port_id;
281
282         rc = qdev->ops->common->chain_alloc(edev,
283                                             ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
284                                             ECORE_CHAIN_MODE_PBL,
285                                             ECORE_CHAIN_CNT_TYPE_U16,
286                                             txq->nb_tx_desc,
287                                             sizeof(union eth_tx_bd_types),
288                                             &txq->tx_pbl,
289                                             NULL);
290         if (rc != ECORE_SUCCESS) {
291                 DP_ERR(edev,
292                        "Unable to allocate memory for txbd ring on socket %u",
293                        socket_id);
294                 qede_tx_queue_release(txq);
295                 return -ENOMEM;
296         }
297
298         /* Allocate software ring */
299         txq->sw_tx_ring = rte_zmalloc_socket("txq->sw_tx_ring",
300                                              (sizeof(struct qede_tx_entry) *
301                                               txq->nb_tx_desc),
302                                              RTE_CACHE_LINE_SIZE, socket_id);
303
304         if (!txq->sw_tx_ring) {
305                 DP_ERR(edev,
306                        "Unable to allocate memory for txbd ring on socket %u",
307                        socket_id);
308                 qede_tx_queue_release(txq);
309                 return -ENOMEM;
310         }
311
312         txq->queue_id = queue_idx;
313
314         txq->nb_tx_avail = txq->nb_tx_desc;
315
316         txq->tx_free_thresh =
317             tx_conf->tx_free_thresh ? tx_conf->tx_free_thresh :
318             (txq->nb_tx_desc - QEDE_DEFAULT_TX_FREE_THRESH);
319
320         dev->data->tx_queues[queue_idx] = txq;
321
322         DP_INFO(edev,
323                   "txq %u num_desc %u tx_free_thresh %u socket %u\n",
324                   queue_idx, nb_desc, txq->tx_free_thresh, socket_id);
325
326         return 0;
327 }
328
329 /* This function inits fp content and resets the SB, RXQ and TXQ arrays */
330 static void qede_init_fp(struct qede_dev *qdev)
331 {
332         struct qede_fastpath *fp;
333         uint8_t i;
334         int fp_rx = qdev->fp_num_rx;
335
336         memset((void *)qdev->fp_array, 0, (QEDE_QUEUE_CNT(qdev) *
337                                            sizeof(*qdev->fp_array)));
338         memset((void *)qdev->sb_array, 0, (QEDE_QUEUE_CNT(qdev) *
339                                            sizeof(*qdev->sb_array)));
340         for_each_queue(i) {
341                 fp = &qdev->fp_array[i];
342                 if (fp_rx) {
343                         fp->type = QEDE_FASTPATH_RX;
344                         fp_rx--;
345                 } else {
346                         fp->type = QEDE_FASTPATH_TX;
347                 }
348                 fp->qdev = qdev;
349                 fp->id = i;
350                 fp->sb_info = &qdev->sb_array[i];
351                 snprintf(fp->name, sizeof(fp->name), "%s-fp-%d", "qdev", i);
352         }
353
354 }
355
356 void qede_free_fp_arrays(struct qede_dev *qdev)
357 {
358         /* It assumes qede_free_mem_load() has already been called */
359         if (qdev->fp_array != NULL) {
360                 rte_free(qdev->fp_array);
361                 qdev->fp_array = NULL;
362         }
363
364         if (qdev->sb_array != NULL) {
365                 rte_free(qdev->sb_array);
366                 qdev->sb_array = NULL;
367         }
368 }
369
370 static int qede_alloc_fp_array(struct qede_dev *qdev)
371 {
372         struct ecore_dev *edev = &qdev->edev;
373
374         qdev->fp_array = rte_calloc("fp", QEDE_QUEUE_CNT(qdev),
375                                     sizeof(*qdev->fp_array),
376                                     RTE_CACHE_LINE_SIZE);
377
378         if (!qdev->fp_array) {
379                 DP_ERR(edev, "fp array allocation failed\n");
380                 return -ENOMEM;
381         }
382
383         qdev->sb_array = rte_calloc("sb", QEDE_QUEUE_CNT(qdev),
384                                     sizeof(*qdev->sb_array),
385                                     RTE_CACHE_LINE_SIZE);
386
387         if (!qdev->sb_array) {
388                 DP_ERR(edev, "sb array allocation failed\n");
389                 rte_free(qdev->fp_array);
390                 return -ENOMEM;
391         }
392
393         return 0;
394 }
395
396 /* This function allocates fast-path status block memory */
397 static int
398 qede_alloc_mem_sb(struct qede_dev *qdev, struct ecore_sb_info *sb_info,
399                   uint16_t sb_id)
400 {
401         struct ecore_dev *edev = &qdev->edev;
402         struct status_block *sb_virt;
403         dma_addr_t sb_phys;
404         int rc;
405
406         sb_virt = OSAL_DMA_ALLOC_COHERENT(edev, &sb_phys, sizeof(*sb_virt));
407
408         if (!sb_virt) {
409                 DP_ERR(edev, "Status block allocation failed\n");
410                 return -ENOMEM;
411         }
412
413         rc = qdev->ops->common->sb_init(edev, sb_info,
414                                         sb_virt, sb_phys, sb_id,
415                                         QED_SB_TYPE_L2_QUEUE);
416         if (rc) {
417                 DP_ERR(edev, "Status block initialization failed\n");
418                 /* TBD: No dma_free_coherent possible */
419                 return rc;
420         }
421
422         return 0;
423 }
424
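/* Allocate fastpath resources: query the number of available status
 * blocks (from the VF info or the ETH CID count on a PF), allocate and
 * initialize the fp/sb arrays and allocate one status block per queue.
 */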
425 int qede_alloc_fp_resc(struct qede_dev *qdev)
426 {
427         struct ecore_dev *edev = &qdev->edev;
428         struct qede_fastpath *fp;
429         uint32_t num_sbs;
430         uint16_t i;
431         uint16_t sb_idx;
432         int rc;
433
434         if (IS_VF(edev))
435                 ecore_vf_get_num_sbs(ECORE_LEADING_HWFN(edev), &num_sbs);
436         else
437                 num_sbs = ecore_cxt_get_proto_cid_count
438                           (ECORE_LEADING_HWFN(edev), PROTOCOLID_ETH, NULL);
439
440         if (num_sbs == 0) {
441                 DP_ERR(edev, "No status blocks available\n");
442                 return -EINVAL;
443         }
444
445         if (qdev->fp_array)
446                 qede_free_fp_arrays(qdev);
447
448         rc = qede_alloc_fp_array(qdev);
449         if (rc != 0)
450                 return rc;
451
452         qede_init_fp(qdev);
453
454         for (i = 0; i < QEDE_QUEUE_CNT(qdev); i++) {
455                 fp = &qdev->fp_array[i];
456                 if (IS_VF(edev))
457                         sb_idx = i % num_sbs;
458                 else
459                         sb_idx = i;
460                 if (qede_alloc_mem_sb(qdev, fp->sb_info, sb_idx)) {
461                         qede_free_fp_arrays(qdev);
462                         return -ENOMEM;
463                 }
464         }
465
466         return 0;
467 }
468
469 void qede_dealloc_fp_resc(struct rte_eth_dev *eth_dev)
470 {
471         struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
472
473         qede_free_mem_load(eth_dev);
474         qede_free_fp_arrays(qdev);
475 }
476
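/* Publish the current BD and CQE producer values to the device's
 * internal RAM so the firmware can start using the newly posted
 * buffers. A write barrier is issued first so the BD contents are
 * visible before the producers are updated.
 */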
477 static inline void
478 qede_update_rx_prod(__rte_unused struct qede_dev *edev,
479                     struct qede_rx_queue *rxq)
480 {
481         uint16_t bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring);
482         uint16_t cqe_prod = ecore_chain_get_prod_idx(&rxq->rx_comp_ring);
483         struct eth_rx_prod_data rx_prods = { 0 };
484
485         /* Update producers */
486         rx_prods.bd_prod = rte_cpu_to_le_16(bd_prod);
487         rx_prods.cqe_prod = rte_cpu_to_le_16(cqe_prod);
488
489         /* Make sure that the BD and SGE data is updated before updating the
490          * producers since FW might read the BD/SGE right after the producer
491          * is updated.
492          */
493         rte_wmb();
494
495         internal_ram_wr(rxq->hw_rxq_prod_addr, sizeof(rx_prods),
496                         (uint32_t *)&rx_prods);
497
498         /* A second barrier takes the place of the kernel driver's mmiowb():
499          * it synchronizes doorbell writes from more than one core and
500          * guarantees this write reaches the device before a later producer
501          * update issued from another core. Without it, the next doorbell
502          * could bypass this one. This is applicable to IA64/Altix systems.
503          */
504         rte_wmb();
505
506         PMD_RX_LOG(DEBUG, rxq, "bd_prod %u  cqe_prod %u", bd_prod, cqe_prod);
507 }
508
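/* Fill the SGE/TPA parameters carried in the vport-update used to turn
 * LRO aggregation on or off. TPA runs in packet-split mode, i.e. every
 * aggregated segment starts on its own BD.
 */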
509 static void
510 qede_update_sge_tpa_params(struct ecore_sge_tpa_params *sge_tpa_params,
511                            uint16_t mtu, bool enable)
512 {
513         /* Enable LRO in split mode */
514         sge_tpa_params->tpa_ipv4_en_flg = enable;
515         sge_tpa_params->tpa_ipv6_en_flg = enable;
516         sge_tpa_params->tpa_ipv4_tunn_en_flg = false;
517         sge_tpa_params->tpa_ipv6_tunn_en_flg = false;
518         /* set if tpa enable changes */
519         sge_tpa_params->update_tpa_en_flg = 1;
520         /* set if tpa parameters should be handled */
521         sge_tpa_params->update_tpa_param_flg = enable;
522
523         sge_tpa_params->max_buffers_per_cqe = 20;
524         /* Enable TPA in split mode. In this mode each TPA segment
525          * starts on the new BD, so there is one BD per segment.
526          */
527         sge_tpa_params->tpa_pkt_split_flg = 1;
528         sge_tpa_params->tpa_hdr_data_split_flg = 0;
529         sge_tpa_params->tpa_gro_consistent_flg = 0;
530         sge_tpa_params->tpa_max_aggs_num = ETH_TPA_MAX_AGGS_NUM;
531         sge_tpa_params->tpa_max_size = 0x7FFF;
532         sge_tpa_params->tpa_min_size_to_start = mtu / 2;
533         sge_tpa_params->tpa_min_size_to_cont = mtu / 2;
534 }
535
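/* Start all RX and TX queues through the ecore queue-start interface,
 * program the RX producers and the TX doorbell data, and finally send a
 * vport-update to activate the vport (enabling TX switching and LRO/TPA
 * when applicable).
 */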
536 static int qede_start_queues(struct rte_eth_dev *eth_dev,
537                              __rte_unused bool clear_stats)
538 {
539         struct qede_dev *qdev = eth_dev->data->dev_private;
540         struct ecore_dev *edev = &qdev->edev;
541         struct ecore_queue_start_common_params q_params;
542         struct qed_dev_info *qed_info = &qdev->dev_info.common;
543         struct qed_update_vport_params vport_update_params;
544         struct ecore_sge_tpa_params tpa_params;
545         struct qede_tx_queue *txq;
546         struct qede_fastpath *fp;
547         dma_addr_t p_phys_table;
548         int txq_index;
549         uint16_t page_cnt;
550         int rc, tc, i;
551
552         for_each_queue(i) {
553                 fp = &qdev->fp_array[i];
554                 if (fp->type & QEDE_FASTPATH_RX) {
555                         struct ecore_rxq_start_ret_params ret_params;
556
557                         p_phys_table =
558                             ecore_chain_get_pbl_phys(&fp->rxq->rx_comp_ring);
559                         page_cnt =
560                             ecore_chain_get_page_cnt(&fp->rxq->rx_comp_ring);
561
562                         memset(&ret_params, 0, sizeof(ret_params));
563                         memset(&q_params, 0, sizeof(q_params));
564                         q_params.queue_id = i;
565                         q_params.vport_id = 0;
566                         q_params.sb = fp->sb_info->igu_sb_id;
567                         q_params.sb_idx = RX_PI;
568
569                         ecore_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0);
570
571                         rc = qdev->ops->q_rx_start(edev, i, &q_params,
572                                            fp->rxq->rx_buf_size,
573                                            fp->rxq->rx_bd_ring.p_phys_addr,
574                                            p_phys_table,
575                                            page_cnt,
576                                            &ret_params);
577                         if (rc) {
578                                 DP_ERR(edev, "Start rxq #%d failed %d\n",
579                                        fp->rxq->queue_id, rc);
580                                 return rc;
581                         }
582
583                         /* Use the return parameters */
584                         fp->rxq->hw_rxq_prod_addr = ret_params.p_prod;
585                         fp->rxq->handle = ret_params.p_handle;
586
587                         fp->rxq->hw_cons_ptr =
588                                         &fp->sb_info->sb_virt->pi_array[RX_PI];
589
590                         qede_update_rx_prod(qdev, fp->rxq);
591                 }
592
593                 if (!(fp->type & QEDE_FASTPATH_TX))
594                         continue;
595                 for (tc = 0; tc < qdev->num_tc; tc++) {
596                         struct ecore_txq_start_ret_params ret_params;
597
598                         txq = fp->txqs[tc];
599                         txq_index = tc * QEDE_RSS_COUNT(qdev) + i;
600
601                         p_phys_table = ecore_chain_get_pbl_phys(&txq->tx_pbl);
602                         page_cnt = ecore_chain_get_page_cnt(&txq->tx_pbl);
603
604                         memset(&q_params, 0, sizeof(q_params));
605                         memset(&ret_params, 0, sizeof(ret_params));
606                         q_params.queue_id = txq->queue_id;
607                         q_params.vport_id = 0;
608                         q_params.sb = fp->sb_info->igu_sb_id;
609                         q_params.sb_idx = TX_PI(tc);
610
611                         rc = qdev->ops->q_tx_start(edev, i, &q_params,
612                                                    p_phys_table,
613                                                    page_cnt, /* **pp_doorbell */
614                                                    &ret_params);
615                         if (rc) {
616                                 DP_ERR(edev, "Start txq %u failed %d\n",
617                                        txq_index, rc);
618                                 return rc;
619                         }
620
621                         txq->doorbell_addr = ret_params.p_doorbell;
622                         txq->handle = ret_params.p_handle;
623
624                         txq->hw_cons_ptr =
625                             &fp->sb_info->sb_virt->pi_array[TX_PI(tc)];
626                         SET_FIELD(txq->tx_db.data.params,
627                                   ETH_DB_DATA_DEST, DB_DEST_XCM);
628                         SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD,
629                                   DB_AGG_CMD_SET);
630                         SET_FIELD(txq->tx_db.data.params,
631                                   ETH_DB_DATA_AGG_VAL_SEL,
632                                   DQ_XCM_ETH_TX_BD_PROD_CMD);
633
634                         txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
635                 }
636         }
637
638         /* Prepare and send the vport enable */
639         memset(&vport_update_params, 0, sizeof(vport_update_params));
640         /* Update MTU via vport update */
641         vport_update_params.mtu = qdev->mtu;
642         vport_update_params.vport_id = 0;
643         vport_update_params.update_vport_active_flg = 1;
644         vport_update_params.vport_active_flg = 1;
645
646         /* @DPDK */
647         if (qed_info->mf_mode == MF_NPAR && qed_info->tx_switching) {
648                 /* TBD: Check SRIOV enabled for VF */
649                 vport_update_params.update_tx_switching_flg = 1;
650                 vport_update_params.tx_switching_flg = 1;
651         }
652
653         /* TPA */
654         if (qdev->enable_lro) {
655                 DP_INFO(edev, "Enabling LRO\n");
656                 memset(&tpa_params, 0, sizeof(struct ecore_sge_tpa_params));
657                 qede_update_sge_tpa_params(&tpa_params, qdev->mtu, true);
658                 vport_update_params.sge_tpa_params = &tpa_params;
659         }
660
661         rc = qdev->ops->vport_update(edev, &vport_update_params);
662         if (rc) {
663                 DP_ERR(edev, "Update V-PORT failed %d\n", rc);
664                 return rc;
665         }
666
667         return 0;
668 }
669
670 static bool qede_tunn_exist(uint16_t flag)
671 {
672         return !!((PARSING_AND_ERR_FLAGS_TUNNELEXIST_MASK <<
673                     PARSING_AND_ERR_FLAGS_TUNNELEXIST_SHIFT) & flag);
674 }
675
676 /*
677  * qede_check_tunn_csum_l4:
678  * Returns:
679  * 1 : If the L4 checksum was calculated by HW AND the validation failed.
680  * 0 : Otherwise
681  */
682 static inline uint8_t qede_check_tunn_csum_l4(uint16_t flag)
683 {
684         if ((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_MASK <<
685              PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_SHIFT) & flag)
686                 return !!((PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_MASK <<
687                         PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT) & flag);
688
689         return 0;
690 }
691
692 static inline uint8_t qede_check_notunn_csum_l4(uint16_t flag)
693 {
694         if ((PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK <<
695              PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT) & flag)
696                 return !!((PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK <<
697                            PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT) & flag);
698
699         return 0;
700 }
701
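/* Translate the L3/L4 parsing flags of a regular RX CQE into an
 * RTE_PTYPE_* value using a small lookup table.
 */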
702 static inline uint32_t qede_rx_cqe_to_pkt_type(uint16_t flags)
703 {
704         uint16_t val;
705
706         /* Lookup table */
707         static const uint32_t
708         ptype_lkup_tbl[QEDE_PKT_TYPE_MAX] __rte_cache_aligned = {
709                 [QEDE_PKT_TYPE_IPV4] = RTE_PTYPE_L3_IPV4,
710                 [QEDE_PKT_TYPE_IPV6] = RTE_PTYPE_L3_IPV6,
711                 [QEDE_PKT_TYPE_IPV4_TCP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
712                 [QEDE_PKT_TYPE_IPV6_TCP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
713                 [QEDE_PKT_TYPE_IPV4_UDP] = RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
714                 [QEDE_PKT_TYPE_IPV6_UDP] = RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
715         };
716
717         /* Bits (0..3) provide the L3/L4 protocol type */
718         val = ((PARSING_AND_ERR_FLAGS_L3TYPE_MASK <<
719                PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) |
720                (PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK <<
721                 PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT)) & flags;
722
723         if (val < QEDE_PKT_TYPE_MAX)
724                 return ptype_lkup_tbl[val] | RTE_PTYPE_L2_ETHER;
725         else
726                 return RTE_PTYPE_UNKNOWN;
727 }
728
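/* If the hardware flagged an IP header error, re-verify the IPv4 header
 * checksum in software; returns non-zero when the checksum is really bad
 * (or the packet is IPv6 with the error flag set), 0 otherwise.
 */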
729 static inline uint8_t
730 qede_check_notunn_csum_l3(struct rte_mbuf *m, uint16_t flag)
731 {
732         struct ipv4_hdr *ip;
733         uint16_t pkt_csum;
734         uint16_t calc_csum;
735         uint16_t val;
736
737         val = ((PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK <<
738                 PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT) & flag);
739
740         if (unlikely(val)) {
741                 m->packet_type = qede_rx_cqe_to_pkt_type(flag);
742                 if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
743                         ip = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
744                                            sizeof(struct ether_hdr));
745                         pkt_csum = ip->hdr_checksum;
746                         ip->hdr_checksum = 0;
747                         calc_csum = rte_ipv4_cksum(ip);
748                         ip->hdr_checksum = pkt_csum;
749                         return (calc_csum != pkt_csum);
750                 } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
751                         return 1;
752                 }
753         }
754         return 0;
755 }
756
757 static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq)
758 {
759         ecore_chain_consume(&rxq->rx_bd_ring);
760         rxq->sw_rx_cons++;
761 }
762
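/* Re-post the just-consumed RX buffer on the producer side of the BD
 * ring without allocating a new mbuf; used to recycle buffers when a
 * packet has to be dropped (e.g. on allocation failure).
 */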
763 static inline void
764 qede_reuse_page(__rte_unused struct qede_dev *qdev,
765                 struct qede_rx_queue *rxq, struct qede_rx_entry *curr_cons)
766 {
767         struct eth_rx_bd *rx_bd_prod = ecore_chain_produce(&rxq->rx_bd_ring);
768         uint16_t idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
769         struct qede_rx_entry *curr_prod;
770         dma_addr_t new_mapping;
771
772         curr_prod = &rxq->sw_rx_ring[idx];
773         *curr_prod = *curr_cons;
774
775         new_mapping = rte_mbuf_data_dma_addr_default(curr_prod->mbuf) +
776                       curr_prod->page_offset;
777
778         rx_bd_prod->addr.hi = rte_cpu_to_le_32(U64_HI(new_mapping));
779         rx_bd_prod->addr.lo = rte_cpu_to_le_32(U64_LO(new_mapping));
780
781         rxq->sw_rx_prod++;
782 }
783
784 static inline void
785 qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq,
786                         struct qede_dev *qdev, uint8_t count)
787 {
788         struct qede_rx_entry *curr_cons;
789
790         for (; count > 0; count--) {
791                 curr_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS(rxq)];
792                 qede_reuse_page(qdev, rxq, curr_cons);
793                 qede_rx_bd_ring_consume(rxq);
794         }
795 }
796
797 static inline void
798 qede_rx_process_tpa_cmn_cont_end_cqe(__rte_unused struct qede_dev *qdev,
799                                      struct qede_rx_queue *rxq,
800                                      uint8_t agg_index, uint16_t len)
801 {
802         struct qede_agg_info *tpa_info;
803         struct rte_mbuf *curr_frag; /* Pointer to currently filled TPA seg */
804         uint16_t cons_idx;
805
806         /* Under certain conditions the FW may not consume an additional or
807          * new BD, so the decision to consume the BD must be made based on
808          * len_list[0].
809          */
810         if (rte_le_to_cpu_16(len)) {
811                 tpa_info = &rxq->tpa_info[agg_index];
812                 cons_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
813                 curr_frag = rxq->sw_rx_ring[cons_idx].mbuf;
814                 assert(curr_frag);
815                 curr_frag->nb_segs = 1;
816                 curr_frag->pkt_len = rte_le_to_cpu_16(len);
817                 curr_frag->data_len = curr_frag->pkt_len;
818                 tpa_info->tpa_tail->next = curr_frag;
819                 tpa_info->tpa_tail = curr_frag;
820                 qede_rx_bd_ring_consume(rxq);
821                 if (unlikely(qede_alloc_rx_buffer(rxq) != 0)) {
822                         PMD_RX_LOG(ERR, rxq, "mbuf allocation fails\n");
823                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
824                         rxq->rx_alloc_errors++;
825                 }
826         }
827 }
828
829 static inline void
830 qede_rx_process_tpa_cont_cqe(struct qede_dev *qdev,
831                              struct qede_rx_queue *rxq,
832                              struct eth_fast_path_rx_tpa_cont_cqe *cqe)
833 {
834         PMD_RX_LOG(INFO, rxq, "TPA cont[%d] - len [%d]\n",
835                    cqe->tpa_agg_index, rte_le_to_cpu_16(cqe->len_list[0]));
836         /* Only len_list[0] will have a value */
837         qede_rx_process_tpa_cmn_cont_end_cqe(qdev, rxq, cqe->tpa_agg_index,
838                                              cqe->len_list[0]);
839 }
840
841 static inline void
842 qede_rx_process_tpa_end_cqe(struct qede_dev *qdev,
843                             struct qede_rx_queue *rxq,
844                             struct eth_fast_path_rx_tpa_end_cqe *cqe)
845 {
846         struct rte_mbuf *rx_mb; /* Pointer to head of the chained agg */
847
848         qede_rx_process_tpa_cmn_cont_end_cqe(qdev, rxq, cqe->tpa_agg_index,
849                                              cqe->len_list[0]);
850         /* Update total length and frags based on end TPA */
851         rx_mb = rxq->tpa_info[cqe->tpa_agg_index].tpa_head;
852         /* TODO:  Add Sanity Checks */
853         rx_mb->nb_segs = cqe->num_of_bds;
854         rx_mb->pkt_len = cqe->total_packet_len;
855
856         PMD_RX_LOG(INFO, rxq, "TPA End[%d] reason %d cqe_len %d nb_segs %d"
857                    " pkt_len %d\n", cqe->tpa_agg_index, cqe->end_reason,
858                    rte_le_to_cpu_16(cqe->len_list[0]), rx_mb->nb_segs,
859                    rx_mb->pkt_len);
860 }
861
862 static inline uint32_t qede_rx_cqe_to_tunn_pkt_type(uint16_t flags)
863 {
864         uint32_t val;
865
866         /* Lookup table */
867         static const uint32_t
868         ptype_tunn_lkup_tbl[QEDE_PKT_TYPE_TUNN_MAX_TYPE] __rte_cache_aligned = {
869                 [QEDE_PKT_TYPE_UNKNOWN] = RTE_PTYPE_UNKNOWN,
870                 [QEDE_PKT_TYPE_TUNN_GENEVE] = RTE_PTYPE_TUNNEL_GENEVE,
871                 [QEDE_PKT_TYPE_TUNN_GRE] = RTE_PTYPE_TUNNEL_GRE,
872                 [QEDE_PKT_TYPE_TUNN_VXLAN] = RTE_PTYPE_TUNNEL_VXLAN,
873                 [QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GENEVE] =
874                                 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L2_ETHER,
875                 [QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_GRE] =
876                                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L2_ETHER,
877                 [QEDE_PKT_TYPE_TUNN_L2_TENID_NOEXIST_VXLAN] =
878                                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L2_ETHER,
879                 [QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GENEVE] =
880                                 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L2_ETHER,
881                 [QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_GRE] =
882                                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L2_ETHER,
883                 [QEDE_PKT_TYPE_TUNN_L2_TENID_EXIST_VXLAN] =
884                                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L2_ETHER,
885                 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GENEVE] =
886                                 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV4,
887                 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_GRE] =
888                                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV4,
889                 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_NOEXIST_VXLAN] =
890                                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV4,
891                 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GENEVE] =
892                                 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV4,
893                 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_GRE] =
894                                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV4,
895                 [QEDE_PKT_TYPE_TUNN_IPV4_TENID_EXIST_VXLAN] =
896                                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV4,
897                 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_GENEVE] =
898                                 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV6,
899                 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_GRE] =
900                                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV6,
901                 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_NOEXIST_VXLAN] =
902                                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV6,
903                 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_GENEVE] =
904                                 RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L3_IPV6,
905                 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_GRE] =
906                                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_L3_IPV6,
907                 [QEDE_PKT_TYPE_TUNN_IPV6_TENID_EXIST_VXLAN] =
908                                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L3_IPV6,
909         };
910
911         /* Cover bits[4-0] to include tunn_type and next protocol */
912         val = ((ETH_TUNNEL_PARSING_FLAGS_TYPE_MASK <<
913                 ETH_TUNNEL_PARSING_FLAGS_TYPE_SHIFT) |
914                 (ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_MASK <<
915                 ETH_TUNNEL_PARSING_FLAGS_NEXT_PROTOCOL_SHIFT)) & flags;
916
917         if (val < QEDE_PKT_TYPE_TUNN_MAX_TYPE)
918                 return ptype_tunn_lkup_tbl[val];
919         else
920                 return RTE_PTYPE_UNKNOWN;
921 }
922
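/* Chain the remaining BDs of a scattered (jumbo) packet onto the head
 * mbuf: each consumed RX buffer becomes one segment of up to rx_buf_size
 * bytes until pkt_len is exhausted.
 */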
923 static inline int
924 qede_process_sg_pkts(void *p_rxq,  struct rte_mbuf *rx_mb,
925                      uint8_t num_segs, uint16_t pkt_len)
926 {
927         struct qede_rx_queue *rxq = p_rxq;
928         struct qede_dev *qdev = rxq->qdev;
929         register struct rte_mbuf *seg1 = NULL;
930         register struct rte_mbuf *seg2 = NULL;
931         uint16_t sw_rx_index;
932         uint16_t cur_size;
933
934         seg1 = rx_mb;
935         while (num_segs) {
936                 cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size :
937                                                         pkt_len;
938                 if (unlikely(!cur_size)) {
939                         PMD_RX_LOG(ERR, rxq, "Length is 0 while %u BDs"
940                                    " left for mapping jumbo", num_segs);
941                         qede_recycle_rx_bd_ring(rxq, qdev, num_segs);
942                         return -EINVAL;
943                 }
944                 sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
945                 seg2 = rxq->sw_rx_ring[sw_rx_index].mbuf;
946                 qede_rx_bd_ring_consume(rxq);
947                 pkt_len -= cur_size;
948                 seg2->data_len = cur_size;
949                 seg1->next = seg2;
950                 seg1 = seg1->next;
951                 num_segs--;
952                 rxq->rx_segs++;
953         }
954
955         return 0;
956 }
957
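/* RX burst handler: walk the completion ring, handle regular and TPA
 * start/continue/end CQEs, fill in mbuf offload flags and packet type,
 * post replacement buffers and finally publish the new producers to the
 * device.
 */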
958 uint16_t
959 qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
960 {
961         struct qede_rx_queue *rxq = p_rxq;
962         struct qede_dev *qdev = rxq->qdev;
963         struct ecore_dev *edev = &qdev->edev;
964         struct qede_fastpath *fp = &qdev->fp_array[rxq->queue_id];
965         uint16_t hw_comp_cons, sw_comp_cons, sw_rx_index;
966         uint16_t rx_pkt = 0;
967         union eth_rx_cqe *cqe;
968         struct eth_fast_path_rx_reg_cqe *fp_cqe = NULL;
969         register struct rte_mbuf *rx_mb = NULL;
970         register struct rte_mbuf *seg1 = NULL;
971         enum eth_rx_cqe_type cqe_type;
972         uint16_t pkt_len = 0; /* Sum of all BD segments */
973         uint16_t len; /* Length of first BD */
974         uint8_t num_segs = 1;
975         uint16_t preload_idx;
976         uint16_t parse_flag;
977 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
978         uint8_t bitfield_val;
979         enum rss_hash_type htype;
980 #endif
981         uint8_t tunn_parse_flag;
982         uint8_t j;
983         struct eth_fast_path_rx_tpa_start_cqe *cqe_start_tpa;
984         uint64_t ol_flags;
985         uint32_t packet_type;
986         uint16_t vlan_tci;
987         bool tpa_start_flg;
988         uint8_t offset, tpa_agg_idx, flags;
989         struct qede_agg_info *tpa_info = NULL;
990         uint32_t rss_hash;
991
992         hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
993         sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
994
995         rte_rmb();
996
997         if (hw_comp_cons == sw_comp_cons)
998                 return 0;
999
1000         while (sw_comp_cons != hw_comp_cons) {
1001                 ol_flags = 0;
1002                 packet_type = RTE_PTYPE_UNKNOWN;
1003                 vlan_tci = 0;
1004                 tpa_start_flg = false;
1005                 rss_hash = 0;
1006
1007                 /* Get the CQE from the completion ring */
1008                 cqe =
1009                     (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring);
1010                 cqe_type = cqe->fast_path_regular.type;
1011                 PMD_RX_LOG(INFO, rxq, "Rx CQE type %d\n", cqe_type);
1012
1013                 switch (cqe_type) {
1014                 case ETH_RX_CQE_TYPE_REGULAR:
1015                         fp_cqe = &cqe->fast_path_regular;
1016                 break;
1017                 case ETH_RX_CQE_TYPE_TPA_START:
1018                         cqe_start_tpa = &cqe->fast_path_tpa_start;
1019                         tpa_info = &rxq->tpa_info[cqe_start_tpa->tpa_agg_index];
1020                         tpa_start_flg = true;
1021                         /* Mark it as LRO packet */
1022                         ol_flags |= PKT_RX_LRO;
1023                         /* In split mode, seg_len is the same as len_on_first_bd
1024                          * and ext_bd_len_list will be empty since there are no
1025                          * additional buffers.
1026                          */
1027                         PMD_RX_LOG(INFO, rxq,
1028                             "TPA start[%d] - len_on_first_bd %d header %d"
1029                             " [bd_list[0] %d], [seg_len %d]\n",
1030                             cqe_start_tpa->tpa_agg_index,
1031                             rte_le_to_cpu_16(cqe_start_tpa->len_on_first_bd),
1032                             cqe_start_tpa->header_len,
1033                             rte_le_to_cpu_16(cqe_start_tpa->ext_bd_len_list[0]),
1034                             rte_le_to_cpu_16(cqe_start_tpa->seg_len));
1035
1036                 break;
1037                 case ETH_RX_CQE_TYPE_TPA_CONT:
1038                         qede_rx_process_tpa_cont_cqe(qdev, rxq,
1039                                                      &cqe->fast_path_tpa_cont);
1040                         goto next_cqe;
1041                 case ETH_RX_CQE_TYPE_TPA_END:
1042                         qede_rx_process_tpa_end_cqe(qdev, rxq,
1043                                                     &cqe->fast_path_tpa_end);
1044                         tpa_agg_idx = cqe->fast_path_tpa_end.tpa_agg_index;
1045                         tpa_info = &rxq->tpa_info[tpa_agg_idx];
1046                         rx_mb = rxq->tpa_info[tpa_agg_idx].tpa_head;
1047                         goto tpa_end;
1048                 case ETH_RX_CQE_TYPE_SLOW_PATH:
1049                         PMD_RX_LOG(INFO, rxq, "Got unexpected slowpath CQE\n");
1050                         qdev->ops->eth_cqe_completion(edev, fp->id,
1051                                 (struct eth_slow_path_rx_cqe *)cqe);
1052                         /* fall-thru */
1053                 default:
1054                         goto next_cqe;
1055                 }
1056
1057                 /* Get the data from the SW ring */
1058                 sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1059                 rx_mb = rxq->sw_rx_ring[sw_rx_index].mbuf;
1060                 assert(rx_mb != NULL);
1061
1062                 /* Handle regular CQE or TPA start CQE */
1063                 if (!tpa_start_flg) {
1064                         parse_flag = rte_le_to_cpu_16(fp_cqe->pars_flags.flags);
1065                         offset = fp_cqe->placement_offset;
1066                         len = rte_le_to_cpu_16(fp_cqe->len_on_first_bd);
1067                         pkt_len = rte_le_to_cpu_16(fp_cqe->pkt_len);
1068                         vlan_tci = rte_le_to_cpu_16(fp_cqe->vlan_tag);
1069                         rss_hash = rte_le_to_cpu_32(fp_cqe->rss_hash);
1070 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1071                         bitfield_val = fp_cqe->bitfields;
1072                         htype = (uint8_t)GET_FIELD(bitfield_val,
1073                                         ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE);
1074 #endif
1075                 } else {
1076                         parse_flag =
1077                             rte_le_to_cpu_16(cqe_start_tpa->pars_flags.flags);
1078                         offset = cqe_start_tpa->placement_offset;
1079                         /* seg_len = len_on_first_bd */
1080                         len = rte_le_to_cpu_16(cqe_start_tpa->len_on_first_bd);
1081                         vlan_tci = rte_le_to_cpu_16(cqe_start_tpa->vlan_tag);
1082 #ifdef RTE_LIBRTE_QEDE_DEBUG_RX
1083                         bitfield_val = cqe_start_tpa->bitfields;
1084                         htype = (uint8_t)GET_FIELD(bitfield_val,
1085                                 ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE);
1086 #endif
1087                         rss_hash = rte_le_to_cpu_32(cqe_start_tpa->rss_hash);
1088                 }
1089                 if (qede_tunn_exist(parse_flag)) {
1090                         PMD_RX_LOG(INFO, rxq, "Rx tunneled packet\n");
1091                         if (unlikely(qede_check_tunn_csum_l4(parse_flag))) {
1092                                 PMD_RX_LOG(ERR, rxq,
1093                                             "L4 csum failed, flags = 0x%x\n",
1094                                             parse_flag);
1095                                 rxq->rx_hw_errors++;
1096                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
1097                         } else {
1098                                 ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1099                                 if (tpa_start_flg)
1100                                         flags =
1101                                          cqe_start_tpa->tunnel_pars_flags.flags;
1102                                 else
1103                                         flags = fp_cqe->tunnel_pars_flags.flags;
1104                                 tunn_parse_flag = flags;
1105                                 packet_type =
1106                                 qede_rx_cqe_to_tunn_pkt_type(tunn_parse_flag);
1107                         }
1108                 } else {
1109                         PMD_RX_LOG(INFO, rxq, "Rx non-tunneled packet\n");
1110                         if (unlikely(qede_check_notunn_csum_l4(parse_flag))) {
1111                                 PMD_RX_LOG(ERR, rxq,
1112                                             "L4 csum failed, flags = 0x%x\n",
1113                                             parse_flag);
1114                                 rxq->rx_hw_errors++;
1115                                 ol_flags |= PKT_RX_L4_CKSUM_BAD;
1116                         } else {
1117                                 ol_flags |= PKT_RX_L4_CKSUM_GOOD;
1118                         }
1119                         if (unlikely(qede_check_notunn_csum_l3(rx_mb,
1120                                                         parse_flag))) {
1121                                 PMD_RX_LOG(ERR, rxq,
1122                                            "IP csum failed, flags = 0x%x\n",
1123                                            parse_flag);
1124                                 rxq->rx_hw_errors++;
1125                                 ol_flags |= PKT_RX_IP_CKSUM_BAD;
1126                         } else {
1127                                 ol_flags |= PKT_RX_IP_CKSUM_GOOD;
1128                                 packet_type =
1129                                         qede_rx_cqe_to_pkt_type(parse_flag);
1130                         }
1131                 }
1132
1133                 if (CQE_HAS_VLAN(parse_flag)) {
1134                         ol_flags |= PKT_RX_VLAN_PKT;
1135                         if (qdev->vlan_strip_flg) {
1136                                 ol_flags |= PKT_RX_VLAN_STRIPPED;
1137                                 rx_mb->vlan_tci = vlan_tci;
1138                         }
1139                 }
1140                 if (CQE_HAS_OUTER_VLAN(parse_flag)) {
1141                         ol_flags |= PKT_RX_QINQ_PKT;
1142                         if (qdev->vlan_strip_flg) {
1143                                 rx_mb->vlan_tci = vlan_tci;
1144                                 ol_flags |= PKT_RX_QINQ_STRIPPED;
1145                         }
1146                         rx_mb->vlan_tci_outer = 0;
1147                 }
1148                 /* RSS Hash */
1149                 if (qdev->rss_enable) {
1150                         ol_flags |= PKT_RX_RSS_HASH;
1151                         rx_mb->hash.rss = rss_hash;
1152                 }
1153
1154                 if (unlikely(qede_alloc_rx_buffer(rxq) != 0)) {
1155                         PMD_RX_LOG(ERR, rxq,
1156                                    "New buffer allocation failed, "
1157                                    "dropping incoming packet\n");
1158                         qede_recycle_rx_bd_ring(rxq, qdev, fp_cqe->bd_num);
1159                         rte_eth_devices[rxq->port_id].
1160                             data->rx_mbuf_alloc_failed++;
1161                         rxq->rx_alloc_errors++;
1162                         break;
1163                 }
1164                 qede_rx_bd_ring_consume(rxq);
1165
1166                 if (!tpa_start_flg && fp_cqe->bd_num > 1) {
1167                         PMD_RX_LOG(DEBUG, rxq, "Jumbo-over-BD packet: %02x BDs"
1168                                    " len on first: %04x Total Len: %04x",
1169                                    fp_cqe->bd_num, len, pkt_len);
1170                         num_segs = fp_cqe->bd_num - 1;
1171                         seg1 = rx_mb;
1172                         if (qede_process_sg_pkts(p_rxq, seg1, num_segs,
1173                                                  pkt_len - len))
1174                                 goto next_cqe;
1175                         for (j = 0; j < num_segs; j++) {
1176                                 if (qede_alloc_rx_buffer(rxq)) {
1177                                         PMD_RX_LOG(ERR, rxq,
1178                                                 "Buffer allocation failed");
1179                                         rte_eth_devices[rxq->port_id].
1180                                                 data->rx_mbuf_alloc_failed++;
1181                                         rxq->rx_alloc_errors++;
1182                                         break;
1183                                 }
1184                                 rxq->rx_segs++;
1185                         }
1186                 }
1187                 rxq->rx_segs++; /* for the first segment */
1188
1189                 /* Prefetch next mbuf while processing current one. */
1190                 preload_idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq);
1191                 rte_prefetch0(rxq->sw_rx_ring[preload_idx].mbuf);
1192
1193                 /* Update rest of the MBUF fields */
1194                 rx_mb->data_off = offset + RTE_PKTMBUF_HEADROOM;
1195                 rx_mb->port = rxq->port_id;
1196                 rx_mb->ol_flags = ol_flags;
1197                 rx_mb->data_len = len;
1198                 rx_mb->packet_type = packet_type;
1199                 PMD_RX_LOG(INFO, rxq,
1200                            "pkt_type 0x%04x len %u hash_type %d hash_val 0x%x"
1201                            " ol_flags 0x%04lx\n",
1202                            packet_type, len, htype, rx_mb->hash.rss,
1203                            (unsigned long)ol_flags);
1204                 if (!tpa_start_flg) {
1205                         rx_mb->nb_segs = fp_cqe->bd_num;
1206                         rx_mb->pkt_len = pkt_len;
1207                 } else {
1208                         /* store ref to the updated mbuf */
1209                         tpa_info->tpa_head = rx_mb;
1210                         tpa_info->tpa_tail = tpa_info->tpa_head;
1211                 }
1212                 rte_prefetch1(rte_pktmbuf_mtod(rx_mb, void *));
1213 tpa_end:
1214                 if (!tpa_start_flg) {
1215                         rx_pkts[rx_pkt] = rx_mb;
1216                         rx_pkt++;
1217                 }
1218 next_cqe:
1219                 ecore_chain_recycle_consumed(&rxq->rx_comp_ring);
1220                 sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
1221                 if (rx_pkt == nb_pkts) {
1222                         PMD_RX_LOG(DEBUG, rxq,
1223                                    "Budget reached nb_pkts=%u received=%u",
1224                                    rx_pkt, nb_pkts);
1225                         break;
1226                 }
1227         }
1228
1229         qede_update_rx_prod(qdev, rxq);
1230
1231         rxq->rcv_pkts += rx_pkt;
1232
1233         PMD_RX_LOG(DEBUG, rxq, "rx_pkts=%u core=%d", rx_pkt, rte_lcore_id());
1234
1235         return rx_pkt;
1236 }
1237
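/* Release the TX descriptors and the mbuf chain of one completed packet
 * at the software consumer index.
 */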
1238 static inline void
1239 qede_free_tx_pkt(struct qede_tx_queue *txq)
1240 {
1241         struct rte_mbuf *mbuf;
1242         uint16_t nb_segs;
1243         uint16_t idx;
1244
1245         idx = TX_CONS(txq);
1246         mbuf = txq->sw_tx_ring[idx].mbuf;
1247         if (mbuf) {
1248                 nb_segs = mbuf->nb_segs;
1249                 PMD_TX_LOG(DEBUG, txq, "nb_segs to free %u\n", nb_segs);
1250                 while (nb_segs) {
1251                         /* It's like consuming rxbuf in recv() */
1252                         ecore_chain_consume(&txq->tx_pbl);
1253                         txq->nb_tx_avail++;
1254                         nb_segs--;
1255                 }
1256                 rte_pktmbuf_free(mbuf);
1257                 txq->sw_tx_ring[idx].mbuf = NULL;
1258                 txq->sw_tx_cons++;
1259                 PMD_TX_LOG(DEBUG, txq, "Freed tx packet\n");
1260         } else {
1261                 ecore_chain_consume(&txq->tx_pbl);
1262                 txq->nb_tx_avail++;
1263         }
1264 }
1265
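/* Reclaim TX descriptors up to the hardware BD consumer reported in the
 * status block.
 */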
1266 static inline void
1267 qede_process_tx_compl(__rte_unused struct ecore_dev *edev,
1268                       struct qede_tx_queue *txq)
1269 {
1270         uint16_t hw_bd_cons;
1271 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1272         uint16_t sw_tx_cons;
1273 #endif
1274
1275         rte_compiler_barrier();
1276         hw_bd_cons = rte_le_to_cpu_16(*txq->hw_cons_ptr);
1277 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1278         sw_tx_cons = ecore_chain_get_cons_idx(&txq->tx_pbl);
1279         PMD_TX_LOG(DEBUG, txq, "Tx Completions = %u\n",
1280                    abs(hw_bd_cons - sw_tx_cons));
1281 #endif
1282         while (hw_bd_cons !=  ecore_chain_get_cons_idx(&txq->tx_pbl))
1283                 qede_free_tx_pkt(txq);
1284 }
1285
1286 /* Populate scatter gather buffer descriptor fields */
1287 static inline uint8_t
1288 qede_encode_sg_bd(struct qede_tx_queue *p_txq, struct rte_mbuf *m_seg,
1289                   struct eth_tx_2nd_bd **bd2, struct eth_tx_3rd_bd **bd3)
1290 {
1291         struct qede_tx_queue *txq = p_txq;
1292         struct eth_tx_bd *tx_bd = NULL;
1293         dma_addr_t mapping;
1294         uint8_t nb_segs = 0;
1295
1296         /* Check for scattered buffers */
1297         while (m_seg) {
1298                 if (nb_segs == 0) {
1299                         if (!*bd2) {
1300                                 *bd2 = (struct eth_tx_2nd_bd *)
1301                                         ecore_chain_produce(&txq->tx_pbl);
1302                                 memset(*bd2, 0, sizeof(struct eth_tx_2nd_bd));
1303                                 nb_segs++;
1304                         }
1305                         mapping = rte_mbuf_data_dma_addr(m_seg);
1306                         QEDE_BD_SET_ADDR_LEN(*bd2, mapping, m_seg->data_len);
1307                         PMD_TX_LOG(DEBUG, txq, "BD2 len %04x", m_seg->data_len);
1308                 } else if (nb_segs == 1) {
1309                         if (!*bd3) {
1310                                 *bd3 = (struct eth_tx_3rd_bd *)
1311                                         ecore_chain_produce(&txq->tx_pbl);
1312                                 memset(*bd3, 0, sizeof(struct eth_tx_3rd_bd));
1313                                 nb_segs++;
1314                         }
1315                         mapping = rte_mbuf_data_dma_addr(m_seg);
1316                         QEDE_BD_SET_ADDR_LEN(*bd3, mapping, m_seg->data_len);
1317                         PMD_TX_LOG(DEBUG, txq, "BD3 len %04x", m_seg->data_len);
1318                 } else {
1319                         tx_bd = (struct eth_tx_bd *)
1320                                 ecore_chain_produce(&txq->tx_pbl);
1321                         memset(tx_bd, 0, sizeof(*tx_bd));
1322                         nb_segs++;
1323                         mapping = rte_mbuf_data_dma_addr(m_seg);
1324                         QEDE_BD_SET_ADDR_LEN(tx_bd, mapping, m_seg->data_len);
1325                         PMD_TX_LOG(DEBUG, txq, "BD len %04x", m_seg->data_len);
1326                 }
1327                 m_seg = m_seg->next;
1328         }
1329
1330         /* Return total scattered buffers */
1331         return nb_segs;
1332 }
1333
1334 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1335 static inline void
1336 print_tx_bd_info(struct qede_tx_queue *txq,
1337                  struct eth_tx_1st_bd *bd1,
1338                  struct eth_tx_2nd_bd *bd2,
1339                  struct eth_tx_3rd_bd *bd3,
1340                  uint64_t tx_ol_flags)
1341 {
1342         char ol_buf[256] = { 0 }; /* for verbose prints */
1343
1344         if (bd1)
1345                 PMD_TX_LOG(INFO, txq,
1346                            "BD1: nbytes=%u nbds=%u bd_flags=%04x bf=%04x",
1347                            rte_cpu_to_le_16(bd1->nbytes), bd1->data.nbds,
1348                            bd1->data.bd_flags.bitfields,
1349                            rte_cpu_to_le_16(bd1->data.bitfields));
1350         if (bd2)
1351                 PMD_TX_LOG(INFO, txq,
1352                            "BD2: nbytes=%u bf=%04x\n",
1353                            rte_cpu_to_le_16(bd2->nbytes), bd2->data.bitfields1);
1354         if (bd3)
1355                 PMD_TX_LOG(INFO, txq,
1356                            "BD3: nbytes=%u bf=%04x mss=%u\n",
1357                            rte_cpu_to_le_16(bd3->nbytes),
1358                            rte_cpu_to_le_16(bd3->data.bitfields),
1359                            rte_cpu_to_le_16(bd3->data.lso_mss));
1360
1361         rte_get_tx_ol_flag_list(tx_ol_flags, ol_buf, sizeof(ol_buf));
1362         PMD_TX_LOG(INFO, txq, "TX offloads = %s\n", ol_buf);
1363 }
1364 #endif
1365
1366 /* TX prepare: check that packets meet TX conditions */
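/* Per packet this checks the segment count against the BD limits for LSO
 * and non-LSO packets, rejects offload flags the PMD does not support, and
 * runs the pseudo-header checksum preparation in
 * rte_net_intel_cksum_prepare() (see the TBD below on when that is
 * strictly required).
 */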
1367 uint16_t
1368 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1369 qede_xmit_prep_pkts(void *p_txq, struct rte_mbuf **tx_pkts,
1370                     uint16_t nb_pkts)
1371 {
1372         struct qede_tx_queue *txq = p_txq;
1373 #else
1374 qede_xmit_prep_pkts(__rte_unused void *p_txq, struct rte_mbuf **tx_pkts,
1375                     uint16_t nb_pkts)
1376 {
1377 #endif
1378         uint64_t ol_flags;
1379         struct rte_mbuf *m;
1380         uint16_t i;
1381         int ret;
1382
1383         for (i = 0; i < nb_pkts; i++) {
1384                 m = tx_pkts[i];
1385                 ol_flags = m->ol_flags;
1386                 if (ol_flags & PKT_TX_TCP_SEG) {
1387                         if (m->nb_segs >= ETH_TX_MAX_BDS_PER_LSO_PACKET) {
1388                                 rte_errno = EINVAL;
1389                                 break;
1390                         }
1391                         /* TBD: confirm it's ~9700B for both? */
1392                         if (m->tso_segsz > ETH_TX_MAX_NON_LSO_PKT_LEN) {
1393                                 rte_errno = EINVAL;
1394                                 break;
1395                         }
1396                 } else {
1397                         if (m->nb_segs >= ETH_TX_MAX_BDS_PER_NON_LSO_PACKET) {
1398                                 rte_errno = EINVAL;
1399                                 break;
1400                         }
1401                 }
1402                 if (ol_flags & QEDE_TX_OFFLOAD_NOTSUP_MASK) {
1403                         rte_errno = ENOTSUP;
1404                         break;
1405                 }
1406
1407 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1408                 ret = rte_validate_tx_offload(m);
1409                 if (ret != 0) {
1410                         rte_errno = -ret;
1411                         break;
1412                 }
1413 #endif
1414                 /* TBD: pseudo csum calculation required iff
1415                  * ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE not set?
1416                  */
1417                 ret = rte_net_intel_cksum_prepare(m);
1418                 if (ret != 0) {
1419                         rte_errno = -ret;
1420                         break;
1421                 }
1422         }
1423
1424 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1425         if (unlikely(i != nb_pkts))
1426                 PMD_TX_LOG(ERR, txq, "TX prepare failed for %u\n",
1427                            nb_pkts - i);
1428 #endif
1429         return i;
1430 }
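/* Illustrative usage (a sketch, not compiled as part of the driver):
 * applications go through the ethdev API, which dispatches to the burst
 * handlers this PMD registers. Assuming "port", "queue", "pkts" and
 * "nb_pkts" describe a started qede port and a batch of mbufs:
 *
 *     uint16_t nb_prep = rte_eth_tx_prepare(port, queue, pkts, nb_pkts);
 *     uint16_t nb_sent = rte_eth_tx_burst(port, queue, pkts, nb_prep);
 *
 * rte_eth_tx_prepare() ends up in qede_xmit_prep_pkts() above and
 * rte_eth_tx_burst() in qede_xmit_pkts() below.
 */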
1431
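/* TX burst handler: reclaims completed descriptors when the free count
 * drops below tx_free_thresh, then builds per packet a first BD carrying
 * the offload flags, optional second/third BDs for LSO and
 * IPv6-with-extension packets, plus one BD per extra mbuf segment, and
 * finally publishes the new producer index with a single doorbell write.
 */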
1432 uint16_t
1433 qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1434 {
1435         struct qede_tx_queue *txq = p_txq;
1436         struct qede_dev *qdev = txq->qdev;
1437         struct ecore_dev *edev = &qdev->edev;
1438         struct rte_mbuf *mbuf;
1439         struct rte_mbuf *m_seg = NULL;
1440         uint16_t nb_tx_pkts;
1441         uint16_t bd_prod;
1442         uint16_t idx;
1443         uint16_t nb_frags;
1444         uint16_t nb_pkt_sent = 0;
1445         uint8_t nbds;
1446         bool ipv6_ext_flg;
1447         bool lso_flg;
1448         bool tunn_flg;
1449         struct eth_tx_1st_bd *bd1;
1450         struct eth_tx_2nd_bd *bd2;
1451         struct eth_tx_3rd_bd *bd3;
1452         uint64_t tx_ol_flags;
1453         uint16_t hdr_size;
1454
1455         if (unlikely(txq->nb_tx_avail < txq->tx_free_thresh)) {
1456                 PMD_TX_LOG(DEBUG, txq, "send=%u avail=%u free_thresh=%u",
1457                            nb_pkts, txq->nb_tx_avail, txq->tx_free_thresh);
1458                 qede_process_tx_compl(edev, txq);
1459         }
1460
1461         nb_tx_pkts  = nb_pkts;
1462         bd_prod = rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
1463         while (nb_tx_pkts--) {
1464                 /* Init flags/values */
1465                 ipv6_ext_flg = false;
1466                 tunn_flg = false;
1467                 lso_flg = false;
1468                 nbds = 0;
1469                 bd1 = NULL;
1470                 bd2 = NULL;
1471                 bd3 = NULL;
1472                 hdr_size = 0;
1473
1474                 mbuf = *tx_pkts++;
1475                 assert(mbuf);
1476
1477                 /* Check TX BD availability against the number of mbuf segments */
1478                 if (unlikely(txq->nb_tx_avail < mbuf->nb_segs))
1479                         break;
1480
1481                 tx_ol_flags = mbuf->ol_flags;
1482
1483 #define RTE_ETH_IS_IPV6_HDR_EXT(ptype) ((ptype) & RTE_PTYPE_L3_IPV6_EXT)
1484                 if (RTE_ETH_IS_IPV6_HDR_EXT(mbuf->packet_type))
1485                         ipv6_ext_flg = true;
1486
1487                 if (RTE_ETH_IS_TUNNEL_PKT(mbuf->packet_type))
1488                         tunn_flg = true;
1489
1490                 if (tx_ol_flags & PKT_TX_TCP_SEG)
1491                         lso_flg = true;
1492
1493                 if (lso_flg) {
1494                         if (unlikely(txq->nb_tx_avail <
1495                                                 ETH_TX_MIN_BDS_PER_LSO_PKT))
1496                                 break;
1497                 } else {
1498                         if (unlikely(txq->nb_tx_avail <
1499                                         ETH_TX_MIN_BDS_PER_NON_LSO_PKT))
1500                                 break;
1501                 }
1502
1503                 if (tunn_flg && ipv6_ext_flg) {
1504                         if (unlikely(txq->nb_tx_avail <
1505                                 ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT))
1506                                 break;
1507                 }
1508                 if (ipv6_ext_flg) {
1509                         if (unlikely(txq->nb_tx_avail <
1510                                         ETH_TX_MIN_BDS_PER_IPV6_WITH_EXT_PKT))
1511                                 break;
1512                 }
1513
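                /* Layout: BD1 always carries the start flags and either the
                 * whole linear buffer (non-LSO) or just the header (LSO);
                 * BD2/BD3 are added for LSO and IPv6-extension packets, and
                 * remaining segments are encoded by qede_encode_sg_bd().
                 */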
1514                 /* Fill the entry in the SW ring and the BDs in the FW ring */
1515                 idx = TX_PROD(txq);
1516                 txq->sw_tx_ring[idx].mbuf = mbuf;
1517
1518                 /* BD1 */
1519                 bd1 = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl);
1520                 memset(bd1, 0, sizeof(struct eth_tx_1st_bd));
1521                 nbds++;
1522
1523                 bd1->data.bd_flags.bitfields |=
1524                         1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
1525                 /* FW 8.10.x specific change */
1526                 if (!lso_flg) {
1527                         bd1->data.bitfields |=
1528                         (mbuf->pkt_len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK)
1529                                 << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
1530                         /* Map MBUF linear data for DMA and set in the BD1 */
1531                         QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_dma_addr(mbuf),
1532                                              mbuf->data_len);
1533                 } else {
1534                         /* For LSO, packet header and payload must reside on
1535                          * buffers pointed to by different BDs. Using BD1 for HDR
1536                          * and BD2 onwards for data.
1537                          */
1538                         hdr_size = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
1539                         QEDE_BD_SET_ADDR_LEN(bd1, rte_mbuf_data_dma_addr(mbuf),
1540                                              hdr_size);
1541                 }
1542
1543                 if (tunn_flg) {
1544                         /* First indicate that it's a tunnel pkt */
1545                         bd1->data.bitfields |=
1546                                 ETH_TX_DATA_1ST_BD_TUNN_FLAG_MASK <<
1547                                 ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
1548
1549                         /* Legacy FW had inverted behavior for this bit,
1550                          * i.e. it had to be set to prevent the FW from
1551                          * touching encapsulated packets when it did not need to.
1552                          */
1553                         if (unlikely(txq->is_legacy))
1554                                 bd1->data.bitfields ^=
1555                                         1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
1556
1557                         /* Outer IP checksum offload */
1558                         if (tx_ol_flags & PKT_TX_OUTER_IP_CKSUM) {
1559                                 bd1->data.bd_flags.bitfields |=
1560                                         ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_MASK <<
1561                                         ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT;
1562                         }
1563
1564                         /* Outer UDP checksum offload */
1565                         bd1->data.bd_flags.bitfields |=
1566                                 ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK <<
1567                                 ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT;
1568                 }
1569
1570                 /* Descriptor based VLAN insertion */
1571                 if (tx_ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_QINQ_PKT)) {
1572                         bd1->data.vlan = rte_cpu_to_le_16(mbuf->vlan_tci);
1573                         bd1->data.bd_flags.bitfields |=
1574                             1 << ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT;
1575                 }
1576
1577                 if (lso_flg)
1578                         bd1->data.bd_flags.bitfields |=
1579                                 1 << ETH_TX_1ST_BD_FLAGS_LSO_SHIFT;
1580
1581                 /* Offload the IP checksum in the hardware */
1582                 if ((lso_flg) || (tx_ol_flags & PKT_TX_IP_CKSUM))
1583                         bd1->data.bd_flags.bitfields |=
1584                             1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
1585
1586                 /* L4 checksum offload (tcp or udp) */
1587                 if ((lso_flg) || (tx_ol_flags & (PKT_TX_TCP_CKSUM |
1588                                                 PKT_TX_UDP_CKSUM)))
1589                         /* PKT_TX_TCP_SEG implies PKT_TX_TCP_CKSUM */
1590                         bd1->data.bd_flags.bitfields |=
1591                             1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
1592
1593                 /* BD2 */
1594                 if (lso_flg || ipv6_ext_flg) {
1595                         bd2 = (struct eth_tx_2nd_bd *)ecore_chain_produce
1596                                                         (&txq->tx_pbl);
1597                         memset(bd2, 0, sizeof(struct eth_tx_2nd_bd));
1598                         nbds++;
1599                         QEDE_BD_SET_ADDR_LEN(bd2,
1600                                             (hdr_size +
1601                                             rte_mbuf_data_dma_addr(mbuf)),
1602                                             mbuf->data_len - hdr_size);
1603                         /* TBD: check pseudo csum iff tx_prepare not called? */
1604                         if (ipv6_ext_flg) {
1605                                 bd2->data.bitfields1 |=
1606                                 ETH_L4_PSEUDO_CSUM_ZERO_LENGTH <<
1607                                 ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT;
1608                         }
1609                 }
1610
1611                 /* BD3 */
1612                 if (lso_flg || ipv6_ext_flg) {
1613                         bd3 = (struct eth_tx_3rd_bd *)ecore_chain_produce
1614                                                         (&txq->tx_pbl);
1615                         memset(bd3, 0, sizeof(struct eth_tx_3rd_bd));
1616                         nbds++;
1617                         if (lso_flg) {
1618                                 bd3->data.lso_mss =
1619                                         rte_cpu_to_le_16(mbuf->tso_segsz);
1620                                 /* Using one header BD */
1621                                 bd3->data.bitfields |=
1622                                         rte_cpu_to_le_16(1 <<
1623                                         ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT);
1624                         }
1625                 }
1626
1627                 /* Handle fragmented MBUF */
1628                 m_seg = mbuf->next;
1629                 /* Encode scatter gather buffer descriptors if required */
1630                 nb_frags = qede_encode_sg_bd(txq, m_seg, &bd2, &bd3);
1631                 bd1->data.nbds = nbds + nb_frags;
1632                 txq->nb_tx_avail -= bd1->data.nbds;
1633                 txq->sw_tx_prod++;
1634                 rte_prefetch0(txq->sw_tx_ring[TX_PROD(txq)].mbuf);
1635                 bd_prod =
1636                     rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl));
1637 #ifdef RTE_LIBRTE_QEDE_DEBUG_TX
1638                 print_tx_bd_info(txq, bd1, bd2, bd3, tx_ol_flags);
1639                 PMD_TX_LOG(INFO, txq, "lso=%d tunn=%d ipv6_ext=%d\n",
1640                            lso_flg, tunn_flg, ipv6_ext_flg);
1641 #endif
1642                 nb_pkt_sent++;
1643                 txq->xmit_pkts++;
1644         }
1645
1646         /* Publish the final producer index through the doorbell */
1647         txq->tx_db.data.bd_prod = bd_prod;
1648         rte_wmb();
1649         rte_compiler_barrier();
1650         DIRECT_REG_WR_RELAXED(edev, txq->doorbell_addr, txq->tx_db.raw);
1651         rte_wmb();
1652
1653         /* Check again for Tx completions */
1654         qede_process_tx_compl(edev, txq);
1655
1656         PMD_TX_LOG(DEBUG, txq, "to_send=%u sent=%u bd_prod=%u core=%d",
1657                    nb_pkts, nb_pkt_sent, TX_PROD(txq), rte_lcore_id());
1658
1659         return nb_pkt_sent;
1660 }
1661
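/* Bind the ethdev Rx/Tx queue arrays to the fastpath structures. Tx queues
 * are laid out per traffic class, so the ethdev index of a fastpath's Tx
 * queue is tc * QEDE_TSS_COUNT(qdev) plus its Tx ordinal.
 */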
1662 static void qede_init_fp_queue(struct rte_eth_dev *eth_dev)
1663 {
1664         struct qede_dev *qdev = eth_dev->data->dev_private;
1665         struct qede_fastpath *fp;
1666         uint8_t i, txq_index, tc;
1667         int rxq = 0, txq = 0;
1668
1669         for_each_queue(i) {
1670                 fp = &qdev->fp_array[i];
1671                 if (fp->type & QEDE_FASTPATH_RX) {
1672                         fp->rxq = eth_dev->data->rx_queues[i];
1673                         fp->rxq->queue_id = rxq++;
1674                 }
1675
1676                 if (fp->type & QEDE_FASTPATH_TX) {
1677                         for (tc = 0; tc < qdev->num_tc; tc++) {
1678                                 txq_index = tc * QEDE_TSS_COUNT(qdev) + txq;
1679                                 fp->txqs[tc] =
1680                                         eth_dev->data->tx_queues[txq_index];
1681                                 fp->txqs[tc]->queue_id = txq_index;
1682                                 if (qdev->dev_info.is_legacy)
1683                                         fp->txqs[tc]->is_legacy = true;
1684                         }
1685                         txq++;
1686                 }
1687         }
1688 }
1689
1690 int qede_dev_start(struct rte_eth_dev *eth_dev)
1691 {
1692         struct qede_dev *qdev = eth_dev->data->dev_private;
1693         struct ecore_dev *edev = &qdev->edev;
1694         int rc;
1695
1696         DP_INFO(edev, "Device state is %d\n", qdev->state);
1697
1698         if (qdev->state == QEDE_DEV_START) {
1699                 DP_INFO(edev, "Port is already started\n");
1700                 return 0;
1701         }
1702
1703         if (qdev->state == QEDE_DEV_CONFIG)
1704                 qede_init_fp_queue(eth_dev);
1705
1706         rc = qede_start_queues(eth_dev, true);
1707         if (rc) {
1708                 DP_ERR(edev, "Failed to start queues\n");
1709                 /* TBD: free */
1710                 return rc;
1711         }
1712
1713         /* Newer SR-IOV PF drivers expect the RX/TX queues to be started
1714          * before RSS is enabled, so RSS configuration is deferred up to
1715          * this point. To keep PF behavior consistent with the VF case,
1716          * no PF/VF specific check is done here.
1717          */
1718         if (eth_dev->data->dev_conf.rxmode.mq_mode  == ETH_MQ_RX_RSS)
1719                 if (qede_config_rss(eth_dev))
1720                         return -1;
1721
1722         /* Bring-up the link */
1723         qede_dev_set_link_state(eth_dev, true);
1724
1725         /* Start/resume traffic */
1726         qdev->ops->fastpath_start(edev);
1727
1728         qdev->state = QEDE_DEV_START;
1729
1730         DP_INFO(edev, "dev_state is QEDE_DEV_START\n");
1731
1732         return 0;
1733 }
1734
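/* Poll TX completions until the software producer and consumer indices
 * meet. If the queue has not drained after roughly 1000 iterations of 1 ms
 * each, optionally request a drain from the MCP and retry once before
 * giving up.
 */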
1735 static int qede_drain_txq(struct qede_dev *qdev,
1736                           struct qede_tx_queue *txq, bool allow_drain)
1737 {
1738         struct ecore_dev *edev = &qdev->edev;
1739         int rc, cnt = 1000;
1740
1741         while (txq->sw_tx_cons != txq->sw_tx_prod) {
1742                 qede_process_tx_compl(edev, txq);
1743                 if (!cnt) {
1744                         if (allow_drain) {
1745                                 DP_ERR(edev, "Tx queue[%u] is stuck, "
1746                                           "requesting MCP to drain\n",
1747                                           txq->queue_id);
1748                                 rc = qdev->ops->common->drain(edev);
1749                                 if (rc)
1750                                         return rc;
1751                                 return qede_drain_txq(qdev, txq, false);
1752                         }
1753                         DP_ERR(edev, "Timeout waiting for tx queue[%d]: "
1754                                   "PROD=%d, CONS=%d\n",
1755                                   txq->queue_id, txq->sw_tx_prod,
1756                                   txq->sw_tx_cons);
1757                         return -1;
1758                 }
1759                 cnt--;
1760                 DELAY(1000);
1761                 rte_compiler_barrier();
1762         }
1763
1764         /* FW finished processing, wait for HW to transmit all tx packets */
1765         DELAY(2000);
1766
1767         return 0;
1768 }
1769
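/* Deactivate the vport (disabling TPA first if LRO was enabled), drain all
 * Tx queues (requesting an MCP drain if needed), then stop the Tx and Rx
 * queues in reverse order and reset the fastpath rings.
 */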
1770 static int qede_stop_queues(struct qede_dev *qdev)
1771 {
1772         struct qed_update_vport_params vport_update_params;
1773         struct ecore_dev *edev = &qdev->edev;
1774         struct ecore_sge_tpa_params tpa_params;
1775         struct qede_fastpath *fp;
1776         int rc, tc, i;
1777
1778         /* Disable the vport */
1779         memset(&vport_update_params, 0, sizeof(vport_update_params));
1780         vport_update_params.vport_id = 0;
1781         vport_update_params.update_vport_active_flg = 1;
1782         vport_update_params.vport_active_flg = 0;
1783         vport_update_params.update_rss_flg = 0;
1784         /* Disable TPA */
1785         if (qdev->enable_lro) {
1786                 DP_INFO(edev, "Disabling LRO\n");
1787                 memset(&tpa_params, 0, sizeof(struct ecore_sge_tpa_params));
1788                 qede_update_sge_tpa_params(&tpa_params, qdev->mtu, false);
1789                 vport_update_params.sge_tpa_params = &tpa_params;
1790         }
1791
1792         DP_INFO(edev, "Deactivate vport\n");
1793         rc = qdev->ops->vport_update(edev, &vport_update_params);
1794         if (rc) {
1795                 DP_ERR(edev, "Failed to update vport\n");
1796                 return rc;
1797         }
1798
1799         DP_INFO(edev, "Flushing tx queues\n");
1800
1801         /* Flush Tx queues. If needed, request drain from MCP */
1802         for_each_queue(i) {
1803                 fp = &qdev->fp_array[i];
1804
1805                 if (fp->type & QEDE_FASTPATH_TX) {
1806                         for (tc = 0; tc < qdev->num_tc; tc++) {
1807                                 struct qede_tx_queue *txq = fp->txqs[tc];
1808
1809                                 rc = qede_drain_txq(qdev, txq, true);
1810                                 if (rc)
1811                                         return rc;
1812                         }
1813                 }
1814         }
1815
1816         /* Stop all Queues in reverse order */
1817         for (i = QEDE_QUEUE_CNT(qdev) - 1; i >= 0; i--) {
1818                 fp = &qdev->fp_array[i];
1819
1820                 /* Stop the Tx Queue(s) */
1821                 if (qdev->fp_array[i].type & QEDE_FASTPATH_TX) {
1822                         for (tc = 0; tc < qdev->num_tc; tc++) {
1823                                 struct qede_tx_queue *txq = fp->txqs[tc];
1824                                 DP_INFO(edev, "Stopping tx queues\n");
1825                                 rc = qdev->ops->q_tx_stop(edev, i, txq->handle);
1826                                 if (rc) {
1827                                         DP_ERR(edev, "Failed to stop TXQ #%d\n",
1828                                                i);
1829                                         return rc;
1830                                 }
1831                         }
1832                 }
1833
1834                 /* Stop the Rx Queue */
1835                 if (qdev->fp_array[i].type & QEDE_FASTPATH_RX) {
1836                         DP_INFO(edev, "Stopping rx queues\n");
1837                         rc = qdev->ops->q_rx_stop(edev, i, fp->rxq->handle);
1838                         if (rc) {
1839                                 DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
1840                                 return rc;
1841                         }
1842                 }
1843         }
1844         qede_reset_fp_rings(qdev);
1845
1846         return 0;
1847 }
1848
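/* Return the fastpath rings to their initial state: release buffered
 * mbufs, reset the BD and completion chains and the producer/consumer
 * indices, and refill the Rx rings with fresh mbufs.
 */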
1849 int qede_reset_fp_rings(struct qede_dev *qdev)
1850 {
1851         struct qede_fastpath *fp;
1852         struct qede_tx_queue *txq;
1853         uint8_t tc;
1854         uint16_t id, i;
1855
1856         for_each_queue(id) {
1857                 fp = &qdev->fp_array[id];
1858
1859                 if (fp->type & QEDE_FASTPATH_RX) {
1860                         DP_INFO(&qdev->edev,
1861                                 "Reset FP chain for RSS %u\n", id);
1862                         qede_rx_queue_release_mbufs(fp->rxq);
1863                         ecore_chain_reset(&fp->rxq->rx_bd_ring);
1864                         ecore_chain_reset(&fp->rxq->rx_comp_ring);
1865                         fp->rxq->sw_rx_prod = 0;
1866                         fp->rxq->sw_rx_cons = 0;
1867                         *fp->rxq->hw_cons_ptr = 0;
1868                         for (i = 0; i < fp->rxq->nb_rx_desc; i++) {
1869                                 if (qede_alloc_rx_buffer(fp->rxq)) {
1870                                         DP_ERR(&qdev->edev,
1871                                                "RX buffer allocation failed\n");
1872                                         return -ENOMEM;
1873                                 }
1874                         }
1875                 }
1876                 if (fp->type & QEDE_FASTPATH_TX) {
1877                         for (tc = 0; tc < qdev->num_tc; tc++) {
1878                                 txq = fp->txqs[tc];
1879                                 qede_tx_queue_release_mbufs(txq);
1880                                 ecore_chain_reset(&txq->tx_pbl);
1881                                 txq->sw_tx_cons = 0;
1882                                 txq->sw_tx_prod = 0;
1883                                 *txq->hw_cons_ptr = 0;
1884                         }
1885                 }
1886         }
1887
1888         return 0;
1889 }
1890
1891 /* This function releases the Rx/Tx queue memory of all fastpaths */
1892 void qede_free_mem_load(struct rte_eth_dev *eth_dev)
1893 {
1894         struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
1895         struct qede_fastpath *fp;
1896         uint16_t txq_idx;
1897         uint8_t id;
1898         uint8_t tc;
1899
1900         for_each_queue(id) {
1901                 fp = &qdev->fp_array[id];
1902                 if (fp->type & QEDE_FASTPATH_RX) {
1903                         if (!fp->rxq)
1904                                 continue;
1905                         qede_rx_queue_release(fp->rxq);
1906                         eth_dev->data->rx_queues[id] = NULL;
1907                 } else {
1908                         for (tc = 0; tc < qdev->num_tc; tc++) {
1909                                 if (!fp->txqs[tc])
1910                                         continue;
1911                                 txq_idx = fp->txqs[tc]->queue_id;
1912                                 qede_tx_queue_release(fp->txqs[tc]);
1913                                 eth_dev->data->tx_queues[txq_idx] = NULL;
1914                         }
1915                 }
1916         }
1917 }
1918
1919 void qede_dev_stop(struct rte_eth_dev *eth_dev)
1920 {
1921         struct qede_dev *qdev = eth_dev->data->dev_private;
1922         struct ecore_dev *edev = &qdev->edev;
1923
1924         DP_INFO(edev, "port %u\n", eth_dev->data->port_id);
1925
1926         if (qdev->state != QEDE_DEV_START) {
1927                 DP_INFO(edev, "Device not yet started\n");
1928                 return;
1929         }
1930
1931         if (qede_stop_queues(qdev))
1932                 DP_ERR(edev, "Failed to stop queues\n");
1933
1934         DP_INFO(edev, "Stopped queues\n");
1935
1936         qdev->ops->fastpath_stop(edev);
1937
1938         /* Bring the link down */
1939         qede_dev_set_link_state(eth_dev, false);
1940
1941         qdev->state = QEDE_DEV_STOP;
1942
1943         DP_INFO(edev, "dev_state is QEDE_DEV_STOP\n");
1944 }
1945
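/* No-op burst handler: accepts and returns no packets. Presumably installed
 * in place of the real Rx/Tx handlers while the queues are not ready, so
 * burst calls made in that window safely return 0.
 */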
1946 uint16_t
1947 qede_rxtx_pkts_dummy(__rte_unused void *p_rxq,
1948                      __rte_unused struct rte_mbuf **pkts,
1949                      __rte_unused uint16_t nb_pkts)
1950 {
1951         return 0;
1952 }