[deb_dpdk.git] / drivers/net/liquidio/lio_rxtx.c
1 /*
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2017 Cavium, Inc. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Cavium, Inc. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <rte_ethdev.h>
35 #include <rte_cycles.h>
36 #include <rte_malloc.h>
37
38 #include "lio_logs.h"
39 #include "lio_struct.h"
40 #include "lio_ethdev.h"
41 #include "lio_rxtx.h"
42
43 #define LIO_MAX_SG 12
44 /* Flush the iq if the available tx_desc count falls below LIO_FLUSH_WM */
45 #define LIO_FLUSH_WM(_iq) ((_iq)->max_count / 2)
46 #define LIO_PKT_IN_DONE_CNT_MASK 0x00000000FFFFFFFFULL
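
/* Illustrative numbers only: with a 512-entry instruction queue,
 * LIO_FLUSH_WM(iq) evaluates to 256, i.e. the transmit path flushes the
 * queue once fewer than 256 descriptors remain free.
 */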
47
48 static void
49 lio_droq_compute_max_packet_bufs(struct lio_droq *droq)
50 {
51         uint32_t count = 0;
52
53         do {
54                 count += droq->buffer_size;
55         } while (count < LIO_MAX_RX_PKTLEN);
56 }
57
58 static void
59 lio_droq_reset_indices(struct lio_droq *droq)
60 {
61         droq->read_idx  = 0;
62         droq->write_idx = 0;
63         droq->refill_idx = 0;
64         droq->refill_count = 0;
65         rte_atomic64_set(&droq->pkts_pending, 0);
66 }
67
68 static void
69 lio_droq_destroy_ring_buffers(struct lio_droq *droq)
70 {
71         uint32_t i;
72
73         for (i = 0; i < droq->max_count; i++) {
74                 if (droq->recv_buf_list[i].buffer) {
75                         rte_pktmbuf_free((struct rte_mbuf *)
76                                          droq->recv_buf_list[i].buffer);
77                         droq->recv_buf_list[i].buffer = NULL;
78                 }
79         }
80
81         lio_droq_reset_indices(droq);
82 }
83
84 static int
85 lio_droq_setup_ring_buffers(struct lio_device *lio_dev,
86                             struct lio_droq *droq)
87 {
88         struct lio_droq_desc *desc_ring = droq->desc_ring;
89         uint32_t i;
90         void *buf;
91
92         for (i = 0; i < droq->max_count; i++) {
93                 buf = rte_pktmbuf_alloc(droq->mpool);
94                 if (buf == NULL) {
95                         lio_dev_err(lio_dev, "buffer alloc failed\n");
96                         droq->stats.rx_alloc_failure++;
97                         lio_droq_destroy_ring_buffers(droq);
98                         return -ENOMEM;
99                 }
100
101                 droq->recv_buf_list[i].buffer = buf;
102                 droq->info_list[i].length = 0;
103
104                 /* map ring buffers into memory */
105                 desc_ring[i].info_ptr = lio_map_ring_info(droq, i);
106                 desc_ring[i].buffer_ptr =
107                         lio_map_ring(droq->recv_buf_list[i].buffer);
108         }
109
110         lio_droq_reset_indices(droq);
111
112         lio_droq_compute_max_packet_bufs(droq);
113
114         return 0;
115 }
116
117 static void
118 lio_dma_zone_free(struct lio_device *lio_dev, const struct rte_memzone *mz)
119 {
120         const struct rte_memzone *mz_tmp;
121         int ret = 0;
122
123         if (mz == NULL) {
124                 lio_dev_err(lio_dev, "Memzone NULL\n");
125                 return;
126         }
127
128         mz_tmp = rte_memzone_lookup(mz->name);
129         if (mz_tmp == NULL) {
130                 lio_dev_err(lio_dev, "Memzone %s Not Found\n", mz->name);
131                 return;
132         }
133
134         ret = rte_memzone_free(mz);
135         if (ret)
136                 lio_dev_err(lio_dev, "Memzone free Failed ret %d\n", ret);
137 }
138
139 /**
140  *  Frees the space allocated for the descriptor ring of the droq.
141  *
142  *  @param lio_dev      - pointer to the lio device structure
143  *  @param q_no         - droq no.
144  */
145 static void
146 lio_delete_droq(struct lio_device *lio_dev, uint32_t q_no)
147 {
148         struct lio_droq *droq = lio_dev->droq[q_no];
149
150         lio_dev_dbg(lio_dev, "OQ[%d]\n", q_no);
151
152         lio_droq_destroy_ring_buffers(droq);
153         rte_free(droq->recv_buf_list);
154         droq->recv_buf_list = NULL;
155         lio_dma_zone_free(lio_dev, droq->info_mz);
156         lio_dma_zone_free(lio_dev, droq->desc_ring_mz);
157
158         memset(droq, 0, LIO_DROQ_SIZE);
159 }
160
161 static void *
162 lio_alloc_info_buffer(struct lio_device *lio_dev,
163                       struct lio_droq *droq, unsigned int socket_id)
164 {
165         droq->info_mz = rte_eth_dma_zone_reserve(lio_dev->eth_dev,
166                                                  "info_list", droq->q_no,
167                                                  (droq->max_count *
168                                                         LIO_DROQ_INFO_SIZE),
169                                                  RTE_CACHE_LINE_SIZE,
170                                                  socket_id);
171
172         if (droq->info_mz == NULL)
173                 return NULL;
174
175         droq->info_list_dma = droq->info_mz->phys_addr;
176         droq->info_alloc_size = droq->info_mz->len;
177         droq->info_base_addr = (size_t)droq->info_mz->addr;
178
179         return droq->info_mz->addr;
180 }
181
182 /**
183  *  Allocates space for the descriptor ring of the droq and
184  *  sets the base addr, num desc, etc. in Octeon registers.
185  *
186  * @param lio_dev       - pointer to the lio device structure
187  * @param q_no          - droq no.
188  * @param num_descs     - number of descriptors in the ring
189  * @return Success: 0   Failure: -1
190  */
191 static int
192 lio_init_droq(struct lio_device *lio_dev, uint32_t q_no,
193               uint32_t num_descs, uint32_t desc_size,
194               struct rte_mempool *mpool, unsigned int socket_id)
195 {
196         uint32_t c_refill_threshold;
197         uint32_t desc_ring_size;
198         struct lio_droq *droq;
199
200         lio_dev_dbg(lio_dev, "OQ[%d]\n", q_no);
201
202         droq = lio_dev->droq[q_no];
203         droq->lio_dev = lio_dev;
204         droq->q_no = q_no;
205         droq->mpool = mpool;
206
207         c_refill_threshold = LIO_OQ_REFILL_THRESHOLD_CFG(lio_dev);
208
209         droq->max_count = num_descs;
210         droq->buffer_size = desc_size;
211
212         desc_ring_size = droq->max_count * LIO_DROQ_DESC_SIZE;
213         droq->desc_ring_mz = rte_eth_dma_zone_reserve(lio_dev->eth_dev,
214                                                       "droq", q_no,
215                                                       desc_ring_size,
216                                                       RTE_CACHE_LINE_SIZE,
217                                                       socket_id);
218
219         if (droq->desc_ring_mz == NULL) {
220                 lio_dev_err(lio_dev,
221                             "Output queue %d ring alloc failed\n", q_no);
222                 return -1;
223         }
224
225         droq->desc_ring_dma = droq->desc_ring_mz->phys_addr;
226         droq->desc_ring = (struct lio_droq_desc *)droq->desc_ring_mz->addr;
227
228         lio_dev_dbg(lio_dev, "droq[%d]: desc_ring: virt: 0x%p, dma: %lx\n",
229                     q_no, droq->desc_ring, (unsigned long)droq->desc_ring_dma);
230         lio_dev_dbg(lio_dev, "droq[%d]: num_desc: %d\n", q_no,
231                     droq->max_count);
232
233         droq->info_list = lio_alloc_info_buffer(lio_dev, droq, socket_id);
234         if (droq->info_list == NULL) {
235                 lio_dev_err(lio_dev, "Cannot allocate memory for info list.\n");
236                 goto init_droq_fail;
237         }
238
239         droq->recv_buf_list = rte_zmalloc_socket("recv_buf_list",
240                                                  (droq->max_count *
241                                                         LIO_DROQ_RECVBUF_SIZE),
242                                                  RTE_CACHE_LINE_SIZE,
243                                                  socket_id);
244         if (droq->recv_buf_list == NULL) {
245                 lio_dev_err(lio_dev,
246                             "Output queue recv buf list alloc failed\n");
247                 goto init_droq_fail;
248         }
249
250         if (lio_droq_setup_ring_buffers(lio_dev, droq))
251                 goto init_droq_fail;
252
253         droq->refill_threshold = c_refill_threshold;
254
255         rte_spinlock_init(&droq->lock);
256
257         lio_dev->fn_list.setup_oq_regs(lio_dev, q_no);
258
259         lio_dev->io_qmask.oq |= (1ULL << q_no);
260
261         return 0;
262
263 init_droq_fail:
264         lio_delete_droq(lio_dev, q_no);
265
266         return -1;
267 }
268
269 int
270 lio_setup_droq(struct lio_device *lio_dev, int oq_no, int num_descs,
271                int desc_size, struct rte_mempool *mpool, unsigned int socket_id)
272 {
273         struct lio_droq *droq;
274
275         PMD_INIT_FUNC_TRACE();
276
277         if (lio_dev->droq[oq_no]) {
278                 lio_dev_dbg(lio_dev, "Droq %d in use\n", oq_no);
279                 return 0;
280         }
281
282         /* Allocate the data structure for the new droq. */
283         droq = rte_zmalloc_socket("ethdev RX queue", sizeof(*droq),
284                                   RTE_CACHE_LINE_SIZE, socket_id);
285         if (droq == NULL)
286                 return -ENOMEM;
287
288         lio_dev->droq[oq_no] = droq;
289
290         /* Initialize the Droq */
291         if (lio_init_droq(lio_dev, oq_no, num_descs, desc_size, mpool,
292                           socket_id)) {
293                 lio_dev_err(lio_dev, "Droq[%u] Initialization Failed\n", oq_no);
294                 rte_free(lio_dev->droq[oq_no]);
295                 lio_dev->droq[oq_no] = NULL;
296                 return -ENOMEM;
297         }
298
299         lio_dev->num_oqs++;
300
301         lio_dev_dbg(lio_dev, "Total number of OQ: %d\n", lio_dev->num_oqs);
302
303         /* Send credits for the Octeon output queue. Credits are always
304          * sent after the output queue is enabled.
305          */
306         rte_write32(lio_dev->droq[oq_no]->max_count,
307                     lio_dev->droq[oq_no]->pkts_credit_reg);
308         rte_wmb();
309
310         return 0;
311 }
312
313 static inline uint32_t
314 lio_droq_get_bufcount(uint32_t buf_size, uint32_t total_len)
315 {
316         uint32_t buf_cnt = 0;
317
318         while (total_len > (buf_size * buf_cnt))
319                 buf_cnt++;
320
321         return buf_cnt;
322 }
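
/* The loop above is a plain ceiling division: for example, with a
 * 2048-byte buffer size a 5000-byte packet needs
 * (5000 + 2048 - 1) / 2048 = 3 receive buffers.
 */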
323
324 /* If we were not able to refill all buffers, try to move around
325  * the buffers that were not dispatched.
326  */
327 static inline uint32_t
328 lio_droq_refill_pullup_descs(struct lio_droq *droq,
329                              struct lio_droq_desc *desc_ring)
330 {
331         uint32_t refill_index = droq->refill_idx;
332         uint32_t desc_refilled = 0;
333
334         while (refill_index != droq->read_idx) {
335                 if (droq->recv_buf_list[refill_index].buffer) {
336                         droq->recv_buf_list[droq->refill_idx].buffer =
337                                 droq->recv_buf_list[refill_index].buffer;
338                         desc_ring[droq->refill_idx].buffer_ptr =
339                                 desc_ring[refill_index].buffer_ptr;
340                         droq->recv_buf_list[refill_index].buffer = NULL;
341                         desc_ring[refill_index].buffer_ptr = 0;
342                         do {
343                                 droq->refill_idx = lio_incr_index(
344                                                         droq->refill_idx, 1,
345                                                         droq->max_count);
346                                 desc_refilled++;
347                                 droq->refill_count--;
348                         } while (droq->recv_buf_list[droq->refill_idx].buffer);
349                 }
350                 refill_index = lio_incr_index(refill_index, 1,
351                                               droq->max_count);
352         }       /* while */
353
354         return desc_refilled;
355 }
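
/* Example with illustrative indices: if refill_idx is 4, read_idx is 8,
 * and undispatched buffers remain in slots 5 and 7, the pass above moves
 * the buffer from slot 5 to slot 4 and the one from slot 7 to slot 5, so
 * the region that still needs new buffers stays contiguous.
 */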
356
357 /* lio_droq_refill
358  *
359  * @param droq          - droq in which descriptors require new buffers.
360  *
361  * Description:
362  *  Called during normal DROQ processing in interrupt mode or by the poll
363  *  thread to refill the descriptors from which buffers were dispatched
364  *  to upper layers. Attempts to allocate new buffers. If that fails, moves
365  *  up buffers (that were not dispatched) to form a contiguous ring.
366  *
367  * Returns:
368  *  No of descriptors refilled.
369  *
370  * Locks:
371  * This routine is called with droq->lock held.
372  */
373 static uint32_t
374 lio_droq_refill(struct lio_droq *droq)
375 {
376         struct lio_droq_desc *desc_ring;
377         uint32_t desc_refilled = 0;
378         void *buf = NULL;
379
380         desc_ring = droq->desc_ring;
381
382         while (droq->refill_count && (desc_refilled < droq->max_count)) {
383                 /* If a valid buffer exists (happens if there is no dispatch),
384                  * reuse the buffer, else allocate.
385                  */
386                 if (droq->recv_buf_list[droq->refill_idx].buffer == NULL) {
387                         buf = rte_pktmbuf_alloc(droq->mpool);
388                         /* If a buffer could not be allocated, no point in
389                          * continuing
390                          */
391                         if (buf == NULL) {
392                                 droq->stats.rx_alloc_failure++;
393                                 break;
394                         }
395
396                         droq->recv_buf_list[droq->refill_idx].buffer = buf;
397                 }
398
399                 desc_ring[droq->refill_idx].buffer_ptr =
400                     lio_map_ring(droq->recv_buf_list[droq->refill_idx].buffer);
401                 /* Reset any previous values in the length field. */
402                 droq->info_list[droq->refill_idx].length = 0;
403
404                 droq->refill_idx = lio_incr_index(droq->refill_idx, 1,
405                                                   droq->max_count);
406                 desc_refilled++;
407                 droq->refill_count--;
408         }
409
410         if (droq->refill_count)
411                 desc_refilled += lio_droq_refill_pullup_descs(droq, desc_ring);
412
413         /* If droq->refill_count is still non-zero here, it will not change
414          * in pass two: the pullup only moved buffers to close gaps in the
415          * ring, so the same number of buffers remains to be refilled.
416          */
418         return desc_refilled;
419 }
420
421 static int
422 lio_droq_fast_process_packet(struct lio_device *lio_dev,
423                              struct lio_droq *droq,
424                              struct rte_mbuf **rx_pkts)
425 {
426         struct rte_mbuf *nicbuf = NULL;
427         struct lio_droq_info *info;
428         uint32_t total_len = 0;
429         int data_total_len = 0;
430         uint32_t pkt_len = 0;
431         union octeon_rh *rh;
432         int data_pkts = 0;
433
434         info = &droq->info_list[droq->read_idx];
435         lio_swap_8B_data((uint64_t *)info, 2);
436
437         if (!info->length)
438                 return -1;
439
440         /* Len of resp hdr is included in the received data len. */
441         info->length -= OCTEON_RH_SIZE;
442         rh = &info->rh;
443
444         total_len += (uint32_t)info->length;
445
446         if (lio_opcode_slow_path(rh)) {
447                 uint32_t buf_cnt;
448
449                 buf_cnt = lio_droq_get_bufcount(droq->buffer_size,
450                                                 (uint32_t)info->length);
451                 droq->read_idx = lio_incr_index(droq->read_idx, buf_cnt,
452                                                 droq->max_count);
453                 droq->refill_count += buf_cnt;
454         } else {
455                 if (info->length <= droq->buffer_size) {
456                         if (rh->r_dh.has_hash)
457                                 pkt_len = (uint32_t)(info->length - 8);
458                         else
459                                 pkt_len = (uint32_t)info->length;
460
461                         nicbuf = droq->recv_buf_list[droq->read_idx].buffer;
462                         droq->recv_buf_list[droq->read_idx].buffer = NULL;
463                         droq->read_idx = lio_incr_index(
464                                                 droq->read_idx, 1,
465                                                 droq->max_count);
466                         droq->refill_count++;
467
468                         if (likely(nicbuf != NULL)) {
469                                 /* We don't have a way to pass flags yet */
470                                 nicbuf->ol_flags = 0;
471                                 if (rh->r_dh.has_hash) {
472                                         uint64_t *hash_ptr;
473
474                                         nicbuf->ol_flags |= PKT_RX_RSS_HASH;
475                                         hash_ptr = rte_pktmbuf_mtod(nicbuf,
476                                                                     uint64_t *);
477                                         lio_swap_8B_data(hash_ptr, 1);
478                                         nicbuf->hash.rss = (uint32_t)*hash_ptr;
479                                         nicbuf->data_off += 8;
480                                 }
481
482                                 nicbuf->pkt_len = pkt_len;
483                                 nicbuf->data_len = pkt_len;
484                                 nicbuf->port = lio_dev->port_id;
485                                 /* Store the mbuf */
486                                 rx_pkts[data_pkts++] = nicbuf;
487                                 data_total_len += pkt_len;
488                         }
489
490                         /* Prefetch buffer pointers when on a cache line
491                          * boundary
492                          */
493                         if ((droq->read_idx & 3) == 0) {
494                                 rte_prefetch0(
495                                     &droq->recv_buf_list[droq->read_idx]);
496                                 rte_prefetch0(
497                                     &droq->info_list[droq->read_idx]);
498                         }
499                 } else {
500                         struct rte_mbuf *first_buf = NULL;
501                         struct rte_mbuf *last_buf = NULL;
502
503                         while (pkt_len < info->length) {
504                                 int cpy_len = 0;
505
506                                 cpy_len = ((pkt_len + droq->buffer_size) >
507                                                 info->length)
508                                                 ? ((uint32_t)info->length -
509                                                         pkt_len)
510                                                 : droq->buffer_size;
511
512                                 nicbuf =
513                                     droq->recv_buf_list[droq->read_idx].buffer;
514                                 droq->recv_buf_list[droq->read_idx].buffer =
515                                     NULL;
516
517                                 if (likely(nicbuf != NULL)) {
518                                         /* Note the first seg */
519                                         if (!pkt_len)
520                                                 first_buf = nicbuf;
521
522                                         nicbuf->port = lio_dev->port_id;
523                                         /* We don't have a way to pass
524                                          * flags yet
525                                          */
526                                         nicbuf->ol_flags = 0;
527                                         if ((!pkt_len) && (rh->r_dh.has_hash)) {
528                                                 uint64_t *hash_ptr;
529
530                                                 nicbuf->ol_flags |=
531                                                     PKT_RX_RSS_HASH;
532                                                 hash_ptr = rte_pktmbuf_mtod(
533                                                     nicbuf, uint64_t *);
534                                                 lio_swap_8B_data(hash_ptr, 1);
535                                                 nicbuf->hash.rss =
536                                                     (uint32_t)*hash_ptr;
537                                                 nicbuf->data_off += 8;
538                                                 nicbuf->pkt_len = cpy_len - 8;
539                                                 nicbuf->data_len = cpy_len - 8;
540                                         } else {
541                                                 nicbuf->pkt_len = cpy_len;
542                                                 nicbuf->data_len = cpy_len;
543                                         }
544
545                                         if (pkt_len)
546                                                 first_buf->nb_segs++;
547
548                                         if (last_buf)
549                                                 last_buf->next = nicbuf;
550
551                                         last_buf = nicbuf;
552                                 } else {
553                                         PMD_RX_LOG(lio_dev, ERR, "no buf\n");
554                                 }
555
556                                 pkt_len += cpy_len;
557                                 droq->read_idx = lio_incr_index(
558                                                         droq->read_idx,
559                                                         1, droq->max_count);
560                                 droq->refill_count++;
561
562                                 /* Prefetch buffer pointers when on a
563                                  * cache line boundary
564                                  */
565                                 if ((droq->read_idx & 3) == 0) {
566                                         rte_prefetch0(&droq->recv_buf_list
567                                                               [droq->read_idx]);
568
569                                         rte_prefetch0(
570                                             &droq->info_list[droq->read_idx]);
571                                 }
572                         }
573                         rx_pkts[data_pkts++] = first_buf;
574                         if (rh->r_dh.has_hash)
575                                 data_total_len += (pkt_len - 8);
576                         else
577                                 data_total_len += pkt_len;
578                 }
579
580                 /* Inform upper layer about packet checksum verification */
581                 struct rte_mbuf *m = rx_pkts[data_pkts - 1];
582
583                 if (rh->r_dh.csum_verified & LIO_IP_CSUM_VERIFIED)
584                         m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
585
586                 if (rh->r_dh.csum_verified & LIO_L4_CSUM_VERIFIED)
587                         m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
588         }
589
590         if (droq->refill_count >= droq->refill_threshold) {
591                 int desc_refilled = lio_droq_refill(droq);
592
593                 /* Flush the droq descriptor data to memory to be sure
594                  * that when we update the credits the data in memory is
595                  * accurate.
596                  */
597                 rte_wmb();
598                 rte_write32(desc_refilled, droq->pkts_credit_reg);
599                 /* make sure mmio write completes */
600                 rte_wmb();
601         }
602
603         info->length = 0;
604         info->rh.rh64 = 0;
605
606         droq->stats.pkts_received++;
607         droq->stats.rx_pkts_received += data_pkts;
608         droq->stats.rx_bytes_received += data_total_len;
609         droq->stats.bytes_received += total_len;
610
611         return data_pkts;
612 }
613
614 static uint32_t
615 lio_droq_fast_process_packets(struct lio_device *lio_dev,
616                               struct lio_droq *droq,
617                               struct rte_mbuf **rx_pkts,
618                               uint32_t pkts_to_process)
619 {
620         int ret, data_pkts = 0;
621         uint32_t pkt;
622
623         for (pkt = 0; pkt < pkts_to_process; pkt++) {
624                 ret = lio_droq_fast_process_packet(lio_dev, droq,
625                                                    &rx_pkts[data_pkts]);
626                 if (ret < 0) {
627                         lio_dev_err(lio_dev, "Port[%d] DROQ[%d] idx: %d len:0, pkt_cnt: %d\n",
628                                     lio_dev->port_id, droq->q_no,
629                                     droq->read_idx, pkts_to_process);
630                         break;
631                 }
632                 data_pkts += ret;
633         }
634
635         rte_atomic64_sub(&droq->pkts_pending, pkt);
636
637         return data_pkts;
638 }
639
640 static inline uint32_t
641 lio_droq_check_hw_for_pkts(struct lio_droq *droq)
642 {
643         uint32_t last_count;
644         uint32_t pkt_count;
645
646         pkt_count = rte_read32(droq->pkts_sent_reg);
647
648         last_count = pkt_count - droq->pkt_count;
649         droq->pkt_count = pkt_count;
650
651         if (last_count)
652                 rte_atomic64_add(&droq->pkts_pending, last_count);
653
654         return last_count;
655 }
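
/* The code above treats pkts_sent_reg as a free-running packet counter:
 * the unsigned 32-bit subtraction yields the number of packets that
 * arrived since the last read, even across counter wrap-around.
 */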
656
657 uint16_t
658 lio_dev_recv_pkts(void *rx_queue,
659                   struct rte_mbuf **rx_pkts,
660                   uint16_t budget)
661 {
662         struct lio_droq *droq = rx_queue;
663         struct lio_device *lio_dev = droq->lio_dev;
664         uint32_t pkts_processed = 0;
665         uint32_t pkt_count = 0;
666
667         lio_droq_check_hw_for_pkts(droq);
668
669         pkt_count = rte_atomic64_read(&droq->pkts_pending);
670         if (!pkt_count)
671                 return 0;
672
673         if (pkt_count > budget)
674                 pkt_count = budget;
675
676         /* Grab the lock */
677         rte_spinlock_lock(&droq->lock);
678         pkts_processed = lio_droq_fast_process_packets(lio_dev,
679                                                        droq, rx_pkts,
680                                                        pkt_count);
681
682         if (droq->pkt_count) {
683                 rte_write32(droq->pkt_count, droq->pkts_sent_reg);
684                 droq->pkt_count = 0;
685         }
686
687         /* Release the spin lock */
688         rte_spinlock_unlock(&droq->lock);
689
690         return pkts_processed;
691 }
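
/* lio_dev_recv_pkts() implements the DPDK rx_burst handler that the
 * ethdev layer invokes from rte_eth_rx_burst(). A minimal polling loop
 * (illustrative only; port_id and queue_id are application values):
 *
 *     struct rte_mbuf *pkts[32];
 *     uint16_t i, n = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *
 *     for (i = 0; i < n; i++)
 *             rte_pktmbuf_free(pkts[i]);
 */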
692
693 void
694 lio_delete_droq_queue(struct lio_device *lio_dev,
695                       int oq_no)
696 {
697         lio_delete_droq(lio_dev, oq_no);
698         lio_dev->num_oqs--;
699         rte_free(lio_dev->droq[oq_no]);
700         lio_dev->droq[oq_no] = NULL;
701 }
702
703 /**
704  *  lio_init_instr_queue()
705  *  @param lio_dev      - pointer to the lio device structure.
706  *  @param txpciq       - queue to be initialized.
707  *
708  *  Called at driver init time for each input queue. The device
709  *  configuration supplies the parameters (instruction size, etc.) for the queue.
710  *
711  *  @return  Success: 0 Failure: -1
712  */
713 static int
714 lio_init_instr_queue(struct lio_device *lio_dev,
715                      union octeon_txpciq txpciq,
716                      uint32_t num_descs, unsigned int socket_id)
717 {
718         uint32_t iq_no = (uint32_t)txpciq.s.q_no;
719         struct lio_instr_queue *iq;
720         uint32_t instr_type;
721         uint32_t q_size;
722
723         instr_type = LIO_IQ_INSTR_TYPE(lio_dev);
724
725         q_size = instr_type * num_descs;
726         iq = lio_dev->instr_queue[iq_no];
727         iq->iq_mz = rte_eth_dma_zone_reserve(lio_dev->eth_dev,
728                                              "instr_queue", iq_no, q_size,
729                                              RTE_CACHE_LINE_SIZE,
730                                              socket_id);
731         if (iq->iq_mz == NULL) {
732                 lio_dev_err(lio_dev, "Cannot allocate memory for instr queue %d\n",
733                             iq_no);
734                 return -1;
735         }
736
737         iq->base_addr_dma = iq->iq_mz->phys_addr;
738         iq->base_addr = (uint8_t *)iq->iq_mz->addr;
739
740         iq->max_count = num_descs;
741
742         /* Initialize a list to hold requests that have been posted to
743          * Octeon but have not yet been fetched by Octeon.
744          */
745         iq->request_list = rte_zmalloc_socket("request_list",
746                                               sizeof(*iq->request_list) *
747                                                         num_descs,
748                                               RTE_CACHE_LINE_SIZE,
749                                               socket_id);
750         if (iq->request_list == NULL) {
751                 lio_dev_err(lio_dev, "Alloc failed for IQ[%d] nr free list\n",
752                             iq_no);
753                 lio_dma_zone_free(lio_dev, iq->iq_mz);
754                 return -1;
755         }
756
757         lio_dev_dbg(lio_dev, "IQ[%d]: base: %p basedma: %lx count: %d\n",
758                     iq_no, iq->base_addr, (unsigned long)iq->base_addr_dma,
759                     iq->max_count);
760
761         iq->lio_dev = lio_dev;
762         iq->txpciq.txpciq64 = txpciq.txpciq64;
763         iq->fill_cnt = 0;
764         iq->host_write_index = 0;
765         iq->lio_read_index = 0;
766         iq->flush_index = 0;
767
768         rte_atomic64_set(&iq->instr_pending, 0);
769
770         /* Initialize the spinlock for this instruction queue */
771         rte_spinlock_init(&iq->lock);
772         rte_spinlock_init(&iq->post_lock);
773
774         rte_atomic64_clear(&iq->iq_flush_running);
775
776         lio_dev->io_qmask.iq |= (1ULL << iq_no);
777
778         /* Set the 32B/64B mode for each input queue */
779         lio_dev->io_qmask.iq64B |= ((instr_type == 64) << iq_no);
780         iq->iqcmd_64B = (instr_type == 64);
781
782         lio_dev->fn_list.setup_iq_regs(lio_dev, iq_no);
783
784         return 0;
785 }
786
787 int
788 lio_setup_instr_queue0(struct lio_device *lio_dev)
789 {
790         union octeon_txpciq txpciq;
791         uint32_t num_descs = 0;
792         uint32_t iq_no = 0;
793
794         num_descs = LIO_NUM_DEF_TX_DESCS_CFG(lio_dev);
795
796         lio_dev->num_iqs = 0;
797
798         lio_dev->instr_queue[0] = rte_zmalloc(NULL,
799                                         sizeof(struct lio_instr_queue), 0);
800         if (lio_dev->instr_queue[0] == NULL)
801                 return -ENOMEM;
802
803         lio_dev->instr_queue[0]->q_index = 0;
804         lio_dev->instr_queue[0]->app_ctx = (void *)(size_t)0;
805         txpciq.txpciq64 = 0;
806         txpciq.s.q_no = iq_no;
807         txpciq.s.pkind = lio_dev->pfvf_hsword.pkind;
808         txpciq.s.use_qpg = 0;
809         txpciq.s.qpg = 0;
810         if (lio_init_instr_queue(lio_dev, txpciq, num_descs, SOCKET_ID_ANY)) {
811                 rte_free(lio_dev->instr_queue[0]);
812                 lio_dev->instr_queue[0] = NULL;
813                 return -1;
814         }
815
816         lio_dev->num_iqs++;
817
818         return 0;
819 }
820
821 /**
822  *  lio_delete_instr_queue()
823  *  @param lio_dev      - pointer to the lio device structure.
824  *  @param iq_no        - queue to be deleted.
825  *
826  *  Called at driver unload time for each input queue. Deletes all
827  *  allocated resources for the input queue.
828  */
829 static void
830 lio_delete_instr_queue(struct lio_device *lio_dev, uint32_t iq_no)
831 {
832         struct lio_instr_queue *iq = lio_dev->instr_queue[iq_no];
833
834         rte_free(iq->request_list);
835         iq->request_list = NULL;
836         lio_dma_zone_free(lio_dev, iq->iq_mz);
837 }
838
839 void
840 lio_free_instr_queue0(struct lio_device *lio_dev)
841 {
842         lio_delete_instr_queue(lio_dev, 0);
843         rte_free(lio_dev->instr_queue[0]);
844         lio_dev->instr_queue[0] = NULL;
845         lio_dev->num_iqs--;
846 }
847
848 /* Return 0 on success, -1 on failure */
849 int
850 lio_setup_iq(struct lio_device *lio_dev, int q_index,
851              union octeon_txpciq txpciq, uint32_t num_descs, void *app_ctx,
852              unsigned int socket_id)
853 {
854         uint32_t iq_no = (uint32_t)txpciq.s.q_no;
855
856         if (lio_dev->instr_queue[iq_no]) {
857                 lio_dev_dbg(lio_dev, "IQ is in use. Cannot create the IQ: %d again\n",
858                             iq_no);
859                 lio_dev->instr_queue[iq_no]->txpciq.txpciq64 = txpciq.txpciq64;
860                 lio_dev->instr_queue[iq_no]->app_ctx = app_ctx;
861                 return 0;
862         }
863
864         lio_dev->instr_queue[iq_no] = rte_zmalloc_socket("ethdev TX queue",
865                                                 sizeof(struct lio_instr_queue),
866                                                 RTE_CACHE_LINE_SIZE, socket_id);
867         if (lio_dev->instr_queue[iq_no] == NULL)
868                 return -1;
869
870         lio_dev->instr_queue[iq_no]->q_index = q_index;
871         lio_dev->instr_queue[iq_no]->app_ctx = app_ctx;
872
873         if (lio_init_instr_queue(lio_dev, txpciq, num_descs, socket_id))
874                 goto release_lio_iq;
875
876         lio_dev->num_iqs++;
877         if (lio_dev->fn_list.enable_io_queues(lio_dev))
878                 goto delete_lio_iq;
879
880         return 0;
881
882 delete_lio_iq:
883         lio_delete_instr_queue(lio_dev, iq_no);
884         lio_dev->num_iqs--;
885 release_lio_iq:
886         rte_free(lio_dev->instr_queue[iq_no]);
887         lio_dev->instr_queue[iq_no] = NULL;
888
889         return -1;
890 }
891
892 int
893 lio_wait_for_instr_fetch(struct lio_device *lio_dev)
894 {
895         int pending, instr_cnt;
896         int i, retry = 1000;
897
898         do {
899                 instr_cnt = 0;
900
901                 for (i = 0; i < LIO_MAX_INSTR_QUEUES(lio_dev); i++) {
902                         if (!(lio_dev->io_qmask.iq & (1ULL << i)))
903                                 continue;
904
905                         if (lio_dev->instr_queue[i] == NULL)
906                                 break;
907
908                         pending = rte_atomic64_read(
909                             &lio_dev->instr_queue[i]->instr_pending);
910                         if (pending)
911                                 lio_flush_iq(lio_dev, lio_dev->instr_queue[i]);
912
913                         instr_cnt += pending;
914                 }
915
916                 if (instr_cnt == 0)
917                         break;
918
919                 rte_delay_ms(1);
920
921         } while (retry-- && instr_cnt);
922
923         return instr_cnt;
924 }
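
/* With retry = 1000 and a 1 ms delay per iteration, the loop above gives
 * Octeon roughly one second to fetch all outstanding instructions before
 * returning the number still pending.
 */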
925
926 static inline void
927 lio_ring_doorbell(struct lio_device *lio_dev,
928                   struct lio_instr_queue *iq)
929 {
930         if (rte_atomic64_read(&lio_dev->status) == LIO_DEV_RUNNING) {
931                 rte_write32(iq->fill_cnt, iq->doorbell_reg);
932                 /* make sure doorbell write goes through */
933                 rte_wmb();
934                 iq->fill_cnt = 0;
935         }
936 }
937
938 static inline void
939 copy_cmd_into_iq(struct lio_instr_queue *iq, uint8_t *cmd)
940 {
941         uint8_t *iqptr, cmdsize;
942
943         cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
944         iqptr = iq->base_addr + (cmdsize * iq->host_write_index);
945
946         rte_memcpy(iqptr, cmd, cmdsize);
947 }
948
949 static inline struct lio_iq_post_status
950 post_command2(struct lio_instr_queue *iq, uint8_t *cmd)
951 {
952         struct lio_iq_post_status st;
953
954         st.status = LIO_IQ_SEND_OK;
955
956         /* This ensures that the read index does not wrap around to the same
957          * position if the queue gets full before Octeon could fetch any instr.
958          */
959         if (rte_atomic64_read(&iq->instr_pending) >=
960                         (int32_t)(iq->max_count - 1)) {
961                 st.status = LIO_IQ_SEND_FAILED;
962                 st.index = -1;
963                 return st;
964         }
965
966         if (rte_atomic64_read(&iq->instr_pending) >=
967                         (int32_t)(iq->max_count - 2))
968                 st.status = LIO_IQ_SEND_STOP;
969
970         copy_cmd_into_iq(iq, cmd);
971
972         /* "index" is returned, host_write_index is modified. */
973         st.index = iq->host_write_index;
974         iq->host_write_index = lio_incr_index(iq->host_write_index, 1,
975                                               iq->max_count);
976         iq->fill_cnt++;
977
978         /* Flush the command into memory. We need to be sure the data is in
979          * memory before indicating that the instruction is pending.
980          */
981         rte_wmb();
982
983         rte_atomic64_inc(&iq->instr_pending);
984
985         return st;
986 }
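
/* Illustrative thresholds: for a 512-entry queue, post_command2() returns
 * LIO_IQ_SEND_FAILED once 511 instructions are pending, and
 * LIO_IQ_SEND_STOP (the command is still queued) once 510 are pending.
 */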
987
988 static inline void
989 lio_add_to_request_list(struct lio_instr_queue *iq,
990                         int idx, void *buf, int reqtype)
991 {
992         iq->request_list[idx].buf = buf;
993         iq->request_list[idx].reqtype = reqtype;
994 }
995
996 static inline void
997 lio_free_netsgbuf(void *buf)
998 {
999         struct lio_buf_free_info *finfo = buf;
1000         struct lio_device *lio_dev = finfo->lio_dev;
1001         struct rte_mbuf *m = finfo->mbuf;
1002         struct lio_gather *g = finfo->g;
1003         uint8_t iq = finfo->iq_no;
1004
1005         /* This will take care of multiple segments also */
1006         rte_pktmbuf_free(m);
1007
1008         rte_spinlock_lock(&lio_dev->glist_lock[iq]);
1009         STAILQ_INSERT_TAIL(&lio_dev->glist_head[iq], &g->list, entries);
1010         rte_spinlock_unlock(&lio_dev->glist_lock[iq]);
1011         rte_free(finfo);
1012 }
1013
1014 /* Can only run in process context */
1015 static int
1016 lio_process_iq_request_list(struct lio_device *lio_dev,
1017                             struct lio_instr_queue *iq)
1018 {
1019         struct octeon_instr_irh *irh = NULL;
1020         uint32_t old = iq->flush_index;
1021         struct lio_soft_command *sc;
1022         uint32_t inst_count = 0;
1023         int reqtype;
1024         void *buf;
1025
1026         while (old != iq->lio_read_index) {
1027                 reqtype = iq->request_list[old].reqtype;
1028                 buf     = iq->request_list[old].buf;
1029
1030                 if (reqtype == LIO_REQTYPE_NONE)
1031                         goto skip_this;
1032
1033                 switch (reqtype) {
1034                 case LIO_REQTYPE_NORESP_NET:
1035                         rte_pktmbuf_free((struct rte_mbuf *)buf);
1036                         break;
1037                 case LIO_REQTYPE_NORESP_NET_SG:
1038                         lio_free_netsgbuf(buf);
1039                         break;
1040                 case LIO_REQTYPE_SOFT_COMMAND:
1041                         sc = buf;
1042                         irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
1043                         if (irh->rflag) {
1044                                 /* We're expecting a response from Octeon.
1045                                 /* We're expecting a response from Octeon.
1046                                  * Add sc to the ordered soft command
1047                                  * response list; lio_process_ordered_list()
1048                                  * will process it when the response arrives.
1050                                 rte_spinlock_lock(&lio_dev->response_list.lock);
1051                                 rte_atomic64_inc(
1052                                     &lio_dev->response_list.pending_req_count);
1053                                 STAILQ_INSERT_TAIL(
1054                                         &lio_dev->response_list.head,
1055                                         &sc->node, entries);
1056                                 rte_spinlock_unlock(
1057                                                 &lio_dev->response_list.lock);
1058                         } else {
1059                                 if (sc->callback) {
1060                                         /* This callback must not sleep */
1061                                         sc->callback(LIO_REQUEST_DONE,
1062                                                      sc->callback_arg);
1063                                 }
1064                         }
1065                         break;
1066                 default:
1067                         lio_dev_err(lio_dev,
1068                                     "Unknown reqtype: %d buf: %p at idx %d\n",
1069                                     reqtype, buf, old);
1070                 }
1071
1072                 iq->request_list[old].buf = NULL;
1073                 iq->request_list[old].reqtype = 0;
1074
1075 skip_this:
1076                 inst_count++;
1077                 old = lio_incr_index(old, 1, iq->max_count);
1078         }
1079
1080         iq->flush_index = old;
1081
1082         return inst_count;
1083 }
1084
1085 static void
1086 lio_update_read_index(struct lio_instr_queue *iq)
1087 {
1088         uint32_t pkt_in_done = rte_read32(iq->inst_cnt_reg);
1089         uint32_t last_done;
1090
1091         last_done = pkt_in_done - iq->pkt_in_done;
1092         iq->pkt_in_done = pkt_in_done;
1093
1094         /* Add last_done and modulo with the IQ size to get new index */
1095         iq->lio_read_index = (iq->lio_read_index +
1096                         (uint32_t)(last_done & LIO_PKT_IN_DONE_CNT_MASK)) %
1097                         iq->max_count;
1098 }
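
/* Example: with a 128-entry queue, lio_read_index = 120 and 16 newly
 * fetched instructions (last_done = 16), the new read index is
 * (120 + 16) % 128 = 8.
 */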
1099
1100 int
1101 lio_flush_iq(struct lio_device *lio_dev, struct lio_instr_queue *iq)
1102 {
1103         uint32_t tot_inst_processed = 0;
1104         uint32_t inst_processed = 0;
1105         int tx_done = 1;
1106
1107         if (rte_atomic64_test_and_set(&iq->iq_flush_running) == 0)
1108                 return tx_done;
1109
1110         rte_spinlock_lock(&iq->lock);
1111
1112         lio_update_read_index(iq);
1113
1114         do {
1115                 /* Process any outstanding IQ packets. */
1116                 if (iq->flush_index == iq->lio_read_index)
1117                         break;
1118
1119                 inst_processed = lio_process_iq_request_list(lio_dev, iq);
1120
1121                 if (inst_processed) {
1122                         rte_atomic64_sub(&iq->instr_pending, inst_processed);
1123                         iq->stats.instr_processed += inst_processed;
1124                 }
1125
1126                 tot_inst_processed += inst_processed;
1127                 inst_processed = 0;
1128
1129         } while (1);
1130
1131         rte_spinlock_unlock(&iq->lock);
1132
1133         rte_atomic64_clear(&iq->iq_flush_running);
1134
1135         return tx_done;
1136 }
1137
1138 static int
1139 lio_send_command(struct lio_device *lio_dev, uint32_t iq_no, void *cmd,
1140                  void *buf, uint32_t datasize, uint32_t reqtype)
1141 {
1142         struct lio_instr_queue *iq = lio_dev->instr_queue[iq_no];
1143         struct lio_iq_post_status st;
1144
1145         rte_spinlock_lock(&iq->post_lock);
1146
1147         st = post_command2(iq, cmd);
1148
1149         if (st.status != LIO_IQ_SEND_FAILED) {
1150                 lio_add_to_request_list(iq, st.index, buf, reqtype);
1151                 LIO_INCR_INSTRQUEUE_PKT_COUNT(lio_dev, iq_no, bytes_sent,
1152                                               datasize);
1153                 LIO_INCR_INSTRQUEUE_PKT_COUNT(lio_dev, iq_no, instr_posted, 1);
1154
1155                 lio_ring_doorbell(lio_dev, iq);
1156         } else {
1157                 LIO_INCR_INSTRQUEUE_PKT_COUNT(lio_dev, iq_no, instr_dropped, 1);
1158         }
1159
1160         rte_spinlock_unlock(&iq->post_lock);
1161
1162         return st.status;
1163 }
1164
1165 void
1166 lio_prepare_soft_command(struct lio_device *lio_dev,
1167                          struct lio_soft_command *sc, uint8_t opcode,
1168                          uint8_t subcode, uint32_t irh_ossp, uint64_t ossp0,
1169                          uint64_t ossp1)
1170 {
1171         struct octeon_instr_pki_ih3 *pki_ih3;
1172         struct octeon_instr_ih3 *ih3;
1173         struct octeon_instr_irh *irh;
1174         struct octeon_instr_rdp *rdp;
1175
1176         RTE_ASSERT(opcode <= 15);
1177         RTE_ASSERT(subcode <= 127);
1178
1179         ih3       = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
1180
1181         ih3->pkind = lio_dev->instr_queue[sc->iq_no]->txpciq.s.pkind;
1182
1183         pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
1184
1185         pki_ih3->w      = 1;
1186         pki_ih3->raw    = 1;
1187         pki_ih3->utag   = 1;
1188         pki_ih3->uqpg   = lio_dev->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
1189         pki_ih3->utt    = 1;
1190
1191         pki_ih3->tag    = LIO_CONTROL;
1192         pki_ih3->tagtype = OCTEON_ATOMIC_TAG;
1193         pki_ih3->qpg    = lio_dev->instr_queue[sc->iq_no]->txpciq.s.qpg;
1194         pki_ih3->pm     = 0x7;
1195         pki_ih3->sl     = 8;
1196
1197         if (sc->datasize)
1198                 ih3->dlengsz = sc->datasize;
1199
1200         irh             = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
1201         irh->opcode     = opcode;
1202         irh->subcode    = subcode;
1203
1204         /* opcode/subcode specific parameters (ossp) */
1205         irh->ossp = irh_ossp;
1206         sc->cmd.cmd3.ossp[0] = ossp0;
1207         sc->cmd.cmd3.ossp[1] = ossp1;
1208
1209         if (sc->rdatasize) {
1210                 rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
1211                 rdp->pcie_port = lio_dev->pcie_port;
1212                 rdp->rlen      = sc->rdatasize;
1213                 irh->rflag = 1;
1214                 /* PKI IH3 */
1215                 ih3->fsz    = OCTEON_SOFT_CMD_RESP_IH3;
1216         } else {
1217                 irh->rflag = 0;
1218                 /* PKI IH3 */
1219                 ih3->fsz    = OCTEON_PCI_CMD_O3;
1220         }
1221 }
1222
1223 int
1224 lio_send_soft_command(struct lio_device *lio_dev,
1225                       struct lio_soft_command *sc)
1226 {
1227         struct octeon_instr_ih3 *ih3;
1228         struct octeon_instr_irh *irh;
1229         uint32_t len = 0;
1230
1231         ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
1232         if (ih3->dlengsz) {
1233                 RTE_ASSERT(sc->dmadptr);
1234                 sc->cmd.cmd3.dptr = sc->dmadptr;
1235         }
1236
1237         irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
1238         if (irh->rflag) {
1239                 RTE_ASSERT(sc->dmarptr);
1240                 RTE_ASSERT(sc->status_word != NULL);
1241                 *sc->status_word = LIO_COMPLETION_WORD_INIT;
1242                 sc->cmd.cmd3.rptr = sc->dmarptr;
1243         }
1244
1245         len = (uint32_t)ih3->dlengsz;
1246
1247         if (sc->wait_time)
1248                 sc->timeout = lio_uptime + sc->wait_time;
1249
1250         return lio_send_command(lio_dev, sc->iq_no, &sc->cmd, sc, len,
1251                                 LIO_REQTYPE_SOFT_COMMAND);
1252 }
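
/* Typical soft-command flow (sketch only; opcode/subcode values, callback
 * and error handling depend on the caller, e.g. lio_ethdev.c):
 *
 *     sc = lio_alloc_soft_command(lio_dev, datasize, rdatasize, ctxsize);
 *     if (sc == NULL)
 *             return -ENOMEM;
 *     lio_prepare_soft_command(lio_dev, sc, opcode, subcode,
 *                              irh_ossp, ossp0, ossp1);
 *     sc->callback = my_completion_cb;     (hypothetical callback)
 *     sc->callback_arg = my_arg;
 *     if (lio_send_soft_command(lio_dev, sc) == LIO_IQ_SEND_FAILED)
 *             lio_free_soft_command(sc);
 */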
1253
1254 int
1255 lio_setup_sc_buffer_pool(struct lio_device *lio_dev)
1256 {
1257         char sc_pool_name[RTE_MEMPOOL_NAMESIZE];
1258         uint16_t buf_size;
1259
1260         buf_size = LIO_SOFT_COMMAND_BUFFER_SIZE + RTE_PKTMBUF_HEADROOM;
1261         snprintf(sc_pool_name, sizeof(sc_pool_name),
1262                  "lio_sc_pool_%u", lio_dev->port_id);
1263         lio_dev->sc_buf_pool = rte_pktmbuf_pool_create(sc_pool_name,
1264                                                 LIO_MAX_SOFT_COMMAND_BUFFERS,
1265                                                 0, 0, buf_size, SOCKET_ID_ANY);
        if (lio_dev->sc_buf_pool == NULL) {
                lio_dev_err(lio_dev, "sc buffer pool allocation failed\n");
                return -ENOMEM;
        }

1266         return 0;
1267 }
1268
1269 void
1270 lio_free_sc_buffer_pool(struct lio_device *lio_dev)
1271 {
1272         rte_mempool_free(lio_dev->sc_buf_pool);
1273 }
1274
1275 struct lio_soft_command *
1276 lio_alloc_soft_command(struct lio_device *lio_dev, uint32_t datasize,
1277                        uint32_t rdatasize, uint32_t ctxsize)
1278 {
1279         uint32_t offset = sizeof(struct lio_soft_command);
1280         struct lio_soft_command *sc;
1281         struct rte_mbuf *m;
1282         uint64_t dma_addr;
1283
1284         RTE_ASSERT((offset + datasize + rdatasize + ctxsize) <=
1285                    LIO_SOFT_COMMAND_BUFFER_SIZE);
1286
1287         m = rte_pktmbuf_alloc(lio_dev->sc_buf_pool);
1288         if (m == NULL) {
1289                 lio_dev_err(lio_dev, "Cannot allocate mbuf for sc\n");
1290                 return NULL;
1291         }
1292
1293         /* Set the rte_mbuf data size; there is only one segment. */
1294         m->pkt_len = LIO_SOFT_COMMAND_BUFFER_SIZE;
1295         m->data_len = LIO_SOFT_COMMAND_BUFFER_SIZE;
1296
1297         /* use rte_mbuf buffer for soft command */
1298         sc = rte_pktmbuf_mtod(m, struct lio_soft_command *);
1299         memset(sc, 0, LIO_SOFT_COMMAND_BUFFER_SIZE);
1300         sc->size = LIO_SOFT_COMMAND_BUFFER_SIZE;
1301         sc->dma_addr = rte_mbuf_data_dma_addr(m);
1302         sc->mbuf = m;
1303
1304         dma_addr = sc->dma_addr;
1305
1306         if (ctxsize) {
1307                 sc->ctxptr = (uint8_t *)sc + offset;
1308                 sc->ctxsize = ctxsize;
1309         }
1310
1311         /* Start data at 128 byte boundary */
1312         offset = (offset + ctxsize + 127) & 0xffffff80;
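        /* e.g. if offset + ctxsize is 200, the next 128-byte boundary is
         * (200 + 127) & 0xffffff80 = 256
         */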
1313
1314         if (datasize) {
1315                 sc->virtdptr = (uint8_t *)sc + offset;
1316                 sc->dmadptr = dma_addr + offset;
1317                 sc->datasize = datasize;
1318         }
1319
1320         /* Start rdata at 128 byte boundary */
1321         offset = (offset + datasize + 127) & 0xffffff80;
1322
1323         if (rdatasize) {
1324                 RTE_ASSERT(rdatasize >= 16);
1325                 sc->virtrptr = (uint8_t *)sc + offset;
1326                 sc->dmarptr = dma_addr + offset;
1327                 sc->rdatasize = rdatasize;
1328                 sc->status_word = (uint64_t *)((uint8_t *)(sc->virtrptr) +
1329                                                rdatasize - 8);
1330         }
1331
1332         return sc;
1333 }
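
/* Resulting layout inside the single sc_buf_pool mbuf (sketch):
 *
 *   [struct lio_soft_command][ctxptr area][pad][virtdptr, 128B aligned]
 *   [pad][virtrptr, 128B aligned ... status_word in its last 8 bytes]
 */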
1334
1335 void
1336 lio_free_soft_command(struct lio_soft_command *sc)
1337 {
1338         rte_pktmbuf_free(sc->mbuf);
1339 }
1340
1341 void
1342 lio_setup_response_list(struct lio_device *lio_dev)
1343 {
1344         STAILQ_INIT(&lio_dev->response_list.head);
1345         rte_spinlock_init(&lio_dev->response_list.lock);
1346         rte_atomic64_set(&lio_dev->response_list.pending_req_count, 0);
1347 }
1348
1349 int
1350 lio_process_ordered_list(struct lio_device *lio_dev)
1351 {
1352         int resp_to_process = LIO_MAX_ORD_REQS_TO_PROCESS;
1353         struct lio_response_list *ordered_sc_list;
1354         struct lio_soft_command *sc;
1355         int request_complete = 0;
1356         uint64_t status64;
1357         uint32_t status;
1358
1359         ordered_sc_list = &lio_dev->response_list;
1360
1361         do {
1362                 rte_spinlock_lock(&ordered_sc_list->lock);
1363
1364                 if (STAILQ_EMPTY(&ordered_sc_list->head)) {
1365                         /* ordered_sc_list is empty; there is
1366                          * nothing to process
1367                          */
1368                         rte_spinlock_unlock(&ordered_sc_list->lock);
1369                         return -1;
1370                 }
1371
1372                 sc = LIO_STQUEUE_FIRST_ENTRY(&ordered_sc_list->head,
1373                                              struct lio_soft_command, node);
1374
1375                 status = LIO_REQUEST_PENDING;
1376
1377                 /* Check if Octeon has finished DMA'ing a response
1378                  * to the location rptr points to.
1379                  */
1380                 status64 = *sc->status_word;
1381
1382                 if (status64 != LIO_COMPLETION_WORD_INIT) {
1383                         /* This logic ensures that all 64b have been written.
1384                          * 1. check byte 0 for non-FF
1385                          * 2. if non-FF, then swap result from BE to host order
1386                          * 3. check byte 7 (swapped to 0) for non-FF
1387                          * 4. if non-FF, use the low 32-bit status code
1388                          * 4. if non-FF, use the low 16-bit status code
1389                          */
1390                         if ((status64 & 0xff) != 0xff) {
1391                                 lio_swap_8B_data(&status64, 1);
1392                                 if (((status64 & 0xff) != 0xff)) {
1393                                         /* retrieve 16-bit firmware status */
1394                                         status = (uint32_t)(status64 &
1395                                                             0xffffULL);
1396                                         if (status) {
1397                                                 status =
1398                                                 LIO_FIRMWARE_STATUS_CODE(
1399                                                                         status);
1400                                         } else {
1401                                                 /* i.e. no error */
1402                                                 status = LIO_REQUEST_DONE;
1403                                         }
1404                                 }
1405                         }
1406                 } else if ((sc->timeout && lio_check_timeout(lio_uptime,
1407                                                              sc->timeout))) {
1408                         lio_dev_err(lio_dev,
1409                                     "cmd failed, timeout (%ld, %ld)\n",
1410                                     (long)lio_uptime, (long)sc->timeout);
1411                         status = LIO_REQUEST_TIMEOUT;
1412                 }
1413
1414                 if (status != LIO_REQUEST_PENDING) {
1415                         /* we have received a response or we have timed out.
1416                          * remove node from linked list
1417                          */
1418                         STAILQ_REMOVE(&ordered_sc_list->head,
1419                                       &sc->node, lio_stailq_node, entries);
1420                         rte_atomic64_dec(
1421                             &lio_dev->response_list.pending_req_count);
1422                         rte_spinlock_unlock(&ordered_sc_list->lock);
1423
1424                         if (sc->callback)
1425                                 sc->callback(status, sc->callback_arg);
1426
1427                         request_complete++;
1428                 } else {
1429                         /* no response yet */
1430                         request_complete = 0;
1431                         rte_spinlock_unlock(&ordered_sc_list->lock);
1432                 }
1433
1434                 /* If we have hit the maximum number of ordered requests
1435                  * to process per call, quit and let this function be
1436                  * invoked again the next time the poll thread runs to
1437                  * process the remaining requests. Without this upper
1438                  * limit the function could take up the entire CPU.
1439                  */
1440                 if (request_complete >= resp_to_process)
1441                         break;
1442         } while (request_complete);
1443
1444         return 0;
1445 }
1446
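/**
 * \brief Remove and return the first node of a stailq list
 * @param head list head
 * @returns the removed node, or NULL if the list is empty
 */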
1447 static inline struct lio_stailq_node *
1448 list_delete_first_node(struct lio_stailq_head *head)
1449 {
1450         struct lio_stailq_node *node;
1451
1452         if (STAILQ_EMPTY(head))
1453                 node = NULL;
1454         else
1455                 node = STAILQ_FIRST(head);
1456
1457         if (node)
1458                 STAILQ_REMOVE(head, node, lio_stailq_node, entries);
1459
1460         return node;
1461 }
1462
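/**
 * \brief Free all gather lists attached to a transmit queue
 * @param txq instruction (tx) queue whose gather lists are released
 */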
1463 void
1464 lio_delete_sglist(struct lio_instr_queue *txq)
1465 {
1466         struct lio_device *lio_dev = txq->lio_dev;
1467         int iq_no = txq->q_index;
1468         struct lio_gather *g;
1469
1470         if (lio_dev->glist_head == NULL)
1471                 return;
1472
1473         do {
1474                 g = (struct lio_gather *)list_delete_first_node(
1475                                                 &lio_dev->glist_head[iq_no]);
1476                 if (g) {
1477                         if (g->sg)
1478                                 rte_free(
1479                                     (void *)((unsigned long)g->sg - g->adjust));
1480                         rte_free(g);
1481                 }
1482         } while (g);
1483 }
1484
1485 /**
1486  * \brief Setup gather lists
1487  * @param lio_dev      lio device pointer
 * @param iq_no        instruction queue number the lists are for
 * @param fw_mapped_iq firmware-mapped queue index, used to free the lists on failure
 * @param num_descs    number of gather lists (one per descriptor) to allocate
 * @param socket_id    NUMA socket to allocate the lists from
 * @returns 0 on success, -ENOMEM if any allocation fails
1488  */
1489 int
1490 lio_setup_sglists(struct lio_device *lio_dev, int iq_no,
1491                   int fw_mapped_iq, int num_descs, unsigned int socket_id)
1492 {
1493         struct lio_gather *g;
1494         int i;
1495
1496         rte_spinlock_init(&lio_dev->glist_lock[iq_no]);
1497
1498         STAILQ_INIT(&lio_dev->glist_head[iq_no]);
1499
1500         for (i = 0; i < num_descs; i++) {
1501                 g = rte_zmalloc_socket(NULL, sizeof(*g), RTE_CACHE_LINE_SIZE,
1502                                        socket_id);
1503                 if (g == NULL) {
1504                         lio_dev_err(lio_dev,
1505                                     "lio_gather memory allocation failed for qno %d\n",
1506                                     iq_no);
1507                         break;
1508                 }
1509
1510                 g->sg_size =
1511                     ((ROUNDUP4(LIO_MAX_SG) >> 2) * LIO_SG_ENTRY_SIZE);
1512
1513                 g->sg = rte_zmalloc_socket(NULL, g->sg_size + 8,
1514                                            RTE_CACHE_LINE_SIZE, socket_id);
1515                 if (g->sg == NULL) {
1516                         lio_dev_err(lio_dev,
1517                                     "sg list memory allocation failed for qno %d\n",
1518                                     iq_no);
1519                         rte_free(g);
1520                         break;
1521                 }
1522
1523                 /* The gather component should be aligned on a 64-bit boundary */
1524                 if (((unsigned long)g->sg) & 7) {
1525                         g->adjust = 8 - (((unsigned long)g->sg) & 7);
1526                         g->sg =
1527                             (struct lio_sg_entry *)((unsigned long)g->sg +
1528                                                        g->adjust);
1529                 }
1530
1531                 STAILQ_INSERT_TAIL(&lio_dev->glist_head[iq_no], &g->list,
1532                                    entries);
1533         }
1534
1535         if (i != num_descs) {
1536                 lio_delete_sglist(lio_dev->instr_queue[fw_mapped_iq]);
1537                 return -ENOMEM;
1538         }
1539
1540         return 0;
1541 }
1542
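/**
 * \brief Delete an instruction queue and free its driver state
 * @param lio_dev lio device pointer
 * @param iq_no   instruction queue number
 */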
1543 void
1544 lio_delete_instruction_queue(struct lio_device *lio_dev, int iq_no)
1545 {
1546         lio_delete_instr_queue(lio_dev, iq_no);
1547         rte_free(lio_dev->instr_queue[iq_no]);
1548         lio_dev->instr_queue[iq_no] = NULL;
1549         lio_dev->num_iqs--;
1550 }
1551
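/**
 * \brief Number of descriptors still available in an instruction queue
 * @param lio_dev lio device pointer
 * @param q_no    instruction queue number
 */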
1552 static inline uint32_t
1553 lio_iq_get_available(struct lio_device *lio_dev, uint32_t q_no)
1554 {
1555         return ((lio_dev->instr_queue[q_no]->max_count - 1) -
1556                 (uint32_t)rte_atomic64_read(
1557                                 &lio_dev->instr_queue[q_no]->instr_pending));
1558 }
1559
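/**
 * \brief Check whether an instruction queue is full
 * @param lio_dev lio device pointer
 * @param q_no    instruction queue number
 * @returns non-zero when at most one descriptor is still free
 */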
1560 static inline int
1561 lio_iq_is_full(struct lio_device *lio_dev, uint32_t q_no)
1562 {
1563         return ((uint32_t)rte_atomic64_read(
1564                                 &lio_dev->instr_queue[q_no]->instr_pending) >=
1565                                 (lio_dev->instr_queue[q_no]->max_count - 2));
1566 }
1567
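/**
 * \brief Flush an instruction queue until enough descriptors are free
 * @param lio_dev lio device pointer
 * @param iq_no   instruction queue number
 * @returns 0 once the flush watermark is reached, 1 if the retry limit expires
 */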
1568 static int
1569 lio_dev_cleanup_iq(struct lio_device *lio_dev, int iq_no)
1570 {
1571         struct lio_instr_queue *iq = lio_dev->instr_queue[iq_no];
1572         uint32_t count = 10000;
1573
1574         while ((lio_iq_get_available(lio_dev, iq_no) < LIO_FLUSH_WM(iq)) &&
1575                         --count)
1576                 lio_flush_iq(lio_dev, iq);
1577
1578         return count ? 0 : 1;
1579 }
1580
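/**
 * \brief Completion callback for control packet soft commands; marks the
 * associated control command as done and frees the soft command
 * @param status command completion status (unused)
 * @param sc_ptr soft command pointer
 */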
1581 static void
1582 lio_ctrl_cmd_callback(uint32_t status __rte_unused, void *sc_ptr)
1583 {
1584         struct lio_soft_command *sc = sc_ptr;
1585         struct lio_dev_ctrl_cmd *ctrl_cmd;
1586         struct lio_ctrl_pkt *ctrl_pkt;
1587
1588         ctrl_pkt = (struct lio_ctrl_pkt *)sc->ctxptr;
1589         ctrl_cmd = ctrl_pkt->ctrl_cmd;
1590         ctrl_cmd->cond = 1;
1591
1592         lio_free_soft_command(sc);
1593 }
1594
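/**
 * \brief Allocate and prepare a soft command for a control packet
 * @param lio_dev  lio device pointer
 * @param ctrl_pkt control packet carrying the command and optional UDD
 * @returns the prepared soft command, or NULL on allocation failure
 */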
1595 static inline struct lio_soft_command *
1596 lio_alloc_ctrl_pkt_sc(struct lio_device *lio_dev,
1597                       struct lio_ctrl_pkt *ctrl_pkt)
1598 {
1599         struct lio_soft_command *sc = NULL;
1600         uint32_t uddsize, datasize;
1601         uint32_t rdatasize;
1602         uint8_t *data;
1603
1604         uddsize = (uint32_t)(ctrl_pkt->ncmd.s.more * 8);
1605
1606         datasize = OCTEON_CMD_SIZE + uddsize;
1607         rdatasize = (ctrl_pkt->wait_time) ? 16 : 0;
1608
1609         sc = lio_alloc_soft_command(lio_dev, datasize,
1610                                     rdatasize, sizeof(struct lio_ctrl_pkt));
1611         if (sc == NULL)
1612                 return NULL;
1613
1614         rte_memcpy(sc->ctxptr, ctrl_pkt, sizeof(struct lio_ctrl_pkt));
1615
1616         data = (uint8_t *)sc->virtdptr;
1617
1618         rte_memcpy(data, &ctrl_pkt->ncmd, OCTEON_CMD_SIZE);
1619
1620         lio_swap_8B_data((uint64_t *)data, OCTEON_CMD_SIZE >> 3);
1621
1622         if (uddsize) {
1623                 /* Endian-Swap for UDD should have been done by caller. */
1624                 rte_memcpy(data + OCTEON_CMD_SIZE, ctrl_pkt->udd, uddsize);
1625         }
1626
1627         sc->iq_no = (uint32_t)ctrl_pkt->iq_no;
1628
1629         lio_prepare_soft_command(lio_dev, sc,
1630                                  LIO_OPCODE, LIO_OPCODE_CMD,
1631                                  0, 0, 0);
1632
1633         sc->callback = lio_ctrl_cmd_callback;
1634         sc->callback_arg = sc;
1635         sc->wait_time = ctrl_pkt->wait_time;
1636
1637         return sc;
1638 }
1639
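/**
 * \brief Build a soft command from a control packet and send it to the device
 * @param lio_dev  lio device pointer
 * @param ctrl_pkt control packet to send
 * @returns send status on success, -1 on allocation or send failure
 */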
1640 int
1641 lio_send_ctrl_pkt(struct lio_device *lio_dev, struct lio_ctrl_pkt *ctrl_pkt)
1642 {
1643         struct lio_soft_command *sc = NULL;
1644         int retval;
1645
1646         sc = lio_alloc_ctrl_pkt_sc(lio_dev, ctrl_pkt);
1647         if (sc == NULL) {
1648                 lio_dev_err(lio_dev, "soft command allocation failed\n");
1649                 return -1;
1650         }
1651
1652         retval = lio_send_soft_command(lio_dev, sc);
1653         if (retval == LIO_IQ_SEND_FAILED) {
1654                 lio_free_soft_command(sc);
1655                 lio_dev_err(lio_dev, "Port: %d soft command: %d send failed status: %x\n",
1656                             lio_dev->port_id, ctrl_pkt->ncmd.s.cmd, retval);
1657                 return -1;
1658         }
1659
1660         return retval;
1661 }
1662
1663 /** Send data packet to the device
1664  *  @param lio_dev - lio device pointer
1665  *  @param ndata   - control structure with queueing and buffer information
1666  *
1667  *  @returns LIO_IQ_SEND_FAILED if it failed to add to the input queue,
1668  *  LIO_IQ_SEND_STOP if the queue should be stopped, and LIO_IQ_SEND_OK
 *  if it was sent okay.
1669  */
1670 static inline int
1671 lio_send_data_pkt(struct lio_device *lio_dev, struct lio_data_pkt *ndata)
1672 {
1673         return lio_send_command(lio_dev, ndata->q_no, &ndata->cmd,
1674                                 ndata->buf, ndata->datasize, ndata->reqtype);
1675 }
1676
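/** Transmit a burst of packets on a device tx queue
 *  @param tx_queue - driver transmit queue (struct lio_instr_queue)
 *  @param pkts     - array of mbufs to transmit
 *  @param nb_pkts  - number of mbufs in the array
 *
 *  @returns number of packets queued to the device; packets that could not
 *  be queued are counted as tx_dropped.
 */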
1677 uint16_t
1678 lio_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
1679 {
1680         struct lio_instr_queue *txq = tx_queue;
1681         union lio_cmd_setup cmdsetup;
1682         struct lio_device *lio_dev;
1683         struct lio_iq_stats *stats;
1684         struct lio_data_pkt ndata;
1685         int i, processed = 0;
1686         struct rte_mbuf *m;
1687         uint32_t tag = 0;
1688         int status = 0;
1689         int iq_no;
1690
1691         lio_dev = txq->lio_dev;
1692         iq_no = txq->txpciq.s.q_no;
1693         stats = &lio_dev->instr_queue[iq_no]->stats;
1694
1695         if (!lio_dev->intf_open || !lio_dev->linfo.link.s.link_up) {
1696                 PMD_TX_LOG(lio_dev, ERR, "Transmit failed link_status : %d\n",
1697                            lio_dev->linfo.link.s.link_up);
1698                 goto xmit_failed;
1699         }
1700
1701         lio_dev_cleanup_iq(lio_dev, iq_no);
1702
1703         for (i = 0; i < nb_pkts; i++) {
1704                 uint32_t pkt_len = 0;
1705
1706                 m = pkts[i];
1707
1708                 /* Prepare the attributes for the data to be passed to BASE. */
1709                 memset(&ndata, 0, sizeof(struct lio_data_pkt));
1710
1711                 ndata.buf = m;
1712
1713                 ndata.q_no = iq_no;
1714                 if (lio_iq_is_full(lio_dev, ndata.q_no)) {
1715                         stats->tx_iq_busy++;
1716                         if (lio_dev_cleanup_iq(lio_dev, iq_no)) {
1717                                 PMD_TX_LOG(lio_dev, ERR,
1718                                            "Transmit failed iq:%d full\n",
1719                                            ndata.q_no);
1720                                 break;
1721                         }
1722                 }
1723
1724                 cmdsetup.cmd_setup64 = 0;
1725                 cmdsetup.s.iq_no = iq_no;
1726
1727                 /* check checksum offload flags to form cmd */
1728                 if (m->ol_flags & PKT_TX_IP_CKSUM)
1729                         cmdsetup.s.ip_csum = 1;
1730
1731                 if (m->ol_flags & PKT_TX_OUTER_IP_CKSUM)
1732                         cmdsetup.s.tnl_csum = 1;
1733                 else if ((m->ol_flags & PKT_TX_TCP_CKSUM) ||
1734                                 (m->ol_flags & PKT_TX_UDP_CKSUM))
1735                         cmdsetup.s.transport_csum = 1;
1736
1737                 if (m->nb_segs == 1) {
1738                         pkt_len = rte_pktmbuf_data_len(m);
1739                         cmdsetup.s.u.datasize = pkt_len;
1740                         lio_prepare_pci_cmd(lio_dev, &ndata.cmd,
1741                                             &cmdsetup, tag);
1742                         ndata.cmd.cmd3.dptr = rte_mbuf_data_dma_addr(m);
1743                         ndata.reqtype = LIO_REQTYPE_NORESP_NET;
1744                 } else {
1745                         struct lio_buf_free_info *finfo;
1746                         struct lio_gather *g;
1747                         phys_addr_t phyaddr;
1748                         int i, frags;
1749
1750                         finfo = (struct lio_buf_free_info *)rte_malloc(NULL,
1751                                                         sizeof(*finfo), 0);
1752                         if (finfo == NULL) {
1753                                 PMD_TX_LOG(lio_dev, ERR,
1754                                            "free buffer alloc failed\n");
1755                                 goto xmit_failed;
1756                         }
1757
1758                         rte_spinlock_lock(&lio_dev->glist_lock[iq_no]);
1759                         g = (struct lio_gather *)list_delete_first_node(
1760                                                 &lio_dev->glist_head[iq_no]);
1761                         rte_spinlock_unlock(&lio_dev->glist_lock[iq_no]);
1762                         if (g == NULL) {
1763                                 PMD_TX_LOG(lio_dev, ERR,
1764                                            "Transmit scatter gather: glist null!\n");
                                     rte_free(finfo); /* avoid leaking finfo */
1765                                 goto xmit_failed;
1766                         }
1767
1768                         cmdsetup.s.gather = 1;
1769                         cmdsetup.s.u.gatherptrs = m->nb_segs;
1770                         lio_prepare_pci_cmd(lio_dev, &ndata.cmd,
1771                                             &cmdsetup, tag);
1772
1773                         memset(g->sg, 0, g->sg_size);
1774                         g->sg[0].ptr[0] = rte_mbuf_data_dma_addr(m);
1775                         lio_add_sg_size(&g->sg[0], m->data_len, 0);
1776                         pkt_len = m->data_len;
1777                         finfo->mbuf = m;
1778
1779                         /* First segment is handled above */
1780                         frags = m->nb_segs - 1;
1781                         i = 1;
1782                         m = m->next;
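                             /* Each lio_sg_entry holds four buffer pointers, so
                              * fragment i goes into entry (i >> 2), slot (i & 3).
                              */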
1783                         while (frags--) {
1784                                 g->sg[(i >> 2)].ptr[(i & 3)] =
1785                                                 rte_mbuf_data_dma_addr(m);
1786                                 lio_add_sg_size(&g->sg[(i >> 2)],
1787                                                 m->data_len, (i & 3));
1788                                 pkt_len += m->data_len;
1789                                 i++;
1790                                 m = m->next;
1791                         }
1792
1793                         phyaddr = rte_mem_virt2phy(g->sg);
1794                         if (phyaddr == RTE_BAD_PHYS_ADDR) {
1795                                 PMD_TX_LOG(lio_dev, ERR, "bad phys addr\n");
                                     /* return the gather list and free finfo
                                      * so this error path does not leak them
                                      */
                                     rte_free(finfo);
                                     rte_spinlock_lock(&lio_dev->glist_lock[iq_no]);
                                     STAILQ_INSERT_TAIL(&lio_dev->glist_head[iq_no],
                                                        &g->list, entries);
                                     rte_spinlock_unlock(&lio_dev->glist_lock[iq_no]);
1796                                 goto xmit_failed;
1797                         }
1798
1799                         ndata.cmd.cmd3.dptr = phyaddr;
1800                         ndata.reqtype = LIO_REQTYPE_NORESP_NET_SG;
1801
1802                         finfo->g = g;
1803                         finfo->lio_dev = lio_dev;
1804                         finfo->iq_no = (uint64_t)iq_no;
1805                         ndata.buf = finfo;
1806                 }
1807
1808                 ndata.datasize = pkt_len;
1809
1810                 status = lio_send_data_pkt(lio_dev, &ndata);
1811
1812                 if (unlikely(status == LIO_IQ_SEND_FAILED)) {
1813                         PMD_TX_LOG(lio_dev, ERR, "send failed\n");
1814                         break;
1815                 }
1816
1817                 if (unlikely(status == LIO_IQ_SEND_STOP)) {
1818                         PMD_TX_LOG(lio_dev, DEBUG, "iq full\n");
1819                         /* create space as iq is full */
1820                         lio_dev_cleanup_iq(lio_dev, iq_no);
1821                 }
1822
1823                 stats->tx_done++;
1824                 stats->tx_tot_bytes += pkt_len;
1825                 processed++;
1826         }
1827
1828 xmit_failed:
1829         stats->tx_dropped += (nb_pkts - processed);
1830
1831         return processed;
1832 }
1833
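/**
 * \brief Release all configured tx and rx queues of the ethernet device
 * @param eth_dev pointer to the ethernet device structure
 */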
1834 void
1835 lio_dev_clear_queues(struct rte_eth_dev *eth_dev)
1836 {
1837         struct lio_instr_queue *txq;
1838         struct lio_droq *rxq;
1839         uint16_t i;
1840
1841         for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
1842                 txq = eth_dev->data->tx_queues[i];
1843                 if (txq != NULL) {
1844                         lio_dev_tx_queue_release(txq);
1845                         eth_dev->data->tx_queues[i] = NULL;
1846                 }
1847         }
1848
1849         for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
1850                 rxq = eth_dev->data->rx_queues[i];
1851                 if (rxq != NULL) {
1852                         lio_dev_rx_queue_release(rxq);
1853                         eth_dev->data->rx_queues[i] = NULL;
1854                 }
1855         }
1856 }