New upstream version 18.02
[deb_dpdk.git] / drivers / net / liquidio / lio_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2017 Cavium, Inc
3  */
4
5 #include <rte_ethdev_driver.h>
6 #include <rte_cycles.h>
7 #include <rte_malloc.h>
8
9 #include "lio_logs.h"
10 #include "lio_struct.h"
11 #include "lio_ethdev.h"
12 #include "lio_rxtx.h"
13
14 #define LIO_MAX_SG 12
15 /* Flush iq if available tx_desc fall below LIO_FLUSH_WM */
16 #define LIO_FLUSH_WM(_iq) ((_iq)->nb_desc / 2)
17 #define LIO_PKT_IN_DONE_CNT_MASK 0x00000000FFFFFFFFULL
18
19 static void
20 lio_droq_compute_max_packet_bufs(struct lio_droq *droq)
21 {
22         uint32_t count = 0;
23
24         do {
25                 count += droq->buffer_size;
26         } while (count < LIO_MAX_RX_PKTLEN);
27 }
28
29 static void
30 lio_droq_reset_indices(struct lio_droq *droq)
31 {
32         droq->read_idx  = 0;
33         droq->write_idx = 0;
34         droq->refill_idx = 0;
35         droq->refill_count = 0;
36         rte_atomic64_set(&droq->pkts_pending, 0);
37 }
38
39 static void
40 lio_droq_destroy_ring_buffers(struct lio_droq *droq)
41 {
42         uint32_t i;
43
44         for (i = 0; i < droq->nb_desc; i++) {
45                 if (droq->recv_buf_list[i].buffer) {
46                         rte_pktmbuf_free((struct rte_mbuf *)
47                                          droq->recv_buf_list[i].buffer);
48                         droq->recv_buf_list[i].buffer = NULL;
49                 }
50         }
51
52         lio_droq_reset_indices(droq);
53 }
54
55 static int
56 lio_droq_setup_ring_buffers(struct lio_device *lio_dev,
57                             struct lio_droq *droq)
58 {
59         struct lio_droq_desc *desc_ring = droq->desc_ring;
60         uint32_t i;
61         void *buf;
62
63         for (i = 0; i < droq->nb_desc; i++) {
64                 buf = rte_pktmbuf_alloc(droq->mpool);
65                 if (buf == NULL) {
66                         lio_dev_err(lio_dev, "buffer alloc failed\n");
67                         droq->stats.rx_alloc_failure++;
68                         lio_droq_destroy_ring_buffers(droq);
69                         return -ENOMEM;
70                 }
71
72                 droq->recv_buf_list[i].buffer = buf;
73                 droq->info_list[i].length = 0;
74
75                 /* map ring buffers into memory */
76                 desc_ring[i].info_ptr = lio_map_ring_info(droq, i);
77                 desc_ring[i].buffer_ptr =
78                         lio_map_ring(droq->recv_buf_list[i].buffer);
79         }
80
81         lio_droq_reset_indices(droq);
82
83         lio_droq_compute_max_packet_bufs(droq);
84
85         return 0;
86 }
87
88 static void
89 lio_dma_zone_free(struct lio_device *lio_dev, const struct rte_memzone *mz)
90 {
91         const struct rte_memzone *mz_tmp;
92         int ret = 0;
93
94         if (mz == NULL) {
95                 lio_dev_err(lio_dev, "Memzone NULL\n");
96                 return;
97         }
98
99         mz_tmp = rte_memzone_lookup(mz->name);
100         if (mz_tmp == NULL) {
101                 lio_dev_err(lio_dev, "Memzone %s Not Found\n", mz->name);
102                 return;
103         }
104
105         ret = rte_memzone_free(mz);
106         if (ret)
107                 lio_dev_err(lio_dev, "Memzone free Failed ret %d\n", ret);
108 }
109
110 /**
111  *  Frees the space for descriptor ring for the droq.
112  *
113  *  @param lio_dev      - pointer to the lio device structure
114  *  @param q_no         - droq no.
115  */
116 static void
117 lio_delete_droq(struct lio_device *lio_dev, uint32_t q_no)
118 {
119         struct lio_droq *droq = lio_dev->droq[q_no];
120
121         lio_dev_dbg(lio_dev, "OQ[%d]\n", q_no);
122
123         lio_droq_destroy_ring_buffers(droq);
124         rte_free(droq->recv_buf_list);
125         droq->recv_buf_list = NULL;
126         lio_dma_zone_free(lio_dev, droq->info_mz);
127         lio_dma_zone_free(lio_dev, droq->desc_ring_mz);
128
129         memset(droq, 0, LIO_DROQ_SIZE);
130 }
131
132 static void *
133 lio_alloc_info_buffer(struct lio_device *lio_dev,
134                       struct lio_droq *droq, unsigned int socket_id)
135 {
136         droq->info_mz = rte_eth_dma_zone_reserve(lio_dev->eth_dev,
137                                                  "info_list", droq->q_no,
138                                                  (droq->nb_desc *
139                                                         LIO_DROQ_INFO_SIZE),
140                                                  RTE_CACHE_LINE_SIZE,
141                                                  socket_id);
142
143         if (droq->info_mz == NULL)
144                 return NULL;
145
146         droq->info_list_dma = droq->info_mz->iova;
147         droq->info_alloc_size = droq->info_mz->len;
148         droq->info_base_addr = (size_t)droq->info_mz->addr;
149
150         return droq->info_mz->addr;
151 }
152
153 /**
154  *  Allocates space for the descriptor ring for the droq and
155  *  sets the base addr, num desc etc in Octeon registers.
156  *
157  * @param lio_dev       - pointer to the lio device structure
158  * @param q_no          - droq no.
159  * @param app_ctx       - pointer to application context
160  * @return Success: 0   Failure: -1
161  */
162 static int
163 lio_init_droq(struct lio_device *lio_dev, uint32_t q_no,
164               uint32_t num_descs, uint32_t desc_size,
165               struct rte_mempool *mpool, unsigned int socket_id)
166 {
167         uint32_t c_refill_threshold;
168         uint32_t desc_ring_size;
169         struct lio_droq *droq;
170
171         lio_dev_dbg(lio_dev, "OQ[%d]\n", q_no);
172
173         droq = lio_dev->droq[q_no];
174         droq->lio_dev = lio_dev;
175         droq->q_no = q_no;
176         droq->mpool = mpool;
177
178         c_refill_threshold = LIO_OQ_REFILL_THRESHOLD_CFG(lio_dev);
179
180         droq->nb_desc = num_descs;
181         droq->buffer_size = desc_size;
182
183         desc_ring_size = droq->nb_desc * LIO_DROQ_DESC_SIZE;
184         droq->desc_ring_mz = rte_eth_dma_zone_reserve(lio_dev->eth_dev,
185                                                       "droq", q_no,
186                                                       desc_ring_size,
187                                                       RTE_CACHE_LINE_SIZE,
188                                                       socket_id);
189
190         if (droq->desc_ring_mz == NULL) {
191                 lio_dev_err(lio_dev,
192                             "Output queue %d ring alloc failed\n", q_no);
193                 return -1;
194         }
195
196         droq->desc_ring_dma = droq->desc_ring_mz->iova;
197         droq->desc_ring = (struct lio_droq_desc *)droq->desc_ring_mz->addr;
198
199         lio_dev_dbg(lio_dev, "droq[%d]: desc_ring: virt: 0x%p, dma: %lx\n",
200                     q_no, droq->desc_ring, (unsigned long)droq->desc_ring_dma);
201         lio_dev_dbg(lio_dev, "droq[%d]: num_desc: %d\n", q_no,
202                     droq->nb_desc);
203
204         droq->info_list = lio_alloc_info_buffer(lio_dev, droq, socket_id);
205         if (droq->info_list == NULL) {
206                 lio_dev_err(lio_dev, "Cannot allocate memory for info list.\n");
207                 goto init_droq_fail;
208         }
209
210         droq->recv_buf_list = rte_zmalloc_socket("recv_buf_list",
211                                                  (droq->nb_desc *
212                                                         LIO_DROQ_RECVBUF_SIZE),
213                                                  RTE_CACHE_LINE_SIZE,
214                                                  socket_id);
215         if (droq->recv_buf_list == NULL) {
216                 lio_dev_err(lio_dev,
217                             "Output queue recv buf list alloc failed\n");
218                 goto init_droq_fail;
219         }
220
221         if (lio_droq_setup_ring_buffers(lio_dev, droq))
222                 goto init_droq_fail;
223
224         droq->refill_threshold = c_refill_threshold;
225
226         rte_spinlock_init(&droq->lock);
227
228         lio_dev->fn_list.setup_oq_regs(lio_dev, q_no);
229
230         lio_dev->io_qmask.oq |= (1ULL << q_no);
231
232         return 0;
233
234 init_droq_fail:
235         lio_delete_droq(lio_dev, q_no);
236
237         return -1;
238 }
239
240 int
241 lio_setup_droq(struct lio_device *lio_dev, int oq_no, int num_descs,
242                int desc_size, struct rte_mempool *mpool, unsigned int socket_id)
243 {
244         struct lio_droq *droq;
245
246         PMD_INIT_FUNC_TRACE();
247
248         /* Allocate the DS for the new droq. */
249         droq = rte_zmalloc_socket("ethdev RX queue", sizeof(*droq),
250                                   RTE_CACHE_LINE_SIZE, socket_id);
251         if (droq == NULL)
252                 return -ENOMEM;
253
254         lio_dev->droq[oq_no] = droq;
255
256         /* Initialize the Droq */
257         if (lio_init_droq(lio_dev, oq_no, num_descs, desc_size, mpool,
258                           socket_id)) {
259                 lio_dev_err(lio_dev, "Droq[%u] Initialization Failed\n", oq_no);
260                 rte_free(lio_dev->droq[oq_no]);
261                 lio_dev->droq[oq_no] = NULL;
262                 return -ENOMEM;
263         }
264
265         lio_dev->num_oqs++;
266
267         lio_dev_dbg(lio_dev, "Total number of OQ: %d\n", lio_dev->num_oqs);
268
269         /* Send credit for octeon output queues. credits are always
270          * sent after the output queue is enabled.
271          */
272         rte_write32(lio_dev->droq[oq_no]->nb_desc,
273                     lio_dev->droq[oq_no]->pkts_credit_reg);
274         rte_wmb();
275
276         return 0;
277 }
278
/* Returns how many buffers of buf_size bytes are needed to hold total_len
 * bytes, i.e. total_len / buf_size rounded up.
 *
 * Computed by division rather than by repeated multiplication so that it
 * terminates for every input: a buf_size of 0 yields 0, and large values
 * cannot wrap the 32-bit intermediate product.
 */
static inline uint32_t
lio_droq_get_bufcount(uint32_t buf_size, uint32_t total_len)
{
	if (buf_size == 0)
		return 0;

	return total_len / buf_size + (total_len % buf_size != 0);
}
289
290 /* If we were not able to refill all buffers, try to move around
291  * the buffers that were not dispatched.
292  */
293 static inline uint32_t
294 lio_droq_refill_pullup_descs(struct lio_droq *droq,
295                              struct lio_droq_desc *desc_ring)
296 {
297         uint32_t refill_index = droq->refill_idx;
298         uint32_t desc_refilled = 0;
299
300         while (refill_index != droq->read_idx) {
301                 if (droq->recv_buf_list[refill_index].buffer) {
302                         droq->recv_buf_list[droq->refill_idx].buffer =
303                                 droq->recv_buf_list[refill_index].buffer;
304                         desc_ring[droq->refill_idx].buffer_ptr =
305                                 desc_ring[refill_index].buffer_ptr;
306                         droq->recv_buf_list[refill_index].buffer = NULL;
307                         desc_ring[refill_index].buffer_ptr = 0;
308                         do {
309                                 droq->refill_idx = lio_incr_index(
310                                                         droq->refill_idx, 1,
311                                                         droq->nb_desc);
312                                 desc_refilled++;
313                                 droq->refill_count--;
314                         } while (droq->recv_buf_list[droq->refill_idx].buffer);
315                 }
316                 refill_index = lio_incr_index(refill_index, 1,
317                                               droq->nb_desc);
318         }       /* while */
319
320         return desc_refilled;
321 }
322
323 /* lio_droq_refill
324  *
325  * @param droq          - droq in which descriptors require new buffers.
326  *
327  * Description:
328  *  Called during normal DROQ processing in interrupt mode or by the poll
329  *  thread to refill the descriptors from which buffers were dispatched
330  *  to upper layers. Attempts to allocate new buffers. If that fails, moves
331  *  up buffers (that were not dispatched) to form a contiguous ring.
332  *
333  * Returns:
334  *  No of descriptors refilled.
335  *
336  * Locks:
337  * This routine is called with droq->lock held.
338  */
339 static uint32_t
340 lio_droq_refill(struct lio_droq *droq)
341 {
342         struct lio_droq_desc *desc_ring;
343         uint32_t desc_refilled = 0;
344         void *buf = NULL;
345
346         desc_ring = droq->desc_ring;
347
348         while (droq->refill_count && (desc_refilled < droq->nb_desc)) {
349                 /* If a valid buffer exists (happens if there is no dispatch),
350                  * reuse the buffer, else allocate.
351                  */
352                 if (droq->recv_buf_list[droq->refill_idx].buffer == NULL) {
353                         buf = rte_pktmbuf_alloc(droq->mpool);
354                         /* If a buffer could not be allocated, no point in
355                          * continuing
356                          */
357                         if (buf == NULL) {
358                                 droq->stats.rx_alloc_failure++;
359                                 break;
360                         }
361
362                         droq->recv_buf_list[droq->refill_idx].buffer = buf;
363                 }
364
365                 desc_ring[droq->refill_idx].buffer_ptr =
366                     lio_map_ring(droq->recv_buf_list[droq->refill_idx].buffer);
367                 /* Reset any previous values in the length field. */
368                 droq->info_list[droq->refill_idx].length = 0;
369
370                 droq->refill_idx = lio_incr_index(droq->refill_idx, 1,
371                                                   droq->nb_desc);
372                 desc_refilled++;
373                 droq->refill_count--;
374         }
375
376         if (droq->refill_count)
377                 desc_refilled += lio_droq_refill_pullup_descs(droq, desc_ring);
378
379         /* if droq->refill_count
380          * The refill count would not change in pass two. We only moved buffers
381          * to close the gap in the ring, but we would still have the same no. of
382          * buffers to refill.
383          */
384         return desc_refilled;
385 }
386
387 static int
388 lio_droq_fast_process_packet(struct lio_device *lio_dev,
389                              struct lio_droq *droq,
390                              struct rte_mbuf **rx_pkts)
391 {
392         struct rte_mbuf *nicbuf = NULL;
393         struct lio_droq_info *info;
394         uint32_t total_len = 0;
395         int data_total_len = 0;
396         uint32_t pkt_len = 0;
397         union octeon_rh *rh;
398         int data_pkts = 0;
399
400         info = &droq->info_list[droq->read_idx];
401         lio_swap_8B_data((uint64_t *)info, 2);
402
403         if (!info->length)
404                 return -1;
405
406         /* Len of resp hdr in included in the received data len. */
407         info->length -= OCTEON_RH_SIZE;
408         rh = &info->rh;
409
410         total_len += (uint32_t)info->length;
411
412         if (lio_opcode_slow_path(rh)) {
413                 uint32_t buf_cnt;
414
415                 buf_cnt = lio_droq_get_bufcount(droq->buffer_size,
416                                                 (uint32_t)info->length);
417                 droq->read_idx = lio_incr_index(droq->read_idx, buf_cnt,
418                                                 droq->nb_desc);
419                 droq->refill_count += buf_cnt;
420         } else {
421                 if (info->length <= droq->buffer_size) {
422                         if (rh->r_dh.has_hash)
423                                 pkt_len = (uint32_t)(info->length - 8);
424                         else
425                                 pkt_len = (uint32_t)info->length;
426
427                         nicbuf = droq->recv_buf_list[droq->read_idx].buffer;
428                         droq->recv_buf_list[droq->read_idx].buffer = NULL;
429                         droq->read_idx = lio_incr_index(
430                                                 droq->read_idx, 1,
431                                                 droq->nb_desc);
432                         droq->refill_count++;
433
434                         if (likely(nicbuf != NULL)) {
435                                 /* We don't have a way to pass flags yet */
436                                 nicbuf->ol_flags = 0;
437                                 if (rh->r_dh.has_hash) {
438                                         uint64_t *hash_ptr;
439
440                                         nicbuf->ol_flags |= PKT_RX_RSS_HASH;
441                                         hash_ptr = rte_pktmbuf_mtod(nicbuf,
442                                                                     uint64_t *);
443                                         lio_swap_8B_data(hash_ptr, 1);
444                                         nicbuf->hash.rss = (uint32_t)*hash_ptr;
445                                         nicbuf->data_off += 8;
446                                 }
447
448                                 nicbuf->pkt_len = pkt_len;
449                                 nicbuf->data_len = pkt_len;
450                                 nicbuf->port = lio_dev->port_id;
451                                 /* Store the mbuf */
452                                 rx_pkts[data_pkts++] = nicbuf;
453                                 data_total_len += pkt_len;
454                         }
455
456                         /* Prefetch buffer pointers when on a cache line
457                          * boundary
458                          */
459                         if ((droq->read_idx & 3) == 0) {
460                                 rte_prefetch0(
461                                     &droq->recv_buf_list[droq->read_idx]);
462                                 rte_prefetch0(
463                                     &droq->info_list[droq->read_idx]);
464                         }
465                 } else {
466                         struct rte_mbuf *first_buf = NULL;
467                         struct rte_mbuf *last_buf = NULL;
468
469                         while (pkt_len < info->length) {
470                                 int cpy_len = 0;
471
472                                 cpy_len = ((pkt_len + droq->buffer_size) >
473                                                 info->length)
474                                                 ? ((uint32_t)info->length -
475                                                         pkt_len)
476                                                 : droq->buffer_size;
477
478                                 nicbuf =
479                                     droq->recv_buf_list[droq->read_idx].buffer;
480                                 droq->recv_buf_list[droq->read_idx].buffer =
481                                     NULL;
482
483                                 if (likely(nicbuf != NULL)) {
484                                         /* Note the first seg */
485                                         if (!pkt_len)
486                                                 first_buf = nicbuf;
487
488                                         nicbuf->port = lio_dev->port_id;
489                                         /* We don't have a way to pass
490                                          * flags yet
491                                          */
492                                         nicbuf->ol_flags = 0;
493                                         if ((!pkt_len) && (rh->r_dh.has_hash)) {
494                                                 uint64_t *hash_ptr;
495
496                                                 nicbuf->ol_flags |=
497                                                     PKT_RX_RSS_HASH;
498                                                 hash_ptr = rte_pktmbuf_mtod(
499                                                     nicbuf, uint64_t *);
500                                                 lio_swap_8B_data(hash_ptr, 1);
501                                                 nicbuf->hash.rss =
502                                                     (uint32_t)*hash_ptr;
503                                                 nicbuf->data_off += 8;
504                                                 nicbuf->pkt_len = cpy_len - 8;
505                                                 nicbuf->data_len = cpy_len - 8;
506                                         } else {
507                                                 nicbuf->pkt_len = cpy_len;
508                                                 nicbuf->data_len = cpy_len;
509                                         }
510
511                                         if (pkt_len)
512                                                 first_buf->nb_segs++;
513
514                                         if (last_buf)
515                                                 last_buf->next = nicbuf;
516
517                                         last_buf = nicbuf;
518                                 } else {
519                                         PMD_RX_LOG(lio_dev, ERR, "no buf\n");
520                                 }
521
522                                 pkt_len += cpy_len;
523                                 droq->read_idx = lio_incr_index(
524                                                         droq->read_idx,
525                                                         1, droq->nb_desc);
526                                 droq->refill_count++;
527
528                                 /* Prefetch buffer pointers when on a
529                                  * cache line boundary
530                                  */
531                                 if ((droq->read_idx & 3) == 0) {
532                                         rte_prefetch0(&droq->recv_buf_list
533                                                               [droq->read_idx]);
534
535                                         rte_prefetch0(
536                                             &droq->info_list[droq->read_idx]);
537                                 }
538                         }
539                         rx_pkts[data_pkts++] = first_buf;
540                         if (rh->r_dh.has_hash)
541                                 data_total_len += (pkt_len - 8);
542                         else
543                                 data_total_len += pkt_len;
544                 }
545
546                 /* Inform upper layer about packet checksum verification */
547                 struct rte_mbuf *m = rx_pkts[data_pkts - 1];
548
549                 if (rh->r_dh.csum_verified & LIO_IP_CSUM_VERIFIED)
550                         m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
551
552                 if (rh->r_dh.csum_verified & LIO_L4_CSUM_VERIFIED)
553                         m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
554         }
555
556         if (droq->refill_count >= droq->refill_threshold) {
557                 int desc_refilled = lio_droq_refill(droq);
558
559                 /* Flush the droq descriptor data to memory to be sure
560                  * that when we update the credits the data in memory is
561                  * accurate.
562                  */
563                 rte_wmb();
564                 rte_write32(desc_refilled, droq->pkts_credit_reg);
565                 /* make sure mmio write completes */
566                 rte_wmb();
567         }
568
569         info->length = 0;
570         info->rh.rh64 = 0;
571
572         droq->stats.pkts_received++;
573         droq->stats.rx_pkts_received += data_pkts;
574         droq->stats.rx_bytes_received += data_total_len;
575         droq->stats.bytes_received += total_len;
576
577         return data_pkts;
578 }
579
580 static uint32_t
581 lio_droq_fast_process_packets(struct lio_device *lio_dev,
582                               struct lio_droq *droq,
583                               struct rte_mbuf **rx_pkts,
584                               uint32_t pkts_to_process)
585 {
586         int ret, data_pkts = 0;
587         uint32_t pkt;
588
589         for (pkt = 0; pkt < pkts_to_process; pkt++) {
590                 ret = lio_droq_fast_process_packet(lio_dev, droq,
591                                                    &rx_pkts[data_pkts]);
592                 if (ret < 0) {
593                         lio_dev_err(lio_dev, "Port[%d] DROQ[%d] idx: %d len:0, pkt_cnt: %d\n",
594                                     lio_dev->port_id, droq->q_no,
595                                     droq->read_idx, pkts_to_process);
596                         break;
597                 }
598                 data_pkts += ret;
599         }
600
601         rte_atomic64_sub(&droq->pkts_pending, pkt);
602
603         return data_pkts;
604 }
605
606 static inline uint32_t
607 lio_droq_check_hw_for_pkts(struct lio_droq *droq)
608 {
609         uint32_t last_count;
610         uint32_t pkt_count;
611
612         pkt_count = rte_read32(droq->pkts_sent_reg);
613
614         last_count = pkt_count - droq->pkt_count;
615         droq->pkt_count = pkt_count;
616
617         if (last_count)
618                 rte_atomic64_add(&droq->pkts_pending, last_count);
619
620         return last_count;
621 }
622
623 uint16_t
624 lio_dev_recv_pkts(void *rx_queue,
625                   struct rte_mbuf **rx_pkts,
626                   uint16_t budget)
627 {
628         struct lio_droq *droq = rx_queue;
629         struct lio_device *lio_dev = droq->lio_dev;
630         uint32_t pkts_processed = 0;
631         uint32_t pkt_count = 0;
632
633         lio_droq_check_hw_for_pkts(droq);
634
635         pkt_count = rte_atomic64_read(&droq->pkts_pending);
636         if (!pkt_count)
637                 return 0;
638
639         if (pkt_count > budget)
640                 pkt_count = budget;
641
642         /* Grab the lock */
643         rte_spinlock_lock(&droq->lock);
644         pkts_processed = lio_droq_fast_process_packets(lio_dev,
645                                                        droq, rx_pkts,
646                                                        pkt_count);
647
648         if (droq->pkt_count) {
649                 rte_write32(droq->pkt_count, droq->pkts_sent_reg);
650                 droq->pkt_count = 0;
651         }
652
653         /* Release the spin lock */
654         rte_spinlock_unlock(&droq->lock);
655
656         return pkts_processed;
657 }
658
659 void
660 lio_delete_droq_queue(struct lio_device *lio_dev,
661                       int oq_no)
662 {
663         lio_delete_droq(lio_dev, oq_no);
664         lio_dev->num_oqs--;
665         rte_free(lio_dev->droq[oq_no]);
666         lio_dev->droq[oq_no] = NULL;
667 }
668
669 /**
670  *  lio_init_instr_queue()
671  *  @param lio_dev      - pointer to the lio device structure.
672  *  @param txpciq       - queue to be initialized.
673  *
674  *  Called at driver init time for each input queue. iq_conf has the
675  *  configuration parameters for the queue.
676  *
677  *  @return  Success: 0 Failure: -1
678  */
679 static int
680 lio_init_instr_queue(struct lio_device *lio_dev,
681                      union octeon_txpciq txpciq,
682                      uint32_t num_descs, unsigned int socket_id)
683 {
684         uint32_t iq_no = (uint32_t)txpciq.s.q_no;
685         struct lio_instr_queue *iq;
686         uint32_t instr_type;
687         uint32_t q_size;
688
689         instr_type = LIO_IQ_INSTR_TYPE(lio_dev);
690
691         q_size = instr_type * num_descs;
692         iq = lio_dev->instr_queue[iq_no];
693         iq->iq_mz = rte_eth_dma_zone_reserve(lio_dev->eth_dev,
694                                              "instr_queue", iq_no, q_size,
695                                              RTE_CACHE_LINE_SIZE,
696                                              socket_id);
697         if (iq->iq_mz == NULL) {
698                 lio_dev_err(lio_dev, "Cannot allocate memory for instr queue %d\n",
699                             iq_no);
700                 return -1;
701         }
702
703         iq->base_addr_dma = iq->iq_mz->iova;
704         iq->base_addr = (uint8_t *)iq->iq_mz->addr;
705
706         iq->nb_desc = num_descs;
707
708         /* Initialize a list to holds requests that have been posted to Octeon
709          * but has yet to be fetched by octeon
710          */
711         iq->request_list = rte_zmalloc_socket("request_list",
712                                               sizeof(*iq->request_list) *
713                                                         num_descs,
714                                               RTE_CACHE_LINE_SIZE,
715                                               socket_id);
716         if (iq->request_list == NULL) {
717                 lio_dev_err(lio_dev, "Alloc failed for IQ[%d] nr free list\n",
718                             iq_no);
719                 lio_dma_zone_free(lio_dev, iq->iq_mz);
720                 return -1;
721         }
722
723         lio_dev_dbg(lio_dev, "IQ[%d]: base: %p basedma: %lx count: %d\n",
724                     iq_no, iq->base_addr, (unsigned long)iq->base_addr_dma,
725                     iq->nb_desc);
726
727         iq->lio_dev = lio_dev;
728         iq->txpciq.txpciq64 = txpciq.txpciq64;
729         iq->fill_cnt = 0;
730         iq->host_write_index = 0;
731         iq->lio_read_index = 0;
732         iq->flush_index = 0;
733
734         rte_atomic64_set(&iq->instr_pending, 0);
735
736         /* Initialize the spinlock for this instruction queue */
737         rte_spinlock_init(&iq->lock);
738         rte_spinlock_init(&iq->post_lock);
739
740         rte_atomic64_clear(&iq->iq_flush_running);
741
742         lio_dev->io_qmask.iq |= (1ULL << iq_no);
743
744         /* Set the 32B/64B mode for each input queue */
745         lio_dev->io_qmask.iq64B |= ((instr_type == 64) << iq_no);
746         iq->iqcmd_64B = (instr_type == 64);
747
748         lio_dev->fn_list.setup_iq_regs(lio_dev, iq_no);
749
750         return 0;
751 }
752
753 int
754 lio_setup_instr_queue0(struct lio_device *lio_dev)
755 {
756         union octeon_txpciq txpciq;
757         uint32_t num_descs = 0;
758         uint32_t iq_no = 0;
759
760         num_descs = LIO_NUM_DEF_TX_DESCS_CFG(lio_dev);
761
762         lio_dev->num_iqs = 0;
763
764         lio_dev->instr_queue[0] = rte_zmalloc(NULL,
765                                         sizeof(struct lio_instr_queue), 0);
766         if (lio_dev->instr_queue[0] == NULL)
767                 return -ENOMEM;
768
769         lio_dev->instr_queue[0]->q_index = 0;
770         lio_dev->instr_queue[0]->app_ctx = (void *)(size_t)0;
771         txpciq.txpciq64 = 0;
772         txpciq.s.q_no = iq_no;
773         txpciq.s.pkind = lio_dev->pfvf_hsword.pkind;
774         txpciq.s.use_qpg = 0;
775         txpciq.s.qpg = 0;
776         if (lio_init_instr_queue(lio_dev, txpciq, num_descs, SOCKET_ID_ANY)) {
777                 rte_free(lio_dev->instr_queue[0]);
778                 lio_dev->instr_queue[0] = NULL;
779                 return -1;
780         }
781
782         lio_dev->num_iqs++;
783
784         return 0;
785 }
786
787 /**
788  *  lio_delete_instr_queue()
789  *  @param lio_dev      - pointer to the lio device structure.
790  *  @param iq_no        - queue to be deleted.
791  *
792  *  Called at driver unload time for each input queue. Deletes all
793  *  allocated resources for the input queue.
794  */
795 static void
796 lio_delete_instr_queue(struct lio_device *lio_dev, uint32_t iq_no)
797 {
798         struct lio_instr_queue *iq = lio_dev->instr_queue[iq_no];
799
800         rte_free(iq->request_list);
801         iq->request_list = NULL;
802         lio_dma_zone_free(lio_dev, iq->iq_mz);
803 }
804
805 void
806 lio_free_instr_queue0(struct lio_device *lio_dev)
807 {
808         lio_delete_instr_queue(lio_dev, 0);
809         rte_free(lio_dev->instr_queue[0]);
810         lio_dev->instr_queue[0] = NULL;
811         lio_dev->num_iqs--;
812 }
813
814 /* Return 0 on success, -1 on failure */
815 int
816 lio_setup_iq(struct lio_device *lio_dev, int q_index,
817              union octeon_txpciq txpciq, uint32_t num_descs, void *app_ctx,
818              unsigned int socket_id)
819 {
820         uint32_t iq_no = (uint32_t)txpciq.s.q_no;
821
822         lio_dev->instr_queue[iq_no] = rte_zmalloc_socket("ethdev TX queue",
823                                                 sizeof(struct lio_instr_queue),
824                                                 RTE_CACHE_LINE_SIZE, socket_id);
825         if (lio_dev->instr_queue[iq_no] == NULL)
826                 return -1;
827
828         lio_dev->instr_queue[iq_no]->q_index = q_index;
829         lio_dev->instr_queue[iq_no]->app_ctx = app_ctx;
830
831         if (lio_init_instr_queue(lio_dev, txpciq, num_descs, socket_id)) {
832                 rte_free(lio_dev->instr_queue[iq_no]);
833                 lio_dev->instr_queue[iq_no] = NULL;
834                 return -1;
835         }
836
837         lio_dev->num_iqs++;
838
839         return 0;
840 }
841
842 int
843 lio_wait_for_instr_fetch(struct lio_device *lio_dev)
844 {
845         int pending, instr_cnt;
846         int i, retry = 1000;
847
848         do {
849                 instr_cnt = 0;
850
851                 for (i = 0; i < LIO_MAX_INSTR_QUEUES(lio_dev); i++) {
852                         if (!(lio_dev->io_qmask.iq & (1ULL << i)))
853                                 continue;
854
855                         if (lio_dev->instr_queue[i] == NULL)
856                                 break;
857
858                         pending = rte_atomic64_read(
859                             &lio_dev->instr_queue[i]->instr_pending);
860                         if (pending)
861                                 lio_flush_iq(lio_dev, lio_dev->instr_queue[i]);
862
863                         instr_cnt += pending;
864                 }
865
866                 if (instr_cnt == 0)
867                         break;
868
869                 rte_delay_ms(1);
870
871         } while (retry-- && instr_cnt);
872
873         return instr_cnt;
874 }
875
876 static inline void
877 lio_ring_doorbell(struct lio_device *lio_dev,
878                   struct lio_instr_queue *iq)
879 {
880         if (rte_atomic64_read(&lio_dev->status) == LIO_DEV_RUNNING) {
881                 rte_write32(iq->fill_cnt, iq->doorbell_reg);
882                 /* make sure doorbell write goes through */
883                 rte_wmb();
884                 iq->fill_cnt = 0;
885         }
886 }
887
888 static inline void
889 copy_cmd_into_iq(struct lio_instr_queue *iq, uint8_t *cmd)
890 {
891         uint8_t *iqptr, cmdsize;
892
893         cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
894         iqptr = iq->base_addr + (cmdsize * iq->host_write_index);
895
896         rte_memcpy(iqptr, cmd, cmdsize);
897 }
898
899 static inline struct lio_iq_post_status
900 post_command2(struct lio_instr_queue *iq, uint8_t *cmd)
901 {
902         struct lio_iq_post_status st;
903
904         st.status = LIO_IQ_SEND_OK;
905
906         /* This ensures that the read index does not wrap around to the same
907          * position if queue gets full before Octeon could fetch any instr.
908          */
909         if (rte_atomic64_read(&iq->instr_pending) >=
910                         (int32_t)(iq->nb_desc - 1)) {
911                 st.status = LIO_IQ_SEND_FAILED;
912                 st.index = -1;
913                 return st;
914         }
915
916         if (rte_atomic64_read(&iq->instr_pending) >=
917                         (int32_t)(iq->nb_desc - 2))
918                 st.status = LIO_IQ_SEND_STOP;
919
920         copy_cmd_into_iq(iq, cmd);
921
922         /* "index" is returned, host_write_index is modified. */
923         st.index = iq->host_write_index;
924         iq->host_write_index = lio_incr_index(iq->host_write_index, 1,
925                                               iq->nb_desc);
926         iq->fill_cnt++;
927
928         /* Flush the command into memory. We need to be sure the data is in
929          * memory before indicating that the instruction is pending.
930          */
931         rte_wmb();
932
933         rte_atomic64_inc(&iq->instr_pending);
934
935         return st;
936 }
937
938 static inline void
939 lio_add_to_request_list(struct lio_instr_queue *iq,
940                         int idx, void *buf, int reqtype)
941 {
942         iq->request_list[idx].buf = buf;
943         iq->request_list[idx].reqtype = reqtype;
944 }
945
946 static inline void
947 lio_free_netsgbuf(void *buf)
948 {
949         struct lio_buf_free_info *finfo = buf;
950         struct lio_device *lio_dev = finfo->lio_dev;
951         struct rte_mbuf *m = finfo->mbuf;
952         struct lio_gather *g = finfo->g;
953         uint8_t iq = finfo->iq_no;
954
955         /* This will take care of multiple segments also */
956         rte_pktmbuf_free(m);
957
958         rte_spinlock_lock(&lio_dev->glist_lock[iq]);
959         STAILQ_INSERT_TAIL(&lio_dev->glist_head[iq], &g->list, entries);
960         rte_spinlock_unlock(&lio_dev->glist_lock[iq]);
961         rte_free(finfo);
962 }
963
964 /* Can only run in process context */
965 static int
966 lio_process_iq_request_list(struct lio_device *lio_dev,
967                             struct lio_instr_queue *iq)
968 {
969         struct octeon_instr_irh *irh = NULL;
970         uint32_t old = iq->flush_index;
971         struct lio_soft_command *sc;
972         uint32_t inst_count = 0;
973         int reqtype;
974         void *buf;
975
976         while (old != iq->lio_read_index) {
977                 reqtype = iq->request_list[old].reqtype;
978                 buf     = iq->request_list[old].buf;
979
980                 if (reqtype == LIO_REQTYPE_NONE)
981                         goto skip_this;
982
983                 switch (reqtype) {
984                 case LIO_REQTYPE_NORESP_NET:
985                         rte_pktmbuf_free((struct rte_mbuf *)buf);
986                         break;
987                 case LIO_REQTYPE_NORESP_NET_SG:
988                         lio_free_netsgbuf(buf);
989                         break;
990                 case LIO_REQTYPE_SOFT_COMMAND:
991                         sc = buf;
992                         irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
993                         if (irh->rflag) {
994                                 /* We're expecting a response from Octeon.
995                                  * It's up to lio_process_ordered_list() to
996                                  * process sc. Add sc to the ordered soft
997                                  * command response list because we expect
998                                  * a response from Octeon.
999                                  */
1000                                 rte_spinlock_lock(&lio_dev->response_list.lock);
1001                                 rte_atomic64_inc(
1002                                     &lio_dev->response_list.pending_req_count);
1003                                 STAILQ_INSERT_TAIL(
1004                                         &lio_dev->response_list.head,
1005                                         &sc->node, entries);
1006                                 rte_spinlock_unlock(
1007                                                 &lio_dev->response_list.lock);
1008                         } else {
1009                                 if (sc->callback) {
1010                                         /* This callback must not sleep */
1011                                         sc->callback(LIO_REQUEST_DONE,
1012                                                      sc->callback_arg);
1013                                 }
1014                         }
1015                         break;
1016                 default:
1017                         lio_dev_err(lio_dev,
1018                                     "Unknown reqtype: %d buf: %p at idx %d\n",
1019                                     reqtype, buf, old);
1020                 }
1021
1022                 iq->request_list[old].buf = NULL;
1023                 iq->request_list[old].reqtype = 0;
1024
1025 skip_this:
1026                 inst_count++;
1027                 old = lio_incr_index(old, 1, iq->nb_desc);
1028         }
1029
1030         iq->flush_index = old;
1031
1032         return inst_count;
1033 }
1034
1035 static void
1036 lio_update_read_index(struct lio_instr_queue *iq)
1037 {
1038         uint32_t pkt_in_done = rte_read32(iq->inst_cnt_reg);
1039         uint32_t last_done;
1040
1041         last_done = pkt_in_done - iq->pkt_in_done;
1042         iq->pkt_in_done = pkt_in_done;
1043
1044         /* Add last_done and modulo with the IQ size to get new index */
1045         iq->lio_read_index = (iq->lio_read_index +
1046                         (uint32_t)(last_done & LIO_PKT_IN_DONE_CNT_MASK)) %
1047                         iq->nb_desc;
1048 }
1049
1050 int
1051 lio_flush_iq(struct lio_device *lio_dev, struct lio_instr_queue *iq)
1052 {
1053         uint32_t tot_inst_processed = 0;
1054         uint32_t inst_processed = 0;
1055         int tx_done = 1;
1056
1057         if (rte_atomic64_test_and_set(&iq->iq_flush_running) == 0)
1058                 return tx_done;
1059
1060         rte_spinlock_lock(&iq->lock);
1061
1062         lio_update_read_index(iq);
1063
1064         do {
1065                 /* Process any outstanding IQ packets. */
1066                 if (iq->flush_index == iq->lio_read_index)
1067                         break;
1068
1069                 inst_processed = lio_process_iq_request_list(lio_dev, iq);
1070
1071                 if (inst_processed) {
1072                         rte_atomic64_sub(&iq->instr_pending, inst_processed);
1073                         iq->stats.instr_processed += inst_processed;
1074                 }
1075
1076                 tot_inst_processed += inst_processed;
1077                 inst_processed = 0;
1078
1079         } while (1);
1080
1081         rte_spinlock_unlock(&iq->lock);
1082
1083         rte_atomic64_clear(&iq->iq_flush_running);
1084
1085         return tx_done;
1086 }
1087
1088 static int
1089 lio_send_command(struct lio_device *lio_dev, uint32_t iq_no, void *cmd,
1090                  void *buf, uint32_t datasize, uint32_t reqtype)
1091 {
1092         struct lio_instr_queue *iq = lio_dev->instr_queue[iq_no];
1093         struct lio_iq_post_status st;
1094
1095         rte_spinlock_lock(&iq->post_lock);
1096
1097         st = post_command2(iq, cmd);
1098
1099         if (st.status != LIO_IQ_SEND_FAILED) {
1100                 lio_add_to_request_list(iq, st.index, buf, reqtype);
1101                 LIO_INCR_INSTRQUEUE_PKT_COUNT(lio_dev, iq_no, bytes_sent,
1102                                               datasize);
1103                 LIO_INCR_INSTRQUEUE_PKT_COUNT(lio_dev, iq_no, instr_posted, 1);
1104
1105                 lio_ring_doorbell(lio_dev, iq);
1106         } else {
1107                 LIO_INCR_INSTRQUEUE_PKT_COUNT(lio_dev, iq_no, instr_dropped, 1);
1108         }
1109
1110         rte_spinlock_unlock(&iq->post_lock);
1111
1112         return st.status;
1113 }
1114
1115 void
1116 lio_prepare_soft_command(struct lio_device *lio_dev,
1117                          struct lio_soft_command *sc, uint8_t opcode,
1118                          uint8_t subcode, uint32_t irh_ossp, uint64_t ossp0,
1119                          uint64_t ossp1)
1120 {
1121         struct octeon_instr_pki_ih3 *pki_ih3;
1122         struct octeon_instr_ih3 *ih3;
1123         struct octeon_instr_irh *irh;
1124         struct octeon_instr_rdp *rdp;
1125
1126         RTE_ASSERT(opcode <= 15);
1127         RTE_ASSERT(subcode <= 127);
1128
1129         ih3       = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
1130
1131         ih3->pkind = lio_dev->instr_queue[sc->iq_no]->txpciq.s.pkind;
1132
1133         pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
1134
1135         pki_ih3->w      = 1;
1136         pki_ih3->raw    = 1;
1137         pki_ih3->utag   = 1;
1138         pki_ih3->uqpg   = lio_dev->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
1139         pki_ih3->utt    = 1;
1140
1141         pki_ih3->tag    = LIO_CONTROL;
1142         pki_ih3->tagtype = OCTEON_ATOMIC_TAG;
1143         pki_ih3->qpg    = lio_dev->instr_queue[sc->iq_no]->txpciq.s.qpg;
1144         pki_ih3->pm     = 0x7;
1145         pki_ih3->sl     = 8;
1146
1147         if (sc->datasize)
1148                 ih3->dlengsz = sc->datasize;
1149
1150         irh             = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
1151         irh->opcode     = opcode;
1152         irh->subcode    = subcode;
1153
1154         /* opcode/subcode specific parameters (ossp) */
1155         irh->ossp = irh_ossp;
1156         sc->cmd.cmd3.ossp[0] = ossp0;
1157         sc->cmd.cmd3.ossp[1] = ossp1;
1158
1159         if (sc->rdatasize) {
1160                 rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
1161                 rdp->pcie_port = lio_dev->pcie_port;
1162                 rdp->rlen      = sc->rdatasize;
1163                 irh->rflag = 1;
1164                 /* PKI IH3 */
1165                 ih3->fsz    = OCTEON_SOFT_CMD_RESP_IH3;
1166         } else {
1167                 irh->rflag = 0;
1168                 /* PKI IH3 */
1169                 ih3->fsz    = OCTEON_PCI_CMD_O3;
1170         }
1171 }
1172
1173 int
1174 lio_send_soft_command(struct lio_device *lio_dev,
1175                       struct lio_soft_command *sc)
1176 {
1177         struct octeon_instr_ih3 *ih3;
1178         struct octeon_instr_irh *irh;
1179         uint32_t len = 0;
1180
1181         ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
1182         if (ih3->dlengsz) {
1183                 RTE_ASSERT(sc->dmadptr);
1184                 sc->cmd.cmd3.dptr = sc->dmadptr;
1185         }
1186
1187         irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
1188         if (irh->rflag) {
1189                 RTE_ASSERT(sc->dmarptr);
1190                 RTE_ASSERT(sc->status_word != NULL);
1191                 *sc->status_word = LIO_COMPLETION_WORD_INIT;
1192                 sc->cmd.cmd3.rptr = sc->dmarptr;
1193         }
1194
1195         len = (uint32_t)ih3->dlengsz;
1196
1197         if (sc->wait_time)
1198                 sc->timeout = lio_uptime + sc->wait_time;
1199
1200         return lio_send_command(lio_dev, sc->iq_no, &sc->cmd, sc, len,
1201                                 LIO_REQTYPE_SOFT_COMMAND);
1202 }
1203
1204 int
1205 lio_setup_sc_buffer_pool(struct lio_device *lio_dev)
1206 {
1207         char sc_pool_name[RTE_MEMPOOL_NAMESIZE];
1208         uint16_t buf_size;
1209
1210         buf_size = LIO_SOFT_COMMAND_BUFFER_SIZE + RTE_PKTMBUF_HEADROOM;
1211         snprintf(sc_pool_name, sizeof(sc_pool_name),
1212                  "lio_sc_pool_%u", lio_dev->port_id);
1213         lio_dev->sc_buf_pool = rte_pktmbuf_pool_create(sc_pool_name,
1214                                                 LIO_MAX_SOFT_COMMAND_BUFFERS,
1215                                                 0, 0, buf_size, SOCKET_ID_ANY);
1216         return 0;
1217 }
1218
1219 void
1220 lio_free_sc_buffer_pool(struct lio_device *lio_dev)
1221 {
1222         rte_mempool_free(lio_dev->sc_buf_pool);
1223 }
1224
1225 struct lio_soft_command *
1226 lio_alloc_soft_command(struct lio_device *lio_dev, uint32_t datasize,
1227                        uint32_t rdatasize, uint32_t ctxsize)
1228 {
1229         uint32_t offset = sizeof(struct lio_soft_command);
1230         struct lio_soft_command *sc;
1231         struct rte_mbuf *m;
1232         uint64_t dma_addr;
1233
1234         RTE_ASSERT((offset + datasize + rdatasize + ctxsize) <=
1235                    LIO_SOFT_COMMAND_BUFFER_SIZE);
1236
1237         m = rte_pktmbuf_alloc(lio_dev->sc_buf_pool);
1238         if (m == NULL) {
1239                 lio_dev_err(lio_dev, "Cannot allocate mbuf for sc\n");
1240                 return NULL;
1241         }
1242
1243         /* set rte_mbuf data size and there is only 1 segment */
1244         m->pkt_len = LIO_SOFT_COMMAND_BUFFER_SIZE;
1245         m->data_len = LIO_SOFT_COMMAND_BUFFER_SIZE;
1246
1247         /* use rte_mbuf buffer for soft command */
1248         sc = rte_pktmbuf_mtod(m, struct lio_soft_command *);
1249         memset(sc, 0, LIO_SOFT_COMMAND_BUFFER_SIZE);
1250         sc->size = LIO_SOFT_COMMAND_BUFFER_SIZE;
1251         sc->dma_addr = rte_mbuf_data_iova(m);
1252         sc->mbuf = m;
1253
1254         dma_addr = sc->dma_addr;
1255
1256         if (ctxsize) {
1257                 sc->ctxptr = (uint8_t *)sc + offset;
1258                 sc->ctxsize = ctxsize;
1259         }
1260
1261         /* Start data at 128 byte boundary */
1262         offset = (offset + ctxsize + 127) & 0xffffff80;
1263
1264         if (datasize) {
1265                 sc->virtdptr = (uint8_t *)sc + offset;
1266                 sc->dmadptr = dma_addr + offset;
1267                 sc->datasize = datasize;
1268         }
1269
1270         /* Start rdata at 128 byte boundary */
1271         offset = (offset + datasize + 127) & 0xffffff80;
1272
1273         if (rdatasize) {
1274                 RTE_ASSERT(rdatasize >= 16);
1275                 sc->virtrptr = (uint8_t *)sc + offset;
1276                 sc->dmarptr = dma_addr + offset;
1277                 sc->rdatasize = rdatasize;
1278                 sc->status_word = (uint64_t *)((uint8_t *)(sc->virtrptr) +
1279                                                rdatasize - 8);
1280         }
1281
1282         return sc;
1283 }
1284
1285 void
1286 lio_free_soft_command(struct lio_soft_command *sc)
1287 {
1288         rte_pktmbuf_free(sc->mbuf);
1289 }
1290
1291 void
1292 lio_setup_response_list(struct lio_device *lio_dev)
1293 {
1294         STAILQ_INIT(&lio_dev->response_list.head);
1295         rte_spinlock_init(&lio_dev->response_list.lock);
1296         rte_atomic64_set(&lio_dev->response_list.pending_req_count, 0);
1297 }
1298
1299 int
1300 lio_process_ordered_list(struct lio_device *lio_dev)
1301 {
1302         int resp_to_process = LIO_MAX_ORD_REQS_TO_PROCESS;
1303         struct lio_response_list *ordered_sc_list;
1304         struct lio_soft_command *sc;
1305         int request_complete = 0;
1306         uint64_t status64;
1307         uint32_t status;
1308
1309         ordered_sc_list = &lio_dev->response_list;
1310
1311         do {
1312                 rte_spinlock_lock(&ordered_sc_list->lock);
1313
1314                 if (STAILQ_EMPTY(&ordered_sc_list->head)) {
1315                         /* ordered_sc_list is empty; there is
1316                          * nothing to process
1317                          */
1318                         rte_spinlock_unlock(&ordered_sc_list->lock);
1319                         return -1;
1320                 }
1321
1322                 sc = LIO_STQUEUE_FIRST_ENTRY(&ordered_sc_list->head,
1323                                              struct lio_soft_command, node);
1324
1325                 status = LIO_REQUEST_PENDING;
1326
1327                 /* check if octeon has finished DMA'ing a response
1328                  * to where rptr is pointing to
1329                  */
1330                 status64 = *sc->status_word;
1331
1332                 if (status64 != LIO_COMPLETION_WORD_INIT) {
1333                         /* This logic ensures that all 64b have been written.
1334                          * 1. check byte 0 for non-FF
1335                          * 2. if non-FF, then swap result from BE to host order
1336                          * 3. check byte 7 (swapped to 0) for non-FF
1337                          * 4. if non-FF, use the low 32-bit status code
1338                          * 5. if either byte 0 or byte 7 is FF, don't use status
1339                          */
1340                         if ((status64 & 0xff) != 0xff) {
1341                                 lio_swap_8B_data(&status64, 1);
1342                                 if (((status64 & 0xff) != 0xff)) {
1343                                         /* retrieve 16-bit firmware status */
1344                                         status = (uint32_t)(status64 &
1345                                                             0xffffULL);
1346                                         if (status) {
1347                                                 status =
1348                                                 LIO_FIRMWARE_STATUS_CODE(
1349                                                                         status);
1350                                         } else {
1351                                                 /* i.e. no error */
1352                                                 status = LIO_REQUEST_DONE;
1353                                         }
1354                                 }
1355                         }
1356                 } else if ((sc->timeout && lio_check_timeout(lio_uptime,
1357                                                              sc->timeout))) {
1358                         lio_dev_err(lio_dev,
1359                                     "cmd failed, timeout (%ld, %ld)\n",
1360                                     (long)lio_uptime, (long)sc->timeout);
1361                         status = LIO_REQUEST_TIMEOUT;
1362                 }
1363
1364                 if (status != LIO_REQUEST_PENDING) {
1365                         /* we have received a response or we have timed out.
1366                          * remove node from linked list
1367                          */
1368                         STAILQ_REMOVE(&ordered_sc_list->head,
1369                                       &sc->node, lio_stailq_node, entries);
1370                         rte_atomic64_dec(
1371                             &lio_dev->response_list.pending_req_count);
1372                         rte_spinlock_unlock(&ordered_sc_list->lock);
1373
1374                         if (sc->callback)
1375                                 sc->callback(status, sc->callback_arg);
1376
1377                         request_complete++;
1378                 } else {
1379                         /* no response yet */
1380                         request_complete = 0;
1381                         rte_spinlock_unlock(&ordered_sc_list->lock);
1382                 }
1383
1384                 /* If we hit the Max Ordered requests to process every loop,
1385                  * we quit and let this function be invoked the next time
1386                  * the poll thread runs to process the remaining requests.
1387                  * This function can take up the entire CPU if there is
1388                  * no upper limit to the requests processed.
1389                  */
1390                 if (request_complete >= resp_to_process)
1391                         break;
1392         } while (request_complete);
1393
1394         return 0;
1395 }
1396
1397 static inline struct lio_stailq_node *
1398 list_delete_first_node(struct lio_stailq_head *head)
1399 {
1400         struct lio_stailq_node *node;
1401
1402         if (STAILQ_EMPTY(head))
1403                 node = NULL;
1404         else
1405                 node = STAILQ_FIRST(head);
1406
1407         if (node)
1408                 STAILQ_REMOVE(head, node, lio_stailq_node, entries);
1409
1410         return node;
1411 }
1412
1413 void
1414 lio_delete_sglist(struct lio_instr_queue *txq)
1415 {
1416         struct lio_device *lio_dev = txq->lio_dev;
1417         int iq_no = txq->q_index;
1418         struct lio_gather *g;
1419
1420         if (lio_dev->glist_head == NULL)
1421                 return;
1422
1423         do {
1424                 g = (struct lio_gather *)list_delete_first_node(
1425                                                 &lio_dev->glist_head[iq_no]);
1426                 if (g) {
1427                         if (g->sg)
1428                                 rte_free(
1429                                     (void *)((unsigned long)g->sg - g->adjust));
1430                         rte_free(g);
1431                 }
1432         } while (g);
1433 }
1434
1435 /**
1436  * \brief Setup gather lists
1437  * @param lio per-network private data
1438  */
1439 int
1440 lio_setup_sglists(struct lio_device *lio_dev, int iq_no,
1441                   int fw_mapped_iq, int num_descs, unsigned int socket_id)
1442 {
1443         struct lio_gather *g;
1444         int i;
1445
1446         rte_spinlock_init(&lio_dev->glist_lock[iq_no]);
1447
1448         STAILQ_INIT(&lio_dev->glist_head[iq_no]);
1449
1450         for (i = 0; i < num_descs; i++) {
1451                 g = rte_zmalloc_socket(NULL, sizeof(*g), RTE_CACHE_LINE_SIZE,
1452                                        socket_id);
1453                 if (g == NULL) {
1454                         lio_dev_err(lio_dev,
1455                                     "lio_gather memory allocation failed for qno %d\n",
1456                                     iq_no);
1457                         break;
1458                 }
1459
1460                 g->sg_size =
1461                     ((ROUNDUP4(LIO_MAX_SG) >> 2) * LIO_SG_ENTRY_SIZE);
1462
1463                 g->sg = rte_zmalloc_socket(NULL, g->sg_size + 8,
1464                                            RTE_CACHE_LINE_SIZE, socket_id);
1465                 if (g->sg == NULL) {
1466                         lio_dev_err(lio_dev,
1467                                     "sg list memory allocation failed for qno %d\n",
1468                                     iq_no);
1469                         rte_free(g);
1470                         break;
1471                 }
1472
1473                 /* The gather component should be aligned on 64-bit boundary */
1474                 if (((unsigned long)g->sg) & 7) {
1475                         g->adjust = 8 - (((unsigned long)g->sg) & 7);
1476                         g->sg =
1477                             (struct lio_sg_entry *)((unsigned long)g->sg +
1478                                                        g->adjust);
1479                 }
1480
1481                 STAILQ_INSERT_TAIL(&lio_dev->glist_head[iq_no], &g->list,
1482                                    entries);
1483         }
1484
1485         if (i != num_descs) {
1486                 lio_delete_sglist(lio_dev->instr_queue[fw_mapped_iq]);
1487                 return -ENOMEM;
1488         }
1489
1490         return 0;
1491 }
1492
1493 void
1494 lio_delete_instruction_queue(struct lio_device *lio_dev, int iq_no)
1495 {
1496         lio_delete_instr_queue(lio_dev, iq_no);
1497         rte_free(lio_dev->instr_queue[iq_no]);
1498         lio_dev->instr_queue[iq_no] = NULL;
1499         lio_dev->num_iqs--;
1500 }
1501
1502 static inline uint32_t
1503 lio_iq_get_available(struct lio_device *lio_dev, uint32_t q_no)
1504 {
1505         return ((lio_dev->instr_queue[q_no]->nb_desc - 1) -
1506                 (uint32_t)rte_atomic64_read(
1507                                 &lio_dev->instr_queue[q_no]->instr_pending));
1508 }
1509
1510 static inline int
1511 lio_iq_is_full(struct lio_device *lio_dev, uint32_t q_no)
1512 {
1513         return ((uint32_t)rte_atomic64_read(
1514                                 &lio_dev->instr_queue[q_no]->instr_pending) >=
1515                                 (lio_dev->instr_queue[q_no]->nb_desc - 2));
1516 }
1517
1518 static int
1519 lio_dev_cleanup_iq(struct lio_device *lio_dev, int iq_no)
1520 {
1521         struct lio_instr_queue *iq = lio_dev->instr_queue[iq_no];
1522         uint32_t count = 10000;
1523
1524         while ((lio_iq_get_available(lio_dev, iq_no) < LIO_FLUSH_WM(iq)) &&
1525                         --count)
1526                 lio_flush_iq(lio_dev, iq);
1527
1528         return count ? 0 : 1;
1529 }
1530
1531 static void
1532 lio_ctrl_cmd_callback(uint32_t status __rte_unused, void *sc_ptr)
1533 {
1534         struct lio_soft_command *sc = sc_ptr;
1535         struct lio_dev_ctrl_cmd *ctrl_cmd;
1536         struct lio_ctrl_pkt *ctrl_pkt;
1537
1538         ctrl_pkt = (struct lio_ctrl_pkt *)sc->ctxptr;
1539         ctrl_cmd = ctrl_pkt->ctrl_cmd;
1540         ctrl_cmd->cond = 1;
1541
1542         lio_free_soft_command(sc);
1543 }
1544
1545 static inline struct lio_soft_command *
1546 lio_alloc_ctrl_pkt_sc(struct lio_device *lio_dev,
1547                       struct lio_ctrl_pkt *ctrl_pkt)
1548 {
1549         struct lio_soft_command *sc = NULL;
1550         uint32_t uddsize, datasize;
1551         uint32_t rdatasize;
1552         uint8_t *data;
1553
1554         uddsize = (uint32_t)(ctrl_pkt->ncmd.s.more * 8);
1555
1556         datasize = OCTEON_CMD_SIZE + uddsize;
1557         rdatasize = (ctrl_pkt->wait_time) ? 16 : 0;
1558
1559         sc = lio_alloc_soft_command(lio_dev, datasize,
1560                                     rdatasize, sizeof(struct lio_ctrl_pkt));
1561         if (sc == NULL)
1562                 return NULL;
1563
1564         rte_memcpy(sc->ctxptr, ctrl_pkt, sizeof(struct lio_ctrl_pkt));
1565
1566         data = (uint8_t *)sc->virtdptr;
1567
1568         rte_memcpy(data, &ctrl_pkt->ncmd, OCTEON_CMD_SIZE);
1569
1570         lio_swap_8B_data((uint64_t *)data, OCTEON_CMD_SIZE >> 3);
1571
1572         if (uddsize) {
1573                 /* Endian-Swap for UDD should have been done by caller. */
1574                 rte_memcpy(data + OCTEON_CMD_SIZE, ctrl_pkt->udd, uddsize);
1575         }
1576
1577         sc->iq_no = (uint32_t)ctrl_pkt->iq_no;
1578
1579         lio_prepare_soft_command(lio_dev, sc,
1580                                  LIO_OPCODE, LIO_OPCODE_CMD,
1581                                  0, 0, 0);
1582
1583         sc->callback = lio_ctrl_cmd_callback;
1584         sc->callback_arg = sc;
1585         sc->wait_time = ctrl_pkt->wait_time;
1586
1587         return sc;
1588 }
1589
1590 int
1591 lio_send_ctrl_pkt(struct lio_device *lio_dev, struct lio_ctrl_pkt *ctrl_pkt)
1592 {
1593         struct lio_soft_command *sc = NULL;
1594         int retval;
1595
1596         sc = lio_alloc_ctrl_pkt_sc(lio_dev, ctrl_pkt);
1597         if (sc == NULL) {
1598                 lio_dev_err(lio_dev, "soft command allocation failed\n");
1599                 return -1;
1600         }
1601
1602         retval = lio_send_soft_command(lio_dev, sc);
1603         if (retval == LIO_IQ_SEND_FAILED) {
1604                 lio_free_soft_command(sc);
1605                 lio_dev_err(lio_dev, "Port: %d soft command: %d send failed status: %x\n",
1606                             lio_dev->port_id, ctrl_pkt->ncmd.s.cmd, retval);
1607                 return -1;
1608         }
1609
1610         return retval;
1611 }
1612
1613 /** Send data packet to the device
1614  *  @param lio_dev - lio device pointer
1615  *  @param ndata   - control structure with queueing, and buffer information
1616  *
1617  *  @returns IQ_FAILED if it failed to add to the input queue. IQ_STOP if it the
1618  *  queue should be stopped, and LIO_IQ_SEND_OK if it sent okay.
1619  */
1620 static inline int
1621 lio_send_data_pkt(struct lio_device *lio_dev, struct lio_data_pkt *ndata)
1622 {
1623         return lio_send_command(lio_dev, ndata->q_no, &ndata->cmd,
1624                                 ndata->buf, ndata->datasize, ndata->reqtype);
1625 }
1626
1627 uint16_t
1628 lio_dev_xmit_pkts(void *tx_queue, struct rte_mbuf **pkts, uint16_t nb_pkts)
1629 {
1630         struct lio_instr_queue *txq = tx_queue;
1631         union lio_cmd_setup cmdsetup;
1632         struct lio_device *lio_dev;
1633         struct lio_iq_stats *stats;
1634         struct lio_data_pkt ndata;
1635         int i, processed = 0;
1636         struct rte_mbuf *m;
1637         uint32_t tag = 0;
1638         int status = 0;
1639         int iq_no;
1640
1641         lio_dev = txq->lio_dev;
1642         iq_no = txq->txpciq.s.q_no;
1643         stats = &lio_dev->instr_queue[iq_no]->stats;
1644
1645         if (!lio_dev->intf_open || !lio_dev->linfo.link.s.link_up) {
1646                 PMD_TX_LOG(lio_dev, ERR, "Transmit failed link_status : %d\n",
1647                            lio_dev->linfo.link.s.link_up);
1648                 goto xmit_failed;
1649         }
1650
1651         lio_dev_cleanup_iq(lio_dev, iq_no);
1652
1653         for (i = 0; i < nb_pkts; i++) {
1654                 uint32_t pkt_len = 0;
1655
1656                 m = pkts[i];
1657
1658                 /* Prepare the attributes for the data to be passed to BASE. */
1659                 memset(&ndata, 0, sizeof(struct lio_data_pkt));
1660
1661                 ndata.buf = m;
1662
1663                 ndata.q_no = iq_no;
1664                 if (lio_iq_is_full(lio_dev, ndata.q_no)) {
1665                         stats->tx_iq_busy++;
1666                         if (lio_dev_cleanup_iq(lio_dev, iq_no)) {
1667                                 PMD_TX_LOG(lio_dev, ERR,
1668                                            "Transmit failed iq:%d full\n",
1669                                            ndata.q_no);
1670                                 break;
1671                         }
1672                 }
1673
1674                 cmdsetup.cmd_setup64 = 0;
1675                 cmdsetup.s.iq_no = iq_no;
1676
1677                 /* check checksum offload flags to form cmd */
1678                 if (m->ol_flags & PKT_TX_IP_CKSUM)
1679                         cmdsetup.s.ip_csum = 1;
1680
1681                 if (m->ol_flags & PKT_TX_OUTER_IP_CKSUM)
1682                         cmdsetup.s.tnl_csum = 1;
1683                 else if ((m->ol_flags & PKT_TX_TCP_CKSUM) ||
1684                                 (m->ol_flags & PKT_TX_UDP_CKSUM))
1685                         cmdsetup.s.transport_csum = 1;
1686
1687                 if (m->nb_segs == 1) {
1688                         pkt_len = rte_pktmbuf_data_len(m);
1689                         cmdsetup.s.u.datasize = pkt_len;
1690                         lio_prepare_pci_cmd(lio_dev, &ndata.cmd,
1691                                             &cmdsetup, tag);
1692                         ndata.cmd.cmd3.dptr = rte_mbuf_data_iova(m);
1693                         ndata.reqtype = LIO_REQTYPE_NORESP_NET;
1694                 } else {
1695                         struct lio_buf_free_info *finfo;
1696                         struct lio_gather *g;
1697                         rte_iova_t phyaddr;
1698                         int i, frags;
1699
1700                         finfo = (struct lio_buf_free_info *)rte_malloc(NULL,
1701                                                         sizeof(*finfo), 0);
1702                         if (finfo == NULL) {
1703                                 PMD_TX_LOG(lio_dev, ERR,
1704                                            "free buffer alloc failed\n");
1705                                 goto xmit_failed;
1706                         }
1707
1708                         rte_spinlock_lock(&lio_dev->glist_lock[iq_no]);
1709                         g = (struct lio_gather *)list_delete_first_node(
1710                                                 &lio_dev->glist_head[iq_no]);
1711                         rte_spinlock_unlock(&lio_dev->glist_lock[iq_no]);
1712                         if (g == NULL) {
1713                                 PMD_TX_LOG(lio_dev, ERR,
1714                                            "Transmit scatter gather: glist null!\n");
1715                                 goto xmit_failed;
1716                         }
1717
1718                         cmdsetup.s.gather = 1;
1719                         cmdsetup.s.u.gatherptrs = m->nb_segs;
1720                         lio_prepare_pci_cmd(lio_dev, &ndata.cmd,
1721                                             &cmdsetup, tag);
1722
1723                         memset(g->sg, 0, g->sg_size);
1724                         g->sg[0].ptr[0] = rte_mbuf_data_iova(m);
1725                         lio_add_sg_size(&g->sg[0], m->data_len, 0);
1726                         pkt_len = m->data_len;
1727                         finfo->mbuf = m;
1728
1729                         /* First seg taken care above */
1730                         frags = m->nb_segs - 1;
1731                         i = 1;
1732                         m = m->next;
1733                         while (frags--) {
1734                                 g->sg[(i >> 2)].ptr[(i & 3)] =
1735                                                 rte_mbuf_data_iova(m);
1736                                 lio_add_sg_size(&g->sg[(i >> 2)],
1737                                                 m->data_len, (i & 3));
1738                                 pkt_len += m->data_len;
1739                                 i++;
1740                                 m = m->next;
1741                         }
1742
1743                         phyaddr = rte_mem_virt2iova(g->sg);
1744                         if (phyaddr == RTE_BAD_IOVA) {
1745                                 PMD_TX_LOG(lio_dev, ERR, "bad phys addr\n");
1746                                 goto xmit_failed;
1747                         }
1748
1749                         ndata.cmd.cmd3.dptr = phyaddr;
1750                         ndata.reqtype = LIO_REQTYPE_NORESP_NET_SG;
1751
1752                         finfo->g = g;
1753                         finfo->lio_dev = lio_dev;
1754                         finfo->iq_no = (uint64_t)iq_no;
1755                         ndata.buf = finfo;
1756                 }
1757
1758                 ndata.datasize = pkt_len;
1759
1760                 status = lio_send_data_pkt(lio_dev, &ndata);
1761
1762                 if (unlikely(status == LIO_IQ_SEND_FAILED)) {
1763                         PMD_TX_LOG(lio_dev, ERR, "send failed\n");
1764                         break;
1765                 }
1766
1767                 if (unlikely(status == LIO_IQ_SEND_STOP)) {
1768                         PMD_TX_LOG(lio_dev, DEBUG, "iq full\n");
1769                         /* create space as iq is full */
1770                         lio_dev_cleanup_iq(lio_dev, iq_no);
1771                 }
1772
1773                 stats->tx_done++;
1774                 stats->tx_tot_bytes += pkt_len;
1775                 processed++;
1776         }
1777
1778 xmit_failed:
1779         stats->tx_dropped += (nb_pkts - processed);
1780
1781         return processed;
1782 }
1783
1784 void
1785 lio_dev_clear_queues(struct rte_eth_dev *eth_dev)
1786 {
1787         struct lio_instr_queue *txq;
1788         struct lio_droq *rxq;
1789         uint16_t i;
1790
1791         for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
1792                 txq = eth_dev->data->tx_queues[i];
1793                 if (txq != NULL) {
1794                         lio_dev_tx_queue_release(txq);
1795                         eth_dev->data->tx_queues[i] = NULL;
1796                 }
1797         }
1798
1799         for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
1800                 rxq = eth_dev->data->rx_queues[i];
1801                 if (rxq != NULL) {
1802                         lio_dev_rx_queue_release(rxq);
1803                         eth_dev->data->rx_queues[i] = NULL;
1804                 }
1805         }
1806 }