New upstream version 18.11-rc1
[deb_dpdk.git] / drivers / net / pcap / rte_eth_pcap.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright(c) 2014 6WIND S.A.
4  * All rights reserved.
5  */
6
7 #include <time.h>
8
9 #include <net/if.h>
10 #include <sys/socket.h>
11 #include <sys/ioctl.h>
12 #include <unistd.h>
13
14 #if defined(RTE_EXEC_ENV_BSDAPP)
15 #include <sys/sysctl.h>
16 #include <net/if_dl.h>
17 #endif
18
19 #include <pcap.h>
20
21 #include <rte_cycles.h>
22 #include <rte_ethdev_driver.h>
23 #include <rte_ethdev_vdev.h>
24 #include <rte_kvargs.h>
25 #include <rte_malloc.h>
26 #include <rte_mbuf.h>
27 #include <rte_bus_vdev.h>
28 #include <rte_string_fns.h>
29
/* Snapshot length for the dead pcap handle backing tx file dumpers. */
#define RTE_ETH_PCAP_SNAPSHOT_LEN 65535
/* Max bytes captured per packet on live handles; also sizes tx_pcap_data. */
#define RTE_ETH_PCAP_SNAPLEN ETHER_MAX_JUMBO_FRAME_LEN
#define RTE_ETH_PCAP_PROMISC 1
#define RTE_ETH_PCAP_TIMEOUT -1

/* Devargs keys accepted by this PMD (e.g. "rx_pcap=file", "iface=eth0"). */
#define ETH_PCAP_RX_PCAP_ARG  "rx_pcap"
#define ETH_PCAP_TX_PCAP_ARG  "tx_pcap"
#define ETH_PCAP_RX_IFACE_ARG "rx_iface"
#define ETH_PCAP_RX_IFACE_IN_ARG "rx_iface_in"
#define ETH_PCAP_TX_IFACE_ARG "tx_iface"
#define ETH_PCAP_IFACE_ARG    "iface"
#define ETH_PCAP_PHY_MAC_ARG  "phy_mac"

#define ETH_PCAP_ARG_MAXLEN     64

#define RTE_PMD_PCAP_MAX_QUEUES 16

/* Scratch buffer for libpcap error strings. */
static char errbuf[PCAP_ERRBUF_SIZE];
/* Bounce buffer used to flatten chained mbufs before tx. */
static unsigned char tx_pcap_data[RTE_ETH_PCAP_SNAPLEN];
/* Wall-clock/TSC reference pair used by calculate_timestamp();
 * presumably initialized at driver init — not visible in this chunk.
 */
static struct timeval start_time;
static uint64_t start_cycles;
static uint64_t hz;                /* TSC frequency used for the conversion */
static uint8_t iface_idx;          /* last byte of each generated MAC address */
53
/* Per-queue software counters, updated on the datapath and reported by
 * eth_stats_get().
 */
struct queue_stat {
        volatile unsigned long pkts;
        volatile unsigned long bytes;
        volatile unsigned long err_pkts;
};

/* One rx queue: the pcap handle (file or live interface) plus the mempool
 * rx mbufs are allocated from.
 */
struct pcap_rx_queue {
        pcap_t *pcap;
        uint16_t in_port;                /* port_id stamped into rx mbufs */
        struct rte_mempool *mb_pool;
        struct queue_stat rx_stat;
        char name[PATH_MAX];             /* pcap file name or iface name */
        char type[ETH_PCAP_ARG_MAXLEN];  /* devargs key that created it */
};

/* One tx queue: either a file dumper or a live pcap handle, selected by
 * the devargs type (see eth_dev_start()).
 */
struct pcap_tx_queue {
        pcap_dumper_t *dumper;
        pcap_t *pcap;
        struct queue_stat tx_stat;
        char name[PATH_MAX];
        char type[ETH_PCAP_ARG_MAXLEN];
};

/* Private ethdev data: fixed-size queue arrays plus device identity. */
struct pmd_internals {
        struct pcap_rx_queue rx_queue[RTE_PMD_PCAP_MAX_QUEUES];
        struct pcap_tx_queue tx_queue[RTE_PMD_PCAP_MAX_QUEUES];
        struct ether_addr eth_addr;
        int if_index;
        int single_iface;   /* set for "iface=": one shared pcap for rx+tx */
        int phy_mac;        /* set by "phy_mac=" to use the real NIC MAC */
};

/* Scratch collection of queues built while parsing devargs, before the
 * ethdev itself is allocated.
 */
struct pmd_devargs {
        unsigned int num_of_queue;
        struct devargs_queue {
                pcap_dumper_t *dumper;
                pcap_t *pcap;
                const char *name;
                const char *type;
        } queue[RTE_PMD_PCAP_MAX_QUEUES];
        int phy_mac;
};
96
/* All devargs keys this driver recognizes; NULL-terminated for kvargs. */
static const char *valid_arguments[] = {
        ETH_PCAP_RX_PCAP_ARG,
        ETH_PCAP_TX_PCAP_ARG,
        ETH_PCAP_RX_IFACE_ARG,
        ETH_PCAP_RX_IFACE_IN_ARG,
        ETH_PCAP_TX_IFACE_ARG,
        ETH_PCAP_IFACE_ARG,
        ETH_PCAP_PHY_MAC_ARG,
        NULL
};

/* Fixed link state advertised to applications; only link_status changes,
 * toggled in eth_dev_start()/eth_dev_stop().
 */
static struct rte_eth_link pmd_link = {
                .link_speed = ETH_SPEED_NUM_10G,
                .link_duplex = ETH_LINK_FULL_DUPLEX,
                .link_status = ETH_LINK_DOWN,
                .link_autoneg = ETH_LINK_FIXED,
};

/* Dynamic log type for this PMD; registration is outside this chunk. */
static int eth_pcap_logtype;

/* Driver-local logging helper: prefixes the calling function's name and
 * appends a newline — format strings passed in must NOT end with '\n'.
 */
#define PMD_LOG(level, fmt, args...) \
        rte_log(RTE_LOG_ ## level, eth_pcap_logtype, \
                "%s(): " fmt "\n", __func__, ##args)
120
121 static int
122 eth_pcap_rx_jumbo(struct rte_mempool *mb_pool, struct rte_mbuf *mbuf,
123                 const u_char *data, uint16_t data_len)
124 {
125         /* Copy the first segment. */
126         uint16_t len = rte_pktmbuf_tailroom(mbuf);
127         struct rte_mbuf *m = mbuf;
128
129         rte_memcpy(rte_pktmbuf_append(mbuf, len), data, len);
130         data_len -= len;
131         data += len;
132
133         while (data_len > 0) {
134                 /* Allocate next mbuf and point to that. */
135                 m->next = rte_pktmbuf_alloc(mb_pool);
136
137                 if (unlikely(!m->next))
138                         return -1;
139
140                 m = m->next;
141
142                 /* Headroom is not needed in chained mbufs. */
143                 rte_pktmbuf_prepend(m, rte_pktmbuf_headroom(m));
144                 m->pkt_len = 0;
145                 m->data_len = 0;
146
147                 /* Copy next segment. */
148                 len = RTE_MIN(rte_pktmbuf_tailroom(m), data_len);
149                 rte_memcpy(rte_pktmbuf_append(m, len), data, len);
150
151                 mbuf->nb_segs++;
152                 data_len -= len;
153                 data += len;
154         }
155
156         return mbuf->nb_segs;
157 }
158
159 /* Copy data from mbuf chain to a buffer suitable for writing to a PCAP file. */
160 static void
161 eth_pcap_gather_data(unsigned char *data, struct rte_mbuf *mbuf)
162 {
163         uint16_t data_len = 0;
164
165         while (mbuf) {
166                 rte_memcpy(data + data_len, rte_pktmbuf_mtod(mbuf, void *),
167                         mbuf->data_len);
168
169                 data_len += mbuf->data_len;
170                 mbuf = mbuf->next;
171         }
172 }
173
174 static uint16_t
175 eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
176 {
177         unsigned int i;
178         struct pcap_pkthdr header;
179         const u_char *packet;
180         struct rte_mbuf *mbuf;
181         struct pcap_rx_queue *pcap_q = queue;
182         uint16_t num_rx = 0;
183         uint16_t buf_size;
184         uint32_t rx_bytes = 0;
185
186         if (unlikely(pcap_q->pcap == NULL || nb_pkts == 0))
187                 return 0;
188
189         /* Reads the given number of packets from the pcap file one by one
190          * and copies the packet data into a newly allocated mbuf to return.
191          */
192         for (i = 0; i < nb_pkts; i++) {
193                 /* Get the next PCAP packet */
194                 packet = pcap_next(pcap_q->pcap, &header);
195                 if (unlikely(packet == NULL))
196                         break;
197
198                 mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
199                 if (unlikely(mbuf == NULL))
200                         break;
201
202                 /* Now get the space available for data in the mbuf */
203                 buf_size = rte_pktmbuf_data_room_size(pcap_q->mb_pool) -
204                                 RTE_PKTMBUF_HEADROOM;
205
206                 if (header.caplen <= buf_size) {
207                         /* pcap packet will fit in the mbuf, can copy it */
208                         rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), packet,
209                                         header.caplen);
210                         mbuf->data_len = (uint16_t)header.caplen;
211                 } else {
212                         /* Try read jumbo frame into multi mbufs. */
213                         if (unlikely(eth_pcap_rx_jumbo(pcap_q->mb_pool,
214                                                        mbuf,
215                                                        packet,
216                                                        header.caplen) == -1)) {
217                                 rte_pktmbuf_free(mbuf);
218                                 break;
219                         }
220                 }
221
222                 mbuf->pkt_len = (uint16_t)header.caplen;
223                 mbuf->port = pcap_q->in_port;
224                 bufs[num_rx] = mbuf;
225                 num_rx++;
226                 rx_bytes += header.caplen;
227         }
228         pcap_q->rx_stat.pkts += num_rx;
229         pcap_q->rx_stat.bytes += rx_bytes;
230
231         return num_rx;
232 }
233
234 static inline void
235 calculate_timestamp(struct timeval *ts) {
236         uint64_t cycles;
237         struct timeval cur_time;
238
239         cycles = rte_get_timer_cycles() - start_cycles;
240         cur_time.tv_sec = cycles / hz;
241         cur_time.tv_usec = (cycles % hz) * 1e6 / hz;
242         timeradd(&start_time, &cur_time, ts);
243 }
244
245 /*
246  * Callback to handle writing packets to a pcap file.
247  */
248 static uint16_t
249 eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
250 {
251         unsigned int i;
252         struct rte_mbuf *mbuf;
253         struct pcap_tx_queue *dumper_q = queue;
254         uint16_t num_tx = 0;
255         uint32_t tx_bytes = 0;
256         struct pcap_pkthdr header;
257
258         if (dumper_q->dumper == NULL || nb_pkts == 0)
259                 return 0;
260
261         /* writes the nb_pkts packets to the previously opened pcap file
262          * dumper */
263         for (i = 0; i < nb_pkts; i++) {
264                 mbuf = bufs[i];
265                 calculate_timestamp(&header.ts);
266                 header.len = mbuf->pkt_len;
267                 header.caplen = header.len;
268
269                 if (likely(mbuf->nb_segs == 1)) {
270                         pcap_dump((u_char *)dumper_q->dumper, &header,
271                                   rte_pktmbuf_mtod(mbuf, void*));
272                 } else {
273                         if (mbuf->pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN) {
274                                 eth_pcap_gather_data(tx_pcap_data, mbuf);
275                                 pcap_dump((u_char *)dumper_q->dumper, &header,
276                                           tx_pcap_data);
277                         } else {
278                                 PMD_LOG(ERR,
279                                         "Dropping PCAP packet. Size (%d) > max jumbo size (%d).",
280                                         mbuf->pkt_len,
281                                         ETHER_MAX_JUMBO_FRAME_LEN);
282
283                                 rte_pktmbuf_free(mbuf);
284                                 break;
285                         }
286                 }
287
288                 num_tx++;
289                 tx_bytes += mbuf->pkt_len;
290                 rte_pktmbuf_free(mbuf);
291         }
292
293         /*
294          * Since there's no place to hook a callback when the forwarding
295          * process stops and to make sure the pcap file is actually written,
296          * we flush the pcap dumper within each burst.
297          */
298         pcap_dump_flush(dumper_q->dumper);
299         dumper_q->tx_stat.pkts += num_tx;
300         dumper_q->tx_stat.bytes += tx_bytes;
301         dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
302
303         return num_tx;
304 }
305
306 /*
307  * Callback to handle sending packets through a real NIC.
308  */
309 static uint16_t
310 eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
311 {
312         unsigned int i;
313         int ret;
314         struct rte_mbuf *mbuf;
315         struct pcap_tx_queue *tx_queue = queue;
316         uint16_t num_tx = 0;
317         uint32_t tx_bytes = 0;
318
319         if (unlikely(nb_pkts == 0 || tx_queue->pcap == NULL))
320                 return 0;
321
322         for (i = 0; i < nb_pkts; i++) {
323                 mbuf = bufs[i];
324
325                 if (likely(mbuf->nb_segs == 1)) {
326                         ret = pcap_sendpacket(tx_queue->pcap,
327                                         rte_pktmbuf_mtod(mbuf, u_char *),
328                                         mbuf->pkt_len);
329                 } else {
330                         if (mbuf->pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN) {
331                                 eth_pcap_gather_data(tx_pcap_data, mbuf);
332                                 ret = pcap_sendpacket(tx_queue->pcap,
333                                                 tx_pcap_data, mbuf->pkt_len);
334                         } else {
335                                 PMD_LOG(ERR,
336                                         "Dropping PCAP packet. Size (%d) > max jumbo size (%d).",
337                                         mbuf->pkt_len,
338                                         ETHER_MAX_JUMBO_FRAME_LEN);
339
340                                 rte_pktmbuf_free(mbuf);
341                                 break;
342                         }
343                 }
344
345                 if (unlikely(ret != 0))
346                         break;
347                 num_tx++;
348                 tx_bytes += mbuf->pkt_len;
349                 rte_pktmbuf_free(mbuf);
350         }
351
352         tx_queue->tx_stat.pkts += num_tx;
353         tx_queue->tx_stat.bytes += tx_bytes;
354         tx_queue->tx_stat.err_pkts += nb_pkts - num_tx;
355
356         return num_tx;
357 }
358
359 /*
360  * pcap_open_live wrapper function
361  */
362 static inline int
363 open_iface_live(const char *iface, pcap_t **pcap) {
364         *pcap = pcap_open_live(iface, RTE_ETH_PCAP_SNAPLEN,
365                         RTE_ETH_PCAP_PROMISC, RTE_ETH_PCAP_TIMEOUT, errbuf);
366
367         if (*pcap == NULL) {
368                 PMD_LOG(ERR, "Couldn't open %s: %s", iface, errbuf);
369                 return -1;
370         }
371
372         return 0;
373 }
374
375 static int
376 open_single_iface(const char *iface, pcap_t **pcap)
377 {
378         if (open_iface_live(iface, pcap) < 0) {
379                 PMD_LOG(ERR, "Couldn't open interface %s", iface);
380                 return -1;
381         }
382
383         return 0;
384 }
385
386 static int
387 open_single_tx_pcap(const char *pcap_filename, pcap_dumper_t **dumper)
388 {
389         pcap_t *tx_pcap;
390
391         /*
392          * We need to create a dummy empty pcap_t to use it
393          * with pcap_dump_open(). We create big enough an Ethernet
394          * pcap holder.
395          */
396         tx_pcap = pcap_open_dead(DLT_EN10MB, RTE_ETH_PCAP_SNAPSHOT_LEN);
397         if (tx_pcap == NULL) {
398                 PMD_LOG(ERR, "Couldn't create dead pcap");
399                 return -1;
400         }
401
402         /* The dumper is created using the previous pcap_t reference */
403         *dumper = pcap_dump_open(tx_pcap, pcap_filename);
404         if (*dumper == NULL) {
405                 pcap_close(tx_pcap);
406                 PMD_LOG(ERR, "Couldn't open %s for writing.",
407                         pcap_filename);
408                 return -1;
409         }
410
411         pcap_close(tx_pcap);
412         return 0;
413 }
414
415 static int
416 open_single_rx_pcap(const char *pcap_filename, pcap_t **pcap)
417 {
418         *pcap = pcap_open_offline(pcap_filename, errbuf);
419         if (*pcap == NULL) {
420                 PMD_LOG(ERR, "Couldn't open %s: %s", pcap_filename,
421                         errbuf);
422                 return -1;
423         }
424
425         return 0;
426 }
427
428 static int
429 eth_dev_start(struct rte_eth_dev *dev)
430 {
431         unsigned int i;
432         struct pmd_internals *internals = dev->data->dev_private;
433         struct pcap_tx_queue *tx;
434         struct pcap_rx_queue *rx;
435
436         /* Special iface case. Single pcap is open and shared between tx/rx. */
437         if (internals->single_iface) {
438                 tx = &internals->tx_queue[0];
439                 rx = &internals->rx_queue[0];
440
441                 if (!tx->pcap && strcmp(tx->type, ETH_PCAP_IFACE_ARG) == 0) {
442                         if (open_single_iface(tx->name, &tx->pcap) < 0)
443                                 return -1;
444                         rx->pcap = tx->pcap;
445                 }
446
447                 goto status_up;
448         }
449
450         /* If not open already, open tx pcaps/dumpers */
451         for (i = 0; i < dev->data->nb_tx_queues; i++) {
452                 tx = &internals->tx_queue[i];
453
454                 if (!tx->dumper &&
455                                 strcmp(tx->type, ETH_PCAP_TX_PCAP_ARG) == 0) {
456                         if (open_single_tx_pcap(tx->name, &tx->dumper) < 0)
457                                 return -1;
458                 } else if (!tx->pcap &&
459                                 strcmp(tx->type, ETH_PCAP_TX_IFACE_ARG) == 0) {
460                         if (open_single_iface(tx->name, &tx->pcap) < 0)
461                                 return -1;
462                 }
463         }
464
465         /* If not open already, open rx pcaps */
466         for (i = 0; i < dev->data->nb_rx_queues; i++) {
467                 rx = &internals->rx_queue[i];
468
469                 if (rx->pcap != NULL)
470                         continue;
471
472                 if (strcmp(rx->type, ETH_PCAP_RX_PCAP_ARG) == 0) {
473                         if (open_single_rx_pcap(rx->name, &rx->pcap) < 0)
474                                 return -1;
475                 } else if (strcmp(rx->type, ETH_PCAP_RX_IFACE_ARG) == 0) {
476                         if (open_single_iface(rx->name, &rx->pcap) < 0)
477                                 return -1;
478                 }
479         }
480
481 status_up:
482         for (i = 0; i < dev->data->nb_rx_queues; i++)
483                 dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
484
485         for (i = 0; i < dev->data->nb_tx_queues; i++)
486                 dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
487
488         dev->data->dev_link.link_status = ETH_LINK_UP;
489
490         return 0;
491 }
492
/*
 * This function gets called when the current port gets stopped.
 * Is the only place for us to close all the tx streams dumpers.
 * If not called the dumpers will be flushed within each tx burst.
 */
static void
eth_dev_stop(struct rte_eth_dev *dev)
{
        unsigned int i;
        struct pmd_internals *internals = dev->data->dev_private;
        struct pcap_tx_queue *tx;
        struct pcap_rx_queue *rx;

        /* Special iface case. Single pcap is open and shared between tx/rx. */
        if (internals->single_iface) {
                tx = &internals->tx_queue[0];
                rx = &internals->rx_queue[0];
                /* One pcap_close() for the shared handle; NULL both refs so
                 * neither side is closed twice or used after close.
                 */
                pcap_close(tx->pcap);
                tx->pcap = NULL;
                rx->pcap = NULL;
                goto status_down;
        }

        /* Close tx dumpers and/or live handles; a queue has one or the
         * other depending on its devargs type.
         */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                tx = &internals->tx_queue[i];

                if (tx->dumper != NULL) {
                        pcap_dump_close(tx->dumper);
                        tx->dumper = NULL;
                }

                if (tx->pcap != NULL) {
                        pcap_close(tx->pcap);
                        tx->pcap = NULL;
                }
        }

        /* Close rx handles; NULLed so eth_dev_start() knows to reopen. */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rx = &internals->rx_queue[i];

                if (rx->pcap != NULL) {
                        pcap_close(rx->pcap);
                        rx->pcap = NULL;
                }
        }

status_down:
        for (i = 0; i < dev->data->nb_rx_queues; i++)
                dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

        for (i = 0; i < dev->data->nb_tx_queues; i++)
                dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

        dev->data->dev_link.link_status = ETH_LINK_DOWN;
}
548
/* dev_configure callback: nothing to configure for pcap devices. */
static int
eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
{
        return 0;
}
554
555 static void
556 eth_dev_info(struct rte_eth_dev *dev,
557                 struct rte_eth_dev_info *dev_info)
558 {
559         struct pmd_internals *internals = dev->data->dev_private;
560
561         dev_info->if_index = internals->if_index;
562         dev_info->max_mac_addrs = 1;
563         dev_info->max_rx_pktlen = (uint32_t) -1;
564         dev_info->max_rx_queues = dev->data->nb_rx_queues;
565         dev_info->max_tx_queues = dev->data->nb_tx_queues;
566         dev_info->min_rx_bufsize = 0;
567 }
568
569 static int
570 eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
571 {
572         unsigned int i;
573         unsigned long rx_packets_total = 0, rx_bytes_total = 0;
574         unsigned long tx_packets_total = 0, tx_bytes_total = 0;
575         unsigned long tx_packets_err_total = 0;
576         const struct pmd_internals *internal = dev->data->dev_private;
577
578         for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
579                         i < dev->data->nb_rx_queues; i++) {
580                 stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
581                 stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
582                 rx_packets_total += stats->q_ipackets[i];
583                 rx_bytes_total += stats->q_ibytes[i];
584         }
585
586         for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
587                         i < dev->data->nb_tx_queues; i++) {
588                 stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
589                 stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
590                 stats->q_errors[i] = internal->tx_queue[i].tx_stat.err_pkts;
591                 tx_packets_total += stats->q_opackets[i];
592                 tx_bytes_total += stats->q_obytes[i];
593                 tx_packets_err_total += stats->q_errors[i];
594         }
595
596         stats->ipackets = rx_packets_total;
597         stats->ibytes = rx_bytes_total;
598         stats->opackets = tx_packets_total;
599         stats->obytes = tx_bytes_total;
600         stats->oerrors = tx_packets_err_total;
601
602         return 0;
603 }
604
605 static void
606 eth_stats_reset(struct rte_eth_dev *dev)
607 {
608         unsigned int i;
609         struct pmd_internals *internal = dev->data->dev_private;
610
611         for (i = 0; i < dev->data->nb_rx_queues; i++) {
612                 internal->rx_queue[i].rx_stat.pkts = 0;
613                 internal->rx_queue[i].rx_stat.bytes = 0;
614         }
615
616         for (i = 0; i < dev->data->nb_tx_queues; i++) {
617                 internal->tx_queue[i].tx_stat.pkts = 0;
618                 internal->tx_queue[i].tx_stat.bytes = 0;
619                 internal->tx_queue[i].tx_stat.err_pkts = 0;
620         }
621 }
622
/* dev_close callback: handles are already closed in eth_dev_stop() and
 * queues live inside pmd_internals, so there is nothing left to release.
 */
static void
eth_dev_close(struct rte_eth_dev *dev __rte_unused)
{
}

/* Queue release callback: queues are embedded in pmd_internals, so no
 * per-queue allocation exists to free.
 */
static void
eth_queue_release(void *q __rte_unused)
{
}

/* link_update callback: link status is driven by start/stop, nothing to
 * poll here.
 */
static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
                int wait_to_complete __rte_unused)
{
        return 0;
}
639
640 static int
641 eth_rx_queue_setup(struct rte_eth_dev *dev,
642                 uint16_t rx_queue_id,
643                 uint16_t nb_rx_desc __rte_unused,
644                 unsigned int socket_id __rte_unused,
645                 const struct rte_eth_rxconf *rx_conf __rte_unused,
646                 struct rte_mempool *mb_pool)
647 {
648         struct pmd_internals *internals = dev->data->dev_private;
649         struct pcap_rx_queue *pcap_q = &internals->rx_queue[rx_queue_id];
650
651         pcap_q->mb_pool = mb_pool;
652         dev->data->rx_queues[rx_queue_id] = pcap_q;
653         pcap_q->in_port = dev->data->port_id;
654
655         return 0;
656 }
657
658 static int
659 eth_tx_queue_setup(struct rte_eth_dev *dev,
660                 uint16_t tx_queue_id,
661                 uint16_t nb_tx_desc __rte_unused,
662                 unsigned int socket_id __rte_unused,
663                 const struct rte_eth_txconf *tx_conf __rte_unused)
664 {
665         struct pmd_internals *internals = dev->data->dev_private;
666
667         dev->data->tx_queues[tx_queue_id] = &internals->tx_queue[tx_queue_id];
668
669         return 0;
670 }
671
/* Per-queue start/stop callbacks: pcap handles are managed by
 * dev_start/dev_stop, so these only flip the reported queue state.
 */
static int
eth_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

        return 0;
}

static int
eth_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

        return 0;
}

static int
eth_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

        return 0;
}

static int
eth_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

        return 0;
}
703
/* ethdev callback table wiring the handlers above into rte_ethdev. */
static const struct eth_dev_ops ops = {
        .dev_start = eth_dev_start,
        .dev_stop = eth_dev_stop,
        .dev_close = eth_dev_close,
        .dev_configure = eth_dev_configure,
        .dev_infos_get = eth_dev_info,
        .rx_queue_setup = eth_rx_queue_setup,
        .tx_queue_setup = eth_tx_queue_setup,
        .rx_queue_start = eth_rx_queue_start,
        .tx_queue_start = eth_tx_queue_start,
        .rx_queue_stop = eth_rx_queue_stop,
        .tx_queue_stop = eth_tx_queue_stop,
        .rx_queue_release = eth_queue_release,
        .tx_queue_release = eth_queue_release,
        .link_update = eth_link_update,
        .stats_get = eth_stats_get,
        .stats_reset = eth_stats_reset,
};
722
723 static int
724 add_queue(struct pmd_devargs *pmd, const char *name, const char *type,
725                 pcap_t *pcap, pcap_dumper_t *dumper)
726 {
727         if (pmd->num_of_queue >= RTE_PMD_PCAP_MAX_QUEUES)
728                 return -1;
729         if (pcap)
730                 pmd->queue[pmd->num_of_queue].pcap = pcap;
731         if (dumper)
732                 pmd->queue[pmd->num_of_queue].dumper = dumper;
733         pmd->queue[pmd->num_of_queue].name = name;
734         pmd->queue[pmd->num_of_queue].type = type;
735         pmd->num_of_queue++;
736         return 0;
737 }
738
739 /*
740  * Function handler that opens the pcap file for reading a stores a
741  * reference of it for use it later on.
742  */
743 static int
744 open_rx_pcap(const char *key, const char *value, void *extra_args)
745 {
746         const char *pcap_filename = value;
747         struct pmd_devargs *rx = extra_args;
748         pcap_t *pcap = NULL;
749
750         if (open_single_rx_pcap(pcap_filename, &pcap) < 0)
751                 return -1;
752
753         if (add_queue(rx, pcap_filename, key, pcap, NULL) < 0) {
754                 pcap_close(pcap);
755                 return -1;
756         }
757
758         return 0;
759 }
760
761 /*
762  * Opens a pcap file for writing and stores a reference to it
763  * for use it later on.
764  */
765 static int
766 open_tx_pcap(const char *key, const char *value, void *extra_args)
767 {
768         const char *pcap_filename = value;
769         struct pmd_devargs *dumpers = extra_args;
770         pcap_dumper_t *dumper;
771
772         if (open_single_tx_pcap(pcap_filename, &dumper) < 0)
773                 return -1;
774
775         if (add_queue(dumpers, pcap_filename, key, NULL, dumper) < 0) {
776                 pcap_dump_close(dumper);
777                 return -1;
778         }
779
780         return 0;
781 }
782
783 /*
784  * Opens an interface for reading and writing
785  */
786 static inline int
787 open_rx_tx_iface(const char *key, const char *value, void *extra_args)
788 {
789         const char *iface = value;
790         struct pmd_devargs *tx = extra_args;
791         pcap_t *pcap = NULL;
792
793         if (open_single_iface(iface, &pcap) < 0)
794                 return -1;
795
796         tx->queue[0].pcap = pcap;
797         tx->queue[0].name = iface;
798         tx->queue[0].type = key;
799
800         return 0;
801 }
802
803 static inline int
804 set_iface_direction(const char *iface, pcap_t *pcap,
805                 pcap_direction_t direction)
806 {
807         const char *direction_str = (direction == PCAP_D_IN) ? "IN" : "OUT";
808         if (pcap_setdirection(pcap, direction) < 0) {
809                 PMD_LOG(ERR, "Setting %s pcap direction %s failed - %s\n",
810                                 iface, direction_str, pcap_geterr(pcap));
811                 return -1;
812         }
813         PMD_LOG(INFO, "Setting %s pcap direction %s\n",
814                         iface, direction_str);
815         return 0;
816 }
817
818 static inline int
819 open_iface(const char *key, const char *value, void *extra_args)
820 {
821         const char *iface = value;
822         struct pmd_devargs *pmd = extra_args;
823         pcap_t *pcap = NULL;
824
825         if (open_single_iface(iface, &pcap) < 0)
826                 return -1;
827         if (add_queue(pmd, iface, key, pcap, NULL) < 0) {
828                 pcap_close(pcap);
829                 return -1;
830         }
831
832         return 0;
833 }
834
835 /*
836  * Opens a NIC for reading packets from it
837  */
838 static inline int
839 open_rx_iface(const char *key, const char *value, void *extra_args)
840 {
841         int ret = open_iface(key, value, extra_args);
842         if (ret < 0)
843                 return ret;
844         if (strcmp(key, ETH_PCAP_RX_IFACE_IN_ARG) == 0) {
845                 struct pmd_devargs *pmd = extra_args;
846                 unsigned int qid = pmd->num_of_queue - 1;
847
848                 set_iface_direction(pmd->queue[qid].name,
849                                 pmd->queue[qid].pcap,
850                                 PCAP_D_IN);
851         }
852
853         return 0;
854 }
855
856 static inline int
857 rx_iface_args_process(const char *key, const char *value, void *extra_args)
858 {
859         if (strcmp(key, ETH_PCAP_RX_IFACE_ARG) == 0 ||
860                         strcmp(key, ETH_PCAP_RX_IFACE_IN_ARG) == 0)
861                 return open_rx_iface(key, value, extra_args);
862
863         return 0;
864 }
865
/*
 * Opens a NIC for writing packets to it
 */
static int
open_tx_iface(const char *key, const char *value, void *extra_args)
{
        /* Same open path as rx; direction filtering applies to rx only. */
        return open_iface(key, value, extra_args);
}
874
875 static int
876 select_phy_mac(const char *key __rte_unused, const char *value,
877                 void *extra_args)
878 {
879         if (extra_args) {
880                 const int phy_mac = atoi(value);
881                 int *enable_phy_mac = extra_args;
882
883                 if (phy_mac)
884                         *enable_phy_mac = 1;
885         }
886         return 0;
887 }
888
/* Forward declaration; the driver definition is outside this file chunk. */
static struct rte_vdev_driver pmd_pcap_drv;
890
/* Allocate the ethdev and its private pmd_internals, and fill in the
 * device-independent defaults (generated MAC, link state, queue counts).
 * Returns 0 on success, -1 if the ethdev cannot be allocated.
 */
static int
pmd_init_internals(struct rte_vdev_device *vdev,
                const unsigned int nb_rx_queues,
                const unsigned int nb_tx_queues,
                struct pmd_internals **internals,
                struct rte_eth_dev **eth_dev)
{
        struct rte_eth_dev_data *data;
        unsigned int numa_node = vdev->device.numa_node;

        PMD_LOG(INFO, "Creating pcap-backed ethdev on numa socket %d",
                numa_node);

        /* reserve an ethdev entry */
        *eth_dev = rte_eth_vdev_allocate(vdev, sizeof(**internals));
        if (!(*eth_dev))
                return -1;

        /* now put it all together
         * - store queue data in internals,
         * - store numa_node info in eth_dev
         * - point eth_dev_data to internals
         * - and point eth_dev structure to new eth_dev_data structure
         */
        *internals = (*eth_dev)->data->dev_private;
        /*
         * Interface MAC = 02:70:63:61:70:<iface_idx>
         * derived from: 'locally administered':'p':'c':'a':'p':'iface_idx'
         * where the middle 4 characters are converted to hex.
         */
        (*internals)->eth_addr = (struct ether_addr) {
                .addr_bytes = { 0x02, 0x70, 0x63, 0x61, 0x70, iface_idx++ }
        };
        /* Default to the generated MAC; may be overridden by "phy_mac=". */
        (*internals)->phy_mac = 0;
        data = (*eth_dev)->data;
        data->nb_rx_queues = (uint16_t)nb_rx_queues;
        data->nb_tx_queues = (uint16_t)nb_tx_queues;
        data->dev_link = pmd_link;
        data->mac_addrs = &(*internals)->eth_addr;

        /*
         * NOTE: we'll replace the data element, of originally allocated
         * eth_dev so the rings are local per-process
         */
        (*eth_dev)->dev_ops = &ops;

        return 0;
}
939
/*
 * Replace the ethdev's MAC address with the physical MAC of @if_name.
 *
 * The replacement address is heap-allocated on @numa_node; on success
 * the caller sets internals->phy_mac so the remove path knows the
 * address must be freed by the ethdev release.  Returns 0 on success,
 * -1 on any failure (and unconditionally on platforms other than
 * Linux/BSD).
 */
static int
eth_pcap_update_mac(const char *if_name, struct rte_eth_dev *eth_dev,
		const unsigned int numa_node)
{
#if defined(RTE_EXEC_ENV_LINUXAPP)
	/* Linux: fetch the hardware address via the SIOCGIFHWADDR ioctl. */
	void *mac_addrs;
	struct ifreq ifr;
	int if_fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (if_fd == -1)
		return -1;

	rte_strscpy(ifr.ifr_name, if_name, sizeof(ifr.ifr_name));
	if (ioctl(if_fd, SIOCGIFHWADDR, &ifr)) {
		close(if_fd);
		return -1;
	}

	mac_addrs = rte_zmalloc_socket(NULL, ETHER_ADDR_LEN, 0, numa_node);
	if (!mac_addrs) {
		close(if_fd);
		return -1;
	}

	PMD_LOG(INFO, "Setting phy MAC for %s", if_name);
	eth_dev->data->mac_addrs = mac_addrs;
	rte_memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
			ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);

	close(if_fd);

	return 0;

#elif defined(RTE_EXEC_ENV_BSDAPP)
	/* BSD: read the link-layer address from the NET_RT_IFLIST sysctl. */
	void *mac_addrs;
	struct if_msghdr *ifm;
	struct sockaddr_dl *sdl;
	int mib[6];
	size_t len = 0;
	char *buf;

	mib[0] = CTL_NET;
	mib[1] = AF_ROUTE;
	mib[2] = 0;
	mib[3] = AF_LINK;
	mib[4] = NET_RT_IFLIST;
	mib[5] = if_nametoindex(if_name);

	/* First sysctl call sizes the buffer, second fills it. */
	if (sysctl(mib, 6, NULL, &len, NULL, 0) < 0)
		return -1;

	if (len == 0)
		return -1;

	buf = rte_malloc(NULL, len, 0);
	if (!buf)
		return -1;

	if (sysctl(mib, 6, buf, &len, NULL, 0) < 0) {
		rte_free(buf);
		return -1;
	}
	/* Link-level sockaddr immediately follows the interface header. */
	ifm = (struct if_msghdr *)buf;
	sdl = (struct sockaddr_dl *)(ifm + 1);

	mac_addrs = rte_zmalloc_socket(NULL, ETHER_ADDR_LEN, 0, numa_node);
	if (!mac_addrs) {
		rte_free(buf);
		return -1;
	}

	PMD_LOG(INFO, "Setting phy MAC for %s", if_name);
	eth_dev->data->mac_addrs = mac_addrs;
	rte_memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
			LLADDR(sdl), ETHER_ADDR_LEN);

	rte_free(buf);

	return 0;
#else
	return -1;
#endif
}
1023
1024 static int
1025 eth_from_pcaps_common(struct rte_vdev_device *vdev,
1026                 struct pmd_devargs *rx_queues, const unsigned int nb_rx_queues,
1027                 struct pmd_devargs *tx_queues, const unsigned int nb_tx_queues,
1028                 struct pmd_internals **internals, struct rte_eth_dev **eth_dev)
1029 {
1030         unsigned int i;
1031
1032         /* do some parameter checking */
1033         if (rx_queues == NULL && nb_rx_queues > 0)
1034                 return -1;
1035         if (tx_queues == NULL && nb_tx_queues > 0)
1036                 return -1;
1037
1038         if (pmd_init_internals(vdev, nb_rx_queues, nb_tx_queues, internals,
1039                         eth_dev) < 0)
1040                 return -1;
1041
1042         for (i = 0; i < nb_rx_queues; i++) {
1043                 struct pcap_rx_queue *rx = &(*internals)->rx_queue[i];
1044                 struct devargs_queue *queue = &rx_queues->queue[i];
1045
1046                 rx->pcap = queue->pcap;
1047                 snprintf(rx->name, sizeof(rx->name), "%s", queue->name);
1048                 snprintf(rx->type, sizeof(rx->type), "%s", queue->type);
1049         }
1050
1051         for (i = 0; i < nb_tx_queues; i++) {
1052                 struct pcap_tx_queue *tx = &(*internals)->tx_queue[i];
1053                 struct devargs_queue *queue = &tx_queues->queue[i];
1054
1055                 tx->dumper = queue->dumper;
1056                 tx->pcap = queue->pcap;
1057                 snprintf(tx->name, sizeof(tx->name), "%s", queue->name);
1058                 snprintf(tx->type, sizeof(tx->type), "%s", queue->type);
1059         }
1060
1061         return 0;
1062 }
1063
1064 static int
1065 eth_from_pcaps(struct rte_vdev_device *vdev,
1066                 struct pmd_devargs *rx_queues, const unsigned int nb_rx_queues,
1067                 struct pmd_devargs *tx_queues, const unsigned int nb_tx_queues,
1068                 int single_iface, unsigned int using_dumpers)
1069 {
1070         struct pmd_internals *internals = NULL;
1071         struct rte_eth_dev *eth_dev = NULL;
1072         int ret;
1073
1074         ret = eth_from_pcaps_common(vdev, rx_queues, nb_rx_queues,
1075                 tx_queues, nb_tx_queues, &internals, &eth_dev);
1076
1077         if (ret < 0)
1078                 return ret;
1079
1080         /* store weather we are using a single interface for rx/tx or not */
1081         internals->single_iface = single_iface;
1082
1083         if (single_iface) {
1084                 internals->if_index = if_nametoindex(rx_queues->queue[0].name);
1085
1086                 /* phy_mac arg is applied only only if "iface" devarg is provided */
1087                 if (rx_queues->phy_mac) {
1088                         int ret = eth_pcap_update_mac(rx_queues->queue[0].name,
1089                                         eth_dev, vdev->device.numa_node);
1090                         if (ret == 0)
1091                                 internals->phy_mac = 1;
1092                 }
1093         }
1094
1095         eth_dev->rx_pkt_burst = eth_pcap_rx;
1096
1097         if (using_dumpers)
1098                 eth_dev->tx_pkt_burst = eth_pcap_tx_dumper;
1099         else
1100                 eth_dev->tx_pkt_burst = eth_pcap_tx;
1101
1102         rte_eth_dev_probing_finish(eth_dev);
1103         return 0;
1104 }
1105
/*
 * Probe entry point for the pcap vdev driver.
 *
 * Parses the device arguments and creates a pcap-backed ethdev in one
 * of three modes: a single "iface" used for both Rx and Tx, pcap files
 * per queue (rx_pcap/tx_pcap), or network interfaces per queue
 * (rx_iface/rx_iface_in/tx_iface).  In a secondary process the port
 * created by the primary is attached instead.  Returns 0 on success,
 * negative on failure.
 */
static int
pmd_pcap_probe(struct rte_vdev_device *dev)
{
	const char *name;
	unsigned int is_rx_pcap = 0, is_tx_pcap = 0;
	struct rte_kvargs *kvlist;
	struct pmd_devargs pcaps = {0};
	struct pmd_devargs dumpers = {0};
	struct rte_eth_dev *eth_dev;
	int single_iface = 0;
	int ret;

	name = rte_vdev_device_name(dev);
	PMD_LOG(INFO, "Initializing pmd_pcap for %s", name);

	/*
	 * Snapshot wall-clock time and the cycle counter into file-scope
	 * globals; presumably consumed by the datapath for pcap packet
	 * timestamps (usage not visible in this chunk).
	 */
	gettimeofday(&start_time, NULL);
	start_cycles = rte_get_timer_cycles();
	hz = rte_get_timer_hz();

	/* Secondary processes attach to the port created by the primary. */
	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			PMD_LOG(ERR, "Failed to probe %s", name);
			return -1;
		}
		/* TODO: request info from primary to set up Rx and Tx */
		eth_dev->dev_ops = &ops;
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
	if (kvlist == NULL)
		return -1;

	/*
	 * If iface argument is passed we open the NICs and use them for
	 * reading / writing
	 */
	if (rte_kvargs_count(kvlist, ETH_PCAP_IFACE_ARG) == 1) {

		ret = rte_kvargs_process(kvlist, ETH_PCAP_IFACE_ARG,
				&open_rx_tx_iface, &pcaps);
		if (ret < 0)
			goto free_kvlist;

		/* The single opened interface serves both directions. */
		dumpers.queue[0] = pcaps.queue[0];

		ret = rte_kvargs_process(kvlist, ETH_PCAP_PHY_MAC_ARG,
				&select_phy_mac, &pcaps.phy_mac);
		if (ret < 0)
			goto free_kvlist;

		dumpers.phy_mac = pcaps.phy_mac;

		single_iface = 1;
		pcaps.num_of_queue = 1;
		dumpers.num_of_queue = 1;

		goto create_eth;
	}

	/*
	 * We check whether we want to open a RX stream from a real NIC or a
	 * pcap file
	 */
	is_rx_pcap = rte_kvargs_count(kvlist, ETH_PCAP_RX_PCAP_ARG) ? 1 : 0;
	pcaps.num_of_queue = 0;

	if (is_rx_pcap) {
		ret = rte_kvargs_process(kvlist, ETH_PCAP_RX_PCAP_ARG,
				&open_rx_pcap, &pcaps);
	} else {
		/* NULL key: the handler matches both rx_iface variants. */
		ret = rte_kvargs_process(kvlist, NULL,
				&rx_iface_args_process, &pcaps);
	}

	if (ret < 0)
		goto free_kvlist;

	/*
	 * We check whether we want to open a TX stream to a real NIC or a
	 * pcap file
	 */
	is_tx_pcap = rte_kvargs_count(kvlist, ETH_PCAP_TX_PCAP_ARG) ? 1 : 0;
	dumpers.num_of_queue = 0;

	if (is_tx_pcap)
		ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_PCAP_ARG,
				&open_tx_pcap, &dumpers);
	else
		ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_IFACE_ARG,
				&open_tx_iface, &dumpers);

	if (ret < 0)
		goto free_kvlist;

create_eth:
	ret = eth_from_pcaps(dev, &pcaps, pcaps.num_of_queue, &dumpers,
		dumpers.num_of_queue, single_iface, is_tx_pcap);

free_kvlist:
	rte_kvargs_free(kvlist);

	return ret;
}
1213
1214 static int
1215 pmd_pcap_remove(struct rte_vdev_device *dev)
1216 {
1217         struct pmd_internals *internals = NULL;
1218         struct rte_eth_dev *eth_dev = NULL;
1219
1220         PMD_LOG(INFO, "Closing pcap ethdev on numa socket %d",
1221                         rte_socket_id());
1222
1223         if (!dev)
1224                 return -1;
1225
1226         /* reserve an ethdev entry */
1227         eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
1228         if (eth_dev == NULL)
1229                 return -1;
1230
1231         if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1232                 internals = eth_dev->data->dev_private;
1233                 if (internals != NULL && internals->phy_mac == 0)
1234                         /* not dynamically allocated, must not be freed */
1235                         eth_dev->data->mac_addrs = NULL;
1236         }
1237
1238         rte_eth_dev_release_port(eth_dev);
1239
1240         return 0;
1241 }
1242
/* Definition of the vdev driver forward-declared earlier in this file. */
static struct rte_vdev_driver pmd_pcap_drv = {
	.probe = pmd_pcap_probe,
	.remove = pmd_pcap_remove,
};
1247
/* Register the driver, its legacy "eth_pcap" alias and the devargs it
 * accepts (shown by tools such as dpdk-devbind / rte_devargs parsing).
 */
RTE_PMD_REGISTER_VDEV(net_pcap, pmd_pcap_drv);
RTE_PMD_REGISTER_ALIAS(net_pcap, eth_pcap);
RTE_PMD_REGISTER_PARAM_STRING(net_pcap,
	ETH_PCAP_RX_PCAP_ARG "=<string> "
	ETH_PCAP_TX_PCAP_ARG "=<string> "
	ETH_PCAP_RX_IFACE_ARG "=<ifc> "
	ETH_PCAP_RX_IFACE_IN_ARG "=<ifc> "
	ETH_PCAP_TX_IFACE_ARG "=<ifc> "
	ETH_PCAP_IFACE_ARG "=<ifc> "
	ETH_PCAP_PHY_MAC_ARG "=<int>");
1258
/* Constructor: register this PMD's log type with a default level of
 * NOTICE (skipped if registration fails and returns a negative value).
 */
RTE_INIT(eth_pcap_init_log)
{
	eth_pcap_logtype = rte_log_register("pmd.net.pcap");
	if (eth_pcap_logtype >= 0)
		rte_log_set_level(eth_pcap_logtype, RTE_LOG_NOTICE);
}