New upstream version 18.08
[deb_dpdk.git] / examples / l3fwd-power / main.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2018 Intel Corporation
3  */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <stdint.h>
8 #include <inttypes.h>
9 #include <sys/types.h>
10 #include <string.h>
11 #include <sys/queue.h>
12 #include <stdarg.h>
13 #include <errno.h>
14 #include <getopt.h>
15 #include <unistd.h>
16 #include <signal.h>
17
18 #include <rte_common.h>
19 #include <rte_byteorder.h>
20 #include <rte_log.h>
21 #include <rte_malloc.h>
22 #include <rte_memory.h>
23 #include <rte_memcpy.h>
24 #include <rte_eal.h>
25 #include <rte_launch.h>
26 #include <rte_atomic.h>
27 #include <rte_cycles.h>
28 #include <rte_prefetch.h>
29 #include <rte_lcore.h>
30 #include <rte_per_lcore.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_interrupts.h>
33 #include <rte_random.h>
34 #include <rte_debug.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev.h>
37 #include <rte_mempool.h>
38 #include <rte_mbuf.h>
39 #include <rte_ip.h>
40 #include <rte_tcp.h>
41 #include <rte_udp.h>
42 #include <rte_string_fns.h>
43 #include <rte_timer.h>
44 #include <rte_power.h>
45 #include <rte_spinlock.h>
46
47 #include "perf_core.h"
48 #include "main.h"
49
50 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1
51
52 #define MAX_PKT_BURST 32
53
54 #define MIN_ZERO_POLL_COUNT 10
55
56 /* 100 ms interval */
57 #define TIMER_NUMBER_PER_SECOND           10
58 /* 100000 us */
59 #define SCALING_PERIOD                    (1000000/TIMER_NUMBER_PER_SECOND)
60 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25
61
62 #define APP_LOOKUP_EXACT_MATCH          0
63 #define APP_LOOKUP_LPM                  1
64 #define DO_RFC_1812_CHECKS
65
66 #ifndef APP_LOOKUP_METHOD
67 #define APP_LOOKUP_METHOD             APP_LOOKUP_LPM
68 #endif
69
70 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
71 #include <rte_hash.h>
72 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
73 #include <rte_lpm.h>
74 #else
75 #error "APP_LOOKUP_METHOD set to incorrect value"
76 #endif
77
78 #ifndef IPv6_BYTES
79 #define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
80                        "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
81 #define IPv6_BYTES(addr) \
82         addr[0],  addr[1], addr[2],  addr[3], \
83         addr[4],  addr[5], addr[6],  addr[7], \
84         addr[8],  addr[9], addr[10], addr[11],\
85         addr[12], addr[13],addr[14], addr[15]
86 #endif
87
88 #define MAX_JUMBO_PKT_LEN  9600
89
90 #define IPV6_ADDR_LEN 16
91
92 #define MEMPOOL_CACHE_SIZE 256
93
94 /*
95  * This expression is used to calculate the number of mbufs needed depending on
96  * user input, taking into account memory for rx and tx hardware rings, cache
97  * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that
98  * NB_MBUF never goes below a minimum value of 8192.
99  */
100
101 #define NB_MBUF RTE_MAX ( \
102         (nb_ports*nb_rx_queue*nb_rxd + \
103         nb_ports*nb_lcores*MAX_PKT_BURST + \
104         nb_ports*n_tx_queue*nb_txd + \
105         nb_lcores*MEMPOOL_CACHE_SIZE), \
106         (unsigned)8192)
107
108 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
109
110 #define NB_SOCKETS 8
111
112 /* Configure how many packets ahead to prefetch, when reading packets */
113 #define PREFETCH_OFFSET 3
114
/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

/* per-port spinlocks serializing rx interrupt enabling (see turn_on_intr()) */
static rte_spinlock_t locks[RTE_MAX_ETHPORTS];

/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;
/* Ports set in promiscuous mode off by default. */
static int promiscuous_on = 0;
/* NUMA is enabled by default. */
static int numa_on = 1;
static int parse_ptype; /**< Parse packet type using rx callback, and */
                        /**< disabled by default */
137
/* Frequency scaling hints produced by the power heuristics. */
enum freq_scale_hint_t
{
        FREQ_LOWER    =      -1,
        FREQ_CURRENT  =       0,
        FREQ_HIGHER   =       1,
        FREQ_HIGHEST  =       2
};

/* Per rx-queue state feeding the power heuristics. */
struct lcore_rx_queue {
        uint16_t port_id;
        uint8_t queue_id;
        /* scale-up hint derived from the rx queue backlog */
        enum freq_scale_hint_t freq_up_hint;
        /* consecutive polls of this queue that returned zero packets */
        uint32_t zero_rx_packet_count;
        /* suggested sleep time in us from power_idle_heuristic() */
        uint32_t idle_hint;
} __rte_cache_aligned;
153
154 #define MAX_RX_QUEUE_PER_LCORE 16
155 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
156 #define MAX_RX_QUEUE_PER_PORT 128
157
158 #define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16
159
160
struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
/* Default rx queue to lcore mapping used when no --config option is given.
 * NOTE(review): entries assumed to be {port_id, queue_id, lcore_id} per
 * struct lcore_params declared in main.h — confirm against that header. */
static struct lcore_params lcore_params_array_default[] = {
        {0, 0, 2},
        {0, 1, 2},
        {0, 2, 2},
        {1, 0, 2},
        {1, 1, 2},
        {1, 2, 2},
        {2, 0, 2},
        {3, 0, 3},
        {3, 1, 3},
};

struct lcore_params *lcore_params = lcore_params_array_default;
/* number of entries currently in lcore_params */
uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
                                sizeof(lcore_params_array_default[0]);
177
/* Default ethernet device configuration: RSS distribution over UDP flows,
 * checksum offload in rx, and per-queue rx interrupts enabled (rxq = 1)
 * so idle lcores can sleep until traffic arrives. */
static struct rte_eth_conf port_conf = {
        .rxmode = {
                .mq_mode        = ETH_MQ_RX_RSS,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
                .offloads = (DEV_RX_OFFLOAD_CRC_STRIP |
                             DEV_RX_OFFLOAD_CHECKSUM),
        },
        .rx_adv_conf = {
                .rss_conf = {
                        .rss_key = NULL,
                        .rss_hf = ETH_RSS_UDP,
                },
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
        },
        .intr_conf = {
                .rxq = 1,
        },
};

/* one packet mbuf pool per NUMA socket */
static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
201
202
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)

/* Pick the fastest available hash: the CRC32 instruction on x86,
 * Jenkins hash elsewhere. */
#ifdef RTE_ARCH_X86
#include <rte_hash_crc.h>
#define DEFAULT_HASH_FUNC       rte_hash_crc
#else
#include <rte_jhash.h>
#define DEFAULT_HASH_FUNC       rte_jhash
#endif

/* IPv4 5-tuple hash key; packed so the key bytes hash contiguously. */
struct ipv4_5tuple {
        uint32_t ip_dst;
        uint32_t ip_src;
        uint16_t port_dst;
        uint16_t port_src;
        uint8_t  proto;
} __attribute__((__packed__));

/* IPv6 5-tuple hash key. */
struct ipv6_5tuple {
        uint8_t  ip_dst[IPV6_ADDR_LEN];
        uint8_t  ip_src[IPV6_ADDR_LEN];
        uint16_t port_dst;
        uint16_t port_src;
        uint8_t  proto;
} __attribute__((__packed__));

/* Static route entry: exact 5-tuple -> output interface. */
struct ipv4_l3fwd_route {
        struct ipv4_5tuple key;
        uint8_t if_out;
};

struct ipv6_l3fwd_route {
        struct ipv6_5tuple key;
        uint8_t if_out;
};

/* built-in demo routes loaded into the hash tables at init time */
static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
        {{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0},
        {{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1},
        {{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2},
        {{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3},
};

static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
        {
                {
                        {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                         0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
                        {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                         0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a},
                         1, 10, IPPROTO_UDP
                }, 4
        },
};

typedef struct rte_hash lookup_struct_t;
/* one lookup table per NUMA socket */
static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];

#define L3FWD_HASH_ENTRIES      1024

#define IPV4_L3FWD_NUM_ROUTES \
        (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0]))

#define IPV6_L3FWD_NUM_ROUTES \
        (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0]))

/* hash slot index -> output port, filled when routes are installed */
static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
#endif
273
#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
/* Static LPM route: prefix/depth -> output interface. */
struct ipv4_l3fwd_route {
        uint32_t ip;
        uint8_t  depth;
        uint8_t  if_out;
};

/* built-in demo /24 routes, one per port 0..7 */
static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
        {IPv4(1,1,1,0), 24, 0},
        {IPv4(2,1,1,0), 24, 1},
        {IPv4(3,1,1,0), 24, 2},
        {IPv4(4,1,1,0), 24, 3},
        {IPv4(5,1,1,0), 24, 4},
        {IPv4(6,1,1,0), 24, 5},
        {IPv4(7,1,1,0), 24, 6},
        {IPv4(8,1,1,0), 24, 7},
};

#define IPV4_L3FWD_NUM_ROUTES \
        (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0]))

#define IPV4_L3FWD_LPM_MAX_RULES     1024

typedef struct rte_lpm lookup_struct_t;
/* one LPM table per NUMA socket */
static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
#endif
300
/* Per-lcore run-time state: rx queues to poll and tx state per port. */
struct lcore_conf {
        uint16_t n_rx_queue;
        struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
        uint16_t n_tx_port;
        uint16_t tx_port_id[RTE_MAX_ETHPORTS];
        /* tx queue this lcore uses, indexed by port id */
        uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
        struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
        lookup_struct_t * ipv4_lookup_struct;
        lookup_struct_t * ipv6_lookup_struct;
} __rte_cache_aligned;

/* Per-lcore counters driving the frequency scaling heuristics. */
struct lcore_stats {
        /* total sleep time in ms since last frequency scaling down */
        uint32_t sleep_time;
        /* number of long sleep recently */
        uint32_t nb_long_sleep;
        /* freq. scaling up trend */
        uint32_t trend;
        /* total packet processed recently */
        uint64_t nb_rx_processed;
        /* total iterations looped recently */
        uint64_t nb_iteration_looped;
        /* pad to a full cache line to avoid false sharing between lcores */
        uint32_t padding[9];
} __rte_cache_aligned;

static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned;
static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned;
/* one single-shot scale-down timer per lcore, re-armed in its callback */
static struct rte_timer power_timers[RTE_MAX_LCORE];

/* forward declarations of the power heuristics defined below */
static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \
                unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);
333
/* Exit signal handler: on SIGINT, de-initialize the power library on every
 * enabled lcore, stop and close every enabled port, then terminate. */
static void
signal_exit_now(int sigtype)
{
        unsigned lcore_id;
        unsigned int portid;
        int ret;

        if (sigtype == SIGINT) {
                for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
                        if (rte_lcore_is_enabled(lcore_id) == 0)
                                continue;

                        /* de-initialize the power management library */
                        ret = rte_power_exit(lcore_id);
                        if (ret)
                                rte_exit(EXIT_FAILURE, "Power management "
                                        "library de-initialization failed on "
                                                        "core%u\n", lcore_id);
                }

                /* release every port enabled in the port mask */
                RTE_ETH_FOREACH_DEV(portid) {
                        if ((enabled_port_mask & (1 << portid)) == 0)
                                continue;

                        rte_eth_dev_stop(portid);
                        rte_eth_dev_close(portid);
                }
        }

        rte_exit(EXIT_SUCCESS, "User forced exit\n");
}
366
367 /*  Freqency scale down timer callback */
368 static void
369 power_timer_cb(__attribute__((unused)) struct rte_timer *tim,
370                           __attribute__((unused)) void *arg)
371 {
372         uint64_t hz;
373         float sleep_time_ratio;
374         unsigned lcore_id = rte_lcore_id();
375
376         /* accumulate total execution time in us when callback is invoked */
377         sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
378                                         (float)SCALING_PERIOD;
379         /**
380          * check whether need to scale down frequency a step if it sleep a lot.
381          */
382         if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
383                 if (rte_power_freq_down)
384                         rte_power_freq_down(lcore_id);
385         }
386         else if ( (unsigned)(stats[lcore_id].nb_rx_processed /
387                 stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
388                 /**
389                  * scale down a step if average packet per iteration less
390                  * than expectation.
391                  */
392                 if (rte_power_freq_down)
393                         rte_power_freq_down(lcore_id);
394         }
395
396         /**
397          * initialize another timer according to current frequency to ensure
398          * timer interval is relatively fixed.
399          */
400         hz = rte_get_timer_hz();
401         rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND,
402                                 SINGLE, lcore_id, power_timer_cb, NULL);
403
404         stats[lcore_id].nb_rx_processed = 0;
405         stats[lcore_id].nb_iteration_looped = 0;
406
407         stats[lcore_id].sleep_time = 0;
408 }
409
410 /* Enqueue a single packet, and send burst if queue is filled */
411 static inline int
412 send_single_packet(struct rte_mbuf *m, uint16_t port)
413 {
414         uint32_t lcore_id;
415         struct lcore_conf *qconf;
416
417         lcore_id = rte_lcore_id();
418         qconf = &lcore_conf[lcore_id];
419
420         rte_eth_tx_buffer(port, qconf->tx_queue_id[port],
421                         qconf->tx_buffer[port], m);
422
423         return 0;
424 }
425
#ifdef DO_RFC_1812_CHECKS
/* Basic RFC1812 section 5.2.2 sanity checks on a received IPv4 header.
 * @pkt: pointer to the IPv4 header; @link_len: payload length reported by
 * the link layer. Returns 0 if the packet is acceptable, a negative check
 * number otherwise. */
static inline int
is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
{
        /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
        /*
         * 1. The packet length reported by the Link Layer must be large
         * enough to hold the minimum length legal IP datagram (20 bytes).
         */
        if (link_len < sizeof(struct ipv4_hdr))
                return -1;

        /* 2. The IP checksum must be correct. */
        /* this is checked in H/W */

        /*
         * 3. The IP version number must be 4. If the version number is not 4
         * then the packet may be another version of IP, such as IPng or
         * ST-II.
         */
        if (((pkt->version_ihl) >> 4) != 4)
                return -3;
        /*
         * 4. The IP header length field must be large enough to hold the
         * minimum length legal IP datagram (20 bytes = 5 words).
         */
        if ((pkt->version_ihl & 0xf) < 5)
                return -4;

        /*
         * 5. The IP total length field must be large enough to hold the IP
         * datagram header, whose length is specified in the IP header length
         * field. total_length is big-endian on the wire, so convert it to
         * host order before comparing (rte_be_to_cpu_16 — same byte swap as
         * the previous rte_cpu_to_be_16, but semantically the correct
         * direction for a field read from the network).
         */
        if (rte_be_to_cpu_16(pkt->total_length) < sizeof(struct ipv4_hdr))
                return -5;

        return 0;
}
#endif
466
467 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
/* Debug helper: print one IPv4 5-tuple hash key in host byte order. */
static void
print_ipv4_key(struct ipv4_5tuple key)
{
        printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, "
                "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src,
                                key.port_dst, key.port_src, key.proto);
}
/* Debug helper: print one IPv6 5-tuple hash key. */
static void
print_ipv6_key(struct ipv6_5tuple key)
{
        printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", "
                "port dst = %d, port src = %d, proto = %d\n",
                IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src),
                key.port_dst, key.port_src, key.proto);
}
483
484 static inline uint16_t
485 get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint16_t portid,
486                 lookup_struct_t * ipv4_l3fwd_lookup_struct)
487 {
488         struct ipv4_5tuple key;
489         struct tcp_hdr *tcp;
490         struct udp_hdr *udp;
491         int ret = 0;
492
493         key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
494         key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr);
495         key.proto = ipv4_hdr->next_proto_id;
496
497         switch (ipv4_hdr->next_proto_id) {
498         case IPPROTO_TCP:
499                 tcp = (struct tcp_hdr *)((unsigned char *)ipv4_hdr +
500                                         sizeof(struct ipv4_hdr));
501                 key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
502                 key.port_src = rte_be_to_cpu_16(tcp->src_port);
503                 break;
504
505         case IPPROTO_UDP:
506                 udp = (struct udp_hdr *)((unsigned char *)ipv4_hdr +
507                                         sizeof(struct ipv4_hdr));
508                 key.port_dst = rte_be_to_cpu_16(udp->dst_port);
509                 key.port_src = rte_be_to_cpu_16(udp->src_port);
510                 break;
511
512         default:
513                 key.port_dst = 0;
514                 key.port_src = 0;
515                 break;
516         }
517
518         /* Find destination port */
519         ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
520         return ((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]);
521 }
522
523 static inline uint16_t
524 get_ipv6_dst_port(struct ipv6_hdr *ipv6_hdr, uint16_t portid,
525                         lookup_struct_t *ipv6_l3fwd_lookup_struct)
526 {
527         struct ipv6_5tuple key;
528         struct tcp_hdr *tcp;
529         struct udp_hdr *udp;
530         int ret = 0;
531
532         memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN);
533         memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN);
534
535         key.proto = ipv6_hdr->proto;
536
537         switch (ipv6_hdr->proto) {
538         case IPPROTO_TCP:
539                 tcp = (struct tcp_hdr *)((unsigned char *) ipv6_hdr +
540                                         sizeof(struct ipv6_hdr));
541                 key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
542                 key.port_src = rte_be_to_cpu_16(tcp->src_port);
543                 break;
544
545         case IPPROTO_UDP:
546                 udp = (struct udp_hdr *)((unsigned char *) ipv6_hdr +
547                                         sizeof(struct ipv6_hdr));
548                 key.port_dst = rte_be_to_cpu_16(udp->dst_port);
549                 key.port_src = rte_be_to_cpu_16(udp->src_port);
550                 break;
551
552         default:
553                 key.port_dst = 0;
554                 key.port_src = 0;
555                 break;
556         }
557
558         /* Find destination port */
559         ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
560         return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
561 }
562 #endif
563
564 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
565 static inline uint16_t
566 get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint16_t portid,
567                 lookup_struct_t *ipv4_l3fwd_lookup_struct)
568 {
569         uint32_t next_hop;
570
571         return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
572                         rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)?
573                         next_hop : portid);
574 }
575 #endif
576
577 static inline void
578 parse_ptype_one(struct rte_mbuf *m)
579 {
580         struct ether_hdr *eth_hdr;
581         uint32_t packet_type = RTE_PTYPE_UNKNOWN;
582         uint16_t ether_type;
583
584         eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
585         ether_type = eth_hdr->ether_type;
586         if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
587                 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
588         else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
589                 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
590
591         m->packet_type = packet_type;
592 }
593
594 static uint16_t
595 cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused,
596                struct rte_mbuf *pkts[], uint16_t nb_pkts,
597                uint16_t max_pkts __rte_unused,
598                void *user_param __rte_unused)
599 {
600         unsigned int i;
601
602         for (i = 0; i < nb_pkts; ++i)
603                 parse_ptype_one(pkts[i]);
604
605         return nb_pkts;
606 }
607
608 static int
609 add_cb_parse_ptype(uint16_t portid, uint16_t queueid)
610 {
611         printf("Port %d: softly parse packet type info\n", portid);
612         if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL))
613                 return 0;
614
615         printf("Failed to add rx callback: port=%d\n", portid);
616         return -1;
617 }
618
/* Forward one packet: look up the destination port for IPv4 (and, in
 * exact-match mode, IPv6), rewrite source/destination MAC addresses and
 * buffer the packet for transmission. Unroutable or non-IP packets are
 * dropped. */
static inline void
l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid,
                                struct lcore_conf *qconf)
{
        struct ether_hdr *eth_hdr;
        struct ipv4_hdr *ipv4_hdr;
        void *d_addr_bytes;
        uint16_t dst_port;

        eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

        if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
                /* Handle IPv4 headers.*/
                ipv4_hdr =
                        rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
                                                sizeof(struct ether_hdr));

#ifdef DO_RFC_1812_CHECKS
                /* Check to make sure the packet is valid (RFC1812) */
                if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
                        rte_pktmbuf_free(m);
                        return;
                }
#endif

                dst_port = get_ipv4_dst_port(ipv4_hdr, portid,
                                        qconf->ipv4_lookup_struct);
                /* fall back to the input port when the route points to a
                 * port that is out of range or not enabled */
                if (dst_port >= RTE_MAX_ETHPORTS ||
                                (enabled_port_mask & 1 << dst_port) == 0)
                        dst_port = portid;

                /* 02:00:00:00:00:xx
                 * NOTE(review): this stores 8 bytes through a 6-byte MAC
                 * field — the extra 2 bytes land in s_addr, which is then
                 * overwritten by ether_addr_copy() below, so the net effect
                 * is confined to d_addr; confirm alignment is acceptable on
                 * the target arch. */
                d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
                *((uint64_t *)d_addr_bytes) =
                        0x000000000002 + ((uint64_t)dst_port << 40);

#ifdef DO_RFC_1812_CHECKS
                /* Update time to live and header checksum; the +1 is an
                 * incremental checksum fixup for the TTL decrement —
                 * presumably valid because the checksum was verified in
                 * hardware (see is_valid_ipv4_pkt), TODO confirm */
                --(ipv4_hdr->time_to_live);
                ++(ipv4_hdr->hdr_checksum);
#endif

                /* src addr */
                ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);

                send_single_packet(m, dst_port);
        } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
                /* Handle IPv6 headers.*/
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
                struct ipv6_hdr *ipv6_hdr;

                ipv6_hdr =
                        rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
                                                sizeof(struct ether_hdr));

                dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
                                        qconf->ipv6_lookup_struct);

                if (dst_port >= RTE_MAX_ETHPORTS ||
                                (enabled_port_mask & 1 << dst_port) == 0)
                        dst_port = portid;

                /* 02:00:00:00:00:xx — same 8-byte store as above */
                d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
                *((uint64_t *)d_addr_bytes) =
                        0x000000000002 + ((uint64_t)dst_port << 40);

                /* src addr */
                ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);

                send_single_packet(m, dst_port);
#else
                /* We don't currently handle IPv6 packets in LPM mode. */
                rte_pktmbuf_free(m);
#endif
        } else
                /* neither IPv4 nor IPv6: drop */
                rte_pktmbuf_free(m);

}
698
#define MINIMUM_SLEEP_TIME         1
#define SUSPEND_THRESHOLD          300

/*
 * Map the number of consecutive empty polls to a sleep duration in us:
 * fewer than SUSPEND_THRESHOLD empty polls keep sleeps minimal (1 us) so
 * latency stays low; at or above the threshold, sleep SUSPEND_THRESHOLD us
 * to let the core settle into deeper C-states.
 */
static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
        return (zero_rx_packet_count < SUSPEND_THRESHOLD) ?
                MINIMUM_SLEEP_TIME : SUSPEND_THRESHOLD;
}
714
/* Decide whether the lcore frequency should be raised, based on the
 * backlog currently sitting in the HW rx queue and a trend counter
 * accumulated in stats[lcore_id]. */
static inline enum freq_scale_hint_t
power_freq_scaleup_heuristic(unsigned lcore_id,
                             uint16_t port_id,
                             uint16_t queue_id)
{
        /* number of descriptors currently filled in the HW rx queue */
        uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id);
/**
 * HW Rx queue size is 128 by default, Rx burst read at maximum 32 entries
 * per iteration
 */
#define FREQ_GEAR1_RX_PACKET_THRESHOLD             MAX_PKT_BURST
#define FREQ_GEAR2_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*2)
#define FREQ_GEAR3_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*3)
#define FREQ_UP_TREND1_ACC   1
#define FREQ_UP_TREND2_ACC   100
#define FREQ_UP_THRESHOLD    10000

        /* backlog above three full bursts: jump straight to max frequency */
        if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) {
                stats[lcore_id].trend = 0;
                return FREQ_HIGHEST;
        } else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD))
                /* moderate backlog: accumulate the up-trend quickly */
                stats[lcore_id].trend += FREQ_UP_TREND2_ACC;
        else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD))
                /* light backlog: accumulate the up-trend slowly */
                stats[lcore_id].trend += FREQ_UP_TREND1_ACC;

        /* sustained load: ask for one frequency step up, reset the trend */
        if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) {
                stats[lcore_id].trend = 0;
                return FREQ_HIGHER;
        }

        return FREQ_CURRENT;
}
747
/**
 * force polling thread sleep until one-shot rx interrupt triggers
 * @param num
 *  Number of rx queues registered for this thread (sizes the epoll
 *  event array).
 * @return
 *  0 on success
 */
static int
sleep_until_rx_interrupt(int num)
{
        struct rte_epoll_event event[num];
        int n, i;
        uint16_t port_id;
        uint8_t queue_id;
        void *data;

        RTE_LOG(INFO, L3FWD_POWER,
                "lcore %u sleeps until interrupt triggers\n",
                rte_lcore_id());

        /* block until at least one rx interrupt fires (-1 = no timeout) */
        n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, -1);
        for (i = 0; i < n; i++) {
                /* epoll user data was packed as (port_id << CHAR_BIT) |
                 * queue_id by event_register() */
                data = event[i].epdata.data;
                port_id = ((uintptr_t)data) >> CHAR_BIT;
                queue_id = ((uintptr_t)data) &
                        RTE_LEN2MASK(CHAR_BIT, uint8_t);
                /* back to polling mode: disable the consumed interrupt */
                rte_eth_dev_rx_intr_disable(port_id, queue_id);
                RTE_LOG(INFO, L3FWD_POWER,
                        "lcore %u is waked up from rx interrupt on"
                        " port %d queue %d\n",
                        rte_lcore_id(), port_id, queue_id);
        }

        return 0;
}
785
786 static void turn_on_intr(struct lcore_conf *qconf)
787 {
788         int i;
789         struct lcore_rx_queue *rx_queue;
790         uint8_t queue_id;
791         uint16_t port_id;
792
793         for (i = 0; i < qconf->n_rx_queue; ++i) {
794                 rx_queue = &(qconf->rx_queue_list[i]);
795                 port_id = rx_queue->port_id;
796                 queue_id = rx_queue->queue_id;
797
798                 rte_spinlock_lock(&(locks[port_id]));
799                 rte_eth_dev_rx_intr_enable(port_id, queue_id);
800                 rte_spinlock_unlock(&(locks[port_id]));
801         }
802 }
803
804 static int event_register(struct lcore_conf *qconf)
805 {
806         struct lcore_rx_queue *rx_queue;
807         uint8_t queueid;
808         uint16_t portid;
809         uint32_t data;
810         int ret;
811         int i;
812
813         for (i = 0; i < qconf->n_rx_queue; ++i) {
814                 rx_queue = &(qconf->rx_queue_list[i]);
815                 portid = rx_queue->port_id;
816                 queueid = rx_queue->queue_id;
817                 data = portid << CHAR_BIT | queueid;
818
819                 ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
820                                                 RTE_EPOLL_PER_THREAD,
821                                                 RTE_INTR_EVENT_ADD,
822                                                 (void *)((uintptr_t)data));
823                 if (ret)
824                         return ret;
825         }
826
827         return 0;
828 }
829
830 /* main processing loop */
831 static int
832 main_loop(__attribute__((unused)) void *dummy)
833 {
834         struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
835         unsigned lcore_id;
836         uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz;
837         uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power;
838         int i, j, nb_rx;
839         uint8_t queueid;
840         uint16_t portid;
841         struct lcore_conf *qconf;
842         struct lcore_rx_queue *rx_queue;
843         enum freq_scale_hint_t lcore_scaleup_hint;
844         uint32_t lcore_rx_idle_count = 0;
845         uint32_t lcore_idle_hint = 0;
846         int intr_en = 0;
847
848         const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
849
850         prev_tsc = 0;
851         hz = rte_get_timer_hz();
852         tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND;
853
854         lcore_id = rte_lcore_id();
855         qconf = &lcore_conf[lcore_id];
856
857         if (qconf->n_rx_queue == 0) {
858                 RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id);
859                 return 0;
860         }
861
862         RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id);
863
864         for (i = 0; i < qconf->n_rx_queue; i++) {
865                 portid = qconf->rx_queue_list[i].port_id;
866                 queueid = qconf->rx_queue_list[i].queue_id;
867                 RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u "
868                         "rxqueueid=%hhu\n", lcore_id, portid, queueid);
869         }
870
871         /* add into event wait list */
872         if (event_register(qconf) == 0)
873                 intr_en = 1;
874         else
875                 RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n");
876
877         while (1) {
878                 stats[lcore_id].nb_iteration_looped++;
879
880                 cur_tsc = rte_rdtsc();
881                 cur_tsc_power = cur_tsc;
882
883                 /*
884                  * TX burst queue drain
885                  */
886                 diff_tsc = cur_tsc - prev_tsc;
887                 if (unlikely(diff_tsc > drain_tsc)) {
888                         for (i = 0; i < qconf->n_tx_port; ++i) {
889                                 portid = qconf->tx_port_id[i];
890                                 rte_eth_tx_buffer_flush(portid,
891                                                 qconf->tx_queue_id[portid],
892                                                 qconf->tx_buffer[portid]);
893                         }
894                         prev_tsc = cur_tsc;
895                 }
896
897                 diff_tsc_power = cur_tsc_power - prev_tsc_power;
898                 if (diff_tsc_power > tim_res_tsc) {
899                         rte_timer_manage();
900                         prev_tsc_power = cur_tsc_power;
901                 }
902
903 start_rx:
904                 /*
905                  * Read packet from RX queues
906                  */
907                 lcore_scaleup_hint = FREQ_CURRENT;
908                 lcore_rx_idle_count = 0;
909                 for (i = 0; i < qconf->n_rx_queue; ++i) {
910                         rx_queue = &(qconf->rx_queue_list[i]);
911                         rx_queue->idle_hint = 0;
912                         portid = rx_queue->port_id;
913                         queueid = rx_queue->queue_id;
914
915                         nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
916                                                                 MAX_PKT_BURST);
917
918                         stats[lcore_id].nb_rx_processed += nb_rx;
919                         if (unlikely(nb_rx == 0)) {
920                                 /**
921                                  * no packet received from rx queue, try to
922                                  * sleep for a while forcing CPU enter deeper
923                                  * C states.
924                                  */
925                                 rx_queue->zero_rx_packet_count++;
926
927                                 if (rx_queue->zero_rx_packet_count <=
928                                                         MIN_ZERO_POLL_COUNT)
929                                         continue;
930
931                                 rx_queue->idle_hint = power_idle_heuristic(\
932                                         rx_queue->zero_rx_packet_count);
933                                 lcore_rx_idle_count++;
934                         } else {
935                                 rx_queue->zero_rx_packet_count = 0;
936
937                                 /**
938                                  * do not scale up frequency immediately as
939                                  * user to kernel space communication is costly
940                                  * which might impact packet I/O for received
941                                  * packets.
942                                  */
943                                 rx_queue->freq_up_hint =
944                                         power_freq_scaleup_heuristic(lcore_id,
945                                                         portid, queueid);
946                         }
947
948                         /* Prefetch first packets */
949                         for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
950                                 rte_prefetch0(rte_pktmbuf_mtod(
951                                                 pkts_burst[j], void *));
952                         }
953
954                         /* Prefetch and forward already prefetched packets */
955                         for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
956                                 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
957                                                 j + PREFETCH_OFFSET], void *));
958                                 l3fwd_simple_forward(pkts_burst[j], portid,
959                                                                 qconf);
960                         }
961
962                         /* Forward remaining prefetched packets */
963                         for (; j < nb_rx; j++) {
964                                 l3fwd_simple_forward(pkts_burst[j], portid,
965                                                                 qconf);
966                         }
967                 }
968
969                 if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) {
970                         for (i = 1, lcore_scaleup_hint =
971                                 qconf->rx_queue_list[0].freq_up_hint;
972                                         i < qconf->n_rx_queue; ++i) {
973                                 rx_queue = &(qconf->rx_queue_list[i]);
974                                 if (rx_queue->freq_up_hint >
975                                                 lcore_scaleup_hint)
976                                         lcore_scaleup_hint =
977                                                 rx_queue->freq_up_hint;
978                         }
979
980                         if (lcore_scaleup_hint == FREQ_HIGHEST) {
981                                 if (rte_power_freq_max)
982                                         rte_power_freq_max(lcore_id);
983                         } else if (lcore_scaleup_hint == FREQ_HIGHER) {
984                                 if (rte_power_freq_up)
985                                         rte_power_freq_up(lcore_id);
986                         }
987                 } else {
988                         /**
989                          * All Rx queues empty in recent consecutive polls,
990                          * sleep in a conservative manner, meaning sleep as
991                          * less as possible.
992                          */
993                         for (i = 1, lcore_idle_hint =
994                                 qconf->rx_queue_list[0].idle_hint;
995                                         i < qconf->n_rx_queue; ++i) {
996                                 rx_queue = &(qconf->rx_queue_list[i]);
997                                 if (rx_queue->idle_hint < lcore_idle_hint)
998                                         lcore_idle_hint = rx_queue->idle_hint;
999                         }
1000
1001                         if (lcore_idle_hint < SUSPEND_THRESHOLD)
1002                                 /**
1003                                  * execute "pause" instruction to avoid context
1004                                  * switch which generally take hundred of
1005                                  * microseconds for short sleep.
1006                                  */
1007                                 rte_delay_us(lcore_idle_hint);
1008                         else {
1009                                 /* suspend until rx interrupt trigges */
1010                                 if (intr_en) {
1011                                         turn_on_intr(qconf);
1012                                         sleep_until_rx_interrupt(
1013                                                 qconf->n_rx_queue);
1014                                         /**
1015                                          * start receiving packets immediately
1016                                          */
1017                                         goto start_rx;
1018                                 }
1019                         }
1020                         stats[lcore_id].sleep_time += lcore_idle_hint;
1021                 }
1022         }
1023 }
1024
1025 static int
1026 check_lcore_params(void)
1027 {
1028         uint8_t queue, lcore;
1029         uint16_t i;
1030         int socketid;
1031
1032         for (i = 0; i < nb_lcore_params; ++i) {
1033                 queue = lcore_params[i].queue_id;
1034                 if (queue >= MAX_RX_QUEUE_PER_PORT) {
1035                         printf("invalid queue number: %hhu\n", queue);
1036                         return -1;
1037                 }
1038                 lcore = lcore_params[i].lcore_id;
1039                 if (!rte_lcore_is_enabled(lcore)) {
1040                         printf("error: lcore %hhu is not enabled in lcore "
1041                                                         "mask\n", lcore);
1042                         return -1;
1043                 }
1044                 if ((socketid = rte_lcore_to_socket_id(lcore) != 0) &&
1045                                                         (numa_on == 0)) {
1046                         printf("warning: lcore %hhu is on socket %d with numa "
1047                                                 "off\n", lcore, socketid);
1048                 }
1049         }
1050         return 0;
1051 }
1052
1053 static int
1054 check_port_config(void)
1055 {
1056         unsigned portid;
1057         uint16_t i;
1058
1059         for (i = 0; i < nb_lcore_params; ++i) {
1060                 portid = lcore_params[i].port_id;
1061                 if ((enabled_port_mask & (1 << portid)) == 0) {
1062                         printf("port %u is not enabled in port mask\n",
1063                                                                 portid);
1064                         return -1;
1065                 }
1066                 if (!rte_eth_dev_is_valid_port(portid)) {
1067                         printf("port %u is not present on the board\n",
1068                                                                 portid);
1069                         return -1;
1070                 }
1071         }
1072         return 0;
1073 }
1074
1075 static uint8_t
1076 get_port_n_rx_queues(const uint16_t port)
1077 {
1078         int queue = -1;
1079         uint16_t i;
1080
1081         for (i = 0; i < nb_lcore_params; ++i) {
1082                 if (lcore_params[i].port_id == port &&
1083                                 lcore_params[i].queue_id > queue)
1084                         queue = lcore_params[i].queue_id;
1085         }
1086         return (uint8_t)(++queue);
1087 }
1088
1089 static int
1090 init_lcore_rx_queues(void)
1091 {
1092         uint16_t i, nb_rx_queue;
1093         uint8_t lcore;
1094
1095         for (i = 0; i < nb_lcore_params; ++i) {
1096                 lcore = lcore_params[i].lcore_id;
1097                 nb_rx_queue = lcore_conf[lcore].n_rx_queue;
1098                 if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
1099                         printf("error: too many queues (%u) for lcore: %u\n",
1100                                 (unsigned)nb_rx_queue + 1, (unsigned)lcore);
1101                         return -1;
1102                 } else {
1103                         lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
1104                                 lcore_params[i].port_id;
1105                         lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
1106                                 lcore_params[i].queue_id;
1107                         lcore_conf[lcore].n_rx_queue++;
1108                 }
1109         }
1110         return 0;
1111 }
1112
1113 /* display usage */
/* Print the command-line usage summary for @prgname to stdout. */
static void
print_usage(const char *prgname)
{
	/* one printf keeps the whole help text in a single place */
	printf("%s [EAL options] -- -p PORTMASK -P"
		"  [--config (port,queue,lcore)[,(port,queue,lcore]]"
		"  [--high-perf-cores CORELIST"
		"  [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index]]"
		"  [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
		"  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
		"  -P : enable promiscuous mode\n"
		"  --config (port,queue,lcore): rx queues configuration\n"
		"  --high-perf-cores CORELIST: list of high performance cores\n"
		"  --perf-config: similar as config, cores specified as indices"
		" for bins containing high or regular performance cores\n"
		"  --no-numa: optional, disable numa awareness\n"
		"  --enable-jumbo: enable jumbo frame"
		" which max packet len is PKTLEN in decimal (64-9600)\n"
		"  --parse-ptype: parse packet type by software\n",
		prgname);
}
1134
/*
 * Parse a decimal packet-length string.
 * Returns the parsed value on success, -1 for an empty string,
 * trailing garbage, or a zero value.
 */
static int parse_max_pkt_len(const char *pktlen)
{
	unsigned long value;
	char *endp = NULL;

	value = strtoul(pktlen, &endp, 10);
	if (pktlen[0] == '\0' || endp == NULL || *endp != '\0' ||
			value == 0)
		return -1;

	return value;
}
1150
/*
 * Parse a hexadecimal port-mask string.
 * Returns the mask on success, -1 for an empty string, trailing
 * garbage, or an all-zero mask.
 */
static int
parse_portmask(const char *portmask)
{
	unsigned long mask;
	char *endp = NULL;

	mask = strtoul(portmask, &endp, 16);
	if (portmask[0] == '\0' || endp == NULL || *endp != '\0' ||
			mask == 0)
		return -1;

	return mask;
}
1167
/*
 * Parse the --config option string: a list of "(port,queue,lcore)"
 * triples.  Each triple is appended to lcore_params_array and the
 * global lcore_params pointer is switched to that array.
 * Returns 0 on success, -1 on any syntax error, a field that does not
 * fit in a uint8_t, or more than MAX_LCORE_PARAMS triples.
 */
static int
parse_config(const char *q_arg)
{
	char s[256];
	const char *p, *p0 = q_arg;
	char *end;
	enum fieldnames {
		FLD_PORT = 0,
		FLD_QUEUE,
		FLD_LCORE,
		_NUM_FLD
	};
	unsigned long int_fld[_NUM_FLD];
	char *str_fld[_NUM_FLD];
	int i;
	unsigned size;

	/* discard any previously parsed configuration */
	nb_lcore_params = 0;

	/* scan for each "(...)" group in turn */
	while ((p = strchr(p0,'(')) != NULL) {
		++p;
		/* an unmatched '(' is a syntax error */
		if((p0 = strchr(p,')')) == NULL)
			return -1;

		size = p0 - p;
		if(size >= sizeof(s))
			return -1;

		/* copy "port,queue,lcore" into a writable buffer for splitting */
		snprintf(s, sizeof(s), "%.*s", size, p);
		if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
								_NUM_FLD)
			return -1;
		for (i = 0; i < _NUM_FLD; i++){
			errno = 0;
			int_fld[i] = strtoul(str_fld[i], &end, 0);
			/* every field must be numeric and fit in a uint8_t */
			if (errno != 0 || end == str_fld[i] || int_fld[i] >
									255)
				return -1;
		}
		if (nb_lcore_params >= MAX_LCORE_PARAMS) {
			printf("exceeded max number of lcore params: %hu\n",
				nb_lcore_params);
			return -1;
		}
		lcore_params_array[nb_lcore_params].port_id =
				(uint8_t)int_fld[FLD_PORT];
		lcore_params_array[nb_lcore_params].queue_id =
				(uint8_t)int_fld[FLD_QUEUE];
		lcore_params_array[nb_lcore_params].lcore_id =
				(uint8_t)int_fld[FLD_LCORE];
		++nb_lcore_params;
	}
	/* from now on the user-supplied table overrides the default one */
	lcore_params = lcore_params_array;

	return 0;
}
1224
1225 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
1226
1227 /* Parse the argument given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
	int opt, ret;
	char **argvopt;
	int option_index;
	char *prgname = argv[0];
	/* long options accepted after the EAL "--" separator */
	static struct option lgopts[] = {
		{"config", 1, 0, 0},
		{"perf-config", 1, 0, 0},
		{"high-perf-cores", 1, 0, 0},
		{"no-numa", 0, 0, 0},
		{"enable-jumbo", 0, 0, 0},
		{CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
		{NULL, 0, 0, 0}
	};

	argvopt = argv;

	while ((opt = getopt_long(argc, argvopt, "p:P",
				lgopts, &option_index)) != EOF) {

		switch (opt) {
		/* portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				printf("invalid portmask\n");
				print_usage(prgname);
				return -1;
			}
			break;
		case 'P':
			printf("Promiscuous mode selected\n");
			promiscuous_on = 1;
			break;

		/* long options */
		case 0:
			/* --config: explicit (port,queue,lcore) mapping */
			if (!strncmp(lgopts[option_index].name, "config", 6)) {
				ret = parse_config(optarg);
				if (ret) {
					printf("invalid config\n");
					print_usage(prgname);
					return -1;
				}
			}

			/* --perf-config: mapping via performance-bin indices */
			if (!strncmp(lgopts[option_index].name,
					"perf-config", 11)) {
				ret = parse_perf_config(optarg);
				if (ret) {
					printf("invalid perf-config\n");
					print_usage(prgname);
					return -1;
				}
			}

			/* --high-perf-cores: list of high performance cores */
			if (!strncmp(lgopts[option_index].name,
					"high-perf-cores", 15)) {
				ret = parse_perf_core_list(optarg);
				if (ret) {
					printf("invalid high-perf-cores\n");
					print_usage(prgname);
					return -1;
				}
			}

			/* --no-numa: disable NUMA-aware memory allocation */
			if (!strncmp(lgopts[option_index].name,
						"no-numa", 7)) {
				printf("numa is disabled \n");
				numa_on = 0;
			}

			/* --enable-jumbo [--max-pkt-len N]: jumbo frames */
			if (!strncmp(lgopts[option_index].name,
					"enable-jumbo", 12)) {
				struct option lenopts =
					{"max-pkt-len", required_argument, \
									0, 0};

				printf("jumbo frame is enabled \n");
				port_conf.rxmode.offloads |=
						DEV_RX_OFFLOAD_JUMBO_FRAME;
				port_conf.txmode.offloads |=
						DEV_TX_OFFLOAD_MULTI_SEGS;

				/**
				 * if no max-pkt-len set, use the default value
				 * ETHER_MAX_LEN
				 */
				/* note: nested getopt_long consumes the
				 * optional --max-pkt-len argument in place */
				if (0 == getopt_long(argc, argvopt, "",
						&lenopts, &option_index)) {
					ret = parse_max_pkt_len(optarg);
					if ((ret < 64) ||
						(ret > MAX_JUMBO_PKT_LEN)){
						printf("invalid packet "
								"length\n");
						print_usage(prgname);
						return -1;
					}
					port_conf.rxmode.max_rx_pkt_len = ret;
				}
				printf("set jumbo frame "
					"max packet length to %u\n",
				(unsigned int)port_conf.rxmode.max_rx_pkt_len);
			}

			/* --parse-ptype: classify packets in software */
			if (!strncmp(lgopts[option_index].name,
				     CMD_LINE_OPT_PARSE_PTYPE,
				     sizeof(CMD_LINE_OPT_PARSE_PTYPE))) {
				printf("soft parse-ptype is enabled\n");
				parse_ptype = 1;
			}

			break;

		default:
			print_usage(prgname);
			return -1;
		}
	}

	/* rewrite argv so the EAL-style caller sees prgname first */
	if (optind >= 0)
		argv[optind-1] = prgname;

	ret = optind-1;
	optind = 1; /* reset getopt lib */
	return ret;
}
1357
1358 static void
1359 print_ethaddr(const char *name, const struct ether_addr *eth_addr)
1360 {
1361         char buf[ETHER_ADDR_FMT_SIZE];
1362         ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
1363         printf("%s%s", name, buf);
1364 }
1365
1366 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
1367 static void
1368 setup_hash(int socketid)
1369 {
1370         struct rte_hash_parameters ipv4_l3fwd_hash_params = {
1371                 .name = NULL,
1372                 .entries = L3FWD_HASH_ENTRIES,
1373                 .key_len = sizeof(struct ipv4_5tuple),
1374                 .hash_func = DEFAULT_HASH_FUNC,
1375                 .hash_func_init_val = 0,
1376         };
1377
1378         struct rte_hash_parameters ipv6_l3fwd_hash_params = {
1379                 .name = NULL,
1380                 .entries = L3FWD_HASH_ENTRIES,
1381                 .key_len = sizeof(struct ipv6_5tuple),
1382                 .hash_func = DEFAULT_HASH_FUNC,
1383                 .hash_func_init_val = 0,
1384         };
1385
1386         unsigned i;
1387         int ret;
1388         char s[64];
1389
1390         /* create ipv4 hash */
1391         snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
1392         ipv4_l3fwd_hash_params.name = s;
1393         ipv4_l3fwd_hash_params.socket_id = socketid;
1394         ipv4_l3fwd_lookup_struct[socketid] =
1395                 rte_hash_create(&ipv4_l3fwd_hash_params);
1396         if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
1397                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
1398                                 "socket %d\n", socketid);
1399
1400         /* create ipv6 hash */
1401         snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
1402         ipv6_l3fwd_hash_params.name = s;
1403         ipv6_l3fwd_hash_params.socket_id = socketid;
1404         ipv6_l3fwd_lookup_struct[socketid] =
1405                 rte_hash_create(&ipv6_l3fwd_hash_params);
1406         if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
1407                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
1408                                 "socket %d\n", socketid);
1409
1410
1411         /* populate the ipv4 hash */
1412         for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
1413                 ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid],
1414                                 (void *) &ipv4_l3fwd_route_array[i].key);
1415                 if (ret < 0) {
1416                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the"
1417                                 "l3fwd hash on socket %d\n", i, socketid);
1418                 }
1419                 ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out;
1420                 printf("Hash: Adding key\n");
1421                 print_ipv4_key(ipv4_l3fwd_route_array[i].key);
1422         }
1423
1424         /* populate the ipv6 hash */
1425         for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) {
1426                 ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid],
1427                                 (void *) &ipv6_l3fwd_route_array[i].key);
1428                 if (ret < 0) {
1429                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the"
1430                                 "l3fwd hash on socket %d\n", i, socketid);
1431                 }
1432                 ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out;
1433                 printf("Hash: Adding key\n");
1434                 print_ipv6_key(ipv6_l3fwd_route_array[i].key);
1435         }
1436 }
1437 #endif
1438
1439 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
1440 static void
1441 setup_lpm(int socketid)
1442 {
1443         unsigned i;
1444         int ret;
1445         char s[64];
1446
1447         /* create the LPM table */
1448         struct rte_lpm_config lpm_ipv4_config;
1449
1450         lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
1451         lpm_ipv4_config.number_tbl8s = 256;
1452         lpm_ipv4_config.flags = 0;
1453
1454         snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
1455         ipv4_l3fwd_lookup_struct[socketid] =
1456                         rte_lpm_create(s, socketid, &lpm_ipv4_config);
1457         if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
1458                 rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
1459                                 " on socket %d\n", socketid);
1460
1461         /* populate the LPM table */
1462         for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
1463                 ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
1464                         ipv4_l3fwd_route_array[i].ip,
1465                         ipv4_l3fwd_route_array[i].depth,
1466                         ipv4_l3fwd_route_array[i].if_out);
1467
1468                 if (ret < 0) {
1469                         rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
1470                                 "l3fwd LPM table on socket %d\n",
1471                                 i, socketid);
1472                 }
1473
1474                 printf("LPM: Adding route 0x%08x / %d (%d)\n",
1475                         (unsigned)ipv4_l3fwd_route_array[i].ip,
1476                         ipv4_l3fwd_route_array[i].depth,
1477                         ipv4_l3fwd_route_array[i].if_out);
1478         }
1479 }
1480 #endif
1481
/*
 * Per-socket memory setup: for every enabled lcore, make sure its
 * socket (socket 0 when NUMA is off) has an mbuf pool and a lookup
 * structure (LPM or hash, per build config), then wire the lookup
 * struct pointers into the lcore's lcore_conf entry.
 * Always returns 0; exits the application on allocation failure.
 */
static int
init_mem(unsigned nb_mbuf)
{
	struct lcore_conf *qconf;
	int socketid;
	unsigned lcore_id;
	char s[64];

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		/* with NUMA off everything is allocated on socket 0 */
		if (numa_on)
			socketid = rte_lcore_to_socket_id(lcore_id);
		else
			socketid = 0;

		if (socketid >= NB_SOCKETS) {
			rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is "
					"out of range %d\n", socketid,
						lcore_id, NB_SOCKETS);
		}
		/* first lcore seen on a socket creates that socket's pool */
		if (pktmbuf_pool[socketid] == NULL) {
			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
			pktmbuf_pool[socketid] =
				rte_pktmbuf_pool_create(s, nb_mbuf,
					MEMPOOL_CACHE_SIZE, 0,
					RTE_MBUF_DEFAULT_BUF_SIZE,
					socketid);
			if (pktmbuf_pool[socketid] == NULL)
				rte_exit(EXIT_FAILURE,
					"Cannot init mbuf pool on socket %d\n",
								socketid);
			else
				printf("Allocated mbuf pool on socket %d\n",
								socketid);

			/* the lookup structure is created together with
			 * the socket's pool, exactly once per socket */
#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
			setup_lpm(socketid);
#else
			setup_hash(socketid);
#endif
		}
		qconf = &lcore_conf[lcore_id];
		qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
#endif
	}
	return 0;
}
1533
1534 /* Check the link status of all ports in up to 9s, and print them finally */
1535 static void
1536 check_all_ports_link_status(uint32_t port_mask)
1537 {
1538 #define CHECK_INTERVAL 100 /* 100ms */
1539 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
1540         uint8_t count, all_ports_up, print_flag = 0;
1541         uint16_t portid;
1542         struct rte_eth_link link;
1543
1544         printf("\nChecking link status");
1545         fflush(stdout);
1546         for (count = 0; count <= MAX_CHECK_TIME; count++) {
1547                 all_ports_up = 1;
1548                 RTE_ETH_FOREACH_DEV(portid) {
1549                         if ((port_mask & (1 << portid)) == 0)
1550                                 continue;
1551                         memset(&link, 0, sizeof(link));
1552                         rte_eth_link_get_nowait(portid, &link);
1553                         /* print link status if flag set */
1554                         if (print_flag == 1) {
1555                                 if (link.link_status)
1556                                         printf("Port %d Link Up - speed %u "
1557                                                 "Mbps - %s\n", (uint8_t)portid,
1558                                                 (unsigned)link.link_speed,
1559                                 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
1560                                         ("full-duplex") : ("half-duplex\n"));
1561                                 else
1562                                         printf("Port %d Link Down\n",
1563                                                 (uint8_t)portid);
1564                                 continue;
1565                         }
1566                         /* clear all_ports_up flag if any link down */
1567                         if (link.link_status == ETH_LINK_DOWN) {
1568                                 all_ports_up = 0;
1569                                 break;
1570                         }
1571                 }
1572                 /* after finally printing all link status, get out */
1573                 if (print_flag == 1)
1574                         break;
1575
1576                 if (all_ports_up == 0) {
1577                         printf(".");
1578                         fflush(stdout);
1579                         rte_delay_ms(CHECK_INTERVAL);
1580                 }
1581
1582                 /* set the print_flag if all ports up or timeout */
1583                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
1584                         print_flag = 1;
1585                         printf("done\n");
1586                 }
1587         }
1588 }
1589
/*
 * Query whether the PMD for @portid can classify the L3 packet types
 * this application needs (IPv4 always; IPv6 too in exact-match mode).
 * Returns 1 when hardware ptype parsing is sufficient, 0 when the
 * caller must fall back to software parsing (--parse-ptype).
 */
static int check_ptype(uint16_t portid)
{
	int i, ret;
	int ptype_l3_ipv4 = 0;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	int ptype_l3_ipv6 = 0;
#endif
	uint32_t ptype_mask = RTE_PTYPE_L3_MASK;

	/* first call with NULL just returns the number of entries */
	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
	if (ret <= 0)
		return 0;

	/* VLA sized by the driver-reported count queried above */
	uint32_t ptypes[ret];

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
	for (i = 0; i < ret; ++i) {
		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
			ptype_l3_ipv4 = 1;
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
			ptype_l3_ipv6 = 1;
#endif
	}

	if (ptype_l3_ipv4 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
	if (ptype_l3_ipv6 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
#endif

	/* LPM mode only forwards IPv4; exact-match needs both families */
#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
	if (ptype_l3_ipv4)
#else /* APP_LOOKUP_EXACT_MATCH */
	if (ptype_l3_ipv4 && ptype_l3_ipv6)
#endif
		return 1;

	return 0;

}
1633
1634 static int
1635 init_power_library(void)
1636 {
1637         int ret = 0, lcore_id;
1638         for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1639                 if (rte_lcore_is_enabled(lcore_id)) {
1640                         /* init power management library */
1641                         ret = rte_power_init(lcore_id);
1642                         if (ret)
1643                                 RTE_LOG(ERR, POWER,
1644                                 "Library initialization failed on core %u\n",
1645                                 lcore_id);
1646                 }
1647         }
1648         return ret;
1649 }
1650
/*
 * Application entry point.
 *
 * Initialization sequence:
 *   1. install a SIGINT handler (signal_exit_now restores cpufreq state),
 *   2. rte_eal_init() then application argument parsing,
 *   3. per-lcore power-library init and lcore/port parameter checks,
 *   4. per-port setup: queue counts, offloads, RSS config, device
 *      configure, MAC query, mbuf pools, TX buffers, and one TX queue
 *      per (lcore, port) couple,
 *   5. per-lcore setup: power-scaling timer and all assigned RX queues,
 *   6. start every enabled port, wait for links, launch main_loop()
 *      on all lcores and wait for them.
 *
 * Returns 0 on success, -1 if a slave lcore exits with an error; any
 * setup failure aborts via rte_exit().
 */
int
main(int argc, char **argv)
{
	struct lcore_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	int ret;
	uint16_t nb_ports;
	uint16_t queueid;
	unsigned lcore_id;
	uint64_t hz;
	uint32_t n_tx_queue, nb_lcores;
	uint32_t dev_rxq_num, dev_txq_num;
	uint8_t nb_rx_queue, queue, socketid;
	uint16_t portid;

	/* catch SIGINT and restore cpufreq governor to ondemand */
	signal(SIGINT, signal_exit_now);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
	argc -= ret;
	argv += ret;

	/* init RTE timer library to be used late */
	rte_timer_subsystem_init();

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");

	if (init_power_library())
		rte_exit(EXIT_FAILURE, "init_power_library failed\n");

	if (update_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "update_lcore_params failed\n");

	if (check_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");

	ret = init_lcore_rx_queues();
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");

	/* NOTE(review): nb_ports is not referenced later in this function. */
	nb_ports = rte_eth_dev_count_avail();

	if (check_port_config() < 0)
		rte_exit(EXIT_FAILURE, "check_port_config failed\n");

	nb_lcores = rte_lcore_count();

	/* initialize all ports */
	RTE_ETH_FOREACH_DEV(portid) {
		/* per-port copy so offload/RSS tweaks don't affect other ports */
		struct rte_eth_conf local_port_conf = port_conf;

		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}

		/* init port */
		printf("Initializing port %d ... ", portid );
		fflush(stdout);

		rte_eth_dev_info_get(portid, &dev_info);
		dev_rxq_num = dev_info.max_rx_queues;
		dev_txq_num = dev_info.max_tx_queues;

		nb_rx_queue = get_port_n_rx_queues(portid);
		if (nb_rx_queue > dev_rxq_num)
			rte_exit(EXIT_FAILURE,
				"Cannot configure not existed rxq: "
				"port=%d\n", portid);

		/* one TX queue per lcore, capped at the device maximum */
		n_tx_queue = nb_lcores;
		if (n_tx_queue > dev_txq_num)
			n_tx_queue = dev_txq_num;
		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
			nb_rx_queue, (unsigned)n_tx_queue );
		/* If number of Rx queue is 0, no need to enable Rx interrupt */
		if (nb_rx_queue == 0)
			local_port_conf.intr_conf.rxq = 0;
		/* NOTE(review): dev_info was already fetched above; this
		 * second query is redundant but harmless. */
		rte_eth_dev_info_get(portid, &dev_info);
		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
			local_port_conf.txmode.offloads |=
				DEV_TX_OFFLOAD_MBUF_FAST_FREE;

		/* keep only the RSS hash functions the hardware supports */
		local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
			dev_info.flow_type_rss_offloads;
		if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
				port_conf.rx_adv_conf.rss_conf.rss_hf) {
			printf("Port %u modified RSS hash function based on hardware support,"
				"requested:%#"PRIx64" configured:%#"PRIx64"\n",
				portid,
				port_conf.rx_adv_conf.rss_conf.rss_hf,
				local_port_conf.rx_adv_conf.rss_conf.rss_hf);
		}

		ret = rte_eth_dev_configure(portid, nb_rx_queue,
					(uint16_t)n_tx_queue, &local_port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: "
					"err=%d, port=%d\n", ret, portid);

		/* let the PMD clamp descriptor counts to its limits */
		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						       &nb_txd);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				 "Cannot adjust number of descriptors: err=%d, port=%d\n",
				 ret, portid);

		rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf(", ");

		/* init memory */
		ret = init_mem(NB_MBUF);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "init_mem failed\n");

		/* one TX buffer per enabled lcore for this port, allocated on
		 * the port's NUMA socket */
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			/* Initialize TX buffers */
			qconf = &lcore_conf[lcore_id];
			qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
				rte_eth_dev_socket_id(portid));
			if (qconf->tx_buffer[portid] == NULL)
				rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n",
						 portid);

			rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST);
		}

		/* init one TX queue per couple (lcore,port) */
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			/* lcores beyond the device TX queue count get no TX
			 * queue on this port */
			if (queueid >= dev_txq_num)
				continue;

			if (numa_on)
				socketid = \
				(uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
			fflush(stdout);

			/* inherit the port's TX offloads in the queue config */
			txconf = &dev_info.default_txconf;
			txconf->offloads = local_port_conf.txmode.offloads;
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
						     socketid, txconf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_tx_queue_setup: err=%d, "
						"port=%d\n", ret, portid);

			qconf = &lcore_conf[lcore_id];
			qconf->tx_queue_id[portid] = queueid;
			queueid++;

			qconf->tx_port_id[qconf->n_tx_port] = portid;
			qconf->n_tx_port++;
		}
		printf("\n");
	}

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		/* init timer structures for each enabled lcore */
		rte_timer_init(&power_timers[lcore_id]);
		hz = rte_get_timer_hz();
		/* fires every 1/TIMER_NUMBER_PER_SECOND s (100 ms); SINGLE
		 * shot — presumably re-armed inside power_timer_cb; confirm */
		rte_timer_reset(&power_timers[lcore_id],
			hz/TIMER_NUMBER_PER_SECOND, SINGLE, lcore_id,
						power_timer_cb, NULL);

		qconf = &lcore_conf[lcore_id];
		printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
		fflush(stdout);
		/* init RX queues */
		for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
			struct rte_eth_rxconf rxq_conf;
			struct rte_eth_dev *dev;
			struct rte_eth_conf *conf;

			portid = qconf->rx_queue_list[queue].port_id;
			queueid = qconf->rx_queue_list[queue].queue_id;
			/* read the configured RX offloads back from the
			 * device's stored configuration */
			dev = &rte_eth_devices[portid];
			conf = &dev->data->dev_conf;

			if (numa_on)
				socketid = \
				(uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("rxq=%d,%d,%d ", portid, queueid, socketid);
			fflush(stdout);

			rte_eth_dev_info_get(portid, &dev_info);
			rxq_conf = dev_info.default_rxconf;
			rxq_conf.offloads = conf->rxmode.offloads;
			/* mbufs come from the pool on this lcore's socket */
			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
				socketid, &rxq_conf,
				pktmbuf_pool[socketid]);
			if (ret < 0)
				rte_exit(EXIT_FAILURE,
					"rte_eth_rx_queue_setup: err=%d, "
						"port=%d\n", ret, portid);

			/* either classify ptypes in software via a callback,
			 * or require the PMD to provide them */
			if (parse_ptype) {
				if (add_cb_parse_ptype(portid, queueid) < 0)
					rte_exit(EXIT_FAILURE,
						 "Fail to add ptype cb\n");
			} else if (!check_ptype(portid))
				rte_exit(EXIT_FAILURE,
					 "PMD can not provide needed ptypes\n");
		}
	}

	printf("\n");

	/* start ports */
	RTE_ETH_FOREACH_DEV(portid) {
		if ((enabled_port_mask & (1 << portid)) == 0) {
			continue;
		}
		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
						"port=%d\n", ret, portid);
		/*
		 * If enabled, put device in promiscuous mode.
		 * This allows IO forwarding mode to forward packets
		 * to itself through 2 cross-connected  ports of the
		 * target machine.
		 */
		if (promiscuous_on)
			rte_eth_promiscuous_enable(portid);
		/* initialize spinlock for each port */
		rte_spinlock_init(&(locks[portid]));
	}

	check_all_ports_link_status(enabled_port_mask);

	/* launch per-lcore init on every lcore */
	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}