New upstream version 17.11-rc3
[deb_dpdk.git] / examples / load_balancer / init.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stdint.h>
37 #include <inttypes.h>
38 #include <sys/types.h>
39 #include <string.h>
40 #include <sys/queue.h>
41 #include <stdarg.h>
42 #include <errno.h>
43 #include <getopt.h>
44
45 #include <rte_common.h>
46 #include <rte_byteorder.h>
47 #include <rte_log.h>
48 #include <rte_memory.h>
49 #include <rte_memcpy.h>
50 #include <rte_eal.h>
51 #include <rte_launch.h>
52 #include <rte_atomic.h>
53 #include <rte_cycles.h>
54 #include <rte_prefetch.h>
55 #include <rte_lcore.h>
56 #include <rte_per_lcore.h>
57 #include <rte_branch_prediction.h>
58 #include <rte_interrupts.h>
59 #include <rte_random.h>
60 #include <rte_debug.h>
61 #include <rte_ether.h>
62 #include <rte_ethdev.h>
63 #include <rte_ring.h>
64 #include <rte_mempool.h>
65 #include <rte_mbuf.h>
66 #include <rte_string_fns.h>
67 #include <rte_ip.h>
68 #include <rte_tcp.h>
69 #include <rte_lpm.h>
70
71 #include "main.h"
72
/* Default port configuration applied to every NIC port: RSS on the RX
 * side (IP-based hashing, driver-default key) so flows are spread over
 * the configured RX queues; no multi-queue scheme on the TX side. */
static struct rte_eth_conf port_conf = {
	.rxmode = {
		.mq_mode        = ETH_MQ_RX_RSS,
		.split_hdr_size = 0,
		.header_split   = 0, /**< Header Split disabled */
		.hw_ip_checksum = 1, /**< IP checksum offload enabled */
		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
		.hw_strip_crc   = 1, /**< CRC stripped by hardware */
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_key = NULL, /**< NULL => use the driver's default key */
			.rss_hf = ETH_RSS_IP,
		},
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
};
93
94 static void
95 app_assign_worker_ids(void)
96 {
97         uint32_t lcore, worker_id;
98
99         /* Assign ID for each worker */
100         worker_id = 0;
101         for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
102                 struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;
103
104                 if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
105                         continue;
106                 }
107
108                 lp_worker->worker_id = worker_id;
109                 worker_id ++;
110         }
111 }
112
113 static void
114 app_init_mbuf_pools(void)
115 {
116         unsigned socket, lcore;
117
118         /* Init the buffer pools */
119         for (socket = 0; socket < APP_MAX_SOCKETS; socket ++) {
120                 char name[32];
121                 if (app_is_socket_used(socket) == 0) {
122                         continue;
123                 }
124
125                 snprintf(name, sizeof(name), "mbuf_pool_%u", socket);
126                 printf("Creating the mbuf pool for socket %u ...\n", socket);
127                 app.pools[socket] = rte_pktmbuf_pool_create(
128                         name, APP_DEFAULT_MEMPOOL_BUFFERS,
129                         APP_DEFAULT_MEMPOOL_CACHE_SIZE,
130                         0, APP_DEFAULT_MBUF_DATA_SIZE, socket);
131                 if (app.pools[socket] == NULL) {
132                         rte_panic("Cannot create mbuf pool on socket %u\n", socket);
133                 }
134         }
135
136         for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
137                 if (app.lcore_params[lcore].type == e_APP_LCORE_DISABLED) {
138                         continue;
139                 }
140
141                 socket = rte_lcore_to_socket_id(lcore);
142                 app.lcore_params[lcore].pool = app.pools[socket];
143         }
144 }
145
146 static void
147 app_init_lpm_tables(void)
148 {
149         unsigned socket, lcore;
150
151         /* Init the LPM tables */
152         for (socket = 0; socket < APP_MAX_SOCKETS; socket ++) {
153                 char name[32];
154                 uint32_t rule;
155
156                 if (app_is_socket_used(socket) == 0) {
157                         continue;
158                 }
159
160                 struct rte_lpm_config lpm_config;
161
162                 lpm_config.max_rules = APP_MAX_LPM_RULES;
163                 lpm_config.number_tbl8s = 256;
164                 lpm_config.flags = 0;
165                 snprintf(name, sizeof(name), "lpm_table_%u", socket);
166                 printf("Creating the LPM table for socket %u ...\n", socket);
167                 app.lpm_tables[socket] = rte_lpm_create(
168                         name,
169                         socket,
170                         &lpm_config);
171                 if (app.lpm_tables[socket] == NULL) {
172                         rte_panic("Unable to create LPM table on socket %u\n", socket);
173                 }
174
175                 for (rule = 0; rule < app.n_lpm_rules; rule ++) {
176                         int ret;
177
178                         ret = rte_lpm_add(app.lpm_tables[socket],
179                                 app.lpm_rules[rule].ip,
180                                 app.lpm_rules[rule].depth,
181                                 app.lpm_rules[rule].if_out);
182
183                         if (ret < 0) {
184                                 rte_panic("Unable to add entry %u (%x/%u => %u) to the LPM table on socket %u (%d)\n",
185                                         (unsigned) rule,
186                                         (unsigned) app.lpm_rules[rule].ip,
187                                         (unsigned) app.lpm_rules[rule].depth,
188                                         (unsigned) app.lpm_rules[rule].if_out,
189                                         socket,
190                                         ret);
191                         }
192                 }
193
194         }
195
196         for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
197                 if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
198                         continue;
199                 }
200
201                 socket = rte_lcore_to_socket_id(lcore);
202                 app.lcore_params[lcore].worker.lpm_table = app.lpm_tables[socket];
203         }
204 }
205
206 static void
207 app_init_rings_rx(void)
208 {
209         unsigned lcore;
210
211         /* Initialize the rings for the RX side */
212         for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
213                 struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
214                 unsigned socket_io, lcore_worker;
215
216                 if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
217                     (lp_io->rx.n_nic_queues == 0)) {
218                         continue;
219                 }
220
221                 socket_io = rte_lcore_to_socket_id(lcore);
222
223                 for (lcore_worker = 0; lcore_worker < APP_MAX_LCORES; lcore_worker ++) {
224                         char name[32];
225                         struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore_worker].worker;
226                         struct rte_ring *ring = NULL;
227
228                         if (app.lcore_params[lcore_worker].type != e_APP_LCORE_WORKER) {
229                                 continue;
230                         }
231
232                         printf("Creating ring to connect I/O lcore %u (socket %u) with worker lcore %u ...\n",
233                                 lcore,
234                                 socket_io,
235                                 lcore_worker);
236                         snprintf(name, sizeof(name), "app_ring_rx_s%u_io%u_w%u",
237                                 socket_io,
238                                 lcore,
239                                 lcore_worker);
240                         ring = rte_ring_create(
241                                 name,
242                                 app.ring_rx_size,
243                                 socket_io,
244                                 RING_F_SP_ENQ | RING_F_SC_DEQ);
245                         if (ring == NULL) {
246                                 rte_panic("Cannot create ring to connect I/O core %u with worker core %u\n",
247                                         lcore,
248                                         lcore_worker);
249                         }
250
251                         lp_io->rx.rings[lp_io->rx.n_rings] = ring;
252                         lp_io->rx.n_rings ++;
253
254                         lp_worker->rings_in[lp_worker->n_rings_in] = ring;
255                         lp_worker->n_rings_in ++;
256                 }
257         }
258
259         for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
260                 struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
261
262                 if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
263                     (lp_io->rx.n_nic_queues == 0)) {
264                         continue;
265                 }
266
267                 if (lp_io->rx.n_rings != app_get_lcores_worker()) {
268                         rte_panic("Algorithmic error (I/O RX rings)\n");
269                 }
270         }
271
272         for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
273                 struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;
274
275                 if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
276                         continue;
277                 }
278
279                 if (lp_worker->n_rings_in != app_get_lcores_io_rx()) {
280                         rte_panic("Algorithmic error (worker input rings)\n");
281                 }
282         }
283 }
284
/*
 * Create the TX-side software rings: for every (worker lcore, TX-enabled
 * NIC port) pair, one single-producer/single-consumer ring from the
 * worker to the I/O lcore that handles TX for that port, allocated on
 * that I/O lcore's socket. The ring is registered on both endpoints:
 * indexed by port on the worker side, and by (port, worker_id) on the
 * I/O side. A final pass verifies every (TX port, worker) slot on the
 * I/O side was filled. Panics on any failure (init-time fatal error).
 */
static void
app_init_rings_tx(void)
{
	unsigned lcore;

	/* Initialize the rings for the TX side */
	for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
		struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;
		unsigned port;

		if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
			continue;
		}

		for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
			char name[32];
			struct app_lcore_params_io *lp_io = NULL;
			struct rte_ring *ring;
			uint32_t socket_io, lcore_io;

			/* Skip ports not enabled for TX */
			if (app.nic_tx_port_mask[port] == 0) {
				continue;
			}

			/* Find the I/O lcore responsible for TX on this port */
			if (app_get_lcore_for_nic_tx(port, &lcore_io) < 0) {
				rte_panic("Algorithmic error (no I/O core to handle TX of port %u)\n",
					port);
			}

			lp_io = &app.lcore_params[lcore_io].io;
			socket_io = rte_lcore_to_socket_id(lcore_io);

			printf("Creating ring to connect worker lcore %u with TX port %u (through I/O lcore %u) (socket %u) ...\n",
				lcore, port, (unsigned)lcore_io, (unsigned)socket_io);
			snprintf(name, sizeof(name), "app_ring_tx_s%u_w%u_p%u", socket_io, lcore, port);
			/* SP/SC: one worker producer, one I/O consumer */
			ring = rte_ring_create(
				name,
				app.ring_tx_size,
				socket_io,
				RING_F_SP_ENQ | RING_F_SC_DEQ);
			if (ring == NULL) {
				rte_panic("Cannot create ring to connect worker core %u with TX port %u\n",
					lcore,
					port);
			}

			/* Register on both endpoints; worker_id was assigned
			 * by app_assign_worker_ids() */
			lp_worker->rings_out[port] = ring;
			lp_io->tx.rings[port][lp_worker->worker_id] = ring;
		}
	}

	/* Sanity check: every (TX port, worker) slot must hold a ring */
	for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
		struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
		unsigned i;

		if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
		    (lp_io->tx.n_nic_ports == 0)) {
			continue;
		}

		for (i = 0; i < lp_io->tx.n_nic_ports; i ++){
			unsigned port, j;

			port = lp_io->tx.nic_ports[i];
			for (j = 0; j < app_get_lcores_worker(); j ++) {
				if (lp_io->tx.rings[port][j] == NULL) {
					rte_panic("Algorithmic error (I/O TX rings)\n");
				}
			}
		}
	}
}
357
358 /* Check the link status of all ports in up to 9s, and print them finally */
359 static void
360 check_all_ports_link_status(uint16_t port_num, uint32_t port_mask)
361 {
362 #define CHECK_INTERVAL 100 /* 100ms */
363 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
364         uint16_t portid;
365         uint8_t count, all_ports_up, print_flag = 0;
366         struct rte_eth_link link;
367         uint32_t n_rx_queues, n_tx_queues;
368
369         printf("\nChecking link status");
370         fflush(stdout);
371         for (count = 0; count <= MAX_CHECK_TIME; count++) {
372                 all_ports_up = 1;
373                 for (portid = 0; portid < port_num; portid++) {
374                         if ((port_mask & (1 << portid)) == 0)
375                                 continue;
376                         n_rx_queues = app_get_nic_rx_queues_per_port(portid);
377                         n_tx_queues = app.nic_tx_port_mask[portid];
378                         if ((n_rx_queues == 0) && (n_tx_queues == 0))
379                                 continue;
380                         memset(&link, 0, sizeof(link));
381                         rte_eth_link_get_nowait(portid, &link);
382                         /* print link status if flag set */
383                         if (print_flag == 1) {
384                                 if (link.link_status)
385                                         printf(
386                                         "Port%d Link Up - speed %uMbps - %s\n",
387                                                 portid, link.link_speed,
388                                 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
389                                         ("full-duplex") : ("half-duplex\n"));
390                                 else
391                                         printf("Port %d Link Down\n", portid);
392                                 continue;
393                         }
394                         /* clear all_ports_up flag if any link down */
395                         if (link.link_status == ETH_LINK_DOWN) {
396                                 all_ports_up = 0;
397                                 break;
398                         }
399                 }
400                 /* after finally printing all link status, get out */
401                 if (print_flag == 1)
402                         break;
403
404                 if (all_ports_up == 0) {
405                         printf(".");
406                         fflush(stdout);
407                         rte_delay_ms(CHECK_INTERVAL);
408                 }
409
410                 /* set the print_flag if all ports up or timeout */
411                 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
412                         print_flag = 1;
413                         printf("done\n");
414                 }
415         }
416 }
417
/*
 * Bring up every NIC port used by the application: configure the port
 * (RSS RX, checksum offload, CRC strip per port_conf), enable
 * promiscuous mode, let the driver adjust the descriptor ring sizes,
 * set up the RX queues (mbufs drawn from the pool local to the handling
 * lcore's socket) and the single TX queue (queue 0), then start the
 * port. Finally waits for all links to come up. Panics on any failure
 * (init-time fatal error).
 */
static void
app_init_nics(void)
{
	unsigned socket;
	uint32_t lcore;
	uint16_t port;
	uint8_t queue;
	int ret;
	uint32_t n_rx_queues, n_tx_queues;

	/* Init NIC ports and queues, then start the ports */
	for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
		struct rte_mempool *pool;
		uint16_t nic_rx_ring_size;
		uint16_t nic_tx_ring_size;

		n_rx_queues = app_get_nic_rx_queues_per_port(port);
		/* nic_tx_port_mask is 0/1 per port, so this doubles as the
		 * TX queue count (0 or 1) */
		n_tx_queues = app.nic_tx_port_mask[port];

		/* Skip ports the application does not use at all */
		if ((n_rx_queues == 0) && (n_tx_queues == 0)) {
			continue;
		}

		/* Init port */
		printf("Initializing NIC port %u ...\n", port);
		ret = rte_eth_dev_configure(
			port,
			(uint8_t) n_rx_queues,
			(uint8_t) n_tx_queues,
			&port_conf);
		if (ret < 0) {
			rte_panic("Cannot init NIC port %u (%d)\n", port, ret);
		}
		rte_eth_promiscuous_enable(port);

		/* Let the driver clamp the requested descriptor counts to
		 * what the hardware supports; write the clamped values back
		 * so the rest of the app sees the effective sizes */
		nic_rx_ring_size = app.nic_rx_ring_size;
		nic_tx_ring_size = app.nic_tx_ring_size;
		ret = rte_eth_dev_adjust_nb_rx_tx_desc(
			port, &nic_rx_ring_size, &nic_tx_ring_size);
		if (ret < 0) {
			rte_panic("Cannot adjust number of descriptors for port %u (%d)\n",
				  port, ret);
		}
		app.nic_rx_ring_size = nic_rx_ring_size;
		app.nic_tx_ring_size = nic_tx_ring_size;

		/* Init RX queues */
		for (queue = 0; queue < APP_MAX_RX_QUEUES_PER_NIC_PORT; queue ++) {
			if (app.nic_rx_queue_mask[port][queue] == 0) {
				continue;
			}

			/* Allocate mbufs from the pool local to the socket of
			 * the I/O lcore that will poll this queue */
			app_get_lcore_for_nic_rx(port, queue, &lcore);
			socket = rte_lcore_to_socket_id(lcore);
			pool = app.lcore_params[lcore].pool;

			printf("Initializing NIC port %u RX queue %u ...\n",
				port, queue);
			ret = rte_eth_rx_queue_setup(
				port,
				queue,
				(uint16_t) app.nic_rx_ring_size,
				socket,
				NULL,
				pool);
			if (ret < 0) {
				rte_panic("Cannot init RX queue %u for port %u (%d)\n",
					  queue, port, ret);
			}
		}

		/* Init TX queues (a TX-enabled port gets exactly queue 0) */
		if (app.nic_tx_port_mask[port] == 1) {
			app_get_lcore_for_nic_tx(port, &lcore);
			socket = rte_lcore_to_socket_id(lcore);
			printf("Initializing NIC port %u TX queue 0 ...\n",
				port);
			ret = rte_eth_tx_queue_setup(
				port,
				0,
				(uint16_t) app.nic_tx_ring_size,
				socket,
				NULL);
			if (ret < 0) {
				rte_panic("Cannot init TX queue 0 for port %d (%d)\n",
					port,
					ret);
			}
		}

		/* Start port */
		ret = rte_eth_dev_start(port);
		if (ret < 0) {
			rte_panic("Cannot start port %d (%d)\n", port, ret);
		}
	}

	/* Wait (up to 9s) for all used ports to report link up */
	check_all_ports_link_status(APP_MAX_NIC_PORTS, (~0x0));
}
517
/*
 * Top-level application initialization, called once from main() after
 * EAL init and argument parsing. Order matters: worker IDs must exist
 * before the TX rings are wired (they index the per-worker ring arrays),
 * and the mbuf pools must exist before the NIC RX queues are set up.
 */
void
app_init(void)
{
	app_assign_worker_ids();
	app_init_mbuf_pools();
	app_init_lpm_tables();
	app_init_rings_rx();
	app_init_rings_tx();
	app_init_nics();

	printf("Initialization completed.\n");
}