/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#include "test_perf_common.h"

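/*
 * Summarize a completed run: print how the processed packets were
 * distributed across the worker cores and return the verdict recorded
 * during the run.
 */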
int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(opt);
        int i;
        uint64_t total = 0;
        struct test_perf *t = evt_test_priv(test);

        printf("Packet distribution across worker cores:\n");
        for (i = 0; i < t->nb_workers; i++)
                total += t->worker[i].processed_pkts;
        for (i = 0; i < t->nb_workers; i++)
                printf("Worker %d packets: "CLGRN"%"PRIx64" "CLNRM"percentage:"
                                CLGRN" %3.2f\n"CLNRM, i,
                                t->worker[i].processed_pkts,
                                (((double)t->worker[i].processed_pkts)/total)
                                * 100);

        return t->result;
}

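/*
 * Synthetic producer loop: take an element from the mempool, stamp it
 * with the current timer cycle count and enqueue it as a new event on
 * stage 0, spreading events across nb_flows flow ids. The enqueue is
 * retried until it succeeds or the test is marked done.
 */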
static inline int
perf_producer(void *arg)
{
        struct prod_data *p = arg;
        struct test_perf *t = p->t;
        struct evt_options *opt = t->opt;
        const uint8_t dev_id = p->dev_id;
        const uint8_t port = p->port_id;
        struct rte_mempool *pool = t->pool;
        const uint64_t nb_pkts = t->nb_pkts;
        const uint32_t nb_flows = t->nb_flows;
        uint32_t flow_counter = 0;
        uint64_t count = 0;
        struct perf_elt *m;
        struct rte_event ev;

        if (opt->verbose_level > 1)
                printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
                                rte_lcore_id(), dev_id, port, p->queue_id);

        ev.event = 0;
        ev.op = RTE_EVENT_OP_NEW;
        ev.queue_id = p->queue_id;
        ev.sched_type = t->opt->sched_type_list[0];
        ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
        ev.event_type = RTE_EVENT_TYPE_CPU;
        ev.sub_event_type = 0; /* stage 0 */

        while (count < nb_pkts && t->done == false) {
                if (rte_mempool_get(pool, (void **)&m) < 0)
                        continue;

                ev.flow_id = flow_counter++ % nb_flows;
                ev.event_ptr = m;
                m->timestamp = rte_get_timer_cycles();
                while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
                        if (t->done)
                                break;
                        rte_pause();
                        m->timestamp = rte_get_timer_cycles();
                }
                count++;
        }

        return 0;
}

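/* Producer entry point: only the synthetic producer needs an lcore loop;
 * with the ethdev Rx adapter, events are injected by the adapter instead.
 */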
static int
perf_producer_wrapper(void *arg)
{
        struct prod_data *p = arg;
        struct test_perf *t = p->t;
        /* Launch the producer function only in case of synthetic producer. */
        if (t->opt->prod_type == EVT_PROD_TYPE_SYNT)
                return perf_producer(arg);
        return 0;
}

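/* Total packets processed so far, summed across all workers. */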
static inline uint64_t
processed_pkts(struct test_perf *t)
{
        uint8_t i;
        uint64_t total = 0;

        rte_smp_rmb();
        for (i = 0; i < t->nb_workers; i++)
                total += t->worker[i].processed_pkts;

        return total;
}

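/* Total forward latency accumulated so far, summed across all workers. */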
static inline uint64_t
total_latency(struct test_perf *t)
{
        uint8_t i;
        uint64_t total = 0;

        rte_smp_rmb();
        for (i = 0; i < t->nb_workers; i++)
                total += t->worker[i].latency;

        return total;
}

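/*
 * Launch the worker and producer loops on their configured lcores, then
 * poll from the master lcore: print a throughput (and optionally forward
 * latency) sample once a second, declare success when all outstanding
 * packets are processed, and dump the eventdev state if the synthetic
 * producer makes no progress for 5 seconds.
 */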
int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
                int (*worker)(void *))
{
        int ret, lcore_id;
        struct test_perf *t = evt_test_priv(test);

        int port_idx = 0;
        /* launch workers */
        RTE_LCORE_FOREACH_SLAVE(lcore_id) {
                if (!(opt->wlcores[lcore_id]))
                        continue;

                ret = rte_eal_remote_launch(worker,
                                &t->worker[port_idx], lcore_id);
                if (ret) {
                        evt_err("failed to launch worker %d", lcore_id);
                        return ret;
                }
                port_idx++;
        }

        /* launch producers */
        RTE_LCORE_FOREACH_SLAVE(lcore_id) {
                if (!(opt->plcores[lcore_id]))
                        continue;

                ret = rte_eal_remote_launch(perf_producer_wrapper,
                                &t->prod[port_idx], lcore_id);
                if (ret) {
                        evt_err("failed to launch perf_producer %d", lcore_id);
                        return ret;
                }
                port_idx++;
        }

        const uint64_t total_pkts = opt->nb_pkts *
                        evt_nr_active_lcores(opt->plcores);

        uint64_t dead_lock_cycles = rte_get_timer_cycles();
        int64_t dead_lock_remaining = total_pkts;
        const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

        uint64_t perf_cycles = rte_get_timer_cycles();
        int64_t perf_remaining = total_pkts;
        const uint64_t perf_sample = rte_get_timer_hz();

        static float total_mpps;
        static uint64_t samples;

        const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
        int64_t remaining = t->outstand_pkts - processed_pkts(t);

        while (t->done == false) {
                const uint64_t new_cycles = rte_get_timer_cycles();

                if ((new_cycles - perf_cycles) > perf_sample) {
                        const uint64_t latency = total_latency(t);
                        const uint64_t pkts = processed_pkts(t);

                        remaining = t->outstand_pkts - pkts;
                        float mpps = (float)(perf_remaining-remaining)/1000000;

                        perf_remaining = remaining;
                        perf_cycles = new_cycles;
                        total_mpps += mpps;
                        ++samples;
                        if (opt->fwd_latency && pkts > 0) {
                                printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
                                        mpps, total_mpps/samples,
                                        (float)(latency/pkts)/freq_mhz);
                        } else {
                                printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
                                        mpps, total_mpps/samples);
                        }
                        fflush(stdout);

                        if (remaining <= 0) {
                                t->result = EVT_TEST_SUCCESS;
                                if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
                                        t->done = true;
                                        rte_smp_wmb();
                                        break;
                                }
                        }
                }

                if (new_cycles - dead_lock_cycles > dead_lock_sample &&
                                opt->prod_type == EVT_PROD_TYPE_SYNT) {
                        remaining = t->outstand_pkts - processed_pkts(t);
                        if (dead_lock_remaining == remaining) {
                                rte_event_dev_dump(opt->dev_id, stdout);
                                evt_err("No schedules for 5 seconds, deadlock");
                                t->done = true;
                                rte_smp_wmb();
                                break;
                        }
                        dead_lock_remaining = remaining;
                        dead_lock_cycles = new_cycles;
                }
        }
        printf("\n");
        return 0;
}

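/*
 * Create one Rx adapter per ethernet port and connect all of that port's
 * Rx queues to the event queue at prod * stride. Ports whose adapters
 * lack the INTERNAL_PORT capability get a service core set up for them.
 */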
static int
perf_event_rx_adapter_setup(struct evt_options *opt, uint8_t stride,
                struct rte_event_port_conf prod_conf)
{
        int ret = 0;
        uint16_t prod;
        struct rte_event_eth_rx_adapter_queue_conf queue_conf;

        memset(&queue_conf, 0,
                        sizeof(struct rte_event_eth_rx_adapter_queue_conf));
        queue_conf.ev.sched_type = opt->sched_type_list[0];
        for (prod = 0; prod < rte_eth_dev_count(); prod++) {
                uint32_t cap;

                ret = rte_event_eth_rx_adapter_caps_get(opt->dev_id,
                                prod, &cap);
                if (ret) {
                        evt_err("failed to get event rx adapter[%d]"
                                        " capabilities",
                                        opt->dev_id);
                        return ret;
                }
                queue_conf.ev.queue_id = prod * stride;
                ret = rte_event_eth_rx_adapter_create(prod, opt->dev_id,
                                &prod_conf);
                if (ret) {
                        evt_err("failed to create rx adapter[%d]", prod);
                        return ret;
                }
                ret = rte_event_eth_rx_adapter_queue_add(prod, prod, -1,
                                &queue_conf);
                if (ret) {
                        evt_err("failed to add rx queues to adapter[%d]", prod);
                        return ret;
                }

                if (!(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT)) {
                        uint32_t service_id;

                        rte_event_eth_rx_adapter_service_id_get(prod,
                                        &service_id);
                        ret = evt_service_setup(service_id);
                        if (ret) {
                                evt_err("failed to setup service core"
                                                " for Rx adapter");
                                return ret;
                        }
                }

                ret = rte_eth_dev_start(prod);
                if (ret) {
                        evt_err("Ethernet dev [%d] failed to start", prod);
                        return ret;
                }

                ret = rte_event_eth_rx_adapter_start(prod);
                if (ret) {
                        evt_err("Rx adapter[%d] start failed", prod);
                        return ret;
                }
                printf("%s: Port[%d] using Rx adapter[%d] started\n", __func__,
                                prod, prod);
        }

        return ret;
}

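/*
 * Set up one event port per worker, linked to all queues, followed by
 * the producer ports: unlinked ports for synthetic producers, or Rx
 * adapter plumbing when events come from ethernet devices.
 */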
int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
                                uint8_t stride, uint8_t nb_queues,
                                const struct rte_event_port_conf *port_conf)
{
        struct test_perf *t = evt_test_priv(test);
        uint16_t port, prod;
        int ret = -1;

        /* setup one port per worker, linking to all queues */
        for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
                                port++) {
                struct worker_data *w = &t->worker[port];

                w->dev_id = opt->dev_id;
                w->port_id = port;
                w->t = t;
                w->processed_pkts = 0;
                w->latency = 0;

                ret = rte_event_port_setup(opt->dev_id, port, port_conf);
                if (ret) {
                        evt_err("failed to setup port %d", port);
                        return ret;
                }

                ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
                if (ret != nb_queues) {
                        evt_err("failed to link all queues to port %d", port);
                        return -EINVAL;
                }
        }

        /* port for producers, no links */
        if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
                for ( ; port < perf_nb_event_ports(opt); port++) {
                        struct prod_data *p = &t->prod[port];
                        p->t = t;
                }

                ret = perf_event_rx_adapter_setup(opt, stride, *port_conf);
                if (ret)
                        return ret;
        } else {
                prod = 0;
                for ( ; port < perf_nb_event_ports(opt); port++) {
                        struct prod_data *p = &t->prod[port];

                        p->dev_id = opt->dev_id;
                        p->port_id = port;
                        p->queue_id = prod * stride;
                        p->t = t;

                        ret = rte_event_port_setup(opt->dev_id, port,
                                        port_conf);
                        if (ret) {
                                evt_err("failed to setup port %d", port);
                                return ret;
                        }
                        prod++;
                }
        }

        return ret;
}

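/*
 * Validate the lcore, stage, scheduling and queue/port options, and apply
 * fixups such as forcing queue priority on when forward latency
 * measurement is requested.
 */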
int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
        unsigned int lcores;

        /* Minimum lcores: N producers + N workers + 1 master when synthetic
         * producer cores are used, else N workers + 1 master when the Rx
         * adapter is used.
         */
        lcores = opt->prod_type == EVT_PROD_TYPE_SYNT ? 3 : 2;

        if (rte_lcore_count() < lcores) {
                evt_err("test needs a minimum of %d lcores", lcores);
                return -1;
        }

        /* Validate worker lcores */
        if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
                evt_err("worker lcores overlap with master lcore");
                return -1;
        }
        if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
                evt_err("worker lcores overlap with producer lcores");
                return -1;
        }
        if (evt_has_disabled_lcore(opt->wlcores)) {
                evt_err("one or more worker lcores are not enabled");
                return -1;
        }
        if (!evt_has_active_lcore(opt->wlcores)) {
                evt_err("at least one worker lcore is required");
                return -1;
        }

        if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
                /* Validate producer lcores */
                if (evt_lcores_has_overlap(opt->plcores,
                                        rte_get_master_lcore())) {
                        evt_err("producer lcores overlap with master lcore");
                        return -1;
                }
                if (evt_has_disabled_lcore(opt->plcores)) {
                        evt_err("one or more producer lcores are not enabled");
                        return -1;
                }
                if (!evt_has_active_lcore(opt->plcores)) {
                        evt_err("at least one producer lcore is required");
                        return -1;
                }
        }

        if (evt_has_invalid_stage(opt))
                return -1;

        if (evt_has_invalid_sched_type(opt))
                return -1;

        if (nb_queues > EVT_MAX_QUEUES) {
                evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
                return -1;
        }
        if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
                evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
                return -1;
        }

        /* Fixups */
        if (opt->nb_stages == 1 && opt->fwd_latency) {
                evt_info("fwd_latency is valid only when nb_stages > 1, disabling");
                opt->fwd_latency = 0;
        }
        if (opt->fwd_latency && !opt->q_priority) {
                evt_info("enabled queue priority for latency measurement");
                opt->q_priority = 1;
        }
        if (opt->nb_pkts == 0)
                opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

        return 0;
}

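/* Dump the effective test configuration before the run starts. */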
void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
        evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
        evt_dump_producer_lcores(opt);
        evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
        evt_dump_worker_lcores(opt);
        evt_dump_nb_stages(opt);
        evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
        evt_dump("nb_evdev_queues", "%d", nb_queues);
        evt_dump_queue_priority(opt);
        evt_dump_sched_type_list(opt);
        evt_dump_producer_type(opt);
}

void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(test);

        rte_event_dev_stop(opt->dev_id);
        rte_event_dev_close(opt->dev_id);
}

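/* Mempool object constructor: zero each perf_elt at pool creation time. */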
static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
            void *obj, unsigned i __rte_unused)
{
        memset(obj, 0, mp->elt_size);
}

#define NB_RX_DESC                      128
#define NB_TX_DESC                      512
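/*
 * Configure every available ethdev with a single Rx/Tx queue pair and
 * RSS over IP, then enable promiscuous mode. Skipped entirely for the
 * synthetic producer, which does not touch the ethernet ports.
 */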
int
perf_ethdev_setup(struct evt_test *test, struct evt_options *opt)
{
        int i;
        struct test_perf *t = evt_test_priv(test);
        struct rte_eth_conf port_conf = {
                .rxmode = {
                        .mq_mode = ETH_MQ_RX_RSS,
                        .max_rx_pkt_len = ETHER_MAX_LEN,
                        .split_hdr_size = 0,
                        .header_split   = 0,
                        .hw_ip_checksum = 0,
                        .hw_vlan_filter = 0,
                        .hw_vlan_strip  = 0,
                        .hw_vlan_extend = 0,
                        .jumbo_frame    = 0,
                        .hw_strip_crc   = 1,
                },
                .rx_adv_conf = {
                        .rss_conf = {
                                .rss_key = NULL,
                                .rss_hf = ETH_RSS_IP,
                        },
                },
        };

        if (opt->prod_type == EVT_PROD_TYPE_SYNT)
                return 0;

        if (!rte_eth_dev_count()) {
                evt_err("No ethernet ports found.");
                return -ENODEV;
        }

        for (i = 0; i < rte_eth_dev_count(); i++) {
                if (rte_eth_dev_configure(i, 1, 1, &port_conf) < 0) {
                        evt_err("Failed to configure eth port [%d]", i);
                        return -EINVAL;
                }

                if (rte_eth_rx_queue_setup(i, 0, NB_RX_DESC,
                                rte_socket_id(), NULL, t->pool) < 0) {
                        evt_err("Failed to setup eth port [%d] rx_queue: %d.",
                                        i, 0);
                        return -EINVAL;
                }

                if (rte_eth_tx_queue_setup(i, 0, NB_TX_DESC,
                                        rte_socket_id(), NULL) < 0) {
                        evt_err("Failed to setup eth port [%d] tx_queue: %d.",
                                        i, 0);
                        return -EINVAL;
                }

                rte_eth_promiscuous_enable(i);
        }

        return 0;
}

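/* Stop and close the ethernet ports (and their Rx adapters) started by
 * perf_event_rx_adapter_setup.
 */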
void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt)
{
        int i;
        RTE_SET_USED(test);

        if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
                for (i = 0; i < rte_eth_dev_count(); i++) {
                        rte_event_eth_rx_adapter_stop(i);
                        rte_eth_dev_stop(i);
                        rte_eth_dev_close(i);
                }
        }
}

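/*
 * Create the packet pool: plain perf_elt objects for the synthetic
 * producer, real mbufs when packets arrive through the ethdev Rx adapter.
 */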
int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
        struct test_perf *t = evt_test_priv(test);

        if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
                t->pool = rte_mempool_create(test->name, /* mempool name */
                                opt->pool_sz, /* number of elements */
                                sizeof(struct perf_elt), /* element size */
                                512, /* cache size */
                                0, NULL, NULL,
                                perf_elt_init, /* obj constructor */
                                NULL, opt->socket_id, 0); /* flags */
        } else {
                t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */
                                opt->pool_sz, /* number of elements */
                                512, /* cache size */
                                0, /* private data size */
                                RTE_MBUF_DEFAULT_BUF_SIZE,
                                opt->socket_id); /* socket id */
        }

        if (t->pool == NULL) {
                evt_err("failed to create mempool");
                return -ENOMEM;
        }

        return 0;
}

void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(opt);
        struct test_perf *t = evt_test_priv(test);

        rte_mempool_free(t->pool);
}

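/*
 * Allocate and initialize the per-test private state from the command
 * line options; the result starts as EVT_TEST_FAILED and is flipped to
 * success by the launch loop once all packets are accounted for.
 */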
int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
        void *test_perf;

        test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
                                RTE_CACHE_LINE_SIZE, opt->socket_id);
        if (test_perf == NULL) {
                evt_err("failed to allocate test_perf memory");
                goto nomem;
        }
        test->test_priv = test_perf;

        struct test_perf *t = evt_test_priv(test);

        t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
        t->nb_workers = evt_nr_active_lcores(opt->wlcores);
        t->done = false;
        t->nb_pkts = opt->nb_pkts;
        t->nb_flows = opt->nb_flows;
        t->result = EVT_TEST_FAILED;
        t->opt = opt;
        memcpy(t->sched_type_list, opt->sched_type_list,
                        sizeof(opt->sched_type_list));
        return 0;
nomem:
        return -ENOMEM;
}

void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(opt);

        rte_free(test->test_priv);
}