app/test-eventdev/test_perf_common.c

   1 /*
   2  *   BSD LICENSE
   3  *
   4  *   Copyright (C) Cavium, Inc 2017.
   5  *
   6  *   Redistribution and use in source and binary forms, with or without
   7  *   modification, are permitted provided that the following conditions
   8  *   are met:
   9  *
  10  *     * Redistributions of source code must retain the above copyright
  11  *       notice, this list of conditions and the following disclaimer.
  12  *     * Redistributions in binary form must reproduce the above copyright
  13  *       notice, this list of conditions and the following disclaimer in
  14  *       the documentation and/or other materials provided with the
  15  *       distribution.
  16  *     * Neither the name of Cavium, Inc nor the names of its
  17  *       contributors may be used to endorse or promote products derived
  18  *       from this software without specific prior written permission.
  19  *
  20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  */
  32
  33 #include "test_perf_common.h"
  34
  35 int
  36 perf_test_result(struct evt_test *test, struct evt_options *opt)
  37 {
  38         RTE_SET_USED(opt);
  39         struct test_perf *t = evt_test_priv(test);
  40
  41         return t->result;
  42 }
  43
  44 static inline int
  45 perf_producer(void *arg)
  46 {
  47         struct prod_data *p  = arg;
  48         struct test_perf *t = p->t;
  49         struct evt_options *opt = t->opt;
  50         const uint8_t dev_id = p->dev_id;
  51         const uint8_t port = p->port_id;
  52         struct rte_mempool *pool = t->pool;
  53         const uint64_t nb_pkts = t->nb_pkts;
  54         const uint32_t nb_flows = t->nb_flows;
  55         uint32_t flow_counter = 0;
  56         uint64_t count = 0;
  57         struct perf_elt *m;
  58         struct rte_event ev;
  59
  60         if (opt->verbose_level > 1)
  61                 printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
  62                                 rte_lcore_id(), dev_id, port, p->queue_id);
  63
  64         ev.event = 0;
  65         ev.op = RTE_EVENT_OP_NEW;
  66         ev.queue_id = p->queue_id;
  67         ev.sched_type = t->opt->sched_type_list[0];
  68         ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
  69         ev.event_type =  RTE_EVENT_TYPE_CPU;
  70         ev.sub_event_type = 0; /* stage 0 */
  71
  72         while (count < nb_pkts && t->done == false) {
  73                 if (rte_mempool_get(pool, (void **)&m) < 0)
  74                         continue;
  75
  76                 ev.flow_id = flow_counter++ % nb_flows;
  77                 ev.event_ptr = m;
  78                 m->timestamp = rte_get_timer_cycles();
  79                 while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
  80                         if (t->done)
  81                                 break;
  82                         rte_pause();
  83                         m->timestamp = rte_get_timer_cycles();
  84                 }
  85                 count++;
  86         }
  87
  88         return 0;
  89 }
  90
  91 static inline int
  92 scheduler(void *arg)
  93 {
  94         struct test_perf *t = arg;
  95         const uint8_t dev_id = t->opt->dev_id;
  96
  97         while (t->done == false)
  98                 rte_event_schedule(dev_id);
  99
 100         return 0;
 101 }
 102
 103 static inline uint64_t
 104 processed_pkts(struct test_perf *t)
 105 {
 106         uint8_t i;
 107         uint64_t total = 0;
 108
 109         rte_smp_rmb();
 110         for (i = 0; i < t->nb_workers; i++)
 111                 total += t->worker[i].processed_pkts;
 112
 113         return total;
 114 }
 115
 116 static inline uint64_t
 117 total_latency(struct test_perf *t)
 118 {
 119         uint8_t i;
 120         uint64_t total = 0;
 121
 122         rte_smp_rmb();
 123         for (i = 0; i < t->nb_workers; i++)
 124                 total += t->worker[i].latency;
 125
 126         return total;
 127 }
 128
 129
 130 int
 131 perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
 132                 int (*worker)(void *))
 133 {
 134         int ret, lcore_id;
 135         struct test_perf *t = evt_test_priv(test);
 136
 137         int port_idx = 0;
 138         /* launch workers */
 139         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
 140                 if (!(opt->wlcores[lcore_id]))
 141                         continue;
 142
 143                 ret = rte_eal_remote_launch(worker,
 144                                  &t->worker[port_idx], lcore_id);
 145                 if (ret) {
 146                         evt_err("failed to launch worker %d", lcore_id);
 147                         return ret;
 148                 }
 149                 port_idx++;
 150         }
 151
 152         /* launch producers */
 153         RTE_LCORE_FOREACH_SLAVE(lcore_id) {
 154                 if (!(opt->plcores[lcore_id]))
 155                         continue;
 156
 157                 ret = rte_eal_remote_launch(perf_producer, &t->prod[port_idx],
 158                                          lcore_id);
 159                 if (ret) {
 160                         evt_err("failed to launch perf_producer %d", lcore_id);
 161                         return ret;
 162                 }
 163                 port_idx++;
 164         }
 165
 166         /* launch scheduler */
 167         if (!evt_has_distributed_sched(opt->dev_id)) {
 168                 ret = rte_eal_remote_launch(scheduler, t, opt->slcore);
 169                 if (ret) {
 170                         evt_err("failed to launch sched %d", opt->slcore);
 171                         return ret;
 172                 }
 173         }
 174
 175         const uint64_t total_pkts = opt->nb_pkts *
 176                         evt_nr_active_lcores(opt->plcores);
 177
 178         uint64_t dead_lock_cycles = rte_get_timer_cycles();
 179         int64_t dead_lock_remaining  =  total_pkts;
 180         const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;
 181
 182         uint64_t perf_cycles = rte_get_timer_cycles();
 183         int64_t perf_remaining  = total_pkts;
 184         const uint64_t perf_sample = rte_get_timer_hz();
 185
 186         static float total_mpps;
 187         static uint64_t samples;
 188
 189         const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
 190         int64_t remaining = t->outstand_pkts - processed_pkts(t);
 191
 192         while (t->done == false) {
 193                 const uint64_t new_cycles = rte_get_timer_cycles();
 194
 195                 if ((new_cycles - perf_cycles) > perf_sample) {
 196                         const uint64_t latency = total_latency(t);
 197                         const uint64_t pkts = processed_pkts(t);
 198
 199                         remaining = t->outstand_pkts - pkts;
 200                         float mpps = (float)(perf_remaining-remaining)/1000000;
 201
 202                         perf_remaining = remaining;
 203                         perf_cycles = new_cycles;
 204                         total_mpps += mpps;
 205                         ++samples;
 206                         if (opt->fwd_latency && pkts > 0) {
 207                                 printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
 208                                         mpps, total_mpps/samples,
 209                                         (float)(latency/pkts)/freq_mhz);
 210                         } else {
 211                                 printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
 212                                         mpps, total_mpps/samples);
 213                         }
 214                         fflush(stdout);
 215
 216                         if (remaining <= 0) {
 217                                 t->done = true;
 218                                 t->result = EVT_TEST_SUCCESS;
 219                                 rte_smp_wmb();
 220                                 break;
 221                         }
 222                 }
 223
 224                 if (new_cycles - dead_lock_cycles > dead_lock_sample) {
 225                         remaining = t->outstand_pkts - processed_pkts(t);
 226                         if (dead_lock_remaining == remaining) {
 227                                 rte_event_dev_dump(opt->dev_id, stdout);
 228                                 evt_err("No schedules for seconds, deadlock");
 229                                 t->done = true;
 230                                 rte_smp_wmb();
 231                                 break;
 232                         }
 233                         dead_lock_remaining = remaining;
 234                         dead_lock_cycles = new_cycles;
 235                 }
 236         }
 237         printf("\n");
 238         return 0;
 239 }
 240
 241 int
 242 perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 243                                 uint8_t stride, uint8_t nb_queues)
 244 {
 245         struct test_perf *t = evt_test_priv(test);
 246         uint8_t port, prod;
 247         int ret = -1;
 248
 249         /* port configuration */
 250         const struct rte_event_port_conf wkr_p_conf = {
 251                         .dequeue_depth = opt->wkr_deq_dep,
 252                         .enqueue_depth = 64,
 253                         .new_event_threshold = 4096,
 254         };
 255
 256         /* setup one port per worker, linking to all queues */
 257         for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
 258                                 port++) {
 259                 struct worker_data *w = &t->worker[port];
 260
 261                 w->dev_id = opt->dev_id;
 262                 w->port_id = port;
 263                 w->t = t;
 264                 w->processed_pkts = 0;
 265                 w->latency = 0;
 266
 267                 ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
 268                 if (ret) {
 269                         evt_err("failed to setup port %d", port);
 270                         return ret;
 271                 }
 272
 273                 ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
 274                 if (ret != nb_queues) {
 275                         evt_err("failed to link all queues to port %d", port);
 276                         return -EINVAL;
 277                 }
 278         }
 279
 280         /* port for producers, no links */
 281         const struct rte_event_port_conf prod_conf = {
 282                         .dequeue_depth = 8,
 283                         .enqueue_depth = 32,
 284                         .new_event_threshold = 1200,
 285         };
 286         prod = 0;
 287         for ( ; port < perf_nb_event_ports(opt); port++) {
 288                 struct prod_data *p = &t->prod[port];
 289
 290                 p->dev_id = opt->dev_id;
 291                 p->port_id = port;
 292                 p->queue_id = prod * stride;
 293                 p->t = t;
 294
 295                 ret = rte_event_port_setup(opt->dev_id, port, &prod_conf);
 296                 if (ret) {
 297                         evt_err("failed to setup port %d", port);
 298                         return ret;
 299                 }
 300                 prod++;
 301         }
 302
 303         return ret;
 304 }
 305
 306 int
 307 perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
 308 {
 309         unsigned int lcores;
 310         bool need_slcore = !evt_has_distributed_sched(opt->dev_id);
 311
 312         /* N producer + N worker + 1 scheduler(based on dev capa) + 1 master */
 313         lcores = need_slcore ? 4 : 3;
 314
 315         if (rte_lcore_count() < lcores) {
 316                 evt_err("test need minimum %d lcores", lcores);
 317                 return -1;
 318         }
 319
 320         /* Validate worker lcores */
 321         if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
 322                 evt_err("worker lcores overlaps with master lcore");
 323                 return -1;
 324         }
 325         if (need_slcore && evt_lcores_has_overlap(opt->wlcores, opt->slcore)) {
 326                 evt_err("worker lcores overlaps with scheduler lcore");
 327                 return -1;
 328         }
 329         if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
 330                 evt_err("worker lcores overlaps producer lcores");
 331                 return -1;
 332         }
 333         if (evt_has_disabled_lcore(opt->wlcores)) {
 334                 evt_err("one or more workers lcores are not enabled");
 335                 return -1;
 336         }
 337         if (!evt_has_active_lcore(opt->wlcores)) {
 338                 evt_err("minimum one worker is required");
 339                 return -1;
 340         }
 341
 342         /* Validate producer lcores */
 343         if (evt_lcores_has_overlap(opt->plcores, rte_get_master_lcore())) {
 344                 evt_err("producer lcores overlaps with master lcore");
 345                 return -1;
 346         }
 347         if (need_slcore && evt_lcores_has_overlap(opt->plcores, opt->slcore)) {
 348                 evt_err("producer lcores overlaps with scheduler lcore");
 349                 return -1;
 350         }
 351         if (evt_has_disabled_lcore(opt->plcores)) {
 352                 evt_err("one or more producer lcores are not enabled");
 353                 return -1;
 354         }
 355         if (!evt_has_active_lcore(opt->plcores)) {
 356                 evt_err("minimum one producer is required");
 357                 return -1;
 358         }
 359
 360         /* Validate scheduler lcore */
 361         if (!evt_has_distributed_sched(opt->dev_id) &&
 362                         opt->slcore == (int)rte_get_master_lcore()) {
 363                 evt_err("scheduler lcore and master lcore should be different");
 364                 return -1;
 365         }
 366         if (need_slcore && !rte_lcore_is_enabled(opt->slcore)) {
 367                 evt_err("scheduler lcore is not enabled");
 368                 return -1;
 369         }
 370
 371         if (evt_has_invalid_stage(opt))
 372                 return -1;
 373
 374         if (evt_has_invalid_sched_type(opt))
 375                 return -1;
 376
 377         if (nb_queues > EVT_MAX_QUEUES) {
 378                 evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
 379                 return -1;
 380         }
 381         if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
 382                 evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
 383                 return -1;
 384         }
 385
 386         /* Fixups */
 387         if (opt->nb_stages == 1 && opt->fwd_latency) {
 388                 evt_info("fwd_latency is valid when nb_stages > 1, disabling");
 389                 opt->fwd_latency = 0;
 390         }
 391         if (opt->fwd_latency && !opt->q_priority) {
 392                 evt_info("enabled queue priority for latency measurement");
 393                 opt->q_priority = 1;
 394         }
 395         if (opt->nb_pkts == 0)
 396                 opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);
 397
 398         return 0;
 399 }
 400
 401 void
 402 perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
 403 {
 404         evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
 405         evt_dump_producer_lcores(opt);
 406         evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
 407         evt_dump_worker_lcores(opt);
 408         if (!evt_has_distributed_sched(opt->dev_id))
 409                 evt_dump_scheduler_lcore(opt);
 410         evt_dump_nb_stages(opt);
 411         evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
 412         evt_dump("nb_evdev_queues", "%d", nb_queues);
 413         evt_dump_queue_priority(opt);
 414         evt_dump_sched_type_list(opt);
 415 }
 416
 417 void
 418 perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
 419 {
 420         RTE_SET_USED(test);
 421
 422         rte_event_dev_stop(opt->dev_id);
 423         rte_event_dev_close(opt->dev_id);
 424 }
 425
 426 static inline void
 427 perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
 428             void *obj, unsigned i __rte_unused)
 429 {
 430         memset(obj, 0, mp->elt_size);
 431 }
 432
 433 int
 434 perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
 435 {
 436         struct test_perf *t = evt_test_priv(test);
 437
 438         t->pool = rte_mempool_create(test->name, /* mempool name */
 439                                 opt->pool_sz, /* number of elements*/
 440                                 sizeof(struct perf_elt), /* element size*/
 441                                 512, /* cache size*/
 442                                 0, NULL, NULL,
 443                                 perf_elt_init, /* obj constructor */
 444                                 NULL, opt->socket_id, 0); /* flags */
 445         if (t->pool == NULL) {
 446                 evt_err("failed to create mempool");
 447                 return -ENOMEM;
 448         }
 449
 450         return 0;
 451 }
 452
 453 void
 454 perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
 455 {
 456         RTE_SET_USED(opt);
 457         struct test_perf *t = evt_test_priv(test);
 458
 459         rte_mempool_free(t->pool);
 460 }
 461
 462 int
 463 perf_test_setup(struct evt_test *test, struct evt_options *opt)
 464 {
 465         void *test_perf;
 466
 467         test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
 468                                 RTE_CACHE_LINE_SIZE, opt->socket_id);
 469         if (test_perf  == NULL) {
 470                 evt_err("failed to allocate test_perf memory");
 471                 goto nomem;
 472         }
 473         test->test_priv = test_perf;
 474
 475         struct test_perf *t = evt_test_priv(test);
 476
 477         t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
 478         t->nb_workers = evt_nr_active_lcores(opt->wlcores);
 479         t->done = false;
 480         t->nb_pkts = opt->nb_pkts;
 481         t->nb_flows = opt->nb_flows;
 482         t->result = EVT_TEST_FAILED;
 483         t->opt = opt;
 484         memcpy(t->sched_type_list, opt->sched_type_list,
 485                         sizeof(opt->sched_type_list));
 486         return 0;
 487 nomem:
 488         return -ENOMEM;
 489 }
 490
 491 void
 492 perf_test_destroy(struct evt_test *test, struct evt_options *opt)
 493 {
 494         RTE_SET_USED(opt);
 495
 496         rte_free(test->test_priv);
 497 }