/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

#include <string.h>

#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_atomic.h>
#include <rte_malloc.h>
#include <inttypes.h>

#include "rte_power.h"
#include "rte_power_empty_poll.h"

#define INTERVALS_PER_SECOND 100     /* (10ms) */
#define SECONDS_TO_TRAIN_FOR 2
#define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
#define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
#define DEFAULT_CYCLES_PER_PACKET 800

static struct ep_params *ep_params;
static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;

static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];

static uint32_t total_avail_freqs[RTE_MAX_LCORE];

static uint32_t freq_index[NUM_FREQ];

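/* Look up the configured frequency-table index for a LOW/MED/HGH level. */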
static uint32_t
get_freq_index(enum freq_val index)
{
        return freq_index[index];
}

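/*
 * Set the frequency of an lcore: either translate a LOW/MED/HGH level
 * through the freq_index table, or use the value directly as a frequency
 * index when specific_freq is true.
 */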
static int
set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
{
        int err = 0;
        uint32_t power_freq_index;

        if (!specific_freq)
                power_freq_index = get_freq_index(freq);
        else
                power_freq_index = freq;

        err = rte_power_set_freq(lcore_id, power_freq_index);

        return err;
}

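/*
 * TRAINING state hooks invoked from set_state(). Exit is currently a
 * no-op; entry resets the iteration counter and restarts at the LOW level.
 */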
static inline void __attribute__((always_inline))
exit_training_state(struct priority_worker *poll_stats)
{
        RTE_SET_USED(poll_stats);
}

static inline void __attribute__((always_inline))
enter_training_state(struct priority_worker *poll_stats)
{
        poll_stats->iter_counter = 0;
        poll_stats->cur_freq = LOW;
        poll_stats->queue_state = TRAINING;
}

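/*
 * Enter MED_NORMAL: clear the edpi/ppi averaging bins, switch the core to
 * the MED frequency and set the MED/HGH percent thresholds.
 */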
static inline void __attribute__((always_inline))
enter_normal_state(struct priority_worker *poll_stats)
{
        /* Clear the averaging arrays and counters */
        memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
        poll_stats->ec = 0;
        memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
        poll_stats->pc = 0;

        poll_stats->cur_freq = MED;
        poll_stats->iter_counter = 0;
        poll_stats->threshold_ctr = 0;
        poll_stats->queue_state = MED_NORMAL;
        RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
        set_power_freq(poll_stats->lcore_id, MED, false);

        poll_stats->thresh[MED].threshold_percent = med_to_high_threshold;
        poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold;
}

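/* Enter HGH_BUSY: clear the averaging bins and switch the core to the HGH frequency. */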
static inline void __attribute__((always_inline))
enter_busy_state(struct priority_worker *poll_stats)
{
        memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
        poll_stats->ec = 0;
        memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
        poll_stats->pc = 0;

        poll_stats->cur_freq = HGH;
        poll_stats->iter_counter = 0;
        poll_stats->threshold_ctr = 0;
        poll_stats->queue_state = HGH_BUSY;
        set_power_freq(poll_stats->lcore_id, HGH, false);
}

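/* Enter LOW_PURGE: only the iteration counter and the state flag are updated. */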
static inline void __attribute__((always_inline))
enter_purge_state(struct priority_worker *poll_stats)
{
        poll_stats->iter_counter = 0;
        poll_stats->queue_state = LOW_PURGE;
}

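/* Transition the per-core state machine, running exit/entry hooks on a state change. */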
static inline void __attribute__((always_inline))
set_state(struct priority_worker *poll_stats,
                enum queue_state new_state)
{
        enum queue_state old_state = poll_stats->queue_state;

        if (old_state != new_state) {

                /* Call any old state exit functions */
                if (old_state == TRAINING)
                        exit_training_state(poll_stats);

                /* Call any new state entry functions */
                if (new_state == TRAINING)
                        enter_training_state(poll_stats);
                if (new_state == MED_NORMAL)
                        enter_normal_state(poll_stats);
                if (new_state == HGH_BUSY)
                        enter_busy_state(poll_stats);
                if (new_state == LOW_PURGE)
                        enter_purge_state(poll_stats);
        }
}

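/*
 * Apply a user-supplied policy. For TRAINING the thresholds are learned at
 * run time; otherwise the supplied MED/HGH base edpi values are used and
 * the levels are marked as already trained.
 */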
static inline void __attribute__((always_inline))
set_policy(struct priority_worker *poll_stats,
                struct ep_policy *policy)
{
        set_state(poll_stats, policy->state);

        if (policy->state == TRAINING)
                return;

        poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
        poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;

        poll_stats->thresh[MED_NORMAL].trained = true;
        poll_stats->thresh[HGH_BUSY].trained = true;
}

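/*
 * Training pass for one frequency level: the first interval switches to
 * that frequency and snapshots the empty-poll counter, the following
 * max_train_iter intervals accumulate the per-interval empty-poll deltas,
 * and the final pass averages them into base_edpi (plus a 0.05% margin)
 * before moving on to the next level.
 */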
static void
update_training_stats(struct priority_worker *poll_stats,
                uint32_t freq,
                bool specific_freq,
                uint32_t max_train_iter)
{
        RTE_SET_USED(specific_freq);

        char pfi_str[32];
        uint64_t p0_empty_deq;

        sprintf(pfi_str, "%02u", freq);

        if (poll_stats->cur_freq == freq &&
                        poll_stats->thresh[freq].trained == false) {
                if (poll_stats->thresh[freq].cur_train_iter == 0) {

                        set_power_freq(poll_stats->lcore_id,
                                        freq, specific_freq);

                        poll_stats->empty_dequeues_prev =
                                poll_stats->empty_dequeues;

                        poll_stats->thresh[freq].cur_train_iter++;

                        return;
                } else if (poll_stats->thresh[freq].cur_train_iter
                                <= max_train_iter) {

                        p0_empty_deq = poll_stats->empty_dequeues -
                                poll_stats->empty_dequeues_prev;

                        poll_stats->empty_dequeues_prev =
                                poll_stats->empty_dequeues;

                        poll_stats->thresh[freq].base_edpi += p0_empty_deq;
                        poll_stats->thresh[freq].cur_train_iter++;

                } else {
                        if (poll_stats->thresh[freq].trained == false) {
                                poll_stats->thresh[freq].base_edpi =
                                        poll_stats->thresh[freq].base_edpi /
                                        max_train_iter;

                                /* Add on a factor of 0.05%;
                                 * this should remove any false negatives
                                 * when the system is 0% busy.
                                 */
                                poll_stats->thresh[freq].base_edpi +=
                                        poll_stats->thresh[freq].base_edpi / 2000;

                                poll_stats->thresh[freq].trained = true;
                                poll_stats->cur_freq++;
                        }
                }
        }
}

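/*
 * Compute how busy the core was over the last interval as a percentage:
 * 100 minus the ratio of the averaged empty-poll delta to the trained
 * base_edpi for the current frequency. Returns a value above 100 when the
 * measured edpi exceeds the trained baseline.
 */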
static inline uint32_t __attribute__((always_inline))
update_stats(struct priority_worker *poll_stats)
{
        uint64_t tot_edpi = 0, tot_ppi = 0;
        uint32_t j, percent;

        struct priority_worker *s = poll_stats;

        uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;

        s->empty_dequeues_prev = s->empty_dequeues;

        uint64_t ppi = s->num_dequeue_pkts - s->num_dequeue_pkts_prev;

        s->num_dequeue_pkts_prev = s->num_dequeue_pkts;

        if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {

                /* edpi means empty poll counter difference per interval */
                RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
                                "cur edpi %"PRIu64" "
                                "base edpi %"PRIu64"\n",
                                cur_edpi,
                                s->thresh[s->cur_freq].base_edpi);
                /* Return a value above 100 so the caller treats this
                 * interval as invalid.
                 */
                return 1000UL;
        }

        s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;
        s->ppi_av[s->pc++ % BINS_AV] = ppi;

        for (j = 0; j < BINS_AV; j++) {
                tot_edpi += s->edpi_av[j];
                tot_ppi += s->ppi_av[j];
        }

        tot_edpi = tot_edpi / BINS_AV;

        percent = 100 - (uint32_t)(((float)tot_edpi /
                        (float)s->thresh[s->cur_freq].base_edpi) * 100);

        return (uint32_t)percent;
}

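/*
 * Per-interval update for the MED/HGH states: once the busy percentage
 * stays above (MED) or below (HGH) its threshold for INTERVALS_PER_SECOND
 * consecutive intervals, move the core up to HGH_BUSY or back to
 * MED_NORMAL.
 */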
static inline void __attribute__((always_inline))
update_stats_normal(struct priority_worker *poll_stats)
{
        uint32_t percent;

        if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {

                enum freq_val cur_freq = poll_stats->cur_freq;

                /* edpi means empty poll counter difference per interval */
                RTE_LOG(DEBUG, POWER, "cur freq is %d, edpi is %"PRIu64"\n",
                                cur_freq,
                                poll_stats->thresh[cur_freq].base_edpi);
                return;
        }

        percent = update_stats(poll_stats);

        if (percent > 100) {
                /* edpi means empty poll counter difference per interval */
                RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
                return;
        }

        if (poll_stats->cur_freq == LOW)
                RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
        else if (poll_stats->cur_freq == MED) {

                if (percent >
                        poll_stats->thresh[MED].threshold_percent) {

                        if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
                                poll_stats->threshold_ctr++;
                        else {
                                set_state(poll_stats, HGH_BUSY);
                                RTE_LOG(INFO, POWER, "MOVE to HGH\n");
                        }

                } else {
                        /* reset */
                        poll_stats->threshold_ctr = 0;
                }

        } else if (poll_stats->cur_freq == HGH) {

                if (percent <
                        poll_stats->thresh[HGH].threshold_percent) {

                        if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
                                poll_stats->threshold_ctr++;
                        else {
                                set_state(poll_stats, MED_NORMAL);
                                RTE_LOG(INFO, POWER, "MOVE to MED\n");
                        }
                } else {
                        /* reset */
                        poll_stats->threshold_ctr = 0;
                }
        }
}

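/*
 * Training loop driven from the periodic callback: after skipping the
 * first INTERVALS_PER_SECOND ticks, run the LOW, MED and HGH training
 * passes and switch to MED_NORMAL once all three baselines are learned.
 */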
static int
empty_poll_training(struct priority_worker *poll_stats,
                uint32_t max_train_iter)
{

        if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
                poll_stats->iter_counter++;
                return 0;
        }

        update_training_stats(poll_stats,
                        LOW,
                        false,
                        max_train_iter);

        update_training_stats(poll_stats,
                        MED,
                        false,
                        max_train_iter);

        update_training_stats(poll_stats,
                        HGH,
                        false,
                        max_train_iter);

        if (poll_stats->thresh[LOW].trained == true
                        && poll_stats->thresh[MED].trained == true
                        && poll_stats->thresh[HGH].trained == true) {

                set_state(poll_stats, MED_NORMAL);

                RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
                                poll_stats->thresh[LOW].base_edpi);

                RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
                                poll_stats->thresh[MED].base_edpi);

                RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
                                poll_stats->thresh[HGH].base_edpi);

                RTE_LOG(INFO, POWER, "Training is Complete for %d\n",
                                poll_stats->lcore_id);
        }

        return 0;
}

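/*
 * Periodic callback (run from an rte_timer): walk all workers, skip
 * disabled lcores, and advance each worker's state machine by either
 * continuing training or updating the MED/HGH statistics.
 */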
void __rte_experimental
rte_empty_poll_detection(struct rte_timer *tim, void *arg)
{
        uint32_t i;
        struct priority_worker *poll_stats;

        RTE_SET_USED(tim);
        RTE_SET_USED(arg);

        for (i = 0; i < NUM_NODES; i++) {

                poll_stats = &(ep_params->wrk_data.wrk_stats[i]);

                if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
                        continue;

                switch (poll_stats->queue_state) {
                case(TRAINING):
                        empty_poll_training(poll_stats,
                                        ep_params->max_train_iter);
                        break;

                case(HGH_BUSY):
                case(MED_NORMAL):
                        update_stats_normal(poll_stats);
                        break;

                case(LOW_PURGE):
                        break;
                default:
                        break;
                }
        }
}

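/*
 * Allocate and initialize the empty-poll bookkeeping: set up the
 * LOW/MED/HGH frequency index table (caller-supplied or defaults), query
 * the available frequencies of each enabled lcore and apply the policy to
 * every worker except the master lcore.
 */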
int __rte_experimental
rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
                struct ep_policy *policy)
{
        uint32_t i;

        /* Allocate the ep_params structure */
        ep_params = rte_zmalloc_socket(NULL,
                        sizeof(struct ep_params),
                        0,
                        rte_socket_id());

        if (!ep_params)
                return -1;

        if (freq_tlb == NULL) {
                freq_index[LOW] = 14;
                freq_index[MED] = 9;
                freq_index[HGH] = 1;
        } else {
                freq_index[LOW] = freq_tlb[LOW];
                freq_index[MED] = freq_tlb[MED];
                freq_index[HGH] = freq_tlb[HGH];
        }

        RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");

        /* Train for a pre-defined period */
        ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;

        struct stats_data *w = &ep_params->wrk_data;

        *eptr = ep_params;

        /* Initialize all wrk_stats state */
        for (i = 0; i < NUM_NODES; i++) {

                if (rte_lcore_is_enabled(i) == 0)
                        continue;

                /* Init the freqs table */
                total_avail_freqs[i] = rte_power_freqs(i,
                                avail_freqs[i],
                                NUM_FREQS);

                RTE_LOG(INFO, POWER, "total avail freq is %d, lcore id %d\n",
                                total_avail_freqs[i],
                                i);

                if (get_freq_index(LOW) > total_avail_freqs[i])
                        return -1;

                if (rte_get_master_lcore() != i) {
                        w->wrk_stats[i].lcore_id = i;
                        set_policy(&w->wrk_stats[i], policy);
                }
        }

        return 0;
}

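/* Release the bookkeeping allocated by rte_power_empty_poll_stat_init(). */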
void __rte_experimental
rte_power_empty_poll_stat_free(void)
{
        RTE_LOG(INFO, POWER, "Close the Empty Poll\n");

        if (ep_params != NULL)
                rte_free(ep_params);
}

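/* Record an empty poll for an lcore by bumping its empty dequeue counter. */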
int __rte_experimental
rte_power_empty_poll_stat_update(unsigned int lcore_id)
{
        struct priority_worker *poll_stats;

        if (lcore_id >= NUM_NODES)
                return -1;

        poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

        if (poll_stats->lcore_id == 0)
                poll_stats->lcore_id = lcore_id;

        poll_stats->empty_dequeues++;

        return 0;
}

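/* Record a successful poll for an lcore by adding nb_pkt to its packet counter. */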
int __rte_experimental
rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
{
        struct priority_worker *poll_stats;

        if (lcore_id >= NUM_NODES)
                return -1;

        poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

        if (poll_stats->lcore_id == 0)
                poll_stats->lcore_id = lcore_id;

        poll_stats->num_dequeue_pkts += nb_pkt;

        return 0;
}

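/* Return the empty dequeue counter of an lcore. */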
uint64_t __rte_experimental
rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
{
        struct priority_worker *poll_stats;

        if (lcore_id >= NUM_NODES)
                return -1;

        poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

        if (poll_stats->lcore_id == 0)
                poll_stats->lcore_id = lcore_id;

        return poll_stats->empty_dequeues;
}

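/* Return the dequeued packet counter of an lcore. */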
uint64_t __rte_experimental
rte_power_poll_stat_fetch(unsigned int lcore_id)
{
        struct priority_worker *poll_stats;

        if (lcore_id >= NUM_NODES)
                return -1;

        poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);

        if (poll_stats->lcore_id == 0)
                poll_stats->lcore_id = lcore_id;

        return poll_stats->num_dequeue_pkts;
}