examples/vm_power_manager/channel_monitor.c  (deb_dpdk.git, upstream version 18.08)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/epoll.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <rte_log.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_pmd_i40e.h>

#include <libvirt/libvirt.h>
#include "channel_monitor.h"
#include "channel_commands.h"
#include "channel_manager.h"
#include "power_manager.h"
#include "oob_monitor.h"

#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1

#define MAX_EVENTS 256

uint64_t vsi_pkt_count_prev[384];
uint64_t rdtsc_prev[384];

double time_period_ms = 1;
static volatile unsigned run_loop = 1;
static int global_event_fd;
static unsigned int policy_is_set;
static struct epoll_event *global_events_list;
static struct policy policies[MAX_VMS];

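/* Stop the monitor loop and free the shared epoll events list. */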
void channel_monitor_exit(void)
{
	run_loop = 0;
	rte_free(global_events_list);
}

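/*
 * If the pcpu assigned to vcpu z of policy pNo is also used by a different
 * VM (lvm_info[x], vcpu t), mark that core as shared and pin it to its
 * maximum frequency so the other VM's workload is not slowed down.
 */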
static void
core_share(int pNo, int z, int x, int t)
{
	if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
		if (strcmp(policies[pNo].pkt.vm_name,
				lvm_info[x].vm_name) != 0) {
			policies[pNo].core_share[z].status = 1;
			power_manager_scale_core_max(
					policies[pNo].core_share[z].pcpu);
		}
	}
}

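/*
 * Recompute the shared/exclusive status of every pcpu used by policy pNo
 * by comparing its vcpu-to-pcpu mapping against every vcpu of every VM
 * currently known to the channel manager.
 */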
static void
core_share_status(int pNo)
{
	int noVms, noVcpus, z, x, t;

	get_all_vm(&noVms, &noVcpus);

	/* Reset Core Share Status. */
	for (z = 0; z < noVcpus; z++)
		policies[pNo].core_share[z].status = 0;

	/* Foreach vcpu in a policy. */
	for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
		/* Foreach VM on the platform. */
		for (x = 0; x < noVms; x++) {
			/* Foreach vcpu of VMs on platform. */
			for (t = 0; t < lvm_info[x].num_cpus; t++)
				core_share(pNo, z, x, t);
		}
	}
}

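/*
 * Resolve the vcpus named in the policy packet to host pcpus. For a
 * BRANCH_RATIO policy the pcpus are handed to the out-of-band monitor;
 * otherwise they are recorded in the policy's core_share table for the
 * traffic/time/workload profiles to act on.
 */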
static void
get_pcpu_to_control(struct policy *pol)
{
	/* Convert vcpu to pcpu. */
	struct vm_info info;
	int pcpu, count;
	uint64_t mask_u64b;
	struct core_info *ci;
	int ret;

	ci = get_core_info();

	RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n",
			pol->pkt.vm_name);
	get_info_vm(pol->pkt.vm_name, &info);

	for (count = 0; count < pol->pkt.num_vcpu; count++) {
		mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
		for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) {
			if ((mask_u64b >> pcpu) & 1) {
				if (pol->pkt.policy_to_use == BRANCH_RATIO) {
					ci->cd[pcpu].oob_enabled = 1;
					ret = add_core_to_monitor(pcpu);
					if (ret == 0)
						printf("Monitoring pcpu %d via Branch Ratio\n",
								pcpu);
					else
						printf("Failed to start OOB Monitoring pcpu %d\n",
								pcpu);
				} else {
					pol->core_share[count].pcpu = pcpu;
					printf("Monitoring pcpu %d\n", pcpu);
				}
			}
		}
	}
}

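/*
 * For every MAC address the policy asks us to monitor, find the i40e port
 * and VF id that own it. On success the policy's port and pfid arrays are
 * filled in; on failure the policy is disabled and the (negative) errno
 * from the PMD query is returned.
 */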
static int
get_pfid(struct policy *pol)
{
	int i, x, ret = 0;

	for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {

		RTE_ETH_FOREACH_DEV(x) {
			ret = rte_pmd_i40e_query_vfid_by_mac(x,
				(struct ether_addr *)&(pol->pkt.vfid[i]));
			if (ret != -EINVAL) {
				pol->port[i] = x;
				break;
			}
		}
		/* The PMD returns negative errno values, so compare against
		 * -ENODEV (the original check used ENODEV).
		 */
		if (ret == -EINVAL || ret == -ENOTSUP || ret == -ENODEV) {
			RTE_LOG(INFO, CHANNEL_MONITOR,
				"Error with policy: MAC not found on "
				"attached ports\n");
			pol->enabled = 0;
			return ret;
		}
		pol->pfid[i] = ret;
	}
	return 1;
}

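/*
 * Install or refresh the policy carried in a PKT_POLICY packet. If a policy
 * for the same VM name already exists it is overwritten in place; otherwise
 * the first free slot in the policies[] table is used.
 */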
static int
update_policy(struct channel_packet *pkt)
{
	unsigned int updated = 0;
	int i;

	for (i = 0; i < MAX_VMS; i++) {
		if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
			policies[i].pkt = *pkt;
			get_pcpu_to_control(&policies[i]);
			/* get_pfid() returns a negative errno on failure. */
			if (get_pfid(&policies[i]) < 0) {
				updated = 1;
				break;
			}
			core_share_status(i);
			policies[i].enabled = 1;
			updated = 1;
		}
	}
	if (!updated) {
		for (i = 0; i < MAX_VMS; i++) {
			if (policies[i].enabled == 0) {
				policies[i].pkt = *pkt;
				get_pcpu_to_control(&policies[i]);
				if (get_pfid(&policies[i]) < 0)
					break;
				core_share_status(i);
				policies[i].enabled = 1;
				break;
			}
		}
	}
	return 0;
}

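/*
 * Return the receive rate, in packets per second, summed over all VFs the
 * policy monitors: the delta in ipackets since the previous call is scaled
 * by tsc_hz / elapsed_tsc_cycles.
 */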
static uint64_t
get_pkt_diff(struct policy *pol)
{
	uint64_t vsi_pkt_count,
		vsi_pkt_total = 0,
		vsi_pkt_count_prev_total = 0;
	double rdtsc_curr, rdtsc_diff, diff;
	int x;
	struct rte_eth_stats vf_stats;

	for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {

		/* Read VSI stats. */
		if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
			vsi_pkt_count = vf_stats.ipackets;
		else
			vsi_pkt_count = -1;

		vsi_pkt_total += vsi_pkt_count;

		vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
		vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
	}

	rdtsc_curr = rte_rdtsc_precise();
	rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
	rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;

	diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
			((double)rte_get_tsc_hz() / rdtsc_diff);

	return diff;
}

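/*
 * Scale the cores of a TRAFFIC policy according to the measured packet
 * rate: maximum frequency above max_max_packet_thresh, medium frequency
 * above avg_max_packet_thresh, minimum frequency otherwise. Cores flagged
 * as shared with another VM (status == 1) are left alone.
 */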
static void
apply_traffic_profile(struct policy *pol)
{
	int count;
	uint64_t diff = 0;

	diff = get_pkt_diff(pol);

	RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");

	if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_max(
						pol->core_share[count].pcpu);
		}
	} else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_med(
						pol->core_share[count].pcpu);
		}
	} else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_min(
						pol->core_share[count].pcpu);
		}
	}
}

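/*
 * Scale cores of a TIME policy based on the current hour: maximum frequency
 * during busy_hours, minimum during quiet_hours, and fall back to the
 * traffic profile during hours_to_use_traffic_profile.
 */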
static void
apply_time_profile(struct policy *pol)
{
	int count, x;
	struct timeval tv;
	struct tm *ptm;
	char time_string[40];

	/* Obtain the time of day, and convert it to a tm struct. */
	gettimeofday(&tv, NULL);
	ptm = localtime(&tv.tv_sec);
	/* Format the date and time, down to a single second. */
	strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);

	for (x = 0; x < HOURS; x++) {

		if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
			for (count = 0; count < pol->pkt.num_vcpu; count++) {
				if (pol->core_share[count].status != 1) {
					power_manager_scale_core_max(
						pol->core_share[count].pcpu);
					RTE_LOG(INFO, CHANNEL_MONITOR,
						"Scaling up core %d to max\n",
						pol->core_share[count].pcpu);
				}
			}
			break;
		} else if (ptm->tm_hour ==
				pol->pkt.timer_policy.quiet_hours[x]) {
			for (count = 0; count < pol->pkt.num_vcpu; count++) {
				if (pol->core_share[count].status != 1) {
					power_manager_scale_core_min(
						pol->core_share[count].pcpu);
					RTE_LOG(INFO, CHANNEL_MONITOR,
						"Scaling down core %d to min\n",
						pol->core_share[count].pcpu);
				}
			}
			break;
		} else if (ptm->tm_hour ==
			pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
			apply_traffic_profile(pol);
			break;
		}
	}
}

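/*
 * Scale cores of a WORKLOAD policy to max/med/min frequency according to
 * the HIGH/MEDIUM/LOW hint carried in the policy packet.
 */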
static void
apply_workload_profile(struct policy *pol)
{
	int count;

	if (pol->pkt.workload == HIGH) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_max(
						pol->core_share[count].pcpu);
		}
	} else if (pol->pkt.workload == MEDIUM) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_med(
						pol->core_share[count].pcpu);
		}
	} else if (pol->pkt.workload == LOW) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_min(
						pol->core_share[count].pcpu);
		}
	}
}

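/* Dispatch an enabled policy to its traffic, time or workload handler. */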
static void
apply_policy(struct policy *pol)
{
	struct channel_packet *pkt = &pol->pkt;

	/* Check which policy type to use. */
	if (pkt->policy_to_use == TRAFFIC)
		apply_traffic_profile(pol);
	else if (pkt->policy_to_use == TIME)
		apply_time_profile(pol);
	else if (pkt->policy_to_use == WORKLOAD)
		apply_workload_profile(pol);
}

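/*
 * Handle one channel_packet read from a guest channel. A CPU_POWER command
 * applies a frequency/turbo operation to the pcpu(s) backing the requested
 * vcpu, while a PKT_POLICY command installs a policy via update_policy().
 * The channel is moved CONNECTED -> PROCESSING -> CONNECTED around the work
 * so the management thread does not act on it concurrently.
 *
 * For illustration only (a sketch of a guest-side request, not part of this
 * file): a guest agent would typically fill in a channel_packet such as
 *
 *	struct channel_packet pkt = { 0 };
 *	pkt.command = CPU_POWER;
 *	pkt.resource_id = 2;                    // guest vcpu to act on
 *	pkt.unit = CPU_POWER_SCALE_MAX;         // requested operation
 *
 * and write it to its virtio-serial endpoint for this loop to pick up.
 */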
static int
process_request(struct channel_packet *pkt, struct channel_info *chan_info)
{
	uint64_t core_mask;

	if (chan_info == NULL)
		return -1;

	if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
			CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
		return -1;

	if (pkt->command == CPU_POWER) {
		core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
		if (core_mask == 0) {
			/* Log the vCPU the mask lookup was made for. */
			RTE_LOG(ERR, CHANNEL_MONITOR, "Error getting physical CPU mask for "
				"channel '%s' using vCPU(%u)\n", chan_info->channel_path,
				(unsigned int)pkt->resource_id);
			return -1;
		}
		if (__builtin_popcountll(core_mask) == 1) {

			unsigned int core_num = __builtin_ffsll(core_mask) - 1;

			switch (pkt->unit) {
			case(CPU_POWER_SCALE_MIN):
				power_manager_scale_core_min(core_num);
				break;
			case(CPU_POWER_SCALE_MAX):
				power_manager_scale_core_max(core_num);
				break;
			case(CPU_POWER_SCALE_DOWN):
				power_manager_scale_core_down(core_num);
				break;
			case(CPU_POWER_SCALE_UP):
				power_manager_scale_core_up(core_num);
				break;
			case(CPU_POWER_ENABLE_TURBO):
				power_manager_enable_turbo_core(core_num);
				break;
			case(CPU_POWER_DISABLE_TURBO):
				power_manager_disable_turbo_core(core_num);
				break;
			default:
				break;
			}
		} else {
			switch (pkt->unit) {
			case(CPU_POWER_SCALE_MIN):
				power_manager_scale_mask_min(core_mask);
				break;
			case(CPU_POWER_SCALE_MAX):
				power_manager_scale_mask_max(core_mask);
				break;
			case(CPU_POWER_SCALE_DOWN):
				power_manager_scale_mask_down(core_mask);
				break;
			case(CPU_POWER_SCALE_UP):
				power_manager_scale_mask_up(core_mask);
				break;
			case(CPU_POWER_ENABLE_TURBO):
				power_manager_enable_turbo_mask(core_mask);
				break;
			case(CPU_POWER_DISABLE_TURBO):
				power_manager_disable_turbo_mask(core_mask);
				break;
			default:
				break;
			}
		}
	}

	if (pkt->command == PKT_POLICY) {
		RTE_LOG(INFO, CHANNEL_MONITOR, "\nProcessing Policy request from Guest\n");
		update_policy(pkt);
		policy_is_set = 1;
	}

	/* Return is not checked as channel status may have been set to DISABLED
	 * from management thread.
	 */
	rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
			CHANNEL_MGR_CHANNEL_CONNECTED);
	return 0;
}

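/* Register a channel's file descriptor with the shared epoll instance. */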
int
add_channel_to_monitor(struct channel_info **chan_info)
{
	struct channel_info *info = *chan_info;
	struct epoll_event event;

	event.events = EPOLLIN;
	event.data.ptr = info;
	if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
				"to epoll\n", info->channel_path);
		return -1;
	}
	return 0;
}

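/* Detach a channel's file descriptor from the shared epoll instance. */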
int
remove_channel_from_monitor(struct channel_info *chan_info)
{
	if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
				"from epoll\n", chan_info->channel_path);
		return -1;
	}
	return 0;
}

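/*
 * Create the epoll instance and allocate the events array used by
 * run_channel_monitor(). Must be called before any channel is added.
 */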
int
channel_monitor_init(void)
{
	global_event_fd = epoll_create1(0);
	/* epoll_create1() returns -1 on failure, not 0. */
	if (global_event_fd < 0) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with "
				"error %s\n", strerror(errno));
		return -1;
	}
	global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list)
			* MAX_EVENTS, RTE_CACHE_LINE_SIZE);
	if (global_events_list == NULL) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
				"epoll events\n");
		return -1;
	}
	return 0;
}

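/*
 * Main monitor loop: wait (up to 1 ms) for events on the registered channel
 * fds, read complete channel_packet structures and hand them to
 * process_request(), then re-apply every enabled policy roughly every
 * time_period_ms milliseconds until channel_monitor_exit() clears run_loop.
 */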
void
run_channel_monitor(void)
{
	while (run_loop) {
		int n_events, i;

		n_events = epoll_wait(global_event_fd, global_events_list,
				MAX_EVENTS, 1);
		if (!run_loop)
			break;
		for (i = 0; i < n_events; i++) {
			struct channel_info *chan_info = (struct channel_info *)
					global_events_list[i].data.ptr;
			if ((global_events_list[i].events & EPOLLERR) ||
				(global_events_list[i].events & EPOLLHUP)) {
				RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
						"channel '%s'\n",
						chan_info->channel_path);
				remove_channel(&chan_info);
				continue;
			}
			if (global_events_list[i].events & EPOLLIN) {

				int n_bytes, err = 0;
				struct channel_packet pkt;
				void *buffer = &pkt;
				int buffer_len = sizeof(pkt);

				while (buffer_len > 0) {
					n_bytes = read(chan_info->fd,
							buffer, buffer_len);
					if (n_bytes == buffer_len)
						break;
					if (n_bytes == -1) {
						err = errno;
						RTE_LOG(DEBUG, CHANNEL_MONITOR,
							"Received error on "
							"channel '%s' read: %s\n",
							chan_info->channel_path,
							strerror(err));
						remove_channel(&chan_info);
						break;
					}
					buffer = (char *)buffer + n_bytes;
					buffer_len -= n_bytes;
				}
				if (!err)
					process_request(&pkt, chan_info);
			}
		}
		rte_delay_us(time_period_ms*1000);
		if (policy_is_set) {
			int j;

			for (j = 0; j < MAX_VMS; j++) {
				if (policies[j].enabled == 1)
					apply_policy(&policies[j]);
			}
		}
	}
}