/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/epoll.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <rte_log.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_pmd_i40e.h>

#include <libvirt/libvirt.h>
#include "channel_monitor.h"
#include "channel_commands.h"
#include "channel_manager.h"
#include "power_manager.h"

#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1

#define MAX_EVENTS 256

uint64_t vsi_pkt_count_prev[384];
uint64_t rdtsc_prev[384];

double time_period_ms = 1;
static volatile unsigned run_loop = 1;
static int global_event_fd;
static unsigned int policy_is_set;
static struct epoll_event *global_events_list;
static struct policy policies[MAX_VMS];

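/* Stop the monitor loop and release the epoll events list. */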
void channel_monitor_exit(void)
{
        run_loop = 0;
        rte_free(global_events_list);
}

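/*
 * If the pcpu backing vcpu 'z' of policy 'pNo' is also used by another VM
 * (lvm_info[x], vcpu 't'), mark the core as shared and pin it to its
 * maximum frequency.
 */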
static void
core_share(int pNo, int z, int x, int t)
{
        if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
                if (strcmp(policies[pNo].pkt.vm_name,
                                lvm_info[x].vm_name) != 0) {
                        policies[pNo].core_share[z].status = 1;
                        power_manager_scale_core_max(
                                        policies[pNo].core_share[z].pcpu);
                }
        }
}

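/*
 * Recompute the shared-core status of every vcpu in policy 'pNo' by
 * comparing its pcpus against those of all VMs on the platform.
 */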
static void
core_share_status(int pNo)
{
        int noVms, noVcpus, z, x, t;

        get_all_vm(&noVms, &noVcpus);

        /* Reset Core Share Status. */
        for (z = 0; z < noVcpus; z++)
                policies[pNo].core_share[z].status = 0;

        /* Foreach vcpu in a policy. */
        for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
                /* Foreach VM on the platform. */
                for (x = 0; x < noVms; x++) {
                        /* Foreach vcpu of VMs on platform. */
                        for (t = 0; t < lvm_info[x].num_cpus; t++)
                                core_share(pNo, z, x, t);
                }
        }
}

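/*
 * Resolve each vcpu listed in the policy to the physical CPU it runs on,
 * using the pcpu affinity mask reported by the channel manager.
 */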
static void
get_pcpu_to_control(struct policy *pol)
{
        /* Convert vcpu to pcpu. */
        struct vm_info info;
        int pcpu, count;
        uint64_t mask_u64b;

        RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n",
                        pol->pkt.vm_name);
        get_info_vm(pol->pkt.vm_name, &info);

        for (count = 0; count < pol->pkt.num_vcpu; count++) {
                mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
                for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) {
                        if ((mask_u64b >> pcpu) & 1)
                                pol->core_share[count].pcpu = pcpu;
                }
        }
}

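/*
 * Find, for every MAC address the policy monitors, the port and VF id it
 * belongs to by querying each attached i40e port. Returns 1 on success, or
 * disables the policy and returns a negative errno if a MAC cannot be found
 * on any port.
 */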
static int
get_pfid(struct policy *pol)
{
        int i, x, ret = 0, nb_ports;

        nb_ports = rte_eth_dev_count();
        for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {
                for (x = 0; x < nb_ports; x++) {
                        ret = rte_pmd_i40e_query_vfid_by_mac(x,
                                (struct ether_addr *)&(pol->pkt.vfid[i]));
                        if (ret != -EINVAL) {
                                pol->port[i] = x;
                                break;
                        }
                }
                if (ret == -EINVAL || ret == -ENOTSUP || ret == -ENODEV) {
                        RTE_LOG(INFO, CHANNEL_MONITOR,
                                "Error with policy: MAC not found on "
                                "attached ports\n");
                        pol->enabled = 0;
                        return ret;
                }
                pol->pfid[i] = ret;
        }
        return 1;
}

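/*
 * Install or refresh the policy described by 'pkt': reuse the slot of an
 * existing policy for the same VM if there is one, otherwise take the first
 * free slot. A policy is only enabled once its VF ids have been resolved.
 */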
static int
update_policy(struct channel_packet *pkt)
{
        unsigned int updated = 0;
        int i;

        for (i = 0; i < MAX_VMS; i++) {
                if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
                        policies[i].pkt = *pkt;
                        get_pcpu_to_control(&policies[i]);
                        if (get_pfid(&policies[i]) < 0) {
                                updated = 1;
                                break;
                        }
                        core_share_status(i);
                        policies[i].enabled = 1;
                        updated = 1;
                }
        }
        if (!updated) {
                for (i = 0; i < MAX_VMS; i++) {
                        if (policies[i].enabled == 0) {
                                policies[i].pkt = *pkt;
                                get_pcpu_to_control(&policies[i]);
                                if (get_pfid(&policies[i]) < 0)
                                        break;
                                core_share_status(i);
                                policies[i].enabled = 1;
                                break;
                        }
                }
        }
        return 0;
}

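/*
 * Return the current packet rate (packets per second) across all VFs that
 * the policy monitors, computed from the change in VSI receive counters
 * since the previous call and the TSC ticks elapsed in between.
 */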
static uint64_t
get_pkt_diff(struct policy *pol)
{
        uint64_t vsi_pkt_count,
                vsi_pkt_total = 0,
                vsi_pkt_count_prev_total = 0;
        double rdtsc_curr, rdtsc_diff, diff;
        int x;
        struct rte_eth_stats vf_stats;

        for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {

                /* Read VSI stats. */
                if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
                        vsi_pkt_count = vf_stats.ipackets;
                else
                        vsi_pkt_count = -1;

                vsi_pkt_total += vsi_pkt_count;

                vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
                vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
        }

        rdtsc_curr = rte_rdtsc_precise();
        rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
        rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;

        diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
                        ((double)rte_get_tsc_hz() / rdtsc_diff);

        return diff;
}

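/*
 * Scale the policy's cores according to the measured packet rate: maximum
 * frequency above the high threshold, medium above the average threshold,
 * minimum otherwise. Cores marked as shared with another VM are skipped.
 */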
static void
apply_traffic_profile(struct policy *pol)
{
        int count;
        uint64_t diff = 0;

        diff = get_pkt_diff(pol);

        RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");

        if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
                                power_manager_scale_core_max(
                                                pol->core_share[count].pcpu);
                }
        } else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
                                power_manager_scale_core_med(
                                                pol->core_share[count].pcpu);
                }
        } else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
                                power_manager_scale_core_min(
                                                pol->core_share[count].pcpu);
                }
        }
}

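/*
 * Scale the policy's cores based on the current hour: maximum frequency
 * during configured busy hours, minimum during quiet hours, and the traffic
 * profile during hours flagged for traffic-based control.
 */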
static void
apply_time_profile(struct policy *pol)
{
        int count, x;
        struct timeval tv;
        struct tm *ptm;
        char time_string[40];

        /* Obtain the time of day, and convert it to a tm struct. */
        gettimeofday(&tv, NULL);
        ptm = localtime(&tv.tv_sec);
        /* Format the date and time, down to a single second. */
        strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);

        for (x = 0; x < HOURS; x++) {

                if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
                        for (count = 0; count < pol->pkt.num_vcpu; count++) {
                                if (pol->core_share[count].status != 1) {
                                        power_manager_scale_core_max(
                                                pol->core_share[count].pcpu);
                                        RTE_LOG(INFO, CHANNEL_MONITOR,
                                                "Scaling up core %d to max\n",
                                                pol->core_share[count].pcpu);
                                }
                        }
                        break;
                } else if (ptm->tm_hour ==
                                pol->pkt.timer_policy.quiet_hours[x]) {
                        for (count = 0; count < pol->pkt.num_vcpu; count++) {
                                if (pol->core_share[count].status != 1) {
                                        power_manager_scale_core_min(
                                                pol->core_share[count].pcpu);
                                        RTE_LOG(INFO, CHANNEL_MONITOR,
                                                "Scaling down core %d to min\n",
                                                pol->core_share[count].pcpu);
                                }
                        }
                        break;
                } else if (ptm->tm_hour ==
                        pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
                        apply_traffic_profile(pol);
                        break;
                }
        }
}

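/*
 * Scale the policy's cores to a frequency matching the workload level the
 * guest reported (HIGH, MEDIUM or LOW), skipping cores shared with other VMs.
 */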
static void
apply_workload_profile(struct policy *pol)
{
        int count;

        if (pol->pkt.workload == HIGH) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
                                power_manager_scale_core_max(
                                                pol->core_share[count].pcpu);
                }
        } else if (pol->pkt.workload == MEDIUM) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
                                power_manager_scale_core_med(
                                                pol->core_share[count].pcpu);
                }
        } else if (pol->pkt.workload == LOW) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
                                power_manager_scale_core_min(
                                                pol->core_share[count].pcpu);
                }
        }
}

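/* Dispatch to the traffic, time or workload handler the policy selects. */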
static void
apply_policy(struct policy *pol)
{
        struct channel_packet *pkt = &pol->pkt;

        /* Check policy to use. */
        if (pkt->policy_to_use == TRAFFIC)
                apply_traffic_profile(pol);
        else if (pkt->policy_to_use == TIME)
                apply_time_profile(pol);
        else if (pkt->policy_to_use == WORKLOAD)
                apply_workload_profile(pol);
}

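/*
 * Handle a packet received on a channel: either a direct CPU power command
 * for the vcpu(s) named in the request, or a policy definition that is
 * stored and applied periodically by the monitor loop.
 */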
static int
process_request(struct channel_packet *pkt, struct channel_info *chan_info)
{
        uint64_t core_mask;

        if (chan_info == NULL)
                return -1;

        if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
                        CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
                return -1;

        if (pkt->command == CPU_POWER) {
                core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
                if (core_mask == 0) {
                        RTE_LOG(ERR, CHANNEL_MONITOR, "Error getting physical CPU mask for "
                                "channel '%s' using vCPU(%u)\n", chan_info->channel_path,
                                (unsigned)pkt->unit);
                        return -1;
                }
                if (__builtin_popcountll(core_mask) == 1) {

                        unsigned core_num = __builtin_ffsll(core_mask) - 1;

                        switch (pkt->unit) {
                        case(CPU_POWER_SCALE_MIN):
                                power_manager_scale_core_min(core_num);
                                break;
                        case(CPU_POWER_SCALE_MAX):
                                power_manager_scale_core_max(core_num);
                                break;
                        case(CPU_POWER_SCALE_DOWN):
                                power_manager_scale_core_down(core_num);
                                break;
                        case(CPU_POWER_SCALE_UP):
                                power_manager_scale_core_up(core_num);
                                break;
                        case(CPU_POWER_ENABLE_TURBO):
                                power_manager_enable_turbo_core(core_num);
                                break;
                        case(CPU_POWER_DISABLE_TURBO):
                                power_manager_disable_turbo_core(core_num);
                                break;
                        default:
                                break;
                        }
                } else {
                        switch (pkt->unit) {
                        case(CPU_POWER_SCALE_MIN):
                                power_manager_scale_mask_min(core_mask);
                                break;
                        case(CPU_POWER_SCALE_MAX):
                                power_manager_scale_mask_max(core_mask);
                                break;
                        case(CPU_POWER_SCALE_DOWN):
                                power_manager_scale_mask_down(core_mask);
                                break;
                        case(CPU_POWER_SCALE_UP):
                                power_manager_scale_mask_up(core_mask);
                                break;
                        case(CPU_POWER_ENABLE_TURBO):
                                power_manager_enable_turbo_mask(core_mask);
                                break;
                        case(CPU_POWER_DISABLE_TURBO):
                                power_manager_disable_turbo_mask(core_mask);
                                break;
                        default:
                                break;
                        }
                }
        }

        if (pkt->command == PKT_POLICY) {
                RTE_LOG(INFO, CHANNEL_MONITOR, "\nProcessing Policy request from Guest\n");
                update_policy(pkt);
                policy_is_set = 1;
        }

        /* Return is not checked as channel status may have been set to DISABLED
         * from management thread
         */
        rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
                        CHANNEL_MGR_CHANNEL_CONNECTED);
        return 0;
}

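/* Register a connected channel's fd with the monitor's epoll instance. */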
int
add_channel_to_monitor(struct channel_info **chan_info)
{
        struct channel_info *info = *chan_info;
        struct epoll_event event;

        event.events = EPOLLIN;
        event.data.ptr = info;
        if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
                RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
                                "to epoll\n", info->channel_path);
                return -1;
        }
        return 0;
}

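/* Remove a channel's fd from the monitor's epoll instance. */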
int
remove_channel_from_monitor(struct channel_info *chan_info)
{
        if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) {
                RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
                                "from epoll\n", chan_info->channel_path);
                return -1;
        }
        return 0;
}

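/* Create the epoll instance and allocate the events list used by the monitor. */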
int
channel_monitor_init(void)
{
        global_event_fd = epoll_create1(0);
        if (global_event_fd < 0) {
                RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with "
                                "error %s\n", strerror(errno));
                return -1;
        }
        global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list)
                        * MAX_EVENTS, RTE_CACHE_LINE_SIZE);
        if (global_events_list == NULL) {
                RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
                                "epoll events\n");
                return -1;
        }
        return 0;
}

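/*
 * Main monitor loop: wait for channel events, read and process incoming
 * packets, and periodically apply every enabled policy until
 * channel_monitor_exit() is called.
 */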
void
run_channel_monitor(void)
{
        while (run_loop) {
                int n_events, i;

                n_events = epoll_wait(global_event_fd, global_events_list,
                                MAX_EVENTS, 1);
                if (!run_loop)
                        break;
                for (i = 0; i < n_events; i++) {
                        struct channel_info *chan_info = (struct channel_info *)
                                        global_events_list[i].data.ptr;
                        if ((global_events_list[i].events & EPOLLERR) ||
                                (global_events_list[i].events & EPOLLHUP)) {
                                RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
                                                "channel '%s'\n",
                                                chan_info->channel_path);
                                remove_channel(&chan_info);
                                continue;
                        }
                        if (global_events_list[i].events & EPOLLIN) {

                                int n_bytes, err = 0;
                                struct channel_packet pkt;
                                void *buffer = &pkt;
                                int buffer_len = sizeof(pkt);

                                while (buffer_len > 0) {
                                        n_bytes = read(chan_info->fd,
                                                        buffer, buffer_len);
                                        if (n_bytes == buffer_len)
                                                break;
                                        if (n_bytes == -1) {
                                                err = errno;
                                                RTE_LOG(DEBUG, CHANNEL_MONITOR,
                                                        "Received error on "
                                                        "channel '%s' read: %s\n",
                                                        chan_info->channel_path,
                                                        strerror(err));
                                                remove_channel(&chan_info);
                                                break;
                                        }
                                        buffer = (char *)buffer + n_bytes;
                                        buffer_len -= n_bytes;
                                }
                                if (!err)
                                        process_request(&pkt, chan_info);
                        }
                }
                rte_delay_us(time_period_ms*1000);
                if (policy_is_set) {
                        int j;

                        for (j = 0; j < MAX_VMS; j++) {
                                if (policies[j].enabled == 1)
                                        apply_policy(&policies[j]);
                        }
                }
        }
}