2 * Copyright (c) 2020 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vnet/vnet.h>
18 #include <vlibapi/api.h>
19 #include <vlibmemory/api.h>
20 #include <vnet/plugin/plugin.h>
21 #include <vpp/app/version.h>
22 #include <linux/limits.h>
23 #include <sys/ioctl.h>
25 #include <perfmon/perfmon.h>
/* Global perfmon plugin state, shared by all functions in this plugin. */
27 perfmon_main_t perfmon_main;
/* Register this plugin with VPP so it is loaded and versioned. */
29 VLIB_PLUGIN_REGISTER () = {
30 .version = VPP_BUILD_VER,
31 .description = "Performance Monitor",
/* Log class used by the log_* convenience macros below. */
34 VLIB_REGISTER_LOG_CLASS (if_default_log, static) = {
35 .class_name = "perfmon",
/* Convenience wrappers around the vlib logging API, bound to the
 * "perfmon" log class registered above. */
38 #define log_debug(fmt, ...) \
39 vlib_log_debug (if_default_log.class, fmt, __VA_ARGS__)
40 #define log_warn(fmt, ...) \
41 vlib_log_warn (if_default_log.class, fmt, __VA_ARGS__)
42 #define log_err(fmt, ...) vlib_log_err (if_default_log.class, fmt, __VA_ARGS__)
/* Return perfmon to an idle state: detach dispatch wrappers, close all
 * perf event fds, free the synthesized default instance type, free the
 * per-thread runtimes and unmap the perf mmap pages. All loops iterate
 * over possibly-empty vectors, so calling this when nothing is active
 * is safe. */
45 perfmon_reset (vlib_main_t *vm)
47 perfmon_main_t *pm = &perfmon_main;
48 uword page_size = clib_mem_get_page_size (); /* each perf mmap region is one page */
/* Detach the per-thread node dispatch wrappers (0 = no wrapper). */
51 for (int i = 0; i < vlib_get_n_threads (); i++)
52 vlib_node_set_dispatch_wrapper (vlib_get_main_by_index (i), 0);
/* Close every fd opened by perf_event_open and drop the bookkeeping. */
54 for (int i = 0; i < vec_len (pm->fds_to_close); i++)
55 close (pm->fds_to_close[i]);
56 vec_free (pm->fds_to_close);
57 vec_free (pm->group_fds);
/* Free the synthesized default (per-thread) instance type, including
 * each formatted instance name allocated in perfmon_set. */
58 if (pm->default_instance_type)
60 perfmon_instance_type_t *it = pm->default_instance_type;
61 for (int i = 0; i < vec_len (it->instances); i++)
62 vec_free (it->instances[i].name);
63 vec_free (it->instances);
64 vec_free (pm->default_instance_type);
/* Free per-thread runtime stats and unmap every event's perf page. */
67 for (int i = 0; i < vec_len (pm->thread_runtimes); i++)
69 perfmon_thread_runtime_t *tr = vec_elt_at_index (pm->thread_runtimes, i);
70 vec_free (tr->node_stats);
71 for (int j = 0; j < PERF_MAX_EVENTS; j++)
72 if (tr->mmap_pages[j])
73 munmap (tr->mmap_pages[j], page_size);
75 vec_free (pm->thread_runtimes);
/* Clear the active selection so a new bundle can be configured. */
78 pm->active_instance_type = 0;
79 pm->active_bundle = 0;
/* Prepare (but do not enable) a capture session for bundle 'b':
 * - build the instance list (synthesizing one instance per VPP thread
 *   when the source provides none),
 * - open one perf event group per instance via perf_event_open(2),
 * - for node bundles, mmap each event's user-readable perf page and
 *   size the per-thread runtime stats to the node graph.
 * On failure the error is logged via log_err at the bottom; cleanup of
 * partially-acquired state is handled through perfmon_reset (all fds
 * are recorded in pm->fds_to_close). */
83 perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
85 clib_error_t *err = 0;
86 perfmon_main_t *pm = &perfmon_main;
89 int n_nodes = vec_len (vm->node_main.nodes);
90 uword page_size = clib_mem_get_page_size ();
91 u32 instance_type = 0;
93 perfmon_instance_type_t *it = 0;
/* Node bundles sample per thread/node via dispatch wrappers. */
100 if (b->active_type == PERFMON_BUNDLE_TYPE_NODE)
/* Source has no instance table: synthesize a default instance type
 * with one instance per VPP thread, named "<thread-name> (<index>)". */
103 if (s->instances_by_type == 0)
105 vec_add2 (pm->default_instance_type, it, 1);
106 it->name = is_node ? "Thread/Node" : "Thread";
107 for (int i = 0; i < vlib_get_n_threads (); i++)
109 vlib_worker_thread_t *w = vlib_worker_threads + i;
110 perfmon_instance_t *in;
111 vec_add2 (it->instances, in, 1);
/* Trailing %c with 0 NUL-terminates; freed in perfmon_reset. */
114 in->name = (char *) format (0, "%s (%u)%c", w->name, i, 0);
117 vec_validate (pm->thread_runtimes, vlib_get_n_threads () - 1);
/* When events take their perf type from the instance, every event in
 * the bundle must agree on the same instance type. */
121 e = s->events + b->events[0];
123 if (e->type_from_instance)
125 instance_type = e->instance_type;
126 for (int i = 1; i < b->n_events; i++)
128 e = s->events + b->events[i];
129 ASSERT (e->type_from_instance == 1 &&
130 e->instance_type == instance_type);
133 it = vec_elt_at_index (s->instances_by_type, instance_type);
136 pm->active_instance_type = it;
/* Open one perf event group per instance; the first fd opened for an
 * instance becomes the group leader (group_fds[i] starts at -1). */
138 for (int i = 0; i < vec_len (it->instances); i++)
140 perfmon_instance_t *in = vec_elt_at_index (it->instances, i);
142 vec_validate (pm->group_fds, i);
143 pm->group_fds[i] = -1;
145 for (int j = 0; j < b->n_events; j++)
148 perfmon_event_t *e = s->events + b->events[j];
149 struct perf_event_attr pe = {
150 .size = sizeof (struct perf_event_attr),
151 .type = e->type_from_instance ? in->type : e->type,
153 .exclude_kernel = e->exclude_kernel,
/* Group reads return all member counters plus enabled/running times,
 * so counter multiplexing can be detected and scaled for. */
155 (PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED |
156 PERF_FORMAT_TOTAL_TIME_RUNNING),
160 log_debug ("perf_event_open pe.type=%u pe.config=0x%x pid=%d "
161 "cpu=%d group_fd=%d",
162 pe.type, pe.config, in->pid, in->cpu, pm->group_fds[i]);
/* glibc provides no perf_event_open wrapper; use the raw syscall. */
163 fd = syscall (__NR_perf_event_open, &pe, in->pid, in->cpu,
164 pm->group_fds[i], 0);
168 err = clib_error_return_unix (0, "perf_event_open");
/* Record the fd so perfmon_reset can close it, even on error paths. */
172 vec_add1 (pm->fds_to_close, fd);
174 if (pm->group_fds[i] == -1)
175 pm->group_fds[i] = fd; /* first event opened leads the group */
/* Node bundles: map the event's read-only perf metadata page so the
 * dispatch wrapper can read counters from userspace (see
 * perfmon_mmap_read_index below). */
179 perfmon_thread_runtime_t *tr;
180 tr = vec_elt_at_index (pm->thread_runtimes, i);
182 mmap (0, page_size, PROT_READ, MAP_SHARED, fd, 0);
184 if (tr->mmap_pages[j] == MAP_FAILED)
186 err = clib_error_return_unix (0, "mmap");
/* Size each per-thread runtime to the current node graph. */
194 perfmon_thread_runtime_t *rt;
195 rt = vec_elt_at_index (pm->thread_runtimes, i);
197 rt->n_events = b->n_events;
198 rt->n_nodes = n_nodes;
199 rt->preserve_samples = b->preserve_samples;
200 vec_validate_aligned (rt->node_stats, n_nodes - 1,
201 CLIB_CACHE_LINE_BYTES);
205 pm->active_bundle = b;
/* Error path: report the failure; state teardown goes through
 * perfmon_reset. */
210 log_err ("%U", format_clib_error, err);
/* Read the hardware counter ('rdpmc') index for a perf event from its
 * user-space mmap metadata page. Implements the reader side of the
 * kernel's seqlock protocol on mmap_page->lock: retry the read whenever
 * the lock value changed mid-read. Returns the raw 'index' field; 0
 * means the event is not currently scheduled on a counter (callers in
 * perfmon_start treat 0 as an error). */
216 static_always_inline u32
217 perfmon_mmap_read_index (const struct perf_event_mmap_page *mmap_page)
222 /* See documentation in /usr/include/linux/perf_event.h, for more details
223 * but the 2 main important things are:
224 * 1) if seq != mmap_page->lock, it means the kernel is currently updating
225 * the user page and we need to read it again
226 * 2) if idx == 0, it means the perf event is currently turned off and we
227 * just need to read the kernel-updated 'offset', otherwise we must also
228 * add the current hw value (hence rdmpc) */
231 seq = mmap_page->lock;
/* Barrier: the lock snapshot must be taken before reading index. */
232 CLIB_COMPILER_BARRIER ();
234 idx = mmap_page->index;
/* Barrier: index must be read before re-checking the lock word. */
236 CLIB_COMPILER_BARRIER ();
238 while (mmap_page->lock != seq);
/* Start capturing with bundle 'b': configure the session through
 * perfmon_set, enable every perf event group, and for node bundles
 * cache each event's rdpmc index and install the per-thread dispatch
 * wrappers. Fails if a capture is already running. */
244 perfmon_start (vlib_main_t *vm, perfmon_bundle_t *b)
246 clib_error_t *err = 0;
247 perfmon_main_t *pm = &perfmon_main;
250 if (pm->is_running == 1)
251 return clib_error_return (0, "already running");
253 if ((err = perfmon_set (vm, b)) != 0)
256 n_groups = vec_len (pm->group_fds);
/* Enable each group leader; PERF_IOC_FLAG_GROUP applies the ioctl to
 * every member of the group at once. */
258 for (int i = 0; i < n_groups; i++)
260 if (ioctl (pm->group_fds[i], PERF_EVENT_IOC_ENABLE,
261 PERF_IOC_FLAG_GROUP) == -1)
264 return clib_error_return_unix (0, "ioctl(PERF_EVENT_IOC_ENABLE)");
/* Node bundles read counters in the dispatch wrapper; the per-event
 * counter indexes must be valid before dispatch is wrapped. */
267 if (b->active_type == PERFMON_BUNDLE_TYPE_NODE)
269 for (int i = 0; i < vec_len (pm->thread_runtimes); i++)
271 perfmon_thread_runtime_t *tr;
272 tr = vec_elt_at_index (pm->thread_runtimes, i);
274 for (int j = 0; j < b->n_events; j++)
276 tr->indexes[j] = perfmon_mmap_read_index (tr->mmap_pages[j]);
278 /* if a zero index is returned generate error */
282 return clib_error_return (0, "invalid rdpmc index");
/* Install the dispatch wrapper sized for this bundle's event count on
 * every thread. */
287 for (int i = 0; i < vlib_get_n_threads (); i++)
288 vlib_node_set_dispatch_wrapper (
289 vlib_get_main_by_index (i), perfmon_dispatch_wrappers[b->n_events]);
/* Record the capture start time; perfmon_stop converts this into the
 * elapsed duration. */
291 pm->sample_time = vlib_time_now (vm);
/* Stop the running capture: detach the dispatch wrappers (node
 * bundles), disable every perf event group, and convert sample_time
 * from the start timestamp into the elapsed capture duration. */
298 perfmon_stop (vlib_main_t *vm)
300 perfmon_main_t *pm = &perfmon_main;
301 int n_groups = vec_len (pm->group_fds);
303 if (pm->is_running != 1)
304 return clib_error_return (0, "not running");
/* Node bundles: remove the per-thread dispatch wrappers first so no
 * thread keeps sampling while counters are being disabled. */
306 if (pm->active_bundle->active_type == PERFMON_BUNDLE_TYPE_NODE)
308 for (int i = 0; i < vlib_get_n_threads (); i++)
309 vlib_node_set_dispatch_wrapper (vlib_get_main_by_index (i), 0);
/* Disable each group leader and, via PERF_IOC_FLAG_GROUP, all of its
 * member events. */
312 for (int i = 0; i < n_groups; i++)
314 if (ioctl (pm->group_fds[i], PERF_EVENT_IOC_DISABLE,
315 PERF_IOC_FLAG_GROUP) == -1)
318 return clib_error_return_unix (0, "ioctl(PERF_EVENT_IOC_DISABLE)");
/* sample_time held the start time (set in perfmon_start); it now
 * becomes the elapsed capture duration. */
323 pm->sample_time = vlib_time_now (vm) - pm->sample_time;
/* Return non-zero if bundle 'b' can run on this CPU. A bundle without
 * a cpu_supports table is unconditionally supported; otherwise at
 * least one of its cpu_supports predicates must report a match. */
327 static_always_inline u8
328 is_bundle_supported (perfmon_bundle_t *b)
330 perfmon_cpu_supports_t *supports = b->cpu_supports;
/* No CPU constraint table => supported everywhere. */
332 if (!b->cpu_supports)
/* Any matching predicate is sufficient. */
335 for (int i = 0; i < b->n_cpu_supports; ++i)
336 if (supports[i].cpu_supports ())
342 static clib_error_t *
343 perfmon_init (vlib_main_t *vm)
345 perfmon_main_t *pm = &perfmon_main;
346 perfmon_source_t *s = pm->sources;
347 perfmon_bundle_t *b = pm->bundles;
349 pm->source_by_name = hash_create_string (0, sizeof (uword));
353 if (hash_get_mem (pm->source_by_name, s->name) != 0)
354 clib_panic ("duplicate source name '%s'", s->name);
355 if (s->init_fn && ((err = (s->init_fn) (vm, s))))
357 log_warn ("skipping source '%s' - %U", s->name, format_clib_error,
359 clib_error_free (err);
364 hash_set_mem (pm->source_by_name, s->name, s);
365 log_debug ("source '%s' regisrtered", s->name);
369 pm->bundle_by_name = hash_create_string (0, sizeof (uword));
375 if (!is_bundle_supported (b))
377 log_warn ("skipping bundle '%s' - not supported", b->name);
382 if (hash_get_mem (pm->bundle_by_name, b->name) != 0)
383 clib_panic ("duplicate bundle name '%s'", b->name);
385 if ((p = hash_get_mem (pm->source_by_name, b->source)) == 0)
387 log_debug ("missing source '%s', skipping bundle '%s'", b->source,
393 b->src = (perfmon_source_t *) p[0];
394 if (b->init_fn && ((err = (b->init_fn) (vm, b))))
396 log_warn ("skipping bundle '%s' - %U", b->name, format_clib_error,
398 clib_error_free (err);
403 hash_set_mem (pm->bundle_by_name, b->name, b);
404 log_debug ("bundle '%s' regisrtered", b->name);
412 VLIB_INIT_FUNCTION (perfmon_init);