2 * Copyright (c) 2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
20 #include <linux-cp/lcp_nl.h>
22 #include <netlink/route/rule.h>
23 #include <netlink/msg.h>
24 #include <netlink/netlink.h>
25 #include <netlink/socket.h>
26 #include <netlink/route/link.h>
27 #include <netlink/route/route.h>
28 #include <netlink/route/neighbour.h>
29 #include <netlink/route/addr.h>
31 #include <vlib/vlib.h>
32 #include <vlib/unix/unix.h>
33 #include <vppinfra/error.h>
35 #include <vnet/fib/fib_table.h>
37 #include <libmnl/libmnl.h>
39 #include <plugins/linux-cp/lcp_interface.h>
/* Enumeration of netlink event types; presumably the event codes that are
 * signalled to the process node further down in this file.
 * NOTE(review): the enumerators are not visible in this excerpt - confirm
 * against the full definition. */
41 typedef enum nl_event_type_t_
/* State of the netlink listener. A single file-scope instance (nl_main) is
 * defined below and is used by every function in this file.
 * NOTE(review): members such as rx_buf_size, tx_buf_size, batch_size,
 * batch_delay_ms and nl_vfts are used elsewhere in the file but their
 * declarations are not visible in this excerpt. */
47 typedef struct nl_main
/* netlink socket, allocated and connected in lcp_nl_open_socket () */
50 struct nl_sock *sk_route;
/* log class used by the NL_DBG / NL_INFO / NL_ERROR macros */
51 vlib_log_class_t nl_logger;
/* one cache pointer per object type, handed out by lcp_nl_get_cache () */
53 struct nl_cache *nl_caches[LCP_NL_N_OBJS];
/* vector of received messages waiting to be parsed in batches */
54 nl_msg_info_t *nl_msg_queue;
/* index of the clib file polling the socket; ~0 means "no file" */
55 uword clib_file_index;
/* Default tunables; they seed the corresponding nl_main members in the
 * static initializer below. */
64 #define NL_RX_BUF_SIZE_DEF (1 << 27) /* 128 MB */
65 #define NL_TX_BUF_SIZE_DEF (1 << 18) /* 256 kB */
66 #define NL_BATCH_SIZE_DEF (1 << 11) /* 2048 */
67 #define NL_BATCH_DELAY_MS_DEF 50 /* 50 ms, max 20 batch/s */
/* The one file-scope listener instance, pre-loaded with the default socket
 * buffer sizes and batching parameters. Members not named here start out
 * zeroed, as for any static object. */
69 static nl_main_t nl_main = {
70 .rx_buf_size = NL_RX_BUF_SIZE_DEF,
71 .tx_buf_size = NL_TX_BUF_SIZE_DEF,
72 .batch_size = NL_BATCH_SIZE_DEF,
73 .batch_delay_ms = NL_BATCH_DELAY_MS_DEF,
76 /* #define foreach_nl_nft_proto \ */
77 /* _(IP4, "ip", AF_INT) \ */
78 /* _(IP6, "ip6", NFPROTO_IPV6) */
80 /* typedef enum nl_nft_proto_t_ */
82 /* #define _(a,b,c) NL_NFT_PROTO_##a = c, */
83 /* foreach_nl_nft_proto */
85 /* } nl_nft_proto_t; */
/* FOREACH_VFT (__func, __arg): walk every entry of nl_main.nl_vfts and call
 * the entry's __func.cb with __arg. When the entry's __func.is_mp_safe flag
 * is clear, the call is bracketed by a worker-thread barrier sync/release.
 * NOTE(review): the statement taken when __func.cb is NULL is not visible
 * in this excerpt - presumably the entry is skipped; confirm. */
87 #define FOREACH_VFT(__func, __arg) \
89 nl_main_t *nm = &nl_main; \
91 vec_foreach (__nv, nm->nl_vfts) \
93 if (!__nv->__func.cb) \
96 if (!__nv->__func.is_mp_safe) \
97 vlib_worker_thread_barrier_sync (vlib_get_main ()); \
99 __nv->__func.cb (__arg); \
101 if (!__nv->__func.is_mp_safe) \
102 vlib_worker_thread_barrier_release (vlib_get_main ()); \
/* FOREACH_VFT_CTX (__func, __arg, __ctx): same walk over nl_main.nl_vfts as
 * FOREACH_VFT, but the callback receives two arguments, __arg and __ctx.
 * The barrier sync/release around callbacks whose is_mp_safe flag is clear
 * is identical.
 * NOTE(review): the statement taken when __func.cb is NULL is not visible
 * in this excerpt - presumably the entry is skipped; confirm. */
106 #define FOREACH_VFT_CTX(__func, __arg, __ctx) \
108 nl_main_t *nm = &nl_main; \
110 vec_foreach (__nv, nm->nl_vfts) \
112 if (!__nv->__func.cb) \
115 if (!__nv->__func.is_mp_safe) \
116 vlib_worker_thread_barrier_sync (vlib_get_main ()); \
118 __nv->__func.cb (__arg, __ctx); \
120 if (!__nv->__func.is_mp_safe) \
121 vlib_worker_thread_barrier_release (vlib_get_main ()); \
/* Register a set of netlink callbacks: the structure pointed to by nv is
 * copied by value onto the end of nl_main.nl_vfts, so the caller's object
 * does not need to outlive the call. */
126 nl_register_vft (const nl_vft_t *nv)
128 nl_main_t *nm = &nl_main;
130 vec_add1 (nm->nl_vfts, *nv);
/*
 * Logging helpers bound to the listener's log class (nl_main.nl_logger),
 * one per severity: debug, notice and error.
 *
 * The expansions intentionally carry no trailing semicolon. Each macro then
 * behaves like an ordinary expression statement, so it is safe as the sole
 * body of an unbraced if/else and does not leave a stray empty statement
 * behind; the caller writes the ';' as for any function call.
 */
#define NL_DBG(...)   vlib_log_debug (nl_main.nl_logger, __VA_ARGS__)
#define NL_INFO(...)  vlib_log_notice (nl_main.nl_logger, __VA_ARGS__)
#define NL_ERROR(...) vlib_log_err (nl_main.nl_logger, __VA_ARGS__)
/* Forward declarations: the process node function calls both helpers to
 * recycle the socket, but they are defined further down in the file. */
137 static void lcp_nl_open_socket (void);
138 static void lcp_nl_close_socket (void);
/* Pass a route object to every registered nvl_rt_route_del callback.
 * arg is not used in the visible body. */
141 nl_route_del (struct rtnl_route *rr, void *arg)
143 FOREACH_VFT (nvl_rt_route_del, rr);
/* Pass a route object to every registered nvl_rt_route_add callback.
 * arg is not used in the visible body. */
147 nl_route_add (struct rtnl_route *rr, void *arg)
149 FOREACH_VFT (nvl_rt_route_add, rr);
/* Pass a neighbour object to every registered nvl_rt_neigh_del callback.
 * arg is not used in the visible body. */
153 nl_neigh_del (struct rtnl_neigh *rn, void *arg)
155 FOREACH_VFT (nvl_rt_neigh_del, rn);
/* Pass a neighbour object to every registered nvl_rt_neigh_add callback.
 * arg is not used in the visible body. */
159 nl_neigh_add (struct rtnl_neigh *rn, void *arg)
161 FOREACH_VFT (nvl_rt_neigh_add, rn);
/* Pass an address object to every registered nvl_rt_addr_del callback.
 * arg is not used in the visible body. */
165 nl_link_addr_del (struct rtnl_addr *rla, void *arg)
167 FOREACH_VFT (nvl_rt_addr_del, rla);
/* Pass an address object to every registered nvl_rt_addr_add callback.
 * arg is not used in the visible body. */
171 nl_link_addr_add (struct rtnl_addr *rla, void *arg)
173 FOREACH_VFT (nvl_rt_addr_add, rla);
/* Pass a link object to every registered nvl_rt_link_del callback. Unlike
 * the route/neighbour/address handlers, arg is forwarded to the callback as
 * its second (context) argument. */
177 nl_link_del (struct rtnl_link *rl, void *arg)
179 FOREACH_VFT_CTX (nvl_rt_link_del, rl, arg);
/* Pass a link object to every registered nvl_rt_link_add callback. Unlike
 * the route/neighbour/address handlers, arg is forwarded to the callback as
 * its second (context) argument. */
183 nl_link_add (struct rtnl_link *rl, void *arg)
185 FOREACH_VFT_CTX (nvl_rt_link_add, rl, arg);
/* Per-object callback handed to nl_msg_parse (): selects the add/del
 * handler for a parsed object from nl_object_get_msgtype () and casts the
 * object to the matching rtnl_* type. arg is passed through to the handler
 * unchanged. Objects whose message type has no handler are only logged.
 * NOTE(review): the case labels and the statement executed when there are
 * no interface pairs are not visible in this excerpt - presumably the
 * function returns early in that case; confirm. */
189 nl_route_dispatch (struct nl_object *obj, void *arg)
191 /* nothing can be done without interface mappings */
192 if (!lcp_itf_num_pairs ())
195 switch (nl_object_get_msgtype (obj))
198 nl_route_add ((struct rtnl_route *) obj, arg);
201 nl_route_del ((struct rtnl_route *) obj, arg);
204 nl_neigh_add ((struct rtnl_neigh *) obj, arg);
207 nl_neigh_del ((struct rtnl_neigh *) obj, arg);
210 nl_link_addr_add ((struct rtnl_addr *) obj, arg);
213 nl_link_addr_del ((struct rtnl_addr *) obj, arg);
216 nl_link_add ((struct rtnl_link *) obj, arg);
219 nl_link_del ((struct rtnl_link *) obj, arg);
222 NL_INFO ("unhandled: %s", nl_object_get_type (obj));
/* Work through one batch of queued messages. Each message at the head of
 * nl_main.nl_msg_queue is parsed (nl_route_dispatch is invoked per object,
 * with the queue entry as its argument), a parse failure is logged, and the
 * message is freed either way. Counting stops at nl_main.batch_size; the
 * processed entries are then removed from the front of the queue and the
 * count is logged.
 * NOTE(review): the declarations of err / n_msgs and the statement guarding
 * vec_delete are not visible in this excerpt. */
228 nl_route_process_msgs (void)
230 nl_main_t *nm = &nl_main;
231 nl_msg_info_t *msg_info;
234 /* process a batch of messages. break if we hit our limit */
235 vec_foreach (msg_info, nm->nl_msg_queue)
237 if ((err = nl_msg_parse (msg_info->msg, nl_route_dispatch, msg_info)) <
239 NL_ERROR ("Unable to parse object: %s", nl_geterror (err));
240 nlmsg_free (msg_info->msg);
241 if (++n_msgs >= nm->batch_size)
245 /* remove the messages we processed from the head of the queue */
247 vec_delete (nm->nl_msg_queue, n_msgs, 0);
249 NL_INFO ("Processed %u messages", n_msgs);
/* Number of seconds in one day as a floating-point value; used as the
 * initial (effectively "wait for an event") timeout of the process node. */
254 #define DAY_F64 (1.0 * (24 * 60 * 60))
/* Body of the "linux-cp-netlink-process" process node. It suspends until an
 * event arrives or wait_time expires, then acts on the event type:
 *  - timeout or read signal: process one batch of queued messages; if the
 *    queue is still non-empty, the next wait is shortened to
 *    batch_delay_ms (converted to seconds);
 *  - polling/reading error: close and reopen the netlink socket;
 *  - anything else: log the unknown event type.
 * event_data is reset after each round so the vector can be reused.
 * NOTE(review): the loop construct, the case labels and the wait_time value
 * chosen when the queue is empty are not visible in this excerpt. */
257 nl_route_process (vlib_main_t *vm, vlib_node_runtime_t *node,
260 nl_main_t *nm = &nl_main;
262 uword *event_data = 0;
263 f64 wait_time = DAY_F64;
267 /* If we process a batch of messages and stop because we reached the
268 * batch size limit, we want to wake up after the batch delay and
269 * process more. Otherwise we just want to wait for a read event.
271 vlib_process_wait_for_event_or_clock (vm, wait_time);
272 event_type = vlib_process_get_events (vm, &event_data);
276 /* process batch of queued messages on timeout or read event signal */
279 nl_route_process_msgs ();
280 wait_time = (vec_len (nm->nl_msg_queue) != 0) ?
281 nm->batch_delay_ms * 1e-3 :
285 /* reopen the socket if there was an error polling/reading it */
287 lcp_nl_close_socket ();
288 lcp_nl_open_socket ();
292 NL_ERROR ("Unknown event type: %u", (u32) event_type);
295 vec_reset_length (event_data);
297 return frame->n_vectors;
/* Registers nl_route_process as a process-type node named
 * "linux-cp-netlink-process". The stack size is given as a log2 value:
 * 17 requests 2^17 bytes (128 KiB). */
300 VLIB_REGISTER_NODE (nl_route_process_node, static) = {
301 .function = nl_route_process,
302 .name = "linux-cp-netlink-process",
303 .type = VLIB_NODE_TYPE_PROCESS,
304 .process_log2_n_stack_bytes = 17,
/* Receive callback installed on the netlink socket (see the
 * nl_socket_modify_cb call in lcp_nl_open_socket). Messages are not parsed
 * here: a new entry is appended to nl_main.nl_msg_queue, stamped with the
 * current vlib time, and the process node is signalled to do the work later.
 * NOTE(review): the lines that store msg in the entry and take the extra
 * reference mentioned below, the event arguments and the return value are
 * not visible in this excerpt. */
308 nl_route_cb (struct nl_msg *msg, void *arg)
310 nl_main_t *nm = &nl_main;
311 nl_msg_info_t *msg_info = 0;
313 /* delay processing - increment ref count and queue for later */
314 vec_add2 (nm->nl_msg_queue, msg_info, 1);
316 /* store a timestamp for the message */
317 msg_info->ts = vlib_time_now (vlib_get_main ());
321 /* notify process node */
322 vlib_process_signal_event (vlib_get_main (), nl_route_process_node.index,
/* Pull everything currently readable from the netlink socket. Receiving is
 * repeated for as long as nl_recvmsgs_default () succeeds, or fails with
 * -NLE_NOMEM while errno is ENOBUFS. Once the loop ends, the process node
 * is signalled unless the final status is -NLE_AGAIN.
 * NOTE(review): the declaration of err, the event arguments and the return
 * statement are not visible in this excerpt; nl_route_read_cb uses the
 * result as a libnl status code, so presumably err is returned - confirm. */
329 lcp_nl_drain_messages (void)
332 nl_main_t *nm = &nl_main;
334 /* Read until there's an error. Unless the error is ENOBUFS, which means
335 * the kernel couldn't send a message due to socket buffer overflow.
336 * Continue reading when that happens.
338 * libnl translates both ENOBUFS and ENOMEM to NLE_NOMEM. So we need to
339 * check return status and errno to make sure we should keep going.
341 while ((err = nl_recvmsgs_default (nm->sk_route)) > -1 ||
342 (err == -NLE_NOMEM && errno == ENOBUFS))
345 /* If there was an error other than EAGAIN, signal process node */
346 if (err != -NLE_AGAIN)
347 vlib_process_signal_event (vlib_get_main (), nl_route_process_node.index,
/* Interface-pair "added" hook (registered as .pair_add_fn in lcp_nl_init):
 * drains the netlink socket. pair is not used in the visible body and the
 * drain status is discarded. */
354 lcp_nl_pair_add_cb (lcp_itf_pair_t *pair)
356 lcp_nl_drain_messages ();
/* Read-ready callback of the clib file that polls the netlink socket.
 * Drains the socket and logs the fd together with the libnl error text and
 * code when the status is negative and not -NLE_AGAIN.
 * NOTE(review): the declaration of err and the return statement are not
 * visible in this excerpt. */
359 static clib_error_t *
360 nl_route_read_cb (clib_file_t *f)
363 err = lcp_nl_drain_messages ();
364 if (err < 0 && err != -NLE_AGAIN)
365 NL_ERROR ("Error reading netlink socket (fd %d): %s (%d)",
366 f->file_descriptor, nl_geterror (err), err);
/* Error callback of the clib file that polls the netlink socket. Logs the
 * failing fd, signals the process node, and returns a clib error describing
 * the polling failure.
 * NOTE(review): the event arguments of the signal call and the final
 * argument of clib_error_return are not visible in this excerpt. */
371 static clib_error_t *
372 nl_route_error_cb (clib_file_t *f)
374 NL_ERROR ("Error polling netlink socket (fd %d)", f->file_descriptor);
376 /* notify process node */
377 vlib_process_signal_event (vlib_get_main (), nl_route_process_node.index,
380 return clib_error_return (0, "Error polling netlink socket %d",
/* Return the cache pointer stored for object type t in nl_main.nl_caches.
 * t is used directly as the array index; no range check is performed in
 * the visible code, so callers must pass a value below LCP_NL_N_OBJS. */
385 lcp_nl_get_cache (lcp_nl_obj_t t)
387 nl_main_t *nm = &nl_main;
389 return nm->nl_caches[t];
392 /* Set the RX buffer size to be used on the netlink socket */
/* The new size is recorded in nl_main.rx_buf_size and then applied to the
 * socket together with the unchanged tx_buf_size.
 * NOTE(review): a line between the store and the socket call is not
 * visible in this excerpt. This setter is reachable from startup config
 * parsing, so confirm that nm->sk_route is checked for NULL before
 * nl_socket_set_buffer_size () is called. */
394 lcp_nl_set_buffer_size (u32 buf_size)
396 nl_main_t *nm = &nl_main;
398 nm->rx_buf_size = buf_size;
401 nl_socket_set_buffer_size (nm->sk_route, nm->rx_buf_size, nm->tx_buf_size);
404 /* Set the batch size - maximum netlink messages to process at one time */
/* Only stores the value in nl_main.batch_size; nl_route_process_msgs reads
 * it on its next run. The value is not validated here. */
406 lcp_nl_set_batch_size (u32 batch_size)
408 nl_main_t *nm = &nl_main;
410 nm->batch_size = batch_size;
413 /* Set the batch delay - how long to wait in ms between processing batches */
/* Only stores the value in nl_main.batch_delay_ms; the process node
 * converts it to seconds when it computes its next wait time. */
415 lcp_nl_set_batch_delay (u32 batch_delay_ms)
417 nl_main_t *nm = &nl_main;
419 nm->batch_delay_ms = batch_delay_ms;
/* Startup-configuration handler for the "linux-nl" section. Recognised
 * options, each taking an unsigned integer:
 *   nl-rx-buffer-size  - forwarded to lcp_nl_set_buffer_size ()
 *   nl-batch-size      - forwarded to lcp_nl_set_batch_size ()
 *   nl-batch-delay-ms  - forwarded to lcp_nl_set_batch_delay ()
 * Input that matches none of these produces an "invalid netlink option"
 * error. vm is not used in the visible body.
 * NOTE(review): the success return value is not visible in this excerpt. */
422 static clib_error_t *
423 lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
425 u32 buf_size, batch_size, batch_delay_ms;
427 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
429 if (unformat (input, "nl-rx-buffer-size %u", &buf_size))
430 lcp_nl_set_buffer_size (buf_size);
431 else if (unformat (input, "nl-batch-size %u", &batch_size))
432 lcp_nl_set_batch_size (batch_size);
433 else if (unformat (input, "nl-batch-delay-ms %u", &batch_delay_ms))
434 lcp_nl_set_batch_delay (batch_delay_ms);
436 return clib_error_return (0, "invalid netlink option: %U",
437 format_unformat_error, input);
443 VLIB_CONFIG_FUNCTION (lcp_itf_pair_config, "linux-nl");
/* Tear down the netlink socket. If a clib file is registered
 * (clib_file_index != ~0), its fd is removed from the poll set via
 * file_update (..., UNIX_FILE_UPDATE_DELETE); a stored index that does not
 * resolve to a file is reset to ~0. The socket itself is then released with
 * nl_socket_free ().
 * NOTE(review): the conditions guarding the file update and the socket
 * free, and any reset of nm->sk_route afterwards, are not visible in this
 * excerpt. */
446 lcp_nl_close_socket (void)
448 nl_main_t *nm = &nl_main;
450 /* delete existing fd from epoll fd set */
451 if (nm->clib_file_index != ~0)
453 clib_file_main_t *fm = &file_main;
454 clib_file_t *f = clib_file_get (fm, nm->clib_file_index);
458 NL_INFO ("Stopping poll of fd %u", f->file_descriptor);
459 fm->file_update (f, UNIX_FILE_UPDATE_DELETE);
462 /* stored index was not a valid file, reset stored index to ~0 */
463 nm->clib_file_index = ~0;
466 /* If we already created a socket, close/free it */
469 NL_INFO ("Closing netlink socket %d", nl_socket_get_fd (nm->sk_route));
470 nl_socket_free (nm->sk_route);
/* Create and arm the NETLINK_ROUTE socket. In order:
 *  1. allocate the socket and disable sequence-number checking;
 *  2. switch to the namespace given by lcp_get_default_ns_fd (), connect,
 *     then switch back to the namespace saved from /proc/self/ns/net;
 *  3. join the link, address, route, neighbour, notify and rule multicast
 *     groups;
 *  4. make the socket non-blocking and apply the configured buffer sizes;
 *  5. hook the fd into the clib file poller - a new clib file on first use,
 *     otherwise the existing file is pointed at the new fd and re-added;
 *  6. install nl_route_cb as the callback for valid messages.
 * NOTE(review): in the visible lines the results of nl_socket_alloc (),
 * open (), setns () and nl_connect () are not checked, and no close () of
 * curr_ns_fd is visible. The conditions around the namespace switch are
 * also missing from this excerpt - confirm against the full source. */
476 lcp_nl_open_socket (void)
478 nl_main_t *nm = &nl_main;
479 int dest_ns_fd, curr_ns_fd;
481 /* Allocate a new socket for both routes and acls
482 * Notifications do not use sequence numbers, disable sequence number
484 * Define a callback function, which will be called for each notification
487 nm->sk_route = nl_socket_alloc ();
488 nl_socket_disable_seq_check (nm->sk_route);
490 dest_ns_fd = lcp_get_default_ns_fd ();
493 curr_ns_fd = open ("/proc/self/ns/net", O_RDONLY);
494 setns (dest_ns_fd, CLONE_NEWNET);
497 nl_connect (nm->sk_route, NETLINK_ROUTE);
501 setns (curr_ns_fd, CLONE_NEWNET);
505 /* Subscribe to all the 'routing' notifications on the route socket */
506 nl_socket_add_memberships (nm->sk_route, RTNLGRP_LINK, RTNLGRP_IPV6_IFADDR,
507 RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV4_ROUTE,
508 RTNLGRP_IPV6_ROUTE, RTNLGRP_NEIGH, RTNLGRP_NOTIFY,
509 #ifdef RTNLGRP_MPLS_ROUTE /* not defined on CentOS/RHEL 7 */
512 RTNLGRP_IPV4_RULE, RTNLGRP_IPV6_RULE, 0);
514 /* Set socket in nonblocking mode and increase buffer sizes */
515 nl_socket_set_nonblocking (nm->sk_route);
516 nl_socket_set_buffer_size (nm->sk_route, nm->rx_buf_size, nm->tx_buf_size);
518 if (nm->clib_file_index == ~0)
520 clib_file_t rt_file = {
521 .read_function = nl_route_read_cb,
522 .error_function = nl_route_error_cb,
523 .file_descriptor = nl_socket_get_fd (nm->sk_route),
524 .description = format (0, "linux-cp netlink route socket"),
527 nm->clib_file_index = clib_file_add (&file_main, &rt_file);
528 NL_INFO ("Added file %u", nm->clib_file_index);
531 /* clib file already created and socket was closed due to error */
533 clib_file_main_t *fm = &file_main;
534 clib_file_t *f = clib_file_get (fm, nm->clib_file_index);
536 f->file_descriptor = nl_socket_get_fd (nm->sk_route);
537 fm->file_update (f, UNIX_FILE_UPDATE_ADD);
538 NL_INFO ("Starting poll of %d", f->file_descriptor);
541 nl_socket_modify_cb (nm->sk_route, NL_CB_VALID, NL_CB_CUSTOM, nl_route_cb,
543 NL_INFO ("Opened netlink socket %d", nl_socket_get_fd (nm->sk_route));
546 #include <vnet/plugin/plugin.h>
/* Plugin init function. Marks the clib file as "none" (~0), registers the
 * "nl" log class, opens the netlink socket, and registers
 * lcp_nl_pair_add_cb as the interface-pair "added" hook. The
 * VLIB_INIT_FUNCTION entry orders this after the listed init functions.
 * vm is not used in the visible body.
 * NOTE(review): the return statement and the tail of the runs_after list
 * are not visible in this excerpt. */
548 lcp_nl_init (vlib_main_t *vm)
550 nl_main_t *nm = &nl_main;
551 lcp_itf_pair_vft_t nl_itf_pair_vft = {
552 .pair_add_fn = lcp_nl_pair_add_cb,
555 nm->clib_file_index = ~0;
556 nm->nl_logger = vlib_log_register_class ("nl", "nl");
558 lcp_nl_open_socket ();
559 lcp_itf_pair_register_vft (&nl_itf_pair_vft);
564 VLIB_INIT_FUNCTION (lcp_nl_init) = {
565 .runs_after = VLIB_INITS ("lcp_interface_init", "tuntap_init",
569 #include <vpp/app/version.h>
/* Plugin registration record: reports the VPP build version and a
 * description string, and sets default_disabled so the plugin is off
 * unless it is explicitly enabled. */
570 VLIB_PLUGIN_REGISTER () = {
571 .version = VPP_BUILD_VER,
572 .description = "linux Control Plane - Netlink listener",
573 .default_disabled = 1,
577 * fd.io coding-style-patch-verification: ON
580 * eval: (c-set-style "gnu")