2 * node.c - vrrp packet handling node definitions
4 * Copyright 2019-2020 Rubicon Communications, LLC (Netgate)
6 * SPDX-License-Identifier: Apache-2.0
10 #include <vlibmemory/api.h>
11 #include <vnet/vnet.h>
12 #include <vnet/ip/ip4_packet.h>
13 #include <vnet/ip/ip6_link.h>
14 #include <vnet/ethernet/arp_packet.h>
15 #include <vnet/pg/pg.h>
16 #include <vppinfra/error.h>
17 #include <vrrp/vrrp.h>
18 #include <vrrp/vrrp_packet.h>
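25 u8 addrs[256]; /* room for up to 64 IPv4 or 16 IPv6 addresses; NOTE(review): the trace copy and print loops are driven by n_addrs with no visible clamp to this size - confirm */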
28 /* packet trace format function */
30 format_vrrp_trace (u8 * s, va_list * args)
32 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
33 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
34 vrrp_trace_t *t = va_arg (*args, vrrp_trace_t *);
37 s = format (s, "VRRP: sw_if_index %d IPv%d\n",
38 t->sw_if_index, (t->is_ipv6) ? 6 : 4);
39 s = format (s, " %U\n", format_vrrp_packet_hdr, &t->vrrp);
40 s = format (s, " addresses: ");
42 for (i = 0; i < t->vrrp.n_addrs; i++)
45 s = format (s, "%U ", format_ip6_address,
46 (ip6_address_t *) (t->addrs + i * 16));
48 s = format (s, "%U ", format_ip4_address,
49 (ip4_address_t *) (t->addrs + i * 4));
55 extern vlib_node_registration_t vrrp4_input_node;
56 extern vlib_node_registration_t vrrp6_input_node;
57 extern vlib_node_registration_t vrrp4_arp_input_node;
58 extern vlib_node_registration_t vrrp6_nd_input_node;
60 #define foreach_vrrp_error \
61 _(RECEIVED, "VRRP packets processed") \
62 _(BAD_TTL, "VRRP advertisement TTL is not 255") \
63 _(NOT_VERSION_3, "VRRP version is not 3") \
64 _(INCOMPLETE_PKT, "VRRP packet has wrong size") \
65 _(BAD_CHECKSUM, "VRRP checksum is invalid") \
66 _(UNKNOWN_VR, "VRRP message does not match known VRs") \
67 _(ADDR_MISMATCH, "VR addrs do not match configuration")
71 #define _(sym,str) VRRP_ERROR_##sym,
77 static char *vrrp_error_strings[] = {
78 #define _(sym,string) string,
89 typedef struct vrrp_input_process_args
93 } vrrp_input_process_args_t;
95 /* Given a VR and a pointer to the VRRP header of an incoming packet,
96 * compare the local src address to the peer's. Return < 0 if the local
97 * address < the peer address, 0 if they're equal, > 0 if
98 * the local address > the peer address
101 vrrp_vr_addr_cmp (vrrp_vr_t * vr, vrrp_header_t * pkt)
103 vrrp_vr_config_t *vrc = &vr->config;
104 void *peer_addr, *local_addr;
108 clib_memset (&addr, 0, sizeof (addr));
110 if (vrrp_vr_is_ipv6 (vr))
112 peer_addr = &(((ip6_header_t *) pkt) - 1)->src_address;
113 local_addr = &addr.ip6;
115 ip6_address_copy (local_addr,
116 ip6_get_link_local_address (vrc->sw_if_index));
120 peer_addr = &(((ip4_header_t *) pkt) - 1)->src_address;
121 local_addr = &addr.ip4;
123 ip4_src_address_for_packet (&ip4_main.lookup_main,
124 vrc->sw_if_index, local_addr);
127 return memcmp (local_addr, peer_addr, addr_size);
131 vrrp_input_process_master (vrrp_vr_t * vr, vrrp_header_t * pkt)
133 /* received priority 0, another VR is shutting down. send an adv and
134 * remain in the master state
136 if (pkt->priority == 0)
138 clib_warning ("Received shutdown message from a peer on VR %U",
139 format_vrrp_vr_key, vr);
140 vrrp_adv_send (vr, 0);
141 vrrp_vr_timer_set (vr, VRRP_VR_TIMER_ADV);
146 * - received priority > adjusted priority, or
147 * - received priority == adjusted priority and peer addr > local addr
148 * allow the local VR to be preempted by the peer
150 if ((pkt->priority > vrrp_vr_priority (vr)) ||
151 ((pkt->priority == vrrp_vr_priority (vr)) &&
152 (vrrp_vr_addr_cmp (vr, pkt) < 0)))
154 vrrp_vr_transition (vr, VRRP_VR_STATE_BACKUP, pkt);
159 /* if we made it this far, either received priority < adjusted priority or
160 * received == adjusted and local addr > peer addr. Ignore.
165 /* RFC 5798 section 6.4.2 */
167 vrrp_input_process_backup (vrrp_vr_t * vr, vrrp_header_t * pkt)
169 vrrp_vr_config_t *vrc = &vr->config;
170 vrrp_vr_runtime_t *vrt = &vr->runtime;
172 /* master shutting down, ready for election */
173 if (pkt->priority == 0)
175 clib_warning ("Master for VR %U is shutting down", format_vrrp_vr_key,
177 vrt->master_down_int = vrt->skew;
178 vrrp_vr_timer_set (vr, VRRP_VR_TIMER_MASTER_DOWN);
182 /* no preempt set or adv from an equal or higher priority router, update timers */
183 if (!(vrc->flags & VRRP_VR_PREEMPT) ||
184 (pkt->priority >= vrrp_vr_priority (vr)))
186 vrt->master_adv_int = clib_net_to_host_u16 (pkt->rsvd_and_max_adv_int);
187 vrt->master_adv_int &= ((u16) 0x0fff); /* ignore rsvd bits */
189 vrrp_vr_skew_compute (vr);
190 vrrp_vr_master_down_compute (vr);
191 vrrp_vr_timer_set (vr, VRRP_VR_TIMER_MASTER_DOWN);
195 /* preempt set and our priority > received, continue to wait on master down */
200 vrrp_input_process (vrrp_input_process_args_t * args)
204 vr = vrrp_vr_lookup_index (args->vr_index);
208 clib_warning ("Error retrieving VR with index %u", args->vr_index);
212 switch (vr->runtime.state)
214 case VRRP_VR_STATE_INIT:
216 case VRRP_VR_STATE_BACKUP:
217 /* this is usually the only state in which an advertisement should be received */
218 vrrp_input_process_backup (vr, args->pkt);
220 case VRRP_VR_STATE_MASTER:
221 /* might be getting preempted. or have a misbehaving peer */
222 clib_warning ("Received advertisement for master VR %U",
223 format_vrrp_vr_key, vr);
224 vrrp_input_process_master (vr, args->pkt);
227 clib_warning ("Received advertisement for VR %U in unknown state %d",
228 format_vrrp_vr_key, vr, vr->runtime.state);
241 } vrrp_arp_nd_trace_t;
245 format_vrrp_arp_nd_input_trace (u8 * s, va_list * va)
247 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
248 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
249 vrrp_arp_nd_trace_t *t = va_arg (*va, vrrp_arp_nd_trace_t *);
251 s = format (s, "address %U",
252 (t->is_ipv6) ? format_ip6_address : format_ip4_address,
253 (t->is_ipv6) ? (void *) &t->ip.ip6 : (void *) &t->ip.ip4);
255 if (t->vr_index != ~0)
256 s = format (s, ": vr_index %u vr_id %u", t->vr_index, t->vr_id);
263 VRRP_ARP_INPUT_NEXT_DROP,
264 VRRP_ARP_INPUT_NEXT_REPLY_TX,
270 VRRP_ND_INPUT_NEXT_DROP,
271 VRRP_ND_INPUT_NEXT_REPLY_TX,
275 static_always_inline void
276 vrrp_arp_nd_next (vlib_buffer_t * b, u32 * next_index, u32 * vr_index,
279 vnet_main_t *vnm = vnet_get_main ();
280 vlib_main_t *vm = vlib_get_main ();
281 ethernet_header_t *eth, *eth_new;
282 void *lookup_addr = 0;
285 vnet_link_t link_type;
286 u8 *rewrite, rewrite_len;
289 ip6_header_t *ip6 = 0;
290 icmp6_neighbor_solicitation_or_advertisement_header_t *sol_adv = 0;
291 icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *lladdr = 0;
293 ethernet_arp_header_t *arp;
294 ip4_address_t ip4_addr;
298 ip6 = vlib_buffer_get_current (b);
300 /* we only care about ICMP6 neighbor solicitations */
301 if (ip6->protocol != IP_PROTOCOL_ICMP6)
304 sol_adv = ip6_next_header (ip6);
305 lladdr = (void *) (sol_adv + 1);
307 /* skip anything other than neighbor solicitations */
308 if (sol_adv->icmp.type != ICMP6_neighbor_solicitation)
311 lookup_addr = &sol_adv->target_address;
312 link_type = VNET_LINK_IP6;
316 arp = vlib_buffer_get_current (b);
318 /* skip non-request packets */
319 if (arp->opcode != clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request))
322 lookup_addr = &arp->ip4_over_ethernet[1].ip4;
323 link_type = VNET_LINK_ARP;
326 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
328 /* Don't bother with a hash lookup if no VRs configured on this interface */
329 if (!vrrp_intf_num_vrs (sw_if_index, is_ipv6))
332 /* skip requests that are not for VRRP addresses */
333 *vr_index = vrrp_vr_lookup_address (sw_if_index, is_ipv6, lookup_addr);
337 /* only reply if the VR is in the master state */
338 vr = vrrp_vr_lookup_index (*vr_index);
339 if (!vr || vr->runtime.state != VRRP_VR_STATE_MASTER)
342 eth = ethernet_buffer_get_header (b);
343 rewrite = ethernet_build_rewrite (vnm, sw_if_index, link_type,
345 rewrite_len = vec_len (rewrite);
346 if (rewrite_len == 0)
349 /* send the reply out the incoming interface */
350 *next_index = VRRP_ARP_INPUT_NEXT_REPLY_TX;
351 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
353 /* the outbound ethernet & vlan headers may have a different length than
354 * the received header, so get a pointer to the new start of the packet
355 * and write the header there.
357 vlib_buffer_advance (b, -rewrite_len);
358 eth_new = vlib_buffer_get_current (b);
359 clib_memcpy_fast (eth_new, rewrite, rewrite_len);
364 if (ip6_address_is_unspecified (&ip6->src_address))
365 ip6_set_reserved_multicast_address (&ip6->dst_address,
366 IP6_MULTICAST_SCOPE_link_local,
367 IP6_MULTICAST_GROUP_ID_all_hosts);
369 ip6->dst_address = ip6->src_address;
371 ip6->src_address = sol_adv->target_address;
372 ip6->hop_limit = 255;
373 sol_adv->icmp.type = ICMP6_neighbor_advertisement;
374 sol_adv->icmp.checksum = 0;
375 sol_adv->advertisement_flags =
376 clib_host_to_net_u32 (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_ROUTER
377 | ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED
378 | ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
380 clib_memcpy (lladdr->ethernet_address, vr->runtime.mac.bytes,
381 sizeof (mac_address_t));
382 lladdr->header.type =
383 ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
385 sol_adv->icmp.checksum =
386 ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus_length);
391 ip4_addr = arp->ip4_over_ethernet[1].ip4;
393 arp->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
394 arp->ip4_over_ethernet[1] = arp->ip4_over_ethernet[0];
396 arp->ip4_over_ethernet[0].mac = vr->runtime.mac;
397 arp->ip4_over_ethernet[0].ip4 = ip4_addr;
401 static_always_inline uword
402 vrrp_arp_nd_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
403 vlib_frame_t * frame, u8 is_ipv6)
405 u32 n_left_from, *from, next_index, *to_next;
407 from = vlib_frame_vector_args (frame);
408 n_left_from = frame->n_vectors;
409 next_index = node->cached_next_index;
411 while (n_left_from > 0)
415 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
417 while (n_left_from > 0 && n_left_to_next > 0)
432 b0 = vlib_get_buffer (vm, bi0);
434 vnet_feature_next (&next0, b0);
435 vrrp_arp_nd_next (b0, &next0, &vr_index, is_ipv6);
437 if (b0->flags & VLIB_BUFFER_IS_TRACED)
439 vrrp_arp_nd_trace_t *t =
440 vlib_add_trace (vm, node, b0, sizeof (*t));
446 icmp6_neighbor_solicitation_or_advertisement_header_t
449 ip0 = vlib_buffer_get_current (b0);
450 sol_adv0 = ip6_next_header (ip0);
451 t->ip.ip6 = sol_adv0->target_address;
455 ethernet_arp_header_t *arp0;
457 arp0 = vlib_buffer_get_current (b0);
458 t->ip.ip4 = arp0->ip4_over_ethernet[0].ip4;
461 vr = vrrp_vr_lookup_index (vr_index);
463 t->vr_id = vr->config.vr_id;
465 t->vr_index = vr_index;
466 t->is_ipv6 = is_ipv6;
469 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
470 n_left_to_next, bi0, next0);
473 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
476 return frame->n_vectors;
479 VLIB_NODE_FN (vrrp4_arp_input_node) (vlib_main_t * vm,
480 vlib_node_runtime_t * node,
481 vlib_frame_t * frame)
483 return vrrp_arp_nd_input_inline (vm, node, frame, 0 /* is_ipv6 */ );
487 VLIB_REGISTER_NODE (vrrp4_arp_input_node) =
489 .name = "vrrp4-arp-input",
490 .vector_size = sizeof (u32),
491 .format_trace = format_vrrp_arp_nd_input_trace,
492 .type = VLIB_NODE_TYPE_INTERNAL,
494 .n_errors = ARRAY_LEN(vrrp_error_strings),
495 .error_strings = vrrp_error_strings,
497 .n_next_nodes = VRRP_ARP_N_NEXT,
500 [VRRP_ARP_INPUT_NEXT_DROP] = "error-drop",
501 [VRRP_ARP_INPUT_NEXT_REPLY_TX] = "interface-output",
505 VNET_FEATURE_INIT (vrrp4_arp_feat_node, static) =
508 .node_name = "vrrp4-arp-input",
509 .runs_before = VNET_FEATURES ("arp-reply"),
512 VLIB_NODE_FN (vrrp6_nd_input_node) (vlib_main_t * vm,
513 vlib_node_runtime_t * node,
514 vlib_frame_t * frame)
516 return vrrp_arp_nd_input_inline (vm, node, frame, 1 /* is_ipv6 */);
520 VLIB_REGISTER_NODE (vrrp6_nd_input_node) =
522 .name = "vrrp6-nd-input",
523 .vector_size = sizeof (u32),
524 .format_trace = format_vrrp_arp_nd_input_trace,
525 .type = VLIB_NODE_TYPE_INTERNAL,
527 .n_errors = ARRAY_LEN(vrrp_error_strings),
528 .error_strings = vrrp_error_strings,
530 .n_next_nodes = VRRP_ND_N_NEXT,
533 [VRRP_ND_INPUT_NEXT_DROP] = "error-drop",
534 [VRRP_ND_INPUT_NEXT_REPLY_TX] = "interface-output",
538 VNET_FEATURE_INIT (vrrp6_nd_feat_node, static) =
540 .arc_name = "ip6-local",
541 .node_name = "vrrp6-nd-input",
542 .runs_before = VNET_FEATURES ("ip6-local-end-of-arc"),
545 static_always_inline uword
546 vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
547 vlib_frame_t * frame, u8 is_ipv6)
549 u32 n_left_from, *from;
550 vrrp_main_t *vmp = &vrrp_main;
552 from = vlib_frame_vector_args (frame);
553 n_left_from = frame->n_vectors;
555 while (n_left_from > 0)
561 vrrp_header_t *vrrp0;
563 vrrp_input_process_args_t args0;
570 b0 = vlib_get_buffer (vm, bi0);
572 ip0 = vlib_buffer_get_current (b0);
576 ip6_header_t *ip6 = ip0;
578 vrrp0 = (vrrp_header_t *) (ip6 + 1);
579 ttl0 = &ip6->hop_limit;
581 payload_len0 = clib_net_to_host_u16 (ip6->payload_length);
582 vlib_buffer_advance (b0, sizeof (*ip6));
586 ip4_header_t *ip4 = ip0;
588 vrrp0 = (vrrp_header_t *) (ip4 + 1);
591 payload_len0 = clib_net_to_host_u16 (ip4->length) - sizeof(*ip4);
592 vlib_buffer_advance (b0, sizeof (*ip4));
595 next0 = VRRP_INPUT_NEXT_DROP;
597 error0 = VRRP_ERROR_RECEIVED;
599 /* Validation from RFC 5798 sec 7.1 */
601 /* checksum set to 0 for calculation, save original value */
602 rx_csum0 = vrrp0->checksum;
605 /* Mandatory - TTL/hop limit must be 255 */
608 error0 = VRRP_ERROR_BAD_TTL;
612 /* Mandatory - VRRP version must be 3 */
613 if ((vrrp0->vrrp_version_and_type >> 4) != 3)
615 error0 = VRRP_ERROR_NOT_VERSION_3;
619 /* Mandatory - packet must be complete */
620 if (b0->current_length < sizeof (*vrrp0) +
621 ((u32) vrrp0->n_addrs) * addr_len)
623 error0 = VRRP_ERROR_INCOMPLETE_PKT;
627 /* Mandatory - checksum must be correct */
628 if (rx_csum0 != vrrp_adv_csum (ip0, vrrp0, is_ipv6, payload_len0))
630 error0 = VRRP_ERROR_BAD_CHECKSUM;
634 /* Mandatory - VR must be configured on the interface adv received on */
636 vrrp_vr_lookup (vnet_buffer(b0)->sw_if_index[VLIB_RX],
637 vrrp0->vr_id, is_ipv6)))
639 error0 = VRRP_ERROR_UNKNOWN_VR;
643 /* Optional - count of addresses should match configuration */
644 /* Could also check that addresses match, but likely to be O(n^2) */
645 if (vrrp0->n_addrs != vec_len (vr0->config.vr_addrs))
647 error0 = VRRP_ERROR_ADDR_MISMATCH;
651 /* signal main thread to process contents of packet */
652 args0.vr_index = vr0 - vmp->vrs;
655 vl_api_rpc_call_main_thread (vrrp_input_process, (u8 *) &args0,
659 vrrp0->checksum = rx_csum0; /* restore csum for correct trace output */
660 b0->error = node->errors[error0];
662 if (b0->flags & VLIB_BUFFER_IS_TRACED)
664 vrrp_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
665 size_t addr_len = (is_ipv6 ? 16 : 4);
667 t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX];
668 t->is_ipv6 = is_ipv6;
669 clib_memcpy_fast (&t->vrrp, vrrp0, sizeof (*vrrp0));
670 clib_memcpy_fast (t->addrs, (void *) (vrrp0 + 1),
671 (size_t) vrrp0->n_addrs * addr_len);
674 /* always drop, never forward or reply here */
675 vlib_set_next_frame_buffer (vm, node, next0, bi0);
681 return frame->n_vectors;
684 VLIB_NODE_FN (vrrp4_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
685 vlib_frame_t * frame)
687 return vrrp_input_inline (vm, node, frame, 0);
691 VLIB_REGISTER_NODE (vrrp4_input_node) =
693 .name = "vrrp4-input",
694 .vector_size = sizeof (u32),
695 .format_trace = format_vrrp_trace,
696 .type = VLIB_NODE_TYPE_INTERNAL,
698 .n_errors = ARRAY_LEN(vrrp_error_strings),
699 .error_strings = vrrp_error_strings,
701 .n_next_nodes = VRRP_INPUT_N_NEXT,
704 [VRRP_INPUT_NEXT_DROP] = "error-drop",
708 VLIB_NODE_FN (vrrp6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
709 vlib_frame_t * frame)
711 return vrrp_input_inline (vm, node, frame, 1);
714 VLIB_REGISTER_NODE (vrrp6_input_node) =
716 .name = "vrrp6-input",
717 .vector_size = sizeof (u32),
718 .format_trace = format_vrrp_trace,
719 .type = VLIB_NODE_TYPE_INTERNAL,
721 .n_errors = ARRAY_LEN(vrrp_error_strings),
722 .error_strings = vrrp_error_strings,
724 .n_next_nodes = VRRP_INPUT_N_NEXT,
727 [VRRP_INPUT_NEXT_DROP] = "error-drop",
731 static clib_error_t *
732 vrrp_input_init (vlib_main_t *vm)
736 if ((error = vlib_call_init_function (vm, vrrp_init)))
739 ip4_register_protocol (IP_PROTOCOL_VRRP, vrrp4_input_node.index);
740 ip6_register_protocol (IP_PROTOCOL_VRRP, vrrp6_input_node.index);
745 VLIB_INIT_FUNCTION (vrrp_input_init);
750 * fd.io coding-style-patch-verification: ON
753 * eval: (c-set-style "gnu")