2 * Copyright (c) 2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <nat/nat_inlines.h>
17 #include <nat/nat44/ed_inlines.h>
18 #include <nat/nat44-ei/nat44_ei_ha.h>
19 #include <vnet/udp/udp_local.h>
21 #include <vppinfra/atomics.h>
23 /* maximum number of resend attempts for a message that was not ACKed */
24 #define NAT_HA_RETRIES 3
/* X-macro list of HA counters: _(enum suffix, counter name, counter index) */
26 #define foreach_nat_ha_counter \
27 _(RECV_ADD, "add-event-recv", 0) \
28 _(RECV_DEL, "del-event-recv", 1) \
29 _(RECV_REFRESH, "refresh-event-recv", 2) \
30 _(SEND_ADD, "add-event-send", 3) \
31 _(SEND_DEL, "del-event-send", 4) \
32 _(SEND_REFRESH, "refresh-event-send", 5) \
33 _(RECV_ACK, "ack-recv", 6) \
34 _(SEND_ACK, "ack-send", 7) \
35 _(RETRY_COUNT, "retry-count", 8) \
36 _(MISSED_COUNT, "missed-count", 9)
38 /* NAT HA protocol version (checked against the received header version) */
39 #define NAT_HA_VERSION 0x01
41 /* NAT HA protocol flags */
/* set in the header of an acknowledgement message */
42 #define NAT_HA_FLAG_ACK 0x01
44 /* NAT HA event types */
50 } nat_ha_event_type_t;
52 /* NAT HA protocol header */
63 /* thread index where events originated */
65 } __attribute__ ((packed)) nat_ha_message_header_t;
67 /* NAT HA protocol event data */
86 } __attribute__ ((packed)) nat_ha_event_t;
90 #define _(N, s, v) NAT_HA_COUNTER_##N = v,
91 foreach_nat_ha_counter
96 /* data waiting for ACK */
103 /* next retry time */
109 } nat_ha_resend_entry_t;
111 /* per thread data: the state sync message being assembled and the
 * messages already sent that still wait for an ACK */
114 /* buffer under construction */
115 vlib_buffer_t *state_sync_buffer;
116 /* frame containing NAT HA buffers */
117 vlib_frame_t *state_sync_frame;
118 /* number of events placed in the buffer under construction */
119 u16 state_sync_count;
120 /* offset in the buffer data where the next event will be written */
121 u32 state_sync_next_event_offset;
122 /* vector of sent messages waiting for ACK */
123 nat_ha_resend_entry_t *resend_queue;
124 } nat_ha_per_thread_data_t;
126 /* NAT HA settings and runtime state */
127 typedef struct nat_ha_main_s
130 /* local IP address and UDP port */
131 ip4_address_t src_ip_address;
133 /* failover IP address and UDP port */
134 ip4_address_t dst_ip_address;
136 /* path MTU between local and failover */
137 u32 state_sync_path_mtu;
138 /* number of seconds after which to send session counters refresh */
139 u32 session_refresh_interval;
/* one simple counter per entry of foreach_nat_ha_counter */
141 vlib_simple_counter_main_t counters[NAT_HA_N_COUNTERS];
142 vlib_main_t *vlib_main;
143 /* sequence number counter */
145 /* 1 if resync in progress */
147 /* number of remaining ACKs for resync */
148 u32 resync_ack_count;
149 /* number of missed ACKs for resync */
150 u32 resync_ack_missed;
/* invoked by nat_ha_resync_fin when the resync has finished */
152 nat_ha_resync_event_cb_t event_callback;
155 /* per thread data */
157 nat_ha_per_thread_data_t *per_thread_data;
/* graph node indexes resolved by name in nat_ha_set_node_indexes */
159 u32 ha_handoff_node_index;
160 u32 ha_process_node_index;
161 u32 ha_worker_node_index;
164 /* worker handoff frame-queue index */
/* the single global instance of the NAT HA state */
168 nat_ha_main_t nat_ha_main;
/*
 * Create a local session from the fields of a received HA add event
 * (called from nat_ha_recv_add).  The session is inserted into both the
 * out2in and the in2out hash of the given thread; a failed hash insert
 * is only logged.
 */
170 static_always_inline void
171 nat44_ei_ha_sadd (ip4_address_t *in_addr, u16 in_port, ip4_address_t *out_addr,
172 u16 out_port, ip4_address_t *eh_addr, u16 eh_port,
173 ip4_address_t *ehn_addr, u16 ehn_port, u8 proto,
174 u32 fib_index, u16 flags, u32 thread_index)
176 snat_main_t *sm = &snat_main;
177 snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
180 clib_bihash_kv_8_8_t kv;
181 vlib_main_t *vm = vlib_get_main ();
182 f64 now = vlib_time_now (vm);
183 nat_outside_fib_t *outside_fib;
184 fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
/* IPv4 prefix built from the external host address; used for the
 * outside FIB lookup further down */
186 .fp_proto = FIB_PROTOCOL_IP4,
189 .ip4.as_u32 = eh_addr->as_u32,
/* sessions not created from a static mapping: presumably marks the
 * outside address/port as in use locally - confirm against
 * nat_set_outside_address_and_port */
193 if (!(flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
195 if (nat_set_outside_address_and_port (sm->addresses, thread_index,
196 *out_addr, out_port, proto))
/* find or create the inside user, then take a session for it */
200 u = nat_user_get_or_create (sm, in_addr, fib_index, thread_index);
204 s = nat_session_alloc_or_recycle (sm, u, thread_index, now);
208 if (sm->endpoint_dependent)
210 nat_ed_lru_insert (tsm, s, now, nat_proto_to_ip_proto (proto));
/* outside (out2in) side of the session */
213 s->out2in.addr.as_u32 = out_addr->as_u32;
214 s->out2in.port = out_port;
215 s->nat_proto = proto;
218 s->ext_host_addr.as_u32 = eh_addr->as_u32;
219 s->ext_host_port = eh_port;
220 user_session_increment (sm, u, snat_is_session_static (s));
/* choose the outside FIB depending on how many outside FIBs are
 * configured: the visible branches use sm->outside_fib_index, the
 * first configured FIB, or the FIB whose lookup of the external host
 * resolves to an interface */
221 switch (vec_len (sm->outside_fibs))
224 s->out2in.fib_index = sm->outside_fib_index;
227 s->out2in.fib_index = sm->outside_fibs[0].fib_index;
230 vec_foreach (outside_fib, sm->outside_fibs)
232 fei = fib_table_lookup (outside_fib->fib_index, &pfx);
233 if (FIB_NODE_INDEX_INVALID != fei)
235 if (fib_entry_get_resolving_interface (fei) != ~0)
237 s->out2in.fib_index = outside_fib->fib_index;
/* the hash value is the session index within this thread's pool */
244 init_nat_o2i_kv (&kv, s, s - tsm->sessions);
245 if (clib_bihash_add_del_8_8 (&tsm->out2in, &kv, 1))
246 nat_elog_warn ("out2in key add failed");
/* inside (in2out) side of the session */
248 s->in2out.addr.as_u32 = in_addr->as_u32;
249 s->in2out.port = in_port;
250 s->in2out.fib_index = fib_index;
251 init_nat_i2o_kv (&kv, s, s - tsm->sessions);
252 if (clib_bihash_add_del_8_8 (&tsm->in2out, &kv, 1))
253 nat_elog_warn ("in2out key add failed");
/*
 * Delete the local session matching a received HA delete event (called
 * from nat_ha_recv_del).  The session is looked up by outside
 * address/port, FIB index and protocol in the out2in hash of the thread
 * selected below, then freed.
 */
256 static_always_inline void
257 nat44_ei_ha_sdel (ip4_address_t *out_addr, u16 out_port,
258 ip4_address_t *eh_addr, u16 eh_port, u8 proto, u32 fib_index,
261 snat_main_t *sm = &snat_main;
262 clib_bihash_kv_8_8_t kv, value;
265 snat_main_per_thread_data_t *tsm;
/* with several workers the owning thread is derived from the outside
 * port (converted to host order); thread_index is assigned here.
 * NOTE(review): (port - 1024) / port_per_thread indexes sm->workers
 * without a range check - a port below 1024 in a received event looks
 * unsafe; confirm the peer can never send one. */
267 if (sm->num_workers > 1)
268 thread_index = sm->first_worker_index +
269 (sm->workers[(clib_net_to_host_u16 (out_port) - 1024) /
270 sm->port_per_thread]);
272 thread_index = sm->num_workers;
273 tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
275 init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
276 if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
/* value.value is the session index in the thread's session pool */
279 s = pool_elt_at_index (tsm->sessions, value.value);
280 nat_free_session_data (sm, s, thread_index, 1);
281 nat44_delete_session (sm, s, thread_index);
/*
 * Apply a received HA refresh event (called from nat_ha_recv_refresh):
 * look the session up in the out2in hash of the given thread and
 * overwrite its packet and byte totals with the received values.
 */
284 static_always_inline void
285 nat44_ei_ha_sref (ip4_address_t *out_addr, u16 out_port,
286 ip4_address_t *eh_addr, u16 eh_port, u8 proto, u32 fib_index,
287 u32 total_pkts, u64 total_bytes, u32 thread_index)
289 snat_main_t *sm = &snat_main;
290 clib_bihash_kv_8_8_t kv, value;
292 snat_main_per_thread_data_t *tsm;
294 tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
296 init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
297 if (clib_bihash_search_8_8 (&tsm->out2in, &kv, &value))
/* value.value is the session index in the thread's session pool */
300 s = pool_elt_at_index (tsm->sessions, value.value);
301 s->total_pkts = total_pkts;
302 s->total_bytes = total_bytes;
/*
 * Check whether a resync has finished.  Logs the result (FAILED when at
 * least one resync ACK was missed, SUCCESS otherwise) and, if one is
 * registered, invokes the event callback with the client index, pid and
 * missed ACK count.
 */
306 nat_ha_resync_fin (void)
308 nat_ha_main_t *ha = &nat_ha_main;
310 /* if no more resync ACKs are remaining we are done */
311 if (ha->resync_ack_count)
315 if (ha->resync_ack_missed)
317 nat_elog_info ("resync completed with result FAILED");
321 nat_elog_info ("resync completed with result SUCCESS");
323 if (ha->event_callback)
324 ha->event_callback (ha->client_index, ha->pid, ha->resync_ack_missed);
327 /* cache HA NAT data waiting for ACK */
/*
 * Appends a zeroed entry to the thread's resend queue, arms its retry
 * timer 2 seconds from now and stores a private copy of data_len bytes
 * of the message.
 * NOTE(review): data_len is u8, but nat_ha_send passes b->current_length
 * and messages are filled up to state_sync_path_mtu (a u32); lengths
 * above 255 would presumably be truncated in the cached copy - confirm
 * and consider widening the parameter.
 */
329 nat_ha_resend_queue_add (u32 seq, u8 * data, u8 data_len, u8 is_resync,
332 nat_ha_main_t *ha = &nat_ha_main;
333 nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
334 nat_ha_resend_entry_t *entry;
335 f64 now = vlib_time_now (ha->vlib_main);
337 vec_add2 (td->resend_queue, entry, 1);
338 clib_memset (entry, 0, sizeof (*entry));
339 entry->retry_timer = now + 2.0;
341 entry->is_resync = is_resync;
/* entry->data is a fresh vector holding a copy of the message bytes */
342 vec_add (entry->data, data, data_len);
/*
 * Handle a received ACK: find the cached message of this thread with the
 * same sequence number, count the ACK, update the resync bookkeeping if
 * the message belonged to a resync, then free the cached copy and remove
 * the queue entry.  seq is compared as stored; it is only converted to
 * host order for the debug log.
 */
347 static_always_inline void
348 nat_ha_ack_recv (u32 seq, u32 thread_index)
350 nat_ha_main_t *ha = &nat_ha_main;
351 nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
354 vec_foreach_index (i, td->resend_queue)
356 if (td->resend_queue[i].seq != seq)
359 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_ACK],
361 /* ACK received remove cached data */
362 if (td->resend_queue[i].is_resync)
364 clib_atomic_fetch_sub (&ha->resync_ack_count, 1);
365 nat_ha_resync_fin ();
/* NOTE(review): the entry is removed while the queue is being iterated;
 * this is only safe if the loop stops after the first match - confirm */
367 vec_free (td->resend_queue[i].data);
368 vec_del1 (td->resend_queue, i);
369 nat_elog_debug_X1 ("ACK for seq %d received", "i4",
370 clib_net_to_host_u32 (seq));
376 /* scan non-ACKed HA NAT for retry */
/*
 * Walks the thread's resend queue.  Entries whose retry timer has not
 * expired are left alone.  Entries that reached NAT_HA_RETRIES are
 * logged as missed, counted, accounted against a running resync and
 * collected for deletion.  All other expired entries are copied into a
 * newly allocated buffer, sent to ip4-lookup and re-armed for 2 seconds.
 */
378 nat_ha_resend_scan (f64 now, u32 thread_index)
380 nat_ha_main_t *ha = &nat_ha_main;
381 nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
382 u32 i, *del, *to_delete = 0;
383 vlib_main_t *vm = ha->vlib_main;
384 vlib_buffer_t *b = 0;
389 vec_foreach_index (i, td->resend_queue)
391 if (td->resend_queue[i].retry_timer > now)
394 /* maximum retry reached delete cached data */
395 if (td->resend_queue[i].retry_count >= NAT_HA_RETRIES)
397 nat_elog_notice_X1 ("seq %d missed", "i4",
398 clib_net_to_host_u32 (td->resend_queue[i].seq));
399 if (td->resend_queue[i].is_resync)
401 clib_atomic_fetch_add (&ha->resync_ack_missed, 1);
402 clib_atomic_fetch_sub (&ha->resync_ack_count, 1);
403 nat_ha_resync_fin ();
/* removal is deferred until the scan is finished */
405 vec_add1 (to_delete, i);
406 vlib_increment_simple_counter (&ha->counters
407 [NAT_HA_COUNTER_MISSED_COUNT],
412 /* retry to send non-ACKed data */
413 nat_elog_debug_X1 ("state sync seq %d resend", "i4",
414 clib_net_to_host_u32 (td->resend_queue[i].seq));
415 td->resend_queue[i].retry_count++;
416 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RETRY_COUNT],
418 if (vlib_buffer_alloc (vm, &bi, 1) != 1)
420 nat_elog_warn ("HA NAT state sync can't allocate buffer");
/* rebuild the packet from the cached copy (it starts at the IP header) */
423 b = vlib_get_buffer (vm, bi);
424 b->current_length = vec_len (td->resend_queue[i].data);
425 b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
426 b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
427 vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
428 vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
429 ip = vlib_buffer_get_current (b);
430 clib_memcpy (ip, td->resend_queue[i].data,
431 vec_len (td->resend_queue[i].data));
432 f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
433 to_next = vlib_frame_vector_args (f);
436 vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
437 td->resend_queue[i].retry_timer = now + 2.0;
/* NOTE(review): to_delete holds indexes in ascending order.  vec_del1
 * does not preserve element order (per vppinfra it moves the last
 * element into the freed slot), so after the first removal the
 * remaining indexes may name a different entry or lie past the end of
 * the vector.  Confirm, and consider deleting in descending order. */
440 vec_foreach (del, to_delete)
442 vec_free (td->resend_queue[*del].data);
443 vec_del1 (td->resend_queue, *del);
445 vec_free (to_delete);
451 nat_ha_main_t *ha = &nat_ha_main;
458 nat_ha_main_t *ha = &nat_ha_main;
/*
 * Resolve the four NAT HA graph nodes by name and store their indexes
 * in ha.
 * NOTE(review): the result of vlib_get_node_by_name is dereferenced
 * without a check; presumably it is null when the node does not exist -
 * confirm all four nodes are always registered.
 */
464 nat_ha_set_node_indexes (nat_ha_main_t *ha, vlib_main_t *vm)
468 node = vlib_get_node_by_name (vm, (u8 *) "nat-ha-handoff");
469 ha->ha_handoff_node_index = node->index;
470 node = vlib_get_node_by_name (vm, (u8 *) "nat-ha-process");
471 ha->ha_process_node_index = node->index;
472 node = vlib_get_node_by_name (vm, (u8 *) "nat-ha-worker");
473 ha->ha_worker_node_index = node->index;
474 node = vlib_get_node_by_name (vm, (u8 *) "nat-ha");
475 ha->ha_node_index = node->index;
/*
 * Initialise the global NAT HA state: zero it, resolve the node indexes,
 * record the number of workers, size the per thread data vector and set
 * up every counter listed in foreach_nat_ha_counter.
 */
479 nat_ha_init (vlib_main_t * vm, u32 num_workers, u32 num_threads)
481 nat_ha_main_t *ha = &nat_ha_main;
482 clib_memset (ha, 0, sizeof (*ha));
484 nat_ha_set_node_indexes (ha, vm);
489 ha->num_workers = num_workers;
/* presumably makes index num_threads valid, i.e. num_threads + 1
 * elements - confirm against vec_validate */
490 vec_validate (ha->per_thread_data, num_threads);
/* per counter: set its name and stat segment name under /nat44/ha/,
 * then validate and zero index 0 */
492 #define _(N, s, v) ha->counters[v].name = s; \
493 ha->counters[v].stat_segment_name = "/nat44/ha/" s; \
494 vlib_validate_simple_counter(&ha->counters[v], 0); \
495 vlib_zero_simple_counter(&ha->counters[v], 0);
496 foreach_nat_ha_counter
/*
 * Configure the local endpoint of the state sync channel: store the
 * local address and path MTU and register the UDP destination port with
 * the node that will receive HA messages.
 */
501 nat_ha_set_listener (ip4_address_t * addr, u16 port, u32 path_mtu)
503 nat_ha_main_t *ha = &nat_ha_main;
505 /* unregister previously set UDP port */
507 udp_unregister_dst_port (ha->vlib_main, ha->src_port, 1);
509 ha->src_ip_address.as_u32 = addr->as_u32;
511 ha->state_sync_path_mtu = path_mtu;
515 /* if multiple worker threads first go to handoff node */
516 if (ha->num_workers > 1)
/* create the frame queue towards the nat-ha node only once */
518 if (ha->fq_index == ~0)
519 ha->fq_index = vlib_frame_queue_main_init (ha->ha_node_index, 0);
520 udp_register_dst_port (ha->vlib_main, port,
521 ha->ha_handoff_node_index, 1);
/* otherwise messages go straight to the nat-ha node */
525 udp_register_dst_port (ha->vlib_main, port, ha->ha_node_index, 1);
527 nat_elog_info_X1 ("HA listening on port %d for state sync", "i4", port);
/* Return the configured local address, UDP port and path MTU through the
 * three output pointers. */
534 nat_ha_get_listener (ip4_address_t * addr, u16 * port, u32 * path_mtu)
536 nat_ha_main_t *ha = &nat_ha_main;
538 addr->as_u32 = ha->src_ip_address.as_u32;
539 *port = ha->src_port;
540 *path_mtu = ha->state_sync_path_mtu;
/*
 * Configure the failover peer: store its address and the session refresh
 * interval, then signal event type 1 to the nat-ha-process node.
 */
544 nat_ha_set_failover (ip4_address_t * addr, u16 port,
545 u32 session_refresh_interval)
547 nat_ha_main_t *ha = &nat_ha_main;
549 ha->dst_ip_address.as_u32 = addr->as_u32;
551 ha->session_refresh_interval = session_refresh_interval;
553 vlib_process_signal_event (ha->vlib_main, ha->ha_process_node_index, 1, 0);
/* Return the configured failover address, UDP port and session refresh
 * interval through the three output pointers. */
559 nat_ha_get_failover (ip4_address_t * addr, u16 * port,
560 u32 * session_refresh_interval)
562 nat_ha_main_t *ha = &nat_ha_main;
564 addr->as_u32 = ha->dst_ip_address.as_u32;
565 *port = ha->dst_port;
566 *session_refresh_interval = ha->session_refresh_interval;
/*
 * Process a received add event: count it, unpack the event fields and
 * create the session via nat44_ei_ha_sadd.  Addresses, ports and the
 * protocol are passed on as received; only fib_index and flags are
 * converted to host byte order.
 */
569 static_always_inline void
570 nat_ha_recv_add (nat_ha_event_t * event, f64 now, u32 thread_index)
572 nat_ha_main_t *ha = &nat_ha_main;
573 ip4_address_t in_addr, out_addr, eh_addr, ehn_addr;
577 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_ADD],
580 in_addr.as_u32 = event->in_addr;
581 out_addr.as_u32 = event->out_addr;
582 eh_addr.as_u32 = event->eh_addr;
583 ehn_addr.as_u32 = event->ehn_addr;
584 fib_index = clib_net_to_host_u32 (event->fib_index);
585 flags = clib_net_to_host_u16 (event->flags);
587 nat44_ei_ha_sadd (&in_addr, event->in_port, &out_addr, event->out_port,
588 &eh_addr, event->eh_port, &ehn_addr, event->ehn_port,
589 event->protocol, fib_index, flags, thread_index);
/*
 * Process a received delete event: count it, unpack the outside and
 * external host addresses and the FIB index (converted to host order)
 * and delete the session via nat44_ei_ha_sdel.
 */
592 static_always_inline void
593 nat_ha_recv_del (nat_ha_event_t * event, u32 thread_index)
595 nat_ha_main_t *ha = &nat_ha_main;
596 ip4_address_t out_addr, eh_addr;
599 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_DEL],
602 out_addr.as_u32 = event->out_addr;
603 eh_addr.as_u32 = event->eh_addr;
604 fib_index = clib_net_to_host_u32 (event->fib_index);
606 nat44_ei_ha_sdel (&out_addr, event->out_port, &eh_addr, event->eh_port,
607 event->protocol, fib_index, thread_index);
/*
 * Process a received refresh event: count it, convert fib_index,
 * total_pkts and total_bytes to host byte order and update the session
 * counters via nat44_ei_ha_sref.
 */
610 static_always_inline void
611 nat_ha_recv_refresh (nat_ha_event_t * event, f64 now, u32 thread_index)
613 nat_ha_main_t *ha = &nat_ha_main;
614 ip4_address_t out_addr, eh_addr;
615 u32 fib_index, total_pkts;
618 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_REFRESH],
621 out_addr.as_u32 = event->out_addr;
622 eh_addr.as_u32 = event->eh_addr;
623 fib_index = clib_net_to_host_u32 (event->fib_index);
624 total_pkts = clib_net_to_host_u32 (event->total_pkts);
625 total_bytes = clib_net_to_host_u64 (event->total_bytes);
627 nat44_ei_ha_sref (&out_addr, event->out_port, &eh_addr, event->eh_port,
628 event->protocol, fib_index, total_pkts, total_bytes,
632 /* process received NAT HA event */
/* Dispatches on event->event_type to the add, delete or refresh handler;
 * any other type is only logged as unsupported. */
633 static_always_inline void
634 nat_ha_event_process (nat_ha_event_t * event, f64 now, u32 thread_index)
636 switch (event->event_type)
639 nat_ha_recv_add (event, now, thread_index);
642 nat_ha_recv_del (event, thread_index);
645 nat_ha_recv_refresh (event, now, thread_index);
648 nat_elog_notice_X1 ("Unsupported HA event type %d", "i4",
/*
 * Start a new state sync message in buffer b: lay out the IPv4, UDP and
 * NAT HA headers back to back at the current buffer position and fill
 * them from the configured local and failover endpoints.  The size
 * expression at the end is the combined length of the three headers
 * (presumably stored through offset - the assignment is not visible).
 */
655 nat_ha_header_create (vlib_buffer_t * b, u32 * offset, u32 thread_index)
657 nat_ha_main_t *ha = &nat_ha_main;
658 nat_ha_message_header_t *h;
664 b->current_length = sizeof (*ip) + sizeof (*udp) + sizeof (*h);
665 b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
666 b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
667 vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
668 vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
669 ip = vlib_buffer_get_current (b);
670 udp = (udp_header_t *) (ip + 1);
671 h = (nat_ha_message_header_t *) (udp + 1);
/* IPv4 header: version 4, 20 byte header, UDP payload, don't fragment */
674 ip->ip_version_and_header_length = 0x45;
676 ip->protocol = IP_PROTOCOL_UDP;
677 ip->flags_and_fragment_offset =
678 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
679 ip->src_address.as_u32 = ha->src_ip_address.as_u32;
680 ip->dst_address.as_u32 = ha->dst_ip_address.as_u32;
/* UDP header */
682 udp->src_port = clib_host_to_net_u16 (ha->src_port);
683 udp->dst_port = clib_host_to_net_u16 (ha->dst_port);
686 /* NAT HA protocol header */
687 h->version = NAT_HA_VERSION;
690 h->thread_index = clib_host_to_net_u32 (thread_index);
/* the sequence counter is shared by all threads, hence the atomic add */
691 sequence_number = clib_atomic_fetch_add (&ha->sequence_number, 1);
692 h->sequence_number = clib_host_to_net_u32 (sequence_number);
695 sizeof (ip4_header_t) + sizeof (udp_header_t) +
696 sizeof (nat_ha_message_header_t);
/*
 * Finish and send the message in buffer b: write the event count into
 * the HA header, fill in the IP length, IP checksum and UDP length,
 * cache a copy of the whole packet for retransmission and hand the frame
 * to ip4-lookup.
 */
700 nat_ha_send (vlib_frame_t * f, vlib_buffer_t * b, u8 is_resync,
703 nat_ha_main_t *ha = &nat_ha_main;
704 nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
705 nat_ha_message_header_t *h;
708 vlib_main_t *vm = vlib_mains[thread_index];
710 ip = vlib_buffer_get_current (b);
711 udp = ip4_next_header (ip);
712 h = (nat_ha_message_header_t *) (udp + 1);
714 h->count = clib_host_to_net_u16 (td->state_sync_count);
716 ip->length = clib_host_to_net_u16 (b->current_length);
717 ip->checksum = ip4_header_checksum (ip);
718 udp->length = clib_host_to_net_u16 (b->current_length - sizeof (*ip));
/* the sequence number is passed as written in the header (network
 * order).  NOTE(review): the length parameter of
 * nat_ha_resend_queue_add is u8 - confirm current_length cannot exceed
 * 255 here, otherwise the cached copy is truncated. */
720 nat_ha_resend_queue_add (h->sequence_number, (u8 *) ip, b->current_length,
721 is_resync, thread_index);
723 vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
726 /* add NAT HA protocol event */
/*
 * Appends one event to the thread's message under construction,
 * allocating the buffer and frame and creating the headers when needed.
 * The message is sent when a flush is requested or when one more event
 * would exceed the path MTU.  nat_ha_flush and nat_ha_worker_fn call
 * this with event == 0 and do_flush == 1.
 */
727 static_always_inline void
728 nat_ha_event_add (nat_ha_event_t * event, u8 do_flush, u32 thread_index,
731 nat_ha_main_t *ha = &nat_ha_main;
732 nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
733 vlib_main_t *vm = vlib_mains[thread_index];
734 vlib_buffer_t *b = 0;
738 b = td->state_sync_buffer;
/* no message under construction yet: allocate a new buffer */
740 if (PREDICT_FALSE (b == 0))
745 if (vlib_buffer_alloc (vm, &bi, 1) != 1)
747 nat_elog_warn ("HA NAT state sync can't allocate buffer");
751 b = td->state_sync_buffer = vlib_get_buffer (vm, bi);
752 clib_memset (vnet_buffer (b), 0, sizeof (*vnet_buffer (b)));
753 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
/* continue with the buffer and write offset saved by the previous call */
758 bi = vlib_get_buffer_index (vm, b);
759 offset = td->state_sync_next_event_offset;
762 f = td->state_sync_frame;
763 if (PREDICT_FALSE (f == 0))
766 f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
767 td->state_sync_frame = f;
768 to_next = vlib_frame_vector_args (f);
/* first event of a message: write the IP/UDP/HA headers first */
773 if (PREDICT_FALSE (td->state_sync_count == 0))
774 nat_ha_header_create (b, &offset, thread_index);
776 if (PREDICT_TRUE (do_flush == 0))
/* copy the event behind the data written so far */
778 clib_memcpy_fast (b->data + offset, event, sizeof (*event));
779 offset += sizeof (*event);
780 td->state_sync_count++;
781 b->current_length += sizeof (*event);
/* count the event under the send counter matching its type */
783 switch (event->event_type)
786 vlib_increment_simple_counter (&ha->counters
787 [NAT_HA_COUNTER_SEND_ADD],
791 vlib_increment_simple_counter (&ha->counters
792 [NAT_HA_COUNTER_SEND_DEL],
796 vlib_increment_simple_counter (&ha->counters
797 [NAT_HA_COUNTER_SEND_REFRESH],
/* send when flushing or when another event would not fit the path MTU */
806 (do_flush || offset + (sizeof (*event)) > ha->state_sync_path_mtu))
808 nat_ha_send (f, b, is_resync, thread_index);
809 td->state_sync_buffer = 0;
810 td->state_sync_frame = 0;
811 td->state_sync_count = 0;
/* presumably only for resync messages: one more ACK is expected -
 * the guarding condition is not visible, confirm */
815 clib_atomic_fetch_add (&ha->resync_ack_count, 1);
816 nat_ha_resync_fin ();
820 td->state_sync_next_event_offset = offset;
/* Guard macro: declares a local ha pointer and tests whether no failover
 * port is configured (dst_port == 0), the expected common case. */
823 #define skip_if_disabled() \
825 nat_ha_main_t *ha = &nat_ha_main; \
826 if (PREDICT_TRUE (!ha->dst_port)) \
/* Flush the message under construction of thread 0 (no event is added,
 * do_flush is set). */
831 nat_ha_flush (u8 is_resync)
834 nat_ha_event_add (0, 1, 0, is_resync);
/*
 * Queue a session add event for the failover peer.  Addresses, ports and
 * the protocol are copied as given; flags and fib_index are converted to
 * network byte order.
 */
838 nat_ha_sadd (ip4_address_t * in_addr, u16 in_port, ip4_address_t * out_addr,
839 u16 out_port, ip4_address_t * eh_addr, u16 eh_port,
840 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto, u32 fib_index,
841 u16 flags, u32 thread_index, u8 is_resync)
843 nat_ha_event_t event;
/* zero first so fields not set below are sent as 0 */
847 clib_memset (&event, 0, sizeof (event));
848 event.event_type = NAT_HA_ADD;
849 event.flags = clib_host_to_net_u16 (flags);
850 event.in_addr = in_addr->as_u32;
851 event.in_port = in_port;
852 event.out_addr = out_addr->as_u32;
853 event.out_port = out_port;
854 event.eh_addr = eh_addr->as_u32;
855 event.eh_port = eh_port;
856 event.ehn_addr = ehn_addr->as_u32;
857 event.ehn_port = ehn_port;
858 event.fib_index = clib_host_to_net_u32 (fib_index);
859 event.protocol = proto;
860 nat_ha_event_add (&event, 0, thread_index, is_resync);
/*
 * Queue a session delete event for the failover peer.  Only the outside
 * and external host endpoints, the FIB index (network order) and the
 * protocol are filled in; the event is never marked as resync.
 */
864 nat_ha_sdel (ip4_address_t * out_addr, u16 out_port, ip4_address_t * eh_addr,
865 u16 eh_port, u8 proto, u32 fib_index, u32 thread_index)
867 nat_ha_event_t event;
871 clib_memset (&event, 0, sizeof (event));
872 event.event_type = NAT_HA_DEL;
873 event.out_addr = out_addr->as_u32;
874 event.out_port = out_port;
875 event.eh_addr = eh_addr->as_u32;
876 event.eh_port = eh_port;
877 event.fib_index = clib_host_to_net_u32 (fib_index);
878 event.protocol = proto;
879 nat_ha_event_add (&event, 0, thread_index, 0);
/*
 * Queue a session counters refresh event for the failover peer, rate
 * limited per session: *last_refreshed is compared with now and updated
 * to now when an event is queued.
 */
883 nat_ha_sref (ip4_address_t * out_addr, u16 out_port, ip4_address_t * eh_addr,
884 u16 eh_port, u8 proto, u32 fib_index, u32 total_pkts,
885 u64 total_bytes, u32 thread_index, f64 * last_refreshed, f64 now)
887 nat_ha_main_t *ha = &nat_ha_main;
888 nat_ha_event_t event;
/* the refresh interval has not elapsed since the last refresh */
892 if ((*last_refreshed + ha->session_refresh_interval) > now)
895 *last_refreshed = now;
896 clib_memset (&event, 0, sizeof (event));
897 event.event_type = NAT_HA_REFRESH;
898 event.out_addr = out_addr->as_u32;
899 event.out_port = out_port;
900 event.eh_addr = eh_addr->as_u32;
901 event.eh_port = eh_port;
902 event.fib_index = clib_host_to_net_u32 (fib_index);
903 event.protocol = proto;
/* counters travel in network byte order */
904 event.total_pkts = clib_host_to_net_u32 (total_pkts);
905 event.total_bytes = clib_host_to_net_u64 (total_bytes);
906 nat_ha_event_add (&event, 0, thread_index, 0);
909 static_always_inline u8
912 nat_ha_main_t *ha = &nat_ha_main;
916 /* per thread process waiting for interrupt */
/* Interrupt-driven input node function: on each run it flushes the
 * calling thread's message under construction and then retries or
 * expires the messages still waiting for an ACK. */
918 nat_ha_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
921 u32 thread_index = vm->thread_index;
923 if (plugin_enabled () == 0)
926 /* flush HA NAT data under construction */
927 nat_ha_event_add (0, 1, thread_index, 0);
928 /* scan if we need to resend some non-ACKed data */
929 nat_ha_resend_scan (vlib_time_now (vm), thread_index);
/* "nat-ha-worker": input node in interrupt state; nat_ha_process sets
 * its interrupt pending on each thread */
934 VLIB_REGISTER_NODE (nat_ha_worker_node) = {
935 .function = nat_ha_worker_fn,
936 .type = VLIB_NODE_TYPE_INPUT,
937 .state = VLIB_NODE_STATE_INTERRUPT,
938 .name = "nat-ha-worker",
942 /* periodically send interrupt to each thread */
/*
 * Process node: first blocks until an event arrives (nat_ha_set_failover
 * signals one), then wakes up on an event or after at most 1.0 second
 * and sets the nat-ha-worker interrupt pending on the threads in
 * vlib_mains.
 */
944 nat_ha_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
946 nat_ha_main_t *ha = &nat_ha_main;
948 uword *event_data = 0;
/* wait for the kickoff event */
951 vlib_process_wait_for_event (vm);
952 event_type = vlib_process_get_events (vm, &event_data);
954 nat_elog_info ("nat-ha-process: bogus kickoff event received");
955 vec_reset_length (event_data);
/* periodic part: event data is discarded, only the wakeup matters */
959 vlib_process_wait_for_event_or_clock (vm, 1.0);
960 event_type = vlib_process_get_events (vm, &event_data);
961 vec_reset_length (event_data);
962 for (ti = 0; ti < vec_len (vlib_mains); ti++)
/* presumably threads without per thread data are skipped - confirm */
964 if (ti >= vec_len (ha->per_thread_data))
967 vlib_node_set_interrupt_pending (vlib_mains[ti],
968 nat_ha_worker_node.index);
/* "nat-ha-process": process node running nat_ha_process */
976 VLIB_REGISTER_NODE (nat_ha_process_node) = {
977 .function = nat_ha_process,
978 .type = VLIB_NODE_TYPE_PROCESS,
979 .name = "nat-ha-process",
/* Return the resync-in-progress flag and the number of missed resync
 * ACKs through the two output pointers. */
984 nat_ha_get_resync_status (u8 * in_resync, u32 * resync_ack_missed)
986 nat_ha_main_t *ha = &nat_ha_main;
988 *in_resync = ha->in_resync;
989 *resync_ack_missed = ha->resync_ack_missed;
/* Trace formatter of the nat-ha node: prints the number of events in
 * the message and the recorded IPv4 address.  The vm and node arguments
 * are consumed from the va_list but not used. */
999 format_nat_ha_trace (u8 * s, va_list * args)
1001 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1002 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1003 nat_ha_trace_t *t = va_arg (*args, nat_ha_trace_t *);
1006 format (s, "nat-ha: %u events from %U", t->event_count,
1007 format_ip4_address, &t->addr);
1014 NAT_HA_NEXT_IP4_LOOKUP,
/* X-macro list of nat-ha node errors: _(enum suffix, counter string) */
1019 #define foreach_nat_ha_error \
1020 _(PROCESSED, "pkts-processed") \
1021 _(BAD_VERSION, "bad-version")
/* expands the list into NAT_HA_ERROR_* enumerators */
1025 #define _(sym, str) NAT_HA_ERROR_##sym,
1026 foreach_nat_ha_error
/* error strings, in the same order as the enumerators */
1031 static char *nat_ha_error_strings[] = {
1032 #define _(sym, str) str,
1033 foreach_nat_ha_error
1037 /* process received HA NAT protocol messages */
/*
 * For every buffer in the frame: check the protocol version, treat
 * messages with no events and the ACK flag as an acknowledgement,
 * otherwise process each contained event and turn the same buffer into
 * an ACK reply that is sent back through ip4-lookup.  Buffers keep
 * next0 == NAT_HA_NEXT_DROP unless a reply is built.
 */
1039 nat_ha_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1040 vlib_frame_t * frame)
1042 u32 n_left_from, *from, next_index, *to_next;
1043 f64 now = vlib_time_now (vm);
1044 u32 thread_index = vm->thread_index;
1045 u32 pkts_processed = 0;
1046 ip4_main_t *i4m = &ip4_main;
1047 u8 host_config_ttl = i4m->host_config.ttl;
1048 nat_ha_main_t *ha = &nat_ha_main;
1050 from = vlib_frame_vector_args (frame);
1051 n_left_from = frame->n_vectors;
1052 next_index = node->cached_next_index;
1054 while (n_left_from > 0)
1058 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1060 while (n_left_from > 0 && n_left_to_next > 0)
1062 u32 bi0, next0, src_addr0, dst_addr0;;
1064 nat_ha_message_header_t *h0;
1066 u16 event_count0, src_port0, dst_port0, old_len0;
1076 n_left_to_next -= 1;
/* the buffer arrives positioned at the HA header; step back over the
 * UDP and IP headers to get pointers to them */
1078 b0 = vlib_get_buffer (vm, bi0);
1079 h0 = vlib_buffer_get_current (b0);
1080 vlib_buffer_advance (b0, -sizeof (*udp0));
1081 udp0 = vlib_buffer_get_current (b0);
1082 vlib_buffer_advance (b0, -sizeof (*ip0));
1083 ip0 = vlib_buffer_get_current (b0);
1085 next0 = NAT_HA_NEXT_DROP;
1087 if (h0->version != NAT_HA_VERSION)
1089 b0->error = node->errors[NAT_HA_ERROR_BAD_VERSION];
1093 event_count0 = clib_net_to_host_u16 (h0->count);
1094 /* ACK for previously sent data */
1095 if (!event_count0 && (h0->flags & NAT_HA_FLAG_ACK))
1097 nat_ha_ack_recv (h0->sequence_number, thread_index);
1098 b0->error = node->errors[NAT_HA_ERROR_PROCESSED];
/* events follow the HA header back to back */
1102 e0 = (nat_ha_event_t *) (h0 + 1);
1104 /* process each event */
/* NOTE(review): event_count0 comes from the received header and no
 * check against the buffer length is visible here; confirm a bogus
 * count cannot make the loop read past the received data */
1105 while (event_count0)
1107 nat_ha_event_process (e0, now, thread_index);
1109 e0 = (nat_ha_event_t *) ((u8 *) e0 + sizeof (nat_ha_event_t));
1112 next0 = NAT_HA_NEXT_IP4_LOOKUP;
1115 /* reply with ACK */
/* the reply reuses the buffer, truncated to the three headers */
1116 b0->current_length = sizeof (*ip0) + sizeof (*udp0) + sizeof (*h0);
/* swap the IP addresses and set the new length */
1118 src_addr0 = ip0->src_address.data_u32;
1119 dst_addr0 = ip0->dst_address.data_u32;
1120 ip0->src_address.data_u32 = dst_addr0;
1121 ip0->dst_address.data_u32 = src_addr0;
1122 old_len0 = ip0->length;
1123 ip0->length = clib_host_to_net_u16 (b0->current_length);
/* update the IP checksum incrementally for the changed TTL and length */
1125 sum0 = ip0->checksum;
1126 sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
1128 ip0->ttl = host_config_ttl;
1130 ip_csum_update (sum0, old_len0, ip0->length, ip4_header_t,
1132 ip0->checksum = ip_csum_fold (sum0);
/* swap the UDP ports */
1135 src_port0 = udp0->src_port;
1136 dst_port0 = udp0->dst_port;
1137 udp0->src_port = dst_port0;
1138 udp0->dst_port = src_port0;
1140 clib_host_to_net_u16 (b0->current_length - sizeof (*ip0));
1142 h0->flags = NAT_HA_FLAG_ACK;
1144 vlib_increment_simple_counter (&ha->counters
1145 [NAT_HA_COUNTER_SEND_ACK],
1146 thread_index, 0, 1);
1149 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1150 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1152 nat_ha_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
1154 (void *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
1155 t->event_count = clib_net_to_host_u16 (h0->count);
1156 t->addr.as_u32 = ip->src_address.data_u32;
1159 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1160 to_next, n_left_to_next,
1164 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1167 vlib_node_increment_counter (vm, ha->ha_node_index, NAT_HA_ERROR_PROCESSED,
1170 return frame->n_vectors;
/* Registration of the internal node that runs nat_ha_node_fn; its next
 * nodes are ip4-lookup (ACK replies) and error-drop */
1174 VLIB_REGISTER_NODE (nat_ha_node) = {
1175 .function = nat_ha_node_fn,
1177 .vector_size = sizeof (u32),
1178 .format_trace = format_nat_ha_trace,
1179 .type = VLIB_NODE_TYPE_INTERNAL,
1180 .n_errors = ARRAY_LEN (nat_ha_error_strings),
1181 .error_strings = nat_ha_error_strings,
1182 .n_next_nodes = NAT_HA_N_NEXT,
1184 [NAT_HA_NEXT_IP4_LOOKUP] = "ip4-lookup",
1185 [NAT_HA_NEXT_DROP] = "error-drop",
1192 u32 next_worker_index;
1194 } nat_ha_handoff_trace_t;
/* X-macro list of nat-ha-handoff counters: _(enum suffix, string) */
1196 #define foreach_nat_ha_handoff_error \
1197 _(CONGESTION_DROP, "congestion drop") \
1198 _(SAME_WORKER, "same worker") \
1199 _(DO_HANDOFF, "do handoff")
/* expands the list into NAT_HA_HANDOFF_ERROR_* enumerators */
1203 #define _(sym,str) NAT_HA_HANDOFF_ERROR_##sym,
1204 foreach_nat_ha_handoff_error
1206 NAT_HA_HANDOFF_N_ERROR,
1207 } nat_ha_handoff_error_t;
/* counter strings, in the same order as the enumerators */
1209 static char *nat_ha_handoff_error_strings[] = {
1210 #define _(sym,string) string,
1211 foreach_nat_ha_handoff_error
/* Trace formatter of the nat-ha-handoff node: prints the worker index
 * the buffer was handed to.  The vm and node arguments are consumed from
 * the va_list but not used. */
1216 format_nat_ha_handoff_trace (u8 * s, va_list * args)
1218 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1219 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1220 nat_ha_handoff_trace_t *t = va_arg (*args, nat_ha_handoff_trace_t *);
1223 format (s, "NAT_HA_WORKER_HANDOFF: next-worker %d", t->next_worker_index);
1228 /* do worker handoff based on thread_index in NAT HA protocol header */
/*
 * Reads the target thread from the HA header of every buffer in the
 * frame, enqueues all buffers to those threads through the handoff
 * frame queue and updates the congestion drop, same worker and handoff
 * counters.  Returns the number of buffers in the frame.
 */
1230 nat_ha_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1231 vlib_frame_t * frame)
1233 nat_ha_main_t *ha = &nat_ha_main;
1234 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1235 u32 n_enq, n_left_from, *from;
1236 u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1237 u32 thread_index = vm->thread_index;
1238 u32 do_handoff = 0, same_worker = 0;
1240 from = vlib_frame_vector_args (frame);
1241 n_left_from = frame->n_vectors;
1242 vlib_get_buffers (vm, from, bufs, n_left_from);
1245 ti = thread_indices;
1247 while (n_left_from > 0)
1249 nat_ha_message_header_t *h0;
/* NOTE(review): the 32-bit thread index from the received header is
 * stored in a u16 and no validation against the number of threads is
 * visible here - confirm it is checked before use */
1251 h0 = vlib_buffer_get_current (b[0]);
1252 ti[0] = clib_net_to_host_u32 (h0->thread_index);
1254 if (ti[0] != thread_index)
1259 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1260 && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1262 nat_ha_handoff_trace_t *t =
1263 vlib_add_trace (vm, node, b[0], sizeof (*t));
1264 t->next_worker_index = ti[0];
1273 vlib_buffer_enqueue_to_thread (vm, ha->fq_index, from, thread_indices,
1274 frame->n_vectors, 1);
/* buffers that could not be enqueued are counted as congestion drops */
1276 if (n_enq < frame->n_vectors)
1277 vlib_node_increment_counter (vm, node->node_index,
1278 NAT_HA_HANDOFF_ERROR_CONGESTION_DROP,
1279 frame->n_vectors - n_enq);
1280 vlib_node_increment_counter (vm, node->node_index,
1281 NAT_HA_HANDOFF_ERROR_SAME_WORKER, same_worker);
1282 vlib_node_increment_counter (vm, node->node_index,
1283 NAT_HA_HANDOFF_ERROR_DO_HANDOFF, do_handoff);
1284 return frame->n_vectors;
1288 nat_ha_resync (u32 client_index, u32 pid,
1289 nat_ha_resync_event_cb_t event_callback)
/* "nat-ha-handoff": internal node that hands received HA messages to
 * the thread named in their header (see nat_ha_handoff_node_fn) */
1295 VLIB_REGISTER_NODE (nat_ha_handoff_node) = {
1296 .function = nat_ha_handoff_node_fn,
1297 .name = "nat-ha-handoff",
1298 .vector_size = sizeof (u32),
1299 .format_trace = format_nat_ha_handoff_trace,
1300 .type = VLIB_NODE_TYPE_INTERNAL,
1301 .n_errors = ARRAY_LEN(nat_ha_handoff_error_strings),
1302 .error_strings = nat_ha_handoff_error_strings,
1311 * fd.io coding-style-patch-verification: ON
1314 * eval: (c-set-style "gnu")