2 * Copyright (c) 2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 //#include <vnet/fib/fib_source.h>
17 #include <vnet/fib/fib_table.h>
18 #include <vnet/udp/udp_local.h>
19 #include <vppinfra/atomics.h>
21 #include <nat/lib/log.h>
23 #include <nat/nat44-ei/nat44_ei.h>
24 #include <nat/nat44-ei/nat44_ei_ha.h>
25 #include <nat/nat44-ei/nat44_ei_inlines.h>
27 /* number of retries */
28 #define NAT_HA_RETRIES 3
30 #define foreach_nat_ha_counter \
31 _(RECV_ADD, "add-event-recv", 0) \
32 _(RECV_DEL, "del-event-recv", 1) \
33 _(RECV_REFRESH, "refresh-event-recv", 2) \
34 _(SEND_ADD, "add-event-send", 3) \
35 _(SEND_DEL, "del-event-send", 4) \
36 _(SEND_REFRESH, "refresh-event-send", 5) \
37 _(RECV_ACK, "ack-recv", 6) \
38 _(SEND_ACK, "ack-send", 7) \
39 _(RETRY_COUNT, "retry-count", 8) \
40 _(MISSED_COUNT, "missed-count", 9)
42 /* NAT HA protocol version */
43 #define NAT_HA_VERSION 0x01
45 /* NAT HA protocol flags */
46 #define NAT_HA_FLAG_ACK 0x01
48 /* NAT HA event types */
54 } nat_ha_event_type_t;
56 /* NAT HA protocol header */
67 /* thread index where events originated */
69 } __attribute__ ((packed)) nat_ha_message_header_t;
71 /* NAT HA protocol event data */
90 } __attribute__ ((packed)) nat_ha_event_t;
94 #define _(N, s, v) NAT_HA_COUNTER_##N = v,
95 foreach_nat_ha_counter
100 /* data waiting for ACK */
103 /* sequence number */
107 /* next retry time */
113 } nat_ha_resend_entry_t;
115 /* per thread data */
118 /* buffer under construction */
119 vlib_buffer_t *state_sync_buffer;
120 /* frame containing NAT HA buffers */
121 vlib_frame_t *state_sync_frame;
122 /* number of events */
123 u16 state_sync_count;
124 /* next event offset */
125 u32 state_sync_next_event_offset;
126 /* data waiting for ACK */
127 nat_ha_resend_entry_t *resend_queue;
128 } nat_ha_per_thread_data_t;
130 /* NAT HA settings */
131 typedef struct nat_ha_main_s
134 /* local IP address and UDP port */
135 ip4_address_t src_ip_address;
137 /* failover IP address and UDP port */
138 ip4_address_t dst_ip_address;
140 /* path MTU between local and failover */
141 u32 state_sync_path_mtu;
142 /* number of seconds after which to send session counters refresh */
143 u32 session_refresh_interval;
145 vlib_simple_counter_main_t counters[NAT_HA_N_COUNTERS];
146 /* sequence number counter */
148 /* 1 if resync in progress */
150 /* number of remaining ACKs for resync */
151 u32 resync_ack_count;
152 /* number of missed ACK for resync */
153 u32 resync_ack_missed;
155 nat_ha_resync_event_cb_t event_callback;
158 /* per thread data */
160 nat_ha_per_thread_data_t *per_thread_data;
162 u32 ha_handoff_node_index;
163 u32 ha_process_node_index;
164 u32 ha_worker_node_index;
167 /* worker handoff frame-queue index */
171 nat_ha_main_t nat_ha_main;
173 static_always_inline void
174 nat44_ei_ha_sadd (ip4_address_t *in_addr, u16 in_port, ip4_address_t *out_addr,
175 u16 out_port, ip4_address_t *eh_addr, u16 eh_port,
176 ip4_address_t *ehn_addr, u16 ehn_port, u8 proto,
177 u32 fib_index, u16 flags, u32 thread_index)
179 nat44_ei_main_t *nm = &nat44_ei_main;
180 nat44_ei_main_per_thread_data_t *tnm = &nm->per_thread_data[thread_index];
182 nat44_ei_session_t *s;
183 clib_bihash_kv_8_8_t kv;
184 vlib_main_t *vm = vlib_get_main ();
185 f64 now = vlib_time_now (vm);
186 nat44_ei_outside_fib_t *outside_fib;
187 fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
189 .fp_proto = FIB_PROTOCOL_IP4,
192 .ip4.as_u32 = eh_addr->as_u32,
196 if (!(flags & NAT44_EI_SESSION_FLAG_STATIC_MAPPING))
198 if (nat44_ei_set_outside_address_and_port (nm->addresses, thread_index,
199 *out_addr, out_port, proto))
203 u = nat44_ei_user_get_or_create (nm, in_addr, fib_index, thread_index);
207 s = nat44_ei_session_alloc_or_recycle (nm, u, thread_index, now);
211 s->out2in.addr.as_u32 = out_addr->as_u32;
212 s->out2in.port = out_port;
213 s->nat_proto = proto;
216 s->ext_host_addr.as_u32 = eh_addr->as_u32;
217 s->ext_host_port = eh_port;
218 nat44_ei_user_session_increment (nm, u, nat44_ei_is_session_static (s));
219 switch (vec_len (nm->outside_fibs))
222 s->out2in.fib_index = nm->outside_fib_index;
225 s->out2in.fib_index = nm->outside_fibs[0].fib_index;
228 vec_foreach (outside_fib, nm->outside_fibs)
230 fei = fib_table_lookup (outside_fib->fib_index, &pfx);
231 if (FIB_NODE_INDEX_INVALID != fei)
233 if (fib_entry_get_resolving_interface (fei) != ~0)
235 s->out2in.fib_index = outside_fib->fib_index;
242 init_nat_o2i_kv (&kv, s, thread_index, s - tnm->sessions);
243 if (clib_bihash_add_del_8_8 (&nm->out2in, &kv, 1))
244 nat_elog_warn (nm, "out2in key add failed");
246 s->in2out.addr.as_u32 = in_addr->as_u32;
247 s->in2out.port = in_port;
248 s->in2out.fib_index = fib_index;
249 init_nat_i2o_kv (&kv, s, thread_index, s - tnm->sessions);
250 if (clib_bihash_add_del_8_8 (&nm->in2out, &kv, 1))
251 nat_elog_warn (nm, "in2out key add failed");
254 static_always_inline void
255 nat44_ei_ha_sdel (ip4_address_t *out_addr, u16 out_port,
256 ip4_address_t *eh_addr, u16 eh_port, u8 proto, u32 fib_index,
259 nat44_ei_main_t *nm = &nat44_ei_main;
260 clib_bihash_kv_8_8_t kv, value;
262 nat44_ei_session_t *s;
263 nat44_ei_main_per_thread_data_t *tnm;
265 if (nm->num_workers > 1)
266 thread_index = nm->first_worker_index +
267 (nm->workers[(clib_net_to_host_u16 (out_port) - 1024) /
268 nm->port_per_thread]);
270 thread_index = nm->num_workers;
271 tnm = vec_elt_at_index (nm->per_thread_data, thread_index);
273 init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
274 if (clib_bihash_search_8_8 (&nm->out2in, &kv, &value))
277 s = pool_elt_at_index (tnm->sessions, nat_value_get_session_index (&value));
278 nat44_ei_free_session_data_v2 (nm, s, thread_index, 1);
279 nat44_ei_delete_session (nm, s, thread_index);
282 static_always_inline void
283 nat44_ei_ha_sref (ip4_address_t *out_addr, u16 out_port,
284 ip4_address_t *eh_addr, u16 eh_port, u8 proto, u32 fib_index,
285 u32 total_pkts, u64 total_bytes, u32 thread_index)
287 nat44_ei_main_t *nm = &nat44_ei_main;
288 clib_bihash_kv_8_8_t kv, value;
289 nat44_ei_session_t *s;
290 nat44_ei_main_per_thread_data_t *tnm;
292 tnm = vec_elt_at_index (nm->per_thread_data, thread_index);
294 init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
295 if (clib_bihash_search_8_8 (&nm->out2in, &kv, &value))
298 s = pool_elt_at_index (tnm->sessions, nat_value_get_session_index (&value));
299 s->total_pkts = total_pkts;
300 s->total_bytes = total_bytes;
304 nat_ha_resync_fin (void)
306 nat44_ei_main_t *nm = &nat44_ei_main;
307 nat_ha_main_t *ha = &nat_ha_main;
309 /* if no more resync ACK remainig we are done */
310 if (ha->resync_ack_count)
314 if (ha->resync_ack_missed)
316 nat_elog_info (nm, "resync completed with result FAILED");
320 nat_elog_info (nm, "resync completed with result SUCCESS");
322 if (ha->event_callback)
323 ha->event_callback (ha->client_index, ha->pid, ha->resync_ack_missed);
326 /* cache HA NAT data waiting for ACK */
328 nat_ha_resend_queue_add (vlib_main_t *vm, u32 seq, u8 *data, u8 data_len,
329 u8 is_resync, u32 vlib_thread_index)
331 nat_ha_main_t *ha = &nat_ha_main;
332 nat_ha_per_thread_data_t *td = &ha->per_thread_data[vlib_thread_index];
333 nat_ha_resend_entry_t *entry;
334 f64 now = vlib_time_now (vm);
336 vec_add2 (td->resend_queue, entry, 1);
337 clib_memset (entry, 0, sizeof (*entry));
338 entry->retry_timer = now + 2.0;
340 entry->is_resync = is_resync;
341 vec_add (entry->data, data, data_len);
346 static_always_inline void
347 nat_ha_ack_recv (u32 seq, u32 thread_index)
349 nat44_ei_main_t *nm = &nat44_ei_main;
350 nat_ha_main_t *ha = &nat_ha_main;
351 nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
354 vec_foreach_index (i, td->resend_queue)
356 if (td->resend_queue[i].seq != seq)
359 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_ACK],
361 /* ACK received remove cached data */
362 if (td->resend_queue[i].is_resync)
364 clib_atomic_fetch_sub (&ha->resync_ack_count, 1);
365 nat_ha_resync_fin ();
367 vec_free (td->resend_queue[i].data);
368 vec_del1 (td->resend_queue, i);
369 nat_elog_debug_X1 (nm, "ACK for seq %d received", "i4",
370 clib_net_to_host_u32 (seq));
376 /* scan non-ACKed HA NAT for retry */
378 nat_ha_resend_scan (vlib_main_t *vm, u32 thread_index)
380 nat44_ei_main_t *nm = &nat44_ei_main;
381 nat_ha_main_t *ha = &nat_ha_main;
382 nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
383 u32 i, *del, *to_delete = 0;
384 vlib_buffer_t *b = 0;
388 f64 now = vlib_time_now (vm);
390 vec_foreach_index (i, td->resend_queue)
392 if (td->resend_queue[i].retry_timer > now)
395 /* maximum retry reached delete cached data */
396 if (td->resend_queue[i].retry_count >= NAT_HA_RETRIES)
398 nat_elog_notice_X1 (nm, "seq %d missed", "i4",
399 clib_net_to_host_u32 (td->resend_queue[i].seq));
400 if (td->resend_queue[i].is_resync)
402 clib_atomic_fetch_add (&ha->resync_ack_missed, 1);
403 clib_atomic_fetch_sub (&ha->resync_ack_count, 1);
404 nat_ha_resync_fin ();
406 vec_add1 (to_delete, i);
407 vlib_increment_simple_counter (&ha->counters
408 [NAT_HA_COUNTER_MISSED_COUNT],
413 /* retry to send non-ACKed data */
414 nat_elog_debug_X1 (nm, "state sync seq %d resend", "i4",
415 clib_net_to_host_u32 (td->resend_queue[i].seq));
416 td->resend_queue[i].retry_count++;
417 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RETRY_COUNT],
419 if (vlib_buffer_alloc (vm, &bi, 1) != 1)
421 nat_elog_warn (nm, "HA NAT state sync can't allocate buffer");
424 b = vlib_get_buffer (vm, bi);
425 b->current_length = vec_len (td->resend_queue[i].data);
426 b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
427 b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
428 vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
429 vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
430 ip = vlib_buffer_get_current (b);
431 clib_memcpy (ip, td->resend_queue[i].data,
432 vec_len (td->resend_queue[i].data));
433 f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
434 to_next = vlib_frame_vector_args (f);
437 vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
438 td->resend_queue[i].retry_timer = now + 2.0;
441 vec_foreach (del, to_delete)
443 vec_free (td->resend_queue[*del].data);
444 vec_del1 (td->resend_queue, *del);
446 vec_free (to_delete);
452 nat_ha_main_t *ha = &nat_ha_main;
459 nat_ha_main_t *ha = &nat_ha_main;
465 nat_ha_set_node_indexes (nat_ha_main_t *ha, vlib_main_t *vm)
469 node = vlib_get_node_by_name (vm, (u8 *) "nat44-ei-ha-handoff");
470 ha->ha_handoff_node_index = node->index;
471 node = vlib_get_node_by_name (vm, (u8 *) "nat44-ei-ha-process");
472 ha->ha_process_node_index = node->index;
473 node = vlib_get_node_by_name (vm, (u8 *) "nat44-ei-ha-worker");
474 ha->ha_worker_node_index = node->index;
475 node = vlib_get_node_by_name (vm, (u8 *) "nat44-ei-ha");
476 ha->ha_node_index = node->index;
480 nat_ha_init (vlib_main_t * vm, u32 num_workers, u32 num_threads)
482 nat_ha_main_t *ha = &nat_ha_main;
483 clib_memset (ha, 0, sizeof (*ha));
485 nat_ha_set_node_indexes (ha, vm);
489 ha->num_workers = num_workers;
490 vec_validate (ha->per_thread_data, num_threads);
493 ha->counters[v].name = s; \
494 ha->counters[v].stat_segment_name = "/nat44-ei/ha/" s; \
495 vlib_validate_simple_counter (&ha->counters[v], 0); \
496 vlib_zero_simple_counter (&ha->counters[v], 0);
497 foreach_nat_ha_counter
502 nat_ha_set_listener (vlib_main_t *vm, ip4_address_t *addr, u16 port,
505 nat44_ei_main_t *nm = &nat44_ei_main;
506 nat_ha_main_t *ha = &nat_ha_main;
508 /* unregister previously set UDP port */
510 udp_unregister_dst_port (vm, ha->src_port, 1);
512 ha->src_ip_address.as_u32 = addr->as_u32;
514 ha->state_sync_path_mtu = path_mtu;
518 /* if multiple worker threads first go to handoff node */
519 if (ha->num_workers > 1)
521 if (ha->fq_index == ~0)
522 ha->fq_index = vlib_frame_queue_main_init (ha->ha_node_index, 0);
523 udp_register_dst_port (vm, port, ha->ha_handoff_node_index, 1);
527 udp_register_dst_port (vm, port, ha->ha_node_index, 1);
529 nat_elog_info_X1 (nm, "HA listening on port %d for state sync", "i4",
537 nat_ha_get_listener (ip4_address_t * addr, u16 * port, u32 * path_mtu)
539 nat_ha_main_t *ha = &nat_ha_main;
541 addr->as_u32 = ha->src_ip_address.as_u32;
542 *port = ha->src_port;
543 *path_mtu = ha->state_sync_path_mtu;
547 nat_ha_set_failover (vlib_main_t *vm, ip4_address_t *addr, u16 port,
548 u32 session_refresh_interval)
550 nat_ha_main_t *ha = &nat_ha_main;
552 ha->dst_ip_address.as_u32 = addr->as_u32;
554 ha->session_refresh_interval = session_refresh_interval;
556 vlib_process_signal_event (vm, ha->ha_process_node_index, 1, 0);
562 nat_ha_get_failover (ip4_address_t * addr, u16 * port,
563 u32 * session_refresh_interval)
565 nat_ha_main_t *ha = &nat_ha_main;
567 addr->as_u32 = ha->dst_ip_address.as_u32;
568 *port = ha->dst_port;
569 *session_refresh_interval = ha->session_refresh_interval;
572 static_always_inline void
573 nat_ha_recv_add (nat_ha_event_t * event, f64 now, u32 thread_index)
575 nat_ha_main_t *ha = &nat_ha_main;
576 ip4_address_t in_addr, out_addr, eh_addr, ehn_addr;
580 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_ADD],
583 in_addr.as_u32 = event->in_addr;
584 out_addr.as_u32 = event->out_addr;
585 eh_addr.as_u32 = event->eh_addr;
586 ehn_addr.as_u32 = event->ehn_addr;
587 fib_index = clib_net_to_host_u32 (event->fib_index);
588 flags = clib_net_to_host_u16 (event->flags);
590 nat44_ei_ha_sadd (&in_addr, event->in_port, &out_addr, event->out_port,
591 &eh_addr, event->eh_port, &ehn_addr, event->ehn_port,
592 event->protocol, fib_index, flags, thread_index);
595 static_always_inline void
596 nat_ha_recv_del (nat_ha_event_t * event, u32 thread_index)
598 nat_ha_main_t *ha = &nat_ha_main;
599 ip4_address_t out_addr, eh_addr;
602 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_DEL],
605 out_addr.as_u32 = event->out_addr;
606 eh_addr.as_u32 = event->eh_addr;
607 fib_index = clib_net_to_host_u32 (event->fib_index);
609 nat44_ei_ha_sdel (&out_addr, event->out_port, &eh_addr, event->eh_port,
610 event->protocol, fib_index, thread_index);
613 static_always_inline void
614 nat_ha_recv_refresh (nat_ha_event_t * event, f64 now, u32 thread_index)
616 nat_ha_main_t *ha = &nat_ha_main;
617 ip4_address_t out_addr, eh_addr;
618 u32 fib_index, total_pkts;
621 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_REFRESH],
624 out_addr.as_u32 = event->out_addr;
625 eh_addr.as_u32 = event->eh_addr;
626 fib_index = clib_net_to_host_u32 (event->fib_index);
627 total_pkts = clib_net_to_host_u32 (event->total_pkts);
628 total_bytes = clib_net_to_host_u64 (event->total_bytes);
630 nat44_ei_ha_sref (&out_addr, event->out_port, &eh_addr, event->eh_port,
631 event->protocol, fib_index, total_pkts, total_bytes,
635 /* process received NAT HA event */
636 static_always_inline void
637 nat_ha_event_process (nat_ha_event_t * event, f64 now, u32 thread_index)
639 nat44_ei_main_t *nm = &nat44_ei_main;
640 switch (event->event_type)
643 nat_ha_recv_add (event, now, thread_index);
646 nat_ha_recv_del (event, thread_index);
649 nat_ha_recv_refresh (event, now, thread_index);
652 nat_elog_notice_X1 (nm, "Unsupported HA event type %d", "i4",
659 nat_ha_header_create (vlib_buffer_t * b, u32 * offset, u32 thread_index)
661 nat_ha_main_t *ha = &nat_ha_main;
662 nat_ha_message_header_t *h;
668 b->current_length = sizeof (*ip) + sizeof (*udp) + sizeof (*h);
669 b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
670 b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
671 vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
672 vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
673 ip = vlib_buffer_get_current (b);
674 udp = (udp_header_t *) (ip + 1);
675 h = (nat_ha_message_header_t *) (udp + 1);
678 ip->ip_version_and_header_length = 0x45;
680 ip->protocol = IP_PROTOCOL_UDP;
681 ip->flags_and_fragment_offset =
682 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
683 ip->src_address.as_u32 = ha->src_ip_address.as_u32;
684 ip->dst_address.as_u32 = ha->dst_ip_address.as_u32;
686 udp->src_port = clib_host_to_net_u16 (ha->src_port);
687 udp->dst_port = clib_host_to_net_u16 (ha->dst_port);
690 /* NAT HA protocol header */
691 h->version = NAT_HA_VERSION;
694 h->thread_index = clib_host_to_net_u32 (thread_index);
695 sequence_number = clib_atomic_fetch_add (&ha->sequence_number, 1);
696 h->sequence_number = clib_host_to_net_u32 (sequence_number);
699 sizeof (ip4_header_t) + sizeof (udp_header_t) +
700 sizeof (nat_ha_message_header_t);
704 nat_ha_send (vlib_frame_t *f, vlib_buffer_t *b, u8 is_resync,
705 u32 vlib_thread_index)
707 nat_ha_main_t *ha = &nat_ha_main;
708 nat_ha_per_thread_data_t *td = &ha->per_thread_data[vlib_thread_index];
709 nat_ha_message_header_t *h;
712 vlib_main_t *vm = vlib_get_main_by_index (vlib_thread_index);
714 ip = vlib_buffer_get_current (b);
715 udp = ip4_next_header (ip);
716 h = (nat_ha_message_header_t *) (udp + 1);
718 h->count = clib_host_to_net_u16 (td->state_sync_count);
720 ip->length = clib_host_to_net_u16 (b->current_length);
721 ip->checksum = ip4_header_checksum (ip);
722 udp->length = clib_host_to_net_u16 (b->current_length - sizeof (*ip));
724 nat_ha_resend_queue_add (vm, h->sequence_number, (u8 *) ip,
725 b->current_length, is_resync, vlib_thread_index);
727 vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
730 /* add NAT HA protocol event */
731 static_always_inline void
732 nat_ha_event_add (nat_ha_event_t *event, u8 do_flush, u32 session_thread_index,
735 nat44_ei_main_t *nm = &nat44_ei_main;
736 nat_ha_main_t *ha = &nat_ha_main;
737 u32 vlib_thread_index = vlib_get_thread_index ();
738 nat_ha_per_thread_data_t *td = &ha->per_thread_data[vlib_thread_index];
739 vlib_main_t *vm = vlib_get_main_by_index (vlib_thread_index);
740 vlib_buffer_t *b = 0;
744 b = td->state_sync_buffer;
746 if (PREDICT_FALSE (b == 0))
751 if (vlib_buffer_alloc (vm, &bi, 1) != 1)
753 nat_elog_warn (nm, "HA NAT state sync can't allocate buffer");
757 b = td->state_sync_buffer = vlib_get_buffer (vm, bi);
758 clib_memset (vnet_buffer (b), 0, sizeof (*vnet_buffer (b)));
759 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
764 bi = vlib_get_buffer_index (vm, b);
765 offset = td->state_sync_next_event_offset;
768 f = td->state_sync_frame;
769 if (PREDICT_FALSE (f == 0))
772 f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
773 td->state_sync_frame = f;
774 to_next = vlib_frame_vector_args (f);
779 if (PREDICT_FALSE (td->state_sync_count == 0))
780 nat_ha_header_create (b, &offset, session_thread_index);
782 if (PREDICT_TRUE (do_flush == 0))
784 clib_memcpy_fast (b->data + offset, event, sizeof (*event));
785 offset += sizeof (*event);
786 td->state_sync_count++;
787 b->current_length += sizeof (*event);
789 switch (event->event_type)
792 vlib_increment_simple_counter (
793 &ha->counters[NAT_HA_COUNTER_SEND_ADD], vlib_thread_index, 0, 1);
796 vlib_increment_simple_counter (
797 &ha->counters[NAT_HA_COUNTER_SEND_DEL], vlib_thread_index, 0, 1);
800 vlib_increment_simple_counter (
801 &ha->counters[NAT_HA_COUNTER_SEND_REFRESH], vlib_thread_index, 0,
810 (do_flush || offset + (sizeof (*event)) > ha->state_sync_path_mtu))
812 nat_ha_send (f, b, is_resync, vlib_thread_index);
813 td->state_sync_buffer = 0;
814 td->state_sync_frame = 0;
815 td->state_sync_count = 0;
819 clib_atomic_fetch_add (&ha->resync_ack_count, 1);
820 nat_ha_resync_fin ();
824 td->state_sync_next_event_offset = offset;
827 #define skip_if_disabled() \
829 nat_ha_main_t *ha = &nat_ha_main; \
830 if (PREDICT_TRUE (!ha->dst_port)) \
835 nat_ha_flush (u8 is_resync)
838 nat_ha_event_add (0, 1, 0, is_resync);
842 nat_ha_sadd (ip4_address_t * in_addr, u16 in_port, ip4_address_t * out_addr,
843 u16 out_port, ip4_address_t * eh_addr, u16 eh_port,
844 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto, u32 fib_index,
845 u16 flags, u32 thread_index, u8 is_resync)
847 nat_ha_event_t event;
851 clib_memset (&event, 0, sizeof (event));
852 event.event_type = NAT_HA_ADD;
853 event.flags = clib_host_to_net_u16 (flags);
854 event.in_addr = in_addr->as_u32;
855 event.in_port = in_port;
856 event.out_addr = out_addr->as_u32;
857 event.out_port = out_port;
858 event.eh_addr = eh_addr->as_u32;
859 event.eh_port = eh_port;
860 event.ehn_addr = ehn_addr->as_u32;
861 event.ehn_port = ehn_port;
862 event.fib_index = clib_host_to_net_u32 (fib_index);
863 event.protocol = proto;
864 nat_ha_event_add (&event, 0, thread_index, is_resync);
868 nat_ha_sdel (ip4_address_t *out_addr, u16 out_port, ip4_address_t *eh_addr,
869 u16 eh_port, u8 proto, u32 fib_index, u32 session_thread_index)
871 nat_ha_event_t event;
875 clib_memset (&event, 0, sizeof (event));
876 event.event_type = NAT_HA_DEL;
877 event.out_addr = out_addr->as_u32;
878 event.out_port = out_port;
879 event.eh_addr = eh_addr->as_u32;
880 event.eh_port = eh_port;
881 event.fib_index = clib_host_to_net_u32 (fib_index);
882 event.protocol = proto;
883 nat_ha_event_add (&event, 0, session_thread_index, 0);
887 nat_ha_sref (ip4_address_t * out_addr, u16 out_port, ip4_address_t * eh_addr,
888 u16 eh_port, u8 proto, u32 fib_index, u32 total_pkts,
889 u64 total_bytes, u32 thread_index, f64 * last_refreshed, f64 now)
891 nat_ha_main_t *ha = &nat_ha_main;
892 nat_ha_event_t event;
896 if ((*last_refreshed + ha->session_refresh_interval) > now)
899 *last_refreshed = now;
900 clib_memset (&event, 0, sizeof (event));
901 event.event_type = NAT_HA_REFRESH;
902 event.out_addr = out_addr->as_u32;
903 event.out_port = out_port;
904 event.eh_addr = eh_addr->as_u32;
905 event.eh_port = eh_port;
906 event.fib_index = clib_host_to_net_u32 (fib_index);
907 event.protocol = proto;
908 event.total_pkts = clib_host_to_net_u32 (total_pkts);
909 event.total_bytes = clib_host_to_net_u64 (total_bytes);
910 nat_ha_event_add (&event, 0, thread_index, 0);
913 static_always_inline u8
916 nat_ha_main_t *ha = &nat_ha_main;
920 /* per thread process waiting for interrupt */
922 nat_ha_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
925 u32 thread_index = vm->thread_index;
927 if (plugin_enabled () == 0)
930 /* flush HA NAT data under construction */
931 nat_ha_event_add (0, 1, thread_index, 0);
932 /* scan if we need to resend some non-ACKed data */
933 nat_ha_resend_scan (vm, thread_index);
938 VLIB_REGISTER_NODE (nat_ha_worker_node) = {
939 .function = nat_ha_worker_fn,
940 .type = VLIB_NODE_TYPE_INPUT,
941 .state = VLIB_NODE_STATE_INTERRUPT,
942 .name = "nat44-ei-ha-worker",
946 /* periodically send interrupt to each thread */
948 nat_ha_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
950 nat44_ei_main_t *nm = &nat44_ei_main;
951 nat_ha_main_t *ha = &nat_ha_main;
953 uword *event_data = 0;
956 vlib_process_wait_for_event (vm);
957 event_type = vlib_process_get_events (vm, &event_data);
959 nat_elog_info (nm, "nat44-ei-ha-process: bogus kickoff event received");
960 vec_reset_length (event_data);
964 vlib_process_wait_for_event_or_clock (vm, 1.0);
965 event_type = vlib_process_get_events (vm, &event_data);
966 vec_reset_length (event_data);
967 for (ti = 0; ti < vlib_get_n_threads (); ti++)
969 if (ti >= vec_len (ha->per_thread_data))
972 vlib_node_set_interrupt_pending (vlib_get_main_by_index (ti),
973 nat_ha_worker_node.index);
981 VLIB_REGISTER_NODE (nat_ha_process_node) = {
982 .function = nat_ha_process,
983 .type = VLIB_NODE_TYPE_PROCESS,
984 .name = "nat44-ei-ha-process",
989 nat_ha_get_resync_status (u8 * in_resync, u32 * resync_ack_missed)
991 nat_ha_main_t *ha = &nat_ha_main;
993 *in_resync = ha->in_resync;
994 *resync_ack_missed = ha->resync_ack_missed;
1004 format_nat_ha_trace (u8 * s, va_list * args)
1006 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1007 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1008 nat_ha_trace_t *t = va_arg (*args, nat_ha_trace_t *);
1010 s = format (s, "nat44-ei-ha: %u events from %U", t->event_count,
1011 format_ip4_address, &t->addr);
1018 NAT_HA_NEXT_IP4_LOOKUP,
1023 #define foreach_nat_ha_error \
1024 _(PROCESSED, "pkts-processed") \
1025 _(BAD_VERSION, "bad-version")
1029 #define _(sym, str) NAT_HA_ERROR_##sym,
1030 foreach_nat_ha_error
1035 static char *nat_ha_error_strings[] = {
1036 #define _(sym, str) str,
1037 foreach_nat_ha_error
1041 /* process received HA NAT protocol messages */
1043 nat_ha_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1044 vlib_frame_t * frame)
1046 u32 n_left_from, *from, next_index, *to_next;
1047 f64 now = vlib_time_now (vm);
1048 u32 thread_index = vm->thread_index;
1049 u32 pkts_processed = 0;
1050 ip4_main_t *i4m = &ip4_main;
1051 u8 host_config_ttl = i4m->host_config.ttl;
1052 nat_ha_main_t *ha = &nat_ha_main;
1054 from = vlib_frame_vector_args (frame);
1055 n_left_from = frame->n_vectors;
1056 next_index = node->cached_next_index;
1058 while (n_left_from > 0)
1062 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1064 while (n_left_from > 0 && n_left_to_next > 0)
1066 u32 bi0, next0, src_addr0, dst_addr0;;
1068 nat_ha_message_header_t *h0;
1070 u16 event_count0, src_port0, dst_port0, old_len0;
1080 n_left_to_next -= 1;
1082 b0 = vlib_get_buffer (vm, bi0);
1083 h0 = vlib_buffer_get_current (b0);
1084 vlib_buffer_advance (b0, -sizeof (*udp0));
1085 udp0 = vlib_buffer_get_current (b0);
1086 vlib_buffer_advance (b0, -sizeof (*ip0));
1087 ip0 = vlib_buffer_get_current (b0);
1089 next0 = NAT_HA_NEXT_DROP;
1091 if (h0->version != NAT_HA_VERSION)
1093 b0->error = node->errors[NAT_HA_ERROR_BAD_VERSION];
1097 event_count0 = clib_net_to_host_u16 (h0->count);
1098 /* ACK for previously send data */
1099 if (!event_count0 && (h0->flags & NAT_HA_FLAG_ACK))
1101 nat_ha_ack_recv (h0->sequence_number, thread_index);
1102 b0->error = node->errors[NAT_HA_ERROR_PROCESSED];
1106 e0 = (nat_ha_event_t *) (h0 + 1);
1108 /* process each event */
1109 while (event_count0)
1111 nat_ha_event_process (e0, now, thread_index);
1113 e0 = (nat_ha_event_t *) ((u8 *) e0 + sizeof (nat_ha_event_t));
1116 next0 = NAT_HA_NEXT_IP4_LOOKUP;
1119 /* reply with ACK */
1120 b0->current_length = sizeof (*ip0) + sizeof (*udp0) + sizeof (*h0);
1122 src_addr0 = ip0->src_address.data_u32;
1123 dst_addr0 = ip0->dst_address.data_u32;
1124 ip0->src_address.data_u32 = dst_addr0;
1125 ip0->dst_address.data_u32 = src_addr0;
1126 old_len0 = ip0->length;
1127 ip0->length = clib_host_to_net_u16 (b0->current_length);
1129 sum0 = ip0->checksum;
1130 sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
1132 ip0->ttl = host_config_ttl;
1134 ip_csum_update (sum0, old_len0, ip0->length, ip4_header_t,
1136 ip0->checksum = ip_csum_fold (sum0);
1139 src_port0 = udp0->src_port;
1140 dst_port0 = udp0->dst_port;
1141 udp0->src_port = dst_port0;
1142 udp0->dst_port = src_port0;
1144 clib_host_to_net_u16 (b0->current_length - sizeof (*ip0));
1146 h0->flags = NAT_HA_FLAG_ACK;
1148 vlib_increment_simple_counter (&ha->counters
1149 [NAT_HA_COUNTER_SEND_ACK],
1150 thread_index, 0, 1);
1153 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1154 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1156 nat_ha_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
1158 (void *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
1159 t->event_count = clib_net_to_host_u16 (h0->count);
1160 t->addr.as_u32 = ip->src_address.data_u32;
1163 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1164 to_next, n_left_to_next,
1168 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1171 vlib_node_increment_counter (vm, ha->ha_node_index, NAT_HA_ERROR_PROCESSED,
1174 return frame->n_vectors;
1178 VLIB_REGISTER_NODE (nat_ha_node) = {
1179 .function = nat_ha_node_fn,
1180 .name = "nat44-ei-ha",
1181 .vector_size = sizeof (u32),
1182 .format_trace = format_nat_ha_trace,
1183 .type = VLIB_NODE_TYPE_INTERNAL,
1184 .n_errors = ARRAY_LEN (nat_ha_error_strings),
1185 .error_strings = nat_ha_error_strings,
1186 .n_next_nodes = NAT_HA_N_NEXT,
1188 [NAT_HA_NEXT_IP4_LOOKUP] = "ip4-lookup",
1189 [NAT_HA_NEXT_DROP] = "error-drop",
1196 u32 next_worker_index;
1198 } nat_ha_handoff_trace_t;
1200 #define foreach_nat_ha_handoff_error \
1201 _(CONGESTION_DROP, "congestion drop") \
1202 _(SAME_WORKER, "same worker") \
1203 _(DO_HANDOFF, "do handoff")
1207 #define _(sym,str) NAT_HA_HANDOFF_ERROR_##sym,
1208 foreach_nat_ha_handoff_error
1210 NAT_HA_HANDOFF_N_ERROR,
1211 } nat_ha_handoff_error_t;
1213 static char *nat_ha_handoff_error_strings[] = {
1214 #define _(sym,string) string,
1215 foreach_nat_ha_handoff_error
1220 format_nat_ha_handoff_trace (u8 * s, va_list * args)
1222 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1223 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1224 nat_ha_handoff_trace_t *t = va_arg (*args, nat_ha_handoff_trace_t *);
1227 format (s, "NAT_HA_WORKER_HANDOFF: next-worker %d", t->next_worker_index);
1232 /* do worker handoff based on thread_index in NAT HA protcol header */
1234 nat_ha_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1235 vlib_frame_t * frame)
1237 nat_ha_main_t *ha = &nat_ha_main;
1238 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1239 u32 n_enq, n_left_from, *from;
1240 u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1241 u32 thread_index = vm->thread_index;
1242 u32 do_handoff = 0, same_worker = 0;
1244 from = vlib_frame_vector_args (frame);
1245 n_left_from = frame->n_vectors;
1246 vlib_get_buffers (vm, from, bufs, n_left_from);
1249 ti = thread_indices;
1251 while (n_left_from > 0)
1253 nat_ha_message_header_t *h0;
1255 h0 = vlib_buffer_get_current (b[0]);
1256 ti[0] = clib_net_to_host_u32 (h0->thread_index);
1258 if (ti[0] != thread_index)
1263 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1264 && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1266 nat_ha_handoff_trace_t *t =
1267 vlib_add_trace (vm, node, b[0], sizeof (*t));
1268 t->next_worker_index = ti[0];
1277 vlib_buffer_enqueue_to_thread (vm, ha->fq_index, from, thread_indices,
1278 frame->n_vectors, 1);
1280 if (n_enq < frame->n_vectors)
1281 vlib_node_increment_counter (vm, node->node_index,
1282 NAT_HA_HANDOFF_ERROR_CONGESTION_DROP,
1283 frame->n_vectors - n_enq);
1284 vlib_node_increment_counter (vm, node->node_index,
1285 NAT_HA_HANDOFF_ERROR_SAME_WORKER, same_worker);
1286 vlib_node_increment_counter (vm, node->node_index,
1287 NAT_HA_HANDOFF_ERROR_DO_HANDOFF, do_handoff);
1288 return frame->n_vectors;
1292 nat_ha_resync (u32 client_index, u32 pid,
1293 nat_ha_resync_event_cb_t event_callback)
1299 VLIB_REGISTER_NODE (nat_ha_handoff_node) = {
1300 .function = nat_ha_handoff_node_fn,
1301 .name = "nat44-ei-ha-handoff",
1302 .vector_size = sizeof (u32),
1303 .format_trace = format_nat_ha_handoff_trace,
1304 .type = VLIB_NODE_TYPE_INTERNAL,
1305 .n_errors = ARRAY_LEN(nat_ha_handoff_error_strings),
1306 .error_strings = nat_ha_handoff_error_strings,
1315 * fd.io coding-style-patch-verification: ON
1318 * eval: (c-set-style "gnu")