2 * Copyright (c) 2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 //#include <vnet/fib/fib_source.h>
17 #include <vnet/fib/fib_table.h>
18 #include <vnet/udp/udp_local.h>
19 #include <vppinfra/atomics.h>
21 #include <nat/lib/log.h>
23 #include <nat/nat44-ei/nat44_ei.h>
24 #include <nat/nat44-ei/nat44_ei_ha.h>
25 #include <nat/nat44-ei/nat44_ei_inlines.h>
27 /* number of retries */
28 #define NAT_HA_RETRIES 3
30 #define foreach_nat_ha_counter \
31 _(RECV_ADD, "add-event-recv", 0) \
32 _(RECV_DEL, "del-event-recv", 1) \
33 _(RECV_REFRESH, "refresh-event-recv", 2) \
34 _(SEND_ADD, "add-event-send", 3) \
35 _(SEND_DEL, "del-event-send", 4) \
36 _(SEND_REFRESH, "refresh-event-send", 5) \
37 _(RECV_ACK, "ack-recv", 6) \
38 _(SEND_ACK, "ack-send", 7) \
39 _(RETRY_COUNT, "retry-count", 8) \
40 _(MISSED_COUNT, "missed-count", 9)
42 /* NAT HA protocol version */
43 #define NAT_HA_VERSION 0x01
45 /* NAT HA protocol flags */
46 #define NAT_HA_FLAG_ACK 0x01
48 /* NAT HA event types */
54 } nat_ha_event_type_t;
56 /* NAT HA protocol header */
67 /* thread index where events originated */
69 } __attribute__ ((packed)) nat_ha_message_header_t;
71 /* NAT HA protocol event data */
90 } __attribute__ ((packed)) nat_ha_event_t;
94 #define _(N, s, v) NAT_HA_COUNTER_##N = v,
95 foreach_nat_ha_counter
100 /* data waiting for ACK */
103 /* sequence number */
107 /* next retry time */
113 } nat_ha_resend_entry_t;
115 /* per thread data */
118 /* buffer under construction */
119 vlib_buffer_t *state_sync_buffer;
120 /* frame containing NAT HA buffers */
121 vlib_frame_t *state_sync_frame;
122 /* number of events */
123 u16 state_sync_count;
124 /* next event offset */
125 u32 state_sync_next_event_offset;
126 /* data waiting for ACK */
127 nat_ha_resend_entry_t *resend_queue;
128 } nat_ha_per_thread_data_t;
130 /* NAT HA settings */
131 typedef struct nat_ha_main_s
134 /* local IP address and UDP port */
135 ip4_address_t src_ip_address;
137 /* failover IP address and UDP port */
138 ip4_address_t dst_ip_address;
140 /* path MTU between local and failover */
141 u32 state_sync_path_mtu;
142 /* number of seconds after which to send session counters refresh */
143 u32 session_refresh_interval;
145 vlib_simple_counter_main_t counters[NAT_HA_N_COUNTERS];
146 /* sequence number counter */
148 /* 1 if resync in progress */
150 /* number of remaining ACKs for resync */
151 u32 resync_ack_count;
152 /* number of missed ACK for resync */
153 u32 resync_ack_missed;
155 nat_ha_resync_event_cb_t event_callback;
158 /* per thread data */
160 nat_ha_per_thread_data_t *per_thread_data;
162 u32 ha_handoff_node_index;
163 u32 ha_process_node_index;
164 u32 ha_worker_node_index;
167 /* worker handoff frame-queue index */
171 nat_ha_main_t nat_ha_main;
173 static_always_inline void
174 nat44_ei_ha_sadd (ip4_address_t *in_addr, u16 in_port, ip4_address_t *out_addr,
175 u16 out_port, ip4_address_t *eh_addr, u16 eh_port,
176 ip4_address_t *ehn_addr, u16 ehn_port, u8 proto,
177 u32 fib_index, u16 flags, u32 thread_index)
179 nat44_ei_main_t *nm = &nat44_ei_main;
180 nat44_ei_main_per_thread_data_t *tnm = &nm->per_thread_data[thread_index];
182 nat44_ei_session_t *s;
183 clib_bihash_kv_8_8_t kv;
184 vlib_main_t *vm = vlib_get_main ();
185 f64 now = vlib_time_now (vm);
186 nat44_ei_outside_fib_t *outside_fib;
187 fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
189 .fp_proto = FIB_PROTOCOL_IP4,
192 .ip4.as_u32 = eh_addr->as_u32,
196 if (!(flags & NAT44_EI_SESSION_FLAG_STATIC_MAPPING))
198 if (nat44_ei_set_outside_address_and_port (nm->addresses, thread_index,
199 *out_addr, out_port, proto))
203 u = nat44_ei_user_get_or_create (nm, in_addr, fib_index, thread_index);
207 s = nat44_ei_session_alloc_or_recycle (nm, u, thread_index, now);
211 s->out2in.addr.as_u32 = out_addr->as_u32;
212 s->out2in.port = out_port;
213 s->nat_proto = proto;
216 s->ext_host_addr.as_u32 = eh_addr->as_u32;
217 s->ext_host_port = eh_port;
218 nat44_ei_user_session_increment (nm, u, nat44_ei_is_session_static (s));
219 switch (vec_len (nm->outside_fibs))
222 s->out2in.fib_index = nm->outside_fib_index;
225 s->out2in.fib_index = nm->outside_fibs[0].fib_index;
228 vec_foreach (outside_fib, nm->outside_fibs)
230 fei = fib_table_lookup (outside_fib->fib_index, &pfx);
231 if (FIB_NODE_INDEX_INVALID != fei)
233 if (fib_entry_get_resolving_interface (fei) != ~0)
235 s->out2in.fib_index = outside_fib->fib_index;
242 init_nat_o2i_kv (&kv, s, thread_index, s - tnm->sessions);
243 if (clib_bihash_add_del_8_8 (&nm->out2in, &kv, 1))
244 nat_elog_warn (nm, "out2in key add failed");
246 s->in2out.addr.as_u32 = in_addr->as_u32;
247 s->in2out.port = in_port;
248 s->in2out.fib_index = fib_index;
249 init_nat_i2o_kv (&kv, s, thread_index, s - tnm->sessions);
250 if (clib_bihash_add_del_8_8 (&nm->in2out, &kv, 1))
251 nat_elog_warn (nm, "in2out key add failed");
254 static_always_inline void
255 nat44_ei_ha_sdel (ip4_address_t *out_addr, u16 out_port,
256 ip4_address_t *eh_addr, u16 eh_port, u8 proto, u32 fib_index,
259 nat44_ei_main_t *nm = &nat44_ei_main;
260 clib_bihash_kv_8_8_t kv, value;
261 nat44_ei_session_t *s;
262 nat44_ei_main_per_thread_data_t *tnm;
264 init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
265 if (clib_bihash_search_8_8 (&nm->out2in, &kv, &value))
268 ASSERT (thread_index == nat_value_get_thread_index (&value));
269 tnm = vec_elt_at_index (nm->per_thread_data, thread_index);
270 s = pool_elt_at_index (tnm->sessions, nat_value_get_session_index (&value));
271 nat44_ei_free_session_data_v2 (nm, s, thread_index, 1);
272 nat44_ei_delete_session (nm, s, thread_index);
275 static_always_inline void
276 nat44_ei_ha_sref (ip4_address_t *out_addr, u16 out_port,
277 ip4_address_t *eh_addr, u16 eh_port, u8 proto, u32 fib_index,
278 u32 total_pkts, u64 total_bytes, u32 thread_index)
280 nat44_ei_main_t *nm = &nat44_ei_main;
281 clib_bihash_kv_8_8_t kv, value;
282 nat44_ei_session_t *s;
283 nat44_ei_main_per_thread_data_t *tnm;
285 tnm = vec_elt_at_index (nm->per_thread_data, thread_index);
287 init_nat_k (&kv, *out_addr, out_port, fib_index, proto);
288 if (clib_bihash_search_8_8 (&nm->out2in, &kv, &value))
291 s = pool_elt_at_index (tnm->sessions, nat_value_get_session_index (&value));
292 s->total_pkts = total_pkts;
293 s->total_bytes = total_bytes;
297 nat_ha_resync_fin (void)
299 nat44_ei_main_t *nm = &nat44_ei_main;
300 nat_ha_main_t *ha = &nat_ha_main;
302 /* if no more resync ACKs are remaining we are done */
303 if (ha->resync_ack_count)
307 if (ha->resync_ack_missed)
309 nat_elog_info (nm, "resync completed with result FAILED");
313 nat_elog_info (nm, "resync completed with result SUCCESS");
315 if (ha->event_callback)
316 ha->event_callback (ha->client_index, ha->pid, ha->resync_ack_missed);
319 /* cache HA NAT data waiting for ACK */
321 nat_ha_resend_queue_add (vlib_main_t *vm, u32 seq, u8 *data, u8 data_len,
322 u8 is_resync, u32 vlib_thread_index)
/* Appends one zeroed entry to the resend queue of thread vlib_thread_index,
 * records is_resync, copies data_len bytes from data into the entry and sets
 * its retry timer to 2 seconds from now. */
/* NOTE(review): data_len is a u8, yet nat_ha_send in this file passes
 * b->current_length for it.  A message longer than 255 bytes would have its
 * length wrapped and be cached truncated, so a later resend would not match
 * what was originally sent -- confirm and consider widening the parameter. */
324 nat_ha_main_t *ha = &nat_ha_main;
325 nat_ha_per_thread_data_t *td = &ha->per_thread_data[vlib_thread_index];
326 nat_ha_resend_entry_t *entry;
327 f64 now = vlib_time_now (vm);
/* vec_add2 hands back a pointer to the newly appended slot in entry */
329 vec_add2 (td->resend_queue, entry, 1);
330 clib_memset (entry, 0, sizeof (*entry));
331 entry->retry_timer = now + 2.0;
333 entry->is_resync = is_resync;
/* private copy of the message bytes, kept until the entry is removed */
334 vec_add (entry->data, data, data_len);
339 static_always_inline void
340 nat_ha_ack_recv (u32 seq, u32 thread_index)
342 nat44_ei_main_t *nm = &nat44_ei_main;
343 nat_ha_main_t *ha = &nat_ha_main;
344 nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
347 vec_foreach_index (i, td->resend_queue)
349 if (td->resend_queue[i].seq != seq)
352 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_ACK],
354 /* ACK received remove cached data */
355 if (td->resend_queue[i].is_resync)
357 clib_atomic_fetch_sub (&ha->resync_ack_count, 1);
358 nat_ha_resync_fin ();
360 vec_free (td->resend_queue[i].data);
361 vec_del1 (td->resend_queue, i);
362 nat_elog_debug_X1 (nm, "ACK for seq %d received", "i4",
363 clib_net_to_host_u32 (seq));
369 /* scan non-ACKed HA NAT for retry */
371 nat_ha_resend_scan (vlib_main_t *vm, u32 thread_index)
/* Walks the resend queue of thread_index.  Entries that already used
 * NAT_HA_RETRIES retries are logged, counted as missed and remembered in
 * to_delete; the other entries handled here get their cached bytes copied
 * into a new buffer that is put to ip4-lookup, and their retry timer is
 * pushed 2 seconds ahead.  Remembered entries are removed after the walk. */
373 nat44_ei_main_t *nm = &nat44_ei_main;
374 nat_ha_main_t *ha = &nat_ha_main;
375 nat_ha_per_thread_data_t *td = &ha->per_thread_data[thread_index];
376 u32 i, *del, *to_delete = 0;
377 vlib_buffer_t *b = 0;
381 f64 now = vlib_time_now (vm);
383 vec_foreach_index (i, td->resend_queue)
/* the statement guarded by this timer check is not visible in this view --
 * presumably it skips entries whose timer has not expired yet; confirm */
385 if (td->resend_queue[i].retry_timer > now)
388 /* maximum retry reached delete cached data */
389 if (td->resend_queue[i].retry_count >= NAT_HA_RETRIES)
391 nat_elog_notice_X1 (nm, "seq %d missed", "i4",
392 clib_net_to_host_u32 (td->resend_queue[i].seq));
/* a resync message that was never ACKed counts as missed and no longer
 * counts as outstanding */
393 if (td->resend_queue[i].is_resync)
395 clib_atomic_fetch_add (&ha->resync_ack_missed, 1);
396 clib_atomic_fetch_sub (&ha->resync_ack_count, 1);
397 nat_ha_resync_fin ();
/* removal is deferred until the walk over the queue has finished */
399 vec_add1 (to_delete, i);
400 vlib_increment_simple_counter (&ha->counters
401 [NAT_HA_COUNTER_MISSED_COUNT],
406 /* retry to send non-ACKed data */
407 nat_elog_debug_X1 (nm, "state sync seq %d resend", "i4",
408 clib_net_to_host_u32 (td->resend_queue[i].seq));
409 td->resend_queue[i].retry_count++;
410 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RETRY_COUNT],
412 if (vlib_buffer_alloc (vm, &bi, 1) != 1)
/* NOTE(review): the statement following this warning is not visible here.
 * If it returns from the function, to_delete is never freed and the
 * deletions queued so far are skipped -- confirm. */
414 nat_elog_warn (nm, "HA NAT state sync can't allocate buffer");
417 b = vlib_get_buffer (vm, bi);
418 b->current_length = vec_len (td->resend_queue[i].data);
419 b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
420 b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
421 vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
422 vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
/* the cached bytes start at the IPv4 header (nat_ha_send caches from ip) */
423 ip = vlib_buffer_get_current (b);
424 clib_memcpy (ip, td->resend_queue[i].data,
425 vec_len (td->resend_queue[i].data));
426 f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
427 to_next = vlib_frame_vector_args (f);
430 vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
431 td->resend_queue[i].retry_timer = now + 2.0;
/* NOTE(review): to_delete holds queue indices gathered in ascending order,
 * but vec_del1 fills the freed slot with the vector's last element (see
 * vppinfra/vec.h -- confirm).  After the first removal the remaining indices
 * may name a different entry or lie past the new end of the queue, so the
 * wrong data could be freed.  Walking to_delete from the highest index down
 * would keep the pending indices valid. */
434 vec_foreach (del, to_delete)
436 vec_free (td->resend_queue[*del].data);
437 vec_del1 (td->resend_queue, *del);
439 vec_free (to_delete);
445 nat_ha_main_t *ha = &nat_ha_main;
452 nat_ha_main_t *ha = &nat_ha_main;
458 nat_ha_set_node_indexes (nat_ha_main_t *ha, vlib_main_t *vm)
462 node = vlib_get_node_by_name (vm, (u8 *) "nat44-ei-ha-handoff");
463 ha->ha_handoff_node_index = node->index;
464 node = vlib_get_node_by_name (vm, (u8 *) "nat44-ei-ha-process");
465 ha->ha_process_node_index = node->index;
466 node = vlib_get_node_by_name (vm, (u8 *) "nat44-ei-ha-worker");
467 ha->ha_worker_node_index = node->index;
468 node = vlib_get_node_by_name (vm, (u8 *) "nat44-ei-ha");
469 ha->ha_node_index = node->index;
473 nat_ha_init (vlib_main_t * vm, u32 num_workers, u32 num_threads)
475 nat_ha_main_t *ha = &nat_ha_main;
476 clib_memset (ha, 0, sizeof (*ha));
478 nat_ha_set_node_indexes (ha, vm);
482 ha->num_workers = num_workers;
483 vec_validate (ha->per_thread_data, num_threads);
486 ha->counters[v].name = s; \
487 ha->counters[v].stat_segment_name = "/nat44-ei/ha/" s; \
488 vlib_validate_simple_counter (&ha->counters[v], 0); \
489 vlib_zero_simple_counter (&ha->counters[v], 0);
490 foreach_nat_ha_counter
495 nat_ha_set_listener (vlib_main_t *vm, ip4_address_t *addr, u16 port,
498 nat44_ei_main_t *nm = &nat44_ei_main;
499 nat_ha_main_t *ha = &nat_ha_main;
501 /* unregister previously set UDP port */
503 udp_unregister_dst_port (vm, ha->src_port, 1);
505 ha->src_ip_address.as_u32 = addr->as_u32;
507 ha->state_sync_path_mtu = path_mtu;
511 /* if multiple worker threads first go to handoff node */
512 if (ha->num_workers > 1)
514 if (ha->fq_index == ~0)
515 ha->fq_index = vlib_frame_queue_main_init (ha->ha_node_index, 0);
516 udp_register_dst_port (vm, port, ha->ha_handoff_node_index, 1);
520 udp_register_dst_port (vm, port, ha->ha_node_index, 1);
522 nat_elog_info_X1 (nm, "HA listening on port %d for state sync", "i4",
530 nat_ha_get_listener (ip4_address_t * addr, u16 * port, u32 * path_mtu)
532 nat_ha_main_t *ha = &nat_ha_main;
534 addr->as_u32 = ha->src_ip_address.as_u32;
535 *port = ha->src_port;
536 *path_mtu = ha->state_sync_path_mtu;
540 nat_ha_set_failover (vlib_main_t *vm, ip4_address_t *addr, u16 port,
541 u32 session_refresh_interval)
543 nat_ha_main_t *ha = &nat_ha_main;
545 ha->dst_ip_address.as_u32 = addr->as_u32;
547 ha->session_refresh_interval = session_refresh_interval;
549 vlib_process_signal_event (vm, ha->ha_process_node_index, 1, 0);
555 nat_ha_get_failover (ip4_address_t * addr, u16 * port,
556 u32 * session_refresh_interval)
558 nat_ha_main_t *ha = &nat_ha_main;
560 addr->as_u32 = ha->dst_ip_address.as_u32;
561 *port = ha->dst_port;
562 *session_refresh_interval = ha->session_refresh_interval;
565 static_always_inline void
566 nat_ha_recv_add (nat_ha_event_t * event, f64 now, u32 thread_index)
568 nat_ha_main_t *ha = &nat_ha_main;
569 ip4_address_t in_addr, out_addr, eh_addr, ehn_addr;
573 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_ADD],
576 in_addr.as_u32 = event->in_addr;
577 out_addr.as_u32 = event->out_addr;
578 eh_addr.as_u32 = event->eh_addr;
579 ehn_addr.as_u32 = event->ehn_addr;
580 fib_index = clib_net_to_host_u32 (event->fib_index);
581 flags = clib_net_to_host_u16 (event->flags);
583 nat44_ei_ha_sadd (&in_addr, event->in_port, &out_addr, event->out_port,
584 &eh_addr, event->eh_port, &ehn_addr, event->ehn_port,
585 event->protocol, fib_index, flags, thread_index);
588 static_always_inline void
589 nat_ha_recv_del (nat_ha_event_t * event, u32 thread_index)
591 nat_ha_main_t *ha = &nat_ha_main;
592 ip4_address_t out_addr, eh_addr;
595 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_DEL],
598 out_addr.as_u32 = event->out_addr;
599 eh_addr.as_u32 = event->eh_addr;
600 fib_index = clib_net_to_host_u32 (event->fib_index);
602 nat44_ei_ha_sdel (&out_addr, event->out_port, &eh_addr, event->eh_port,
603 event->protocol, fib_index, thread_index);
606 static_always_inline void
607 nat_ha_recv_refresh (nat_ha_event_t * event, f64 now, u32 thread_index)
609 nat_ha_main_t *ha = &nat_ha_main;
610 ip4_address_t out_addr, eh_addr;
611 u32 fib_index, total_pkts;
614 vlib_increment_simple_counter (&ha->counters[NAT_HA_COUNTER_RECV_REFRESH],
617 out_addr.as_u32 = event->out_addr;
618 eh_addr.as_u32 = event->eh_addr;
619 fib_index = clib_net_to_host_u32 (event->fib_index);
620 total_pkts = clib_net_to_host_u32 (event->total_pkts);
621 total_bytes = clib_net_to_host_u64 (event->total_bytes);
623 nat44_ei_ha_sref (&out_addr, event->out_port, &eh_addr, event->eh_port,
624 event->protocol, fib_index, total_pkts, total_bytes,
628 /* process received NAT HA event */
629 static_always_inline void
630 nat_ha_event_process (nat_ha_event_t * event, f64 now, u32 thread_index)
632 nat44_ei_main_t *nm = &nat44_ei_main;
633 switch (event->event_type)
636 nat_ha_recv_add (event, now, thread_index);
639 nat_ha_recv_del (event, thread_index);
642 nat_ha_recv_refresh (event, now, thread_index);
645 nat_elog_notice_X1 (nm, "Unsupported HA event type %d", "i4",
652 nat_ha_header_create (vlib_buffer_t * b, u32 * offset, u32 thread_index)
654 nat_ha_main_t *ha = &nat_ha_main;
655 nat_ha_message_header_t *h;
661 b->current_length = sizeof (*ip) + sizeof (*udp) + sizeof (*h);
662 b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
663 b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
664 vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
665 vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
666 ip = vlib_buffer_get_current (b);
667 udp = (udp_header_t *) (ip + 1);
668 h = (nat_ha_message_header_t *) (udp + 1);
671 ip->ip_version_and_header_length = 0x45;
673 ip->protocol = IP_PROTOCOL_UDP;
674 ip->flags_and_fragment_offset =
675 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
676 ip->src_address.as_u32 = ha->src_ip_address.as_u32;
677 ip->dst_address.as_u32 = ha->dst_ip_address.as_u32;
679 udp->src_port = clib_host_to_net_u16 (ha->src_port);
680 udp->dst_port = clib_host_to_net_u16 (ha->dst_port);
683 /* NAT HA protocol header */
684 h->version = NAT_HA_VERSION;
687 h->thread_index = clib_host_to_net_u32 (thread_index);
688 sequence_number = clib_atomic_fetch_add (&ha->sequence_number, 1);
689 h->sequence_number = clib_host_to_net_u32 (sequence_number);
692 sizeof (ip4_header_t) + sizeof (udp_header_t) +
693 sizeof (nat_ha_message_header_t);
697 nat_ha_send (vlib_frame_t *f, vlib_buffer_t *b, u8 is_resync,
698 u32 vlib_thread_index)
700 nat_ha_main_t *ha = &nat_ha_main;
701 nat_ha_per_thread_data_t *td = &ha->per_thread_data[vlib_thread_index];
702 nat_ha_message_header_t *h;
705 vlib_main_t *vm = vlib_get_main_by_index (vlib_thread_index);
707 ip = vlib_buffer_get_current (b);
708 udp = ip4_next_header (ip);
709 h = (nat_ha_message_header_t *) (udp + 1);
711 h->count = clib_host_to_net_u16 (td->state_sync_count);
713 ip->length = clib_host_to_net_u16 (b->current_length);
714 ip->checksum = ip4_header_checksum (ip);
715 udp->length = clib_host_to_net_u16 (b->current_length - sizeof (*ip));
717 nat_ha_resend_queue_add (vm, h->sequence_number, (u8 *) ip,
718 b->current_length, is_resync, vlib_thread_index);
720 vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
723 /* add NAT HA protocol event */
724 static_always_inline void
725 nat_ha_event_add (nat_ha_event_t *event, u8 do_flush, u32 session_thread_index,
728 nat44_ei_main_t *nm = &nat44_ei_main;
729 nat_ha_main_t *ha = &nat_ha_main;
730 u32 vlib_thread_index = vlib_get_thread_index ();
731 nat_ha_per_thread_data_t *td = &ha->per_thread_data[vlib_thread_index];
732 vlib_main_t *vm = vlib_get_main_by_index (vlib_thread_index);
733 vlib_buffer_t *b = 0;
737 b = td->state_sync_buffer;
739 if (PREDICT_FALSE (b == 0))
744 if (vlib_buffer_alloc (vm, &bi, 1) != 1)
746 nat_elog_warn (nm, "HA NAT state sync can't allocate buffer");
750 b = td->state_sync_buffer = vlib_get_buffer (vm, bi);
751 clib_memset (vnet_buffer (b), 0, sizeof (*vnet_buffer (b)));
756 bi = vlib_get_buffer_index (vm, b);
757 offset = td->state_sync_next_event_offset;
760 f = td->state_sync_frame;
761 if (PREDICT_FALSE (f == 0))
764 f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
765 td->state_sync_frame = f;
766 to_next = vlib_frame_vector_args (f);
771 if (PREDICT_FALSE (td->state_sync_count == 0))
772 nat_ha_header_create (b, &offset, session_thread_index);
774 if (PREDICT_TRUE (do_flush == 0))
776 clib_memcpy_fast (b->data + offset, event, sizeof (*event));
777 offset += sizeof (*event);
778 td->state_sync_count++;
779 b->current_length += sizeof (*event);
781 switch (event->event_type)
784 vlib_increment_simple_counter (
785 &ha->counters[NAT_HA_COUNTER_SEND_ADD], vlib_thread_index, 0, 1);
788 vlib_increment_simple_counter (
789 &ha->counters[NAT_HA_COUNTER_SEND_DEL], vlib_thread_index, 0, 1);
792 vlib_increment_simple_counter (
793 &ha->counters[NAT_HA_COUNTER_SEND_REFRESH], vlib_thread_index, 0,
802 (do_flush || offset + (sizeof (*event)) > ha->state_sync_path_mtu))
804 nat_ha_send (f, b, is_resync, vlib_thread_index);
805 td->state_sync_buffer = 0;
806 td->state_sync_frame = 0;
807 td->state_sync_count = 0;
811 clib_atomic_fetch_add (&ha->resync_ack_count, 1);
812 nat_ha_resync_fin ();
816 td->state_sync_next_event_offset = offset;
819 #define skip_if_disabled() \
821 nat_ha_main_t *ha = &nat_ha_main; \
822 if (PREDICT_TRUE (!ha->dst_port)) \
827 nat_ha_flush (u8 is_resync)
830 nat_ha_event_add (0, 1, 0, is_resync);
834 nat_ha_sadd (ip4_address_t * in_addr, u16 in_port, ip4_address_t * out_addr,
835 u16 out_port, ip4_address_t * eh_addr, u16 eh_port,
836 ip4_address_t * ehn_addr, u16 ehn_port, u8 proto, u32 fib_index,
837 u16 flags, u32 thread_index, u8 is_resync)
839 nat_ha_event_t event;
843 clib_memset (&event, 0, sizeof (event));
844 event.event_type = NAT_HA_ADD;
845 event.flags = clib_host_to_net_u16 (flags);
846 event.in_addr = in_addr->as_u32;
847 event.in_port = in_port;
848 event.out_addr = out_addr->as_u32;
849 event.out_port = out_port;
850 event.eh_addr = eh_addr->as_u32;
851 event.eh_port = eh_port;
852 event.ehn_addr = ehn_addr->as_u32;
853 event.ehn_port = ehn_port;
854 event.fib_index = clib_host_to_net_u32 (fib_index);
855 event.protocol = proto;
856 nat_ha_event_add (&event, 0, thread_index, is_resync);
860 nat_ha_sdel (ip4_address_t *out_addr, u16 out_port, ip4_address_t *eh_addr,
861 u16 eh_port, u8 proto, u32 fib_index, u32 session_thread_index)
863 nat_ha_event_t event;
867 clib_memset (&event, 0, sizeof (event));
868 event.event_type = NAT_HA_DEL;
869 event.out_addr = out_addr->as_u32;
870 event.out_port = out_port;
871 event.eh_addr = eh_addr->as_u32;
872 event.eh_port = eh_port;
873 event.fib_index = clib_host_to_net_u32 (fib_index);
874 event.protocol = proto;
875 nat_ha_event_add (&event, 0, session_thread_index, 0);
879 nat_ha_sref (ip4_address_t * out_addr, u16 out_port, ip4_address_t * eh_addr,
880 u16 eh_port, u8 proto, u32 fib_index, u32 total_pkts,
881 u64 total_bytes, u32 thread_index, f64 * last_refreshed, f64 now)
883 nat_ha_main_t *ha = &nat_ha_main;
884 nat_ha_event_t event;
888 if ((*last_refreshed + ha->session_refresh_interval) > now)
891 *last_refreshed = now;
892 clib_memset (&event, 0, sizeof (event));
893 event.event_type = NAT_HA_REFRESH;
894 event.out_addr = out_addr->as_u32;
895 event.out_port = out_port;
896 event.eh_addr = eh_addr->as_u32;
897 event.eh_port = eh_port;
898 event.fib_index = clib_host_to_net_u32 (fib_index);
899 event.protocol = proto;
900 event.total_pkts = clib_host_to_net_u32 (total_pkts);
901 event.total_bytes = clib_host_to_net_u64 (total_bytes);
902 nat_ha_event_add (&event, 0, thread_index, 0);
905 static_always_inline u8
908 nat_ha_main_t *ha = &nat_ha_main;
912 /* per thread process waiting for interrupt */
914 nat_ha_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
917 u32 thread_index = vm->thread_index;
919 if (plugin_enabled () == 0)
922 /* flush HA NAT data under construction */
923 nat_ha_event_add (0, 1, thread_index, 0);
924 /* scan if we need to resend some non-ACKed data */
925 nat_ha_resend_scan (vm, thread_index);
930 VLIB_REGISTER_NODE (nat_ha_worker_node) = {
931 .function = nat_ha_worker_fn,
932 .type = VLIB_NODE_TYPE_INPUT,
933 .state = VLIB_NODE_STATE_INTERRUPT,
934 .name = "nat44-ei-ha-worker",
938 /* periodically send interrupt to each thread */
940 nat_ha_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
942 nat44_ei_main_t *nm = &nat44_ei_main;
943 nat_ha_main_t *ha = &nat_ha_main;
945 uword *event_data = 0;
948 vlib_process_wait_for_event (vm);
949 event_type = vlib_process_get_events (vm, &event_data);
951 nat_elog_info (nm, "nat44-ei-ha-process: bogus kickoff event received");
952 vec_reset_length (event_data);
956 vlib_process_wait_for_event_or_clock (vm, 1.0);
957 event_type = vlib_process_get_events (vm, &event_data);
958 vec_reset_length (event_data);
959 for (ti = 0; ti < vlib_get_n_threads (); ti++)
961 if (ti >= vec_len (ha->per_thread_data))
964 vlib_node_set_interrupt_pending (vlib_get_main_by_index (ti),
965 nat_ha_worker_node.index);
973 VLIB_REGISTER_NODE (nat_ha_process_node) = {
974 .function = nat_ha_process,
975 .type = VLIB_NODE_TYPE_PROCESS,
976 .name = "nat44-ei-ha-process",
981 nat_ha_get_resync_status (u8 * in_resync, u32 * resync_ack_missed)
983 nat_ha_main_t *ha = &nat_ha_main;
985 *in_resync = ha->in_resync;
986 *resync_ack_missed = ha->resync_ack_missed;
996 format_nat_ha_trace (u8 * s, va_list * args)
998 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
999 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1000 nat_ha_trace_t *t = va_arg (*args, nat_ha_trace_t *);
1002 s = format (s, "nat44-ei-ha: %u events from %U", t->event_count,
1003 format_ip4_address, &t->addr);
1010 NAT_HA_NEXT_IP4_LOOKUP,
1015 #define foreach_nat_ha_error \
1016 _(PROCESSED, "pkts-processed") \
1017 _(BAD_VERSION, "bad-version")
1021 #define _(sym, str) NAT_HA_ERROR_##sym,
1022 foreach_nat_ha_error
1027 static char *nat_ha_error_strings[] = {
1028 #define _(sym, str) str,
1029 foreach_nat_ha_error
1033 /* process received HA NAT protocol messages */
1035 nat_ha_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1036 vlib_frame_t * frame)
/* Handles received NAT HA messages.  A message whose version differs from
 * NAT_HA_VERSION gets the BAD_VERSION error; a message with zero events and
 * the ACK flag is passed to nat_ha_ack_recv; otherwise each event is
 * processed and the same buffer is rewritten into an ACK reply (addresses
 * and ports swapped, length cut down to the three headers) with next set to
 * NAT_HA_NEXT_IP4_LOOKUP.  Returns frame->n_vectors. */
1038 u32 n_left_from, *from, next_index, *to_next;
1039 f64 now = vlib_time_now (vm);
1040 u32 thread_index = vm->thread_index;
1041 u32 pkts_processed = 0;
1042 ip4_main_t *i4m = &ip4_main;
1043 u8 host_config_ttl = i4m->host_config.ttl;
1044 nat_ha_main_t *ha = &nat_ha_main;
1046 from = vlib_frame_vector_args (frame);
1047 n_left_from = frame->n_vectors;
1048 next_index = node->cached_next_index;
1050 while (n_left_from > 0)
1054 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1056 while (n_left_from > 0 && n_left_to_next > 0)
1058 u32 bi0, next0, src_addr0, dst_addr0;;
1060 nat_ha_message_header_t *h0;
1062 u16 event_count0, src_port0, dst_port0, old_len0;
1072 n_left_to_next -= 1;
1074 b0 = vlib_get_buffer (vm, bi0);
/* on entry the buffer's current position is read as the NAT HA header; the
 * buffer is then moved back by sizeof (*udp0) and sizeof (*ip0) to reach the
 * UDP and IPv4 headers, i.e. they are assumed to sit directly in front */
1075 h0 = vlib_buffer_get_current (b0);
1076 vlib_buffer_advance (b0, -sizeof (*udp0));
1077 udp0 = vlib_buffer_get_current (b0);
1078 vlib_buffer_advance (b0, -sizeof (*ip0));
1079 ip0 = vlib_buffer_get_current (b0);
/* default disposition; only the event-processing path below changes it */
1081 next0 = NAT_HA_NEXT_DROP;
1083 if (h0->version != NAT_HA_VERSION)
1085 b0->error = node->errors[NAT_HA_ERROR_BAD_VERSION];
1089 event_count0 = clib_net_to_host_u16 (h0->count);
1090 /* ACK for previously sent data */
1091 if (!event_count0 && (h0->flags & NAT_HA_FLAG_ACK))
/* the sequence number is handed over as received, without byte swapping */
1093 nat_ha_ack_recv (h0->sequence_number, thread_index);
1094 b0->error = node->errors[NAT_HA_ERROR_PROCESSED];
/* events follow the NAT HA header back to back */
1098 e0 = (nat_ha_event_t *) (h0 + 1);
/* NOTE(review): event_count0 comes straight from the received header and no
 * comparison against the buffer's length is visible before this loop.  A
 * count larger than the events actually carried would make the loop read
 * past the received data -- confirm whether a check exists elsewhere. */
1100 /* process each event */
1101 while (event_count0)
1103 nat_ha_event_process (e0, now, thread_index);
1105 e0 = (nat_ha_event_t *) ((u8 *) e0 + sizeof (nat_ha_event_t));
1108 next0 = NAT_HA_NEXT_IP4_LOOKUP;
1111 /* reply with ACK */
/* the reply carries only the IPv4, UDP and NAT HA headers */
1112 b0->current_length = sizeof (*ip0) + sizeof (*udp0) + sizeof (*h0);
1114 src_addr0 = ip0->src_address.data_u32;
1115 dst_addr0 = ip0->dst_address.data_u32;
1116 ip0->src_address.data_u32 = dst_addr0;
1117 ip0->dst_address.data_u32 = src_addr0;
1118 old_len0 = ip0->length;
1119 ip0->length = clib_host_to_net_u16 (b0->current_length);
/* the IPv4 checksum is updated incrementally for the ttl and length fields
 * that change (swapping source and destination leaves the sum unchanged) */
1121 sum0 = ip0->checksum;
1122 sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
1124 ip0->ttl = host_config_ttl;
1126 ip_csum_update (sum0, old_len0, ip0->length, ip4_header_t,
1128 ip0->checksum = ip_csum_fold (sum0);
1131 src_port0 = udp0->src_port;
1132 dst_port0 = udp0->dst_port;
1133 udp0->src_port = dst_port0;
1134 udp0->dst_port = src_port0;
1136 clib_host_to_net_u16 (b0->current_length - sizeof (*ip0));
1138 h0->flags = NAT_HA_FLAG_ACK;
1140 vlib_increment_simple_counter (&ha->counters
1141 [NAT_HA_COUNTER_SEND_ACK],
1142 thread_index, 0, 1);
1145 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1146 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
1148 nat_ha_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
1150 (void *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
1151 t->event_count = clib_net_to_host_u16 (h0->count);
1152 t->addr.as_u32 = ip->src_address.data_u32;
1155 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1156 to_next, n_left_to_next,
1160 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1163 vlib_node_increment_counter (vm, ha->ha_node_index, NAT_HA_ERROR_PROCESSED,
1166 return frame->n_vectors;
1170 VLIB_REGISTER_NODE (nat_ha_node) = {
1171 .function = nat_ha_node_fn,
1172 .name = "nat44-ei-ha",
1173 .vector_size = sizeof (u32),
1174 .format_trace = format_nat_ha_trace,
1175 .type = VLIB_NODE_TYPE_INTERNAL,
1176 .n_errors = ARRAY_LEN (nat_ha_error_strings),
1177 .error_strings = nat_ha_error_strings,
1178 .n_next_nodes = NAT_HA_N_NEXT,
1180 [NAT_HA_NEXT_IP4_LOOKUP] = "ip4-lookup",
1181 [NAT_HA_NEXT_DROP] = "error-drop",
1188 u32 next_worker_index;
1190 } nat_ha_handoff_trace_t;
1192 #define foreach_nat_ha_handoff_error \
1193 _(CONGESTION_DROP, "congestion drop") \
1194 _(SAME_WORKER, "same worker") \
1195 _(DO_HANDOFF, "do handoff")
1199 #define _(sym,str) NAT_HA_HANDOFF_ERROR_##sym,
1200 foreach_nat_ha_handoff_error
1202 NAT_HA_HANDOFF_N_ERROR,
1203 } nat_ha_handoff_error_t;
1205 static char *nat_ha_handoff_error_strings[] = {
1206 #define _(sym,string) string,
1207 foreach_nat_ha_handoff_error
1212 format_nat_ha_handoff_trace (u8 * s, va_list * args)
1214 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1215 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1216 nat_ha_handoff_trace_t *t = va_arg (*args, nat_ha_handoff_trace_t *);
1219 format (s, "NAT_HA_WORKER_HANDOFF: next-worker %d", t->next_worker_index);
1224 /* do worker handoff based on thread_index in NAT HA protocol header */
1226 nat_ha_handoff_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
1227 vlib_frame_t * frame)
/* For every buffer in the frame, reads thread_index from the NAT HA header
 * at the buffer's current position and records it as the target thread,
 * then hands all buffers to vlib_buffer_enqueue_to_thread on ha->fq_index.
 * Buffers that could not be enqueued are counted as CONGESTION_DROP; the
 * same_worker and do_handoff totals are reported as node counters.
 * Returns frame->n_vectors. */
1229 nat_ha_main_t *ha = &nat_ha_main;
1230 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1231 u32 n_enq, n_left_from, *from;
1232 u16 thread_indices[VLIB_FRAME_SIZE], *ti;
1233 u32 thread_index = vm->thread_index;
1234 u32 do_handoff = 0, same_worker = 0;
1236 from = vlib_frame_vector_args (frame);
1237 n_left_from = frame->n_vectors;
1238 vlib_get_buffers (vm, from, bufs, n_left_from);
1241 ti = thread_indices;
1243 while (n_left_from > 0)
1245 nat_ha_message_header_t *h0;
1247 h0 = vlib_buffer_get_current (b[0]);
/* NOTE(review): the target thread comes from the received header.  The
 * 32-bit value is narrowed into a u16 slot and no range check against the
 * number of threads is visible before it reaches the enqueue call below --
 * confirm whether validation happens elsewhere. */
1248 ti[0] = clib_net_to_host_u32 (h0->thread_index);
1250 if (ti[0] != thread_index)
1255 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
1256 && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
1258 nat_ha_handoff_trace_t *t =
1259 vlib_add_trace (vm, node, b[0], sizeof (*t));
1260 t->next_worker_index = ti[0];
1268 n_enq = vlib_buffer_enqueue_to_thread (vm, node, ha->fq_index, from,
1269 thread_indices, frame->n_vectors, 1);
/* anything not accepted by the enqueue call is reported as congestion */
1271 if (n_enq < frame->n_vectors)
1272 vlib_node_increment_counter (vm, node->node_index,
1273 NAT_HA_HANDOFF_ERROR_CONGESTION_DROP,
1274 frame->n_vectors - n_enq);
1275 vlib_node_increment_counter (vm, node->node_index,
1276 NAT_HA_HANDOFF_ERROR_SAME_WORKER, same_worker);
1277 vlib_node_increment_counter (vm, node->node_index,
1278 NAT_HA_HANDOFF_ERROR_DO_HANDOFF, do_handoff);
1279 return frame->n_vectors;
1283 nat_ha_resync (u32 client_index, u32 pid,
1284 nat_ha_resync_event_cb_t event_callback)
1290 VLIB_REGISTER_NODE (nat_ha_handoff_node) = {
1291 .function = nat_ha_handoff_node_fn,
1292 .name = "nat44-ei-ha-handoff",
1293 .vector_size = sizeof (u32),
1294 .format_trace = format_nat_ha_handoff_trace,
1295 .type = VLIB_NODE_TYPE_INTERNAL,
1296 .n_errors = ARRAY_LEN(nat_ha_handoff_error_strings),
1297 .error_strings = nat_ha_handoff_error_strings,
1306 * fd.io coding-style-patch-verification: ON
1309 * eval: (c-set-style "gnu")