2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 #include <vnet/ip/ip_frag.h>
18 #include <vnet/ip/ip4_to_ip6.h>
19 #include <vnet/ip/ip6_to_ip4.h>
20 #include <vnet/ip/reass/ip4_sv_reass.h>
/* Next-node index enums for the MAP IPv6 graph nodes.
 * NOTE(review): the embedded original line numbering is non-contiguous —
 * enum headers/braces appear elided from this listing; enumerators below
 * are preserved byte-for-byte. */
24 IP6_MAP_NEXT_IP4_LOOKUP,
25 #ifdef MAP_SKIP_IP6_LOOKUP
26 IP6_MAP_NEXT_IP4_REWRITE,
28 IP6_MAP_NEXT_IP6_REASS,
29 IP6_MAP_NEXT_IP4_REASS,
30 IP6_MAP_NEXT_IP4_FRAGMENT,
31 IP6_MAP_NEXT_IP6_ICMP_RELAY,
32 IP6_MAP_NEXT_IP6_LOCAL,
/* Next nodes reachable from the ip6-map-ip6-reass node. */
38 enum ip6_map_ip6_reass_next_e
40 IP6_MAP_IP6_REASS_NEXT_IP6_MAP,
41 IP6_MAP_IP6_REASS_NEXT_DROP,
42 IP6_MAP_IP6_REASS_N_NEXT,
/* Next nodes reachable from the ip6-map-post-ip4-reass node. */
45 enum ip6_map_post_ip4_reass_next_e
47 IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP,
48 IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT,
49 IP6_MAP_POST_IP4_REASS_NEXT_DROP,
50 IP6_MAP_POST_IP4_REASS_N_NEXT,
/* Next nodes reachable from the ip6-map-icmp-relay node. */
53 enum ip6_icmp_relay_next_e
55 IP6_ICMP_RELAY_NEXT_IP4_LOOKUP,
56 IP6_ICMP_RELAY_NEXT_DROP,
57 IP6_ICMP_RELAY_N_NEXT,
/* Forward declarations of the node registrations defined at the bottom
 * of this file; needed so the node functions can reference their own
 * node index (e.g. for error counters and trace). */
60 vlib_node_registration_t ip6_map_post_ip4_reass_node;
61 vlib_node_registration_t ip6_map_ip6_reass_node;
62 static vlib_node_registration_t ip6_map_icmp_relay_node;
/* Trace record for the ip6-map-post-ip4-reass node.
 * NOTE(review): struct body lines are elided from this listing; only the
 * closing typedef line is visible. */
69 } map_ip6_map_ip4_reass_trace_t;
/* Trace formatter: prints domain index, L4 port (network order in the
 * record, converted here), and cached/forwarded status. */
72 format_ip6_map_post_ip4_reass_trace (u8 * s, va_list * args)
74 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
75 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
76 map_ip6_map_ip4_reass_trace_t *t =
77 va_arg (*args, map_ip6_map_ip4_reass_trace_t *);
78 return format (s, "MAP domain index: %d L4 port: %u Status: %s",
79 t->map_domain_index, clib_net_to_host_u16 (t->port),
80 t->cached ? "cached" : "forwarded");
/* Trace record for the ip6-map-ip6-reass node.
 * NOTE(review): struct body lines are elided from this listing. */
88 } map_ip6_map_ip6_reass_trace_t;
/* Trace formatter: prints fragment offset, fragment length, and whether
 * the fragment was forwarded out ("out") or buffered ("in"). */
91 format_ip6_map_ip6_reass_trace (u8 * s, va_list * args)
93 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
94 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
95 map_ip6_map_ip6_reass_trace_t *t =
96 va_arg (*args, map_ip6_map_ip6_reass_trace_t *);
97 return format (s, "Offset: %d Fragment length: %d Status: %s", t->offset,
98 t->frag_len, t->out ? "out" : "in");
/* MAP inbound security check: derives the expected IPv6 source address
 * (prefix and suffix halves) from the inner IPv4 source address + port
 * using the domain's MAP rules, and compares it against the actual IPv6
 * source address of the encapsulating header.
 * NOTE(review): the return statements and closing brace appear elided
 * from this listing — presumably returns false on mismatch, true
 * otherwise; confirm against the full source. */
104 static_always_inline bool
105 ip6_map_sec_check (map_domain_t * d, u16 port, ip4_header_t * ip4,
108 u16 sp4 = clib_net_to_host_u16 (port);
109 u32 sa4 = clib_net_to_host_u32 (ip4->src_address.as_u32);
/* Expected source IPv6 halves computed from domain rules. */
110 u64 sal6 = map_get_pfx (d, sa4, sp4);
111 u64 sar6 = map_get_sfx (d, sa4, sp4);
114 (sal6 != clib_net_to_host_u64 (ip6->src_address.as_u64[0])
115 || sar6 != clib_net_to_host_u64 (ip6->src_address.as_u64[1])))
/* Wrapper around ip6_map_sec_check for the ip6-map fast path:
 * - shared-address domains (ea_bits_len or rules present, psid in use):
 *   non-fragmented packets are checked inline using the L4 port;
 *   fragments are diverted to IPv4 shallow virtual reassembly
 *   (IP6_MAP_NEXT_IP4_REASS) when sec_check_frag is enabled.
 * - port extraction failure yields MAP_ERROR_BAD_PROTOCOL.
 * Results are reported through *next and *error.
 * NOTE(review): several brace/else lines are elided from this listing. */
120 static_always_inline void
121 ip6_map_security_check (map_domain_t * d, vlib_buffer_t * b0,
122 ip4_header_t * ip4, ip6_header_t * ip6, u32 * next,
125 map_main_t *mm = &map_main;
126 if (d->ea_bits_len || d->rules)
128 if (d->psid_length > 0)
130 if (!ip4_is_fragment (ip4))
/* ip4_get_port(..., 1) -> sender/source port of the inner packet. */
132 u16 port = ip4_get_port (ip4, 1);
137 ip6_map_sec_check (d, port, ip4,
138 ip6) ? MAP_ERROR_NONE :
139 MAP_ERROR_DECAP_SEC_CHECK;
143 *error = MAP_ERROR_BAD_PROTOCOL;
/* Fragmented inner IPv4: defer the check to virtual reassembly. */
148 if (mm->sec_check_frag)
150 vnet_buffer (b0)->ip.reass.next_index =
151 map_main.ip4_sv_reass_custom_next_index;
152 *next = IP6_MAP_NEXT_IP4_REASS;
/* If a pre-resolved IPv4 FIB entry exists (MAP_SKIP_IP6_LOOKUP build),
 * stamp its DPO index into the buffer's TX adjacency so the ip4-lookup
 * node can be bypassed.
 * NOTE(review): the return statements and #endif appear elided from this
 * listing — presumably returns true when the bypass was applied. */
159 static_always_inline bool
160 ip6_map_ip4_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
162 #ifdef MAP_SKIP_IP6_LOOKUP
163 if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP4].fei)
165 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
166 pre_resolved[FIB_PROTOCOL_IP4].dpo.dpoi_index;
/* ip6-map node function: decapsulates MAP-E IPv6 packets.
 * Dispatches each packet by outer protocol:
 *   IP_IN_IP            -> domain lookup, security check, then ip4-lookup
 *                          (or ip4-frag when over domain MTU, or the
 *                          pre-resolved rewrite bypass)
 *   ICMP6 echo req/rep  -> ip6-local; other ICMP6 -> ip6-map-icmp-relay
 *   IPv6 fragment hdr   -> ip6-map-ip6-reass
 *   anything else       -> drop (or vnet feature chain in the x1 loop)
 * Standard dual-loop (x2) + single-loop (x1) vlib dispatch structure.
 * NOTE(review): the embedded numbering is non-contiguous — braces, some
 * conditions and trailer lines are elided; code preserved byte-for-byte. */
177 ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
179 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
180 vlib_node_runtime_t *error_node =
181 vlib_node_get_runtime (vm, ip6_map_node.index);
182 map_main_t *mm = &map_main;
183 vlib_combined_counter_main_t *cm = mm->domain_counters;
184 u32 thread_index = vm->thread_index;
186 from = vlib_frame_vector_args (frame);
187 n_left_from = frame->n_vectors;
188 next_index = node->cached_next_index;
189 while (n_left_from > 0)
191 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-loop: process two packets per iteration while >=4 remain. */
194 while (n_left_from >= 4 && n_left_to_next >= 2)
197 vlib_buffer_t *p0, *p1;
198 u8 error0 = MAP_ERROR_NONE;
199 u8 error1 = MAP_ERROR_NONE;
200 map_domain_t *d0 = 0, *d1 = 0;
201 ip4_header_t *ip40, *ip41;
202 ip6_header_t *ip60, *ip61;
203 u16 port0 = 0, port1 = 0;
204 u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
205 u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
206 u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP;
208 /* Prefetch next iteration. */
210 vlib_buffer_t *p2, *p3;
212 p2 = vlib_get_buffer (vm, from[2]);
213 p3 = vlib_get_buffer (vm, from[3]);
215 vlib_prefetch_buffer_header (p2, LOAD);
216 vlib_prefetch_buffer_header (p3, LOAD);
218 /* IPv6 + IPv4 header + 8 bytes of ULP */
219 CLIB_PREFETCH (p2->data, 68, LOAD);
220 CLIB_PREFETCH (p3->data, 68, LOAD);
223 pi0 = to_next[0] = from[0];
224 pi1 = to_next[1] = from[1];
/* Keep outer IPv6 header pointer, then advance to the inner IPv4. */
230 p0 = vlib_get_buffer (vm, pi0);
231 p1 = vlib_get_buffer (vm, pi1);
232 ip60 = vlib_buffer_get_current (p0);
233 ip61 = vlib_buffer_get_current (p1);
234 vlib_buffer_advance (p0, sizeof (ip6_header_t));
235 vlib_buffer_advance (p1, sizeof (ip6_header_t));
236 ip40 = vlib_buffer_get_current (p0);
237 ip41 = vlib_buffer_get_current (p1);
240 * Encapsulated IPv4 packet
241 * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
242 * - Lookup/Rewrite or Fragment node in case of packet > MTU
243 * Fragmented IPv6 packet
245 * - Error -> Pass to ICMPv6/ICMPv4 relay
246 * - Info -> Pass to IPv6 local
247 * Anything else -> drop
/* Packet 0 dispatch by outer protocol. */
250 (ip60->protocol == IP_PROTOCOL_IP_IN_IP
251 && clib_net_to_host_u16 (ip60->payload_length) > 20))
254 ip4_map_get_domain ((ip4_address_t *) & ip40->
255 src_address.as_u32, &map_domain_index0,
258 else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
259 clib_net_to_host_u16 (ip60->payload_length) >
260 sizeof (icmp46_header_t))
262 icmp46_header_t *icmp = (void *) (ip60 + 1);
263 next0 = (icmp->type == ICMP6_echo_request
265 ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
266 IP6_MAP_NEXT_IP6_ICMP_RELAY;
268 else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
270 next0 = IP6_MAP_NEXT_IP6_REASS;
274 error0 = MAP_ERROR_BAD_PROTOCOL;
/* Packet 1 dispatch — mirror of packet 0. */
277 (ip61->protocol == IP_PROTOCOL_IP_IN_IP
278 && clib_net_to_host_u16 (ip61->payload_length) > 20))
281 ip4_map_get_domain ((ip4_address_t *) & ip41->
282 src_address.as_u32, &map_domain_index1,
285 else if (ip61->protocol == IP_PROTOCOL_ICMP6 &&
286 clib_net_to_host_u16 (ip61->payload_length) >
287 sizeof (icmp46_header_t))
289 icmp46_header_t *icmp = (void *) (ip61 + 1);
290 next1 = (icmp->type == ICMP6_echo_request
292 ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
293 IP6_MAP_NEXT_IP6_ICMP_RELAY;
295 else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
297 next1 = IP6_MAP_NEXT_IP6_REASS;
301 error1 = MAP_ERROR_BAD_PROTOCOL;
306 /* MAP inbound security check */
307 ip6_map_security_check (d0, p0, ip40, ip60, &next0, &error0);
/* Over-MTU inner packets are sent to ip4-frag. */
309 if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
310 next0 == IP6_MAP_NEXT_IP4_LOOKUP))
314 && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
316 vnet_buffer (p0)->ip_frag.flags = 0;
317 vnet_buffer (p0)->ip_frag.next_index =
318 IP4_FRAG_NEXT_IP4_LOOKUP;
319 vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
320 next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
325 ip6_map_ip4_lookup_bypass (p0,
327 IP6_MAP_NEXT_IP4_REWRITE : next0;
329 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
331 map_domain_index0, 1,
338 /* MAP inbound security check */
339 ip6_map_security_check (d1, p1, ip41, ip61, &next1, &error1);
341 if (PREDICT_TRUE (error1 == MAP_ERROR_NONE &&
342 next1 == IP6_MAP_NEXT_IP4_LOOKUP))
346 && (clib_host_to_net_u16 (ip41->length) > d1->mtu)))
348 vnet_buffer (p1)->ip_frag.flags = 0;
349 vnet_buffer (p1)->ip_frag.next_index =
350 IP4_FRAG_NEXT_IP4_LOOKUP;
351 vnet_buffer (p1)->ip_frag.mtu = d1->mtu;
352 next1 = IP6_MAP_NEXT_IP4_FRAGMENT;
357 ip6_map_ip4_lookup_bypass (p1,
359 IP6_MAP_NEXT_IP4_REWRITE : next1;
361 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
363 map_domain_index1, 1,
/* Packet tracing. */
369 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
371 map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
372 tr->map_domain_index = map_domain_index0;
376 if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
378 map_trace_t *tr = vlib_add_trace (vm, node, p1, sizeof (*tr));
379 tr->map_domain_index = map_domain_index1;
/* Security-check failures generate ICMPv6 admin-prohibited errors
 * (back at the outer IPv6 header) when icmp6_enabled is set. */
383 if (error0 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
385 /* Set ICMP parameters */
386 vlib_buffer_advance (p0, -sizeof (ip6_header_t));
387 icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
388 ICMP6_destination_unreachable_source_address_failed_policy,
390 next0 = IP6_MAP_NEXT_ICMP;
394 next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
397 if (error1 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
399 /* Set ICMP parameters */
400 vlib_buffer_advance (p1, -sizeof (ip6_header_t));
401 icmp6_error_set_vnet_buffer (p1, ICMP6_destination_unreachable,
402 ICMP6_destination_unreachable_source_address_failed_policy,
404 next1 = IP6_MAP_NEXT_ICMP;
408 next1 = (error1 == MAP_ERROR_NONE) ? next1 : IP6_MAP_NEXT_DROP;
/* ip6-local expects the buffer rewound to the outer IPv6 header. */
412 if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
413 vlib_buffer_advance (p0, -sizeof (ip6_header_t));
414 if (next1 == IP6_MAP_NEXT_IP6_LOCAL)
415 vlib_buffer_advance (p1, -sizeof (ip6_header_t));
417 p0->error = error_node->errors[error0];
418 p1->error = error_node->errors[error1];
419 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
420 n_left_to_next, pi0, pi1, next0,
/* Single-loop: remaining packets one at a time. */
425 while (n_left_from > 0 && n_left_to_next > 0)
429 u8 error0 = MAP_ERROR_NONE;
430 map_domain_t *d0 = 0;
434 u32 map_domain_index0 = ~0;
435 u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
437 pi0 = to_next[0] = from[0];
443 p0 = vlib_get_buffer (vm, pi0);
444 ip60 = vlib_buffer_get_current (p0);
445 vlib_buffer_advance (p0, sizeof (ip6_header_t));
446 ip40 = vlib_buffer_get_current (p0);
449 * Encapsulated IPv4 packet
450 * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
451 * - Lookup/Rewrite or Fragment node in case of packet > MTU
452 * Fragmented IPv6 packet
454 * - Error -> Pass to ICMPv6/ICMPv4 relay
455 * - Info -> Pass to IPv6 local
456 * Anything else -> drop
459 (ip60->protocol == IP_PROTOCOL_IP_IN_IP
460 && clib_net_to_host_u16 (ip60->payload_length) > 20))
463 ip4_map_get_domain ((ip4_address_t *) & ip40->
464 src_address.as_u32, &map_domain_index0,
467 else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
468 clib_net_to_host_u16 (ip60->payload_length) >
469 sizeof (icmp46_header_t))
471 icmp46_header_t *icmp = (void *) (ip60 + 1);
472 next0 = (icmp->type == ICMP6_echo_request
474 ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
475 IP6_MAP_NEXT_IP6_ICMP_RELAY;
/* Only fragments whose next header is IPv4-in-IPv6 go to reass
 * in the single loop (stricter than the dual-loop check above). */
477 else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION &&
478 (((ip6_frag_hdr_t *) (ip60 + 1))->next_hdr ==
479 IP_PROTOCOL_IP_IN_IP))
481 next0 = IP6_MAP_NEXT_IP6_REASS;
485 /* XXX: Move get_domain to ip6_get_domain lookup on source */
486 //error0 = MAP_ERROR_BAD_PROTOCOL;
/* Unknown protocol: hand off to the next ip6-unicast feature. */
487 vlib_buffer_advance (p0, -sizeof (ip6_header_t));
488 vnet_feature_next (&next0, p0);
493 /* MAP inbound security check */
494 ip6_map_security_check (d0, p0, ip40, ip60, &next0, &error0);
496 if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
497 next0 == IP6_MAP_NEXT_IP4_LOOKUP))
501 && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
503 vnet_buffer (p0)->ip_frag.flags = 0;
504 vnet_buffer (p0)->ip_frag.next_index =
505 IP4_FRAG_NEXT_IP4_LOOKUP;
506 vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
507 next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
512 ip6_map_ip4_lookup_bypass (p0,
514 IP6_MAP_NEXT_IP4_REWRITE : next0;
516 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
518 map_domain_index0, 1,
524 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
526 map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
527 tr->map_domain_index = map_domain_index0;
528 tr->port = (u16) port0;
/* Single loop also relays ICMP for NO_DOMAIN, not just sec-check. */
531 if (mm->icmp6_enabled &&
532 (error0 == MAP_ERROR_DECAP_SEC_CHECK
533 || error0 == MAP_ERROR_NO_DOMAIN))
535 /* Set ICMP parameters */
536 vlib_buffer_advance (p0, -sizeof (ip6_header_t));
537 icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
538 ICMP6_destination_unreachable_source_address_failed_policy,
540 next0 = IP6_MAP_NEXT_ICMP;
544 next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
548 if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
549 vlib_buffer_advance (p0, -sizeof (ip6_header_t));
551 p0->error = error_node->errors[error0];
552 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
553 n_left_to_next, pi0, next0);
555 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
558 return frame->n_vectors;
/* Walk a MAP IPv6 reassembly context and move every fragment that is
 * ready (IPv4 header known, next-fragment data cached or last fragment)
 * onto *fragments_ready, translating its IPv6 fragment header into IPv4
 * fragmentation fields (6-to-4 fragment id, offset, MF flag) and
 * recomputing the IPv4 checksum.
 * NOTE(review): numbering is non-contiguous — several declaration and
 * brace lines are elided; code preserved byte-for-byte. */
562 static_always_inline void
563 ip6_map_ip6_reass_prepare (vlib_main_t * vm, vlib_node_runtime_t * node,
564 map_ip6_reass_t * r, u32 ** fragments_ready,
565 u32 ** fragments_to_drop)
569 ip6_frag_hdr_t *frag0;
/* Nothing can be forwarded until the first fragment delivered the
 * inner IPv4 header into r->ip4_header. */
572 if (!r->ip4_header.ip_version_and_header_length)
575 //The IP header is here, we need to check for packets
576 //that can be forwarded
578 for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
/* Skip empty slots and fragments still waiting on their successor's
 * leading bytes (next_data). */
580 if (r->fragments[i].pi == ~0 ||
581 ((!r->fragments[i].next_data_len)
582 && (r->fragments[i].next_data_offset != (0xffff))))
585 p0 = vlib_get_buffer (vm, r->fragments[i].pi);
586 ip60 = vlib_buffer_get_current (p0);
587 frag0 = (ip6_frag_hdr_t *) (ip60 + 1);
588 ip40 = (ip4_header_t *) (frag0 + 1);
590 if (ip6_frag_hdr_offset (frag0))
592 //Not first fragment, add the IPv4 header
593 clib_memcpy_fast (ip40, &r->ip4_header, 20);
596 #ifdef MAP_IP6_REASS_COUNT_BYTES
598 clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0);
601 if (ip6_frag_hdr_more (frag0))
603 //Not last fragment, we copy end of next
604 clib_memcpy_fast (u8_ptr_add (ip60, p0->current_length),
605 r->fragments[i].next_data, 20);
606 p0->current_length += 20;
607 ip60->payload_length = u16_net_add (ip60->payload_length, 20);
/* Translate IPv6 fragmentation info into the IPv4 header. */
610 if (!ip4_is_fragment (ip40))
612 ip40->fragment_id = frag_id_6to4 (frag0->identification);
613 ip40->flags_and_fragment_offset =
614 clib_host_to_net_u16 (ip6_frag_hdr_offset (frag0));
618 ip40->flags_and_fragment_offset =
619 clib_host_to_net_u16 (ip4_get_fragment_offset (ip40) +
620 ip6_frag_hdr_offset (frag0));
623 if (ip6_frag_hdr_more (frag0))
624 ip40->flags_and_fragment_offset |=
625 clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
628 clib_host_to_net_u16 (p0->current_length - sizeof (*ip60) -
630 ip40->checksum = ip4_header_checksum (ip40);
632 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
634 map_ip6_map_ip6_reass_trace_t *tr =
635 vlib_add_trace (vm, node, p0, sizeof (*tr));
636 tr->offset = ip4_get_fragment_offset (ip40);
637 tr->frag_len = clib_net_to_host_u16 (ip40->length) - sizeof (*ip40);
/* Release the slot and hand the buffer to the ready list. */
641 vec_add1 (*fragments_ready, r->fragments[i].pi);
642 r->fragments[i].pi = ~0;
643 r->fragments[i].next_data_len = 0;
644 r->fragments[i].next_data_offset = 0;
645 map_main.ip6_reass_buffered_counter--;
647 //TODO: Best solution would be that ip6_map handles extension headers
648 // and ignores atomic fragment. But in the meantime, let's just copy the header.
650 u8 protocol = frag0->next_hdr;
651 memmove (u8_ptr_add (ip40, -sizeof (*ip60)), ip60, sizeof (*ip60));
652 ((ip6_header_t *) u8_ptr_add (ip40, -sizeof (*ip60)))->protocol =
654 vlib_buffer_advance (p0, sizeof (*frag0));
/* Enqueue buffer index pi directly to the ip6-map-ip6-reass node's
 * drop next (used when a fragment must be discarded outside the normal
 * dispatch loop). */
659 map_ip6_drop_pi (u32 pi)
661 vlib_main_t *vm = vlib_get_main ();
662 vlib_node_runtime_t *n =
663 vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index);
664 vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi);
669 * TODO: We should count the number of successfully
670 * transmitted fragment bytes and compare that to the last fragment
671 * offset such that we can free the reassembly structure when all fragments
672 * have been forwarded.
/* ip6-map-ip6-reass node function: buffers IPv6 fragments of an
 * encapsulated IPv4 packet, and once enough context is available
 * (via ip6_map_ip6_reass_prepare) re-emits them to ip6-map as
 * individually-translated IPv4 fragments.
 * NOTE(review): numbering is non-contiguous — several declaration,
 * assignment-target and brace lines are elided; code preserved
 * byte-for-byte. */
675 ip6_map_ip6_reass (vlib_main_t * vm,
676 vlib_node_runtime_t * node, vlib_frame_t * frame)
678 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
679 vlib_node_runtime_t *error_node =
680 vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index);
681 u32 *fragments_to_drop = NULL;
682 u32 *fragments_ready = NULL;
684 from = vlib_frame_vector_args (frame);
685 n_left_from = frame->n_vectors;
686 next_index = node->cached_next_index;
687 while (n_left_from > 0)
689 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
692 while (n_left_from > 0 && n_left_to_next > 0)
696 u8 error0 = MAP_ERROR_NONE;
698 ip6_frag_hdr_t *frag0;
703 pi0 = to_next[0] = from[0];
709 p0 = vlib_get_buffer (vm, pi0);
710 ip60 = vlib_buffer_get_current (p0);
711 frag0 = (ip6_frag_hdr_t *) (ip60 + 1);
/* offset (low 3 bits masked), payload length past the fragment
 * header, and the offset the NEXT fragment should start at
 * (0xffff marks the last fragment). */
713 clib_host_to_net_u16 (frag0->fragment_offset_and_more) & (~7);
715 clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0);
717 ip6_frag_hdr_more (frag0) ? (offset + frag_len) : (0xffff);
719 //FIXME: Support other extension headers, maybe
721 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
723 map_ip6_map_ip6_reass_trace_t *tr =
724 vlib_add_trace (vm, node, p0, sizeof (*tr));
726 tr->frag_len = frag_len;
/* Reassembly contexts are shared state: lock around lookup,
 * fragment insertion and preparation. */
730 map_ip6_reass_lock ();
732 map_ip6_reass_get (&ip60->src_address, &ip60->dst_address,
733 frag0->identification, frag0->next_hdr,
735 //FIXME: Use better error codes
736 if (PREDICT_FALSE (!r))
738 // Could not create a caching entry
739 error0 = MAP_ERROR_FRAGMENT_MEMORY;
741 else if (PREDICT_FALSE ((frag_len <= 20 &&
742 (ip6_frag_hdr_more (frag0) || (!offset)))))
744 //Very small fragment are restricted to the last one and
745 //can't be the first one
746 error0 = MAP_ERROR_FRAGMENT_MALFORMED;
749 if (map_ip6_reass_add_fragment
750 (r, pi0, offset, next_offset, (u8 *) (frag0 + 1), frag_len))
752 map_ip6_reass_free (r, &fragments_to_drop);
753 error0 = MAP_ERROR_FRAGMENT_MEMORY;
757 #ifdef MAP_IP6_REASS_COUNT_BYTES
758 if (!ip6_frag_hdr_more (frag0))
759 r->expected_total = offset + frag_len;
761 ip6_map_ip6_reass_prepare (vm, node, r, &fragments_ready,
763 #ifdef MAP_IP6_REASS_COUNT_BYTES
764 if (r->forwarded >= r->expected_total)
765 map_ip6_reass_free (r, &fragments_to_drop);
768 map_ip6_reass_unlock ();
770 if (error0 == MAP_ERROR_NONE)
780 //All data from that packet was copied no need to keep it, but this is not an error
781 p0->error = error_node->errors[MAP_ERROR_NONE];
782 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
783 to_next, n_left_to_next,
785 IP6_MAP_IP6_REASS_NEXT_DROP);
790 p0->error = error_node->errors[error0];
791 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
793 IP6_MAP_IP6_REASS_NEXT_DROP);
796 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Flush batched buffers: ready fragments loop back into ip6-map,
 * dropped fragments go to error-drop. */
799 map_send_all_to_node (vm, fragments_ready, node,
800 &error_node->errors[MAP_ERROR_NONE],
801 IP6_MAP_IP6_REASS_NEXT_IP6_MAP);
802 map_send_all_to_node (vm, fragments_to_drop, node,
803 &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
804 IP6_MAP_IP6_REASS_NEXT_DROP);
806 vec_free (fragments_to_drop);
807 vec_free (fragments_ready);
808 return frame->n_vectors;
812 * ip6_map_post_ip4_reass
/* Node function run after IPv4 shallow virtual reassembly: the L4 port
 * of a fragmented inner packet is now known (ip.reass.l4_src_port), so
 * the deferred MAP security check can be performed here, followed by
 * the same MTU/fragmentation handling as the ip6-map fast path.
 * NOTE(review): numbering is non-contiguous — declarations and some
 * assignment-target lines are elided; code preserved byte-for-byte. */
815 ip6_map_post_ip4_reass (vlib_main_t * vm,
816 vlib_node_runtime_t * node, vlib_frame_t * frame)
818 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
819 vlib_node_runtime_t *error_node =
820 vlib_node_get_runtime (vm, ip6_map_post_ip4_reass_node.index);
821 map_main_t *mm = &map_main;
822 vlib_combined_counter_main_t *cm = mm->domain_counters;
823 u32 thread_index = vm->thread_index;
825 from = vlib_frame_vector_args (frame);
826 n_left_from = frame->n_vectors;
827 next_index = node->cached_next_index;
828 while (n_left_from > 0)
830 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
833 while (n_left_from > 0 && n_left_to_next > 0)
837 u8 error0 = MAP_ERROR_NONE;
842 u32 map_domain_index0 = ~0;
843 u32 next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP;
845 pi0 = to_next[0] = from[0];
/* Buffer currently points at the inner IPv4 header; the outer IPv6
 * header sits immediately before it in buffer memory. */
851 p0 = vlib_get_buffer (vm, pi0);
852 ip40 = vlib_buffer_get_current (p0);
853 ip60 = ((ip6_header_t *) ip40) - 1;
856 ip4_map_get_domain ((ip4_address_t *) & ip40->src_address.as_u32,
857 &map_domain_index0, &error0);
/* L4 source port recovered by shallow virtual reassembly. */
859 port0 = vnet_buffer (p0)->ip.reass.l4_src_port;
861 if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
863 ip6_map_sec_check (d0, port0, ip40,
864 ip60) ? MAP_ERROR_NONE :
865 MAP_ERROR_DECAP_SEC_CHECK;
868 (d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu)
869 && error0 == MAP_ERROR_NONE))
871 vnet_buffer (p0)->ip_frag.flags = 0;
872 vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
873 vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
874 next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT;
877 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
879 map_ip6_map_ip4_reass_trace_t *tr =
880 vlib_add_trace (vm, node, p0, sizeof (*tr));
881 tr->map_domain_index = map_domain_index0;
885 if (error0 == MAP_ERROR_NONE)
886 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
888 map_domain_index0, 1,
893 MAP_ERROR_NONE) ? next0 : IP6_MAP_POST_IP4_REASS_NEXT_DROP;
894 p0->error = error_node->errors[error0];
895 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
896 n_left_to_next, pi0, next0);
899 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
901 return frame->n_vectors;
/* ip6-map-icmp-relay node function: translates ICMPv6 errors received
 * for MAP-encapsulated traffic into ICMPv4 errors addressed to the
 * original IPv4 sender (cf. RFC 2473 section 8.3), then forwards the
 * result via ip4-lookup.
 * NOTE(review): numbering is non-contiguous — declarations, some
 * conditions and assignment-target lines are elided; code preserved
 * byte-for-byte. */
908 ip6_map_icmp_relay (vlib_main_t * vm,
909 vlib_node_runtime_t * node, vlib_frame_t * frame)
911 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
912 vlib_node_runtime_t *error_node =
913 vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index);
914 map_main_t *mm = &map_main;
915 u32 thread_index = vm->thread_index;
916 u16 *fragment_ids, *fid;
918 from = vlib_frame_vector_args (frame);
919 n_left_from = frame->n_vectors;
920 next_index = node->cached_next_index;
922 /* Get random fragment IDs for replies. */
924 clib_random_buffer_get_data (&vm->random_buffer,
925 n_left_from * sizeof (fragment_ids[0]));
927 while (n_left_from > 0)
929 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
932 while (n_left_from > 0 && n_left_to_next > 0)
936 u8 error0 = MAP_ERROR_NONE;
938 u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP;
941 pi0 = to_next[0] = from[0];
947 p0 = vlib_get_buffer (vm, pi0);
948 ip60 = vlib_buffer_get_current (p0);
949 u16 tlen = clib_net_to_host_u16 (ip60->payload_length);
/* Packet layout: outer IPv6 + ICMPv6 + embedded IPv6 + embedded
 * IPv4 + L4; relayed as IPv4 + ICMPv4 + embedded IPv4 + L4. */
956 * Original IPv4 header / packet
960 * Original IPv4 header / packet
963 /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */
966 error0 = MAP_ERROR_ICMP_RELAY;
970 icmp46_header_t *icmp60 = (icmp46_header_t *) (ip60 + 1);
971 ip6_header_t *inner_ip60 = (ip6_header_t *) (icmp60 + 2);
/* Only errors about IPv4-in-IPv6 traffic can be relayed. */
973 if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP)
975 error0 = MAP_ERROR_ICMP_RELAY;
979 ip4_header_t *inner_ip40 = (ip4_header_t *) (inner_ip60 + 1);
980 vlib_buffer_advance (p0, 60); /* sizeof ( IPv6 + ICMP + IPv6 - IPv4 - ICMP ) */
981 ip4_header_t *new_ip40 = vlib_buffer_get_current (p0);
982 icmp46_header_t *new_icmp40 = (icmp46_header_t *) (new_ip40 + 1);
985 * Relay according to RFC2473, section 8.3
987 switch (icmp60->type)
989 case ICMP6_destination_unreachable:
990 case ICMP6_time_exceeded:
991 case ICMP6_parameter_problem:
992 /* Type 3 - destination unreachable, Code 1 - host unreachable */
993 new_icmp40->type = ICMP4_destination_unreachable;
995 ICMP4_destination_unreachable_destination_unreachable_host;
998 case ICMP6_packet_too_big:
999 /* Type 3 - destination unreachable, Code 4 - packet too big */
1000 /* Potential TODO: Adjust domain tunnel MTU based on the value received here */
1001 mtu = clib_net_to_host_u32 (*((u32 *) (icmp60 + 1)));
/* PTB is only relayed when the original sender set DF. */
1005 (inner_ip40->flags_and_fragment_offset &
1006 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT)))
1008 error0 = MAP_ERROR_ICMP_RELAY;
1012 new_icmp40->type = ICMP4_destination_unreachable;
1014 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set;
1015 *((u32 *) (new_icmp40 + 1)) =
1016 clib_host_to_net_u32 (mtu < 1280 ? 1280 : mtu);
1020 error0 = MAP_ERROR_ICMP_RELAY;
1025 * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812)
/* Build the new outer IPv4 header and ICMPv4 checksum. */
1027 new_ip40->ip_version_and_header_length = 0x45;
1029 u16 nlen = (tlen - 20) > 576 ? 576 : tlen - 20;
1030 new_ip40->length = clib_host_to_net_u16 (nlen);
1031 new_ip40->fragment_id = fid[0];
1034 new_ip40->protocol = IP_PROTOCOL_ICMP;
1035 new_ip40->src_address = mm->icmp4_src_address;
1036 new_ip40->dst_address = inner_ip40->src_address;
1037 new_ip40->checksum = ip4_header_checksum (new_ip40);
1039 new_icmp40->checksum = 0;
1040 ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20);
1041 new_icmp40->checksum = ~ip_csum_fold (sum);
1043 vlib_increment_simple_counter (&mm->icmp_relayed, thread_index, 0,
1047 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
1049 map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
1050 tr->map_domain_index = 0;
1055 (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP;
1056 p0->error = error_node->errors[error0];
1057 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1058 n_left_to_next, pi0, next0);
1060 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1063 return frame->n_vectors;
/* Error strings expanded from the MAP error macro list (shared by all
 * node registrations below). NOTE(review): the foreach macro invocation
 * and closing lines are elided from this listing. */
1067 static char *map_error_strings[] = {
1068 #define _(sym,string) string,
/* Register ip6-map as an ip6-unicast feature, ordered before
 * ip6-flow-classify. */
1074 VNET_FEATURE_INIT (ip6_map_feature, static) =
1076 .arc_name = "ip6-unicast",
1077 .node_name = "ip6-map",
1078 .runs_before = VNET_FEATURES ("ip6-flow-classify"),
/* ip6-map node registration: main MAP-E decap node; next nodes mirror
 * the ip6_map_next enum above. */
1081 VLIB_REGISTER_NODE(ip6_map_node) = {
1082 .function = ip6_map,
1084 .vector_size = sizeof(u32),
1085 .format_trace = format_map_trace,
1086 .type = VLIB_NODE_TYPE_INTERNAL,
1088 .n_errors = MAP_N_ERROR,
1089 .error_strings = map_error_strings,
1091 .n_next_nodes = IP6_MAP_N_NEXT,
1093 [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
1094 #ifdef MAP_SKIP_IP6_LOOKUP
1095 [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-load-balance",
1097 [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass",
1098 [IP6_MAP_NEXT_IP4_REASS] = "ip4-sv-reassembly-custom-next",
1099 [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
1100 [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay",
1101 [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local",
1102 [IP6_MAP_NEXT_DROP] = "error-drop",
1103 [IP6_MAP_NEXT_ICMP] = "ip6-icmp-error",
/* ip6-map-ip6-reass node registration: buffers/translates IPv6
 * fragments, feeding ready packets back into ip6-map. */
1109 VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = {
1110 .function = ip6_map_ip6_reass,
1111 .name = "ip6-map-ip6-reass",
1112 .vector_size = sizeof(u32),
1113 .format_trace = format_ip6_map_ip6_reass_trace,
1114 .type = VLIB_NODE_TYPE_INTERNAL,
1115 .n_errors = MAP_N_ERROR,
1116 .error_strings = map_error_strings,
1117 .n_next_nodes = IP6_MAP_IP6_REASS_N_NEXT,
1119 [IP6_MAP_IP6_REASS_NEXT_IP6_MAP] = "ip6-map",
1120 [IP6_MAP_IP6_REASS_NEXT_DROP] = "error-drop",
/* ip6-map-post-ip4-reass node registration: runs after IPv4 shallow
 * virtual reassembly to complete the deferred security check. */
1126 VLIB_REGISTER_NODE(ip6_map_post_ip4_reass_node) = {
1127 .function = ip6_map_post_ip4_reass,
1128 .name = "ip6-map-post-ip4-reass",
1129 .vector_size = sizeof(u32),
1130 .format_trace = format_ip6_map_post_ip4_reass_trace,
1131 .type = VLIB_NODE_TYPE_INTERNAL,
1132 .n_errors = MAP_N_ERROR,
1133 .error_strings = map_error_strings,
1134 .n_next_nodes = IP6_MAP_POST_IP4_REASS_N_NEXT,
1136 [IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup",
1137 [IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
1138 [IP6_MAP_POST_IP4_REASS_NEXT_DROP] = "error-drop",
/* ip6-map-icmp-relay node registration: ICMPv6 -> ICMPv4 error
 * translation for MAP traffic. */
1144 VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
1145 .function = ip6_map_icmp_relay,
1146 .name = "ip6-map-icmp-relay",
1147 .vector_size = sizeof(u32),
1148 .format_trace = format_map_trace, //FIXME
1149 .type = VLIB_NODE_TYPE_INTERNAL,
1150 .n_errors = MAP_N_ERROR,
1151 .error_strings = map_error_strings,
1152 .n_next_nodes = IP6_ICMP_RELAY_N_NEXT,
1154 [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup",
1155 [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop",
/* Plugin init: register ip6-map-post-ip4-reass as a custom next node of
 * IPv4 shallow virtual reassembly and remember its index. Ordered after
 * map_init via VLIB_INITS. */
1161 ip6_map_init (vlib_main_t * vm)
1163 map_main.ip4_sv_reass_custom_next_index =
1164 ip4_sv_reass_custom_register_next_node
1165 (ip6_map_post_ip4_reass_node.index);
1169 VLIB_INIT_FUNCTION (ip6_map_init) =
1171 .runs_after = VLIB_INITS ("map_init"),};
1174 * fd.io coding-style-patch-verification: ON
1177 * eval: (c-set-style "gnu")