2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 #include "../ip/ip_frag.h"
18 #include <vnet/ip/ip4_to_ip6.h>
19 #include <vnet/ip/ip6_to_ip4.h>
/* Next-node slots of the ip6-map node; the ip6_map_node registration binds each slot to a graph node name. */
23 IP6_MAP_NEXT_IP4_LOOKUP,
24 #ifdef MAP_SKIP_IP6_LOOKUP
/* Slot used when ip6_map_ip4_lookup_bypass () reports a pre-resolved IPv4 path. */
25 IP6_MAP_NEXT_IP4_REWRITE,
27 IP6_MAP_NEXT_IP6_REASS,
28 IP6_MAP_NEXT_IP4_REASS,
29 IP6_MAP_NEXT_IP4_FRAGMENT,
30 IP6_MAP_NEXT_IP6_ICMP_RELAY,
31 IP6_MAP_NEXT_IP6_LOCAL,
/* Next-node slots of the ip6-map-ip6-reass node (bound to node names in ip6_map_ip6_reass_node). */
37 enum ip6_map_ip6_reass_next_e
39 IP6_MAP_IP6_REASS_NEXT_IP6_MAP,
40 IP6_MAP_IP6_REASS_NEXT_DROP,
41 IP6_MAP_IP6_REASS_N_NEXT,
/* Next-node slots of the ip6-map-ip4-reass node (bound to node names in ip6_map_ip4_reass_node). */
44 enum ip6_map_ip4_reass_next_e
46 IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP,
47 IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT,
48 IP6_MAP_IP4_REASS_NEXT_DROP,
49 IP6_MAP_IP4_REASS_N_NEXT,
/* Next-node slots of the ip6-map-icmp-relay node (bound to node names in ip6_map_icmp_relay_node). */
52 enum ip6_icmp_relay_next_e
54 IP6_ICMP_RELAY_NEXT_IP4_LOOKUP,
55 IP6_ICMP_RELAY_NEXT_DROP,
56 IP6_ICMP_RELAY_N_NEXT,
/*
 * Forward declarations of the node registrations defined at the end of this
 * file; the node functions and drop helpers use their .index to obtain the
 * node runtime.
 */
59 vlib_node_registration_t ip6_map_ip4_reass_node;
60 vlib_node_registration_t ip6_map_ip6_reass_node;
61 static vlib_node_registration_t ip6_map_icmp_relay_node;
/* Trace record of the ip6-map-ip4-reass node; its formatter reads map_domain_index, port and cached. */
68 } map_ip6_map_ip4_reass_trace_t;
/*
 * Trace formatter installed as .format_trace of ip6_map_ip4_reass_node.
 * Pulls (vlib_main_t *, vlib_node_t *, map_ip6_map_ip4_reass_trace_t *) from
 * args and appends the MAP domain index, the L4 port and whether the packet
 * was "cached" or "forwarded" to s.
 */
71 format_ip6_map_ip4_reass_trace (u8 * s, va_list * args)
/* vm and node are consumed only to advance the va_list. */
73 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
74 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
75 map_ip6_map_ip4_reass_trace_t *t =
76 va_arg (*args, map_ip6_map_ip4_reass_trace_t *);
77 return format (s, "MAP domain index: %d L4 port: %u Status: %s",
78 t->map_domain_index, t->port,
79 t->cached ? "cached" : "forwarded");
/* Trace record of the ip6-map-ip6-reass node; its formatter reads offset, frag_len and out. */
87 } map_ip6_map_ip6_reass_trace_t;
/*
 * Trace formatter installed as .format_trace of ip6_map_ip6_reass_node.
 * Pulls (vlib_main_t *, vlib_node_t *, map_ip6_map_ip6_reass_trace_t *) from
 * args and appends the fragment offset, fragment length and direction
 * ("out" when t->out is set, otherwise "in") to s.
 */
90 format_ip6_map_ip6_reass_trace (u8 * s, va_list * args)
/* vm and node are consumed only to advance the va_list. */
92 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
93 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
94 map_ip6_map_ip6_reass_trace_t *t =
95 va_arg (*args, map_ip6_map_ip6_reass_trace_t *);
96 return format (s, "Offset: %d Fragment length: %d Status: %s", t->offset,
97 t->frag_len, t->out ? "out" : "in");
/*
 * MAP source-address consistency check.
 * Computes the two 64-bit halves of the IPv6 source address expected for the
 * IPv4 source address and L4 port (map_get_pfx / map_get_sfx) and compares
 * them with the source address of the received IPv6 header.
 * d:    MAP domain used for the address derivation.
 * port: L4 port in network byte order (converted to host order here).
 * ip4:  inner IPv4 header.
 * Callers treat a true result as "check passed" (MAP_ERROR_NONE) and a false
 * result as MAP_ERROR_DECAP_SEC_CHECK.
 * NOTE(review): the remaining parameter(s), any enabling condition and the
 * return statements are not visible in this excerpt - confirm.
 */
103 static_always_inline bool
104 ip6_map_sec_check (map_domain_t * d, u16 port, ip4_header_t * ip4,
107 u16 sp4 = clib_net_to_host_u16 (port);
108 u32 sa4 = clib_net_to_host_u32 (ip4->src_address.as_u32);
/* Expected upper (sal6) and lower (sar6) 64 bits of the IPv6 source address. */
109 u64 sal6 = map_get_pfx (d, sa4, sp4);
110 u64 sar6 = map_get_sfx (d, sa4, sp4);
/* Mismatch in either half of the actual IPv6 source address. */
113 (sal6 != clib_net_to_host_u64 (ip6->src_address.as_u64[0])
114 || sar6 != clib_net_to_host_u64 (ip6->src_address.as_u64[1])))
/*
 * Inbound MAP security check used by the ip6-map node.
 * d:     MAP domain of the packet.
 * ip4:   inner IPv4 header.
 * ip6:   outer IPv6 header.
 * next:  in/out next-node slot; may be redirected to IP6_MAP_NEXT_IP4_REASS.
 * error: out MAP_ERROR_* code (MAP_ERROR_NONE, MAP_ERROR_DECAP_SEC_CHECK or
 *        MAP_ERROR_BAD_PROTOCOL in the visible paths).
 */
119 static_always_inline void
120 ip6_map_security_check (map_domain_t * d, ip4_header_t * ip4,
121 ip6_header_t * ip6, u32 * next, u8 * error)
123 map_main_t *mm = &map_main;
/* The check only applies to domains with EA bits or explicit rules. */
124 if (d->ea_bits_len || d->rules)
126 if (d->psid_length > 0)
128 if (!ip4_is_fragment (ip4))
/* Unfragmented packet: the port can be read from the packet itself. */
130 u16 port = ip4_get_port (ip4, 1);
135 ip6_map_sec_check (d, port, ip4,
136 ip6) ? MAP_ERROR_NONE :
137 MAP_ERROR_DECAP_SEC_CHECK;
/* NOTE(review): the condition guarding this assignment is not visible here; presumably no port could be extracted - confirm. */
141 *error = MAP_ERROR_BAD_PROTOCOL;
/* Redirect to the IPv4 reassembly slot when mm->sec_check_frag is set; otherwise *next is left as is. */
146 *next = mm->sec_check_frag ? IP6_MAP_NEXT_IP4_REASS : *next;
/*
 * Optional shortcut around the IPv4 lookup.
 * When built with MAP_SKIP_IP6_LOOKUP and a pre-resolved IPv4 FIB entry
 * exists, stores the pre-resolved DPO index as the buffer's TX adjacency
 * index. The ip6-map node switches to IP6_MAP_NEXT_IP4_REWRITE when this
 * function returns true.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
152 static_always_inline bool
153 ip6_map_ip4_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
155 #ifdef MAP_SKIP_IP6_LOOKUP
156 if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP4].fei)
158 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
159 pre_resolved[FIB_PROTOCOL_IP4].dpo.dpoi_index;
/*
 * ip6-map graph node function (installed as ip6_map_node.function).
 *
 * For every buffer of the frame:
 *  - reads the outer IPv6 header, advances past it and treats what follows
 *    as the inner IPv4 header;
 *  - classifies on ip60->protocol: IP-in-IP (payload > 20 bytes) triggers a
 *    MAP domain lookup, ICMPv6 goes to ip6-local (echo request/reply) or to
 *    the ICMP relay, IPv6 fragments go to IPv6 reassembly, anything else is
 *    marked MAP_ERROR_BAD_PROTOCOL;
 *  - runs the inbound security check, then either hands the packet to the
 *    IPv4 fragmentation node (IPv4 length above the domain MTU) or tries the
 *    lookup bypass, and bumps the per-domain RX counter;
 *  - on a security-check failure with mm->icmp6_enabled, rewinds to the IPv6
 *    header and requests an ICMPv6 destination-unreachable error; any other
 *    error sends the buffer to the drop slot.
 *
 * Returns the number of vectors in the input frame.
 */
170 ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
172 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
173 vlib_node_runtime_t *error_node =
174 vlib_node_get_runtime (vm, ip6_map_node.index);
175 map_main_t *mm = &map_main;
176 vlib_combined_counter_main_t *cm = mm->domain_counters;
177 u32 thread_index = vlib_get_thread_index ();
179 from = vlib_frame_vector_args (frame);
180 n_left_from = frame->n_vectors;
181 next_index = node->cached_next_index;
182 while (n_left_from > 0)
184 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: two buffers per iteration while two further buffers are available for prefetching. */
187 while (n_left_from >= 4 && n_left_to_next >= 2)
190 vlib_buffer_t *p0, *p1;
191 u8 error0 = MAP_ERROR_NONE;
192 u8 error1 = MAP_ERROR_NONE;
193 map_domain_t *d0 = 0, *d1 = 0;
194 ip4_header_t *ip40, *ip41;
195 ip6_header_t *ip60, *ip61;
196 u16 port0 = 0, port1 = 0;
197 u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
198 u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
199 u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP;
201 /* Prefetch next iteration. */
203 vlib_buffer_t *p2, *p3;
205 p2 = vlib_get_buffer (vm, from[2]);
206 p3 = vlib_get_buffer (vm, from[3]);
208 vlib_prefetch_buffer_header (p2, LOAD);
209 vlib_prefetch_buffer_header (p3, LOAD);
211 /* IPv6 + IPv4 header + 8 bytes of ULP */
212 CLIB_PREFETCH (p2->data, 68, LOAD);
213 CLIB_PREFETCH (p3->data, 68, LOAD);
216 pi0 = to_next[0] = from[0];
217 pi1 = to_next[1] = from[1];
223 p0 = vlib_get_buffer (vm, pi0);
224 p1 = vlib_get_buffer (vm, pi1);
225 ip60 = vlib_buffer_get_current (p0);
226 ip61 = vlib_buffer_get_current (p1);
/* Step past the outer IPv6 header; the bytes that follow are then read as the inner IPv4 header. */
227 vlib_buffer_advance (p0, sizeof (ip6_header_t));
228 vlib_buffer_advance (p1, sizeof (ip6_header_t));
229 ip40 = vlib_buffer_get_current (p0);
230 ip41 = vlib_buffer_get_current (p1);
233 * Encapsulated IPv4 packet
234 * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
235 * - Lookup/Rewrite or Fragment node in case of packet > MTU
236 * Fragmented IPv6 packet
238 * - Error -> Pass to ICMPv6/ICMPv4 relay
239 * - Info -> Pass to IPv6 local
240 * Anything else -> drop
243 (ip60->protocol == IP_PROTOCOL_IP_IN_IP
244 && clib_net_to_host_u16 (ip60->payload_length) > 20))
247 ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
248 (ip4_address_t *) & ip40->
249 src_address.as_u32, &map_domain_index0,
252 else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
253 clib_net_to_host_u16 (ip60->payload_length) >
254 sizeof (icmp46_header_t))
256 icmp46_header_t *icmp = (void *) (ip60 + 1);
257 next0 = (icmp->type == ICMP6_echo_request
259 ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
260 IP6_MAP_NEXT_IP6_ICMP_RELAY;
/* NOTE(review): unlike the single-buffer loop below, the dual loop does not require the fragment header's next_hdr to be IP_PROTOCOL_IP_IN_IP - confirm whether this difference is intended. */
262 else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
264 next0 = IP6_MAP_NEXT_IP6_REASS;
268 error0 = MAP_ERROR_BAD_PROTOCOL;
271 (ip61->protocol == IP_PROTOCOL_IP_IN_IP
272 && clib_net_to_host_u16 (ip61->payload_length) > 20))
275 ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX],
276 (ip4_address_t *) & ip41->
277 src_address.as_u32, &map_domain_index1,
280 else if (ip61->protocol == IP_PROTOCOL_ICMP6 &&
281 clib_net_to_host_u16 (ip61->payload_length) >
282 sizeof (icmp46_header_t))
284 icmp46_header_t *icmp = (void *) (ip61 + 1);
285 next1 = (icmp->type == ICMP6_echo_request
287 ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
288 IP6_MAP_NEXT_IP6_ICMP_RELAY;
290 else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
292 next1 = IP6_MAP_NEXT_IP6_REASS;
296 error1 = MAP_ERROR_BAD_PROTOCOL;
301 /* MAP inbound security check */
302 ip6_map_security_check (d0, ip40, ip60, &next0, &error0);
304 if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
305 next0 == IP6_MAP_NEXT_IP4_LOOKUP))
/* NOTE(review): the on-wire length is converted with clib_host_to_net_u16; the byte swap is the same as net_to_host, which would state the intent - confirm. */
309 && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
/* Inner packet exceeds the domain MTU: set up the ip_frag metadata and hand over to the IPv4 fragmentation slot. */
311 vnet_buffer (p0)->ip_frag.header_offset = 0;
312 vnet_buffer (p0)->ip_frag.flags = 0;
313 vnet_buffer (p0)->ip_frag.next_index =
314 IP4_FRAG_NEXT_IP4_LOOKUP;
315 vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
316 next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
321 ip6_map_ip4_lookup_bypass (p0,
323 IP6_MAP_NEXT_IP4_REWRITE : next0;
325 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
327 map_domain_index0, 1,
334 /* MAP inbound security check */
335 ip6_map_security_check (d1, ip41, ip61, &next1, &error1);
337 if (PREDICT_TRUE (error1 == MAP_ERROR_NONE &&
338 next1 == IP6_MAP_NEXT_IP4_LOOKUP))
342 && (clib_host_to_net_u16 (ip41->length) > d1->mtu)))
344 vnet_buffer (p1)->ip_frag.header_offset = 0;
345 vnet_buffer (p1)->ip_frag.flags = 0;
346 vnet_buffer (p1)->ip_frag.next_index =
347 IP4_FRAG_NEXT_IP4_LOOKUP;
348 vnet_buffer (p1)->ip_frag.mtu = d1->mtu;
349 next1 = IP6_MAP_NEXT_IP4_FRAGMENT;
354 ip6_map_ip4_lookup_bypass (p1,
356 IP6_MAP_NEXT_IP4_REWRITE : next1;
358 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
360 map_domain_index1, 1,
366 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
368 map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
369 tr->map_domain_index = map_domain_index0;
373 if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
375 map_trace_t *tr = vlib_add_trace (vm, node, p1, sizeof (*tr));
376 tr->map_domain_index = map_domain_index1;
/* NOTE(review): the dual loop raises an ICMPv6 error only for MAP_ERROR_DECAP_SEC_CHECK, whereas the single-buffer loop below also does so for MAP_ERROR_NO_DOMAIN - confirm whether this difference is intended. */
380 if (error0 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
382 /* Set ICMP parameters */
/* Rewind so the outer IPv6 header is the current data again before the ICMP error is requested. */
383 vlib_buffer_advance (p0, -sizeof (ip6_header_t));
384 icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
385 ICMP6_destination_unreachable_source_address_failed_policy,
387 next0 = IP6_MAP_NEXT_ICMP;
/* Any error sends the buffer to the drop slot. */
391 next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
394 if (error1 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
396 /* Set ICMP parameters */
397 vlib_buffer_advance (p1, -sizeof (ip6_header_t));
398 icmp6_error_set_vnet_buffer (p1, ICMP6_destination_unreachable,
399 ICMP6_destination_unreachable_source_address_failed_policy,
401 next1 = IP6_MAP_NEXT_ICMP;
405 next1 = (error1 == MAP_ERROR_NONE) ? next1 : IP6_MAP_NEXT_DROP;
/* Buffers going to ip6-local get the IPv6 header restored as current data. */
409 if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
410 vlib_buffer_advance (p0, -sizeof (ip6_header_t));
411 if (next1 == IP6_MAP_NEXT_IP6_LOCAL)
412 vlib_buffer_advance (p1, -sizeof (ip6_header_t));
414 p0->error = error_node->errors[error0];
415 p1->error = error_node->errors[error1];
416 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
417 n_left_to_next, pi0, pi1, next0,
/* Single loop: remaining buffers are handled one at a time with the same steps. */
422 while (n_left_from > 0 && n_left_to_next > 0)
426 u8 error0 = MAP_ERROR_NONE;
427 map_domain_t *d0 = 0;
431 u32 map_domain_index0 = ~0;
432 u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
434 pi0 = to_next[0] = from[0];
440 p0 = vlib_get_buffer (vm, pi0);
441 ip60 = vlib_buffer_get_current (p0);
442 vlib_buffer_advance (p0, sizeof (ip6_header_t));
443 ip40 = vlib_buffer_get_current (p0);
446 * Encapsulated IPv4 packet
447 * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
448 * - Lookup/Rewrite or Fragment node in case of packet > MTU
449 * Fragmented IPv6 packet
451 * - Error -> Pass to ICMPv6/ICMPv4 relay
452 * - Info -> Pass to IPv6 local
453 * Anything else -> drop
456 (ip60->protocol == IP_PROTOCOL_IP_IN_IP
457 && clib_net_to_host_u16 (ip60->payload_length) > 20))
460 ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
461 (ip4_address_t *) & ip40->
462 src_address.as_u32, &map_domain_index0,
465 else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
466 clib_net_to_host_u16 (ip60->payload_length) >
467 sizeof (icmp46_header_t))
469 icmp46_header_t *icmp = (void *) (ip60 + 1);
470 next0 = (icmp->type == ICMP6_echo_request
472 ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
473 IP6_MAP_NEXT_IP6_ICMP_RELAY;
/* Only fragments whose fragment header announces IP-in-IP are sent to IPv6 reassembly here. */
475 else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION &&
476 (((ip6_frag_hdr_t *) (ip60 + 1))->next_hdr ==
477 IP_PROTOCOL_IP_IN_IP))
479 next0 = IP6_MAP_NEXT_IP6_REASS;
483 error0 = MAP_ERROR_BAD_PROTOCOL;
488 /* MAP inbound security check */
489 ip6_map_security_check (d0, ip40, ip60, &next0, &error0);
491 if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
492 next0 == IP6_MAP_NEXT_IP4_LOOKUP))
496 && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
498 vnet_buffer (p0)->ip_frag.header_offset = 0;
499 vnet_buffer (p0)->ip_frag.flags = 0;
500 vnet_buffer (p0)->ip_frag.next_index =
501 IP4_FRAG_NEXT_IP4_LOOKUP;
502 vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
503 next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
508 ip6_map_ip4_lookup_bypass (p0,
510 IP6_MAP_NEXT_IP4_REWRITE : next0;
512 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
514 map_domain_index0, 1,
520 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
522 map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
523 tr->map_domain_index = map_domain_index0;
524 tr->port = (u16) port0;
/* ICMPv6 error for both a failed security check and a missing MAP domain. */
527 if (mm->icmp6_enabled &&
528 (error0 == MAP_ERROR_DECAP_SEC_CHECK
529 || error0 == MAP_ERROR_NO_DOMAIN))
531 /* Set ICMP parameters */
532 vlib_buffer_advance (p0, -sizeof (ip6_header_t));
533 icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
534 ICMP6_destination_unreachable_source_address_failed_policy,
536 next0 = IP6_MAP_NEXT_ICMP;
540 next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
544 if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
545 vlib_buffer_advance (p0, -sizeof (ip6_header_t));
547 p0->error = error_node->errors[error0];
548 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
549 n_left_to_next, pi0, next0);
551 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
554 return frame->n_vectors;
/*
 * Turns cached IPv6 fragments of reassembly r into forwardable packets.
 * Walks all MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY slots of r; for each
 * eligible slot it rebuilds an IPv4 header behind the IPv6 fragment header
 * (copying r->ip4_header for non-first fragments), appends the cached start
 * of the following fragment when more fragments follow, fixes the IPv4
 * fragment fields and checksum, strips the IPv6 fragment header and pushes
 * the buffer index onto *fragments_ready. The slot is then released.
 * vm, node:         used for buffer access and tracing.
 * r:                reassembly context.
 * fragments_ready:  in/out vector receiving the prepared buffer indices.
 * fragments_to_drop: not referenced in the visible lines.
 */
558 static_always_inline void
559 ip6_map_ip6_reass_prepare (vlib_main_t * vm, vlib_node_runtime_t * node,
560 map_ip6_reass_t * r, u32 ** fragments_ready,
561 u32 ** fragments_to_drop)
565 ip6_frag_hdr_t *frag0;
/* A zero version/IHL byte means the IPv4 header has not been stored in r yet (the statement taken in that case is not visible; presumably an early return). */
568 if (!r->ip4_header.ip_version_and_header_length)
571 //The IP header is here, we need to check for packets
572 //that can be forwarded
574 for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
/* Skip empty slots and slots that have no next-fragment data yet unless marked with next_data_offset 0xffff (the value the reass node uses for a last fragment). */
576 if (r->fragments[i].pi == ~0 ||
577 ((!r->fragments[i].next_data_len)
578 && (r->fragments[i].next_data_offset != (0xffff))))
581 p0 = vlib_get_buffer (vm, r->fragments[i].pi);
582 ip60 = vlib_buffer_get_current (p0);
583 frag0 = (ip6_frag_hdr_t *) (ip60 + 1);
584 ip40 = (ip4_header_t *) (frag0 + 1);
586 if (ip6_frag_hdr_offset (frag0))
588 //Not first fragment, add the IPv4 header
589 clib_memcpy (ip40, &r->ip4_header, 20);
592 #ifdef MAP_IP6_REASS_COUNT_BYTES
594 clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0);
597 if (ip6_frag_hdr_more (frag0))
599 //Not last fragment, we copy end of next
600 clib_memcpy (u8_ptr_add (ip60, p0->current_length),
601 r->fragments[i].next_data, 20);
602 p0->current_length += 20;
603 ip60->payload_length = u16_net_add (ip60->payload_length, 20);
/* Inner packet was not an IPv4 fragment: derive fragment id and offset from the IPv6 fragment header. */
606 if (!ip4_is_fragment (ip40))
608 ip40->fragment_id = frag_id_6to4 (frag0->identification);
609 ip40->flags_and_fragment_offset =
610 clib_host_to_net_u16 (ip6_frag_hdr_offset (frag0));
/* Inner packet already was an IPv4 fragment: add the IPv6 fragment offset to its own offset. */
614 ip40->flags_and_fragment_offset =
615 clib_host_to_net_u16 (ip4_get_fragment_offset (ip40) +
616 ip6_frag_hdr_offset (frag0));
619 if (ip6_frag_hdr_more (frag0))
620 ip40->flags_and_fragment_offset |=
621 clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
624 clib_host_to_net_u16 (p0->current_length - sizeof (*ip60) -
626 ip40->checksum = ip4_header_checksum (ip40);
628 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
630 map_ip6_map_ip6_reass_trace_t *tr =
631 vlib_add_trace (vm, node, p0, sizeof (*tr));
632 tr->offset = ip4_get_fragment_offset (ip40);
633 tr->frag_len = clib_net_to_host_u16 (ip40->length) - sizeof (*ip40);
/* Hand the buffer over and release the slot. */
637 vec_add1 (*fragments_ready, r->fragments[i].pi);
638 r->fragments[i].pi = ~0;
639 r->fragments[i].next_data_len = 0;
640 r->fragments[i].next_data_offset = 0;
641 map_main.ip6_reass_buffered_counter--;
643 //TODO: Best solution would be that ip6_map handles extension headers
644 // and ignores atomic fragment. But in the meantime, let's just copy the header.
/* Move the IPv6 header so it directly precedes the IPv4 header (memmove: the ranges may overlap), then advance past the old fragment header. */
646 u8 protocol = frag0->next_hdr;
647 memmove (u8_ptr_add (ip40, -sizeof (*ip60)), ip60, sizeof (*ip60));
648 ((ip6_header_t *) u8_ptr_add (ip40, -sizeof (*ip60)))->protocol =
650 vlib_buffer_advance (p0, sizeof (*frag0));
/*
 * Queues buffer index pi on the drop slot (IP6_MAP_IP6_REASS_NEXT_DROP) of
 * the ip6-map-ip6-reass node, using that node's runtime on the main obtained
 * from vlib_get_main ().
 */
655 map_ip6_drop_pi (u32 pi)
657 vlib_main_t *vm = vlib_get_main ();
658 vlib_node_runtime_t *n =
659 vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index);
660 vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi);
/*
 * Queues buffer index pi on the drop slot (IP6_MAP_IP4_REASS_NEXT_DROP) of
 * the ip6-map-ip4-reass node, using that node's runtime on the main obtained
 * from vlib_get_main ().
 */
664 map_ip4_drop_pi (u32 pi)
666 vlib_main_t *vm = vlib_get_main ();
667 vlib_node_runtime_t *n =
668 vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index);
669 vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP4_REASS_NEXT_DROP, pi);
674 * TODO: We should count the number of successfully
675 * transmitted fragment bytes and compare that to the last fragment
676 * offset such that we can free the reassembly structure when all fragments
677 * have been forwarded.
/*
 * ip6-map-ip6-reass graph node function (ip6_map_ip6_reass_node.function).
 *
 * For every IPv6 fragment in the frame it derives offset, fragment length
 * and the offset of the following fragment (0xffff for the last fragment)
 * from the fragment header, then, while holding the ip6 reassembly lock,
 * looks up or creates the reassembly context, stores the fragment and lets
 * ip6_map_ip6_reass_prepare () collect fragments that can be forwarded.
 * The buffer is enqueued to the drop slot in both visible enqueue paths
 * (with MAP_ERROR_NONE when its data was copied, with the error otherwise).
 * After the loop the ready fragments are sent to ip6-map and the fragments
 * to drop to error-drop.
 *
 * Returns the number of vectors in the input frame.
 */
680 ip6_map_ip6_reass (vlib_main_t * vm,
681 vlib_node_runtime_t * node, vlib_frame_t * frame)
683 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
684 vlib_node_runtime_t *error_node =
685 vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index);
/* Buffer-index vectors filled during the loop and flushed after it. */
686 u32 *fragments_to_drop = NULL;
687 u32 *fragments_ready = NULL;
689 from = vlib_frame_vector_args (frame);
690 n_left_from = frame->n_vectors;
691 next_index = node->cached_next_index;
692 while (n_left_from > 0)
694 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
697 while (n_left_from > 0 && n_left_to_next > 0)
701 u8 error0 = MAP_ERROR_NONE;
703 ip6_frag_hdr_t *frag0;
708 pi0 = to_next[0] = from[0];
714 p0 = vlib_get_buffer (vm, pi0);
715 ip60 = vlib_buffer_get_current (p0);
716 frag0 = (ip6_frag_hdr_t *) (ip60 + 1);
/* Mask off the low three bits of the offset/flags field to obtain the byte offset. */
718 clib_host_to_net_u16 (frag0->fragment_offset_and_more) & (~7);
720 clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0);
/* 0xffff marks "no following fragment". */
722 ip6_frag_hdr_more (frag0) ? (offset + frag_len) : (0xffff);
724 //FIXME: Support other extension headers, maybe
726 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
728 map_ip6_map_ip6_reass_trace_t *tr =
729 vlib_add_trace (vm, node, p0, sizeof (*tr));
731 tr->frag_len = frag_len;
735 map_ip6_reass_lock ();
737 map_ip6_reass_get (&ip60->src_address, &ip60->dst_address,
738 frag0->identification, frag0->next_hdr,
740 //FIXME: Use better error codes
741 if (PREDICT_FALSE (!r))
743 // Could not create a caching entry
744 error0 = MAP_ERROR_FRAGMENT_MEMORY;
746 else if (PREDICT_FALSE ((frag_len <= 20 &&
747 (ip6_frag_hdr_more (frag0) || (!offset)))))
749 //Very small fragments are restricted to the last one and
750 //can't be the first one
751 error0 = MAP_ERROR_FRAGMENT_MALFORMED;
/* A non-zero result from map_ip6_reass_add_fragment () is treated as failure: the whole reassembly is freed. */
754 if (map_ip6_reass_add_fragment
755 (r, pi0, offset, next_offset, (u8 *) (frag0 + 1), frag_len))
757 map_ip6_reass_free (r, &fragments_to_drop);
758 error0 = MAP_ERROR_FRAGMENT_MEMORY;
762 #ifdef MAP_IP6_REASS_COUNT_BYTES
763 if (!ip6_frag_hdr_more (frag0))
764 r->expected_total = offset + frag_len;
766 ip6_map_ip6_reass_prepare (vm, node, r, &fragments_ready,
768 #ifdef MAP_IP6_REASS_COUNT_BYTES
769 if (r->forwarded >= r->expected_total)
770 map_ip6_reass_free (r, &fragments_to_drop);
773 map_ip6_reass_unlock ();
775 if (error0 == MAP_ERROR_NONE)
785 //All data from that packet was copied no need to keep it, but this is not an error
786 p0->error = error_node->errors[MAP_ERROR_NONE];
787 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
788 to_next, n_left_to_next,
790 IP6_MAP_IP6_REASS_NEXT_DROP);
795 p0->error = error_node->errors[error0];
796 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
798 IP6_MAP_IP6_REASS_NEXT_DROP);
801 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* Flush the collected buffers: ready fragments back to ip6-map, the rest to drop. */
804 map_send_all_to_node (vm, fragments_ready, node,
805 &error_node->errors[MAP_ERROR_NONE],
806 IP6_MAP_IP6_REASS_NEXT_IP6_MAP);
807 map_send_all_to_node (vm, fragments_to_drop, node,
808 &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
809 IP6_MAP_IP6_REASS_NEXT_DROP);
811 vec_free (fragments_to_drop);
812 vec_free (fragments_ready);
813 return frame->n_vectors;
/*
 * ip6-map-ip4-reass graph node function (ip6_map_ip4_reass_node.function).
 *
 * Handles inner IPv4 fragments; the current data is the IPv4 header and the
 * outer IPv6 header is taken from the bytes directly in front of it. For
 * every buffer the MAP domain is looked up and, under the ip4 reassembly
 * lock, a reassembly context is fetched. Non-first fragments are either
 * forwarded (port already known) or cached; the first fragment supplies the
 * L4 port and releases previously cached fragments into
 * fragments_to_loopback. Packets without error then pass the MAP security
 * check and, if larger than the domain MTU, are redirected to ip4-frag.
 * When the input vector is exhausted, looped-back fragments are copied into
 * the frame's vector and processed by the same loop.
 *
 * Returns the number of vectors in the input frame.
 */
820 ip6_map_ip4_reass (vlib_main_t * vm,
821 vlib_node_runtime_t * node, vlib_frame_t * frame)
823 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
824 vlib_node_runtime_t *error_node =
825 vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index);
826 map_main_t *mm = &map_main;
827 vlib_combined_counter_main_t *cm = mm->domain_counters;
828 u32 thread_index = vlib_get_thread_index ();
829 u32 *fragments_to_drop = NULL;
830 u32 *fragments_to_loopback = NULL;
832 from = vlib_frame_vector_args (frame);
833 n_left_from = frame->n_vectors;
834 next_index = node->cached_next_index;
835 while (n_left_from > 0)
837 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
840 while (n_left_from > 0 && n_left_to_next > 0)
844 u8 error0 = MAP_ERROR_NONE;
849 u32 map_domain_index0 = ~0;
850 u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP;
853 pi0 = to_next[0] = from[0];
859 p0 = vlib_get_buffer (vm, pi0);
860 ip40 = vlib_buffer_get_current (p0);
/* The outer IPv6 header is read from the sizeof (ip6_header_t) bytes immediately before the IPv4 header. */
861 ip60 = ((ip6_header_t *) ip40) - 1;
864 ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
865 (ip4_address_t *) & ip40->src_address.as_u32,
866 &map_domain_index0, &error0);
868 map_ip4_reass_lock ();
869 //This node only deals with fragmented ip4
870 map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32,
871 ip40->dst_address.as_u32,
875 if (PREDICT_FALSE (!r))
877 // Could not create a caching entry
878 error0 = MAP_ERROR_FRAGMENT_MEMORY;
880 else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40)))
882 // This is a fragment
885 // We know the port already
888 else if (map_ip4_reass_add_fragment (r, pi0))
890 // Not enough space for caching
891 error0 = MAP_ERROR_FRAGMENT_MEMORY;
892 map_ip4_reass_free (r, &fragments_to_drop);
/* First fragment (offset 0): the L4 port is read from it; 0 means it could not be determined. */
899 else if ((port0 = ip4_get_port (ip40, 1)) == 0)
901 // Could not find port from first fragment. Stop reassembling.
902 error0 = MAP_ERROR_BAD_PROTOCOL;
904 map_ip4_reass_free (r, &fragments_to_drop);
908 // Found port. Remember it and loopback saved fragments
910 map_ip4_reass_get_fragments (r, &fragments_to_loopback);
913 #ifdef MAP_IP4_REASS_COUNT_BYTES
916 r->forwarded += clib_host_to_net_u16 (ip40->length) - 20;
917 if (!ip4_get_fragment_more (ip40))
919 ip4_get_fragment_offset (ip40) * 8 +
920 clib_host_to_net_u16 (ip40->length) - 20;
921 if (r->forwarded >= r->expected_total)
922 map_ip4_reass_free (r, &fragments_to_drop);
926 map_ip4_reass_unlock ();
928 if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
930 ip6_map_sec_check (d0, port0, ip40,
931 ip60) ? MAP_ERROR_NONE :
932 MAP_ERROR_DECAP_SEC_CHECK;
/* Oversized, error-free, non-cached packets are handed to the IPv4 fragmentation slot. */
935 (d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu)
936 && error0 == MAP_ERROR_NONE && !cached))
938 vnet_buffer (p0)->ip_frag.header_offset = 0;
939 vnet_buffer (p0)->ip_frag.flags = 0;
940 vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
941 vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
942 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT;
945 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
947 map_ip6_map_ip4_reass_trace_t *tr =
948 vlib_add_trace (vm, node, p0, sizeof (*tr));
949 tr->map_domain_index = map_domain_index0;
962 if (error0 == MAP_ERROR_NONE)
963 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
965 map_domain_index0, 1,
970 MAP_ERROR_NONE) ? next0 : IP6_MAP_IP4_REASS_NEXT_DROP;
971 p0->error = error_node->errors[error0];
972 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
973 n_left_to_next, pi0, next0);
976 //Loopback when we reach the end of the input vector
977 if (n_left_from == 0 && vec_len (fragments_to_loopback))
/* Reuse the frame's vector as input for the looped-back buffer indices, at most VLIB_FRAME_SIZE at a time. */
979 from = vlib_frame_vector_args (frame);
980 u32 len = vec_len (fragments_to_loopback);
981 if (len <= VLIB_FRAME_SIZE)
983 clib_memcpy (from, fragments_to_loopback,
986 vec_reset_length (fragments_to_loopback);
/* More than one frame's worth: take the last VLIB_FRAME_SIZE entries and shrink the vector accordingly. */
991 fragments_to_loopback + (len -
993 sizeof (u32) * VLIB_FRAME_SIZE);
994 n_left_from = VLIB_FRAME_SIZE;
995 _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
999 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1001 map_send_all_to_node (vm, fragments_to_drop, node,
1002 &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
1003 IP6_MAP_IP4_REASS_NEXT_DROP);
1005 vec_free (fragments_to_drop);
1006 vec_free (fragments_to_loopback);
1007 return frame->n_vectors;
/*
 * ip6-map-icmp-relay graph node function (ip6_map_icmp_relay_node.function).
 *
 * Converts an ICMPv6 error that quotes an IPv6-encapsulated IPv4 packet into
 * an ICMPv4 error addressed to the source of the quoted IPv4 packet. The
 * buffer is advanced by 60 bytes so that a new IPv4 + ICMP header pair can
 * be written directly in front of the quoted IPv4 packet. ICMPv6 types are
 * mapped per RFC2473 section 8.3; packets that are too short, do not quote
 * IP-in-IP, or carry an unhandled type are dropped with
 * MAP_ERROR_ICMP_RELAY. The resulting ICMPv4 packet is limited to 576 bytes.
 *
 * Returns the number of vectors in the input frame.
 */
1014 ip6_map_icmp_relay (vlib_main_t * vm,
1015 vlib_node_runtime_t * node, vlib_frame_t * frame)
1017 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
1018 vlib_node_runtime_t *error_node =
1019 vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index);
1020 map_main_t *mm = &map_main;
1021 u32 thread_index = vlib_get_thread_index ();
1022 u16 *fragment_ids, *fid;
1024 from = vlib_frame_vector_args (frame);
1025 n_left_from = frame->n_vectors;
1026 next_index = node->cached_next_index;
1028 /* Get random fragment IDs for replies. */
1029 fid = fragment_ids =
1030 clib_random_buffer_get_data (&vm->random_buffer,
1031 n_left_from * sizeof (fragment_ids[0]));
1033 while (n_left_from > 0)
1035 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1038 while (n_left_from > 0 && n_left_to_next > 0)
1042 u8 error0 = MAP_ERROR_NONE;
1044 u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP;
1047 pi0 = to_next[0] = from[0];
1051 n_left_to_next -= 1;
1053 p0 = vlib_get_buffer (vm, pi0);
1054 ip60 = vlib_buffer_get_current (p0);
1055 u16 tlen = clib_net_to_host_u16 (ip60->payload_length);
1062 * Original IPv4 header / packet
1066 * Original IPv4 header / packet
1069 /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */
1072 error0 = MAP_ERROR_ICMP_RELAY;
1076 icmp46_header_t *icmp60 = (icmp46_header_t *) (ip60 + 1);
/* The quoted IPv6 header starts two icmp46_header_t units after the ICMPv6 header start (presumably 2 x 4 = 8 bytes - confirm sizeof (icmp46_header_t)). */
1077 ip6_header_t *inner_ip60 = (ip6_header_t *) (icmp60 + 2);
1079 if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP)
1081 error0 = MAP_ERROR_ICMP_RELAY;
1085 ip4_header_t *inner_ip40 = (ip4_header_t *) (inner_ip60 + 1);
1086 vlib_buffer_advance (p0, 60); /* sizeof ( IPv6 + ICMP + IPv6 - IPv4 - ICMP ) */
1087 ip4_header_t *new_ip40 = vlib_buffer_get_current (p0);
1088 icmp46_header_t *new_icmp40 = (icmp46_header_t *) (new_ip40 + 1);
1091 * Relay according to RFC2473, section 8.3
1093 switch (icmp60->type)
1095 case ICMP6_destination_unreachable:
1096 case ICMP6_time_exceeded:
1097 case ICMP6_parameter_problem:
1098 /* Type 3 - destination unreachable, Code 1 - host unreachable */
1099 new_icmp40->type = ICMP4_destination_unreachable;
1101 ICMP4_destination_unreachable_destination_unreachable_host;
1104 case ICMP6_packet_too_big:
1105 /* Type 3 - destination unreachable, Code 4 - packet too big */
1106 /* Potential TODO: Adjust domain tunnel MTU based on the value received here */
/* The reported MTU is the 32-bit word following the first icmp46_header_t of the ICMPv6 message. */
1107 mtu = clib_net_to_host_u32 (*((u32 *) (icmp60 + 1)));
1111 (inner_ip40->flags_and_fragment_offset &
1112 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT)))
1114 error0 = MAP_ERROR_ICMP_RELAY;
1118 new_icmp40->type = ICMP4_destination_unreachable;
1120 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set;
/* The MTU written into the ICMPv4 message is never below 1280. */
1121 *((u32 *) (new_icmp40 + 1)) =
1122 clib_host_to_net_u32 (mtu < 1280 ? 1280 : mtu);
1126 error0 = MAP_ERROR_ICMP_RELAY;
1131 * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812)
1133 new_ip40->ip_version_and_header_length = 0x45;
1135 u16 nlen = (tlen - 20) > 576 ? 576 : tlen - 20;
1136 new_ip40->length = clib_host_to_net_u16 (nlen);
1137 new_ip40->fragment_id = fid[0];
1140 new_ip40->protocol = IP_PROTOCOL_ICMP;
1141 new_ip40->src_address = mm->icmp4_src_address;
/* The ICMPv4 error goes back to the sender of the quoted IPv4 packet. */
1142 new_ip40->dst_address = inner_ip40->src_address;
1143 new_ip40->checksum = ip4_header_checksum (new_ip40);
/* ICMP checksum over everything after the new 20-byte IPv4 header, computed with the checksum field zeroed. */
1145 new_icmp40->checksum = 0;
1146 ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20);
1147 new_icmp40->checksum = ~ip_csum_fold (sum);
1149 vlib_increment_simple_counter (&mm->icmp_relayed, thread_index, 0,
1153 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
1155 map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
1156 tr->map_domain_index = 0;
1161 (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP;
1162 p0->error = error_node->errors[error0];
1163 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1164 n_left_to_next, pi0, next0);
1166 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1169 return frame->n_vectors;
/*
 * Error string table shared by all four node registrations in this file
 * (.error_strings). The entries are produced by expanding a list macro with
 * _(sym,string) defined to yield only the string.
 * NOTE(review): the list macro invocation itself is not visible in this excerpt.
 */
1173 static char *map_error_strings[] = {
1174 #define _(sym,string) string,
/*
 * Registration of the main MAP decapsulation node: internal node running
 * ip6_map (), tracing with format_map_trace, and binding every
 * IP6_MAP_NEXT_* slot to its graph node name.
 */
1180 VLIB_REGISTER_NODE(ip6_map_node) = {
1181 .function = ip6_map,
1183 .vector_size = sizeof(u32),
1184 .format_trace = format_map_trace,
1185 .type = VLIB_NODE_TYPE_INTERNAL,
1187 .n_errors = MAP_N_ERROR,
1188 .error_strings = map_error_strings,
1190 .n_next_nodes = IP6_MAP_N_NEXT,
1192 [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
1193 #ifdef MAP_SKIP_IP6_LOOKUP
1194 [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-load-balance",
1196 [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass",
1197 [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass",
1198 [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
1199 [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay",
1200 [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local",
1201 [IP6_MAP_NEXT_DROP] = "error-drop",
1202 [IP6_MAP_NEXT_ICMP] = "ip6-icmp-error",
/*
 * Registration of the IPv6 fragment handling node "ip6-map-ip6-reass":
 * internal node running ip6_map_ip6_reass (); prepared fragments return to
 * "ip6-map", everything else goes to "error-drop".
 */
1208 VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = {
1209 .function = ip6_map_ip6_reass,
1210 .name = "ip6-map-ip6-reass",
1211 .vector_size = sizeof(u32),
1212 .format_trace = format_ip6_map_ip6_reass_trace,
1213 .type = VLIB_NODE_TYPE_INTERNAL,
1214 .n_errors = MAP_N_ERROR,
1215 .error_strings = map_error_strings,
1216 .n_next_nodes = IP6_MAP_IP6_REASS_N_NEXT,
1218 [IP6_MAP_IP6_REASS_NEXT_IP6_MAP] = "ip6-map",
1219 [IP6_MAP_IP6_REASS_NEXT_DROP] = "error-drop",
/*
 * Registration of the inner IPv4 fragment handling node "ip6-map-ip4-reass":
 * internal node running ip6_map_ip4_reass (), with next slots for IPv4
 * lookup, IPv4 fragmentation and drop.
 */
1225 VLIB_REGISTER_NODE(ip6_map_ip4_reass_node) = {
1226 .function = ip6_map_ip4_reass,
1227 .name = "ip6-map-ip4-reass",
1228 .vector_size = sizeof(u32),
1229 .format_trace = format_ip6_map_ip4_reass_trace,
1230 .type = VLIB_NODE_TYPE_INTERNAL,
1231 .n_errors = MAP_N_ERROR,
1232 .error_strings = map_error_strings,
1233 .n_next_nodes = IP6_MAP_IP4_REASS_N_NEXT,
1235 [IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup",
1236 [IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
1237 [IP6_MAP_IP4_REASS_NEXT_DROP] = "error-drop",
/*
 * Registration of the file-local ICMP relay node "ip6-map-icmp-relay":
 * internal node running ip6_map_icmp_relay (); relayed packets go to
 * "ip4-lookup", failures to "error-drop". It reuses the generic
 * format_map_trace formatter (marked FIXME by the original author).
 */
1243 VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
1244 .function = ip6_map_icmp_relay,
1245 .name = "ip6-map-icmp-relay",
1246 .vector_size = sizeof(u32),
1247 .format_trace = format_map_trace, //FIXME
1248 .type = VLIB_NODE_TYPE_INTERNAL,
1249 .n_errors = MAP_N_ERROR,
1250 .error_strings = map_error_strings,
1251 .n_next_nodes = IP6_ICMP_RELAY_N_NEXT,
1253 [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup",
1254 [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop",
1260 * fd.io coding-style-patch-verification: ON
1263 * eval: (c-set-style "gnu")