map: use ip6-full-reassembly instead of own code
[vpp.git] / src / plugins / map / ip6_map.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "map.h"
16
17 #include <vnet/ip/ip_frag.h>
18 #include <vnet/ip/ip4_to_ip6.h>
19 #include <vnet/ip/ip6_to_ip4.h>
20 #include <vnet/ip/reass/ip4_sv_reass.h>
21
/*
 * Next-node slots of the "ip6-map" node.  Each value is an index into
 * the .next_nodes table of the ip6_map_node registration; the node name
 * wired to each slot is given alongside.
 */
enum ip6_map_next_e
{
  IP6_MAP_NEXT_IP4_LOOKUP,      /* "ip4-lookup" (default disposition) */
#ifdef MAP_SKIP_IP6_LOOKUP
  IP6_MAP_NEXT_IP4_REWRITE,     /* "ip4-load-balance" (lookup bypass) */
#endif
  IP6_MAP_NEXT_IP4_REASS,       /* "ip4-sv-reassembly-custom-next" */
  IP6_MAP_NEXT_IP4_FRAGMENT,    /* "ip4-frag" (inner packet > domain MTU) */
  IP6_MAP_NEXT_IP6_ICMP_RELAY,  /* "ip6-map-icmp-relay" */
  IP6_MAP_NEXT_IP6_LOCAL,       /* "ip6-local" (ICMPv6 echo request/reply) */
  IP6_MAP_NEXT_DROP,            /* "error-drop" */
  IP6_MAP_NEXT_ICMP,            /* "ip6-icmp-error" */
  IP6_MAP_N_NEXT,               /* number of slots, not a slot itself */
};
36
/*
 * Next-node slots associated with ip6_map_ip6_reass_node.  Within this
 * file only IP6_MAP_IP6_REASS_NEXT_DROP is referenced (by
 * map_ip6_drop_pi).
 * NOTE(review): no node using this table is registered in this file -
 * confirm whether the enum is still needed.
 */
enum ip6_map_ip6_reass_next_e
{
  IP6_MAP_IP6_REASS_NEXT_IP6_MAP,
  IP6_MAP_IP6_REASS_NEXT_DROP,
  IP6_MAP_IP6_REASS_N_NEXT,
};
43
/*
 * Next-node slots of the "ip6-map-post-ip4-reass" node; indexes into the
 * .next_nodes table of the ip6_map_post_ip4_reass_node registration.
 */
enum ip6_map_post_ip4_reass_next_e
{
  IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP,       /* "ip4-lookup" */
  IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT,     /* "ip4-frag" */
  IP6_MAP_POST_IP4_REASS_NEXT_DROP,             /* "error-drop" */
  IP6_MAP_POST_IP4_REASS_N_NEXT,                /* number of slots */
};
51
/*
 * Next-node slots of the "ip6-map-icmp-relay" node; indexes into the
 * .next_nodes table of the ip6_map_icmp_relay_node registration.
 */
enum ip6_icmp_relay_next_e
{
  IP6_ICMP_RELAY_NEXT_IP4_LOOKUP,       /* "ip4-lookup" (relayed ICMPv4) */
  IP6_ICMP_RELAY_NEXT_DROP,             /* "error-drop" */
  IP6_ICMP_RELAY_N_NEXT,                /* number of slots */
};
58
/* Node registrations referenced before their VLIB_REGISTER_NODE below. */
vlib_node_registration_t ip6_map_post_ip4_reass_node;
/*
 * NOTE(review): no VLIB_REGISTER_NODE for ip6_map_ip6_reass_node appears
 * in this file, yet map_ip6_drop_pi() reads its .index - confirm that it
 * is registered elsewhere or that the symbol is a leftover.
 */
vlib_node_registration_t ip6_map_ip6_reass_node;
static vlib_node_registration_t ip6_map_icmp_relay_node;
62
/* Trace record written by the "ip6-map-post-ip4-reass" node. */
typedef struct
{
  u32 map_domain_index;         /* index returned by the MAP domain lookup */
  u16 port;                     /* L4 source port; the formatter byte-swaps it with clib_net_to_host_u16 */
  u8 cached;                    /* non-zero prints "cached", zero prints "forwarded" */
} map_ip6_map_ip4_reass_trace_t;
69
70 u8 *
71 format_ip6_map_post_ip4_reass_trace (u8 * s, va_list * args)
72 {
73   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
74   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
75   map_ip6_map_ip4_reass_trace_t *t =
76     va_arg (*args, map_ip6_map_ip4_reass_trace_t *);
77   return format (s, "MAP domain index: %d L4 port: %u Status: %s",
78                  t->map_domain_index, clib_net_to_host_u16 (t->port),
79                  t->cached ? "cached" : "forwarded");
80 }
81
/*
 * Trace record rendered by format_ip6_map_ip6_reass_trace.
 * NOTE(review): nothing in this file fills in a record of this type -
 * confirm it is still produced somewhere.
 */
typedef struct
{
  u16 offset;                   /* printed as "Offset" */
  u16 frag_len;                 /* printed as "Fragment length" */
  u8 out;                       /* non-zero prints "out", zero prints "in" */
} map_ip6_map_ip6_reass_trace_t;
88
89 u8 *
90 format_ip6_map_ip6_reass_trace (u8 * s, va_list * args)
91 {
92   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
93   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
94   map_ip6_map_ip6_reass_trace_t *t =
95     va_arg (*args, map_ip6_map_ip6_reass_trace_t *);
96   return format (s, "Offset: %d Fragment length: %d Status: %s", t->offset,
97                  t->frag_len, t->out ? "out" : "in");
98 }
99
100 /*
101  * ip6_map_sec_check
102  */
103 static_always_inline bool
104 ip6_map_sec_check (map_domain_t * d, u16 port, ip4_header_t * ip4,
105                    ip6_header_t * ip6)
106 {
107   u16 sp4 = clib_net_to_host_u16 (port);
108   u32 sa4 = clib_net_to_host_u32 (ip4->src_address.as_u32);
109   u64 sal6 = map_get_pfx (d, sa4, sp4);
110   u64 sar6 = map_get_sfx (d, sa4, sp4);
111
112   if (PREDICT_FALSE
113       (sal6 != clib_net_to_host_u64 (ip6->src_address.as_u64[0])
114        || sar6 != clib_net_to_host_u64 (ip6->src_address.as_u64[1])))
115     return (false);
116   return (true);
117 }
118
119 static_always_inline void
120 ip6_map_security_check (map_domain_t * d, vlib_buffer_t * b0,
121                         ip4_header_t * ip4, ip6_header_t * ip6, u32 * next,
122                         u8 * error)
123 {
124   map_main_t *mm = &map_main;
125   if (d->ea_bits_len || d->rules)
126     {
127       if (d->psid_length > 0)
128         {
129           if (!ip4_is_fragment (ip4))
130             {
131               u16 port = ip4_get_port (ip4, 1);
132               if (port)
133                 {
134                   if (mm->sec_check)
135                     *error =
136                       ip6_map_sec_check (d, port, ip4,
137                                          ip6) ? MAP_ERROR_NONE :
138                       MAP_ERROR_DECAP_SEC_CHECK;
139                 }
140               else
141                 {
142                   *error = MAP_ERROR_BAD_PROTOCOL;
143                 }
144             }
145           else
146             {
147               if (mm->sec_check_frag)
148                 {
149                   vnet_buffer (b0)->ip.reass.next_index =
150                     map_main.ip4_sv_reass_custom_next_index;
151                   *next = IP6_MAP_NEXT_IP4_REASS;
152                 }
153             }
154         }
155     }
156 }
157
158 static_always_inline bool
159 ip6_map_ip4_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
160 {
161 #ifdef MAP_SKIP_IP6_LOOKUP
162   if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP4].fei)
163     {
164       vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
165         pre_resolved[FIB_PROTOCOL_IP4].dpo.dpoi_index;
166       return (true);
167     }
168 #endif
169   return (false);
170 }
171
172 /*
173  * ip6_map
174  */
175 static uword
176 ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
177 {
178   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
179   vlib_node_runtime_t *error_node =
180     vlib_node_get_runtime (vm, ip6_map_node.index);
181   map_main_t *mm = &map_main;
182   vlib_combined_counter_main_t *cm = mm->domain_counters;
183   u32 thread_index = vm->thread_index;
184
185   from = vlib_frame_vector_args (frame);
186   n_left_from = frame->n_vectors;
187   next_index = node->cached_next_index;
188   while (n_left_from > 0)
189     {
190       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
191
192       /* Dual loop */
193       while (n_left_from >= 4 && n_left_to_next >= 2)
194         {
195           u32 pi0, pi1;
196           vlib_buffer_t *p0, *p1;
197           u8 error0 = MAP_ERROR_NONE;
198           u8 error1 = MAP_ERROR_NONE;
199           map_domain_t *d0 = 0, *d1 = 0;
200           ip4_header_t *ip40, *ip41;
201           ip6_header_t *ip60, *ip61;
202           u16 port0 = 0, port1 = 0;
203           u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
204           u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
205           u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP;
206
207           /* Prefetch next iteration. */
208           {
209             vlib_buffer_t *p2, *p3;
210
211             p2 = vlib_get_buffer (vm, from[2]);
212             p3 = vlib_get_buffer (vm, from[3]);
213
214             vlib_prefetch_buffer_header (p2, LOAD);
215             vlib_prefetch_buffer_header (p3, LOAD);
216
217             /* IPv6 + IPv4 header + 8 bytes of ULP */
218             CLIB_PREFETCH (p2->data, 68, LOAD);
219             CLIB_PREFETCH (p3->data, 68, LOAD);
220           }
221
222           pi0 = to_next[0] = from[0];
223           pi1 = to_next[1] = from[1];
224           from += 2;
225           n_left_from -= 2;
226           to_next += 2;
227           n_left_to_next -= 2;
228
229           p0 = vlib_get_buffer (vm, pi0);
230           p1 = vlib_get_buffer (vm, pi1);
231           ip60 = vlib_buffer_get_current (p0);
232           ip61 = vlib_buffer_get_current (p1);
233           vlib_buffer_advance (p0, sizeof (ip6_header_t));
234           vlib_buffer_advance (p1, sizeof (ip6_header_t));
235           ip40 = vlib_buffer_get_current (p0);
236           ip41 = vlib_buffer_get_current (p1);
237
238           /*
239            * Encapsulated IPv4 packet
240            *   - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
241            *   - Lookup/Rewrite or Fragment node in case of packet > MTU
242            * Fragmented IPv6 packet
243            * ICMP IPv6 packet
244            *   - Error -> Pass to ICMPv6/ICMPv4 relay
245            *   - Info -> Pass to IPv6 local
246            * Anything else -> drop
247            */
248           if (PREDICT_TRUE
249               (ip60->protocol == IP_PROTOCOL_IP_IN_IP
250                && clib_net_to_host_u16 (ip60->payload_length) > 20))
251             {
252               d0 =
253                 ip4_map_get_domain ((ip4_address_t *) & ip40->
254                                     src_address.as_u32, &map_domain_index0,
255                                     &error0);
256             }
257           else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
258                    clib_net_to_host_u16 (ip60->payload_length) >
259                    sizeof (icmp46_header_t))
260             {
261               icmp46_header_t *icmp = (void *) (ip60 + 1);
262               next0 = (icmp->type == ICMP6_echo_request
263                        || icmp->type ==
264                        ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
265                 IP6_MAP_NEXT_IP6_ICMP_RELAY;
266             }
267           else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
268             {
269               error0 = MAP_ERROR_FRAGMENTED;
270             }
271           else
272             {
273               error0 = MAP_ERROR_BAD_PROTOCOL;
274             }
275           if (PREDICT_TRUE
276               (ip61->protocol == IP_PROTOCOL_IP_IN_IP
277                && clib_net_to_host_u16 (ip61->payload_length) > 20))
278             {
279               d1 =
280                 ip4_map_get_domain ((ip4_address_t *) & ip41->
281                                     src_address.as_u32, &map_domain_index1,
282                                     &error1);
283             }
284           else if (ip61->protocol == IP_PROTOCOL_ICMP6 &&
285                    clib_net_to_host_u16 (ip61->payload_length) >
286                    sizeof (icmp46_header_t))
287             {
288               icmp46_header_t *icmp = (void *) (ip61 + 1);
289               next1 = (icmp->type == ICMP6_echo_request
290                        || icmp->type ==
291                        ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
292                 IP6_MAP_NEXT_IP6_ICMP_RELAY;
293             }
294           else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
295             {
296               error1 = MAP_ERROR_FRAGMENTED;
297             }
298           else
299             {
300               error1 = MAP_ERROR_BAD_PROTOCOL;
301             }
302
303           if (d0)
304             {
305               /* MAP inbound security check */
306               ip6_map_security_check (d0, p0, ip40, ip60, &next0, &error0);
307
308               if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
309                                 next0 == IP6_MAP_NEXT_IP4_LOOKUP))
310                 {
311                   if (PREDICT_FALSE
312                       (d0->mtu
313                        && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
314                     {
315                       vnet_buffer (p0)->ip_frag.flags = 0;
316                       vnet_buffer (p0)->ip_frag.next_index =
317                         IP4_FRAG_NEXT_IP4_LOOKUP;
318                       vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
319                       next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
320                     }
321                   else
322                     {
323                       next0 =
324                         ip6_map_ip4_lookup_bypass (p0,
325                                                    ip40) ?
326                         IP6_MAP_NEXT_IP4_REWRITE : next0;
327                     }
328                   vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
329                                                    thread_index,
330                                                    map_domain_index0, 1,
331                                                    clib_net_to_host_u16
332                                                    (ip40->length));
333                 }
334             }
335           if (d1)
336             {
337               /* MAP inbound security check */
338               ip6_map_security_check (d1, p1, ip41, ip61, &next1, &error1);
339
340               if (PREDICT_TRUE (error1 == MAP_ERROR_NONE &&
341                                 next1 == IP6_MAP_NEXT_IP4_LOOKUP))
342                 {
343                   if (PREDICT_FALSE
344                       (d1->mtu
345                        && (clib_host_to_net_u16 (ip41->length) > d1->mtu)))
346                     {
347                       vnet_buffer (p1)->ip_frag.flags = 0;
348                       vnet_buffer (p1)->ip_frag.next_index =
349                         IP4_FRAG_NEXT_IP4_LOOKUP;
350                       vnet_buffer (p1)->ip_frag.mtu = d1->mtu;
351                       next1 = IP6_MAP_NEXT_IP4_FRAGMENT;
352                     }
353                   else
354                     {
355                       next1 =
356                         ip6_map_ip4_lookup_bypass (p1,
357                                                    ip41) ?
358                         IP6_MAP_NEXT_IP4_REWRITE : next1;
359                     }
360                   vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
361                                                    thread_index,
362                                                    map_domain_index1, 1,
363                                                    clib_net_to_host_u16
364                                                    (ip41->length));
365                 }
366             }
367
368           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
369             {
370               map_add_trace (vm, node, p0, map_domain_index0, port0);
371             }
372
373           if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
374             {
375               map_add_trace (vm, node, p1, map_domain_index1, port1);
376             }
377
378           if (error0 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
379             {
380               /* Set ICMP parameters */
381               vlib_buffer_advance (p0, -sizeof (ip6_header_t));
382               icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
383                                            ICMP6_destination_unreachable_source_address_failed_policy,
384                                            0);
385               next0 = IP6_MAP_NEXT_ICMP;
386             }
387           else
388             {
389               next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
390             }
391
392           if (error1 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
393             {
394               /* Set ICMP parameters */
395               vlib_buffer_advance (p1, -sizeof (ip6_header_t));
396               icmp6_error_set_vnet_buffer (p1, ICMP6_destination_unreachable,
397                                            ICMP6_destination_unreachable_source_address_failed_policy,
398                                            0);
399               next1 = IP6_MAP_NEXT_ICMP;
400             }
401           else
402             {
403               next1 = (error1 == MAP_ERROR_NONE) ? next1 : IP6_MAP_NEXT_DROP;
404             }
405
406           /* Reset packet */
407           if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
408             vlib_buffer_advance (p0, -sizeof (ip6_header_t));
409           if (next1 == IP6_MAP_NEXT_IP6_LOCAL)
410             vlib_buffer_advance (p1, -sizeof (ip6_header_t));
411
412           p0->error = error_node->errors[error0];
413           p1->error = error_node->errors[error1];
414           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
415                                            n_left_to_next, pi0, pi1, next0,
416                                            next1);
417         }
418
419       /* Single loop */
420       while (n_left_from > 0 && n_left_to_next > 0)
421         {
422           u32 pi0;
423           vlib_buffer_t *p0;
424           u8 error0 = MAP_ERROR_NONE;
425           map_domain_t *d0 = 0;
426           ip4_header_t *ip40;
427           ip6_header_t *ip60;
428           i32 port0 = 0;
429           u32 map_domain_index0 = ~0;
430           u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
431
432           pi0 = to_next[0] = from[0];
433           from += 1;
434           n_left_from -= 1;
435           to_next += 1;
436           n_left_to_next -= 1;
437
438           p0 = vlib_get_buffer (vm, pi0);
439           ip60 = vlib_buffer_get_current (p0);
440           vlib_buffer_advance (p0, sizeof (ip6_header_t));
441           ip40 = vlib_buffer_get_current (p0);
442
443           /*
444            * Encapsulated IPv4 packet
445            *   - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
446            *   - Lookup/Rewrite or Fragment node in case of packet > MTU
447            * Fragmented IPv6 packet
448            * ICMP IPv6 packet
449            *   - Error -> Pass to ICMPv6/ICMPv4 relay
450            *   - Info -> Pass to IPv6 local
451            * Anything else -> drop
452            */
453           if (PREDICT_TRUE
454               (ip60->protocol == IP_PROTOCOL_IP_IN_IP
455                && clib_net_to_host_u16 (ip60->payload_length) > 20))
456             {
457               d0 =
458                 ip4_map_get_domain ((ip4_address_t *) & ip40->
459                                     src_address.as_u32, &map_domain_index0,
460                                     &error0);
461             }
462           else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
463                    clib_net_to_host_u16 (ip60->payload_length) >
464                    sizeof (icmp46_header_t))
465             {
466               icmp46_header_t *icmp = (void *) (ip60 + 1);
467               next0 = (icmp->type == ICMP6_echo_request
468                        || icmp->type ==
469                        ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
470                 IP6_MAP_NEXT_IP6_ICMP_RELAY;
471             }
472           else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION &&
473                    (((ip6_frag_hdr_t *) (ip60 + 1))->next_hdr ==
474                     IP_PROTOCOL_IP_IN_IP))
475             {
476               error0 = MAP_ERROR_FRAGMENTED;
477             }
478           else
479             {
480               /* XXX: Move get_domain to ip6_get_domain lookup on source */
481               //error0 = MAP_ERROR_BAD_PROTOCOL;
482               vlib_buffer_advance (p0, -sizeof (ip6_header_t));
483               vnet_feature_next (&next0, p0);
484             }
485
486           if (d0)
487             {
488               /* MAP inbound security check */
489               ip6_map_security_check (d0, p0, ip40, ip60, &next0, &error0);
490
491               if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
492                                 next0 == IP6_MAP_NEXT_IP4_LOOKUP))
493                 {
494                   if (PREDICT_FALSE
495                       (d0->mtu
496                        && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
497                     {
498                       vnet_buffer (p0)->ip_frag.flags = 0;
499                       vnet_buffer (p0)->ip_frag.next_index =
500                         IP4_FRAG_NEXT_IP4_LOOKUP;
501                       vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
502                       next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
503                     }
504                   else
505                     {
506                       next0 =
507                         ip6_map_ip4_lookup_bypass (p0,
508                                                    ip40) ?
509                         IP6_MAP_NEXT_IP4_REWRITE : next0;
510                     }
511                   vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
512                                                    thread_index,
513                                                    map_domain_index0, 1,
514                                                    clib_net_to_host_u16
515                                                    (ip40->length));
516                 }
517             }
518
519           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
520             {
521               map_add_trace (vm, node, p0, map_domain_index0, port0);
522             }
523
524           if (mm->icmp6_enabled &&
525               (error0 == MAP_ERROR_DECAP_SEC_CHECK
526                || error0 == MAP_ERROR_NO_DOMAIN))
527             {
528               /* Set ICMP parameters */
529               vlib_buffer_advance (p0, -sizeof (ip6_header_t));
530               icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
531                                            ICMP6_destination_unreachable_source_address_failed_policy,
532                                            0);
533               next0 = IP6_MAP_NEXT_ICMP;
534             }
535           else
536             {
537               next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
538             }
539
540           /* Reset packet */
541           if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
542             vlib_buffer_advance (p0, -sizeof (ip6_header_t));
543
544           p0->error = error_node->errors[error0];
545           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
546                                            n_left_to_next, pi0, next0);
547         }
548       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
549     }
550
551   return frame->n_vectors;
552 }
553
554
555 void
556 map_ip6_drop_pi (u32 pi)
557 {
558   vlib_main_t *vm = vlib_get_main ();
559   vlib_node_runtime_t *n =
560     vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index);
561   vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi);
562 }
563
564 /*
565  * ip6_map_post_ip4_reass
566  */
567 static uword
568 ip6_map_post_ip4_reass (vlib_main_t * vm,
569                         vlib_node_runtime_t * node, vlib_frame_t * frame)
570 {
571   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
572   vlib_node_runtime_t *error_node =
573     vlib_node_get_runtime (vm, ip6_map_post_ip4_reass_node.index);
574   map_main_t *mm = &map_main;
575   vlib_combined_counter_main_t *cm = mm->domain_counters;
576   u32 thread_index = vm->thread_index;
577
578   from = vlib_frame_vector_args (frame);
579   n_left_from = frame->n_vectors;
580   next_index = node->cached_next_index;
581   while (n_left_from > 0)
582     {
583       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
584
585       /* Single loop */
586       while (n_left_from > 0 && n_left_to_next > 0)
587         {
588           u32 pi0;
589           vlib_buffer_t *p0;
590           u8 error0 = MAP_ERROR_NONE;
591           map_domain_t *d0;
592           ip4_header_t *ip40;
593           ip6_header_t *ip60;
594           i32 port0 = 0;
595           u32 map_domain_index0 = ~0;
596           u32 next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP;
597
598           pi0 = to_next[0] = from[0];
599           from += 1;
600           n_left_from -= 1;
601           to_next += 1;
602           n_left_to_next -= 1;
603
604           p0 = vlib_get_buffer (vm, pi0);
605           ip40 = vlib_buffer_get_current (p0);
606           ip60 = ((ip6_header_t *) ip40) - 1;
607
608           d0 =
609             ip4_map_get_domain ((ip4_address_t *) & ip40->src_address.as_u32,
610                                 &map_domain_index0, &error0);
611
612           port0 = vnet_buffer (p0)->ip.reass.l4_src_port;
613
614           if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
615             error0 =
616               ip6_map_sec_check (d0, port0, ip40,
617                                  ip60) ? MAP_ERROR_NONE :
618               MAP_ERROR_DECAP_SEC_CHECK;
619
620           if (PREDICT_FALSE
621               (d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu)
622                && error0 == MAP_ERROR_NONE))
623             {
624               vnet_buffer (p0)->ip_frag.flags = 0;
625               vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
626               vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
627               next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT;
628             }
629
630           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
631             {
632               map_ip6_map_ip4_reass_trace_t *tr =
633                 vlib_add_trace (vm, node, p0, sizeof (*tr));
634               tr->map_domain_index = map_domain_index0;
635               tr->port = port0;
636             }
637
638           if (error0 == MAP_ERROR_NONE)
639             vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
640                                              thread_index,
641                                              map_domain_index0, 1,
642                                              clib_net_to_host_u16
643                                              (ip40->length));
644           next0 =
645             (error0 ==
646              MAP_ERROR_NONE) ? next0 : IP6_MAP_POST_IP4_REASS_NEXT_DROP;
647           p0->error = error_node->errors[error0];
648           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
649                                            n_left_to_next, pi0, next0);
650
651         }
652       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
653     }
654   return frame->n_vectors;
655 }
656
657 /*
658  * ip6_icmp_relay
659  */
660 static uword
661 ip6_map_icmp_relay (vlib_main_t * vm,
662                     vlib_node_runtime_t * node, vlib_frame_t * frame)
663 {
664   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
665   vlib_node_runtime_t *error_node =
666     vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index);
667   map_main_t *mm = &map_main;
668   u32 thread_index = vm->thread_index;
669   u16 *fragment_ids, *fid;
670
671   from = vlib_frame_vector_args (frame);
672   n_left_from = frame->n_vectors;
673   next_index = node->cached_next_index;
674
675   /* Get random fragment IDs for replies. */
676   fid = fragment_ids =
677     clib_random_buffer_get_data (&vm->random_buffer,
678                                  n_left_from * sizeof (fragment_ids[0]));
679
680   while (n_left_from > 0)
681     {
682       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
683
684       /* Single loop */
685       while (n_left_from > 0 && n_left_to_next > 0)
686         {
687           u32 pi0;
688           vlib_buffer_t *p0;
689           u8 error0 = MAP_ERROR_NONE;
690           ip6_header_t *ip60;
691           u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP;
692           u32 mtu;
693
694           pi0 = to_next[0] = from[0];
695           from += 1;
696           n_left_from -= 1;
697           to_next += 1;
698           n_left_to_next -= 1;
699
700           p0 = vlib_get_buffer (vm, pi0);
701           ip60 = vlib_buffer_get_current (p0);
702           u16 tlen = clib_net_to_host_u16 (ip60->payload_length);
703
704           /*
705            * In:
706            *  IPv6 header           (40)
707            *  ICMPv6 header          (8)
708            *  IPv6 header           (40)
709            *  Original IPv4 header / packet
710            * Out:
711            *  New IPv4 header
712            *  New ICMP header
713            *  Original IPv4 header / packet
714            */
715
716           /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */
717           if (tlen < 76)
718             {
719               error0 = MAP_ERROR_ICMP_RELAY;
720               goto error;
721             }
722
723           icmp46_header_t *icmp60 = (icmp46_header_t *) (ip60 + 1);
724           ip6_header_t *inner_ip60 = (ip6_header_t *) (icmp60 + 2);
725
726           if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP)
727             {
728               error0 = MAP_ERROR_ICMP_RELAY;
729               goto error;
730             }
731
732           ip4_header_t *inner_ip40 = (ip4_header_t *) (inner_ip60 + 1);
733           vlib_buffer_advance (p0, 60); /* sizeof ( IPv6 + ICMP + IPv6 - IPv4 - ICMP ) */
734           ip4_header_t *new_ip40 = vlib_buffer_get_current (p0);
735           icmp46_header_t *new_icmp40 = (icmp46_header_t *) (new_ip40 + 1);
736
737           /*
738            * Relay according to RFC2473, section 8.3
739            */
740           switch (icmp60->type)
741             {
742             case ICMP6_destination_unreachable:
743             case ICMP6_time_exceeded:
744             case ICMP6_parameter_problem:
745               /* Type 3 - destination unreachable, Code 1 - host unreachable */
746               new_icmp40->type = ICMP4_destination_unreachable;
747               new_icmp40->code =
748                 ICMP4_destination_unreachable_destination_unreachable_host;
749               break;
750
751             case ICMP6_packet_too_big:
752               /* Type 3 - destination unreachable, Code 4 - packet too big */
753               /* Potential TODO: Adjust domain tunnel MTU based on the value received here */
754               mtu = clib_net_to_host_u32 (*((u32 *) (icmp60 + 1)));
755
756               /* Check DF flag */
757               if (!
758                   (inner_ip40->flags_and_fragment_offset &
759                    clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT)))
760                 {
761                   error0 = MAP_ERROR_ICMP_RELAY;
762                   goto error;
763                 }
764
765               new_icmp40->type = ICMP4_destination_unreachable;
766               new_icmp40->code =
767                 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set;
768               *((u32 *) (new_icmp40 + 1)) =
769                 clib_host_to_net_u32 (mtu < 1280 ? 1280 : mtu);
770               break;
771
772             default:
773               error0 = MAP_ERROR_ICMP_RELAY;
774               break;
775             }
776
777           /*
778            * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812)
779            */
780           new_ip40->ip_version_and_header_length = 0x45;
781           new_ip40->tos = 0;
782           u16 nlen = (tlen - 20) > 576 ? 576 : tlen - 20;
783           new_ip40->length = clib_host_to_net_u16 (nlen);
784           new_ip40->fragment_id = fid[0];
785           fid++;
786           new_ip40->ttl = 64;
787           new_ip40->protocol = IP_PROTOCOL_ICMP;
788           new_ip40->src_address = mm->icmp4_src_address;
789           new_ip40->dst_address = inner_ip40->src_address;
790           new_ip40->checksum = ip4_header_checksum (new_ip40);
791
792           new_icmp40->checksum = 0;
793           ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20);
794           new_icmp40->checksum = ~ip_csum_fold (sum);
795
796           vlib_increment_simple_counter (&mm->icmp_relayed, thread_index, 0,
797                                          1);
798
799         error:
800           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
801             {
802               map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
803               tr->map_domain_index = 0;
804               tr->port = 0;
805             }
806
807           next0 =
808             (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP;
809           p0->error = error_node->errors[error0];
810           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
811                                            n_left_to_next, pi0, next0);
812         }
813       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
814     }
815
816   return frame->n_vectors;
817
818 }
819
/*
 * Error strings shared by the node registrations in this file: the
 * foreach_map_error list is expanded so that each (sym, string) entry
 * contributes its string, in list order.
 */
static char *map_error_strings[] = {
#define _(sym,string) string,
  foreach_map_error
#undef _
};
825
826 /* *INDENT-OFF* */
/*
 * Place "ip6-map" on the "ip6-unicast" feature arc, ordered after
 * "ip6-full-reassembly-feature" and before "ip6-flow-classify".
 */
VNET_FEATURE_INIT (ip6_map_feature, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-map",
  .runs_before = VNET_FEATURES ("ip6-flow-classify"),
  .runs_after = VNET_FEATURES ("ip6-full-reassembly-feature"),
};
834
/*
 * "ip6-map" node: runs ip6_map() and traces with format_map_trace.
 * The .next_nodes table is indexed by enum ip6_map_next_e.
 */
VLIB_REGISTER_NODE(ip6_map_node) = {
  .function = ip6_map,
  .name = "ip6-map",
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,

  .n_next_nodes = IP6_MAP_N_NEXT,
  .next_nodes = {
    [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
#ifdef MAP_SKIP_IP6_LOOKUP
    [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-load-balance",
#endif
    [IP6_MAP_NEXT_IP4_REASS] = "ip4-sv-reassembly-custom-next",
    [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
    [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay",
    [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local",
    [IP6_MAP_NEXT_DROP] = "error-drop",
    [IP6_MAP_NEXT_ICMP] = "ip6-icmp-error",
  },
};
859 /* *INDENT-ON* */
860
861 /* *INDENT-OFF* */
/*
 * "ip6-map-post-ip4-reass" node: runs ip6_map_post_ip4_reass().  Its
 * index is registered with IPv4 virtual reassembly in ip6_map_init().
 * The .next_nodes table is indexed by enum ip6_map_post_ip4_reass_next_e.
 */
VLIB_REGISTER_NODE(ip6_map_post_ip4_reass_node) = {
  .function = ip6_map_post_ip4_reass,
  .name = "ip6-map-post-ip4-reass",
  .vector_size = sizeof(u32),
  .format_trace = format_ip6_map_post_ip4_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,
  .n_next_nodes = IP6_MAP_POST_IP4_REASS_N_NEXT,
  .next_nodes = {
    [IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
    [IP6_MAP_POST_IP4_REASS_NEXT_DROP] = "error-drop",
  },
};
877 /* *INDENT-ON* */
878
879 /* *INDENT-OFF* */
/*
 * "ip6-map-icmp-relay" node: runs ip6_map_icmp_relay().
 * The .next_nodes table is indexed by enum ip6_icmp_relay_next_e.
 */
VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
  .function = ip6_map_icmp_relay,
  .name = "ip6-map-icmp-relay",
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace, /* FIXME (original author's marker; what needs fixing is not stated) */
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,
  .n_next_nodes = IP6_ICMP_RELAY_N_NEXT,
  .next_nodes = {
    [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop",
  },
};
894 /* *INDENT-ON* */
895
896 clib_error_t *
897 ip6_map_init (vlib_main_t * vm)
898 {
899   map_main.ip4_sv_reass_custom_next_index =
900     ip4_sv_reass_custom_register_next_node
901     (ip6_map_post_ip4_reass_node.index);
902   return 0;
903 }
904
/* Run ip6_map_init as an init function, ordered after "map_init". */
VLIB_INIT_FUNCTION (ip6_map_init) =
{
.runs_after = VLIB_INITS ("map_init"),};
908
909 /*
910  * fd.io coding-style-patch-verification: ON
911  *
912  * Local Variables:
913  * eval: (c-set-style "gnu")
914  * End:
915  */