stats: counters data model
[vpp.git] / src / plugins / map / ip6_map.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "map.h"
16
17 #include <vnet/ip/ip_frag.h>
18 #include <vnet/ip/ip4_to_ip6.h>
19 #include <vnet/ip/ip6_to_ip4.h>
20 #include <vnet/ip/reass/ip4_sv_reass.h>
21
/*
 * Next-node dispositions of the "ip6-map" node. Each value indexes the
 * .next_nodes table of the ip6_map_node registration further down in this
 * file; the node names quoted below are taken from that table.
 */
22 enum ip6_map_next_e
23 {
24   IP6_MAP_NEXT_IP4_LOOKUP,  /* "ip4-lookup": default for decapsulated IPv4 */
25 #ifdef MAP_SKIP_IP6_LOOKUP
26   IP6_MAP_NEXT_IP4_REWRITE,  /* "ip4-load-balance": chosen when ip6_map_ip4_lookup_bypass() returns true */
27 #endif
28   IP6_MAP_NEXT_IP4_REASS,  /* "ip4-sv-reassembly-custom-next": inner IPv4 fragments needing a port */
29   IP6_MAP_NEXT_IP4_FRAGMENT,  /* "ip4-frag": inner packet larger than the domain MTU */
30   IP6_MAP_NEXT_IP6_ICMP_RELAY,  /* "ip6-map-icmp-relay": ICMPv6 other than echo request/reply */
31   IP6_MAP_NEXT_IP6_LOCAL,  /* "ip6-local": ICMPv6 echo request/reply */
32   IP6_MAP_NEXT_DROP,  /* "error-drop" */
33   IP6_MAP_NEXT_ICMP,  /* "ip6-icmp-error": ICMPv6 error generation */
34   IP6_MAP_N_NEXT,  /* number of next nodes; not a disposition */
35 };
36
/*
 * Next-node indices for ip6_map_ip6_reass_node. In this file only
 * IP6_MAP_IP6_REASS_NEXT_DROP is used (by map_ip6_drop_pi()).
 * NOTE(review): the node function itself is not visible here - confirm the
 * remaining values are still in use.
 */
37 enum ip6_map_ip6_reass_next_e
38 {
39   IP6_MAP_IP6_REASS_NEXT_IP6_MAP,
40   IP6_MAP_IP6_REASS_NEXT_DROP,
41   IP6_MAP_IP6_REASS_N_NEXT,  /* number of next nodes; not a disposition */
42 };
43
/*
 * Next-node dispositions of the "ip6-map-post-ip4-reass" node; indexes the
 * .next_nodes table of ip6_map_post_ip4_reass_node.
 */
44 enum ip6_map_post_ip4_reass_next_e
45 {
46   IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP,  /* "ip4-lookup" */
47   IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT,  /* "ip4-frag": packet larger than the domain MTU */
48   IP6_MAP_POST_IP4_REASS_NEXT_DROP,  /* "error-drop" */
49   IP6_MAP_POST_IP4_REASS_N_NEXT,  /* number of next nodes; not a disposition */
50 };
51
/*
 * Next-node dispositions of the "ip6-map-icmp-relay" node; indexes the
 * .next_nodes table of ip6_map_icmp_relay_node.
 */
52 enum ip6_icmp_relay_next_e
53 {
54   IP6_ICMP_RELAY_NEXT_IP4_LOOKUP,  /* "ip4-lookup": relayed ICMPv4 error */
55   IP6_ICMP_RELAY_NEXT_DROP,  /* "error-drop" */
56   IP6_ICMP_RELAY_N_NEXT,  /* number of next nodes; not a disposition */
57 };
58
/*
 * Tentative definitions of the graph-node registrations that the node
 * functions below refer to before their VLIB_REGISTER_NODE blocks.
 * NOTE(review): ip6_map_ip6_reass_node has no VLIB_REGISTER_NODE in the
 * visible part of this file - confirm it is registered elsewhere.
 */
59 vlib_node_registration_t ip6_map_post_ip4_reass_node;
60 vlib_node_registration_t ip6_map_ip6_reass_node;
61 static vlib_node_registration_t ip6_map_icmp_relay_node;
62
/*
 * Trace record written by the "ip6-map-post-ip4-reass" node and rendered by
 * format_ip6_map_post_ip4_reass_trace().
 */
63 typedef struct
64 {
65   u32 map_domain_index;  /* index returned by ip4_map_get_domain() */
66   u16 port;  /* L4 port; the formatter converts it with clib_net_to_host_u16() */
67   u8 cached;  /* non-zero is printed as "cached", zero as "forwarded" */
68 } map_ip6_map_ip4_reass_trace_t;
69
70 u8 *
71 format_ip6_map_post_ip4_reass_trace (u8 * s, va_list * args)
72 {
73   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
74   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
75   map_ip6_map_ip4_reass_trace_t *t =
76     va_arg (*args, map_ip6_map_ip4_reass_trace_t *);
77   return format (s, "MAP domain index: %d L4 port: %u Status: %s",
78                  t->map_domain_index, clib_net_to_host_u16 (t->port),
79                  t->cached ? "cached" : "forwarded");
80 }
81
/*
 * Trace record rendered by format_ip6_map_ip6_reass_trace().
 * NOTE(review): nothing in the visible part of this file fills this record
 * in - confirm where it is produced.
 */
82 typedef struct
83 {
84   u16 offset;  /* printed as "Offset" */
85   u16 frag_len;  /* printed as "Fragment length" */
86   u8 out;  /* non-zero is printed as "out", zero as "in" */
87 } map_ip6_map_ip6_reass_trace_t;
88
89 u8 *
90 format_ip6_map_ip6_reass_trace (u8 * s, va_list * args)
91 {
92   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
93   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
94   map_ip6_map_ip6_reass_trace_t *t =
95     va_arg (*args, map_ip6_map_ip6_reass_trace_t *);
96   return format (s, "Offset: %d Fragment length: %d Status: %s", t->offset,
97                  t->frag_len, t->out ? "out" : "in");
98 }
99
100 /*
101  * ip6_map_sec_check
102  */
103 static_always_inline bool
104 ip6_map_sec_check (map_domain_t * d, u16 port, ip4_header_t * ip4,
105                    ip6_header_t * ip6)
106 {
107   u16 sp4 = clib_net_to_host_u16 (port);
108   u32 sa4 = clib_net_to_host_u32 (ip4->src_address.as_u32);
109   u64 sal6 = map_get_pfx (d, sa4, sp4);
110   u64 sar6 = map_get_sfx (d, sa4, sp4);
111
112   if (PREDICT_FALSE
113       (sal6 != clib_net_to_host_u64 (ip6->src_address.as_u64[0])
114        || sar6 != clib_net_to_host_u64 (ip6->src_address.as_u64[1])))
115     return (false);
116   return (true);
117 }
118
119 static_always_inline void
120 ip6_map_security_check (map_domain_t * d, vlib_buffer_t * b0,
121                         ip4_header_t * ip4, ip6_header_t * ip6, u32 * next,
122                         u8 * error)
123 {
124   map_main_t *mm = &map_main;
125   if (d->ea_bits_len || d->rules)
126     {
127       if (d->psid_length > 0)
128         {
129           if (!ip4_is_fragment (ip4))
130             {
131               u16 port = ip4_get_port (ip4, 1);
132               if (port)
133                 {
134                   if (mm->sec_check)
135                     *error =
136                       ip6_map_sec_check (d, port, ip4,
137                                          ip6) ? MAP_ERROR_NONE :
138                       MAP_ERROR_DECAP_SEC_CHECK;
139                 }
140               else
141                 {
142                   *error = MAP_ERROR_BAD_PROTOCOL;
143                 }
144             }
145           else
146             {
147               if (mm->sec_check_frag)
148                 {
149                   vnet_buffer (b0)->ip.reass.next_index =
150                     map_main.ip4_sv_reass_custom_next_index;
151                   *next = IP6_MAP_NEXT_IP4_REASS;
152                 }
153             }
154         }
155     }
156 }
157
158 /*
159  * ip6_map
160  */
161 static uword
162 ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
163 {
164   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
165   vlib_node_runtime_t *error_node =
166     vlib_node_get_runtime (vm, ip6_map_node.index);
167   map_main_t *mm = &map_main;
168   vlib_combined_counter_main_t *cm = mm->domain_counters;
169   u32 thread_index = vm->thread_index;
170
171   from = vlib_frame_vector_args (frame);
172   n_left_from = frame->n_vectors;
173   next_index = node->cached_next_index;
174   while (n_left_from > 0)
175     {
176       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
177
178       /* Dual loop */
179       while (n_left_from >= 4 && n_left_to_next >= 2)
180         {
181           u32 pi0, pi1;
182           vlib_buffer_t *p0, *p1;
183           u8 error0 = MAP_ERROR_NONE;
184           u8 error1 = MAP_ERROR_NONE;
185           map_domain_t *d0 = 0, *d1 = 0;
186           ip4_header_t *ip40, *ip41;
187           ip6_header_t *ip60, *ip61;
188           u16 port0 = 0, port1 = 0;
189           u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
190           u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
191           u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP;
192
193           /* Prefetch next iteration. */
194           {
195             vlib_buffer_t *p2, *p3;
196
197             p2 = vlib_get_buffer (vm, from[2]);
198             p3 = vlib_get_buffer (vm, from[3]);
199
200             vlib_prefetch_buffer_header (p2, LOAD);
201             vlib_prefetch_buffer_header (p3, LOAD);
202
203             /* IPv6 + IPv4 header + 8 bytes of ULP */
204             CLIB_PREFETCH (p2->data, 68, LOAD);
205             CLIB_PREFETCH (p3->data, 68, LOAD);
206           }
207
208           pi0 = to_next[0] = from[0];
209           pi1 = to_next[1] = from[1];
210           from += 2;
211           n_left_from -= 2;
212           to_next += 2;
213           n_left_to_next -= 2;
214
215           p0 = vlib_get_buffer (vm, pi0);
216           p1 = vlib_get_buffer (vm, pi1);
217           ip60 = vlib_buffer_get_current (p0);
218           ip61 = vlib_buffer_get_current (p1);
219           vlib_buffer_advance (p0, sizeof (ip6_header_t));
220           vlib_buffer_advance (p1, sizeof (ip6_header_t));
221           ip40 = vlib_buffer_get_current (p0);
222           ip41 = vlib_buffer_get_current (p1);
223
224           /*
225            * Encapsulated IPv4 packet
226            *   - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
227            *   - Lookup/Rewrite or Fragment node in case of packet > MTU
228            * Fragmented IPv6 packet
229            * ICMP IPv6 packet
230            *   - Error -> Pass to ICMPv6/ICMPv4 relay
231            *   - Info -> Pass to IPv6 local
232            * Anything else -> drop
233            */
234           if (PREDICT_TRUE
235               (ip60->protocol == IP_PROTOCOL_IP_IN_IP
236                && clib_net_to_host_u16 (ip60->payload_length) > 20))
237             {
238               d0 =
239                 ip4_map_get_domain ((ip4_address_t *) & ip40->
240                                     src_address.as_u32, &map_domain_index0,
241                                     &error0);
242             }
243           else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
244                    clib_net_to_host_u16 (ip60->payload_length) >
245                    sizeof (icmp46_header_t))
246             {
247               icmp46_header_t *icmp = (void *) (ip60 + 1);
248               next0 = (icmp->type == ICMP6_echo_request
249                        || icmp->type ==
250                        ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
251                 IP6_MAP_NEXT_IP6_ICMP_RELAY;
252             }
253           else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
254             {
255               error0 = MAP_ERROR_FRAGMENTED;
256             }
257           else
258             {
259               error0 = MAP_ERROR_BAD_PROTOCOL;
260             }
261           if (PREDICT_TRUE
262               (ip61->protocol == IP_PROTOCOL_IP_IN_IP
263                && clib_net_to_host_u16 (ip61->payload_length) > 20))
264             {
265               d1 =
266                 ip4_map_get_domain ((ip4_address_t *) & ip41->
267                                     src_address.as_u32, &map_domain_index1,
268                                     &error1);
269             }
270           else if (ip61->protocol == IP_PROTOCOL_ICMP6 &&
271                    clib_net_to_host_u16 (ip61->payload_length) >
272                    sizeof (icmp46_header_t))
273             {
274               icmp46_header_t *icmp = (void *) (ip61 + 1);
275               next1 = (icmp->type == ICMP6_echo_request
276                        || icmp->type ==
277                        ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
278                 IP6_MAP_NEXT_IP6_ICMP_RELAY;
279             }
280           else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
281             {
282               error1 = MAP_ERROR_FRAGMENTED;
283             }
284           else
285             {
286               error1 = MAP_ERROR_BAD_PROTOCOL;
287             }
288
289           if (d0)
290             {
291               /* MAP inbound security check */
292               ip6_map_security_check (d0, p0, ip40, ip60, &next0, &error0);
293
294               if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
295                                 next0 == IP6_MAP_NEXT_IP4_LOOKUP))
296                 {
297                   if (PREDICT_FALSE
298                       (d0->mtu
299                        && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
300                     {
301                       vnet_buffer (p0)->ip_frag.flags = 0;
302                       vnet_buffer (p0)->ip_frag.next_index =
303                         IP_FRAG_NEXT_IP4_LOOKUP;
304                       vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
305                       next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
306                     }
307                   else
308                     {
309                       next0 =
310                         ip6_map_ip4_lookup_bypass (p0,
311                                                    ip40) ?
312                         IP6_MAP_NEXT_IP4_REWRITE : next0;
313                     }
314                   vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
315                                                    thread_index,
316                                                    map_domain_index0, 1,
317                                                    clib_net_to_host_u16
318                                                    (ip40->length));
319                 }
320             }
321           if (d1)
322             {
323               /* MAP inbound security check */
324               ip6_map_security_check (d1, p1, ip41, ip61, &next1, &error1);
325
326               if (PREDICT_TRUE (error1 == MAP_ERROR_NONE &&
327                                 next1 == IP6_MAP_NEXT_IP4_LOOKUP))
328                 {
329                   if (PREDICT_FALSE
330                       (d1->mtu
331                        && (clib_host_to_net_u16 (ip41->length) > d1->mtu)))
332                     {
333                       vnet_buffer (p1)->ip_frag.flags = 0;
334                       vnet_buffer (p1)->ip_frag.next_index =
335                         IP_FRAG_NEXT_IP4_LOOKUP;
336                       vnet_buffer (p1)->ip_frag.mtu = d1->mtu;
337                       next1 = IP6_MAP_NEXT_IP4_FRAGMENT;
338                     }
339                   else
340                     {
341                       next1 =
342                         ip6_map_ip4_lookup_bypass (p1,
343                                                    ip41) ?
344                         IP6_MAP_NEXT_IP4_REWRITE : next1;
345                     }
346                   vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
347                                                    thread_index,
348                                                    map_domain_index1, 1,
349                                                    clib_net_to_host_u16
350                                                    (ip41->length));
351                 }
352             }
353
354           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
355             {
356               map_add_trace (vm, node, p0, map_domain_index0, port0);
357             }
358
359           if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
360             {
361               map_add_trace (vm, node, p1, map_domain_index1, port1);
362             }
363
364           if (error0 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
365             {
366               /* Set ICMP parameters */
367               vlib_buffer_advance (p0, -sizeof (ip6_header_t));
368               icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
369                                            ICMP6_destination_unreachable_source_address_failed_policy,
370                                            0);
371               next0 = IP6_MAP_NEXT_ICMP;
372             }
373           else
374             {
375               next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
376             }
377
378           if (error1 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
379             {
380               /* Set ICMP parameters */
381               vlib_buffer_advance (p1, -sizeof (ip6_header_t));
382               icmp6_error_set_vnet_buffer (p1, ICMP6_destination_unreachable,
383                                            ICMP6_destination_unreachable_source_address_failed_policy,
384                                            0);
385               next1 = IP6_MAP_NEXT_ICMP;
386             }
387           else
388             {
389               next1 = (error1 == MAP_ERROR_NONE) ? next1 : IP6_MAP_NEXT_DROP;
390             }
391
392           /* Reset packet */
393           if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
394             vlib_buffer_advance (p0, -sizeof (ip6_header_t));
395           if (next1 == IP6_MAP_NEXT_IP6_LOCAL)
396             vlib_buffer_advance (p1, -sizeof (ip6_header_t));
397
398           p0->error = error_node->errors[error0];
399           p1->error = error_node->errors[error1];
400           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
401                                            n_left_to_next, pi0, pi1, next0,
402                                            next1);
403         }
404
405       /* Single loop */
406       while (n_left_from > 0 && n_left_to_next > 0)
407         {
408           u32 pi0;
409           vlib_buffer_t *p0;
410           u8 error0 = MAP_ERROR_NONE;
411           map_domain_t *d0 = 0;
412           ip4_header_t *ip40;
413           ip6_header_t *ip60;
414           i32 port0 = 0;
415           u32 map_domain_index0 = ~0;
416           u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
417
418           pi0 = to_next[0] = from[0];
419           from += 1;
420           n_left_from -= 1;
421           to_next += 1;
422           n_left_to_next -= 1;
423
424           p0 = vlib_get_buffer (vm, pi0);
425           ip60 = vlib_buffer_get_current (p0);
426           vlib_buffer_advance (p0, sizeof (ip6_header_t));
427           ip40 = vlib_buffer_get_current (p0);
428
429           /*
430            * Encapsulated IPv4 packet
431            *   - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
432            *   - Lookup/Rewrite or Fragment node in case of packet > MTU
433            * Fragmented IPv6 packet
434            * ICMP IPv6 packet
435            *   - Error -> Pass to ICMPv6/ICMPv4 relay
436            *   - Info -> Pass to IPv6 local
437            * Anything else -> drop
438            */
439           if (PREDICT_TRUE
440               (ip60->protocol == IP_PROTOCOL_IP_IN_IP
441                && clib_net_to_host_u16 (ip60->payload_length) > 20))
442             {
443               d0 =
444                 ip4_map_get_domain ((ip4_address_t *) & ip40->
445                                     src_address.as_u32, &map_domain_index0,
446                                     &error0);
447             }
448           else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
449                    clib_net_to_host_u16 (ip60->payload_length) >
450                    sizeof (icmp46_header_t))
451             {
452               icmp46_header_t *icmp = (void *) (ip60 + 1);
453               next0 = (icmp->type == ICMP6_echo_request
454                        || icmp->type ==
455                        ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
456                 IP6_MAP_NEXT_IP6_ICMP_RELAY;
457             }
458           else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION &&
459                    (((ip6_frag_hdr_t *) (ip60 + 1))->next_hdr ==
460                     IP_PROTOCOL_IP_IN_IP))
461             {
462               error0 = MAP_ERROR_FRAGMENTED;
463             }
464           else
465             {
466               /* XXX: Move get_domain to ip6_get_domain lookup on source */
467               //error0 = MAP_ERROR_BAD_PROTOCOL;
468               vlib_buffer_advance (p0, -sizeof (ip6_header_t));
469               vnet_feature_next (&next0, p0);
470             }
471
472           if (d0)
473             {
474               /* MAP inbound security check */
475               ip6_map_security_check (d0, p0, ip40, ip60, &next0, &error0);
476
477               if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
478                                 next0 == IP6_MAP_NEXT_IP4_LOOKUP))
479                 {
480                   if (PREDICT_FALSE
481                       (d0->mtu
482                        && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
483                     {
484                       vnet_buffer (p0)->ip_frag.flags = 0;
485                       vnet_buffer (p0)->ip_frag.next_index =
486                         IP_FRAG_NEXT_IP4_LOOKUP;
487                       vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
488                       next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
489                     }
490                   else
491                     {
492                       next0 =
493                         ip6_map_ip4_lookup_bypass (p0,
494                                                    ip40) ?
495                         IP6_MAP_NEXT_IP4_REWRITE : next0;
496                     }
497                   vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
498                                                    thread_index,
499                                                    map_domain_index0, 1,
500                                                    clib_net_to_host_u16
501                                                    (ip40->length));
502                 }
503             }
504
505           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
506             {
507               map_add_trace (vm, node, p0, map_domain_index0, port0);
508             }
509
510           if (mm->icmp6_enabled &&
511               (error0 == MAP_ERROR_DECAP_SEC_CHECK
512                || error0 == MAP_ERROR_NO_DOMAIN))
513             {
514               /* Set ICMP parameters */
515               vlib_buffer_advance (p0, -sizeof (ip6_header_t));
516               icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
517                                            ICMP6_destination_unreachable_source_address_failed_policy,
518                                            0);
519               next0 = IP6_MAP_NEXT_ICMP;
520             }
521           else
522             {
523               next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
524             }
525
526           /* Reset packet */
527           if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
528             vlib_buffer_advance (p0, -sizeof (ip6_header_t));
529
530           p0->error = error_node->errors[error0];
531           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
532                                            n_left_to_next, pi0, next0);
533         }
534       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
535     }
536
537   return frame->n_vectors;
538 }
539
540
541 void
542 map_ip6_drop_pi (u32 pi)
543 {
544   vlib_main_t *vm = vlib_get_main ();
545   vlib_node_runtime_t *n =
546     vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index);
547   vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi);
548 }
549
550 /*
551  * ip6_map_post_ip4_reass
552  */
553 static uword
554 ip6_map_post_ip4_reass (vlib_main_t * vm,
555                         vlib_node_runtime_t * node, vlib_frame_t * frame)
556 {
557   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
558   vlib_node_runtime_t *error_node =
559     vlib_node_get_runtime (vm, ip6_map_post_ip4_reass_node.index);
560   map_main_t *mm = &map_main;
561   vlib_combined_counter_main_t *cm = mm->domain_counters;
562   u32 thread_index = vm->thread_index;
563
564   from = vlib_frame_vector_args (frame);
565   n_left_from = frame->n_vectors;
566   next_index = node->cached_next_index;
567   while (n_left_from > 0)
568     {
569       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
570
571       /* Single loop */
572       while (n_left_from > 0 && n_left_to_next > 0)
573         {
574           u32 pi0;
575           vlib_buffer_t *p0;
576           u8 error0 = MAP_ERROR_NONE;
577           map_domain_t *d0;
578           ip4_header_t *ip40;
579           ip6_header_t *ip60;
580           i32 port0 = 0;
581           u32 map_domain_index0 = ~0;
582           u32 next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP;
583
584           pi0 = to_next[0] = from[0];
585           from += 1;
586           n_left_from -= 1;
587           to_next += 1;
588           n_left_to_next -= 1;
589
590           p0 = vlib_get_buffer (vm, pi0);
591           ip40 = vlib_buffer_get_current (p0);
592           ip60 = ((ip6_header_t *) ip40) - 1;
593
594           d0 =
595             ip4_map_get_domain ((ip4_address_t *) & ip40->src_address.as_u32,
596                                 &map_domain_index0, &error0);
597
598           port0 = vnet_buffer (p0)->ip.reass.l4_src_port;
599
600           if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
601             error0 =
602               ip6_map_sec_check (d0, port0, ip40,
603                                  ip60) ? MAP_ERROR_NONE :
604               MAP_ERROR_DECAP_SEC_CHECK;
605
606           if (PREDICT_FALSE
607               (error0 == MAP_ERROR_NONE &&
608                d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
609             {
610               vnet_buffer (p0)->ip_frag.flags = 0;
611               vnet_buffer (p0)->ip_frag.next_index = IP_FRAG_NEXT_IP4_LOOKUP;
612               vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
613               next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT;
614             }
615
616           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
617             {
618               map_ip6_map_ip4_reass_trace_t *tr =
619                 vlib_add_trace (vm, node, p0, sizeof (*tr));
620               tr->map_domain_index = map_domain_index0;
621               tr->port = port0;
622             }
623
624           if (error0 == MAP_ERROR_NONE)
625             vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
626                                              thread_index,
627                                              map_domain_index0, 1,
628                                              clib_net_to_host_u16
629                                              (ip40->length));
630           next0 =
631             (error0 ==
632              MAP_ERROR_NONE) ? next0 : IP6_MAP_POST_IP4_REASS_NEXT_DROP;
633           p0->error = error_node->errors[error0];
634           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
635                                            n_left_to_next, pi0, next0);
636
637         }
638       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
639     }
640   return frame->n_vectors;
641 }
642
643 /*
644  * ip6_icmp_relay
645  */
646 static uword
647 ip6_map_icmp_relay (vlib_main_t * vm,
648                     vlib_node_runtime_t * node, vlib_frame_t * frame)
649 {
650   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
651   vlib_node_runtime_t *error_node =
652     vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index);
653   map_main_t *mm = &map_main;
654   u32 thread_index = vm->thread_index;
655   u16 *fragment_ids, *fid;
656
657   from = vlib_frame_vector_args (frame);
658   n_left_from = frame->n_vectors;
659   next_index = node->cached_next_index;
660
661   /* Get random fragment IDs for replies. */
662   fid = fragment_ids =
663     clib_random_buffer_get_data (&vm->random_buffer,
664                                  n_left_from * sizeof (fragment_ids[0]));
665
666   while (n_left_from > 0)
667     {
668       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
669
670       /* Single loop */
671       while (n_left_from > 0 && n_left_to_next > 0)
672         {
673           u32 pi0;
674           vlib_buffer_t *p0;
675           u8 error0 = MAP_ERROR_NONE;
676           ip6_header_t *ip60;
677           u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP;
678           u32 mtu;
679
680           pi0 = to_next[0] = from[0];
681           from += 1;
682           n_left_from -= 1;
683           to_next += 1;
684           n_left_to_next -= 1;
685
686           p0 = vlib_get_buffer (vm, pi0);
687           ip60 = vlib_buffer_get_current (p0);
688           u16 tlen = clib_net_to_host_u16 (ip60->payload_length);
689
690           /*
691            * In:
692            *  IPv6 header           (40)
693            *  ICMPv6 header          (8)
694            *  IPv6 header           (40)
695            *  Original IPv4 header / packet
696            * Out:
697            *  New IPv4 header
698            *  New ICMP header
699            *  Original IPv4 header / packet
700            */
701
702           /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */
703           if (tlen < 76)
704             {
705               error0 = MAP_ERROR_ICMP_RELAY;
706               goto error;
707             }
708
709           icmp46_header_t *icmp60 = (icmp46_header_t *) (ip60 + 1);
710           ip6_header_t *inner_ip60 = (ip6_header_t *) (icmp60 + 2);
711
712           if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP)
713             {
714               error0 = MAP_ERROR_ICMP_RELAY;
715               goto error;
716             }
717
718           ip4_header_t *inner_ip40 = (ip4_header_t *) (inner_ip60 + 1);
719           vlib_buffer_advance (p0, 60); /* sizeof ( IPv6 + ICMP + IPv6 - IPv4 - ICMP ) */
720           ip4_header_t *new_ip40 = vlib_buffer_get_current (p0);
721           icmp46_header_t *new_icmp40 = (icmp46_header_t *) (new_ip40 + 1);
722
723           /*
724            * Relay according to RFC2473, section 8.3
725            */
726           switch (icmp60->type)
727             {
728             case ICMP6_destination_unreachable:
729             case ICMP6_time_exceeded:
730             case ICMP6_parameter_problem:
731               /* Type 3 - destination unreachable, Code 1 - host unreachable */
732               new_icmp40->type = ICMP4_destination_unreachable;
733               new_icmp40->code =
734                 ICMP4_destination_unreachable_destination_unreachable_host;
735               break;
736
737             case ICMP6_packet_too_big:
738               /* Type 3 - destination unreachable, Code 4 - packet too big */
739               /* Potential TODO: Adjust domain tunnel MTU based on the value received here */
740               mtu = clib_net_to_host_u32 (*((u32 *) (icmp60 + 1)));
741
742               /* Check DF flag */
743               if (!
744                   (inner_ip40->flags_and_fragment_offset &
745                    clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT)))
746                 {
747                   error0 = MAP_ERROR_ICMP_RELAY;
748                   goto error;
749                 }
750
751               new_icmp40->type = ICMP4_destination_unreachable;
752               new_icmp40->code =
753                 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set;
754               *((u32 *) (new_icmp40 + 1)) =
755                 clib_host_to_net_u32 (mtu < 1280 ? 1280 : mtu);
756               break;
757
758             default:
759               error0 = MAP_ERROR_ICMP_RELAY;
760               break;
761             }
762
763           /*
764            * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812)
765            */
766           new_ip40->ip_version_and_header_length = 0x45;
767           new_ip40->tos = 0;
768           u16 nlen = (tlen - 20) > 576 ? 576 : tlen - 20;
769           new_ip40->length = clib_host_to_net_u16 (nlen);
770           new_ip40->fragment_id = fid[0];
771           fid++;
772           new_ip40->ttl = 64;
773           new_ip40->protocol = IP_PROTOCOL_ICMP;
774           new_ip40->src_address = mm->icmp4_src_address;
775           new_ip40->dst_address = inner_ip40->src_address;
776           new_ip40->checksum = ip4_header_checksum (new_ip40);
777
778           new_icmp40->checksum = 0;
779           ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20);
780           new_icmp40->checksum = ~ip_csum_fold (sum);
781
782           vlib_increment_simple_counter (&mm->icmp_relayed, thread_index, 0,
783                                          1);
784
785         error:
786           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
787             {
788               map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
789               tr->map_domain_index = 0;
790               tr->port = 0;
791             }
792
793           next0 =
794             (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP;
795           p0->error = error_node->errors[error0];
796           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
797                                            n_left_to_next, pi0, next0);
798         }
799       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
800     }
801
802   return frame->n_vectors;
803
804 }
805
/*
 * Registers "ip6-map" as a feature on the "ip6-unicast" arc, ordered after
 * "ip6-full-reassembly-feature" and before "ip6-flow-classify".
 */
806 /* *INDENT-OFF* */
807 VNET_FEATURE_INIT (ip6_map_feature, static) =
808 {
809   .arc_name = "ip6-unicast",
810   .node_name = "ip6-map",
811   .runs_before = VNET_FEATURES ("ip6-flow-classify"),
812   .runs_after = VNET_FEATURES ("ip6-full-reassembly-feature"),
813 };
814
/*
 * Registration of the "ip6-map" graph node (function ip6_map). The
 * next_nodes table is indexed by enum ip6_map_next_e.
 */
815 VLIB_REGISTER_NODE(ip6_map_node) = {
816   .function = ip6_map,
817   .name = "ip6-map",
818   .vector_size = sizeof(u32),
819   .format_trace = format_map_trace,
820   .type = VLIB_NODE_TYPE_INTERNAL,
821
822   .n_errors = MAP_N_ERROR,
823   .error_counters = map_error_counters,
824
825   .n_next_nodes = IP6_MAP_N_NEXT,
826   .next_nodes = {
827     [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
828 #ifdef MAP_SKIP_IP6_LOOKUP
829     [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-load-balance",
830 #endif
831     [IP6_MAP_NEXT_IP4_REASS] = "ip4-sv-reassembly-custom-next",
832     [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
833     [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay",
834     [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local",
835     [IP6_MAP_NEXT_DROP] = "error-drop",
836     [IP6_MAP_NEXT_ICMP] = "ip6-icmp-error",
837   },
838 };
839 /* *INDENT-ON* */
840
/*
 * Registration of the "ip6-map-post-ip4-reass" graph node (function
 * ip6_map_post_ip4_reass). The next_nodes table is indexed by
 * enum ip6_map_post_ip4_reass_next_e.
 */
841 /* *INDENT-OFF* */
842 VLIB_REGISTER_NODE(ip6_map_post_ip4_reass_node) = {
843   .function = ip6_map_post_ip4_reass,
844   .name = "ip6-map-post-ip4-reass",
845   .vector_size = sizeof(u32),
846   .format_trace = format_ip6_map_post_ip4_reass_trace,
847   .type = VLIB_NODE_TYPE_INTERNAL,
848   .n_errors = MAP_N_ERROR,
849   .error_counters = map_error_counters,
850   .n_next_nodes = IP6_MAP_POST_IP4_REASS_N_NEXT,
851   .next_nodes = {
852     [IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup",
853     [IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
854     [IP6_MAP_POST_IP4_REASS_NEXT_DROP] = "error-drop",
855   },
856 };
857 /* *INDENT-ON* */
858
/*
 * Registration of the "ip6-map-icmp-relay" graph node (function
 * ip6_map_icmp_relay). The next_nodes table is indexed by
 * enum ip6_icmp_relay_next_e. The node records map_trace_t entries with
 * domain index and port set to 0, which is why the generic format_map_trace
 * is reused here.
 */
859 /* *INDENT-OFF* */
860 VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
861   .function = ip6_map_icmp_relay,
862   .name = "ip6-map-icmp-relay",
863   .vector_size = sizeof(u32),
864   .format_trace = format_map_trace, //FIXME
865   .type = VLIB_NODE_TYPE_INTERNAL,
866   .n_errors = MAP_N_ERROR,
867   .error_counters = map_error_counters,
868   .n_next_nodes = IP6_ICMP_RELAY_N_NEXT,
869   .next_nodes = {
870     [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup",
871     [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop",
872   },
873 };
874 /* *INDENT-ON* */
875
876 clib_error_t *
877 ip6_map_init (vlib_main_t * vm)
878 {
879   map_main.ip4_sv_reass_custom_next_index =
880     ip4_sv_reass_custom_register_next_node
881     (ip6_map_post_ip4_reass_node.index);
882   return 0;
883 }
884
/* Run ip6_map_init() at start-up, ordered after the "map_init" init function. */
885 VLIB_INIT_FUNCTION (ip6_map_init) =
886 {
887 .runs_after = VLIB_INITS ("map_init"),};
888
889 /*
890  * fd.io coding-style-patch-verification: ON
891  *
892  * Local Variables:
893  * eval: (c-set-style "gnu")
894  * End:
895  */