/* src/plugins/map/ip6_map.c */
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "map.h"

#include <vnet/ip/ip_frag.h>
#include <vnet/ip/ip4_to_ip6.h>
#include <vnet/ip/ip6_to_ip4.h>
#include <vnet/ip/reass/ip4_sv_reass.h>

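/*
 * Next-node dispositions for the IPv6-side MAP nodes in this file.  Each
 * enumerator indexes the .next_nodes table of the corresponding VLIB node
 * registration at the bottom of the file.
 */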
enum ip6_map_next_e
{
  IP6_MAP_NEXT_IP4_LOOKUP,
#ifdef MAP_SKIP_IP6_LOOKUP
  IP6_MAP_NEXT_IP4_REWRITE,
#endif
  IP6_MAP_NEXT_IP6_REASS,
  IP6_MAP_NEXT_IP4_REASS,
  IP6_MAP_NEXT_IP4_FRAGMENT,
  IP6_MAP_NEXT_IP6_ICMP_RELAY,
  IP6_MAP_NEXT_IP6_LOCAL,
  IP6_MAP_NEXT_DROP,
  IP6_MAP_NEXT_ICMP,
  IP6_MAP_N_NEXT,
};

enum ip6_map_ip6_reass_next_e
{
  IP6_MAP_IP6_REASS_NEXT_IP6_MAP,
  IP6_MAP_IP6_REASS_NEXT_DROP,
  IP6_MAP_IP6_REASS_N_NEXT,
};

enum ip6_map_post_ip4_reass_next_e
{
  IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP,
  IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT,
  IP6_MAP_POST_IP4_REASS_NEXT_DROP,
  IP6_MAP_POST_IP4_REASS_N_NEXT,
};

enum ip6_icmp_relay_next_e
{
  IP6_ICMP_RELAY_NEXT_IP4_LOOKUP,
  IP6_ICMP_RELAY_NEXT_DROP,
  IP6_ICMP_RELAY_N_NEXT,
};

vlib_node_registration_t ip6_map_post_ip4_reass_node;
vlib_node_registration_t ip6_map_ip6_reass_node;
static vlib_node_registration_t ip6_map_icmp_relay_node;

typedef struct
{
  u32 map_domain_index;
  u16 port;
  u8 cached;
} map_ip6_map_ip4_reass_trace_t;

u8 *
format_ip6_map_post_ip4_reass_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  map_ip6_map_ip4_reass_trace_t *t =
    va_arg (*args, map_ip6_map_ip4_reass_trace_t *);
  return format (s, "MAP domain index: %d L4 port: %u Status: %s",
                 t->map_domain_index, clib_net_to_host_u16 (t->port),
                 t->cached ? "cached" : "forwarded");
}

typedef struct
{
  u16 offset;
  u16 frag_len;
  u8 out;
} map_ip6_map_ip6_reass_trace_t;

u8 *
format_ip6_map_ip6_reass_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  map_ip6_map_ip6_reass_trace_t *t =
    va_arg (*args, map_ip6_map_ip6_reass_trace_t *);
  return format (s, "Offset: %d Fragment length: %d Status: %s", t->offset,
                 t->frag_len, t->out ? "out" : "in");
}

/*
 * ip6_map_sec_check
 *
 * Inbound (decapsulation) anti-spoofing check: verify that the IPv6 source
 * address matches the MAP address derived from the domain rules for the
 * embedded IPv4 source address and L4 port.
 */
static_always_inline bool
ip6_map_sec_check (map_domain_t * d, u16 port, ip4_header_t * ip4,
                   ip6_header_t * ip6)
{
  u16 sp4 = clib_net_to_host_u16 (port);
  u32 sa4 = clib_net_to_host_u32 (ip4->src_address.as_u32);
  u64 sal6 = map_get_pfx (d, sa4, sp4);
  u64 sar6 = map_get_sfx (d, sa4, sp4);

  if (PREDICT_FALSE
      (sal6 != clib_net_to_host_u64 (ip6->src_address.as_u64[0])
       || sar6 != clib_net_to_host_u64 (ip6->src_address.as_u64[1])))
    return (false);
  return (true);
}

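/*
 * ip6_map_security_check
 *
 * Decap-path wrapper around ip6_map_sec_check.  For shared-address domains
 * (psid_length > 0), non-fragmented packets are validated directly against
 * the embedded L4 port; fragments are diverted to IPv4 shallow virtual
 * reassembly (when sec-check-frag is enabled) so the port is known before
 * the check runs.
 */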
static_always_inline void
ip6_map_security_check (map_domain_t * d, vlib_buffer_t * b0,
                        ip4_header_t * ip4, ip6_header_t * ip6, u32 * next,
                        u8 * error)
{
  map_main_t *mm = &map_main;
  if (d->ea_bits_len || d->rules)
    {
      if (d->psid_length > 0)
        {
          if (!ip4_is_fragment (ip4))
            {
              u16 port = ip4_get_port (ip4, 1);
              if (port)
                {
                  if (mm->sec_check)
                    *error =
                      ip6_map_sec_check (d, port, ip4,
                                         ip6) ? MAP_ERROR_NONE :
                      MAP_ERROR_DECAP_SEC_CHECK;
                }
              else
                {
                  *error = MAP_ERROR_BAD_PROTOCOL;
                }
            }
          else
            {
              if (mm->sec_check_frag)
                {
                  vnet_buffer (b0)->ip.reass.next_index =
                    map_main.ip4_sv_reass_custom_next_index;
                  *next = IP6_MAP_NEXT_IP4_REASS;
                }
            }
        }
    }
}

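/*
 * ip6_map_ip4_lookup_bypass
 *
 * With MAP_SKIP_IP6_LOOKUP compiled in and a pre-resolved IPv4 next hop
 * configured, stamp its DPO index into the buffer and bypass ip4-lookup,
 * sending the packet directly to ip4-load-balance (IP6_MAP_NEXT_IP4_REWRITE).
 */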
static_always_inline bool
ip6_map_ip4_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
{
#ifdef MAP_SKIP_IP6_LOOKUP
  if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP4].fei)
    {
      vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
        pre_resolved[FIB_PROTOCOL_IP4].dpo.dpoi_index;
      return (true);
    }
#endif
  return (false);
}

/*
 * ip6_map
 *
 * IPv6-side MAP node: decapsulates IPv4-in-IPv6 packets, runs the inbound
 * security check, and dispatches IPv6 fragments, ICMPv6 and oversized
 * packets to the appropriate next nodes.
 */
static uword
ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip6_map_node.index);
  map_main_t *mm = &map_main;
  vlib_combined_counter_main_t *cm = mm->domain_counters;
  u32 thread_index = vm->thread_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Dual loop */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          u32 pi0, pi1;
          vlib_buffer_t *p0, *p1;
          u8 error0 = MAP_ERROR_NONE;
          u8 error1 = MAP_ERROR_NONE;
          map_domain_t *d0 = 0, *d1 = 0;
          ip4_header_t *ip40, *ip41;
          ip6_header_t *ip60, *ip61;
          u16 port0 = 0, port1 = 0;
          u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
          u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
          u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            /* IPv6 + IPv4 header + 8 bytes of ULP */
            CLIB_PREFETCH (p2->data, 68, LOAD);
            CLIB_PREFETCH (p3->data, 68, LOAD);
          }

          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];
          from += 2;
          n_left_from -= 2;
          to_next += 2;
          n_left_to_next -= 2;

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);
          ip60 = vlib_buffer_get_current (p0);
          ip61 = vlib_buffer_get_current (p1);
          vlib_buffer_advance (p0, sizeof (ip6_header_t));
          vlib_buffer_advance (p1, sizeof (ip6_header_t));
          ip40 = vlib_buffer_get_current (p0);
          ip41 = vlib_buffer_get_current (p1);

          /*
           * Encapsulated IPv4 packet
           *   - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
           *   - Lookup/Rewrite or Fragment node in case of packet > MTU
           * Fragmented IPv6 packet
           * ICMP IPv6 packet
           *   - Error -> Pass to ICMPv6/ICMPv4 relay
           *   - Info -> Pass to IPv6 local
           * Anything else -> drop
           */
          if (PREDICT_TRUE
              (ip60->protocol == IP_PROTOCOL_IP_IN_IP
               && clib_net_to_host_u16 (ip60->payload_length) > 20))
            {
              d0 =
                ip4_map_get_domain ((ip4_address_t *) & ip40->
                                    src_address.as_u32, &map_domain_index0,
                                    &error0);
            }
          else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
                   clib_net_to_host_u16 (ip60->payload_length) >
                   sizeof (icmp46_header_t))
            {
              icmp46_header_t *icmp = (void *) (ip60 + 1);
              next0 = (icmp->type == ICMP6_echo_request
                       || icmp->type ==
                       ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
                IP6_MAP_NEXT_IP6_ICMP_RELAY;
            }
          else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
            {
              next0 = IP6_MAP_NEXT_IP6_REASS;
            }
          else
            {
              error0 = MAP_ERROR_BAD_PROTOCOL;
            }
          if (PREDICT_TRUE
              (ip61->protocol == IP_PROTOCOL_IP_IN_IP
               && clib_net_to_host_u16 (ip61->payload_length) > 20))
            {
              d1 =
                ip4_map_get_domain ((ip4_address_t *) & ip41->
                                    src_address.as_u32, &map_domain_index1,
                                    &error1);
            }
          else if (ip61->protocol == IP_PROTOCOL_ICMP6 &&
                   clib_net_to_host_u16 (ip61->payload_length) >
                   sizeof (icmp46_header_t))
            {
              icmp46_header_t *icmp = (void *) (ip61 + 1);
              next1 = (icmp->type == ICMP6_echo_request
                       || icmp->type ==
                       ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
                IP6_MAP_NEXT_IP6_ICMP_RELAY;
            }
          else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
            {
              next1 = IP6_MAP_NEXT_IP6_REASS;
            }
          else
            {
              error1 = MAP_ERROR_BAD_PROTOCOL;
            }

          if (d0)
            {
              /* MAP inbound security check */
              ip6_map_security_check (d0, p0, ip40, ip60, &next0, &error0);

              if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
                                next0 == IP6_MAP_NEXT_IP4_LOOKUP))
                {
                  if (PREDICT_FALSE
                      (d0->mtu
                       && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
                    {
                      vnet_buffer (p0)->ip_frag.flags = 0;
                      vnet_buffer (p0)->ip_frag.next_index =
                        IP4_FRAG_NEXT_IP4_LOOKUP;
                      vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
                      next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
                    }
                  else
                    {
                      next0 =
                        ip6_map_ip4_lookup_bypass (p0,
                                                   ip40) ?
                        IP6_MAP_NEXT_IP4_REWRITE : next0;
                    }
                  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
                                                   thread_index,
                                                   map_domain_index0, 1,
                                                   clib_net_to_host_u16
                                                   (ip40->length));
                }
            }
          if (d1)
            {
              /* MAP inbound security check */
              ip6_map_security_check (d1, p1, ip41, ip61, &next1, &error1);

              if (PREDICT_TRUE (error1 == MAP_ERROR_NONE &&
                                next1 == IP6_MAP_NEXT_IP4_LOOKUP))
                {
                  if (PREDICT_FALSE
                      (d1->mtu
                       && (clib_host_to_net_u16 (ip41->length) > d1->mtu)))
                    {
                      vnet_buffer (p1)->ip_frag.flags = 0;
                      vnet_buffer (p1)->ip_frag.next_index =
                        IP4_FRAG_NEXT_IP4_LOOKUP;
                      vnet_buffer (p1)->ip_frag.mtu = d1->mtu;
                      next1 = IP6_MAP_NEXT_IP4_FRAGMENT;
                    }
                  else
                    {
                      next1 =
                        ip6_map_ip4_lookup_bypass (p1,
                                                   ip41) ?
                        IP6_MAP_NEXT_IP4_REWRITE : next1;
                    }
                  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
                                                   thread_index,
                                                   map_domain_index1, 1,
                                                   clib_net_to_host_u16
                                                   (ip41->length));
                }
            }

          if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
            {
              map_add_trace (vm, node, p0, map_domain_index0, port0);
            }

          if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
            {
              map_add_trace (vm, node, p1, map_domain_index1, port1);
            }

          if (error0 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
            {
              /* Set ICMP parameters */
              vlib_buffer_advance (p0, -sizeof (ip6_header_t));
              icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
                                           ICMP6_destination_unreachable_source_address_failed_policy,
                                           0);
              next0 = IP6_MAP_NEXT_ICMP;
            }
          else
            {
              next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
            }

          if (error1 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
            {
              /* Set ICMP parameters */
              vlib_buffer_advance (p1, -sizeof (ip6_header_t));
              icmp6_error_set_vnet_buffer (p1, ICMP6_destination_unreachable,
                                           ICMP6_destination_unreachable_source_address_failed_policy,
                                           0);
              next1 = IP6_MAP_NEXT_ICMP;
            }
          else
            {
              next1 = (error1 == MAP_ERROR_NONE) ? next1 : IP6_MAP_NEXT_DROP;
            }

          /* Reset packet */
          if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
            vlib_buffer_advance (p0, -sizeof (ip6_header_t));
          if (next1 == IP6_MAP_NEXT_IP6_LOCAL)
            vlib_buffer_advance (p1, -sizeof (ip6_header_t));

          p0->error = error_node->errors[error0];
          p1->error = error_node->errors[error1];
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
                                           n_left_to_next, pi0, pi1, next0,
                                           next1);
        }

      /* Single loop */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 pi0;
          vlib_buffer_t *p0;
          u8 error0 = MAP_ERROR_NONE;
          map_domain_t *d0 = 0;
          ip4_header_t *ip40;
          ip6_header_t *ip60;
          i32 port0 = 0;
          u32 map_domain_index0 = ~0;
          u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;

          pi0 = to_next[0] = from[0];
          from += 1;
          n_left_from -= 1;
          to_next += 1;
          n_left_to_next -= 1;

          p0 = vlib_get_buffer (vm, pi0);
          ip60 = vlib_buffer_get_current (p0);
          vlib_buffer_advance (p0, sizeof (ip6_header_t));
          ip40 = vlib_buffer_get_current (p0);

          /*
           * Encapsulated IPv4 packet
           *   - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
           *   - Lookup/Rewrite or Fragment node in case of packet > MTU
           * Fragmented IPv6 packet
           * ICMP IPv6 packet
           *   - Error -> Pass to ICMPv6/ICMPv4 relay
           *   - Info -> Pass to IPv6 local
           * Anything else -> drop
           */
          if (PREDICT_TRUE
              (ip60->protocol == IP_PROTOCOL_IP_IN_IP
               && clib_net_to_host_u16 (ip60->payload_length) > 20))
            {
              d0 =
                ip4_map_get_domain ((ip4_address_t *) & ip40->
                                    src_address.as_u32, &map_domain_index0,
                                    &error0);
            }
          else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
                   clib_net_to_host_u16 (ip60->payload_length) >
                   sizeof (icmp46_header_t))
            {
              icmp46_header_t *icmp = (void *) (ip60 + 1);
              next0 = (icmp->type == ICMP6_echo_request
                       || icmp->type ==
                       ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
                IP6_MAP_NEXT_IP6_ICMP_RELAY;
            }
          else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION &&
                   (((ip6_frag_hdr_t *) (ip60 + 1))->next_hdr ==
                    IP_PROTOCOL_IP_IN_IP))
            {
              next0 = IP6_MAP_NEXT_IP6_REASS;
            }
          else
            {
              /* XXX: Move get_domain to ip6_get_domain lookup on source */
              //error0 = MAP_ERROR_BAD_PROTOCOL;
              vlib_buffer_advance (p0, -sizeof (ip6_header_t));
              vnet_feature_next (&next0, p0);
            }

          if (d0)
            {
              /* MAP inbound security check */
              ip6_map_security_check (d0, p0, ip40, ip60, &next0, &error0);

              if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
                                next0 == IP6_MAP_NEXT_IP4_LOOKUP))
                {
                  if (PREDICT_FALSE
                      (d0->mtu
                       && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
                    {
                      vnet_buffer (p0)->ip_frag.flags = 0;
                      vnet_buffer (p0)->ip_frag.next_index =
                        IP4_FRAG_NEXT_IP4_LOOKUP;
                      vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
                      next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
                    }
                  else
                    {
                      next0 =
                        ip6_map_ip4_lookup_bypass (p0,
                                                   ip40) ?
                        IP6_MAP_NEXT_IP4_REWRITE : next0;
                    }
                  vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
                                                   thread_index,
                                                   map_domain_index0, 1,
                                                   clib_net_to_host_u16
                                                   (ip40->length));
                }
            }

          if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
            {
              map_add_trace (vm, node, p0, map_domain_index0, port0);
            }

          if (mm->icmp6_enabled &&
              (error0 == MAP_ERROR_DECAP_SEC_CHECK
               || error0 == MAP_ERROR_NO_DOMAIN))
            {
              /* Set ICMP parameters */
              vlib_buffer_advance (p0, -sizeof (ip6_header_t));
              icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
                                           ICMP6_destination_unreachable_source_address_failed_policy,
                                           0);
              next0 = IP6_MAP_NEXT_ICMP;
            }
          else
            {
              next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
            }

          /* Reset packet */
          if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
            vlib_buffer_advance (p0, -sizeof (ip6_header_t));

          p0->error = error_node->errors[error0];
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, pi0, next0);
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}


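/*
 * ip6_map_ip6_reass_prepare
 *
 * Once the inner IPv4 header for a reassembly context is known, walk the
 * cached fragments and turn each forwardable IPv6 fragment into a
 * standalone IPv4 fragment: copy in the IPv4 header where missing,
 * translate the fragment offset and MF bit, fix up length and checksum,
 * and move the buffer to the fragments_ready vector.
 */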
static_always_inline void
ip6_map_ip6_reass_prepare (vlib_main_t * vm, vlib_node_runtime_t * node,
                           map_ip6_reass_t * r, u32 ** fragments_ready,
                           u32 ** fragments_to_drop)
{
  ip4_header_t *ip40;
  ip6_header_t *ip60;
  ip6_frag_hdr_t *frag0;
  vlib_buffer_t *p0;

  if (!r->ip4_header.ip_version_and_header_length)
    return;

  //The IP header is here, we need to check for packets
  //that can be forwarded
  int i;
  for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
    {
      if (r->fragments[i].pi == ~0 ||
          ((!r->fragments[i].next_data_len)
           && (r->fragments[i].next_data_offset != (0xffff))))
        continue;

      p0 = vlib_get_buffer (vm, r->fragments[i].pi);
      ip60 = vlib_buffer_get_current (p0);
      frag0 = (ip6_frag_hdr_t *) (ip60 + 1);
      ip40 = (ip4_header_t *) (frag0 + 1);

      if (ip6_frag_hdr_offset (frag0))
        {
          //Not first fragment, add the IPv4 header
          clib_memcpy_fast (ip40, &r->ip4_header, 20);
        }

#ifdef MAP_IP6_REASS_COUNT_BYTES
      r->forwarded +=
        clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0);
#endif

      if (ip6_frag_hdr_more (frag0))
        {
          //Not the last fragment: append the first 20 bytes of the
          //following fragment's cached data to this one
          clib_memcpy_fast (u8_ptr_add (ip60, p0->current_length),
                            r->fragments[i].next_data, 20);
          p0->current_length += 20;
          ip60->payload_length = u16_net_add (ip60->payload_length, 20);
        }

      if (!ip4_is_fragment (ip40))
        {
          ip40->fragment_id = frag_id_6to4 (frag0->identification);
          ip40->flags_and_fragment_offset =
            clib_host_to_net_u16 (ip6_frag_hdr_offset (frag0));
        }
      else
        {
          ip40->flags_and_fragment_offset =
            clib_host_to_net_u16 (ip4_get_fragment_offset (ip40) +
                                  ip6_frag_hdr_offset (frag0));
        }

      if (ip6_frag_hdr_more (frag0))
        ip40->flags_and_fragment_offset |=
          clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);

      ip40->length =
        clib_host_to_net_u16 (p0->current_length - sizeof (*ip60) -
                              sizeof (*frag0));
      ip40->checksum = ip4_header_checksum (ip40);

      if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
        {
          map_ip6_map_ip6_reass_trace_t *tr =
            vlib_add_trace (vm, node, p0, sizeof (*tr));
          tr->offset = ip4_get_fragment_offset (ip40);
          tr->frag_len = clib_net_to_host_u16 (ip40->length) - sizeof (*ip40);
          tr->out = 1;
        }

      vec_add1 (*fragments_ready, r->fragments[i].pi);
      r->fragments[i].pi = ~0;
      r->fragments[i].next_data_len = 0;
      r->fragments[i].next_data_offset = 0;
      map_main.ip6_reass_buffered_counter--;

      //TODO: Best solution would be that ip6_map handles extension headers
      // and ignores atomic fragment. But in the meantime, let's just copy the header.

      u8 protocol = frag0->next_hdr;
      memmove (u8_ptr_add (ip40, -sizeof (*ip60)), ip60, sizeof (*ip60));
      ((ip6_header_t *) u8_ptr_add (ip40, -sizeof (*ip60)))->protocol =
        protocol;
      vlib_buffer_advance (p0, sizeof (*frag0));
    }
}

void
map_ip6_drop_pi (u32 pi)
{
  vlib_main_t *vm = vlib_get_main ();
  vlib_node_runtime_t *n =
    vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index);
  vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi);
}

/*
 * ip6_reass
 * TODO: We should count the number of successfully
 * transmitted fragment bytes and compare that to the last fragment
 * offset such that we can free the reassembly structure when all fragments
 * have been forwarded.
 */
static uword
ip6_map_ip6_reass (vlib_main_t * vm,
                   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index);
  u32 *fragments_to_drop = NULL;
  u32 *fragments_ready = NULL;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Single loop */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 pi0;
          vlib_buffer_t *p0;
          u8 error0 = MAP_ERROR_NONE;
          ip6_header_t *ip60;
          ip6_frag_hdr_t *frag0;
          u16 offset;
          u16 next_offset;
          u16 frag_len;

          pi0 = to_next[0] = from[0];
          from += 1;
          n_left_from -= 1;
          to_next += 1;
          n_left_to_next -= 1;

          p0 = vlib_get_buffer (vm, pi0);
          ip60 = vlib_buffer_get_current (p0);
          frag0 = (ip6_frag_hdr_t *) (ip60 + 1);
          offset =
            clib_host_to_net_u16 (frag0->fragment_offset_and_more) & (~7);
          frag_len =
            clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0);
          next_offset =
            ip6_frag_hdr_more (frag0) ? (offset + frag_len) : (0xffff);

          //FIXME: Support other extension headers, maybe

          if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
            {
              map_ip6_map_ip6_reass_trace_t *tr =
                vlib_add_trace (vm, node, p0, sizeof (*tr));
              tr->offset = offset;
              tr->frag_len = frag_len;
              tr->out = 0;
            }

          map_ip6_reass_lock ();
          map_ip6_reass_t *r =
            map_ip6_reass_get (&ip60->src_address, &ip60->dst_address,
                               frag0->identification, frag0->next_hdr,
                               &fragments_to_drop);
          //FIXME: Use better error codes
          if (PREDICT_FALSE (!r))
            {
              // Could not create a caching entry
              error0 = MAP_ERROR_FRAGMENT_MEMORY;
            }
          else if (PREDICT_FALSE ((frag_len <= 20 &&
                                   (ip6_frag_hdr_more (frag0) || (!offset)))))
            {
              //Fragments of 20 bytes or less may only be the last fragment
              //and can never be the first one
              error0 = MAP_ERROR_FRAGMENT_MALFORMED;
            }
          else
            if (map_ip6_reass_add_fragment
                (r, pi0, offset, next_offset, (u8 *) (frag0 + 1), frag_len))
            {
              map_ip6_reass_free (r, &fragments_to_drop);
              error0 = MAP_ERROR_FRAGMENT_MEMORY;
            }
          else
            {
#ifdef MAP_IP6_REASS_COUNT_BYTES
              if (!ip6_frag_hdr_more (frag0))
                r->expected_total = offset + frag_len;
#endif
              ip6_map_ip6_reass_prepare (vm, node, r, &fragments_ready,
                                         &fragments_to_drop);
#ifdef MAP_IP6_REASS_COUNT_BYTES
              if (r->forwarded >= r->expected_total)
                map_ip6_reass_free (r, &fragments_to_drop);
#endif
            }
          map_ip6_reass_unlock ();

          if (error0 == MAP_ERROR_NONE)
            {
              if (frag_len > 20)
                {
                  //Dequeue the packet
                  n_left_to_next++;
                  to_next--;
                }
              else
                {
                  //All data from this packet was copied; no need to keep it, but this is not an error
                  p0->error = error_node->errors[MAP_ERROR_NONE];
                  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                                   to_next, n_left_to_next,
                                                   pi0,
                                                   IP6_MAP_IP6_REASS_NEXT_DROP);
                }
            }
          else
            {
              p0->error = error_node->errors[error0];
              vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                               n_left_to_next, pi0,
                                               IP6_MAP_IP6_REASS_NEXT_DROP);
            }
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  map_send_all_to_node (vm, fragments_ready, node,
                        &error_node->errors[MAP_ERROR_NONE],
                        IP6_MAP_IP6_REASS_NEXT_IP6_MAP);
  map_send_all_to_node (vm, fragments_to_drop, node,
                        &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
                        IP6_MAP_IP6_REASS_NEXT_DROP);

  vec_free (fragments_to_drop);
  vec_free (fragments_ready);
  return frame->n_vectors;
}

/*
 * ip6_map_post_ip4_reass
 *
 * Continuation of the decap path for fragmented inner IPv4 packets: runs
 * after IPv4 shallow virtual reassembly has recovered the L4 source port,
 * then re-applies the MAP security check and MTU handling.
 */
static uword
ip6_map_post_ip4_reass (vlib_main_t * vm,
                        vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip6_map_post_ip4_reass_node.index);
  map_main_t *mm = &map_main;
  vlib_combined_counter_main_t *cm = mm->domain_counters;
  u32 thread_index = vm->thread_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Single loop */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 pi0;
          vlib_buffer_t *p0;
          u8 error0 = MAP_ERROR_NONE;
          map_domain_t *d0;
          ip4_header_t *ip40;
          ip6_header_t *ip60;
          i32 port0 = 0;
          u32 map_domain_index0 = ~0;
          u32 next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP;

          pi0 = to_next[0] = from[0];
          from += 1;
          n_left_from -= 1;
          to_next += 1;
          n_left_to_next -= 1;

          p0 = vlib_get_buffer (vm, pi0);
          ip40 = vlib_buffer_get_current (p0);
          ip60 = ((ip6_header_t *) ip40) - 1;

          d0 =
            ip4_map_get_domain ((ip4_address_t *) & ip40->src_address.as_u32,
                                &map_domain_index0, &error0);

          port0 = vnet_buffer (p0)->ip.reass.l4_src_port;

          if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
            error0 =
              ip6_map_sec_check (d0, port0, ip40,
                                 ip60) ? MAP_ERROR_NONE :
              MAP_ERROR_DECAP_SEC_CHECK;

          if (PREDICT_FALSE
              (d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu)
               && error0 == MAP_ERROR_NONE))
            {
              vnet_buffer (p0)->ip_frag.flags = 0;
              vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
              vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
              next0 = IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT;
            }

          if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
            {
              map_ip6_map_ip4_reass_trace_t *tr =
                vlib_add_trace (vm, node, p0, sizeof (*tr));
              tr->map_domain_index = map_domain_index0;
              tr->port = port0;
            }

          if (error0 == MAP_ERROR_NONE)
            vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
                                             thread_index,
                                             map_domain_index0, 1,
                                             clib_net_to_host_u16
                                             (ip40->length));
          next0 =
            (error0 ==
             MAP_ERROR_NONE) ? next0 : IP6_MAP_POST_IP4_REASS_NEXT_DROP;
          p0->error = error_node->errors[error0];
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, pi0, next0);

        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  return frame->n_vectors;
}

/*
 * ip6_map_icmp_relay
 *
 * Relays ICMPv6 errors generated inside the MAP domain back to the IPv4
 * sender as ICMPv4 errors, per RFC 2473 section 8.3.
 */
static uword
ip6_map_icmp_relay (vlib_main_t * vm,
                    vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index);
  map_main_t *mm = &map_main;
  u32 thread_index = vm->thread_index;
  u16 *fragment_ids, *fid;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  /* Get random fragment IDs for replies. */
  fid = fragment_ids =
    clib_random_buffer_get_data (&vm->random_buffer,
                                 n_left_from * sizeof (fragment_ids[0]));

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Single loop */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 pi0;
          vlib_buffer_t *p0;
          u8 error0 = MAP_ERROR_NONE;
          ip6_header_t *ip60;
          u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP;
          u32 mtu;

          pi0 = to_next[0] = from[0];
          from += 1;
          n_left_from -= 1;
          to_next += 1;
          n_left_to_next -= 1;

          p0 = vlib_get_buffer (vm, pi0);
          ip60 = vlib_buffer_get_current (p0);
          u16 tlen = clib_net_to_host_u16 (ip60->payload_length);

          /*
           * In:
           *  IPv6 header           (40)
           *  ICMPv6 header          (8)
           *  IPv6 header           (40)
           *  Original IPv4 header / packet
           * Out:
           *  New IPv4 header
           *  New ICMP header
           *  Original IPv4 header / packet
           */
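          /*
           * Note: 40 (outer IPv6) + 8 (ICMPv6) + 40 (inner IPv6) = 88 bytes
           * of original headers are replaced in place by 20 (IPv4) + 8
           * (ICMPv4) = 28 bytes, hence the fixed 60-byte buffer advance below.
           */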

          /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */
          if (tlen < 76)
            {
              error0 = MAP_ERROR_ICMP_RELAY;
              goto error;
            }

          icmp46_header_t *icmp60 = (icmp46_header_t *) (ip60 + 1);
          ip6_header_t *inner_ip60 = (ip6_header_t *) (icmp60 + 2);

          if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP)
            {
              error0 = MAP_ERROR_ICMP_RELAY;
              goto error;
            }

          ip4_header_t *inner_ip40 = (ip4_header_t *) (inner_ip60 + 1);
          vlib_buffer_advance (p0, 60); /* sizeof ( IPv6 + ICMP + IPv6 - IPv4 - ICMP ) */
          ip4_header_t *new_ip40 = vlib_buffer_get_current (p0);
          icmp46_header_t *new_icmp40 = (icmp46_header_t *) (new_ip40 + 1);

          /*
           * Relay according to RFC2473, section 8.3
           */
          switch (icmp60->type)
            {
            case ICMP6_destination_unreachable:
            case ICMP6_time_exceeded:
            case ICMP6_parameter_problem:
              /* Type 3 - destination unreachable, Code 1 - host unreachable */
              new_icmp40->type = ICMP4_destination_unreachable;
              new_icmp40->code =
                ICMP4_destination_unreachable_destination_unreachable_host;
              break;

            case ICMP6_packet_too_big:
              /* Type 3 - destination unreachable, Code 4 - packet too big */
              /* Potential TODO: Adjust domain tunnel MTU based on the value received here */
              mtu = clib_net_to_host_u32 (*((u32 *) (icmp60 + 1)));

              /* Check DF flag */
              if (!
                  (inner_ip40->flags_and_fragment_offset &
                   clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT)))
                {
                  error0 = MAP_ERROR_ICMP_RELAY;
                  goto error;
                }

              new_icmp40->type = ICMP4_destination_unreachable;
              new_icmp40->code =
                ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set;
              *((u32 *) (new_icmp40 + 1)) =
                clib_host_to_net_u32 (mtu < 1280 ? 1280 : mtu);
              break;

            default:
              error0 = MAP_ERROR_ICMP_RELAY;
              break;
            }

          /*
           * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812)
           */
          new_ip40->ip_version_and_header_length = 0x45;
          new_ip40->tos = 0;
          u16 nlen = (tlen - 20) > 576 ? 576 : tlen - 20;
          new_ip40->length = clib_host_to_net_u16 (nlen);
          new_ip40->fragment_id = fid[0];
          fid++;
          new_ip40->ttl = 64;
          new_ip40->protocol = IP_PROTOCOL_ICMP;
          new_ip40->src_address = mm->icmp4_src_address;
          new_ip40->dst_address = inner_ip40->src_address;
          new_ip40->checksum = ip4_header_checksum (new_ip40);

          new_icmp40->checksum = 0;
          ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20);
          new_icmp40->checksum = ~ip_csum_fold (sum);

          vlib_increment_simple_counter (&mm->icmp_relayed, thread_index, 0,
                                         1);

        error:
          if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
            {
              map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
              tr->map_domain_index = 0;
              tr->port = 0;
            }

          next0 =
            (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP;
          p0->error = error_node->errors[error0];
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, pi0, next0);
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;

}

static char *map_error_strings[] = {
#define _(sym,string) string,
  foreach_map_error
#undef _
};

/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip6_map_feature, static) =
{
  .arc_name = "ip6-unicast",
  .node_name = "ip6-map",
  .runs_before = VNET_FEATURES ("ip6-flow-classify"),
};

VLIB_REGISTER_NODE(ip6_map_node) = {
  .function = ip6_map,
  .name = "ip6-map",
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,

  .n_next_nodes = IP6_MAP_N_NEXT,
  .next_nodes = {
    [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
#ifdef MAP_SKIP_IP6_LOOKUP
    [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-load-balance",
#endif
    [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass",
    [IP6_MAP_NEXT_IP4_REASS] = "ip4-sv-reassembly-custom-next",
    [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
    [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay",
    [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local",
    [IP6_MAP_NEXT_DROP] = "error-drop",
    [IP6_MAP_NEXT_ICMP] = "ip6-icmp-error",
  },
};
/* *INDENT-ON* */

/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = {
  .function = ip6_map_ip6_reass,
  .name = "ip6-map-ip6-reass",
  .vector_size = sizeof(u32),
  .format_trace = format_ip6_map_ip6_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,
  .n_next_nodes = IP6_MAP_IP6_REASS_N_NEXT,
  .next_nodes = {
    [IP6_MAP_IP6_REASS_NEXT_IP6_MAP] = "ip6-map",
    [IP6_MAP_IP6_REASS_NEXT_DROP] = "error-drop",
  },
};
/* *INDENT-ON* */

/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_post_ip4_reass_node) = {
  .function = ip6_map_post_ip4_reass,
  .name = "ip6-map-post-ip4-reass",
  .vector_size = sizeof(u32),
  .format_trace = format_ip6_map_post_ip4_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,
  .n_next_nodes = IP6_MAP_POST_IP4_REASS_N_NEXT,
  .next_nodes = {
    [IP6_MAP_POST_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP6_MAP_POST_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
    [IP6_MAP_POST_IP4_REASS_NEXT_DROP] = "error-drop",
  },
};
/* *INDENT-ON* */

/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
  .function = ip6_map_icmp_relay,
  .name = "ip6-map-icmp-relay",
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace, //FIXME
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,
  .n_next_nodes = IP6_ICMP_RELAY_N_NEXT,
  .next_nodes = {
    [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop",
  },
};
/* *INDENT-ON* */

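/*
 * ip6_map_init
 *
 * Register ip6-map-post-ip4-reass as a custom next node of IPv4 shallow
 * virtual reassembly, so fragments diverted from ip6-map return here once
 * the L4 port has been recovered.
 */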
clib_error_t *
ip6_map_init (vlib_main_t * vm)
{
  map_main.ip4_sv_reass_custom_next_index =
    ip4_sv_reass_custom_register_next_node
    (ip6_map_post_ip4_reass_node.index);
  return 0;
}

VLIB_INIT_FUNCTION (ip6_map_init) =
{
  .runs_after = VLIB_INITS ("map_init"),
};

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */