Fix dual loop >= 4. Fix format specifier for printing u64 counter.
[vpp.git] / vnet / vnet / map / ip6_map.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "map.h"
16
17 #include "../ip/ip_frag.h"
18
/*
 * Next-node slots used by the ip6_map dispatch function.
 * The numeric values are positional; IP6_MAP_NEXT_IP4_REWRITE only exists
 * when MAP_SKIP_IP6_LOOKUP is defined.
 */
enum ip6_map_next_e
{
  /* Default disposition of a decapsulated IPv4 packet. */
  IP6_MAP_NEXT_IP4_LOOKUP,
#ifdef MAP_SKIP_IP6_LOOKUP
  /* Chosen when ip6_map_ip4_lookup_bypass() pre-selects a TX adjacency. */
  IP6_MAP_NEXT_IP4_REWRITE,
#endif
  /* Outer packet carries an IPv6 fragmentation header. */
  IP6_MAP_NEXT_IP6_REASS,
  /* Inner IPv4 packet is a fragment and fragment security checks are on. */
  IP6_MAP_NEXT_IP4_REASS,
  /* Inner IPv4 packet is larger than the domain MTU. */
  IP6_MAP_NEXT_IP4_FRAGMENT,
  /* ICMPv6 other than echo request/reply. */
  IP6_MAP_NEXT_IP6_ICMP_RELAY,
  /* ICMPv6 echo request/reply. */
  IP6_MAP_NEXT_IP6_LOCAL,
  /* Packet is discarded. */
  IP6_MAP_NEXT_DROP,
  /* Number of next slots; must stay last. */
  IP6_MAP_N_NEXT,
};
32
/* Next-node slots used by the ip6_map_ip6_reass dispatch function. */
enum ip6_map_ip6_reass_next_e
{
  /* Fragments that are ready to be forwarded are handed to this slot. */
  IP6_MAP_IP6_REASS_NEXT_IP6_MAP,
  /* Fragments that are discarded (errors, or fully copied small fragments). */
  IP6_MAP_IP6_REASS_NEXT_DROP,
  /* Number of next slots; must stay last. */
  IP6_MAP_IP6_REASS_N_NEXT,
};
38
/* Next-node slots used by the ip6_map_ip4_reass dispatch function. */
enum ip6_map_ip4_reass_next_e
{
  /* Default disposition of a checked IPv4 fragment. */
  IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP,
  /* Fragment is larger than the domain MTU. */
  IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT,
  /* Fragment is discarded. */
  IP6_MAP_IP4_REASS_NEXT_DROP,
  /* Number of next slots; must stay last. */
  IP6_MAP_IP4_REASS_N_NEXT,
};
45
/* Next-node slots used by the ip6_map_icmp_relay dispatch function. */
enum ip6_icmp_relay_next_e
{
  /* Default disposition of a relayed ICMP packet. */
  IP6_ICMP_RELAY_NEXT_IP4_LOOKUP,
  /* Packet is discarded. */
  IP6_ICMP_RELAY_NEXT_DROP,
  /* Number of next slots; must stay last. */
  IP6_ICMP_RELAY_N_NEXT,
};
51
52 vlib_node_registration_t ip6_map_ip4_reass_node;
53 vlib_node_registration_t ip6_map_ip6_reass_node;
54 static vlib_node_registration_t ip6_map_icmp_relay_node;
55
56 typedef struct {
57   u32 map_domain_index;
58   u16 port;
59   u8 cached;
60 } map_ip6_map_ip4_reass_trace_t;
61
62 u8 *
63 format_ip6_map_ip4_reass_trace (u8 *s, va_list *args)
64 {
65   CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *);
66   CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *);
67   map_ip6_map_ip4_reass_trace_t *t = va_arg (*args, map_ip6_map_ip4_reass_trace_t *);
68   return format(s, "MAP domain index: %d L4 port: %u Status: %s", t->map_domain_index,
69                 t->port, t->cached?"cached":"forwarded");
70 }
71
72 typedef struct {
73   u16 offset;
74   u16 frag_len;
75   u8 out;
76 } map_ip6_map_ip6_reass_trace_t;
77
78 u8 *
79 format_ip6_map_ip6_reass_trace (u8 *s, va_list *args)
80 {
81   CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *);
82   CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *);
83   map_ip6_map_ip6_reass_trace_t *t = va_arg (*args, map_ip6_map_ip6_reass_trace_t *);
84   return format(s, "Offset: %d Fragment length: %d Status: %s", t->offset, t->frag_len, t->out?"out":"in");
85 }
86
87 /*
88  * ip6_map_sec_check
89  */
90 static_always_inline bool
91 ip6_map_sec_check (map_domain_t *d, u16 port, ip4_header_t *ip4, ip6_header_t *ip6)
92 {
93   u16 sp4 = clib_net_to_host_u16(port);
94   u32 sa4 = clib_net_to_host_u32(ip4->src_address.as_u32);
95   u64 sal6 = map_get_pfx(d, sa4, sp4);
96   u64 sar6 = map_get_sfx(d, sa4, sp4);
97
98   if (PREDICT_FALSE(sal6 != clib_net_to_host_u64(ip6->src_address.as_u64[0]) ||
99                     sar6 != clib_net_to_host_u64(ip6->src_address.as_u64[1])))
100     return (false);
101   return (true);
102 }
103
104 static_always_inline void
105 ip6_map_security_check (map_domain_t *d, ip4_header_t *ip4, ip6_header_t *ip6, u32 *next, u8 *error)
106 {
107   map_main_t *mm = &map_main;
108   if (d->ea_bits_len || d->rules) {
109     if (d->psid_length > 0) {
110       if (!ip4_is_fragment(ip4)) {
111         u16 port = ip4_map_get_port(ip4, MAP_SENDER);
112         if (port) {
113           if (mm->sec_check)
114             *error = ip6_map_sec_check(d, port, ip4, ip6) ? MAP_ERROR_NONE : MAP_ERROR_DECAP_SEC_CHECK;
115         } else {
116           *error = MAP_ERROR_BAD_PROTOCOL;
117         }
118       } else {
119         *next = mm->sec_check_frag ? IP6_MAP_NEXT_IP4_REASS : *next;
120       }
121     }
122   }
123 }
124
125 static_always_inline bool
126 ip6_map_ip4_lookup_bypass (vlib_buffer_t *p0, ip4_header_t *ip)
127 {
128 #ifdef MAP_SKIP_IP6_LOOKUP
129   map_main_t *mm = &map_main;
130   u32 adj_index0 = mm->adj4_index;
131   if (adj_index0 > 0) {
132     ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
133     ip_adjacency_t *adj = ip_get_adjacency(lm4, mm->adj4_index);
134     if (adj->n_adj > 1) {
135       u32 hash_c0 = ip4_compute_flow_hash(ip, IP_FLOW_HASH_DEFAULT);
136       adj_index0 += (hash_c0 & (adj->n_adj - 1));
137     }
138     vnet_buffer(p0)->ip.adj_index[VLIB_TX] = adj_index0;
139     return (true);
140   }
141 #endif
142   return (false);
143 }
144
145
146 /*
147  * ip6_map
148  */
149 static uword
150 ip6_map (vlib_main_t *vm,
151          vlib_node_runtime_t *node,
152          vlib_frame_t *frame)
153 {
154   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
155   vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_node.index);
156   map_main_t *mm = &map_main;
157   vlib_combined_counter_main_t *cm = mm->domain_counters;
158   u32 cpu_index = os_get_cpu_number();
159
160   from = vlib_frame_vector_args(frame);
161   n_left_from = frame->n_vectors;
162   next_index = node->cached_next_index;
163   while (n_left_from > 0) {
164     vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
165
166     /* Dual loop */
167     while (n_left_from >= 4 && n_left_to_next >= 2) {
168       u32 pi0, pi1;
169       vlib_buffer_t *p0, *p1;
170       u8 error0 = MAP_ERROR_NONE;
171       u8 error1 = MAP_ERROR_NONE;
172       map_domain_t *d0 = 0, *d1 = 0;
173       ip4_header_t *ip40, *ip41;
174       ip6_header_t *ip60, *ip61;
175       u16 port0 = 0, port1 = 0;
176       u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
177       u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
178       u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP;
179
180       /* Prefetch next iteration. */
181       {
182         vlib_buffer_t *p2, *p3;
183
184         p2 = vlib_get_buffer(vm, from[2]);
185         p3 = vlib_get_buffer(vm, from[3]);
186
187         vlib_prefetch_buffer_header(p2, LOAD);
188         vlib_prefetch_buffer_header(p3, LOAD);
189
190         /* IPv6 + IPv4 header + 8 bytes of ULP */
191         CLIB_PREFETCH(p2->data, 68, LOAD);
192         CLIB_PREFETCH(p3->data, 68, LOAD);
193       }
194
195       pi0 = to_next[0] = from[0];
196       pi1 = to_next[1] = from[1];
197       from += 2;
198       n_left_from -= 2;
199       to_next +=2;
200       n_left_to_next -= 2;
201
202       p0 = vlib_get_buffer(vm, pi0);
203       p1 = vlib_get_buffer(vm, pi1);
204       ip60 = vlib_buffer_get_current(p0);
205       ip61 = vlib_buffer_get_current(p1);
206       vlib_buffer_advance(p0, sizeof(ip6_header_t));
207       vlib_buffer_advance(p1, sizeof(ip6_header_t));
208       ip40 = vlib_buffer_get_current(p0);
209       ip41 = vlib_buffer_get_current(p1);
210
211       /*
212        * Encapsulated IPv4 packet
213        *   - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
214        *   - Lookup/Rewrite or Fragment node in case of packet > MTU
215        * Fragmented IPv6 packet
216        * ICMP IPv6 packet
217        *   - Error -> Pass to ICMPv6/ICMPv4 relay
218        *   - Info -> Pass to IPv6 local
219        * Anything else -> drop
220        */
221       if (PREDICT_TRUE(ip60->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip60->payload_length) > 20)) {
222         d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32,
223                                 &map_domain_index0, &error0);
224       } else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
225                  clib_net_to_host_u16(ip60->payload_length) > sizeof(icmp46_header_t)) {
226         icmp46_header_t *icmp = (void *)(ip60 + 1);
227         next0 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ?
228           IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY;
229       } else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) {
230         next0 = IP6_MAP_NEXT_IP6_REASS;
231       } else {
232         error0 = MAP_ERROR_BAD_PROTOCOL;
233         next0 = IP6_MAP_NEXT_DROP;
234       }
235       if (PREDICT_TRUE(ip61->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip61->payload_length) > 20)) {
236         d1 = ip6_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip41->src_address.as_u32,
237                                 &map_domain_index1, &error1);
238       } else if (ip61->protocol == IP_PROTOCOL_ICMP6 &&
239                  clib_net_to_host_u16(ip61->payload_length) > sizeof(icmp46_header_t)) {
240         icmp46_header_t *icmp = (void *)(ip61 + 1);
241         next1 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ?
242           IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY;
243       } else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) {
244         next1 = IP6_MAP_NEXT_IP6_REASS;
245       } else {
246         error1 = MAP_ERROR_BAD_PROTOCOL;
247         next1 = IP6_MAP_NEXT_DROP;
248       }
249
250       if (d0) {
251         /* MAP inbound security check */
252         ip6_map_security_check(d0, ip40, ip60, &next0, &error0);
253
254         if (PREDICT_TRUE(error0 == MAP_ERROR_NONE &&
255                          next0 == IP6_MAP_NEXT_IP4_LOOKUP)) {
256           if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu))) {
257             vnet_buffer(p0)->ip_frag.header_offset = 0;
258             vnet_buffer(p0)->ip_frag.flags = 0;
259             vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
260             vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
261             next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
262           } else {
263             next0 = ip6_map_ip4_lookup_bypass(p0, ip40) ? IP6_MAP_NEXT_IP4_REWRITE : next0;
264           }
265           vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1,
266                                           clib_net_to_host_u16(ip40->length));
267         }
268       }
269       if (d1) {
270         /* MAP inbound security check */
271         ip6_map_security_check(d1, ip41, ip61, &next1, &error1);
272
273         if (PREDICT_TRUE(error1 == MAP_ERROR_NONE &&
274                          next1 == IP6_MAP_NEXT_IP4_LOOKUP)) {
275           if (PREDICT_FALSE(d1->mtu && (clib_host_to_net_u16(ip41->length) > d1->mtu))) {
276             vnet_buffer(p1)->ip_frag.header_offset = 0;
277             vnet_buffer(p1)->ip_frag.flags = 0;
278             vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
279             vnet_buffer(p1)->ip_frag.mtu = d0->mtu;
280             next1 = IP6_MAP_NEXT_IP4_FRAGMENT;
281           } else {
282             next1 = ip6_map_ip4_lookup_bypass(p1, ip41) ? IP6_MAP_NEXT_IP4_REWRITE : next1;
283           }
284           vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index1, 1,
285                                           clib_net_to_host_u16(ip41->length));
286         }
287       }
288
289       if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
290         map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
291         tr->map_domain_index = map_domain_index0;
292         tr->port = port0;
293       }
294
295       if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED)) {
296         map_trace_t *tr = vlib_add_trace(vm, node, p1, sizeof(*tr));
297         tr->map_domain_index = map_domain_index1;
298         tr->port = port1;
299       }
300
301       p0->error = error_node->errors[error0];
302       p1->error = error_node->errors[error1];
303       vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1);
304     }
305
306     /* Single loop */
307     while (n_left_from > 0 && n_left_to_next > 0) {
308       u32 pi0;
309       vlib_buffer_t *p0;
310       u8 error0 = MAP_ERROR_NONE;
311       map_domain_t *d0 = 0;
312       ip4_header_t *ip40;
313       ip6_header_t *ip60;
314       i32 port0 = 0;
315       u32 map_domain_index0 = ~0;
316       u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
317
318       pi0 = to_next[0] = from[0];
319       from += 1;
320       n_left_from -= 1;
321       to_next +=1;
322       n_left_to_next -= 1;
323
324       p0 = vlib_get_buffer(vm, pi0);
325       ip60 = vlib_buffer_get_current(p0);
326       vlib_buffer_advance(p0, sizeof(ip6_header_t));
327       ip40 = vlib_buffer_get_current(p0);
328
329       /*
330        * Encapsulated IPv4 packet
331        *   - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
332        *   - Lookup/Rewrite or Fragment node in case of packet > MTU
333        * Fragmented IPv6 packet
334        * ICMP IPv6 packet
335        *   - Error -> Pass to ICMPv6/ICMPv4 relay
336        *   - Info -> Pass to IPv6 local
337        * Anything else -> drop
338        */
339       if (PREDICT_TRUE(ip60->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip60->payload_length) > 20)) {
340         d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32,
341                                 &map_domain_index0, &error0);
342       } else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
343                  clib_net_to_host_u16(ip60->payload_length) > sizeof(icmp46_header_t)) {
344         icmp46_header_t *icmp = (void *)(ip60 + 1);
345         next0 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ?
346           IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY;
347       } else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION &&
348           (((ip6_frag_hdr_t *)(ip60+1))->next_hdr == IP_PROTOCOL_IP_IN_IP)) {
349         next0 = IP6_MAP_NEXT_IP6_REASS;
350       } else {
351         error0 = MAP_ERROR_BAD_PROTOCOL;
352       }
353
354       if (d0) {
355         /* MAP inbound security check */
356         ip6_map_security_check(d0, ip40, ip60, &next0, &error0);
357
358         if (PREDICT_TRUE(error0 == MAP_ERROR_NONE &&
359                          next0 == IP6_MAP_NEXT_IP4_LOOKUP)) {
360           if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu))) {
361             vnet_buffer(p0)->ip_frag.header_offset = 0;
362             vnet_buffer(p0)->ip_frag.flags = 0;
363             vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
364             vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
365             next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
366           } else {
367             next0 = ip6_map_ip4_lookup_bypass(p0, ip40) ? IP6_MAP_NEXT_IP4_REWRITE : next0;
368           }
369           vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1,
370                                           clib_net_to_host_u16(ip40->length));
371         }
372       }
373
374       if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
375         map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
376         tr->map_domain_index = map_domain_index0;
377         tr->port = (u16)port0;
378       }
379
380       next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
381       p0->error = error_node->errors[error0];
382       vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
383     }
384     vlib_put_next_frame(vm, node, next_index, n_left_to_next);
385   }
386
387   return frame->n_vectors;
388 }
389
390
391 static_always_inline void
392 ip6_map_ip6_reass_prepare(vlib_main_t *vm, vlib_node_runtime_t *node, map_ip6_reass_t *r,
393                           u32 **fragments_ready, u32 **fragments_to_drop)
394 {
395   ip4_header_t *ip40;
396   ip6_header_t *ip60;
397   ip6_frag_hdr_t *frag0;
398   vlib_buffer_t *p0;
399
400   if(!r->ip4_header.ip_version_and_header_length)
401     return;
402
403   //The IP header is here, we need to check for packets
404   //that can be forwarded
405   int i;
406   for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) {
407     if (r->fragments[i].pi == ~0 ||
408         ((!r->fragments[i].next_data_len) && (r->fragments[i].next_data_offset != (0xffff))))
409       continue;
410
411     p0 = vlib_get_buffer(vm, r->fragments[i].pi);
412     ip60 = vlib_buffer_get_current(p0);
413     frag0 = (ip6_frag_hdr_t *)(ip60 + 1);
414     ip40 = (ip4_header_t *)(frag0 + 1);
415
416     if (ip6_frag_hdr_offset(frag0)) {
417       //Not first fragment, add the IPv4 header
418       memcpy(ip40, &r->ip4_header, 20);
419     }
420
421 #ifdef MAP_IP6_REASS_COUNT_BYTES
422     r->forwarded += clib_net_to_host_u16(ip60->payload_length) - sizeof(*frag0);
423 #endif
424
425     if (ip6_frag_hdr_more(frag0)) {
426       //Not last fragment, we copy end of next
427       memcpy(u8_ptr_add(ip60, p0->current_length), r->fragments[i].next_data, 20);
428       p0->current_length += 20;
429       ip60->payload_length = u16_net_add(ip60->payload_length, 20);
430     }
431
432     if (!ip4_is_fragment(ip40)) {
433       ip40->fragment_id = frag_id_6to4(frag0->identification);
434       ip40->flags_and_fragment_offset = clib_host_to_net_u16(ip6_frag_hdr_offset(frag0));
435     } else {
436       ip40->flags_and_fragment_offset = clib_host_to_net_u16(ip4_get_fragment_offset(ip40) + ip6_frag_hdr_offset(frag0));
437     }
438
439     if (ip6_frag_hdr_more(frag0))
440       ip40->flags_and_fragment_offset |= clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS);
441
442     ip40->length = clib_host_to_net_u16(p0->current_length - sizeof(*ip60) - sizeof(*frag0));
443     ip40->checksum = ip4_header_checksum(ip40);
444
445     if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
446       map_ip6_map_ip6_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
447       tr->offset = ip4_get_fragment_offset(ip40);
448       tr->frag_len = clib_net_to_host_u16(ip40->length) - sizeof(*ip40);
449       tr->out = 1;
450     }
451
452     vec_add1(*fragments_ready, r->fragments[i].pi);
453     r->fragments[i].pi = ~0;
454     r->fragments[i].next_data_len = 0;
455     r->fragments[i].next_data_offset = 0;
456     map_main.ip6_reass_buffered_counter--;
457
458     //TODO: Best solution would be that ip6_map handles extension headers
459     // and ignores atomic fragment. But in the meantime, let's just copy the header.
460
461     u8 protocol = frag0->next_hdr;
462     memmove(u8_ptr_add(ip40, - sizeof(*ip60)), ip60, sizeof(*ip60));
463     ((ip6_header_t *)u8_ptr_add(ip40, - sizeof(*ip60)))->protocol = protocol;
464     vlib_buffer_advance(p0, sizeof(*frag0));
465   }
466 }
467
468 void
469 map_ip6_drop_pi(u32 pi)
470 {
471   vlib_main_t *vm = vlib_get_main();
472   vlib_node_runtime_t *n = vlib_node_get_runtime(vm, ip6_map_ip6_reass_node.index);
473   vlib_set_next_frame_buffer(vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi);
474 }
475
476 void
477 map_ip4_drop_pi(u32 pi)
478 {
479   vlib_main_t *vm = vlib_get_main();
480   vlib_node_runtime_t *n = vlib_node_get_runtime(vm, ip6_map_ip4_reass_node.index);
481   vlib_set_next_frame_buffer(vm, n, IP6_MAP_IP4_REASS_NEXT_DROP, pi);
482 }
483
484 /*
485  * ip6_reass
486  * TODO: We should count the number of successfully
487  * transmitted fragment bytes and compare that to the last fragment
488  * offset such that we can free the reassembly structure when all fragments
489  * have been forwarded.
490  */
491 static uword
492 ip6_map_ip6_reass (vlib_main_t *vm,
493            vlib_node_runtime_t *node,
494            vlib_frame_t *frame)
495 {
496   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
497   vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_ip6_reass_node.index);
498   u32 *fragments_to_drop = NULL;
499   u32 *fragments_ready = NULL;
500
501   from = vlib_frame_vector_args(frame);
502   n_left_from = frame->n_vectors;
503   next_index = node->cached_next_index;
504   while (n_left_from > 0) {
505     vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
506
507     /* Single loop */
508     while (n_left_from > 0 && n_left_to_next > 0) {
509       u32 pi0;
510       vlib_buffer_t *p0;
511       u8 error0 = MAP_ERROR_NONE;
512       ip6_header_t *ip60;
513       ip6_frag_hdr_t *frag0;
514       u16 offset;
515       u16 next_offset;
516       u16 frag_len;
517
518       pi0 = to_next[0] = from[0];
519       from += 1;
520       n_left_from -= 1;
521       to_next +=1;
522       n_left_to_next -= 1;
523
524       p0 = vlib_get_buffer(vm, pi0);
525       ip60 = vlib_buffer_get_current(p0);
526       frag0 = (ip6_frag_hdr_t *)(ip60 + 1);
527       offset = clib_host_to_net_u16(frag0->fragment_offset_and_more) & (~7);
528       frag_len = clib_net_to_host_u16(ip60->payload_length) - sizeof(*frag0);
529       next_offset = ip6_frag_hdr_more(frag0) ? (offset + frag_len) : (0xffff);
530
531       //FIXME: Support other extension headers, maybe
532
533       if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
534         map_ip6_map_ip6_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
535         tr->offset = offset;
536         tr->frag_len = frag_len;
537         tr->out = 0;
538       }
539
540       map_ip6_reass_lock();
541       map_ip6_reass_t *r = map_ip6_reass_get(&ip60->src_address, &ip60->dst_address,
542                                              frag0->identification, frag0->next_hdr, &fragments_to_drop);
543       //FIXME: Use better error codes
544       if (PREDICT_FALSE(!r)) {
545         // Could not create a caching entry
546         error0 = MAP_ERROR_FRAGMENT_MEMORY;
547       } else if (PREDICT_FALSE((frag_len <= 20 &&
548           (ip6_frag_hdr_more(frag0) || (!offset))))) {
549         //Very small fragment are restricted to the last one and
550         //can't be the first one
551         error0 = MAP_ERROR_FRAGMENT_MALFORMED;
552       } else if (map_ip6_reass_add_fragment(r, pi0, offset, next_offset, (u8 *)(frag0 + 1), frag_len)) {
553         map_ip6_reass_free(r, &fragments_to_drop);
554         error0 = MAP_ERROR_FRAGMENT_MEMORY;
555       } else {
556 #ifdef MAP_IP6_REASS_COUNT_BYTES
557         if (!ip6_frag_hdr_more(frag0))
558           r->expected_total = offset + frag_len;
559 #endif
560         ip6_map_ip6_reass_prepare(vm, node, r, &fragments_ready, &fragments_to_drop);
561 #ifdef MAP_IP6_REASS_COUNT_BYTES
562         if(r->forwarded >= r->expected_total)
563           map_ip6_reass_free(r, &fragments_to_drop);
564 #endif
565       }
566       map_ip6_reass_unlock();
567
568       if (error0 == MAP_ERROR_NONE) {
569         if (frag_len > 20) {
570           //Dequeue the packet
571           n_left_to_next++;
572           to_next--;
573         } else {
574           //All data from that packet was copied no need to keep it, but this is not an error
575           p0->error = error_node->errors[MAP_ERROR_NONE];
576           vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, IP6_MAP_IP6_REASS_NEXT_DROP);
577         }
578       } else {
579         p0->error = error_node->errors[error0];
580         vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, IP6_MAP_IP6_REASS_NEXT_DROP);
581       }
582     }
583     vlib_put_next_frame(vm, node, next_index, n_left_to_next);
584   }
585
586   map_send_all_to_node(vm, fragments_ready, node,
587                            &error_node->errors[MAP_ERROR_NONE],
588                            IP6_MAP_IP6_REASS_NEXT_IP6_MAP);
589   map_send_all_to_node(vm, fragments_to_drop, node,
590                            &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
591                            IP6_MAP_IP6_REASS_NEXT_DROP);
592
593   vec_free(fragments_to_drop);
594   vec_free(fragments_ready);
595   return frame->n_vectors;
596 }
597
598 /*
599  * ip6_ip4_virt_reass
600  */
601 static uword
602 ip6_map_ip4_reass (vlib_main_t *vm,
603                     vlib_node_runtime_t *node,
604                     vlib_frame_t *frame)
605 {
606   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
607   vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_ip4_reass_node.index);
608   map_main_t *mm = &map_main;
609   vlib_combined_counter_main_t *cm = mm->domain_counters;
610   u32 cpu_index = os_get_cpu_number();
611   u32 *fragments_to_drop = NULL;
612   u32 *fragments_to_loopback = NULL;
613
614   from = vlib_frame_vector_args(frame);
615   n_left_from = frame->n_vectors;
616   next_index = node->cached_next_index;
617   while (n_left_from > 0) {
618     vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
619
620     /* Single loop */
621     while (n_left_from > 0 && n_left_to_next > 0) {
622       u32 pi0;
623       vlib_buffer_t *p0;
624       u8 error0 = MAP_ERROR_NONE;
625       map_domain_t *d0;
626       ip4_header_t *ip40;
627       ip6_header_t *ip60;
628       i32 port0 = 0;
629       u32 map_domain_index0;
630       u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP;
631       u8 cached = 0;
632
633       pi0 = to_next[0] = from[0];
634       from += 1;
635       n_left_from -= 1;
636       to_next +=1;
637       n_left_to_next -= 1;
638
639       p0 = vlib_get_buffer(vm, pi0);
640       ip40 = vlib_buffer_get_current(p0);
641       ip60 = ((ip6_header_t *)ip40) - 1;
642
643       d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32,
644                               &map_domain_index0, &error0);
645
646       map_ip4_reass_lock();
647       //This node only deals with fragmented ip4
648       map_ip4_reass_t *r = map_ip4_reass_get(ip40->src_address.as_u32, ip40->dst_address.as_u32,
649                                              ip40->fragment_id, ip40->protocol, &fragments_to_drop);
650       if (PREDICT_FALSE(!r)) {
651         // Could not create a caching entry
652         error0 = MAP_ERROR_FRAGMENT_MEMORY;
653       } else if (PREDICT_TRUE(ip4_get_fragment_offset(ip40))) {
654         // This is a fragment
655         if (r->port >= 0) {
656           // We know the port already
657           port0 = r->port;
658         } else if (map_ip4_reass_add_fragment(r, pi0)) {
659           // Not enough space for caching
660           error0 = MAP_ERROR_FRAGMENT_MEMORY;
661           map_ip4_reass_free(r, &fragments_to_drop);
662         } else {
663           cached = 1;
664         }
665       } else if ((port0 = ip4_get_port(ip40, MAP_SENDER, p0->current_length)) < 0) {
666         // Could not find port from first fragment. Stop reassembling.
667         error0 = MAP_ERROR_BAD_PROTOCOL;
668         port0 = 0;
669         map_ip4_reass_free(r, &fragments_to_drop);
670       } else {
671         // Found port. Remember it and loopback saved fragments
672         r->port = port0;
673         map_ip4_reass_get_fragments(r, &fragments_to_loopback);
674       }
675
676 #ifdef MAP_IP4_REASS_COUNT_BYTES
677       if (!cached && r) {
678         r->forwarded += clib_host_to_net_u16(ip40->length) - 20;
679         if (!ip4_get_fragment_more(ip40))
680           r->expected_total = ip4_get_fragment_offset(ip40) * 8 + clib_host_to_net_u16(ip40->length) - 20;
681         if(r->forwarded >= r->expected_total)
682           map_ip4_reass_free(r, &fragments_to_drop);
683       }
684 #endif
685
686       map_ip4_reass_unlock();
687
688       if(PREDICT_TRUE(error0 == MAP_ERROR_NONE))
689         error0 = ip6_map_sec_check(d0, port0, ip40, ip60) ? MAP_ERROR_NONE : MAP_ERROR_DECAP_SEC_CHECK;
690
691       if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu) &&
692                         error0 == MAP_ERROR_NONE && !cached)) {
693         vnet_buffer(p0)->ip_frag.header_offset = 0;
694         vnet_buffer(p0)->ip_frag.flags = 0;
695         vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
696         vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
697         next0 = IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT;
698       }
699
700       if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
701         map_ip6_map_ip4_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
702         tr->map_domain_index = map_domain_index0;
703         tr->port = port0;
704         tr->cached = cached;
705       }
706
707       if (cached) {
708         //Dequeue the packet
709         n_left_to_next++;
710         to_next--;
711       } else {
712         if (error0 == MAP_ERROR_NONE)
713                vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1,
714                                                clib_net_to_host_u16(ip40->length));
715         next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_IP4_REASS_NEXT_DROP;
716         p0->error = error_node->errors[error0];
717         vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
718       }
719
720       //Loopback when we reach the end of the inpu vector
721       if(n_left_from == 0 && vec_len(fragments_to_loopback)) {
722         from = vlib_frame_vector_args(frame);
723         u32 len = vec_len(fragments_to_loopback);
724         if(len <= VLIB_FRAME_SIZE) {
725           memcpy(from, fragments_to_loopback, sizeof(u32)*len);
726           n_left_from = len;
727           vec_reset_length(fragments_to_loopback);
728         } else {
729           memcpy(from, fragments_to_loopback + (len - VLIB_FRAME_SIZE), sizeof(u32)*VLIB_FRAME_SIZE);
730           n_left_from = VLIB_FRAME_SIZE;
731           _vec_len(fragments_to_loopback) = len - VLIB_FRAME_SIZE;
732         }
733       }
734     }
735     vlib_put_next_frame(vm, node, next_index, n_left_to_next);
736   }
737   map_send_all_to_node(vm, fragments_to_drop, node,
738                              &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
739                              IP6_MAP_IP4_REASS_NEXT_DROP);
740
741   vec_free(fragments_to_drop);
742   vec_free(fragments_to_loopback);
743   return frame->n_vectors;
744 }
745
746 /*
747  * ip6_icmp_relay
748  */
749 static uword
750 ip6_map_icmp_relay (vlib_main_t *vm,
751                     vlib_node_runtime_t *node,
752                     vlib_frame_t *frame)
753 {
754   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
755   vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_icmp_relay_node.index);
756   map_main_t *mm = &map_main;
757   u32 cpu_index = os_get_cpu_number();
758   u16 *fragment_ids, *fid;
759
760   from = vlib_frame_vector_args(frame);
761   n_left_from = frame->n_vectors;
762   next_index = node->cached_next_index;
763
764   /* Get random fragment IDs for replies. */
765   fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer, n_left_from * sizeof (fragment_ids[0]));
766
767   while (n_left_from > 0) {
768     vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
769
770     /* Single loop */
771     while (n_left_from > 0 && n_left_to_next > 0) {
772       u32 pi0;
773       vlib_buffer_t *p0;
774       u8 error0 = MAP_ERROR_NONE;
775       ip6_header_t *ip60;
776       u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP;
777       u32 mtu;
778
779       pi0 = to_next[0] = from[0];
780       from += 1;
781       n_left_from -= 1;
782       to_next +=1;
783       n_left_to_next -= 1;
784
785       p0 = vlib_get_buffer(vm, pi0);
786       ip60 = vlib_buffer_get_current(p0);
787       u16 tlen = clib_net_to_host_u16(ip60->payload_length);
788
789       /*
790        * In:
791        *  IPv6 header           (40)
792        *  ICMPv6 header          (8) 
793        *  IPv6 header           (40)
794        *  Original IPv4 header / packet
795        * Out:
796        *  New IPv4 header
797        *  New ICMP header
798        *  Original IPv4 header / packet
799        */
800
801       /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */
802       if (tlen < 76) {
803         error0 = MAP_ERROR_ICMP_RELAY;
804         goto error;
805       }
806
807       icmp46_header_t *icmp60 = (icmp46_header_t *)(ip60 + 1);
808       ip6_header_t *inner_ip60 = (ip6_header_t *)(icmp60 + 2);
809
810       if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP) {
811         error0 = MAP_ERROR_ICMP_RELAY;
812         goto error;
813       }
814
815       ip4_header_t *inner_ip40 = (ip4_header_t *)(inner_ip60 + 1);
816       vlib_buffer_advance(p0, 60); /* sizeof ( IPv6 + ICMP + IPv6 - IPv4 - ICMP ) */
817       ip4_header_t *new_ip40 = vlib_buffer_get_current(p0);
818       icmp46_header_t *new_icmp40 = (icmp46_header_t *)(new_ip40 + 1);
819
820       /*
821        * Relay according to RFC2473, section 8.3
822        */
823       switch (icmp60->type) {
824       case ICMP6_destination_unreachable:
825       case ICMP6_time_exceeded:
826       case ICMP6_parameter_problem:
827         /* Type 3 - destination unreachable, Code 1 - host unreachable */
828         new_icmp40->type = ICMP4_destination_unreachable;
829         new_icmp40->code = ICMP4_destination_unreachable_destination_unreachable_host;
830         break;
831
832       case ICMP6_packet_too_big:
833         /* Type 3 - destination unreachable, Code 4 - packet too big */
834         /* Potential TODO: Adjust domain tunnel MTU based on the value received here */
835         mtu = clib_net_to_host_u32(*((u32 *)(icmp60 + 1)));
836
837         /* Check DF flag */
838         if (!(inner_ip40->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT))) {
839           error0 = MAP_ERROR_ICMP_RELAY;
840           goto error;
841         }
842
843         new_icmp40->type = ICMP4_destination_unreachable;
844         new_icmp40->code = ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set;
845         *((u32 *)(new_icmp40 + 1)) = clib_host_to_net_u32(mtu < 1280 ? 1280 : mtu);
846         break;
847
848       default:
849         error0 = MAP_ERROR_ICMP_RELAY;
850         break;
851       }
852
853       /*
854        * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812)
855        */
856       new_ip40->ip_version_and_header_length = 0x45;
857       new_ip40->tos = 0;
858       u16 nlen = (tlen - 20) > 576 ? 576 : tlen - 20;
859       new_ip40->length = clib_host_to_net_u16(nlen);
860       new_ip40->fragment_id = fid[0]; fid++;
861       new_ip40->ttl = 64;
862       new_ip40->protocol = IP_PROTOCOL_ICMP;
863       new_ip40->src_address = mm->icmp_src_address;
864       new_ip40->dst_address = inner_ip40->src_address;
865       new_ip40->checksum = ip4_header_checksum(new_ip40);
866
867       new_icmp40->checksum = 0;
868       ip_csum_t sum = ip_incremental_checksum(0, new_icmp40, nlen - 20);
869       new_icmp40->checksum = ~ip_csum_fold(sum);
870
871       vlib_increment_simple_counter(&mm->icmp_relayed, cpu_index, 0, 1);
872
873     error:
874       if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
875         map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
876         tr->map_domain_index = 0;
877         tr->port = 0;
878       }
879
880       next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP;
881       p0->error = error_node->errors[error0];
882       vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
883     }
884     vlib_put_next_frame(vm, node, next_index, n_left_to_next);
885   }
886
887   return frame->n_vectors;
888
889 }
890
891 static char *map_error_strings[] = {
892 #define _(sym,string) string,
893   foreach_map_error
894 #undef _
895 };
896
897 VLIB_REGISTER_NODE(ip6_map_node) = {
898   .function = ip6_map,
899   .name = "ip6-map",
900   .vector_size = sizeof(u32),
901   .format_trace = format_map_trace,
902   .type = VLIB_NODE_TYPE_INTERNAL,
903
904   .n_errors = MAP_N_ERROR,
905   .error_strings = map_error_strings,
906
907   .n_next_nodes = IP6_MAP_N_NEXT,
908   .next_nodes = {
909     [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
910 #ifdef MAP_SKIP_IP6_LOOKUP
911     [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite-transit",
912 #endif
913     [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass",
914     [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass",
915     [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
916     [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay",
917     [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local",
918     [IP6_MAP_NEXT_DROP] = "error-drop",
919   },
920 };
921
922 VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = {
923   .function = ip6_map_ip6_reass,
924   .name = "ip6-map-ip6-reass",
925   .vector_size = sizeof(u32),
926   .format_trace = format_ip6_map_ip6_reass_trace,
927   .type = VLIB_NODE_TYPE_INTERNAL,
928   .n_errors = MAP_N_ERROR,
929   .error_strings = map_error_strings,
930   .n_next_nodes = IP6_MAP_IP6_REASS_N_NEXT,
931   .next_nodes = {
932     [IP6_MAP_IP6_REASS_NEXT_IP6_MAP] = "ip6-map",
933     [IP6_MAP_IP6_REASS_NEXT_DROP] = "error-drop",
934   },
935 };
936
937 VLIB_REGISTER_NODE(ip6_map_ip4_reass_node) = {
938   .function = ip6_map_ip4_reass,
939   .name = "ip6-map-ip4-reass",
940   .vector_size = sizeof(u32),
941   .format_trace = format_ip6_map_ip4_reass_trace,
942   .type = VLIB_NODE_TYPE_INTERNAL,
943   .n_errors = MAP_N_ERROR,
944   .error_strings = map_error_strings,
945   .n_next_nodes = IP6_MAP_IP4_REASS_N_NEXT,
946   .next_nodes = {
947     [IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup",
948     [IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
949     [IP6_MAP_IP4_REASS_NEXT_DROP] = "error-drop",
950   },
951 };
952
953 VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
954   .function = ip6_map_icmp_relay,
955   .name = "ip6-map-icmp-relay",
956   .vector_size = sizeof(u32),
957   .format_trace = format_map_trace, //FIXME
958   .type = VLIB_NODE_TYPE_INTERNAL,
959   .n_errors = MAP_N_ERROR,
960   .error_strings = map_error_strings,
961   .n_next_nodes = IP6_ICMP_RELAY_N_NEXT,
962   .next_nodes = {
963     [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup",
964     [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop",
965   },
966 };