2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 #include "../ip/ip_frag.h"
IP6_MAP_NEXT_IP4_LOOKUP,      /* decapsulated IPv4 -> "ip4-lookup" */
#ifdef MAP_SKIP_IP6_LOOKUP
  IP6_MAP_NEXT_IP4_REWRITE,     /* lookup bypass -> "ip4-rewrite-transit" */
  IP6_MAP_NEXT_IP6_REASS,       /* outer IPv6 fragment -> "ip6-map-ip6-reass" */
  IP6_MAP_NEXT_IP4_REASS,       /* inner IPv4 fragment -> "ip6-map-ip4-reass" */
  IP6_MAP_NEXT_IP4_FRAGMENT,    /* inner packet > domain MTU -> "ip4-frag" */
  IP6_MAP_NEXT_IP6_ICMP_RELAY,  /* ICMPv6 error -> "ip6-map-icmp-relay" */
  IP6_MAP_NEXT_IP6_LOCAL,       /* ICMPv6 echo request/reply -> "ip6-local" */
/* Next nodes of the "ip6-map-ip6-reass" virtual reassembly node. */
enum ip6_map_ip6_reass_next_e {
  IP6_MAP_IP6_REASS_NEXT_IP6_MAP,  /* forwardable fragments go back to "ip6-map" */
  IP6_MAP_IP6_REASS_NEXT_DROP,     /* "error-drop" */
  IP6_MAP_IP6_REASS_N_NEXT,        /* number of next nodes */
/* Next nodes of the "ip6-map-ip4-reass" virtual reassembly node. */
enum ip6_map_ip4_reass_next_e {
  IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP,    /* "ip4-lookup" */
  IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT,  /* "ip4-frag" when packet > domain MTU */
  IP6_MAP_IP4_REASS_NEXT_DROP,          /* "error-drop" */
  IP6_MAP_IP4_REASS_N_NEXT,             /* number of next nodes */
/* Next nodes of the "ip6-map-icmp-relay" node. */
enum ip6_icmp_relay_next_e {
  IP6_ICMP_RELAY_NEXT_IP4_LOOKUP,  /* relayed ICMPv4 error -> "ip4-lookup" */
  IP6_ICMP_RELAY_NEXT_DROP,        /* "error-drop" */
  IP6_ICMP_RELAY_N_NEXT,           /* number of next nodes */
/* Forward declarations of the graph nodes registered at the bottom of this file. */
vlib_node_registration_t ip6_map_ip4_reass_node;
vlib_node_registration_t ip6_map_ip6_reass_node;
static vlib_node_registration_t ip6_map_icmp_relay_node;
} map_ip6_map_ip4_reass_trace_t;

/*
 * Packet trace formatter for "ip6-map-ip4-reass": prints the MAP domain
 * index, the L4 port and whether the fragment was cached or forwarded.
 */
format_ip6_map_ip4_reass_trace (u8 *s, va_list *args)
  CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *);
  map_ip6_map_ip4_reass_trace_t *t = va_arg (*args, map_ip6_map_ip4_reass_trace_t *);
  return format(s, "MAP domain index: %d L4 port: %u Status: %s", t->map_domain_index,
                t->port, t->cached?"cached":"forwarded");
} map_ip6_map_ip6_reass_trace_t;

/*
 * Packet trace formatter for "ip6-map-ip6-reass": prints the fragment
 * offset, the fragment length and the in/out direction.
 */
format_ip6_map_ip6_reass_trace (u8 *s, va_list *args)
  CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *);
  map_ip6_map_ip6_reass_trace_t *t = va_arg (*args, map_ip6_map_ip6_reass_trace_t *);
  return format(s, "Offset: %d Fragment length: %d Status: %s", t->offset, t->frag_len, t->out?"out":"in");
/*
 * MAP inbound security check: recompute the expected IPv6 source address
 * (prefix and suffix halves) from the inner IPv4 source address and L4 port
 * using the domain's MAP rules, and compare it against the actual outer
 * IPv6 source address.
 */
static_always_inline bool
ip6_map_sec_check (map_domain_t *d, u16 port, ip4_header_t *ip4, ip6_header_t *ip6)
  u16 sp4 = clib_net_to_host_u16(port);
  u32 sa4 = clib_net_to_host_u32(ip4->src_address.as_u32);
  u64 sal6 = map_get_pfx(d, sa4, sp4);  /* expected high 64 bits of IPv6 source */
  u64 sar6 = map_get_sfx(d, sa4, sp4);  /* expected low 64 bits of IPv6 source */
  if (PREDICT_FALSE(sal6 != clib_net_to_host_u64(ip6->src_address.as_u64[0]) ||
                    sar6 != clib_net_to_host_u64(ip6->src_address.as_u64[1])))
/*
 * Run the inbound security check on a decapsulated packet and set
 * *error / *next accordingly. Non-fragmented packets are checked in
 * place; fragments are diverted to IPv4 virtual reassembly when
 * sec_check_frag is enabled (the port is only known after reassembly).
 */
static_always_inline void
ip6_map_security_check (map_domain_t *d, ip4_header_t *ip4, ip6_header_t *ip6, u32 *next, u8 *error)
  map_main_t *mm = &map_main;
  /* Only domains with embedded address/port bits need the check. */
  if (d->ea_bits_len || d->rules) {
    if (d->psid_length > 0) {
      if (!ip4_is_fragment(ip4)) {
        u16 port = ip4_map_get_port(ip4, MAP_SENDER);
        *error = ip6_map_sec_check(d, port, ip4, ip6) ? MAP_ERROR_NONE : MAP_ERROR_DECAP_SEC_CHECK;
        *error = MAP_ERROR_BAD_PROTOCOL;
        /* Fragment: defer the check to virtual reassembly if configured. */
        *next = mm->sec_check_frag ? IP6_MAP_NEXT_IP4_REASS : *next;
/*
 * Optionally bypass the IPv4 FIB lookup by using the pre-resolved
 * adjacency stored in map_main (MAP_SKIP_IP6_LOOKUP build option).
 * Returns true when the TX adjacency was set on the buffer.
 */
static_always_inline bool
ip6_map_ip4_lookup_bypass (vlib_buffer_t *p0, ip4_header_t *ip)
#ifdef MAP_SKIP_IP6_LOOKUP
  map_main_t *mm = &map_main;
  u32 adj_index0 = mm->adj4_index;
  if (adj_index0 > 0) {
    ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
    ip_adjacency_t *adj = ip_get_adjacency(lm4, mm->adj4_index);
    /* Multipath adjacency: pick a bucket from the flow hash. */
    if (adj->n_adj > 1) {
      u32 hash_c0 = ip4_compute_flow_hash(ip, IP_FLOW_HASH_DEFAULT);
      adj_index0 += (hash_c0 & (adj->n_adj - 1));
    vnet_buffer(p0)->ip.adj_index[VLIB_TX] = adj_index0;
/*
 * "ip6-map" node: MAP border-relay IPv6 -> IPv4 direction.
 * Classifies each received IPv6 packet (IPv4-in-IPv6, ICMPv6, IPv6
 * fragment), strips the outer IPv6 header, performs the MAP inbound
 * security check and dispatches to lookup / rewrite-bypass /
 * fragmentation / virtual reassembly / ICMP relay / local.
 * Standard VPP dual-loop with a single-packet tail loop.
 */
ip6_map (vlib_main_t *vm,
         vlib_node_runtime_t *node,
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_node.index);
  map_main_t *mm = &map_main;
  vlib_combined_counter_main_t *cm = mm->domain_counters;
  u32 cpu_index = os_get_cpu_number();

  from = vlib_frame_vector_args(frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0) {
    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);

    /* Dual loop: two packets per iteration. */
    while (n_left_from >= 4 && n_left_to_next >= 2) {
      vlib_buffer_t *p0, *p1;
      u8 error0 = MAP_ERROR_NONE;
      u8 error1 = MAP_ERROR_NONE;
      map_domain_t *d0 = 0, *d1 = 0;
      ip4_header_t *ip40, *ip41;
      ip6_header_t *ip60, *ip61;
      u16 port0 = 0, port1 = 0;
      u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
      u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
      u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP;

      /* Prefetch next iteration. */
      vlib_buffer_t *p2, *p3;
      p2 = vlib_get_buffer(vm, from[2]);
      p3 = vlib_get_buffer(vm, from[3]);
      vlib_prefetch_buffer_header(p2, LOAD);
      vlib_prefetch_buffer_header(p3, LOAD);
      /* IPv6 + IPv4 header + 8 bytes of ULP */
      CLIB_PREFETCH(p2->data, 68, LOAD);
      CLIB_PREFETCH(p3->data, 68, LOAD);

      pi0 = to_next[0] = from[0];
      pi1 = to_next[1] = from[1];

      p0 = vlib_get_buffer(vm, pi0);
      p1 = vlib_get_buffer(vm, pi1);
      ip60 = vlib_buffer_get_current(p0);
      ip61 = vlib_buffer_get_current(p1);
      /* Strip the outer IPv6 header: current data now points at inner IPv4. */
      vlib_buffer_advance(p0, sizeof(ip6_header_t));
      vlib_buffer_advance(p1, sizeof(ip6_header_t));
      ip40 = vlib_buffer_get_current(p0);
      ip41 = vlib_buffer_get_current(p1);

      /*
       * Encapsulated IPv4 packet
       *   - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
       *   - Lookup/Rewrite or Fragment node in case of packet > MTU
       * Fragmented IPv6 packet
       *   - Error -> Pass to ICMPv6/ICMPv4 relay
       *   - Info -> Pass to IPv6 local
       * Anything else -> drop
       */
      if (PREDICT_TRUE(ip60->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip60->payload_length) > 20)) {
        d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32,
                                &map_domain_index0, &error0);
      } else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
                 clib_net_to_host_u16(ip60->payload_length) > sizeof(icmp46_header_t)) {
        icmp46_header_t *icmp = (void *)(ip60 + 1);
        /* Echo goes to this node's stack; everything else is relayed as ICMPv4. */
        next0 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ?
                IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY;
      } else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) {
        next0 = IP6_MAP_NEXT_IP6_REASS;
        error0 = MAP_ERROR_BAD_PROTOCOL;
      if (PREDICT_TRUE(ip61->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip61->payload_length) > 20)) {
        d1 = ip6_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip41->src_address.as_u32,
                                &map_domain_index1, &error1);
      } else if (ip61->protocol == IP_PROTOCOL_ICMP6 &&
                 clib_net_to_host_u16(ip61->payload_length) > sizeof(icmp46_header_t)) {
        icmp46_header_t *icmp = (void *)(ip61 + 1);
        next1 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ?
                IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY;
      } else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) {
        next1 = IP6_MAP_NEXT_IP6_REASS;
        error1 = MAP_ERROR_BAD_PROTOCOL;

      /* MAP inbound security check */
      ip6_map_security_check(d0, ip40, ip60, &next0, &error0);

      if (PREDICT_TRUE(error0 == MAP_ERROR_NONE &&
                       next0 == IP6_MAP_NEXT_IP4_LOOKUP)) {
        /* Inner packet larger than the domain MTU: fragment before lookup. */
        if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu))) {
          vnet_buffer(p0)->ip_frag.header_offset = 0;
          vnet_buffer(p0)->ip_frag.flags = 0;
          vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
          vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
          next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
          next0 = ip6_map_ip4_lookup_bypass(p0, ip40) ? IP6_MAP_NEXT_IP4_REWRITE : next0;
        vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1,
                                        clib_net_to_host_u16(ip40->length));

      /* MAP inbound security check */
      ip6_map_security_check(d1, ip41, ip61, &next1, &error1);

      if (PREDICT_TRUE(error1 == MAP_ERROR_NONE &&
                       next1 == IP6_MAP_NEXT_IP4_LOOKUP)) {
        if (PREDICT_FALSE(d1->mtu && (clib_host_to_net_u16(ip41->length) > d1->mtu))) {
          vnet_buffer(p1)->ip_frag.header_offset = 0;
          vnet_buffer(p1)->ip_frag.flags = 0;
          vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
          vnet_buffer(p1)->ip_frag.mtu = d1->mtu;
          next1 = IP6_MAP_NEXT_IP4_FRAGMENT;
          next1 = ip6_map_ip4_lookup_bypass(p1, ip41) ? IP6_MAP_NEXT_IP4_REWRITE : next1;
        vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index1, 1,
                                        clib_net_to_host_u16(ip41->length));

      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
        map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
        tr->map_domain_index = map_domain_index0;

      if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED)) {
        map_trace_t *tr = vlib_add_trace(vm, node, p1, sizeof(*tr));
        tr->map_domain_index = map_domain_index1;

      /* Security check failure: send an ICMPv6 error back if enabled. */
      if (error0 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled) {
        /* Set ICMP parameters */
        vlib_buffer_advance(p0, -sizeof(ip6_header_t));  /* restore outer IPv6 header */
        icmp6_error_set_vnet_buffer(p0, ICMP6_destination_unreachable,
                                    ICMP6_destination_unreachable_source_address_failed_policy, 0);
        next0 = IP6_MAP_NEXT_ICMP;
        next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;

      if (error1 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled) {
        /* Set ICMP parameters */
        vlib_buffer_advance(p1, -sizeof(ip6_header_t));
        icmp6_error_set_vnet_buffer(p1, ICMP6_destination_unreachable,
                                    ICMP6_destination_unreachable_source_address_failed_policy, 0);
        next1 = IP6_MAP_NEXT_ICMP;
        next1 = (error1 == MAP_ERROR_NONE) ? next1 : IP6_MAP_NEXT_DROP;

      /* ip6-local expects the buffer to still start at the IPv6 header. */
      if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
        vlib_buffer_advance(p0, -sizeof(ip6_header_t));
      if (next1 == IP6_MAP_NEXT_IP6_LOCAL)
        vlib_buffer_advance(p1, -sizeof(ip6_header_t));

      p0->error = error_node->errors[error0];
      p1->error = error_node->errors[error1];
      vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1);

    /* Single-packet tail loop: same logic as above for one buffer. */
    while (n_left_from > 0 && n_left_to_next > 0) {
      u8 error0 = MAP_ERROR_NONE;
      map_domain_t *d0 = 0;
      u32 map_domain_index0 = ~0;
      u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;

      pi0 = to_next[0] = from[0];

      p0 = vlib_get_buffer(vm, pi0);
      ip60 = vlib_buffer_get_current(p0);
      vlib_buffer_advance(p0, sizeof(ip6_header_t));
      ip40 = vlib_buffer_get_current(p0);

      /*
       * Encapsulated IPv4 packet
       *   - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
       *   - Lookup/Rewrite or Fragment node in case of packet > MTU
       * Fragmented IPv6 packet
       *   - Error -> Pass to ICMPv6/ICMPv4 relay
       *   - Info -> Pass to IPv6 local
       * Anything else -> drop
       */
      if (PREDICT_TRUE(ip60->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip60->payload_length) > 20)) {
        d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32,
                                &map_domain_index0, &error0);
      } else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
                 clib_net_to_host_u16(ip60->payload_length) > sizeof(icmp46_header_t)) {
        icmp46_header_t *icmp = (void *)(ip60 + 1);
        next0 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ?
                IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY;
      } else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION &&
                 (((ip6_frag_hdr_t *)(ip60+1))->next_hdr == IP_PROTOCOL_IP_IN_IP)) {
        /* Only reassemble fragments that carry IPv4-in-IPv6 payload. */
        next0 = IP6_MAP_NEXT_IP6_REASS;
        error0 = MAP_ERROR_BAD_PROTOCOL;

      /* MAP inbound security check */
      ip6_map_security_check(d0, ip40, ip60, &next0, &error0);

      if (PREDICT_TRUE(error0 == MAP_ERROR_NONE &&
                       next0 == IP6_MAP_NEXT_IP4_LOOKUP)) {
        if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu))) {
          vnet_buffer(p0)->ip_frag.header_offset = 0;
          vnet_buffer(p0)->ip_frag.flags = 0;
          vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
          vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
          next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
          next0 = ip6_map_ip4_lookup_bypass(p0, ip40) ? IP6_MAP_NEXT_IP4_REWRITE : next0;
        vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1,
                                        clib_net_to_host_u16(ip40->length));

      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
        map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
        tr->map_domain_index = map_domain_index0;
        tr->port = (u16)port0;

      if (mm->icmp6_enabled &&
          (error0 == MAP_ERROR_DECAP_SEC_CHECK || error0 == MAP_ERROR_NO_DOMAIN)) {
        /* Set ICMP parameters */
        vlib_buffer_advance(p0, -sizeof(ip6_header_t));
        icmp6_error_set_vnet_buffer(p0, ICMP6_destination_unreachable,
                                    ICMP6_destination_unreachable_source_address_failed_policy, 0);
        next0 = IP6_MAP_NEXT_ICMP;
        next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;

      if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
        vlib_buffer_advance(p0, -sizeof(ip6_header_t));

      p0->error = error_node->errors[error0];
      vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
    vlib_put_next_frame(vm, node, next_index, n_left_to_next);

  return frame->n_vectors;
/*
 * Walk a reassembly context and move every fragment that is now
 * forwardable (IPv4 header known, next-fragment data available) to the
 * fragments_ready vector, translating the IPv6 fragment header into
 * IPv4 fragmentation fields on the inner header as it goes.
 */
static_always_inline void
ip6_map_ip6_reass_prepare(vlib_main_t *vm, vlib_node_runtime_t *node, map_ip6_reass_t *r,
                          u32 **fragments_ready, u32 **fragments_to_drop)
  ip6_frag_hdr_t *frag0;

  /* Nothing can be forwarded until the first fragment supplied the IPv4 header. */
  if(!r->ip4_header.ip_version_and_header_length)

  //The IP header is here, we need to check for packets
  //that can be forwarded
  for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) {
    /* Skip empty slots and fragments still waiting for their successor's data. */
    if (r->fragments[i].pi == ~0 ||
        ((!r->fragments[i].next_data_len) && (r->fragments[i].next_data_offset != (0xffff))))

    p0 = vlib_get_buffer(vm, r->fragments[i].pi);
    ip60 = vlib_buffer_get_current(p0);
    frag0 = (ip6_frag_hdr_t *)(ip60 + 1);
    ip40 = (ip4_header_t *)(frag0 + 1);

    if (ip6_frag_hdr_offset(frag0)) {
      //Not first fragment, add the IPv4 header
      clib_memcpy(ip40, &r->ip4_header, 20);

#ifdef MAP_IP6_REASS_COUNT_BYTES
    r->forwarded += clib_net_to_host_u16(ip60->payload_length) - sizeof(*frag0);

    if (ip6_frag_hdr_more(frag0)) {
      //Not last fragment, we copy end of next
      clib_memcpy(u8_ptr_add(ip60, p0->current_length), r->fragments[i].next_data, 20);
      p0->current_length += 20;
      ip60->payload_length = u16_net_add(ip60->payload_length, 20);

    /* Translate IPv6 fragmentation metadata into the inner IPv4 header. */
    if (!ip4_is_fragment(ip40)) {
      ip40->fragment_id = frag_id_6to4(frag0->identification);
      ip40->flags_and_fragment_offset = clib_host_to_net_u16(ip6_frag_hdr_offset(frag0));
      /* Already an IPv4 fragment: offsets accumulate. */
      ip40->flags_and_fragment_offset = clib_host_to_net_u16(ip4_get_fragment_offset(ip40) + ip6_frag_hdr_offset(frag0));

    if (ip6_frag_hdr_more(frag0))
      ip40->flags_and_fragment_offset |= clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS);

    ip40->length = clib_host_to_net_u16(p0->current_length - sizeof(*ip60) - sizeof(*frag0));
    ip40->checksum = ip4_header_checksum(ip40);

    if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
      map_ip6_map_ip6_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
      tr->offset = ip4_get_fragment_offset(ip40);
      tr->frag_len = clib_net_to_host_u16(ip40->length) - sizeof(*ip40);

    /* Hand the fragment over and release its slot in the context. */
    vec_add1(*fragments_ready, r->fragments[i].pi);
    r->fragments[i].pi = ~0;
    r->fragments[i].next_data_len = 0;
    r->fragments[i].next_data_offset = 0;
    map_main.ip6_reass_buffered_counter--;

    //TODO: Best solution would be that ip6_map handles extension headers
    //      and ignores atomic fragment. But in the meantime, let's just copy the header.

    /* Rebuild an un-fragmented IPv6 header in front of the payload. */
    u8 protocol = frag0->next_hdr;
    memmove(u8_ptr_add(ip40, - sizeof(*ip60)), ip60, sizeof(*ip60));
    ((ip6_header_t *)u8_ptr_add(ip40, - sizeof(*ip60)))->protocol = protocol;
    vlib_buffer_advance(p0, sizeof(*frag0));
/* Enqueue a single buffer index to the ip6 reassembly node's drop next. */
map_ip6_drop_pi(u32 pi)
  vlib_main_t *vm = vlib_get_main();
  vlib_node_runtime_t *n = vlib_node_get_runtime(vm, ip6_map_ip6_reass_node.index);
  vlib_set_next_frame_buffer(vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi);
/* Enqueue a single buffer index to the ip4 reassembly node's drop next. */
map_ip4_drop_pi(u32 pi)
  vlib_main_t *vm = vlib_get_main();
  vlib_node_runtime_t *n = vlib_node_get_runtime(vm, ip6_map_ip4_reass_node.index);
  vlib_set_next_frame_buffer(vm, n, IP6_MAP_IP4_REASS_NEXT_DROP, pi);
/*
 * "ip6-map-ip6-reass" node: virtual reassembly of fragmented outer IPv6
 * packets. Fragments are cached per (src, dst, id, next_hdr) context and
 * released back to "ip6-map" once forwardable.
 *
 * TODO: We should count the number of successfully
 * transmitted fragment bytes and compare that to the last fragment
 * offset such that we can free the reassembly structure when all fragments
 * have been forwarded.
 */
ip6_map_ip6_reass (vlib_main_t *vm,
                   vlib_node_runtime_t *node,
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_ip6_reass_node.index);
  u32 *fragments_to_drop = NULL;   /* buffer indices to send to error-drop */
  u32 *fragments_ready = NULL;     /* buffer indices ready to re-enter ip6-map */

  from = vlib_frame_vector_args(frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0) {
    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);

    while (n_left_from > 0 && n_left_to_next > 0) {
      u8 error0 = MAP_ERROR_NONE;
      ip6_frag_hdr_t *frag0;

      pi0 = to_next[0] = from[0];

      p0 = vlib_get_buffer(vm, pi0);
      ip60 = vlib_buffer_get_current(p0);
      frag0 = (ip6_frag_hdr_t *)(ip60 + 1);
      /* Fragment offset is the field with the low 3 flag bits masked off. */
      offset = clib_host_to_net_u16(frag0->fragment_offset_and_more) & (~7);
      frag_len = clib_net_to_host_u16(ip60->payload_length) - sizeof(*frag0);
      /* 0xffff marks the last fragment (no successor expected). */
      next_offset = ip6_frag_hdr_more(frag0) ? (offset + frag_len) : (0xffff);

      //FIXME: Support other extension headers, maybe

      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
        map_ip6_map_ip6_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
        tr->frag_len = frag_len;

      map_ip6_reass_lock();
      map_ip6_reass_t *r = map_ip6_reass_get(&ip60->src_address, &ip60->dst_address,
                                             frag0->identification, frag0->next_hdr, &fragments_to_drop);
      //FIXME: Use better error codes
      if (PREDICT_FALSE(!r)) {
        // Could not create a caching entry
        error0 = MAP_ERROR_FRAGMENT_MEMORY;
      } else if (PREDICT_FALSE((frag_len <= 20 &&
                                (ip6_frag_hdr_more(frag0) || (!offset))))) {
        //Very small fragment are restricted to the last one and
        //can't be the first one
        error0 = MAP_ERROR_FRAGMENT_MALFORMED;
      } else if (map_ip6_reass_add_fragment(r, pi0, offset, next_offset, (u8 *)(frag0 + 1), frag_len)) {
        map_ip6_reass_free(r, &fragments_to_drop);
        error0 = MAP_ERROR_FRAGMENT_MEMORY;
#ifdef MAP_IP6_REASS_COUNT_BYTES
        /* Last fragment fixes the expected total payload size. */
        if (!ip6_frag_hdr_more(frag0))
          r->expected_total = offset + frag_len;
        ip6_map_ip6_reass_prepare(vm, node, r, &fragments_ready, &fragments_to_drop);
#ifdef MAP_IP6_REASS_COUNT_BYTES
        if(r->forwarded >= r->expected_total)
          map_ip6_reass_free(r, &fragments_to_drop);
      map_ip6_reass_unlock();

      if (error0 == MAP_ERROR_NONE) {
          //All data from that packet was copied no need to keep it, but this is not an error
          p0->error = error_node->errors[MAP_ERROR_NONE];
          vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, IP6_MAP_IP6_REASS_NEXT_DROP);
        p0->error = error_node->errors[error0];
        vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, IP6_MAP_IP6_REASS_NEXT_DROP);
    vlib_put_next_frame(vm, node, next_index, n_left_to_next);

  /* Flush the batched ready/drop vectors to their respective next nodes. */
  map_send_all_to_node(vm, fragments_ready, node,
                       &error_node->errors[MAP_ERROR_NONE],
                       IP6_MAP_IP6_REASS_NEXT_IP6_MAP);
  map_send_all_to_node(vm, fragments_to_drop, node,
                       &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
                       IP6_MAP_IP6_REASS_NEXT_DROP);

  vec_free(fragments_to_drop);
  vec_free(fragments_ready);
  return frame->n_vectors;
/*
 * "ip6-map-ip4-reass" node: virtual reassembly of the inner IPv4
 * fragments so the L4 port (needed by the MAP security check) can be
 * learned from the first fragment and applied to all of them.
 * Fragments arriving before the port is known are cached and looped
 * back through this node once the first fragment reveals the port.
 */
ip6_map_ip4_reass (vlib_main_t *vm,
                   vlib_node_runtime_t *node,
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_ip4_reass_node.index);
  map_main_t *mm = &map_main;
  vlib_combined_counter_main_t *cm = mm->domain_counters;
  u32 cpu_index = os_get_cpu_number();
  u32 *fragments_to_drop = NULL;      /* buffer indices destined to error-drop */
  u32 *fragments_to_loopback = NULL;  /* cached fragments re-injected into this node */

  from = vlib_frame_vector_args(frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;
  while (n_left_from > 0) {
    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);

    while (n_left_from > 0 && n_left_to_next > 0) {
      u8 error0 = MAP_ERROR_NONE;
      u32 map_domain_index0 = ~0;
      u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP;

      pi0 = to_next[0] = from[0];

      p0 = vlib_get_buffer(vm, pi0);
      ip40 = vlib_buffer_get_current(p0);
      /* The outer IPv6 header still sits immediately before the IPv4 header. */
      ip60 = ((ip6_header_t *)ip40) - 1;

      d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32,
                              &map_domain_index0, &error0);

      map_ip4_reass_lock();
      //This node only deals with fragmented ip4
      map_ip4_reass_t *r = map_ip4_reass_get(ip40->src_address.as_u32, ip40->dst_address.as_u32,
                                             ip40->fragment_id, ip40->protocol, &fragments_to_drop);
      if (PREDICT_FALSE(!r)) {
        // Could not create a caching entry
        error0 = MAP_ERROR_FRAGMENT_MEMORY;
      } else if (PREDICT_TRUE(ip4_get_fragment_offset(ip40))) {
        // This is a fragment
          // We know the port already
      } else if (map_ip4_reass_add_fragment(r, pi0)) {
        // Not enough space for caching
        error0 = MAP_ERROR_FRAGMENT_MEMORY;
        map_ip4_reass_free(r, &fragments_to_drop);
      } else if ((port0 = ip4_get_port(ip40, MAP_SENDER, p0->current_length)) < 0) {
        // Could not find port from first fragment. Stop reassembling.
        error0 = MAP_ERROR_BAD_PROTOCOL;
        map_ip4_reass_free(r, &fragments_to_drop);
        // Found port. Remember it and loopback saved fragments
        map_ip4_reass_get_fragments(r, &fragments_to_loopback);

#ifdef MAP_IP4_REASS_COUNT_BYTES
        r->forwarded += clib_host_to_net_u16(ip40->length) - 20;
        if (!ip4_get_fragment_more(ip40))
          r->expected_total = ip4_get_fragment_offset(ip40) * 8 + clib_host_to_net_u16(ip40->length) - 20;
        if(r->forwarded >= r->expected_total)
          map_ip4_reass_free(r, &fragments_to_drop);

      map_ip4_reass_unlock();

      /* Port known: run the deferred MAP inbound security check. */
      if(PREDICT_TRUE(error0 == MAP_ERROR_NONE))
        error0 = ip6_map_sec_check(d0, port0, ip40, ip60) ? MAP_ERROR_NONE : MAP_ERROR_DECAP_SEC_CHECK;

      if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu) &&
                        error0 == MAP_ERROR_NONE && !cached)) {
        vnet_buffer(p0)->ip_frag.header_offset = 0;
        vnet_buffer(p0)->ip_frag.flags = 0;
        vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
        vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
        next0 = IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT;

      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
        map_ip6_map_ip4_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
        tr->map_domain_index = map_domain_index0;

      if (error0 == MAP_ERROR_NONE)
        vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1,
                                        clib_net_to_host_u16(ip40->length));
      next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_IP4_REASS_NEXT_DROP;
      p0->error = error_node->errors[error0];
      vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);

      //Loopback when we reach the end of the input vector
      if(n_left_from == 0 && vec_len(fragments_to_loopback)) {
        from = vlib_frame_vector_args(frame);
        u32 len = vec_len(fragments_to_loopback);
        if(len <= VLIB_FRAME_SIZE) {
          clib_memcpy(from, fragments_to_loopback, sizeof(u32)*len);
          vec_reset_length(fragments_to_loopback);
          /* More loopback fragments than fit in one frame: take the tail now. */
          clib_memcpy(from, fragments_to_loopback + (len - VLIB_FRAME_SIZE), sizeof(u32)*VLIB_FRAME_SIZE);
          n_left_from = VLIB_FRAME_SIZE;
          _vec_len(fragments_to_loopback) = len - VLIB_FRAME_SIZE;
    vlib_put_next_frame(vm, node, next_index, n_left_to_next);

  map_send_all_to_node(vm, fragments_to_drop, node,
                       &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
                       IP6_MAP_IP4_REASS_NEXT_DROP);

  vec_free(fragments_to_drop);
  vec_free(fragments_to_loopback);
  return frame->n_vectors;
/*
 * "ip6-map-icmp-relay" node: translate ICMPv6 errors generated inside
 * the IPv6 tunnel into ICMPv4 errors addressed to the original IPv4
 * sender (RFC 2473 section 8.3 style relaying).
 */
ip6_map_icmp_relay (vlib_main_t *vm,
                    vlib_node_runtime_t *node,
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_icmp_relay_node.index);
  map_main_t *mm = &map_main;
  u32 cpu_index = os_get_cpu_number();
  u16 *fragment_ids, *fid;

  from = vlib_frame_vector_args(frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  /* Get random fragment IDs for replies. */
  fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer, n_left_from * sizeof (fragment_ids[0]));

  while (n_left_from > 0) {
    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);

    while (n_left_from > 0 && n_left_to_next > 0) {
      u8 error0 = MAP_ERROR_NONE;
      u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP;

      pi0 = to_next[0] = from[0];

      p0 = vlib_get_buffer(vm, pi0);
      ip60 = vlib_buffer_get_current(p0);
      u16 tlen = clib_net_to_host_u16(ip60->payload_length);

      /*
       * Expected layout: outer IPv6 + ICMPv6 + embedded IPv6 +
       * Original IPv4 header / packet ... which becomes
       * new IPv4 + ICMPv4 +
       * Original IPv4 header / packet
       */
      /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */
        error0 = MAP_ERROR_ICMP_RELAY;

      icmp46_header_t *icmp60 = (icmp46_header_t *)(ip60 + 1);
      ip6_header_t *inner_ip60 = (ip6_header_t *)(icmp60 + 2);

      /* Only errors about IPv4-in-IPv6 tunnel packets can be relayed. */
      if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP) {
        error0 = MAP_ERROR_ICMP_RELAY;

      ip4_header_t *inner_ip40 = (ip4_header_t *)(inner_ip60 + 1);
      vlib_buffer_advance(p0, 60); /* sizeof ( IPv6 + ICMP + IPv6 - IPv4 - ICMP ) */
      ip4_header_t *new_ip40 = vlib_buffer_get_current(p0);
      icmp46_header_t *new_icmp40 = (icmp46_header_t *)(new_ip40 + 1);

      /*
       * Relay according to RFC2473, section 8.3
       */
      switch (icmp60->type) {
      case ICMP6_destination_unreachable:
      case ICMP6_time_exceeded:
      case ICMP6_parameter_problem:
        /* Type 3 - destination unreachable, Code 1 - host unreachable */
        new_icmp40->type = ICMP4_destination_unreachable;
        new_icmp40->code = ICMP4_destination_unreachable_destination_unreachable_host;

      case ICMP6_packet_too_big:
        /* Type 3 - destination unreachable, Code 4 - packet too big */
        /* Potential TODO: Adjust domain tunnel MTU based on the value received here */
        mtu = clib_net_to_host_u32(*((u32 *)(icmp60 + 1)));

        /* DF bit clear: the sender never asked for PMTU feedback; drop. */
        if (!(inner_ip40->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT))) {
          error0 = MAP_ERROR_ICMP_RELAY;

        new_icmp40->type = ICMP4_destination_unreachable;
        new_icmp40->code = ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set;
        *((u32 *)(new_icmp40 + 1)) = clib_host_to_net_u32(mtu < 1280 ? 1280 : mtu);

        error0 = MAP_ERROR_ICMP_RELAY;

      /*
       * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812)
       */
      new_ip40->ip_version_and_header_length = 0x45;
      u16 nlen = (tlen - 20) > 576 ? 576 : tlen - 20;
      new_ip40->length = clib_host_to_net_u16(nlen);
      new_ip40->fragment_id = fid[0]; fid++;  /* consume one pre-generated random ID */
      new_ip40->protocol = IP_PROTOCOL_ICMP;
      new_ip40->src_address = mm->icmp4_src_address;
      new_ip40->dst_address = inner_ip40->src_address;
      new_ip40->checksum = ip4_header_checksum(new_ip40);

      new_icmp40->checksum = 0;
      ip_csum_t sum = ip_incremental_checksum(0, new_icmp40, nlen - 20);
      new_icmp40->checksum = ~ip_csum_fold(sum);

      vlib_increment_simple_counter(&mm->icmp_relayed, cpu_index, 0, 1);

      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
        map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
        tr->map_domain_index = 0;

      next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP;
      p0->error = error_node->errors[error0];
      vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
    vlib_put_next_frame(vm, node, next_index, n_left_to_next);

  return frame->n_vectors;
/* Error counter strings, expanded from the MAP error #define list. */
static char *map_error_strings[] = {
#define _(sym,string) string,
/* Main "ip6-map" node registration: decapsulation entry point. */
VLIB_REGISTER_NODE(ip6_map_node) = {
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,
  .n_next_nodes = IP6_MAP_N_NEXT,
    [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
#ifdef MAP_SKIP_IP6_LOOKUP
    [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite-transit",
    [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass",
    [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass",
    [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
    [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay",
    [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local",
    [IP6_MAP_NEXT_DROP] = "error-drop",
    [IP6_MAP_NEXT_ICMP] = "ip6-icmp-error",
/* Outer IPv6 virtual-reassembly node registration. */
VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = {
  .function = ip6_map_ip6_reass,
  .name = "ip6-map-ip6-reass",
  .vector_size = sizeof(u32),
  .format_trace = format_ip6_map_ip6_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,
  .n_next_nodes = IP6_MAP_IP6_REASS_N_NEXT,
    [IP6_MAP_IP6_REASS_NEXT_IP6_MAP] = "ip6-map",
    [IP6_MAP_IP6_REASS_NEXT_DROP] = "error-drop",
/* Inner IPv4 virtual-reassembly node registration. */
VLIB_REGISTER_NODE(ip6_map_ip4_reass_node) = {
  .function = ip6_map_ip4_reass,
  .name = "ip6-map-ip4-reass",
  .vector_size = sizeof(u32),
  .format_trace = format_ip6_map_ip4_reass_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,
  .n_next_nodes = IP6_MAP_IP4_REASS_N_NEXT,
    [IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup",
    [IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
    [IP6_MAP_IP4_REASS_NEXT_DROP] = "error-drop",
992 VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
993 .function = ip6_map_icmp_relay,
994 .name = "ip6-map-icmp-relay",
995 .vector_size = sizeof(u32),
996 .format_trace = format_map_trace, //FIXME
997 .type = VLIB_NODE_TYPE_INTERNAL,
998 .n_errors = MAP_N_ERROR,
999 .error_strings = map_error_strings,
1000 .n_next_nodes = IP6_ICMP_RELAY_N_NEXT,
1002 [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup",
1003 [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop",