2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 #include <vnet/ip/ip_frag.h>
18 #include <vnet/ip/ip4_to_ip6.h>
// Next-node indices used by ip4_map_t_node; that node's registration maps
// each index to the name of the node the packet is handed to.
22 IP4_MAPT_NEXT_MAPT_TCP_UDP,
23 IP4_MAPT_NEXT_MAPT_ICMP,
24 IP4_MAPT_NEXT_MAPT_FRAGMENTED,
// Next-node indices of the ip4-map-t-icmp node: IPv6 lookup, IPv6
// fragmentation, or drop (see the node registration for the node names).
31 IP4_MAPT_ICMP_NEXT_IP6_LOOKUP,
32 IP4_MAPT_ICMP_NEXT_IP6_FRAG,
33 IP4_MAPT_ICMP_NEXT_DROP,
35 } ip4_mapt_icmp_next_t;
// Next-node indices of the ip4-map-t-tcp-udp node: IPv6 lookup, IPv6
// fragmentation, or drop. The trailing N_NEXT member is the count given to
// the node registration as n_next_nodes.
39 IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP,
40 IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG,
41 IP4_MAPT_TCP_UDP_NEXT_DROP,
42 IP4_MAPT_TCP_UDP_N_NEXT
43 } ip4_mapt_tcp_udp_next_t;
// Next-node indices of the ip4-map-t-fragmented node: IPv6 lookup, IPv6
// fragmentation, or drop. The trailing N_NEXT member is the count given to
// the node registration as n_next_nodes.
47 IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP,
48 IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG,
49 IP4_MAPT_FRAGMENTED_NEXT_DROP,
50 IP4_MAPT_FRAGMENTED_N_NEXT
51 } ip4_mapt_fragmented_next_t;
// Pseudo header that ip4_map_t writes in front of the packet data. Its saddr
// and daddr members hold the translated IPv6 addresses; ip4_to_ip6_set_cb
// later copies them into the IPv6 header.
53 //This is used to pass information within the buffer data.
54 //Buffer structure being too small to contain big structures like this.
56 typedef CLIB_PACKED (struct {
59 //IPv6 header + Fragmentation header will be here
60 //sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4)
62 }) ip4_mapt_pseudo_header_t;
// Looks up the reassembly entry for the flow this IPv4 packet belongs to,
// holding the MAP IPv4 reassembly lock for the duration of the lookup.
// The lookup key includes the source and destination addresses and the
// protocol (an ICMP packet appears to be keyed as ICMP6).
// NOTE(review): presumably stores `port` in that entry so later fragments can
// retrieve it; ip4_map_t treats a non-zero return value as a failure --
// confirm against the full function body.
66 static_always_inline int
67 ip4_map_fragment_cache (ip4_header_t * ip4, u16 port)
70 map_ip4_reass_lock ();
72 map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32,
75 IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol,
80 map_ip4_reass_unlock ();
// Returns the port stored in the reassembly entry that matches this IPv4
// packet, or -1 when no entry is found. The lookup and the read of the port
// both happen while the MAP IPv4 reassembly lock is held.
84 static_always_inline i32
85 ip4_map_fragment_get_port (ip4_header_t * ip4)
88 map_ip4_reass_lock ();
90 map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32,
93 IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol,
// -1 signals "no cached port" to the caller.
95 i32 ret = r ? r->port : -1;
96 map_ip4_reass_unlock ();
// Context handed to the ICMP translation callbacks; they read its `d` (MAP
// domain) and `recv_port` members.
104 } icmp_to_icmp6_ctx_t;
// Address callback for the ICMP packet's own IP header.
// The IPv6 source is the IPv4 source embedded through the domain's mapping;
// the IPv6 destination is built from the domain's prefix/suffix functions
// using the IPv4 destination and the receive port held in the context.
// `arg` is an icmp_to_icmp6_ctx_t.
107 ip4_to_ip6_set_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg)
109 icmp_to_icmp6_ctx_t *ctx = arg;
111 ip4_map_t_embedded_address (ctx->d, &ip6->src_address, &ip4->src_address);
112 ip6->dst_address.as_u64[0] =
113 map_get_pfx_net (ctx->d, ip4->dst_address.as_u32, ctx->recv_port);
114 ip6->dst_address.as_u64[1] =
115 map_get_sfx_net (ctx->d, ip4->dst_address.as_u32, ctx->recv_port);
// Address callback for the IP header carried inside an ICMP message.
// The roles are mirrored compared to ip4_to_ip6_set_icmp_cb: here the IPv6
// destination is the embedded IPv4 destination, and the IPv6 source is built
// from the domain's prefix/suffix functions using the IPv4 source and the
// receive port held in the context. `arg` is an icmp_to_icmp6_ctx_t.
121 ip4_to_ip6_set_inner_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6,
124 icmp_to_icmp6_ctx_t *ctx = arg;
126 //Note that the source address is within the domain
127 //while the destination address is the one outside the domain
128 ip4_map_t_embedded_address (ctx->d, &ip6->dst_address, &ip4->dst_address);
129 ip6->src_address.as_u64[0] =
130 map_get_pfx_net (ctx->d, ip4->src_address.as_u32, ctx->recv_port);
131 ip6->src_address.as_u64[1] =
132 map_get_sfx_net (ctx->d, ip4->src_address.as_u32, ctx->recv_port);
// Node function of ip4-map-t-icmp: translates ICMPv4 packets for their MAP
// domain and enqueues each one to IPv6 lookup, IPv6 fragmentation or drop.
// Packets are processed one at a time. Returns the number of vectors in the
// input frame.
138 ip4_map_t_icmp (vlib_main_t * vm,
139 vlib_node_runtime_t * node, vlib_frame_t * frame)
141 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
// Errors are attributed to this node's own runtime.
142 vlib_node_runtime_t *error_node =
143 vlib_node_get_runtime (vm, ip4_map_t_icmp_node.index);
144 from = vlib_frame_vector_args (frame);
145 n_left_from = frame->n_vectors;
146 next_index = node->cached_next_index;
147 vlib_combined_counter_main_t *cm = map_main.domain_counters;
148 u32 thread_index = vm->thread_index;
150 while (n_left_from > 0)
152 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
154 while (n_left_from > 0 && n_left_to_next > 0)
158 ip4_mapt_icmp_next_t next0;
162 icmp_to_icmp6_ctx_t ctx0;
// Default disposition; overridden below on fragmentation or error.
165 next0 = IP4_MAPT_ICMP_NEXT_IP6_LOOKUP;
166 pi0 = to_next[0] = from[0];
171 error0 = MAP_ERROR_NONE;
173 p0 = vlib_get_buffer (vm, pi0);
174 vlib_buffer_advance (p0, sizeof (ip4_mapt_pseudo_header_t)); //The pseudo-header is not used
176 clib_net_to_host_u16 (((ip4_header_t *)
177 vlib_buffer_get_current (p0))->length);
// The domain index was stored in the buffer metadata by ip4_map_t.
179 pool_elt_at_index (map_main.domains,
180 vnet_buffer (p0)->map_t.map_domain_index);
182 ip40 = vlib_buffer_get_current (p0);
183 ctx0.recv_port = ip4_get_port (ip40, 1);
// A receive port of 0 is an error unless the domain is a 1:1 mapping.
185 if (ctx0.recv_port == 0)
187 // In case of 1:1 mapping, we don't care about the port
188 if (!(d0->ea_bits_len == 0 && d0->rules))
190 error0 = MAP_ERROR_ICMP;
196 (p0, ip4_to_ip6_set_icmp_cb, &ctx0,
197 ip4_to_ip6_set_inner_icmp_cb, &ctx0))
199 error0 = MAP_ERROR_ICMP;
// Packets longer than the domain MTU go through the IPv6 fragmentation node,
// which is told to continue with IPv6 lookup afterwards.
203 if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
205 vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
206 vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
207 next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG;
// Successful translations are counted in the domain's TX counter.
210 if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
212 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
215 map_t.map_domain_index, 1,
220 next0 = IP4_MAPT_ICMP_NEXT_DROP;
222 p0->error = error_node->errors[error0];
223 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
224 to_next, n_left_to_next, pi0,
227 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
229 return frame->n_vectors;
// Address callback used by the TCP/UDP and fragment translation nodes.
// `ctx` is the ip4_mapt_pseudo_header_t that precedes the packet; its
// precomputed source and destination addresses are copied into the IPv6
// header. The IPv4 header argument is not read in the lines shown here.
233 ip4_to_ip6_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *ctx)
235 ip4_mapt_pseudo_header_t *pheader = ctx;
237 ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0];
238 ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1];
239 ip6->src_address.as_u64[0] = pheader->saddr.as_u64[0];
240 ip6->src_address.as_u64[1] = pheader->saddr.as_u64[1];
// Node function of ip4-map-t-fragmented: translates IPv4 fragments to IPv6
// using the addresses stored in the MAP-T pseudo header, then enqueues each
// packet to IPv6 lookup, IPv6 fragmentation or drop.
// Returns the number of vectors in the input frame.
246 ip4_map_t_fragmented (vlib_main_t * vm,
247 vlib_node_runtime_t * node, vlib_frame_t * frame)
249 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
250 from = vlib_frame_vector_args (frame);
251 n_left_from = frame->n_vectors;
252 next_index = node->cached_next_index;
253 vlib_node_runtime_t *error_node =
254 vlib_node_get_runtime (vm, ip4_map_t_fragmented_node.index);
256 while (n_left_from > 0)
258 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
260 while (n_left_from > 0 && n_left_to_next > 0)
264 ip4_mapt_pseudo_header_t *pheader0;
265 ip4_mapt_fragmented_next_t next0;
267 next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP;
268 pi0 = to_next[0] = from[0];
274 p0 = vlib_get_buffer (vm, pi0);
// Keep a pointer to the pseudo header, then step past it so the buffer's
// current data starts after it; the pointer is passed on as callback context.
276 //Accessing pseudo header
277 pheader0 = vlib_buffer_get_current (p0);
278 vlib_buffer_advance (p0, sizeof (*pheader0));
// A non-zero result from the translation drops the packet.
280 if (ip4_to_ip6_fragmented (p0, ip4_to_ip6_set_cb, pheader0))
282 p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED];
283 next0 = IP4_MAPT_FRAGMENTED_NEXT_DROP;
// Packets longer than the domain MTU go through the IPv6 fragmentation node.
287 if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
289 vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
290 vnet_buffer (p0)->ip_frag.next_index =
291 IP6_FRAG_NEXT_IP6_LOOKUP;
292 next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG;
296 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
297 to_next, n_left_to_next, pi0,
300 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
302 return frame->n_vectors;
// Node function of ip4-map-t-tcp-udp: translates unfragmented TCP and UDP
// packets to IPv6 using the addresses stored in the MAP-T pseudo header, then
// enqueues each packet to IPv6 lookup, IPv6 fragmentation or drop.
// Returns the number of vectors in the input frame.
306 ip4_map_t_tcp_udp (vlib_main_t * vm,
307 vlib_node_runtime_t * node, vlib_frame_t * frame)
309 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
310 from = vlib_frame_vector_args (frame);
311 n_left_from = frame->n_vectors;
312 next_index = node->cached_next_index;
313 vlib_node_runtime_t *error_node =
314 vlib_node_get_runtime (vm, ip4_map_t_tcp_udp_node.index);
317 while (n_left_from > 0)
319 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
321 while (n_left_from > 0 && n_left_to_next > 0)
325 ip4_mapt_pseudo_header_t *pheader0;
326 ip4_mapt_tcp_udp_next_t next0;
328 pi0 = to_next[0] = from[0];
334 next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP;
335 p0 = vlib_get_buffer (vm, pi0);
// Keep a pointer to the pseudo header, then step past it so the buffer's
// current data starts after it; the pointer is passed on as callback context.
337 //Accessing pseudo header
338 pheader0 = vlib_buffer_get_current (p0);
339 vlib_buffer_advance (p0, sizeof (*pheader0));
// A non-zero result from the translation drops the packet.
341 if (ip4_to_ip6_tcp_udp (p0, ip4_to_ip6_set_cb, pheader0))
343 p0->error = error_node->errors[MAP_ERROR_UNKNOWN];
344 next0 = IP4_MAPT_TCP_UDP_NEXT_DROP;
348 if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
350 //Send to fragmentation node if necessary
351 vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
352 vnet_buffer (p0)->ip_frag.next_index =
353 IP6_FRAG_NEXT_IP6_LOOKUP;
354 next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
357 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
358 to_next, n_left_to_next, pi0,
361 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
364 return frame->n_vectors;
// Classifies an IPv4 packet for MAP-T translation. Selects the next node
// through *next0, writes the port used for the MAP lookup to *dst_port0 and
// reports malformed or unsupported packets through *error0 (left untouched
// when nothing is wrong).
// The fixed offsets below assume a 20-byte IPv4 header; the caller compares
// ip_version_and_header_length against 0x45.
// Ports are copied as found in the packet, i.e. still in network byte order.
367 static_always_inline void
368 ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0,
369 ip4_header_t * ip40, u16 ip4_len0, i32 * dst_port0,
370 u8 * error0, ip4_mapt_next_t * next0)
// Fragments with a non-zero offset: the port comes from the reassembly
// cache, and a missing entry (-1) is reported as MAP_ERROR_FRAGMENT_MEMORY.
// NOTE(review): for a 1:1 mapping (no EA bits, rules present) the cache
// lookup is presumably skipped -- confirm.
372 if (PREDICT_FALSE (ip4_get_fragment_offset (ip40)))
374 *next0 = IP4_MAPT_NEXT_MAPT_FRAGMENTED;
375 if (d0->ea_bits_len == 0 && d0->rules)
381 *dst_port0 = ip4_map_fragment_get_port (ip40);
382 *error0 = (*dst_port0 == -1) ? MAP_ERROR_FRAGMENT_MEMORY : *error0;
// TCP: 36 = 20-byte IPv4 header + 16, the offset of the TCP checksum field;
// 40 = IPv4 header + minimal 20-byte TCP header; the destination port sits
// 2 bytes into the L4 header.
385 else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP))
387 vnet_buffer (p0)->map_t.checksum_offset = 36;
388 *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP;
389 *error0 = ip4_len0 < 40 ? MAP_ERROR_MALFORMED : *error0;
390 *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 2));
// UDP: 26 = 20-byte IPv4 header + 6, the offset of the UDP checksum field;
// 28 = IPv4 header + 8-byte UDP header.
392 else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_UDP))
394 vnet_buffer (p0)->map_t.checksum_offset = 26;
395 *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP;
396 *error0 = ip4_len0 < 28 ? MAP_ERROR_MALFORMED : *error0;
397 *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 2));
// ICMP: for echo messages a 16-bit value 6 bytes into the ICMP header is used
// as the port.
// NOTE(review): the test below reads the `code` field of the ICMP header but
// compares it with ICMP4_echo_request, which looks like an ICMP *type*
// value -- confirm whether `type` was intended.
399 else if (ip40->protocol == IP_PROTOCOL_ICMP)
401 *next0 = IP4_MAPT_NEXT_MAPT_ICMP;
402 if (d0->ea_bits_len == 0 && d0->rules)
404 else if (((icmp46_header_t *) u8_ptr_add (ip40, sizeof (*ip40)))->code
406 || ((icmp46_header_t *)
408 sizeof (*ip40)))->code == ICMP4_echo_request)
409 *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 6));
// Any other protocol is rejected.
413 *error0 = MAP_ERROR_BAD_PROTOCOL;
// Entry node function for MAP-T IPv4 packets. For every packet it validates
// the IPv4 header, finds the MAP domain, classifies the packet, computes the
// translated IPv6 addresses into a pseudo header placed in front of the
// packet, and dispatches to the TCP/UDP, ICMP or fragment node (or drops).
// Returns the number of vectors in the input frame.
418 ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
420 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
421 vlib_node_runtime_t *error_node =
422 vlib_node_get_runtime (vm, ip4_map_t_node.index);
423 from = vlib_frame_vector_args (frame);
424 n_left_from = frame->n_vectors;
425 next_index = node->cached_next_index;
426 vlib_combined_counter_main_t *cm = map_main.domain_counters;
427 u32 thread_index = vm->thread_index;
429 while (n_left_from > 0)
431 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
433 while (n_left_from > 0 && n_left_to_next > 0)
439 ip4_mapt_next_t next0;
443 ip4_mapt_pseudo_header_t *pheader0;
445 pi0 = to_next[0] = from[0];
450 error0 = MAP_ERROR_NONE;
452 p0 = vlib_get_buffer (vm, pi0);
453 ip40 = vlib_buffer_get_current (p0);
// NOTE(review): the length field is in network order, so net_to_host would be
// the expected name here (ip4_map_t_icmp uses clib_net_to_host_u16 on the
// same field); a 16-bit byte swap is the same operation in both directions.
454 ip4_len0 = clib_host_to_net_u16 (ip40->length);
// Reject packets whose buffer is shorter than the IPv4 total length, or whose
// header is not IPv4 with a 20-byte (option-less) header.
455 if (PREDICT_FALSE (p0->current_length < ip4_len0 ||
456 ip40->ip_version_and_header_length != 0x45))
458 error0 = MAP_ERROR_UNKNOWN;
459 next0 = IP4_MAPT_NEXT_DROP;
// The MAP domain index is taken from the TX adjacency index stored in the
// buffer and kept in the map_t metadata for the downstream nodes.
462 vnet_buffer (p0)->map_t.map_domain_index =
463 vnet_buffer (p0)->ip.adj_index[VLIB_TX];
464 d0 = ip4_map_get_domain (vnet_buffer (p0)->map_t.map_domain_index);
// A domain MTU of 0 is stored as ~0.
466 vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
469 ip4_map_t_classify (p0, d0, ip40, ip4_len0, &dst_port0, &error0,
472 /* Verify that port is not among the well-known ports */
473 if ((d0->psid_length > 0 && d0->psid_offset > 0)
474 && (clib_net_to_host_u16 (dst_port0) <
475 (0x1 << (16 - d0->psid_offset))))
477 error0 = MAP_ERROR_SEC_CHECK;
480 //Add MAP-T pseudo header in front of the packet
481 vlib_buffer_advance (p0, -sizeof (*pheader0));
482 pheader0 = vlib_buffer_get_current (p0);
484 //Save addresses within the packet
485 ip4_map_t_embedded_address (d0, &pheader0->saddr,
487 pheader0->daddr.as_u64[0] =
488 map_get_pfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
489 pheader0->daddr.as_u64[1] =
490 map_get_sfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
492 //It is important to cache at this stage because the result might be necessary
493 //for packets within the same vector.
494 //Actually, this approach even provides some limited out-of-order fragments support
496 (ip4_is_first_fragment (ip40) && (dst_port0 != -1)
497 && (d0->ea_bits_len != 0 || !d0->rules)
498 && ip4_map_fragment_cache (ip40, dst_port0)))
500 error0 = MAP_ERROR_UNKNOWN;
504 (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
// ICMP packets are excluded from this count; the ip4-map-t-icmp node
// increments the domain TX counter itself.
506 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
509 map_t.map_domain_index, 1,
// Any recorded error overrides the classification and drops the packet.
514 next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0;
515 p0->error = error_node->errors[error0];
516 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
517 to_next, n_left_to_next, pi0,
520 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
522 return frame->n_vectors;
// Error strings shared by all MAP-T node registrations in this file. The
// table is generated by expanding each (sym,string) entry to its string.
525 static char *map_t_error_strings[] = {
526 #define _(sym,string) string,
// Registration of the "ip4-map-t-fragmented" internal graph node, with its
// error strings and the node name behind each next index.
532 VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = {
533 .function = ip4_map_t_fragmented,
534 .name = "ip4-map-t-fragmented",
535 .vector_size = sizeof(u32),
536 .format_trace = format_map_trace,
537 .type = VLIB_NODE_TYPE_INTERNAL,
539 .n_errors = MAP_N_ERROR,
540 .error_strings = map_t_error_strings,
542 .n_next_nodes = IP4_MAPT_FRAGMENTED_N_NEXT,
544 [IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP] = "ip6-lookup",
545 [IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
546 [IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
// Registration of the "ip4-map-t-icmp" internal graph node, with its error
// strings and the node name behind each next index.
552 VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = {
553 .function = ip4_map_t_icmp,
554 .name = "ip4-map-t-icmp",
555 .vector_size = sizeof(u32),
556 .format_trace = format_map_trace,
557 .type = VLIB_NODE_TYPE_INTERNAL,
559 .n_errors = MAP_N_ERROR,
560 .error_strings = map_t_error_strings,
562 .n_next_nodes = IP4_MAPT_ICMP_N_NEXT,
564 [IP4_MAPT_ICMP_NEXT_IP6_LOOKUP] = "ip6-lookup",
565 [IP4_MAPT_ICMP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
566 [IP4_MAPT_ICMP_NEXT_DROP] = "error-drop",
// Registration of the "ip4-map-t-tcp-udp" internal graph node, with its error
// strings and the node name behind each next index.
572 VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = {
573 .function = ip4_map_t_tcp_udp,
574 .name = "ip4-map-t-tcp-udp",
575 .vector_size = sizeof(u32),
576 .format_trace = format_map_trace,
577 .type = VLIB_NODE_TYPE_INTERNAL,
579 .n_errors = MAP_N_ERROR,
580 .error_strings = map_t_error_strings,
582 .n_next_nodes = IP4_MAPT_TCP_UDP_N_NEXT,
584 [IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP] = "ip6-lookup",
585 [IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
586 [IP4_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
// Registration of the MAP-T IPv4 entry node (function ip4_map_t). Its next
// nodes are the three per-class translation nodes registered in this file,
// plus "error-drop".
592 VLIB_REGISTER_NODE(ip4_map_t_node) = {
593 .function = ip4_map_t,
595 .vector_size = sizeof(u32),
596 .format_trace = format_map_trace,
597 .type = VLIB_NODE_TYPE_INTERNAL,
599 .n_errors = MAP_N_ERROR,
600 .error_strings = map_t_error_strings,
602 .n_next_nodes = IP4_MAPT_N_NEXT,
604 [IP4_MAPT_NEXT_MAPT_TCP_UDP] = "ip4-map-t-tcp-udp",
605 [IP4_MAPT_NEXT_MAPT_ICMP] = "ip4-map-t-icmp",
606 [IP4_MAPT_NEXT_MAPT_FRAGMENTED] = "ip4-map-t-fragmented",
607 [IP4_MAPT_NEXT_DROP] = "error-drop",
613 * fd.io coding-style-patch-verification: ON
616 * eval: (c-set-style "gnu")