vcl: allow more rx events on peek
[vpp.git] / src / plugins / map / ip4_map_t.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include "map.h"
16
17 #include <vnet/ip/ip_frag.h>
18 #include <vnet/ip/ip4_to_ip6.h>
19
/*
 * Next-node slots of the "ip4-map-t" node.  Each value indexes the
 * next_nodes table of that node's registration; the node name bound to
 * each slot is given alongside.
 */
typedef enum
{
  IP4_MAPT_NEXT_MAPT_TCP_UDP = 0,	/* "ip4-map-t-tcp-udp" */
  IP4_MAPT_NEXT_MAPT_ICMP,	/* "ip4-map-t-icmp" */
  IP4_MAPT_NEXT_MAPT_FRAGMENTED,	/* "ip4-map-t-fragmented" */
  IP4_MAPT_NEXT_ICMP_ERROR,	/* "ip4-icmp-error" */
  IP4_MAPT_NEXT_DROP,		/* "error-drop" */
  IP4_MAPT_N_NEXT		/* slot count, not a valid next node */
} ip4_mapt_next_t;
29
/*
 * Next-node slots of the "ip4-map-t-icmp" node (indices into the
 * next_nodes table of its registration).
 */
typedef enum
{
  IP4_MAPT_ICMP_NEXT_IP6_LOOKUP = 0,	/* "ip6-lookup" */
  IP4_MAPT_ICMP_NEXT_IP6_REWRITE,	/* "ip6-load-balance" */
  IP4_MAPT_ICMP_NEXT_IP6_FRAG,	/* IP6_FRAG_NODE_NAME */
  IP4_MAPT_ICMP_NEXT_DROP,	/* "error-drop" */
  IP4_MAPT_ICMP_N_NEXT		/* slot count, not a valid next node */
} ip4_mapt_icmp_next_t;
38
/*
 * Next-node slots of the "ip4-map-t-tcp-udp" node (indices into the
 * next_nodes table of its registration).
 */
typedef enum
{
  IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP = 0,	/* "ip6-lookup" */
  IP4_MAPT_TCP_UDP_NEXT_IP6_REWRITE,	/* "ip6-load-balance" */
  IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG,	/* IP6_FRAG_NODE_NAME */
  IP4_MAPT_TCP_UDP_NEXT_DROP,	/* "error-drop" */
  IP4_MAPT_TCP_UDP_N_NEXT	/* slot count, not a valid next node */
} ip4_mapt_tcp_udp_next_t;
47
/*
 * Next-node slots of the "ip4-map-t-fragmented" node (indices into the
 * next_nodes table of its registration).
 */
typedef enum
{
  IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP = 0,	/* "ip6-lookup" */
  IP4_MAPT_FRAGMENTED_NEXT_IP6_REWRITE,	/* "ip6-load-balance" */
  IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG,	/* IP6_FRAG_NODE_NAME */
  IP4_MAPT_FRAGMENTED_NEXT_DROP,	/* "error-drop" */
  IP4_MAPT_FRAGMENTED_N_NEXT	/* slot count, not a valid next node */
} ip4_mapt_fragmented_next_t;
56
57 //This is used to pass information within the buffer data.
58 //Buffer structure being too small to contain big structures like this.
59 typedef CLIB_PACKED (struct {
60   ip6_address_t daddr;
61   ip6_address_t saddr;
62   //IPv6 header + Fragmentation header will be here
63   //sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4)
64   u8 unused[28];
65 }) ip4_mapt_pseudo_header_t;
66
67 typedef struct
68 {
69   map_domain_t *d;
70   u16 recv_port;
71 } icmp_to_icmp6_ctx_t;
72
73 static int
74 ip4_to_ip6_set_icmp_cb (vlib_buffer_t * b, ip4_header_t * ip4,
75                         ip6_header_t * ip6, void *arg)
76 {
77   icmp_to_icmp6_ctx_t *ctx = arg;
78
79   ip4_map_t_embedded_address (ctx->d, &ip6->src_address, &ip4->src_address);
80   ip6->dst_address.as_u64[0] =
81     map_get_pfx_net (ctx->d, ip4->dst_address.as_u32, ctx->recv_port);
82   ip6->dst_address.as_u64[1] =
83     map_get_sfx_net (ctx->d, ip4->dst_address.as_u32, ctx->recv_port);
84
85   return 0;
86 }
87
88 static int
89 ip4_to_ip6_set_inner_icmp_cb (vlib_buffer_t * b, ip4_header_t * ip4,
90                               ip6_header_t * ip6, void *arg)
91 {
92   icmp_to_icmp6_ctx_t *ctx = arg;
93   ip4_address_t old_src, old_dst;
94
95   old_src.as_u32 = ip4->src_address.as_u32;
96   old_dst.as_u32 = ip4->dst_address.as_u32;
97
98   //Note that the source address is within the domain
99   //while the destination address is the one outside the domain
100   ip4_map_t_embedded_address (ctx->d, &ip6->dst_address, &old_dst);
101   ip6->src_address.as_u64[0] =
102     map_get_pfx_net (ctx->d, old_src.as_u32, ctx->recv_port);
103   ip6->src_address.as_u64[1] =
104     map_get_sfx_net (ctx->d, old_src.as_u32, ctx->recv_port);
105
106   return 0;
107 }
108
109 static uword
110 ip4_map_t_icmp (vlib_main_t * vm,
111                 vlib_node_runtime_t * node, vlib_frame_t * frame)
112 {
113   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
114   vlib_node_runtime_t *error_node =
115     vlib_node_get_runtime (vm, ip4_map_t_icmp_node.index);
116   from = vlib_frame_vector_args (frame);
117   n_left_from = frame->n_vectors;
118   next_index = node->cached_next_index;
119   vlib_combined_counter_main_t *cm = map_main.domain_counters;
120   u32 thread_index = vm->thread_index;
121
122   while (n_left_from > 0)
123     {
124       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
125
126       while (n_left_from > 0 && n_left_to_next > 0)
127         {
128           u32 pi0;
129           vlib_buffer_t *p0;
130           ip4_mapt_icmp_next_t next0;
131           u8 error0;
132           map_domain_t *d0;
133           u16 len0;
134           icmp_to_icmp6_ctx_t ctx0;
135           ip4_header_t *ip40;
136
137           next0 = IP4_MAPT_ICMP_NEXT_IP6_LOOKUP;
138           pi0 = to_next[0] = from[0];
139           from += 1;
140           n_left_from -= 1;
141           to_next += 1;
142           n_left_to_next -= 1;
143           error0 = MAP_ERROR_NONE;
144
145           p0 = vlib_get_buffer (vm, pi0);
146           vlib_buffer_advance (p0, sizeof (ip4_mapt_pseudo_header_t));  //The pseudo-header is not used
147           len0 =
148             clib_net_to_host_u16 (((ip4_header_t *)
149                                    vlib_buffer_get_current (p0))->length);
150           d0 =
151             pool_elt_at_index (map_main.domains,
152                                vnet_buffer (p0)->map_t.map_domain_index);
153
154           ip40 = vlib_buffer_get_current (p0);
155           ctx0.recv_port = ip4_get_port (ip40, 0);
156           ctx0.d = d0;
157           if (ctx0.recv_port == 0)
158             {
159               // In case of 1:1 mapping, we don't care about the port
160               if (!(d0->ea_bits_len == 0 && d0->rules))
161                 {
162                   error0 = MAP_ERROR_ICMP;
163                   goto err0;
164                 }
165             }
166
167           if (icmp_to_icmp6
168               (p0, ip4_to_ip6_set_icmp_cb, &ctx0,
169                ip4_to_ip6_set_inner_icmp_cb, &ctx0))
170             {
171               error0 = MAP_ERROR_ICMP;
172               goto err0;
173             }
174
175           if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
176             {
177               vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
178               vnet_buffer (p0)->ip_frag.next_index = IP_FRAG_NEXT_IP6_LOOKUP;
179               next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG;
180             }
181           else
182             {
183               next0 = ip4_map_ip6_lookup_bypass (p0, NULL) ?
184                 IP4_MAPT_ICMP_NEXT_IP6_REWRITE : next0;
185             }
186         err0:
187           if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
188             {
189               vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
190                                                thread_index,
191                                                vnet_buffer (p0)->
192                                                map_t.map_domain_index, 1,
193                                                len0);
194             }
195           else
196             {
197               next0 = IP4_MAPT_ICMP_NEXT_DROP;
198             }
199           p0->error = error_node->errors[error0];
200           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
201                                            to_next, n_left_to_next, pi0,
202                                            next0);
203         }
204       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
205     }
206   return frame->n_vectors;
207 }
208
209 /*
210  * Translate fragmented IPv4 UDP/TCP packet to IPv6.
211  */
212 always_inline int
213 map_ip4_to_ip6_fragmented (vlib_buffer_t * p,
214                            ip4_mapt_pseudo_header_t * pheader)
215 {
216   ip4_header_t *ip4;
217   ip6_header_t *ip6;
218   ip6_frag_hdr_t *frag;
219
220   ip4 = vlib_buffer_get_current (p);
221   frag = (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
222   ip6 =
223     (ip6_header_t *) u8_ptr_add (ip4,
224                                  sizeof (*ip4) - sizeof (*frag) -
225                                  sizeof (*ip6));
226   vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
227
228   //We know that the protocol was one of ICMP, TCP or UDP
229   //because the first fragment was found and cached
230   frag->next_hdr =
231     (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol;
232   frag->identification = frag_id_4to6 (ip4->fragment_id);
233   frag->rsv = 0;
234   frag->fragment_offset_and_more =
235     ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (ip4),
236                                   clib_net_to_host_u16
237                                   (ip4->flags_and_fragment_offset) &
238                                   IP4_HEADER_FLAG_MORE_FRAGMENTS);
239
240   ip6->ip_version_traffic_class_and_flow_label =
241     clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
242   ip6->payload_length =
243     clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
244                           sizeof (*ip4) + sizeof (*frag));
245   ip6->hop_limit = ip4->ttl;
246   ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
247
248   ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0];
249   ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1];
250   ip6->src_address.as_u64[0] = pheader->saddr.as_u64[0];
251   ip6->src_address.as_u64[1] = pheader->saddr.as_u64[1];
252
253   return 0;
254 }
255
256 static uword
257 ip4_map_t_fragmented (vlib_main_t * vm,
258                       vlib_node_runtime_t * node, vlib_frame_t * frame)
259 {
260   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
261   from = vlib_frame_vector_args (frame);
262   n_left_from = frame->n_vectors;
263   next_index = node->cached_next_index;
264   vlib_node_runtime_t *error_node =
265     vlib_node_get_runtime (vm, ip4_map_t_fragmented_node.index);
266
267   while (n_left_from > 0)
268     {
269       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
270
271       while (n_left_from > 0 && n_left_to_next > 0)
272         {
273           u32 pi0;
274           vlib_buffer_t *p0;
275           ip4_mapt_pseudo_header_t *pheader0;
276           ip4_mapt_fragmented_next_t next0;
277
278           next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP;
279           pi0 = to_next[0] = from[0];
280           from += 1;
281           n_left_from -= 1;
282           to_next += 1;
283           n_left_to_next -= 1;
284
285           p0 = vlib_get_buffer (vm, pi0);
286
287           //Accessing pseudo header
288           pheader0 = vlib_buffer_get_current (p0);
289           vlib_buffer_advance (p0, sizeof (*pheader0));
290
291           if (map_ip4_to_ip6_fragmented (p0, pheader0))
292             {
293               p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED];
294               next0 = IP4_MAPT_FRAGMENTED_NEXT_DROP;
295             }
296           else
297             {
298               if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
299                 {
300                   vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
301                   vnet_buffer (p0)->ip_frag.next_index =
302                     IP_FRAG_NEXT_IP6_LOOKUP;
303                   next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG;
304                 }
305               else
306                 {
307                   next0 = ip4_map_ip6_lookup_bypass (p0, NULL) ?
308                     IP4_MAPT_FRAGMENTED_NEXT_IP6_REWRITE : next0;
309                 }
310             }
311
312           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
313                                            to_next, n_left_to_next, pi0,
314                                            next0);
315         }
316       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
317     }
318   return frame->n_vectors;
319 }
320
321 /*
322  * Translate IPv4 UDP/TCP packet to IPv6.
323  */
324 always_inline int
325 map_ip4_to_ip6_tcp_udp (vlib_buffer_t * p, ip4_mapt_pseudo_header_t * pheader)
326 {
327   map_main_t *mm = &map_main;
328   ip4_header_t *ip4;
329   ip6_header_t *ip6;
330   ip_csum_t csum;
331   u16 *checksum;
332   ip6_frag_hdr_t *frag;
333   u32 frag_id;
334   ip4_address_t old_src, old_dst;
335
336   ip4 = vlib_buffer_get_current (p);
337
338   if (ip4->protocol == IP_PROTOCOL_UDP)
339     {
340       udp_header_t *udp = ip4_next_header (ip4);
341       checksum = &udp->checksum;
342
343       /*
344        * UDP checksum is optional over IPv4 but mandatory for IPv6 We
345        * do not check udp->length sanity but use our safe computed
346        * value instead
347        */
348       if (PREDICT_FALSE (!*checksum))
349         {
350           u16 udp_len = clib_host_to_net_u16 (ip4->length) - sizeof (*ip4);
351           csum = ip_incremental_checksum (0, udp, udp_len);
352           csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
353           csum =
354             ip_csum_with_carry (csum, clib_host_to_net_u16 (IP_PROTOCOL_UDP));
355           csum = ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address)));
356           *checksum = ~ip_csum_fold (csum);
357         }
358     }
359   else
360     {
361       tcp_header_t *tcp = ip4_next_header (ip4);
362       if (mm->tcp_mss > 0)
363         {
364           csum = tcp->checksum;
365           map_mss_clamping (tcp, &csum, mm->tcp_mss);
366           tcp->checksum = ip_csum_fold (csum);
367         }
368       checksum = &tcp->checksum;
369     }
370
371   old_src.as_u32 = ip4->src_address.as_u32;
372   old_dst.as_u32 = ip4->dst_address.as_u32;
373
374   /* Deal with fragmented packets */
375   if (PREDICT_FALSE (ip4->flags_and_fragment_offset &
376                      clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)))
377     {
378       ip6 =
379         (ip6_header_t *) u8_ptr_add (ip4,
380                                      sizeof (*ip4) - sizeof (*ip6) -
381                                      sizeof (*frag));
382       frag =
383         (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
384       frag_id = frag_id_4to6 (ip4->fragment_id);
385       vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
386     }
387   else
388     {
389       ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6));
390       vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6));
391       frag = NULL;
392     }
393
394   ip6->ip_version_traffic_class_and_flow_label =
395     clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
396   ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4));
397   ip6->hop_limit = ip4->ttl;
398   ip6->protocol = ip4->protocol;
399   if (PREDICT_FALSE (frag != NULL))
400     {
401       frag->next_hdr = ip6->protocol;
402       frag->identification = frag_id;
403       frag->rsv = 0;
404       frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1);
405       ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
406       ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag));
407     }
408
409   ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0];
410   ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1];
411   ip6->src_address.as_u64[0] = pheader->saddr.as_u64[0];
412   ip6->src_address.as_u64[1] = pheader->saddr.as_u64[1];
413
414   csum = ip_csum_sub_even (*checksum, old_src.as_u32);
415   csum = ip_csum_sub_even (csum, old_dst.as_u32);
416   csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]);
417   csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
418   csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
419   csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
420   *checksum = ip_csum_fold (csum);
421
422   return 0;
423 }
424
425 static uword
426 ip4_map_t_tcp_udp (vlib_main_t * vm,
427                    vlib_node_runtime_t * node, vlib_frame_t * frame)
428 {
429   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
430   from = vlib_frame_vector_args (frame);
431   n_left_from = frame->n_vectors;
432   next_index = node->cached_next_index;
433   vlib_node_runtime_t *error_node =
434     vlib_node_get_runtime (vm, ip4_map_t_tcp_udp_node.index);
435
436
437   while (n_left_from > 0)
438     {
439       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
440
441       while (n_left_from > 0 && n_left_to_next > 0)
442         {
443           u32 pi0;
444           vlib_buffer_t *p0;
445           ip4_mapt_pseudo_header_t *pheader0;
446           ip4_mapt_tcp_udp_next_t next0;
447
448           pi0 = to_next[0] = from[0];
449           from += 1;
450           n_left_from -= 1;
451           to_next += 1;
452           n_left_to_next -= 1;
453
454           next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP;
455           p0 = vlib_get_buffer (vm, pi0);
456
457           //Accessing pseudo header
458           pheader0 = vlib_buffer_get_current (p0);
459           vlib_buffer_advance (p0, sizeof (*pheader0));
460
461           if (map_ip4_to_ip6_tcp_udp (p0, pheader0))
462             {
463               p0->error = error_node->errors[MAP_ERROR_UNKNOWN];
464               next0 = IP4_MAPT_TCP_UDP_NEXT_DROP;
465             }
466           else
467             {
468               if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
469                 {
470                   //Send to fragmentation node if necessary
471                   vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
472                   vnet_buffer (p0)->ip_frag.next_index =
473                     IP_FRAG_NEXT_IP6_LOOKUP;
474                   next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
475                 }
476               else
477                 {
478                   next0 = ip4_map_ip6_lookup_bypass (p0, NULL) ?
479                     IP4_MAPT_TCP_UDP_NEXT_IP6_REWRITE : next0;
480                 }
481             }
482           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
483                                            to_next, n_left_to_next, pi0,
484                                            next0);
485         }
486       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
487     }
488
489   return frame->n_vectors;
490 }
491
492 static_always_inline void
493 ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0,
494                     ip4_header_t * ip40, u16 ip4_len0, i32 * dst_port0,
495                     u8 * error0, ip4_mapt_next_t * next0, u16 l4_dst_port)
496 {
497   if (PREDICT_FALSE (ip4_get_fragment_offset (ip40)))
498     {
499       *next0 = IP4_MAPT_NEXT_MAPT_FRAGMENTED;
500       if (d0->ea_bits_len == 0 && d0->rules)
501         {
502           *dst_port0 = 0;
503         }
504       else
505         {
506           *dst_port0 = l4_dst_port;
507           *error0 = (*dst_port0 == -1) ? MAP_ERROR_FRAGMENT_MEMORY : *error0;
508         }
509     }
510   else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP))
511     {
512       vnet_buffer (p0)->map_t.checksum_offset = 36;
513       *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP;
514       *error0 = ip4_len0 < 40 ? MAP_ERROR_MALFORMED : *error0;
515       *dst_port0 = l4_dst_port;
516     }
517   else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_UDP))
518     {
519       vnet_buffer (p0)->map_t.checksum_offset = 26;
520       *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP;
521       *error0 = ip4_len0 < 28 ? MAP_ERROR_MALFORMED : *error0;
522       *dst_port0 = l4_dst_port;
523     }
524   else if (ip40->protocol == IP_PROTOCOL_ICMP)
525     {
526       *next0 = IP4_MAPT_NEXT_MAPT_ICMP;
527       if (d0->ea_bits_len == 0 && d0->rules)
528         *dst_port0 = 0;
529       else if (((icmp46_header_t *) u8_ptr_add (ip40, sizeof (*ip40)))->type
530                == ICMP4_echo_reply
531                || ((icmp46_header_t *)
532                    u8_ptr_add (ip40,
533                                sizeof (*ip40)))->type == ICMP4_echo_request)
534         *dst_port0 = l4_dst_port;
535     }
536   else
537     {
538       *error0 = MAP_ERROR_BAD_PROTOCOL;
539     }
540 }
541
542 static uword
543 ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
544 {
545   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
546   vlib_node_runtime_t *error_node =
547     vlib_node_get_runtime (vm, ip4_map_t_node.index);
548   from = vlib_frame_vector_args (frame);
549   n_left_from = frame->n_vectors;
550   next_index = node->cached_next_index;
551   vlib_combined_counter_main_t *cm = map_main.domain_counters;
552   u32 thread_index = vm->thread_index;
553
554   while (n_left_from > 0)
555     {
556       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
557
558       while (n_left_from > 0 && n_left_to_next > 0)
559         {
560           u32 pi0;
561           vlib_buffer_t *p0;
562           ip4_header_t *ip40;
563           map_domain_t *d0;
564           ip4_mapt_next_t next0 = 0;
565           u16 ip4_len0;
566           u8 error0;
567           i32 dst_port0;
568           ip4_mapt_pseudo_header_t *pheader0;
569
570           pi0 = to_next[0] = from[0];
571           from += 1;
572           n_left_from -= 1;
573           to_next += 1;
574           n_left_to_next -= 1;
575           error0 = MAP_ERROR_NONE;
576
577           p0 = vlib_get_buffer (vm, pi0);
578
579           u16 l4_dst_port = vnet_buffer (p0)->ip.reass.l4_dst_port;
580
581           ip40 = vlib_buffer_get_current (p0);
582           ip4_len0 = clib_host_to_net_u16 (ip40->length);
583           if (PREDICT_FALSE (p0->current_length < ip4_len0 ||
584                              ip40->ip_version_and_header_length != 0x45))
585             {
586               error0 = MAP_ERROR_UNKNOWN;
587             }
588
589           d0 = ip4_map_get_domain (&ip40->dst_address,
590                                    &vnet_buffer (p0)->map_t.map_domain_index,
591                                    &error0);
592
593           if (!d0)
594             {                   /* Guess it wasn't for us */
595               vnet_feature_next (&next0, p0);
596               goto exit;
597             }
598
599           dst_port0 = -1;
600
601           if (PREDICT_FALSE (ip40->ttl == 1))
602             {
603               icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
604                                            ICMP4_time_exceeded_ttl_exceeded_in_transit,
605                                            0);
606               p0->error = error_node->errors[MAP_ERROR_TIME_EXCEEDED];
607               next0 = IP4_MAPT_NEXT_ICMP_ERROR;
608               goto trace;
609             }
610
611           bool df0 =
612             ip40->flags_and_fragment_offset &
613             clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
614
615           vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
616
617           if (PREDICT_FALSE
618               (df0 && !map_main.frag_ignore_df
619                &&
620                ((ip4_len0 +
621                  (sizeof (ip6_header_t) - sizeof (ip4_header_t))) >
622                 vnet_buffer (p0)->map_t.mtu)))
623             {
624               icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
625                                            ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
626                                            vnet_buffer (p0)->map_t.mtu -
627                                            (sizeof (ip6_header_t) -
628                                             sizeof (ip4_header_t)));
629               p0->error = error_node->errors[MAP_ERROR_DF_SET];
630               next0 = IP4_MAPT_NEXT_ICMP_ERROR;
631               goto trace;
632             }
633
634           ip4_map_t_classify (p0, d0, ip40, ip4_len0, &dst_port0, &error0,
635                               &next0, l4_dst_port);
636
637           /* Verify that port is not among the well-known ports */
638           if ((d0->psid_length > 0 && d0->psid_offset > 0)
639               && (clib_net_to_host_u16 (dst_port0) <
640                   (0x1 << (16 - d0->psid_offset))))
641             {
642               error0 = MAP_ERROR_SEC_CHECK;
643             }
644
645           //Add MAP-T pseudo header in front of the packet
646           vlib_buffer_advance (p0, -sizeof (*pheader0));
647           pheader0 = vlib_buffer_get_current (p0);
648
649           //Save addresses within the packet
650           ip4_map_t_embedded_address (d0, &pheader0->saddr,
651                                       &ip40->src_address);
652           pheader0->daddr.as_u64[0] =
653             map_get_pfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
654           pheader0->daddr.as_u64[1] =
655             map_get_sfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
656
657           if (PREDICT_TRUE
658               (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
659             {
660               vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
661                                                thread_index,
662                                                vnet_buffer (p0)->
663                                                map_t.map_domain_index, 1,
664                                                clib_net_to_host_u16
665                                                (ip40->length));
666             }
667
668           next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0;
669           p0->error = error_node->errors[error0];
670         trace:
671           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
672             {
673               map_add_trace (vm, node, p0, d0 - map_main.domains, dst_port0);
674             }
675         exit:
676           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
677                                            to_next, n_left_to_next, pi0,
678                                            next0);
679         }
680       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
681     }
682   return frame->n_vectors;
683 }
684
685 VNET_FEATURE_INIT (ip4_map_t_feature, static) = {
686     .arc_name = "ip4-unicast",
687     .node_name = "ip4-map-t",
688     .runs_before = VNET_FEATURES ("ip4-flow-classify"),
689     .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"),
690 };
691
692 VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = {
693   .function = ip4_map_t_fragmented,
694   .name = "ip4-map-t-fragmented",
695   .vector_size = sizeof(u32),
696   .format_trace = format_map_trace,
697   .type = VLIB_NODE_TYPE_INTERNAL,
698
699   .n_errors = MAP_N_ERROR,
700   .error_counters = map_error_counters,
701
702   .n_next_nodes = IP4_MAPT_FRAGMENTED_N_NEXT,
703   .next_nodes = {
704       [IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP] = "ip6-lookup",
705       [IP4_MAPT_FRAGMENTED_NEXT_IP6_REWRITE] = "ip6-load-balance",
706       [IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
707       [IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
708   },
709 };
710
711 VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = {
712   .function = ip4_map_t_icmp,
713   .name = "ip4-map-t-icmp",
714   .vector_size = sizeof(u32),
715   .format_trace = format_map_trace,
716   .type = VLIB_NODE_TYPE_INTERNAL,
717
718   .n_errors = MAP_N_ERROR,
719   .error_counters = map_error_counters,
720
721   .n_next_nodes = IP4_MAPT_ICMP_N_NEXT,
722   .next_nodes = {
723       [IP4_MAPT_ICMP_NEXT_IP6_LOOKUP] = "ip6-lookup",
724       [IP4_MAPT_ICMP_NEXT_IP6_REWRITE] = "ip6-load-balance",
725       [IP4_MAPT_ICMP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
726       [IP4_MAPT_ICMP_NEXT_DROP] = "error-drop",
727   },
728 };
729
730 VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = {
731   .function = ip4_map_t_tcp_udp,
732   .name = "ip4-map-t-tcp-udp",
733   .vector_size = sizeof(u32),
734   .format_trace = format_map_trace,
735   .type = VLIB_NODE_TYPE_INTERNAL,
736
737   .n_errors = MAP_N_ERROR,
738   .error_counters = map_error_counters,
739
740   .n_next_nodes = IP4_MAPT_TCP_UDP_N_NEXT,
741   .next_nodes = {
742       [IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP] = "ip6-lookup",
743       [IP4_MAPT_TCP_UDP_NEXT_IP6_REWRITE] = "ip6-load-balance",
744       [IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
745       [IP4_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
746   },
747 };
748
749 VLIB_REGISTER_NODE(ip4_map_t_node) = {
750   .function = ip4_map_t,
751   .name = "ip4-map-t",
752   .vector_size = sizeof(u32),
753   .format_trace = format_map_trace,
754   .type = VLIB_NODE_TYPE_INTERNAL,
755
756   .n_errors = MAP_N_ERROR,
757   .error_counters = map_error_counters,
758
759   .n_next_nodes = IP4_MAPT_N_NEXT,
760   .next_nodes = {
761       [IP4_MAPT_NEXT_MAPT_TCP_UDP] = "ip4-map-t-tcp-udp",
762       [IP4_MAPT_NEXT_MAPT_ICMP] = "ip4-map-t-icmp",
763       [IP4_MAPT_NEXT_MAPT_FRAGMENTED] = "ip4-map-t-fragmented",
764       [IP4_MAPT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
765       [IP4_MAPT_NEXT_DROP] = "error-drop",
766   },
767 };
768
769 /*
770  * fd.io coding-style-patch-verification: ON
771  *
772  * Local Variables:
773  * eval: (c-set-style "gnu")
774  * End:
775  */