map: use SVR for MAP-E
[vpp.git] / src / plugins / map / ip4_map.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * Defines used for testing various optimisation schemes
17  */
18
19 #include "map.h"
20 #include <vnet/ip/ip_frag.h>
21 #include <vnet/ip/ip4_to_ip6.h>
22
/*
 * Next-node dispositions of the ip4-map node.
 *
 * The enumerators are used as indices into the .next_nodes table of the
 * ip4_map_node registration, so their order must stay in sync with it.
 */
enum ip4_map_next_e
{
  /* Hand the encapsulated packet to "ip6-lookup". */
  IP4_MAP_NEXT_IP6_LOOKUP,
#if defined (MAP_SKIP_IP6_LOOKUP)
  /* Skip the IPv6 lookup and go to "ip6-load-balance" directly. */
  IP4_MAP_NEXT_IP6_REWRITE,
#endif
  /* Fragment via "ip4-frag". */
  IP4_MAP_NEXT_IP4_FRAGMENT,
  /* Fragment via "ip6-frag". */
  IP4_MAP_NEXT_IP6_FRAGMENT,
  /* Generate an ICMPv4 error via "ip4-icmp-error". */
  IP4_MAP_NEXT_ICMP_ERROR,
  /* Discard via "error-drop". */
  IP4_MAP_NEXT_DROP,
  /* Number of next nodes; not a disposition itself. */
  IP4_MAP_N_NEXT,
};
35
36 static_always_inline u16
37 ip4_map_port_and_security_check (map_domain_t * d, vlib_buffer_t * b0,
38                                  u8 * error)
39 {
40   u16 port;
41   if (d->psid_length > 0)
42     {
43       ip4_header_t *ip = vlib_buffer_get_current (b0);
44
45       if (PREDICT_FALSE
46           ((ip->ip_version_and_header_length != 0x45)
47            || clib_host_to_net_u16 (ip->length) < 28))
48         {
49           return 0;
50         }
51
52       port = vnet_buffer (b0)->ip.reass.l4_dst_port;
53
54       /* Verify that port is not among the well-known ports */
55       if ((d->psid_offset > 0)
56           && (clib_net_to_host_u16 (port) < (0x1 << (16 - d->psid_offset))))
57         {
58           *error = MAP_ERROR_ENCAP_SEC_CHECK;
59         }
60       else
61         {
62           return port;
63         }
64     }
65   return (0);
66 }
67
68 /*
69  * ip4_map_vtcfl
70  */
71 static_always_inline u32
72 ip4_map_vtcfl (ip4_header_t * ip4, vlib_buffer_t * p)
73 {
74   map_main_t *mm = &map_main;
75   u8 tc = mm->tc_copy ? ip4->tos : mm->tc;
76   u32 vtcfl = 0x6 << 28;
77   vtcfl |= tc << 20;
78   vtcfl |= vnet_buffer (p)->ip.flow_hash & 0x000fffff;
79
80   return (clib_host_to_net_u32 (vtcfl));
81 }
82
83 static_always_inline bool
84 ip4_map_ip6_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
85 {
86 #ifdef MAP_SKIP_IP6_LOOKUP
87   if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP6].fei)
88     {
89       vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
90         pre_resolved[FIB_PROTOCOL_IP6].dpo.dpoi_index;
91       return (true);
92     }
93 #endif
94   return (false);
95 }
96
97 /*
98  * ip4_map_ttl
99  */
100 static inline void
101 ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error)
102 {
103   i32 ttl = ip->ttl;
104
105   /* Input node should have reject packets with ttl 0. */
106   ASSERT (ip->ttl > 0);
107
108   u32 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
109   checksum += checksum >= 0xffff;
110   ip->checksum = checksum;
111   ttl -= 1;
112   ip->ttl = ttl;
113   *error = ttl <= 0 ? IP4_ERROR_TIME_EXPIRED : *error;
114
115   /* Verify checksum. */
116   ASSERT (ip->checksum == ip4_header_checksum (ip));
117 }
118
119 static u32
120 ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
121 {
122   map_main_t *mm = &map_main;
123
124   if (mm->frag_inner)
125     {
126       // TODO: Fix inner fragmentation after removed inner support from ip-frag.
127       ip_frag_set_vnet_buffer (b, /*sizeof (ip6_header_t), */ mtu,
128                                IP4_FRAG_NEXT_IP6_LOOKUP,
129                                IP_FRAG_FLAG_IP6_HEADER);
130       return (IP4_MAP_NEXT_IP4_FRAGMENT);
131     }
132   else
133     {
134       if (df && !mm->frag_ignore_df)
135         {
136           icmp4_error_set_vnet_buffer (b, ICMP4_destination_unreachable,
137                                        ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
138                                        mtu);
139           vlib_buffer_advance (b, sizeof (ip6_header_t));
140           *error = MAP_ERROR_DF_SET;
141           return (IP4_MAP_NEXT_ICMP_ERROR);
142         }
143       ip_frag_set_vnet_buffer (b, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
144                                IP_FRAG_FLAG_IP6_HEADER);
145       return (IP4_MAP_NEXT_IP6_FRAGMENT);
146     }
147 }
148
149 /*
150  * ip4_map
151  */
152 static uword
153 ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
154 {
155   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
156   vlib_node_runtime_t *error_node =
157     vlib_node_get_runtime (vm, ip4_map_node.index);
158   from = vlib_frame_vector_args (frame);
159   n_left_from = frame->n_vectors;
160   next_index = node->cached_next_index;
161   map_main_t *mm = &map_main;
162   vlib_combined_counter_main_t *cm = mm->domain_counters;
163   u32 thread_index = vm->thread_index;
164
165   while (n_left_from > 0)
166     {
167       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
168
169       /* Dual loop */
170       while (n_left_from >= 4 && n_left_to_next >= 2)
171         {
172           u32 pi0, pi1;
173           vlib_buffer_t *p0, *p1;
174           map_domain_t *d0, *d1;
175           u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE;
176           ip4_header_t *ip40, *ip41;
177           u16 port0 = 0, port1 = 0;
178           ip6_header_t *ip6h0, *ip6h1;
179           u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
180           u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 =
181             IP4_MAP_NEXT_IP6_LOOKUP;
182
183           /* Prefetch next iteration. */
184           {
185             vlib_buffer_t *p2, *p3;
186
187             p2 = vlib_get_buffer (vm, from[2]);
188             p3 = vlib_get_buffer (vm, from[3]);
189
190             vlib_prefetch_buffer_header (p2, STORE);
191             vlib_prefetch_buffer_header (p3, STORE);
192             /* IPv4 + 8 = 28. possibly plus -40 */
193             CLIB_PREFETCH (p2->data - 40, 68, STORE);
194             CLIB_PREFETCH (p3->data - 40, 68, STORE);
195           }
196
197           pi0 = to_next[0] = from[0];
198           pi1 = to_next[1] = from[1];
199           from += 2;
200           n_left_from -= 2;
201           to_next += 2;
202           n_left_to_next -= 2;
203
204           p0 = vlib_get_buffer (vm, pi0);
205           p1 = vlib_get_buffer (vm, pi1);
206           ip40 = vlib_buffer_get_current (p0);
207           ip41 = vlib_buffer_get_current (p1);
208           d0 =
209             ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
210                                 &error0);
211           d1 =
212             ip4_map_get_domain (&ip41->dst_address, &map_domain_index1,
213                                 &error1);
214
215           /*
216            * Shared IPv4 address
217            */
218           port0 = ip4_map_port_and_security_check (d0, p0, &error0);
219           port1 = ip4_map_port_and_security_check (d1, p1, &error1);
220
221           /* Decrement IPv4 TTL */
222           ip4_map_decrement_ttl (ip40, &error0);
223           ip4_map_decrement_ttl (ip41, &error1);
224           bool df0 =
225             ip40->flags_and_fragment_offset &
226             clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
227           bool df1 =
228             ip41->flags_and_fragment_offset &
229             clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
230
231           /* MAP calc */
232           u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
233           u32 da41 = clib_net_to_host_u32 (ip41->dst_address.as_u32);
234           u16 dp40 = clib_net_to_host_u16 (port0);
235           u16 dp41 = clib_net_to_host_u16 (port1);
236           u64 dal60 = map_get_pfx (d0, da40, dp40);
237           u64 dal61 = map_get_pfx (d1, da41, dp41);
238           u64 dar60 = map_get_sfx (d0, da40, dp40);
239           u64 dar61 = map_get_sfx (d1, da41, dp41);
240           if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
241             error0 = MAP_ERROR_NO_BINDING;
242           if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE)
243             error1 = MAP_ERROR_NO_BINDING;
244
245           /* construct ipv6 header */
246           vlib_buffer_advance (p0, -sizeof (ip6_header_t));
247           vlib_buffer_advance (p1, -sizeof (ip6_header_t));
248           ip6h0 = vlib_buffer_get_current (p0);
249           ip6h1 = vlib_buffer_get_current (p1);
250           vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
251           vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
252
253           ip6h0->ip_version_traffic_class_and_flow_label =
254             ip4_map_vtcfl (ip40, p0);
255           ip6h1->ip_version_traffic_class_and_flow_label =
256             ip4_map_vtcfl (ip41, p1);
257           ip6h0->payload_length = ip40->length;
258           ip6h1->payload_length = ip41->length;
259           ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
260           ip6h1->protocol = IP_PROTOCOL_IP_IN_IP;
261           ip6h0->hop_limit = 0x40;
262           ip6h1->hop_limit = 0x40;
263           ip6h0->src_address = d0->ip6_src;
264           ip6h1->src_address = d1->ip6_src;
265           ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
266           ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
267           ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64 (dal61);
268           ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64 (dar61);
269
270           /*
271            * Determine next node. Can be one of:
272            * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
273            */
274           if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
275             {
276               if (PREDICT_FALSE
277                   (d0->mtu
278                    && (clib_net_to_host_u16 (ip6h0->payload_length) +
279                        sizeof (*ip6h0) > d0->mtu)))
280                 {
281                   next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
282                 }
283               else
284                 {
285                   next0 =
286                     ip4_map_ip6_lookup_bypass (p0,
287                                                ip40) ?
288                     IP4_MAP_NEXT_IP6_REWRITE : next0;
289                   vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
290                                                    thread_index,
291                                                    map_domain_index0, 1,
292                                                    clib_net_to_host_u16
293                                                    (ip6h0->payload_length) +
294                                                    40);
295                 }
296             }
297           else
298             {
299               next0 = IP4_MAP_NEXT_DROP;
300             }
301
302           /*
303            * Determine next node. Can be one of:
304            * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
305            */
306           if (PREDICT_TRUE (error1 == MAP_ERROR_NONE))
307             {
308               if (PREDICT_FALSE
309                   (d1->mtu
310                    && (clib_net_to_host_u16 (ip6h1->payload_length) +
311                        sizeof (*ip6h1) > d1->mtu)))
312                 {
313                   next1 = ip4_map_fragment (p1, d1->mtu, df1, &error1);
314                 }
315               else
316                 {
317                   next1 =
318                     ip4_map_ip6_lookup_bypass (p1,
319                                                ip41) ?
320                     IP4_MAP_NEXT_IP6_REWRITE : next1;
321                   vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
322                                                    thread_index,
323                                                    map_domain_index1, 1,
324                                                    clib_net_to_host_u16
325                                                    (ip6h1->payload_length) +
326                                                    40);
327                 }
328             }
329           else
330             {
331               next1 = IP4_MAP_NEXT_DROP;
332             }
333
334           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
335             {
336               map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
337               tr->map_domain_index = map_domain_index0;
338               tr->port = port0;
339             }
340           if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
341             {
342               map_trace_t *tr = vlib_add_trace (vm, node, p1, sizeof (*tr));
343               tr->map_domain_index = map_domain_index1;
344               tr->port = port1;
345             }
346
347           p0->error = error_node->errors[error0];
348           p1->error = error_node->errors[error1];
349
350           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
351                                            n_left_to_next, pi0, pi1, next0,
352                                            next1);
353         }
354
355       while (n_left_from > 0 && n_left_to_next > 0)
356         {
357           u32 pi0;
358           vlib_buffer_t *p0;
359           map_domain_t *d0;
360           u8 error0 = MAP_ERROR_NONE;
361           ip4_header_t *ip40;
362           u16 port0 = 0;
363           ip6_header_t *ip6h0;
364           u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
365           u32 map_domain_index0 = ~0;
366
367           pi0 = to_next[0] = from[0];
368           from += 1;
369           n_left_from -= 1;
370           to_next += 1;
371           n_left_to_next -= 1;
372
373           p0 = vlib_get_buffer (vm, pi0);
374           ip40 = vlib_buffer_get_current (p0);
375
376           d0 =
377             ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
378                                 &error0);
379           if (!d0)
380             {                   /* Guess it wasn't for us */
381               vnet_feature_next (&next0, p0);
382               goto exit;
383             }
384
385           /*
386            * Shared IPv4 address
387            */
388           port0 = ip4_map_port_and_security_check (d0, p0, &error0);
389
390           /* Decrement IPv4 TTL */
391           ip4_map_decrement_ttl (ip40, &error0);
392           bool df0 =
393             ip40->flags_and_fragment_offset &
394             clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
395
396           /* MAP calc */
397           u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
398           u16 dp40 = clib_net_to_host_u16 (port0);
399           u64 dal60 = map_get_pfx (d0, da40, dp40);
400           u64 dar60 = map_get_sfx (d0, da40, dp40);
401           if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
402             error0 = MAP_ERROR_NO_BINDING;
403
404           /* construct ipv6 header */
405           vlib_buffer_advance (p0, -(sizeof (ip6_header_t)));
406           ip6h0 = vlib_buffer_get_current (p0);
407           vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
408
409           ip6h0->ip_version_traffic_class_and_flow_label =
410             ip4_map_vtcfl (ip40, p0);
411           ip6h0->payload_length = ip40->length;
412           ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
413           ip6h0->hop_limit = 0x40;
414           ip6h0->src_address = d0->ip6_src;
415           ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
416           ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
417
418           /*
419            * Determine next node. Can be one of:
420            * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
421            */
422           if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
423             {
424               if (PREDICT_FALSE
425                   (d0->mtu
426                    && (clib_net_to_host_u16 (ip6h0->payload_length) +
427                        sizeof (*ip6h0) > d0->mtu)))
428                 {
429                   next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
430                 }
431               else
432                 {
433                   next0 =
434                     ip4_map_ip6_lookup_bypass (p0,
435                                                ip40) ?
436                     IP4_MAP_NEXT_IP6_REWRITE : next0;
437                   vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
438                                                    thread_index,
439                                                    map_domain_index0, 1,
440                                                    clib_net_to_host_u16
441                                                    (ip6h0->payload_length) +
442                                                    40);
443                 }
444             }
445           else
446             {
447               next0 = IP4_MAP_NEXT_DROP;
448             }
449
450           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
451             {
452               map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
453               tr->map_domain_index = map_domain_index0;
454               tr->port = port0;
455             }
456
457           p0->error = error_node->errors[error0];
458         exit:
459           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
460                                            n_left_to_next, pi0, next0);
461         }
462       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
463     }
464
465   return frame->n_vectors;
466 }
467
/*
 * Error strings of the ip4-map node, one per entry of foreach_map_error
 * and in the same order. The X-macro below keeps only the string half of
 * each (sym, string) pair. Referenced by .error_strings in the node
 * registration.
 */
static char *map_error_strings[] = {
#define _(sym,string) string,
  foreach_map_error
#undef _
};
473
474
475 /* *INDENT-OFF* */
/*
 * Registers "ip4-map" as a feature on the "ip4-unicast" arc, ordered after
 * "ip4-sv-reassembly-feature" and before "ip4-flow-classify".
 * NOTE(review): the ordering after reassembly presumably exists because the
 * port check reads ip.reass.l4_dst_port from the buffer - confirm.
 */
VNET_FEATURE_INIT (ip4_map_feature, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-map",
  .runs_before = VNET_FEATURES ("ip4-flow-classify"),
  .runs_after = VNET_FEATURES("ip4-sv-reassembly-feature"),
};
483
/*
 * Graph node registration for "ip4-map". The node function is ip4_map(),
 * errors come from map_error_strings, and the next-node table is indexed
 * by enum ip4_map_next_e.
 */
VLIB_REGISTER_NODE(ip4_map_node) = {
  .function = ip4_map,
  .name = "ip4-map",
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,

  .n_next_nodes = IP4_MAP_N_NEXT,
  /* Entries must stay in sync with enum ip4_map_next_e. */
  .next_nodes = {
    [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
#ifdef MAP_SKIP_IP6_LOOKUP
    /* Only present when the IPv6 lookup bypass is compiled in. */
    [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance",
#endif
    [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
    [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag",
    [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [IP4_MAP_NEXT_DROP] = "error-drop",
  },
};
506 /* *INDENT-ON* */
507
508 /*
509  * fd.io coding-style-patch-verification: ON
510  *
511  * Local Variables:
512  * eval: (c-set-style "gnu")
513  * End:
514  */