map: Fix TCP MSS clamping for MAP-E traffic.
[vpp.git] / src / plugins / map / ip4_map.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * Defines used for testing various optimisation schemes
17  */
18
19 #include "map.h"
20 #include <vnet/ip/ip_frag.h>
21 #include <vnet/ip/ip4_to_ip6.h>
22
/*
 * Next-node dispositions of the ip4-map graph node.  The values are used
 * as indices into the next_nodes table of the ip4_map_node registration.
 */
enum ip4_map_next_e
{
  /* Hand the encapsulated packet to "ip6-lookup". */
  IP4_MAP_NEXT_IP6_LOOKUP,
#ifdef MAP_SKIP_IP6_LOOKUP
  /*
   * Only present when MAP_SKIP_IP6_LOOKUP is defined.
   * NOTE(review): ip4_map () references this enumerator unconditionally,
   * so the file presumably only builds with MAP_SKIP_IP6_LOOKUP defined.
   */
  IP4_MAP_NEXT_IP6_REWRITE,
#endif
  /* Send the packet to "ip4-icmp-error". */
  IP4_MAP_NEXT_ICMP_ERROR,
  /* Send the packet to "error-drop". */
  IP4_MAP_NEXT_DROP,
  /* Number of next nodes; must remain the last enumerator. */
  IP4_MAP_N_NEXT,
};
33
34 static_always_inline u16
35 ip4_map_port_and_security_check (map_domain_t * d, vlib_buffer_t * b0,
36                                  u8 * error)
37 {
38   u16 port;
39   if (d->psid_length > 0)
40     {
41       ip4_header_t *ip = vlib_buffer_get_current (b0);
42
43       if (PREDICT_FALSE
44           ((ip->ip_version_and_header_length != 0x45)
45            || clib_host_to_net_u16 (ip->length) < 28))
46         {
47           return 0;
48         }
49
50       port = vnet_buffer (b0)->ip.reass.l4_dst_port;
51
52       /* Verify that port is not among the well-known ports */
53       if ((d->psid_offset > 0)
54           && (clib_net_to_host_u16 (port) < (0x1 << (16 - d->psid_offset))))
55         {
56           *error = MAP_ERROR_ENCAP_SEC_CHECK;
57         }
58       else
59         {
60           return port;
61         }
62     }
63   return (0);
64 }
65
66 /*
67  * ip4_map_vtcfl
68  */
69 static_always_inline u32
70 ip4_map_vtcfl (ip4_header_t * ip4, vlib_buffer_t * p)
71 {
72   map_main_t *mm = &map_main;
73   u8 tc = mm->tc_copy ? ip4->tos : mm->tc;
74   u32 vtcfl = 0x6 << 28;
75   vtcfl |= tc << 20;
76   vtcfl |= vnet_buffer (p)->ip.flow_hash & 0x000fffff;
77
78   return (clib_host_to_net_u32 (vtcfl));
79 }
80
81 static_always_inline bool
82 ip4_map_ip6_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
83 {
84 #ifdef MAP_SKIP_IP6_LOOKUP
85   if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP6].fei)
86     {
87       vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
88         pre_resolved[FIB_PROTOCOL_IP6].dpo.dpoi_index;
89       return (true);
90     }
91 #endif
92   return (false);
93 }
94
95 /*
96  * ip4_map_ttl
97  */
98 static inline void
99 ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error)
100 {
101   i32 ttl = ip->ttl;
102
103   /* Input node should have reject packets with ttl 0. */
104   ASSERT (ip->ttl > 0);
105
106   u32 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
107   checksum += checksum >= 0xffff;
108   ip->checksum = checksum;
109   ttl -= 1;
110   ip->ttl = ttl;
111   *error = ttl <= 0 ? IP4_ERROR_TIME_EXPIRED : *error;
112
113   /* Verify checksum. */
114   ASSERT (ip->checksum == ip4_header_checksum (ip));
115 }
116
117 static u32
118 ip4_map_fragment (vlib_main_t * vm, u32 bi, u16 mtu, bool df, u32 ** buffers,
119                   u8 * error)
120 {
121   map_main_t *mm = &map_main;
122   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
123
124   if (mm->frag_inner)
125     {
126       /* IPv4 fragmented packets inside of IPv6 */
127       ip4_frag_do_fragment (vm, bi, mtu, sizeof (ip6_header_t), buffers);
128
129       /* Fixup */
130       u32 *i;
131       vec_foreach (i, *buffers)
132       {
133         vlib_buffer_t *p = vlib_get_buffer (vm, *i);
134         ip6_header_t *ip6 = vlib_buffer_get_current (p);
135         ip6->payload_length =
136           clib_host_to_net_u16 (p->current_length - sizeof (ip6_header_t));
137       }
138     }
139   else
140     {
141       if (df && !mm->frag_ignore_df)
142         {
143           icmp4_error_set_vnet_buffer (b, ICMP4_destination_unreachable,
144                                        ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
145                                        mtu);
146           vlib_buffer_advance (b, sizeof (ip6_header_t));
147           *error = MAP_ERROR_DF_SET;
148           return (IP4_MAP_NEXT_ICMP_ERROR);
149         }
150
151       /* Create IPv6 fragments here */
152       ip6_frag_do_fragment (vm, bi, mtu, 0, buffers);
153     }
154   return (IP4_MAP_NEXT_IP6_LOOKUP);
155 }
156
157 /*
158  * ip4_map
159  */
160 static uword
161 ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
162 {
163   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
164   vlib_node_runtime_t *error_node =
165     vlib_node_get_runtime (vm, ip4_map_node.index);
166   from = vlib_frame_vector_args (frame);
167   n_left_from = frame->n_vectors;
168   next_index = node->cached_next_index;
169   map_main_t *mm = &map_main;
170   vlib_combined_counter_main_t *cm = mm->domain_counters;
171   u32 thread_index = vm->thread_index;
172
173   while (n_left_from > 0)
174     {
175       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
176       while (n_left_from > 0 && n_left_to_next > 0)
177         {
178           u32 pi0;
179           vlib_buffer_t *p0;
180           map_domain_t *d0;
181           u8 error0 = MAP_ERROR_NONE;
182           ip4_header_t *ip40;
183           u16 port0 = 0;
184           ip6_header_t *ip6h0;
185           u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
186           u32 map_domain_index0 = ~0;
187           u32 *buffer0 = 0;
188           bool free_original_buffer0 = false;
189           u32 *frag_from0, frag_left0;
190
191           pi0 = to_next[0] = from[0];
192           from += 1;
193           n_left_from -= 1;
194
195           p0 = vlib_get_buffer (vm, pi0);
196           ip40 = vlib_buffer_get_current (p0);
197
198           d0 =
199             ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
200                                 &error0);
201           if (!d0)
202             {                   /* Guess it wasn't for us */
203               vnet_feature_next (&next0, p0);
204               goto exit;
205             }
206
207           /*
208            * Shared IPv4 address
209            */
210           port0 = ip4_map_port_and_security_check (d0, p0, &error0);
211
212           /*
213            * Clamp TCP MSS value.
214            */
215           if (ip40->protocol == IP_PROTOCOL_TCP)
216             {
217               tcp_header_t *tcp = ip4_next_header (ip40);
218               if (mm->tcp_mss > 0 && tcp_syn (tcp))
219                 {
220                   ip_csum_t csum = tcp->checksum;
221                   map_mss_clamping (tcp, &csum, mm->tcp_mss);
222                   tcp->checksum = ip_csum_fold (csum);
223                 }
224             }
225
226           /* Decrement IPv4 TTL */
227           ip4_map_decrement_ttl (ip40, &error0);
228           bool df0 =
229             ip40->flags_and_fragment_offset &
230             clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
231
232           /* MAP calc */
233           u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
234           u16 dp40 = clib_net_to_host_u16 (port0);
235           u64 dal60 = map_get_pfx (d0, da40, dp40);
236           u64 dar60 = map_get_sfx (d0, da40, dp40);
237           if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
238             error0 = MAP_ERROR_NO_BINDING;
239
240           /* construct ipv6 header */
241           vlib_buffer_advance (p0, -(sizeof (ip6_header_t)));
242           ip6h0 = vlib_buffer_get_current (p0);
243           vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
244
245           ip6h0->ip_version_traffic_class_and_flow_label =
246             ip4_map_vtcfl (ip40, p0);
247           ip6h0->payload_length = ip40->length;
248           ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
249           ip6h0->hop_limit = 0x40;
250           ip6h0->src_address = d0->ip6_src;
251           ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
252           ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
253
254           /*
255            * Determine next node. Can be one of:
256            * ip6-lookup, ip6-rewrite, error-drop
257            */
258           if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
259             {
260               if (PREDICT_FALSE
261                   (d0->mtu
262                    && (clib_net_to_host_u16 (ip6h0->payload_length) +
263                        sizeof (*ip6h0) > d0->mtu)))
264                 {
265                   next0 =
266                     ip4_map_fragment (vm, pi0, d0->mtu, df0, &buffer0,
267                                       &error0);
268
269                   if (error0 == MAP_ERROR_NONE)
270                     {
271                       free_original_buffer0 = true;
272                     }
273                 }
274               else
275                 {
276                   next0 =
277                     ip4_map_ip6_lookup_bypass (p0,
278                                                ip40) ?
279                     IP4_MAP_NEXT_IP6_REWRITE : next0;
280                   vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
281                                                    thread_index,
282                                                    map_domain_index0, 1,
283                                                    clib_net_to_host_u16
284                                                    (ip6h0->payload_length) +
285                                                    40);
286                 }
287             }
288           else
289             {
290               next0 = IP4_MAP_NEXT_DROP;
291             }
292
293           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
294             {
295               map_add_trace (vm, node, p0, map_domain_index0, port0);
296             }
297
298           p0->error = error_node->errors[error0];
299         exit:
300           /* Send fragments that were added in the frame */
301           if (free_original_buffer0)
302             {
303               vlib_buffer_free_one (vm, pi0);   /* Free original packet */
304             }
305           else
306             {
307               vec_add1 (buffer0, pi0);
308             }
309
310           frag_from0 = buffer0;
311           frag_left0 = vec_len (buffer0);
312
313           while (frag_left0 > 0)
314             {
315               while (frag_left0 > 0 && n_left_to_next > 0)
316                 {
317                   u32 i0;
318                   i0 = to_next[0] = frag_from0[0];
319                   frag_from0 += 1;
320                   frag_left0 -= 1;
321                   to_next += 1;
322                   n_left_to_next -= 1;
323
324                   vlib_get_buffer (vm, i0)->error =
325                     error_node->errors[error0];
326                   vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
327                                                    to_next, n_left_to_next,
328                                                    i0, next0);
329                 }
330               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
331               vlib_get_next_frame (vm, node, next_index, to_next,
332                                    n_left_to_next);
333             }
334           vec_reset_length (buffer0);
335         }
336       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
337     }
338
339   return frame->n_vectors;
340 }
341
/*
 * Error strings of the ip4-map node, referenced by the .error_strings
 * field of its registration.  The table is produced by expanding
 * foreach_map_error (defined outside this file) with _ () emitting only
 * the string argument of each entry.
 */
static char *map_error_strings[] = {
#define _(sym,string) string,
  foreach_map_error
#undef _
};
347
348
349 /* *INDENT-OFF* */
/*
 * Register "ip4-map" as a feature on the "ip4-unicast" arc, ordered after
 * "ip4-sv-reassembly-feature" and before "ip4-flow-classify".
 * ip4_map_port_and_security_check () reads the destination port from the
 * buffer's ip.reass metadata.
 * NOTE(review): presumably that metadata is filled in by the reassembly
 * feature, which would be the reason for the runs_after ordering -- confirm.
 */
VNET_FEATURE_INIT (ip4_map_feature, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-map",
  .runs_before = VNET_FEATURES ("ip4-flow-classify"),
  .runs_after = VNET_FEATURES("ip4-sv-reassembly-feature"),
};
357
/*
 * Graph node registration for "ip4-map": an internal node running
 * ip4_map () on frames of u32 buffer indices, with the MAP error strings
 * and one next node per enum ip4_map_next_e value.
 */
VLIB_REGISTER_NODE(ip4_map_node) = {
  .function = ip4_map,
  .name = "ip4-map",
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  .n_errors = MAP_N_ERROR,
  .error_strings = map_error_strings,

  .n_next_nodes = IP4_MAP_N_NEXT,
  /* Indexed by enum ip4_map_next_e. */
  .next_nodes = {
    [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
#ifdef MAP_SKIP_IP6_LOOKUP
    /* The enumerator is named IP6_REWRITE but targets "ip6-load-balance". */
    [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance",
#endif
    [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [IP4_MAP_NEXT_DROP] = "error-drop",
  },
};
378 /* *INDENT-ON* */
379
380 /*
381  * fd.io coding-style-patch-verification: ON
382  *
383  * Local Variables:
384  * eval: (c-set-style "gnu")
385  * End:
386  */