stats: counters data model
[vpp.git] / src / plugins / map / ip4_map.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * Defines used for testing various optimisation schemes
17  */
18
19 #include "map.h"
20 #include <vnet/ip/ip_frag.h>
21 #include <vnet/ip/ip4_to_ip6.h>
22
/*
 * Next-node slots of the ip4-map graph node.  The order must match the
 * .next_nodes table of the ip4_map_node registration.
 */
enum ip4_map_next_e
{
  IP4_MAP_NEXT_IP6_LOOKUP = 0,  /* "ip6-lookup" */
#ifdef MAP_SKIP_IP6_LOOKUP
  IP4_MAP_NEXT_IP6_REWRITE,     /* "ip6-load-balance" */
#endif
  IP4_MAP_NEXT_ICMP_ERROR,      /* "ip4-icmp-error" */
  IP4_MAP_NEXT_DROP,            /* "error-drop" */
  IP4_MAP_N_NEXT,               /* number of slots; must stay last */
};
33
34 static_always_inline u16
35 ip4_map_port_and_security_check (map_domain_t * d, vlib_buffer_t * b0,
36                                  u8 * error)
37 {
38   u16 port;
39   if (d->psid_length > 0)
40     {
41       ip4_header_t *ip = vlib_buffer_get_current (b0);
42
43       if (PREDICT_FALSE
44           ((ip->ip_version_and_header_length != 0x45)
45            || clib_host_to_net_u16 (ip->length) < 28))
46         {
47           return 0;
48         }
49
50       port = vnet_buffer (b0)->ip.reass.l4_dst_port;
51
52       /* Verify that port is not among the well-known ports */
53       if ((d->psid_offset > 0)
54           && (clib_net_to_host_u16 (port) < (0x1 << (16 - d->psid_offset))))
55         {
56           *error = MAP_ERROR_ENCAP_SEC_CHECK;
57         }
58       else
59         {
60           return port;
61         }
62     }
63   return (0);
64 }
65
66 /*
67  * ip4_map_vtcfl
68  */
69 static_always_inline u32
70 ip4_map_vtcfl (ip4_header_t * ip4, vlib_buffer_t * p)
71 {
72   map_main_t *mm = &map_main;
73   u8 tc = mm->tc_copy ? ip4->tos : mm->tc;
74   u32 vtcfl = 0x6 << 28;
75   vtcfl |= tc << 20;
76   vtcfl |= vnet_buffer (p)->ip.flow_hash & 0x000fffff;
77
78   return (clib_host_to_net_u32 (vtcfl));
79 }
80
81 /*
82  * ip4_map_ttl
83  */
84 static inline void
85 ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error)
86 {
87   i32 ttl = ip->ttl;
88
89   /* Input node should have reject packets with ttl 0. */
90   ASSERT (ip->ttl > 0);
91
92   u32 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
93   checksum += checksum >= 0xffff;
94   ip->checksum = checksum;
95   ttl -= 1;
96   ip->ttl = ttl;
97   *error = ttl <= 0 ? IP4_ERROR_TIME_EXPIRED : *error;
98
99   /* Verify checksum. */
100   ASSERT (ip4_header_checksum_is_valid (ip));
101 }
102
103 static u32
104 ip4_map_fragment (vlib_main_t * vm, u32 bi, u16 mtu, bool df, u32 ** buffers,
105                   u8 * error)
106 {
107   map_main_t *mm = &map_main;
108   vlib_buffer_t *b = vlib_get_buffer (vm, bi);
109
110   if (mm->frag_inner)
111     {
112       /* IPv4 fragmented packets inside of IPv6 */
113       ip4_frag_do_fragment (vm, bi, mtu, sizeof (ip6_header_t), buffers);
114
115       /* Fixup */
116       u32 *i;
117       vec_foreach (i, *buffers)
118       {
119         vlib_buffer_t *p = vlib_get_buffer (vm, *i);
120         ip6_header_t *ip6 = vlib_buffer_get_current (p);
121         ip6->payload_length =
122           clib_host_to_net_u16 (p->current_length - sizeof (ip6_header_t));
123       }
124     }
125   else
126     {
127       if (df && !mm->frag_ignore_df)
128         {
129           icmp4_error_set_vnet_buffer (b, ICMP4_destination_unreachable,
130                                        ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
131                                        mtu);
132           vlib_buffer_advance (b, sizeof (ip6_header_t));
133           *error = MAP_ERROR_DF_SET;
134           return (IP4_MAP_NEXT_ICMP_ERROR);
135         }
136
137       /* Create IPv6 fragments here */
138       ip6_frag_do_fragment (vm, bi, mtu, 0, buffers);
139     }
140   return (IP4_MAP_NEXT_IP6_LOOKUP);
141 }
142
143 /*
144  * ip4_map
145  */
146 static uword
147 ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
148 {
149   u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
150   vlib_node_runtime_t *error_node =
151     vlib_node_get_runtime (vm, ip4_map_node.index);
152   from = vlib_frame_vector_args (frame);
153   n_left_from = frame->n_vectors;
154   next_index = node->cached_next_index;
155   map_main_t *mm = &map_main;
156   vlib_combined_counter_main_t *cm = mm->domain_counters;
157   u32 thread_index = vm->thread_index;
158
159   while (n_left_from > 0)
160     {
161       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
162       while (n_left_from > 0 && n_left_to_next > 0)
163         {
164           u32 pi0;
165           vlib_buffer_t *p0;
166           map_domain_t *d0;
167           u8 error0 = MAP_ERROR_NONE;
168           ip4_header_t *ip40;
169           u16 port0 = 0;
170           ip6_header_t *ip6h0;
171           u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
172           u32 map_domain_index0 = ~0;
173           u32 *buffer0 = 0;
174           bool free_original_buffer0 = false;
175           u32 *frag_from0, frag_left0;
176
177           pi0 = to_next[0] = from[0];
178           from += 1;
179           n_left_from -= 1;
180
181           p0 = vlib_get_buffer (vm, pi0);
182           ip40 = vlib_buffer_get_current (p0);
183
184           d0 =
185             ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
186                                 &error0);
187           if (!d0)
188             {                   /* Guess it wasn't for us */
189               vnet_feature_next (&next0, p0);
190               goto exit;
191             }
192
193           /*
194            * Shared IPv4 address
195            */
196           port0 = ip4_map_port_and_security_check (d0, p0, &error0);
197
198           /*
199            * Clamp TCP MSS value.
200            */
201           if (ip40->protocol == IP_PROTOCOL_TCP)
202             {
203               tcp_header_t *tcp = ip4_next_header (ip40);
204               if (mm->tcp_mss > 0 && tcp_syn (tcp))
205                 {
206                   ip_csum_t csum = tcp->checksum;
207                   map_mss_clamping (tcp, &csum, mm->tcp_mss);
208                   tcp->checksum = ip_csum_fold (csum);
209                 }
210             }
211
212           /* Decrement IPv4 TTL */
213           ip4_map_decrement_ttl (ip40, &error0);
214           bool df0 =
215             ip40->flags_and_fragment_offset &
216             clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
217
218           /* MAP calc */
219           u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
220           u16 dp40 = clib_net_to_host_u16 (port0);
221           u64 dal60 = map_get_pfx (d0, da40, dp40);
222           u64 dar60 = map_get_sfx (d0, da40, dp40);
223           if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
224             error0 = MAP_ERROR_NO_BINDING;
225
226           /* construct ipv6 header */
227           vlib_buffer_advance (p0, -(sizeof (ip6_header_t)));
228           ip6h0 = vlib_buffer_get_current (p0);
229           vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
230
231           ip6h0->ip_version_traffic_class_and_flow_label =
232             ip4_map_vtcfl (ip40, p0);
233           ip6h0->payload_length = ip40->length;
234           ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
235           ip6h0->hop_limit = 0x40;
236           ip6h0->src_address = d0->ip6_src;
237           ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
238           ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
239
240           /*
241            * Determine next node. Can be one of:
242            * ip6-lookup, ip6-rewrite, error-drop
243            */
244           if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
245             {
246               if (PREDICT_FALSE
247                   (d0->mtu
248                    && (clib_net_to_host_u16 (ip6h0->payload_length) +
249                        sizeof (*ip6h0) > d0->mtu)))
250                 {
251                   next0 =
252                     ip4_map_fragment (vm, pi0, d0->mtu, df0, &buffer0,
253                                       &error0);
254
255                   if (error0 == MAP_ERROR_NONE)
256                     {
257                       free_original_buffer0 = true;
258                     }
259                 }
260               else
261                 {
262                   next0 =
263                     ip4_map_ip6_lookup_bypass (p0,
264                                                ip40) ?
265                     IP4_MAP_NEXT_IP6_REWRITE : next0;
266                   vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
267                                                    thread_index,
268                                                    map_domain_index0, 1,
269                                                    clib_net_to_host_u16
270                                                    (ip6h0->payload_length) +
271                                                    40);
272                 }
273             }
274           else
275             {
276               next0 = IP4_MAP_NEXT_DROP;
277             }
278
279           if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
280             {
281               map_add_trace (vm, node, p0, map_domain_index0, port0);
282             }
283
284           p0->error = error_node->errors[error0];
285         exit:
286           /* Send fragments that were added in the frame */
287           if (free_original_buffer0)
288             {
289               vlib_buffer_free_one (vm, pi0);   /* Free original packet */
290             }
291           else
292             {
293               vec_add1 (buffer0, pi0);
294             }
295
296           frag_from0 = buffer0;
297           frag_left0 = vec_len (buffer0);
298
299           while (frag_left0 > 0)
300             {
301               while (frag_left0 > 0 && n_left_to_next > 0)
302                 {
303                   u32 i0;
304                   i0 = to_next[0] = frag_from0[0];
305                   frag_from0 += 1;
306                   frag_left0 -= 1;
307                   to_next += 1;
308                   n_left_to_next -= 1;
309
310                   vlib_get_buffer (vm, i0)->error =
311                     error_node->errors[error0];
312                   vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
313                                                    to_next, n_left_to_next,
314                                                    i0, next0);
315                 }
316               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
317               vlib_get_next_frame (vm, node, next_index, to_next,
318                                    n_left_to_next);
319             }
320           vec_reset_length (buffer0);
321         }
322       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
323     }
324
325   return frame->n_vectors;
326 }
327
328 /* *INDENT-OFF* */
329 VNET_FEATURE_INIT (ip4_map_feature, static) =
330 {
331   .arc_name = "ip4-unicast",
332   .node_name = "ip4-map",
333   .runs_before = VNET_FEATURES ("ip4-flow-classify"),
334   .runs_after = VNET_FEATURES("ip4-sv-reassembly-feature"),
335 };
336
337 VLIB_REGISTER_NODE(ip4_map_node) = {
338   .function = ip4_map,
339   .name = "ip4-map",
340   .vector_size = sizeof(u32),
341   .format_trace = format_map_trace,
342   .type = VLIB_NODE_TYPE_INTERNAL,
343
344   .n_errors = MAP_N_ERROR,
345   .error_counters = map_error_counters,
346
347   .n_next_nodes = IP4_MAP_N_NEXT,
348   .next_nodes = {
349     [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
350 #ifdef MAP_SKIP_IP6_LOOKUP
351     [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance",
352 #endif
353     [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
354     [IP4_MAP_NEXT_DROP] = "error-drop",
355   },
356 };
357 /* *INDENT-ON* */
358
359 /*
360  * fd.io coding-style-patch-verification: ON
361  *
362  * Local Variables:
363  * eval: (c-set-style "gnu")
364  * End:
365  */