/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Defines used for testing various optimisation schemes
 */
#include "map.h"

#include <vnet/ip/ip_frag.h>
#include <vnet/ip/ip4_to_ip6.h>
25 IP4_MAP_NEXT_IP6_LOOKUP,
26 #ifdef MAP_SKIP_IP6_LOOKUP
27 IP4_MAP_NEXT_IP6_REWRITE,
29 IP4_MAP_NEXT_IP4_FRAGMENT,
30 IP4_MAP_NEXT_IP6_FRAGMENT,
31 IP4_MAP_NEXT_ICMP_ERROR,
36 static_always_inline u16
37 ip4_map_port_and_security_check (map_domain_t * d, vlib_buffer_t * b0,
41 if (d->psid_length > 0)
43 ip4_header_t *ip = vlib_buffer_get_current (b0);
46 ((ip->ip_version_and_header_length != 0x45)
47 || clib_host_to_net_u16 (ip->length) < 28))
52 port = vnet_buffer (b0)->ip.reass.l4_dst_port;
54 /* Verify that port is not among the well-known ports */
55 if ((d->psid_offset > 0)
56 && (clib_net_to_host_u16 (port) < (0x1 << (16 - d->psid_offset))))
58 *error = MAP_ERROR_ENCAP_SEC_CHECK;
71 static_always_inline u32
72 ip4_map_vtcfl (ip4_header_t * ip4, vlib_buffer_t * p)
74 map_main_t *mm = &map_main;
75 u8 tc = mm->tc_copy ? ip4->tos : mm->tc;
76 u32 vtcfl = 0x6 << 28;
78 vtcfl |= vnet_buffer (p)->ip.flow_hash & 0x000fffff;
80 return (clib_host_to_net_u32 (vtcfl));
83 static_always_inline bool
84 ip4_map_ip6_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
86 #ifdef MAP_SKIP_IP6_LOOKUP
87 if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP6].fei)
89 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
90 pre_resolved[FIB_PROTOCOL_IP6].dpo.dpoi_index;
101 ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error)
105 /* Input node should have reject packets with ttl 0. */
106 ASSERT (ip->ttl > 0);
108 u32 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
109 checksum += checksum >= 0xffff;
110 ip->checksum = checksum;
113 *error = ttl <= 0 ? IP4_ERROR_TIME_EXPIRED : *error;
115 /* Verify checksum. */
116 ASSERT (ip->checksum == ip4_header_checksum (ip));
120 ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
122 map_main_t *mm = &map_main;
126 // TODO: Fix inner fragmentation after removed inner support from ip-frag.
127 ip_frag_set_vnet_buffer (b, /*sizeof (ip6_header_t), */ mtu,
128 IP4_FRAG_NEXT_IP6_LOOKUP,
129 IP_FRAG_FLAG_IP6_HEADER);
130 return (IP4_MAP_NEXT_IP4_FRAGMENT);
134 if (df && !mm->frag_ignore_df)
136 icmp4_error_set_vnet_buffer (b, ICMP4_destination_unreachable,
137 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
139 vlib_buffer_advance (b, sizeof (ip6_header_t));
140 *error = MAP_ERROR_DF_SET;
141 return (IP4_MAP_NEXT_ICMP_ERROR);
143 ip_frag_set_vnet_buffer (b, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
144 IP_FRAG_FLAG_IP6_HEADER);
145 return (IP4_MAP_NEXT_IP6_FRAGMENT);
153 ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
155 u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
156 vlib_node_runtime_t *error_node =
157 vlib_node_get_runtime (vm, ip4_map_node.index);
158 from = vlib_frame_vector_args (frame);
159 n_left_from = frame->n_vectors;
160 next_index = node->cached_next_index;
161 map_main_t *mm = &map_main;
162 vlib_combined_counter_main_t *cm = mm->domain_counters;
163 u32 thread_index = vm->thread_index;
165 while (n_left_from > 0)
167 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
170 while (n_left_from >= 4 && n_left_to_next >= 2)
173 vlib_buffer_t *p0, *p1;
174 map_domain_t *d0, *d1;
175 u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE;
176 ip4_header_t *ip40, *ip41;
177 u16 port0 = 0, port1 = 0;
178 ip6_header_t *ip6h0, *ip6h1;
179 u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
180 u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 =
181 IP4_MAP_NEXT_IP6_LOOKUP;
183 /* Prefetch next iteration. */
185 vlib_buffer_t *p2, *p3;
187 p2 = vlib_get_buffer (vm, from[2]);
188 p3 = vlib_get_buffer (vm, from[3]);
190 vlib_prefetch_buffer_header (p2, STORE);
191 vlib_prefetch_buffer_header (p3, STORE);
192 /* IPv4 + 8 = 28. possibly plus -40 */
193 CLIB_PREFETCH (p2->data - 40, 68, STORE);
194 CLIB_PREFETCH (p3->data - 40, 68, STORE);
197 pi0 = to_next[0] = from[0];
198 pi1 = to_next[1] = from[1];
204 p0 = vlib_get_buffer (vm, pi0);
205 p1 = vlib_get_buffer (vm, pi1);
206 ip40 = vlib_buffer_get_current (p0);
207 ip41 = vlib_buffer_get_current (p1);
209 ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
212 ip4_map_get_domain (&ip41->dst_address, &map_domain_index1,
216 * Shared IPv4 address
218 port0 = ip4_map_port_and_security_check (d0, p0, &error0);
219 port1 = ip4_map_port_and_security_check (d1, p1, &error1);
221 /* Decrement IPv4 TTL */
222 ip4_map_decrement_ttl (ip40, &error0);
223 ip4_map_decrement_ttl (ip41, &error1);
225 ip40->flags_and_fragment_offset &
226 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
228 ip41->flags_and_fragment_offset &
229 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
232 u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
233 u32 da41 = clib_net_to_host_u32 (ip41->dst_address.as_u32);
234 u16 dp40 = clib_net_to_host_u16 (port0);
235 u16 dp41 = clib_net_to_host_u16 (port1);
236 u64 dal60 = map_get_pfx (d0, da40, dp40);
237 u64 dal61 = map_get_pfx (d1, da41, dp41);
238 u64 dar60 = map_get_sfx (d0, da40, dp40);
239 u64 dar61 = map_get_sfx (d1, da41, dp41);
240 if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
241 error0 = MAP_ERROR_NO_BINDING;
242 if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE)
243 error1 = MAP_ERROR_NO_BINDING;
245 /* construct ipv6 header */
246 vlib_buffer_advance (p0, -sizeof (ip6_header_t));
247 vlib_buffer_advance (p1, -sizeof (ip6_header_t));
248 ip6h0 = vlib_buffer_get_current (p0);
249 ip6h1 = vlib_buffer_get_current (p1);
250 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
251 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
253 ip6h0->ip_version_traffic_class_and_flow_label =
254 ip4_map_vtcfl (ip40, p0);
255 ip6h1->ip_version_traffic_class_and_flow_label =
256 ip4_map_vtcfl (ip41, p1);
257 ip6h0->payload_length = ip40->length;
258 ip6h1->payload_length = ip41->length;
259 ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
260 ip6h1->protocol = IP_PROTOCOL_IP_IN_IP;
261 ip6h0->hop_limit = 0x40;
262 ip6h1->hop_limit = 0x40;
263 ip6h0->src_address = d0->ip6_src;
264 ip6h1->src_address = d1->ip6_src;
265 ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
266 ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
267 ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64 (dal61);
268 ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64 (dar61);
271 * Determine next node. Can be one of:
272 * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
274 if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
278 && (clib_net_to_host_u16 (ip6h0->payload_length) +
279 sizeof (*ip6h0) > d0->mtu)))
281 next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
286 ip4_map_ip6_lookup_bypass (p0,
288 IP4_MAP_NEXT_IP6_REWRITE : next0;
289 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
291 map_domain_index0, 1,
293 (ip6h0->payload_length) +
299 next0 = IP4_MAP_NEXT_DROP;
303 * Determine next node. Can be one of:
304 * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
306 if (PREDICT_TRUE (error1 == MAP_ERROR_NONE))
310 && (clib_net_to_host_u16 (ip6h1->payload_length) +
311 sizeof (*ip6h1) > d1->mtu)))
313 next1 = ip4_map_fragment (p1, d1->mtu, df1, &error1);
318 ip4_map_ip6_lookup_bypass (p1,
320 IP4_MAP_NEXT_IP6_REWRITE : next1;
321 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
323 map_domain_index1, 1,
325 (ip6h1->payload_length) +
331 next1 = IP4_MAP_NEXT_DROP;
334 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
336 map_add_trace (vm, node, p0, map_domain_index0, port0);
338 if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
340 map_add_trace (vm, node, p1, map_domain_index1, port0);
343 p0->error = error_node->errors[error0];
344 p1->error = error_node->errors[error1];
346 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
347 n_left_to_next, pi0, pi1, next0,
351 while (n_left_from > 0 && n_left_to_next > 0)
356 u8 error0 = MAP_ERROR_NONE;
360 u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
361 u32 map_domain_index0 = ~0;
363 pi0 = to_next[0] = from[0];
369 p0 = vlib_get_buffer (vm, pi0);
370 ip40 = vlib_buffer_get_current (p0);
373 ip4_map_get_domain (&ip40->dst_address, &map_domain_index0,
376 { /* Guess it wasn't for us */
377 vnet_feature_next (&next0, p0);
382 * Shared IPv4 address
384 port0 = ip4_map_port_and_security_check (d0, p0, &error0);
386 /* Decrement IPv4 TTL */
387 ip4_map_decrement_ttl (ip40, &error0);
389 ip40->flags_and_fragment_offset &
390 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
393 u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
394 u16 dp40 = clib_net_to_host_u16 (port0);
395 u64 dal60 = map_get_pfx (d0, da40, dp40);
396 u64 dar60 = map_get_sfx (d0, da40, dp40);
397 if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE)
398 error0 = MAP_ERROR_NO_BINDING;
400 /* construct ipv6 header */
401 vlib_buffer_advance (p0, -(sizeof (ip6_header_t)));
402 ip6h0 = vlib_buffer_get_current (p0);
403 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
405 ip6h0->ip_version_traffic_class_and_flow_label =
406 ip4_map_vtcfl (ip40, p0);
407 ip6h0->payload_length = ip40->length;
408 ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
409 ip6h0->hop_limit = 0x40;
410 ip6h0->src_address = d0->ip6_src;
411 ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
412 ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
415 * Determine next node. Can be one of:
416 * ip6-lookup, ip6-rewrite, ip4-fragment, error-drop
418 if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
422 && (clib_net_to_host_u16 (ip6h0->payload_length) +
423 sizeof (*ip6h0) > d0->mtu)))
425 next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
430 ip4_map_ip6_lookup_bypass (p0,
432 IP4_MAP_NEXT_IP6_REWRITE : next0;
433 vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
435 map_domain_index0, 1,
437 (ip6h0->payload_length) +
443 next0 = IP4_MAP_NEXT_DROP;
446 if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
448 map_add_trace (vm, node, p0, map_domain_index0, port0);
451 p0->error = error_node->errors[error0];
453 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
454 n_left_to_next, pi0, next0);
456 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
459 return frame->n_vectors;
462 static char *map_error_strings[] = {
463 #define _(sym,string) string,
470 VNET_FEATURE_INIT (ip4_map_feature, static) =
472 .arc_name = "ip4-unicast",
473 .node_name = "ip4-map",
474 .runs_before = VNET_FEATURES ("ip4-flow-classify"),
475 .runs_after = VNET_FEATURES("ip4-sv-reassembly-feature"),
478 VLIB_REGISTER_NODE(ip4_map_node) = {
481 .vector_size = sizeof(u32),
482 .format_trace = format_map_trace,
483 .type = VLIB_NODE_TYPE_INTERNAL,
485 .n_errors = MAP_N_ERROR,
486 .error_strings = map_error_strings,
488 .n_next_nodes = IP4_MAP_N_NEXT,
490 [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
491 #ifdef MAP_SKIP_IP6_LOOKUP
492 [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance",
494 [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
495 [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag",
496 [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
497 [IP4_MAP_NEXT_DROP] = "error-drop",
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */