2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 /** for ethernet_header_t */
43 #include <vnet/ethernet/ethernet.h>
44 /** for ethernet_arp_header_t */
45 #include <vnet/ethernet/arp_packet.h>
46 #include <vnet/ppp/ppp.h>
47 /** for srp_hw_interface_class */
48 #include <vnet/srp/srp.h>
49 /** for API error numbers */
50 #include <vnet/api_errno.h>
/*
 * Hash-table lookup of the adjacency index for a destination address in the
 * FIB at im->fibs[fib_index].
 *
 * The loop walks fib->adj_index_by_dst_address[] from its last slot down to
 * i_min, masks the destination with im->fib_masks[i] and probes that slot's
 * hash with the masked key.  i_min is 1 when disable_default_route is
 * non-zero, so slot 0 is not probed in that case.  When the loop finds
 * nothing, ai is set to lm->miss_adj_index.
 *
 * NOTE(review): this listing appears to have dropped lines (the return type,
 * the declaration of dst, braces, and the statements taken on a hash hit), so
 * only the visible statements are described -- confirm against the full file.
 */
56 /* This is really, really simple but stupid fib. */
58 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
60 u32 disable_default_route)
62 ip_lookup_main_t * lm = &im->lookup_main;
63 ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
64 uword * p, * hash, key;
65 i32 i, i_min, dst_address, ai;
/* Lowest slot the loop will probe: 1 leaves slot 0 out of the search. */
67 i_min = disable_default_route ? 1 : 0;
/* Read through clib_mem_unaligned -- presumably because dst may not be u32-aligned; confirm. */
68 dst_address = clib_mem_unaligned (&dst->data_u32, u32);
/* Probe from the last slot of the array downwards. */
69 for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
71 hash = fib->adj_index_by_dst_address[i];
/* Key is the destination masked with the mask stored for slot i. */
75 key = dst_address & im->fib_masks[i];
76 if ((p = hash_get (hash, key)) != 0)
83 /* Nothing matches in table. */
84 ai = lm->miss_adj_index;
/*
 * Summary of the visible body of create_fib_with_table_id:
 *  - stores the pair (table_id, vec_len (im->fibs)) in
 *    im->fib_index_by_table_id before the vector grows, which is presumably
 *    the index the new element is about to occupy;
 *  - adds one element to im->fibs via vec_add2 and records table_id and the
 *    element's own offset (fib - im->fibs) in it;
 *  - sets flow_hash_config to IP_FLOW_HASH_DEFAULT, sets both classify table
 *    indices to ~0 and calls ip4_mtrie_init on the embedded mtrie.
 *
 * NOTE(review): in this listing the doxygen comment below has no closing
 * delimiter, and the return type, braces, the declaration of fib and the
 * return statement are not visible -- confirm against the full file.
 */
90 /** @brief Create FIB from table ID and init all hashing.
91 @param im - @ref ip4_main_t
92 @param table_id - table ID
93 @return fib - @ref ip4_fib_t
96 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
99 hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
100 vec_add2 (im->fibs, fib, 1);
101 fib->table_id = table_id;
102 fib->index = fib - im->fibs;
103 /* IP_FLOW_HASH_DEFAULT is net value of 5 tuple flags without "reverse" bit */
104 fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
/* Both classify table indices start as ~0 -- presumably meaning "no table assigned"; confirm. */
105 fib->fwd_classify_table_index = ~0;
106 fib->rev_classify_table_index = ~0;
107 ip4_mtrie_init (&fib->mtrie);
/*
 * Resolve table_index_or_id to an ip4_fib_t.
 *
 * When IP4_ROUTE_FLAG_FIB_INDEX is set in flags, the value is used as an
 * index into im->fibs.  Otherwise it is treated as a table id:
 *  - ~0 starts a search at id 0 for an id with no entry in
 *    im->fib_index_by_table_id and creates a FIB for the id found (the loop
 *    body that advances the id is not visible in this listing);
 *  - any other id is looked up in im->fib_index_by_table_id and a FIB is
 *    created through create_fib_with_table_id on the path shown at line 142.
 *
 * NOTE(review): in this listing the doxygen comment below has no closing
 * delimiter, and the test on p plus the assignment of fib_index from the
 * hash result are not visible -- confirm against the full file.
 */
111 /** @brief Find existing or Create new FIB based on index
112 @param im @ref ip4_main_t
113 @param table_index_or_id - overloaded parameter referring
114 to the table or a table's index in the FIB vector
115 @param flags - used to check if table_index_or_id was a table or
116 an index (detected by @ref IP4_ROUTE_FLAG_FIB_INDEX)
117 @return either the existing or a new ip4_fib_t entry
120 find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
121 u32 table_index_or_id, u32 flags)
123 uword * p, fib_index;
125 fib_index = table_index_or_id;
126 /* If this isn't a FIB_INDEX ... */
127 if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
129 /* If passed ~0 then request the next table available */
130 if (table_index_or_id == ~0) {
/* Start the search for an unused table id at 0. */
131 table_index_or_id = 0;
/* Keep going while the candidate id already has an entry in the hash. */
132 while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
135 /* Create the next table and return the ip4_fib_t associated with it */
136 return create_fib_with_table_id (im, table_index_or_id);
138 /* A specific table_id was requested.. */
139 p = hash_get (im->fib_index_by_table_id, table_index_or_id);
140 /* ... and if it doesn't exist create it else grab its index */
142 return create_fib_with_table_id (im, table_index_or_id);
145 /* Return the ip4_fib_t associated with this index */
146 return vec_elt_at_index (im->fibs, fib_index);
/*
 * Create the hash stored in fib->adj_index_by_dst_address[address_length] and
 * size the FIB's scratch vectors to match its value width.
 *
 * Visible steps:
 *  - assert that lm->fib_result_n_bytes is at least sizeof (uword);
 *  - derive lm->fib_result_n_words from fib_result_n_bytes using round_pow2
 *    and sizeof (uword) (presumably a round-up to whole uwords; confirm);
 *  - hash_create with 32 initial elements and fib_result_n_words uwords of
 *    value storage, then set HASH_FLAG_NO_AUTO_SHRINK on it;
 *  - make sure fib->new_hash_values and fib->old_hash_values cover index
 *    max_index, filling newly added elements with ~0.
 *
 * NOTE(review): the return type, the remaining parameters (fib and
 * address_length are used but not declared in the visible lines), the local
 * declarations and the braces are missing from this listing.
 */
150 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
157 ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
158 lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
160 fib->adj_index_by_dst_address[address_length] =
161 hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
163 hash_set_flags (fib->adj_index_by_dst_address[address_length],
164 HASH_FLAG_NO_AUTO_SHRINK);
/* Number of scratch-vector elements that fit in one hash value, minus one. */
166 h = hash_header (fib->adj_index_by_dst_address[address_length]);
167 max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
169 /* Initialize new/old hash value vectors. */
170 vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
171 vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
/*
 * Store adj_index as the result for dst_address_u32 in the hash at
 * fib->adj_index_by_dst_address[dst_address_length] and notify the
 * registered add/del route callbacks.
 *
 * Visible steps:
 *  - fill fib->old_hash_values and fib->new_hash_values with all-ones bytes
 *    (when they are non-empty) and put adj_index in new_hash_values[0];
 *  - call ip_get_adjacency on adj_index, discarding the result;
 *  - call _hash_set3 with the new and old value vectors and store the
 *    returned hash pointer back into the FIB;
 *  - when callbacks are registered, call each one whose required_flags are
 *    all present in flags, passing the old and new value vectors; afterwards
 *    look the key up again and copy new_hash_values over the stored value.
 *
 * NOTE(review): the return type, several parameters (fib, flags,
 * dst_address_u32 and adj_index are used but not declared in the visible
 * lines), local declarations and braces are missing from this listing.
 */
175 ip4_fib_set_adj_index (ip4_main_t * im,
179 u32 dst_address_length,
182 ip_lookup_main_t * lm = &im->lookup_main;
/* memset with ~0 writes 0xff into every byte, i.e. every element becomes ~0. */
185 if (vec_bytes(fib->old_hash_values))
186 memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
187 if (vec_bytes(fib->new_hash_values))
188 memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
189 fib->new_hash_values[0] = adj_index;
191 /* Make sure adj index is valid. */
193 (void) ip_get_adjacency (lm, adj_index);
195 hash = fib->adj_index_by_dst_address[dst_address_length];
/* presumably _hash_set3 reports any previous value through old_hash_values -- confirm against vppinfra hash.h. */
197 hash = _hash_set3 (hash, dst_address_u32,
198 fib->new_hash_values,
199 fib->old_hash_values);
/* The returned pointer is written back, so a changed hash pointer is not lost. */
201 fib->adj_index_by_dst_address[dst_address_length] = hash;
203 if (vec_len (im->add_del_route_callbacks) > 0)
205 ip4_add_del_route_callback_t * cb;
209 d.data_u32 = dst_address_u32;
/* Only callbacks whose required_flags are all set in flags are invoked. */
210 vec_foreach (cb, im->add_del_route_callbacks)
211 if ((flags & cb->required_flags) == cb->required_flags)
212 cb->function (im, cb->function_opaque,
214 &d, dst_address_length,
215 fib->old_hash_values,
216 fib->new_hash_values);
/* Re-copy new_hash_values into the stored entry -- presumably so edits made by callbacks are kept; confirm. */
218 p = hash_get (hash, dst_address_u32);
219 /* hash_get should never return NULL here */
221 clib_memcpy (p, fib->new_hash_values,
222 vec_bytes (fib->new_hash_values));
/*
 * Add or delete one route described by a.
 *
 * Visible flow:
 *  - adjacency: when a->n_add_adj > 0 new adjacencies are created from
 *    a->add_adj and the adjacency callbacks are called; a->adj_index is used
 *    on the other path;
 *  - the FIB is resolved with find_ip4_fib_by_table_index_or_id, the
 *    destination is masked with im->fib_masks[dst_address_length], and the
 *    per-length hash is created if it does not exist yet;
 *  - delete (IP4_ROUTE_FLAG_DEL set in a->flags): the key is removed with
 *    _hash_unset and, if old_hash_values[0] shows it was present, the route
 *    callbacks are called with new_hash_values[0] == ~0;
 *  - add: ip4_fib_set_adj_index stores the adjacency index;
 *  - the mtrie is updated through ip4_fib_mtrie_add_del_route;
 *  - unless IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY is set, a previous adjacency
 *    that differs from adj_index (and is not ~0) is deleted.
 *
 * NOTE(review): braces, else keywords, some argument lines and the body of
 * the share_count test are missing from this listing; confirm details
 * against the full file.
 */
228 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
230 ip_lookup_main_t * lm = &im->lookup_main;
232 u32 dst_address, dst_address_length, adj_index, old_adj_index;
233 uword * hash, is_del;
234 ip4_add_del_route_callback_t * cb;
236 /* Either create new adjacency or use given one depending on arguments. */
237 if (a->n_add_adj > 0)
239 ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
240 ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
243 adj_index = a->adj_index;
245 dst_address = a->dst_address.data_u32;
246 dst_address_length = a->dst_address_length;
247 fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
/* Clear the bits outside the prefix so the hash key is the masked address. */
249 ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
250 dst_address &= im->fib_masks[dst_address_length];
/* The per-length hash is created the first time a prefix of this length is used. */
252 if (! fib->adj_index_by_dst_address[dst_address_length])
253 ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
255 hash = fib->adj_index_by_dst_address[dst_address_length];
257 is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
/* Preset to ~0 so the test below can tell whether the key was present. */
261 fib->old_hash_values[0] = ~0;
262 hash = _hash_unset (hash, dst_address, fib->old_hash_values);
263 fib->adj_index_by_dst_address[dst_address_length] = hash;
265 if (vec_len (im->add_del_route_callbacks) > 0
266 && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
268 fib->new_hash_values[0] = ~0;
269 vec_foreach (cb, im->add_del_route_callbacks)
270 if ((a->flags & cb->required_flags) == cb->required_flags)
271 cb->function (im, cb->function_opaque,
273 &a->dst_address, dst_address_length,
274 fib->old_hash_values,
275 fib->new_hash_values);
279 ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
/* Adjacency index that was stored for this prefix before the change (~0 if none). */
282 old_adj_index = fib->old_hash_values[0];
284 /* Avoid spurious reference count increments */
285 if (old_adj_index == adj_index
287 && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
289 ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
290 if (adj->share_count > 0)
/* The mtrie is given the old adjacency on delete and the new one on add. */
294 ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
295 is_del ? old_adj_index : adj_index,
298 /* Delete old adjacency index if present and changed. */
299 if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
300 && old_adj_index != ~0
301 && old_adj_index != adj_index)
302 ip_del_adjacency (lm, old_adj_index);
/*
 * Return the adjacency index to use for a next hop.
 *
 * Three cases are visible:
 *  1. next_hop is 0.0.0.0 (data_u32 == 0): an "interface" next hop.  The
 *     adjacency is looked up in im->interface_route_adj_index_by_sw_if_index;
 *     the visible lines also show a new adjacency being created, set up with
 *     ip4_adjacency_set_interface_route and stored in that hash.
 *  2. next_hop_sw_if_index == ~0: a recursive next hop.  An adjacency with
 *     lookup_next_index IP_LOOKUP_NEXT_INDIRECT is built from next_hop and
 *     explicit_fib_index and added.
 *  3. otherwise: the next hop is looked up as a 32-bit prefix in
 *     fib->adj_index_by_dst_address[32]; the visible lines also show a
 *     longest-prefix lookup via ip4_fib_lookup_with_table and, for an ARP
 *     adjacency whose arp.next_hop is zero, a call to vnet_arp_glean_add.
 *
 * NOTE(review): the return type, the fib_index parameter, some local
 * declarations, braces, else keywords and the tests on nh_result are missing
 * from this listing, and the comment opened at line 340 has no closing
 * delimiter here -- confirm against the full file.
 */
307 ip4_route_get_next_hop_adj (ip4_main_t * im,
309 ip4_address_t *next_hop,
310 u32 next_hop_sw_if_index,
311 u32 explicit_fib_index)
313 ip_lookup_main_t * lm = &im->lookup_main;
314 vnet_main_t * vnm = vnet_get_main();
315 uword * nh_hash, * nh_result;
316 int is_interface_next_hop;
320 fib = vec_elt_at_index (im->fibs, fib_index);
/* An all-zero next hop address marks an interface route. */
322 is_interface_next_hop = next_hop->data_u32 == 0;
323 if (is_interface_next_hop)
325 nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
327 nh_adj_index = *nh_result;
330 ip_adjacency_t * adj;
331 adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
333 ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
334 ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
/* Remember the adjacency for this interface so the hash_get above can find it later. */
335 hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
338 else if (next_hop_sw_if_index == ~0)
340 /* next-hop is recursive. we always need a indirect adj
341 * for recursive paths. Any LPM we perform now will give
342 * us a valid adj, but without tracking the next-hop we
343 * have no way to keep it valid.
345 ip_adjacency_t add_adj;
346 memset (&add_adj, 0, sizeof(add_adj));
348 add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
349 add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
350 add_adj.explicit_fib_index = explicit_fib_index;
351 ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
355 nh_hash = fib->adj_index_by_dst_address[32];
356 nh_result = hash_get (nh_hash, next_hop->data_u32);
358 /* Next hop must be known. */
361 ip_adjacency_t * adj;
363 /* no /32 exists, get the longest prefix match */
364 nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
366 adj = ip_get_adjacency (lm, nh_adj_index);
367 /* if ARP interface adjacency is present, we need to
368 install ARP adjacency for specific next hop */
369 if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
370 adj->arp.next_hop.ip4.as_u32 == 0)
372 nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
377 nh_adj_index = *nh_result;
381 return (nh_adj_index);
/*
 * Add or delete one next hop of a route, maintaining multipath adjacencies.
 *
 * Visible flow:
 *  - fib_index is taken from im->fib_index_by_sw_if_index for
 *    next_hop_sw_if_index when explicit_fib_index is ~0; explicit_fib_index
 *    is used on the other path;
 *  - the next-hop adjacency is resolved with ip4_route_get_next_hop_adj when
 *    adj_index is ~0; adj_index itself is used on the other path;
 *  - the destination is masked and looked up in the per-length hash to find
 *    any existing adjacency (dst_adj_index / dst_adj);
 *  - three error paths set vnm->api_errno and build a clib error:
 *    VNET_API_ERROR_UNKNOWN_DESTINATION, VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP
 *    and VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
 *  - simple case (dst_adj_index == ~0, next_hop_weight == 1 and
 *    next_hop_sw_if_index == ~0): the route is mapped straight onto
 *    nh_adj_index with ip4_add_del_route;
 *  - multipath case: ip_multipath_adjacency_add_del_next_hop is called and,
 *    when the old and new multipath adjacencies differ, the route is
 *    re-pointed with ip4_add_del_route and the target adjacency's share_count
 *    is adjusted by -1 on delete or +1 on add;
 *  - a non-zero error is passed to clib_error_report.
 *
 * NOTE(review): the return type, the flags parameter, several locals, braces,
 * goto/label lines and parts of several conditions and argument lists are
 * missing from this listing; confirm details against the full file.
 */
385 ip4_add_del_route_next_hop (ip4_main_t * im,
387 ip4_address_t * dst_address,
388 u32 dst_address_length,
389 ip4_address_t * next_hop,
390 u32 next_hop_sw_if_index,
391 u32 next_hop_weight, u32 adj_index,
392 u32 explicit_fib_index)
394 vnet_main_t * vnm = vnet_get_main();
395 ip_lookup_main_t * lm = &im->lookup_main;
398 u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
399 u32 dst_adj_index, nh_adj_index;
400 uword * dst_hash, * dst_result;
401 ip_adjacency_t * dst_adj;
402 ip_multipath_adjacency_t * old_mp, * new_mp;
403 int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
404 clib_error_t * error = 0;
/* ~0 means "no explicit FIB": use the FIB bound to the next hop's interface. */
406 if (explicit_fib_index == (u32)~0)
407 fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
409 fib_index = explicit_fib_index;
411 fib = vec_elt_at_index (im->fibs, fib_index);
413 /* Lookup next hop to be added or deleted. */
414 if (adj_index == (u32)~0)
416 nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
418 next_hop_sw_if_index,
423 nh_adj_index = adj_index;
425 ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
426 dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
/* Look for an existing entry for the masked destination at this prefix length. */
428 dst_hash = fib->adj_index_by_dst_address[dst_address_length];
429 dst_result = hash_get (dst_hash, dst_address_u32);
432 dst_adj_index = dst_result[0];
433 dst_adj = ip_get_adjacency (lm, dst_adj_index);
437 /* For deletes destination must be known. */
440 vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
441 error = clib_error_return (0, "unknown destination %U/%d",
442 format_ip4_address, dst_address,
451 /* Ignore adds of X/32 with next hop of X. */
453 && dst_address_length == 32
454 && dst_address->data_u32 == next_hop->data_u32
455 && adj_index != (u32)~0)
457 vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
458 error = clib_error_return (0, "prefix matches next hop %U/%d",
459 format_ip4_address, dst_address,
464 /* Destination is not known and default weight is set so add route
465 to existing non-multipath adjacency */
466 if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
468 /* create / delete additional mapping of existing adjacency */
469 ip4_add_del_route_args_t a;
471 a.table_index_or_table_id = fib_index;
/* Only the NO_REDISTRIBUTE and NOT_LAST_IN_GROUP bits of the caller's flags are passed on. */
472 a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
473 | IP4_ROUTE_FLAG_FIB_INDEX
474 | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
475 | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
476 | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
477 a.dst_address = dst_address[0];
478 a.dst_address_length = dst_address_length;
479 a.adj_index = nh_adj_index;
483 ip4_add_del_route (im, &a);
/* heap_handle of the existing destination adjacency, or ~0 when there is none. */
487 old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
489 if (! ip_multipath_adjacency_add_del_next_hop
496 vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
497 error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
498 format_ip4_address, next_hop);
503 if (old_mp_adj_index != ~0)
504 old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
505 if (new_mp_adj_index != ~0)
506 new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
/* The route only needs re-pointing when the multipath adjacency actually changed. */
508 if (old_mp != new_mp)
510 ip4_add_del_route_args_t a;
511 ip_adjacency_t * adj;
513 a.table_index_or_table_id = fib_index;
/* The route is deleted only when this is a delete and no multipath adjacency remains. */
514 a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
515 | IP4_ROUTE_FLAG_FIB_INDEX
516 | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
517 | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
518 a.dst_address = dst_address[0];
519 a.dst_address_length = dst_address_length;
520 a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
524 ip4_add_del_route (im, &a);
526 adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
528 adj->share_count += is_del ? -1 : 1;
533 clib_error_report (error);
/*
 * Exact-match lookup of one prefix: resolves the FIB with
 * find_ip4_fib_by_table_index_or_id, masks the address with
 * im->fib_masks[address_length] and probes the hash stored for that length.
 *
 * NOTE(review): the return type, the flags, address and address_length
 * parameters, the declarations of hash and p, and the return statement are
 * missing from this listing, so what is returned for a hit or a miss cannot
 * be stated from here.
 */
537 ip4_get_route (ip4_main_t * im,
538 u32 table_index_or_table_id,
543 ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
/* The first four bytes at address are read as a u32 through a pointer cast. */
544 u32 dst_address = * (u32 *) address;
547 ASSERT (address_length < ARRAY_LEN (im->fib_masks));
548 dst_address &= im->fib_masks[address_length];
550 hash = fib->adj_index_by_dst_address[address_length];
551 p = hash_get (hash, dst_address);
/*
 * Collect routes that fall under address/address_length.
 *
 * Both output vectors are reset to length 0, then the per-length hashes are
 * scanned starting at this_length = address_length.  A key k is taken when it
 * agrees with the requested address under im->fib_masks[address_length]; the
 * address is appended to *results and this_length to *result_lengths.
 *
 * NOTE(review): the return type, the flags and address_length parameters, the
 * guards around the two _vec_len resets, the declarations of k, v and a, the
 * increment of this_length and the braces are missing from this listing.
 */
556 ip4_foreach_matching_route (ip4_main_t * im,
557 u32 table_index_or_table_id,
559 ip4_address_t * address,
561 ip4_address_t ** results,
562 u8 ** result_lengths)
564 ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
565 u32 dst_address = address->data_u32;
566 u32 this_length = address_length;
/* Reset the caller's vectors to empty; new matches are appended below. */
569 _vec_len (*results) = 0;
571 _vec_len (*result_lengths) = 0;
/*
 * NOTE(review): vec_len is applied to results itself (an ip4_address_t **),
 * while every other use in this function dereferences it as *results.  This
 * looks like a missing dereference -- confirm and fix in the full file.
 */
573 while (this_length <= 32 && vec_len (results) == 0)
576 hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
/* Match when k and the requested address agree on the bits kept by the mask for address_length. */
577 if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
581 vec_add1 (*results, a);
582 vec_add1 (*result_lengths, this_length);
/*
 * Apply pending adjacency remaps to one FIB.
 *
 * Visible flow:
 *  - the test on lm->n_adjacency_remaps == 0 comes first (its body is not
 *    visible; presumably an early return);
 *  - for each prefix length l in 0..32 the per-length hash is walked; for
 *    every entry the remap value m is read from lm->adjacency_remap_table at
 *    the entry's adjacency index;
 *  - entries whose new adjacency "points to nothing" are queued in the static
 *    vector to_delete; entries being remapped get the value m - 1 written in
 *    place and the route callbacks are called with IP4_ROUTE_FLAG_ADD;
 *  - queued entries are removed with _hash_unset and the route callbacks are
 *    called with IP4_ROUTE_FLAG_DEL and new_hash_values[0] == ~0;
 *  - finally the mtrie is remapped, the remap table is zeroed and
 *    lm->n_adjacency_remaps is reset to 0.
 *
 * NOTE(review): the flags parameter, several locals, the conditions that
 * choose between delete and remap, braces and some callback arguments are
 * missing from this listing; confirm details against the full file.
 */
590 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
591 u32 table_index_or_table_id,
594 ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
595 ip_lookup_main_t * lm = &im->lookup_main;
598 ip4_add_del_route_callback_t * cb;
/* Static, so the vector's storage persists between calls; its length is reset before each walk. */
599 static ip4_address_t * to_delete;
601 if (lm->n_adjacency_remaps == 0)
604 for (l = 0; l <= 32; l++)
607 uword * hash = fib->adj_index_by_dst_address[l];
609 if (hash_elts (hash) == 0)
613 _vec_len (to_delete) = 0;
615 hash_foreach_pair (p, hash, ({
616 u32 adj_index = p->value[0];
/* m is the remap table entry for the adjacency this route currently uses. */
617 u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
621 /* Record destination address from hash key. */
624 /* New adjacency points to nothing: so delete prefix. */
626 vec_add1 (to_delete, a);
629 /* Remap to new adjacency. */
630 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
632 /* Set new adjacency value. */
633 fib->new_hash_values[0] = p->value[0] = m - 1;
635 vec_foreach (cb, im->add_del_route_callbacks)
636 if ((flags & cb->required_flags) == cb->required_flags)
637 cb->function (im, cb->function_opaque,
638 fib, flags | IP4_ROUTE_FLAG_ADD,
640 fib->old_hash_values,
641 fib->new_hash_values);
/* Deletions are applied after the walk, from the list collected above. */
646 fib->new_hash_values[0] = ~0;
647 for (i = 0; i < vec_len (to_delete); i++)
649 hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
650 vec_foreach (cb, im->add_del_route_callbacks)
651 if ((flags & cb->required_flags) == cb->required_flags)
652 cb->function (im, cb->function_opaque,
653 fib, flags | IP4_ROUTE_FLAG_DEL,
655 fib->old_hash_values,
656 fib->new_hash_values);
660 /* Also remap adjacencies in mtrie. */
661 ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
663 /* Reset mapping table. */
664 vec_zero (lm->adjacency_remap_table);
666 /* All remaps have been performed. */
667 lm->n_adjacency_remaps = 0;
/*
 * Delete routes that fall under address/address_length.
 *
 * For each length l from address_length + 1 to 32 the matching routes are
 * collected with ip4_foreach_matching_route into two static vectors, and each
 * one is removed through ip4_add_del_route using the flags
 * IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags.  Afterwards
 * ip4_maybe_remap_adjacencies is called for the same table.
 *
 * NOTE(review): the flags and address_length parameters, the declarations of
 * l and i, some call arguments and the braces are missing from this listing.
 */
670 void ip4_delete_matching_routes (ip4_main_t * im,
671 u32 table_index_or_table_id,
673 ip4_address_t * address,
/* Static scratch vectors: their storage persists between calls. */
676 static ip4_address_t * matching_addresses;
677 static u8 * matching_address_lengths;
679 ip4_add_del_route_args_t a;
681 a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
682 a.table_index_or_table_id = table_index_or_table_id;
/* Starts one bit longer than the requested prefix length. */
687 for (l = address_length + 1; l <= 32; l++)
689 ip4_foreach_matching_route (im, table_index_or_table_id, flags,
693 &matching_address_lengths);
694 for (i = 0; i < vec_len (matching_addresses); i++)
696 a.dst_address = matching_addresses[i];
697 a.dst_address_length = matching_address_lengths[i];
698 ip4_add_del_route (im, &a);
702 ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
/*
 * Forward declaration of the trace helper.  ip4_lookup_inline in this file
 * calls it with VLIB_TX when the node has VLIB_NODE_FLAG_TRACE set.
 * NOTE(review): the return type / storage class line is not visible in this
 * listing.
 */
706 ip4_forward_next_trace (vlib_main_t * vm,
707 vlib_node_runtime_t * node,
708 vlib_frame_t * frame,
709 vlib_rx_or_tx_t which_adj_index);
/*
 * Common body of the IPv4 lookup node: for every buffer in frame, find the
 * adjacency for its destination and enqueue it to the next node named by
 * that adjacency.
 *
 * Per packet, the visible code:
 *  - picks the address to look up: either the packet's own dst_address or the
 *    indirect.next_hop.ip4 of the adjacency already stored in
 *    ip.adj_index[VLIB_TX] (the condition choosing between them is not
 *    visible; the caller in this file passes an "is_indirect" argument);
 *  - picks the FIB index from im->fib_index_by_sw_if_index for the RX
 *    interface, unless sw_if_index[VLIB_TX] is not ~0, in which case that
 *    value is used as the FIB index;
 *  - unless lookup_for_responses_to_locally_received_packets is set, runs
 *    four ip4_fib_mtrie_lookup_step calls (steps 0..3), falls back to the
 *    mtrie's default_leaf on an empty leaf, and takes the adjacency index
 *    from the leaf; when the flag is set, ip.adj_index[VLIB_RX] is used;
 *  - for adjacencies with n_adj > 1 computes a flow hash with the FIB's
 *    flow_hash_config and adds (hash & (n_adj - 1)) to the adjacency index
 *    (n_adj is asserted to be a power of two);
 *  - stores the result in ip.adj_index[VLIB_TX], bumps the combined counter
 *    for that adjacency by one packet and by the buffer chain length plus
 *    sizeof (ethernet_header_t) bytes, and enqueues the buffer.
 *
 * The function returns frame->n_vectors.
 *
 * NOTE(review): the return type, the last parameter, braces, else keywords,
 * the switch on wrong_next and several enqueue bookkeeping lines are missing
 * from this listing; confirm details against the full file.
 */
712 ip4_lookup_inline (vlib_main_t * vm,
713 vlib_node_runtime_t * node,
714 vlib_frame_t * frame,
715 int lookup_for_responses_to_locally_received_packets,
718 ip4_main_t * im = &ip4_main;
719 ip_lookup_main_t * lm = &im->lookup_main;
720 vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
721 u32 n_left_from, n_left_to_next, * from, * to_next;
722 ip_lookup_next_t next;
723 u32 cpu_index = os_get_cpu_number();
725 from = vlib_frame_vector_args (frame);
726 n_left_from = frame->n_vectors;
/* Start with the next index cached on the node runtime. */
727 next = node->cached_next_index;
729 while (n_left_from > 0)
731 vlib_get_next_frame (vm, node, next,
732 to_next, n_left_to_next);
/* Dual loop: handles two packets per iteration and prefetches the following two, hence the >= 4 test. */
734 while (n_left_from >= 4 && n_left_to_next >= 2)
736 vlib_buffer_t * p0, * p1;
737 ip4_header_t * ip0, * ip1;
738 __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
739 ip_lookup_next_t next0, next1;
740 ip_adjacency_t * adj0, * adj1;
741 ip4_fib_mtrie_t * mtrie0, * mtrie1;
742 ip4_fib_mtrie_leaf_t leaf0, leaf1;
743 ip4_address_t * dst_addr0, *dst_addr1;
744 __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
745 __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
746 u32 flow_hash_config0, flow_hash_config1;
747 u32 hash_c0, hash_c1;
750 /* Prefetch next iteration. */
752 vlib_buffer_t * p2, * p3;
754 p2 = vlib_get_buffer (vm, from[2]);
755 p3 = vlib_get_buffer (vm, from[3]);
757 vlib_prefetch_buffer_header (p2, LOAD);
758 vlib_prefetch_buffer_header (p3, LOAD);
760 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
761 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
/* Speculatively copy both buffer indices into the current next frame. */
764 pi0 = to_next[0] = from[0];
765 pi1 = to_next[1] = from[1];
767 p0 = vlib_get_buffer (vm, pi0);
768 p1 = vlib_get_buffer (vm, pi1);
770 ip0 = vlib_buffer_get_current (p0);
771 ip1 = vlib_buffer_get_current (p1);
/* Lookup key, first alternative: next hop of the adjacency already stored in adj_index[VLIB_TX]. */
775 ip_adjacency_t * iadj0, * iadj1;
776 iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
777 iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
778 dst_addr0 = &iadj0->indirect.next_hop.ip4;
779 dst_addr1 = &iadj1->indirect.next_hop.ip4;
/* Lookup key, second alternative: the packet's own destination address. */
783 dst_addr0 = &ip0->dst_address;
784 dst_addr1 = &ip1->dst_address;
/* FIB of the RX interface, overridden by sw_if_index[VLIB_TX] when that is not ~0. */
787 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
788 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
789 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
790 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
791 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
792 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
/* mtrie walk, step 0 of 4; skipped entirely for responses to locally received packets. */
795 if (! lookup_for_responses_to_locally_received_packets)
797 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
798 mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
800 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
802 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
803 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
/* tcpN points just past a fixed-size ip4_header_t; both it and is_tcp_udpN are marked unused. */
806 tcp0 = (void *) (ip0 + 1);
807 tcp1 = (void *) (ip1 + 1);
809 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
810 || ip0->protocol == IP_PROTOCOL_UDP);
811 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
812 || ip1->protocol == IP_PROTOCOL_UDP);
814 if (! lookup_for_responses_to_locally_received_packets)
816 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
817 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
820 if (! lookup_for_responses_to_locally_received_packets)
822 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
823 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
826 if (! lookup_for_responses_to_locally_received_packets)
828 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
829 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
/* For responses to locally received packets the adjacency recorded in adj_index[VLIB_RX] is reused. */
832 if (lookup_for_responses_to_locally_received_packets)
834 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
835 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
839 /* Handle default route. */
840 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
841 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
843 adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
844 adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
/* Cross-check: the mtrie result is asserted equal to the hash-based lookup. */
847 ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
849 /* no_default_route */ 0));
850 ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
852 /* no_default_route */ 0));
853 adj0 = ip_get_adjacency (lm, adj_index0);
854 adj1 = ip_get_adjacency (lm, adj_index1);
856 next0 = adj0->lookup_next_index;
857 next1 = adj1->lookup_next_index;
859 /* Use flow hash to compute multipath adjacency. */
/* The stored flow hash defaults to 0 and is only computed when n_adj > 1. */
860 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
861 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
862 if (PREDICT_FALSE (adj0->n_adj > 1))
865 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
866 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
867 ip4_compute_flow_hash (ip0, flow_hash_config0);
869 if (PREDICT_FALSE(adj1->n_adj > 1))
872 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
873 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
874 ip4_compute_flow_hash (ip1, flow_hash_config1);
/* n_adj is a power of two, so (n_adj - 1) works as a mask selecting an offset in [0, n_adj). */
877 ASSERT (adj0->n_adj > 0);
878 ASSERT (adj1->n_adj > 0);
879 ASSERT (is_pow2 (adj0->n_adj));
880 ASSERT (is_pow2 (adj1->n_adj));
881 adj_index0 += (hash_c0 & (adj0->n_adj - 1));
882 adj_index1 += (hash_c1 & (adj1->n_adj - 1));
884 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
885 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
889 /* ARP for next-hop not packet's destination address */
890 if (adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP)
891 ip0->dst_address.as_u32 = dst_addr0->as_u32;
892 if (adj1->lookup_next_index == IP_LOOKUP_NEXT_ARP)
893 ip1->dst_address.as_u32 = dst_addr1->as_u32;
/* Count one packet per adjacency; bytes are chain length plus sizeof (ethernet_header_t). */
896 vlib_increment_combined_counter
897 (cm, cpu_index, adj_index0, 1,
898 vlib_buffer_length_in_chain (vm, p0)
899 + sizeof(ethernet_header_t));
900 vlib_increment_combined_counter
901 (cm, cpu_index, adj_index1, 1,
902 vlib_buffer_length_in_chain (vm, p1)
903 + sizeof(ethernet_header_t));
/* wrong_next: bit 0 set when packet 0, bit 1 set when packet 1, wants a next other than the current one. */
910 wrong_next = (next0 != next) + 2*(next1 != next);
911 if (PREDICT_FALSE (wrong_next != 0))
920 vlib_set_next_frame_buffer (vm, node, next0, pi0);
927 vlib_set_next_frame_buffer (vm, node, next1, pi1);
934 vlib_set_next_frame_buffer (vm, node, next0, pi0);
935 vlib_set_next_frame_buffer (vm, node, next1, pi1);
939 vlib_put_next_frame (vm, node, next, n_left_to_next);
941 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single loop: same processing as above, one packet at a time, for the remainder. */
947 while (n_left_from > 0 && n_left_to_next > 0)
951 __attribute__((unused)) tcp_header_t * tcp0;
952 ip_lookup_next_t next0;
953 ip_adjacency_t * adj0;
954 ip4_fib_mtrie_t * mtrie0;
955 ip4_fib_mtrie_leaf_t leaf0;
956 ip4_address_t * dst_addr0;
957 __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
958 u32 flow_hash_config0, hash_c0;
963 p0 = vlib_get_buffer (vm, pi0);
965 ip0 = vlib_buffer_get_current (p0);
969 ip_adjacency_t * iadj0;
970 iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
971 dst_addr0 = &iadj0->indirect.next_hop.ip4;
975 dst_addr0 = &ip0->dst_address;
978 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
979 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
980 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
982 if (! lookup_for_responses_to_locally_received_packets)
984 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
986 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
988 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
991 tcp0 = (void *) (ip0 + 1);
993 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
994 || ip0->protocol == IP_PROTOCOL_UDP);
996 if (! lookup_for_responses_to_locally_received_packets)
997 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
999 if (! lookup_for_responses_to_locally_received_packets)
1000 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
1002 if (! lookup_for_responses_to_locally_received_packets)
1003 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
1005 if (lookup_for_responses_to_locally_received_packets)
1006 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
1009 /* Handle default route. */
1010 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1011 adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1014 ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1016 /* no_default_route */ 0));
1018 adj0 = ip_get_adjacency (lm, adj_index0);
1020 next0 = adj0->lookup_next_index;
1022 /* Use flow hash to compute multipath adjacency. */
1023 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
1024 if (PREDICT_FALSE(adj0->n_adj > 1))
1027 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
1029 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
1030 ip4_compute_flow_hash (ip0, flow_hash_config0);
1033 ASSERT (adj0->n_adj > 0);
1034 ASSERT (is_pow2 (adj0->n_adj));
1035 adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1037 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1041 /* ARP for next-hop not packet's destination address */
1042 if (adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP)
1043 ip0->dst_address.as_u32 = dst_addr0->as_u32;
1046 vlib_increment_combined_counter
1047 (cm, cpu_index, adj_index0, 1,
1048 vlib_buffer_length_in_chain (vm, p0)
1049 + sizeof(ethernet_header_t));
1053 n_left_to_next -= 1;
/* The packet wants a different next: undo the speculative enqueue and switch next frames. */
1056 if (PREDICT_FALSE (next0 != next))
1058 n_left_to_next += 1;
1059 vlib_put_next_frame (vm, node, next, n_left_to_next);
1061 vlib_get_next_frame (vm, node, next,
1062 to_next, n_left_to_next);
1065 n_left_to_next -= 1;
1069 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing is done once per frame, after all packets have been enqueued. */
1072 if (node->flags & VLIB_NODE_FLAG_TRACE)
1073 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1075 return frame->n_vectors;
/*
 * ip4_lookup: node function that forwards to ip4_lookup_inline with
 * lookup_for_responses_to_locally_received_packets = 0 and is_indirect = 0,
 * and returns that call's result.
 *
 * NOTE(review): the doxygen comment below has no closing delimiter in this
 * listing, and the function's return type and braces are not visible --
 * confirm against the full file.
 */
1078 /** @brief IPv4 lookup node.
1081 This is the main IPv4 lookup dispatch node.
1083 @param vm vlib_main_t corresponding to the current thread
1084 @param node vlib_node_runtime_t
1085 @param frame vlib_frame_t whose contents should be dispatched
1087 @par Graph mechanics: buffer metadata, next index usage
1090 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1091 - Indicates the @c sw_if_index value of the interface that the
1092 packet was received on.
1093 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1094 - When the value is @c ~0 then the node performs a longest prefix
1095 match (LPM) for the packet destination address in the FIB attached
1096 to the receive interface.
1097 - Otherwise perform LPM for the packet destination address in the
1098 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1099 value (0, 1, ...) and not a VRF id.
1102 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1103 - The lookup result adjacency index.
1105 <em>Next Index:</em>
1106 - Dispatches the packet to the node index found in
1107 ip_adjacency_t @c adj->lookup_next_index
1108 (where @c adj is the lookup result adjacency).
1111 ip4_lookup (vlib_main_t * vm,
1112 vlib_node_runtime_t * node,
1113 vlib_frame_t * frame)
1115 return ip4_lookup_inline (vm, node, frame,
1116 /* lookup_for_responses_to_locally_received_packets */ 0,
1117 /* is_indirect */ 0);
/*
 * Fill in adj as an interface route for sw_if_index.
 *
 * The hardware class of the supporting hw interface selects the behaviour:
 *  - ethernet or SRP: lookup next is IP_LOOKUP_NEXT_ARP, the node is
 *    ip4_arp_node, the packet type is VNET_L3_PACKET_TYPE_ARP, and
 *    adj->if_address_index and adj->arp.next_hop are set;
 *  - the other visible assignment group uses IP_LOOKUP_NEXT_REWRITE,
 *    ip4_rewrite_node and VNET_L3_PACKET_TYPE_IP4.
 * adj->lookup_next_index is then set to the chosen value and
 * vnet_rewrite_for_sw_interface is called with
 * VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, the adjacency's
 * rewrite_header and sizeof (adj->rewrite_data).
 *
 * NOTE(review): the sw_if_index parameter, the declarations of n and
 * node_index, braces, the else keyword and the first arguments of the rewrite
 * call are missing from this listing.  The lines numbered 1135-1140 read as
 * the body of a block comment whose delimiters are not present here; they are
 * left untouched.
 */
1121 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1122 ip_adjacency_t * adj,
1124 u32 if_address_index)
1126 vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1128 vnet_l3_packet_type_t packet_type;
1131 if (hw->hw_class_index == ethernet_hw_interface_class.index
1132 || hw->hw_class_index == srp_hw_interface_class.index)
1135 * We have a bit of a problem in this case. ip4-arp uses
1136 * the rewrite_header.next_index to hand pkts to the
1137 * indicated inteface output node. We can end up in
1138 * ip4_rewrite_local, too, which also pays attention to
1139 * rewrite_header.next index. Net result: a hack in
1140 * ip4_rewrite_local...
1142 n = IP_LOOKUP_NEXT_ARP;
1143 node_index = ip4_arp_node.index;
1144 adj->if_address_index = if_address_index;
/* NOTE(review): next_hop.ip4 is zeroed and then ip46_address_reset is called on the same next_hop -- one of the two may be redundant; confirm what the reset does. */
1145 adj->arp.next_hop.ip4.as_u32 = 0;
1146 ip46_address_reset(&adj->arp.next_hop);
1147 packet_type = VNET_L3_PACKET_TYPE_ARP;
1151 n = IP_LOOKUP_NEXT_REWRITE;
1152 node_index = ip4_rewrite_node.index;
1153 packet_type = VNET_L3_PACKET_TYPE_IP4;
1156 adj->lookup_next_index = n;
1157 vnet_rewrite_for_sw_interface
1162 VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1163 &adj->rewrite_header,
1164 sizeof (adj->rewrite_data));
/*
 * Install the routes that go with an interface address a on sw_if_index.
 *
 * Visible flow:
 *  - route arguments x are prepared with IP4_ROUTE_FLAG_ADD |
 *    IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_NO_REDISTRIBUTE, the
 *    interface address and its prefix length;
 *  - a->neighbor_probe_adj_index starts as ~0; when the prefix is shorter
 *    than 32 bits, a new adjacency is set up with
 *    ip4_adjacency_set_interface_route, the prefix route is added, and
 *    x.adj_index is stored in a->neighbor_probe_adj_index;
 *  - a second adjacency is then created for the address itself: its lookup
 *    next is IP_LOOKUP_NEXT_CLASSIFY when the interface has a classify table
 *    index other than ~0, and the visible lines also show
 *    IP_LOOKUP_NEXT_LOCAL being assigned; rewrite_header.data_bytes is set
 *    to 0; the route is added with dst_address_length 32.
 *
 * NOTE(review): the return type, braces, the else keyword and the trailing
 * arguments of both ip_add_adjacency calls are missing from this listing.
 * The lines numbered 1220-1221 read as the body of a block comment whose
 * delimiters are not present here; they are left untouched.
 */
1168 ip4_add_interface_routes (u32 sw_if_index,
1169 ip4_main_t * im, u32 fib_index,
1170 ip_interface_address_t * a)
1172 vnet_main_t * vnm = vnet_get_main();
1173 ip_lookup_main_t * lm = &im->lookup_main;
1174 ip_adjacency_t * adj;
1175 ip4_address_t * address = ip_interface_address_get_address (lm, a);
1176 ip4_add_del_route_args_t x;
1177 vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1178 u32 classify_table_index;
1180 /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1181 x.table_index_or_table_id = fib_index;
1182 x.flags = (IP4_ROUTE_FLAG_ADD
1183 | IP4_ROUTE_FLAG_FIB_INDEX
1184 | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1185 x.dst_address = address[0];
1186 x.dst_address_length = a->address_length;
/* Stays ~0 unless the prefix route below is installed. */
1190 a->neighbor_probe_adj_index = ~0;
1191 if (a->address_length < 32)
1193 adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
/* a - lm->if_address_pool is the index of this address within the interface address pool. */
1195 ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1196 ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1197 ip4_add_del_route (im, &x);
1198 a->neighbor_probe_adj_index = x.adj_index;
1201 /* Add e.g. 1.1.1.1/32 as local to this host. */
1202 adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
/* Look up a per-interface classify table index; ~0 when the vector has no slot for sw_if_index. */
1205 classify_table_index = ~0;
1206 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1207 classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1208 if (classify_table_index != (u32) ~0)
1210 adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1211 adj->classify.table_index = classify_table_index;
1214 adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1216 adj->if_address_index = a - lm->if_address_pool;
1217 adj->rewrite_header.sw_if_index = sw_if_index;
1218 adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1220 * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1221 * fail an RPF-ish check, but still go thru the rewrite code...
1223 adj->rewrite_header.data_bytes = 0;
1225 ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
/* Reuse x for the host route: same address, full 32-bit prefix. */
1226 x.dst_address_length = 32;
1227 ip4_add_del_route (im, &x);
/*
 * Remove the routes installed for an interface address: the connected
 * prefix route (only when ADDRESS_LENGTH < 32) and the /32 host route,
 * both in the FIB identified by FIB_INDEX, then call
 * ip4_delete_matching_routes for the same FIB.
 */
1231 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1233 ip4_add_del_route_args_t x;
1235 /* Delete e.g. the 1.0.0.0/8 interface route (arp for Ethernet). */
1236 x.table_index_or_table_id = fib_index;
1237 x.flags = (IP4_ROUTE_FLAG_DEL
1238 | IP4_ROUTE_FLAG_FIB_INDEX
1239 | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1240 x.dst_address = address[0];
1241 x.dst_address_length = address_length;
/* A /32 address has no separate connected prefix route to delete. */
1246 if (address_length < 32)
1247 ip4_add_del_route (im, &x);
/* Delete the /32 host (local) route. */
1249 x.dst_address_length = 32;
1250 ip4_add_del_route (im, &x);
/* NOTE(review): presumably sweeps remaining routes covered by this
   prefix; the argument list is not fully visible here -- confirm. */
1252 ip4_delete_matching_routes (im,
1254 IP4_ROUTE_FLAG_FIB_INDEX,
/* Record type holding an IPv4 interface address; only the `address`
   member is visible in this listing. */
1261 ip4_address_t address;
1263 } ip4_interface_address_t;
/* Forward declaration; the definition follows below. */
1265 static clib_error_t *
1266 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1268 ip4_address_t * new_address,
/*
 * Add or delete an IPv4 address on a software interface.
 *
 * A temporary one-element vector (addr_fib) pairing the address with the
 * interface's FIB index is built and handed to the generic interface
 * address code.  Before adding, the new prefix is checked against every
 * address already on the interface; an overlap in either direction is
 * rejected with an error.  When the interface is admin-up and
 * insert_routes is set, the matching interface routes are added or
 * removed.  Registered add/del callbacks are invoked only when the
 * address pool actually changed size.
 *
 * Returns 0 on success or a clib error.  The temporary addr_fib vector
 * is released on every visible exit path, including the conflict
 * early-return, which previously leaked it.
 */
1274 static clib_error_t *
1275 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1277 ip4_address_t * address,
1283 vnet_main_t * vnm = vnet_get_main();
1284 ip4_main_t * im = &ip4_main;
1285 ip_lookup_main_t * lm = &im->lookup_main;
1286 clib_error_t * error = 0;
1287 u32 if_address_index, elts_before;
1288 ip4_address_fib_t ip4_af, * addr_fib = 0;
/* Make sure the interface has a FIB index slot before reading it. */
1290 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1291 ip4_addr_fib_init (&ip4_af, address,
1292 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
/* Heap-allocates addr_fib; must be vec_free'd on every exit path. */
1293 vec_add1 (addr_fib, ip4_af);
1295 /* When adding an address check that it does not conflict with an existing address. */
1298 ip_interface_address_t * ia;
1299 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
1300 0 /* honor unnumbered */,
1302 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
/* Conflict if either prefix covers the other's address. */
1304 if (ip4_destination_matches_route (im, address, x, ia->address_length)
1305 || ip4_destination_matches_route (im, x, address, address_length))
{
/* Release the temporary vector before bailing out (was leaked). */
vec_free (addr_fib);
1306 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1307 format_ip4_address_and_length, address, address_length,
1308 format_ip4_address_and_length, x, ia->address_length,
1309 format_vnet_sw_if_index_name, vnm, sw_if_index);
}
/* Remember the pool size so a real add/delete can be told from a no-op. */
1313 elts_before = pool_elts (lm->if_address_pool);
1315 error = ip_interface_address_add_del
/* Routes are only touched while the interface is administratively up. */
1325 if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1328 ip4_del_interface_routes (im, ip4_af.fib_index, address,
1332 ip4_add_interface_routes (sw_if_index,
1333 im, ip4_af.fib_index,
1335 (lm->if_address_pool, if_address_index));
1338 /* Notify callbacks only if the pool grew/shrank; an unchanged pool means a duplicate address. */
1339 if (elts_before != pool_elts (lm->if_address_pool))
1341 ip4_add_del_interface_address_callback_t * cb;
1342 vec_foreach (cb, im->add_del_interface_address_callbacks)
1343 cb->function (im, cb->function_opaque, sw_if_index,
1344 address, address_length,
1350 vec_free (addr_fib);
/*
 * Public entry point for adding/deleting an interface address.  Thin
 * wrapper around ip4_add_del_interface_address_internal with both
 * redistribute and insert_routes enabled.
 */
1355 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1356 ip4_address_t * address, u32 address_length,
1359 return ip4_add_del_interface_address_internal
1360 (vm, sw_if_index, address, address_length,
1361 /* redistribute */ 1,
1362 /* insert_routes */ 1,
/*
 * Admin up/down handler for software interfaces.  Ensures the
 * per-interface lookup vectors are large enough, then walks every
 * address on the interface and calls ip4_add_interface_routes or
 * ip4_del_interface_routes for it.
 * NOTE(review): the branch choosing add vs. del is not visible here;
 * presumably it keys off is_admin_up -- confirm.
 */
1366 static clib_error_t *
1367 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1371 ip4_main_t * im = &ip4_main;
1372 ip_interface_address_t * ia;
1374 u32 is_admin_up, fib_index;
1376 /* Fill in lookup tables with default table (0). */
1377 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
/* New slots are initialized to ~0. */
1379 vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
1381 is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1383 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1385 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
1386 0 /* honor unnumbered */,
1388 a = ip_interface_address_get_address (&im->lookup_main, ia);
1390 ip4_add_interface_routes (sw_if_index,
1394 ip4_del_interface_routes (im, fib_index,
1395 a, ia->address_length);
/* Register the handler above via the admin up/down registration macro. */
1401 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1403 /* Built-in ip4 unicast rx feature path definition */
/*
 * Each registration names a graph node, the feature(s) it must run
 * before, and where its assigned feature index is stored in ip4_main.
 * The runs_before constraints below yield the order:
 *   ip4-inacl -> ip4-source-check-via-rx -> ip4-source-check-via-any
 *   -> ip4-policer-classify -> ipsec-input-ip4 -> vpath-input-ip4
 *   -> ip4-lookup
 * with ip4-source-and-port-range-check-rx constrained only to run
 * before ip4-policer-classify.
 */
1404 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1405 .node_name = "ip4-inacl",
1406 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
1407 .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1410 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1411 .node_name = "ip4-source-check-via-rx",
1412 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
1414 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1417 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1418 .node_name = "ip4-source-check-via-any",
1419 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
1421 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1424 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
1425 .node_name = "ip4-source-and-port-range-check-rx",
1426 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
1428 &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
1431 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
1432 .node_name = "ip4-policer-classify",
1433 .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
1435 &ip4_main.ip4_unicast_rx_feature_policer_classify,
1438 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1439 .node_name = "ipsec-input-ip4",
1440 .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
1441 .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1444 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1445 .node_name = "vpath-input-ip4",
1446 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
1447 .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
/* Terminal feature of the unicast rx path. */
1450 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1451 .node_name = "ip4-lookup",
1452 .runs_before = 0, /* not before any other features */
1453 .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1456 /* Built-in ip4 multicast rx feature path definition */
/* Order: vpath-input-ip4 -> ip4-lookup-multicast (terminal). */
1457 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1458 .node_name = "vpath-input-ip4",
1459 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
1460 .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1463 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1464 .node_name = "ip4-lookup-multicast",
1465 .runs_before = 0, /* not before any other features */
1466 .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
/* Names of the graph nodes at which the rx feature paths start;
   selected by ip4_feature_init for the rx casts. */
1469 static char * rx_feature_start_nodes[] =
1470 { "ip4-input", "ip4-input-no-checksum"};
/* Name of the graph node at which the tx feature path starts. */
1472 static char * tx_feature_start_nodes[] =
1473 { "ip4-rewrite-transit"};
1475 /* Source and port-range check ip4 tx feature path definition */
1476 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
1477 .node_name = "ip4-source-and-port-range-check-tx",
1478 .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
1480 &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
1484 /* Built-in ip4 tx feature path definition */
1485 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
1486 .node_name = "interface-output",
1487 .runs_before = 0, /* not before any other features */
1488 .feature_index = &ip4_main.ip4_tx_feature_interface_output,
/*
 * Initialize the feature configuration for every cast
 * (0 .. VNET_N_IP_FEAT-1).  Casts below VNET_IP_TX_FEAT use the rx start
 * nodes, the remaining ones the tx start nodes; each is handed to
 * ip_feature_init_cast, whose result is captured in `error`.
 */
1492 static clib_error_t *
1493 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1495 ip_lookup_main_t * lm = &im->lookup_main;
1496 clib_error_t * error;
1498 ip_config_main_t * cm;
1499 vnet_config_main_t * vcm;
1500 char **feature_start_nodes;
1501 int feature_start_len;
1503 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
1505 cm = &lm->feature_config_mains[cast];
1506 vcm = &cm->config_main;
/* rx casts come first in the enumeration, tx afterwards. */
1508 if (cast < VNET_IP_TX_FEAT)
1510 feature_start_nodes = rx_feature_start_nodes;
1511 feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
1515 feature_start_nodes = tx_feature_start_nodes;
1516 feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
1519 if ((error = ip_feature_init_cast (vm, cm, vcm,
1520 feature_start_nodes,
/*
 * Software interface add/delete handler.  For each cast, picks that
 * cast's terminal feature (unicast lookup, multicast lookup, or
 * interface-output for tx), adds it to or removes it from the
 * interface's feature config, and stores the resulting config index
 * back into config_index_by_sw_if_index.
 * Always returns 0 (no error).
 */
1530 static clib_error_t *
1531 ip4_sw_interface_add_del (vnet_main_t * vnm,
1535 vlib_main_t * vm = vnm->vlib_main;
1536 ip4_main_t * im = &ip4_main;
1537 ip_lookup_main_t * lm = &im->lookup_main;
1541 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
1543 ip_config_main_t * cm = &lm->feature_config_mains[cast];
1544 vnet_config_main_t * vcm = &cm->config_main;
/* Grow the per-interface table on demand; new slots start at ~0. */
1546 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1547 ci = cm->config_index_by_sw_if_index[sw_if_index];
1549 if (cast == VNET_IP_RX_UNICAST_FEAT)
1550 feature_index = im->ip4_unicast_rx_feature_lookup;
1551 else if (cast == VNET_IP_RX_MULTICAST_FEAT)
1552 feature_index = im->ip4_multicast_rx_feature_lookup;
1554 feature_index = im->ip4_tx_feature_interface_output;
1557 ci = vnet_config_add_feature (vm, vcm,
1560 /* config data */ 0,
1561 /* # bytes of config data */ 0);
1563 ci = vnet_config_del_feature (vm, vcm,
1566 /* config data */ 0,
1567 /* # bytes of config data */ 0);
1569 cm->config_index_by_sw_if_index[sw_if_index] = ci;
1571 * note: do not update the tx feature count here.
1575 return /* no error */ 0;
/* Register the handler above via the interface add/del registration macro. */
1578 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
/* Forward declaration: the trace formatter is defined further below. */
1580 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph node "ip4-lookup": runs ip4_lookup over vectors of u32 buffer
   indices, with IP4_LOOKUP_N_NEXT next nodes. */
1582 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1583 .function = ip4_lookup,
1584 .name = "ip4-lookup",
1585 .vector_size = sizeof (u32),
1587 .format_trace = format_ip4_lookup_trace,
1589 .n_next_nodes = IP4_LOOKUP_N_NEXT,
1590 .next_nodes = IP4_LOOKUP_NEXT_NODES,
1593 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
/*
 * Node function for "ip4-indirect": identical to ip4_lookup except that
 * ip4_lookup_inline is invoked with is_indirect = 1.
 */
1596 ip4_indirect (vlib_main_t * vm,
1597 vlib_node_runtime_t * node,
1598 vlib_frame_t * frame)
1600 return ip4_lookup_inline (vm, node, frame,
1601 /* lookup_for_responses_to_locally_received_packets */ 0,
1602 /* is_indirect */ 1);
/* Registered as a sibling of "ip4-lookup" and reuses its trace formatter. */
1605 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1606 .function = ip4_indirect,
1607 .name = "ip4-indirect",
1608 .vector_size = sizeof (u32),
1609 .sibling_of = "ip4-lookup",
1610 .format_trace = format_ip4_lookup_trace,
1615 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect);
1618 /* Global IP4 main: the single ip4_main_t instance referenced as
   &ip4_main throughout this file. */
1619 ip4_main_t ip4_main;
/*
 * Init function for the IPv4 lookup subsystem.  Visible steps:
 *   - fill im->fib_masks[] with prefix masks in network byte order;
 *   - create the default FIB (table id 0);
 *   - initialize the generic lookup main for IPv4;
 *   - hook the packet-generator edit function for the ip4-lookup node;
 *   - build the ARP request packet template;
 *   - run ip4_feature_init.
 */
1622 ip4_lookup_init (vlib_main_t * vm)
1624 ip4_main_t * im = &ip4_main;
1625 clib_error_t * error;
1628 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* Mask with the i most-significant bits set (host order)... */
1633 m = pow2_mask (i) << (32 - i);
/* ...stored in network byte order. */
1636 im->fib_masks[i] = clib_host_to_net_u32 (m);
1639 /* Create FIB with index 0 and table id of 0. */
1640 find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1642 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1646 pn = pg_get_node (ip4_lookup_node.index);
1647 pn->unformat_edit = unformat_pg_ip4_header;
1651 ethernet_arp_header_t h;
1653 memset (&h, 0, sizeof (h));
1655 /* Set target ethernet address to all zeros. */
1656 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
/* Local helpers to set 16-bit (byte-swapped) and 8-bit header fields. */
1658 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1659 #define _8(f,v) h.f = v;
1660 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1661 _16 (l3_type, ETHERNET_TYPE_IP4);
1662 _8 (n_l2_address_bytes, 6);
1663 _8 (n_l3_address_bytes, 4);
1664 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1668 vlib_packet_template_init (vm,
1669 &im->ip4_arp_request_packet_template,
1672 /* alloc chunk size */ 8,
1676 error = ip4_feature_init (vm, im);
1681 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record shared by the ip4 forwarding nodes; filled in
   by ip4_forward_next_trace and printed by the format_ip4_*_trace
   functions. */
1684 /* Adjacency taken. */
1689 /* Packet data, possibly *after* rewrite. */
1690 u8 packet_data[64 - 1*sizeof(u32)];
1691 } ip4_forward_next_trace_t;
/*
 * Trace formatter: prints the captured packet data as an IPv4 header,
 * indented to the current indent of S.  Varargs are (vm, node, trace);
 * vm and node are unused.
 */
1693 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1695 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1696 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1697 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1698 uword indent = format_get_indent (s);
1699 s = format (s, "%U%U",
1700 format_white_space, indent,
1701 format_ip4_header, t->packet_data);
/*
 * Trace formatter for the lookup nodes: prints fib index, adjacency
 * index, the formatted adjacency and the flow hash, followed on a new
 * line by the captured packet data as an IPv4 header.
 */
1705 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1707 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1708 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1709 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1710 vnet_main_t * vnm = vnet_get_main();
1711 ip4_main_t * im = &ip4_main;
1712 uword indent = format_get_indent (s);
1714 s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1715 t->fib_index, t->adj_index, format_ip_adjacency,
1716 vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1717 s = format (s, "\n%U%U",
1718 format_white_space, indent,
1719 format_ip4_header, t->packet_data);
/*
 * Trace formatter for rewrite-style nodes.  Like the lookup formatter,
 * but the first number is labelled tx_sw_if_index and the packet data
 * is printed through format_ip_adjacency_packet_data.
 */
1723 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1725 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1726 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1727 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1728 vnet_main_t * vnm = vnet_get_main();
1729 ip4_main_t * im = &ip4_main;
1730 uword indent = format_get_indent (s);
/* The trace's fib_index field doubles as the tx sw_if_index here:
   ip4_forward_next_trace stores sw_if_index[VLIB_TX] in it when set. */
1732 s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1733 t->fib_index, t->adj_index, format_ip_adjacency,
1734 vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1735 s = format (s, "\n%U%U",
1736 format_white_space, indent,
1737 format_ip_adjacency_packet_data,
1738 vnm, &im->lookup_main, t->adj_index,
1739 t->packet_data, sizeof (t->packet_data));
1743 /* Common trace function for all ip4-forward next nodes. */
/*
 * Walks the buffers of FRAME and, for every buffer flagged
 * VLIB_BUFFER_IS_TRACED, records an ip4_forward_next_trace_t holding:
 *   - the adjacency index selected by WHICH_ADJ_INDEX (rx or tx slot);
 *   - the flow hash;
 *   - fib_index: sw_if_index[VLIB_TX] if it is set (!= ~0), otherwise
 *     the FIB index of the rx interface;
 *   - a copy of the leading bytes of the current packet data.
 * Buffers are handled two at a time with prefetch, then one at a time.
 */
1745 ip4_forward_next_trace (vlib_main_t * vm,
1746 vlib_node_runtime_t * node,
1747 vlib_frame_t * frame,
1748 vlib_rx_or_tx_t which_adj_index)
1751 ip4_main_t * im = &ip4_main;
1753 n_left = frame->n_vectors;
1754 from = vlib_frame_vector_args (frame);
1759 vlib_buffer_t * b0, * b1;
1760 ip4_forward_next_trace_t * t0, * t1;
1762 /* Prefetch next iteration. */
1763 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1764 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1769 b0 = vlib_get_buffer (vm, bi0);
1770 b1 = vlib_get_buffer (vm, bi1);
1772 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1774 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1775 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1776 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1777 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1778 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1779 vec_elt (im->fib_index_by_sw_if_index,
1780 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1782 clib_memcpy (t0->packet_data,
1783 vlib_buffer_get_current (b0),
1784 sizeof (t0->packet_data));
1786 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1788 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1789 t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1790 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1791 t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1792 vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1793 vec_elt (im->fib_index_by_sw_if_index,
1794 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1795 clib_memcpy (t1->packet_data,
1796 vlib_buffer_get_current (b1),
1797 sizeof (t1->packet_data));
/* Single-buffer path: same recording logic for the remaining buffers. */
1807 ip4_forward_next_trace_t * t0;
1811 b0 = vlib_get_buffer (vm, bi0);
1813 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1815 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1816 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1817 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1818 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1819 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1820 vec_elt (im->fib_index_by_sw_if_index,
1821 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1822 clib_memcpy (t0->packet_data,
1823 vlib_buffer_get_current (b0),
1824 sizeof (t0->packet_data));
/*
 * Shared body of the drop/punt/miss nodes: hands every buffer in FRAME
 * to vlib_error_drop_buffers with ERROR_CODE, attributing the errors to
 * the ip4-input node, and records traces when tracing is enabled on the
 * node.
 */
1832 ip4_drop_or_punt (vlib_main_t * vm,
1833 vlib_node_runtime_t * node,
1834 vlib_frame_t * frame,
1835 ip4_error_t error_code)
1837 u32 * buffers = vlib_frame_vector_args (frame);
1838 uword n_packets = frame->n_vectors;
1840 vlib_error_drop_buffers (vm, node,
1845 ip4_input_node.index,
1848 if (node->flags & VLIB_NODE_FLAG_TRACE)
1849 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function: count buffers against IP4_ERROR_ADJACENCY_DROP. */
1855 ip4_drop (vlib_main_t * vm,
1856 vlib_node_runtime_t * node,
1857 vlib_frame_t * frame)
1858 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
/* Node function: count buffers against IP4_ERROR_ADJACENCY_PUNT. */
1861 ip4_punt (vlib_main_t * vm,
1862 vlib_node_runtime_t * node,
1863 vlib_frame_t * frame)
1864 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
/* Node function: count buffers against IP4_ERROR_DST_LOOKUP_MISS. */
1867 ip4_miss (vlib_main_t * vm,
1868 vlib_node_runtime_t * node,
1869 vlib_frame_t * frame)
1870 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
/* Graph node registrations for the drop, punt and miss functions above;
   all three operate on u32 buffer indices and share
   format_ip4_forward_next_trace as their trace formatter. */
1872 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1873 .function = ip4_drop,
1875 .vector_size = sizeof (u32),
1877 .format_trace = format_ip4_forward_next_trace,
1885 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
1887 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1888 .function = ip4_punt,
1890 .vector_size = sizeof (u32),
1892 .format_trace = format_ip4_forward_next_trace,
1900 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1902 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1903 .function = ip4_miss,
1905 .vector_size = sizeof (u32),
1907 .format_trace = format_ip4_forward_next_trace,
1915 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss);
1917 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * The sum is seeded with pseudo-header terms (payload length, protocol,
 * source and destination addresses) and then accumulated over the L4
 * payload.  The payload may span several chained buffers: the loop
 * consumes what is in the current buffer and follows next_buffer until
 * the length announced by the IP header has been covered.  The final
 * value is the one's complement of the folded sum.
 */
1919 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1923 u32 ip_header_length, payload_length_host_byte_order;
1924 u32 n_this_buffer, n_bytes_left;
1926 void * data_this_buffer;
1928 /* Initialize checksum with ip header. */
1929 ip_header_length = ip4_header_bytes (ip0);
1930 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1931 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
/* On 32-bit words add src and dst separately; otherwise both addresses
   are folded in with a single 64-bit load starting at src_address. */
1933 if (BITS (uword) == 32)
1935 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1936 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1939 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1941 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1942 data_this_buffer = (void *) ip0 + ip_header_length;
/* Clamp to what the first buffer actually holds after the IP header. */
1943 if (n_this_buffer + ip_header_length > p0->current_length)
1944 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1947 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1948 n_bytes_left -= n_this_buffer;
1949 if (n_bytes_left == 0)
/* More payload remains: it must live in a chained buffer. */
1952 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1953 p0 = vlib_get_buffer (vm, p0->next_buffer);
1954 data_this_buffer = vlib_buffer_get_current (p0);
1955 n_this_buffer = p0->current_length;
1958 sum16 = ~ ip_csum_fold (sum0);
/*
 * Validate the L4 checksum of a TCP or UDP packet and record the result
 * in the buffer flags (IP_BUFFER_L4_CHECKSUM_COMPUTED, plus
 * IP_BUFFER_L4_CHECKSUM_CORRECT when valid).  A UDP packet whose
 * checksum field is zero is marked computed and correct without
 * computing anything.
 * NOTE(review): callers assign the result to their flags variable, so
 * this presumably returns p0->flags -- the return statement is not
 * visible here; confirm.
 */
1964 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1966 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1967 udp_header_t * udp0;
1970 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1971 || ip0->protocol == IP_PROTOCOL_UDP);
/* NOTE(review): (ip0 + 1) assumes the L4 header directly follows a
   fixed-size ip4_header_t, i.e. no IP options -- confirm. */
1973 udp0 = (void *) (ip0 + 1);
1974 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1976 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1977 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1981 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* A computed sum of zero means the checksum verified. */
1983 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1984 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/*
 * Node function for "ip4-local": handles packets addressed to this host.
 *
 * For each packet it
 *   - validates the TCP/UDP checksum (unless already computed, or UDP
 *     with an explicit zero checksum) and the UDP length;
 *   - looks up the *source* address in the rx interface's FIB mtrie and
 *     stores the resulting adjacency index in both the rx and tx slots;
 *   - drops the packet with IP4_ERROR_SRC_LOOKUP_MISS when the source
 *     adjacency is not rewrite/arp/local, unless the destination is the
 *     limited broadcast address 255.255.255.255;
 *   - otherwise dispatches on lm->local_next_by_ip_protocol[].
 * Fragments are dispatched as protocol 0xfe.
 *
 * Packets are processed two at a time, then singly.  Returns the number
 * of vectors in the frame.
 *
 * Fix: in the dual loop the broadcast exemption for packet 1 tested
 * ip0's destination address instead of ip1's.
 */
1990 ip4_local (vlib_main_t * vm,
1991 vlib_node_runtime_t * node,
1992 vlib_frame_t * frame)
1994 ip4_main_t * im = &ip4_main;
1995 ip_lookup_main_t * lm = &im->lookup_main;
1996 ip_local_next_t next_index;
1997 u32 * from, * to_next, n_left_from, n_left_to_next;
/* Errors are attributed to the ip4-input node's error counters. */
1998 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2000 from = vlib_frame_vector_args (frame);
2001 n_left_from = frame->n_vectors;
2002 next_index = node->cached_next_index;
2004 if (node->flags & VLIB_NODE_FLAG_TRACE)
2005 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2007 while (n_left_from > 0)
2009 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-packet loop. */
2011 while (n_left_from >= 4 && n_left_to_next >= 2)
2013 vlib_buffer_t * p0, * p1;
2014 ip4_header_t * ip0, * ip1;
2015 udp_header_t * udp0, * udp1;
2016 ip4_fib_mtrie_t * mtrie0, * mtrie1;
2017 ip4_fib_mtrie_leaf_t leaf0, leaf1;
2018 ip_adjacency_t * adj0, * adj1;
2019 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
2020 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
2021 i32 len_diff0, len_diff1;
2022 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2023 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
/* Speculatively enqueue both packets to the current next frame. */
2026 pi0 = to_next[0] = from[0];
2027 pi1 = to_next[1] = from[1];
2031 n_left_to_next -= 2;
2033 p0 = vlib_get_buffer (vm, pi0);
2034 p1 = vlib_get_buffer (vm, pi1);
2036 ip0 = vlib_buffer_get_current (p0);
2037 ip1 = vlib_buffer_get_current (p1);
2039 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2040 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2041 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
2042 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
2044 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2045 mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
/* The four mtrie steps of the source lookup are interleaved with the
   checksum/length work below. */
2047 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
2049 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2050 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
2052 /* Treat IP frag packets as "experimental" protocol for now
2053 until support of IP frag reassembly is implemented */
2054 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2055 proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
2056 is_udp0 = proto0 == IP_PROTOCOL_UDP;
2057 is_udp1 = proto1 == IP_PROTOCOL_UDP;
2058 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2059 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
2064 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2065 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2067 udp0 = ip4_next_header (ip0);
2068 udp1 = ip4_next_header (ip1);
2070 /* Don't verify UDP checksum for packets with explicit zero checksum. */
2071 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2072 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2074 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2075 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
2077 /* Verify UDP length. */
2078 ip_len0 = clib_net_to_host_u16 (ip0->length);
2079 ip_len1 = clib_net_to_host_u16 (ip1->length);
2080 udp_len0 = clib_net_to_host_u16 (udp0->length);
2081 udp_len1 = clib_net_to_host_u16 (udp1->length);
2083 len_diff0 = ip_len0 - udp_len0;
2084 len_diff1 = ip_len1 - udp_len1;
/* The length check only applies to UDP; neutralize it otherwise. */
2086 len_diff0 = is_udp0 ? len_diff0 : 0;
2087 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: at least one packet is not a TCP/UDP packet with an
   already-known-good checksum. */
2089 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2090 & good_tcp_udp0 & good_tcp_udp1)))
2095 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2096 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2098 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2099 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2104 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2105 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2107 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2108 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2112 good_tcp_udp0 &= len_diff0 >= 0;
2113 good_tcp_udp1 &= len_diff1 >= 0;
2115 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2116 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* IP4_ERROR_UNKNOWN_PROTOCOL doubles as the "no error so far" value. */
2118 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2120 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2121 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* Relies on the UDP checksum error immediately following the TCP one. */
2123 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2124 error0 = (is_tcp_udp0 && ! good_tcp_udp0
2125 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2127 error1 = (is_tcp_udp1 && ! good_tcp_udp1
2128 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2131 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2132 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
2134 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2135 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2137 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2138 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2140 ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2142 /* no_default_route */ 1));
2143 ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2145 /* no_default_route */ 1));
2147 adj0 = ip_get_adjacency (lm, adj_index0);
2148 adj1 = ip_get_adjacency (lm, adj_index1);
2151 * Must have a route to source otherwise we drop the packet.
2152 * ip4 broadcasts are accepted, e.g. to make dhcp client work
2154 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2155 && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2156 && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2157 && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2158 && ip0->dst_address.as_u32 != 0xFFFFFFFF
2159 ? IP4_ERROR_SRC_LOOKUP_MISS
2161 error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2162 && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2163 && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2164 && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
/* Packet 1 must be judged on its own destination (was ip0). */
2165 && ip1->dst_address.as_u32 != 0xFFFFFFFF
2166 ? IP4_ERROR_SRC_LOOKUP_MISS
2169 next0 = lm->local_next_by_ip_protocol[proto0];
2170 next1 = lm->local_next_by_ip_protocol[proto1];
/* Any real error overrides the protocol dispatch with a drop. */
2172 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2173 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2175 p0->error = error0 ? error_node->errors[error0] : 0;
2176 p1->error = error1 ? error_node->errors[error1] : 0;
/* Bit 0: packet 0 mis-speculated; bit 1: packet 1 mis-speculated. */
2178 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2180 if (PREDICT_FALSE (enqueue_code != 0))
2182 switch (enqueue_code)
2188 n_left_to_next += 1;
2189 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2195 n_left_to_next += 1;
2196 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2200 /* A B B or A B C */
2202 n_left_to_next += 2;
2203 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2204 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2207 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2209 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Single-packet loop: same checks as above for the remaining packets. */
2216 while (n_left_from > 0 && n_left_to_next > 0)
2220 udp_header_t * udp0;
2221 ip4_fib_mtrie_t * mtrie0;
2222 ip4_fib_mtrie_leaf_t leaf0;
2223 ip_adjacency_t * adj0;
2224 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2226 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2228 pi0 = to_next[0] = from[0];
2232 n_left_to_next -= 1;
2234 p0 = vlib_get_buffer (vm, pi0);
2236 ip0 = vlib_buffer_get_current (p0);
2238 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2239 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2241 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2243 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2245 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2247 /* Treat IP frag packets as "experimental" protocol for now
2248 until support of IP frag reassembly is implemented */
2249 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2250 is_udp0 = proto0 == IP_PROTOCOL_UDP;
2251 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2255 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2257 udp0 = ip4_next_header (ip0);
2259 /* Don't verify UDP checksum for packets with explicit zero checksum. */
2260 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2262 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2264 /* Verify UDP length. */
2265 ip_len0 = clib_net_to_host_u16 (ip0->length);
2266 udp_len0 = clib_net_to_host_u16 (udp0->length);
2268 len_diff0 = ip_len0 - udp_len0;
2270 len_diff0 = is_udp0 ? len_diff0 : 0;
2272 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2277 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2278 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2280 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2281 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2285 good_tcp_udp0 &= len_diff0 >= 0;
2287 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2289 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2291 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2293 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2294 error0 = (is_tcp_udp0 && ! good_tcp_udp0
2295 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2298 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2300 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2301 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2303 ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2305 /* no_default_route */ 1));
2307 adj0 = ip_get_adjacency (lm, adj_index0);
2309 /* Must have a route to source otherwise we drop the packet; limited broadcasts are exempt. */
2310 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2311 && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2312 && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2313 && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2314 && ip0->dst_address.as_u32 != 0xFFFFFFFF
2315 ? IP4_ERROR_SRC_LOOKUP_MISS
2318 next0 = lm->local_next_by_ip_protocol[proto0];
2320 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2322 p0->error = error0? error_node->errors[error0] : 0;
/* Mis-speculated next: undo the enqueue and switch next frames. */
2324 if (PREDICT_FALSE (next0 != next_index))
2326 n_left_to_next += 1;
2327 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2330 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2333 n_left_to_next -= 1;
2337 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2340 return frame->n_vectors;
/* Graph node "ip4-local".  The visible built-in next nodes are
   error-drop, error-punt, ip4-udp-lookup and ip4-icmp-input; further
   next nodes are added at run time by ip4_register_protocol. */
2343 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2344 .function = ip4_local,
2345 .name = "ip4-local",
2346 .vector_size = sizeof (u32),
2348 .format_trace = format_ip4_forward_next_trace,
2350 .n_next_nodes = IP_LOCAL_N_NEXT,
2352 [IP_LOCAL_NEXT_DROP] = "error-drop",
2353 [IP_LOCAL_NEXT_PUNT] = "error-punt",
2354 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2355 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2359 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/*
 * Register graph node NODE_INDEX as the handler for locally received
 * packets of IP protocol PROTOCOL: the node is added as a next of
 * ip4-local and the resulting next index is stored in the per-protocol
 * dispatch table.  PROTOCOL must be within the table bounds (asserted).
 */
2361 void ip4_register_protocol (u32 protocol, u32 node_index)
2363 vlib_main_t * vm = vlib_get_main();
2364 ip4_main_t * im = &ip4_main;
2365 ip_lookup_main_t * lm = &im->lookup_main;
2367 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2368 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
/* CLI handler: print a heading, then the number of every protocol whose
 * entry in lm->local_next_by_ip_protocol is something other than
 * IP_LOCAL_NEXT_PUNT.  The input and cmd arguments are not used by the
 * lines visible here. */
2371 static clib_error_t *
2372 show_ip_local_command_fn (vlib_main_t * vm,
2373 unformat_input_t * input,
2374 vlib_cli_command_t * cmd)
2376 ip4_main_t * im = &ip4_main;
2377 ip_lookup_main_t * lm = &im->lookup_main;
2380 vlib_cli_output (vm, "Protocols handled by ip4_local");
/* Walk the whole per-protocol table. */
2381 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2383 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2384 vlib_cli_output (vm, "%d", i);
/* CLI registration binding the show ip local path to
 * show_ip_local_command_fn. */
2391 VLIB_CLI_COMMAND (show_ip_local, static) = {
2392 .path = "show ip local",
2393 .function = show_ip_local_command_fn,
2394 .short_help = "Show ip local protocol table",
/* ip4-arp node function.
 *
 * For every buffer in the frame:
 *  - fetch the adjacency named by ip.adj_index[VLIB_TX];
 *  - when the adjacency carries a non-zero ARP next hop, overwrite the
 *    packet destination address with it;
 *  - hash the destination address together with periodically re-seeded
 *    random values into a small static bitmap, so that a destination
 *    already seen since the last re-seed is marked as a drop and no
 *    further request is generated for it;
 *  - always hand the original buffer to IP4_ARP_NEXT_DROP, tagged with
 *    the matching IP4_ARP_ERROR_* counter;
 *  - otherwise build an ARP request from the packet template and send
 *    it to the next index held in the adjacency rewrite header.
 *
 * Returns frame->n_vectors.
 *
 * NOTE(review): several short lines of this function (braces, short
 * declarations and assignments) are not present in this listing. */
2398 ip4_arp (vlib_main_t * vm,
2399 vlib_node_runtime_t * node,
2400 vlib_frame_t * frame)
2402 vnet_main_t * vnm = vnet_get_main();
2403 ip4_main_t * im = &ip4_main;
2404 ip_lookup_main_t * lm = &im->lookup_main;
2405 u32 * from, * to_next_drop;
2406 uword n_left_from, n_left_to_next_drop, next_index;
/* Throttle state lives in function statics, shared across calls. */
2407 static f64 time_last_seed_change = -1e100;
2408 static u32 hash_seeds[3];
2409 static uword hash_bitmap[256 / BITS (uword)];
2412 if (node->flags & VLIB_NODE_FLAG_TRACE)
2413 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Re-seed the hash once more than 1e-3 time units (as reported by
 * vlib_time_now) have passed since the previous re-seed. */
2415 time_now = vlib_time_now (vm);
2416 if (time_now - time_last_seed_change > 1e-3)
2419 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2420 sizeof (hash_seeds));
2421 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2422 hash_seeds[i] = r[i];
2424 /* Mark all hash keys as not seen before. */
2425 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2428 time_last_seed_change = time_now;
2431 from = vlib_frame_vector_args (frame);
2432 n_left_from = frame->n_vectors;
2433 next_index = node->cached_next_index;
2434 if (next_index == IP4_ARP_NEXT_DROP)
2435 next_index = IP4_ARP_N_NEXT; /* point to first interface */
2437 while (n_left_from > 0)
/* Original packets always travel to the drop next. */
2439 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2440 to_next_drop, n_left_to_next_drop);
2442 while (n_left_from > 0 && n_left_to_next_drop > 0)
2446 ethernet_header_t * eh0;
2447 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2449 ip_adjacency_t * adj0;
2453 p0 = vlib_get_buffer (vm, pi0);
2455 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2456 adj0 = ip_get_adjacency (lm, adj_index0);
2457 ip0 = vlib_buffer_get_current (p0);
2459 /* If packet destination is not local, send ARP to next hop */
2460 if (adj0->arp.next_hop.ip4.as_u32)
2461 ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2464 * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2465 * rewrite to this packet, we need to skip it here.
2466 * Note, to distinguish from src IP addr *.8.6.*, we
2467 * check for a bcast eth dest instead of IPv4 version.
2469 eh0 = (ethernet_header_t*)ip0;
2470 if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2473 u16 * etype = &eh0->type;
2474 while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q
2475 || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad
2478 etype += 2; //vlan tag also 16 bits, same as etype
2480 if (*etype == clib_host_to_net_u16 (0x0806)) //arp
2482 vlib_buffer_advance (
2483 p0, sizeof(ethernet_header_t) + (4*vlan_num));
2484 ip0 = vlib_buffer_get_current (p0);
2492 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2493 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2495 a0 ^= ip0->dst_address.data_u32;
2498 hash_v3_finalize32 (a0, b0, c0);
/* Reduce the hash to one bit of hash_bitmap.  The bit position inside
 * the word has to be taken BEFORE c0 is turned into the word index;
 * computing it afterwards would derive the position from the word
 * index and leave nearly all of the bitmap unused.  m0 therefore holds
 * the bit position (not a mask) so the u32 cannot truncate it, and the
 * mask itself is formed at uword width where it is used. */
2500 c0 &= BITS (hash_bitmap) - 1;
2501 m0 = c0 % BITS (uword);
2502 c0 = c0 / BITS (uword);
2504 bm0 = hash_bitmap[c0];
2505 drop0 = (((uword) bm0 >> m0) & 1) != 0;
2507 /* Mark it as seen. */
2508 hash_bitmap[c0] = bm0 | ((uword) 1 << m0);
2512 to_next_drop[0] = pi0;
2514 n_left_to_next_drop -= 1;
2516 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2522 * Can happen if the control-plane is programming tables
2523 * with traffic flowing; at least that's today's lame excuse.
2525 if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)
2527 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2530 /* Send ARP request. */
2534 ethernet_arp_header_t * h0;
2535 vnet_hw_interface_t * hw_if0;
2537 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2539 /* Add rewrite/encap string for ARP packet. */
2540 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2542 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2544 /* Src ethernet address in ARP header. */
2545 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2546 sizeof (h0->ip4_over_ethernet[0].ethernet));
/* A non-zero result means no usable source address: count the error
 * and release the freshly allocated request buffer. */
2548 if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2549 //No source address available
2550 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2551 vlib_buffer_free(vm, &bi0, 1);
2555 /* Copy in destination address we are requesting. */
2556 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2558 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2559 b0 = vlib_get_buffer (vm, bi0);
2560 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
/* Expose the rewrite bytes that were written in front of the payload. */
2562 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2564 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2568 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2571 return frame->n_vectors;
/* Counter descriptions for the ip4-arp node, indexed by IP4_ARP_ERROR_*. */
2574 static char * ip4_arp_error_strings[] = {
2575 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2576 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2577 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2578 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2579 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2580 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph node registration for the node run by ip4_arp.  Error counters
 * come from ip4_arp_error_strings; the only next named in the visible
 * lines is IP4_ARP_NEXT_DROP, which leads to error-drop. */
2583 VLIB_REGISTER_NODE (ip4_arp_node) = {
2584 .function = ip4_arp,
2586 .vector_size = sizeof (u32),
2588 .format_trace = format_ip4_forward_next_trace,
2590 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2591 .error_strings = ip4_arp_error_strings,
2593 .n_next_nodes = IP4_ARP_N_NEXT,
2595 [IP4_ARP_NEXT_DROP] = "error-drop",
/* foreach_notrace_ip4_arp_error lists ip4-arp error codes; its body is
 * not visible in this listing.
 *
 * arp_notrace_init is an init function (registered through
 * VLIB_INIT_FUNCTION below).  It fetches the runtime of ip4_arp_node
 * and, for each listed error code, calls
 * vnet_pcap_drop_trace_filter_add_del on the matching rt->errors entry.
 * NOTE(review): the remaining arguments of that call and the return
 * statement are on lines not shown here. */
2599 #define foreach_notrace_ip4_arp_error \
2605 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2607 vlib_node_runtime_t *rt =
2608 vlib_node_get_runtime (vm, ip4_arp_node.index);
2610 /* don't trace ARP request packets */
2612 vnet_pcap_drop_trace_filter_add_del \
2613 (rt->errors[IP4_ARP_ERROR_##a], \
2615 foreach_notrace_ip4_arp_error;
2620 VLIB_INIT_FUNCTION(arp_notrace_init);
2623 /* Send an ARP request to see if given destination is reachable on given interface. */
/* vm          - vlib main used for buffer and frame handling.
 * dst         - address to probe; becomes the target address of the request.
 * sw_if_index - software interface to send the request on.
 *
 * Returns 0 on success.  Returns a clib error when the interface is not
 * administratively up, and a second error (after setting vnm->api_errno
 * to VNET_API_ERROR_NO_MATCHING_INTERFACE) on the no-matching-address
 * path; the condition guarding that path is on a line not shown here. */
2625 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2627 vnet_main_t * vnm = vnet_get_main();
2628 ip4_main_t * im = &ip4_main;
2629 ethernet_arp_header_t * h;
2630 ip4_address_t * src;
2631 ip_interface_address_t * ia;
2632 ip_adjacency_t * adj;
2633 vnet_hw_interface_t * hi;
2634 vnet_sw_interface_t * si;
2638 si = vnet_get_sw_interface (vnm, sw_if_index);
/* Refuse to probe through an interface that is not admin up. */
2640 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2642 return clib_error_return (0, "%U: interface %U down",
2643 format_ip4_address, dst,
2644 format_vnet_sw_if_index_name, vnm,
/* Pick the interface address that matches dst; ia receives the
 * matching interface address entry. */
2648 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2651 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2652 return clib_error_return
2653 (0, "no matching interface address for destination %U (interface %U)",
2654 format_ip4_address, dst,
2655 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* The probe adjacency of that interface address supplies the rewrite. */
2658 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2660 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2662 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender hardware address, sender IP (src) and target IP (dst). */
2664 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2666 h->ip4_over_ethernet[0].ip4 = src[0];
2667 h->ip4_over_ethernet[1].ip4 = dst[0];
2669 b = vlib_get_buffer (vm, bi);
2670 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2672 /* Add encapsulation string for software interface (e.g. ethernet header). */
2673 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2674 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the request to the output node of the hardware interface. */
2677 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2678 u32 * to_next = vlib_frame_vector_args (f);
2681 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2684 return /* no error */ 0;
/* Next-node indices of the ip4 rewrite nodes; the node registration
 * maps them to error-drop, ip4-arp and ip4-icmp-error respectively. */
2688 IP4_REWRITE_NEXT_DROP,
2689 IP4_REWRITE_NEXT_ARP,
2690 IP4_REWRITE_NEXT_ICMP_ERROR,
2691 } ip4_rewrite_next_t;
/* Shared body of the ip4 rewrite nodes.
 *
 * vm, node, frame - usual node function arguments.
 * rewrite_for_locally_received_packets - non-zero selects local mode:
 *   the adjacency index is read from ip.adj_index[VLIB_RX] instead of
 *   [VLIB_TX], the TTL and checksum step is skipped, a LOCAL adjacency
 *   is flagged as IP4_ERROR_SPOOFED_LOCAL_PACKETS and an ARP adjacency
 *   redirects the packet to IP4_REWRITE_NEXT_ARP.
 *
 * Per packet, in transit mode, the header checksum is patched for the
 * TTL change and an expired TTL sends the packet to
 * IP4_REWRITE_NEXT_ICMP_ERROR with a time-exceeded ICMP error prepared.
 * The buffer chain length is compared with max_l3_packet_bytes of the
 * adjacency (IP4_ERROR_MTU_EXCEEDED when larger).  When no error was
 * raised the buffer start is moved back by the rewrite length, the TX
 * interface is taken from the adjacency and the packet goes to the
 * next index of the adjacency rewrite header; otherwise it keeps the
 * next index chosen so far, which starts as IP4_REWRITE_NEXT_DROP.
 *
 * Returns frame->n_vectors.
 *
 * NOTE(review): short lines of this function (braces, short
 * declarations, the TTL store, pointer increments) are not present in
 * this listing. */
2694 ip4_rewrite_inline (vlib_main_t * vm,
2695 vlib_node_runtime_t * node,
2696 vlib_frame_t * frame,
2697 int rewrite_for_locally_received_packets)
2699 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2700 u32 * from = vlib_frame_vector_args (frame);
2701 u32 n_left_from, n_left_to_next, * to_next, next_index;
/* Error counters are taken from the ip4-input node runtime. */
2702 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2703 vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2704 ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2706 n_left_from = frame->n_vectors;
2707 next_index = node->cached_next_index;
2708 u32 cpu_index = os_get_cpu_number();
2710 while (n_left_from > 0)
2712 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: two packets per iteration while at least four remain, so
 * that from[2] and from[3] can be prefetched. */
2714 while (n_left_from >= 4 && n_left_to_next >= 2)
2716 ip_adjacency_t * adj0, * adj1;
2717 vlib_buffer_t * p0, * p1;
2718 ip4_header_t * ip0, * ip1;
2719 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2720 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2721 u32 next0_override, next1_override;
2722 u32 tx_sw_if_index0, tx_sw_if_index1;
2724 if (rewrite_for_locally_received_packets)
2725 next0_override = next1_override = 0;
2727 /* Prefetch next iteration. */
2729 vlib_buffer_t * p2, * p3;
2731 p2 = vlib_get_buffer (vm, from[2]);
2732 p3 = vlib_get_buffer (vm, from[3]);
2734 vlib_prefetch_buffer_header (p2, STORE);
2735 vlib_prefetch_buffer_header (p3, STORE);
2737 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2738 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2741 pi0 = to_next[0] = from[0];
2742 pi1 = to_next[1] = from[1];
2747 n_left_to_next -= 2;
2749 p0 = vlib_get_buffer (vm, pi0);
2750 p1 = vlib_get_buffer (vm, pi1);
2752 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2753 adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2755 /* We should never rewrite a pkt using the MISS adjacency */
2756 ASSERT(adj_index0 && adj_index1);
2758 ip0 = vlib_buffer_get_current (p0);
2759 ip1 = vlib_buffer_get_current (p1);
2761 error0 = error1 = IP4_ERROR_NONE;
2762 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2764 /* Decrement TTL & update checksum.
2765 Works either endian, so no need for byte swap. */
2766 if (! rewrite_for_locally_received_packets)
2768 i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2770 /* Input node should have reject packets with ttl 0. */
2771 ASSERT (ip0->ttl > 0);
2772 ASSERT (ip1->ttl > 0);
/* Patch the checksum incrementally: add 0x0100 in network byte order,
 * then add one more once the sum has reached 0xffff. */
2774 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2775 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2777 checksum0 += checksum0 >= 0xffff;
2778 checksum1 += checksum1 >= 0xffff;
2780 ip0->checksum = checksum0;
2781 ip1->checksum = checksum1;
2790 * If the ttl drops below 1 when forwarding, generate
2793 if (PREDICT_FALSE(ttl0 <= 0))
2795 error0 = IP4_ERROR_TIME_EXPIRED;
2796 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2797 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2798 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2799 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2801 if (PREDICT_FALSE(ttl1 <= 0))
2803 error1 = IP4_ERROR_TIME_EXPIRED;
2804 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2805 icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2806 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2807 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2810 /* Verify checksum. */
2811 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2812 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2815 /* Rewrite packet header and updates lengths. */
2816 adj0 = ip_get_adjacency (lm, adj_index0);
2817 adj1 = ip_get_adjacency (lm, adj_index1);
2819 if (rewrite_for_locally_received_packets)
2822 * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2823 * we end up here with a local adjacency in hand
2824 * The local adj rewrite data is 0xfefe on purpose.
2825 * Bad engineer, no donut for you.
2827 if (PREDICT_FALSE(adj0->lookup_next_index
2828 == IP_LOOKUP_NEXT_LOCAL))
2829 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2830 if (PREDICT_FALSE(adj0->lookup_next_index
2831 == IP_LOOKUP_NEXT_ARP))
2832 next0_override = IP4_REWRITE_NEXT_ARP;
2833 if (PREDICT_FALSE(adj1->lookup_next_index
2834 == IP_LOOKUP_NEXT_LOCAL))
2835 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2836 if (PREDICT_FALSE(adj1->lookup_next_index
2837 == IP_LOOKUP_NEXT_ARP))
2838 next1_override = IP4_REWRITE_NEXT_ARP;
2841 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2842 rw_len0 = adj0[0].rewrite_header.data_bytes;
2843 rw_len1 = adj1[0].rewrite_header.data_bytes;
2844 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2845 vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2847 /* Check MTU of outgoing interface. */
2848 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2849 ? IP4_ERROR_MTU_EXCEEDED
2851 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2852 ? IP4_ERROR_MTU_EXCEEDED
/* Only error-free packets take the next index of the adjacency. */
2855 next0 = (error0 == IP4_ERROR_NONE)
2856 ? adj0[0].rewrite_header.next_index : next0;
2858 if (rewrite_for_locally_received_packets)
2859 next0 = next0 && next0_override ? next0_override : next0;
2861 next1 = (error1 == IP4_ERROR_NONE)
2862 ? adj1[0].rewrite_header.next_index : next1;
2864 if (rewrite_for_locally_received_packets)
2865 next1 = next1 && next1_override ? next1_override : next1;
2868 * We've already accounted for an ethernet_header_t elsewhere
2870 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2871 vlib_increment_combined_counter
2872 (&lm->adjacency_counters,
2873 cpu_index, adj_index0,
2874 /* packet increment */ 0,
2875 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2877 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2878 vlib_increment_combined_counter
2879 (&lm->adjacency_counters,
2880 cpu_index, adj_index1,
2881 /* packet increment */ 0,
2882 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2884 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2885 * to see the IP headerr */
2886 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2888 p0->current_data -= rw_len0;
2889 p0->current_length += rw_len0;
2890 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2891 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2895 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2898 p0->current_config_index =
2899 vec_elt (cm->config_index_by_sw_if_index,
2901 vnet_get_config_data (&cm->config_main,
2902 &p0->current_config_index,
2904 /* # bytes of config data */ 0);
2907 if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2909 p1->current_data -= rw_len1;
2910 p1->current_length += rw_len1;
2912 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2913 vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2917 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2920 p1->current_config_index =
2921 vec_elt (cm->config_index_by_sw_if_index,
2923 vnet_get_config_data (&cm->config_main,
2924 &p1->current_config_index,
2926 /* # bytes of config data */ 0);
2930 /* Guess we are only writing on simple Ethernet header. */
2931 vnet_rewrite_two_headers (adj0[0], adj1[0],
2933 sizeof (ethernet_header_t));
2935 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2936 to_next, n_left_to_next,
2937 pi0, pi1, next0, next1);
/* Single loop: same processing for the remaining packets, one at a time. */
2940 while (n_left_from > 0 && n_left_to_next > 0)
2942 ip_adjacency_t * adj0;
2945 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2947 u32 tx_sw_if_index0;
2949 if (rewrite_for_locally_received_packets)
2952 pi0 = to_next[0] = from[0];
2954 p0 = vlib_get_buffer (vm, pi0);
2956 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2958 /* We should never rewrite a pkt using the MISS adjacency */
2961 adj0 = ip_get_adjacency (lm, adj_index0);
2963 ip0 = vlib_buffer_get_current (p0);
2965 error0 = IP4_ERROR_NONE;
2966 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2968 /* Decrement TTL & update checksum. */
2969 if (! rewrite_for_locally_received_packets)
2971 i32 ttl0 = ip0->ttl;
2973 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2975 checksum0 += checksum0 >= 0xffff;
2977 ip0->checksum = checksum0;
2979 ASSERT (ip0->ttl > 0);
2985 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2987 if (PREDICT_FALSE(ttl0 <= 0))
2990 * If the ttl drops below 1 when forwarding, generate
2993 error0 = IP4_ERROR_TIME_EXPIRED;
2994 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2995 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2996 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2997 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
3001 if (rewrite_for_locally_received_packets)
3004 * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
3005 * we end up here with a local adjacency in hand
3006 * The local adj rewrite data is 0xfefe on purpose.
3007 * Bad engineer, no donut for you.
3009 if (PREDICT_FALSE(adj0->lookup_next_index
3010 == IP_LOOKUP_NEXT_LOCAL))
3011 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
3013 * We have to override the next_index in ARP adjacencies,
3014 * because they're set up for ip4-arp, not this node...
3016 if (PREDICT_FALSE(adj0->lookup_next_index
3017 == IP_LOOKUP_NEXT_ARP))
3018 next0_override = IP4_REWRITE_NEXT_ARP;
3021 /* Guess we are only writing on simple Ethernet header. */
3022 vnet_rewrite_one_header (adj0[0], ip0,
3023 sizeof (ethernet_header_t));
3025 /* Update packet buffer attributes/set output interface. */
3026 rw_len0 = adj0[0].rewrite_header.data_bytes;
3027 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
3029 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
3030 vlib_increment_combined_counter
3031 (&lm->adjacency_counters,
3032 cpu_index, adj_index0,
3033 /* packet increment */ 0,
3034 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
3036 /* Check MTU of outgoing interface. */
3037 error0 = (vlib_buffer_length_in_chain (vm, p0)
3038 > adj0[0].rewrite_header.max_l3_packet_bytes
3039 ? IP4_ERROR_MTU_EXCEEDED
3042 p0->error = error_node->errors[error0];
3044 /* Don't adjust the buffer for ttl issue; icmp-error node wants
3045 * to see the IP headerr */
3046 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
3048 p0->current_data -= rw_len0;
3049 p0->current_length += rw_len0;
3050 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
3052 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
3053 next0 = adj0[0].rewrite_header.next_index;
3056 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
3059 p0->current_config_index =
3060 vec_elt (cm->config_index_by_sw_if_index,
3062 vnet_get_config_data (&cm->config_main,
3063 &p0->current_config_index,
3065 /* # bytes of config data */ 0);
/* Local mode: an ARP adjacency replaces next0, but only while next0 is
 * non-zero. */
3069 if (rewrite_for_locally_received_packets)
3070 next0 = next0 && next0_override ? next0_override : next0;
3075 n_left_to_next -= 1;
3077 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3078 to_next, n_left_to_next,
3082 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3085 /* Need to do trace after rewrites to pick up new packet data. */
3086 if (node->flags & VLIB_NODE_FLAG_TRACE)
3087 ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
3089 return frame->n_vectors;
3093 /** @brief IPv4 transit rewrite node.
3094 @node ip4-rewrite-transit
3096 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
3097 header checksum, fetch the ip adjacency, check the outbound mtu,
3098 apply the adjacency rewrite, and send pkts to the adjacency
3099 rewrite header's rewrite_next_index.
3101 @param vm vlib_main_t corresponding to the current thread
3102 @param node vlib_node_runtime_t
3103 @param frame vlib_frame_t whose contents should be dispatched
3105 @par Graph mechanics: buffer metadata, next index usage
3108 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
3109 - the rewrite adjacency index
3110 - <code>adj->lookup_next_index</code>
3111 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3112 the packet will be dropped.
3113 - <code>adj->rewrite_header</code>
3114 - Rewrite string length, rewrite string, next_index
3117 - <code>b->current_data, b->current_length</code>
3118 - Updated net of applying the rewrite string
3120 <em>Next Indices:</em>
3121 - <code> adj->rewrite_header.next_index </code>
/* Node function of ip4-rewrite-transit: runs ip4_rewrite_inline with the
 * locally-received flag cleared and returns its result. */
3125 ip4_rewrite_transit (vlib_main_t * vm,
3126 vlib_node_runtime_t * node,
3127 vlib_frame_t * frame)
3129 return ip4_rewrite_inline (vm, node, frame,
3130 /* rewrite_for_locally_received_packets */ 0);
3133 /** @brief IPv4 local rewrite node.
3134 @node ip4-rewrite-local
3136 This is the IPv4 local rewrite node. Fetch the ip adjacency, check
3137 the outbound interface mtu, apply the adjacency rewrite, and send
3138 pkts to the adjacency rewrite header's rewrite_next_index. Deal
3139 with hemorrhoids of the form "some clown sends an icmp4 w/ src =
3140 dst = interface addr."
3142 @param vm vlib_main_t corresponding to the current thread
3143 @param node vlib_node_runtime_t
3144 @param frame vlib_frame_t whose contents should be dispatched
3146 @par Graph mechanics: buffer metadata, next index usage
3149 - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
3150 - the rewrite adjacency index
3151 - <code>adj->lookup_next_index</code>
3152 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3153 the packet will be dropped.
3154 - <code>adj->rewrite_header</code>
3155 - Rewrite string length, rewrite string, next_index
3158 - <code>b->current_data, b->current_length</code>
3159 - Updated net of applying the rewrite string
3161 <em>Next Indices:</em>
3162 - <code> adj->rewrite_header.next_index </code>
/* Node function of ip4-rewrite-local: runs ip4_rewrite_inline with the
 * locally-received flag set and returns its result. */
3167 ip4_rewrite_local (vlib_main_t * vm,
3168 vlib_node_runtime_t * node,
3169 vlib_frame_t * frame)
3171 return ip4_rewrite_inline (vm, node, frame,
3172 /* rewrite_for_locally_received_packets */ 1);
/* Graph node registration for ip4-rewrite-transit, run by
 * ip4_rewrite_transit.  The visible next-node entries map the
 * IP4_REWRITE_NEXT_* indices to error-drop, ip4-arp and ip4-icmp-error. */
3175 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3176 .function = ip4_rewrite_transit,
3177 .name = "ip4-rewrite-transit",
3178 .vector_size = sizeof (u32),
3180 .format_trace = format_ip4_rewrite_trace,
3184 [IP4_REWRITE_NEXT_DROP] = "error-drop",
3185 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3186 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
/* Multiarch wrapper for the transit rewrite node function. */
3190 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit);
/* Graph node registration for ip4-rewrite-local, run by
 * ip4_rewrite_local and declared a sibling of ip4-rewrite-transit. */
3192 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
3193 .function = ip4_rewrite_local,
3194 .name = "ip4-rewrite-local",
3195 .vector_size = sizeof (u32),
3197 .sibling_of = "ip4-rewrite-transit",
3199 .format_trace = format_ip4_rewrite_trace,
/* Multiarch wrapper for the local rewrite node function. */
3204 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local);
/* CLI handler for set interface ip table.
 *
 * Parses a software interface followed by a decimal table id, looks the
 * FIB up by table id and records its index in
 * im->fib_index_by_sw_if_index for that interface (the vector is grown
 * first so the slot exists).  Parse failures produce a clib error.
 * NOTE(review): the control flow between these steps, including any
 * check of the lookup result and the final return, is on lines not
 * shown here. */
3206 static clib_error_t *
3207 add_del_interface_table (vlib_main_t * vm,
3208 unformat_input_t * input,
3209 vlib_cli_command_t * cmd)
3211 vnet_main_t * vnm = vnet_get_main();
3212 clib_error_t * error = 0;
3213 u32 sw_if_index, table_id;
3217 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3219 error = clib_error_return (0, "unknown interface `%U'",
3220 format_unformat_error, input);
3224 if (unformat (input, "%d", &table_id))
3228 error = clib_error_return (0, "expected table id `%U'",
3229 format_unformat_error, input);
3234 ip4_main_t * im = &ip4_main;
/* IP4_ROUTE_FLAG_TABLE_ID: table_id is treated as a table id, not an index. */
3235 ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
3239 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3240 im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
3249 * Place the indicated interface into the supplied VRF
3252 * @cliexstart{set interface ip table}
3254 * vpp# set interface ip table GigabitEthernet2/0/0 2
3256 * Interface addresses added after setting the interface IP table end up in the indicated VRF table.
3257 * Predictable but potentially counter-intuitive results occur if you provision interface addresses in multiple FIBs.
3258 * Upon RX, packets will be processed in the last IP table ID provisioned.
3259 * It might be marginally useful to evade source RPF drops to put an interface address into multiple FIBs.
/* CLI registration binding the set interface ip table path to
 * add_del_interface_table. */
3262 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3263 .path = "set interface ip table",
3264 .function = add_del_interface_table,
3265 .short_help = "Add/delete FIB table id for interface",
/* Node function of ip4-lookup-multicast.
 *
 * Per packet:
 *  - the FIB index comes from im->fib_index_by_sw_if_index for the RX
 *    interface, unless sw_if_index[VLIB_TX] holds something other than
 *    ~0, in which case that value is used as the FIB index;
 *  - the destination address is looked up with ip4_fib_lookup_buffer;
 *  - the next index is lookup_next_index of the resulting adjacency;
 *  - a flow hash is computed with ip4_compute_flow_hash and stored in
 *    ip.flow_hash, then masked with n_adj - 1 (n_adj is asserted to be
 *    a non-zero power of two) and added to the adjacency index;
 *  - the final index is stored in ip.adj_index[VLIB_TX] and the
 *    combined adjacency counter is bumped by one packet and the buffer
 *    chain length.
 *
 * Returns frame->n_vectors.
 *
 * NOTE(review): short lines of this function (braces, pointer
 * increments, switch labels, some assignment targets) are not present
 * in this listing. */
3270 ip4_lookup_multicast (vlib_main_t * vm,
3271 vlib_node_runtime_t * node,
3272 vlib_frame_t * frame)
3274 ip4_main_t * im = &ip4_main;
3275 ip_lookup_main_t * lm = &im->lookup_main;
3276 vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3277 u32 n_left_from, n_left_to_next, * from, * to_next;
3278 ip_lookup_next_t next;
3279 u32 cpu_index = os_get_cpu_number();
3281 from = vlib_frame_vector_args (frame);
3282 n_left_from = frame->n_vectors;
3283 next = node->cached_next_index;
3285 while (n_left_from > 0)
3287 vlib_get_next_frame (vm, node, next,
3288 to_next, n_left_to_next);
/* Dual loop: two packets per iteration while at least four remain. */
3290 while (n_left_from >= 4 && n_left_to_next >= 2)
3292 vlib_buffer_t * p0, * p1;
3293 u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3294 ip_lookup_next_t next0, next1;
3295 ip4_header_t * ip0, * ip1;
3296 ip_adjacency_t * adj0, * adj1;
3297 u32 fib_index0, fib_index1;
3298 u32 flow_hash_config0, flow_hash_config1;
3300 /* Prefetch next iteration. */
3302 vlib_buffer_t * p2, * p3;
3304 p2 = vlib_get_buffer (vm, from[2]);
3305 p3 = vlib_get_buffer (vm, from[3]);
3307 vlib_prefetch_buffer_header (p2, LOAD);
3308 vlib_prefetch_buffer_header (p3, LOAD);
3310 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3311 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3314 pi0 = to_next[0] = from[0];
3315 pi1 = to_next[1] = from[1];
3317 p0 = vlib_get_buffer (vm, pi0);
3318 p1 = vlib_get_buffer (vm, pi1);
3320 ip0 = vlib_buffer_get_current (p0);
3321 ip1 = vlib_buffer_get_current (p1);
/* FIB selection: RX interface default, overridden by a TX value other
 * than ~0. */
3323 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3324 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3325 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3326 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3327 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3328 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3330 adj_index0 = ip4_fib_lookup_buffer (im, fib_index0,
3331 &ip0->dst_address, p0);
3332 adj_index1 = ip4_fib_lookup_buffer (im, fib_index1,
3333 &ip1->dst_address, p1);
3335 adj0 = ip_get_adjacency (lm, adj_index0);
3336 adj1 = ip_get_adjacency (lm, adj_index1);
3338 next0 = adj0->lookup_next_index;
3339 next1 = adj1->lookup_next_index;
3342 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3345 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3347 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
3348 (ip0, flow_hash_config0);
3350 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
3351 (ip1, flow_hash_config1);
/* Pick one adjacency out of the block of n_adj using the flow hash. */
3353 ASSERT (adj0->n_adj > 0);
3354 ASSERT (adj1->n_adj > 0);
3355 ASSERT (is_pow2 (adj0->n_adj));
3356 ASSERT (is_pow2 (adj1->n_adj));
3357 adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3358 adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3360 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3361 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3363 if (1) /* $$$$$$ HACK FIXME */
3364 vlib_increment_combined_counter
3365 (cm, cpu_index, adj_index0, 1,
3366 vlib_buffer_length_in_chain (vm, p0));
3367 if (1) /* $$$$$$ HACK FIXME */
3368 vlib_increment_combined_counter
3369 (cm, cpu_index, adj_index1, 1,
3370 vlib_buffer_length_in_chain (vm, p1));
3374 n_left_to_next -= 2;
/* Bit 0 set: packet 0 needs another next; bit 1 set: packet 1 does. */
3377 wrong_next = (next0 != next) + 2*(next1 != next);
3378 if (PREDICT_FALSE (wrong_next != 0))
3386 n_left_to_next += 1;
3387 vlib_set_next_frame_buffer (vm, node, next0, pi0);
3393 n_left_to_next += 1;
3394 vlib_set_next_frame_buffer (vm, node, next1, pi1);
3400 n_left_to_next += 2;
3401 vlib_set_next_frame_buffer (vm, node, next0, pi0);
3402 vlib_set_next_frame_buffer (vm, node, next1, pi1);
3406 vlib_put_next_frame (vm, node, next, n_left_to_next);
3408 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single loop: same processing for the remaining packets. */
3414 while (n_left_from > 0 && n_left_to_next > 0)
3418 u32 pi0, adj_index0;
3419 ip_lookup_next_t next0;
3420 ip_adjacency_t * adj0;
3422 u32 flow_hash_config0;
3427 p0 = vlib_get_buffer (vm, pi0);
3429 ip0 = vlib_buffer_get_current (p0);
3431 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
3432 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3433 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3434 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3436 adj_index0 = ip4_fib_lookup_buffer (im, fib_index0,
3437 &ip0->dst_address, p0);
3439 adj0 = ip_get_adjacency (lm, adj_index0);
3441 next0 = adj0->lookup_next_index;
3444 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3446 vnet_buffer (p0)->ip.flow_hash =
3447 ip4_compute_flow_hash (ip0, flow_hash_config0);
3449 ASSERT (adj0->n_adj > 0);
3450 ASSERT (is_pow2 (adj0->n_adj));
3451 adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3453 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3455 if (1) /* $$$$$$ HACK FIXME */
3456 vlib_increment_combined_counter
3457 (cm, cpu_index, adj_index0, 1,
3458 vlib_buffer_length_in_chain (vm, p0));
3462 n_left_to_next -= 1;
/* Mismatch with the current next: give the slot back, close the
 * frame and open a new one. */
3465 if (PREDICT_FALSE (next0 != next))
3467 n_left_to_next += 1;
3468 vlib_put_next_frame (vm, node, next, n_left_to_next);
3470 vlib_get_next_frame (vm, node, next,
3471 to_next, n_left_to_next);
3474 n_left_to_next -= 1;
3478 vlib_put_next_frame (vm, node, next, n_left_to_next);
3481 if (node->flags & VLIB_NODE_FLAG_TRACE)
3482 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3484 return frame->n_vectors;
/* Graph node registration for ip4-lookup-multicast, run by
 * ip4_lookup_multicast and declared a sibling of ip4-lookup. */
3487 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3488 .function = ip4_lookup_multicast,
3489 .name = "ip4-lookup-multicast",
3490 .vector_size = sizeof (u32),
3491 .sibling_of = "ip4-lookup",
3492 .format_trace = format_ip4_lookup_trace,
/* Multiarch wrapper for the multicast lookup node function. */
3497 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast);
/* Graph node registration for ip4-multicast; its node function is
 * ip4_drop. */
3499 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3500 .function = ip4_drop,
3501 .name = "ip4-multicast",
3502 .vector_size = sizeof (u32),
3504 .format_trace = format_ip4_forward_next_trace,
/* Cross-check the mtrie lookup against ip4_fib_lookup_with_table.
 *
 * a          - address to look up.
 * fib_index0 - index into im->fibs selecting the FIB.
 *
 * Walks the four mtrie steps (0 through 3) for the address, falls back
 * to the default leaf of the mtrie when the result is empty, and
 * returns non-zero when the adjacency index found this way equals the
 * one returned by ip4_fib_lookup_with_table with the default route
 * allowed. */
3512 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3514 ip4_main_t * im = &ip4_main;
3515 ip4_fib_mtrie_t * mtrie0;
3516 ip4_fib_mtrie_leaf_t leaf0;
3519 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3521 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3522 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3523 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3524 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3525 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3527 /* Handle default route. */
3528 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3530 adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3532 return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3534 /* no_default_route */ 0);
/* CLI handler for test lookup.
 *
 * Accepts, in any order, table followed by a decimal id, count followed
 * by a floating point value, and an IPv4 base address; anything else
 * yields an unknown-input error.  It then runs ip4_lookup_validate n
 * times, passing table_id as the FIB argument and advancing the address
 * by one (in host byte order) after each lookup, and reports either the
 * number of failed lookups or that there were none.
 * NOTE(review): the declarations, the derivation of n from count and the
 * error counting are on lines not shown here. */
3537 static clib_error_t *
3538 test_lookup_command_fn (vlib_main_t * vm,
3539 unformat_input_t * input,
3540 vlib_cli_command_t * cmd)
3546 ip4_address_t ip4_base_address;
3549 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3550 if (unformat (input, "table %d", &table_id))
3552 else if (unformat (input, "count %f", &count))
3555 else if (unformat (input, "%U",
3556 unformat_ip4_address, &ip4_base_address))
3559 return clib_error_return (0, "unknown input `%U'",
3560 format_unformat_error, input);
3565 for (i = 0; i < n; i++)
3567 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Step to the next address: convert to host order, add one, convert back. */
3570 ip4_base_address.as_u32 =
3571 clib_host_to_net_u32 (1 +
3572 clib_net_to_host_u32 (ip4_base_address.as_u32));
3576 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3578 vlib_cli_output (vm, "No errors in %d lookups\n", n);
/*
 * Registers the "test lookup" CLI path and binds it to
 * test_lookup_command_fn.
 *
 * NOTE(review): the short help only repeats the path. The handler also
 * parses "table <n>", "count <f>" and an IPv4 address, none of which are
 * mentioned in the help text.
 */
3583 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3584 .path = "test lookup",
3585 .short_help = "test lookup",
3586 .function = test_lookup_command_fn,
/*
 * Store a flow-hash configuration on the IPv4 FIB identified by table_id.
 *
 * table_id         - key looked up in im4->fib_index_by_table_id.
 * flow_hash_config - value written to the FIB's flow_hash_config field.
 *
 * Returns VNET_API_ERROR_NO_SUCH_FIB on the failure path (presumably when the
 * hash lookup finds no entry; the guard condition is not visible in this
 * view). The success return value is not visible here either.
 */
3589 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3591 ip4_main_t * im4 = &ip4_main;
/* Map the table id to its FIB index. */
3593 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3596 return VNET_API_ERROR_NO_SUCH_FIB;
/* p[0] holds the index of the FIB inside im4->fibs. */
3598 fib = vec_elt_at_index (im4->fibs, p[0]);
3600 fib->flow_hash_config = flow_hash_config;
/*
 * CLI handler that builds a flow-hash configuration from keywords and applies
 * it with vnet_set_ip4_flow_hash.
 *
 * Accepted input tokens:
 *   table <n> - parsed with "%d" into table_id
 *   keywords  - the line using #a and v is expanded by foreach_flow_hash_bit
 *               (presumably once per flow-hash bit; the macro definition is
 *               not visible in this view). Each matching keyword ORs its
 *               value v into flow_hash_config and sets matched.
 *
 * Returns an "unknown input" error on the path that precedes the call to
 * vnet_set_ip4_flow_hash (its guard condition is not visible here). After the
 * call, VNET_API_ERROR_NO_SUCH_FIB is reported as "no such FIB table", and
 * another path logs "BUG: illegal flow hash config" through clib_warning.
 */
3604 static clib_error_t *
3605 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3606 unformat_input_t * input,
3607 vlib_cli_command_t * cmd)
/* Starts with no hash bits selected; keywords add bits below. */
3611 u32 flow_hash_config = 0;
3614 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3615 if (unformat (input, "table %d", &table_id))
3618 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3619 foreach_flow_hash_bit
3625 return clib_error_return (0, "unknown input `%U'",
3626 format_unformat_error, input);
/* Apply the accumulated configuration to the selected table. */
3628 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3634 case VNET_API_ERROR_NO_SUCH_FIB:
3635 return clib_error_return (0, "no such FIB table %d", table_id);
/* This path only logs a warning; it does not return a CLI error here. */
3638 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
/*
 * Registers the "set ip flow-hash" CLI path and binds it to
 * set_ip_flow_hash_command_fn.
 *
 * NOTE(review): the help string below begins "set ip table flow-hash table",
 * which does not match the registered path "set ip flow-hash". The extra
 * "table" after "ip" looks like a typo in the help text.
 */
3645 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3646 .path = "set ip flow-hash",
3648 "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3649 .function = set_ip_flow_hash_command_fn,
/*
 * Record which classify table is attached to an interface for IPv4.
 *
 * vm          - vlib main; not referenced in the lines visible here.
 * sw_if_index - software interface index; must be a live entry of the
 *               interface pool.
 * table_index - classify table index stored for the interface. The value ~0
 *               skips the table-existence check (presumably meaning "no
 *               table"; confirm with callers).
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * pool index, and VNET_API_ERROR_NO_SUCH_ENTRY when table_index is not ~0 and
 * is a free index of the classifier table pool. The success return value is
 * not visible in this view.
 */
3652 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3655 vnet_main_t * vnm = vnet_get_main();
3656 vnet_interface_main_t * im = &vnm->interface_main;
3657 ip4_main_t * ipm = &ip4_main;
3658 ip_lookup_main_t * lm = &ipm->lookup_main;
3659 vnet_classify_main_t * cm = &vnet_classify_main;
/* Reject interfaces that do not exist. */
3661 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3662 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* Reject unknown classifier tables, except for the ~0 value. */
3664 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3665 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* vec_validate is called before the store so that the per-interface vector
   can be indexed by sw_if_index. */
3667 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3668 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
/*
 * CLI handler that attaches a classify table to an interface through
 * vnet_set_ip4_classify_intfc.
 *
 * Accepted input tokens:
 *   table-index <n> - parsed with "%d" into table_index; sets table_index_set
 *   intfc <intfc>   - parsed with unformat_vnet_sw_interface into sw_if_index
 *
 * Both tokens are required: a missing table-index or an unset interface each
 * return their own CLI error. Error codes from the setter are translated to
 * "No such interface" and "No such classifier table".
 */
3673 static clib_error_t *
3674 set_ip_classify_command_fn (vlib_main_t * vm,
3675 unformat_input_t * input,
3676 vlib_cli_command_t * cmd)
/* ~0 marks "not supplied" for the interface. table_index also defaults to
   ~0, so a separate flag tracks whether the user supplied it. */
3678 u32 table_index = ~0;
3679 int table_index_set = 0;
3680 u32 sw_if_index = ~0;
3683 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3684 if (unformat (input, "table-index %d", &table_index))
3685 table_index_set = 1;
3686 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3687 vnet_get_main(), &sw_if_index))
/* Both arguments are mandatory. */
3693 if (table_index_set == 0)
3694 return clib_error_return (0, "classify table-index must be specified");
3696 if (sw_if_index == ~0)
3697 return clib_error_return (0, "interface / subif must be specified");
3699 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
/* Translate API error codes into CLI error messages. */
3706 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3707 return clib_error_return (0, "No such interface");
3709 case VNET_API_ERROR_NO_SUCH_ENTRY:
3710 return clib_error_return (0, "No such classifier table");
/*
 * Registers the "set ip classify" CLI path and binds it to
 * set_ip_classify_command_fn. The help string lists the two arguments the
 * handler requires: "intfc <int>" and "table-index <index>".
 */
3715 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3716 .path = "set ip classify",
3718 "set ip classify intfc <int> table-index <index>",
3719 .function = set_ip_classify_command_fn,
/*
 * CLI handler that updates the per-interface bit in
 * lm->tx_sw_if_has_ip_output_features.
 *
 * Accepted input tokens:
 *   <intfc> - parsed with unformat_vnet_sw_interface into sw_if_index
 *   del     - keyword; presumably clears is_add so the bit is reset (the
 *             branch body is not visible in this view)
 *
 * Returns an "unknown interface" error when no interface was parsed, i.e.
 * sw_if_index is still ~0 after parsing.
 */
3726 static clib_error_t *
3727 set_interface_output_feature_command_fn (vlib_main_t * vm,
3728 unformat_input_t * input,
3729 vlib_cli_command_t * cmd)
3731 vnet_main_t * vnm = vnet_get_main();
/* ~0 marks "no interface parsed yet". */
3732 u32 sw_if_index = ~0;
3734 ip4_main_t * im = &ip4_main;
3735 ip_lookup_main_t * lm = &im->lookup_main;
3737 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3739 if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
3741 else if (unformat (input, "del"))
3747 if (sw_if_index == ~0)
3748 return clib_error_return (0, "unknown interface `%U'",
3749 format_unformat_error, input);
/* clib_bitmap_set returns the bitmap pointer, which is stored back into the
   field; the bit at sw_if_index takes the value of is_add. */
3751 lm->tx_sw_if_has_ip_output_features =
3752 clib_bitmap_set (lm->tx_sw_if_has_ip_output_features, sw_if_index, is_add);
/*
 * Registers the "set interface output feature" CLI path and binds it to
 * set_interface_output_feature_command_fn.
 *
 * NOTE(review): the handler also accepts a "del" keyword, which the short
 * help below does not mention.
 */
3757 VLIB_CLI_COMMAND (set_interface_output_feature, static) = {
3758 .path = "set interface output feature",
3759 .function = set_interface_output_feature_command_fn,
3760 .short_help = "set interface output feature <intfc>",
3762 #endif /* TEST_CODE */