2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 /** for ethernet_header_t */
43 #include <vnet/ethernet/ethernet.h>
44 /** for ethernet_arp_header_t */
45 #include <vnet/ethernet/arp_packet.h>
46 #include <vnet/ppp/ppp.h>
47 /** for srp_hw_interface_class */
48 #include <vnet/srp/srp.h>
49 /** for API error numbers */
50 #include <vnet/api_errno.h>
56 /* This is really, really simple but stupid fib. */
/* Longest-prefix lookup of dst in the FIB at fib_index. The per-prefix-length
   hash tables are probed from the longest length downwards; when nothing
   matches, ai is set to the lookup-main miss adjacency. */
58 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
60 u32 disable_default_route)
62 ip_lookup_main_t * lm = &im->lookup_main;
63 ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
64 uword * p, * hash, key;
65 i32 i, i_min, dst_address, ai;
/* Stopping at index 1 skips the zero-length prefix table, i.e. the default route. */
67 i_min = disable_default_route ? 1 : 0;
/* dst is read through clib_mem_unaligned, so it need not be u32-aligned. */
68 dst_address = clib_mem_unaligned (&dst->data_u32, u32);
69 for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
71 hash = fib->adj_index_by_dst_address[i];
/* Mask the destination down to prefix length i before probing table i. */
75 key = dst_address & im->fib_masks[i];
76 if ((p = hash_get (hash, key)) != 0)
83 /* Nothing matches in table. */
84 ai = lm->miss_adj_index;
90 /** @brief Create FIB from table ID and init all hashing.
91 @param im - @ref ip4_main_t
92 @param table_id - table ID
93 @return fib - @ref ip4_fib_t
@note The table_id to FIB index mapping is recorded in
im->fib_index_by_table_id before the new element is appended to im->fibs,
so the stored index is the vector length at that moment.
96 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
99 hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
100 vec_add2 (im->fibs, fib, 1);
101 fib->table_id = table_id;
102 fib->index = fib - im->fibs;
103 /* IP_FLOW_HASH_DEFAULT is net value of 5 tuple flags without "reverse" bit */
104 fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
/* Both classify table indices start out as ~0.
   NOTE(review): presumably ~0 means "no classify table attached" - confirm. */
105 fib->fwd_classify_table_index = ~0;
106 fib->rev_classify_table_index = ~0;
/* Initialize the mtrie embedded in this FIB. */
107 ip4_mtrie_init (&fib->mtrie);
111 /** @brief Find existing or Create new FIB based on index
112 @param im @ref ip4_main_t
113 @param table_index_or_id - overloaded parameter referring
114 to the table or a table's index in the FIB vector
115 @param flags - used to check if table_index_or_id was a table or
116 an index (detected by @ref IP4_ROUTE_FLAG_FIB_INDEX)
117 @return either the existing or a new ip4_fib_t entry
@note With IP4_ROUTE_FLAG_FIB_INDEX set the value is used directly as an
index into im->fibs. Otherwise it is a table ID: ~0 requests the next
available table, and an ID missing from im->fib_index_by_table_id causes a
new FIB to be created through create_fib_with_table_id().
120 find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
121 u32 table_index_or_id, u32 flags)
123 uword * p, fib_index;
125 fib_index = table_index_or_id;
126 /* If this isn't a FIB_INDEX ... */
127 if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
129 /* If passed ~0 then request the next table available */
130 if (table_index_or_id == ~0) {
131 table_index_or_id = 0;
/* Keep going while the candidate table ID already has a FIB.
   NOTE(review): the loop body presumably advances table_index_or_id - confirm. */
132 while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) {
135 /* Create the next table and return the ip4_fib_t associated with it */
136 return create_fib_with_table_id (im, table_index_or_id);
138 /* A specific table_id was requested.. */
139 p = hash_get (im->fib_index_by_table_id, table_index_or_id);
140 /* ... and if it doesn't exist create it else grab its index */
142 return create_fib_with_table_id (im, table_index_or_id);
145 /* Return the ip4_fib_t associated with this index */
146 return vec_elt_at_index (im->fibs, fib_index);
/* Create the route hash for prefix length address_length in fib and make sure
   the scratch new/old hash value vectors are large enough for its values. */
150 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
157 ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
/* Size of one FIB result, rounded up to a whole number of uwords. */
158 lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
/* Each hash value occupies fib_result_n_words uwords. */
160 fib->adj_index_by_dst_address[address_length] =
161 hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
163 hash_set_flags (fib->adj_index_by_dst_address[address_length],
164 HASH_FLAG_NO_AUTO_SHRINK);
/* Highest scratch-vector index needed, derived from the hash's value size. */
166 h = hash_header (fib->adj_index_by_dst_address[address_length]);
167 max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
169 /* Initialize new/old hash value vectors. */
170 vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
171 vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
/* Install adj_index as the route for dst_address_u32/dst_address_length in
   fib, call the registered add/del route callbacks, then copy
   fib->new_hash_values back into the hash entry.
   NOTE(review): the copy-back presumably preserves changes callbacks made to
   new_hash_values - confirm. */
175 ip4_fib_set_adj_index (ip4_main_t * im,
179 u32 dst_address_length,
182 ip_lookup_main_t * lm = &im->lookup_main;
/* Reset both scratch vectors to all-ones bytes, then put the new adjacency
   index in slot 0. */
185 if (vec_bytes(fib->old_hash_values))
186 memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
187 if (vec_bytes(fib->new_hash_values))
188 memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
189 fib->new_hash_values[0] = adj_index;
191 /* Make sure adj index is valid. */
193 (void) ip_get_adjacency (lm, adj_index);
195 hash = fib->adj_index_by_dst_address[dst_address_length];
/* NOTE(review): old_hash_values is handed to _hash_set3 and presumably
   receives the entry's previous value - confirm against the hash API. */
197 hash = _hash_set3 (hash, dst_address_u32,
198 fib->new_hash_values,
199 fib->old_hash_values);
/* Store back the hash pointer returned by _hash_set3. */
201 fib->adj_index_by_dst_address[dst_address_length] = hash;
203 if (vec_len (im->add_del_route_callbacks) > 0)
205 ip4_add_del_route_callback_t * cb;
209 d.data_u32 = dst_address_u32;
/* Only callbacks whose required_flags are all present in flags are invoked. */
210 vec_foreach (cb, im->add_del_route_callbacks)
211 if ((flags & cb->required_flags) == cb->required_flags)
212 cb->function (im, cb->function_opaque,
214 &d, dst_address_length,
215 fib->old_hash_values,
216 fib->new_hash_values);
218 p = hash_get (hash, dst_address_u32);
219 /* hash_get should never return NULL here */
221 clib_memcpy (p, fib->new_hash_values,
222 vec_bytes (fib->new_hash_values));
/* Add or delete the IPv4 route described by a.
   The adjacency is created from a->add_adj when a->n_add_adj > 0, otherwise
   a->adj_index is used. IP4_ROUTE_FLAG_DEL in a->flags selects deletion.
   The per-prefix-length hash and the mtrie are both updated, and a replaced
   adjacency is deleted unless IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY is set. */
228 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
230 ip_lookup_main_t * lm = &im->lookup_main;
232 u32 dst_address, dst_address_length, adj_index, old_adj_index;
233 uword * hash, is_del;
234 ip4_add_del_route_callback_t * cb;
236 /* Either create new adjacency or use given one depending on arguments. */
237 if (a->n_add_adj > 0)
239 ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
240 ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
243 adj_index = a->adj_index;
245 dst_address = a->dst_address.data_u32;
246 dst_address_length = a->dst_address_length;
247 fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
249 ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
/* Keep only the bits covered by the prefix length. */
250 dst_address &= im->fib_masks[dst_address_length];
/* The hash for a prefix length is created lazily on first use. */
252 if (! fib->adj_index_by_dst_address[dst_address_length])
253 ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
255 hash = fib->adj_index_by_dst_address[dst_address_length];
257 is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
/* ~0 in slot 0 is the "not found" sentinel checked after the unset. */
261 fib->old_hash_values[0] = ~0;
262 hash = _hash_unset (hash, dst_address, fib->old_hash_values);
263 fib->adj_index_by_dst_address[dst_address_length] = hash;
265 if (vec_len (im->add_del_route_callbacks) > 0
266 && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
268 fib->new_hash_values[0] = ~0;
269 vec_foreach (cb, im->add_del_route_callbacks)
270 if ((a->flags & cb->required_flags) == cb->required_flags)
271 cb->function (im, cb->function_opaque,
273 &a->dst_address, dst_address_length,
274 fib->old_hash_values,
275 fib->new_hash_values);
279 ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
/* Slot 0 of old_hash_values is read as the adjacency index previously
   installed for this prefix (~0 is treated as "none" below). */
282 old_adj_index = fib->old_hash_values[0];
284 /* Avoid spurious reference count increments */
285 if (old_adj_index == adj_index
287 && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
289 ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
290 if (adj->share_count > 0)
/* Mirror the change in the mtrie; deletes pass the old adjacency index. */
294 ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
295 is_del ? old_adj_index : adj_index,
298 /* Delete old adjacency index if present and changed. */
299 if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
300 && old_adj_index != ~0
301 && old_adj_index != adj_index)
302 ip_del_adjacency (lm, old_adj_index);
/* Resolve the adjacency index to use for a route's next hop.
   Three cases are handled:
   - next_hop is all zeroes: interface route on next_hop_sw_if_index;
   - next_hop_sw_if_index is ~0: recursive next hop, a new indirect
     adjacency is added;
   - otherwise: the next hop is looked up in the FIB at fib_index. */
307 ip4_route_get_next_hop_adj (ip4_main_t * im,
309 ip4_address_t *next_hop,
310 u32 next_hop_sw_if_index,
311 u32 explicit_fib_index)
313 ip_lookup_main_t * lm = &im->lookup_main;
314 vnet_main_t * vnm = vnet_get_main();
315 uword * nh_hash, * nh_result;
316 int is_interface_next_hop;
320 fib = vec_elt_at_index (im->fibs, fib_index);
322 is_interface_next_hop = next_hop->data_u32 == 0;
323 if (is_interface_next_hop)
/* Interface-route adjacencies are cached per sw_if_index in
   im->interface_route_adj_index_by_sw_if_index; one is created and cached
   on demand. */
325 nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
327 nh_adj_index = *nh_result;
330 ip_adjacency_t * adj;
331 adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
333 ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
334 ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
335 hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
338 else if (next_hop_sw_if_index == ~0)
340 /* next-hop is recursive. we always need an indirect adj
341 * for recursive paths. Any LPM we perform now will give
342 * us a valid adj, but without tracking the next-hop we
343 * have no way to keep it valid.
345 ip_adjacency_t add_adj;
346 memset (&add_adj, 0, sizeof(add_adj));
348 add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
349 add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
350 add_adj.explicit_fib_index = explicit_fib_index;
351 ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
355 nh_hash = fib->adj_index_by_dst_address[32];
356 nh_result = hash_get (nh_hash, next_hop->data_u32);
358 /* Next hop must be known. */
361 ip_adjacency_t * adj;
363 /* no /32 exists, get the longest prefix match */
364 nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
366 adj = ip_get_adjacency (lm, nh_adj_index);
367 /* if ARP interface adjacency is present, we need to
368 install ARP adjacency for specific next hop */
369 if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
370 adj->arp.next_hop.ip4.as_u32 == 0)
372 nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
/* A /32 entry for the next hop exists: use its adjacency directly. */
377 nh_adj_index = *nh_result;
381 return (nh_adj_index);
/* Add or delete one next hop (next_hop via next_hop_sw_if_index, weighted by
   next_hop_weight) of the route dst_address/dst_address_length.
   The FIB is explicit_fib_index, or the one bound to next_hop_sw_if_index
   when explicit_fib_index is ~0. When adj_index is not ~0 it is used directly
   as the next-hop adjacency instead of resolving one.
   Failures set vnm->api_errno and build an error with clib_error_return;
   the error is passed to clib_error_report. */
385 ip4_add_del_route_next_hop (ip4_main_t * im,
387 ip4_address_t * dst_address,
388 u32 dst_address_length,
389 ip4_address_t * next_hop,
390 u32 next_hop_sw_if_index,
391 u32 next_hop_weight, u32 adj_index,
392 u32 explicit_fib_index)
394 vnet_main_t * vnm = vnet_get_main();
395 ip_lookup_main_t * lm = &im->lookup_main;
398 u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
399 u32 dst_adj_index, nh_adj_index;
400 uword * dst_hash, * dst_result;
401 ip_adjacency_t * dst_adj;
402 ip_multipath_adjacency_t * old_mp, * new_mp;
403 int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
404 clib_error_t * error = 0;
/* Pick the FIB: the interface's FIB unless one was given explicitly. */
406 if (explicit_fib_index == (u32)~0)
407 fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
409 fib_index = explicit_fib_index;
411 fib = vec_elt_at_index (im->fibs, fib_index);
413 /* Lookup next hop to be added or deleted. */
414 if (adj_index == (u32)~0)
416 nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index,
418 next_hop_sw_if_index,
423 nh_adj_index = adj_index;
425 ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
426 dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
/* Exact-match lookup of the destination prefix in its per-length hash. */
428 dst_hash = fib->adj_index_by_dst_address[dst_address_length];
429 dst_result = hash_get (dst_hash, dst_address_u32);
432 dst_adj_index = dst_result[0];
433 dst_adj = ip_get_adjacency (lm, dst_adj_index);
437 /* For deletes destination must be known. */
440 vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
441 error = clib_error_return (0, "unknown destination %U/%d",
442 format_ip4_address, dst_address,
451 /* Ignore adds of X/32 with next hop of X. */
453 && dst_address_length == 32
454 && dst_address->data_u32 == next_hop->data_u32
455 && adj_index != (u32)~0)
457 vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
458 error = clib_error_return (0, "prefix matches next hop %U/%d",
459 format_ip4_address, dst_address,
464 /* Destination is not known and default weight is set so add route
465 to existing non-multipath adjacency */
466 if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
468 /* create / delete additional mapping of existing adjacency */
469 ip4_add_del_route_args_t a;
471 a.table_index_or_table_id = fib_index;
472 a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
473 | IP4_ROUTE_FLAG_FIB_INDEX
474 | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
475 | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
476 | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
477 a.dst_address = dst_address[0];
478 a.dst_address_length = dst_address_length;
479 a.adj_index = nh_adj_index;
483 ip4_add_del_route (im, &a);
/* The destination adjacency's heap_handle is used as the index of the
   existing multipath adjacency; ~0 when the destination is unknown. */
487 old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
489 if (! ip_multipath_adjacency_add_del_next_hop
496 vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
497 error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
498 format_ip4_address, next_hop);
503 if (old_mp_adj_index != ~0)
504 old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
505 if (new_mp_adj_index != ~0)
506 new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
/* The FIB entry is only rewritten when the multipath adjacency changed. */
508 if (old_mp != new_mp)
510 ip4_add_del_route_args_t a;
511 ip_adjacency_t * adj;
513 a.table_index_or_table_id = fib_index;
/* A delete that still leaves a multipath adjacency is issued as an add. */
514 a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
515 | IP4_ROUTE_FLAG_FIB_INDEX
516 | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
517 | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
518 a.dst_address = dst_address[0];
519 a.dst_address_length = dst_address_length;
520 a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
524 ip4_add_del_route (im, &a);
526 adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index);
/* Adjust the adjacency's share_count: -1 on delete, +1 on add. */
528 adj->share_count += is_del ? -1 : 1;
533 clib_error_report (error);
/* Exact-match lookup of address/address_length in the selected FIB: the
   address is masked to address_length and probed in that length's hash only
   (no longest-prefix matching is done here). */
537 ip4_get_route (ip4_main_t * im,
538 u32 table_index_or_table_id,
543 ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
/* NOTE(review): the address is read through a plain u32 cast here, whereas
   ip4_fib_lookup_with_table uses clib_mem_unaligned for the same kind of
   read - confirm callers always pass a suitably aligned pointer. */
544 u32 dst_address = * (u32 *) address;
547 ASSERT (address_length < ARRAY_LEN (im->fib_masks));
548 dst_address &= im->fib_masks[address_length];
550 hash = fib->adj_index_by_dst_address[address_length];
551 p = hash_get (hash, dst_address);
/* Collect into *results and *result_lengths the routes of the selected FIB
   whose key agrees with address under the address_length mask. Scanning
   starts at the hash for prefix length address_length. The caller's vectors
   are reused: their lengths are reset to 0 before collecting. */
556 ip4_foreach_matching_route (ip4_main_t * im,
557 u32 table_index_or_table_id,
559 ip4_address_t * address,
561 ip4_address_t ** results,
562 u8 ** result_lengths)
564 ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
565 u32 dst_address = address->data_u32;
566 u32 this_length = address_length;
569 _vec_len (*results) = 0;
571 _vec_len (*result_lengths) = 0;
/* NOTE(review): vec_len (results) is applied to the pointer-to-vector
   argument, while matches are appended to *results below; vec_len (*results)
   looks like what was meant - confirm before relying on this loop stopping
   at the first length that yields matches. */
573 while (this_length <= 32 && vec_len (results) == 0)
576 hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
/* Match when key and address have no differing bit inside the mask. */
577 if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
581 vec_add1 (*results, a);
582 vec_add1 (*result_lengths, this_length);
/* Apply pending adjacency remaps (lm->adjacency_remap_table) to the routes of
   the selected FIB, for every prefix length 0..32. Remapped entries are
   rewritten in place, entries whose new adjacency points to nothing are
   deleted, and the add/del route callbacks are notified in both cases.
   Afterwards the mtrie is remapped, the remap table is zeroed and
   lm->n_adjacency_remaps is reset. */
590 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
591 u32 table_index_or_table_id,
594 ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
595 ip_lookup_main_t * lm = &im->lookup_main;
598 ip4_add_del_route_callback_t * cb;
/* Static scratch vector: kept across calls and emptied per prefix length. */
599 static ip4_address_t * to_delete;
601 if (lm->n_adjacency_remaps == 0)
604 for (l = 0; l <= 32; l++)
607 uword * hash = fib->adj_index_by_dst_address[l];
609 if (hash_elts (hash) == 0)
613 _vec_len (to_delete) = 0;
615 hash_foreach_pair (p, hash, ({
616 u32 adj_index = p->value[0];
/* m is the remap table entry for the adjacency this route currently uses. */
617 u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
621 /* Record destination address from hash key. */
624 /* New adjacency points to nothing: so delete prefix. */
626 vec_add1 (to_delete, a);
629 /* Remap to new adjacency. */
630 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
632 /* Set new adjacency value. */
/* NOTE(review): the new index is m - 1, so remap table values appear to be
   stored biased by one - confirm. */
633 fib->new_hash_values[0] = p->value[0] = m - 1;
635 vec_foreach (cb, im->add_del_route_callbacks)
636 if ((flags & cb->required_flags) == cb->required_flags)
637 cb->function (im, cb->function_opaque,
638 fib, flags | IP4_ROUTE_FLAG_ADD,
640 fib->old_hash_values,
641 fib->new_hash_values);
/* Deletions are deferred until the hash iteration above has finished. */
646 fib->new_hash_values[0] = ~0;
647 for (i = 0; i < vec_len (to_delete); i++)
649 hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
650 vec_foreach (cb, im->add_del_route_callbacks)
651 if ((flags & cb->required_flags) == cb->required_flags)
652 cb->function (im, cb->function_opaque,
653 fib, flags | IP4_ROUTE_FLAG_DEL,
655 fib->old_hash_values,
656 fib->new_hash_values);
660 /* Also remap adjacencies in mtrie. */
661 ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
663 /* Reset mapping table. */
664 vec_zero (lm->adjacency_remap_table);
666 /* All remaps have been performed. */
667 lm->n_adjacency_remaps = 0;
/* Delete the routes of the selected FIB that match address, iterating over
   the prefix lengths above address_length, then apply any pending adjacency
   remaps. Deletions are issued with IP4_ROUTE_FLAG_NO_REDISTRIBUTE. */
670 void ip4_delete_matching_routes (ip4_main_t * im,
671 u32 table_index_or_table_id,
673 ip4_address_t * address,
/* Static scratch vectors handed to ip4_foreach_matching_route on each pass. */
676 static ip4_address_t * matching_addresses;
677 static u8 * matching_address_lengths;
679 ip4_add_del_route_args_t a;
681 a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
682 a.table_index_or_table_id = table_index_or_table_id;
/* Only prefixes strictly longer than address_length are considered. */
687 for (l = address_length + 1; l <= 32; l++)
689 ip4_foreach_matching_route (im, table_index_or_table_id, flags,
693 &matching_address_lengths);
694 for (i = 0; i < vec_len (matching_addresses); i++)
696 a.dst_address = matching_addresses[i];
697 a.dst_address_length = matching_address_lengths[i];
698 ip4_add_del_route (im, &a);
702 ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
/* Forward declaration: ip4_lookup_inline calls this with VLIB_TX when the
   node has VLIB_NODE_FLAG_TRACE set. */
706 ip4_forward_next_trace (vlib_main_t * vm,
707 vlib_node_runtime_t * node,
708 vlib_frame_t * frame,
709 vlib_rx_or_tx_t which_adj_index);
/* Core of the IPv4 lookup node. For every buffer in frame:
   - pick the FIB (RX interface's FIB, or sw_if_index[VLIB_TX] when not ~0);
   - resolve an adjacency, either by walking the FIB's mtrie in four lookup
     steps or, for responses to locally received packets, by reusing
     ip.adj_index[VLIB_RX];
   - for multipath adjacencies (n_adj > 1) pick a member from the flow hash;
   - store the result in ip.adj_index[VLIB_TX], bump the per-adjacency
     combined counter and enqueue towards adj->lookup_next_index.
   Returns frame->n_vectors. */
712 ip4_lookup_inline (vlib_main_t * vm,
713 vlib_node_runtime_t * node,
714 vlib_frame_t * frame,
715 int lookup_for_responses_to_locally_received_packets,
718 ip4_main_t * im = &ip4_main;
719 ip_lookup_main_t * lm = &im->lookup_main;
720 vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
721 u32 n_left_from, n_left_to_next, * from, * to_next;
722 ip_lookup_next_t next;
723 u32 cpu_index = os_get_cpu_number();
725 from = vlib_frame_vector_args (frame);
726 n_left_from = frame->n_vectors;
/* Start by speculating that packets go where the previous frame went. */
727 next = node->cached_next_index;
729 while (n_left_from > 0)
731 vlib_get_next_frame (vm, node, next,
732 to_next, n_left_to_next);
/* Dual loop: two buffers per iteration, entered only while at least four
   remain so that from[2] and from[3] can be prefetched. */
734 while (n_left_from >= 4 && n_left_to_next >= 2)
736 vlib_buffer_t * p0, * p1;
737 ip4_header_t * ip0, * ip1;
738 __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
739 ip_lookup_next_t next0, next1;
740 ip_adjacency_t * adj0, * adj1;
741 ip4_fib_mtrie_t * mtrie0, * mtrie1;
742 ip4_fib_mtrie_leaf_t leaf0, leaf1;
743 ip4_address_t * dst_addr0, *dst_addr1;
744 __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
745 __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
746 u32 flow_hash_config0, flow_hash_config1;
747 u32 hash_c0, hash_c1;
750 /* Prefetch next iteration. */
752 vlib_buffer_t * p2, * p3;
754 p2 = vlib_get_buffer (vm, from[2]);
755 p3 = vlib_get_buffer (vm, from[3]);
757 vlib_prefetch_buffer_header (p2, LOAD);
758 vlib_prefetch_buffer_header (p3, LOAD);
760 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
761 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
/* Speculatively enqueue both buffers to the current next frame. */
764 pi0 = to_next[0] = from[0];
765 pi1 = to_next[1] = from[1];
767 p0 = vlib_get_buffer (vm, pi0);
768 p1 = vlib_get_buffer (vm, pi1);
770 ip0 = vlib_buffer_get_current (p0);
771 ip1 = vlib_buffer_get_current (p1);
/* Two alternatives for the lookup key follow: the next hop stored in the
   adjacency named by ip.adj_index[VLIB_TX], or the packet's own destination.
   NOTE(review): presumably selected by the is_indirect argument - confirm. */
775 ip_adjacency_t * iadj0, * iadj1;
776 iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
777 iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
778 dst_addr0 = &iadj0->indirect.next_hop.ip4;
779 dst_addr1 = &iadj1->indirect.next_hop.ip4;
783 dst_addr0 = &ip0->dst_address;
784 dst_addr1 = &ip1->dst_address;
/* Default to the RX interface's FIB; a sw_if_index[VLIB_TX] other than ~0
   overrides it and is used directly as a FIB index. */
787 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
788 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
789 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
790 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
791 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
792 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
/* The mtrie walk (steps 0..3 below) is skipped entirely for responses to
   locally received packets. */
795 if (! lookup_for_responses_to_locally_received_packets)
797 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
798 mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
800 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
802 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
803 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
/* tcpN points just past the fixed-size IPv4 header structure. */
806 tcp0 = (void *) (ip0 + 1);
807 tcp1 = (void *) (ip1 + 1);
809 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
810 || ip0->protocol == IP_PROTOCOL_UDP);
811 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
812 || ip1->protocol == IP_PROTOCOL_UDP);
814 if (! lookup_for_responses_to_locally_received_packets)
816 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
817 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
820 if (! lookup_for_responses_to_locally_received_packets)
822 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
823 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
826 if (! lookup_for_responses_to_locally_received_packets)
828 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
829 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
/* Responses to locally received packets reuse the RX adjacency. */
832 if (lookup_for_responses_to_locally_received_packets)
834 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
835 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
839 /* Handle default route. */
840 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
841 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
843 adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
844 adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
/* Debug cross-check: the mtrie result must equal the hash-based lookup. */
847 ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
849 /* no_default_route */ 0));
850 ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
852 /* no_default_route */ 0));
853 adj0 = ip_get_adjacency (lm, adj_index0);
854 adj1 = ip_get_adjacency (lm, adj_index1);
856 next0 = adj0->lookup_next_index;
857 next1 = adj1->lookup_next_index;
859 /* Use flow hash to compute multipath adjacency. */
860 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
861 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
862 if (PREDICT_FALSE (adj0->n_adj > 1))
865 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
866 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
867 ip4_compute_flow_hash (ip0, flow_hash_config0);
869 if (PREDICT_FALSE(adj1->n_adj > 1))
872 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
873 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
874 ip4_compute_flow_hash (ip1, flow_hash_config1);
/* n_adj is asserted to be a power of two, so masking the flow hash with
   n_adj - 1 yields an offset inside the adjacency block. */
877 ASSERT (adj0->n_adj > 0);
878 ASSERT (adj1->n_adj > 0);
879 ASSERT (is_pow2 (adj0->n_adj));
880 ASSERT (is_pow2 (adj1->n_adj));
881 adj_index0 += (hash_c0 & (adj0->n_adj - 1));
882 adj_index1 += (hash_c1 & (adj1->n_adj - 1));
884 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
885 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
/* Count one packet per adjacency; the byte count is the buffer chain
   length plus the size of an Ethernet header. */
887 vlib_increment_combined_counter
888 (cm, cpu_index, adj_index0, 1,
889 vlib_buffer_length_in_chain (vm, p0)
890 + sizeof(ethernet_header_t));
891 vlib_increment_combined_counter
892 (cm, cpu_index, adj_index1, 1,
893 vlib_buffer_length_in_chain (vm, p1)
894 + sizeof(ethernet_header_t));
/* Bit 0 set: p0 needs a different next node; bit 1 set: p1 does. */
901 wrong_next = (next0 != next) + 2*(next1 != next);
902 if (PREDICT_FALSE (wrong_next != 0))
911 vlib_set_next_frame_buffer (vm, node, next0, pi0);
918 vlib_set_next_frame_buffer (vm, node, next1, pi1);
925 vlib_set_next_frame_buffer (vm, node, next0, pi0);
926 vlib_set_next_frame_buffer (vm, node, next1, pi1);
930 vlib_put_next_frame (vm, node, next, n_left_to_next);
932 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single loop: same processing, one buffer at a time, for the remainder. */
938 while (n_left_from > 0 && n_left_to_next > 0)
942 __attribute__((unused)) tcp_header_t * tcp0;
943 ip_lookup_next_t next0;
944 ip_adjacency_t * adj0;
945 ip4_fib_mtrie_t * mtrie0;
946 ip4_fib_mtrie_leaf_t leaf0;
947 ip4_address_t * dst_addr0;
948 __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
949 u32 flow_hash_config0, hash_c0;
954 p0 = vlib_get_buffer (vm, pi0);
956 ip0 = vlib_buffer_get_current (p0);
960 ip_adjacency_t * iadj0;
961 iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
962 dst_addr0 = &iadj0->indirect.next_hop.ip4;
966 dst_addr0 = &ip0->dst_address;
969 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
970 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
971 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
973 if (! lookup_for_responses_to_locally_received_packets)
975 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
977 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
979 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
982 tcp0 = (void *) (ip0 + 1);
984 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
985 || ip0->protocol == IP_PROTOCOL_UDP);
987 if (! lookup_for_responses_to_locally_received_packets)
988 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
990 if (! lookup_for_responses_to_locally_received_packets)
991 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
993 if (! lookup_for_responses_to_locally_received_packets)
994 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
996 if (lookup_for_responses_to_locally_received_packets)
997 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
1000 /* Handle default route. */
1001 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1002 adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1005 ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1007 /* no_default_route */ 0));
1009 adj0 = ip_get_adjacency (lm, adj_index0);
1011 next0 = adj0->lookup_next_index;
1013 /* Use flow hash to compute multipath adjacency. */
1014 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
1015 if (PREDICT_FALSE(adj0->n_adj > 1))
1018 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
1020 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
1021 ip4_compute_flow_hash (ip0, flow_hash_config0);
1024 ASSERT (adj0->n_adj > 0);
1025 ASSERT (is_pow2 (adj0->n_adj));
1026 adj_index0 += (hash_c0 & (adj0->n_adj - 1));
1028 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1030 vlib_increment_combined_counter
1031 (cm, cpu_index, adj_index0, 1,
1032 vlib_buffer_length_in_chain (vm, p0)
1033 + sizeof(ethernet_header_t));
1037 n_left_to_next -= 1;
/* Misprediction: give back the speculatively used slot, flush the current
   next frame and fetch a new one. */
1040 if (PREDICT_FALSE (next0 != next))
1042 n_left_to_next += 1;
1043 vlib_put_next_frame (vm, node, next, n_left_to_next);
1045 vlib_get_next_frame (vm, node, next,
1046 to_next, n_left_to_next);
1049 n_left_to_next -= 1;
1053 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing is done once per frame, after all buffers were dispatched. */
1056 if (node->flags & VLIB_NODE_FLAG_TRACE)
1057 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
1059 return frame->n_vectors;
1062 /** @brief IPv4 lookup node.
1065 This is the main IPv4 lookup dispatch node.
1067 @param vm vlib_main_t corresponding to the current thread
1068 @param node vlib_node_runtime_t
1069 @param frame vlib_frame_t whose contents should be dispatched
@note Thin wrapper: forwards to ip4_lookup_inline() with
lookup_for_responses_to_locally_received_packets = 0 and is_indirect = 0,
and returns its result.
1071 @par Graph mechanics: buffer metadata, next index usage
1074 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
1075 - Indicates the @c sw_if_index value of the interface that the
1076 packet was received on.
1077 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
1078 - When the value is @c ~0 then the node performs a longest prefix
1079 match (LPM) for the packet destination address in the FIB attached
1080 to the receive interface.
1081 - Otherwise perform LPM for the packet destination address in the
1082 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
1083 value (0, 1, ...) and not a VRF id.
1086 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
1087 - The lookup result adjacency index.
1089 <em>Next Index:</em>
1090 - Dispatches the packet to the node index found in
1091 ip_adjacency_t @c adj->lookup_next_index
1092 (where @c adj is the lookup result adjacency).
1095 ip4_lookup (vlib_main_t * vm,
1096 vlib_node_runtime_t * node,
1097 vlib_frame_t * frame)
1099 return ip4_lookup_inline (vm, node, frame,
1100 /* lookup_for_responses_to_locally_received_packets */ 0,
1101 /* is_indirect */ 0);
/* Fill in adj as the interface-route adjacency for sw_if_index.
   Ethernet and SRP hardware classes get an ARP adjacency
   (IP_LOOKUP_NEXT_ARP, rewrite towards ip4_arp_node); every other class gets
   a plain IP4 rewrite adjacency (IP_LOOKUP_NEXT_REWRITE, ip4_rewrite_node).
   The rewrite header is then built by vnet_rewrite_for_sw_interface. */
1105 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
1106 ip_adjacency_t * adj,
1108 u32 if_address_index)
/* The hardware class of the supporting hw interface decides the adjacency type. */
1110 vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
1112 vnet_l3_packet_type_t packet_type;
1115 if (hw->hw_class_index == ethernet_hw_interface_class.index
1116 || hw->hw_class_index == srp_hw_interface_class.index)
1119 * We have a bit of a problem in this case. ip4-arp uses
1120 * the rewrite_header.next_index to hand pkts to the
1121 * indicated inteface output node. We can end up in
1122 * ip4_rewrite_local, too, which also pays attention to
1123 * rewrite_header.next index. Net result: a hack in
1124 * ip4_rewrite_local...
1126 n = IP_LOOKUP_NEXT_ARP;
1127 node_index = ip4_arp_node.index;
1128 adj->if_address_index = if_address_index;
/* NOTE(review): the explicit zeroing of ip4.as_u32 looks redundant with the
   ip46_address_reset call that follows - confirm. */
1129 adj->arp.next_hop.ip4.as_u32 = 0;
1130 ip46_address_reset(&adj->arp.next_hop);
1131 packet_type = VNET_L3_PACKET_TYPE_ARP;
1135 n = IP_LOOKUP_NEXT_REWRITE;
1136 node_index = ip4_rewrite_node.index;
1137 packet_type = VNET_L3_PACKET_TYPE_IP4;
1140 adj->lookup_next_index = n;
1141 vnet_rewrite_for_sw_interface
1146 VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1147 &adj->rewrite_header,
1148 sizeof (adj->rewrite_data));
/* Install the routes implied by interface address a on sw_if_index:
   - the connected prefix, only when its length is below 32, using an
     interface-route adjacency that is also remembered in
     a->neighbor_probe_adj_index;
   - the /32 for the address itself, as a local adjacency, or as a classify
     adjacency when a classify table is configured for the interface. */
1152 ip4_add_interface_routes (u32 sw_if_index,
1153 ip4_main_t * im, u32 fib_index,
1154 ip_interface_address_t * a)
1156 vnet_main_t * vnm = vnet_get_main();
1157 ip_lookup_main_t * lm = &im->lookup_main;
1158 ip_adjacency_t * adj;
1159 ip4_address_t * address = ip_interface_address_get_address (lm, a);
1160 ip4_add_del_route_args_t x;
1161 vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1162 u32 classify_table_index;
1164 /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1165 x.table_index_or_table_id = fib_index;
1166 x.flags = (IP4_ROUTE_FLAG_ADD
1167 | IP4_ROUTE_FLAG_FIB_INDEX
1168 | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1169 x.dst_address = address[0];
1170 x.dst_address_length = a->address_length;
/* Stays ~0 unless a connected-prefix adjacency is created below. */
1174 a->neighbor_probe_adj_index = ~0;
1175 if (a->address_length < 32)
1177 adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
/* The interface address is identified by its index in lm->if_address_pool. */
1179 ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1180 ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1181 ip4_add_del_route (im, &x);
1182 a->neighbor_probe_adj_index = x.adj_index;
1185 /* Add e.g. 1.1.1.1/32 as local to this host. */
1186 adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
/* Use the interface's classify table when one is configured (index != ~0). */
1189 classify_table_index = ~0;
1190 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1191 classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1192 if (classify_table_index != (u32) ~0)
1194 adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1195 adj->classify.table_index = classify_table_index;
1198 adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1200 adj->if_address_index = a - lm->if_address_pool;
1201 adj->rewrite_header.sw_if_index = sw_if_index;
1202 adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1204 * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1205 * fail an RPF-ish check, but still go thru the rewrite code...
1207 adj->rewrite_header.data_bytes = 0;
1209 ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
/* Reuse x for the host route: same address, prefix length forced to 32. */
1210 x.dst_address_length = 32;
1211 ip4_add_del_route (im, &x);
/*
 * Delete from the FIB identified by fib_index the routes for an
 * interface address: the prefix route (only when address_length < 32)
 * and the /32 route for the address, followed by a call to
 * ip4_delete_matching_routes.
 */
1215 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1217 ip4_add_del_route_args_t x;
1219 /* Delete e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1220 x.table_index_or_table_id = fib_index;
1221 x.flags = (IP4_ROUTE_FLAG_DEL
1222 | IP4_ROUTE_FLAG_FIB_INDEX
1223 | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1224 x.dst_address = address[0];
1225 x.dst_address_length = address_length;
1230 if (address_length < 32)
1231 ip4_add_del_route (im, &x);
/* Delete e.g. 1.1.1.1/32, the host route for the address itself. */
1233 x.dst_address_length = 32;
1234 ip4_add_del_route (im, &x);
1236 ip4_delete_matching_routes (im,
1238 IP4_ROUTE_FLAG_FIB_INDEX,
1245 ip4_address_t address;
1247 } ip4_interface_address_t;
1249 static clib_error_t *
1250 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1252 ip4_address_t * new_address,
/*
 * Add or delete an IPv4 address on sw_if_index.
 *
 * Builds a one-element address/FIB vector (addr_fib) for
 * ip_interface_address_add_del, rejects an address that overlaps an
 * address already configured on the interface, installs or removes
 * the interface routes when the interface is admin-up and
 * insert_routes is set, and notifies the registered
 * add_del_interface_address_callbacks when the address pool changed
 * size.
 *
 * Returns 0 on success or a clib_error_t describing the failure.
 * addr_fib is scratch storage owned by this function and is released
 * on every exit path.
 */
1258 static clib_error_t *
1259 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1261 ip4_address_t * address,
1267 vnet_main_t * vnm = vnet_get_main();
1268 ip4_main_t * im = &ip4_main;
1269 ip_lookup_main_t * lm = &im->lookup_main;
1270 clib_error_t * error = 0;
1271 u32 if_address_index, elts_before;
1272 ip4_address_fib_t ip4_af, * addr_fib = 0;
/* Make sure the interface has a FIB index slot before reading it. */
1274 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1275 ip4_addr_fib_init (&ip4_af, address,
1276 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
1277 vec_add1 (addr_fib, ip4_af);
1279 /* When adding an address check that it does not conflict with an existing address. */
1282 ip_interface_address_t * ia;
1283 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
1284 0 /* honor unnumbered */,
1286 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
/* Two prefixes conflict when either one covers the other's address. */
1288 if (ip4_destination_matches_route (im, address, x, ia->address_length)
1289 || ip4_destination_matches_route (im, x, address, address_length))
{
/* addr_fib was allocated by vec_add1 above; release it on this early exit as well. */
vec_free (addr_fib);
1290 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1291 format_ip4_address_and_length, address, address_length,
1292 format_ip4_address_and_length, x, ia->address_length,
1293 format_vnet_sw_if_index_name, vnm, sw_if_index);
}
/* Remember the pool size so a duplicate add (no size change) can be detected below. */
1297 elts_before = pool_elts (lm->if_address_pool);
1299 error = ip_interface_address_add_del
/* Routes are only touched while the interface is admin-up and the caller asked for them. */
1309 if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1312 ip4_del_interface_routes (im, ip4_af.fib_index, address,
1316 ip4_add_interface_routes (sw_if_index,
1317 im, ip4_af.fib_index,
1319 (lm->if_address_pool, if_address_index));
1322 /* Notify callbacks only when the pool grew/shrank; an unchanged pool means a duplicate address. */
1323 if (elts_before != pool_elts (lm->if_address_pool))
1325 ip4_add_del_interface_address_callback_t * cb;
1326 vec_foreach (cb, im->add_del_interface_address_callbacks)
1327 cb->function (im, cb->function_opaque, sw_if_index,
1328 address, address_length,
1334 vec_free (addr_fib);
/*
 * Public entry point for adding or deleting an interface address.
 * Forwards to ip4_add_del_interface_address_internal with both
 * redistribute and insert_routes enabled.
 */
1339 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1340 ip4_address_t * address, u32 address_length,
1343 return ip4_add_del_interface_address_internal
1344 (vm, sw_if_index, address, address_length,
1345 /* redistribute */ 1,
1346 /* insert_routes */ 1,
/*
 * Software-interface admin up/down handler, registered through
 * VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION below.
 * Walks every address configured on sw_if_index and calls
 * ip4_add_interface_routes / ip4_del_interface_routes for it, using
 * the FIB index currently bound to the interface.
 */
1350 static clib_error_t *
1351 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1355 ip4_main_t * im = &ip4_main;
1356 ip_interface_address_t * ia;
1358 u32 is_admin_up, fib_index;
1360 /* Fill in lookup tables with default table (0). */
1361 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
/* New slots are initialized to ~0. */
1363 vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
/* Normalize the admin-up flag bit to 0/1. */
1365 is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
1367 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
1369 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
1370 0 /* honor unnumbered */,
1372 a = ip_interface_address_get_address (&im->lookup_main, ia);
1374 ip4_add_interface_routes (sw_if_index,
1378 ip4_del_interface_routes (im, fib_index,
1379 a, ia->address_length);
1385 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
/*
 * Feature registrations. Each entry names the graph node that
 * implements the feature (.node_name), the features it must run
 * before (.runs_before), and where the assigned feature index is
 * stored in ip4_main (.feature_index).
 */
1387 /* Built-in ip4 unicast rx feature path definition */
1388 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
1389 .node_name = "ip4-inacl",
1390 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
1391 .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
1394 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
1395 .node_name = "ip4-source-check-via-rx",
1396 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
1398 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
1401 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
1402 .node_name = "ip4-source-check-via-any",
1403 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
1405 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
1408 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
1409 .node_name = "ip4-source-and-port-range-check-rx",
1410 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
1412 &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
1415 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
1416 .node_name = "ip4-policer-classify",
1417 .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
1419 &ip4_main.ip4_unicast_rx_feature_policer_classify,
1422 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
1423 .node_name = "ipsec-input-ip4",
1424 .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
1425 .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
1428 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
1429 .node_name = "vpath-input-ip4",
1430 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
1431 .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
1434 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
1435 .node_name = "ip4-lookup",
1436 .runs_before = 0, /* not before any other features */
1437 .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
1440 /* Built-in ip4 multicast rx feature path definition */
1441 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
1442 .node_name = "vpath-input-ip4",
1443 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
1444 .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
1447 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
1448 .node_name = "ip4-lookup-multicast",
1449 .runs_before = 0, /* not before any other features */
1450 .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
/* Start-node name lists handed to ip_feature_init_cast by ip4_feature_init: rx casts use the first, the tx cast the second. */
1453 static char * rx_feature_start_nodes[] =
1454 { "ip4-input", "ip4-input-no-checksum"};
1456 static char * tx_feature_start_nodes[] =
1457 { "ip4-rewrite-transit"};
1459 /* Source and port-range check ip4 tx feature path definition */
1460 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
1461 .node_name = "ip4-source-and-port-range-check-tx",
1462 .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
1464 &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
1468 /* Built-in ip4 tx feature path definition */
1469 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
1470 .node_name = "interface-output",
1471 .runs_before = 0, /* not before any other features */
1472 .feature_index = &ip4_main.ip4_tx_feature_interface_output,
/*
 * Initialize the feature configuration of every ip feature cast
 * (0 .. VNET_N_IP_FEAT-1) by calling ip_feature_init_cast with the
 * matching list of start nodes. An error returned by
 * ip_feature_init_cast is captured in `error`.
 */
1476 static clib_error_t *
1477 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
1479 ip_lookup_main_t * lm = &im->lookup_main;
1480 clib_error_t * error;
1482 ip_config_main_t * cm;
1483 vnet_config_main_t * vcm;
1484 char **feature_start_nodes;
1485 int feature_start_len;
1487 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
1489 cm = &lm->feature_config_mains[cast];
1490 vcm = &cm->config_main;
/* Casts numbered below VNET_IP_TX_FEAT are the rx casts; they start at the rx input nodes. */
1492 if (cast < VNET_IP_TX_FEAT)
1494 feature_start_nodes = rx_feature_start_nodes;
1495 feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
1499 feature_start_nodes = tx_feature_start_nodes;
1500 feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
1503 if ((error = ip_feature_init_cast (vm, cm, vcm,
1504 feature_start_nodes,
/*
 * Software-interface add/delete handler, registered through
 * VNET_SW_INTERFACE_ADD_DEL_FUNCTION below.
 * For every feature cast it adds or deletes that cast's terminal
 * feature (unicast lookup, multicast lookup, or tx interface-output)
 * in the interface's feature config and stores the resulting config
 * index back in config_index_by_sw_if_index. Always returns 0.
 */
1514 static clib_error_t *
1515 ip4_sw_interface_add_del (vnet_main_t * vnm,
1519 vlib_main_t * vm = vnm->vlib_main;
1520 ip4_main_t * im = &ip4_main;
1521 ip_lookup_main_t * lm = &im->lookup_main;
1525 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
1527 ip_config_main_t * cm = &lm->feature_config_mains[cast];
1528 vnet_config_main_t * vcm = &cm->config_main;
/* Grow the per-interface vector as needed; new slots start at ~0. */
1530 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1531 ci = cm->config_index_by_sw_if_index[sw_if_index];
/* Pick the feature that terminates this cast's feature path. */
1533 if (cast == VNET_IP_RX_UNICAST_FEAT)
1534 feature_index = im->ip4_unicast_rx_feature_lookup;
1535 else if (cast == VNET_IP_RX_MULTICAST_FEAT)
1536 feature_index = im->ip4_multicast_rx_feature_lookup;
1538 feature_index = im->ip4_tx_feature_interface_output;
1541 ci = vnet_config_add_feature (vm, vcm,
1544 /* config data */ 0,
1545 /* # bytes of config data */ 0);
1547 ci = vnet_config_del_feature (vm, vcm,
1550 /* config data */ 0,
1551 /* # bytes of config data */ 0);
1553 cm->config_index_by_sw_if_index[sw_if_index] = ci;
1555 * note: do not update the tx feature count here.
1559 return /* no error */ 0;
1562 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
/* Forward declaration: the trace formatter is defined further down but referenced by the node registrations. */
1564 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph node "ip4-lookup": runs ip4_lookup over vectors of u32 buffer indices. */
1566 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1567 .function = ip4_lookup,
1568 .name = "ip4-lookup",
1569 .vector_size = sizeof (u32),
1571 .format_trace = format_ip4_lookup_trace,
1573 .n_next_nodes = IP4_LOOKUP_N_NEXT,
1574 .next_nodes = IP4_LOOKUP_NEXT_NODES,
1577 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
/*
 * Node function for "ip4-indirect": the shared ip4_lookup_inline
 * worker invoked with is_indirect set and
 * lookup_for_responses_to_locally_received_packets cleared.
 */
1580 ip4_indirect (vlib_main_t * vm,
1581 vlib_node_runtime_t * node,
1582 vlib_frame_t * frame)
1584 return ip4_lookup_inline (vm, node, frame,
1585 /* lookup_for_responses_to_locally_received_packets */ 0,
1586 /* is_indirect */ 1);
/* Registered as a sibling of "ip4-lookup" and shares its trace formatter. */
1589 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1590 .function = ip4_indirect,
1591 .name = "ip4-indirect",
1592 .vector_size = sizeof (u32),
1593 .sibling_of = "ip4-lookup",
1594 .format_trace = format_ip4_lookup_trace,
1599 VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect);
1602 /* Global IP4 main. */
1603 ip4_main_t ip4_main;
/*
 * Init function for the ip4 lookup code (registered with
 * VLIB_INIT_FUNCTION below): fills the prefix-length mask table,
 * creates the FIB with table id 0, initializes the generic lookup
 * main, hooks the packet-generator header parser onto ip4-lookup,
 * builds the ARP request packet template and finally runs
 * ip4_feature_init.
 */
1606 ip4_lookup_init (vlib_main_t * vm)
1608 ip4_main_t * im = &ip4_main;
1609 clib_error_t * error;
/* One mask per index of fib_masks, stored in network byte order. */
1612 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* NOTE(review): presumably pow2_mask (i) yields i low-order one bits, so the shift moves them to the top of the word -- confirm. */
1617 m = pow2_mask (i) << (32 - i);
1620 im->fib_masks[i] = clib_host_to_net_u32 (m);
1623 /* Create FIB with index 0 and table id of 0. */
1624 find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1626 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1630 pn = pg_get_node (ip4_lookup_node.index);
1631 pn->unformat_edit = unformat_pg_ip4_header;
/* Build the constant part of an ARP request; it becomes the ip4_arp_request_packet_template. */
1635 ethernet_arp_header_t h;
1637 memset (&h, 0, sizeof (h));
1639 /* Set target ethernet address to all zeros. */
1640 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
/* Field setters: _16 stores a 16-bit value in network byte order, _8 stores a byte as is. */
1642 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1643 #define _8(f,v) h.f = v;
1644 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1645 _16 (l3_type, ETHERNET_TYPE_IP4);
1646 _8 (n_l2_address_bytes, 6);
1647 _8 (n_l3_address_bytes, 4);
1648 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1652 vlib_packet_template_init (vm,
1653 &im->ip4_arp_request_packet_template,
1656 /* alloc chunk size */ 8,
1660 error = ip4_feature_init (vm, im);
1665 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record shared by the ip4 forwarding nodes; filled in by ip4_forward_next_trace. */
1668 /* Adjacency taken. */
1673 /* Packet data, possibly *after* rewrite. */
/* NOTE(review): the size looks chosen so the whole record fits in 64 bytes -- confirm against the other fields. */
1674 u8 packet_data[64 - 1*sizeof(u32)];
1675 } ip4_forward_next_trace_t;
/*
 * Trace formatter: appends the captured packet bytes, formatted as
 * an IPv4 header, at the current indentation of `s`.
 * Variadic arguments: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip4_forward_next_trace_t *.
 */
1677 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1679 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1680 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1681 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1682 uword indent = format_get_indent (s);
1683 s = format (s, "%U%U",
1684 format_white_space, indent,
1685 format_ip4_header, t->packet_data);
/*
 * Trace formatter for the lookup nodes: prints the FIB index, the
 * adjacency index with the formatted adjacency, and the flow hash,
 * followed on a new line by the captured packet as an IPv4 header.
 * Variadic arguments: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip4_forward_next_trace_t *.
 */
1689 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1691 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1692 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1693 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1694 vnet_main_t * vnm = vnet_get_main();
1695 ip4_main_t * im = &ip4_main;
1696 uword indent = format_get_indent (s);
1698 s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1699 t->fib_index, t->adj_index, format_ip_adjacency,
1700 vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1701 s = format (s, "\n%U%U",
1702 format_white_space, indent,
1703 format_ip4_header, t->packet_data);
/*
 * Trace formatter for the rewrite nodes: prints the adjacency and
 * flow hash, followed on a new line by the captured packet bytes
 * formatted through format_ip_adjacency_packet_data.
 * Variadic arguments: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip4_forward_next_trace_t *.
 */
1707 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1709 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1710 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1711 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1712 vnet_main_t * vnm = vnet_get_main();
1713 ip4_main_t * im = &ip4_main;
1714 uword indent = format_get_indent (s);
/* The value printed as tx_sw_if_index is the record's fib_index field; ip4_forward_next_trace stores sw_if_index[VLIB_TX] there when it is set. */
1716 s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1717 t->fib_index, t->adj_index, format_ip_adjacency,
1718 vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1719 s = format (s, "\n%U%U",
1720 format_white_space, indent,
1721 format_ip_adjacency_packet_data,
1722 vnm, &im->lookup_main, t->adj_index,
1723 t->packet_data, sizeof (t->packet_data));
1727 /* Common trace function for all ip4-forward next nodes. */
/*
 * For every buffer in `frame` flagged VLIB_BUFFER_IS_TRACED, adds an
 * ip4_forward_next_trace_t record holding the adjacency index
 * selected by which_adj_index (VLIB_RX or VLIB_TX), the flow hash,
 * a FIB/tx index and the leading bytes of the packet.
 * Buffers are handled two at a time, then one at a time.
 */
1729 ip4_forward_next_trace (vlib_main_t * vm,
1730 vlib_node_runtime_t * node,
1731 vlib_frame_t * frame,
1732 vlib_rx_or_tx_t which_adj_index)
1735 ip4_main_t * im = &ip4_main;
1737 n_left = frame->n_vectors;
1738 from = vlib_frame_vector_args (frame);
1743 vlib_buffer_t * b0, * b1;
1744 ip4_forward_next_trace_t * t0, * t1;
1746 /* Prefetch next iteration. */
1747 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1748 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1753 b0 = vlib_get_buffer (vm, bi0);
1754 b1 = vlib_get_buffer (vm, bi1);
1756 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1758 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1759 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1760 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* Use sw_if_index[VLIB_TX] when it is set (not ~0); otherwise the FIB index bound to the rx interface. */
1761 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1762 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1763 vec_elt (im->fib_index_by_sw_if_index,
1764 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1766 clib_memcpy (t0->packet_data,
1767 vlib_buffer_get_current (b0),
1768 sizeof (t0->packet_data));
1770 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1772 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1773 t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1774 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1775 t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1776 vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1777 vec_elt (im->fib_index_by_sw_if_index,
1778 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1779 clib_memcpy (t1->packet_data,
1780 vlib_buffer_get_current (b1),
1781 sizeof (t1->packet_data));
/* Single-buffer tail: same record layout as above. */
1791 ip4_forward_next_trace_t * t0;
1795 b0 = vlib_get_buffer (vm, bi0);
1797 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1799 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1800 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1801 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1802 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1803 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1804 vec_elt (im->fib_index_by_sw_if_index,
1805 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1806 clib_memcpy (t0->packet_data,
1807 vlib_buffer_get_current (b0),
1808 sizeof (t0->packet_data));
/*
 * Shared body of the ip4 drop/punt/miss nodes: hands every buffer of
 * `frame` to vlib_error_drop_buffers tagged with error_code (counted
 * against ip4_input_node) and, when tracing is enabled on the node,
 * records a trace using the VLIB_TX adjacency index.
 */
1816 ip4_drop_or_punt (vlib_main_t * vm,
1817 vlib_node_runtime_t * node,
1818 vlib_frame_t * frame,
1819 ip4_error_t error_code)
1821 u32 * buffers = vlib_frame_vector_args (frame);
1822 uword n_packets = frame->n_vectors;
1824 vlib_error_drop_buffers (vm, node,
1829 ip4_input_node.index,
1832 if (node->flags & VLIB_NODE_FLAG_TRACE)
1833 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function: dispose of the frame via ip4_drop_or_punt with IP4_ERROR_ADJACENCY_DROP. */
1839 ip4_drop (vlib_main_t * vm,
1840 vlib_node_runtime_t * node,
1841 vlib_frame_t * frame)
1842 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
/* Node function: dispose of the frame via ip4_drop_or_punt with IP4_ERROR_ADJACENCY_PUNT. */
1845 ip4_punt (vlib_main_t * vm,
1846 vlib_node_runtime_t * node,
1847 vlib_frame_t * frame)
1848 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
/* Node function: dispose of the frame via ip4_drop_or_punt with IP4_ERROR_DST_LOOKUP_MISS. */
1851 ip4_miss (vlib_main_t * vm,
1852 vlib_node_runtime_t * node,
1853 vlib_frame_t * frame)
1854 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
/* Graph-node registrations for the drop, punt and miss node functions; all three take u32 buffer indices and share format_ip4_forward_next_trace. */
1856 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1857 .function = ip4_drop,
1859 .vector_size = sizeof (u32),
1861 .format_trace = format_ip4_forward_next_trace,
1869 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
1871 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1872 .function = ip4_punt,
1874 .vector_size = sizeof (u32),
1876 .format_trace = format_ip4_forward_next_trace,
1884 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1886 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1887 .function = ip4_miss,
1889 .vector_size = sizeof (u32),
1891 .format_trace = format_ip4_forward_next_trace,
1899 VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss);
1901 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * The sum is seeded with the pseudo-header fields (payload length,
 * protocol, source and destination address) and then extended over
 * the payload, following the buffer chain when the payload does not
 * fit in the first buffer. The folded sum is complemented into sum16.
 */
1903 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1907 u32 ip_header_length, payload_length_host_byte_order;
1908 u32 n_this_buffer, n_bytes_left;
1910 void * data_this_buffer;
1912 /* Initialize checksum with ip header. */
1913 ip_header_length = ip4_header_bytes (ip0);
/* Payload length is the IP total length minus the actual header length. */
1914 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1915 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
/* On 32-bit words the two addresses are added separately; otherwise one unaligned 64-bit load starting at src_address is added. */
1917 if (BITS (uword) == 32)
1919 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1920 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1923 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1925 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1926 data_this_buffer = (void *) ip0 + ip_header_length;
/* Clamp to what the first buffer actually holds; the remainder comes from chained buffers. */
1927 if (n_this_buffer + ip_header_length > p0->current_length)
1928 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1931 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1932 n_bytes_left -= n_this_buffer;
1933 if (n_bytes_left == 0)
/* More payload remains, so a next buffer must exist. */
1936 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1937 p0 = vlib_get_buffer (vm, p0->next_buffer);
1938 data_this_buffer = vlib_buffer_get_current (p0);
1939 n_this_buffer = p0->current_length;
1942 sum16 = ~ ip_csum_fold (sum0);
/*
 * Validate the TCP/UDP checksum of the packet in p0 and record the
 * outcome in the buffer flags: IP_BUFFER_L4_CHECKSUM_COMPUTED is
 * always set, IP_BUFFER_L4_CHECKSUM_CORRECT only when the checksum
 * verifies (computed sum is 0) or when a UDP packet carries an
 * explicit zero checksum, which is accepted without computing.
 * The packet must be TCP or UDP (asserted).
 */
1948 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1950 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1951 udp_header_t * udp0;
1954 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1955 || ip0->protocol == IP_PROTOCOL_UDP);
/*
 * Locate the L4 header from the real IP header length. `ip0 + 1`
 * assumes a 20-byte header and would read option bytes as the UDP
 * header when IP options are present.
 */
1957 udp0 = ip4_next_header (ip0);
1958 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1960 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1961 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1965 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* (sum16 == 0) is 0 or 1; shifting it by the flag's bit position sets CORRECT only on a match. */
1967 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1968 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/*
 * Node function for "ip4-local": handles packets addressed to this
 * host. For each packet it
 *  - looks up the SOURCE address in the rx interface's FIB mtrie
 *    (four lookup steps interleaved with the other work),
 *  - validates the TCP/UDP checksum unless the buffer flags already
 *    carry a result or UDP uses an explicit zero checksum,
 *  - checks that the UDP length does not exceed the IP length,
 *  - drops packets whose source has no rewrite/arp/local adjacency,
 *    except packets sent to the limited broadcast address,
 *  - dispatches by IP protocol through local_next_by_ip_protocol;
 *    fragments are dispatched as protocol 0xfe.
 * Packets are processed two at a time while at least four remain,
 * then one at a time. Returns the number of vectors in the frame.
 */
1974 ip4_local (vlib_main_t * vm,
1975 vlib_node_runtime_t * node,
1976 vlib_frame_t * frame)
1978 ip4_main_t * im = &ip4_main;
1979 ip_lookup_main_t * lm = &im->lookup_main;
1980 ip_local_next_t next_index;
1981 u32 * from, * to_next, n_left_from, n_left_to_next;
/* Errors are counted against ip4-input's error counters. */
1982 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1984 from = vlib_frame_vector_args (frame);
1985 n_left_from = frame->n_vectors;
1986 next_index = node->cached_next_index;
1988 if (node->flags & VLIB_NODE_FLAG_TRACE)
1989 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1991 while (n_left_from > 0)
1993 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-packet loop. */
1995 while (n_left_from >= 4 && n_left_to_next >= 2)
1997 vlib_buffer_t * p0, * p1;
1998 ip4_header_t * ip0, * ip1;
1999 udp_header_t * udp0, * udp1;
2000 ip4_fib_mtrie_t * mtrie0, * mtrie1;
2001 ip4_fib_mtrie_leaf_t leaf0, leaf1;
2002 ip_adjacency_t * adj0, * adj1;
2003 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
2004 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
2005 i32 len_diff0, len_diff1;
2006 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2007 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
/* Speculatively enqueue both packets to the current next frame. */
2010 pi0 = to_next[0] = from[0];
2011 pi1 = to_next[1] = from[1];
2015 n_left_to_next -= 2;
2017 p0 = vlib_get_buffer (vm, pi0);
2018 p1 = vlib_get_buffer (vm, pi1);
2020 ip0 = vlib_buffer_get_current (p0);
2021 ip1 = vlib_buffer_get_current (p1);
2023 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2024 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2025 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
2026 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
2028 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2029 mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
2031 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
/* Source-address lookup, step 0 of 4. */
2033 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2034 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
2036 /* Treat IP frag packets as "experimental" protocol for now
2037 until support of IP frag reassembly is implemented */
2038 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2039 proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
2040 is_udp0 = proto0 == IP_PROTOCOL_UDP;
2041 is_udp1 = proto1 == IP_PROTOCOL_UDP;
2042 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2043 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
2048 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2049 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2051 udp0 = ip4_next_header (ip0);
2052 udp1 = ip4_next_header (ip1);
2054 /* Don't verify UDP checksum for packets with explicit zero checksum. */
2055 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2056 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2058 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2059 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
2061 /* Verify UDP length. */
2062 ip_len0 = clib_net_to_host_u16 (ip0->length);
2063 ip_len1 = clib_net_to_host_u16 (ip1->length);
2064 udp_len0 = clib_net_to_host_u16 (udp0->length);
2065 udp_len1 = clib_net_to_host_u16 (udp1->length);
2067 len_diff0 = ip_len0 - udp_len0;
2068 len_diff1 = ip_len1 - udp_len1;
/* The length check only applies to UDP; other protocols get 0, which passes. */
2070 len_diff0 = is_udp0 ? len_diff0 : 0;
2071 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: at least one packet is not TCP/UDP or has no known-good checksum yet. */
2073 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
2074 & good_tcp_udp0 & good_tcp_udp1)))
2079 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2080 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2082 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2083 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2088 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2089 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
2091 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2092 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
2096 good_tcp_udp0 &= len_diff0 >= 0;
2097 good_tcp_udp1 &= len_diff1 >= 0;
2099 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2100 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* IP4_ERROR_UNKNOWN_PROTOCOL doubles as the "no error so far" value in the checks below. */
2102 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
2104 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2105 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* Relies on the UDP checksum error immediately following the TCP one, so adding is_udp selects it. */
2107 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2108 error0 = (is_tcp_udp0 && ! good_tcp_udp0
2109 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2111 error1 = (is_tcp_udp1 && ! good_tcp_udp1
2112 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
2115 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2116 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
/* The source-lookup adjacency is stored in both the RX and TX slots. */
2118 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2119 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2121 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
2122 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2124 ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2126 /* no_default_route */ 1));
2127 ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
2129 /* no_default_route */ 1));
2131 adj0 = ip_get_adjacency (lm, adj_index0);
2132 adj1 = ip_get_adjacency (lm, adj_index1);
2135 * Must have a route to source otherwise we drop the packet.
2136 * ip4 broadcasts are accepted, e.g. to make dhcp client work
2138 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2139 && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2140 && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2141 && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2142 && ip0->dst_address.as_u32 != 0xFFFFFFFF
2143 ? IP4_ERROR_SRC_LOOKUP_MISS
2145 error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
2146 && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2147 && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
2148 && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
/* The broadcast exemption must test the second packet's own destination (ip1), not the first packet's. */
2149 && ip1->dst_address.as_u32 != 0xFFFFFFFF
2150 ? IP4_ERROR_SRC_LOOKUP_MISS
2153 next0 = lm->local_next_by_ip_protocol[proto0];
2154 next1 = lm->local_next_by_ip_protocol[proto1];
/* Any error other than the "no error so far" value sends the packet to drop. */
2156 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2157 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
2159 p0->error = error0 ? error_node->errors[error0] : 0;
2160 p1->error = error1 ? error_node->errors[error1] : 0;
/* Bit 0: packet 0 mispredicted; bit 1: packet 1 mispredicted. */
2162 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
2164 if (PREDICT_FALSE (enqueue_code != 0))
2166 switch (enqueue_code)
2172 n_left_to_next += 1;
2173 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2179 n_left_to_next += 1;
2180 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2184 /* A B B or A B C */
2186 n_left_to_next += 2;
2187 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2188 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2191 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2193 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Single-packet loop: same checks as the dual loop, one packet at a time. */
2200 while (n_left_from > 0 && n_left_to_next > 0)
2204 udp_header_t * udp0;
2205 ip4_fib_mtrie_t * mtrie0;
2206 ip4_fib_mtrie_leaf_t leaf0;
2207 ip_adjacency_t * adj0;
2208 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
2210 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
2212 pi0 = to_next[0] = from[0];
2216 n_left_to_next -= 1;
2218 p0 = vlib_get_buffer (vm, pi0);
2220 ip0 = vlib_buffer_get_current (p0);
2222 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2223 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
2225 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
2227 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2229 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
2231 /* Treat IP frag packets as "experimental" protocol for now
2232 until support of IP frag reassembly is implemented */
2233 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
2234 is_udp0 = proto0 == IP_PROTOCOL_UDP;
2235 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
2239 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2241 udp0 = ip4_next_header (ip0);
2243 /* Don't verify UDP checksum for packets with explicit zero checksum. */
2244 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2246 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
2248 /* Verify UDP length. */
2249 ip_len0 = clib_net_to_host_u16 (ip0->length);
2250 udp_len0 = clib_net_to_host_u16 (udp0->length);
2252 len_diff0 = ip_len0 - udp_len0;
2254 len_diff0 = is_udp0 ? len_diff0 : 0;
2256 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
2261 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
2262 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
2264 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
2265 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
2269 good_tcp_udp0 &= len_diff0 >= 0;
2271 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
2273 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2275 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2277 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2278 error0 = (is_tcp_udp0 && ! good_tcp_udp0
2279 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2282 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2284 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2285 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2287 ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2289 /* no_default_route */ 1));
2291 adj0 = ip_get_adjacency (lm, adj_index0);
2293 /* Must have a route to source otherwise we drop the packet. */
2294 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2295 && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2296 && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2297 && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2298 && ip0->dst_address.as_u32 != 0xFFFFFFFF
2299 ? IP4_ERROR_SRC_LOOKUP_MISS
2302 next0 = lm->local_next_by_ip_protocol[proto0];
2304 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2306 p0->error = error0? error_node->errors[error0] : 0;
/* Misprediction: undo the speculative enqueue and switch to the frame for next0. */
2308 if (PREDICT_FALSE (next0 != next_index))
2310 n_left_to_next += 1;
2311 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2314 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2317 n_left_to_next -= 1;
2321 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2324 return frame->n_vectors;
/* Graph node "ip4-local": runs ip4_local; its built-in next nodes are listed below, and ip4_register_protocol adds more at run time. */
2327 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2328 .function = ip4_local,
2329 .name = "ip4-local",
2330 .vector_size = sizeof (u32),
2332 .format_trace = format_ip4_forward_next_trace,
2334 .n_next_nodes = IP_LOCAL_N_NEXT,
2336 [IP_LOCAL_NEXT_DROP] = "error-drop",
2337 [IP_LOCAL_NEXT_PUNT] = "error-punt",
2338 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2339 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
2343 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/*
 * Register node_index as the handler for locally received packets of
 * the given IP protocol: the node is added as a next node of
 * ip4-local and the resulting next index is stored in
 * local_next_by_ip_protocol[protocol]. `protocol` must be a valid
 * index into that table (asserted).
 */
2345 void ip4_register_protocol (u32 protocol, u32 node_index)
2347 vlib_main_t * vm = vlib_get_main();
2348 ip4_main_t * im = &ip4_main;
2349 ip_lookup_main_t * lm = &im->lookup_main;
2351 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2352 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler: prints the number of every IP protocol whose
 * ip4-local dispatch entry is something other than
 * IP_LOCAL_NEXT_PUNT.
 */
2355 static clib_error_t *
2356 show_ip_local_command_fn (vlib_main_t * vm,
2357 unformat_input_t * input,
2358 vlib_cli_command_t * cmd)
2360 ip4_main_t * im = &ip4_main;
2361 ip_lookup_main_t * lm = &im->lookup_main;
2364 vlib_cli_output (vm, "Protocols handled by ip4_local");
2365 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
2367 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2368 vlib_cli_output (vm, "%d", i);
/* CLI registration binding the "show ip local" path to
   show_ip_local_command_fn. */
2375 VLIB_CLI_COMMAND (show_ip_local, static) = {
2376 .path = "show ip local",
2377 .function = show_ip_local_command_fn,
2378 .short_help = "Show ip local protocol table",
2382 ip4_arp (vlib_main_t * vm,
2383 vlib_node_runtime_t * node,
2384 vlib_frame_t * frame)
2386 vnet_main_t * vnm = vnet_get_main();
2387 ip4_main_t * im = &ip4_main;
2388 ip_lookup_main_t * lm = &im->lookup_main;
2389 u32 * from, * to_next_drop;
2390 uword n_left_from, n_left_to_next_drop, next_index;
2391 static f64 time_last_seed_change = -1e100;
2392 static u32 hash_seeds[3];
2393 static uword hash_bitmap[256 / BITS (uword)];
2396 if (node->flags & VLIB_NODE_FLAG_TRACE)
2397 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2399 time_now = vlib_time_now (vm);
2400 if (time_now - time_last_seed_change > 1e-3)
2403 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2404 sizeof (hash_seeds));
2405 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2406 hash_seeds[i] = r[i];
2408 /* Mark all hash keys as been no-seen before. */
2409 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2412 time_last_seed_change = time_now;
2415 from = vlib_frame_vector_args (frame);
2416 n_left_from = frame->n_vectors;
2417 next_index = node->cached_next_index;
2418 if (next_index == IP4_ARP_NEXT_DROP)
2419 next_index = IP4_ARP_N_NEXT; /* point to first interface */
2421 while (n_left_from > 0)
2423 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2424 to_next_drop, n_left_to_next_drop);
2426 while (n_left_from > 0 && n_left_to_next_drop > 0)
2430 ethernet_header_t * eh0;
2431 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2433 ip_adjacency_t * adj0;
2437 p0 = vlib_get_buffer (vm, pi0);
2439 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2440 adj0 = ip_get_adjacency (lm, adj_index0);
2441 ip0 = vlib_buffer_get_current (p0);
2443 /* If packet destination is not local, send ARP to next hop */
2444 if (adj0->arp.next_hop.ip4.as_u32)
2445 ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2448 * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2449 * rewrite to this packet, we need to skip it here.
2450 * Note, to distinguish from src IP addr *.8.6.*, we
2451 * check for a bcast eth dest instead of IPv4 version.
2453 eh0 = (ethernet_header_t*)ip0;
2454 if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2457 u16 * etype = &eh0->type;
2458 while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q
2459 || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad
2462 etype += 2; //vlan tag also 16 bits, same as etype
2464 if (*etype == clib_host_to_net_u16 (0x0806)) //arp
2466 vlib_buffer_advance (
2467 p0, sizeof(ethernet_header_t) + (4*vlan_num));
2468 ip0 = vlib_buffer_get_current (p0);
2476 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2477 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2479 a0 ^= ip0->dst_address.data_u32;
2482 hash_v3_finalize32 (a0, b0, c0);
2484 c0 &= BITS (hash_bitmap) - 1;
2485 c0 = c0 / BITS (uword);
2486 m0 = (uword) 1 << (c0 % BITS (uword));
2488 bm0 = hash_bitmap[c0];
2489 drop0 = (bm0 & m0) != 0;
2491 /* Mark it as seen. */
2492 hash_bitmap[c0] = bm0 | m0;
2496 to_next_drop[0] = pi0;
2498 n_left_to_next_drop -= 1;
2500 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2506 * Can happen if the control-plane is programming tables
2507 * with traffic flowing; at least that's today's lame excuse.
2509 if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)
2511 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2514 /* Send ARP request. */
2518 ethernet_arp_header_t * h0;
2519 vnet_hw_interface_t * hw_if0;
2521 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2523 /* Add rewrite/encap string for ARP packet. */
2524 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2526 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2528 /* Src ethernet address in ARP header. */
2529 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2530 sizeof (h0->ip4_over_ethernet[0].ethernet));
2532 if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) {
2533 //No source address available
2534 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2535 vlib_buffer_free(vm, &bi0, 1);
2539 /* Copy in destination address we are requesting. */
2540 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2542 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2543 b0 = vlib_get_buffer (vm, bi0);
2544 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2546 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2548 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2552 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2555 return frame->n_vectors;
/* Counter descriptions for the ip4_arp node, indexed by IP4_ARP_ERROR_*.
   Registered through ip4_arp_node.error_strings. */
2558 static char * ip4_arp_error_strings[] = {
2559 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2560 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2561 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2562 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2563 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2564 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph-node registration for the ip4_arp function.  Error counters come
   from ip4_arp_error_strings; the only statically named next node visible
   here is IP4_ARP_NEXT_DROP -> "error-drop". */
2567 VLIB_REGISTER_NODE (ip4_arp_node) = {
2568 .function = ip4_arp,
2570 .vector_size = sizeof (u32),
2572 .format_trace = format_ip4_forward_next_trace,
2574 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2575 .error_strings = ip4_arp_error_strings,
2577 .n_next_nodes = IP4_ARP_N_NEXT,
2579 [IP4_ARP_NEXT_DROP] = "error-drop",
/* arp_notrace_init: init-time hook, registered with VLIB_INIT_FUNCTION.
   It looks up the ip4_arp node runtime and, for each error named in
   foreach_notrace_ip4_arp_error, passes rt->errors[IP4_ARP_ERROR_<name>] to
   vnet_pcap_drop_trace_filter_add_del().
   NOTE(review): the macro's entry list and the remaining call arguments are
   not visible in this excerpt -- confirm which errors are filtered. */
2583 #define foreach_notrace_ip4_arp_error \
2589 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2591 vlib_node_runtime_t *rt =
2592 vlib_node_get_runtime (vm, ip4_arp_node.index);
2594 /* don't trace ARP request packets */
2596 vnet_pcap_drop_trace_filter_add_del \
2597 (rt->errors[IP4_ARP_ERROR_##a], \
2599 foreach_notrace_ip4_arp_error;
2604 VLIB_INIT_FUNCTION(arp_notrace_init);
2607 /* Send an ARP request to see if given destination is reachable on given interface. */
/* Parameters:
     vm          - vlib main used for buffer and frame operations.
     dst         - IPv4 address to probe; copied into the ARP target field.
     sw_if_index - software interface to send the request on.
   Returns 0 on success.  Returns a clib error when the interface is not
   admin-up, and also on the "no matching interface address" path, which
   additionally sets vnm->api_errno to VNET_API_ERROR_NO_MATCHING_INTERFACE. */
2609 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2611 vnet_main_t * vnm = vnet_get_main();
2612 ip4_main_t * im = &ip4_main;
2613 ethernet_arp_header_t * h;
2614 ip4_address_t * src;
2615 ip_interface_address_t * ia;
2616 ip_adjacency_t * adj;
2617 vnet_hw_interface_t * hi;
2618 vnet_sw_interface_t * si;
2622 si = vnet_get_sw_interface (vnm, sw_if_index);
/* Refuse to probe through an interface that is administratively down. */
2624 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2626 return clib_error_return (0, "%U: interface %U down",
2627 format_ip4_address, dst,
2628 format_vnet_sw_if_index_name, vnm,
/* Pick the interface address matching dst; ia receives the matching
   interface-address record. */
2632 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2635 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2636 return clib_error_return
2637 (0, "no matching interface address for destination %U (interface %U)",
2638 format_ip4_address, dst,
2639 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* The probe adjacency stored on the interface address supplies the rewrite. */
2642 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2644 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2646 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender hardware address, sender IP and target IP of the ARP request. */
2648 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2650 h->ip4_over_ethernet[0].ip4 = src[0];
2651 h->ip4_over_ethernet[1].ip4 = dst[0];
2653 b = vlib_get_buffer (vm, bi);
2654 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2656 /* Add encapsulation string for software interface (e.g. ethernet header). */
2657 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* Move the buffer start back over the rewrite bytes just written. */
2658 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the buffer to the hardware interface's output node in its own frame. */
2661 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2662 u32 * to_next = vlib_frame_vector_args (f);
2665 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2668 return /* no error */ 0;
/* Statically named next nodes of the ip4 rewrite nodes.  The node
   registration maps them to "error-drop", "ip4-arp" and "ip4-icmp-error". */
2672 IP4_REWRITE_NEXT_DROP,
2673 IP4_REWRITE_NEXT_ARP,
2674 IP4_REWRITE_NEXT_ICMP_ERROR,
2675 } ip4_rewrite_next_t;
/* Shared body of the ip4 rewrite nodes.
   Parameters:
     vm, node, frame - the usual node-function arguments.
     rewrite_for_locally_received_packets - non-zero for the local variant.
       It selects adj_index[VLIB_RX] instead of adj_index[VLIB_TX], skips the
       TTL/checksum update, flags IP_LOOKUP_NEXT_LOCAL adjacencies as
       IP4_ERROR_SPOOFED_LOCAL_PACKETS and redirects IP_LOOKUP_NEXT_ARP
       adjacencies to IP4_REWRITE_NEXT_ARP.
   Per packet: optional TTL/checksum update, adjacency fetch, MTU check,
   rewrite-string application, buffer start/length adjustment and TX
   interface selection.  Packets with an error keep the default next
   IP4_REWRITE_NEXT_DROP, or IP4_REWRITE_NEXT_ICMP_ERROR on TTL expiry.
   Returns frame->n_vectors. */
2678 ip4_rewrite_inline (vlib_main_t * vm,
2679 vlib_node_runtime_t * node,
2680 vlib_frame_t * frame,
2681 int rewrite_for_locally_received_packets)
2683 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2684 u32 * from = vlib_frame_vector_args (frame);
2685 u32 n_left_from, n_left_to_next, * to_next, next_index;
/* Error codes are reported against the ip4-input node's error table. */
2686 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2687 vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2688 ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2690 n_left_from = frame->n_vectors;
2691 next_index = node->cached_next_index;
2692 u32 cpu_index = os_get_cpu_number();
2694 while (n_left_from > 0)
2696 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: two packets per iteration while packets remain to prefetch. */
2698 while (n_left_from >= 4 && n_left_to_next >= 2)
2700 ip_adjacency_t * adj0, * adj1;
2701 vlib_buffer_t * p0, * p1;
2702 ip4_header_t * ip0, * ip1;
2703 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2704 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2705 u32 next0_override, next1_override;
2706 u32 tx_sw_if_index0, tx_sw_if_index1;
/* The overrides are only written and read on the local-packet path. */
2708 if (rewrite_for_locally_received_packets)
2709 next0_override = next1_override = 0;
2711 /* Prefetch next iteration. */
2713 vlib_buffer_t * p2, * p3;
2715 p2 = vlib_get_buffer (vm, from[2]);
2716 p3 = vlib_get_buffer (vm, from[3]);
2718 vlib_prefetch_buffer_header (p2, STORE);
2719 vlib_prefetch_buffer_header (p3, STORE);
2721 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2722 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
/* Speculatively enqueue both packets to the current next frame. */
2725 pi0 = to_next[0] = from[0];
2726 pi1 = to_next[1] = from[1];
2731 n_left_to_next -= 2;
2733 p0 = vlib_get_buffer (vm, pi0);
2734 p1 = vlib_get_buffer (vm, pi1);
2736 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2737 adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2739 /* We should never rewrite a pkt using the MISS adjacency */
2740 ASSERT(adj_index0 && adj_index1);
2742 ip0 = vlib_buffer_get_current (p0);
2743 ip1 = vlib_buffer_get_current (p1);
/* Default disposition: no error, but drop unless a next index is chosen. */
2745 error0 = error1 = IP4_ERROR_NONE;
2746 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2748 /* Decrement TTL & update checksum.
2749 Works either endian, so no need for byte swap. */
2750 if (! rewrite_for_locally_received_packets)
2752 i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2754 /* Input node should have rejected packets with ttl 0. */
2755 ASSERT (ip0->ttl > 0);
2756 ASSERT (ip1->ttl > 0);
/* Incremental checksum update: add 0x0100 in network byte order, then
   fold the carry back in. */
2758 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2759 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2761 checksum0 += checksum0 >= 0xffff;
2762 checksum1 += checksum1 >= 0xffff;
2764 ip0->checksum = checksum0;
2765 ip1->checksum = checksum1;
2774 * If the ttl drops below 1 when forwarding, generate
/* Expired packets go to the ICMP error node with TX interface set to ~0. */
2777 if (PREDICT_FALSE(ttl0 <= 0))
2779 error0 = IP4_ERROR_TIME_EXPIRED;
2780 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2781 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2782 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2783 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2785 if (PREDICT_FALSE(ttl1 <= 0))
2787 error1 = IP4_ERROR_TIME_EXPIRED;
2788 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2789 icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2790 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2791 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2794 /* Verify checksum. */
2795 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2796 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2799 /* Rewrite packet header and updates lengths. */
2800 adj0 = ip_get_adjacency (lm, adj_index0);
2801 adj1 = ip_get_adjacency (lm, adj_index1);
2803 if (rewrite_for_locally_received_packets)
2806 * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2807 * we end up here with a local adjacency in hand
2808 * The local adj rewrite data is 0xfefe on purpose.
2809 * Bad engineer, no donut for you.
2811 if (PREDICT_FALSE(adj0->lookup_next_index
2812 == IP_LOOKUP_NEXT_LOCAL))
2813 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2814 if (PREDICT_FALSE(adj0->lookup_next_index
2815 == IP_LOOKUP_NEXT_ARP))
2816 next0_override = IP4_REWRITE_NEXT_ARP;
2817 if (PREDICT_FALSE(adj1->lookup_next_index
2818 == IP_LOOKUP_NEXT_LOCAL))
2819 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2820 if (PREDICT_FALSE(adj1->lookup_next_index
2821 == IP_LOOKUP_NEXT_ARP))
2822 next1_override = IP4_REWRITE_NEXT_ARP;
2825 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2826 rw_len0 = adj0[0].rewrite_header.data_bytes;
2827 rw_len1 = adj1[0].rewrite_header.data_bytes;
2829 /* Check MTU of outgoing interface. */
2830 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2831 ? IP4_ERROR_MTU_EXCEEDED
2833 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2834 ? IP4_ERROR_MTU_EXCEEDED
/* Only error-free packets take the adjacency's next index. */
2837 next0 = (error0 == IP4_ERROR_NONE)
2838 ? adj0[0].rewrite_header.next_index : next0;
/* An override applies only when next0 is non-zero, i.e. not
   IP4_REWRITE_NEXT_DROP (presumably 0, as the first enumerator). */
2840 if (rewrite_for_locally_received_packets)
2841 next0 = next0 && next0_override ? next0_override : next0;
2843 next1 = (error1 == IP4_ERROR_NONE)
2844 ? adj1[0].rewrite_header.next_index : next1;
2846 if (rewrite_for_locally_received_packets)
2847 next1 = next1 && next1_override ? next1_override : next1;
/* The two counter updates below add only the rewrite bytes beyond one
   ethernet header, with a packet increment of 0. */
2850 * We've already accounted for an ethernet_header_t elsewhere
2852 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2853 vlib_increment_combined_counter
2854 (&lm->adjacency_counters,
2855 cpu_index, adj_index0,
2856 /* packet increment */ 0,
2857 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2859 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2860 vlib_increment_combined_counter
2861 (&lm->adjacency_counters,
2862 cpu_index, adj_index1,
2863 /* packet increment */ 0,
2864 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2866 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2867 * to see the IP header */
2868 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
/* Grow the buffer towards the front by the rewrite length. */
2870 p0->current_data -= rw_len0;
2871 p0->current_length += rw_len0;
2872 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2873 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
/* Output-feature handling, keyed on the tx_sw_if_has_ip_output_features
   bitmap: load the feature config index and its config data. */
2877 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2880 p0->current_config_index =
2881 vec_elt (cm->config_index_by_sw_if_index,
2883 vnet_get_config_data (&cm->config_main,
2884 &p0->current_config_index,
2886 /* # bytes of config data */ 0);
2889 if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2891 p1->current_data -= rw_len1;
2892 p1->current_length += rw_len1;
2894 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2895 vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2899 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2902 p1->current_config_index =
2903 vec_elt (cm->config_index_by_sw_if_index,
2905 vnet_get_config_data (&cm->config_main,
2906 &p1->current_config_index,
2908 /* # bytes of config data */ 0);
2912 /* Guess we are only writing on simple Ethernet header. */
2913 vnet_rewrite_two_headers (adj0[0], adj1[0],
2915 sizeof (ethernet_header_t));
2917 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2918 to_next, n_left_to_next,
2919 pi0, pi1, next0, next1);
/* Single loop: same processing, one packet at a time. */
2922 while (n_left_from > 0 && n_left_to_next > 0)
2924 ip_adjacency_t * adj0;
2927 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2929 u32 tx_sw_if_index0;
2931 if (rewrite_for_locally_received_packets)
2934 pi0 = to_next[0] = from[0];
2936 p0 = vlib_get_buffer (vm, pi0);
2938 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2940 /* We should never rewrite a pkt using the MISS adjacency */
2943 adj0 = ip_get_adjacency (lm, adj_index0);
2945 ip0 = vlib_buffer_get_current (p0);
2947 error0 = IP4_ERROR_NONE;
2948 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2950 /* Decrement TTL & update checksum. */
2951 if (! rewrite_for_locally_received_packets)
2953 i32 ttl0 = ip0->ttl;
/* Same incremental checksum update as in the dual loop. */
2955 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2957 checksum0 += checksum0 >= 0xffff;
2959 ip0->checksum = checksum0;
2961 ASSERT (ip0->ttl > 0);
2967 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2969 if (PREDICT_FALSE(ttl0 <= 0))
2972 * If the ttl drops below 1 when forwarding, generate
2975 error0 = IP4_ERROR_TIME_EXPIRED;
2976 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2977 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2978 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2979 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2983 if (rewrite_for_locally_received_packets)
2986 * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2987 * we end up here with a local adjacency in hand
2988 * The local adj rewrite data is 0xfefe on purpose.
2989 * Bad engineer, no donut for you.
2991 if (PREDICT_FALSE(adj0->lookup_next_index
2992 == IP_LOOKUP_NEXT_LOCAL))
2993 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2995 * We have to override the next_index in ARP adjacencies,
2996 * because they're set up for ip4-arp, not this node...
2998 if (PREDICT_FALSE(adj0->lookup_next_index
2999 == IP_LOOKUP_NEXT_ARP))
3000 next0_override = IP4_REWRITE_NEXT_ARP;
3003 /* Guess we are only writing on simple Ethernet header. */
3004 vnet_rewrite_one_header (adj0[0], ip0,
3005 sizeof (ethernet_header_t));
3007 /* Update packet buffer attributes/set output interface. */
3008 rw_len0 = adj0[0].rewrite_header.data_bytes;
3010 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
3011 vlib_increment_combined_counter
3012 (&lm->adjacency_counters,
3013 cpu_index, adj_index0,
3014 /* packet increment */ 0,
3015 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
3017 /* Check MTU of outgoing interface. */
3018 error0 = (vlib_buffer_length_in_chain (vm, p0)
3019 > adj0[0].rewrite_header.max_l3_packet_bytes
3020 ? IP4_ERROR_MTU_EXCEEDED
/* Tag the buffer with the ip4-input error selected by error0. */
3023 p0->error = error_node->errors[error0];
3025 /* Don't adjust the buffer for ttl issue; icmp-error node wants
3026 * to see the IP header */
3027 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
3029 p0->current_data -= rw_len0;
3030 p0->current_length += rw_len0;
3031 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
3033 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
3034 next0 = adj0[0].rewrite_header.next_index;
3037 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
3040 p0->current_config_index =
3041 vec_elt (cm->config_index_by_sw_if_index,
3043 vnet_get_config_data (&cm->config_main,
3044 &p0->current_config_index,
3046 /* # bytes of config data */ 0);
3050 if (rewrite_for_locally_received_packets)
3051 next0 = next0 && next0_override ? next0_override : next0;
3056 n_left_to_next -= 1;
3058 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
3059 to_next, n_left_to_next,
3063 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
3066 /* Need to do trace after rewrites to pick up new packet data. */
3067 if (node->flags & VLIB_NODE_FLAG_TRACE)
3068 ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
3070 return frame->n_vectors;
3074 /** @brief IPv4 transit rewrite node.
3075 @node ip4-rewrite-transit
3077 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
3078 header checksum, fetch the ip adjacency, check the outbound mtu,
3079 apply the adjacency rewrite, and send pkts to the adjacency
3080 rewrite header's next_index.
Implemented as a call to ip4_rewrite_inline() with
rewrite_for_locally_received_packets = 0, so the adjacency index is
taken from adj_index[VLIB_TX].
3082 @param vm vlib_main_t corresponding to the current thread
3083 @param node vlib_node_runtime_t
3084 @param frame vlib_frame_t whose contents should be dispatched
@return the value returned by ip4_rewrite_inline(), i.e. frame->n_vectors
3086 @par Graph mechanics: buffer metadata, next index usage
3089 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
3090 - the rewrite adjacency index
3091 - <code>adj->lookup_next_index</code>
3092 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3093 the packet will be dropped.
3094 - <code>adj->rewrite_header</code>
3095 - Rewrite string length, rewrite string, next_index
3098 - <code>b->current_data, b->current_length</code>
3099 - Updated net of applying the rewrite string
3101 <em>Next Indices:</em>
3102 - <code> adj->rewrite_header.next_index </code>
3106 ip4_rewrite_transit (vlib_main_t * vm,
3107 vlib_node_runtime_t * node,
3108 vlib_frame_t * frame)
3110 return ip4_rewrite_inline (vm, node, frame,
3111 /* rewrite_for_locally_received_packets */ 0);
3114 /** @brief IPv4 local rewrite node.
3115 @node ip4-rewrite-local
3117 This is the IPv4 local rewrite node. Fetch the ip adjacency, check
3118 the outbound interface mtu, apply the adjacency rewrite, and send
3119 pkts to the adjacency rewrite header's next_index. Deal
3120 with hemorrhoids of the form "some clown sends an icmp4 w/ src =
3121 dst = interface addr."
Implemented as a call to ip4_rewrite_inline() with
rewrite_for_locally_received_packets = 1: no TTL or checksum update is
done and the adjacency index is taken from adj_index[VLIB_RX].
3123 @param vm vlib_main_t corresponding to the current thread
3124 @param node vlib_node_runtime_t
3125 @param frame vlib_frame_t whose contents should be dispatched
@return the value returned by ip4_rewrite_inline(), i.e. frame->n_vectors
3127 @par Graph mechanics: buffer metadata, next index usage
3130 - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
3131 - the rewrite adjacency index
3132 - <code>adj->lookup_next_index</code>
3133 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
3134 the packet will be dropped.
3135 - <code>adj->rewrite_header</code>
3136 - Rewrite string length, rewrite string, next_index
3139 - <code>b->current_data, b->current_length</code>
3140 - Updated net of applying the rewrite string
3142 <em>Next Indices:</em>
3143 - <code> adj->rewrite_header.next_index </code>
3148 ip4_rewrite_local (vlib_main_t * vm,
3149 vlib_node_runtime_t * node,
3150 vlib_frame_t * frame)
3152 return ip4_rewrite_inline (vm, node, frame,
3153 /* rewrite_for_locally_received_packets */ 1);
/* Graph-node registration for "ip4-rewrite-transit", dispatched by
   ip4_rewrite_transit.  Next-node names are indexed by ip4_rewrite_next_t. */
3156 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
3157 .function = ip4_rewrite_transit,
3158 .name = "ip4-rewrite-transit",
3159 .vector_size = sizeof (u32),
3161 .format_trace = format_ip4_rewrite_trace,
3165 [IP4_REWRITE_NEXT_DROP] = "error-drop",
3166 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
3167 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
3171 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit);
/* Graph-node registration for "ip4-rewrite-local", dispatched by
   ip4_rewrite_local.  It is declared a sibling of "ip4-rewrite-transit"
   and names no next nodes of its own in the visible lines. */
3173 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
3174 .function = ip4_rewrite_local,
3175 .name = "ip4-rewrite-local",
3176 .vector_size = sizeof (u32),
3178 .sibling_of = "ip4-rewrite-transit",
3180 .format_trace = format_ip4_rewrite_trace,
3185 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local);
/* CLI handler for "set interface ip table".  Parses a software interface
   followed by a decimal table id, resolves the table with
   find_ip4_fib_by_table_index_or_id() and stores that FIB's index in
   im->fib_index_by_sw_if_index[sw_if_index].  A parse failure produces an
   "unknown interface" or "expected table id" error. */
3187 static clib_error_t *
3188 add_del_interface_table (vlib_main_t * vm,
3189 unformat_input_t * input,
3190 vlib_cli_command_t * cmd)
3192 vnet_main_t * vnm = vnet_get_main();
3193 clib_error_t * error = 0;
3194 u32 sw_if_index, table_id;
3198 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
3200 error = clib_error_return (0, "unknown interface `%U'",
3201 format_unformat_error, input);
3205 if (unformat (input, "%d", &table_id))
3209 error = clib_error_return (0, "expected table id `%U'",
3210 format_unformat_error, input);
3215 ip4_main_t * im = &ip4_main;
/* Lookup is by table id (IP4_ROUTE_FLAG_TABLE_ID), not by FIB index. */
3216 ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
/* Grow the per-interface vector so sw_if_index is a valid element. */
3220 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
3221 im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
/* CLI registration binding "set interface ip table" to
   add_del_interface_table. */
3229 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
3230 .path = "set interface ip table",
3231 .function = add_del_interface_table,
3232 .short_help = "Add/delete FIB table id for interface",
/* Node function for "ip4-lookup-multicast".  For each packet it:
     - picks a FIB index from the RX interface, replaced by
       sw_if_index[VLIB_TX] when that field is not ~0;
     - looks up the destination address with ip4_fib_lookup_buffer();
     - computes the flow hash using the FIB's flow_hash_config and uses it
       to select one adjacency out of the adj's n_adj block;
     - stores the result in ip.adj_index[VLIB_TX], bumps the adjacency
       counter and enqueues to adj->lookup_next_index.
   Returns frame->n_vectors. */
3237 ip4_lookup_multicast (vlib_main_t * vm,
3238 vlib_node_runtime_t * node,
3239 vlib_frame_t * frame)
3241 ip4_main_t * im = &ip4_main;
3242 ip_lookup_main_t * lm = &im->lookup_main;
3243 vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
3244 u32 n_left_from, n_left_to_next, * from, * to_next;
3245 ip_lookup_next_t next;
3246 u32 cpu_index = os_get_cpu_number();
3248 from = vlib_frame_vector_args (frame);
3249 n_left_from = frame->n_vectors;
3250 next = node->cached_next_index;
3252 while (n_left_from > 0)
3254 vlib_get_next_frame (vm, node, next,
3255 to_next, n_left_to_next);
/* Dual loop: two packets per iteration. */
3257 while (n_left_from >= 4 && n_left_to_next >= 2)
3259 vlib_buffer_t * p0, * p1;
3260 u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
3261 ip_lookup_next_t next0, next1;
3262 ip4_header_t * ip0, * ip1;
3263 ip_adjacency_t * adj0, * adj1;
3264 u32 fib_index0, fib_index1;
3265 u32 flow_hash_config0, flow_hash_config1;
3267 /* Prefetch next iteration. */
3269 vlib_buffer_t * p2, * p3;
3271 p2 = vlib_get_buffer (vm, from[2]);
3272 p3 = vlib_get_buffer (vm, from[3]);
3274 vlib_prefetch_buffer_header (p2, LOAD);
3275 vlib_prefetch_buffer_header (p3, LOAD);
3277 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
3278 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
3281 pi0 = to_next[0] = from[0];
3282 pi1 = to_next[1] = from[1];
3284 p0 = vlib_get_buffer (vm, pi0);
3285 p1 = vlib_get_buffer (vm, pi1);
3287 ip0 = vlib_buffer_get_current (p0);
3288 ip1 = vlib_buffer_get_current (p1);
/* Default FIB comes from the RX interface; a sw_if_index[VLIB_TX] other
   than ~0 is used as the FIB index instead. */
3290 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3291 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
3292 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3293 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3294 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
3295 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
3297 adj_index0 = ip4_fib_lookup_buffer (im, fib_index0,
3298 &ip0->dst_address, p0);
3299 adj_index1 = ip4_fib_lookup_buffer (im, fib_index1,
3300 &ip1->dst_address, p1);
3302 adj0 = ip_get_adjacency (lm, adj_index0);
3303 adj1 = ip_get_adjacency (lm, adj_index1);
3305 next0 = adj0->lookup_next_index;
3306 next1 = adj1->lookup_next_index;
3309 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3312 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
3314 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
3315 (ip0, flow_hash_config0);
3317 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
3318 (ip1, flow_hash_config1);
/* n_adj is asserted to be a non-zero power of two, so masking the flow
   hash with (n_adj - 1) yields an offset in [0, n_adj). */
3320 ASSERT (adj0->n_adj > 0);
3321 ASSERT (adj1->n_adj > 0);
3322 ASSERT (is_pow2 (adj0->n_adj));
3323 ASSERT (is_pow2 (adj1->n_adj));
3324 adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3325 adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
3327 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3328 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
3330 if (1) /* $$$$$$ HACK FIXME */
3331 vlib_increment_combined_counter
3332 (cm, cpu_index, adj_index0, 1,
3333 vlib_buffer_length_in_chain (vm, p0));
3334 if (1) /* $$$$$$ HACK FIXME */
3335 vlib_increment_combined_counter
3336 (cm, cpu_index, adj_index1, 1,
3337 vlib_buffer_length_in_chain (vm, p1));
3341 n_left_to_next -= 2;
/* Bit 0 is set when p0 does not belong in the current next frame, bit 1
   when p1 does not. */
3344 wrong_next = (next0 != next) + 2*(next1 != next);
3345 if (PREDICT_FALSE (wrong_next != 0))
3353 n_left_to_next += 1;
3354 vlib_set_next_frame_buffer (vm, node, next0, pi0);
3360 n_left_to_next += 1;
3361 vlib_set_next_frame_buffer (vm, node, next1, pi1);
3367 n_left_to_next += 2;
3368 vlib_set_next_frame_buffer (vm, node, next0, pi0);
3369 vlib_set_next_frame_buffer (vm, node, next1, pi1);
3373 vlib_put_next_frame (vm, node, next, n_left_to_next);
3375 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single loop: same processing, one packet at a time. */
3381 while (n_left_from > 0 && n_left_to_next > 0)
3385 u32 pi0, adj_index0;
3386 ip_lookup_next_t next0;
3387 ip_adjacency_t * adj0;
3389 u32 flow_hash_config0;
3394 p0 = vlib_get_buffer (vm, pi0);
3396 ip0 = vlib_buffer_get_current (p0);
3398 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
3399 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
3400 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
3401 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
3403 adj_index0 = ip4_fib_lookup_buffer (im, fib_index0,
3404 &ip0->dst_address, p0);
3406 adj0 = ip_get_adjacency (lm, adj_index0);
3408 next0 = adj0->lookup_next_index;
3411 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
3413 vnet_buffer (p0)->ip.flow_hash =
3414 ip4_compute_flow_hash (ip0, flow_hash_config0);
3416 ASSERT (adj0->n_adj > 0);
3417 ASSERT (is_pow2 (adj0->n_adj));
3418 adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
3420 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
3422 if (1) /* $$$$$$ HACK FIXME */
3423 vlib_increment_combined_counter
3424 (cm, cpu_index, adj_index0, 1,
3425 vlib_buffer_length_in_chain (vm, p0));
3429 n_left_to_next -= 1;
/* On a mismatch the slot is handed back and the current next frame is put
   and re-fetched. */
3432 if (PREDICT_FALSE (next0 != next))
3434 n_left_to_next += 1;
3435 vlib_put_next_frame (vm, node, next, n_left_to_next);
3437 vlib_get_next_frame (vm, node, next,
3438 to_next, n_left_to_next);
3441 n_left_to_next -= 1;
3445 vlib_put_next_frame (vm, node, next, n_left_to_next);
3448 if (node->flags & VLIB_NODE_FLAG_TRACE)
3449 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
3451 return frame->n_vectors;
/* Graph-node registration for "ip4-lookup-multicast", dispatched by
   ip4_lookup_multicast and declared a sibling of "ip4-lookup". */
3454 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3455 .function = ip4_lookup_multicast,
3456 .name = "ip4-lookup-multicast",
3457 .vector_size = sizeof (u32),
3458 .sibling_of = "ip4-lookup",
3459 .format_trace = format_ip4_lookup_trace,
3464 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast);
/* Graph-node registration for "ip4-multicast".  Its node function is
   ip4_drop. */
3466 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3467 .function = ip4_drop,
3468 .name = "ip4-multicast",
3469 .vector_size = sizeof (u32),
3471 .format_trace = format_ip4_forward_next_trace,
/* Cross-check two lookup paths for address A in FIB FIB_INDEX0.
   The mtrie is walked through four ip4_fib_mtrie_lookup_step() calls (step
   arguments 0..3), falling back to the mtrie's default leaf when the result
   is IP4_FIB_MTRIE_LEAF_EMPTY.  Returns non-zero when the resulting
   adjacency index equals the one from ip4_fib_lookup_with_table(). */
3479 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3481 ip4_main_t * im = &ip4_main;
3482 ip4_fib_mtrie_t * mtrie0;
3483 ip4_fib_mtrie_leaf_t leaf0;
3486 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3488 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3489 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3490 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3491 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3492 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3494 /* Handle default route. */
3495 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3497 adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3499 return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3501 /* no_default_route */ 0);
/* CLI handler for "test lookup".  Accepts "table <id>", "count <n>" and a
   base IPv4 address in any order; any other token is reported as unknown
   input.  It then runs ip4_lookup_validate() n times, adding one to the
   address (in host byte order) after each lookup, and prints either the
   error count or a "No errors" line. */
3504 static clib_error_t *
3505 test_lookup_command_fn (vlib_main_t * vm,
3506 unformat_input_t * input,
3507 vlib_cli_command_t * cmd)
3513 ip4_address_t ip4_base_address;
3516 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
/* NOTE(review): table_id is handed to ip4_lookup_validate(), whose second
   parameter is a FIB index -- confirm the two coincide. */
3517 if (unformat (input, "table %d", &table_id))
3519 else if (unformat (input, "count %f", &count))
3522 else if (unformat (input, "%U",
3523 unformat_ip4_address, &ip4_base_address))
3526 return clib_error_return (0, "unknown input `%U'",
3527 format_unformat_error, input);
3532 for (i = 0; i < n; i++)
3534 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Step to the next address: convert to host order, add 1, convert back. */
3537 ip4_base_address.as_u32 =
3538 clib_host_to_net_u32 (1 +
3539 clib_net_to_host_u32 (ip4_base_address.as_u32));
3543 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3545 vlib_cli_output (vm, "No errors in %d lookups\n", n);
/* CLI registration binding "test lookup" to test_lookup_command_fn. */
3550 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3551 .path = "test lookup",
3552 .short_help = "test lookup",
3553 .function = test_lookup_command_fn,
/* Set the flow-hash configuration of the IPv4 FIB identified by TABLE_ID.
   The table id is resolved through im4->fib_index_by_table_id, and
   FLOW_HASH_CONFIG is stored in the matching FIB's flow_hash_config field.
   Returns VNET_API_ERROR_NO_SUCH_FIB on the lookup-failure path.
   NOTE(review): the guard on the hash_get() result and the success return
   value are not visible in this excerpt -- confirm. */
3556 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3558 ip4_main_t * im4 = &ip4_main;
3560 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3563 return VNET_API_ERROR_NO_SUCH_FIB;
/* p[0] is the FIB index stored for table_id. */
3565 fib = vec_elt_at_index (im4->fibs, p[0]);
3567 fib->flow_hash_config = flow_hash_config;
/* CLI handler for "set ip flow-hash".  Parses "table <id>" plus any of the
   keywords generated from foreach_flow_hash_bit, OR-ing each keyword's value
   into flow_hash_config, then applies the result with
   vnet_set_ip4_flow_hash().  VNET_API_ERROR_NO_SUCH_FIB is reported as
   "no such FIB table"; the visible fallback emits a clib_warning. */
3571 static clib_error_t *
3572 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3573 unformat_input_t * input,
3574 vlib_cli_command_t * cmd)
3578 u32 flow_hash_config = 0;
3581 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3582 if (unformat (input, "table %d", &table_id))
/* One "else if" per flow-hash bit: the keyword is the stringified name. */
3585 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3586 foreach_flow_hash_bit
3592 return clib_error_return (0, "unknown input `%U'",
3593 format_unformat_error, input);
3595 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3601 case VNET_API_ERROR_NO_SUCH_FIB:
3602 return clib_error_return (0, "no such FIB table %d", table_id);
3605 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
/*
 * Registers the "set ip flow-hash" CLI command and routes it to
 * set_ip_flow_hash_command_fn.
 *
 * The usage string starts with the command exactly as registered in .path so
 * that the help output shows a command line the parser will accept.
 */
3612 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3613 .path = "set ip flow-hash",
3615 "set ip flow-hash table <fib-id> src dst sport dport proto reverse",
3616 .function = set_ip_flow_hash_command_fn,
/*
 * Record which classifier table is associated with a software interface for
 * IPv4.
 *
 * Stores table_index in
 * ip4_main.lookup_main.classify_table_index_by_sw_if_index[sw_if_index],
 * growing the vector first so the slot exists.
 *
 * A table_index of ~0 skips the classifier-table existence check and is
 * stored as-is; presumably it means "no table". TODO confirm.
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * slot in the interface pool, and VNET_API_ERROR_NO_SUCH_ENTRY when
 * table_index is not ~0 and is a free slot in the classifier table pool.
 * The remainder of the parameter list and the success return are not
 * visible in this listing.
 */
3619 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3622 vnet_main_t * vnm = vnet_get_main();
3623 vnet_interface_main_t * im = &vnm->interface_main;
3624 ip4_main_t * ipm = &ip4_main;
3625 ip_lookup_main_t * lm = &ipm->lookup_main;
3626 vnet_classify_main_t * cm = &vnet_classify_main;
/* Reject interface indices that do not refer to a live pool element. */
3628 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3629 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* ~0 bypasses the table lookup; any other index must be a live table. */
3631 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3632 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Make sure the per-interface vector is long enough before writing. */
3634 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3635 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
/*
 * CLI handler that attaches a classifier table to an interface via
 * vnet_set_ip4_classify_intfc().
 *
 * Parses "table-index <n>" and "intfc <interface>". Both are required:
 * a missing table index or interface yields a clib error before any state
 * is changed. Error codes from vnet_set_ip4_classify_intfc() are translated
 * into "No such interface" / "No such classifier table" messages.
 */
3640 static clib_error_t *
3641 set_ip_classify_command_fn (vlib_main_t * vm,
3642 unformat_input_t * input,
3643 vlib_cli_command_t * cmd)
3645 u32 table_index = ~0;
/* Tracked separately because ~0 is itself an accepted table_index value. */
3646 int table_index_set = 0;
3647 u32 sw_if_index = ~0;
3650 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3651 if (unformat (input, "table-index %d", &table_index))
3652 table_index_set = 1;
3653 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3654 vnet_get_main(), &sw_if_index))
3660 if (table_index_set == 0)
3661 return clib_error_return (0, "classify table-index must be specified");
/* sw_if_index still holding ~0 means no interface was parsed. */
3663 if (sw_if_index == ~0)
3664 return clib_error_return (0, "interface / subif must be specified");
3666 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3673 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3674 return clib_error_return (0, "No such interface");
3676 case VNET_API_ERROR_NO_SUCH_ENTRY:
3677 return clib_error_return (0, "No such classifier table");
/*
 * Registers the "set ip classify" CLI command and routes it to
 * set_ip_classify_command_fn. The usage text lists the two arguments the
 * handler requires: the interface and the classifier table index.
 */
3682 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3683 .path = "set ip classify",
3685 "set ip classify intfc <int> table-index <index>",
3686 .function = set_ip_classify_command_fn,
/*
 * CLI handler that marks whether an interface has IPv4 output features.
 *
 * Parses an interface name and an optional "del" keyword, then writes the
 * value of is_add into bit sw_if_index of
 * ip4_main.lookup_main.tx_sw_if_has_ip_output_features.
 *
 * Returns a clib error when no interface was parsed.
 *
 * NOTE(review): the declaration of is_add and the statement executed for
 * "del" are not visible in this listing; presumably is_add defaults to 1 and
 * "del" clears it. Confirm against the full source.
 */
3693 static clib_error_t *
3694 set_interface_output_feature_command_fn (vlib_main_t * vm,
3695 unformat_input_t * input,
3696 vlib_cli_command_t * cmd)
3698 vnet_main_t * vnm = vnet_get_main();
3699 u32 sw_if_index = ~0;
3701 ip4_main_t * im = &ip4_main;
3702 ip_lookup_main_t * lm = &im->lookup_main;
3704 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3706 if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
3708 else if (unformat (input, "del"))
/* sw_if_index still holding ~0 means no interface was parsed. */
3714 if (sw_if_index == ~0)
3715 return clib_error_return (0, "unknown interface `%U'",
3716 format_unformat_error, input);
/* clib_bitmap_set's result is assigned back to the bitmap pointer. */
3718 lm->tx_sw_if_has_ip_output_features =
3719 clib_bitmap_set (lm->tx_sw_if_has_ip_output_features, sw_if_index, is_add);
/*
 * Registers the "set interface output feature" CLI command and routes it to
 * set_interface_output_feature_command_fn.
 *
 * NOTE(review): the handler also accepts a "del" keyword that short_help
 * does not mention. Consider documenting it.
 */
3724 VLIB_CLI_COMMAND (set_interface_output_feature, static) = {
3725 .path = "set interface output feature",
3726 .function = set_interface_output_feature_command_fn,
3727 .short_help = "set interface output feature <intfc>",
3729 #endif /* TEST_CODE */