2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
48 /* This is really, really simple but stupid fib. */
/* Look up a destination address in the hash-based FIB selected by
   fib_index.  The per-prefix-length hashes in
   fib->adj_index_by_dst_address are probed from the highest index down
   to i_min; a non-zero disable_default_route raises i_min to 1 so that
   index 0 is never consulted.  When no hash yields a hit the result is
   lm->miss_adj_index.
   NOTE(review): the return type, the `dst` parameter, the braces, the
   hit path and the return statement are not visible in this excerpt;
   confirm against the complete file. */
50 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index,
52 u32 disable_default_route)
54 ip_lookup_main_t * lm = &im->lookup_main;
55 ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index);
56 uword * p, * hash, key;
57 i32 i, i_min, dst_address, ai;
/* Lowest table index that may still be probed. */
59 i_min = disable_default_route ? 1 : 0;
60 dst_address = clib_mem_unaligned (&dst->data_u32, u32);
61 for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--)
63 hash = fib->adj_index_by_dst_address[i];
/* Apply the mask stored for index i before probing that index's hash. */
67 key = dst_address & im->fib_masks[i];
68 if ((p = hash_get (hash, key)) != 0)
75 /* Nothing matches in table. */
76 ai = lm->miss_adj_index;
/* Append a new FIB to im->fibs for table_id and record its index in
   im->fib_index_by_table_id.  The new entry receives the default
   flow-hash configuration, both classify table indices set to ~0 and an
   initialized mtrie.
   NOTE(review): the declaration of `fib` and the return statement are
   not visible in this excerpt. */
83 create_fib_with_table_id (ip4_main_t * im, u32 table_id)
/* Registered index is the vector length before the append on the next
   line, i.e. the slot the new FIB is about to occupy. */
86 hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs));
87 vec_add2 (im->fibs, fib, 1);
88 fib->table_id = table_id;
/* Index of this FIB within im->fibs. */
89 fib->index = fib - im->fibs;
90 fib->flow_hash_config = IP_FLOW_HASH_DEFAULT;
91 fib->fwd_classify_table_index = ~0;
92 fib->rev_classify_table_index = ~0;
93 ip4_mtrie_init (&fib->mtrie);
/* Resolve table_index_or_id to an entry of im->fibs.  With
   IP4_ROUTE_FLAG_FIB_INDEX set in flags the value is used directly as a
   FIB index; otherwise it is looked up as a table id in
   im->fib_index_by_table_id, and one visible path creates the FIB with
   create_fib_with_table_id().
   NOTE(review): the lines between the hash lookup and the create call
   (presumably the miss test and the hit-path assignment of fib_index)
   are not visible in this excerpt. */
98 find_ip4_fib_by_table_index_or_id (ip4_main_t * im,
99 u32 table_index_or_id, u32 flags)
101 uword * p, fib_index;
/* Default interpretation: the argument already is a FIB index. */
103 fib_index = table_index_or_id;
104 if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX))
106 p = hash_get (im->fib_index_by_table_id, table_index_or_id);
108 return create_fib_with_table_id (im, table_index_or_id);
111 return vec_elt_at_index (im->fibs, fib_index);
/* Create the hash for prefix length address_length in `fib` and make
   sure the FIB's scratch vectors new_hash_values / old_hash_values are
   long enough to hold one hash value.
   NOTE(review): the remaining parameters and the declarations of `h`
   and `max_index` are not visible in this excerpt. */
115 ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm,
122 ASSERT (lm->fib_result_n_bytes >= sizeof (uword));
/* Round the per-route result size up to a whole number of uwords. */
123 lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword);
/* Hash starts with room for 32 elements; the second argument is built
   from the result size in uwords computed above. */
125 fib->adj_index_by_dst_address[address_length] =
126 hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword));
128 hash_set_flags (fib->adj_index_by_dst_address[address_length],
129 HASH_FLAG_NO_AUTO_SHRINK);
/* Highest valid index of a scratch vector sized to one hash value. */
131 h = hash_header (fib->adj_index_by_dst_address[address_length]);
132 max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1;
134 /* Initialize new/old hash value vectors. */
135 vec_validate_init_empty (fib->new_hash_values, max_index, ~0);
136 vec_validate_init_empty (fib->old_hash_values, max_index, ~0);
/* Store adj_index as the result for dst_address_u32 in the hash for
   dst_address_length, then call every registered add/del route callback
   whose required_flags are all present in `flags`.  Callers in this file
   read fib->old_hash_values[0] afterwards to learn the previous result.
   NOTE(review): several parameters (fib, flags, dst_address_u32,
   adj_index) and local declarations are not visible in this excerpt. */
140 ip4_fib_set_adj_index (ip4_main_t * im,
144 u32 dst_address_length,
147 ip_lookup_main_t * lm = &im->lookup_main;
/* Reset both scratch vectors to all-ones bytes (i.e. ~0 in every
   element) when they are non-empty, then place the new result in slot 0. */
150 if (vec_bytes(fib->old_hash_values))
151 memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values));
152 if (vec_bytes(fib->new_hash_values))
153 memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values));
154 fib->new_hash_values[0] = adj_index;
156 /* Make sure adj index is valid. */
158 (void) ip_get_adjacency (lm, adj_index);
160 hash = fib->adj_index_by_dst_address[dst_address_length];
/* _hash_set3 may return a different hash pointer, so the table slot is
   rewritten with the returned value below. */
162 hash = _hash_set3 (hash, dst_address_u32,
163 fib->new_hash_values,
164 fib->old_hash_values);
166 fib->adj_index_by_dst_address[dst_address_length] = hash;
168 if (vec_len (im->add_del_route_callbacks) > 0)
170 ip4_add_del_route_callback_t * cb;
174 d.data_u32 = dst_address_u32;
/* Only callbacks whose required flags are all set are invoked. */
175 vec_foreach (cb, im->add_del_route_callbacks)
176 if ((flags & cb->required_flags) == cb->required_flags)
177 cb->function (im, cb->function_opaque,
179 &d, dst_address_length,
180 fib->old_hash_values,
181 fib->new_hash_values);
/* Copy new_hash_values back into the stored hash entry.
   NOTE(review): presumably this picks up changes a callback made to
   new_hash_values - confirm. */
183 p = hash_get (hash, dst_address_u32);
184 clib_memcpy (p, fib->new_hash_values, vec_bytes (fib->new_hash_values));
/* Add or delete the route described by `a`.
   - With a->n_add_adj > 0 a new adjacency block is created from
     a->add_adj and announced to the adjacency callbacks; the other
     visible assignment takes a->adj_index.
   - The destination is masked with im->fib_masks[dst_address_length]
     and the per-length hash is created on first use.
   - IP4_ROUTE_FLAG_DEL in a->flags selects deletion; the visible delete
     path removes the key with _hash_unset and notifies the route
     callbacks only if an entry was actually found.
   - The mtrie is updated via ip4_fib_mtrie_add_del_route().
   - Unless IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY is set, a previous
     adjacency that differs from the new one is deleted.
   NOTE(review): braces, else branches and a few statements are not
   visible in this excerpt. */
188 void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a)
190 ip_lookup_main_t * lm = &im->lookup_main;
192 u32 dst_address, dst_address_length, adj_index, old_adj_index;
193 uword * hash, is_del;
194 ip4_add_del_route_callback_t * cb;
196 /* Either create new adjacency or use given one depending on arguments. */
197 if (a->n_add_adj > 0)
199 ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index);
200 ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0);
203 adj_index = a->adj_index;
205 dst_address = a->dst_address.data_u32;
206 dst_address_length = a->dst_address_length;
207 fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags);
/* Keep only the prefix bits of the destination. */
209 ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
210 dst_address &= im->fib_masks[dst_address_length];
/* Lazily create the hash for this prefix length. */
212 if (! fib->adj_index_by_dst_address[dst_address_length])
213 ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length);
215 hash = fib->adj_index_by_dst_address[dst_address_length];
217 is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0;
/* Pre-set slot 0 to ~0 so that it can be tested after _hash_unset to
   tell whether the key existed. */
221 fib->old_hash_values[0] = ~0;
222 hash = _hash_unset (hash, dst_address, fib->old_hash_values);
223 fib->adj_index_by_dst_address[dst_address_length] = hash;
225 if (vec_len (im->add_del_route_callbacks) > 0
226 && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */
228 fib->new_hash_values[0] = ~0;
229 vec_foreach (cb, im->add_del_route_callbacks)
230 if ((a->flags & cb->required_flags) == cb->required_flags)
231 cb->function (im, cb->function_opaque,
233 &a->dst_address, dst_address_length,
234 fib->old_hash_values,
235 fib->new_hash_values);
239 ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length,
/* Previous result for this prefix, or ~0 if slot 0 was never written. */
242 old_adj_index = fib->old_hash_values[0];
244 /* Avoid spurious reference count increments */
245 if (old_adj_index == adj_index && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY))
247 ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index);
248 if (adj->share_count > 0)
/* On delete the mtrie is told the old adjacency, otherwise the new one. */
252 ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length,
253 is_del ? old_adj_index : adj_index,
256 /* Delete old adjacency index if present and changed. */
257 if (! (a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)
258 && old_adj_index != ~0
259 && old_adj_index != adj_index)
260 ip_del_adjacency (lm, old_adj_index);
/* Add or delete one next hop of the route dst_address/dst_address_length.
   Outline of the visible logic:
   1. Choose the FIB: explicit_fib_index, or when that is ~0 the FIB
      bound to next_hop_sw_if_index.
   2. Determine nh_adj_index: the caller's adj_index when it is not ~0;
      otherwise an interface-route adjacency (next hop address 0) or a
      lookup of the next hop in the /32 hash, with ARP and indirect
      adjacencies created in the visible fallback paths.
   3. Look up the current adjacency of the destination prefix.
   4. Either install the route directly via ip4_add_del_route() or
      update the multipath adjacency and re-install the route when the
      multipath adjacency changed.
   Errors set vnm->api_errno, and `error` is passed to
   clib_error_report().
   NOTE(review): the `flags` parameter, several declarations, braces,
   conditions and goto/else lines are not visible in this excerpt, so
   the exact branch structure must be confirmed against the full file. */
264 ip4_add_del_route_next_hop (ip4_main_t * im,
266 ip4_address_t * dst_address,
267 u32 dst_address_length,
268 ip4_address_t * next_hop,
269 u32 next_hop_sw_if_index,
270 u32 next_hop_weight, u32 adj_index,
271 u32 explicit_fib_index)
273 vnet_main_t * vnm = vnet_get_main();
274 ip_lookup_main_t * lm = &im->lookup_main;
277 u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index;
278 u32 dst_adj_index, nh_adj_index;
279 uword * dst_hash, * dst_result;
280 uword * nh_hash, * nh_result;
281 ip_adjacency_t * dst_adj;
282 ip_multipath_adjacency_t * old_mp, * new_mp;
283 int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0;
284 int is_interface_next_hop;
285 clib_error_t * error = 0;
/* ~0 means "no explicit FIB": fall back to the next-hop interface's FIB. */
287 if (explicit_fib_index == (u32)~0)
288 fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index);
290 fib_index = explicit_fib_index;
292 fib = vec_elt_at_index (im->fibs, fib_index);
294 /* Lookup next hop to be added or deleted. */
/* An all-zero next hop address denotes an interface next hop. */
295 is_interface_next_hop = next_hop->data_u32 == 0;
296 if (adj_index == (u32)~0)
298 if (is_interface_next_hop)
/* Reuse the per-interface route adjacency if one is cached. */
300 nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index);
302 nh_adj_index = *nh_result;
/* Otherwise build an interface-route adjacency and cache its index. */
305 ip_adjacency_t * adj;
306 adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
308 ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0);
309 ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0);
310 hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index);
/* Non-interface next hop: look for an exact /32 entry first. */
315 nh_hash = fib->adj_index_by_dst_address[32];
316 nh_result = hash_get (nh_hash, next_hop->data_u32);
318 /* Next hop must be known. */
321 ip_adjacency_t * adj;
323 nh_adj_index = ip4_fib_lookup_with_table (im, fib_index,
325 adj = ip_get_adjacency (lm, nh_adj_index);
326 /* if ARP interface adjacency is present, we need to
327 install ARP adjacency for specific next hop */
328 if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP &&
329 adj->arp.next_hop.ip4.as_u32 == 0)
331 nh_adj_index = vnet_arp_glean_add(fib_index, next_hop);
335 /* Next hop is not known, so create indirect adj */
/* NOTE(review): add_adj is a stack object and only the three fields
   assigned below are set before it is passed to ip_add_adjacency as a
   template; every other field is indeterminate.  Consider
   zero-initializing it. */
336 ip_adjacency_t add_adj;
337 add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT;
338 add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32;
339 add_adj.explicit_fib_index = explicit_fib_index;
340 ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index);
344 nh_adj_index = *nh_result;
/* Caller supplied an adjacency: use it as the next hop adjacency. */
349 nh_adj_index = adj_index;
/* Current state of the destination prefix in the per-length hash. */
351 ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks));
352 dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length];
354 dst_hash = fib->adj_index_by_dst_address[dst_address_length];
355 dst_result = hash_get (dst_hash, dst_address_u32);
358 dst_adj_index = dst_result[0];
359 dst_adj = ip_get_adjacency (lm, dst_adj_index);
363 /* For deletes destination must be known. */
366 vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION;
367 error = clib_error_return (0, "unknown destination %U/%d",
368 format_ip4_address, dst_address,
377 /* Ignore adds of X/32 with next hop of X. */
379 && dst_address_length == 32
380 && dst_address->data_u32 == next_hop->data_u32
381 && adj_index != (u32)~0)
383 vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP;
384 error = clib_error_return (0, "prefix matches next hop %U/%d",
385 format_ip4_address, dst_address,
390 /* Destination is not known and default weight is set so add route
391 to existing non-multipath adjacency */
392 if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0)
394 /* create new adjacency */
395 ip4_add_del_route_args_t a;
396 a.table_index_or_table_id = fib_index;
/* Always address the FIB by index and keep the old adjacency; only the
   redistribute / last-in-group bits are inherited from the caller. */
397 a.flags = ((is_del ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
398 | IP4_ROUTE_FLAG_FIB_INDEX
399 | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
400 | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE
401 | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
402 a.dst_address = dst_address[0];
403 a.dst_address_length = dst_address_length;
404 a.adj_index = nh_adj_index;
408 ip4_add_del_route (im, &a);
/* Multipath handle of the existing destination adjacency, ~0 if none. */
413 old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0;
415 if (! ip_multipath_adjacency_add_del_next_hop
422 vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP;
423 error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path",
424 format_ip4_address, next_hop);
429 if (old_mp_adj_index != ~0)
430 old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index);
431 if (new_mp_adj_index != ~0)
432 new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index);
/* Re-install the route only when the multipath adjacency changed. */
434 if (old_mp != new_mp)
436 ip4_add_del_route_args_t a;
437 a.table_index_or_table_id = fib_index;
/* A delete that leaves no multipath adjacency removes the route;
   every other case (re)adds it. */
438 a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD)
439 | IP4_ROUTE_FLAG_FIB_INDEX
440 | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY
441 | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP)));
442 a.dst_address = dst_address[0];
443 a.dst_address_length = dst_address_length;
444 a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index;
448 ip4_add_del_route (im, &a);
453 clib_error_report (error);
/* Look up the exact prefix address/address_length in the FIB selected
   by table_index_or_table_id and flags.  The address is masked with
   im->fib_masks[address_length] and probed in that length's hash.
   NOTE(review): the return type, some parameters, the declarations of
   `hash` and `p`, and the return statement are not visible in this
   excerpt. */
457 ip4_get_route (ip4_main_t * im,
458 u32 table_index_or_table_id,
463 ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
/* The address is read as a single 32-bit value through a cast. */
464 u32 dst_address = * (u32 *) address;
467 ASSERT (address_length < ARRAY_LEN (im->fib_masks));
468 dst_address &= im->fib_masks[address_length];
470 hash = fib->adj_index_by_dst_address[address_length];
471 p = hash_get (hash, dst_address);
/* Collect routes from the selected FIB whose key matches `address`
   under the mask for address_length.  Matching addresses are appended
   to *results and their prefix lengths to *result_lengths; both vectors
   are emptied first.  The scan starts at address_length and covers
   lengths up to 32.
   NOTE(review): some parameters, the declarations of k, v and a, the
   guards around the _vec_len resets and the increment of this_length
   are not visible in this excerpt. */
476 ip4_foreach_matching_route (ip4_main_t * im,
477 u32 table_index_or_table_id,
479 ip4_address_t * address,
481 ip4_address_t ** results,
482 u8 ** result_lengths)
484 ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
485 u32 dst_address = address->data_u32;
486 u32 this_length = address_length;
/* Reuse the caller's vectors: keep the storage, drop the contents. */
489 _vec_len (*results) = 0;
491 _vec_len (*result_lengths) = 0;
/* NOTE(review): vec_len is applied to `results` (an ip4_address_t **),
   while every other use in this function dereferences it as *results.
   This looks unintended - vec_len (*results) appears to be what was
   meant; confirm before relying on the loop's early exit. */
493 while (this_length <= 32 && vec_len (results) == 0)
496 hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({
/* Keys are compared under the mask of the requested length, not of
   this_length. */
497 if (0 == ((k ^ dst_address) & im->fib_masks[address_length]))
501 vec_add1 (*results, a);
502 vec_add1 (*result_lengths, this_length);
/* Apply pending adjacency remaps (lm->adjacency_remap_table) to every
   route of the selected FIB.  For each prefix length 0..32 the hash is
   walked: entries are either rewritten to their new adjacency, with
   route callbacks invoked with IP4_ROUTE_FLAG_ADD, or collected in
   to_delete and removed afterwards, with callbacks invoked with
   IP4_ROUTE_FLAG_DEL.  Finally the mtrie is remapped, the remap table
   is zeroed and lm->n_adjacency_remaps is reset to 0.
   NOTE(review): the `flags` parameter, several declarations, the
   conditions that pick between "delete" and "remap", and the early-exit
   statements are not visible in this excerpt. */
510 void ip4_maybe_remap_adjacencies (ip4_main_t * im,
511 u32 table_index_or_table_id,
514 ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags);
515 ip_lookup_main_t * lm = &im->lookup_main;
518 ip4_add_del_route_callback_t * cb;
/* Static scratch vector, reused across calls and emptied per length. */
519 static ip4_address_t * to_delete;
521 if (lm->n_adjacency_remaps == 0)
524 for (l = 0; l <= 32; l++)
527 uword * hash = fib->adj_index_by_dst_address[l];
529 if (hash_elts (hash) == 0)
533 _vec_len (to_delete) = 0;
535 hash_foreach_pair (p, hash, ({
536 u32 adj_index = p->value[0];
/* Remap table entry for this route's current adjacency. */
537 u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
541 /* Record destination address from hash key. */
544 /* New adjacency points to nothing: so delete prefix. */
546 vec_add1 (to_delete, a);
549 /* Remap to new adjacency. */
550 clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values));
552 /* Set new adjacency value. */
/* NOTE(review): the "- 1" suggests remap entries are stored offset by
   one - confirm against the remap table's producer. */
553 fib->new_hash_values[0] = p->value[0] = m - 1;
555 vec_foreach (cb, im->add_del_route_callbacks)
556 if ((flags & cb->required_flags) == cb->required_flags)
557 cb->function (im, cb->function_opaque,
558 fib, flags | IP4_ROUTE_FLAG_ADD,
560 fib->old_hash_values,
561 fib->new_hash_values);
/* Deletions are reported with a new value of ~0. */
566 fib->new_hash_values[0] = ~0;
567 for (i = 0; i < vec_len (to_delete); i++)
569 hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values);
570 vec_foreach (cb, im->add_del_route_callbacks)
571 if ((flags & cb->required_flags) == cb->required_flags)
572 cb->function (im, cb->function_opaque,
573 fib, flags | IP4_ROUTE_FLAG_DEL,
575 fib->old_hash_values,
576 fib->new_hash_values);
580 /* Also remap adjacencies in mtrie. */
581 ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie);
583 /* Reset mapping table. */
584 vec_zero (lm->adjacency_remap_table);
586 /* All remaps have been performed. */
587 lm->n_adjacency_remaps = 0;
/* Delete routes found by ip4_foreach_matching_route() for `address`,
   iterating l from address_length + 1 to 32, then apply any pending
   adjacency remaps.  Each deletion goes through ip4_add_del_route()
   with IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE or-ed with
   the caller's flags.
   NOTE(review): the `flags` and `address_length` parameters, the
   declarations of l and i, part of the matching-route call's argument
   list and other initializations of `a` are not visible in this
   excerpt. */
590 void ip4_delete_matching_routes (ip4_main_t * im,
591 u32 table_index_or_table_id,
593 ip4_address_t * address,
/* Static scratch vectors, reused across calls. */
596 static ip4_address_t * matching_addresses;
597 static u8 * matching_address_lengths;
599 ip4_add_del_route_args_t a;
601 a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags;
602 a.table_index_or_table_id = table_index_or_table_id;
607 for (l = address_length + 1; l <= 32; l++)
609 ip4_foreach_matching_route (im, table_index_or_table_id, flags,
613 &matching_address_lengths);
/* Delete every match using the prefix length reported for it. */
614 for (i = 0; i < vec_len (matching_addresses); i++)
616 a.dst_address = matching_addresses[i];
617 a.dst_address_length = matching_address_lengths[i];
618 ip4_add_del_route (im, &a);
622 ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags);
/* Shared body of the IPv4 lookup node function.  For every buffer in
   `frame` it determines an adjacency index, stores it in
   vnet_buffer (b)->ip.adj_index[VLIB_TX], bumps the per-adjacency
   combined counter and hands the buffer to the next node given by the
   adjacency's lookup_next_index.  Buffers are processed two at a time
   while at least four remain, then one at a time.
   When lookup_for_responses_to_locally_received_packets is non-zero the
   mtrie walk is skipped and adj_index[VLIB_RX] is used instead.
   Returns frame->n_vectors.
   NOTE(review): the return type, the last parameter (called is_indirect
   at the visible call site), braces, else lines, the switch on
   wrong_next and the per-iteration pointer/count updates are not
   visible in this excerpt. */
626 ip4_lookup_inline (vlib_main_t * vm,
627 vlib_node_runtime_t * node,
628 vlib_frame_t * frame,
629 int lookup_for_responses_to_locally_received_packets,
632 ip4_main_t * im = &ip4_main;
633 ip_lookup_main_t * lm = &im->lookup_main;
634 vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
635 u32 n_left_from, n_left_to_next, * from, * to_next;
636 ip_lookup_next_t next;
637 u32 cpu_index = os_get_cpu_number();
/* Start with the frame's buffer indices and the node's cached next index. */
639 from = vlib_frame_vector_args (frame);
640 n_left_from = frame->n_vectors;
641 next = node->cached_next_index;
643 while (n_left_from > 0)
645 vlib_get_next_frame (vm, node, next,
646 to_next, n_left_to_next);
/* Dual loop: two packets per iteration; at least four must remain so
   that from[2] and from[3] can be prefetched. */
648 while (n_left_from >= 4 && n_left_to_next >= 2)
650 vlib_buffer_t * p0, * p1;
651 ip4_header_t * ip0, * ip1;
652 __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
653 ip_lookup_next_t next0, next1;
654 ip_adjacency_t * adj0, * adj1;
655 ip4_fib_mtrie_t * mtrie0, * mtrie1;
656 ip4_fib_mtrie_leaf_t leaf0, leaf1;
657 ip4_address_t * dst_addr0, *dst_addr1;
658 __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
659 __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1;
660 u32 flow_hash_config0, flow_hash_config1;
661 u32 hash_c0, hash_c1;
664 /* Prefetch next iteration. */
666 vlib_buffer_t * p2, * p3;
668 p2 = vlib_get_buffer (vm, from[2]);
669 p3 = vlib_get_buffer (vm, from[3]);
671 vlib_prefetch_buffer_header (p2, LOAD);
672 vlib_prefetch_buffer_header (p3, LOAD);
674 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
675 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
/* Speculatively enqueue both buffers to the current next frame. */
678 pi0 = to_next[0] = from[0];
679 pi1 = to_next[1] = from[1];
681 p0 = vlib_get_buffer (vm, pi0);
682 p1 = vlib_get_buffer (vm, pi1);
684 ip0 = vlib_buffer_get_current (p0);
685 ip1 = vlib_buffer_get_current (p1);
/* Two alternatives for the lookup key (the selecting condition is not
   visible in this excerpt): the indirect next hop of the adjacency
   already stored in adj_index[VLIB_TX], or the packet's own
   destination address. */
689 ip_adjacency_t * iadj0, * iadj1;
690 iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
691 iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]);
692 dst_addr0 = &iadj0->indirect.next_hop.ip4;
693 dst_addr1 = &iadj1->indirect.next_hop.ip4;
697 dst_addr0 = &ip0->dst_address;
698 dst_addr1 = &ip1->dst_address;
/* The FIB defaults to the one bound to the RX interface; a
   sw_if_index[VLIB_TX] other than ~0 overrides it and is used directly
   as the FIB index. */
701 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
702 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
703 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
704 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
705 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
706 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
/* mtrie walk: four lookup steps (indices 0..3) starting at the root
   leaf, interleaved with the header inspection below. */
709 if (! lookup_for_responses_to_locally_received_packets)
711 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
712 mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
714 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
716 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
717 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
/* Address immediately after the fixed-size ip4_header_t. */
720 tcp0 = (void *) (ip0 + 1);
721 tcp1 = (void *) (ip1 + 1);
723 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
724 || ip0->protocol == IP_PROTOCOL_UDP);
725 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
726 || ip1->protocol == IP_PROTOCOL_UDP);
728 if (! lookup_for_responses_to_locally_received_packets)
730 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
731 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
734 if (! lookup_for_responses_to_locally_received_packets)
736 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
737 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
740 if (! lookup_for_responses_to_locally_received_packets)
742 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
743 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
/* Responses to locally received packets reuse the RX adjacency. */
746 if (lookup_for_responses_to_locally_received_packets)
748 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
749 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
753 /* Handle default route. */
754 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
755 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
757 adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
758 adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
/* Cross-check the mtrie result against the hash-based lookup. */
761 ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
763 /* no_default_route */ 0));
764 ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
766 /* no_default_route */ 0));
767 adj0 = ip_get_adjacency (lm, adj_index0);
768 adj1 = ip_get_adjacency (lm, adj_index1);
770 next0 = adj0->lookup_next_index;
771 next1 = adj1->lookup_next_index;
773 /* Use flow hash to compute multipath adjacency. */
/* The hash stays 0 unless the adjacency block has more than one member. */
774 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
775 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
776 if (PREDICT_FALSE (adj0->n_adj > 1))
779 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
780 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
781 ip4_compute_flow_hash (ip0, flow_hash_config0);
783 if (PREDICT_FALSE(adj1->n_adj > 1))
786 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
787 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
788 ip4_compute_flow_hash (ip1, flow_hash_config1);
/* n_adj is asserted to be a non-zero power of two, so masking the hash
   with n_adj - 1 yields an offset inside the adjacency block. */
791 ASSERT (adj0->n_adj > 0);
792 ASSERT (adj1->n_adj > 0);
793 ASSERT (is_pow2 (adj0->n_adj));
794 ASSERT (is_pow2 (adj1->n_adj));
795 adj_index0 += (hash_c0 & (adj0->n_adj - 1));
796 adj_index1 += (hash_c1 & (adj1->n_adj - 1));
798 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
799 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
/* Count one packet, and the buffer chain length plus
   sizeof (ethernet_header_t) bytes, against the chosen adjacency. */
801 vlib_increment_combined_counter
802 (cm, cpu_index, adj_index0, 1,
803 vlib_buffer_length_in_chain (vm, p0)
804 + sizeof(ethernet_header_t));
805 vlib_increment_combined_counter
806 (cm, cpu_index, adj_index1, 1,
807 vlib_buffer_length_in_chain (vm, p1)
808 + sizeof(ethernet_header_t));
/* Bit 0 is set when packet 0 needs a next node other than `next`,
   bit 1 likewise for packet 1. */
815 wrong_next = (next0 != next) + 2*(next1 != next);
816 if (PREDICT_FALSE (wrong_next != 0))
825 vlib_set_next_frame_buffer (vm, node, next0, pi0);
832 vlib_set_next_frame_buffer (vm, node, next1, pi1);
839 vlib_set_next_frame_buffer (vm, node, next0, pi0);
840 vlib_set_next_frame_buffer (vm, node, next1, pi1);
844 vlib_put_next_frame (vm, node, next, n_left_to_next);
846 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single loop: remaining packets, one per iteration, following the
   same steps as the dual loop above. */
852 while (n_left_from > 0 && n_left_to_next > 0)
856 __attribute__((unused)) tcp_header_t * tcp0;
857 ip_lookup_next_t next0;
858 ip_adjacency_t * adj0;
859 ip4_fib_mtrie_t * mtrie0;
860 ip4_fib_mtrie_leaf_t leaf0;
861 ip4_address_t * dst_addr0;
862 __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0;
863 u32 flow_hash_config0, hash_c0;
868 p0 = vlib_get_buffer (vm, pi0);
870 ip0 = vlib_buffer_get_current (p0);
874 ip_adjacency_t * iadj0;
875 iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
876 dst_addr0 = &iadj0->indirect.next_hop.ip4;
880 dst_addr0 = &ip0->dst_address;
883 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
884 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
885 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
887 if (! lookup_for_responses_to_locally_received_packets)
889 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
891 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
893 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
896 tcp0 = (void *) (ip0 + 1);
898 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
899 || ip0->protocol == IP_PROTOCOL_UDP);
901 if (! lookup_for_responses_to_locally_received_packets)
902 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
904 if (! lookup_for_responses_to_locally_received_packets)
905 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
907 if (! lookup_for_responses_to_locally_received_packets)
908 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
910 if (lookup_for_responses_to_locally_received_packets)
911 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
914 /* Handle default route. */
915 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
916 adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
919 ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
921 /* no_default_route */ 0));
923 adj0 = ip_get_adjacency (lm, adj_index0);
925 next0 = adj0->lookup_next_index;
927 /* Use flow hash to compute multipath adjacency. */
928 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
929 if (PREDICT_FALSE(adj0->n_adj > 1))
932 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
934 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
935 ip4_compute_flow_hash (ip0, flow_hash_config0);
938 ASSERT (adj0->n_adj > 0);
939 ASSERT (is_pow2 (adj0->n_adj));
940 adj_index0 += (hash_c0 & (adj0->n_adj - 1));
942 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
944 vlib_increment_combined_counter
945 (cm, cpu_index, adj_index0, 1,
946 vlib_buffer_length_in_chain (vm, p0)
947 + sizeof(ethernet_header_t));
/* A packet whose next node differs from the current one forces the
   current frame to be put back and a new next frame to be fetched. */
954 if (PREDICT_FALSE (next0 != next))
957 vlib_put_next_frame (vm, node, next, n_left_to_next);
959 vlib_get_next_frame (vm, node, next,
960 to_next, n_left_to_next);
967 vlib_put_next_frame (vm, node, next, n_left_to_next);
970 return frame->n_vectors;
/* Node function wrapper: runs ip4_lookup_inline() on the frame with
   both mode arguments set to 0, and returns its result.
   NOTE(review): the return type and braces are not visible in this
   excerpt. */
974 ip4_lookup (vlib_main_t * vm,
975 vlib_node_runtime_t * node,
976 vlib_frame_t * frame)
978 return ip4_lookup_inline (vm, node, frame,
979 /* lookup_for_responses_to_locally_received_packets */ 0,
980 /* is_indirect */ 0);
/* Fill in `adj` as an interface route for sw_if_index.  When the
   supporting hardware interface is of the Ethernet or SRP class the
   adjacency uses IP_LOOKUP_NEXT_ARP with the ip4-arp node and the ARP
   packet type, records if_address_index and clears the ARP next hop;
   the other visible assignments use IP_LOOKUP_NEXT_REWRITE with the
   ip4-rewrite node and the IP4 packet type.  The rewrite header is then
   built with vnet_rewrite_for_sw_interface().
   NOTE(review): the sw_if_index parameter, the declarations of n and
   node_index, the else line, the delimiters of the block comment below
   and part of the final call's argument list are not visible in this
   excerpt. */
984 void ip4_adjacency_set_interface_route (vnet_main_t * vnm,
985 ip_adjacency_t * adj,
987 u32 if_address_index)
989 vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
991 vnet_l3_packet_type_t packet_type;
994 if (hw->hw_class_index == ethernet_hw_interface_class.index
995 || hw->hw_class_index == srp_hw_interface_class.index)
998 * We have a bit of a problem in this case. ip4-arp uses
999 * the rewrite_header.next_index to hand pkts to the
1000 * indicated inteface output node. We can end up in
1001 * ip4_rewrite_local, too, which also pays attention to
1002 * rewrite_header.next index. Net result: a hack in
1003 * ip4_rewrite_local...
1005 n = IP_LOOKUP_NEXT_ARP;
1006 node_index = ip4_arp_node.index;
1007 adj->if_address_index = if_address_index;
/* The IPv4 next hop is zeroed, then the whole ip46 address is reset. */
1008 adj->arp.next_hop.ip4.as_u32 = 0;
1009 ip46_address_reset(&adj->arp.next_hop);
1010 packet_type = VNET_L3_PACKET_TYPE_ARP;
1014 n = IP_LOOKUP_NEXT_REWRITE;
1015 node_index = ip4_rewrite_node.index;
1016 packet_type = VNET_L3_PACKET_TYPE_IP4;
1019 adj->lookup_next_index = n;
1020 vnet_rewrite_for_sw_interface
1025 VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST,
1026 &adj->rewrite_header,
1027 sizeof (adj->rewrite_data));
/* Install the routes implied by interface address `a` on sw_if_index in
   FIB fib_index:
   - for prefixes shorter than /32, an interface route whose adjacency
     is set up by ip4_adjacency_set_interface_route(); its index is also
     stored in a->neighbor_probe_adj_index (which is ~0 otherwise);
   - always a /32 route for the address itself.  Its adjacency is
     IP_LOOKUP_NEXT_CLASSIFY when the interface has a classify table
     index other than ~0; the other visible assignment is
     IP_LOOKUP_NEXT_LOCAL.
   NOTE(review): the return type, the tail of both ip_add_adjacency
   calls, a few initializations of `x` and the else line are not
   visible in this excerpt. */
1031 ip4_add_interface_routes (u32 sw_if_index,
1032 ip4_main_t * im, u32 fib_index,
1033 ip_interface_address_t * a)
1035 vnet_main_t * vnm = vnet_get_main();
1036 ip_lookup_main_t * lm = &im->lookup_main;
1037 ip_adjacency_t * adj;
1038 ip4_address_t * address = ip_interface_address_get_address (lm, a);
1039 ip4_add_del_route_args_t x;
1040 vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index);
1041 u32 classify_table_index;
1043 /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). */
1044 x.table_index_or_table_id = fib_index;
1045 x.flags = (IP4_ROUTE_FLAG_ADD
1046 | IP4_ROUTE_FLAG_FIB_INDEX
1047 | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1048 x.dst_address = address[0];
1049 x.dst_address_length = a->address_length;
/* No probe adjacency unless the interface route below is installed. */
1053 a->neighbor_probe_adj_index = ~0;
1054 if (a->address_length < 32)
1056 adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
/* The address is identified by its index in lm->if_address_pool. */
1058 ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool);
1059 ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
1060 ip4_add_del_route (im, &x);
1061 a->neighbor_probe_adj_index = x.adj_index;
1064 /* Add e.g. 1.1.1.1/32 as local to this host. */
1065 adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1,
/* Per-interface classify table, ~0 when the interface has no entry. */
1068 classify_table_index = ~0;
1069 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
1070 classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index];
1071 if (classify_table_index != (u32) ~0)
1073 adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY;
1074 adj->classify.table_index = classify_table_index;
1077 adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL;
1079 adj->if_address_index = a - lm->if_address_pool;
1080 adj->rewrite_header.sw_if_index = sw_if_index;
1081 adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX];
1083 * Local adjs are never to be rewritten. Spoofed pkts w/ src = dst = local
1084 * fail an RPF-ish check, but still go thru the rewrite code...
1086 adj->rewrite_header.data_bytes = 0;
1088 ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0);
/* Same destination address as above, now installed as a host route. */
1089 x.dst_address_length = 32;
1090 ip4_add_del_route (im, &x);
/* Remove the routes implied by an interface address from FIB
   fib_index: the interface route (only when address_length < 32), the
   /32 route for the address itself, and then whatever
   ip4_delete_matching_routes() finds for the address.
   NOTE(review): the return type, some initializations of `x` and most
   of the final call's argument list are not visible in this excerpt. */
1094 ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length)
1096 ip4_add_del_route_args_t x;
1098 /* Delete e.g. 1.0.0.0/8 interface route (arp for Ethernet). */
1099 x.table_index_or_table_id = fib_index;
1100 x.flags = (IP4_ROUTE_FLAG_DEL
1101 | IP4_ROUTE_FLAG_FIB_INDEX
1102 | IP4_ROUTE_FLAG_NO_REDISTRIBUTE);
1103 x.dst_address = address[0];
1104 x.dst_address_length = address_length;
/* The interface route is only deleted for prefixes shorter than /32. */
1109 if (address_length < 32)
1110 ip4_add_del_route (im, &x);
/* Delete the /32 route for the address itself. */
1112 x.dst_address_length = 32;
1113 ip4_add_del_route (im, &x);
1115 ip4_delete_matching_routes (im,
1117 IP4_ROUTE_FLAG_FIB_INDEX,
/* Record type holding an IPv4 address.
   NOTE(review): the opening of the typedef and at least one further
   member are not visible in this excerpt. */
1124 ip4_address_t address;
1126 } ip4_interface_address_t;
/* Forward declaration of ip4_add_del_interface_address_internal().
   NOTE(review): most of the parameter list is not visible in this
   excerpt. */
1128 static clib_error_t *
1129 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1131 ip4_address_t * new_address,
/* Add or delete an IPv4 address on interface sw_if_index.
   Visible steps:
   1. Make sure im->fib_index_by_sw_if_index covers sw_if_index and
      build an address/FIB pair for the interface's FIB.
   2. Reject an address that conflicts with one already on the
      interface (either prefix matching the other).
   3. Update the interface address pool via
      ip_interface_address_add_del().
   4. If the interface is admin-up and insert_routes is set, delete or
      add the interface routes.
   5. Invoke the add/del interface address callbacks only when the pool
      element count changed.
   6. Free the temporary addr_fib vector.
   NOTE(review): several parameters (sw_if_index, address_length,
   redistribute, insert_routes, is_del), the branch conditions and the
   final return are not visible in this excerpt. */
1137 static clib_error_t *
1138 ip4_add_del_interface_address_internal (vlib_main_t * vm,
1140 ip4_address_t * address,
1146 vnet_main_t * vnm = vnet_get_main();
1147 ip4_main_t * im = &ip4_main;
1148 ip_lookup_main_t * lm = &im->lookup_main;
1149 clib_error_t * error = 0;
1150 u32 if_address_index, elts_before;
1151 ip4_address_fib_t ip4_af, * addr_fib = 0;
1153 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1154 ip4_addr_fib_init (&ip4_af, address,
1155 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
/* addr_fib is a heap vector from here on; it is freed at the end of
   the function. */
1156 vec_add1 (addr_fib, ip4_af);
1158 /* When adding an address check that it does not conflict with an existing address. */
1161 ip_interface_address_t * ia;
1162 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
1163 0 /* honor unnumbered */,
1165 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
/* NOTE(review): this early return leaves the function without reaching
   vec_free (addr_fib) below, so the vector allocated above appears to
   leak on the conflict path - confirm and free it before returning. */
1167 if (ip4_destination_matches_route (im, address, x, ia->address_length)
1168 || ip4_destination_matches_route (im, x, address, address_length))
1169 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
1170 format_ip4_address_and_length, address, address_length,
1171 format_ip4_address_and_length, x, ia->address_length,
1172 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* Remember the pool size so that a real add/delete can be detected. */
1176 elts_before = pool_elts (lm->if_address_pool);
1178 error = ip_interface_address_add_del
1188 if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes)
1191 ip4_del_interface_routes (im, ip4_af.fib_index, address,
1195 ip4_add_interface_routes (sw_if_index,
1196 im, ip4_af.fib_index,
1198 (lm->if_address_pool, if_address_index));
1201 /* An unchanged pool size means a duplicate address; callbacks run only when the pool grew or shrank. */
1202 if (elts_before != pool_elts (lm->if_address_pool))
1204 ip4_add_del_interface_address_callback_t * cb;
1205 vec_foreach (cb, im->add_del_interface_address_callbacks)
1206 cb->function (im, cb->function_opaque, sw_if_index,
1207 address, address_length,
1213 vec_free (addr_fib);
1218 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
1219 ip4_address_t * address, u32 address_length,
1222 return ip4_add_del_interface_address_internal
1223 (vm, sw_if_index, address, address_length,
1224 /* redistribute */ 1,
1225 /* insert_routes */ 1,
/*
 * Admin up/down handler for a software interface (registered just below via
 * VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION).
 *
 * Walks every IPv4 address configured on the interface and adds or deletes
 * the corresponding interface routes in the FIB bound to that interface.
 * NOTE(review): the statement choosing between the add and the delete call is
 * not visible in this listing; presumably it is keyed on is_admin_up - confirm.
 */
1229 static clib_error_t *
1230 ip4_sw_interface_admin_up_down (vnet_main_t * vnm,
1234 ip4_main_t * im = &ip4_main;
1235 ip_interface_address_t * ia;
1237 u32 is_admin_up, fib_index;
1239 /* Fill in lookup tables with default table (0). */
1240 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
/* Grow the per-interface address-pool index vector, filling new slots with ~0. */
1242 vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0);
/* Normalise the ADMIN_UP flag bit to 0/1. */
1244 is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
/* FIB currently bound to this interface. */
1246 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
/* Visit each address on the interface (unnumbered handling disabled: 0). */
1248 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
1249 0 /* honor unnumbered */,
1251 a = ip_interface_address_get_address (&im->lookup_main, ia);
1253 ip4_add_interface_routes (sw_if_index,
1257 ip4_del_interface_routes (im, fib_index,
1258 a, ia->address_length);
1264 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
1266 static clib_error_t *
1267 ip4_sw_interface_add_del (vnet_main_t * vnm,
1271 vlib_main_t * vm = vnm->vlib_main;
1272 ip4_main_t * im = &ip4_main;
1273 ip_lookup_main_t * lm = &im->lookup_main;
1276 for (cast = 0; cast < VNET_N_CAST; cast++)
1278 ip_config_main_t * cm = &lm->rx_config_mains[cast];
1279 vnet_config_main_t * vcm = &cm->config_main;
1281 if (! vcm->node_index_by_feature_index)
1283 if (cast == VNET_UNICAST)
1285 static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1286 static char * feature_nodes[] = {
1287 [IP4_RX_FEATURE_CHECK_ACCESS] = "ip4-inacl",
1288 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_RX] = "ip4-source-check-via-rx",
1289 [IP4_RX_FEATURE_SOURCE_CHECK_REACHABLE_VIA_ANY] = "ip4-source-check-via-any",
1290 [IP4_RX_FEATURE_IPSEC] = "ipsec-input-ip4",
1291 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1292 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup",
1295 vnet_config_init (vm, vcm,
1296 start_nodes, ARRAY_LEN (start_nodes),
1297 feature_nodes, ARRAY_LEN (feature_nodes));
1301 static char * start_nodes[] = { "ip4-input", "ip4-input-no-checksum", };
1302 static char * feature_nodes[] = {
1303 [IP4_RX_FEATURE_VPATH] = "vpath-input-ip4",
1304 [IP4_RX_FEATURE_LOOKUP] = "ip4-lookup-multicast",
1307 vnet_config_init (vm, vcm,
1308 start_nodes, ARRAY_LEN (start_nodes),
1309 feature_nodes, ARRAY_LEN (feature_nodes));
1313 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1314 ci = cm->config_index_by_sw_if_index[sw_if_index];
1317 ci = vnet_config_add_feature (vm, vcm,
1319 IP4_RX_FEATURE_LOOKUP,
1320 /* config data */ 0,
1321 /* # bytes of config data */ 0);
1323 ci = vnet_config_del_feature (vm, vcm,
1325 IP4_RX_FEATURE_LOOKUP,
1326 /* config data */ 0,
1327 /* # bytes of config data */ 0);
1329 cm->config_index_by_sw_if_index[sw_if_index] = ci;
1332 return /* no error */ 0;
1335 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1338 VLIB_REGISTER_NODE (ip4_lookup_node) = {
1339 .function = ip4_lookup,
1340 .name = "ip4-lookup",
1341 .vector_size = sizeof (u32),
1343 .n_next_nodes = IP_LOOKUP_N_NEXT,
1344 .next_nodes = IP4_LOOKUP_NEXT_NODES,
1348 ip4_indirect (vlib_main_t * vm,
1349 vlib_node_runtime_t * node,
1350 vlib_frame_t * frame)
1352 return ip4_lookup_inline (vm, node, frame,
1353 /* lookup_for_responses_to_locally_received_packets */ 0,
1354 /* is_indirect */ 1);
1357 VLIB_REGISTER_NODE (ip4_indirect_node) = {
1358 .function = ip4_indirect,
1359 .name = "ip4-indirect",
1360 .vector_size = sizeof (u32),
1362 .n_next_nodes = IP_LOOKUP_N_NEXT,
1363 .next_nodes = IP4_LOOKUP_NEXT_NODES,
1367 /* Global IP4 main. */
1368 ip4_main_t ip4_main;
1371 ip4_lookup_init (vlib_main_t * vm)
1373 ip4_main_t * im = &ip4_main;
1376 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1381 m = pow2_mask (i) << (32 - i);
1384 im->fib_masks[i] = clib_host_to_net_u32 (m);
1387 /* Create FIB with index 0 and table id of 0. */
1388 find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID);
1390 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1394 pn = pg_get_node (ip4_lookup_node.index);
1395 pn->unformat_edit = unformat_pg_ip4_header;
1399 ethernet_arp_header_t h;
1401 memset (&h, 0, sizeof (h));
1403 /* Set target ethernet address to all zeros. */
1404 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1406 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1407 #define _8(f,v) h.f = v;
1408 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1409 _16 (l3_type, ETHERNET_TYPE_IP4);
1410 _8 (n_l2_address_bytes, 6);
1411 _8 (n_l3_address_bytes, 4);
1412 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1416 vlib_packet_template_init (vm,
1417 &im->ip4_arp_request_packet_template,
1420 /* alloc chunk size */ 8,
1427 VLIB_INIT_FUNCTION (ip4_lookup_init);
1430 /* Adjacency taken. */
1435 /* Packet data, possibly *after* rewrite. */
1436 u8 packet_data[64 - 1*sizeof(u32)];
1437 } ip4_forward_next_trace_t;
/*
 * format() callback that renders one ip4_forward_next_trace_t record.
 *
 * Consumes three va_args: vlib_main_t * (unused), vlib_node_t * (unused) and
 * the trace record.  Appends the FIB index, adjacency index, formatted
 * adjacency and flow hash to s; for rewrite adjacencies it also appends the
 * captured packet bytes on a new, indented line.
 */
1439 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1441 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1442 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1443 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1444 vnet_main_t * vnm = vnet_get_main();
1445 ip4_main_t * im = &ip4_main;
1446 ip_adjacency_t * adj;
/* Remember the current indent so continuation lines can be aligned with it. */
1447 uword indent = format_get_indent (s);
/* Look up the adjacency recorded in the trace; its next-index selects the extra output below. */
1449 adj = ip_get_adjacency (&im->lookup_main, t->adj_index);
1450 s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1451 t->fib_index, t->adj_index, format_ip_adjacency,
1452 vnm, &im->lookup_main, t->adj_index, t->flow_hash);
1453 switch (adj->lookup_next_index)
/* Rewrite adjacencies additionally dump the traced packet data. */
1455 case IP_LOOKUP_NEXT_REWRITE:
1456 s = format (s, "\n%U%U",
1457 format_white_space, indent,
1458 format_ip_adjacency_packet_data,
1459 vnm, &im->lookup_main, t->adj_index,
1460 t->packet_data, sizeof (t->packet_data));
1470 /* Common trace function for all ip4-forward next nodes. */
1472 ip4_forward_next_trace (vlib_main_t * vm,
1473 vlib_node_runtime_t * node,
1474 vlib_frame_t * frame,
1475 vlib_rx_or_tx_t which_adj_index)
1478 ip4_main_t * im = &ip4_main;
1480 n_left = frame->n_vectors;
1481 from = vlib_frame_vector_args (frame);
1486 vlib_buffer_t * b0, * b1;
1487 ip4_forward_next_trace_t * t0, * t1;
1489 /* Prefetch next iteration. */
1490 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1491 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1496 b0 = vlib_get_buffer (vm, bi0);
1497 b1 = vlib_get_buffer (vm, bi1);
1499 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1501 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1502 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1503 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1504 t0->fib_index = vec_elt (im->fib_index_by_sw_if_index,
1505 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1506 clib_memcpy (t0->packet_data,
1507 vlib_buffer_get_current (b0),
1508 sizeof (t0->packet_data));
1510 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1512 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1513 t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1514 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1515 t1->fib_index = vec_elt (im->fib_index_by_sw_if_index,
1516 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1517 clib_memcpy (t1->packet_data,
1518 vlib_buffer_get_current (b1),
1519 sizeof (t1->packet_data));
1529 ip4_forward_next_trace_t * t0;
1533 b0 = vlib_get_buffer (vm, bi0);
1535 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1537 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1538 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1539 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1540 t0->fib_index = vec_elt (im->fib_index_by_sw_if_index,
1541 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1542 clib_memcpy (t0->packet_data,
1543 vlib_buffer_get_current (b0),
1544 sizeof (t0->packet_data));
1552 ip4_drop_or_punt (vlib_main_t * vm,
1553 vlib_node_runtime_t * node,
1554 vlib_frame_t * frame,
1555 ip4_error_t error_code)
1557 u32 * buffers = vlib_frame_vector_args (frame);
1558 uword n_packets = frame->n_vectors;
1560 vlib_error_drop_buffers (vm, node,
1565 ip4_input_node.index,
1568 if (node->flags & VLIB_NODE_FLAG_TRACE)
1569 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1575 ip4_drop (vlib_main_t * vm,
1576 vlib_node_runtime_t * node,
1577 vlib_frame_t * frame)
1578 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1581 ip4_punt (vlib_main_t * vm,
1582 vlib_node_runtime_t * node,
1583 vlib_frame_t * frame)
1584 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1587 ip4_miss (vlib_main_t * vm,
1588 vlib_node_runtime_t * node,
1589 vlib_frame_t * frame)
1590 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); }
1592 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1593 .function = ip4_drop,
1595 .vector_size = sizeof (u32),
1597 .format_trace = format_ip4_forward_next_trace,
1605 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1606 .function = ip4_punt,
1608 .vector_size = sizeof (u32),
1610 .format_trace = format_ip4_forward_next_trace,
1618 VLIB_REGISTER_NODE (ip4_miss_node,static) = {
1619 .function = ip4_miss,
1621 .vector_size = sizeof (u32),
1623 .format_trace = format_ip4_forward_next_trace,
1631 /* Compute TCP/UDP/ICMP4 checksum in software. */
1633 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1637 u32 ip_header_length, payload_length_host_byte_order;
1638 u32 n_this_buffer, n_bytes_left;
1640 void * data_this_buffer;
1642 /* Initialize checksum with ip header. */
1643 ip_header_length = ip4_header_bytes (ip0);
1644 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1645 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1647 if (BITS (uword) == 32)
1649 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1650 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1653 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1655 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1656 data_this_buffer = (void *) ip0 + ip_header_length;
1657 if (n_this_buffer + ip_header_length > p0->current_length)
1658 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1661 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1662 n_bytes_left -= n_this_buffer;
1663 if (n_bytes_left == 0)
1666 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1667 p0 = vlib_get_buffer (vm, p0->next_buffer);
1668 data_this_buffer = vlib_buffer_get_current (p0);
1669 n_this_buffer = p0->current_length;
1672 sum16 = ~ ip_csum_fold (sum0);
1678 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1680 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1681 udp_header_t * udp0;
1684 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1685 || ip0->protocol == IP_PROTOCOL_UDP);
1687 udp0 = (void *) (ip0 + 1);
1688 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1690 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1691 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1695 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1697 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1698 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1704 ip4_local (vlib_main_t * vm,
1705 vlib_node_runtime_t * node,
1706 vlib_frame_t * frame)
1708 ip4_main_t * im = &ip4_main;
1709 ip_lookup_main_t * lm = &im->lookup_main;
1710 ip_local_next_t next_index;
1711 u32 * from, * to_next, n_left_from, n_left_to_next;
1712 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1714 from = vlib_frame_vector_args (frame);
1715 n_left_from = frame->n_vectors;
1716 next_index = node->cached_next_index;
1718 if (node->flags & VLIB_NODE_FLAG_TRACE)
1719 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1721 while (n_left_from > 0)
1723 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1725 while (n_left_from >= 4 && n_left_to_next >= 2)
1727 vlib_buffer_t * p0, * p1;
1728 ip4_header_t * ip0, * ip1;
1729 udp_header_t * udp0, * udp1;
1730 ip4_fib_mtrie_t * mtrie0, * mtrie1;
1731 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1732 ip_adjacency_t * adj0, * adj1;
1733 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0;
1734 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1;
1735 i32 len_diff0, len_diff1;
1736 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1737 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1740 pi0 = to_next[0] = from[0];
1741 pi1 = to_next[1] = from[1];
1745 n_left_to_next -= 2;
1747 p0 = vlib_get_buffer (vm, pi0);
1748 p1 = vlib_get_buffer (vm, pi1);
1750 ip0 = vlib_buffer_get_current (p0);
1751 ip1 = vlib_buffer_get_current (p1);
1753 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1754 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1755 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1756 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1758 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1759 mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie;
1761 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1763 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1764 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1766 proto0 = ip0->protocol;
1767 proto1 = ip1->protocol;
1768 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1769 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1770 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1771 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1776 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1777 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1779 udp0 = ip4_next_header (ip0);
1780 udp1 = ip4_next_header (ip1);
1782 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1783 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1784 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1786 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1787 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1789 /* Verify UDP length. */
1790 ip_len0 = clib_net_to_host_u16 (ip0->length);
1791 ip_len1 = clib_net_to_host_u16 (ip1->length);
1792 udp_len0 = clib_net_to_host_u16 (udp0->length);
1793 udp_len1 = clib_net_to_host_u16 (udp1->length);
1795 len_diff0 = ip_len0 - udp_len0;
1796 len_diff1 = ip_len1 - udp_len1;
1798 len_diff0 = is_udp0 ? len_diff0 : 0;
1799 len_diff1 = is_udp1 ? len_diff1 : 0;
1801 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1802 & good_tcp_udp0 & good_tcp_udp1)))
1807 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1808 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1810 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1811 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1816 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1817 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1819 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1820 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1824 good_tcp_udp0 &= len_diff0 >= 0;
1825 good_tcp_udp1 &= len_diff1 >= 0;
1827 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1828 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1830 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1832 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1833 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1835 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1836 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1837 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1839 error1 = (is_tcp_udp1 && ! good_tcp_udp1
1840 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1843 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1844 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1846 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1847 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
1849 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1850 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
1852 ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
1854 /* no_default_route */ 1));
1855 ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1,
1857 /* no_default_route */ 1));
1859 adj0 = ip_get_adjacency (lm, adj_index0);
1860 adj1 = ip_get_adjacency (lm, adj_index1);
1863 * Must have a route to source otherwise we drop the packet.
1864 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1866 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1867 && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1868 && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
1869 && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1870 && ip0->dst_address.as_u32 != 0xFFFFFFFF
1871 ? IP4_ERROR_SRC_LOOKUP_MISS
/* Source-route check for the second packet of the pair.  The limited
   broadcast exemption must look at this packet's own destination (ip1);
   testing ip0 here made packet 1's fate depend on its neighbour's header. */
1873 error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1874 && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
1875 && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP
1876 && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
1877 && ip1->dst_address.as_u32 != 0xFFFFFFFF
1878 ? IP4_ERROR_SRC_LOOKUP_MISS
1881 next0 = lm->local_next_by_ip_protocol[proto0];
1882 next1 = lm->local_next_by_ip_protocol[proto1];
1884 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1885 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1887 p0->error = error0 ? error_node->errors[error0] : 0;
1888 p1->error = error1 ? error_node->errors[error1] : 0;
1890 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1892 if (PREDICT_FALSE (enqueue_code != 0))
1894 switch (enqueue_code)
1900 n_left_to_next += 1;
1901 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1907 n_left_to_next += 1;
1908 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1912 /* A B B or A B C */
1914 n_left_to_next += 2;
1915 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1916 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1919 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1921 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1928 while (n_left_from > 0 && n_left_to_next > 0)
1932 udp_header_t * udp0;
1933 ip4_fib_mtrie_t * mtrie0;
1934 ip4_fib_mtrie_leaf_t leaf0;
1935 ip_adjacency_t * adj0;
1936 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0;
1938 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1940 pi0 = to_next[0] = from[0];
1944 n_left_to_next -= 1;
1946 p0 = vlib_get_buffer (vm, pi0);
1948 ip0 = vlib_buffer_get_current (p0);
1950 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1951 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1953 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
1955 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1957 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1959 proto0 = ip0->protocol;
1960 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1961 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1965 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1967 udp0 = ip4_next_header (ip0);
1969 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1970 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1972 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1974 /* Verify UDP length. */
1975 ip_len0 = clib_net_to_host_u16 (ip0->length);
1976 udp_len0 = clib_net_to_host_u16 (udp0->length);
1978 len_diff0 = ip_len0 - udp_len0;
1980 len_diff0 = is_udp0 ? len_diff0 : 0;
1982 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1987 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1988 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1990 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1991 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1995 good_tcp_udp0 &= len_diff0 >= 0;
1997 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1999 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
2001 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
2003 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
2004 error0 = (is_tcp_udp0 && ! good_tcp_udp0
2005 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
2008 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
2010 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2011 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2013 ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
2015 /* no_default_route */ 1));
2017 adj0 = ip_get_adjacency (lm, adj_index0);
2019 /* Must have a route to source otherwise we drop the packet. */
2020 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
2021 && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE
2022 && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP
2023 && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL
2024 && ip0->dst_address.as_u32 != 0xFFFFFFFF
2025 ? IP4_ERROR_SRC_LOOKUP_MISS
2028 next0 = lm->local_next_by_ip_protocol[proto0];
2030 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
2032 p0->error = error0? error_node->errors[error0] : 0;
2034 if (PREDICT_FALSE (next0 != next_index))
2036 n_left_to_next += 1;
2037 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2040 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2043 n_left_to_next -= 1;
2047 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2050 return frame->n_vectors;
2053 VLIB_REGISTER_NODE (ip4_local_node,static) = {
2054 .function = ip4_local,
2055 .name = "ip4-local",
2056 .vector_size = sizeof (u32),
2058 .format_trace = format_ip4_forward_next_trace,
2060 .n_next_nodes = IP_LOCAL_N_NEXT,
2062 [IP_LOCAL_NEXT_DROP] = "error-drop",
2063 [IP_LOCAL_NEXT_PUNT] = "error-punt",
2064 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
2065 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
/*
 * Register node_index as the handler for locally received packets of the
 * given IP protocol.
 *
 * Adds node_index as a next node of ip4-local and stores the resulting next
 * index in the per-protocol dispatch table.  protocol must be a valid index
 * into lm->local_next_by_ip_protocol (checked with ASSERT only).
 */
2069 void ip4_register_protocol (u32 protocol, u32 node_index)
2071 vlib_main_t * vm = vlib_get_main();
2072 ip4_main_t * im = &ip4_main;
2073 ip_lookup_main_t * lm = &im->lookup_main;
2075 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2076 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler for "show ip local".
 *
 * Prints a heading, then the number of every IP protocol whose entry in the
 * ip4-local dispatch table is something other than IP_LOCAL_NEXT_PUNT.
 * input and cmd are not referenced in the visible lines.
 */
2079 static clib_error_t *
2080 show_ip_local_command_fn (vlib_main_t * vm,
2081 unformat_input_t * input,
2082 vlib_cli_command_t * cmd)
2084 ip4_main_t * im = &ip4_main;
2085 ip_lookup_main_t * lm = &im->lookup_main;
2088 vlib_cli_output (vm, "Protocols handled by ip4_local");
/* Scan the whole per-protocol table. */
2089 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
/* List only protocols whose entry is not the punt next. */
2091 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2092 vlib_cli_output (vm, "%d", i);
2099 VLIB_CLI_COMMAND (show_ip_local, static) = {
2100 .path = "show ip local",
2101 .function = show_ip_local_command_fn,
2102 .short_help = "Show ip local protocol table",
2106 ip4_arp (vlib_main_t * vm,
2107 vlib_node_runtime_t * node,
2108 vlib_frame_t * frame)
2110 vnet_main_t * vnm = vnet_get_main();
2111 ip4_main_t * im = &ip4_main;
2112 ip_lookup_main_t * lm = &im->lookup_main;
2113 u32 * from, * to_next_drop;
2114 uword n_left_from, n_left_to_next_drop, next_index;
2115 static f64 time_last_seed_change = -1e100;
2116 static u32 hash_seeds[3];
2117 static uword hash_bitmap[256 / BITS (uword)];
2120 if (node->flags & VLIB_NODE_FLAG_TRACE)
2121 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2123 time_now = vlib_time_now (vm);
2124 if (time_now - time_last_seed_change > 1e-3)
2127 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
2128 sizeof (hash_seeds));
2129 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2130 hash_seeds[i] = r[i];
2132 /* Mark all hash keys as not seen before. */
2133 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2136 time_last_seed_change = time_now;
2139 from = vlib_frame_vector_args (frame);
2140 n_left_from = frame->n_vectors;
2141 next_index = node->cached_next_index;
2142 if (next_index == IP4_ARP_NEXT_DROP)
2143 next_index = IP4_ARP_N_NEXT; /* point to first interface */
2145 while (n_left_from > 0)
2147 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2148 to_next_drop, n_left_to_next_drop);
2150 while (n_left_from > 0 && n_left_to_next_drop > 0)
2154 ethernet_header_t * eh0;
2155 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2157 ip_adjacency_t * adj0;
2161 p0 = vlib_get_buffer (vm, pi0);
2163 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2164 adj0 = ip_get_adjacency (lm, adj_index0);
2165 ip0 = vlib_buffer_get_current (p0);
2167 /* If packet destination is not local, send ARP to next hop */
2168 if (adj0->arp.next_hop.ip4.as_u32)
2169 ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32;
2172 * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP
2173 * rewrite to this packet, we need to skip it here.
2174 * Note, to distinguish from src IP addr *.8.6.*, we
2175 * check for a bcast eth dest instead of IPv4 version.
2177 eh0 = (ethernet_header_t*)ip0;
2178 if ((ip0->ip_version_and_header_length & 0xF0) != 0x40)
2181 u16 * etype = &eh0->type;
2182 while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q
2183 || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad
2186 etype += 2; //vlan tag also 16 bits, same as etype
2188 if (*etype == clib_host_to_net_u16 (0x0806)) //arp
2190 vlib_buffer_advance (
2191 p0, sizeof(ethernet_header_t) + (4*vlan_num));
2192 ip0 = vlib_buffer_get_current (p0);
2200 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2201 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2203 a0 ^= ip0->dst_address.data_u32;
2206 hash_v3_finalize32 (a0, b0, c0);
2208 c0 &= BITS (hash_bitmap) - 1;
/* Split the hash into (word, bit) BEFORE overwriting c0 with the word index.
   Deriving the bit from the already-divided value meant only bits 0..3 of
   each word were ever used, collapsing the filter to a handful of buckets.
   m0 now holds the bit position (0 .. BITS (uword) - 1) rather than a mask,
   so a 32-bit m0 cannot truncate a mask for bit positions >= 32. */
2209 m0 = c0 % BITS (uword);
2210 c0 = c0 / BITS (uword);
2212 bm0 = hash_bitmap[c0];
/* Drop when this destination hash was already seen in the current interval. */
2213 drop0 = ((uword) bm0 >> m0) & 1;
2215 /* Mark it as seen. */
2216 hash_bitmap[c0] |= (uword) 1 << m0;
2220 to_next_drop[0] = pi0;
2222 n_left_to_next_drop -= 1;
2224 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
2230 * Can happen if the control-plane is programming tables
2231 * with traffic flowing; at least that's today's lame excuse.
2233 if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)
2235 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2238 /* Send ARP request. */
2242 ethernet_arp_header_t * h0;
2243 vnet_hw_interface_t * hw_if0;
2245 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
2247 /* Add rewrite/encap string for ARP packet. */
2248 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2250 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2252 /* Src ethernet address in ARP header. */
2253 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2254 sizeof (h0->ip4_over_ethernet[0].ethernet));
2256 ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0);
2258 /* Copy in destination address we are requesting. */
2259 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2261 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2262 b0 = vlib_get_buffer (vm, bi0);
2263 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2265 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2267 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2271 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2274 return frame->n_vectors;
2277 static char * ip4_arp_error_strings[] = {
2278 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2279 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2280 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2281 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2282 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2285 VLIB_REGISTER_NODE (ip4_arp_node) = {
2286 .function = ip4_arp,
2288 .vector_size = sizeof (u32),
2290 .format_trace = format_ip4_forward_next_trace,
2292 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2293 .error_strings = ip4_arp_error_strings,
2295 .n_next_nodes = IP4_ARP_N_NEXT,
2297 [IP4_ARP_NEXT_DROP] = "error-drop",
2301 #define foreach_notrace_ip4_arp_error \
2307 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2309 vlib_node_runtime_t *rt =
2310 vlib_node_get_runtime (vm, ip4_arp_node.index);
2312 /* don't trace ARP request packets */
2314 vnet_pcap_drop_trace_filter_add_del \
2315 (rt->errors[IP4_ARP_ERROR_##a], \
2317 foreach_notrace_ip4_arp_error;
2322 VLIB_INIT_FUNCTION(arp_notrace_init);
2325 /* Send an ARP request to see if given destination is reachable on given interface. */
2327 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2329 vnet_main_t * vnm = vnet_get_main();
2330 ip4_main_t * im = &ip4_main;
2331 ethernet_arp_header_t * h;
2332 ip4_address_t * src;
2333 ip_interface_address_t * ia;
2334 ip_adjacency_t * adj;
2335 vnet_hw_interface_t * hi;
2336 vnet_sw_interface_t * si;
2340 si = vnet_get_sw_interface (vnm, sw_if_index);
2342 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2344 return clib_error_return (0, "%U: interface %U down",
2345 format_ip4_address, dst,
2346 format_vnet_sw_if_index_name, vnm,
2350 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2353 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2354 return clib_error_return
2355 (0, "no matching interface address for destination %U (interface %U)",
2356 format_ip4_address, dst,
2357 format_vnet_sw_if_index_name, vnm, sw_if_index);
2360 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2362 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2364 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2366 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2368 h->ip4_over_ethernet[0].ip4 = src[0];
2369 h->ip4_over_ethernet[1].ip4 = dst[0];
2371 b = vlib_get_buffer (vm, bi);
2372 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2374 /* Add encapsulation string for software interface (e.g. ethernet header). */
2375 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2376 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2379 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2380 u32 * to_next = vlib_frame_vector_args (f);
2383 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2386 return /* no error */ 0;
2390 IP4_REWRITE_NEXT_DROP,
2391 IP4_REWRITE_NEXT_ARP,
2392 } ip4_rewrite_next_t;
/*
 * ip4_rewrite_inline: shared body of ip4_rewrite_transit and
 * ip4_rewrite_local.
 *
 * For every buffer in the frame it fetches the adjacency selected by
 * ip.adj_index[adj_rx_tx], applies the adjacency rewrite
 * (vnet_rewrite_one_header / vnet_rewrite_two_headers), moves
 * current_data / current_length by rewrite_header.data_bytes, sets
 * sw_if_index[VLIB_TX] from the adjacency, stores an error on the buffer
 * and enqueues it to the selected next node.
 *
 * vm, node, frame: the usual node-function arguments; the buffer indices
 *   are read from the frame's vector args.
 * rewrite_for_locally_received_packets: when non-zero the adjacency index
 *   is read from the VLIB_RX slot, the checksum update block is skipped,
 *   and LOCAL / ARP adjacencies are special-cased; when zero the VLIB_TX
 *   slot is used and the header checksum is adjusted.
 *
 * Returns frame->n_vectors.
 */
2395 ip4_rewrite_inline (vlib_main_t * vm,
2396 vlib_node_runtime_t * node,
2397 vlib_frame_t * frame,
2398 int rewrite_for_locally_received_packets)
2400 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2401 u32 * from = vlib_frame_vector_args (frame);
2402 u32 n_left_from, n_left_to_next, * to_next, next_index;
/* Buffer errors are taken from ip4_input_node's runtime error table
   (see the p0->error / p1->error assignments below). */
2403 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
/* Selects which ip.adj_index[] slot holds the adjacency for this pass. */
2404 vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2406 n_left_from = frame->n_vectors;
2407 next_index = node->cached_next_index;
2408 u32 cpu_index = os_get_cpu_number();
2410 while (n_left_from > 0)
2412 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: handles two packets per iteration; requires four remaining
   because from[2] and from[3] are prefetched for the next iteration. */
2414 while (n_left_from >= 4 && n_left_to_next >= 2)
2416 ip_adjacency_t * adj0, * adj1;
2417 vlib_buffer_t * p0, * p1;
2418 ip4_header_t * ip0, * ip1;
2419 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2420 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2421 u32 next0_override, next1_override;
2423 if (rewrite_for_locally_received_packets)
2424 next0_override = next1_override = 0;
2426 /* Prefetch next iteration. */
2428 vlib_buffer_t * p2, * p3;
2430 p2 = vlib_get_buffer (vm, from[2]);
2431 p3 = vlib_get_buffer (vm, from[3]);
2433 vlib_prefetch_buffer_header (p2, STORE);
2434 vlib_prefetch_buffer_header (p3, STORE);
2436 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2437 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
/* Speculatively enqueue both buffers to the current next frame. */
2440 pi0 = to_next[0] = from[0];
2441 pi1 = to_next[1] = from[1];
2446 n_left_to_next -= 2;
2448 p0 = vlib_get_buffer (vm, pi0);
2449 p1 = vlib_get_buffer (vm, pi1);
2451 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2452 adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2454 /* We should never rewrite a pkt using the MISS adjacency */
2455 ASSERT(adj_index0 && adj_index1);
2457 ip0 = vlib_buffer_get_current (p0);
2458 ip1 = vlib_buffer_get_current (p1);
2460 error0 = error1 = IP4_ERROR_NONE;
2462 /* Decrement TTL & update checksum.
2463 Works either endian, so no need for byte swap. */
2464 if (! rewrite_for_locally_received_packets)
2466 i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2468 /* Input node should have rejected packets with ttl 0. */
2469 ASSERT (ip0->ttl > 0);
2470 ASSERT (ip1->ttl > 0);
/* Add 0x0100 (converted with clib_host_to_net_u16) to the stored
   checksum, then add 1 more when the 32-bit sum is >= 0xffff.
   Presumably this compensates for the TTL decrement; the TTL store
   itself is not visible in this listing. */
2472 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2473 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2475 checksum0 += checksum0 >= 0xffff;
2476 checksum1 += checksum1 >= 0xffff;
2478 ip0->checksum = checksum0;
2479 ip1->checksum = checksum1;
2487 error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2488 error1 = ttl1 <= 0 ? IP4_ERROR_TIME_EXPIRED : error1;
2490 /* Verify checksum. */
2491 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2492 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2495 /* Rewrite packet header and updates lengths. */
2496 adj0 = ip_get_adjacency (lm, adj_index0);
2497 adj1 = ip_get_adjacency (lm, adj_index1);
2499 if (rewrite_for_locally_received_packets)
2502 * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2503 * we end up here with a local adjacency in hand
2504 * The local adj rewrite data is 0xfefe on purpose.
2505 * Bad engineer, no donut for you.
2507 if (PREDICT_FALSE(adj0->lookup_next_index
2508 == IP_LOOKUP_NEXT_LOCAL))
2509 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2510 if (PREDICT_FALSE(adj0->lookup_next_index
2511 == IP_LOOKUP_NEXT_ARP))
2512 next0_override = IP4_REWRITE_NEXT_ARP;
2513 if (PREDICT_FALSE(adj1->lookup_next_index
2514 == IP_LOOKUP_NEXT_LOCAL))
2515 error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2516 if (PREDICT_FALSE(adj1->lookup_next_index
2517 == IP_LOOKUP_NEXT_ARP))
2518 next1_override = IP4_REWRITE_NEXT_ARP;
2521 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2522 rw_len0 = adj0[0].rewrite_header.data_bytes;
2523 rw_len1 = adj1[0].rewrite_header.data_bytes;
/* Next index comes from the adjacency unless an error was recorded, in
   which case it is 0.  For locally received packets a non-zero override
   replaces it, but only when the next index is itself non-zero. */
2524 next0 = (error0 == IP4_ERROR_NONE)
2525 ? adj0[0].rewrite_header.next_index : 0;
2527 if (rewrite_for_locally_received_packets)
2528 next0 = next0 && next0_override ? next0_override : next0;
2530 next1 = (error1 == IP4_ERROR_NONE)
2531 ? adj1[0].rewrite_header.next_index : 0;
2533 if (rewrite_for_locally_received_packets)
2534 next1 = next1 && next1_override ? next1_override : next1;
2537 * We've already accounted for an ethernet_header_t elsewhere
2539 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2540 vlib_increment_combined_counter
2541 (&lm->adjacency_counters,
2542 cpu_index, adj_index0,
2543 /* packet increment */ 0,
2544 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
/* Same accounting for the second packet: only the rewrite bytes beyond
   sizeof(ethernet_header_t) are added, with a packet increment of 0. */
2546 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2547 vlib_increment_combined_counter
2548 (&lm->adjacency_counters,
2549 cpu_index, adj_index1,
2550 /* packet increment */ 0,
2551 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
/* NOTE(review): in the lines visible here next0/next1 were already
   derived from error0/error1 above, so an IP4_ERROR_MTU_EXCEEDED set by
   the check below does not change the next index in this dual loop.
   The single-packet loop further down computes next0 after its MTU
   check.  Confirm whether this difference is intended. */
2553 /* Check MTU of outgoing interface. */
2554 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2555 ? IP4_ERROR_MTU_EXCEEDED
2557 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2558 ? IP4_ERROR_MTU_EXCEEDED
/* Grow each buffer at the front by the rewrite length. */
2561 p0->current_data -= rw_len0;
2562 p1->current_data -= rw_len1;
2564 p0->current_length += rw_len0;
2565 p1->current_length += rw_len1;
2567 vnet_buffer (p0)->sw_if_index[VLIB_TX] = adj0[0].rewrite_header.sw_if_index;
2568 vnet_buffer (p1)->sw_if_index[VLIB_TX] = adj1[0].rewrite_header.sw_if_index;
2570 p0->error = error_node->errors[error0];
2571 p1->error = error_node->errors[error1];
2573 /* Guess we are only writing on simple Ethernet header. */
2574 vnet_rewrite_two_headers (adj0[0], adj1[0],
2576 sizeof (ethernet_header_t));
2578 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2579 to_next, n_left_to_next,
2580 pi0, pi1, next0, next1);
/* Single loop: handles the remaining packets one at a time. */
2583 while (n_left_from > 0 && n_left_to_next > 0)
2585 ip_adjacency_t * adj0;
2588 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2591 if (rewrite_for_locally_received_packets)
2594 pi0 = to_next[0] = from[0];
2596 p0 = vlib_get_buffer (vm, pi0);
2598 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2600 /* We should never rewrite a pkt using the MISS adjacency */
2603 adj0 = ip_get_adjacency (lm, adj_index0);
2605 ip0 = vlib_buffer_get_current (p0);
2607 error0 = IP4_ERROR_NONE;
2608 next0 = 0; /* drop on error */
2610 /* Decrement TTL & update checksum. */
2611 if (! rewrite_for_locally_received_packets)
2613 i32 ttl0 = ip0->ttl;
2615 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2617 checksum0 += checksum0 >= 0xffff;
2619 ip0->checksum = checksum0;
2621 ASSERT (ip0->ttl > 0);
2627 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2629 error0 = ttl0 <= 0 ? IP4_ERROR_TIME_EXPIRED : error0;
2632 if (rewrite_for_locally_received_packets)
2635 * If someone sends e.g. an icmp4 w/ src = dst = interface addr,
2636 * we end up here with a local adjacency in hand
2637 * The local adj rewrite data is 0xfefe on purpose.
2638 * Bad engineer, no donut for you.
2640 if (PREDICT_FALSE(adj0->lookup_next_index
2641 == IP_LOOKUP_NEXT_LOCAL))
2642 error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS;
2644 * We have to override the next_index in ARP adjacencies,
2645 * because they're set up for ip4-arp, not this node...
2647 if (PREDICT_FALSE(adj0->lookup_next_index
2648 == IP_LOOKUP_NEXT_ARP))
2649 next0_override = IP4_REWRITE_NEXT_ARP;
2652 /* Guess we are only writing on simple Ethernet header. */
2653 vnet_rewrite_one_header (adj0[0], ip0,
2654 sizeof (ethernet_header_t));
2656 /* Update packet buffer attributes/set output interface. */
2657 rw_len0 = adj0[0].rewrite_header.data_bytes;
2659 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2660 vlib_increment_combined_counter
2661 (&lm->adjacency_counters,
2662 cpu_index, adj_index0,
2663 /* packet increment */ 0,
2664 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2666 /* Check MTU of outgoing interface. */
2667 error0 = (vlib_buffer_length_in_chain (vm, p0)
2668 > adj0[0].rewrite_header.max_l3_packet_bytes
2669 ? IP4_ERROR_MTU_EXCEEDED
2672 p0->error = error_node->errors[error0];
2673 p0->current_data -= rw_len0;
2674 p0->current_length += rw_len0;
2675 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2676 adj0[0].rewrite_header.sw_if_index;
/* Here the next index is chosen after the MTU check, so any recorded
   error (including MTU exceeded) yields next index 0. */
2678 next0 = (error0 == IP4_ERROR_NONE)
2679 ? adj0[0].rewrite_header.next_index : 0;
2681 if (rewrite_for_locally_received_packets)
2682 next0 = next0 && next0_override ? next0_override : next0;
2687 n_left_to_next -= 1;
2689 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2690 to_next, n_left_to_next,
2694 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2697 /* Need to do trace after rewrites to pick up new packet data. */
2698 if (node->flags & VLIB_NODE_FLAG_TRACE)
2699 ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2701 return frame->n_vectors;
/* Node function of ip4_rewrite_node ("ip4-rewrite-transit"): runs
   ip4_rewrite_inline with rewrite_for_locally_received_packets = 0 and
   returns its result. */
2705 ip4_rewrite_transit (vlib_main_t * vm,
2706 vlib_node_runtime_t * node,
2707 vlib_frame_t * frame)
2709 return ip4_rewrite_inline (vm, node, frame,
2710 /* rewrite_for_locally_received_packets */ 0);
/* Node function of ip4_rewrite_local_node ("ip4-rewrite-local"): runs
   ip4_rewrite_inline with rewrite_for_locally_received_packets = 1 and
   returns its result. */
2714 ip4_rewrite_local (vlib_main_t * vm,
2715 vlib_node_runtime_t * node,
2716 vlib_frame_t * frame)
2718 return ip4_rewrite_inline (vm, node, frame,
2719 /* rewrite_for_locally_received_packets */ 1);
/* Registration of the "ip4-rewrite-transit" graph node, served by
   ip4_rewrite_transit.  Its next nodes are "error-drop" and "ip4-arp",
   indexed by ip4_rewrite_next_t. */
2722 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2723 .function = ip4_rewrite_transit,
2724 .name = "ip4-rewrite-transit",
2725 .vector_size = sizeof (u32),
2727 .format_trace = format_ip4_forward_next_trace,
2731 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2732 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
/* Registration of the "ip4-rewrite-local" graph node, served by
   ip4_rewrite_local and declared as a sibling of "ip4-rewrite-transit".
   It lists the same two next nodes as the transit node. */
2736 VLIB_REGISTER_NODE (ip4_rewrite_local_node,static) = {
2737 .function = ip4_rewrite_local,
2738 .name = "ip4-rewrite-local",
2739 .vector_size = sizeof (u32),
2741 .sibling_of = "ip4-rewrite-transit",
2743 .format_trace = format_ip4_forward_next_trace,
2747 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2748 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
/*
 * CLI handler for "set interface ip table".
 *
 * Parses a software interface followed by a numeric table id, finds the
 * ip4 FIB for that table id (IP4_ROUTE_FLAG_TABLE_ID) and records the
 * FIB's index in im->fib_index_by_sw_if_index[sw_if_index].
 *
 * Produces a clib error when the interface or the table id cannot be
 * parsed.
 * NOTE(review): despite the name and help text, no "delete" path is
 * visible in this listing.
 */
2752 static clib_error_t *
2753 add_del_interface_table (vlib_main_t * vm,
2754 unformat_input_t * input,
2755 vlib_cli_command_t * cmd)
2757 vnet_main_t * vnm = vnet_get_main();
2758 clib_error_t * error = 0;
2759 u32 sw_if_index, table_id;
2763 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2765 error = clib_error_return (0, "unknown interface `%U'",
2766 format_unformat_error, input);
2770 if (unformat (input, "%d", &table_id))
2774 error = clib_error_return (0, "expected table id `%U'",
2775 format_unformat_error, input);
2780 ip4_main_t * im = &ip4_main;
2781 ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID);
/* Grow the per-interface vector if needed before storing the binding. */
2785 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2786 im->fib_index_by_sw_if_index[sw_if_index] = fib->index;
/* Registers the "set interface ip table" CLI command, handled by
   add_del_interface_table. */
2794 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2795 .path = "set interface ip table",
2796 .function = add_del_interface_table,
2797 .short_help = "Add/delete FIB table id for interface",
/*
 * ip4_lookup_multicast: node function of "ip4-lookup-multicast".
 *
 * For every buffer in the frame:
 *  - picks a FIB index from im->fib_index_by_sw_if_index[] using the RX
 *    interface, unless sw_if_index[VLIB_TX] is not ~0, in which case that
 *    value is used as the FIB index;
 *  - looks up the destination address with ip4_fib_lookup_buffer;
 *  - computes ip.flow_hash using the FIB's flow_hash_config and adds
 *    (flow_hash & (n_adj - 1)) to the adjacency index;
 *  - stores the result in ip.adj_index[VLIB_TX] and bumps the adjacency
 *    combined counter by one packet and the buffer chain length;
 *  - enqueues the buffer to the adjacency's lookup_next_index.
 *
 * Returns frame->n_vectors.
 */
2802 ip4_lookup_multicast (vlib_main_t * vm,
2803 vlib_node_runtime_t * node,
2804 vlib_frame_t * frame)
2806 ip4_main_t * im = &ip4_main;
2807 ip_lookup_main_t * lm = &im->lookup_main;
2808 vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters;
2809 u32 n_left_from, n_left_to_next, * from, * to_next;
2810 ip_lookup_next_t next;
2811 u32 cpu_index = os_get_cpu_number();
2813 from = vlib_frame_vector_args (frame);
2814 n_left_from = frame->n_vectors;
2815 next = node->cached_next_index;
2817 while (n_left_from > 0)
2819 vlib_get_next_frame (vm, node, next,
2820 to_next, n_left_to_next);
/* Dual loop: two packets per iteration, with from[2] and from[3]
   prefetched for the following iteration. */
2822 while (n_left_from >= 4 && n_left_to_next >= 2)
2824 vlib_buffer_t * p0, * p1;
2825 u32 pi0, pi1, adj_index0, adj_index1, wrong_next;
2826 ip_lookup_next_t next0, next1;
2827 ip4_header_t * ip0, * ip1;
2828 ip_adjacency_t * adj0, * adj1;
2829 u32 fib_index0, fib_index1;
2830 u32 flow_hash_config0, flow_hash_config1;
2832 /* Prefetch next iteration. */
2834 vlib_buffer_t * p2, * p3;
2836 p2 = vlib_get_buffer (vm, from[2]);
2837 p3 = vlib_get_buffer (vm, from[3]);
2839 vlib_prefetch_buffer_header (p2, LOAD);
2840 vlib_prefetch_buffer_header (p3, LOAD);
2842 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2843 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2846 pi0 = to_next[0] = from[0];
2847 pi1 = to_next[1] = from[1];
2849 p0 = vlib_get_buffer (vm, pi0);
2850 p1 = vlib_get_buffer (vm, pi1);
2852 ip0 = vlib_buffer_get_current (p0);
2853 ip1 = vlib_buffer_get_current (p1);
/* FIB index defaults to the RX interface's FIB; a sw_if_index[VLIB_TX]
   other than ~0 replaces it. */
2855 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2856 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2857 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2858 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2859 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2860 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2862 adj_index0 = ip4_fib_lookup_buffer (im, fib_index0,
2863 &ip0->dst_address, p0);
2864 adj_index1 = ip4_fib_lookup_buffer (im, fib_index1,
2865 &ip1->dst_address, p1);
2867 adj0 = ip_get_adjacency (lm, adj_index0);
2868 adj1 = ip_get_adjacency (lm, adj_index1);
2870 next0 = adj0->lookup_next_index;
2871 next1 = adj1->lookup_next_index;
2874 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2877 vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config;
2879 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2880 (ip0, flow_hash_config0);
2882 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2883 (ip1, flow_hash_config1);
/* n_adj is asserted to be a non-zero power of two, so masking the flow
   hash with (n_adj - 1) yields an offset in [0, n_adj). */
2885 ASSERT (adj0->n_adj > 0);
2886 ASSERT (adj1->n_adj > 0);
2887 ASSERT (is_pow2 (adj0->n_adj));
2888 ASSERT (is_pow2 (adj1->n_adj));
2889 adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2890 adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1));
2892 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2893 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1;
2895 if (1) /* $$$$$$ HACK FIXME */
2896 vlib_increment_combined_counter
2897 (cm, cpu_index, adj_index0, 1,
2898 vlib_buffer_length_in_chain (vm, p0));
2899 if (1) /* $$$$$$ HACK FIXME */
2900 vlib_increment_combined_counter
2901 (cm, cpu_index, adj_index1, 1,
2902 vlib_buffer_length_in_chain (vm, p1));
2906 n_left_to_next -= 2;
/* wrong_next: 1 is added when p0's next differs from the cached next,
   2 when p1's does; non-zero means the speculative enqueue must be
   corrected. */
2909 wrong_next = (next0 != next) + 2*(next1 != next);
2910 if (PREDICT_FALSE (wrong_next != 0))
2918 n_left_to_next += 1;
2919 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2925 n_left_to_next += 1;
2926 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2932 n_left_to_next += 2;
2933 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2934 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2938 vlib_put_next_frame (vm, node, next, n_left_to_next);
2940 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single loop: same processing for the remaining packets, one at a
   time. */
2946 while (n_left_from > 0 && n_left_to_next > 0)
2950 u32 pi0, adj_index0;
2951 ip_lookup_next_t next0;
2952 ip_adjacency_t * adj0;
2954 u32 flow_hash_config0;
2959 p0 = vlib_get_buffer (vm, pi0);
2961 ip0 = vlib_buffer_get_current (p0);
2963 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2964 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2965 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2966 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2968 adj_index0 = ip4_fib_lookup_buffer (im, fib_index0,
2969 &ip0->dst_address, p0);
2971 adj0 = ip_get_adjacency (lm, adj_index0);
2973 next0 = adj0->lookup_next_index;
2976 vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config;
2978 vnet_buffer (p0)->ip.flow_hash =
2979 ip4_compute_flow_hash (ip0, flow_hash_config0);
2981 ASSERT (adj0->n_adj > 0);
2982 ASSERT (is_pow2 (adj0->n_adj));
2983 adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1));
2985 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
2987 if (1) /* $$$$$$ HACK FIXME */
2988 vlib_increment_combined_counter
2989 (cm, cpu_index, adj_index0, 1,
2990 vlib_buffer_length_in_chain (vm, p0));
2994 n_left_to_next -= 1;
/* When the packet's next differs from the cached one, the slot is given
   back, the current frame is put, and a new next frame is fetched. */
2997 if (PREDICT_FALSE (next0 != next))
2999 n_left_to_next += 1;
3000 vlib_put_next_frame (vm, node, next, n_left_to_next);
3002 vlib_get_next_frame (vm, node, next,
3003 to_next, n_left_to_next);
3006 n_left_to_next -= 1;
3010 vlib_put_next_frame (vm, node, next, n_left_to_next);
3013 return frame->n_vectors;
/* Registration of the "ip4-lookup-multicast" graph node, served by
   ip4_lookup_multicast.  Its next nodes come from IP4_LOOKUP_NEXT_NODES
   (IP_LOOKUP_N_NEXT entries). */
3016 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
3017 .function = ip4_lookup_multicast,
3018 .name = "ip4-lookup-multicast",
3019 .vector_size = sizeof (u32),
3021 .n_next_nodes = IP_LOOKUP_N_NEXT,
3022 .next_nodes = IP4_LOOKUP_NEXT_NODES,
/* Registration of the "ip4-multicast" graph node.  Its node function is
   ip4_drop, defined elsewhere in this file. */
3025 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3026 .function = ip4_drop,
3027 .name = "ip4-multicast",
3028 .vector_size = sizeof (u32),
3030 .format_trace = format_ip4_forward_next_trace,
/*
 * ip4_lookup_validate: cross-checks two lookup paths for one address.
 *
 * Walks the mtrie of FIB fib_index0 for address a with four
 * ip4_fib_mtrie_lookup_step calls (steps 0..3), substitutes the mtrie's
 * default_leaf when the result is IP4_FIB_MTRIE_LEAF_EMPTY, and compares
 * the leaf's adjacency index with the value returned by
 * ip4_fib_lookup_with_table.
 *
 * Returns non-zero when both adjacency indices are equal, 0 otherwise.
 */
3038 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3040 ip4_main_t * im = &ip4_main;
3041 ip4_fib_mtrie_t * mtrie0;
3042 ip4_fib_mtrie_leaf_t leaf0;
3045 mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie;
3047 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3048 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3049 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3050 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3051 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3053 /* Handle default route. */
3054 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3056 adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3058 return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0,
3060 /* no_default_route */ 0);
/*
 * CLI handler for "test lookup".
 *
 * Accepts "table <n>", "count <f>" and an IPv4 base address in any
 * order; any other token yields an "unknown input" error.  It then calls
 * ip4_lookup_validate n times, advancing the address by one (in host
 * byte order) after each call, and prints either the number of errors or
 * a "No errors" summary.
 * NOTE(review): how n is derived from the parsed count, and how table_id
 * is passed as the fib index, depends on lines not visible here.
 */
3063 static clib_error_t *
3064 test_lookup_command_fn (vlib_main_t * vm,
3065 unformat_input_t * input,
3066 vlib_cli_command_t * cmd)
3072 ip4_address_t ip4_base_address;
3075 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3076 if (unformat (input, "table %d", &table_id))
3078 else if (unformat (input, "count %f", &count))
3081 else if (unformat (input, "%U",
3082 unformat_ip4_address, &ip4_base_address))
3085 return clib_error_return (0, "unknown input `%U'",
3086 format_unformat_error, input);
3091 for (i = 0; i < n; i++)
3093 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Step to the next address: convert to host order, add one, convert
   back to network order. */
3096 ip4_base_address.as_u32 =
3097 clib_host_to_net_u32 (1 +
3098 clib_net_to_host_u32 (ip4_base_address.as_u32));
3102 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3104 vlib_cli_output (vm, "No errors in %d lookups\n", n);
/* Registers the "test lookup" CLI command, handled by
   test_lookup_command_fn. */
3109 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3110 .path = "test lookup",
3111 .short_help = "test lookup",
3112 .function = test_lookup_command_fn,
/*
 * vnet_set_ip4_flow_hash: stores flow_hash_config in the ip4 FIB
 * identified by table_id.
 *
 * The FIB index is looked up in im4->fib_index_by_table_id.  The
 * function can return VNET_API_ERROR_NO_SUCH_FIB (presumably when the
 * lookup finds nothing; the guarding condition is not visible here).
 * The success return value is likewise not visible in this listing.
 */
3115 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3117 ip4_main_t * im4 = &ip4_main;
3119 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3122 return VNET_API_ERROR_NO_SUCH_FIB;
3124 fib = vec_elt_at_index (im4->fibs, p[0]);
3126 fib->flow_hash_config = flow_hash_config;
/*
 * CLI handler for "set ip flow-hash".
 *
 * Parses "table <n>" plus any of the keywords generated by
 * foreach_flow_hash_bit, OR-ing the matching bit into flow_hash_config,
 * then calls vnet_set_ip4_flow_hash.  Unrecognized input yields an
 * "unknown input" error; VNET_API_ERROR_NO_SUCH_FIB is reported as
 * "no such FIB table"; a clib_warning is emitted for an unexpected
 * result (the enclosing switch labels are not visible here).
 */
3130 static clib_error_t *
3131 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3132 unformat_input_t * input,
3133 vlib_cli_command_t * cmd)
3137 u32 flow_hash_config = 0;
3140 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3141 if (unformat (input, "table %d", &table_id))
3144 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3145 foreach_flow_hash_bit
3151 return clib_error_return (0, "unknown input `%U'",
3152 format_unformat_error, input);
3154 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3160 case VNET_API_ERROR_NO_SUCH_FIB:
3161 return clib_error_return (0, "no such FIB table %d", table_id);
3164 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
/* Registers the "set ip flow-hash" CLI command, handled by
   set_ip_flow_hash_command_fn.
   NOTE(review): the help text begins with "set ip table flow-hash",
   which does not match .path ("set ip flow-hash"); confirm which is
   intended. */
3171 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3172 .path = "set ip flow-hash",
3174 "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3175 .function = set_ip_flow_hash_command_fn,
/*
 * vnet_set_ip4_classify_intfc: binds a classifier table index to a
 * software interface for ip4.
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * index in the sw_interfaces pool, and VNET_API_ERROR_NO_SUCH_ENTRY when
 * table_index is not ~0 and is a free index in the classifier table
 * pool.  Otherwise stores table_index in
 * lm->classify_table_index_by_sw_if_index[sw_if_index], growing the
 * vector as needed.  A table_index of ~0 skips the table check and is
 * stored as-is.  The success return value is not visible in this
 * listing.
 */
3178 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3181 vnet_main_t * vnm = vnet_get_main();
3182 vnet_interface_main_t * im = &vnm->interface_main;
3183 ip4_main_t * ipm = &ip4_main;
3184 ip_lookup_main_t * lm = &ipm->lookup_main;
3185 vnet_classify_main_t * cm = &vnet_classify_main;
3187 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3188 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3190 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3191 return VNET_API_ERROR_NO_SUCH_ENTRY;
3193 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3194 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
/*
 * CLI handler for "set ip classify".
 *
 * Parses "table-index <n>" and "intfc <interface>" in any order.  Both
 * are required: a missing table index or interface produces an error.
 * It then calls vnet_set_ip4_classify_intfc and turns
 * VNET_API_ERROR_NO_MATCHING_INTERFACE / VNET_API_ERROR_NO_SUCH_ENTRY
 * into "No such interface" / "No such classifier table" errors.
 */
3199 static clib_error_t *
3200 set_ip_classify_command_fn (vlib_main_t * vm,
3201 unformat_input_t * input,
3202 vlib_cli_command_t * cmd)
3204 u32 table_index = ~0;
/* Tracked separately because ~0 is itself an accepted table_index value. */
3205 int table_index_set = 0;
3206 u32 sw_if_index = ~0;
3209 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3210 if (unformat (input, "table-index %d", &table_index))
3211 table_index_set = 1;
3212 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3213 vnet_get_main(), &sw_if_index))
3219 if (table_index_set == 0)
3220 return clib_error_return (0, "classify table-index must be specified");
3222 if (sw_if_index == ~0)
3223 return clib_error_return (0, "interface / subif must be specified");
3225 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3232 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3233 return clib_error_return (0, "No such interface");
3235 case VNET_API_ERROR_NO_SUCH_ENTRY:
3236 return clib_error_return (0, "No such classifier table");
/* Registers the "set ip classify" CLI command, handled by
   set_ip_classify_command_fn. */
3241 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3242 .path = "set ip classify",
3244 "set ip classify intfc <int> table-index <index>",
3245 .function = set_ip_classify_command_fn,