2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h> /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
60 /** @brief IPv4 lookup node.
63 This is the main IPv4 lookup dispatch node.
65 @param vm vlib_main_t corresponding to the current thread
66 @param node vlib_node_runtime_t
67 @param frame vlib_frame_t whose contents should be dispatched
69 @par Graph mechanics: buffer metadata, next index usage
72 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73 - Indicates the @c sw_if_index value of the interface that the
74 packet was received on.
75 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76 - When the value is @c ~0 then the node performs a longest prefix
77 match (LPM) for the packet destination address in the FIB attached
78 to the receive interface.
79 - Otherwise perform LPM for the packet destination address in the
80 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81 value (0, 1, ...) and not a VRF id.
84 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85 - The lookup result adjacency index.
88 - Dispatches the packet to the node index found in
89 ip_adjacency_t @c adj->lookup_next_index
90 (where @c adj is the lookup result adjacency).
/* Graph-node entry point for "ip4-lookup": passes the frame to
   ip4_lookup_inline () and returns that call's result. */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
95 return ip4_lookup_inline (vm, node, frame);
/* Forward declaration; the formatter is defined further down in this file
   and is referenced by the node registrations. */
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/* Registration of the "ip4-lookup" node: u32-sized vector elements and
   IP_LOOKUP_N_NEXT next nodes listed by IP4_LOOKUP_NEXT_NODES. */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
103 .name = "ip4-lookup",
104 .vector_size = sizeof (u32),
105 .format_trace = format_ip4_lookup_trace,
106 .n_next_nodes = IP_LOOKUP_N_NEXT,
107 .next_nodes = IP4_LOOKUP_NEXT_NODES,
/* "ip4-load-balance" node function.
   For every buffer it reads the load-balance index stored in
   ip.adj_index[VLIB_TX], picks one bucket (DPO) of that load-balance, writes
   the bucket's next node into nexts[] and its dpoi_index back into
   ip.adj_index[VLIB_TX], and bumps the per-load-balance "via" counter by one
   packet and the buffer chain length. Buffers are then enqueued to their
   next nodes and frame->n_vectors is returned.
   NOTE(review): the loop headers, braces and `else` lines are not part of
   this listing; the dual/single packet structure is inferred from the
   b[0]/b[1] and b[0]-only statement groups. */
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112 vlib_node_runtime_t * node,
113 vlib_frame_t * frame)
115 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
117 u32 thread_index = vm->thread_index;
118 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119 u16 nexts[VLIB_FRAME_SIZE], *next;
121 from = vlib_frame_vector_args (frame);
122 n_left = frame->n_vectors;
/* Translate the frame's buffer indices into buffer pointers up front. */
125 vlib_get_buffers (vm, from, bufs, n_left);
/* Two-buffer variant: b[0] and b[1] are handled together while b[2]/b[3]
   are prefetched. */
129 const load_balance_t *lb0, *lb1;
130 const ip4_header_t *ip0, *ip1;
131 u32 lbi0, hc0, lbi1, hc1;
132 const dpo_id_t *dpo0, *dpo1;
134 /* Prefetch next iteration. */
136 vlib_prefetch_buffer_header (b[2], LOAD);
137 vlib_prefetch_buffer_header (b[3], LOAD);
139 CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140 CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
143 ip0 = vlib_buffer_get_current (b[0]);
144 ip1 = vlib_buffer_get_current (b[1]);
145 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146 lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
148 lb0 = load_balance_get (lbi0);
149 lb1 = load_balance_get (lbi1);
152 * this node is for via FIBs we can re-use the hash value from the
153 * to node if present.
154 * We don't want to use the same hash value at each level in the recursion
155 * graph as that would lead to polarisation
/* Multi-bucket load-balance: a non-zero stored flow hash is shifted right by
   one and stored back; the ip4_compute_flow_hash () assignment is presumably
   the fallback for a zero hash. The bucket is chosen with
   hash & lb_n_buckets_minus_1. The bucket-0 lookup is presumably the
   single-bucket case. */
159 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
161 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
163 hc0 = vnet_buffer (b[0])->ip.flow_hash =
164 vnet_buffer (b[0])->ip.flow_hash >> 1;
168 hc0 = vnet_buffer (b[0])->ip.flow_hash =
169 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
171 dpo0 = load_balance_get_fwd_bucket
172 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176 dpo0 = load_balance_get_bucket_i (lb0, 0);
/* Same selection logic for the second buffer of the pair. */
178 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
180 if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
182 hc1 = vnet_buffer (b[1])->ip.flow_hash =
183 vnet_buffer (b[1])->ip.flow_hash >> 1;
187 hc1 = vnet_buffer (b[1])->ip.flow_hash =
188 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
190 dpo1 = load_balance_get_fwd_bucket
191 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195 dpo1 = load_balance_get_bucket_i (lb1, 0);
/* Publish the chosen DPO: next node for the enqueue, DPO index for the
   downstream node. */
198 next[0] = dpo0->dpoi_next_node;
199 next[1] = dpo1->dpoi_next_node;
201 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Counters are indexed by the incoming load-balance index, not by the
   selected DPO. */
204 vlib_increment_combined_counter
205 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206 vlib_increment_combined_counter
207 (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
/* Single-buffer variant of the same steps. */
216 const load_balance_t *lb0;
217 const ip4_header_t *ip0;
218 const dpo_id_t *dpo0;
221 ip0 = vlib_buffer_get_current (b[0]);
222 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
224 lb0 = load_balance_get (lbi0);
227 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
229 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
231 hc0 = vnet_buffer (b[0])->ip.flow_hash =
232 vnet_buffer (b[0])->ip.flow_hash >> 1;
236 hc0 = vnet_buffer (b[0])->ip.flow_hash =
237 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
239 dpo0 = load_balance_get_fwd_bucket
240 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244 dpo0 = load_balance_get_bucket_i (lb0, 0);
247 next[0] = dpo0->dpoi_next_node;
248 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
250 vlib_increment_combined_counter
251 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
/* Hand all buffers to their next nodes, then add trace records when tracing
   is enabled on this node. */
258 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259 if (node->flags & VLIB_NODE_FLAG_TRACE)
260 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
262 return frame->n_vectors;
/* Registration of the "ip4-load-balance" node. It is declared a sibling of
   "ip4-lookup" and uses the same trace formatter as that node. */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
268 .name = "ip4-load-balance",
269 .vector_size = sizeof (u32),
270 .sibling_of = "ip4-lookup",
271 .format_trace = format_ip4_lookup_trace,
275 #ifndef CLIB_MARCH_VARIANT
/* Walks the interface addresses of sw_if_index (the unnumbered flag passed
   to the iterator is 1) and fetches an address with
   ip_interface_address_get_address (). *result_ia receives the
   interface-address entry when an address was found, otherwise 0.
   NOTE(review): the return statement and the loop body's assignment/break
   lines are not visible in this listing; presumably `result` is returned. */
276 /* get first interface address */
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279 ip_interface_address_t ** result_ia)
281 ip_lookup_main_t *lm = &im->lookup_main;
282 ip_interface_address_t *ia = 0;
283 ip4_address_t *result = 0;
286 foreach_ip_interface_address
287 (lm, ia, sw_if_index,
288 1 /* honor unnumbered */ ,
291 ip_interface_address_get_address (lm, ia);
297 *result_ia = result ? ia : 0;
/* (Re)installs the FIB_SOURCE_INTERFACE entry for a subnet broadcast prefix.
   Any existing special entry from that source is removed first. When the
   interface has VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST set, a path-based entry
   is installed via fib_table_entry_update_one_path (); the special_add call
   with DROP | LOOSE_URPF_EXEMPT is presumably the `else` case (the `else`
   line is not visible in this listing). */
303 ip4_add_subnet_bcast_route (u32 fib_index,
307 vnet_sw_interface_flags_t iflags;
309 iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
311 fib_table_entry_special_remove(fib_index,
313 FIB_SOURCE_INTERFACE);
315 if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
317 fib_table_entry_update_one_path (fib_index, pfx,
318 FIB_SOURCE_INTERFACE,
321 /* No next-hop address */
327 // no out-label stack
329 FIB_ROUTE_PATH_FLAG_NONE);
333 fib_table_entry_special_add(fib_index,
335 FIB_SOURCE_INTERFACE,
336 (FIB_ENTRY_FLAG_DROP |
337 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
/* Installs the per-prefix routes that go with interface address `a`.
   Prefixes are reference counted per (masked prefix, length, sw_if_index)
   key: an already known prefix only gets its ref_count incremented; a new one
   gets a pool entry, is recorded in prefix_to_if_prefix_index and then has
   its special routes added:
     - length <= 30: glean route for the prefix, a drop route for the base
       address and a broadcast route, the latter two only when they differ
       from the interface address itself;
     - length == 31: an attached /32 route for the other address of the pair.
   NOTE(review): brace and return lines are not visible in this listing. */
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
345 ip_interface_address_t * a)
347 ip_lookup_main_t *lm = &im->lookup_main;
348 ip_interface_prefix_t *if_prefix;
349 ip4_address_t *address = ip_interface_address_get_address (lm, a);
/* Lookup key: the address masked down to its network part. */
351 ip_interface_prefix_key_t key = {
353 .fp_len = a->address_length,
354 .fp_proto = FIB_PROTOCOL_IP4,
355 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
357 .sw_if_index = sw_if_index,
/* Scratch prefix reused for each special route added below. */
360 fib_prefix_t pfx_special = {
361 .fp_proto = FIB_PROTOCOL_IP4,
364 /* If prefix already set on interface, just increment ref count & return */
365 if_prefix = ip_get_interface_prefix (lm, &key);
368 if_prefix->ref_count += 1;
372 /* New prefix - allocate a pool entry, initialize it, add to the hash */
373 pool_get (lm->if_prefix_pool, if_prefix);
374 if_prefix->ref_count = 1;
/* Remember which interface address the prefix entry was created from (its
   index in if_address_pool). */
375 if_prefix->src_ia_index = a - lm->if_address_pool;
376 clib_memcpy (&if_prefix->key, &key, sizeof (key));
377 mhash_set (&lm->prefix_to_if_prefix_index, &key,
378 if_prefix - lm->if_prefix_pool, 0 /* old value */);
380 /* length <= 30 - add glean, drop first address, maybe drop bcast address */
381 if (a->address_length <= 30)
383 pfx_special.fp_len = a->address_length;
384 pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
386 /* set the glean route for the prefix */
387 fib_table_entry_update_one_path (fib_index, &pfx_special,
388 FIB_SOURCE_INTERFACE,
389 (FIB_ENTRY_FLAG_CONNECTED |
390 FIB_ENTRY_FLAG_ATTACHED),
392 /* No next-hop address */
395 /* invalid FIB index */
398 /* no out-label stack */
400 FIB_ROUTE_PATH_FLAG_NONE);
402 /* set a drop route for the base address of the prefix */
403 pfx_special.fp_len = 32;
404 pfx_special.fp_addr.ip4.as_u32 =
405 address->as_u32 & im->fib_masks[a->address_length];
/* Skip the drop route when the interface address is itself the base
   address. */
407 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408 fib_table_entry_special_add (fib_index, &pfx_special,
409 FIB_SOURCE_INTERFACE,
410 (FIB_ENTRY_FLAG_DROP |
411 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
413 /* set a route for the broadcast address of the prefix */
414 pfx_special.fp_len = 32;
415 pfx_special.fp_addr.ip4.as_u32 =
416 address->as_u32 | ~im->fib_masks[a->address_length];
/* Likewise skipped when the interface address equals the broadcast
   address. */
417 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418 ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422 /* length == 31 - add an attached route for the other address */
423 else if (a->address_length == 31)
425 pfx_special.fp_len = 32;
/* Flip the lowest host bit (constant converted to network byte order) to
   obtain the other address of the /31 pair. */
426 pfx_special.fp_addr.ip4.as_u32 =
427 address->as_u32 ^ clib_host_to_net_u32(1);
429 fib_table_entry_update_one_path (fib_index, &pfx_special,
430 FIB_SOURCE_INTERFACE,
431 (FIB_ENTRY_FLAG_ATTACHED),
433 &pfx_special.fp_addr,
435 /* invalid FIB index */
439 FIB_ROUTE_PATH_FLAG_NONE);
/* Adds the FIB entries for one interface address: first the per-prefix
   special routes (ip4_add_interface_prefix_routes), then an entry for the
   address itself. When the interface has a classify table configured
   (index != ~0) a classify DPO is created and installed with
   fib_table_entry_special_dpo_add (); finally a CONNECTED | LOCAL entry is
   installed with fib_table_entry_update_one_path ().
   NOTE(review): the declaration line of `pfx` and several argument lines are
   not visible in this listing. */
444 ip4_add_interface_routes (u32 sw_if_index,
445 ip4_main_t * im, u32 fib_index,
446 ip_interface_address_t * a)
448 ip_lookup_main_t *lm = &im->lookup_main;
449 ip4_address_t *address = ip_interface_address_get_address (lm, a);
452 .fp_proto = FIB_PROTOCOL_IP4,
453 .fp_addr.ip4 = *address,
456 /* set special routes for the prefix if needed */
457 ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
/* Bounds check: the per-interface classify vector may be shorter than
   sw_if_index. */
459 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
461 u32 classify_table_index =
462 lm->classify_table_index_by_sw_if_index[sw_if_index];
463 if (classify_table_index != (u32) ~ 0)
465 dpo_id_t dpo = DPO_INVALID;
470 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
472 fib_table_entry_special_dpo_add (fib_index,
475 FIB_ENTRY_FLAG_NONE, &dpo);
480 fib_table_entry_update_one_path (fib_index, &pfx,
481 FIB_SOURCE_INTERFACE,
482 (FIB_ENTRY_FLAG_CONNECTED |
483 FIB_ENTRY_FLAG_LOCAL),
490 FIB_ROUTE_PATH_FLAG_NONE);
/* Counterpart of ip4_add_interface_prefix_routes: drops one reference on the
   interface prefix that `address`/`address_length` belongs to and adjusts
   the special routes. A warning is logged when the prefix is unknown.
   Per the comments below, routes are touched only when the last address of
   the prefix goes away or when the address that served as the glean source
   was deleted; in the latter case a replacement source address is looked up
   and the glean route is re-installed with it.
   NOTE(review): brace, `else` and `return` lines are not visible in this
   listing, so the exact branch nesting (in particular which paths reach the
   final mhash_unset/pool_put) must be confirmed against the full file. */
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
497 ip4_address_t * address,
500 ip_lookup_main_t *lm = &im->lookup_main;
501 ip_interface_prefix_t *if_prefix;
/* Same key construction as on the add path: network part of the address
   plus length and interface. */
503 ip_interface_prefix_key_t key = {
505 .fp_len = address_length,
506 .fp_proto = FIB_PROTOCOL_IP4,
507 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
509 .sw_if_index = sw_if_index,
512 fib_prefix_t pfx_special = {
514 .fp_proto = FIB_PROTOCOL_IP4,
517 if_prefix = ip_get_interface_prefix (lm, &key);
520 clib_warning ("Prefix not found while deleting %U",
521 format_ip4_address_and_length, address, address_length);
525 if_prefix->ref_count -= 1;
528 * Routes need to be adjusted if:
529 * - deleting last intf addr in prefix
530 * - deleting intf addr used as default source address in glean adjacency
532 * We're done now otherwise
/* True when other addresses remain in the prefix and the recorded source
   address entry is still allocated. */
534 if ((if_prefix->ref_count > 0) &&
535 !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
538 /* length <= 30, delete glean route, first address, last address */
539 if (address_length <= 30)
542 /* remove glean route for prefix */
543 pfx_special.fp_addr.ip4 = *address;
544 pfx_special.fp_len = address_length;
545 fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
547 /* if no more intf addresses in prefix, remove other special routes */
548 if (!if_prefix->ref_count)
550 /* first address in prefix */
551 pfx_special.fp_addr.ip4.as_u32 =
552 address->as_u32 & im->fib_masks[address_length];
553 pfx_special.fp_len = 32;
/* Mirrors the add path: no special route exists when the interface address
   coincides with the base address. */
555 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556 fib_table_entry_special_remove (fib_index,
558 FIB_SOURCE_INTERFACE);
560 /* prefix broadcast address */
561 pfx_special.fp_addr.ip4.as_u32 =
562 address->as_u32 | ~im->fib_masks[address_length];
563 pfx_special.fp_len = 32;
565 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
566 fib_table_entry_special_remove (fib_index,
568 FIB_SOURCE_INTERFACE);
571 /* default source addr just got deleted, find another */
573 ip_interface_address_t *new_src_ia = NULL;
574 ip4_address_t *new_src_addr = NULL;
/* NOTE(review): the assignment target of this call is not visible;
   presumably new_src_addr, which is dereferenced below. */
577 ip4_interface_address_matching_destination
578 (im, address, sw_if_index, &new_src_ia);
580 if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
582 pfx_special.fp_len = address_length;
583 pfx_special.fp_addr.ip4 = *new_src_addr;
585 /* set new glean route for the prefix */
586 fib_table_entry_update_one_path (fib_index, &pfx_special,
587 FIB_SOURCE_INTERFACE,
588 (FIB_ENTRY_FLAG_CONNECTED |
589 FIB_ENTRY_FLAG_ATTACHED),
591 /* No next-hop address */
594 /* invalid FIB index */
597 /* no out-label stack */
599 FIB_ROUTE_PATH_FLAG_NONE);
603 /* length == 31, delete attached route for the other address */
604 else if (address_length == 31)
606 pfx_special.fp_addr.ip4.as_u32 =
607 address->as_u32 ^ clib_host_to_net_u32(1);
609 fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
/* Forget the prefix: remove it from the hash and return the pool entry. */
612 mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
613 pool_put (lm->if_prefix_pool, if_prefix);
/* Removes the FIB entries of one interface address: first the per-prefix
   special routes, then the FIB_SOURCE_INTERFACE entry described by `pfx`.
   NOTE(review): the declaration line of `pfx` and part of the parameter list
   are not visible in this listing. */
617 ip4_del_interface_routes (u32 sw_if_index,
620 ip4_address_t * address, u32 address_length)
623 .fp_len = address_length,
624 .fp_proto = FIB_PROTOCOL_IP4,
625 .fp_addr.ip4 = *address,
628 ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
629 address, address_length);
632 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
635 #ifndef CLIB_MARCH_VARIANT
/* Reference-counted enable/disable of IPv4 on an interface. The counter
   lives in im->ip_enabled_by_sw_if_index[sw_if_index] (grown and
   zero-initialised on demand). It is pre-incremented and compared with 1 on
   one path, and asserted positive, pre-decremented and compared with 0 on
   the other. The "ip4-not-enabled" feature is then toggled with !is_enable on
   both the ip4-unicast and ip4-multicast arcs and the registered
   enable/disable callbacks are invoked.
   NOTE(review): the `if (is_enable)`/`else` and early-return lines are not
   visible in this listing. */
637 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
639 ip4_main_t *im = &ip4_main;
641 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
644 * enable/disable only on the 1<->0 transition
648 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
653 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
654 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
657 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
661 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
662 sw_if_index, !is_enable, 0, 0);
/* Notify every registered listener of the state change. */
665 ip4_enable_disable_interface_callback_t *cb;
666 vec_foreach (cb, im->enable_disable_interface_callbacks)
667 cb->function (im, cb->function_opaque, sw_if_index, is_enable);
/* Adds or deletes (is_del) an IPv4 address on an interface.
   Visible steps:
     - sw_if_index 0 (local0) is rejected with an error;
     - the address is paired with the interface's FIB index;
     - every interface in the same FIB is scanned for conflicting addresses;
       a conflict sets vnm->api_errno to VNET_API_ERROR_DUPLICATE_IF_ADDRESS
       and builds an error, except that one interface may hold several
       different addresses of the same prefix length;
     - ip_interface_address_add_del () updates the address pool;
     - IPv4 is enabled/disabled on the interface;
     - routes are added/removed immediately only when the interface is admin
       up;
     - add/del callbacks run only when the address pool size changed.
   Returns a clib_error_t pointer (0 initialised).
   NOTE(review): several guard, `goto`/cleanup and brace lines are not visible
   in this listing; confirm exact flow against the full file. */
671 static clib_error_t *
672 ip4_add_del_interface_address_internal (vlib_main_t * vm,
674 ip4_address_t * address,
675 u32 address_length, u32 is_del)
677 vnet_main_t *vnm = vnet_get_main ();
678 ip4_main_t *im = &ip4_main;
679 ip_lookup_main_t *lm = &im->lookup_main;
680 clib_error_t *error = 0;
681 u32 if_address_index, elts_before;
682 ip4_address_fib_t ip4_af, *addr_fib = 0;
684 /* local0 interface doesn't support IP addressing */
685 if (sw_if_index == 0)
688 clib_error_create ("local0 interface doesn't support IP addressing");
/* Make sure the per-interface FIB index vector covers sw_if_index before it
   is read. */
691 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
692 ip4_addr_fib_init (&ip4_af, address,
693 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
694 vec_add1 (addr_fib, ip4_af);
697 * there is no support for adj-fib handling in the presence of overlapping
698 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
704 /* When adding an address check that it does not conflict
705 with an existing address on any interface in this table. */
706 ip_interface_address_t *ia;
707 vnet_sw_interface_t *sif;
709 pool_foreach(sif, vnm->interface_main.sw_interfaces,
/* Only interfaces bound to the same FIB can conflict. */
711 if (im->fib_index_by_sw_if_index[sw_if_index] ==
712 im->fib_index_by_sw_if_index[sif->sw_if_index])
/* NOTE(review): 0 is passed for the unnumbered argument here although the
   inline comment reads "honor unnumbered"; ip4_interface_first_address in
   this file passes 1 with the same comment. */
714 foreach_ip_interface_address
715 (&im->lookup_main, ia, sif->sw_if_index,
716 0 /* honor unnumbered */ ,
719 ip_interface_address_get_address
720 (&im->lookup_main, ia);
721 if (ip4_destination_matches_route
722 (im, address, x, ia->address_length) ||
723 ip4_destination_matches_route (im,
728 /* an intf may have >1 addr from the same prefix */
729 if ((sw_if_index == sif->sw_if_index) &&
730 (ia->address_length == address_length) &&
731 (x->as_u32 != address->as_u32))
734 /* error if the length or intf was different */
735 vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
739 ("failed to add %U which conflicts with %U for interface %U",
740 format_ip4_address_and_length, address,
742 format_ip4_address_and_length, x,
744 format_vnet_sw_if_index_name, vnm,
/* Snapshot of the pool size, compared again after the add/del below. */
753 elts_before = pool_elts (lm->if_address_pool);
755 error = ip_interface_address_add_del
756 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
760 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
762 /* intf addr routes are added/deleted on admin up/down */
763 if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
766 ip4_del_interface_routes (sw_if_index,
767 im, ip4_af.fib_index, address,
770 ip4_add_interface_routes (sw_if_index,
771 im, ip4_af.fib_index,
773 (lm->if_address_pool, if_address_index));
776 /* If pool did not grow/shrink: add duplicate address. */
777 if (elts_before != pool_elts (lm->if_address_pool))
779 ip4_add_del_interface_address_callback_t *cb;
780 vec_foreach (cb, im->add_del_interface_address_callbacks)
781 cb->function (im, cb->function_opaque, sw_if_index,
782 address, address_length, if_address_index, is_del);
/* Public wrapper: forwards all arguments unchanged to
   ip4_add_del_interface_address_internal () and returns its result. */
791 ip4_add_del_interface_address (vlib_main_t * vm,
793 ip4_address_t * address,
794 u32 address_length, u32 is_del)
796 return ip4_add_del_interface_address_internal
797 (vm, sw_if_index, address, address_length, is_del);
/* Re-installs the subnet broadcast route of every interface address whose
   prefix length is <= 30, by calling ip4_add_subnet_bcast_route () with the
   prefix's broadcast address (address | ~mask).
   NOTE(review): `enable` is not referenced on any line visible in this
   listing; the statement that consumes it (if any) must be checked in the
   full file. */
801 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
803 ip_interface_address_t *ia;
809 * when directed broadcast is enabled, the subnet braodcast route will forward
810 * packets using an adjacency with a broadcast MAC. otherwise it drops
813 foreach_ip_interface_address(&im->lookup_main, ia,
816 if (ia->address_length <= 30)
820 ipa = ip_interface_address_get_address (&im->lookup_main, ia);
824 .fp_proto = FIB_PROTOCOL_IP4,
826 .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
830 ip4_add_subnet_bcast_route
831 (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
/* Admin up/down handler, registered through
   VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION. Ensures the per-interface lookup
   vectors cover sw_if_index, derives is_admin_up from
   VNET_SW_INTERFACE_FLAG_ADMIN_UP and then walks the interface's addresses,
   calling ip4_add_interface_routes () or ip4_del_interface_routes () for
   each; presumably add on up and delete on down (the selecting
   `if`/`else` lines are not visible in this listing). */
840 static clib_error_t *
841 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
843 ip4_main_t *im = &ip4_main;
844 ip_interface_address_t *ia;
846 u32 is_admin_up, fib_index;
848 /* Fill in lookup tables with default table (0). */
849 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
851 vec_validate_init_empty (im->
852 lookup_main.if_address_pool_index_by_sw_if_index,
855 is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
857 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
860 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
861 0 /* honor unnumbered */,
863 a = ip_interface_address_get_address (&im->lookup_main, ia);
865 ip4_add_interface_routes (sw_if_index,
869 ip4_del_interface_routes (sw_if_index,
871 a, ia->address_length);
878 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
/* Feature arc "ip4-unicast": starts at "ip4-input" and
   "ip4-input-no-checksum" and ends at "ip4-lookup". Each VNET_FEATURE_INIT
   below registers one node on the arc; .runs_before names the feature(s) the
   node is ordered ahead of. */
880 /* Built-in ip4 unicast rx feature path definition */
882 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
884 .arc_name = "ip4-unicast",
885 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
886 .last_in_arc = "ip4-lookup",
887 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
890 VNET_FEATURE_INIT (ip4_flow_classify, static) =
892 .arc_name = "ip4-unicast",
893 .node_name = "ip4-flow-classify",
894 .runs_before = VNET_FEATURES ("ip4-inacl"),
897 VNET_FEATURE_INIT (ip4_inacl, static) =
899 .arc_name = "ip4-unicast",
900 .node_name = "ip4-inacl",
901 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
904 VNET_FEATURE_INIT (ip4_source_check_1, static) =
906 .arc_name = "ip4-unicast",
907 .node_name = "ip4-source-check-via-rx",
908 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
911 VNET_FEATURE_INIT (ip4_source_check_2, static) =
913 .arc_name = "ip4-unicast",
914 .node_name = "ip4-source-check-via-any",
915 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
/* Like ip4-source-check-via-any, this feature is ordered directly ahead of
   "ip4-policer-classify". */
918 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
920 .arc_name = "ip4-unicast",
921 .node_name = "ip4-source-and-port-range-check-rx",
922 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
925 VNET_FEATURE_INIT (ip4_policer_classify, static) =
927 .arc_name = "ip4-unicast",
928 .node_name = "ip4-policer-classify",
929 .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
932 VNET_FEATURE_INIT (ip4_ipsec, static) =
934 .arc_name = "ip4-unicast",
935 .node_name = "ipsec4-input-feature",
936 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
939 VNET_FEATURE_INIT (ip4_vpath, static) =
941 .arc_name = "ip4-unicast",
942 .node_name = "vpath-input-ip4",
943 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
946 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
948 .arc_name = "ip4-unicast",
949 .node_name = "ip4-vxlan-bypass",
950 .runs_before = VNET_FEATURES ("ip4-lookup"),
/* "ip4-not-enabled" is the feature toggled by
   ip4_sw_interface_enable_disable () and ip4_sw_interface_add_del (). */
953 VNET_FEATURE_INIT (ip4_not_enabled, static) =
955 .arc_name = "ip4-unicast",
956 .node_name = "ip4-not-enabled",
957 .runs_before = VNET_FEATURES ("ip4-lookup"),
960 VNET_FEATURE_INIT (ip4_lookup, static) =
962 .arc_name = "ip4-unicast",
963 .node_name = "ip4-lookup",
964 .runs_before = 0, /* not before any other features */
/* Feature arc "ip4-multicast": same start nodes as the unicast arc, ending
   at "ip4-mfib-forward-lookup". Both registered features are ordered ahead
   of that final node. */
967 /* Built-in ip4 multicast rx feature path definition */
968 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
970 .arc_name = "ip4-multicast",
971 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
972 .last_in_arc = "ip4-mfib-forward-lookup",
973 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
976 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
978 .arc_name = "ip4-multicast",
979 .node_name = "vpath-input-ip4",
980 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
983 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
985 .arc_name = "ip4-multicast",
986 .node_name = "ip4-not-enabled",
987 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
990 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
992 .arc_name = "ip4-multicast",
993 .node_name = "ip4-mfib-forward-lookup",
994 .runs_before = 0, /* last feature */
/* Feature arc "ip4-output": entered from "ip4-rewrite", "ip4-midchain" and
   "ip4-dvr-dpo", ending at "interface-output". The .runs_before chain below
   orders the features as: source-and-port-range-check-tx, outacl,
   ipsec4-output-feature, interface-output. */
997 /* Source and port-range check ip4 tx feature path definition */
998 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1000 .arc_name = "ip4-output",
1001 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1002 .last_in_arc = "interface-output",
1003 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1006 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1008 .arc_name = "ip4-output",
1009 .node_name = "ip4-source-and-port-range-check-tx",
1010 .runs_before = VNET_FEATURES ("ip4-outacl"),
1013 VNET_FEATURE_INIT (ip4_outacl, static) =
1015 .arc_name = "ip4-output",
1016 .node_name = "ip4-outacl",
1017 .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1020 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1022 .arc_name = "ip4-output",
1023 .node_name = "ipsec4-output-feature",
1024 .runs_before = VNET_FEATURES ("interface-output"),
1027 /* Built-in ip4 tx feature path definition */
1028 VNET_FEATURE_INIT (ip4_interface_output, static) =
1030 .arc_name = "ip4-output",
1031 .node_name = "interface-output",
1032 .runs_before = 0, /* not before any other features */
/* Interface add/delete handler, registered through
   VNET_SW_INTERFACE_ADD_DEL_FUNCTION. Sizes the unicast and multicast FIB
   index vectors for sw_if_index. One path clears any unnumbered setup of the
   interface and deletes each of its IPv4 addresses (is_del = 1); presumably
   that is the !is_add path, whose guarding `if` is not visible in this
   listing. "ip4-not-enabled" is then toggled with is_add on both rx arcs.
   Always returns 0 (no error). */
1036 static clib_error_t *
1037 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1039 ip4_main_t *im = &ip4_main;
1041 /* Fill in lookup tables with default table (0). */
1042 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1043 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1047 ip4_main_t *im4 = &ip4_main;
1048 ip_lookup_main_t *lm4 = &im4->lookup_main;
1049 ip_interface_address_t *ia = 0;
1050 ip4_address_t *address;
1051 vlib_main_t *vm = vlib_get_main ();
1053 vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1055 foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1057 address = ip_interface_address_get_address (lm4, ia);
1058 ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1063 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1066 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1067 sw_if_index, is_add, 0, 0);
1069 return /* no error */ 0;
1072 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
/* The single ip4_main_t instance. Its definition is compiled only when
   CLIB_MARCH_VARIANT is not defined. */
1074 /* Global IP4 main. */
1075 #ifndef CLIB_MARCH_VARIANT
1076 ip4_main_t ip4_main;
1077 #endif /* CLIB_MARCH_VARIANT */
/* Init function (registered with VLIB_INIT_FUNCTION) for IPv4 lookup.
   Visible steps:
     - runs the vnet_feature_init, ip4_mtrie_module_init, fib_module_init and
       mfib_module_init init functions, capturing each error;
     - fills im->fib_masks[] with per-prefix-length masks in network byte
       order;
     - initialises the lookup main for IPv4 (is_ip6 = 0);
     - creates/locks unicast and multicast FIB tables with table id 0;
     - sets the packet-generator edit function of the ip4-lookup node;
     - prepares the ARP request packet template.
   NOTE(review): the error-return lines, the declaration of `i`/`m`/`pn` and
   any special-casing inside the mask loop are not visible in this listing. */
1079 static clib_error_t *
1080 ip4_lookup_init (vlib_main_t * vm)
1082 ip4_main_t *im = &ip4_main;
1083 clib_error_t *error;
1086 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1088 if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1090 if ((error = vlib_call_init_function (vm, fib_module_init)))
1092 if ((error = vlib_call_init_function (vm, mfib_module_init)))
1095 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* i leading one-bits, shifted to the top of the 32-bit word. */
1100 m = pow2_mask (i) << (32 - i);
1103 im->fib_masks[i] = clib_host_to_net_u32 (m);
1106 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1108 /* Create FIB with index 0 and table id of 0. */
1109 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1110 FIB_SOURCE_DEFAULT_ROUTE);
1111 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1112 MFIB_SOURCE_DEFAULT_ROUTE);
1116 pn = pg_get_node (ip4_lookup_node.index);
1117 pn->unformat_edit = unformat_pg_ip4_header;
/* Build a zeroed ARP header with the fixed Ethernet/IPv4 request fields set;
   multi-byte fields are converted to network byte order by _16. */
1121 ethernet_arp_header_t h;
1123 clib_memset (&h, 0, sizeof (h));
1125 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1126 #define _8(f,v) h.f = v;
1127 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1128 _16 (l3_type, ETHERNET_TYPE_IP4);
1129 _8 (n_l2_address_bytes, 6);
1130 _8 (n_l3_address_bytes, 4);
1131 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1135 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1138 /* alloc chunk size */ 8,
1145 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Trace record written by ip4_forward_next_trace () and consumed by the
   format_ip4_*_trace functions in this file, which also read its fib_index,
   dpo_index and flow_hash members (their declarations are not visible in
   this listing). */
1149 /* Adjacency taken. */
1154 /* Packet data, possibly *after* rewrite. */
1155 u8 packet_data[64 - 1 * sizeof (u32)];
1157 ip4_forward_next_trace_t;
1159 #ifndef CLIB_MARCH_VARIANT
/* Trace formatter: consumes (vm, node, trace record) from the va_list and
   appends the captured packet bytes rendered as an IPv4 header, indented to
   the current indent of `s`. vm and node are unused.
   NOTE(review): the return statement is not visible in this listing. */
1161 format_ip4_forward_next_trace (u8 * s, va_list * args)
1163 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1164 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1165 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1166 u32 indent = format_get_indent (s);
1167 s = format (s, "%U%U",
1168 format_white_space, indent,
1169 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter used by the ip4-lookup and ip4-load-balance nodes: prints
   the FIB index, DPO index and flow hash on one line, followed on the next
   line by the captured bytes rendered as an IPv4 header. vm and node are
   unused.
   NOTE(review): the return statement is not visible in this listing. */
1175 format_ip4_lookup_trace (u8 * s, va_list * args)
1177 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1178 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1179 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1180 u32 indent = format_get_indent (s);
1182 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1183 t->fib_index, t->dpo_index, t->flow_hash);
1184 s = format (s, "\n%U%U",
1185 format_white_space, indent,
1186 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite traces: prints the adjacency (via
   format_ip_adjacency) together with the flow hash, then the captured bytes
   through format_ip_adjacency_packet_data. vm and node are unused.
   NOTE(review): the value printed under the "tx_sw_if_index" label is
   t->fib_index; confirm this is the intended field.
   NOTE(review): the return statement is not visible in this listing. */
1191 format_ip4_rewrite_trace (u8 * s, va_list * args)
1193 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1194 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1195 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1196 u32 indent = format_get_indent (s);
1198 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1199 t->fib_index, t->dpo_index, format_ip_adjacency,
1200 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1201 s = format (s, "\n%U%U",
1202 format_white_space, indent,
1203 format_ip_adjacency_packet_data,
1204 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1208 #ifndef CLIB_MARCH_VARIANT
/* Adds a trace record for every buffer of the frame that has
   VLIB_BUFFER_IS_TRACED set. Each record stores:
     - dpo_index: ip.adj_index[which_adj_index] of the buffer;
     - flow_hash: the buffer's ip.flow_hash;
     - a FIB index: sw_if_index[VLIB_TX] when that is not ~0, otherwise the
       FIB bound to the RX interface (the assignment target line is not
       visible in this listing; presumably fib_index);
     - packet_data: the first sizeof (packet_data) bytes at the buffer's
       current data pointer.
   Buffers are handled two at a time with a prefetch of the following pair,
   then one at a time.
   NOTE(review): loop headers and index bookkeeping lines are not visible in
   this listing. */
1209 /* Common trace function for all ip4-forward next nodes. */
1211 ip4_forward_next_trace (vlib_main_t * vm,
1212 vlib_node_runtime_t * node,
1213 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1216 ip4_main_t *im = &ip4_main;
1218 n_left = frame->n_vectors;
1219 from = vlib_frame_vector_args (frame);
1224 vlib_buffer_t *b0, *b1;
1225 ip4_forward_next_trace_t *t0, *t1;
1227 /* Prefetch next iteration. */
1228 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1229 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1234 b0 = vlib_get_buffer (vm, bi0);
1235 b1 = vlib_get_buffer (vm, bi1);
1237 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1239 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1240 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1241 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1243 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1244 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1245 vec_elt (im->fib_index_by_sw_if_index,
1246 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1248 clib_memcpy_fast (t0->packet_data,
1249 vlib_buffer_get_current (b0),
1250 sizeof (t0->packet_data));
1252 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1254 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1255 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1256 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1258 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1259 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1260 vec_elt (im->fib_index_by_sw_if_index,
1261 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1262 clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1263 sizeof (t1->packet_data));
/* Single-buffer variant of the same recording steps. */
1273 ip4_forward_next_trace_t *t0;
1277 b0 = vlib_get_buffer (vm, bi0);
1279 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1281 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1282 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1283 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1285 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1286 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1287 vec_elt (im->fib_index_by_sw_if_index,
1288 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1289 clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1290 sizeof (t0->packet_data));
1297 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * Software L4 checksum for the packet in p0 with IPv4 header ip0.
 * The running sum is seeded with the pseudo-header (payload length,
 * protocol, source and destination addresses) and the final value is
 * whatever ip_calculate_l4_checksum () returns for the payload.
 */
1299 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1303 u32 ip_header_length, payload_length_host_byte_order;
1305 /* Initialize checksum with ip header. */
1306 ip_header_length = ip4_header_bytes (ip0);
/* L4 payload length = IP total length minus the IP header length. */
1307 payload_length_host_byte_order =
1308 clib_net_to_host_u16 (ip0->length) - ip_header_length;
/* Pseudo-header word: payload length combined with the protocol number. */
1310 clib_host_to_net_u32 (payload_length_host_byte_order +
1311 (ip0->protocol << 16));
/* 32-bit targets add the two addresses as separate u32 loads. */
1313 if (BITS (uword) == 32)
1316 ip_csum_with_carry (sum0,
1317 clib_mem_unaligned (&ip0->src_address, u32));
1319 ip_csum_with_carry (sum0,
1320 clib_mem_unaligned (&ip0->dst_address, u32));
/* Otherwise a single u64 load starting at src_address is added.
   NOTE(review): presumably this also covers dst_address, which would have
   to follow src_address in ip4_header_t -- confirm. */
1324 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1326 return ip_calculate_l4_checksum (vm, p0, sum0,
1327 payload_length_host_byte_order, (u8 *) ip0,
1328 ip_header_length, NULL);
/*
 * Validate the TCP/UDP checksum of the IPv4 packet at the buffer's current
 * data pointer and record the outcome in p0->flags
 * (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED / VNET_BUFFER_F_L4_CHECKSUM_CORRECT).
 */
1332 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1334 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
/* Callers are expected to pass only TCP or UDP packets. */
1338 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1339 || ip0->protocol == IP_PROTOCOL_UDP);
/* NOTE(review): (ip0 + 1) assumes the L4 header directly follows a
   fixed-size ip4_header_t, i.e. no IP options -- confirm. */
1341 udp0 = (void *) (ip0 + 1);
/* A UDP checksum field of zero is accepted as correct without computing. */
1342 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1344 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1345 | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
/* Otherwise compute in software; a result of zero sets the CORRECT bit. */
1349 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1351 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1352 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
/* Declare the "ip4-local" feature arc: it starts at the ip4-local node and
   its last feature is ip4-local-end-of-arc. */
1359 VNET_FEATURE_ARC_INIT (ip4_local) =
1361 .arc_name = "ip4-local",
1362 .start_nodes = VNET_FEATURES ("ip4-local"),
1363 .last_in_arc = "ip4-local-end-of-arc",
/*
 * Run the software L4 checksum validation on buffer p and report the
 * verdict through *good_tcp_udp.  The UDP length field is also compared
 * against the IP length; a UDP length larger than the IP length clears
 * *good_tcp_udp and sets *error to IP4_ERROR_UDP_LENGTH.
 * NOTE(review): the length check is presumably guarded by is_udp -- confirm.
 */
1368 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1369 ip4_header_t * ip, u8 is_udp, u8 * error,
1373 flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1374 *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1378 u32 ip_len, udp_len;
1380 udp = ip4_next_header (ip);
1381 /* Verify UDP length. */
1382 ip_len = clib_net_to_host_u16 (ip->length);
1383 udp_len = clib_net_to_host_u16 (udp->length);
/* Negative difference means the UDP header claims more than the IP packet. */
1385 len_diff = ip_len - udp_len;
1386 *good_tcp_udp &= len_diff >= 0;
1387 *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
/*
 * Checksum-state predicates on a buffer pointer _b.
 *
 * Every macro argument and every expansion is fully parenthesized, so the
 * predicates can be negated, combined with other operators, or given an
 * expression such as `b + 1` without changing their meaning.  Each one
 * evaluates to 0 or 1.
 */

/* True when TCP or UDP checksum offload is requested for the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                  \
  ((((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM) != 0)                \
   || (((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM) != 0))

/* True when a TCP/UDP packet still needs a software checksum check:
   the checksum was neither computed already nor offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                        \
  ((is_tcp_udp)                                                          \
   && !((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) != 0)        \
        || ip4_local_csum_is_offloaded (_b)))

/* True when the checksum is flagged correct or is offloaded. */
#define ip4_local_csum_is_valid(_b)                                      \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0)              \
   || ip4_local_csum_is_offloaded (_b))
/*
 * L4 checksum check for one buffer.  The checksum is validated in software
 * only when ip4_local_need_csum_check () says so; the verdict can also be
 * taken straight from the buffer flags via ip4_local_csum_is_valid ().
 * A TCP/UDP packet with a bad checksum gets *error set to
 * IP4_ERROR_TCP_CHECKSUM or IP4_ERROR_UDP_CHECKSUM; otherwise *error is
 * left as it was.
 */
1404 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1405 ip4_header_t * ih, u8 * error)
1407 u8 is_udp, is_tcp_udp, good_tcp_udp;
1409 is_udp = ih->protocol == IP_PROTOCOL_UDP;
1410 is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1412 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1413 ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
/* NOTE(review): presumably the alternative branch of the check above. */
1415 good_tcp_udp = ip4_local_csum_is_valid (b);
/* The two error codes are adjacent, so adding is_udp selects the UDP one. */
1417 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1418 *error = (is_tcp_udp && !good_tcp_udp
1419 ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
/*
 * Two-buffer variant of the L4 checksum check.  b, ih and error are arrays
 * of which elements [0] and [1] are used.  The flag-based verdict is taken
 * first; software validation is invoked when either buffer needs it.
 */
1423 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1424 ip4_header_t ** ih, u8 * error)
1426 u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1428 is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1429 is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1431 is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1432 is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
/* Default verdict from the buffer flags (already correct or offloaded). */
1434 good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1435 good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
/* Uncommon case: at least one buffer needs a software check. */
1437 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1438 || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1441 ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1444 ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
/* Bad checksum on a TCP/UDP packet: TCP error code, +1 for UDP. */
1448 error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1449 IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1450 error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1451 IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
/*
 * Finalize the disposition of one buffer.  IP4_ERROR_UNKNOWN_PROTOCOL is
 * the "nothing went wrong" value here: any other error code redirects the
 * buffer to IP_LOCAL_NEXT_DROP.  b->error is set from error_node's error
 * table, and at the head of the feature arc an error-free buffer is handed
 * to the ip4-local feature arc.
 */
1455 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1456 vlib_buffer_t * b, u16 * next, u8 error,
1457 u8 head_of_feature_arc)
1459 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1462 *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1463 b->error = error ? error_node->errors[error] : 0;
1464 if (head_of_feature_arc)
/* Only buffers without a recorded error enter the feature arc. */
1467 if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1469 vnet_feature_arc_start (arc_index,
1470 vnet_buffer (b)->sw_if_index[VLIB_RX],
1483 } ip4_local_last_check_t;
/*
 * Source-address check for one locally destined packet.  last_check is a
 * one-entry cache (source address, load-balance index, resulting error):
 * when the source matches the cached one the cached result is reused,
 * otherwise the source is looked up in the FIB mtrie and the cache is
 * refreshed.  In both cases adj_index[VLIB_RX] takes the previous
 * adj_index[VLIB_TX] and adj_index[VLIB_TX] takes the source's
 * load-balance index.
 */
1486 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1487 ip4_local_last_check_t * last_check, u8 * error0)
1489 ip4_fib_mtrie_leaf_t leaf0;
1490 ip4_fib_mtrie_t *mtrie0;
1491 const dpo_id_t *dpo0;
1492 load_balance_t *lb0;
/* A TX sw_if_index other than ~0 overrides the buffer's FIB index. */
1495 vnet_buffer (b)->ip.fib_index =
1496 vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1497 vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1500 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1501 * adjacency for the destination address (the local interface address).
1502 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1503 * adjacency for the source address (the remote sender's address)
1505 if (PREDICT_FALSE (last_check->first ||
1506 (last_check->src.as_u32 != ip0->src_address.as_u32)))
1508 mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1509 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1510 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1511 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1512 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1514 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1515 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1516 vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1518 lb0 = load_balance_get (lbi0);
1519 dpo0 = load_balance_get_bucket_i (lb0, 0);
1522 * Must have a route to source otherwise we drop the packet.
1523 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1526 * - the source is a recieve => it's from us => bogus, do this
1527 * first since it sets a different error code.
1528 * - uRPF check for any route to source - accept if passes.
1529 * - allow packets destined to the broadcast address from unknown sources
1532 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1533 && dpo0->dpoi_type == DPO_RECEIVE) ?
1534 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1535 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1536 && !fib_urpf_check_size (lb0->lb_urpf)
1537 && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1538 IP4_ERROR_SRC_LOOKUP_MISS : *error0);
/* Remember this source and its outcome for the following packets. */
1540 last_check->src.as_u32 = ip0->src_address.as_u32;
1541 last_check->lbi = lbi0;
1542 last_check->error = *error0;
/* Cached path: reuse the load-balance index and error of the last lookup. */
1546 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1547 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1548 vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1549 *error0 = last_check->error;
1550 last_check->first = 0;
/*
 * Two-buffer variant of the source-address check.  b, ip and error are
 * arrays of which elements [0] and [1] are used.  The cached result in
 * last_check is reused only when both sources match it; otherwise both
 * sources are looked up and the cache is refreshed from packet [1].
 */
1555 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1556 ip4_local_last_check_t * last_check, u8 * error)
1558 ip4_fib_mtrie_leaf_t leaf[2];
1559 ip4_fib_mtrie_t *mtrie[2];
1560 const dpo_id_t *dpo[2];
1561 load_balance_t *lb[2];
/* Non-zero when the cache is still unset or either source differs from it. */
1565 not_last_hit = last_check->first;
1566 not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1567 not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
/* A TX sw_if_index other than ~0 overrides the buffer's FIB index. */
1569 vnet_buffer (b[0])->ip.fib_index =
1570 vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1571 vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1572 vnet_buffer (b[0])->ip.fib_index;
1574 vnet_buffer (b[1])->ip.fib_index =
1575 vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1576 vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1577 vnet_buffer (b[1])->ip.fib_index;
1580 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1581 * adjacency for the destination address (the local interface address).
1582 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1583 * adjacency for the source address (the remote sender's address)
1585 if (PREDICT_FALSE (not_last_hit))
1587 mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1588 mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1590 leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1591 leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1593 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1594 &ip[0]->src_address, 2);
1595 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1596 &ip[1]->src_address, 2);
1598 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1599 &ip[0]->src_address, 3);
1600 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1601 &ip[1]->src_address, 3);
1603 lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1604 lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1606 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1607 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1608 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1610 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1611 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1612 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1614 lb[0] = load_balance_get (lbi[0]);
1615 lb[1] = load_balance_get (lbi[1]);
1617 dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1618 dpo[1] = load_balance_get_bucket_i (lb[1], 0);
/* Only packets with no earlier error are reclassified: a DPO_RECEIVE
   source becomes SPOOFED_LOCAL_PACKETS; a failed uRPF size check on a
   non-broadcast destination becomes SRC_LOOKUP_MISS. */
1620 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1621 dpo[0]->dpoi_type == DPO_RECEIVE) ?
1622 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1623 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1624 !fib_urpf_check_size (lb[0]->lb_urpf) &&
1625 ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1626 ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1628 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1629 dpo[1]->dpoi_type == DPO_RECEIVE) ?
1630 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1631 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1632 !fib_urpf_check_size (lb[1]->lb_urpf) &&
1633 ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1634 ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
/* The cache is refreshed from the second packet of the pair. */
1636 last_check->src.as_u32 = ip[1]->src_address.as_u32;
1637 last_check->lbi = lbi[1];
1638 last_check->error = error[1];
/* Cached path: both packets reuse the cached load-balance index and error. */
1642 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1643 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1644 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1646 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1647 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1648 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1650 error[0] = last_check->error;
1651 error[1] = last_check->error;
1652 last_check->first = 0;
/* Packet classes produced by ip4_local_classify (): ordinary L4 traffic,
   packets flagged as NATed, and IP fragments. */
1656 enum ip_local_packet_type_e
1658 IP_LOCAL_PACKET_TYPE_L4,
1659 IP_LOCAL_PACKET_TYPE_NAT,
1660 IP_LOCAL_PACKET_TYPE_FRAG,
1664 * Determine packet type and next node.
1666 * The expectation is that all packets that are not L4 will skip
1667 * checksums and source checks.
/*
 * Classify one packet and pick its next node:
 *  - fragments go to IP_LOCAL_NEXT_REASSEMBLY (type FRAG);
 *  - buffers with VNET_BUFFER_F_IS_NATED use the per-protocol next
 *    (type NAT);
 *  - everything else uses the per-protocol next (type L4).
 * The chosen next index is written to *next; the type is returned.
 */
1670 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1672 ip_lookup_main_t *lm = &ip4_main.lookup_main;
1674 if (PREDICT_FALSE (ip4_is_fragment (ip)))
1676 *next = IP_LOCAL_NEXT_REASSEMBLY;
1677 return IP_LOCAL_PACKET_TYPE_FRAG;
1679 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1681 *next = lm->local_next_by_ip_protocol[ip->protocol];
1682 return IP_LOCAL_PACKET_TYPE_NAT;
1685 *next = lm->local_next_by_ip_protocol[ip->protocol];
1686 return IP_LOCAL_PACKET_TYPE_L4;
/*
 * Shared body of the ip4-local and ip4-local-end-of-arc nodes.  Each packet
 * is classified, optionally run through the L4 checksum and source-address
 * checks, given its next index and error, and finally the whole frame is
 * enqueued according to nexts[].  Returns the number of packets in the
 * frame.  head_of_feature_arc selects the behaviour of the first node of
 * the arc (non-zero) versus the end-of-arc node (zero).
 */
1690 ip4_local_inline (vlib_main_t * vm,
1691 vlib_node_runtime_t * node,
1692 vlib_frame_t * frame, int head_of_feature_arc)
1694 u32 *from, n_left_from;
/* Errors are attributed to the ip4-input node's error table. */
1695 vlib_node_runtime_t *error_node =
1696 vlib_node_get_runtime (vm, ip4_input_node.index);
1697 u16 nexts[VLIB_FRAME_SIZE], *next;
1698 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1699 ip4_header_t *ip[2];
1702 ip4_local_last_check_t last_check = {
1704 * 0.0.0.0 can appear as the source address of an IP packet,
1705 * as can any other address, hence the need to use the 'first'
1706 * member to make sure the .lbi is initialised for the first
1709 .src = {.as_u32 = 0},
1711 .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1715 from = vlib_frame_vector_args (frame);
1716 n_left_from = frame->n_vectors;
1718 if (node->flags & VLIB_NODE_FLAG_TRACE)
1719 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1721 vlib_get_buffers (vm, from, bufs, n_left_from);
/* Dual loop: two packets per iteration while at least six remain, so that
   b[4] and b[5] can be prefetched. */
1725 while (n_left_from >= 6)
1729 /* Prefetch next iteration. */
1731 vlib_prefetch_buffer_header (b[4], LOAD);
1732 vlib_prefetch_buffer_header (b[5], LOAD);
1734 CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1735 CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
/* IP4_ERROR_UNKNOWN_PROTOCOL is the starting "no error yet" value. */
1738 error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1740 ip[0] = vlib_buffer_get_current (b[0]);
1741 ip[1] = vlib_buffer_get_current (b[1]);
1743 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1744 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1746 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1747 pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
/* Non-zero when the two packets were classified differently. */
1749 not_batch = pt[0] ^ pt[1];
/* NOTE(review): presumably this condition skips the checks below, for the
   end-of-arc node and for a pair sharing a non-zero packet type -- confirm. */
1751 if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
/* Same type for both: use the two-packet checks. */
1754 if (PREDICT_TRUE (not_batch == 0))
1756 ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1757 ip4_local_check_src_x2 (b, ip, &last_check, error);
/* Mixed types: each packet is checked individually. */
1763 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1764 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1768 ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1769 ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1775 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1776 head_of_feature_arc);
1777 ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1778 head_of_feature_arc);
/* Single loop for the remaining packets. */
1785 while (n_left_from > 0)
1787 error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1789 ip[0] = vlib_buffer_get_current (b[0]);
1790 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1791 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1793 if (head_of_feature_arc == 0 || pt[0])
1796 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1797 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1801 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1802 head_of_feature_arc);
/* Hand every buffer of the frame to the next node recorded in nexts[]. */
1809 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1810 return frame->n_vectors;
/* ip4-local node function: runs ip4_local_inline () as the head of the
   ip4-local feature arc. */
1813 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1814 vlib_frame_t * frame)
1816 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/* Registration of the "ip4-local" graph node and its fixed next nodes
   (drop, punt, UDP lookup, ICMP input, full reassembly). */
1820 VLIB_REGISTER_NODE (ip4_local_node) =
1822 .name = "ip4-local",
1823 .vector_size = sizeof (u32),
1824 .format_trace = format_ip4_forward_next_trace,
1825 .n_next_nodes = IP_LOCAL_N_NEXT,
1828 [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1829 [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1830 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1831 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1832 [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
/* ip4-local-end-of-arc node function: runs ip4_local_inline () with
   head_of_feature_arc set to 0. */
1838 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1839 vlib_node_runtime_t * node,
1840 vlib_frame_t * frame)
1842 return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
/* Registration of "ip4-local-end-of-arc"; it is declared a sibling of
   "ip4-local" and uses the same trace formatter. */
1846 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1847 .name = "ip4-local-end-of-arc",
1848 .vector_size = sizeof (u32),
1850 .format_trace = format_ip4_forward_next_trace,
1851 .sibling_of = "ip4-local",
/* Attach the ip4-local-end-of-arc node to the "ip4-local" feature arc. */
1854 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1855 .arc_name = "ip4-local",
1856 .node_name = "ip4-local-end-of-arc",
1857 .runs_before = 0, /* not before any other features */
1861 #ifndef CLIB_MARCH_VARIANT
1863 ip4_register_protocol (u32 protocol, u32 node_index)
1865 vlib_main_t *vm = vlib_get_main ();
1866 ip4_main_t *im = &ip4_main;
1867 ip_lookup_main_t *lm = &im->lookup_main;
1869 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1870 lm->local_next_by_ip_protocol[protocol] =
1871 vlib_node_add_next (vm, ip4_local_node.index, node_index);
1875 ip4_unregister_protocol (u32 protocol)
1877 ip4_main_t *im = &ip4_main;
1878 ip_lookup_main_t *lm = &im->lookup_main;
1880 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1881 lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
/*
 * CLI handler for "show ip local": prints one line per IP protocol whose
 * entry in the ip4-local dispatch table is not IP_LOCAL_NEXT_PUNT, giving
 * the protocol and the name of the node that handles it.
 */
1885 static clib_error_t *
1886 show_ip_local_command_fn (vlib_main_t * vm,
1887 unformat_input_t * input, vlib_cli_command_t * cmd)
1889 ip4_main_t *im = &ip4_main;
1890 ip_lookup_main_t *lm = &im->lookup_main;
1893 vlib_cli_output (vm, "Protocols handled by ip4_local");
1894 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1896 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
/* Translate the next slot stored in the table into a node index. */
1898 u32 node_index = vlib_get_node (vm,
1899 ip4_local_node.index)->
1900 next_nodes[lm->local_next_by_ip_protocol[i]];
1901 vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1902 format_vlib_node_name, vm, node_index);
1911 * Display the set of protocols handled by the local IPv4 stack.
1914 * Example of how to display local protocol table:
1915 * @cliexstart{show ip local}
1916 * Protocols handled by ip4_local
/* Bind the "show ip local" CLI path to show_ip_local_command_fn (). */
1923 VLIB_CLI_COMMAND (show_ip_local, static) =
1925 .path = "show ip local",
1926 .function = show_ip_local_command_fn,
1927 .short_help = "show ip local",
/*
 * Shared body of the ip4-arp and ip4-glean nodes.  Every incoming packet is
 * placed on the IP4_ARP_NEXT_DROP frame with an error code describing the
 * outcome.  Unless throttled or otherwise rejected, an ARP request is built
 * from the packet template for the address to resolve and sent to the
 * adjacency's rewrite next index.  is_glean selects glean behaviour
 * (non-zero) versus incomplete-adjacency behaviour (zero).  Returns the
 * number of packets in the frame.
 */
1932 ip4_arp_inline (vlib_main_t * vm,
1933 vlib_node_runtime_t * node,
1934 vlib_frame_t * frame, int is_glean)
1936 vnet_main_t *vnm = vnet_get_main ();
1937 ip4_main_t *im = &ip4_main;
1938 ip_lookup_main_t *lm = &im->lookup_main;
1939 u32 *from, *to_next_drop;
1940 uword n_left_from, n_left_to_next_drop, next_index;
1941 u32 thread_index = vm->thread_index;
1944 if (node->flags & VLIB_NODE_FLAG_TRACE)
1945 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Per-thread throttle seed derived from the current time. */
1947 seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1949 from = vlib_frame_vector_args (frame);
1950 n_left_from = frame->n_vectors;
1951 next_index = node->cached_next_index;
1952 if (next_index == IP4_ARP_NEXT_DROP)
1953 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1955 while (n_left_from > 0)
1957 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1958 to_next_drop, n_left_to_next_drop);
1960 while (n_left_from > 0 && n_left_to_next_drop > 0)
1962 u32 pi0, bi0, adj_index0, sw_if_index0;
1963 ip_adjacency_t *adj0;
1964 vlib_buffer_t *p0, *b0;
1965 ip4_address_t resolve0;
1966 ethernet_arp_header_t *h0;
1967 vnet_hw_interface_t *hw_if0;
1971 p0 = vlib_get_buffer (vm, pi0);
/* The original packet always goes to the drop frame. */
1975 to_next_drop[0] = pi0;
1977 n_left_to_next_drop -= 1;
1979 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1980 adj0 = adj_get (adj_index0);
1984 /* resolve the packet's destination */
1985 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1986 resolve0 = ip0->dst_address;
1990 /* resolve the incomplete adj */
1991 resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1994 /* combine the address and interface for the hash key */
1995 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1996 r0 = (u64) resolve0.data_u32 << 32;
/* Rate-limit requests for the same key. */
1999 if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
2001 p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
2006 * the adj has been updated to a rewrite but the node the DPO that got
2007 * us here hasn't - yet. no big deal. we'll drop while we wait.
2009 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2011 p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
2016 * Can happen if the control-plane is programming tables
2017 * with traffic flowing; at least that's today's lame excuse.
2019 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2020 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2022 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2025 /* Send ARP request. */
2027 vlib_packet_template_get_packet (vm,
2028 &im->ip4_arp_request_packet_template,
2030 /* Seems we're out of buffers */
2031 if (PREDICT_FALSE (!h0))
2033 p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
2037 b0 = vlib_get_buffer (vm, bi0);
2039 /* copy the persistent fields from the original */
2040 clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
2042 /* Add rewrite/encap string for ARP packet. */
2043 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2045 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2047 /* Src ethernet address in ARP header. */
2048 mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
2049 hw_if0->hw_address);
2052 /* The interface's source address is stashed in the Glean Adj */
2053 h0->ip4_over_ethernet[0].ip4 =
2054 adj0->sub_type.glean.receive_addr.ip4;
2058 /* Src IP address in ARP header. */
2059 if (ip4_src_address_for_packet (lm, sw_if_index0,
2060 &h0->ip4_over_ethernet[0].ip4))
2062 /* No source address available */
2063 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
/* The request buffer cannot be used; release it. */
2064 vlib_buffer_free (vm, &bi0, 1);
/* Target IP address in the ARP header. */
2068 h0->ip4_over_ethernet[1].ip4 = resolve0;
2070 p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
2072 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2073 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2074 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
/* Move the data pointer back over the rewrite that was just prepended. */
2076 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2078 vlib_set_next_frame_buffer (vm, node,
2079 adj0->rewrite_header.next_index, bi0);
2082 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2085 return frame->n_vectors;
/* ip4-arp node function: ip4_arp_inline () with is_glean = 0. */
2088 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2089 vlib_frame_t * frame)
2091 return (ip4_arp_inline (vm, node, frame, 0));
/* ip4-glean node function: ip4_arp_inline () with is_glean = 1. */
2094 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2095 vlib_frame_t * frame)
2097 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter descriptions for the IP4_ARP_ERROR_* codes, indexed by code;
   used by both the ip4-arp and ip4-glean node registrations. */
2100 static char *ip4_arp_error_strings[] = {
2101 [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
2102 [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
2103 [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
2104 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2105 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2106 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Registration of the ip4_arp_node graph node; its only fixed next node is
   "error-drop" and its error counters come from ip4_arp_error_strings. */
2110 VLIB_REGISTER_NODE (ip4_arp_node) =
2113 .vector_size = sizeof (u32),
2114 .format_trace = format_ip4_forward_next_trace,
2115 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2116 .error_strings = ip4_arp_error_strings,
2117 .n_next_nodes = IP4_ARP_N_NEXT,
2120 [IP4_ARP_NEXT_DROP] = "error-drop",
/* Registration of the "ip4-glean" graph node; it shares the error strings
   and the "error-drop" next node with the ARP node. */
2124 VLIB_REGISTER_NODE (ip4_glean_node) =
2126 .name = "ip4-glean",
2127 .vector_size = sizeof (u32),
2128 .format_trace = format_ip4_forward_next_trace,
2129 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2130 .error_strings = ip4_arp_error_strings,
2131 .n_next_nodes = IP4_ARP_N_NEXT,
2133 [IP4_ARP_NEXT_DROP] = "error-drop",
/* X-macro list of IP4_ARP_ERROR_* suffixes that arp_notrace_init () feeds
   to vnet_pcap_drop_trace_filter_add_del (). */
2138 #define foreach_notrace_ip4_arp_error \
2144 _(NO_SOURCE_ADDRESS)
/*
 * Init function: for each error listed in foreach_notrace_ip4_arp_error,
 * call vnet_pcap_drop_trace_filter_add_del () with the ip4-arp node's
 * runtime error index, so that those drops are filtered from the pcap drop
 * trace.
 */
2146 static clib_error_t *
2147 arp_notrace_init (vlib_main_t * vm)
2149 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2151 /* don't trace ARP request packets */
2153 vnet_pcap_drop_trace_filter_add_del \
2154 (rt->errors[IP4_ARP_ERROR_##a], \
2156 foreach_notrace_ip4_arp_error;
2161 VLIB_INIT_FUNCTION (arp_notrace_init);
2164 #ifndef CLIB_MARCH_VARIANT
2165 /* Send an ARP request to see if given destination is reachable on given interface. */
2167 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2170 vnet_main_t *vnm = vnet_get_main ();
2171 ip4_main_t *im = &ip4_main;
2172 ethernet_arp_header_t *h;
2174 ip_interface_address_t *ia;
2175 ip_adjacency_t *adj;
2176 vnet_hw_interface_t *hi;
2177 vnet_sw_interface_t *si;
2181 u8 unicast_rewrite = 0;
2183 si = vnet_get_sw_interface (vnm, sw_if_index);
2185 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2187 return clib_error_return (0, "%U: interface %U down",
2188 format_ip4_address, dst,
2189 format_vnet_sw_if_index_name, vnm,
2194 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2197 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2198 return clib_error_return
2200 "no matching interface address for destination %U (interface %U)",
2201 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2205 h = vlib_packet_template_get_packet (vm,
2206 &im->ip4_arp_request_packet_template,
2210 return clib_error_return (0, "ARP request packet allocation failed");
2212 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2213 if (PREDICT_FALSE (!hi->hw_address))
2215 return clib_error_return (0, "%U: interface %U do not support ip probe",
2216 format_ip4_address, dst,
2217 format_vnet_sw_if_index_name, vnm,
2221 mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2223 h->ip4_over_ethernet[0].ip4 = src[0];
2224 h->ip4_over_ethernet[1].ip4 = dst[0];
2226 b = vlib_get_buffer (vm, bi);
2227 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2228 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2230 ip46_address_t nh = {
2234 ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2235 VNET_LINK_IP4, &nh, sw_if_index);
2238 /* Peer has been previously resolved, retrieve glean adj instead */
2239 if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2242 unicast_rewrite = 1;
2246 ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2247 VNET_LINK_IP4, sw_if_index, &nh);
2252 /* Add encapsulation string for software interface (e.g. ethernet header). */
2253 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2254 if (unicast_rewrite)
2256 u16 *etype = vlib_buffer_get_current (b) - 2;
2257 etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2259 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2262 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2263 u32 *to_next = vlib_frame_vector_args (f);
2266 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2270 return /* no error */ 0;
/* Next-node slots used by the rewrite path: drop, ICMP error generation
   and fragmentation. */
2276 IP4_REWRITE_NEXT_DROP,
2277 IP4_REWRITE_NEXT_ICMP_ERROR,
2278 IP4_REWRITE_NEXT_FRAGMENT,
2279 IP4_REWRITE_N_NEXT /* Last */
2280 } ip4_rewrite_next_t;
2283 * The bits of an IPv4 address to mask to construct a multicast
2286 #if CLIB_ARCH_IS_BIG_ENDIAN
2287 #define IP4_MCAST_ADDR_MASK 0x007fffff
2289 #define IP4_MCAST_ADDR_MASK 0xffff7f00
/*
 * MTU check for one packet.  When packet_len exceeds adj_packet_bytes,
 * *error is set to IP4_ERROR_MTU_EXCEEDED and the packet is steered either
 * to the ICMP error node ("fragmentation needed and DF set") or to the
 * fragmentation node.
 * NOTE(review): the choice between the two is presumably made on df --
 * confirm.
 */
2293 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2294 u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2296 if (packet_len > adj_packet_bytes)
2298 *error = IP4_ERROR_MTU_EXCEEDED;
2301 icmp4_error_set_vnet_buffer
2302 (b, ICMP4_destination_unreachable,
2303 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2305 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2309 /* IP fragmentation */
2310 ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2311 IP4_FRAG_NEXT_IP4_REWRITE, 0);
2312 *next = IP4_REWRITE_NEXT_FRAGMENT;
2317 /* Decrement TTL & update checksum.
2318 Works either endian, so no need for byte swap. */
/*
 * Per-packet TTL handling on the rewrite path: the IP header checksum is
 * adjusted incrementally, and a TTL that reaches zero or below sets *error
 * to IP4_ERROR_TIME_EXPIRED and steers the packet to the ICMP error node
 * (time exceeded in transit).
 */
2319 static_always_inline void
2320 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
/* Locally originated packets have the flag cleared here.
   NOTE(review): presumably they skip the TTL handling below -- confirm. */
2325 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2327 b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2333 /* Input node should have rejected packets with ttl 0. */
2334 ASSERT (ip->ttl > 0);
/* Incremental checksum update: add 0x0100 (network order) and fold the
   carry back in. */
2336 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2337 checksum += checksum >= 0xffff;
2339 ip->checksum = checksum;
2344 * If the ttl drops below 1 when forwarding, generate
2347 if (PREDICT_FALSE (ttl <= 0))
2349 *error = IP4_ERROR_TIME_EXPIRED;
2350 vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2351 icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2352 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2354 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2357 /* Verify checksum. */
2358 ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2359 (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2364 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2365 vlib_node_runtime_t * node,
2366 vlib_frame_t * frame,
2367 int do_counters, int is_midchain, int is_mcast,
2370 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2371 u32 *from = vlib_frame_vector_args (frame);
2372 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2373 u16 nexts[VLIB_FRAME_SIZE], *next;
2375 vlib_node_runtime_t *error_node =
2376 vlib_node_get_runtime (vm, ip4_input_node.index);
2378 n_left_from = frame->n_vectors;
2379 u32 thread_index = vm->thread_index;
2381 vlib_get_buffers (vm, from, bufs, n_left_from);
2382 clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2384 if (n_left_from >= 6)
2387 for (i = 2; i < 6; i++)
2388 vlib_prefetch_buffer_header (bufs[i], LOAD);
2393 while (n_left_from >= 8)
2395 ip_adjacency_t *adj0, *adj1;
2396 ip4_header_t *ip0, *ip1;
2397 u32 rw_len0, error0, adj_index0;
2398 u32 rw_len1, error1, adj_index1;
2399 u32 tx_sw_if_index0, tx_sw_if_index1;
2402 vlib_prefetch_buffer_header (b[6], LOAD);
2403 vlib_prefetch_buffer_header (b[7], LOAD);
2405 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2406 adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2409 * pre-fetch the per-adjacency counters
2413 vlib_prefetch_combined_counter (&adjacency_counters,
2414 thread_index, adj_index0);
2415 vlib_prefetch_combined_counter (&adjacency_counters,
2416 thread_index, adj_index1);
2419 ip0 = vlib_buffer_get_current (b[0]);
2420 ip1 = vlib_buffer_get_current (b[1]);
2422 error0 = error1 = IP4_ERROR_NONE;
2424 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2425 ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2427 /* Rewrite packet header and updates lengths. */
2428 adj0 = adj_get (adj_index0);
2429 adj1 = adj_get (adj_index1);
2431 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2432 rw_len0 = adj0[0].rewrite_header.data_bytes;
2433 rw_len1 = adj1[0].rewrite_header.data_bytes;
2434 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2435 vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2437 p = vlib_buffer_get_current (b[2]);
2438 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2439 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2441 p = vlib_buffer_get_current (b[3]);
2442 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2443 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2445 /* Check MTU of outgoing interface. */
2446 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2447 u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2449 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2450 ip0_len = gso_mtu_sz (b[0]);
2451 if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2452 ip1_len = gso_mtu_sz (b[1]);
2454 ip4_mtu_check (b[0], ip0_len,
2455 adj0[0].rewrite_header.max_l3_packet_bytes,
2456 ip0->flags_and_fragment_offset &
2457 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2459 ip4_mtu_check (b[1], ip1_len,
2460 adj1[0].rewrite_header.max_l3_packet_bytes,
2461 ip1->flags_and_fragment_offset &
2462 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2467 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2468 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2469 IP4_ERROR_SAME_INTERFACE : error0);
2470 error1 = ((adj1[0].rewrite_header.sw_if_index ==
2471 vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2472 IP4_ERROR_SAME_INTERFACE : error1);
2475 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2476 * to see the IP header */
2477 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2479 u32 next_index = adj0[0].rewrite_header.next_index;
2480 vlib_buffer_advance (b[0], -(word) rw_len0);
2481 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2482 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2485 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2486 vnet_feature_arc_start (lm->output_feature_arc_index,
2487 tx_sw_if_index0, &next_index, b[0]);
2488 next[0] = next_index;
2492 b[0]->error = error_node->errors[error0];
2494 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2496 u32 next_index = adj1[0].rewrite_header.next_index;
2497 vlib_buffer_advance (b[1], -(word) rw_len1);
2499 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2500 vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2503 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2504 vnet_feature_arc_start (lm->output_feature_arc_index,
2505 tx_sw_if_index1, &next_index, b[1]);
2506 next[1] = next_index;
2510 b[1]->error = error_node->errors[error1];
2514 calc_checksums (vm, b[0]);
2515 calc_checksums (vm, b[1]);
2517 /* Guess we are only writing on simple Ethernet header. */
2518 vnet_rewrite_two_headers (adj0[0], adj1[0],
2519 ip0, ip1, sizeof (ethernet_header_t));
2522 * Bump the per-adjacency counters
2526 vlib_increment_combined_counter
2527 (&adjacency_counters,
2529 adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2531 vlib_increment_combined_counter
2532 (&adjacency_counters,
2534 adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2539 if (adj0->sub_type.midchain.fixup_func)
2540 adj0->sub_type.midchain.fixup_func
2541 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2542 if (adj1->sub_type.midchain.fixup_func)
2543 adj1->sub_type.midchain.fixup_func
2544 (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2550 * copy bytes from the IP address into the MAC rewrite
2552 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2553 adj0->rewrite_header.dst_mcast_offset,
2554 &ip0->dst_address.as_u32, (u8 *) ip0);
2555 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2556 adj1->rewrite_header.dst_mcast_offset,
2557 &ip1->dst_address.as_u32, (u8 *) ip1);
2565 while (n_left_from > 0)
2567 ip_adjacency_t *adj0;
2569 u32 rw_len0, adj_index0, error0;
2570 u32 tx_sw_if_index0;
2572 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2574 adj0 = adj_get (adj_index0);
2577 vlib_prefetch_combined_counter (&adjacency_counters,
2578 thread_index, adj_index0);
2580 ip0 = vlib_buffer_get_current (b[0]);
2582 error0 = IP4_ERROR_NONE;
2584 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2587 /* Update packet buffer attributes/set output interface. */
2588 rw_len0 = adj0[0].rewrite_header.data_bytes;
2589 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2591 /* Check MTU of outgoing interface. */
2592 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2593 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2594 ip0_len = gso_mtu_sz (b[0]);
2596 ip4_mtu_check (b[0], ip0_len,
2597 adj0[0].rewrite_header.max_l3_packet_bytes,
2598 ip0->flags_and_fragment_offset &
2599 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2604 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2605 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2606 IP4_ERROR_SAME_INTERFACE : error0);
2609 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2610 * to see the IP header */
2611 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2613 u32 next_index = adj0[0].rewrite_header.next_index;
2614 vlib_buffer_advance (b[0], -(word) rw_len0);
2615 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2616 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2619 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2620 vnet_feature_arc_start (lm->output_feature_arc_index,
2621 tx_sw_if_index0, &next_index, b[0]);
2622 next[0] = next_index;
2626 b[0]->error = error_node->errors[error0];
2630 calc_checksums (vm, b[0]);
2632 /* Guess we are only writing on simple Ethernet header. */
2633 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2636 vlib_increment_combined_counter
2637 (&adjacency_counters,
2638 thread_index, adj_index0, 1,
2639 vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2643 if (adj0->sub_type.midchain.fixup_func)
2644 adj0->sub_type.midchain.fixup_func
2645 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2651 * copy bytes from the IP address into the MAC rewrite
2653 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2654 adj0->rewrite_header.dst_mcast_offset,
2655 &ip0->dst_address.as_u32, (u8 *) ip0);
2664 /* Need to do trace after rewrites to pick up new packet data. */
2665 if (node->flags & VLIB_NODE_FLAG_TRACE)
2666 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2668 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2669 return frame->n_vectors;
2673 ip4_rewrite_inline (vlib_main_t * vm,
2674 vlib_node_runtime_t * node,
2675 vlib_frame_t * frame,
2676 int do_counters, int is_midchain, int is_mcast)
2678 vnet_main_t *vnm = vnet_get_main ();
2679 if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2680 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2681 is_midchain, is_mcast,
2684 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2685 is_midchain, is_mcast,
2686 0 /* no do_gso */ );
2690 /** @brief IPv4 rewrite node.
2693 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2694 header checksum, fetch the ip adjacency, check the outbound mtu,
2695 apply the adjacency rewrite, and send pkts to the adjacency
2696 rewrite header's rewrite_next_index.
2698 @param vm vlib_main_t corresponding to the current thread
2699 @param node vlib_node_runtime_t
2700 @param frame vlib_frame_t whose contents should be dispatched
2702 @par Graph mechanics: buffer metadata, next index usage
2705 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2706 - the rewrite adjacency index
2707 - <code>adj->lookup_next_index</code>
2708 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2709 the packet will be dropped.
2710 - <code>adj->rewrite_header</code>
2711 - Rewrite string length, rewrite string, next_index
2714 - <code>b->current_data, b->current_length</code>
2715 - Updated net of applying the rewrite string
2717 <em>Next Indices:</em>
2718 - <code> adj->rewrite_header.next_index </code>
2722 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2723 vlib_frame_t * frame)
2725 if (adj_are_counters_enabled ())
2726 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2728 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2731 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2732 vlib_node_runtime_t * node,
2733 vlib_frame_t * frame)
2735 if (adj_are_counters_enabled ())
2736 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2738 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2741 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2742 vlib_node_runtime_t * node,
2743 vlib_frame_t * frame)
2745 if (adj_are_counters_enabled ())
2746 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2748 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2751 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2752 vlib_node_runtime_t * node,
2753 vlib_frame_t * frame)
2755 if (adj_are_counters_enabled ())
2756 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2758 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2761 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2762 vlib_node_runtime_t * node,
2763 vlib_frame_t * frame)
2765 if (adj_are_counters_enabled ())
2766 return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2768 return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2772 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2773 .name = "ip4-rewrite",
2774 .vector_size = sizeof (u32),
2776 .format_trace = format_ip4_rewrite_trace,
2778 .n_next_nodes = IP4_REWRITE_N_NEXT,
2780 [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2781 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2782 [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2786 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2787 .name = "ip4-rewrite-bcast",
2788 .vector_size = sizeof (u32),
2790 .format_trace = format_ip4_rewrite_trace,
2791 .sibling_of = "ip4-rewrite",
2794 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2795 .name = "ip4-rewrite-mcast",
2796 .vector_size = sizeof (u32),
2798 .format_trace = format_ip4_rewrite_trace,
2799 .sibling_of = "ip4-rewrite",
2802 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2803 .name = "ip4-mcast-midchain",
2804 .vector_size = sizeof (u32),
2806 .format_trace = format_ip4_rewrite_trace,
2807 .sibling_of = "ip4-rewrite",
2810 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2811 .name = "ip4-midchain",
2812 .vector_size = sizeof (u32),
2813 .format_trace = format_ip4_forward_next_trace,
2814 .sibling_of = "ip4-rewrite",
2819 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2821 ip4_fib_mtrie_t *mtrie0;
2822 ip4_fib_mtrie_leaf_t leaf0;
2825 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2827 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2828 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2829 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2831 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2833 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2836 static clib_error_t *
2837 test_lookup_command_fn (vlib_main_t * vm,
2838 unformat_input_t * input, vlib_cli_command_t * cmd)
2845 ip4_address_t ip4_base_address;
2848 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2850 if (unformat (input, "table %d", &table_id))
2852 /* Make sure the entry exists. */
2853 fib = ip4_fib_get (table_id);
2854 if ((fib) && (fib->index != table_id))
2855 return clib_error_return (0, "<fib-index> %d does not exist",
2858 else if (unformat (input, "count %f", &count))
2861 else if (unformat (input, "%U",
2862 unformat_ip4_address, &ip4_base_address))
2865 return clib_error_return (0, "unknown input `%U'",
2866 format_unformat_error, input);
2871 for (i = 0; i < n; i++)
2873 if (!ip4_lookup_validate (&ip4_base_address, table_id))
2876 ip4_base_address.as_u32 =
2877 clib_host_to_net_u32 (1 +
2878 clib_net_to_host_u32 (ip4_base_address.as_u32));
2882 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2884 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2890 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2891 * given FIB table to determine if there is a conflict with the
2892 * adjacency table. The fib-id can be determined by using the
2893 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2896 * @todo This command uses fib-id, other commands use table-id (not
2897 * just a name, they are different indexes). Would like to change this
2898 * to table-id for consistency.
2901 * Example of how to run the test lookup command:
2902 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2903 * No errors in 2 lookups
2907 VLIB_CLI_COMMAND (lookup_test_command, static) =
2909 .path = "test lookup",
2910 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2911 .function = test_lookup_command_fn,
2915 #ifndef CLIB_MARCH_VARIANT
2917 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2921 fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2923 if (~0 == fib_index)
2924 return VNET_API_ERROR_NO_SUCH_FIB;
2926 fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2933 static clib_error_t *
2934 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2935 unformat_input_t * input,
2936 vlib_cli_command_t * cmd)
2940 u32 flow_hash_config = 0;
2943 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2945 if (unformat (input, "table %d", &table_id))
2948 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2949 foreach_flow_hash_bit
2956 return clib_error_return (0, "unknown input `%U'",
2957 format_unformat_error, input);
2959 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2965 case VNET_API_ERROR_NO_SUCH_FIB:
2966 return clib_error_return (0, "no such FIB table %d", table_id);
2969 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2977 * Configure the set of IPv4 fields used by the flow hash.
2980 * Example of how to set the flow hash on a given table:
2981 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2982 * Example of display the configured flow hash:
2983 * @cliexstart{show ip fib}
2984 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2987 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2988 * [0] [@0]: dpo-drop ip6
2991 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2992 * [0] [@0]: dpo-drop ip6
2995 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2996 * [0] [@0]: dpo-drop ip6
2999 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3000 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3003 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3004 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3005 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3006 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3007 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3010 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3011 * [0] [@0]: dpo-drop ip6
3012 * 255.255.255.255/32
3014 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3015 * [0] [@0]: dpo-drop ip6
3016 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3019 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3020 * [0] [@0]: dpo-drop ip6
3023 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3024 * [0] [@0]: dpo-drop ip6
3027 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3028 * [0] [@4]: ipv4-glean: af_packet0
3031 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3032 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3035 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3036 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3039 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3040 * [0] [@4]: ipv4-glean: af_packet1
3043 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3044 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3047 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3048 * [0] [@0]: dpo-drop ip6
3051 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3052 * [0] [@0]: dpo-drop ip6
3053 * 255.255.255.255/32
3055 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3056 * [0] [@0]: dpo-drop ip6
3060 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3062 .path = "set ip flow-hash",
3064 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3065 .function = set_ip_flow_hash_command_fn,
3069 #ifndef CLIB_MARCH_VARIANT
3071 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3074 vnet_main_t *vnm = vnet_get_main ();
3075 vnet_interface_main_t *im = &vnm->interface_main;
3076 ip4_main_t *ipm = &ip4_main;
3077 ip_lookup_main_t *lm = &ipm->lookup_main;
3078 vnet_classify_main_t *cm = &vnet_classify_main;
3079 ip4_address_t *if_addr;
3081 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3082 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3084 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3085 return VNET_API_ERROR_NO_SUCH_ENTRY;
3087 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3088 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3090 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3092 if (NULL != if_addr)
3094 fib_prefix_t pfx = {
3096 .fp_proto = FIB_PROTOCOL_IP4,
3097 .fp_addr.ip4 = *if_addr,
3101 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3105 if (table_index != (u32) ~ 0)
3107 dpo_id_t dpo = DPO_INVALID;
3112 classify_dpo_create (DPO_PROTO_IP4, table_index));
3114 fib_table_entry_special_dpo_add (fib_index,
3116 FIB_SOURCE_CLASSIFY,
3117 FIB_ENTRY_FLAG_NONE, &dpo);
3122 fib_table_entry_special_remove (fib_index,
3123 &pfx, FIB_SOURCE_CLASSIFY);
3131 static clib_error_t *
3132 set_ip_classify_command_fn (vlib_main_t * vm,
3133 unformat_input_t * input,
3134 vlib_cli_command_t * cmd)
3136 u32 table_index = ~0;
3137 int table_index_set = 0;
3138 u32 sw_if_index = ~0;
3141 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3143 if (unformat (input, "table-index %d", &table_index))
3144 table_index_set = 1;
3145 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3146 vnet_get_main (), &sw_if_index))
3152 if (table_index_set == 0)
3153 return clib_error_return (0, "classify table-index must be specified");
3155 if (sw_if_index == ~0)
3156 return clib_error_return (0, "interface / subif must be specified");
3158 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3165 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3166 return clib_error_return (0, "No such interface");
3168 case VNET_API_ERROR_NO_SUCH_ENTRY:
3169 return clib_error_return (0, "No such classifier table");
3175 * Assign a classification table to an interface. The classification
3176 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3177 * commands. Once the table is create, use this command to filter packets
3181 * Example of how to assign a classification table to an interface:
3182 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3185 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3187 .path = "set ip classify",
3189 "set ip classify intfc <interface> table-index <classify-idx>",
3190 .function = set_ip_classify_command_fn,
3194 static clib_error_t *
3195 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3197 ip4_main_t *im = &ip4_main;
3200 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3202 if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3205 return clib_error_return (0,
3206 "invalid heap-size parameter `%U'",
3207 format_unformat_error, input);
3210 im->mtrie_heap_size = heapsize;
3215 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */