2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h> /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
60 /** @brief IPv4 lookup node.
63 This is the main IPv4 lookup dispatch node.
65 @param vm vlib_main_t corresponding to the current thread
66 @param node vlib_node_runtime_t
67 @param frame vlib_frame_t whose contents should be dispatched
69 @par Graph mechanics: buffer metadata, next index usage
72 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73 - Indicates the @c sw_if_index value of the interface that the
74 packet was received on.
75 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76 - When the value is @c ~0 then the node performs a longest prefix
77 match (LPM) for the packet destination address in the FIB attached
78 to the receive interface.
79 - Otherwise perform LPM for the packet destination address in the
80 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81 value (0, 1, ...) and not a VRF id.
84 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85 - The lookup result adjacency index.
88 - Dispatches the packet to the node index found in
89 ip_adjacency_t @c adj->lookup_next_index
90 (where @c adj is the lookup result adjacency).
/* ip4_lookup_node dispatch function: hands the frame to ip4_lookup_inline()
 * and returns whatever that call returns. */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
95 return ip4_lookup_inline (vm, node, frame);
/* Forward declaration: the trace formatter is defined later in this file but
 * is referenced by the node registration below. */
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/* Registers the "ip4-lookup" graph node.  The next-node table is supplied by
 * IP_LOOKUP_N_NEXT / IP4_LOOKUP_NEXT_NODES and packet traces are rendered
 * with format_ip4_lookup_trace. */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
103 .name = "ip4-lookup",
104 .vector_size = sizeof (u32),
105 .format_trace = format_ip4_lookup_trace,
106 .n_next_nodes = IP_LOOKUP_N_NEXT,
107 .next_nodes = IP4_LOOKUP_NEXT_NODES,
/* ip4_load_balance_node dispatch function.
 *
 * For every buffer in the frame:
 *  - read the load-balance index from ip.adj_index[VLIB_TX];
 *  - choose a bucket of that load-balance object (bucket 0 when there is a
 *    single bucket, otherwise selected by the flow hash);
 *  - store the chosen bucket's DPO index back into ip.adj_index[VLIB_TX] and
 *    take the next node from the DPO;
 *  - increment the load-balance "via" combined counter for that index by one
 *    packet and the buffer chain's length.
 * The buffers are then enqueued to their next nodes, traced if tracing is
 * enabled on the node, and frame->n_vectors is returned.
 *
 * The same per-buffer steps appear twice below: once for a pair of buffers
 * (b[0], b[1]) with prefetch of b[2], b[3], and once for a single buffer. */
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112 vlib_node_runtime_t * node,
113 vlib_frame_t * frame)
/* Counters indexed below by load-balance index (lbi0 / lbi1). */
115 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
117 u32 thread_index = vm->thread_index;
118 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119 u16 nexts[VLIB_FRAME_SIZE], *next;
121 from = vlib_frame_vector_args (frame);
122 n_left = frame->n_vectors;
/* Resolve the frame's buffer indices into buffer pointers in bufs[]. */
125 vlib_get_buffers (vm, from, bufs, n_left);
/* ---- two buffers at a time ---- */
129 const load_balance_t *lb0, *lb1;
130 const ip4_header_t *ip0, *ip1;
131 u32 lbi0, hc0, lbi1, hc1;
132 const dpo_id_t *dpo0, *dpo1;
134 /* Prefetch next iteration. */
136 vlib_prefetch_buffer_header (b[2], LOAD);
137 vlib_prefetch_buffer_header (b[3], LOAD);
139 CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140 CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
/* The current data pointer is treated as the IPv4 header; adj_index[VLIB_TX]
 * is used as a load-balance index on entry to this node. */
143 ip0 = vlib_buffer_get_current (b[0]);
144 ip1 = vlib_buffer_get_current (b[1]);
145 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146 lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
148 lb0 = load_balance_get (lbi0);
149 lb1 = load_balance_get (lbi1);
152 * this node is for via FIBs we can re-use the hash value from the
153 * to node if present.
154 * We don't want to use the same hash value at each level in the recursion
155 * graph as that would lead to polarisation
/* Multi-bucket case: an existing non-zero flow hash is shifted right by one
 * and written back to the buffer; when no hash is present it is computed from
 * the IP header with the load-balance's hash config.  The bucket is then
 * picked with hash & lb_n_buckets_minus_1. */
159 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
161 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
163 hc0 = vnet_buffer (b[0])->ip.flow_hash =
164 vnet_buffer (b[0])->ip.flow_hash >> 1;
168 hc0 = vnet_buffer (b[0])->ip.flow_hash =
169 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
171 dpo0 = load_balance_get_fwd_bucket
172 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
/* Single-bucket case: bucket 0 is used directly. */
176 dpo0 = load_balance_get_bucket_i (lb0, 0);
/* Same selection logic for the second buffer of the pair. */
178 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
180 if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
182 hc1 = vnet_buffer (b[1])->ip.flow_hash =
183 vnet_buffer (b[1])->ip.flow_hash >> 1;
187 hc1 = vnet_buffer (b[1])->ip.flow_hash =
188 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
190 dpo1 = load_balance_get_fwd_bucket
191 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195 dpo1 = load_balance_get_bucket_i (lb1, 0);
/* Publish the result: next node and the chosen DPO index for each buffer. */
198 next[0] = dpo0->dpoi_next_node;
199 next[1] = dpo1->dpoi_next_node;
201 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Account against the load-balance index read on entry, not the chosen DPO. */
204 vlib_increment_combined_counter
205 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206 vlib_increment_combined_counter
207 (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
/* ---- one buffer at a time: same steps as above for b[0] only ---- */
216 const load_balance_t *lb0;
217 const ip4_header_t *ip0;
218 const dpo_id_t *dpo0;
221 ip0 = vlib_buffer_get_current (b[0]);
222 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
224 lb0 = load_balance_get (lbi0);
227 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
229 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
231 hc0 = vnet_buffer (b[0])->ip.flow_hash =
232 vnet_buffer (b[0])->ip.flow_hash >> 1;
236 hc0 = vnet_buffer (b[0])->ip.flow_hash =
237 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
239 dpo0 = load_balance_get_fwd_bucket
240 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244 dpo0 = load_balance_get_bucket_i (lb0, 0);
247 next[0] = dpo0->dpoi_next_node;
248 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
250 vlib_increment_combined_counter
251 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
/* Hand every buffer to the next node recorded in nexts[]. */
258 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
/* Tracing uses the VLIB_TX adjacency slot, i.e. the DPO index written above. */
259 if (node->flags & VLIB_NODE_FLAG_TRACE)
260 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
262 return frame->n_vectors;
/* Registers the "ip4-load-balance" graph node as a sibling of "ip4-lookup";
 * it reuses the lookup node's trace formatter. */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
268 .name = "ip4-load-balance",
269 .vector_size = sizeof (u32),
270 .sibling_of = "ip4-lookup",
271 .format_trace = format_ip4_lookup_trace,
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
/* Walks the addresses configured on sw_if_index (honouring unnumbered
 * interfaces) via the IPv4 lookup main.
 *
 * @param im          IPv4 main; its lookup_main holds the address pool.
 * @param sw_if_index interface whose addresses are walked.
 * @param result_ia   out: set to the interface-address entry that produced
 *                    the result, or 0 when no address was found.
 */
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279 ip_interface_address_t ** result_ia)
281 ip_lookup_main_t *lm = &im->lookup_main;
282 ip_interface_address_t *ia = 0;
283 ip4_address_t *result = 0;
286 foreach_ip_interface_address
287 (lm, ia, sw_if_index,
288 1 /* honor unnumbered */ ,
291 ip_interface_address_get_address (lm, ia);
/* ia is only meaningful when an address was actually found. */
297 *result_ia = result ? ia : 0;
/* (Re)installs the FIB_SOURCE_INTERFACE entry for a subnet broadcast prefix
 * in fib_index.  Any existing special entry from that source is removed
 * first.  The interface's VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST flag is
 * consulted to install a forwarding path; a drop entry that is exempt from
 * loose uRPF is the other form installed here. */
303 ip4_add_subnet_bcast_route (u32 fib_index,
307 vnet_sw_interface_flags_t iflags;
309 iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
/* Start from a clean slate so the entry type can change between calls. */
311 fib_table_entry_special_remove(fib_index,
313 FIB_SOURCE_INTERFACE);
315 if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
317 fib_table_entry_update_one_path (fib_index, pfx,
318 FIB_SOURCE_INTERFACE,
321 /* No next-hop address */
327 // no out-label stack
329 FIB_ROUTE_PATH_FLAG_NONE);
/* Drop entry; LOOSE_URPF_EXEMPT keeps it out of loose uRPF checks. */
333 fib_table_entry_special_add(fib_index,
335 FIB_SOURCE_INTERFACE,
336 (FIB_ENTRY_FLAG_DROP |
337 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
/* Installs the FIB routes that belong to the *prefix* of interface address
 * `a` (as opposed to the address itself).
 *
 * Prefixes are reference counted per (masked prefix, sw_if_index): when the
 * prefix is already known only the ref count is bumped; otherwise a new pool
 * entry is created and the routes below are installed.
 *
 *  - length <= 30: glean route for the prefix, a /32 drop for the prefix's
 *    base address and a /32 subnet-broadcast route (each /32 only when it
 *    differs from the interface address).
 *  - length == 31: an attached /32 route for the peer address.
 */
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
345 ip_interface_address_t * a)
347 ip_lookup_main_t *lm = &im->lookup_main;
348 ip_interface_prefix_t *if_prefix;
349 ip4_address_t *address = ip_interface_address_get_address (lm, a);
/* Lookup key: the address masked down to its prefix, plus the interface. */
351 ip_interface_prefix_key_t key = {
353 .fp_len = a->address_length,
354 .fp_proto = FIB_PROTOCOL_IP4,
355 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
357 .sw_if_index = sw_if_index,
/* Scratch prefix reused for each special route installed below. */
360 fib_prefix_t pfx_special = {
361 .fp_proto = FIB_PROTOCOL_IP4,
364 /* If prefix already set on interface, just increment ref count & return */
365 if_prefix = ip_get_interface_prefix (lm, &key);
368 if_prefix->ref_count += 1;
372 /* New prefix - allocate a pool entry, initialize it, add to the hash */
373 pool_get (lm->if_prefix_pool, if_prefix);
374 if_prefix->ref_count = 1;
/* Remember which interface address supplied the glean route's address. */
375 if_prefix->src_ia_index = a - lm->if_address_pool;
376 clib_memcpy (&if_prefix->key, &key, sizeof (key));
377 mhash_set (&lm->prefix_to_if_prefix_index, &key,
378 if_prefix - lm->if_prefix_pool, 0 /* old value */);
380 /* length <= 30 - add glean, drop first address, maybe drop bcast address */
381 if (a->address_length <= 30)
/* Note the glean prefix carries the unmasked interface address. */
383 pfx_special.fp_len = a->address_length;
384 pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
386 /* set the glean route for the prefix */
387 fib_table_entry_update_one_path (fib_index, &pfx_special,
388 FIB_SOURCE_INTERFACE,
389 (FIB_ENTRY_FLAG_CONNECTED |
390 FIB_ENTRY_FLAG_ATTACHED),
392 /* No next-hop address */
395 /* invalid FIB index */
398 /* no out-label stack */
400 FIB_ROUTE_PATH_FLAG_NONE);
402 /* set a drop route for the base address of the prefix */
403 pfx_special.fp_len = 32;
404 pfx_special.fp_addr.ip4.as_u32 =
405 address->as_u32 & im->fib_masks[a->address_length];
/* Skip when the interface address itself is the base address. */
407 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408 fib_table_entry_special_add (fib_index, &pfx_special,
409 FIB_SOURCE_INTERFACE,
410 (FIB_ENTRY_FLAG_DROP |
411 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
413 /* set a route for the broadcast address of the prefix */
414 pfx_special.fp_len = 32;
415 pfx_special.fp_addr.ip4.as_u32 =
416 address->as_u32 | ~im->fib_masks[a->address_length];
/* Skip when the interface address itself is the broadcast address. */
417 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418 ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422 /* length == 31 - add an attached route for the other address */
423 else if (a->address_length == 31)
/* Flipping the lowest address bit yields the other host of the /31. */
425 pfx_special.fp_len = 32;
426 pfx_special.fp_addr.ip4.as_u32 =
427 address->as_u32 ^ clib_host_to_net_u32(1);
429 fib_table_entry_update_one_path (fib_index, &pfx_special,
430 FIB_SOURCE_INTERFACE,
431 (FIB_ENTRY_FLAG_ATTACHED),
433 &pfx_special.fp_addr,
435 /* invalid FIB index */
439 FIB_ROUTE_PATH_FLAG_NONE);
/* Installs all FIB state for interface address `a` on sw_if_index:
 *  1. the prefix-level routes (ip4_add_interface_prefix_routes);
 *  2. when a classify table is configured for the interface, a classify DPO
 *     entry added through fib_table_entry_special_dpo_add;
 *  3. the connected + local route for the address itself. */
444 ip4_add_interface_routes (u32 sw_if_index,
445 ip4_main_t * im, u32 fib_index,
446 ip_interface_address_t * a)
448 ip_lookup_main_t *lm = &im->lookup_main;
449 ip4_address_t *address = ip_interface_address_get_address (lm, a);
/* Prefix describing the interface address itself. */
452 .fp_proto = FIB_PROTOCOL_IP4,
453 .fp_addr.ip4 = *address,
456 /* set special routes for the prefix if needed */
457 ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
/* Bounds check first: the per-interface classify vector may be shorter
 * than sw_if_index. */
459 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
461 u32 classify_table_index =
462 lm->classify_table_index_by_sw_if_index[sw_if_index];
/* ~0 marks "no classify table for this interface". */
463 if (classify_table_index != (u32) ~ 0)
465 dpo_id_t dpo = DPO_INVALID;
470 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
472 fib_table_entry_special_dpo_add (fib_index,
475 FIB_ENTRY_FLAG_NONE, &dpo);
/* Connected + local entry for the address. */
480 fib_table_entry_update_one_path (fib_index, &pfx,
481 FIB_SOURCE_INTERFACE,
482 (FIB_ENTRY_FLAG_CONNECTED |
483 FIB_ENTRY_FLAG_LOCAL),
490 FIB_ROUTE_PATH_FLAG_NONE);
/* Counterpart of ip4_add_interface_prefix_routes: drops one reference on the
 * (masked prefix, sw_if_index) entry and adjusts the prefix-level routes.
 *
 *  - Unknown prefix: a warning is logged.
 *  - References remain and the address that sourced the glean route still
 *    exists in the address pool: nothing to adjust.
 *  - length <= 30: the glean route is deleted; when no references remain the
 *    base-address and broadcast /32 entries are removed too; when references
 *    remain, another matching interface address is looked up and a new glean
 *    route is installed with it.
 *  - length == 31: the attached route for the peer address is deleted.
 * The prefix entry is finally removed from the hash and returned to the pool.
 */
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
497 ip4_address_t * address,
500 ip_lookup_main_t *lm = &im->lookup_main;
501 ip_interface_prefix_t *if_prefix;
/* Same key construction as on the add path: masked address + interface. */
503 ip_interface_prefix_key_t key = {
505 .fp_len = address_length,
506 .fp_proto = FIB_PROTOCOL_IP4,
507 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
509 .sw_if_index = sw_if_index,
512 fib_prefix_t pfx_special = {
514 .fp_proto = FIB_PROTOCOL_IP4,
517 if_prefix = ip_get_interface_prefix (lm, &key);
520 clib_warning ("Prefix not found while deleting %U",
521 format_ip4_address_and_length, address, address_length);
525 if_prefix->ref_count -= 1;
528 * Routes need to be adjusted if:
529 * - deleting last intf addr in prefix
530 * - deleting intf addr used as default source address in glean adjacency
532 * We're done now otherwise
/* src_ia_index being a free pool slot means the glean's source address is
 * the one that was just deleted. */
534 if ((if_prefix->ref_count > 0) &&
535 !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
538 /* length <= 30, delete glean route, first address, last address */
539 if (address_length <= 30)
542 /* remove glean route for prefix */
543 pfx_special.fp_addr.ip4 = *address;
544 pfx_special.fp_len = address_length;
545 fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
547 /* if no more intf addresses in prefix, remove other special routes */
548 if (!if_prefix->ref_count)
550 /* first address in prefix */
551 pfx_special.fp_addr.ip4.as_u32 =
552 address->as_u32 & im->fib_masks[address_length];
553 pfx_special.fp_len = 32;
/* Mirrors the add path: the /32 was only installed when it differed from
 * the interface address. */
555 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556 fib_table_entry_special_remove (fib_index,
558 FIB_SOURCE_INTERFACE);
560 /* prefix broadcast address */
561 pfx_special.fp_addr.ip4.as_u32 =
562 address->as_u32 | ~im->fib_masks[address_length];
563 pfx_special.fp_len = 32;
565 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
566 fib_table_entry_special_remove (fib_index,
568 FIB_SOURCE_INTERFACE);
571 /* default source addr just got deleted, find another */
573 ip_interface_address_t *new_src_ia = NULL;
574 ip4_address_t *new_src_addr = NULL;
/* NOTE(review): new_src_addr / new_src_ia are dereferenced below with no
 * visible NULL check; presumably ref_count > 0 guarantees a match --
 * confirm. */
577 ip4_interface_address_matching_destination
578 (im, address, sw_if_index, &new_src_ia);
580 if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
582 pfx_special.fp_len = address_length;
583 pfx_special.fp_addr.ip4 = *new_src_addr;
585 /* set new glean route for the prefix */
586 fib_table_entry_update_one_path (fib_index, &pfx_special,
587 FIB_SOURCE_INTERFACE,
588 (FIB_ENTRY_FLAG_CONNECTED |
589 FIB_ENTRY_FLAG_ATTACHED),
591 /* No next-hop address */
594 /* invalid FIB index */
597 /* no out-label stack */
599 FIB_ROUTE_PATH_FLAG_NONE);
603 /* length == 31, delete attached route for the other address */
604 else if (address_length == 31)
606 pfx_special.fp_addr.ip4.as_u32 =
607 address->as_u32 ^ clib_host_to_net_u32(1);
609 fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
/* Last reference gone: forget the prefix entirely. */
612 mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
613 pool_put (lm->if_prefix_pool, if_prefix);
/* Removes the FIB state for one interface address: first the prefix-level
 * routes (ip4_del_interface_prefix_routes), then the FIB_SOURCE_INTERFACE
 * entry for the address itself. */
617 ip4_del_interface_routes (u32 sw_if_index,
620 ip4_address_t * address, u32 address_length)
623 .fp_len = address_length,
624 .fp_proto = FIB_PROTOCOL_IP4,
625 .fp_addr.ip4 = *address,
628 ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
629 address, address_length);
632 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
635 #ifndef CLIB_MARCH_VARIANT
/* Reference-counted enable/disable of IPv4 on an interface.
 *
 * A per-interface counter in ip4_main is incremented on enable and
 * decremented on disable; the feature changes and callbacks below are only
 * meant for the 1<->0 transition of that counter.  On a transition the
 * "ip4-not-enabled" feature is toggled with !is_enable on the ip4-multicast
 * arc (and likewise requested on ip4-unicast), and every registered
 * enable/disable callback is invoked.
 *
 * @param sw_if_index interface to act on.
 * @param is_enable   non-zero to enable, zero to disable.
 */
637 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
639 ip4_main_t *im = &ip4_main;
/* Grow the counter vector on demand; new slots start at 0. */
641 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
644 * enable/disable only on the 1<->0 transition
648 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
/* Disabling an interface that was never enabled would underflow. */
653 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
654 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
657 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
661 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
662 sw_if_index, !is_enable, 0, 0);
/* Notify everyone registered for enable/disable events. */
665 ip4_enable_disable_interface_callback_t *cb;
666 vec_foreach (cb, im->enable_disable_interface_callbacks)
667 cb->function (im, cb->function_opaque, sw_if_index, is_enable);
/* Adds or deletes an IPv4 address on an interface.
 *
 * Steps visible below:
 *  - sw_if_index 0 (local0) is refused with an error;
 *  - when adding, every interface in the same FIB is scanned for an address
 *    that conflicts with the new one; an interface may hold several distinct
 *    addresses of the same length in one prefix, anything else sets
 *    VNET_API_ERROR_DUPLICATE_IF_ADDRESS and produces an error;
 *  - the address is added to / removed from the interface address pool;
 *  - IPv4 is enabled (add) or disabled (delete) on the interface;
 *  - interface routes are added/deleted only while the interface is admin up;
 *  - add/del callbacks fire only when the address pool size changed.
 *
 * @param vm             vlib main (passed through by callers).
 * @param address        the IPv4 address.
 * @param address_length prefix length of the address.
 * @param is_del         non-zero to delete, zero to add.
 * @return error pointer; initialised to 0 (no error) below.
 */
671 static clib_error_t *
672 ip4_add_del_interface_address_internal (vlib_main_t * vm,
674 ip4_address_t * address,
675 u32 address_length, u32 is_del)
677 vnet_main_t *vnm = vnet_get_main ();
678 ip4_main_t *im = &ip4_main;
679 ip_lookup_main_t *lm = &im->lookup_main;
680 clib_error_t *error = 0;
681 u32 if_address_index, elts_before;
682 ip4_address_fib_t ip4_af, *addr_fib = 0;
684 /* local0 interface doesn't support IP addressing */
685 if (sw_if_index == 0)
688 clib_error_create ("local0 interface doesn't support IP addressing");
/* Pair the address with the interface's FIB index; addr_fib is the vector
 * form handed to ip_interface_address_add_del below. */
691 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
692 ip4_addr_fib_init (&ip4_af, address,
693 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
694 vec_add1 (addr_fib, ip4_af);
697 * there is no support for adj-fib handling in the presence of overlapping
698 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
704 /* When adding an address check that it does not conflict
705 with an existing address on any interface in this table. */
706 ip_interface_address_t *ia;
707 vnet_sw_interface_t *sif;
709 pool_foreach(sif, vnm->interface_main.sw_interfaces,
/* Only interfaces bound to the same FIB can conflict. */
711 if (im->fib_index_by_sw_if_index[sw_if_index] ==
712 im->fib_index_by_sw_if_index[sif->sw_if_index])
714 foreach_ip_interface_address
715 (&im->lookup_main, ia, sif->sw_if_index,
716 0 /* honor unnumbered */ ,
719 ip_interface_address_get_address
720 (&im->lookup_main, ia);
/* Two route-match tests are OR-ed to detect a conflict between the new
 * address and existing address x. */
721 if (ip4_destination_matches_route
722 (im, address, x, ia->address_length) ||
723 ip4_destination_matches_route (im,
728 /* an intf may have >1 addr from the same prefix */
729 if ((sw_if_index == sif->sw_if_index) &&
730 (ia->address_length == address_length) &&
731 (x->as_u32 != address->as_u32))
734 /* error if the length or intf was different */
735 vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
739 ("failed to add %U which conflicts with %U for interface %U",
740 format_ip4_address_and_length, address,
742 format_ip4_address_and_length, x,
744 format_vnet_sw_if_index_name, vnm,
/* Snapshot the pool size to detect whether the call below really
 * added/removed an entry. */
753 elts_before = pool_elts (lm->if_address_pool);
755 error = ip_interface_address_add_del
756 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
760 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
762 /* intf addr routes are added/deleted on admin up/down */
763 if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
766 ip4_del_interface_routes (sw_if_index,
767 im, ip4_af.fib_index, address,
770 ip4_add_interface_routes (sw_if_index,
771 im, ip4_af.fib_index,
773 (lm->if_address_pool, if_address_index));
776 /* If pool did not grow/shrink: add duplicate address. */
777 if (elts_before != pool_elts (lm->if_address_pool))
779 ip4_add_del_interface_address_callback_t *cb;
780 vec_foreach (cb, im->add_del_interface_address_callbacks)
781 cb->function (im, cb->function_opaque, sw_if_index,
782 address, address_length, if_address_index, is_del);
/* Public entry point for adding/deleting an interface IPv4 address; forwards
 * all arguments unchanged to ip4_add_del_interface_address_internal() and
 * returns its result. */
791 ip4_add_del_interface_address (vlib_main_t * vm,
793 ip4_address_t * address,
794 u32 address_length, u32 is_del)
796 return ip4_add_del_interface_address_internal
797 (vm, sw_if_index, address, address_length, is_del);
/* Refreshes the subnet-broadcast route of every address on sw_if_index whose
 * prefix length is <= 30, by calling ip4_add_subnet_bcast_route() with the
 * prefix's broadcast address (address OR-ed with the inverted prefix mask)
 * in the interface's IPv4 FIB.
 *
 * NOTE(review): how `enable` is applied is not visible in the lines below;
 * ip4_add_subnet_bcast_route() reads the interface's directed-broadcast flag,
 * so presumably that flag is updated from `enable` first -- confirm.
 * NOTE(review): "braodcast" in the existing comment below is a typo for
 * "broadcast". */
801 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
803 ip_interface_address_t *ia;
809 * when directed broadcast is enabled, the subnet braodcast route will forward
810 * packets using an adjacency with a broadcast MAC. otherwise it drops
813 foreach_ip_interface_address(&im->lookup_main, ia,
/* The add path installs no subnet-broadcast route for /31 or /32. */
816 if (ia->address_length <= 30)
820 ipa = ip_interface_address_get_address (&im->lookup_main, ia);
824 .fp_proto = FIB_PROTOCOL_IP4,
826 .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
830 ip4_add_subnet_bcast_route
831 (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
/* Admin up/down callback (registered at the end of this block).
 *
 * Makes sure the per-interface lookup vectors cover sw_if_index, derives
 * is_admin_up from `flags`, then walks every address on the interface
 * (unnumbered not honoured) and adds or deletes its interface routes in the
 * interface's FIB.
 *
 * NOTE(review): the choice between the add and delete calls presumably keys
 * off is_admin_up -- confirm. */
840 static clib_error_t *
841 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
843 ip4_main_t *im = &ip4_main;
844 ip_interface_address_t *ia;
846 u32 is_admin_up, fib_index;
848 /* Fill in lookup tables with default table (0). */
849 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
851 vec_validate_init_empty (im->
852 lookup_main.if_address_pool_index_by_sw_if_index,
/* Normalise the flag test to 0/1. */
855 is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
857 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
860 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
861 0 /* honor unnumbered */,
863 a = ip_interface_address_get_address (&im->lookup_main, ia);
865 ip4_add_interface_routes (sw_if_index,
869 ip4_del_interface_routes (sw_if_index,
871 a, ia->address_length);
/* Hook the function above into interface admin up/down notifications. */
878 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
880 /* Built-in ip4 unicast rx feature path definition */
/* The "ip4-unicast" arc starts at ip4-input / ip4-input-no-checksum and ends
 * at ip4-lookup.  Ordering constraints declared by the registrations below:
 *
 *   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx
 *     -> ip4-source-check-via-any -> ip4-policer-classify
 *     -> ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass
 *     -> ip4-lookup
 *
 *   ip4-source-and-port-range-check-rx runs before ip4-policer-classify.
 *   ip4-not-enabled runs before ip4-lookup.
 */
882 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
884 .arc_name = "ip4-unicast",
885 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
886 .last_in_arc = "ip4-lookup",
887 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
890 VNET_FEATURE_INIT (ip4_flow_classify, static) =
892 .arc_name = "ip4-unicast",
893 .node_name = "ip4-flow-classify",
894 .runs_before = VNET_FEATURES ("ip4-inacl"),
897 VNET_FEATURE_INIT (ip4_inacl, static) =
899 .arc_name = "ip4-unicast",
900 .node_name = "ip4-inacl",
901 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
904 VNET_FEATURE_INIT (ip4_source_check_1, static) =
906 .arc_name = "ip4-unicast",
907 .node_name = "ip4-source-check-via-rx",
908 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
911 VNET_FEATURE_INIT (ip4_source_check_2, static) =
913 .arc_name = "ip4-unicast",
914 .node_name = "ip4-source-check-via-any",
915 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
918 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
920 .arc_name = "ip4-unicast",
921 .node_name = "ip4-source-and-port-range-check-rx",
922 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
925 VNET_FEATURE_INIT (ip4_policer_classify, static) =
927 .arc_name = "ip4-unicast",
928 .node_name = "ip4-policer-classify",
929 .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
932 VNET_FEATURE_INIT (ip4_ipsec, static) =
934 .arc_name = "ip4-unicast",
935 .node_name = "ipsec4-input-feature",
936 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
939 VNET_FEATURE_INIT (ip4_vpath, static) =
941 .arc_name = "ip4-unicast",
942 .node_name = "vpath-input-ip4",
943 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
946 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
948 .arc_name = "ip4-unicast",
949 .node_name = "ip4-vxlan-bypass",
950 .runs_before = VNET_FEATURES ("ip4-lookup"),
/* This is the feature toggled by ip4_sw_interface_enable_disable() and
 * ip4_sw_interface_add_del(). */
953 VNET_FEATURE_INIT (ip4_not_enabled, static) =
955 .arc_name = "ip4-unicast",
956 .node_name = "ip4-not-enabled",
957 .runs_before = VNET_FEATURES ("ip4-lookup"),
960 VNET_FEATURE_INIT (ip4_lookup, static) =
962 .arc_name = "ip4-unicast",
963 .node_name = "ip4-lookup",
964 .runs_before = 0, /* not before any other features */
967 /* Built-in ip4 multicast rx feature path definition */
/* The "ip4-multicast" arc shares its start nodes with the unicast arc and
 * ends at ip4-mfib-forward-lookup.  Both vpath-input-ip4 and ip4-not-enabled
 * are constrained to run before ip4-mfib-forward-lookup. */
968 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
970 .arc_name = "ip4-multicast",
971 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
972 .last_in_arc = "ip4-mfib-forward-lookup",
973 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
976 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
978 .arc_name = "ip4-multicast",
979 .node_name = "vpath-input-ip4",
980 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
983 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
985 .arc_name = "ip4-multicast",
986 .node_name = "ip4-not-enabled",
987 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
990 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
992 .arc_name = "ip4-multicast",
993 .node_name = "ip4-mfib-forward-lookup",
994 .runs_before = 0, /* last feature */
997 /* Source and port-range check ip4 tx feature path definition */
/* The "ip4-output" arc starts at ip4-rewrite / ip4-midchain / ip4-dvr-dpo and
 * ends at interface-output.  Declared ordering:
 *
 *   ip4-source-and-port-range-check-tx -> ip4-outacl
 *     -> ipsec4-output-feature -> interface-output
 */
998 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1000 .arc_name = "ip4-output",
1001 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1002 .last_in_arc = "interface-output",
1003 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1006 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1008 .arc_name = "ip4-output",
1009 .node_name = "ip4-source-and-port-range-check-tx",
1010 .runs_before = VNET_FEATURES ("ip4-outacl"),
1013 VNET_FEATURE_INIT (ip4_outacl, static) =
1015 .arc_name = "ip4-output",
1016 .node_name = "ip4-outacl",
1017 .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1020 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1022 .arc_name = "ip4-output",
1023 .node_name = "ipsec4-output-feature",
1024 .runs_before = VNET_FEATURES ("interface-output"),
1027 /* Built-in ip4 tx feature path definition */
1028 VNET_FEATURE_INIT (ip4_interface_output, static) =
1030 .arc_name = "ip4-output",
1031 .node_name = "interface-output",
1032 .runs_before = 0, /* not before any other features */
/* Interface add/delete callback (registered at the end of this block).
 *
 * Ensures the FIB and mFIB index vectors cover sw_if_index.  The block that
 * declares im4/lm4 clears the unnumbered configuration and deletes every
 * address still configured on the interface (NOTE(review): presumably this
 * runs only on interface delete -- confirm).  Finally the "ip4-not-enabled"
 * feature is set from is_add on the ip4-multicast arc (and likewise requested
 * on ip4-unicast).  Always returns 0 (no error). */
1036 static clib_error_t *
1037 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1039 ip4_main_t *im = &ip4_main;
1041 /* Fill in lookup tables with default table (0). */
1042 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1043 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1047 ip4_main_t *im4 = &ip4_main;
1048 ip_lookup_main_t *lm4 = &im4->lookup_main;
1049 ip_interface_address_t *ia = 0;
1050 ip4_address_t *address;
1051 vlib_main_t *vm = vlib_get_main ();
/* Detach any unnumbered relationship (~0 = no interface). */
1053 vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
/* Delete each address through the public API so routes and callbacks are
 * handled (last argument 1 = is_del). */
1055 foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1057 address = ip_interface_address_get_address (lm4, ia);
1058 ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1063 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1066 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1067 sw_if_index, is_add, 0, 0);
1069 return /* no error */ 0;
/* Hook the function above into interface add/delete notifications. */
1072 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1074 /* Global IP4 main. */
/* Defined only when CLIB_MARCH_VARIANT is not set, so the object has a
 * single definition. */
1075 #ifndef CLIB_MARCH_VARIANT
1076 ip4_main_t ip4_main;
1077 #endif /* CLIB_MARCH_VARIANT */
/* Init function for the IPv4 lookup/forwarding path (registered at the end
 * of this block).
 *
 * Steps visible below:
 *  - run the init functions this module depends on (features, mtrie, FIB,
 *    mFIB);
 *  - fill im->fib_masks[] with one network-byte-order netmask per prefix
 *    length;
 *  - initialise the generic lookup main for IPv4;
 *  - create and lock unicast and multicast table 0;
 *  - install the packet-generator header parser on the ip4-lookup node;
 *  - build the ARP request packet template.
 */
1079 static clib_error_t *
1080 ip4_lookup_init (vlib_main_t * vm)
1082 ip4_main_t *im = &ip4_main;
1083 clib_error_t *error;
/* Dependencies first; each result is captured in `error`. */
1086 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1088 if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1090 if ((error = vlib_call_init_function (vm, fib_module_init)))
1092 if ((error = vlib_call_init_function (vm, mfib_module_init)))
/* Entry i gets i low bits set, shifted by (32 - i), then converted to
 * network byte order. */
1095 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1100 m = pow2_mask (i) << (32 - i);
1103 im->fib_masks[i] = clib_host_to_net_u32 (m);
1106 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1108 /* Create FIB with index 0 and table id of 0. */
1109 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1110 FIB_SOURCE_DEFAULT_ROUTE);
1111 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1112 MFIB_SOURCE_DEFAULT_ROUTE);
/* Let the packet generator parse IPv4 headers for streams aimed at
 * ip4-lookup. */
1116 pn = pg_get_node (ip4_lookup_node.index);
1117 pn->unformat_edit = unformat_pg_ip4_header;
/* Zeroed ARP header with the constant fields of an Ethernet/IPv4 request
 * filled in; _16 stores in network byte order, _8 stores the byte as-is. */
1121 ethernet_arp_header_t h;
1123 clib_memset (&h, 0, sizeof (h));
1125 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1126 #define _8(f,v) h.f = v;
1127 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1128 _16 (l3_type, ETHERNET_TYPE_IP4);
1129 _8 (n_l2_address_bytes, 6);
1130 _8 (n_l3_address_bytes, 4);
1131 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1135 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1138 /* alloc chunk size */ 8,
1145 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record for the ip4 forwarding nodes.  The formatters and
 * ip4_forward_next_trace() in this file read/write its fib_index, dpo_index,
 * flow_hash and packet_data members. */
1149 /* Adjacency taken. */
1154 /* Packet data, possibly *after* rewrite. */
/* Size is 64 bytes minus one u32. */
1155 u8 packet_data[64 - 1 * sizeof (u32)];
1157 ip4_forward_next_trace_t;
1159 #ifndef CLIB_MARCH_VARIANT
/* Trace formatter: appends the traced packet bytes, rendered as an IPv4
 * header at the current indent, to `s`.
 *
 * va_list layout: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip4_forward_next_trace_t *. */
1161 format_ip4_forward_next_trace (u8 * s, va_list * args)
1163 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1164 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1165 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1166 u32 indent = format_get_indent (s);
1167 s = format (s, "%U%U",
1168 format_white_space, indent,
1169 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter used by the ip4-lookup and ip4-load-balance nodes: prints
 * the FIB index, DPO index and flow hash on one line, then the traced packet
 * bytes rendered as an IPv4 header on the next line at the current indent.
 *
 * va_list layout: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip4_forward_next_trace_t *. */
1175 format_ip4_lookup_trace (u8 * s, va_list * args)
1177 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1178 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1179 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1180 u32 indent = format_get_indent (s);
1182 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1183 t->fib_index, t->dpo_index, t->flow_hash);
1184 s = format (s, "\n%U%U",
1185 format_white_space, indent,
1186 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite-style nodes: prints the tx interface index,
 * the DPO index together with the adjacency it names, and the flow hash;
 * the packet bytes follow on the next line, rendered through
 * format_ip_adjacency_packet_data.
 *
 * va_list layout: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip4_forward_next_trace_t *. */
1191 format_ip4_rewrite_trace (u8 * s, va_list * args)
1193 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1194 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1195 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1196 u32 indent = format_get_indent (s);
/* NOTE(review): the value printed under "tx_sw_if_index" is t->fib_index;
 * the trace filler stores sw_if_index[VLIB_TX] in that field whenever it is
 * not ~0. */
1198 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1199 t->fib_index, t->dpo_index, format_ip_adjacency,
1200 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1201 s = format (s, "\n%U%U",
1202 format_white_space, indent,
1203 format_ip_adjacency_packet_data,
1204 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1208 #ifndef CLIB_MARCH_VARIANT
1209 /* Common trace function for all ip4-forward next nodes. */
/* For every buffer in the frame that has VLIB_BUFFER_IS_TRACED set, adds an
 * ip4_forward_next_trace_t record holding:
 *  - dpo_index: ip.adj_index[which_adj_index] from the buffer;
 *  - flow_hash: the buffer's ip.flow_hash;
 *  - a FIB/interface index: sw_if_index[VLIB_TX] when that is not ~0,
 *    otherwise the FIB index bound to the buffer's RX interface;
 *  - packet_data: the first sizeof (packet_data) bytes at the buffer's
 *    current data pointer.
 * The same recording code appears for a pair of buffers and for a single
 * buffer.
 *
 * @param which_adj_index VLIB_RX or VLIB_TX: which adj_index slot to record.
 */
1211 ip4_forward_next_trace (vlib_main_t * vm,
1212 vlib_node_runtime_t * node,
1213 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1216 ip4_main_t *im = &ip4_main;
1218 n_left = frame->n_vectors;
1219 from = vlib_frame_vector_args (frame);
/* ---- two buffers at a time ---- */
1224 vlib_buffer_t *b0, *b1;
1225 ip4_forward_next_trace_t *t0, *t1;
1227 /* Prefetch next iteration. */
1228 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1229 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1234 b0 = vlib_get_buffer (vm, bi0);
1235 b1 = vlib_get_buffer (vm, bi1);
1237 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1239 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1240 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1241 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* An explicit TX index wins; ~0 falls back to the RX interface's FIB. */
1243 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1244 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1245 vec_elt (im->fib_index_by_sw_if_index,
1246 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1248 clib_memcpy_fast (t0->packet_data,
1249 vlib_buffer_get_current (b0),
1250 sizeof (t0->packet_data));
1252 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1254 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1255 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1256 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1258 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1259 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1260 vec_elt (im->fib_index_by_sw_if_index,
1261 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1262 clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1263 sizeof (t1->packet_data));
/* ---- one buffer at a time: same recording as above ---- */
1273 ip4_forward_next_trace_t *t0;
1277 b0 = vlib_get_buffer (vm, bi0);
1279 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1281 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1282 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1283 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1285 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1286 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1287 vec_elt (im->fib_index_by_sw_if_index,
1288 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1289 clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1290 sizeof (t0->packet_data));
1297 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * The sum is seeded with the payload length and protocol number, then the
 * source/destination addresses are added, then the bytes that follow the
 * IPv4 header at ip0.  When the payload is longer than what the first
 * buffer holds, the buffer chain starting at p0 is followed.  The final
 * value, sum16, is the complement of the folded sum.
 */
1299 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1303 u32 ip_header_length, payload_length_host_byte_order;
1304 u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1306 u8 *data_this_buffer;
1309 /* Initialize checksum with ip header. */
1310 ip_header_length = ip4_header_bytes (ip0);
/* L4 payload length = IP total length minus the IP header length. */
1311 payload_length_host_byte_order =
1312 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1314 clib_host_to_net_u32 (payload_length_host_byte_order +
1315 (ip0->protocol << 16));
/*
 * With 32-bit machine words the two addresses are added one at a time;
 * otherwise a single u64 read starting at src_address is added
 * (presumably covering dst_address as well -- confirm against the
 * ip4_header_t layout).
 */
1317 if (BITS (uword) == 32)
1320 ip_csum_with_carry (sum0,
1321 clib_mem_unaligned (&ip0->src_address, u32));
1323 ip_csum_with_carry (sum0,
1324 clib_mem_unaligned (&ip0->dst_address, u32));
1328 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1330 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1331 data_this_buffer = (u8 *) ip0 + ip_header_length;
/* Bytes of this buffer's current data that lie at or after ip0. */
1332 n_ip_bytes_this_buffer =
1333 p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
/* Clamp the first chunk when header + payload exceed this buffer. */
1334 if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1336 n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1337 n_ip_bytes_this_buffer - ip_header_length : 0;
/* Add this chunk and stop once the whole payload has been consumed. */
1342 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1343 n_bytes_left -= n_this_buffer;
1344 if (n_bytes_left == 0)
/* More payload is expected: the same condition is tested again below. */
1347 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1348 if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
/* Remember whether the chunk just summed had an odd byte count. */
1351 length_odd = (n_this_buffer & 1);
/* Advance to the next buffer in the chain. */
1353 p0 = vlib_get_buffer (vm, p0->next_buffer);
1354 data_this_buffer = vlib_buffer_get_current (p0);
1355 n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1357 if (PREDICT_FALSE (length_odd))
1359 /* Prepend a 0 or the resulting checksum will be incorrect. */
1363 data_this_buffer[0] = 0;
1367 sum16 = ~ip_csum_fold (sum0);
1372 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1374 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1378 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1379 || ip0->protocol == IP_PROTOCOL_UDP);
1381 udp0 = (void *) (ip0 + 1);
1382 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1384 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1385 | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1389 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1391 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1392 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
/*
 * Feature arc "ip4-local": it starts at the "ip4-local" node and names
 * "ip4-local-end-of-arc" as the last node in the arc.
 */
1399 VNET_FEATURE_ARC_INIT (ip4_local) =
1401 .arc_name = "ip4-local",
1402 .start_nodes = VNET_FEATURES ("ip4-local"),
1403 .last_in_arc = "ip4-local-end-of-arc",
/*
 * Run the software L4 checksum validation on buffer p and report the
 * result through *good_tcp_udp.  For UDP an additional length sanity
 * check is applied, which can clear *good_tcp_udp and set *error to
 * IP4_ERROR_UDP_LENGTH.
 */
1408 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1409 ip4_header_t * ip, u8 is_udp, u8 * error,
1413 flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1414 *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
/* NOTE(review): the lines below are presumably guarded by is_udp. */
1418 u32 ip_len, udp_len;
1420 udp = ip4_next_header (ip);
1421 /* Verify UDP length. */
1422 ip_len = clib_net_to_host_u16 (ip->length);
1423 udp_len = clib_net_to_host_u16 (udp->length);
/* A UDP length larger than the IP length field is an error. */
1425 len_diff = ip_len - udp_len;
1426 *good_tcp_udp &= len_diff >= 0;
1427 *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
/*
 * Checksum-state predicates on a buffer pointer _b.
 *
 * Every argument and every expansion is fully parenthesised so the macros
 * can be negated, combined with other operators, or handed a non-trivial
 * pointer expression without changing meaning.
 */

/* Non-zero when the TCP or the UDP checksum-offload flag is set on _b. */
#define ip4_local_csum_is_offloaded(_b)                                 \
  (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                      \
   || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * Non-zero when the packet is TCP/UDP and its L4 checksum has neither
 * been computed already nor been marked as offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
  ((is_tcp_udp)                                                         \
   && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)              \
	|| ip4_local_csum_is_offloaded (_b)))

/* 1 when the L4 checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                   \
    || ip4_local_csum_is_offloaded (_b)) != 0)
/*
 * Single-packet L4 checksum check.  On failure for a TCP or UDP packet,
 * *error becomes IP4_ERROR_TCP_CHECKSUM or IP4_ERROR_UDP_CHECKSUM;
 * otherwise *error is left as it was.
 */
1444 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1445 ip4_header_t * ih, u8 * error)
1447 u8 is_udp, is_tcp_udp, good_tcp_udp;
1449 is_udp = ih->protocol == IP_PROTOCOL_UDP;
1450 is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
/* Validate in software only when the flags do not already settle it. */
1452 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1453 ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
/* NOTE(review): presumably the alternative branch -- trust the flags. */
1455 good_tcp_udp = ip4_local_csum_is_valid (b);
/* Adding is_udp below relies on the two error codes being adjacent. */
1457 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1458 *error = (is_tcp_udp && !good_tcp_udp
1459 ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
/*
 * Two-packet variant of the L4 checksum check: b, ih and error are
 * arrays whose elements 0 and 1 describe the two packets.
 */
1463 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1464 ip4_header_t ** ih, u8 * error)
1466 u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1468 is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1469 is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1471 is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1472 is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
/* Start from what the buffer flags already say. */
1474 good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1475 good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
/* Slow path is entered when either packet needs a software check. */
1477 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1478 || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1481 ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1484 ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
/* TCP_CHECKSUM + is_udp selects the TCP or the UDP checksum error. */
1488 error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1489 IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1490 error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1491 IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
/*
 * Turn the accumulated error code into the packet's next index and
 * b->error.  Callers in this file initialise the error code to
 * IP4_ERROR_UNKNOWN_PROTOCOL, so that value means no check has failed.
 */
1495 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1496 vlib_buffer_t * b, u16 * next, u8 error,
1497 u8 head_of_feature_arc)
1499 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
/* Any error other than the initial value sends the packet to drop. */
1502 *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1503 b->error = error ? error_node->errors[error] : 0;
/* At the head of the arc, packets that passed all checks enter the arc. */
1504 if (head_of_feature_arc)
1507 if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1509 vnet_feature_arc_start (arc_index,
1510 vnet_buffer (b)->sw_if_index[VLIB_RX],
/*
 * Result of the most recent source-address check.  The code in this file
 * reads and writes its first, src, lbi and error members.
 */
1523 } ip4_local_last_check_t;
/*
 * Source-address check for one locally destined packet.  A FIB lookup on
 * the source address is done unless the packet has the same source as the
 * entry cached in last_check, in which case the cached load-balance index
 * and error are reused.  *error0 is updated with the outcome.
 */
1526 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1527 ip4_local_last_check_t * last_check, u8 * error0)
1529 ip4_fib_mtrie_leaf_t leaf0;
1530 ip4_fib_mtrie_t *mtrie0;
1531 const dpo_id_t *dpo0;
1532 load_balance_t *lb0;
/* A sw_if_index[VLIB_TX] other than ~0 overrides the buffer's fib_index. */
1535 vnet_buffer (b)->ip.fib_index =
1536 vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1537 vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1540 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1541 * adjacency for the destination address (the local interface address).
1542 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1543 * adjacency for the source address (the remote sender's address)
/* Cache miss: first packet, or a source different from the cached one. */
1545 if (PREDICT_FALSE (last_check->first ||
1546 (last_check->src.as_u32 != ip0->src_address.as_u32)))
/* Mtrie lookup of the source address in the packet's FIB. */
1548 mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1549 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1550 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1551 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1552 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
/* Move the previous TX index to RX, then store the source lookup in TX. */
1554 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1555 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1556 vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1558 lb0 = load_balance_get (lbi0);
1559 dpo0 = load_balance_get_bucket_i (lb0, 0);
1562 * Must have a route to source otherwise we drop the packet.
1563 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1566 * - the source is a recieve => it's from us => bogus, do this
1567 * first since it sets a different error code.
1568 * - uRPF check for any route to source - accept if passes.
1569 * - allow packets destined to the broadcast address from unknown sources
/* Both updates apply only while *error0 still holds its initial value. */
1572 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1573 && dpo0->dpoi_type == DPO_RECEIVE) ?
1574 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1575 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1576 && !fib_urpf_check_size (lb0->lb_urpf)
1577 && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1578 IP4_ERROR_SRC_LOOKUP_MISS : *error0);
/* Remember this result for the following packets. */
1580 last_check->src.as_u32 = ip0->src_address.as_u32;
1581 last_check->lbi = lbi0;
1582 last_check->error = *error0;
/* NOTE(review): presumably the cache-hit branch -- reuse cached values. */
1586 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1587 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1588 vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1589 *error0 = last_check->error;
1590 last_check->first = 0;
/*
 * Two-packet variant of the source-address check.  Both packets take the
 * lookup path unless both carry the source address cached in last_check
 * and the cache has been filled before.
 */
1595 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1596 ip4_local_last_check_t * last_check, u8 * error)
1598 ip4_fib_mtrie_leaf_t leaf[2];
1599 ip4_fib_mtrie_t *mtrie[2];
1600 const dpo_id_t *dpo[2];
1601 load_balance_t *lb[2];
/* Non-zero when the cache is unset or either source differs from it. */
1605 not_last_hit = last_check->first;
1606 not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1607 not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
/* A sw_if_index[VLIB_TX] other than ~0 overrides each fib_index. */
1609 vnet_buffer (b[0])->ip.fib_index =
1610 vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1611 vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1612 vnet_buffer (b[0])->ip.fib_index;
1614 vnet_buffer (b[1])->ip.fib_index =
1615 vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1616 vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1617 vnet_buffer (b[1])->ip.fib_index;
1620 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1621 * adjacency for the destination address (the local interface address).
1622 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1623 * adjacency for the source address (the remote sender's address)
1625 if (PREDICT_FALSE (not_last_hit))
/* Interleaved mtrie lookups of both source addresses. */
1627 mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1628 mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1630 leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1631 leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1633 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1634 &ip[0]->src_address, 2);
1635 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1636 &ip[1]->src_address, 2);
1638 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1639 &ip[0]->src_address, 3);
1640 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1641 &ip[1]->src_address, 3);
1643 lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1644 lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
/* Move the previous TX index to RX, then store the source lookup in TX. */
1646 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1647 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1648 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1650 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1651 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1652 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1654 lb[0] = load_balance_get (lbi[0]);
1655 lb[1] = load_balance_get (lbi[1]);
1657 dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1658 dpo[1] = load_balance_get_bucket_i (lb[1], 0);
/* Same two tests as the single-packet version, applied per packet. */
1660 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1661 dpo[0]->dpoi_type == DPO_RECEIVE) ?
1662 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1663 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1664 !fib_urpf_check_size (lb[0]->lb_urpf) &&
1665 ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1666 ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1668 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1669 dpo[1]->dpoi_type == DPO_RECEIVE) ?
1670 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1671 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1672 !fib_urpf_check_size (lb[1]->lb_urpf) &&
1673 ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1674 ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
/* Only the second packet's result is kept in the cache. */
1676 last_check->src.as_u32 = ip[1]->src_address.as_u32;
1677 last_check->lbi = lbi[1];
1678 last_check->error = error[1];
/* NOTE(review): presumably the cache-hit branch -- reuse cached values. */
1682 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1683 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1684 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1686 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1687 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1688 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1690 error[0] = last_check->error;
1691 error[1] = last_check->error;
1692 last_check->first = 0;
/* Packet classes returned by ip4_local_classify(). */
1696 enum ip_local_packet_type_e
1698 IP_LOCAL_PACKET_TYPE_L4,
1699 IP_LOCAL_PACKET_TYPE_NAT,
1700 IP_LOCAL_PACKET_TYPE_FRAG,
1704 * Determine packet type and next node.
1706 * The expectation is that all packets that are not L4 will skip
1707 * checksums and source checks.
/*
 * Writes the chosen next index to *next and returns the packet class:
 * FRAG for IPv4 fragments, NAT for buffers flagged
 * VNET_BUFFER_F_IS_NATED, L4 for everything else.
 */
1710 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1712 ip_lookup_main_t *lm = &ip4_main.lookup_main;
/* Fragments go to reassembly. */
1714 if (PREDICT_FALSE (ip4_is_fragment (ip)))
1716 *next = IP_LOCAL_NEXT_REASSEMBLY;
1717 return IP_LOCAL_PACKET_TYPE_FRAG;
/* NATed packets use the per-protocol next but are classed separately. */
1719 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1721 *next = lm->local_next_by_ip_protocol[ip->protocol];
1722 return IP_LOCAL_PACKET_TYPE_NAT;
/* Default: next node registered for the packet's IP protocol. */
1725 *next = lm->local_next_by_ip_protocol[ip->protocol];
1726 return IP_LOCAL_PACKET_TYPE_L4;
/*
 * Worker shared by the ip4-local and ip4-local-end-of-arc nodes.  Each
 * packet is classified, optionally run through the L4 checksum and
 * source-address checks, given a next index and error, and finally the
 * whole frame is enqueued.  Returns frame->n_vectors.
 */
1730 ip4_local_inline (vlib_main_t * vm,
1731 vlib_node_runtime_t * node,
1732 vlib_frame_t * frame, int head_of_feature_arc)
1734 u32 *from, n_left_from;
/* Errors are taken from the ip4-input node's error table. */
1735 vlib_node_runtime_t *error_node =
1736 vlib_node_get_runtime (vm, ip4_input_node.index);
1737 u16 nexts[VLIB_FRAME_SIZE], *next;
1738 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1739 ip4_header_t *ip[2];
1742 ip4_local_last_check_t last_check = {
1744 * 0.0.0.0 can appear as the source address of an IP packet,
1745 * as can any other address, hence the need to use the 'first'
1746 * member to make sure the .lbi is initialised for the first
1749 .src = {.as_u32 = 0},
1751 .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1755 from = vlib_frame_vector_args (frame);
1756 n_left_from = frame->n_vectors;
1758 if (node->flags & VLIB_NODE_FLAG_TRACE)
1759 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1761 vlib_get_buffers (vm, from, bufs, n_left_from);
/* Dual loop: needs 6 buffers so that b[4] and b[5] can be prefetched. */
1765 while (n_left_from >= 6)
1769 /* Prefetch next iteration. */
1771 vlib_prefetch_buffer_header (b[4], LOAD);
1772 vlib_prefetch_buffer_header (b[5], LOAD);
1774 CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1775 CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
/* Initial error value; it is treated as "no check failed" later on. */
1778 error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1780 ip[0] = vlib_buffer_get_current (b[0]);
1781 ip[1] = vlib_buffer_get_current (b[1]);
1783 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1784 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1786 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1787 pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
/* Zero when both packets are of the same class. */
1789 not_batch = pt[0] ^ pt[1];
/*
 * NOTE(review): presumably skips the checks when this is not the head of
 * the arc, or when both packets share a class whose value is non-zero
 * (non-L4, assuming IP_LOCAL_PACKET_TYPE_L4 is 0 -- confirm).
 */
1791 if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
/* Same class: use the paired check routines. */
1794 if (PREDICT_TRUE (not_batch == 0))
1796 ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1797 ip4_local_check_src_x2 (b, ip, &last_check, error);
/* Mixed classes: the packets are checked individually. */
1803 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1804 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1808 ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1809 ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1815 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1816 head_of_feature_arc);
1817 ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1818 head_of_feature_arc);
/* Single-packet loop for the remaining buffers. */
1825 while (n_left_from > 0)
1827 error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1829 ip[0] = vlib_buffer_get_current (b[0]);
1830 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1831 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1833 if (head_of_feature_arc == 0 || pt[0])
1836 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1837 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1841 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1842 head_of_feature_arc);
/* Hand every buffer to the next index recorded in nexts[]. */
1849 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1850 return frame->n_vectors;
/* ip4-local node function: runs ip4_local_inline with head_of_feature_arc = 1. */
1853 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1854 vlib_frame_t * frame)
1856 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/*
 * Registration of the "ip4-local" node, including the names of the
 * statically known next nodes indexed by IP_LOCAL_NEXT_*.
 */
1860 VLIB_REGISTER_NODE (ip4_local_node) =
1862 .name = "ip4-local",
1863 .vector_size = sizeof (u32),
1864 .format_trace = format_ip4_forward_next_trace,
1865 .n_next_nodes = IP_LOCAL_N_NEXT,
1868 [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1869 [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1870 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1871 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1872 [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
/*
 * ip4-local-end-of-arc node function: runs ip4_local_inline with
 * head_of_feature_arc = 0 (the inline comment below names the argument,
 * it does not describe the value).
 */
1878 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1879 vlib_node_runtime_t * node,
1880 vlib_frame_t * frame)
1882 return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
/* Registration of "ip4-local-end-of-arc", declared a sibling of "ip4-local". */
1886 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1887 .name = "ip4-local-end-of-arc",
1888 .vector_size = sizeof (u32),
1890 .format_trace = format_ip4_forward_next_trace,
1891 .sibling_of = "ip4-local",
/* Adds the "ip4-local-end-of-arc" node as a feature on the "ip4-local" arc. */
1894 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1895 .arc_name = "ip4-local",
1896 .node_name = "ip4-local-end-of-arc",
1897 .runs_before = 0, /* not before any other features */
1901 #ifndef CLIB_MARCH_VARIANT
/*
 * Register node_index as the handler for locally received packets of the
 * given IP protocol: the node is added as a next of ip4-local and the
 * resulting next index is stored in local_next_by_ip_protocol[protocol].
 * The protocol value is range-checked by ASSERT only.
 */
1903 ip4_register_protocol (u32 protocol, u32 node_index)
1905 vlib_main_t *vm = vlib_get_main ();
1906 ip4_main_t *im = &ip4_main;
1907 ip_lookup_main_t *lm = &im->lookup_main;
1909 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1910 lm->local_next_by_ip_protocol[protocol] =
1911 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * Undo a protocol registration by pointing the protocol's entry back at
 * IP_LOCAL_NEXT_PUNT.  The protocol value is range-checked by ASSERT only.
 */
1915 ip4_unregister_protocol (u32 protocol)
1917 ip4_main_t *im = &ip4_main;
1918 ip_lookup_main_t *lm = &im->lookup_main;
1920 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1921 lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
/*
 * CLI handler: prints one line per IP protocol whose entry in
 * local_next_by_ip_protocol is not IP_LOCAL_NEXT_PUNT, showing the
 * protocol and the name of the node it is dispatched to.
 */
1925 static clib_error_t *
1926 show_ip_local_command_fn (vlib_main_t * vm,
1927 unformat_input_t * input, vlib_cli_command_t * cmd)
1929 ip4_main_t *im = &ip4_main;
1930 ip_lookup_main_t *lm = &im->lookup_main;
1933 vlib_cli_output (vm, "Protocols handled by ip4_local");
1934 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1936 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
/* Translate ip4-local's next index into the global node index. */
1938 u32 node_index = vlib_get_node (vm,
1939 ip4_local_node.index)->
1940 next_nodes[lm->local_next_by_ip_protocol[i]];
1941 vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1942 format_vlib_node_name, vm, node_index);
1951 * Display the set of protocols handled by the local IPv4 stack.
1954 * Example of how to display local protocol table:
1955 * @cliexstart{show ip local}
1956 * Protocols handled by ip4_local
/* Binds the "show ip local" CLI path to show_ip_local_command_fn. */
1963 VLIB_CLI_COMMAND (show_ip_local, static) =
1965 .path = "show ip local",
1966 .function = show_ip_local_command_fn,
1967 .short_help = "show ip local",
/*
 * Worker shared by the ip4-arp (is_glean = 0) and ip4-glean (is_glean = 1)
 * node functions.  Every input buffer is placed on the drop next with an
 * error describing what happened; when appropriate a fresh ARP request is
 * built from the packet template and sent towards the adjacency's next
 * node.  Returns frame->n_vectors.
 */
1972 ip4_arp_inline (vlib_main_t * vm,
1973 vlib_node_runtime_t * node,
1974 vlib_frame_t * frame, int is_glean)
1976 vnet_main_t *vnm = vnet_get_main ();
1977 ip4_main_t *im = &ip4_main;
1978 ip_lookup_main_t *lm = &im->lookup_main;
1979 u32 *from, *to_next_drop;
1980 uword n_left_from, n_left_to_next_drop, next_index;
1981 u32 thread_index = vm->thread_index;
1984 if (node->flags & VLIB_NODE_FLAG_TRACE)
1985 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Per-thread throttle seed for the current time. */
1987 seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1989 from = vlib_frame_vector_args (frame);
1990 n_left_from = frame->n_vectors;
1991 next_index = node->cached_next_index;
1992 if (next_index == IP4_ARP_NEXT_DROP)
1993 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1995 while (n_left_from > 0)
1997 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1998 to_next_drop, n_left_to_next_drop);
2000 while (n_left_from > 0 && n_left_to_next_drop > 0)
2002 u32 pi0, bi0, adj_index0, sw_if_index0;
2003 ip_adjacency_t *adj0;
2004 vlib_buffer_t *p0, *b0;
2005 ip4_address_t resolve0;
2006 ethernet_arp_header_t *h0;
2007 vnet_hw_interface_t *hw_if0;
2011 p0 = vlib_get_buffer (vm, pi0);
/* The original packet always goes to the drop next. */
2015 to_next_drop[0] = pi0;
2017 n_left_to_next_drop -= 1;
2019 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2020 adj0 = adj_get (adj_index0);
/*
 * NOTE(review): the two assignments to resolve0 below are presumably the
 * glean and non-glean alternatives -- confirm against the full source.
 */
2024 /* resolve the packet's destination */
2025 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
2026 resolve0 = ip0->dst_address;
2030 /* resolve the incomplete adj */
2031 resolve0 = adj0->sub_type.nbr.next_hop.ip4;
2034 /* combine the address and interface for the hash key */
2035 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2036 r0 = (u64) resolve0.data_u32 << 32;
/* Rate-limit requests per key. */
2039 if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
2041 p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
2046 * the adj has been updated to a rewrite but the node the DPO that got
2047 * us here hasn't - yet. no big deal. we'll drop while we wait.
2049 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2051 p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
2056 * Can happen if the control-plane is programming tables
2057 * with traffic flowing; at least that's today's lame excuse.
/* The adjacency type must match the node variant that is running. */
2059 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2060 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2062 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2065 /* Send ARP request. */
2067 vlib_packet_template_get_packet (vm,
2068 &im->ip4_arp_request_packet_template,
2070 /* Seems we're out of buffers */
2071 if (PREDICT_FALSE (!h0))
2073 p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
2077 b0 = vlib_get_buffer (vm, bi0);
2079 /* copy the persistent fields from the original */
2080 clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
2082 /* Add rewrite/encap string for ARP packet. */
2083 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2085 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2087 /* Src ethernet address in ARP header. */
2088 mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
2089 hw_if0->hw_address);
2092 /* The interface's source address is stashed in the Glean Adj */
2093 h0->ip4_over_ethernet[0].ip4 =
2094 adj0->sub_type.glean.receive_addr.ip4;
2098 /* Src IP address in ARP header. */
2099 if (ip4_src_address_for_packet (lm, sw_if_index0,
2100 &h0->ip4_over_ethernet[0].ip4))
2102 /* No source address available */
2103 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
/* The request buffer is not going to be sent, so give it back. */
2104 vlib_buffer_free (vm, &bi0, 1);
/* Target IP of the request is the address being resolved. */
2108 h0->ip4_over_ethernet[1].ip4 = resolve0;
2110 p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
2112 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2113 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2114 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
/* Expose the rewrite bytes that were written in front of the ARP header. */
2116 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2118 vlib_set_next_frame_buffer (vm, node,
2119 adj0->rewrite_header.next_index, bi0);
2122 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2125 return frame->n_vectors;
/* ip4_arp_node function: ip4_arp_inline with is_glean = 0. */
2128 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2129 vlib_frame_t * frame)
2131 return (ip4_arp_inline (vm, node, frame, 0));
/* ip4_glean_node function: ip4_arp_inline with is_glean = 1. */
2134 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2135 vlib_frame_t * frame)
2137 return (ip4_arp_inline (vm, node, frame, 1));
/*
 * Error strings indexed by IP4_ARP_ERROR_*; referenced by both the
 * ip4_arp_node and ip4_glean_node registrations.
 */
2140 static char *ip4_arp_error_strings[] = {
2141 [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
2142 [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
2143 [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
2144 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2145 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2146 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/*
 * Registration of ip4_arp_node; the only statically named next is the
 * drop next, which points at "error-drop".
 */
2150 VLIB_REGISTER_NODE (ip4_arp_node) =
2153 .vector_size = sizeof (u32),
2154 .format_trace = format_ip4_forward_next_trace,
2155 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2156 .error_strings = ip4_arp_error_strings,
2157 .n_next_nodes = IP4_ARP_N_NEXT,
2160 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * Registration of the "ip4-glean" node; it reuses the ARP error strings
 * and names "error-drop" as its drop next.
 */
2164 VLIB_REGISTER_NODE (ip4_glean_node) =
2166 .name = "ip4-glean",
2167 .vector_size = sizeof (u32),
2168 .format_trace = format_ip4_forward_next_trace,
2169 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2170 .error_strings = ip4_arp_error_strings,
2171 .n_next_nodes = IP4_ARP_N_NEXT,
2173 [IP4_ARP_NEXT_DROP] = "error-drop",
/* X-macro list of IP4_ARP_ERROR_* suffixes, expanded in arp_notrace_init(). */
2178 #define foreach_notrace_ip4_arp_error \
2184 _(NO_SOURCE_ADDRESS)
/*
 * Init function: for each error listed in foreach_notrace_ip4_arp_error,
 * calls vnet_pcap_drop_trace_filter_add_del on the matching ip4_arp_node
 * error.
 */
2186 static clib_error_t *
2187 arp_notrace_init (vlib_main_t * vm)
2189 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2191 /* don't trace ARP request packets */
2193 vnet_pcap_drop_trace_filter_add_del \
2194 (rt->errors[IP4_ARP_ERROR_##a], \
2196 foreach_notrace_ip4_arp_error;
2201 VLIB_INIT_FUNCTION (arp_notrace_init);
2204 #ifndef CLIB_MARCH_VARIANT
2205 /* Send an ARP request to see if given destination is reachable on given interface. */
2207 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2210 vnet_main_t *vnm = vnet_get_main ();
2211 ip4_main_t *im = &ip4_main;
2212 ethernet_arp_header_t *h;
2214 ip_interface_address_t *ia;
2215 ip_adjacency_t *adj;
2216 vnet_hw_interface_t *hi;
2217 vnet_sw_interface_t *si;
2221 u8 unicast_rewrite = 0;
2223 si = vnet_get_sw_interface (vnm, sw_if_index);
2225 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2227 return clib_error_return (0, "%U: interface %U down",
2228 format_ip4_address, dst,
2229 format_vnet_sw_if_index_name, vnm,
2234 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2237 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2238 return clib_error_return
2240 "no matching interface address for destination %U (interface %U)",
2241 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2245 h = vlib_packet_template_get_packet (vm,
2246 &im->ip4_arp_request_packet_template,
2250 return clib_error_return (0, "ARP request packet allocation failed");
2252 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2253 if (PREDICT_FALSE (!hi->hw_address))
2255 return clib_error_return (0, "%U: interface %U do not support ip probe",
2256 format_ip4_address, dst,
2257 format_vnet_sw_if_index_name, vnm,
2261 mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2263 h->ip4_over_ethernet[0].ip4 = src[0];
2264 h->ip4_over_ethernet[1].ip4 = dst[0];
2266 b = vlib_get_buffer (vm, bi);
2267 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2268 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2270 ip46_address_t nh = {
2274 ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2275 VNET_LINK_IP4, &nh, sw_if_index);
2278 /* Peer has been previously resolved, retrieve glean adj instead */
2279 if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2282 unicast_rewrite = 1;
2286 ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2287 VNET_LINK_IP4, sw_if_index, &nh);
2292 /* Add encapsulation string for software interface (e.g. ethernet header). */
2293 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2294 if (unicast_rewrite)
2296 u16 *etype = vlib_buffer_get_current (b) - 2;
2297 etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2299 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2302 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2303 u32 *to_next = vlib_frame_vector_args (f);
2306 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2310 return /* no error */ 0;
/* Next indices used by the IPv4 rewrite code in this file. */
2316 IP4_REWRITE_NEXT_DROP,
2317 IP4_REWRITE_NEXT_ICMP_ERROR,
2318 IP4_REWRITE_NEXT_FRAGMENT,
2319 IP4_REWRITE_N_NEXT /* Last */
2320 } ip4_rewrite_next_t;
2323 * The bits of an IPv4 address to mask to construct a multicast
/*
 * The mask constant depends on host byte order: 0x007fffff when
 * CLIB_ARCH_IS_BIG_ENDIAN is set; 0xffff7f00 is presumably the
 * little-endian alternative -- confirm.
 */
2326 #if CLIB_ARCH_IS_BIG_ENDIAN
2327 #define IP4_MCAST_ADDR_MASK 0x007fffff
2329 #define IP4_MCAST_ADDR_MASK 0xffff7f00
/*
 * Compare packet_len with the adjacency's maximum L3 packet size.  When
 * the packet is too large, *error is set to IP4_ERROR_MTU_EXCEEDED and
 * the packet is steered either to the ICMP error next (destination
 * unreachable, fragmentation needed) or to the fragmentation next.
 * NOTE(review): the choice between the two is presumably made on df.
 */
2333 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2334 u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2336 if (packet_len > adj_packet_bytes)
2338 *error = IP4_ERROR_MTU_EXCEEDED;
2341 icmp4_error_set_vnet_buffer
2342 (b, ICMP4_destination_unreachable,
2343 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2345 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2349 /* IP fragmentation */
2350 ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2351 IP4_FRAG_NEXT_IP4_REWRITE, 0);
2352 *next = IP4_REWRITE_NEXT_FRAGMENT;
2357 /* Decrement TTL & update checksum.
2358 Works either endian, so no need for byte swap. */
/*
 * Buffers flagged VNET_BUFFER_F_LOCALLY_ORIGINATED only have that flag
 * cleared.  Otherwise the header checksum is adjusted incrementally and,
 * when the resulting TTL is not positive, *error and *next are set so
 * that an ICMP time-exceeded error is generated.
 */
2359 static_always_inline void
2360 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2365 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2367 b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2373 /* Input node should have rejected packets with ttl 0. */
2374 ASSERT (ip->ttl > 0);
/* Incremental checksum update: add 0x0100 and wrap the carry back in. */
2376 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2377 checksum += checksum >= 0xffff;
2379 ip->checksum = checksum;
2384 * If the ttl drops below 1 when forwarding, generate
2387 if (PREDICT_FALSE (ttl <= 0))
2389 *error = IP4_ERROR_TIME_EXPIRED;
2390 vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2391 icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2392 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2394 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2397 /* Verify checksum. */
2398 ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2399 (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2404 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2405 vlib_node_runtime_t * node,
2406 vlib_frame_t * frame,
2407 int do_counters, int is_midchain, int is_mcast,
2410 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2411 u32 *from = vlib_frame_vector_args (frame);
2412 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2413 u16 nexts[VLIB_FRAME_SIZE], *next;
2415 vlib_node_runtime_t *error_node =
2416 vlib_node_get_runtime (vm, ip4_input_node.index);
2418 n_left_from = frame->n_vectors;
2419 u32 thread_index = vm->thread_index;
2421 vlib_get_buffers (vm, from, bufs, n_left_from);
2422 clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2424 if (n_left_from >= 6)
2427 for (i = 2; i < 6; i++)
2428 vlib_prefetch_buffer_header (bufs[i], LOAD);
2433 while (n_left_from >= 8)
2435 ip_adjacency_t *adj0, *adj1;
2436 ip4_header_t *ip0, *ip1;
2437 u32 rw_len0, error0, adj_index0;
2438 u32 rw_len1, error1, adj_index1;
2439 u32 tx_sw_if_index0, tx_sw_if_index1;
2442 vlib_prefetch_buffer_header (b[6], LOAD);
2443 vlib_prefetch_buffer_header (b[7], LOAD);
2445 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2446 adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2449 * pre-fetch the per-adjacency counters
2453 vlib_prefetch_combined_counter (&adjacency_counters,
2454 thread_index, adj_index0);
2455 vlib_prefetch_combined_counter (&adjacency_counters,
2456 thread_index, adj_index1);
2459 ip0 = vlib_buffer_get_current (b[0]);
2460 ip1 = vlib_buffer_get_current (b[1]);
2462 error0 = error1 = IP4_ERROR_NONE;
2464 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2465 ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2467 /* Rewrite packet header and updates lengths. */
2468 adj0 = adj_get (adj_index0);
2469 adj1 = adj_get (adj_index1);
2471 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2472 rw_len0 = adj0[0].rewrite_header.data_bytes;
2473 rw_len1 = adj1[0].rewrite_header.data_bytes;
2474 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2475 vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2477 p = vlib_buffer_get_current (b[2]);
2478 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2479 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2481 p = vlib_buffer_get_current (b[3]);
2482 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2483 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2485 /* Check MTU of outgoing interface. */
2486 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2487 u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2489 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2490 ip0_len = gso_mtu_sz (b[0]);
2491 if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2492 ip1_len = gso_mtu_sz (b[1]);
2494 ip4_mtu_check (b[0], ip0_len,
2495 adj0[0].rewrite_header.max_l3_packet_bytes,
2496 ip0->flags_and_fragment_offset &
2497 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2499 ip4_mtu_check (b[1], ip1_len,
2500 adj1[0].rewrite_header.max_l3_packet_bytes,
2501 ip1->flags_and_fragment_offset &
2502 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2507 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2508 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2509 IP4_ERROR_SAME_INTERFACE : error0);
2510 error1 = ((adj1[0].rewrite_header.sw_if_index ==
2511 vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2512 IP4_ERROR_SAME_INTERFACE : error1);
2515 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2516 * to see the IP header */
2517 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2519 u32 next_index = adj0[0].rewrite_header.next_index;
2520 vlib_buffer_advance (b[0], -(word) rw_len0);
2521 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2522 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2525 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2526 vnet_feature_arc_start (lm->output_feature_arc_index,
2527 tx_sw_if_index0, &next_index, b[0]);
2528 next[0] = next_index;
2532 b[0]->error = error_node->errors[error0];
2534 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2536 u32 next_index = adj1[0].rewrite_header.next_index;
2537 vlib_buffer_advance (b[1], -(word) rw_len1);
2539 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2540 vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2543 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2544 vnet_feature_arc_start (lm->output_feature_arc_index,
2545 tx_sw_if_index1, &next_index, b[1]);
2546 next[1] = next_index;
2550 b[1]->error = error_node->errors[error1];
2554 calc_checksums (vm, b[0]);
2555 calc_checksums (vm, b[1]);
2557 /* Guess we are only writing on simple Ethernet header. */
2558 vnet_rewrite_two_headers (adj0[0], adj1[0],
2559 ip0, ip1, sizeof (ethernet_header_t));
2562 * Bump the per-adjacency counters
2566 vlib_increment_combined_counter
2567 (&adjacency_counters,
2569 adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2571 vlib_increment_combined_counter
2572 (&adjacency_counters,
2574 adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2579 if (adj0->sub_type.midchain.fixup_func)
2580 adj0->sub_type.midchain.fixup_func
2581 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2582 if (adj1->sub_type.midchain.fixup_func)
2583 adj1->sub_type.midchain.fixup_func
2584 (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2590 * copy bytes from the IP address into the MAC rewrite
2592 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2593 adj0->rewrite_header.dst_mcast_offset,
2594 &ip0->dst_address.as_u32, (u8 *) ip0);
2595 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2596 adj1->rewrite_header.dst_mcast_offset,
2597 &ip1->dst_address.as_u32, (u8 *) ip1);
2605 while (n_left_from > 0)
2607 ip_adjacency_t *adj0;
2609 u32 rw_len0, adj_index0, error0;
2610 u32 tx_sw_if_index0;
2612 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2614 adj0 = adj_get (adj_index0);
2617 vlib_prefetch_combined_counter (&adjacency_counters,
2618 thread_index, adj_index0);
2620 ip0 = vlib_buffer_get_current (b[0]);
2622 error0 = IP4_ERROR_NONE;
2624 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2627 /* Update packet buffer attributes/set output interface. */
2628 rw_len0 = adj0[0].rewrite_header.data_bytes;
2629 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2631 /* Check MTU of outgoing interface. */
2632 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2633 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2634 ip0_len = gso_mtu_sz (b[0]);
2636 ip4_mtu_check (b[0], ip0_len,
2637 adj0[0].rewrite_header.max_l3_packet_bytes,
2638 ip0->flags_and_fragment_offset &
2639 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2644 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2645 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2646 IP4_ERROR_SAME_INTERFACE : error0);
2649 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2650 * to see the IP header */
2651 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2653 u32 next_index = adj0[0].rewrite_header.next_index;
2654 vlib_buffer_advance (b[0], -(word) rw_len0);
2655 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2656 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2659 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2660 vnet_feature_arc_start (lm->output_feature_arc_index,
2661 tx_sw_if_index0, &next_index, b[0]);
2662 next[0] = next_index;
2666 b[0]->error = error_node->errors[error0];
2670 calc_checksums (vm, b[0]);
2672 /* Guess we are only writing on simple Ethernet header. */
2673 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2676 vlib_increment_combined_counter
2677 (&adjacency_counters,
2678 thread_index, adj_index0, 1,
2679 vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2683 if (adj0->sub_type.midchain.fixup_func)
2684 adj0->sub_type.midchain.fixup_func
2685 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2691 * copy bytes from the IP address into the MAC rewrite
2693 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2694 adj0->rewrite_header.dst_mcast_offset,
2695 &ip0->dst_address.as_u32, (u8 *) ip0);
2704 /* Need to do trace after rewrites to pick up new packet data. */
2705 if (node->flags & VLIB_NODE_FLAG_TRACE)
2706 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2708 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2709 return frame->n_vectors;
2713 ip4_rewrite_inline (vlib_main_t * vm,
2714 vlib_node_runtime_t * node,
2715 vlib_frame_t * frame,
2716 int do_counters, int is_midchain, int is_mcast)
2718 vnet_main_t *vnm = vnet_get_main ();
2719 if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2720 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2721 is_midchain, is_mcast,
2724 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2725 is_midchain, is_mcast,
2726 0 /* no do_gso */ );
2730 /** @brief IPv4 rewrite node.
2733 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2734 header checksum, fetch the ip adjacency, check the outbound mtu,
2735 apply the adjacency rewrite, and send pkts to the adjacency
2736 rewrite header's rewrite_next_index.
2738 @param vm vlib_main_t corresponding to the current thread
2739 @param node vlib_node_runtime_t
2740 @param frame vlib_frame_t whose contents should be dispatched
2742 @par Graph mechanics: buffer metadata, next index usage
2745 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2746 - the rewrite adjacency index
2747 - <code>adj->lookup_next_index</code>
2748 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2749 the packet will be dropped.
2750 - <code>adj->rewrite_header</code>
2751 - Rewrite string length, rewrite string, next_index
2754 - <code>b->current_data, b->current_length</code>
2755 - Updated net of applying the rewrite string
2757 <em>Next Indices:</em>
2758 - <code> adj->rewrite_header.next_index </code>
2762 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2763 vlib_frame_t * frame)
2765 if (adj_are_counters_enabled ())
2766 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2768 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2771 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2772 vlib_node_runtime_t * node,
2773 vlib_frame_t * frame)
2775 if (adj_are_counters_enabled ())
2776 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2778 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2781 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2782 vlib_node_runtime_t * node,
2783 vlib_frame_t * frame)
2785 if (adj_are_counters_enabled ())
2786 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2788 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2791 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2792 vlib_node_runtime_t * node,
2793 vlib_frame_t * frame)
2795 if (adj_are_counters_enabled ())
2796 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2798 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2801 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2802 vlib_node_runtime_t * node,
2803 vlib_frame_t * frame)
2805 if (adj_are_counters_enabled ())
2806 return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2808 return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2812 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2813 .name = "ip4-rewrite",
2814 .vector_size = sizeof (u32),
2816 .format_trace = format_ip4_rewrite_trace,
2818 .n_next_nodes = IP4_REWRITE_N_NEXT,
2820 [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2821 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2822 [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2826 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2827 .name = "ip4-rewrite-bcast",
2828 .vector_size = sizeof (u32),
2830 .format_trace = format_ip4_rewrite_trace,
2831 .sibling_of = "ip4-rewrite",
2834 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2835 .name = "ip4-rewrite-mcast",
2836 .vector_size = sizeof (u32),
2838 .format_trace = format_ip4_rewrite_trace,
2839 .sibling_of = "ip4-rewrite",
2842 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2843 .name = "ip4-mcast-midchain",
2844 .vector_size = sizeof (u32),
2846 .format_trace = format_ip4_rewrite_trace,
2847 .sibling_of = "ip4-rewrite",
2850 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2851 .name = "ip4-midchain",
2852 .vector_size = sizeof (u32),
2853 .format_trace = format_ip4_forward_next_trace,
2854 .sibling_of = "ip4-rewrite",
2859 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2861 ip4_fib_mtrie_t *mtrie0;
2862 ip4_fib_mtrie_leaf_t leaf0;
2865 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2867 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2868 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2869 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2871 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2873 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2876 static clib_error_t *
2877 test_lookup_command_fn (vlib_main_t * vm,
2878 unformat_input_t * input, vlib_cli_command_t * cmd)
2885 ip4_address_t ip4_base_address;
2888 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2890 if (unformat (input, "table %d", &table_id))
2892 /* Make sure the entry exists. */
2893 fib = ip4_fib_get (table_id);
2894 if ((fib) && (fib->index != table_id))
2895 return clib_error_return (0, "<fib-index> %d does not exist",
2898 else if (unformat (input, "count %f", &count))
2901 else if (unformat (input, "%U",
2902 unformat_ip4_address, &ip4_base_address))
2905 return clib_error_return (0, "unknown input `%U'",
2906 format_unformat_error, input);
2911 for (i = 0; i < n; i++)
2913 if (!ip4_lookup_validate (&ip4_base_address, table_id))
2916 ip4_base_address.as_u32 =
2917 clib_host_to_net_u32 (1 +
2918 clib_net_to_host_u32 (ip4_base_address.as_u32));
2922 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2924 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2930 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2931 * given FIB table to determine if there is a conflict with the
2932 * adjacency table. The fib-id can be determined by using the
2933 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2936 * @todo This command uses fib-id, other commands use table-id (not
2937 * just a name, they are different indexes). Would like to change this
2938 * to table-id for consistency.
2941 * Example of how to run the test lookup command:
2942 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2943 * No errors in 2 lookups
2947 VLIB_CLI_COMMAND (lookup_test_command, static) =
2949 .path = "test lookup",
2950 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2951 .function = test_lookup_command_fn,
2955 #ifndef CLIB_MARCH_VARIANT
2957 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2961 fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2963 if (~0 == fib_index)
2964 return VNET_API_ERROR_NO_SUCH_FIB;
2966 fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2973 static clib_error_t *
2974 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2975 unformat_input_t * input,
2976 vlib_cli_command_t * cmd)
2980 u32 flow_hash_config = 0;
2983 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2985 if (unformat (input, "table %d", &table_id))
2988 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2989 foreach_flow_hash_bit
2996 return clib_error_return (0, "unknown input `%U'",
2997 format_unformat_error, input);
2999 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3005 case VNET_API_ERROR_NO_SUCH_FIB:
3006 return clib_error_return (0, "no such FIB table %d", table_id);
3009 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3017 * Configure the set of IPv4 fields used by the flow hash.
3020 * Example of how to set the flow hash on a given table:
3021 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3022 * Example of how to display the configured flow hash:
3023 * @cliexstart{show ip fib}
3024 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3027 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3028 * [0] [@0]: dpo-drop ip6
3031 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3032 * [0] [@0]: dpo-drop ip6
3035 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3036 * [0] [@0]: dpo-drop ip6
3039 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3040 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3043 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3044 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3045 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3046 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3047 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3050 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3051 * [0] [@0]: dpo-drop ip6
3052 * 255.255.255.255/32
3054 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3055 * [0] [@0]: dpo-drop ip6
3056 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3059 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3060 * [0] [@0]: dpo-drop ip6
3063 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3064 * [0] [@0]: dpo-drop ip6
3067 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3068 * [0] [@4]: ipv4-glean: af_packet0
3071 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3072 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3075 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3076 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3079 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3080 * [0] [@4]: ipv4-glean: af_packet1
3083 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3084 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3087 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3088 * [0] [@0]: dpo-drop ip6
3091 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3092 * [0] [@0]: dpo-drop ip6
3093 * 255.255.255.255/32
3095 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3096 * [0] [@0]: dpo-drop ip6
3100 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3102 .path = "set ip flow-hash",
3104 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3105 .function = set_ip_flow_hash_command_fn,
3109 #ifndef CLIB_MARCH_VARIANT
3111 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3114 vnet_main_t *vnm = vnet_get_main ();
3115 vnet_interface_main_t *im = &vnm->interface_main;
3116 ip4_main_t *ipm = &ip4_main;
3117 ip_lookup_main_t *lm = &ipm->lookup_main;
3118 vnet_classify_main_t *cm = &vnet_classify_main;
3119 ip4_address_t *if_addr;
3121 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3122 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3124 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3125 return VNET_API_ERROR_NO_SUCH_ENTRY;
3127 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3128 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3130 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3132 if (NULL != if_addr)
3134 fib_prefix_t pfx = {
3136 .fp_proto = FIB_PROTOCOL_IP4,
3137 .fp_addr.ip4 = *if_addr,
3141 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3145 if (table_index != (u32) ~ 0)
3147 dpo_id_t dpo = DPO_INVALID;
3152 classify_dpo_create (DPO_PROTO_IP4, table_index));
3154 fib_table_entry_special_dpo_add (fib_index,
3156 FIB_SOURCE_CLASSIFY,
3157 FIB_ENTRY_FLAG_NONE, &dpo);
3162 fib_table_entry_special_remove (fib_index,
3163 &pfx, FIB_SOURCE_CLASSIFY);
3171 static clib_error_t *
3172 set_ip_classify_command_fn (vlib_main_t * vm,
3173 unformat_input_t * input,
3174 vlib_cli_command_t * cmd)
3176 u32 table_index = ~0;
3177 int table_index_set = 0;
3178 u32 sw_if_index = ~0;
3181 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3183 if (unformat (input, "table-index %d", &table_index))
3184 table_index_set = 1;
3185 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3186 vnet_get_main (), &sw_if_index))
3192 if (table_index_set == 0)
3193 return clib_error_return (0, "classify table-index must be specified");
3195 if (sw_if_index == ~0)
3196 return clib_error_return (0, "interface / subif must be specified");
3198 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3205 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3206 return clib_error_return (0, "No such interface");
3208 case VNET_API_ERROR_NO_SUCH_ENTRY:
3209 return clib_error_return (0, "No such classifier table");
3215 * Assign a classification table to an interface. The classification
3216 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3217 * commands. Once the table is create, use this command to filter packets
3221 * Example of how to assign a classification table to an interface:
3222 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3225 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3227 .path = "set ip classify",
3229 "set ip classify intfc <interface> table-index <classify-idx>",
3230 .function = set_ip_classify_command_fn,
3234 static clib_error_t *
3235 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3237 ip4_main_t *im = &ip4_main;
3240 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3242 if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3245 return clib_error_return (0,
3246 "invalid heap-size parameter `%U'",
3247 format_unformat_error, input);
3250 im->mtrie_heap_size = heapsize;
3255 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3258 * fd.io coding-style-patch-verification: ON
3261 * eval: (c-set-style "gnu")