2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h> /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
60 /** @brief IPv4 lookup node.
63 This is the main IPv4 lookup dispatch node.
65 @param vm vlib_main_t corresponding to the current thread
66 @param node vlib_node_runtime_t
67 @param frame vlib_frame_t whose contents should be dispatched
69 @par Graph mechanics: buffer metadata, next index usage
72 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73 - Indicates the @c sw_if_index value of the interface that the
74 packet was received on.
75 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76 - When the value is @c ~0 then the node performs a longest prefix
77 match (LPM) for the packet destination address in the FIB attached
78 to the receive interface.
79 - Otherwise perform LPM for the packet destination address in the
80 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81 value (0, 1, ...) and not a VRF id.
84 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85 - The lookup result adjacency index.
88 - Dispatches the packet to the node index found in
89 ip_adjacency_t @c adj->lookup_next_index
90 (where @c adj is the lookup result adjacency).
/* Node function for the IPv4 lookup node: the visible body hands the frame
   to ip4_lookup_inline () and returns that function's result. */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
95 return ip4_lookup_inline (vm, node, frame);
/* Trace formatter referenced by the node registrations below; declared
   ahead of its definition further down in this file. */
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph-node registration for "ip4-lookup".  Vector elements are
   sizeof (u32) bytes, traces are rendered with format_ip4_lookup_trace, and
   the next-node table is IP_LOOKUP_N_NEXT entries taken from
   IP4_LOOKUP_NEXT_NODES. */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
103 .name = "ip4-lookup",
104 .vector_size = sizeof (u32),
105 .format_trace = format_ip4_lookup_trace,
106 .n_next_nodes = IP_LOOKUP_N_NEXT,
107 .next_nodes = IP4_LOOKUP_NEXT_NODES,
/* Node function for "ip4-load-balance".
   For every buffer in the frame the code reads the load-balance index from
   ip.adj_index[VLIB_TX], picks one bucket of that load-balance object, and
   then writes the bucket's dpoi_next_node into the next-index array and its
   dpoi_index back into ip.adj_index[VLIB_TX].  The load-balance "via"
   combined counter for the original index is incremented by one packet and
   by the buffer chain length.  The frame is then enqueued with
   vlib_buffer_enqueue_to_next (); when tracing is enabled on the node,
   ip4_forward_next_trace () is called.  Returns frame->n_vectors. */
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112 vlib_node_runtime_t * node,
113 vlib_frame_t * frame)
115 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
117 u32 thread_index = vm->thread_index;
/* Frame-sized scratch arrays: buffer pointers (filled by vlib_get_buffers)
   and the next indices later handed to vlib_buffer_enqueue_to_next. */
118 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119 u16 nexts[VLIB_FRAME_SIZE], *next;
121 from = vlib_frame_vector_args (frame);
122 n_left = frame->n_vectors;
125 vlib_get_buffers (vm, from, bufs, n_left);
/* Dual-buffer section: works on b[0] and b[1] while prefetching b[2], b[3]. */
129 const load_balance_t *lb0, *lb1;
130 const ip4_header_t *ip0, *ip1;
131 u32 lbi0, hc0, lbi1, hc1;
132 const dpo_id_t *dpo0, *dpo1;
134 /* Prefetch next iteration. */
136 vlib_prefetch_buffer_header (b[2], LOAD);
137 vlib_prefetch_buffer_header (b[3], LOAD);
139 CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140 CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
143 ip0 = vlib_buffer_get_current (b[0]);
144 ip1 = vlib_buffer_get_current (b[1]);
/* On entry ip.adj_index[VLIB_TX] holds a load-balance index. */
145 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146 lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
148 lb0 = load_balance_get (lbi0);
149 lb1 = load_balance_get (lbi1);
152 * this node is for via FIBs we can re-use the hash value from the
153 * to node if present.
154 * We don't want to use the same hash value at each level in the recursion
155 * graph as that would lead to polarisation
/* Bucket choice for b[0]: with more than one bucket, a non-zero flow hash
   already stored in the buffer is shifted right by one bit and stored back;
   the ip4_compute_flow_hash () assignment reads as the fallback when no hash
   is present (NOTE(review): confirm branch structure).  The hash is masked
   with lb_n_buckets_minus_1 to index the bucket.  The bucket-0 lookup is
   presumably the single-bucket case. */
159 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
161 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
163 hc0 = vnet_buffer (b[0])->ip.flow_hash =
164 vnet_buffer (b[0])->ip.flow_hash >> 1;
168 hc0 = vnet_buffer (b[0])->ip.flow_hash =
169 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
171 dpo0 = load_balance_get_fwd_bucket
172 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176 dpo0 = load_balance_get_bucket_i (lb0, 0);
/* Same bucket choice for b[1]. */
178 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
180 if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
182 hc1 = vnet_buffer (b[1])->ip.flow_hash =
183 vnet_buffer (b[1])->ip.flow_hash >> 1;
187 hc1 = vnet_buffer (b[1])->ip.flow_hash =
188 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
190 dpo1 = load_balance_get_fwd_bucket
191 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195 dpo1 = load_balance_get_bucket_i (lb1, 0);
/* Publish the chosen bucket: next node for dispatch, DPO index for the
   downstream node. */
198 next[0] = dpo0->dpoi_next_node;
199 next[1] = dpo1->dpoi_next_node;
201 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Counters are keyed by the load-balance index read on entry, not by the
   chosen bucket. */
204 vlib_increment_combined_counter
205 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206 vlib_increment_combined_counter
207 (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
/* Single-buffer section: same steps as above for b[0] only. */
216 const load_balance_t *lb0;
217 const ip4_header_t *ip0;
218 const dpo_id_t *dpo0;
221 ip0 = vlib_buffer_get_current (b[0]);
222 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
224 lb0 = load_balance_get (lbi0);
227 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
229 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
231 hc0 = vnet_buffer (b[0])->ip.flow_hash =
232 vnet_buffer (b[0])->ip.flow_hash >> 1;
236 hc0 = vnet_buffer (b[0])->ip.flow_hash =
237 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
239 dpo0 = load_balance_get_fwd_bucket
240 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244 dpo0 = load_balance_get_bucket_i (lb0, 0);
247 next[0] = dpo0->dpoi_next_node;
248 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
250 vlib_increment_combined_counter
251 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
/* Dispatch the whole frame using the per-buffer next indices. */
258 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
/* Traces record ip.adj_index[VLIB_TX], i.e. the DPO index written above. */
259 if (node->flags & VLIB_NODE_FLAG_TRACE)
260 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
262 return frame->n_vectors;
/* Graph-node registration for "ip4-load-balance".  It is declared a sibling
   of "ip4-lookup" and reuses format_ip4_lookup_trace for its traces. */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
268 .name = "ip4-load-balance",
269 .vector_size = sizeof (u32),
270 .sibling_of = "ip4-lookup",
271 .format_trace = format_ip4_lookup_trace,
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
/* Walks the addresses of sw_if_index (unnumbered interfaces honored) via
   foreach_ip_interface_address.  *result_ia receives the matching
   ip_interface_address_t when an address was found and 0 otherwise.
   NOTE(review): the loop body and the return statement are not visible here;
   presumably the first address found is returned - confirm. */
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279 ip_interface_address_t ** result_ia)
281 ip_lookup_main_t *lm = &im->lookup_main;
282 ip_interface_address_t *ia = 0;
283 ip4_address_t *result = 0;
286 foreach_ip_interface_address
287 (lm, ia, sw_if_index,
288 1 /* honor unnumbered */ ,
291 ip_interface_address_get_address (lm, ia);
297 *result_ia = result ? ia : 0;
/* (Re)install the route for a subnet broadcast address in fib_index.
   Any existing FIB_SOURCE_INTERFACE special entry is removed first.  When
   the interface has VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST set, a path is
   installed with fib_table_entry_update_one_path (); the special_add call
   with DROP | LOOSE_URPF_EXEMPT flags reads as the alternative when the flag
   is clear (NOTE(review): confirm branch structure). */
303 ip4_add_subnet_bcast_route (u32 fib_index,
307 vnet_sw_interface_flags_t iflags;
309 iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
311 fib_table_entry_special_remove(fib_index,
313 FIB_SOURCE_INTERFACE);
315 if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
317 fib_table_entry_update_one_path (fib_index, pfx,
318 FIB_SOURCE_INTERFACE,
321 /* No next-hop address */
327 // no out-label stack
329 FIB_ROUTE_PATH_FLAG_NONE);
333 fib_table_entry_special_add(fib_index,
335 FIB_SOURCE_INTERFACE,
336 (FIB_ENTRY_FLAG_DROP |
337 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
/* Install the per-prefix special routes for interface address `a` in
   fib_index.
   Interface prefixes are reference counted through lm->if_prefix_pool and
   the lm->prefix_to_if_prefix_index hash, keyed by (masked prefix,
   sw_if_index).  A prefix that already exists only has its ref_count
   incremented.  A new prefix is allocated and then, by prefix length:
     - <= 30: a glean route for the prefix, a drop route for the prefix base
       address and a route for the prefix broadcast address (the latter two
       only when they differ from the interface address);
     - == 31: an attached route for the other address of the pair. */
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
345 ip_interface_address_t * a)
347 ip_lookup_main_t *lm = &im->lookup_main;
348 ip_interface_prefix_t *if_prefix;
349 ip4_address_t *address = ip_interface_address_get_address (lm, a);
/* Lookup key: the address masked down to its prefix, plus the interface. */
351 ip_interface_prefix_key_t key = {
353 .fp_len = a->address_length,
354 .fp_proto = FIB_PROTOCOL_IP4,
355 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
357 .sw_if_index = sw_if_index,
/* Scratch prefix reused for each special route added below. */
360 fib_prefix_t pfx_special = {
361 .fp_proto = FIB_PROTOCOL_IP4,
364 /* If prefix already set on interface, just increment ref count & return */
365 if_prefix = ip_get_interface_prefix (lm, &key);
368 if_prefix->ref_count += 1;
372 /* New prefix - allocate a pool entry, initialize it, add to the hash */
373 pool_get (lm->if_prefix_pool, if_prefix);
374 if_prefix->ref_count = 1;
/* Remember which interface address this prefix was created from, as an
   index into lm->if_address_pool. */
375 if_prefix->src_ia_index = a - lm->if_address_pool;
376 clib_memcpy (&if_prefix->key, &key, sizeof (key));
377 mhash_set (&lm->prefix_to_if_prefix_index, &key,
378 if_prefix - lm->if_prefix_pool, 0 /* old value */);
380 /* length <= 30 - add glean, drop first address, maybe drop bcast address */
381 if (a->address_length <= 30)
383 pfx_special.fp_len = a->address_length;
384 pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
386 /* set the glean route for the prefix */
387 fib_table_entry_update_one_path (fib_index, &pfx_special,
388 FIB_SOURCE_INTERFACE,
389 (FIB_ENTRY_FLAG_CONNECTED |
390 FIB_ENTRY_FLAG_ATTACHED),
392 /* No next-hop address */
395 /* invalid FIB index */
398 /* no out-label stack */
400 FIB_ROUTE_PATH_FLAG_NONE);
402 /* set a drop route for the base address of the prefix */
403 pfx_special.fp_len = 32;
404 pfx_special.fp_addr.ip4.as_u32 =
405 address->as_u32 & im->fib_masks[a->address_length];
/* Skipped when the interface address itself is the base address. */
407 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408 fib_table_entry_special_add (fib_index, &pfx_special,
409 FIB_SOURCE_INTERFACE,
410 (FIB_ENTRY_FLAG_DROP |
411 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
413 /* set a route for the broadcast address of the prefix */
414 pfx_special.fp_len = 32;
415 pfx_special.fp_addr.ip4.as_u32 =
416 address->as_u32 | ~im->fib_masks[a->address_length];
/* Skipped when the interface address itself is the broadcast address. */
417 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418 ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422 /* length == 31 - add an attached route for the other address */
423 else if (a->address_length == 31)
425 pfx_special.fp_len = 32;
/* Flip the lowest-order bit of the (network byte order) address to get the
   other address of the /31 pair. */
426 pfx_special.fp_addr.ip4.as_u32 =
427 address->as_u32 ^ clib_host_to_net_u32(1);
429 fib_table_entry_update_one_path (fib_index, &pfx_special,
430 FIB_SOURCE_INTERFACE,
431 (FIB_ENTRY_FLAG_ATTACHED),
433 &pfx_special.fp_addr,
435 /* invalid FIB index */
439 FIB_ROUTE_PATH_FLAG_NONE);
/* Install all FIB routes that belong to interface address `a`:
   the per-prefix special routes (ip4_add_interface_prefix_routes), an
   optional classify entry when a classify table is configured for the
   interface, and a CONNECTED | LOCAL entry for the address itself. */
444 ip4_add_interface_routes (u32 sw_if_index,
445 ip4_main_t * im, u32 fib_index,
446 ip_interface_address_t * a)
448 ip_lookup_main_t *lm = &im->lookup_main;
449 ip4_address_t *address = ip_interface_address_get_address (lm, a);
452 .fp_proto = FIB_PROTOCOL_IP4,
453 .fp_addr.ip4 = *address,
456 /* set special routes for the prefix if needed */
457 ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
/* Classify handling only applies when the per-interface table vector covers
   this interface and its entry is not ~0. */
459 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
461 u32 classify_table_index =
462 lm->classify_table_index_by_sw_if_index[sw_if_index];
463 if (classify_table_index != (u32) ~ 0)
465 dpo_id_t dpo = DPO_INVALID;
470 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
472 fib_table_entry_special_dpo_add (fib_index,
475 FIB_ENTRY_FLAG_NONE, &dpo);
/* Route for the interface address itself. */
480 fib_table_entry_update_one_path (fib_index, &pfx,
481 FIB_SOURCE_INTERFACE,
482 (FIB_ENTRY_FLAG_CONNECTED |
483 FIB_ENTRY_FLAG_LOCAL),
490 FIB_ROUTE_PATH_FLAG_NONE);
/* Undo ip4_add_interface_prefix_routes for one interface address.
   The interface prefix is looked up by (masked prefix, sw_if_index); a
   warning is logged when it is not found.  Its ref_count is decremented and
   routes are then adjusted by prefix length:
     - <= 30: the glean route is deleted; when no address remains in the
       prefix the base-address and broadcast-address routes are removed as
       well, otherwise a replacement source address is looked up and a new
       glean route is installed with it;
     - == 31: the attached route for the other address is deleted.
   The hash entry and pool element for the prefix are released at the end of
   the visible body.  NOTE(review): the guards around that release and the
   early returns are not visible here - confirm when they apply. */
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
497 ip4_address_t * address,
500 ip_lookup_main_t *lm = &im->lookup_main;
501 ip_interface_prefix_t *if_prefix;
/* Same key construction as on the add path. */
503 ip_interface_prefix_key_t key = {
505 .fp_len = address_length,
506 .fp_proto = FIB_PROTOCOL_IP4,
507 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
509 .sw_if_index = sw_if_index,
512 fib_prefix_t pfx_special = {
514 .fp_proto = FIB_PROTOCOL_IP4,
517 if_prefix = ip_get_interface_prefix (lm, &key);
520 clib_warning ("Prefix not found while deleting %U",
521 format_ip4_address_and_length, address, address_length);
525 if_prefix->ref_count -= 1;
528 * Routes need to be adjusted if:
529 * - deleting last intf addr in prefix
530 * - deleting intf addr used as default source address in glean adjacency
532 * We're done now otherwise
/* True when other addresses still use the prefix and the recorded source
   address entry is still live in lm->if_address_pool. */
534 if ((if_prefix->ref_count > 0) &&
535 !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
538 /* length <= 30, delete glean route, first address, last address */
539 if (address_length <= 30)
542 /* remove glean route for prefix */
543 pfx_special.fp_addr.ip4 = *address;
544 pfx_special.fp_len = address_length;
545 fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
547 /* if no more intf addresses in prefix, remove other special routes */
548 if (!if_prefix->ref_count)
550 /* first address in prefix */
551 pfx_special.fp_addr.ip4.as_u32 =
552 address->as_u32 & im->fib_masks[address_length];
553 pfx_special.fp_len = 32;
/* Mirrors the add path: the route only exists when it differs from the
   interface address. */
555 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556 fib_table_entry_special_remove (fib_index,
558 FIB_SOURCE_INTERFACE);
560 /* prefix broadcast address */
561 pfx_special.fp_addr.ip4.as_u32 =
562 address->as_u32 | ~im->fib_masks[address_length];
563 pfx_special.fp_len = 32;
565 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
566 fib_table_entry_special_remove (fib_index,
568 FIB_SOURCE_INTERFACE);
571 /* default source addr just got deleted, find another */
573 ip_interface_address_t *new_src_ia = NULL;
574 ip4_address_t *new_src_addr = NULL;
577 ip4_interface_address_matching_destination
578 (im, address, sw_if_index, &new_src_ia);
/* Record the replacement source address as an index into if_address_pool. */
580 if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
582 pfx_special.fp_len = address_length;
583 pfx_special.fp_addr.ip4 = *new_src_addr;
585 /* set new glean route for the prefix */
586 fib_table_entry_update_one_path (fib_index, &pfx_special,
587 FIB_SOURCE_INTERFACE,
588 (FIB_ENTRY_FLAG_CONNECTED |
589 FIB_ENTRY_FLAG_ATTACHED),
591 /* No next-hop address */
594 /* invalid FIB index */
597 /* no out-label stack */
599 FIB_ROUTE_PATH_FLAG_NONE);
603 /* length == 31, delete attached route for the other address */
604 else if (address_length == 31)
606 pfx_special.fp_addr.ip4.as_u32 =
607 address->as_u32 ^ clib_host_to_net_u32(1);
609 fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
/* Release the bookkeeping for the prefix. */
612 mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
613 pool_put (lm->if_prefix_pool, if_prefix);
/* Remove the FIB routes that belong to one interface address: first the
   per-prefix special routes (ip4_del_interface_prefix_routes), then the
   FIB_SOURCE_INTERFACE entry for the address itself. */
617 ip4_del_interface_routes (u32 sw_if_index,
620 ip4_address_t * address, u32 address_length)
623 .fp_len = address_length,
624 .fp_proto = FIB_PROTOCOL_IP4,
625 .fp_addr.ip4 = *address,
628 ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
629 address, address_length);
632 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
635 #ifndef CLIB_MARCH_VARIANT
/* Reference-counted enable/disable of IPv4 on an interface.
   im->ip_enabled_by_sw_if_index[sw_if_index] is incremented on enable and
   decremented on disable; per the comment below, the feature changes and
   callbacks are meant to run only on the 1<->0 transition of that count.
   On such a transition the "ip4-not-enabled" feature is toggled on the
   ip4-unicast and ip4-multicast arcs (the multicast call passes !is_enable)
   and every registered enable/disable callback is invoked with is_enable. */
637 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
639 ip4_main_t *im = &ip4_main;
/* Grow the counter vector on demand; new slots start at 0. */
641 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
644 * enable/disable only on the 1<->0 transition
648 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
/* Disabling an interface that was never enabled is a caller bug. */
653 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
654 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
657 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
661 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
662 sw_if_index, !is_enable, 0, 0);
665 ip4_enable_disable_interface_callback_t *cb;
666 vec_foreach (cb, im->enable_disable_interface_callbacks)
667 cb->function (im, cb->function_opaque, sw_if_index, is_enable);
/* Add or delete an IPv4 address on an interface.
   Visible steps:
     1. sw_if_index 0 (local0) is rejected with an error.
     2. The address is paired with the interface's FIB index.
     3. Addresses on every interface in the same FIB are walked looking for
        overlap with the new prefix; a second address from the same prefix on
        the same interface is tolerated, any other overlap sets
        VNET_API_ERROR_DUPLICATE_IF_ADDRESS and builds an error message.
     4. ip_interface_address_add_del () updates the address pool.
     5. IPv4 is enabled (add) or disabled (delete) on the interface.
     6. If the interface is admin-up, the address's routes are added/removed.
     7. Add/del callbacks run only when the address pool element count
        changed.
   NOTE(review): the guards selecting the add vs. delete paths and the
   error/cleanup tail are not visible here. */
671 static clib_error_t *
672 ip4_add_del_interface_address_internal (vlib_main_t * vm,
674 ip4_address_t * address,
675 u32 address_length, u32 is_del)
677 vnet_main_t *vnm = vnet_get_main ();
678 ip4_main_t *im = &ip4_main;
679 ip_lookup_main_t *lm = &im->lookup_main;
680 clib_error_t *error = 0;
681 u32 if_address_index, elts_before;
682 ip4_address_fib_t ip4_af, *addr_fib = 0;
684 /* local0 interface doesn't support IP addressing */
685 if (sw_if_index == 0)
688 clib_error_create ("local0 interface doesn't support IP addressing");
/* Build the (address, fib index) pair and put it in a one-element vector. */
691 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
692 ip4_addr_fib_init (&ip4_af, address,
693 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
694 vec_add1 (addr_fib, ip4_af);
697 * there is no support for adj-fib handling in the presence of overlapping
698 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
704 /* When adding an address check that it does not conflict
705 with an existing address on any interface in this table. */
706 ip_interface_address_t *ia;
707 vnet_sw_interface_t *sif;
709 pool_foreach(sif, vnm->interface_main.sw_interfaces,
/* Only interfaces bound to the same FIB can conflict. */
711 if (im->fib_index_by_sw_if_index[sw_if_index] ==
712 im->fib_index_by_sw_if_index[sif->sw_if_index])
714 foreach_ip_interface_address
715 (&im->lookup_main, ia, sif->sw_if_index,
716 0 /* honor unnumbered */ ,
719 ip_interface_address_get_address
720 (&im->lookup_main, ia);
721 if (ip4_destination_matches_route
722 (im, address, x, ia->address_length) ||
723 ip4_destination_matches_route (im,
728 /* an intf may have >1 addr from the same prefix */
729 if ((sw_if_index == sif->sw_if_index) &&
730 (ia->address_length == address_length) &&
731 (x->as_u32 != address->as_u32))
734 /* error if the length or intf was different */
735 vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
739 ("failed to add %U on %U which conflicts with %U for interface %U",
740 format_ip4_address_and_length, address,
742 format_vnet_sw_if_index_name, vnm,
744 format_ip4_address_and_length, x,
746 format_vnet_sw_if_index_name, vnm,
/* Snapshot the pool size so a real add/delete can be told apart from a
   duplicate below. */
755 elts_before = pool_elts (lm->if_address_pool);
757 error = ip_interface_address_add_del
758 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
762 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
764 /* intf addr routes are added/deleted on admin up/down */
765 if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
768 ip4_del_interface_routes (sw_if_index,
769 im, ip4_af.fib_index, address,
772 ip4_add_interface_routes (sw_if_index,
773 im, ip4_af.fib_index,
775 (lm->if_address_pool, if_address_index));
778 /* If pool did not grow/shrink: add duplicate address. */
779 if (elts_before != pool_elts (lm->if_address_pool))
781 ip4_add_del_interface_address_callback_t *cb;
782 vec_foreach (cb, im->add_del_interface_address_callbacks)
783 cb->function (im, cb->function_opaque, sw_if_index,
784 address, address_length, if_address_index, is_del);
/* Public entry point for adding (is_del == 0) or deleting an interface
   address; forwards all arguments unchanged to
   ip4_add_del_interface_address_internal () and returns its result. */
793 ip4_add_del_interface_address (vlib_main_t * vm,
795 ip4_address_t * address,
796 u32 address_length, u32 is_del)
798 return ip4_add_del_interface_address_internal
799 (vm, sw_if_index, address, address_length, is_del);
803 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
805 ip_interface_address_t *ia;
811 * when directed broadcast is enabled, the subnet broadcast route will forward
812 * packets using an adjacency with a broadcast MAC. otherwise it drops
815 foreach_ip_interface_address(&im->lookup_main, ia,
818 if (ia->address_length <= 30)
822 ipa = ip_interface_address_get_address (&im->lookup_main, ia);
826 .fp_proto = FIB_PROTOCOL_IP4,
828 .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
832 ip4_add_subnet_bcast_route
833 (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
/* Admin up/down hook for software interfaces.
   Makes sure the per-interface lookup vectors cover sw_if_index, derives
   is_admin_up from VNET_SW_INTERFACE_FLAG_ADMIN_UP in `flags`, then walks the
   interface's own addresses (unnumbered not honored) and calls
   ip4_add_interface_routes () or ip4_del_interface_routes () for each one.
   NOTE(review): the condition choosing add vs. delete is not visible here;
   presumably it is is_admin_up - confirm. */
842 static clib_error_t *
843 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
845 ip4_main_t *im = &ip4_main;
846 ip_interface_address_t *ia;
848 u32 is_admin_up, fib_index;
850 /* Fill in lookup tables with default table (0). */
851 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
853 vec_validate_init_empty (im->
854 lookup_main.if_address_pool_index_by_sw_if_index,
857 is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
859 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
862 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
863 0 /* honor unnumbered */,
865 a = ip_interface_address_get_address (&im->lookup_main, ia);
867 ip4_add_interface_routes (sw_if_index,
871 ip4_del_interface_routes (sw_if_index,
873 a, ia->address_length);
/* Register the hook with the interface admin up/down notification list. */
880 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
882 /* Built-in ip4 unicast rx feature path definition */
/* The "ip4-unicast" arc starts at ip4-input / ip4-input-no-checksum and ends
   at ip4-lookup; its arc index is stored in
   ip4_main.lookup_main.ucast_feature_arc_index.
   Ordering declared by the runs_before constraints below:
     ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx
       -> ip4-source-check-via-any -> ip4-policer-classify
       -> ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass
       -> ip4-lookup
   ip4-source-and-port-range-check-rx also runs before ip4-policer-classify,
   and ip4-not-enabled runs before ip4-lookup. */
884 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
886 .arc_name = "ip4-unicast",
887 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
888 .last_in_arc = "ip4-lookup",
889 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
892 VNET_FEATURE_INIT (ip4_flow_classify, static) =
894 .arc_name = "ip4-unicast",
895 .node_name = "ip4-flow-classify",
896 .runs_before = VNET_FEATURES ("ip4-inacl"),
899 VNET_FEATURE_INIT (ip4_inacl, static) =
901 .arc_name = "ip4-unicast",
902 .node_name = "ip4-inacl",
903 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
906 VNET_FEATURE_INIT (ip4_source_check_1, static) =
908 .arc_name = "ip4-unicast",
909 .node_name = "ip4-source-check-via-rx",
910 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
913 VNET_FEATURE_INIT (ip4_source_check_2, static) =
915 .arc_name = "ip4-unicast",
916 .node_name = "ip4-source-check-via-any",
917 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
920 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
922 .arc_name = "ip4-unicast",
923 .node_name = "ip4-source-and-port-range-check-rx",
924 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
927 VNET_FEATURE_INIT (ip4_policer_classify, static) =
929 .arc_name = "ip4-unicast",
930 .node_name = "ip4-policer-classify",
931 .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
934 VNET_FEATURE_INIT (ip4_ipsec, static) =
936 .arc_name = "ip4-unicast",
937 .node_name = "ipsec4-input-feature",
938 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
941 VNET_FEATURE_INIT (ip4_vpath, static) =
943 .arc_name = "ip4-unicast",
944 .node_name = "vpath-input-ip4",
945 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
948 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
950 .arc_name = "ip4-unicast",
951 .node_name = "ip4-vxlan-bypass",
952 .runs_before = VNET_FEATURES ("ip4-lookup"),
/* This is the feature toggled per interface by
   ip4_sw_interface_enable_disable () and ip4_sw_interface_add_del (). */
955 VNET_FEATURE_INIT (ip4_not_enabled, static) =
957 .arc_name = "ip4-unicast",
958 .node_name = "ip4-not-enabled",
959 .runs_before = VNET_FEATURES ("ip4-lookup"),
962 VNET_FEATURE_INIT (ip4_lookup, static) =
964 .arc_name = "ip4-unicast",
965 .node_name = "ip4-lookup",
966 .runs_before = 0, /* not before any other features */
969 /* Built-in ip4 multicast rx feature path definition */
/* The "ip4-multicast" arc shares its start nodes with the unicast arc and
   ends at ip4-mfib-forward-lookup; its arc index is stored in
   ip4_main.lookup_main.mcast_feature_arc_index.  vpath-input-ip4 and
   ip4-not-enabled are both declared to run before the final lookup. */
970 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
972 .arc_name = "ip4-multicast",
973 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
974 .last_in_arc = "ip4-mfib-forward-lookup",
975 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
978 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
980 .arc_name = "ip4-multicast",
981 .node_name = "vpath-input-ip4",
982 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
985 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
987 .arc_name = "ip4-multicast",
988 .node_name = "ip4-not-enabled",
989 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
992 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
994 .arc_name = "ip4-multicast",
995 .node_name = "ip4-mfib-forward-lookup",
996 .runs_before = 0, /* last feature */
999 /* Source and port-range check ip4 tx feature path definition */
/* The "ip4-output" arc starts at ip4-rewrite / ip4-midchain / ip4-dvr-dpo
   and ends at interface-output; its arc index is stored in
   ip4_main.lookup_main.output_feature_arc_index.
   Ordering declared by the runs_before constraints below:
     ip4-source-and-port-range-check-tx -> ip4-outacl
       -> ipsec4-output-feature -> interface-output */
1000 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1002 .arc_name = "ip4-output",
1003 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1004 .last_in_arc = "interface-output",
1005 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1008 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1010 .arc_name = "ip4-output",
1011 .node_name = "ip4-source-and-port-range-check-tx",
1012 .runs_before = VNET_FEATURES ("ip4-outacl"),
1015 VNET_FEATURE_INIT (ip4_outacl, static) =
1017 .arc_name = "ip4-output",
1018 .node_name = "ip4-outacl",
1019 .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1022 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1024 .arc_name = "ip4-output",
1025 .node_name = "ipsec4-output-feature",
1026 .runs_before = VNET_FEATURES ("interface-output"),
1029 /* Built-in ip4 tx feature path definition */
1030 VNET_FEATURE_INIT (ip4_interface_output, static) =
1032 .arc_name = "ip4-output",
1033 .node_name = "interface-output",
1034 .runs_before = 0, /* not before any other features */
/* Software-interface create/delete hook.
   Ensures the FIB and mFIB index vectors cover sw_if_index.  One branch
   clears the interface's unnumbered binding and removes each of its
   addresses through ip4_add_del_interface_address (..., is_del = 1);
   NOTE(review): the guard for that branch is not visible here, presumably
   the delete case (!is_add) - confirm.  The "ip4-not-enabled" feature is
   then toggled on the unicast and multicast arcs (the multicast call passes
   is_add).  Always returns 0 (no error). */
1038 static clib_error_t *
1039 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1041 ip4_main_t *im = &ip4_main;
1043 /* Fill in lookup tables with default table (0). */
1044 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1045 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1049 ip4_main_t *im4 = &ip4_main;
1050 ip_lookup_main_t *lm4 = &im4->lookup_main;
1051 ip_interface_address_t *ia = 0;
1052 ip4_address_t *address;
1053 vlib_main_t *vm = vlib_get_main ();
1055 vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
/* Delete every address configured directly on this interface. */
1057 foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1059 address = ip_interface_address_get_address (lm4, ia);
1060 ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1065 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1068 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1069 sw_if_index, is_add, 0, 0);
1071 return /* no error */ 0;
/* Register the hook with the interface add/delete notification list. */
1074 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1076 /* Global IP4 main: the single ip4_main_t instance used throughout this
   file.  It is only defined when CLIB_MARCH_VARIANT is not set. */
1077 #ifndef CLIB_MARCH_VARIANT
1078 ip4_main_t ip4_main;
1079 #endif /* CLIB_MARCH_VARIANT */
/* Init function for the IPv4 lookup machinery.
   Visible steps:
     - run the feature, mtrie, FIB and mFIB module init functions first;
     - fill im->fib_masks[] with per-prefix-length masks, stored in network
       byte order;
     - call ip_lookup_init () for IPv4;
     - create (and lock) the unicast FIB and the mFIB with table id 0;
     - hook unformat_pg_ip4_header into the packet-generator node for
       ip4-lookup;
     - build the ARP request packet template (Ethernet hardware type, IPv4
       protocol type, 6-byte L2 / 4-byte L3 addresses, opcode request). */
1081 static clib_error_t *
1082 ip4_lookup_init (vlib_main_t * vm)
1084 ip4_main_t *im = &ip4_main;
1085 clib_error_t *error;
/* Dependencies that must be initialised before this module. */
1088 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1090 if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1092 if ((error = vlib_call_init_function (vm, fib_module_init)))
1094 if ((error = vlib_call_init_function (vm, mfib_module_init)))
1097 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* Mask with the top i bits set, built in host order then converted. */
1102 m = pow2_mask (i) << (32 - i);
1105 im->fib_masks[i] = clib_host_to_net_u32 (m);
1108 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1110 /* Create FIB with index 0 and table id of 0. */
1111 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1112 FIB_SOURCE_DEFAULT_ROUTE);
1113 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1114 MFIB_SOURCE_DEFAULT_ROUTE);
1118 pn = pg_get_node (ip4_lookup_node.index);
1119 pn->unformat_edit = unformat_pg_ip4_header;
/* ARP request template: zeroed header with the fixed fields filled in. */
1123 ethernet_arp_header_t h;
1125 clib_memset (&h, 0, sizeof (h));
/* Local helpers: _16 stores a 16-bit value in network byte order, _8 stores
   a byte as is. */
1127 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1128 #define _8(f,v) h.f = v;
1129 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1130 _16 (l3_type, ETHERNET_TYPE_IP4);
1131 _8 (n_l2_address_bytes, 6);
1132 _8 (n_l3_address_bytes, 4);
1133 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1137 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1140 /* alloc chunk size */ 8,
1147 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Trace record shared by the ip4 forwarding nodes.  The code in this file
   reads and writes its fib_index, dpo_index, flow_hash and packet_data
   members. */
1151 /* Adjacency taken. */
1156 /* Packet data, possibly *after* rewrite. */
/* Captured bytes of the packet, sized 64 - sizeof (u32). */
1157 u8 packet_data[64 - 1 * sizeof (u32)];
1159 ip4_forward_next_trace_t;
1161 #ifndef CLIB_MARCH_VARIANT
/* Format callback for an ip4_forward_next_trace_t: prints only the captured
   packet bytes as an IPv4 header, indented to the current indent of `s`.
   The va_list carries (vlib_main_t *, vlib_node_t *, trace record); the
   first two are consumed but unused. */
1163 format_ip4_forward_next_trace (u8 * s, va_list * args)
1165 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1166 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1167 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1168 u32 indent = format_get_indent (s);
1169 s = format (s, "%U%U",
1170 format_white_space, indent,
1171 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Format callback used by the ip4-lookup and ip4-load-balance nodes.
   First line: FIB index, DPO index and flow hash from the trace record.
   Second line: the captured packet bytes rendered as an IPv4 header,
   indented to the indent `s` had on entry. */
1177 format_ip4_lookup_trace (u8 * s, va_list * args)
1179 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1180 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1181 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1182 u32 indent = format_get_indent (s);
1184 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1185 t->fib_index, t->dpo_index, t->flow_hash);
1186 s = format (s, "\n%U%U",
1187 format_white_space, indent,
1188 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Format callback for rewrite-stage traces.
   First line: a "tx_sw_if_index" value, the DPO index, the adjacency
   rendered with format_ip_adjacency, and the flow hash.
   Second line: the captured packet bytes rendered with
   format_ip_adjacency_packet_data for that adjacency. */
1193 format_ip4_rewrite_trace (u8 * s, va_list * args)
1195 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1196 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1197 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1198 u32 indent = format_get_indent (s);
/* NOTE(review): the value printed under the "tx_sw_if_index" label is
   t->fib_index - confirm that the label matches what the field holds. */
1200 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1201 t->fib_index, t->dpo_index, format_ip_adjacency,
1202 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1203 s = format (s, "\n%U%U",
1204 format_white_space, indent,
1205 format_ip_adjacency_packet_data,
1206 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1210 #ifndef CLIB_MARCH_VARIANT
1211 /* Common trace function for all ip4-forward next nodes. */
/* Walks the buffers of `frame` and, for each buffer flagged
   VLIB_BUFFER_IS_TRACED, adds an ip4_forward_next_trace_t record holding:
     - dpo_index: ip.adj_index[which_adj_index] of the buffer;
     - flow_hash: ip.flow_hash of the buffer;
     - a FIB index: sw_if_index[VLIB_TX] when that is not ~0, otherwise the
       FIB bound to the receive interface (NOTE(review): the assignment
       target is not visible here, presumably fib_index - confirm);
     - packet_data: sizeof (packet_data) bytes copied from the buffer's
       current data pointer.
   Buffers are handled two at a time with prefetch of the following two,
   then one at a time. */
1213 ip4_forward_next_trace (vlib_main_t * vm,
1214 vlib_node_runtime_t * node,
1215 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1218 ip4_main_t *im = &ip4_main;
1220 n_left = frame->n_vectors;
1221 from = vlib_frame_vector_args (frame);
/* Dual-buffer section. */
1226 vlib_buffer_t *b0, *b1;
1227 ip4_forward_next_trace_t *t0, *t1;
1229 /* Prefetch next iteration. */
1230 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1231 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1236 b0 = vlib_get_buffer (vm, bi0);
1237 b1 = vlib_get_buffer (vm, bi1);
1239 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1241 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1242 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1243 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* An explicit TX value takes precedence over the RX interface's FIB. */
1245 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1246 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1247 vec_elt (im->fib_index_by_sw_if_index,
1248 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1250 clib_memcpy_fast (t0->packet_data,
1251 vlib_buffer_get_current (b0),
1252 sizeof (t0->packet_data));
1254 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1256 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1257 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1258 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1260 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1261 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1262 vec_elt (im->fib_index_by_sw_if_index,
1263 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1264 clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1265 sizeof (t1->packet_data));
/* Single-buffer section: same record for one buffer at a time. */
1275 ip4_forward_next_trace_t *t0;
1279 b0 = vlib_get_buffer (vm, bi0);
1281 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1283 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1284 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1285 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1287 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1288 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1289 vec_elt (im->fib_index_by_sw_if_index,
1290 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1291 clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1292 sizeof (t0->packet_data));
1299 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * The sum is seeded with pseudo-header style terms (payload length,
 * protocol, source and destination address) and then extended over the
 * payload, following the buffer chain when the payload spans more than one
 * buffer. The folded, complemented 16-bit result ends up in sum16.
 *
 * vm - vlib main, used to resolve chained buffer indices
 * p0 - first buffer of the packet
 * NOTE(review): the IP header parameter (used below as ip0) and the return
 * statement are not visible in this excerpt; presumably sum16 is returned.
 */
1301 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1305 u32 ip_header_length, payload_length_host_byte_order;
1306 u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1308 u8 *data_this_buffer;
1311 /* Initialize checksum with ip header. */
1312 ip_header_length = ip4_header_bytes (ip0);
/* L4 payload length = IP total length minus the IP header length. */
1313 payload_length_host_byte_order =
1314 clib_net_to_host_u16 (ip0->length) - ip_header_length;
/* Payload length plus protocol (shifted by 16), converted to network order. */
1316 clib_host_to_net_u32 (payload_length_host_byte_order +
1317 (ip0->protocol << 16));
/*
 * Add the addresses: two 32-bit loads when uword is 32 bits wide, otherwise
 * one 64-bit load starting at src_address (presumably covering the source
 * and destination addresses together - confirm against the header layout).
 */
1319 if (BITS (uword) == 32)
1322 ip_csum_with_carry (sum0,
1323 clib_mem_unaligned (&ip0->src_address, u32));
1325 ip_csum_with_carry (sum0,
1326 clib_mem_unaligned (&ip0->dst_address, u32));
1330 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1332 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1333 data_this_buffer = (u8 *) ip0 + ip_header_length;
/* Number of bytes held by this buffer counted from the start of the IP header. */
1334 n_ip_bytes_this_buffer =
1335 p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
/* Payload does not fit in the first buffer: clamp to what is present (or 0). */
1336 if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1338 n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1339 n_ip_bytes_this_buffer - ip_header_length : 0;
/* Accumulate this chunk; stop once the whole payload has been summed. */
1344 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1345 n_bytes_left -= n_this_buffer;
1346 if (n_bytes_left == 0)
/*
 * More payload is expected, so the chain must continue. The explicit test
 * after the ASSERT covers builds where ASSERT is compiled out (its handling
 * line is not visible in this excerpt).
 */
1349 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1350 if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
/* Remember whether the chunk just summed had an odd byte count. */
1353 length_odd = (n_this_buffer & 1);
/* Advance to the next buffer in the chain. */
1355 p0 = vlib_get_buffer (vm, p0->next_buffer);
1356 data_this_buffer = vlib_buffer_get_current (p0);
1357 n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1359 if (PREDICT_FALSE (length_odd))
1361 /* Prepend a 0 or the resulting checksum will be incorrect. */
1365 data_this_buffer[0] = 0;
/* Fold the accumulator to 16 bits and take the one's complement. */
1369 sum16 = ~ip_csum_fold (sum0);
/*
 * Validate the TCP/UDP checksum of the IPv4 packet found at the buffer's
 * current data pointer and record the outcome in p0->flags:
 * VNET_BUFFER_F_L4_CHECKSUM_COMPUTED is always set, and
 * VNET_BUFFER_F_L4_CHECKSUM_CORRECT is set when the check passes.
 *
 * vm - vlib main, forwarded to the checksum routine
 * p0 - buffer holding the packet; its flags are updated in place
 * NOTE(review): the return statement is not visible in this excerpt; the
 * caller in ip4_local_l4_csum_validate treats the result as buffer flags.
 */
1374 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1376 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
/* Only TCP and UDP packets are expected here. */
1380 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1381 || ip0->protocol == IP_PROTOCOL_UDP);
/*
 * NOTE(review): ip0 + 1 steps over exactly sizeof (ip4_header_t), whereas
 * ip4_local_l4_csum_validate locates the UDP header via ip4_next_header ().
 * Confirm whether headers carrying options can reach this point.
 */
1383 udp0 = (void *) (ip0 + 1);
/* A UDP checksum field of zero is accepted without computing anything. */
1384 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1386 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1387 | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
/* Otherwise compute in software; a result of 0 means the checksum is good. */
1391 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1393 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1394 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
/*
 * Declare the "ip4-local" feature arc: it starts at node "ip4-local" and
 * its last node is "ip4-local-end-of-arc".
 */
1401 VNET_FEATURE_ARC_INIT (ip4_local) =
1403 .arc_name = "ip4-local",
1404 .start_nodes = VNET_FEATURES ("ip4-local"),
1405 .last_in_arc = "ip4-local-end-of-arc",
/*
 * Validate the L4 checksum of a locally destined packet in software and,
 * for UDP, sanity-check the UDP length against the IP length.
 *
 * vm     - vlib main
 * p      - packet buffer
 * ip     - IPv4 header of the packet
 * is_udp - non-zero when the packet is UDP
 * error  - in/out error code; set to IP4_ERROR_UDP_LENGTH on a bad length
 * NOTE(review): the last parameter (used below as good_tcp_udp, an output
 * flag) and the condition guarding the UDP length check are not visible in
 * this excerpt; presumably the length check runs only when is_udp is set.
 */
1410 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1411 ip4_header_t * ip, u8 is_udp, u8 * error,
/* Run the software check and report whether the CORRECT flag came back set. */
1415 flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1416 *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1420 u32 ip_len, udp_len;
1422 udp = ip4_next_header (ip);
1423 /* Verify UDP length. */
1424 ip_len = clib_net_to_host_u16 (ip->length);
1425 udp_len = clib_net_to_host_u16 (udp->length);
/* A UDP length larger than the IP length field makes len_diff negative. */
1427 len_diff = ip_len - udp_len;
1428 *good_tcp_udp &= len_diff >= 0;
1429 *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
/*
 * L4 checksum state helpers for ip4-local. _b is a pointer to a buffer that
 * has a 'flags' member. Every macro argument and every expansion is fully
 * parenthesised so the macros compose safely with unary/binary operators
 * and accept arbitrary pointer expressions (e.g. bufs + 1).
 */

/* 1 when either the OFFLOAD_TCP_CKSUM or OFFLOAD_UDP_CKSUM flag is set. */
#define ip4_local_csum_is_offloaded(_b)                                 \
  ((((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM) != 0)               \
   || (((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM) != 0))

/*
 * 1 when a software checksum check is still required: the packet is
 * TCP/UDP and the checksum is neither marked as computed nor offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
  ((is_tcp_udp)                                                         \
   && !((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) != 0)       \
        || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is marked correct or is offloaded. */
#define ip4_local_csum_is_valid(_b)                                     \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0)             \
   || ip4_local_csum_is_offloaded (_b))
/*
 * L4 checksum check for a single locally destined packet.
 *
 * vm    - vlib main
 * b     - packet buffer
 * ih    - IPv4 header of the packet
 * error - in/out error code; replaced by IP4_ERROR_TCP_CHECKSUM or
 *         IP4_ERROR_UDP_CHECKSUM when a TCP/UDP packet fails the check
 */
1446 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1447 ip4_header_t * ih, u8 * error)
1449 u8 is_udp, is_tcp_udp, good_tcp_udp;
1451 is_udp = ih->protocol == IP_PROTOCOL_UDP;
1452 is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
/*
 * Software validation is only needed when the checksum is neither already
 * computed nor offloaded; otherwise the buffer flags give the answer.
 * NOTE(review): the keyword separating the two statements below is not
 * visible in this excerpt; presumably they are the two arms of an if/else.
 */
1454 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1455 ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1457 good_tcp_udp = ip4_local_csum_is_valid (b);
/* The error selection below relies on the UDP code directly following TCP. */
1459 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1460 *error = (is_tcp_udp && !good_tcp_udp
1461 ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
/*
 * Two-packet variant of ip4_local_check_l4_csum.
 *
 * vm    - vlib main
 * b     - array of (at least) two buffer pointers
 * ih    - array of the two matching IPv4 headers
 * error - array of two in/out error codes, one per packet
 */
1465 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1466 ip4_header_t ** ih, u8 * error)
1468 u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1470 is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1471 is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1473 is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1474 is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
/* Start from what the buffer flags already say. */
1476 good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1477 good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
/* Slow path: at least one of the two packets needs software validation. */
1479 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1480 || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
/*
 * NOTE(review): the per-packet conditions and the trailing arguments of the
 * two calls below are not visible in this excerpt.
 */
1483 ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1486 ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
/* TCP/UDP packets that failed get the TCP or UDP checksum error code. */
1490 error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1491 IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1492 error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1493 IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
/*
 * Finalise the next index and error of a locally destined packet.
 *
 * error_node          - node runtime whose error table is indexed by 'error'
 * b                   - packet buffer; b->error is written
 * next                - in/out next index; forced to IP_LOCAL_NEXT_DROP when
 *                       'error' differs from IP4_ERROR_UNKNOWN_PROTOCOL
 * error               - error code; callers in this file initialise it to
 *                       IP4_ERROR_UNKNOWN_PROTOCOL, which acts as "no error"
 * head_of_feature_arc - non-zero when called from the head of the
 *                       ip4-local feature arc
 */
1497 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1498 vlib_buffer_t * b, u16 * next, u8 error,
1499 u8 head_of_feature_arc)
1501 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1504 *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1505 b->error = error ? error_node->errors[error] : 0;
/*
 * At the head of the arc, packets without an error are handed to the
 * feature arc. NOTE(review): the remaining arguments of the call and what
 * is done with its result are not visible in this excerpt.
 */
1506 if (head_of_feature_arc)
1509 if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1511 vnet_feature_arc_start (arc_index,
1512 vnet_buffer (b)->sw_if_index[VLIB_RX],
1525 } ip4_local_last_check_t;
/*
 * Source-address check for one locally destined packet, with a one-entry
 * cache (last_check) keyed on the source address.
 *
 * b          - packet buffer; ip.fib_index and ip.adj_index[] are updated
 * ip0        - IPv4 header of the packet
 * last_check - cache of the previous lookup (source, load-balance index,
 *              resulting error, and a 'first' flag)
 * error0     - in/out error code
 */
1528 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1529 ip4_local_last_check_t * last_check, u8 * error0)
1531 ip4_fib_mtrie_leaf_t leaf0;
1532 ip4_fib_mtrie_t *mtrie0;
1533 const dpo_id_t *dpo0;
1534 load_balance_t *lb0;
/* A TX sw_if_index other than ~0 replaces the buffer's FIB index. */
1537 vnet_buffer (b)->ip.fib_index =
1538 vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1539 vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1542 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1543 * adjacency for the destination address (the local interface address).
1544 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1545 * adjacency for the source address (the remote sender's address)
/* Cache miss: first use, or the source differs from the cached one. */
1547 if (PREDICT_FALSE (last_check->first ||
1548 (last_check->src.as_u32 != ip0->src_address.as_u32)))
/* Multi-step mtrie lookup of the source address in the selected FIB. */
1550 mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1551 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1552 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1553 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1554 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
/* Move the previous TX index to the RX slot, then store the source lookup result. */
1556 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1557 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1558 vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1560 lb0 = load_balance_get (lbi0);
1561 dpo0 = load_balance_get_bucket_i (lb0, 0);
1564 * Must have a route to source otherwise we drop the packet.
1565 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1568 * - the source is a recieve => it's from us => bogus, do this
1569 * first since it sets a different error code.
1570 * - uRPF check for any route to source - accept if passes.
1571 * - allow packets destined to the broadcast address from unknown sources
/* Both checks only fire while the error is still IP4_ERROR_UNKNOWN_PROTOCOL. */
1574 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1575 && dpo0->dpoi_type == DPO_RECEIVE) ?
1576 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1577 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1578 && !fib_urpf_check_size (lb0->lb_urpf)
1579 && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1580 IP4_ERROR_SRC_LOOKUP_MISS : *error0);
/* Remember this lookup for subsequent packets from the same source. */
1582 last_check->src.as_u32 = ip0->src_address.as_u32;
1583 last_check->lbi = lbi0;
1584 last_check->error = *error0;
/* Cache hit: reuse the cached load-balance index and error. */
1588 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1589 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1590 vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1591 *error0 = last_check->error;
/*
 * NOTE(review): this reset appears to sit on the cache-hit path only. If
 * so, 'first' is never cleared after a lookup and the cache-hit path can
 * never be taken - confirm against the complete source.
 */
1592 last_check->first = 0;
/*
 * Two-packet variant of ip4_local_check_src. The cached result is reused
 * only when both sources equal the cached source; otherwise both packets
 * are looked up and the cache is refreshed from the second packet.
 *
 * b          - array of (at least) two buffer pointers
 * ip         - array of the two matching IPv4 headers
 * last_check - one-entry source lookup cache
 * error      - array of two in/out error codes
 */
1597 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1598 ip4_local_last_check_t * last_check, u8 * error)
1600 ip4_fib_mtrie_leaf_t leaf[2];
1601 ip4_fib_mtrie_t *mtrie[2];
1602 const dpo_id_t *dpo[2];
1603 load_balance_t *lb[2];
/* Non-zero when the cache is unused so far or either source differs from it. */
1607 not_last_hit = last_check->first;
1608 not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1609 not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
/* A TX sw_if_index other than ~0 replaces the buffer's FIB index. */
1611 vnet_buffer (b[0])->ip.fib_index =
1612 vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1613 vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1614 vnet_buffer (b[0])->ip.fib_index;
1616 vnet_buffer (b[1])->ip.fib_index =
1617 vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1618 vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1619 vnet_buffer (b[1])->ip.fib_index;
1622 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1623 * adjacency for the destination address (the local interface address).
1624 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1625 * adjacency for the source address (the remote sender's address)
/* Cache miss: look up both sources, interleaving the mtrie steps. */
1627 if (PREDICT_FALSE (not_last_hit))
1629 mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1630 mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1632 leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1633 leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1635 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1636 &ip[0]->src_address, 2);
1637 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1638 &ip[1]->src_address, 2);
1640 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1641 &ip[0]->src_address, 3);
1642 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1643 &ip[1]->src_address, 3);
1645 lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1646 lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
/* Move the previous TX index to the RX slot, then store the source lookup result. */
1648 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1649 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1650 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1652 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1653 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1654 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1656 lb[0] = load_balance_get (lbi[0]);
1657 lb[1] = load_balance_get (lbi[1]);
1659 dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1660 dpo[1] = load_balance_get_bucket_i (lb[1], 0);
/*
 * Same two checks as the single-packet variant: a DPO_RECEIVE source is
 * reported as spoofed, then a failed uRPF size check (unless the
 * destination is 255.255.255.255) is reported as a source lookup miss.
 */
1662 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1663 dpo[0]->dpoi_type == DPO_RECEIVE) ?
1664 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1665 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1666 !fib_urpf_check_size (lb[0]->lb_urpf) &&
1667 ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1668 ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1670 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1671 dpo[1]->dpoi_type == DPO_RECEIVE) ?
1672 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1673 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1674 !fib_urpf_check_size (lb[1]->lb_urpf) &&
1675 ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1676 ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
/* The cache is refreshed from the second packet of the pair. */
1678 last_check->src.as_u32 = ip[1]->src_address.as_u32;
1679 last_check->lbi = lbi[1];
1680 last_check->error = error[1];
/* Cache hit: both packets reuse the cached load-balance index and error. */
1684 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1685 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1686 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1688 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1689 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1690 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1692 error[0] = last_check->error;
1693 error[1] = last_check->error;
/*
 * NOTE(review): this reset appears to sit on the cache-hit path only. If
 * so, 'first' is never cleared after a lookup and the cache-hit path can
 * never be taken - confirm against the complete source.
 */
1694 last_check->first = 0;
/*
 * Packet classes distinguished by ip4_local_classify. L4 is listed first
 * with no explicit initialiser, so it has the value 0; ip4_local_inline
 * tests the classification for non-zero to detect the other classes.
 */
1698 enum ip_local_packet_type_e
1700 IP_LOCAL_PACKET_TYPE_L4,
1701 IP_LOCAL_PACKET_TYPE_NAT,
1702 IP_LOCAL_PACKET_TYPE_FRAG,
1706 * Determine packet type and next node.
1708 * The expectation is that all packets that are not L4 will skip
1709 * checksums and source checks.
/*
 * b    - packet buffer (its flags are inspected)
 * ip   - IPv4 header of the packet
 * next - output: next index chosen for the packet
 * Returns one of the IP_LOCAL_PACKET_TYPE_* values.
 */
1712 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1714 ip_lookup_main_t *lm = &ip4_main.lookup_main;
/* Fragments go to reassembly regardless of protocol. */
1716 if (PREDICT_FALSE (ip4_is_fragment (ip)))
1718 *next = IP_LOCAL_NEXT_REASSEMBLY;
1719 return IP_LOCAL_PACKET_TYPE_FRAG;
/* NAT-marked packets use the per-protocol next but are reported as NAT. */
1721 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1723 *next = lm->local_next_by_ip_protocol[ip->protocol];
1724 return IP_LOCAL_PACKET_TYPE_NAT;
/* Everything else: per-protocol next, classified as L4. */
1727 *next = lm->local_next_by_ip_protocol[ip->protocol];
1728 return IP_LOCAL_PACKET_TYPE_L4;
/*
 * Shared worker for the ip4-local and ip4-local-end-of-arc nodes.
 * Each buffer of the frame is classified, optionally run through the L4
 * checksum and source checks, given a next index and error, and finally
 * all buffers are enqueued to their next nodes.
 *
 * vm                  - vlib main
 * node                - runtime of the calling node
 * frame               - frame of buffer indices to process
 * head_of_feature_arc - 1 when called from ip4-local, 0 when called from
 *                       ip4-local-end-of-arc
 * Returns the number of buffers in the frame.
 */
1732 ip4_local_inline (vlib_main_t * vm,
1733 vlib_node_runtime_t * node,
1734 vlib_frame_t * frame, int head_of_feature_arc)
1736 u32 *from, n_left_from;
/* Error codes are resolved against the ip4-input node's error table. */
1737 vlib_node_runtime_t *error_node =
1738 vlib_node_get_runtime (vm, ip4_input_node.index);
1739 u16 nexts[VLIB_FRAME_SIZE], *next;
1740 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1741 ip4_header_t *ip[2];
1744 ip4_local_last_check_t last_check = {
1746 * 0.0.0.0 can appear as the source address of an IP packet,
1747 * as can any other address, hence the need to use the 'first'
1748 * member to make sure the .lbi is initialised for the first
1751 .src = {.as_u32 = 0},
1753 .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1757 from = vlib_frame_vector_args (frame);
1758 n_left_from = frame->n_vectors;
/* Tracing records the VLIB_TX adjacency index slot. */
1760 if (node->flags & VLIB_NODE_FLAG_TRACE)
1761 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1763 vlib_get_buffers (vm, from, bufs, n_left_from);
/*
 * Dual loop: two packets per iteration. At least 6 buffers must remain
 * because b[4] and b[5] are prefetched.
 */
1767 while (n_left_from >= 6)
1771 /* Prefetch next iteration. */
1773 vlib_prefetch_buffer_header (b[4], LOAD);
1774 vlib_prefetch_buffer_header (b[5], LOAD);
1776 CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1777 CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
/* IP4_ERROR_UNKNOWN_PROTOCOL is the starting ("no error yet") value. */
1780 error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1782 ip[0] = vlib_buffer_get_current (b[0]);
1783 ip[1] = vlib_buffer_get_current (b[1]);
1785 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1786 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1788 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1789 pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
/* Non-zero when the two packets were classified differently. */
1791 not_batch = pt[0] ^ pt[1];
/*
 * NOTE(review): the action taken under this condition is not visible in
 * this excerpt; presumably the checksum and source checks are skipped.
 */
1793 if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
/* Same class for both packets: use the two-packet check routines. */
1796 if (PREDICT_TRUE (not_batch == 0))
1798 ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1799 ip4_local_check_src_x2 (b, ip, &last_check, error);
/*
 * Mixed classes: single-packet checks. NOTE(review): the per-packet guards
 * around the two pairs of calls below are not visible in this excerpt.
 */
1805 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1806 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1810 ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1811 ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1817 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1818 head_of_feature_arc);
1819 ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1820 head_of_feature_arc);
/* Single loop: remaining packets, one per iteration. */
1827 while (n_left_from > 0)
1829 error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1831 ip[0] = vlib_buffer_get_current (b[0]);
1832 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1833 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1835 if (head_of_feature_arc == 0 || pt[0])
1838 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1839 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1843 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1844 head_of_feature_arc);
/* Hand every buffer to the next node recorded for it in nexts[]. */
1851 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1852 return frame->n_vectors;
/* Node function for ip4_local_node: runs the shared worker as head of the feature arc. */
1855 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1856 vlib_frame_t * frame)
1858 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/*
 * Registration of the "ip4-local" node: u32 buffer-index vectors, the
 * common ip4 forward trace formatter, and the next-node names indexed by
 * the IP_LOCAL_NEXT_* values.
 */
1862 VLIB_REGISTER_NODE (ip4_local_node) =
1864 .name = "ip4-local",
1865 .vector_size = sizeof (u32),
1866 .format_trace = format_ip4_forward_next_trace,
1867 .n_next_nodes = IP_LOCAL_N_NEXT,
1870 [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1871 [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1872 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1873 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1874 [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
/*
 * Node function for ip4_local_end_of_arc_node: runs the shared worker with
 * head_of_feature_arc set to 0.
 */
1880 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1881 vlib_node_runtime_t * node,
1882 vlib_frame_t * frame)
1884 return ip4_local_inline (vm, node, frame, 0 /* not head of feature arc */ );
/*
 * Registration of the "ip4-local-end-of-arc" node, declared as a sibling of
 * "ip4-local" and using the same trace formatter.
 */
1888 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1889 .name = "ip4-local-end-of-arc",
1890 .vector_size = sizeof (u32),
1892 .format_trace = format_ip4_forward_next_trace,
1893 .sibling_of = "ip4-local",
/* Attach node "ip4-local-end-of-arc" as a feature on the "ip4-local" arc. */
1896 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1897 .arc_name = "ip4-local",
1898 .node_name = "ip4-local-end-of-arc",
1899 .runs_before = 0, /* not before any other features */
1903 #ifndef CLIB_MARCH_VARIANT
/*
 * Route locally destined packets of the given IP protocol to a node.
 *
 * protocol   - IP protocol number; must index local_next_by_ip_protocol
 * node_index - node to add as a next of ip4-local; the resulting next index
 *              is stored in the per-protocol table
 */
1905 ip4_register_protocol (u32 protocol, u32 node_index)
1907 vlib_main_t *vm = vlib_get_main ();
1908 ip4_main_t *im = &ip4_main;
1909 ip_lookup_main_t *lm = &im->lookup_main;
1911 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1912 lm->local_next_by_ip_protocol[protocol] =
1913 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * Reset the per-protocol table entry for the given IP protocol to
 * IP_LOCAL_NEXT_PUNT.
 *
 * protocol - IP protocol number; must index local_next_by_ip_protocol
 */
1917 ip4_unregister_protocol (u32 protocol)
1919 ip4_main_t *im = &ip4_main;
1920 ip_lookup_main_t *lm = &im->lookup_main;
1922 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1923 lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
/*
 * CLI handler for "show ip local": prints every IP protocol whose entry in
 * local_next_by_ip_protocol is not IP_LOCAL_NEXT_PUNT, together with the
 * name of the node that entry leads to.
 *
 * vm    - vlib main, used for CLI output and node lookup
 * input - unparsed CLI input (not referenced in the visible lines)
 * cmd   - CLI command descriptor (not referenced in the visible lines)
 */
1927 static clib_error_t *
1928 show_ip_local_command_fn (vlib_main_t * vm,
1929 unformat_input_t * input, vlib_cli_command_t * cmd)
1931 ip4_main_t *im = &ip4_main;
1932 ip_lookup_main_t *lm = &im->lookup_main;
1935 vlib_cli_output (vm, "Protocols handled by ip4_local");
1936 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1938 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
/* Translate the stored next index into the index of the node it points to. */
1940 u32 node_index = vlib_get_node (vm,
1941 ip4_local_node.index)->
1942 next_nodes[lm->local_next_by_ip_protocol[i]];
1943 vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1944 format_vlib_node_name, vm, node_index);
1953 * Display the set of protocols handled by the local IPv4 stack.
1956 * Example of how to display local protocol table:
1957 * @cliexstart{show ip local}
1958 * Protocols handled by ip4_local
/* Bind the "show ip local" CLI path to show_ip_local_command_fn. */
1965 VLIB_CLI_COMMAND (show_ip_local, static) =
1967 .path = "show ip local",
1968 .function = show_ip_local_command_fn,
1969 .short_help = "show ip local",
/*
 * Shared worker for the ip4-arp and ip4-glean nodes. Every incoming packet
 * is placed on the IP4_ARP_NEXT_DROP frame with an error code describing
 * what happened, and, unless throttled or otherwise rejected, an ARP
 * request built from the request template is sent towards the adjacency's
 * next node.
 *
 * vm       - vlib main
 * node     - runtime of the calling node
 * frame    - frame of buffer indices to process
 * is_glean - 1 when called from ip4-glean, 0 when called from ip4-arp
 * Returns the number of buffers in the frame.
 */
1974 ip4_arp_inline (vlib_main_t * vm,
1975 vlib_node_runtime_t * node,
1976 vlib_frame_t * frame, int is_glean)
1978 vnet_main_t *vnm = vnet_get_main ();
1979 ip4_main_t *im = &ip4_main;
1980 ip_lookup_main_t *lm = &im->lookup_main;
1981 u32 *from, *to_next_drop;
1982 uword n_left_from, n_left_to_next_drop, next_index;
1983 u32 thread_index = vm->thread_index;
1986 if (node->flags & VLIB_NODE_FLAG_TRACE)
1987 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Per-thread throttle seed for this dispatch, derived from the current time. */
1989 seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1991 from = vlib_frame_vector_args (frame);
1992 n_left_from = frame->n_vectors;
1993 next_index = node->cached_next_index;
1994 if (next_index == IP4_ARP_NEXT_DROP)
1995 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1997 while (n_left_from > 0)
1999 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2000 to_next_drop, n_left_to_next_drop);
2002 while (n_left_from > 0 && n_left_to_next_drop > 0)
2004 u32 pi0, bi0, adj_index0, sw_if_index0;
2005 ip_adjacency_t *adj0;
2006 vlib_buffer_t *p0, *b0;
2007 ip4_address_t resolve0;
2008 ethernet_arp_header_t *h0;
2009 vnet_hw_interface_t *hw_if0;
2013 p0 = vlib_get_buffer (vm, pi0);
/* The original packet always goes onto the drop frame. */
2017 to_next_drop[0] = pi0;
2019 n_left_to_next_drop -= 1;
2021 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2022 adj0 = adj_get (adj_index0);
/*
 * Pick the address to resolve. NOTE(review): the condition choosing
 * between the two assignments below is not visible in this excerpt;
 * presumably is_glean selects the packet's destination.
 */
2026 /* resolve the packet's destination */
2027 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
2028 resolve0 = ip0->dst_address;
2032 /* resolve the incomplete adj */
2033 resolve0 = adj0->sub_type.nbr.next_hop.ip4;
2036 /* combine the address and interface for the hash key */
2037 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2038 r0 = (u64) resolve0.data_u32 << 32;
/* Rate-limit requests per (address, interface) key. */
2041 if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
2043 p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
2048 * the adj has been updated to a rewrite but the node the DPO that got
2049 * us here hasn't - yet. no big deal. we'll drop while we wait.
2051 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2053 p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
2058 * Can happen if the control-plane is programming tables
2059 * with traffic flowing; at least that's today's lame excuse.
2061 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2062 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2064 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2067 /* Send ARP request. */
2069 vlib_packet_template_get_packet (vm,
2070 &im->ip4_arp_request_packet_template,
2072 /* Seems we're out of buffers */
2073 if (PREDICT_FALSE (!h0))
2075 p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
2079 b0 = vlib_get_buffer (vm, bi0);
2081 /* copy the persistent fields from the original */
2082 clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
2084 /* Add rewrite/encap string for ARP packet. */
2085 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2087 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2089 /* Src ethernet address in ARP header. */
2090 mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
2091 hw_if0->hw_address);
2094 /* The interface's source address is stashed in the Glean Adj */
2095 h0->ip4_over_ethernet[0].ip4 =
2096 adj0->sub_type.glean.receive_addr.ip4;
2100 /* Src IP address in ARP header. */
2101 if (ip4_src_address_for_packet (lm, sw_if_index0,
2102 &h0->ip4_over_ethernet[0].ip4))
2104 /* No source address available */
2105 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
/* The freshly allocated request buffer is released on this failure. */
2106 vlib_buffer_free (vm, &bi0, 1);
/* Target IP address in the ARP header. */
2110 h0->ip4_over_ethernet[1].ip4 = resolve0;
2112 p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
2114 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2115 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2116 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
/* Expose the rewrite bytes in front of the ARP payload. */
2118 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
/* Send the request to the adjacency's next node. */
2120 vlib_set_next_frame_buffer (vm, node,
2121 adj0->rewrite_header.next_index, bi0);
2124 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2127 return frame->n_vectors;
/* Node function for ip4_arp_node: shared worker with is_glean = 0. */
2130 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2131 vlib_frame_t * frame)
2133 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function for ip4_glean_node: shared worker with is_glean = 1. */
2136 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2137 vlib_frame_t * frame)
2139 return (ip4_arp_inline (vm, node, frame, 1));
/*
 * Counter descriptions indexed by the IP4_ARP_ERROR_* codes; shared by the
 * ip4-arp and ip4-glean node registrations.
 */
2142 static char *ip4_arp_error_strings[] = {
2143 [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
2144 [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
2145 [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
2146 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2147 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2148 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/*
 * Registration of ip4_arp_node: common trace formatter, the shared ARP
 * error strings, and "error-drop" as the IP4_ARP_NEXT_DROP next node.
 */
2152 VLIB_REGISTER_NODE (ip4_arp_node) =
2155 .vector_size = sizeof (u32),
2156 .format_trace = format_ip4_forward_next_trace,
2157 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2158 .error_strings = ip4_arp_error_strings,
2159 .n_next_nodes = IP4_ARP_N_NEXT,
2162 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * Registration of the "ip4-glean" node; it mirrors the ip4_arp_node
 * registration (same trace formatter, error strings and drop next).
 */
2166 VLIB_REGISTER_NODE (ip4_glean_node) =
2168 .name = "ip4-glean",
2169 .vector_size = sizeof (u32),
2170 .format_trace = format_ip4_forward_next_trace,
2171 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2172 .error_strings = ip4_arp_error_strings,
2173 .n_next_nodes = IP4_ARP_N_NEXT,
2175 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * X-macro listing the IP4_ARP_ERROR_* suffixes that arp_notrace_init
 * registers with the pcap drop-trace filter. NOTE(review): only the last
 * entry of the list is visible in this excerpt.
 */
2180 #define foreach_notrace_ip4_arp_error \
2186 _(NO_SOURCE_ADDRESS)
/*
 * Init function: for every error listed in foreach_notrace_ip4_arp_error,
 * pass the corresponding ip4-arp node error to
 * vnet_pcap_drop_trace_filter_add_del.
 *
 * vm - vlib main, used to fetch the ip4-arp node runtime
 * NOTE(review): the trailing argument of the filter call and the return
 * statement are not visible in this excerpt.
 */
2188 static clib_error_t *
2189 arp_notrace_init (vlib_main_t * vm)
2191 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2193 /* don't trace ARP request packets */
2195 vnet_pcap_drop_trace_filter_add_del \
2196 (rt->errors[IP4_ARP_ERROR_##a], \
2198 foreach_notrace_ip4_arp_error;
/* Register the function so it runs at init time. */
2203 VLIB_INIT_FUNCTION (arp_notrace_init);
2206 #ifndef CLIB_MARCH_VARIANT
2207 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 * vm          - vlib main
 * dst         - IPv4 address to probe
 * sw_if_index - interface on which to send the request
 * Returns 0 on success, or a clib error when the interface is down, has no
 * address matching dst, has no hardware address, or no packet could be
 * allocated.
 * NOTE(review): the remaining parameter(s) of the signature are not visible
 * in this excerpt.
 */
2209 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2212 vnet_main_t *vnm = vnet_get_main ();
2213 ip4_main_t *im = &ip4_main;
2214 ethernet_arp_header_t *h;
2216 ip_interface_address_t *ia;
2217 ip_adjacency_t *adj;
2218 vnet_hw_interface_t *hi;
2219 vnet_sw_interface_t *si;
2223 u8 unicast_rewrite = 0;
2225 si = vnet_get_sw_interface (vnm, sw_if_index);
/* Refuse to probe on an interface that is administratively down. */
2227 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2229 return clib_error_return (0, "%U: interface %U down",
2230 format_ip4_address, dst,
2231 format_vnet_sw_if_index_name, vnm,
/* Find an interface address matching dst; used below as the ARP source. */
2236 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2239 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2240 return clib_error_return
2242 "no matching interface address for destination %U (interface %U)",
2243 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
/* Allocate an ARP request from the pre-built template. */
2247 h = vlib_packet_template_get_packet (vm,
2248 &im->ip4_arp_request_packet_template,
2252 return clib_error_return (0, "ARP request packet allocation failed");
2254 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/*
 * NOTE(review): no release of the packet allocated above is visible on
 * this error return; if none exists the buffer is leaked - confirm.
 */
2255 if (PREDICT_FALSE (!hi->hw_address))
2257 return clib_error_return (0, "%U: interface %U do not support ip probe",
2258 format_ip4_address, dst,
2259 format_vnet_sw_if_index_name, vnm,
/* Fill in sender MAC, sender IP and target IP of the ARP request. */
2263 mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2265 h->ip4_over_ethernet[0].ip4 = src[0];
2266 h->ip4_over_ethernet[1].ip4 = dst[0];
2268 b = vlib_get_buffer (vm, bi);
2269 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2270 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
/* Look up (or create and lock) the neighbour adjacency for the probe. */
2272 ip46_address_t nh = {
2276 ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2277 VNET_LINK_IP4, &nh, sw_if_index);
2280 /* Peer has been previously resolved, retrieve glean adj instead */
2281 if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2284 unicast_rewrite = 1;
2288 ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2289 VNET_LINK_IP4, sw_if_index, &nh);
2294 /* Add encapsulation string for software interface (e.g. ethernet header). */
2295 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/*
 * With a unicast rewrite, the two bytes just before the current data (the
 * ethertype position) are overwritten with the ARP ethertype.
 */
2296 if (unicast_rewrite)
2298 u16 *etype = vlib_buffer_get_current (b) - 2;
2299 etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
/* Expose the rewrite bytes in front of the ARP payload. */
2301 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the packet directly to the hardware interface's output node. */
2304 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2305 u32 *to_next = vlib_frame_vector_args (f);
2308 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2312 return /* no error */ 0;
/* Next-node indices used by the ip4 rewrite path (drop, ICMP error, fragment). */
2318 IP4_REWRITE_NEXT_DROP,
2319 IP4_REWRITE_NEXT_ICMP_ERROR,
2320 IP4_REWRITE_NEXT_FRAGMENT,
2321 IP4_REWRITE_N_NEXT /* Last */
2322 } ip4_rewrite_next_t;
2325 * The bits of an IPv4 address to mask to construct a multicast
/*
 * The two values are byte-swapped forms of each other (0x007fffff versus
 * 0xffff7f00); the first is used on big-endian hosts. NOTE(review): the
 * directive separating the two definitions is not visible in this excerpt;
 * presumably the second applies to little-endian hosts.
 */
2328 #if CLIB_ARCH_IS_BIG_ENDIAN
2329 #define IP4_MCAST_ADDR_MASK 0x007fffff
2331 #define IP4_MCAST_ADDR_MASK 0xffff7f00
/*
 * Compare a packet's length with the adjacency's limit and, when it is too
 * long, divert the packet to either the ICMP error node or the fragment
 * node.
 *
 * b                - packet buffer
 * packet_len       - length to compare
 * adj_packet_bytes - maximum length allowed by the adjacency
 * df               - true when the packet must not be fragmented
 * next             - in/out next index
 * error            - in/out error code; set to IP4_ERROR_MTU_EXCEEDED
 * NOTE(review): the branch keywords selecting between the ICMP and the
 * fragmentation handling are not visible in this excerpt; presumably df
 * selects the ICMP path.
 */
2335 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2336 u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2338 if (packet_len > adj_packet_bytes)
2340 *error = IP4_ERROR_MTU_EXCEEDED;
/* ICMP "destination unreachable: fragmentation needed and DF set". */
2343 icmp4_error_set_vnet_buffer
2344 (b, ICMP4_destination_unreachable,
2345 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2347 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2351 /* IP fragmentation */
2352 ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2353 IP4_FRAG_NEXT_IP4_REWRITE, 0);
2354 *next = IP4_REWRITE_NEXT_FRAGMENT;
2359 /* Decrement TTL & update checksum.
2360 Works either endian, so no need for byte swap. */
/*
 * b     - packet buffer
 * ip    - IPv4 header of the packet; checksum (and TTL) are updated in place
 * next  - in/out next index; set to IP4_REWRITE_NEXT_ICMP_ERROR on expiry
 * NOTE(review): the trailing parameter (used below as 'error') and the
 * statements that actually decrement the TTL are not visible in this
 * excerpt.
 */
2361 static_always_inline void
2362 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
/*
 * Locally originated packets: the flag is cleared. NOTE(review): what
 * follows the flag clear is not visible; presumably TTL handling is skipped.
 */
2367 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2369 b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2375 /* Input node should have rejected packets with ttl 0. */
2376 ASSERT (ip->ttl > 0);
/*
 * Incremental checksum update: add 0x0100 (in network byte order) to the
 * stored checksum and fold the carry back in.
 */
2378 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2379 checksum += checksum >= 0xffff;
2381 ip->checksum = checksum;
2386 * If the ttl drops below 1 when forwarding, generate
/* TTL expired: clear the TX interface and request an ICMP time-exceeded error. */
2389 if (PREDICT_FALSE (ttl <= 0))
2391 *error = IP4_ERROR_TIME_EXPIRED;
2392 vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2393 icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2394 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2396 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2399 /* Verify checksum. */
2400 ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2401 (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2406 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2407 vlib_node_runtime_t * node,
2408 vlib_frame_t * frame,
2409 int do_counters, int is_midchain, int is_mcast,
2412 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2413 u32 *from = vlib_frame_vector_args (frame);
2414 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2415 u16 nexts[VLIB_FRAME_SIZE], *next;
2417 vlib_node_runtime_t *error_node =
2418 vlib_node_get_runtime (vm, ip4_input_node.index);
2420 n_left_from = frame->n_vectors;
2421 u32 thread_index = vm->thread_index;
2423 vlib_get_buffers (vm, from, bufs, n_left_from);
2424 clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2426 #if (CLIB_N_PREFETCHES >= 8)
2427 if (n_left_from >= 6)
2430 for (i = 2; i < 6; i++)
2431 vlib_prefetch_buffer_header (bufs[i], LOAD);
2436 while (n_left_from >= 8)
2438 ip_adjacency_t *adj0, *adj1;
2439 ip4_header_t *ip0, *ip1;
2440 u32 rw_len0, error0, adj_index0;
2441 u32 rw_len1, error1, adj_index1;
2442 u32 tx_sw_if_index0, tx_sw_if_index1;
2445 vlib_prefetch_buffer_header (b[6], LOAD);
2446 vlib_prefetch_buffer_header (b[7], LOAD);
2448 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2449 adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2452 * pre-fetch the per-adjacency counters
2456 vlib_prefetch_combined_counter (&adjacency_counters,
2457 thread_index, adj_index0);
2458 vlib_prefetch_combined_counter (&adjacency_counters,
2459 thread_index, adj_index1);
2462 ip0 = vlib_buffer_get_current (b[0]);
2463 ip1 = vlib_buffer_get_current (b[1]);
2465 error0 = error1 = IP4_ERROR_NONE;
2467 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2468 ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2470 /* Rewrite packet header and updates lengths. */
2471 adj0 = adj_get (adj_index0);
2472 adj1 = adj_get (adj_index1);
2474 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2475 rw_len0 = adj0[0].rewrite_header.data_bytes;
2476 rw_len1 = adj1[0].rewrite_header.data_bytes;
2477 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2478 vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2480 p = vlib_buffer_get_current (b[2]);
2481 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2482 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2484 p = vlib_buffer_get_current (b[3]);
2485 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2486 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2488 /* Check MTU of outgoing interface. */
2489 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2490 u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2492 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2493 ip0_len = gso_mtu_sz (b[0]);
2494 if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2495 ip1_len = gso_mtu_sz (b[1]);
2497 ip4_mtu_check (b[0], ip0_len,
2498 adj0[0].rewrite_header.max_l3_packet_bytes,
2499 ip0->flags_and_fragment_offset &
2500 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2502 ip4_mtu_check (b[1], ip1_len,
2503 adj1[0].rewrite_header.max_l3_packet_bytes,
2504 ip1->flags_and_fragment_offset &
2505 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2510 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2511 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2512 IP4_ERROR_SAME_INTERFACE : error0);
2513 error1 = ((adj1[0].rewrite_header.sw_if_index ==
2514 vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2515 IP4_ERROR_SAME_INTERFACE : error1);
2518 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2519 * to see the IP header */
2520 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2522 u32 next_index = adj0[0].rewrite_header.next_index;
2523 vlib_buffer_advance (b[0], -(word) rw_len0);
2524 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2525 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2528 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2529 vnet_feature_arc_start (lm->output_feature_arc_index,
2530 tx_sw_if_index0, &next_index, b[0]);
2531 next[0] = next_index;
2535 b[0]->error = error_node->errors[error0];
2537 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2539 u32 next_index = adj1[0].rewrite_header.next_index;
2540 vlib_buffer_advance (b[1], -(word) rw_len1);
2542 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2543 vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2546 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2547 vnet_feature_arc_start (lm->output_feature_arc_index,
2548 tx_sw_if_index1, &next_index, b[1]);
2549 next[1] = next_index;
2553 b[1]->error = error_node->errors[error1];
2557 calc_checksums (vm, b[0]);
2558 calc_checksums (vm, b[1]);
2560 /* Guess we are only writing on simple Ethernet header. */
2561 vnet_rewrite_two_headers (adj0[0], adj1[0],
2562 ip0, ip1, sizeof (ethernet_header_t));
2565 * Bump the per-adjacency counters
2569 vlib_increment_combined_counter
2570 (&adjacency_counters,
2572 adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2574 vlib_increment_combined_counter
2575 (&adjacency_counters,
2577 adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2582 if (adj0->sub_type.midchain.fixup_func)
2583 adj0->sub_type.midchain.fixup_func
2584 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2585 if (adj1->sub_type.midchain.fixup_func)
2586 adj1->sub_type.midchain.fixup_func
2587 (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2593 * copy bytes from the IP address into the MAC rewrite
2595 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2596 adj0->rewrite_header.dst_mcast_offset,
2597 &ip0->dst_address.as_u32, (u8 *) ip0);
2598 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2599 adj1->rewrite_header.dst_mcast_offset,
2600 &ip1->dst_address.as_u32, (u8 *) ip1);
2607 #elif (CLIB_N_PREFETCHES >= 4)
2610 while (n_left_from >= 1)
2612 ip_adjacency_t *adj0;
2614 u32 rw_len0, error0, adj_index0;
2615 u32 tx_sw_if_index0;
2618 /* Prefetch next iteration */
2619 if (PREDICT_TRUE (n_left_from >= 4))
2621 ip_adjacency_t *adj2;
2624 vlib_prefetch_buffer_header (b[3], LOAD);
2625 vlib_prefetch_buffer_data (b[2], LOAD);
2627 /* Prefetch adj->rewrite_header */
2628 adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2629 adj2 = adj_get (adj_index2);
2631 CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2635 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2638 * Prefetch the per-adjacency counters
2642 vlib_prefetch_combined_counter (&adjacency_counters,
2643 thread_index, adj_index0);
2646 ip0 = vlib_buffer_get_current (b[0]);
2648 error0 = IP4_ERROR_NONE;
2650 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2652 /* Rewrite packet header and updates lengths. */
2653 adj0 = adj_get (adj_index0);
2655 /* Rewrite header was prefetched. */
2656 rw_len0 = adj0[0].rewrite_header.data_bytes;
2657 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2659 /* Check MTU of outgoing interface. */
2660 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2662 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2663 ip0_len = gso_mtu_sz (b[0]);
2665 ip4_mtu_check (b[0], ip0_len,
2666 adj0[0].rewrite_header.max_l3_packet_bytes,
2667 ip0->flags_and_fragment_offset &
2668 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2673 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2674 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2675 IP4_ERROR_SAME_INTERFACE : error0);
2678 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2679 * to see the IP header */
2680 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2682 u32 next_index = adj0[0].rewrite_header.next_index;
2683 vlib_buffer_advance (b[0], -(word) rw_len0);
2684 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2685 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2688 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2689 vnet_feature_arc_start (lm->output_feature_arc_index,
2690 tx_sw_if_index0, &next_index, b[0]);
2691 next[0] = next_index;
2695 b[0]->error = error_node->errors[error0];
2699 calc_checksums (vm, b[0]);
2701 /* Guess we are only writing on simple Ethernet header. */
2702 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2705 * Bump the per-adjacency counters
2709 vlib_increment_combined_counter
2710 (&adjacency_counters,
2712 adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2717 if (adj0->sub_type.midchain.fixup_func)
2718 adj0->sub_type.midchain.fixup_func
2719 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2725 * copy bytes from the IP address into the MAC rewrite
2727 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2728 adj0->rewrite_header.dst_mcast_offset,
2729 &ip0->dst_address.as_u32, (u8 *) ip0);
2738 while (n_left_from > 0)
2740 ip_adjacency_t *adj0;
2742 u32 rw_len0, adj_index0, error0;
2743 u32 tx_sw_if_index0;
2745 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2747 adj0 = adj_get (adj_index0);
2750 vlib_prefetch_combined_counter (&adjacency_counters,
2751 thread_index, adj_index0);
2753 ip0 = vlib_buffer_get_current (b[0]);
2755 error0 = IP4_ERROR_NONE;
2757 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2760 /* Update packet buffer attributes/set output interface. */
2761 rw_len0 = adj0[0].rewrite_header.data_bytes;
2762 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2764 /* Check MTU of outgoing interface. */
2765 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2766 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2767 ip0_len = gso_mtu_sz (b[0]);
2769 ip4_mtu_check (b[0], ip0_len,
2770 adj0[0].rewrite_header.max_l3_packet_bytes,
2771 ip0->flags_and_fragment_offset &
2772 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2777 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2778 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2779 IP4_ERROR_SAME_INTERFACE : error0);
2782 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2783 * to see the IP header */
2784 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2786 u32 next_index = adj0[0].rewrite_header.next_index;
2787 vlib_buffer_advance (b[0], -(word) rw_len0);
2788 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2789 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2792 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2793 vnet_feature_arc_start (lm->output_feature_arc_index,
2794 tx_sw_if_index0, &next_index, b[0]);
2795 next[0] = next_index;
2799 b[0]->error = error_node->errors[error0];
2803 calc_checksums (vm, b[0]);
2805 /* Guess we are only writing on simple Ethernet header. */
2806 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2809 vlib_increment_combined_counter
2810 (&adjacency_counters,
2811 thread_index, adj_index0, 1,
2812 vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2816 if (adj0->sub_type.midchain.fixup_func)
2817 adj0->sub_type.midchain.fixup_func
2818 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2824 * copy bytes from the IP address into the MAC rewrite
2826 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2827 adj0->rewrite_header.dst_mcast_offset,
2828 &ip0->dst_address.as_u32, (u8 *) ip0);
2837 /* Need to do trace after rewrites to pick up new packet data. */
2838 if (node->flags & VLIB_NODE_FLAG_TRACE)
2839 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2841 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2842 return frame->n_vectors;
/*
 * Thin front end for ip4_rewrite_inline_with_gso (): vm, node, frame and the
 * do_counters / is_midchain / is_mcast flags are forwarded unchanged.
 *
 * The first call is guarded by vnm->interface_main.gso_interface_count > 0,
 * hinted as the unlikely case with PREDICT_FALSE; the second call passes 0
 * as the trailing do_gso argument.
 *
 * NOTE(review): the trailing argument of the first call is not visible in
 * this view (presumably a non-zero do_gso) -- confirm against the full file.
 */
2846 ip4_rewrite_inline (vlib_main_t * vm,
2847 vlib_node_runtime_t * node,
2848 vlib_frame_t * frame,
2849 int do_counters, int is_midchain, int is_mcast)
2851 vnet_main_t *vnm = vnet_get_main ();
2852 if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2853 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2854 is_midchain, is_mcast,
2857 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2858 is_midchain, is_mcast,
2859 0 /* no do_gso */ );
2863 /** @brief IPv4 rewrite node.
2866 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2867 header checksum, fetch the ip adjacency, check the outbound mtu,
2868 apply the adjacency rewrite, and send pkts to the adjacency
2869 rewrite header's rewrite_next_index.
2871 @param vm vlib_main_t corresponding to the current thread
2872 @param node vlib_node_runtime_t
2873 @param frame vlib_frame_t whose contents should be dispatched
2875 @par Graph mechanics: buffer metadata, next index usage
2878 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2879 - the rewrite adjacency index
2880 - <code>adj->lookup_next_index</code>
2881 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2882 the packet will be dropped.
2883 - <code>adj->rewrite_header</code>
2884 - Rewrite string length, rewrite string, next_index
2887 - <code>b->current_data, b->current_length</code>
2888 - Updated net of applying the rewrite string
2890 <em>Next Indices:</em>
2891 - <code> adj->rewrite_header.next_index </code>
2895 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2896 vlib_frame_t * frame)
/*
 * Both returns call ip4_rewrite_inline () with is_midchain = 0 and
 * is_mcast = 0.  They differ only in do_counters: 1 on the path guarded by
 * adj_are_counters_enabled (), 0 on the other.
 */
2898 if (adj_are_counters_enabled ())
2899 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2901 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
/*
 * Node function for ip4_rewrite_bcast_node.  It passes the same flags to
 * ip4_rewrite_inline () as ip4_rewrite_node does (is_midchain = 0,
 * is_mcast = 0); do_counters is 1 only on the path guarded by
 * adj_are_counters_enabled ().
 */
2904 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2905 vlib_node_runtime_t * node,
2906 vlib_frame_t * frame)
2908 if (adj_are_counters_enabled ())
2909 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2911 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
/*
 * Node function for ip4_midchain_node.  Calls ip4_rewrite_inline () with
 * is_midchain = 1 and is_mcast = 0; do_counters is 1 only on the path
 * guarded by adj_are_counters_enabled ().
 */
2914 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2915 vlib_node_runtime_t * node,
2916 vlib_frame_t * frame)
2918 if (adj_are_counters_enabled ())
2919 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2921 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
/*
 * Node function for ip4_rewrite_mcast_node.  Calls ip4_rewrite_inline ()
 * with is_midchain = 0 and is_mcast = 1; do_counters is 1 only on the path
 * guarded by adj_are_counters_enabled ().
 */
2924 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2925 vlib_node_runtime_t * node,
2926 vlib_frame_t * frame)
2928 if (adj_are_counters_enabled ())
2929 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2931 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
/*
 * Node function for ip4_mcast_midchain_node.  Calls ip4_rewrite_inline ()
 * with both is_midchain = 1 and is_mcast = 1; do_counters is 1 only on the
 * path guarded by adj_are_counters_enabled ().
 */
2934 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2935 vlib_node_runtime_t * node,
2936 vlib_frame_t * frame)
2938 if (adj_are_counters_enabled ())
2939 return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2941 return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
/*
 * Registration of the "ip4-rewrite" graph node: vector elements are
 * sizeof (u32) bytes, traces are formatted by format_ip4_rewrite_trace, and
 * IP4_REWRITE_N_NEXT next slots are declared.  The slots named here are
 * drop ("ip4-drop"), ICMP error ("ip4-icmp-error") and fragment
 * ("ip4-frag").
 */
2945 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2946 .name = "ip4-rewrite",
2947 .vector_size = sizeof (u32),
2949 .format_trace = format_ip4_rewrite_trace,
2951 .n_next_nodes = IP4_REWRITE_N_NEXT,
2953 [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2954 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2955 [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
/*
 * Registration of the "ip4-rewrite-bcast" graph node.  It lists no next
 * nodes of its own and is declared a sibling of "ip4-rewrite" -- presumably
 * so that it shares that node's next-node arcs; confirm against the vlib
 * node documentation.
 */
2959 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2960 .name = "ip4-rewrite-bcast",
2961 .vector_size = sizeof (u32),
2963 .format_trace = format_ip4_rewrite_trace,
2964 .sibling_of = "ip4-rewrite",
/*
 * Registration of the "ip4-rewrite-mcast" graph node: u32-sized vector
 * elements, format_ip4_rewrite_trace for traces, declared a sibling of
 * "ip4-rewrite" rather than listing next nodes itself.
 */
2967 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2968 .name = "ip4-rewrite-mcast",
2969 .vector_size = sizeof (u32),
2971 .format_trace = format_ip4_rewrite_trace,
2972 .sibling_of = "ip4-rewrite",
/*
 * Registration of the "ip4-mcast-midchain" graph node: u32-sized vector
 * elements, format_ip4_rewrite_trace for traces, declared a sibling of
 * "ip4-rewrite" rather than listing next nodes itself.
 */
2975 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2976 .name = "ip4-mcast-midchain",
2977 .vector_size = sizeof (u32),
2979 .format_trace = format_ip4_rewrite_trace,
2980 .sibling_of = "ip4-rewrite",
/*
 * Registration of the "ip4-midchain" graph node: u32-sized vector elements,
 * declared a sibling of "ip4-rewrite".
 *
 * NOTE(review): this registration formats traces with
 * format_ip4_forward_next_trace, whereas the other four rewrite
 * registrations in this file use format_ip4_rewrite_trace -- confirm the
 * difference is intentional.
 */
2983 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2984 .name = "ip4-midchain",
2985 .vector_size = sizeof (u32),
2986 .format_trace = format_ip4_forward_next_trace,
2987 .sibling_of = "ip4-rewrite",
/*
 * Cross-check two lookups of address `a` in the FIB selected by fib_index0.
 *
 * One result comes from walking that FIB's mtrie directly, the other from
 * ip4_fib_table_lookup_lb ().  Returns non-zero when both yield the same
 * index and zero when they disagree.
 */
2992 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2994 ip4_fib_mtrie_t *mtrie0;
2995 ip4_fib_mtrie_leaf_t leaf0;
2998 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
/* Mtrie walk: one "step_one" call followed by steps 2 and 3. */
3000 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
3001 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3002 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
/* Index stored in the final leaf, compared below with the table lookup. */
3004 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3006 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/*
 * CLI handler for "test lookup" (bound by lookup_test_command).
 *
 * Accepts "table %d", "count %f" and an IPv4 address in any order; anything
 * else is reported as unknown input.  It then runs ip4_lookup_validate ()
 * in a loop bounded by n, moving to the next address after each check, and
 * prints either an error count or a "No errors" line.
 *
 * NOTE(review): table_id is handed straight to ip4_fib_get () and to
 * ip4_lookup_validate (), i.e. it is used as a FIB index (see the @todo in
 * the command documentation).  Since the fib is fetched by that same value,
 * the fib->index != table_id test presumably can never fire -- confirm.
 */
3009 static clib_error_t *
3010 test_lookup_command_fn (vlib_main_t * vm,
3011 unformat_input_t * input, vlib_cli_command_t * cmd)
3018 ip4_address_t ip4_base_address;
/*
 * NOTE(review): ip4_base_address has no initializer here and is only
 * written when an address is parsed below; presumably it is read
 * uninitialised when the user omits the address -- confirm.
 */
3021 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3023 if (unformat (input, "table %d", &table_id))
3025 /* Make sure the entry exists. */
3026 fib = ip4_fib_get (table_id);
3027 if ((fib) && (fib->index != table_id))
3028 return clib_error_return (0, "<fib-index> %d does not exist",
3031 else if (unformat (input, "count %f", &count))
3034 else if (unformat (input, "%U",
3035 unformat_ip4_address, &ip4_base_address))
3038 return clib_error_return (0, "unknown input `%U'",
3039 format_unformat_error, input);
/*
 * Validate one address per iteration.  The address is advanced by
 * converting it to host byte order, adding 1 and converting back.
 */
3044 for (i = 0; i < n; i++)
3046 if (!ip4_lookup_validate (&ip4_base_address, table_id))
3049 ip4_base_address.as_u32 =
3050 clib_host_to_net_u32 (1 +
3051 clib_net_to_host_u32 (ip4_base_address.as_u32));
3055 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3057 vlib_cli_output (vm, "No errors in %d lookups\n", n);
3063 * Perform a lookup of an IPv4 Address (or range of addresses) in the
3064 * given FIB table to determine if there is a conflict with the
3065 * adjacency table. The fib-id can be determined by using the
3066 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3069 * @todo This command uses fib-id, other commands use table-id (not
3070 * just a name, they are different indexes). Would like to change this
3071 * to table-id for consistency.
3074 * Example of how to run the test lookup command:
3075 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3076 * No errors in 2 lookups
3080 VLIB_CLI_COMMAND (lookup_test_command, static) =
/* Binds the "test lookup" CLI path to test_lookup_command_fn. */
3082 .path = "test lookup",
3083 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3084 .function = test_lookup_command_fn,
3088 #ifndef CLIB_MARCH_VARIANT
/*
 * Apply a flow-hash configuration to the IPv4 FIB identified by table_id.
 *
 * table_id is resolved with fib_table_find (); when that yields ~0 the
 * function returns VNET_API_ERROR_NO_SUCH_FIB.  Otherwise the resolved
 * index is passed to fib_table_set_flow_hash_config ().
 */
3090 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3094 fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
3096 if (~0 == fib_index)
3097 return VNET_API_ERROR_NO_SUCH_FIB;
3099 fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
/*
 * CLI handler for "set ip flow-hash" (bound by set_ip_flow_hash_command).
 *
 * Parses "table %d" plus the keywords generated through
 * foreach_flow_hash_bit; each matched keyword ORs its value into
 * flow_hash_config and sets `matched`.  The accumulated configuration is
 * then applied with vnet_set_ip4_flow_hash ().
 *
 * NOTE(review): the "else if (unformat (input, #a))" line is presumably the
 * body of a helper macro expanded by foreach_flow_hash_bit; its #define is
 * not visible in this view -- confirm.
 */
3106 static clib_error_t *
3107 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3108 unformat_input_t * input,
3109 vlib_cli_command_t * cmd)
3113 u32 flow_hash_config = 0;
3116 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3118 if (unformat (input, "table %d", &table_id))
3121 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3122 foreach_flow_hash_bit
3129 return clib_error_return (0, "unknown input `%U'",
3130 format_unformat_error, input);
/*
 * Apply the configuration.  A missing table is reported to the user as a
 * CLI error; the clib_warning below only logs and returns no CLI error.
 */
3132 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3138 case VNET_API_ERROR_NO_SUCH_FIB:
3139 return clib_error_return (0, "no such FIB table %d", table_id);
3142 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3150 * Configure the set of IPv4 fields used by the flow hash.
3153 * Example of how to set the flow hash on a given table:
3154 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3155 * Example of displaying the configured flow hash:
3156 * @cliexstart{show ip fib}
3157 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3160 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3161 * [0] [@0]: dpo-drop ip6
3164 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3165 * [0] [@0]: dpo-drop ip6
3168 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3169 * [0] [@0]: dpo-drop ip6
3172 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3173 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3176 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3177 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3178 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3179 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3180 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3183 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3184 * [0] [@0]: dpo-drop ip6
3185 * 255.255.255.255/32
3187 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3188 * [0] [@0]: dpo-drop ip6
3189 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3192 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3193 * [0] [@0]: dpo-drop ip6
3196 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3197 * [0] [@0]: dpo-drop ip6
3200 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3201 * [0] [@4]: ipv4-glean: af_packet0
3204 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3205 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3208 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3209 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3212 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3213 * [0] [@4]: ipv4-glean: af_packet1
3216 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3217 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3220 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3221 * [0] [@0]: dpo-drop ip6
3224 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3225 * [0] [@0]: dpo-drop ip6
3226 * 255.255.255.255/32
3228 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3229 * [0] [@0]: dpo-drop ip6
3233 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
/* Binds the "set ip flow-hash" CLI path to set_ip_flow_hash_command_fn. */
3235 .path = "set ip flow-hash",
3237 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3238 .function = set_ip_flow_hash_command_fn,
3242 #ifndef CLIB_MARCH_VARIANT
/*
 * Attach classify table `table_index` to interface `sw_if_index`, or detach
 * the current one when table_index is ~0.
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * slot in the software-interface pool, and VNET_API_ERROR_NO_SUCH_ENTRY
 * when table_index is not ~0 and is a free slot in the classifier's table
 * pool.
 *
 * Besides recording the table index per interface, the function updates a
 * FIB_SOURCE_CLASSIFY special entry for the interface's first IPv4 address,
 * if the interface has one.
 */
3244 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3247 vnet_main_t *vnm = vnet_get_main ();
3248 vnet_interface_main_t *im = &vnm->interface_main;
3249 ip4_main_t *ipm = &ip4_main;
3250 ip_lookup_main_t *lm = &ipm->lookup_main;
3251 vnet_classify_main_t *cm = &vnet_classify_main;
3252 ip4_address_t *if_addr;
/* Argument validation: the interface must exist; so must the table unless it is ~0. */
3254 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3255 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3257 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3258 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Record the table index in the per-interface vector slot for sw_if_index. */
3260 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3261 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
/* The FIB is only touched when the interface has a first IPv4 address. */
3263 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3265 if (NULL != if_addr)
3267 fib_prefix_t pfx = {
3269 .fp_proto = FIB_PROTOCOL_IP4,
3270 .fp_addr.ip4 = *if_addr,
3274 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
/*
 * With a real table index, add a special FIB entry for the prefix whose
 * DPO comes from classify_dpo_create ().  The removal call further down
 * presumably sits on the table_index == ~0 path -- confirm.
 */
3278 if (table_index != (u32) ~ 0)
3280 dpo_id_t dpo = DPO_INVALID;
3285 classify_dpo_create (DPO_PROTO_IP4, table_index));
3287 fib_table_entry_special_dpo_add (fib_index,
3289 FIB_SOURCE_CLASSIFY,
3290 FIB_ENTRY_FLAG_NONE, &dpo);
3295 fib_table_entry_special_remove (fib_index,
3296 &pfx, FIB_SOURCE_CLASSIFY);
/*
 * CLI handler for "set ip classify" (bound by set_ip_classify_command).
 *
 * Parses "table-index %d" and "intfc <interface>", requires both, and then
 * calls vnet_set_ip4_classify_intfc ().  The two API error codes handled
 * below are turned into "No such interface" and "No such classifier table"
 * CLI errors.
 */
3304 static clib_error_t *
3305 set_ip_classify_command_fn (vlib_main_t * vm,
3306 unformat_input_t * input,
3307 vlib_cli_command_t * cmd)
3309 u32 table_index = ~0;
3310 int table_index_set = 0;
3311 u32 sw_if_index = ~0;
3314 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3316 if (unformat (input, "table-index %d", &table_index))
3317 table_index_set = 1;
3318 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3319 vnet_get_main (), &sw_if_index))
/*
 * Both arguments are mandatory.  table_index_set records whether a
 * table-index was parsed; sw_if_index keeps its initial ~0 until an
 * interface is parsed.
 */
3325 if (table_index_set == 0)
3326 return clib_error_return (0, "classify table-index must be specified");
3328 if (sw_if_index == ~0)
3329 return clib_error_return (0, "interface / subif must be specified");
/* Apply the binding and translate API error codes into CLI errors. */
3331 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3338 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3339 return clib_error_return (0, "No such interface");
3341 case VNET_API_ERROR_NO_SUCH_ENTRY:
3342 return clib_error_return (0, "No such classifier table");
3348 * Assign a classification table to an interface. The classification
3349 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3350 * commands. Once the table is created, use this command to filter packets
3354 * Example of how to assign a classification table to an interface:
3355 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3358 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
/* Binds the "set ip classify" CLI path to set_ip_classify_command_fn. */
3360 .path = "set ip classify",
3362 "set ip classify intfc <interface> table-index <classify-idx>",
3363 .function = set_ip_classify_command_fn,
/*
 * Early-config handler for the "ip" section (see the
 * VLIB_EARLY_CONFIG_FUNCTION registration below).
 *
 * Parses "heap-size <size>" using unformat_memory_size and stores the
 * result in ip4_main.mtrie_heap_size.  The error return visible in the loop
 * reports the offending input as an invalid heap-size parameter.
 */
3367 static clib_error_t *
3368 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3370 ip4_main_t *im = &ip4_main;
3373 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3375 if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3378 return clib_error_return (0,
3379 "invalid heap-size parameter `%U'",
3380 format_unformat_error, input);
3383 im->mtrie_heap_size = heapsize;
/* Register ip4_config as the early-config handler for the "ip" section. */
3388 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3391 * fd.io coding-style-patch-verification: ON
3394 * eval: (c-set-style "gnu")