2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h> /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
60 /** @brief IPv4 lookup node.
63 This is the main IPv4 lookup dispatch node.
65 @param vm vlib_main_t corresponding to the current thread
66 @param node vlib_node_runtime_t
67 @param frame vlib_frame_t whose contents should be dispatched
69 @par Graph mechanics: buffer metadata, next index usage
72 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73 - Indicates the @c sw_if_index value of the interface that the
74 packet was received on.
75 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76 - When the value is @c ~0 then the node performs a longest prefix
77 match (LPM) for the packet destination address in the FIB attached
78 to the receive interface.
79 - Otherwise perform LPM for the packet destination address in the
80 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81 value (0, 1, ...) and not a VRF id.
84 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85 - The lookup result adjacency index.
88 - Dispatches the packet to the node index found in
89 ip_adjacency_t @c adj->lookup_next_index
90 (where @c adj is the lookup result adjacency).
/*
 * Node function for ip4_lookup_node: the frame is handed to
 * ip4_lookup_inline() and that call's result is returned unchanged.
 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
95 return ip4_lookup_inline (vm, node, frame);
/*
 * Forward declaration: the formatter is defined further down in this file
 * but is referenced by the node registrations that follow.
 */
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/*
 * Registration of the "ip4-lookup" graph node.  Frame elements are u32
 * sized, traces are rendered by format_ip4_lookup_trace and the node has
 * IP_LOOKUP_N_NEXT next nodes taken from IP4_LOOKUP_NEXT_NODES.
 */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
103 .name = "ip4-lookup",
104 .vector_size = sizeof (u32),
105 .format_trace = format_ip4_lookup_trace,
106 .n_next_nodes = IP_LOOKUP_N_NEXT,
107 .next_nodes = IP4_LOOKUP_NEXT_NODES,
/*
 * Node function for "ip4-load-balance".
 *
 * For every buffer in the frame:
 *  - the load-balance index is read from ip.adj_index[VLIB_TX];
 *  - a bucket is chosen: by flow hash (masked with lb_n_buckets_minus_1)
 *    when the load-balance has more than one bucket, bucket 0 otherwise;
 *  - next[] is set from the bucket's dpoi_next_node and the bucket's
 *    dpoi_index is written back to ip.adj_index[VLIB_TX];
 *  - the lbm_via_counters combined counter for the load-balance index is
 *    incremented by one packet and the buffer chain length.
 *
 * Returns frame->n_vectors.
 */
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112 vlib_node_runtime_t * node,
113 vlib_frame_t * frame)
115 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
117 u32 thread_index = vm->thread_index;
118 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119 u16 nexts[VLIB_FRAME_SIZE], *next;
121 from = vlib_frame_vector_args (frame);
122 n_left = frame->n_vectors;
/* Translate the frame's buffer indices into buffer pointers in bufs[]. */
125 vlib_get_buffers (vm, from, bufs, n_left);
/* Two-buffer path: b[0] and b[1] are handled together, b[2]/b[3] prefetched. */
129 const load_balance_t *lb0, *lb1;
130 const ip4_header_t *ip0, *ip1;
131 u32 lbi0, hc0, lbi1, hc1;
132 const dpo_id_t *dpo0, *dpo1;
134 /* Prefetch next iteration. */
136 vlib_prefetch_buffer_header (b[2], LOAD);
137 vlib_prefetch_buffer_header (b[3], LOAD);
139 CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140 CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
143 ip0 = vlib_buffer_get_current (b[0]);
144 ip1 = vlib_buffer_get_current (b[1]);
145 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146 lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
148 lb0 = load_balance_get (lbi0);
149 lb1 = load_balance_get (lbi1);
152 * this node is for via FIBs we can re-use the hash value from the
153 * to node if present.
154 * We don't want to use the same hash value at each level in the recursion
155 * graph as that would lead to polarisation
/*
 * A non-zero flow hash already stored in the buffer is shifted right by
 * one bit, written back and used; a zero hash is replaced by one computed
 * from the IP header with the load-balance's hash config.
 */
159 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
161 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
163 hc0 = vnet_buffer (b[0])->ip.flow_hash =
164 vnet_buffer (b[0])->ip.flow_hash >> 1;
168 hc0 = vnet_buffer (b[0])->ip.flow_hash =
169 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
171 dpo0 = load_balance_get_fwd_bucket
172 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176 dpo0 = load_balance_get_bucket_i (lb0, 0);
/* Same bucket selection for the second buffer. */
178 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
180 if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
182 hc1 = vnet_buffer (b[1])->ip.flow_hash =
183 vnet_buffer (b[1])->ip.flow_hash >> 1;
187 hc1 = vnet_buffer (b[1])->ip.flow_hash =
188 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
190 dpo1 = load_balance_get_fwd_bucket
191 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195 dpo1 = load_balance_get_bucket_i (lb1, 0);
/* Record the chosen next node and hand the bucket's index downstream. */
198 next[0] = dpo0->dpoi_next_node;
199 next[1] = dpo1->dpoi_next_node;
201 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Counters are keyed by the incoming load-balance index, not the bucket. */
204 vlib_increment_combined_counter
205 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206 vlib_increment_combined_counter
207 (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
/* Single-buffer path: the same steps applied to b[0] only. */
216 const load_balance_t *lb0;
217 const ip4_header_t *ip0;
218 const dpo_id_t *dpo0;
221 ip0 = vlib_buffer_get_current (b[0]);
222 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
224 lb0 = load_balance_get (lbi0);
227 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
229 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
231 hc0 = vnet_buffer (b[0])->ip.flow_hash =
232 vnet_buffer (b[0])->ip.flow_hash >> 1;
236 hc0 = vnet_buffer (b[0])->ip.flow_hash =
237 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
239 dpo0 = load_balance_get_fwd_bucket
240 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244 dpo0 = load_balance_get_bucket_i (lb0, 0);
247 next[0] = dpo0->dpoi_next_node;
248 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
250 vlib_increment_combined_counter
251 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
/*
 * Enqueue every buffer to the next node recorded in nexts[], then add
 * trace records if tracing is enabled on this node.
 */
258 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259 if (node->flags & VLIB_NODE_FLAG_TRACE)
260 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
262 return frame->n_vectors;
/*
 * Registration of the "ip4-load-balance" graph node.  It is declared a
 * sibling of "ip4-lookup" and reuses format_ip4_lookup_trace for traces.
 */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
268 .name = "ip4-load-balance",
269 .vector_size = sizeof (u32),
270 .sibling_of = "ip4-lookup",
271 .format_trace = format_ip4_lookup_trace,
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
/*
 * Walks the addresses of sw_if_index with foreach_ip_interface_address,
 * honoring unnumbered interfaces (the flag argument is 1).
 *
 * im          IPv4 main whose lookup_main holds the interface addresses.
 * sw_if_index interface to query.
 * result_ia   out-parameter: set to the matching interface-address record
 *             when result is non-NULL, to 0 otherwise.
 */
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279 ip_interface_address_t ** result_ia)
281 ip_lookup_main_t *lm = &im->lookup_main;
282 ip_interface_address_t *ia = 0;
283 ip4_address_t *result = 0;
286 foreach_ip_interface_address
287 (lm, ia, sw_if_index,
288 1 /* honor unnumbered */ ,
291 ip_interface_address_get_address (lm, ia);
297 *result_ia = result ? ia : 0;
/*
 * (Re)installs the FIB_SOURCE_INTERFACE entry for a subnet broadcast
 * prefix in fib_index.
 *
 * Any existing special entry from FIB_SOURCE_INTERFACE is removed first.
 * The interface flags are then consulted: with
 * VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST set a forwarding path is installed
 * via fib_table_entry_update_one_path; the special entry flagged
 * DROP | LOOSE_URPF_EXEMPT covers the case where it is not set.
 */
303 ip4_add_subnet_bcast_route (u32 fib_index,
307 vnet_sw_interface_flags_t iflags;
309 iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
311 fib_table_entry_special_remove(fib_index,
313 FIB_SOURCE_INTERFACE);
315 if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
317 fib_table_entry_update_one_path (fib_index, pfx,
318 FIB_SOURCE_INTERFACE,
321 /* No next-hop address */
327 // no out-label stack
329 FIB_ROUTE_PATH_FLAG_NONE);
333 fib_table_entry_special_add(fib_index,
335 FIB_SOURCE_INTERFACE,
336 (FIB_ENTRY_FLAG_DROP |
337 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
/*
 * Installs the per-prefix special routes for interface address `a` in
 * fib_index.
 *
 * Prefixes are tracked in lm->if_prefix_pool, keyed by the masked prefix
 * plus sw_if_index and reference counted, so several addresses from the
 * same prefix on one interface share one set of routes.
 *
 *  - length <= 30: a glean route for the prefix, a drop route for the
 *    prefix base address and a subnet-broadcast route; the last two are
 *    skipped when they would equal the interface address itself.
 *  - length == 31: an attached route to the other address of the pair.
 */
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
345 ip_interface_address_t * a)
347 ip_lookup_main_t *lm = &im->lookup_main;
348 ip_interface_prefix_t *if_prefix;
349 ip4_address_t *address = ip_interface_address_get_address (lm, a);
/* Lookup key: the address masked down to its prefix, per interface. */
351 ip_interface_prefix_key_t key = {
353 .fp_len = a->address_length,
354 .fp_proto = FIB_PROTOCOL_IP4,
355 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
357 .sw_if_index = sw_if_index,
/* Scratch prefix reused for each special route installed below. */
360 fib_prefix_t pfx_special = {
361 .fp_proto = FIB_PROTOCOL_IP4,
364 /* If prefix already set on interface, just increment ref count & return */
365 if_prefix = ip_get_interface_prefix (lm, &key);
368 if_prefix->ref_count += 1;
372 /* New prefix - allocate a pool entry, initialize it, add to the hash */
373 pool_get (lm->if_prefix_pool, if_prefix);
374 if_prefix->ref_count = 1;
/* Remember which interface address this prefix's routes were built from. */
375 if_prefix->src_ia_index = a - lm->if_address_pool;
376 clib_memcpy (&if_prefix->key, &key, sizeof (key));
377 mhash_set (&lm->prefix_to_if_prefix_index, &key,
378 if_prefix - lm->if_prefix_pool, 0 /* old value */);
380 /* length <= 30 - add glean, drop first address, maybe drop bcast address */
381 if (a->address_length <= 30)
383 pfx_special.fp_len = a->address_length;
384 pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
386 /* set the glean route for the prefix */
387 fib_table_entry_update_one_path (fib_index, &pfx_special,
388 FIB_SOURCE_INTERFACE,
389 (FIB_ENTRY_FLAG_CONNECTED |
390 FIB_ENTRY_FLAG_ATTACHED),
392 /* No next-hop address */
395 /* invalid FIB index */
398 /* no out-label stack */
400 FIB_ROUTE_PATH_FLAG_NONE);
402 /* set a drop route for the base address of the prefix */
403 pfx_special.fp_len = 32;
404 pfx_special.fp_addr.ip4.as_u32 =
405 address->as_u32 & im->fib_masks[a->address_length];
/* Skip when the interface address is itself the base address. */
407 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408 fib_table_entry_special_add (fib_index, &pfx_special,
409 FIB_SOURCE_INTERFACE,
410 (FIB_ENTRY_FLAG_DROP |
411 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
413 /* set a route for the broadcast address of the prefix */
414 pfx_special.fp_len = 32;
415 pfx_special.fp_addr.ip4.as_u32 =
416 address->as_u32 | ~im->fib_masks[a->address_length];
/* Skip when the interface address is itself the broadcast address. */
417 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418 ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422 /* length == 31 - add an attached route for the other address */
423 else if (a->address_length == 31)
425 pfx_special.fp_len = 32;
/* Flipping the lowest address bit yields the other host of the /31. */
426 pfx_special.fp_addr.ip4.as_u32 =
427 address->as_u32 ^ clib_host_to_net_u32(1);
429 fib_table_entry_update_one_path (fib_index, &pfx_special,
430 FIB_SOURCE_INTERFACE,
431 (FIB_ENTRY_FLAG_ATTACHED),
433 &pfx_special.fp_addr,
435 /* invalid FIB index */
439 FIB_ROUTE_PATH_FLAG_NONE);
/*
 * Installs all routes belonging to interface address `a` in fib_index:
 *  1. the per-prefix special routes (ip4_add_interface_prefix_routes);
 *  2. when the interface has a classify table configured, a special DPO
 *     entry built by classify_dpo_create for the address;
 *  3. the FIB_SOURCE_INTERFACE entry for the address itself, flagged
 *     CONNECTED | LOCAL.
 */
444 ip4_add_interface_routes (u32 sw_if_index,
445 ip4_main_t * im, u32 fib_index,
446 ip_interface_address_t * a)
448 ip_lookup_main_t *lm = &im->lookup_main;
449 ip4_address_t *address = ip_interface_address_get_address (lm, a);
452 .fp_proto = FIB_PROTOCOL_IP4,
453 .fp_addr.ip4 = *address,
456 /* set special routes for the prefix if needed */
457 ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
/* Only consult the classify vector when it is long enough for sw_if_index. */
459 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
461 u32 classify_table_index =
462 lm->classify_table_index_by_sw_if_index[sw_if_index];
/* A value of ~0 means no classify table is set for this interface. */
463 if (classify_table_index != (u32) ~ 0)
465 dpo_id_t dpo = DPO_INVALID;
470 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
472 fib_table_entry_special_dpo_add (fib_index,
475 FIB_ENTRY_FLAG_NONE, &dpo);
/* Entry for the interface address itself. */
480 fib_table_entry_update_one_path (fib_index, &pfx,
481 FIB_SOURCE_INTERFACE,
482 (FIB_ENTRY_FLAG_CONNECTED |
483 FIB_ENTRY_FLAG_LOCAL),
490 FIB_ROUTE_PATH_FLAG_NONE);
/*
 * Counterpart of ip4_add_interface_prefix_routes: drops one reference on
 * the prefix that `address`/`address_length` belongs to and adjusts the
 * special routes accordingly.
 *
 *  - Prefix not found: a warning is logged.
 *  - length <= 30: the glean route built from `address` is deleted.  If no
 *    addresses remain in the prefix, the base-address and broadcast-address
 *    entries are removed too; otherwise another interface address in the
 *    prefix is looked up and the glean route is re-created from it.
 *  - length == 31: the attached route to the peer address is deleted.
 */
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
497 ip4_address_t * address,
500 ip_lookup_main_t *lm = &im->lookup_main;
501 ip_interface_prefix_t *if_prefix;
/* Same key construction as on the add path: masked prefix + interface. */
503 ip_interface_prefix_key_t key = {
505 .fp_len = address_length,
506 .fp_proto = FIB_PROTOCOL_IP4,
507 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
509 .sw_if_index = sw_if_index,
512 fib_prefix_t pfx_special = {
514 .fp_proto = FIB_PROTOCOL_IP4,
517 if_prefix = ip_get_interface_prefix (lm, &key);
520 clib_warning ("Prefix not found while deleting %U",
521 format_ip4_address_and_length, address, address_length);
525 if_prefix->ref_count -= 1;
528 * Routes need to be adjusted if:
529 * - deleting last intf addr in prefix
530 * - deleting intf addr used as default source address in glean adjacency
532 * We're done now otherwise
/*
 * i.e. other addresses remain in the prefix and the address recorded in
 * src_ia_index is still allocated in the interface address pool.
 */
534 if ((if_prefix->ref_count > 0) &&
535 !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
538 /* length <= 30, delete glean route, first address, last address */
539 if (address_length <= 30)
542 /* remove glean route for prefix */
543 pfx_special.fp_addr.ip4 = *address;
544 pfx_special.fp_len = address_length;
545 fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
547 /* if no more intf addresses in prefix, remove other special routes */
548 if (!if_prefix->ref_count)
550 /* first address in prefix */
551 pfx_special.fp_addr.ip4.as_u32 =
552 address->as_u32 & im->fib_masks[address_length];
553 pfx_special.fp_len = 32;
/* Mirrors the add path: no entry exists if the address was the base. */
555 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556 fib_table_entry_special_remove (fib_index,
558 FIB_SOURCE_INTERFACE);
560 /* prefix broadcast address */
561 pfx_special.fp_addr.ip4.as_u32 =
562 address->as_u32 | ~im->fib_masks[address_length];
563 pfx_special.fp_len = 32;
565 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
566 fib_table_entry_special_remove (fib_index,
568 FIB_SOURCE_INTERFACE);
571 /* default source addr just got deleted, find another */
573 ip_interface_address_t *new_src_ia = NULL;
574 ip4_address_t *new_src_addr = NULL;
/*
 * NOTE(review): new_src_ia / new_src_addr are used below without a NULL
 * check; presumably ref_count > 0 guarantees a match - confirm.
 */
577 ip4_interface_address_matching_destination
578 (im, address, sw_if_index, &new_src_ia);
580 if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
582 pfx_special.fp_len = address_length;
583 pfx_special.fp_addr.ip4 = *new_src_addr;
585 /* set new glean route for the prefix */
586 fib_table_entry_update_one_path (fib_index, &pfx_special,
587 FIB_SOURCE_INTERFACE,
588 (FIB_ENTRY_FLAG_CONNECTED |
589 FIB_ENTRY_FLAG_ATTACHED),
591 /* No next-hop address */
594 /* invalid FIB index */
597 /* no out-label stack */
599 FIB_ROUTE_PATH_FLAG_NONE);
603 /* length == 31, delete attached route for the other address */
604 else if (address_length == 31)
/* The peer of a /31 differs only in the lowest address bit. */
606 pfx_special.fp_addr.ip4.as_u32 =
607 address->as_u32 ^ clib_host_to_net_u32(1);
609 fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
/* Drop the bookkeeping for the prefix: hash entry first, then pool slot. */
612 mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
613 pool_put (lm->if_prefix_pool, if_prefix);
/*
 * Removes the routes of one interface address from fib_index: first the
 * per-prefix special routes (ip4_del_interface_prefix_routes), then the
 * FIB_SOURCE_INTERFACE entry for the address itself.
 */
617 ip4_del_interface_routes (u32 sw_if_index,
620 ip4_address_t * address, u32 address_length)
623 .fp_len = address_length,
624 .fp_proto = FIB_PROTOCOL_IP4,
625 .fp_addr.ip4 = *address,
628 ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
629 address, address_length);
632 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
635 #ifndef CLIB_MARCH_VARIANT
/*
 * Reference-counted enable/disable of IPv4 on an interface.
 *
 * im->ip_enabled_by_sw_if_index[sw_if_index] counts outstanding enables
 * (the vector is grown and zero-filled as needed).  The "ip4-not-enabled"
 * feature on the ip4-unicast and ip4-multicast arcs is toggled with
 * !is_enable, and every registered enable/disable callback is invoked with
 * (sw_if_index, is_enable).
 */
637 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
639 ip4_main_t *im = &ip4_main;
641 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
644 * enable/disable only on the 1<->0 transition
/* Enable side: bump the count; 1 means this is the first enable. */
648 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
/* Disable side: the count must be positive before it is decremented. */
653 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
654 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
657 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
661 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
662 sw_if_index, !is_enable, 0, 0);
/* Notify everyone registered in enable_disable_interface_callbacks. */
665 ip4_enable_disable_interface_callback_t *cb;
666 vec_foreach (cb, im->enable_disable_interface_callbacks)
667 cb->function (im, cb->function_opaque, sw_if_index, is_enable);
/*
 * Adds or deletes one IPv4 address on an interface.
 *
 * Steps visible here:
 *  - sw_if_index 0 (local0) is rejected with an error;
 *  - the address is paired with the interface's FIB index;
 *  - the address is checked against the addresses of every interface in
 *    the same FIB; a conflict sets VNET_API_ERROR_DUPLICATE_IF_ADDRESS and
 *    produces an error, except for a different address of the same length
 *    on the same interface;
 *  - ip_interface_address_add_del updates the interface address pool;
 *  - IPv4 is enabled (add) or disabled (delete) on the interface;
 *  - if the interface is admin-up the matching routes are added/removed;
 *  - if the pool size changed, add/del-address callbacks are invoked.
 *
 * Returns a clib_error_t (0 when no error was recorded).
 */
671 static clib_error_t *
672 ip4_add_del_interface_address_internal (vlib_main_t * vm,
674 ip4_address_t * address,
675 u32 address_length, u32 is_del)
677 vnet_main_t *vnm = vnet_get_main ();
678 ip4_main_t *im = &ip4_main;
679 ip_lookup_main_t *lm = &im->lookup_main;
680 clib_error_t *error = 0;
681 u32 if_address_index, elts_before;
682 ip4_address_fib_t ip4_af, *addr_fib = 0;
684 /* local0 interface doesn't support IP addressing */
685 if (sw_if_index == 0)
688 clib_error_create ("local0 interface doesn't support IP addressing");
/* Make sure the FIB-index vector covers sw_if_index before reading it. */
691 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
692 ip4_addr_fib_init (&ip4_af, address,
693 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
694 vec_add1 (addr_fib, ip4_af);
697 * there is no support for adj-fib handling in the presence of overlapping
698 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
704 /* When adding an address check that it does not conflict
705 with an existing address on any interface in this table. */
706 ip_interface_address_t *ia;
707 vnet_sw_interface_t *sif;
709 pool_foreach(sif, vnm->interface_main.sw_interfaces,
/* Only interfaces bound to the same FIB can conflict. */
711 if (im->fib_index_by_sw_if_index[sw_if_index] ==
712 im->fib_index_by_sw_if_index[sif->sw_if_index])
714 foreach_ip_interface_address
715 (&im->lookup_main, ia, sif->sw_if_index,
716 0 /* honor unnumbered */ ,
719 ip_interface_address_get_address
720 (&im->lookup_main, ia);
721 if (ip4_destination_matches_route
722 (im, address, x, ia->address_length) ||
723 ip4_destination_matches_route (im,
728 /* an intf may have >1 addr from the same prefix */
729 if ((sw_if_index == sif->sw_if_index) &&
730 (ia->address_length == address_length) &&
731 (x->as_u32 != address->as_u32))
734 /* error if the length or intf was different */
735 vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
739 ("failed to add %U which conflicts with %U for interface %U",
740 format_ip4_address_and_length, address,
742 format_ip4_address_and_length, x,
744 format_vnet_sw_if_index_name, vnm,
/* Snapshot the pool size so a real add/remove can be detected below. */
753 elts_before = pool_elts (lm->if_address_pool);
755 error = ip_interface_address_add_del
756 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
760 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
762 /* intf addr routes are added/deleted on admin up/down */
763 if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
766 ip4_del_interface_routes (sw_if_index,
767 im, ip4_af.fib_index, address,
770 ip4_add_interface_routes (sw_if_index,
771 im, ip4_af.fib_index,
773 (lm->if_address_pool, if_address_index));
776 /* If pool did not grow/shrink: add duplicate address. */
777 if (elts_before != pool_elts (lm->if_address_pool))
779 ip4_add_del_interface_address_callback_t *cb;
780 vec_foreach (cb, im->add_del_interface_address_callbacks)
781 cb->function (im, cb->function_opaque, sw_if_index,
782 address, address_length, if_address_index, is_del);
/*
 * Entry point for adding/deleting an interface address: forwards all
 * arguments unchanged to ip4_add_del_interface_address_internal and
 * returns its result.
 */
791 ip4_add_del_interface_address (vlib_main_t * vm,
793 ip4_address_t * address,
794 u32 address_length, u32 is_del)
796 return ip4_add_del_interface_address_internal
797 (vm, sw_if_index, address, address_length, is_del);
/*
 * Re-installs the subnet-broadcast route of interface addresses whose
 * prefix length is <= 30.  For each such address the broadcast address
 * (address OR-ed with the inverted prefix mask) is passed to
 * ip4_add_subnet_bcast_route together with the FIB index looked up for the
 * interface.
 */
801 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
803 ip_interface_address_t *ia;
809 * when directed broadcast is enabled, the subnet braodcast route will forward
810 * packets using an adjacency with a broadcast MAC. otherwise it drops
813 foreach_ip_interface_address(&im->lookup_main, ia,
816 if (ia->address_length <= 30)
820 ipa = ip_interface_address_get_address (&im->lookup_main, ia);
824 .fp_proto = FIB_PROTOCOL_IP4,
826 .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
830 ip4_add_subnet_bcast_route
831 (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
/*
 * Admin up/down callback for software interfaces.
 *
 * Ensures the per-interface vectors cover sw_if_index, derives is_admin_up
 * from VNET_SW_INTERFACE_FLAG_ADMIN_UP in `flags`, and walks every address
 * on the interface to add its routes (ip4_add_interface_routes) or delete
 * them (ip4_del_interface_routes) in the interface's FIB.
 */
840 static clib_error_t *
841 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
843 ip4_main_t *im = &ip4_main;
844 ip_interface_address_t *ia;
846 u32 is_admin_up, fib_index;
848 /* Fill in lookup tables with default table (0). */
849 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
851 vec_validate_init_empty (im->
852 lookup_main.if_address_pool_index_by_sw_if_index,
855 is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
857 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
/* Unnumbered interfaces are not followed here (flag argument is 0). */
860 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
861 0 /* honor unnumbered */,
863 a = ip_interface_address_get_address (&im->lookup_main, ia);
865 ip4_add_interface_routes (sw_if_index,
869 ip4_del_interface_routes (sw_if_index,
871 a, ia->address_length);
/* Register the callback with the interface admin up/down hook. */
878 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
880 /* Built-in ip4 unicast rx feature path definition */
/*
 * The arc starts at "ip4-input" / "ip4-input-no-checksum" and ends at
 * "ip4-lookup"; its index is stored in lookup_main.ucast_feature_arc_index.
 * The runs_before constraints below yield this relative order:
 *   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
 *   ip4-source-check-via-any -> ip4-policer-classify ->
 *   ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass ->
 *   ip4-lookup
 * "ip4-source-and-port-range-check-rx" is only constrained to run before
 * ip4-policer-classify, and "ip4-not-enabled" only before ip4-lookup.
 */
882 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
884 .arc_name = "ip4-unicast",
885 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
886 .last_in_arc = "ip4-lookup",
887 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
890 VNET_FEATURE_INIT (ip4_flow_classify, static) =
892 .arc_name = "ip4-unicast",
893 .node_name = "ip4-flow-classify",
894 .runs_before = VNET_FEATURES ("ip4-inacl"),
897 VNET_FEATURE_INIT (ip4_inacl, static) =
899 .arc_name = "ip4-unicast",
900 .node_name = "ip4-inacl",
901 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
904 VNET_FEATURE_INIT (ip4_source_check_1, static) =
906 .arc_name = "ip4-unicast",
907 .node_name = "ip4-source-check-via-rx",
908 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
911 VNET_FEATURE_INIT (ip4_source_check_2, static) =
913 .arc_name = "ip4-unicast",
914 .node_name = "ip4-source-check-via-any",
915 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
918 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
920 .arc_name = "ip4-unicast",
921 .node_name = "ip4-source-and-port-range-check-rx",
922 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
925 VNET_FEATURE_INIT (ip4_policer_classify, static) =
927 .arc_name = "ip4-unicast",
928 .node_name = "ip4-policer-classify",
929 .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
932 VNET_FEATURE_INIT (ip4_ipsec, static) =
934 .arc_name = "ip4-unicast",
935 .node_name = "ipsec4-input-feature",
936 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
939 VNET_FEATURE_INIT (ip4_vpath, static) =
941 .arc_name = "ip4-unicast",
942 .node_name = "vpath-input-ip4",
943 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
946 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
948 .arc_name = "ip4-unicast",
949 .node_name = "ip4-vxlan-bypass",
950 .runs_before = VNET_FEATURES ("ip4-lookup"),
953 VNET_FEATURE_INIT (ip4_not_enabled, static) =
955 .arc_name = "ip4-unicast",
956 .node_name = "ip4-not-enabled",
957 .runs_before = VNET_FEATURES ("ip4-lookup"),
960 VNET_FEATURE_INIT (ip4_lookup, static) =
962 .arc_name = "ip4-unicast",
963 .node_name = "ip4-lookup",
964 .runs_before = 0, /* not before any other features */
967 /* Built-in ip4 multicast rx feature path definition */
/*
 * The arc starts at "ip4-input" / "ip4-input-no-checksum" and ends at
 * "ip4-mfib-forward-lookup"; its index is stored in
 * lookup_main.mcast_feature_arc_index.  Both "vpath-input-ip4" and
 * "ip4-not-enabled" are constrained to run before the final lookup node.
 */
968 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
970 .arc_name = "ip4-multicast",
971 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
972 .last_in_arc = "ip4-mfib-forward-lookup",
973 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
976 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
978 .arc_name = "ip4-multicast",
979 .node_name = "vpath-input-ip4",
980 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
983 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
985 .arc_name = "ip4-multicast",
986 .node_name = "ip4-not-enabled",
987 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
990 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
992 .arc_name = "ip4-multicast",
993 .node_name = "ip4-mfib-forward-lookup",
994 .runs_before = 0, /* last feature */
997 /* Source and port-range check ip4 tx feature path definition */
/*
 * The "ip4-output" arc starts at "ip4-rewrite", "ip4-midchain" and
 * "ip4-dvr-dpo" and ends at "interface-output"; its index is stored in
 * lookup_main.output_feature_arc_index.  The runs_before constraints give:
 *   ip4-source-and-port-range-check-tx -> ip4-outacl ->
 *   ipsec4-output-feature -> interface-output
 */
998 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1000 .arc_name = "ip4-output",
1001 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1002 .last_in_arc = "interface-output",
1003 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1006 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1008 .arc_name = "ip4-output",
1009 .node_name = "ip4-source-and-port-range-check-tx",
1010 .runs_before = VNET_FEATURES ("ip4-outacl"),
1013 VNET_FEATURE_INIT (ip4_outacl, static) =
1015 .arc_name = "ip4-output",
1016 .node_name = "ip4-outacl",
1017 .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1020 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1022 .arc_name = "ip4-output",
1023 .node_name = "ipsec4-output-feature",
1024 .runs_before = VNET_FEATURES ("interface-output"),
1027 /* Built-in ip4 tx feature path definition */
1028 VNET_FEATURE_INIT (ip4_interface_output, static) =
1030 .arc_name = "ip4-output",
1031 .node_name = "interface-output",
1032 .runs_before = 0, /* not before any other features */
/*
 * Software-interface add/del callback.
 *
 * Grows the unicast and multicast FIB-index vectors to cover sw_if_index.
 * The cleanup section clears the interface's unnumbered binding and
 * deletes every IPv4 address configured on it (is_del = 1); presumably it
 * runs only when the interface is being deleted - TODO confirm.  Finally
 * "ip4-not-enabled" is switched on the unicast and multicast arcs; the
 * multicast call passes is_add as the enable flag.  Always returns 0.
 */
1036 static clib_error_t *
1037 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1039 ip4_main_t *im = &ip4_main;
1041 /* Fill in lookup tables with default table (0). */
1042 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1043 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1047 ip4_main_t *im4 = &ip4_main;
1048 ip_lookup_main_t *lm4 = &im4->lookup_main;
1049 ip_interface_address_t *ia = 0;
1050 ip4_address_t *address;
1051 vlib_main_t *vm = vlib_get_main ();
1053 vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
/* Remove each address through the regular add/del path. */
1055 foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1057 address = ip_interface_address_get_address (lm4, ia);
1058 ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1063 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1066 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1067 sw_if_index, is_add, 0, 0);
1069 return /* no error */ 0;
/* Register the callback with the interface add/del hook. */
1072 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1074 /* Global IP4 main. */
/* The definition is compiled only when CLIB_MARCH_VARIANT is not defined. */
1075 #ifndef CLIB_MARCH_VARIANT
1076 ip4_main_t ip4_main;
1077 #endif /* CLIB_MARCH_VARIANT */
/*
 * Init function for IPv4 lookup.
 *
 * In order: runs the feature, mtrie, FIB and mFIB init functions; fills
 * im->fib_masks[] with one mask per prefix length, stored in network byte
 * order; initializes lookup_main for IPv4; creates and locks unicast and
 * multicast table 0; installs unformat_pg_ip4_header as the
 * packet-generator edit function of the ip4-lookup node; and builds the
 * ARP request packet template (Ethernet hardware type, IPv4 protocol type,
 * 6-byte L2 and 4-byte L3 addresses, opcode "request").
 */
1079 static clib_error_t *
1080 ip4_lookup_init (vlib_main_t * vm)
1082 ip4_main_t *im = &ip4_main;
1083 clib_error_t *error;
1086 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1088 if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1090 if ((error = vlib_call_init_function (vm, fib_module_init)))
1092 if ((error = vlib_call_init_function (vm, mfib_module_init)))
/* One mask per prefix length i; index i is the prefix length. */
1095 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1100 m = pow2_mask (i) << (32 - i);
1103 im->fib_masks[i] = clib_host_to_net_u32 (m);
1106 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1108 /* Create FIB with index 0 and table id of 0. */
1109 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1110 FIB_SOURCE_DEFAULT_ROUTE);
1111 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1112 MFIB_SOURCE_DEFAULT_ROUTE);
/* Let the packet generator parse IPv4 headers for the ip4-lookup node. */
1116 pn = pg_get_node (ip4_lookup_node.index);
1117 pn->unformat_edit = unformat_pg_ip4_header;
/* Build the constant part of an ARP request, zeroed first. */
1121 ethernet_arp_header_t h;
1123 clib_memset (&h, 0, sizeof (h));
/* _16 stores a 16-bit field in network byte order, _8 stores a byte as is. */
1125 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1126 #define _8(f,v) h.f = v;
1127 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1128 _16 (l3_type, ETHERNET_TYPE_IP4);
1129 _8 (n_l2_address_bytes, 6);
1130 _8 (n_l3_address_bytes, 4);
1131 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1135 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1138 /* alloc chunk size */ 8,
/* Register this function to run at init time. */
1145 VLIB_INIT_FUNCTION (ip4_lookup_init);
/*
 * Trace record used by the ip4 forwarding trace functions in this file;
 * they read/write its fib_index, dpo_index, flow_hash and packet_data
 * members.
 */
1149 /* Adjacency taken. */
1154 /* Packet data, possibly *after* rewrite. */
/* Array length is 64 minus the size of one u32. */
1155 u8 packet_data[64 - 1 * sizeof (u32)];
1157 ip4_forward_next_trace_t;
1159 #ifndef CLIB_MARCH_VARIANT
/*
 * Trace formatter: appends the captured packet_data, rendered with
 * format_ip4_header at the current indent, to `s`.
 * The va_list carries (vlib_main_t *, vlib_node_t *, trace record); the
 * first two are consumed but unused.
 */
1161 format_ip4_forward_next_trace (u8 * s, va_list * args)
1163 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1164 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1165 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1166 u32 indent = format_get_indent (s);
1167 s = format (s, "%U%U",
1168 format_white_space, indent,
1169 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter used by the ip4-lookup and ip4-load-balance nodes.
 * Appends one line with the FIB index, DPO index and flow hash, followed
 * on a new line by the captured packet_data rendered with
 * format_ip4_header at the current indent.
 * The va_list carries (vlib_main_t *, vlib_node_t *, trace record); the
 * first two are consumed but unused.
 */
1175 format_ip4_lookup_trace (u8 * s, va_list * args)
1177 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1178 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1179 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1180 u32 indent = format_get_indent (s);
1182 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1183 t->fib_index, t->dpo_index, t->flow_hash);
1184 s = format (s, "\n%U%U",
1185 format_white_space, indent,
1186 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for rewrite traces.  Appends one line with the interface
 * value, the DPO index, the adjacency (format_ip_adjacency) and the flow
 * hash, followed on a new line by the captured packet_data rendered with
 * format_ip_adjacency_packet_data.
 * The va_list carries (vlib_main_t *, vlib_node_t *, trace record); the
 * first two are consumed but unused.
 */
1191 format_ip4_rewrite_trace (u8 * s, va_list * args)
1193 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1194 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1195 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1196 u32 indent = format_get_indent (s);
/*
 * NOTE(review): the value labelled "tx_sw_if_index" is t->fib_index; the
 * trace record's field is reused for this output - confirm this is the
 * intended meaning for rewrite traces.
 */
1198 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1199 t->fib_index, t->dpo_index, format_ip_adjacency,
1200 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1201 s = format (s, "\n%U%U",
1202 format_white_space, indent,
1203 format_ip_adjacency_packet_data,
1204 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1208 #ifndef CLIB_MARCH_VARIANT
1209 /* Common trace function for all ip4-forward next nodes. */
/*
 * For every buffer in `frame` that has VLIB_BUFFER_IS_TRACED set, a trace
 * record is added holding:
 *  - dpo_index: ip.adj_index[which_adj_index] from the buffer metadata;
 *  - flow_hash: ip.flow_hash from the buffer metadata;
 *  - an index chosen as sw_if_index[VLIB_TX] when that is not ~0, else the
 *    FIB index bound to the buffer's RX interface;
 *  - packet_data: sizeof (packet_data) bytes copied from
 *    vlib_buffer_get_current().
 *
 * which_adj_index selects the RX or TX slot of ip.adj_index.
 */
1211 ip4_forward_next_trace (vlib_main_t * vm,
1212 vlib_node_runtime_t * node,
1213 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1216 ip4_main_t *im = &ip4_main;
1218 n_left = frame->n_vectors;
1219 from = vlib_frame_vector_args (frame);
/* Two-buffer path: b0 and b1 handled together, from[2]/from[3] prefetched. */
1224 vlib_buffer_t *b0, *b1;
1225 ip4_forward_next_trace_t *t0, *t1;
1227 /* Prefetch next iteration. */
1228 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1229 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1234 b0 = vlib_get_buffer (vm, bi0);
1235 b1 = vlib_get_buffer (vm, bi1);
1237 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1239 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1240 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1241 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1243 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1244 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1245 vec_elt (im->fib_index_by_sw_if_index,
1246 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1248 clib_memcpy_fast (t0->packet_data,
1249 vlib_buffer_get_current (b0),
1250 sizeof (t0->packet_data));
1252 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1254 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1255 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1256 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1258 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1259 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1260 vec_elt (im->fib_index_by_sw_if_index,
1261 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1262 clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1263 sizeof (t1->packet_data));
/* Single-buffer path: the same record is produced for b0 only. */
1273 ip4_forward_next_trace_t *t0;
1277 b0 = vlib_get_buffer (vm, bi0);
1279 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1281 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1282 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1283 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1285 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1286 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1287 vec_elt (im->fib_index_by_sw_if_index,
1288 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1289 clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1290 sizeof (t0->packet_data));
/*
 * Software L4 checksum over the IPv4 pseudo-header and the L4 bytes of the
 * packet whose IPv4 header is ip0, following the buffer chain that starts at
 * p0 when the L4 bytes are spread over several buffers.
 *
 * vm  - vlib main, used to fetch chained buffers
 * p0  - first buffer of the packet
 * ip0 - IPv4 header located inside p0
 *
 * The result computed at the end is the complemented, folded 16-bit sum
 * (sum16); the return statement itself is not visible in this listing.
 */
1297 /* Compute TCP/UDP/ICMP4 checksum in software. */
1299 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1303 u32 ip_header_length, payload_length_host_byte_order;
1304 u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1306 void *data_this_buffer;
1308 /* Initialize checksum with ip header. */
1309 ip_header_length = ip4_header_bytes (ip0);
1310 payload_length_host_byte_order =
1311 clib_net_to_host_u16 (ip0->length) - ip_header_length;
/* Pseudo-header contribution: L4 length plus the protocol number. */
1313 clib_host_to_net_u32 (payload_length_host_byte_order +
1314 (ip0->protocol << 16));
/* Add the addresses: two 32-bit loads on 32-bit hosts, otherwise a single
   64-bit load starting at src_address (presumably spanning dst_address as
   well - confirm against the ip4_header_t layout). */
1316 if (BITS (uword) == 32)
1319 ip_csum_with_carry (sum0,
1320 clib_mem_unaligned (&ip0->src_address, u32));
1322 ip_csum_with_carry (sum0,
1323 clib_mem_unaligned (&ip0->dst_address, u32));
1327 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1329 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1330 data_this_buffer = (void *) ip0 + ip_header_length;
/* Bytes of the first buffer that lie at or after the start of the IP
   header. */
1331 n_ip_bytes_this_buffer =
1332 p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
/* Clamp the first chunk to what the first buffer really holds; the rest is
   picked up from the chained buffers below. */
1333 if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1335 n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1336 n_ip_bytes_this_buffer - ip_header_length : 0;
/* Accumulate this buffer's chunk, then move to the next buffer until
   n_bytes_left reaches zero. */
1340 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1341 n_bytes_left -= n_this_buffer;
1342 if (n_bytes_left == 0)
/* More bytes are expected but the chain may end: the ASSERT covers debug
   images, the explicit test covers images without asserts (the action it
   takes is not visible in this listing). */
1345 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1346 if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
1349 p0 = vlib_get_buffer (vm, p0->next_buffer);
1350 data_this_buffer = vlib_buffer_get_current (p0);
1351 n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1354 sum16 = ~ip_csum_fold (sum0);
1360 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1362 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1366 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1367 || ip0->protocol == IP_PROTOCOL_UDP);
1369 udp0 = (void *) (ip0 + 1);
1370 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1372 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1373 | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1377 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1379 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1380 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
/*
 * Feature arc declaration for the "ip4-local" arc: it starts at the
 * "ip4-local" node and names "ip4-local-end-of-arc" as its last feature.
 */
1387 VNET_FEATURE_ARC_INIT (ip4_local) =
1389 .arc_name = "ip4-local",
1390 .start_nodes = VNET_FEATURES ("ip4-local"),
1391 .last_in_arc = "ip4-local-end-of-arc",
/*
 * Run the software L4 checksum validation on p and report the verdict.
 *
 * vm, p        - vlib main and the packet buffer
 * ip           - IPv4 header of the packet
 * is_udp       - non-zero when the packet is UDP
 * error        - in/out: set to IP4_ERROR_UDP_LENGTH when the UDP length
 *                check below fails, otherwise left unchanged
 * good_tcp_udp - out: 1 when VNET_BUFFER_F_L4_CHECKSUM_CORRECT is set after
 *                validation (and, for the length check below, the UDP length
 *                is acceptable), 0 otherwise
 */
1396 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1397 ip4_header_t * ip, u8 is_udp, u8 * error,
1401 flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1402 *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1406 u32 ip_len, udp_len;
1408 udp = ip4_next_header (ip);
1409 /* Verify UDP length. */
1410 ip_len = clib_net_to_host_u16 (ip->length);
1411 udp_len = clib_net_to_host_u16 (udp->length);
/* NOTE(review): ip->length is compared as-is, i.e. without subtracting the
   IP header length, so this only rejects a UDP length larger than the whole
   IP datagram - confirm that this looser bound is intended. */
1413 len_diff = ip_len - udp_len;
1414 *good_tcp_udp &= len_diff >= 0;
1415 *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
/*
 * L4 checksum state helpers for ip4-local. _b is a vlib_buffer_t pointer
 * expression; every macro yields 0 or 1 and is fully parenthesized so it can
 * be negated, compared or combined, and so _b may be any pointer expression.
 */

/* 1 when TCP or UDP checksum offload is requested on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                  \
  ((((_b)->flags) & (VNET_BUFFER_F_OFFLOAD_TCP_CKSUM                     \
                     | VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0)

/* 1 when a TCP/UDP packet still needs a software checksum validation, i.e.
   the checksum was neither computed already nor left to offload. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                        \
  ((is_tcp_udp)                                                          \
   && !((((_b)->flags) & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)             \
        || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is known to be correct or is left to offload. */
#define ip4_local_csum_is_valid(_b)                                      \
  (((((_b)->flags) & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                  \
    || ip4_local_csum_is_offloaded (_b)) != 0)
/*
 * L4 checksum check for one packet.
 *
 * For TCP/UDP packets whose checksum was neither computed nor offloaded the
 * software validation is run; otherwise the verdict is taken from the buffer
 * flags. On a bad TCP/UDP checksum *error becomes IP4_ERROR_TCP_CHECKSUM or
 * IP4_ERROR_UDP_CHECKSUM; for any other packet *error is left as it was.
 *
 * vm, b  - vlib main and the packet buffer
 * ih     - IPv4 header of the packet
 * error  - in/out error code
 */
1432 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1433 ip4_header_t * ih, u8 * error)
1435 u8 is_udp, is_tcp_udp, good_tcp_udp;
1437 is_udp = ih->protocol == IP_PROTOCOL_UDP;
1438 is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1440 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1441 ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1443 good_tcp_udp = ip4_local_csum_is_valid (b);
/* The UDP error code is derived as TCP code + is_udp, hence the ordering
   requirement asserted here. */
1445 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1446 *error = (is_tcp_udp && !good_tcp_udp
1447 ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
/*
 * Two-packet variant of the L4 checksum check.
 *
 * b, ih and error are arrays of two entries, one per packet. The verdicts
 * are first taken from the buffer flags; the software validation is only
 * entered when at least one of the two packets needs it. A bad TCP/UDP
 * checksum turns the packet's error into IP4_ERROR_TCP_CHECKSUM or
 * IP4_ERROR_UDP_CHECKSUM (TCP code + is_udp).
 */
1451 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1452 ip4_header_t ** ih, u8 * error)
1454 u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1456 is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1457 is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1459 is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1460 is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
/* Default verdicts from the flags already present on the buffers. */
1462 good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1463 good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
/* Slow path: at least one packet needs a software validation. */
1465 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1466 || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1469 ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1472 ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1476 error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1477 IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1478 error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1479 IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
/*
 * Turn the per-packet error code into the final next index and b->error.
 *
 * error_node          - node runtime whose errors[] table maps error codes
 * b                   - packet buffer; b->error is written
 * next                - in/out next index; forced to IP_LOCAL_NEXT_DROP
 *                       when error is anything but
 *                       IP4_ERROR_UNKNOWN_PROTOCOL
 * error               - error code produced by the earlier checks
 * head_of_feature_arc - non-zero when called from the node that starts the
 *                       ip4-local feature arc; the arc is then started for
 *                       packets that carry no error
 */
1483 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1484 vlib_buffer_t * b, u16 * next, u8 error,
1485 u8 head_of_feature_arc)
1487 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
/* IP4_ERROR_UNKNOWN_PROTOCOL is the "no error found" value here: only other
   codes redirect the packet to drop. */
1490 *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1491 b->error = error ? error_node->errors[error] : 0;
1492 if (head_of_feature_arc)
1495 if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1497 vnet_feature_arc_start (arc_index,
1498 vnet_buffer (b)->sw_if_index[VLIB_RX],
1511 } ip4_local_last_check_t;
1514 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1515 ip4_local_last_check_t * last_check, u8 * error0)
1517 ip4_fib_mtrie_leaf_t leaf0;
1518 ip4_fib_mtrie_t *mtrie0;
1519 const dpo_id_t *dpo0;
1520 load_balance_t *lb0;
1523 vnet_buffer (b)->ip.fib_index =
1524 vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1525 vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1528 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1529 * adjacency for the destination address (the local interface address).
1530 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1531 * adjacency for the source address (the remote sender's address)
1533 if (PREDICT_FALSE (last_check->first ||
1534 (last_check->src.as_u32 != ip0->src_address.as_u32)))
1536 mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1537 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1538 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1539 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1540 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1542 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1543 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1544 vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1546 lb0 = load_balance_get (lbi0);
1547 dpo0 = load_balance_get_bucket_i (lb0, 0);
1550 * Must have a route to source otherwise we drop the packet.
1551 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1554 * - the source is a recieve => it's from us => bogus, do this
1555 * first since it sets a different error code.
1556 * - uRPF check for any route to source - accept if passes.
1557 * - allow packets destined to the broadcast address from unknown sources
1560 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1561 && dpo0->dpoi_type == DPO_RECEIVE) ?
1562 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1563 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1564 && !fib_urpf_check_size (lb0->lb_urpf)
1565 && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1566 IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1568 last_check->src.as_u32 = ip0->src_address.as_u32;
1569 last_check->lbi = lbi0;
1570 last_check->error = *error0;
1574 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1575 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1576 vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1577 *error0 = last_check->error;
1578 last_check->first = 0;
1583 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1584 ip4_local_last_check_t * last_check, u8 * error)
1586 ip4_fib_mtrie_leaf_t leaf[2];
1587 ip4_fib_mtrie_t *mtrie[2];
1588 const dpo_id_t *dpo[2];
1589 load_balance_t *lb[2];
1593 not_last_hit = last_check->first;
1594 not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1595 not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1597 vnet_buffer (b[0])->ip.fib_index =
1598 vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1599 vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1600 vnet_buffer (b[0])->ip.fib_index;
1602 vnet_buffer (b[1])->ip.fib_index =
1603 vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1604 vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1605 vnet_buffer (b[1])->ip.fib_index;
1608 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1609 * adjacency for the destination address (the local interface address).
1610 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1611 * adjacency for the source address (the remote sender's address)
1613 if (PREDICT_FALSE (not_last_hit))
1615 mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1616 mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1618 leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1619 leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1621 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1622 &ip[0]->src_address, 2);
1623 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1624 &ip[1]->src_address, 2);
1626 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1627 &ip[0]->src_address, 3);
1628 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1629 &ip[1]->src_address, 3);
1631 lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1632 lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1634 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1635 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1636 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1638 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1639 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1640 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1642 lb[0] = load_balance_get (lbi[0]);
1643 lb[1] = load_balance_get (lbi[1]);
1645 dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1646 dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1648 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1649 dpo[0]->dpoi_type == DPO_RECEIVE) ?
1650 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1651 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1652 !fib_urpf_check_size (lb[0]->lb_urpf) &&
1653 ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1654 ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1656 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1657 dpo[1]->dpoi_type == DPO_RECEIVE) ?
1658 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1659 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1660 !fib_urpf_check_size (lb[1]->lb_urpf) &&
1661 ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1662 ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1664 last_check->src.as_u32 = ip[1]->src_address.as_u32;
1665 last_check->lbi = lbi[1];
1666 last_check->error = error[1];
1670 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1671 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1672 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1674 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1675 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1676 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1678 error[0] = last_check->error;
1679 error[1] = last_check->error;
1680 last_check->first = 0;
/*
 * Packet classes returned by ip4_local_classify(): regular L4 traffic,
 * packets flagged as NATed, and IP fragments.
 */
1684 enum ip_local_packet_type_e
1686 IP_LOCAL_PACKET_TYPE_L4,
1687 IP_LOCAL_PACKET_TYPE_NAT,
1688 IP_LOCAL_PACKET_TYPE_FRAG,
1692 * Determine packet type and next node.
1694 * The expectation is that all packets that are not L4 will skip
1695 * checksums and source checks.
1698 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
/* b    - packet buffer (only its flags are read)
   ip   - IPv4 header of the packet
   next - out: IP_LOCAL_NEXT_REASSEMBLY for fragments, otherwise the next
          index registered for ip->protocol
   Returns one of the IP_LOCAL_PACKET_TYPE_* values. */
1700 ip_lookup_main_t *lm = &ip4_main.lookup_main;
/* Fragments go to reassembly regardless of their protocol. */
1702 if (PREDICT_FALSE (ip4_is_fragment (ip)))
1704 *next = IP_LOCAL_NEXT_REASSEMBLY;
1705 return IP_LOCAL_PACKET_TYPE_FRAG;
/* NATed packets use the per-protocol next but are reported as NAT. */
1707 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1709 *next = lm->local_next_by_ip_protocol[ip->protocol];
1710 return IP_LOCAL_PACKET_TYPE_NAT;
1713 *next = lm->local_next_by_ip_protocol[ip->protocol];
1714 return IP_LOCAL_PACKET_TYPE_L4;
/*
 * Shared worker of the ip4-local and ip4-local-end-of-arc nodes.
 *
 * For each packet of the frame: classify it (which also selects the default
 * next index), run the L4 checksum and source-address checks unless the
 * skip condition below holds, then derive the final next index and error
 * with ip4_local_set_next_and_error(). All packets are finally enqueued
 * according to nexts[].
 *
 * head_of_feature_arc is 1 when called from ip4-local and 0 when called
 * from ip4-local-end-of-arc. Returns frame->n_vectors.
 */
1718 ip4_local_inline (vlib_main_t * vm,
1719 vlib_node_runtime_t * node,
1720 vlib_frame_t * frame, int head_of_feature_arc)
1722 u32 *from, n_left_from;
1723 vlib_node_runtime_t *error_node =
1724 vlib_node_get_runtime (vm, ip4_input_node.index);
1725 u16 nexts[VLIB_FRAME_SIZE], *next;
1726 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1727 ip4_header_t *ip[2];
1730 ip4_local_last_check_t last_check = {
1732 * 0.0.0.0 can appear as the source address of an IP packet,
1733 * as can any other address, hence the need to use the 'first'
1734 * member to make sure the .lbi is initialised for the first
1737 .src = {.as_u32 = 0},
1739 .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1743 from = vlib_frame_vector_args (frame);
1744 n_left_from = frame->n_vectors;
1746 if (node->flags & VLIB_NODE_FLAG_TRACE)
1747 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1749 vlib_get_buffers (vm, from, bufs, n_left_from);
/* Dual loop: two packets per iteration while at least six remain, so that
   b[4] and b[5] can be prefetched. */
1753 while (n_left_from >= 6)
1757 /* Prefetch next iteration. */
1759 vlib_prefetch_buffer_header (b[4], LOAD);
1760 vlib_prefetch_buffer_header (b[5], LOAD);
1762 CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1763 CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
/* IP4_ERROR_UNKNOWN_PROTOCOL serves as the "no error so far" value. */
1766 error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1768 ip[0] = vlib_buffer_get_current (b[0]);
1769 ip[1] = vlib_buffer_get_current (b[1]);
1771 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1772 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1774 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1775 pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
/* Zero when both packets have the same packet type. */
1777 not_batch = pt[0] ^ pt[1];
/* Condition under which the checks are bypassed: not at the head of the
   arc, or both packets share a non-L4 type. The statement it guards is not
   visible in this listing (presumably a jump past the checks). */
1779 if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
/* Same type: paired checks. Mixed types: each packet is checked on its
   own (the per-packet guards are not visible in this listing). */
1782 if (PREDICT_TRUE (not_batch == 0))
1784 ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1785 ip4_local_check_src_x2 (b, ip, &last_check, error);
1791 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1792 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1796 ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1797 ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1803 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1804 head_of_feature_arc);
1805 ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1806 head_of_feature_arc);
/* Single loop for the remaining packets. */
1813 while (n_left_from > 0)
1815 error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1817 ip[0] = vlib_buffer_get_current (b[0]);
1818 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1819 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1821 if (head_of_feature_arc == 0 || pt[0])
1824 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1825 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1829 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1830 head_of_feature_arc);
/* Hand every packet to the next node chosen in nexts[]. */
1837 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1838 return frame->n_vectors;
/* Node function of ip4-local: runs the shared worker as the head of the
   ip4-local feature arc. */
1841 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1842 vlib_frame_t * frame)
1844 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/* Registration of the "ip4-local" graph node and its fixed next nodes. */
1848 VLIB_REGISTER_NODE (ip4_local_node) =
1850 .name = "ip4-local",
1851 .vector_size = sizeof (u32),
1852 .format_trace = format_ip4_forward_next_trace,
1853 .n_next_nodes = IP_LOCAL_N_NEXT,
1856 [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1857 [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1858 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1859 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1860 [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
/* Node function of ip4-local-end-of-arc: runs the shared worker with
   head_of_feature_arc == 0. */
1866 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1867 vlib_node_runtime_t * node,
1868 vlib_frame_t * frame)
1870 return ip4_local_inline (vm, node, frame, 0 /* not head of feature arc */ );
/* Registration of "ip4-local-end-of-arc"; declared as a sibling of
   "ip4-local". */
1874 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1875 .name = "ip4-local-end-of-arc",
1876 .vector_size = sizeof (u32),
1878 .format_trace = format_ip4_forward_next_trace,
1879 .sibling_of = "ip4-local",
/* Registers "ip4-local-end-of-arc" as a feature of the "ip4-local" arc. */
1882 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1883 .arc_name = "ip4-local",
1884 .node_name = "ip4-local-end-of-arc",
1885 .runs_before = 0, /* not before any other features */
1889 #ifndef CLIB_MARCH_VARIANT
/*
 * Route locally-received packets of IP protocol `protocol` to the graph
 * node `node_index`: the node is added as a next of ip4-local and the
 * resulting next index is stored in local_next_by_ip_protocol[protocol].
 * protocol must be a valid index of that table (checked by ASSERT only).
 */
1891 ip4_register_protocol (u32 protocol, u32 node_index)
1893 vlib_main_t *vm = vlib_get_main ();
1894 ip4_main_t *im = &ip4_main;
1895 ip_lookup_main_t *lm = &im->lookup_main;
1897 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1898 lm->local_next_by_ip_protocol[protocol] =
1899 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * Undo ip4_register_protocol(): packets of IP protocol `protocol` are sent
 * to IP_LOCAL_NEXT_PUNT again. protocol must be a valid index of
 * local_next_by_ip_protocol (checked by ASSERT only).
 */
1903 ip4_unregister_protocol (u32 protocol)
1905 ip4_main_t *im = &ip4_main;
1906 ip_lookup_main_t *lm = &im->lookup_main;
1908 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1909 lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
/*
 * CLI handler for "show ip local": prints one line per IP protocol whose
 * entry in local_next_by_ip_protocol is not IP_LOCAL_NEXT_PUNT, giving the
 * protocol and the name of the node it is dispatched to.
 */
1913 static clib_error_t *
1914 show_ip_local_command_fn (vlib_main_t * vm,
1915 unformat_input_t * input, vlib_cli_command_t * cmd)
1917 ip4_main_t *im = &ip4_main;
1918 ip_lookup_main_t *lm = &im->lookup_main;
1921 vlib_cli_output (vm, "Protocols handled by ip4_local");
1922 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1924 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
/* Translate the stored next index into the node index behind it. */
1926 u32 node_index = vlib_get_node (vm,
1927 ip4_local_node.index)->
1928 next_nodes[lm->local_next_by_ip_protocol[i]];
1929 vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1930 format_vlib_node_name, vm, node_index);
1939 * Display the set of protocols handled by the local IPv4 stack.
1942 * Example of how to display local protocol table:
1943 * @cliexstart{show ip local}
1944 * Protocols handled by ip4_local
1951 VLIB_CLI_COMMAND (show_ip_local, static) =
/* Binds the "show ip local" CLI path to show_ip_local_command_fn. */
1953 .path = "show ip local",
1954 .function = show_ip_local_command_fn,
1955 .short_help = "show ip local",
/*
 * Shared worker of the ip4-arp (is_glean == 0) and ip4-glean
 * (is_glean == 1) nodes.
 *
 * Every incoming packet is placed on the IP4_ARP_NEXT_DROP frame, and its
 * error is set to describe what happened (throttled, already resolved,
 * wrong adjacency type, no buffers, no source address, request sent).
 * When a request is to be sent, a fresh buffer is taken from the ARP
 * request template, filled in and handed to the adjacency's
 * rewrite_header.next_index.
 *
 * Returns frame->n_vectors.
 */
1960 ip4_arp_inline (vlib_main_t * vm,
1961 vlib_node_runtime_t * node,
1962 vlib_frame_t * frame, int is_glean)
1964 vnet_main_t *vnm = vnet_get_main ();
1965 ip4_main_t *im = &ip4_main;
1966 ip_lookup_main_t *lm = &im->lookup_main;
1967 u32 *from, *to_next_drop;
1968 uword n_left_from, n_left_to_next_drop, next_index;
1969 u32 thread_index = vm->thread_index;
1972 if (node->flags & VLIB_NODE_FLAG_TRACE)
1973 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Seed for the ARP throttle checks performed on this thread below. */
1975 seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1977 from = vlib_frame_vector_args (frame);
1978 n_left_from = frame->n_vectors;
1979 next_index = node->cached_next_index;
1980 if (next_index == IP4_ARP_NEXT_DROP)
1981 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1983 while (n_left_from > 0)
1985 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1986 to_next_drop, n_left_to_next_drop);
1988 while (n_left_from > 0 && n_left_to_next_drop > 0)
1990 u32 pi0, bi0, adj_index0, sw_if_index0;
1991 ip_adjacency_t *adj0;
1992 vlib_buffer_t *p0, *b0;
1993 ip4_address_t resolve0;
1994 ethernet_arp_header_t *h0;
1995 vnet_hw_interface_t *hw_if0;
1999 p0 = vlib_get_buffer (vm, pi0);
/* The original packet always goes to the drop frame. */
2003 to_next_drop[0] = pi0;
2005 n_left_to_next_drop -= 1;
2007 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2008 adj0 = adj_get (adj_index0);
/* Address to resolve: either the packet's destination or the next hop of
   the incomplete adjacency (the condition choosing between the two is not
   visible in this listing). */
2012 /* resolve the packet's destination */
2013 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
2014 resolve0 = ip0->dst_address;
2018 /* resolve the incomplete adj */
2019 resolve0 = adj0->sub_type.nbr.next_hop.ip4;
2022 /* combine the address and interface for the hash key */
2023 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2024 r0 = (u64) resolve0.data_u32 << 32;
2027 if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
2029 p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
2034 * the adj has been updated to a rewrite but the node the DPO that got
2035 * us here hasn't - yet. no big deal. we'll drop while we wait.
2037 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2039 p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
2044 * Can happen if the control-plane is programming tables
2045 * with traffic flowing; at least that's today's lame excuse.
2047 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2048 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2050 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2053 /* Send ARP request. */
2055 vlib_packet_template_get_packet (vm,
2056 &im->ip4_arp_request_packet_template,
2058 /* Seems we're out of buffers */
2059 if (PREDICT_FALSE (!h0))
2061 p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
2065 b0 = vlib_get_buffer (vm, bi0);
2067 /* copy the persistent fields from the original */
2068 clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
2070 /* Add rewrite/encap string for ARP packet. */
2071 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2073 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2075 /* Src ethernet address in ARP header. */
2076 mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
2077 hw_if0->hw_address);
2080 /* The interface's source address is stashed in the Glean Adj */
2081 h0->ip4_over_ethernet[0].ip4 =
2082 adj0->sub_type.glean.receive_addr.ip4;
2086 /* Src IP address in ARP header. */
2087 if (ip4_src_address_for_packet (lm, sw_if_index0,
2088 &h0->ip4_over_ethernet[0].ip4))
2090 /* No source address available */
2091 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
/* The request buffer will not be sent, so give it back. */
2092 vlib_buffer_free (vm, &bi0, 1);
/* Target IP of the request is the address being resolved. */
2096 h0->ip4_over_ethernet[1].ip4 = resolve0;
2098 p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
2100 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2101 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2102 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
/* Expose the rewrite bytes written in front of the ARP header. */
2104 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2106 vlib_set_next_frame_buffer (vm, node,
2107 adj0->rewrite_header.next_index, bi0);
2110 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2113 return frame->n_vectors;
/* Node function of ip4-arp: shared worker with is_glean == 0. */
2116 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2117 vlib_frame_t * frame)
2119 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function of ip4-glean: shared worker with is_glean == 1. */
2122 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2123 vlib_frame_t * frame)
2125 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter descriptions for the IP4_ARP_ERROR_* codes, indexed by code;
   shared by the ip4-arp and ip4-glean node registrations. */
2128 static char *ip4_arp_error_strings[] = {
2129 [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
2130 [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
2131 [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
2132 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2133 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2134 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Registration of the ip4_arp_node graph node: ARP error counters and a
   single fixed next, "error-drop". */
2138 VLIB_REGISTER_NODE (ip4_arp_node) =
2141 .vector_size = sizeof (u32),
2142 .format_trace = format_ip4_forward_next_trace,
2143 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2144 .error_strings = ip4_arp_error_strings,
2145 .n_next_nodes = IP4_ARP_N_NEXT,
2148 [IP4_ARP_NEXT_DROP] = "error-drop",
/* Registration of the "ip4-glean" graph node; it reuses the ARP error
   strings and has "error-drop" as its fixed next. */
2152 VLIB_REGISTER_NODE (ip4_glean_node) =
2154 .name = "ip4-glean",
2155 .vector_size = sizeof (u32),
2156 .format_trace = format_ip4_forward_next_trace,
2157 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2158 .error_strings = ip4_arp_error_strings,
2159 .n_next_nodes = IP4_ARP_N_NEXT,
2161 [IP4_ARP_NEXT_DROP] = "error-drop",
/* X-macro list of IP4_ARP_ERROR_* suffixes, expanded with _() by
   arp_notrace_init(). Only the final entry is visible in this listing. */
2166 #define foreach_notrace_ip4_arp_error \
2172 _(NO_SOURCE_ADDRESS)
/*
 * Init-time hook: for each error listed in foreach_notrace_ip4_arp_error,
 * calls vnet_pcap_drop_trace_filter_add_del() with the matching ip4-arp
 * node error (the remaining arguments are not visible in this listing).
 * Registered through VLIB_INIT_FUNCTION below.
 */
2174 static clib_error_t *
2175 arp_notrace_init (vlib_main_t * vm)
2177 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2179 /* don't trace ARP request packets */
2181 vnet_pcap_drop_trace_filter_add_del \
2182 (rt->errors[IP4_ARP_ERROR_##a], \
2184 foreach_notrace_ip4_arp_error;
2189 VLIB_INIT_FUNCTION (arp_notrace_init);
2192 #ifndef CLIB_MARCH_VARIANT
2193 /* Send an ARP request to see if given destination is reachable on given interface. */
2195 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2198 vnet_main_t *vnm = vnet_get_main ();
2199 ip4_main_t *im = &ip4_main;
2200 ethernet_arp_header_t *h;
2202 ip_interface_address_t *ia;
2203 ip_adjacency_t *adj;
2204 vnet_hw_interface_t *hi;
2205 vnet_sw_interface_t *si;
2209 u8 unicast_rewrite = 0;
2211 si = vnet_get_sw_interface (vnm, sw_if_index);
2213 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2215 return clib_error_return (0, "%U: interface %U down",
2216 format_ip4_address, dst,
2217 format_vnet_sw_if_index_name, vnm,
2222 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2225 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2226 return clib_error_return
2228 "no matching interface address for destination %U (interface %U)",
2229 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2233 h = vlib_packet_template_get_packet (vm,
2234 &im->ip4_arp_request_packet_template,
2238 return clib_error_return (0, "ARP request packet allocation failed");
2240 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2241 if (PREDICT_FALSE (!hi->hw_address))
2243 return clib_error_return (0, "%U: interface %U do not support ip probe",
2244 format_ip4_address, dst,
2245 format_vnet_sw_if_index_name, vnm,
2249 mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2251 h->ip4_over_ethernet[0].ip4 = src[0];
2252 h->ip4_over_ethernet[1].ip4 = dst[0];
2254 b = vlib_get_buffer (vm, bi);
2255 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2256 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2258 ip46_address_t nh = {
2262 ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2263 VNET_LINK_IP4, &nh, sw_if_index);
2266 /* Peer has been previously resolved, retrieve glean adj instead */
2267 if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2270 unicast_rewrite = 1;
2274 ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2275 VNET_LINK_IP4, sw_if_index, &nh);
2280 /* Add encapsulation string for software interface (e.g. ethernet header). */
2281 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2282 if (unicast_rewrite)
2284 u16 *etype = vlib_buffer_get_current (b) - 2;
2285 etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2287 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2290 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2291 u32 *to_next = vlib_frame_vector_args (f);
2294 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2298 return /* no error */ 0;
/* Next nodes of the ip4 rewrite path: drop, ICMP error generation and
   fragmentation; IP4_REWRITE_N_NEXT is the count. */
2304 IP4_REWRITE_NEXT_DROP,
2305 IP4_REWRITE_NEXT_ICMP_ERROR,
2306 IP4_REWRITE_NEXT_FRAGMENT,
2307 IP4_REWRITE_N_NEXT /* Last */
2308 } ip4_rewrite_next_t;
2311 * The bits of an IPv4 address to mask to construct a multicast
2314 #if CLIB_ARCH_IS_BIG_ENDIAN
/* Both values select the same bytes of the address as stored in memory:
   the low 7 bits of the second byte plus the third and fourth bytes. */
2315 #define IP4_MCAST_ADDR_MASK 0x007fffff
2317 #define IP4_MCAST_ADDR_MASK 0xffff7f00
/*
 * Compare a packet's length with the adjacency's L3 MTU.
 *
 * b                - packet buffer
 * packet_len       - length to check
 * adj_packet_bytes - maximum L3 packet size of the outgoing adjacency
 * df               - non-zero when the Don't Fragment flag is set
 * next, error      - out: updated only when the packet is too big
 *
 * When packet_len exceeds adj_packet_bytes, *error is set to
 * IP4_ERROR_MTU_EXCEEDED and the packet is steered either to the ICMP error
 * node ("fragmentation needed and DF set") or to the fragmentation node.
 * The test choosing between the two is not visible in this listing
 * (presumably df).
 */
2321 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2322 u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2324 if (packet_len > adj_packet_bytes)
2326 *error = IP4_ERROR_MTU_EXCEEDED;
2329 icmp4_error_set_vnet_buffer
2330 (b, ICMP4_destination_unreachable,
2331 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2333 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2337 /* IP fragmentation */
2338 ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2339 IP4_FRAG_NEXT_IP4_REWRITE, 0);
2340 *next = IP4_REWRITE_NEXT_FRAGMENT;
/*
 * TTL handling for a packet about to be rewritten.
 *
 * b     - packet buffer
 * ip    - IPv4 header of the packet
 * next  - out: set to IP4_REWRITE_NEXT_ICMP_ERROR when the TTL expires
 * (a further parameter, the error out-value, is declared on a line that is
 * not visible in this listing)
 *
 * Locally originated packets only have the LOCALLY_ORIGINATED flag cleared.
 * For the others the header checksum is updated for a TTL decrement, and an
 * ICMP time-exceeded error is prepared when the TTL reaches zero.
 */
2345 /* Decrement TTL & update checksum.
2346 Works either endian, so no need for byte swap. */
2347 static_always_inline void
2348 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2353 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2355 b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2361 /* Input node should have rejected packets with ttl 0. */
2362 ASSERT (ip->ttl > 0);
/* Incremental checksum update: 0x0100 in network order is added for the
   TTL decrement, then the end-around carry is folded back in. */
2364 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2365 checksum += checksum >= 0xffff;
2367 ip->checksum = checksum;
2372 * If the ttl drops below 1 when forwarding, generate
2375 if (PREDICT_FALSE (ttl <= 0))
2377 *error = IP4_ERROR_TIME_EXPIRED;
2378 vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2379 icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2380 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2382 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2385 /* Verify checksum. */
2386 ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2387 (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2392 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2393 vlib_node_runtime_t * node,
2394 vlib_frame_t * frame,
2395 int do_counters, int is_midchain, int is_mcast,
2398 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2399 u32 *from = vlib_frame_vector_args (frame);
2400 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2401 u16 nexts[VLIB_FRAME_SIZE], *next;
2403 vlib_node_runtime_t *error_node =
2404 vlib_node_get_runtime (vm, ip4_input_node.index);
2406 n_left_from = frame->n_vectors;
2407 u32 thread_index = vm->thread_index;
2409 vlib_get_buffers (vm, from, bufs, n_left_from);
2410 clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2412 if (n_left_from >= 6)
2415 for (i = 2; i < 6; i++)
2416 vlib_prefetch_buffer_header (bufs[i], LOAD);
2421 while (n_left_from >= 8)
2423 ip_adjacency_t *adj0, *adj1;
2424 ip4_header_t *ip0, *ip1;
2425 u32 rw_len0, error0, adj_index0;
2426 u32 rw_len1, error1, adj_index1;
2427 u32 tx_sw_if_index0, tx_sw_if_index1;
2430 vlib_prefetch_buffer_header (b[6], LOAD);
2431 vlib_prefetch_buffer_header (b[7], LOAD);
2433 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2434 adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2437 * pre-fetch the per-adjacency counters
2441 vlib_prefetch_combined_counter (&adjacency_counters,
2442 thread_index, adj_index0);
2443 vlib_prefetch_combined_counter (&adjacency_counters,
2444 thread_index, adj_index1);
2447 ip0 = vlib_buffer_get_current (b[0]);
2448 ip1 = vlib_buffer_get_current (b[1]);
2450 error0 = error1 = IP4_ERROR_NONE;
2452 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2453 ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2455 /* Rewrite packet header and updates lengths. */
2456 adj0 = adj_get (adj_index0);
2457 adj1 = adj_get (adj_index1);
2459 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2460 rw_len0 = adj0[0].rewrite_header.data_bytes;
2461 rw_len1 = adj1[0].rewrite_header.data_bytes;
2462 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2463 vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2465 p = vlib_buffer_get_current (b[2]);
2466 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2467 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2469 p = vlib_buffer_get_current (b[3]);
2470 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2471 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2473 /* Check MTU of outgoing interface. */
2474 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2475 u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2477 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2478 ip0_len = gso_mtu_sz (b[0]);
2479 if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2480 ip1_len = gso_mtu_sz (b[1]);
2482 ip4_mtu_check (b[0], ip0_len,
2483 adj0[0].rewrite_header.max_l3_packet_bytes,
2484 ip0->flags_and_fragment_offset &
2485 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2487 ip4_mtu_check (b[1], ip1_len,
2488 adj1[0].rewrite_header.max_l3_packet_bytes,
2489 ip1->flags_and_fragment_offset &
2490 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2495 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2496 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2497 IP4_ERROR_SAME_INTERFACE : error0);
2498 error1 = ((adj1[0].rewrite_header.sw_if_index ==
2499 vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2500 IP4_ERROR_SAME_INTERFACE : error1);
2503 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2504 * to see the IP header */
2505 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2507 u32 next_index = adj0[0].rewrite_header.next_index;
2508 b[0]->current_data -= rw_len0;
2509 b[0]->current_length += rw_len0;
2510 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2511 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2514 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2515 vnet_feature_arc_start (lm->output_feature_arc_index,
2516 tx_sw_if_index0, &next_index, b[0]);
2517 next[0] = next_index;
2521 b[0]->error = error_node->errors[error0];
2523 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2525 u32 next_index = adj1[0].rewrite_header.next_index;
2526 b[1]->current_data -= rw_len1;
2527 b[1]->current_length += rw_len1;
2529 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2530 vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2533 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2534 vnet_feature_arc_start (lm->output_feature_arc_index,
2535 tx_sw_if_index1, &next_index, b[1]);
2536 next[1] = next_index;
2540 b[1]->error = error_node->errors[error1];
2544 calc_checksums (vm, b[0]);
2545 calc_checksums (vm, b[1]);
2547 /* Guess we are only writing on simple Ethernet header. */
2548 vnet_rewrite_two_headers (adj0[0], adj1[0],
2549 ip0, ip1, sizeof (ethernet_header_t));
2552 * Bump the per-adjacency counters
2556 vlib_increment_combined_counter
2557 (&adjacency_counters,
2559 adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2561 vlib_increment_combined_counter
2562 (&adjacency_counters,
2564 adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2569 if (adj0->sub_type.midchain.fixup_func)
2570 adj0->sub_type.midchain.fixup_func
2571 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2572 if (adj1->sub_type.midchain.fixup_func)
2573 adj1->sub_type.midchain.fixup_func
2574 (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2580 * copy bytes from the IP address into the MAC rewrite
2582 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2583 adj0->rewrite_header.dst_mcast_offset,
2584 &ip0->dst_address.as_u32, (u8 *) ip0);
2585 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2586 adj1->rewrite_header.dst_mcast_offset,
2587 &ip1->dst_address.as_u32, (u8 *) ip1);
2595 while (n_left_from > 0)
2597 ip_adjacency_t *adj0;
2599 u32 rw_len0, adj_index0, error0;
2600 u32 tx_sw_if_index0;
2602 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2604 adj0 = adj_get (adj_index0);
2607 vlib_prefetch_combined_counter (&adjacency_counters,
2608 thread_index, adj_index0);
2610 ip0 = vlib_buffer_get_current (b[0]);
2612 error0 = IP4_ERROR_NONE;
2614 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2617 /* Update packet buffer attributes/set output interface. */
2618 rw_len0 = adj0[0].rewrite_header.data_bytes;
2619 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2621 /* Check MTU of outgoing interface. */
2622 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2623 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2624 ip0_len = gso_mtu_sz (b[0]);
2626 ip4_mtu_check (b[0], ip0_len,
2627 adj0[0].rewrite_header.max_l3_packet_bytes,
2628 ip0->flags_and_fragment_offset &
2629 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2634 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2635 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2636 IP4_ERROR_SAME_INTERFACE : error0);
2639 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2640 * to see the IP header */
2641 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2643 u32 next_index = adj0[0].rewrite_header.next_index;
2644 b[0]->current_data -= rw_len0;
2645 b[0]->current_length += rw_len0;
2646 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2647 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2650 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2651 vnet_feature_arc_start (lm->output_feature_arc_index,
2652 tx_sw_if_index0, &next_index, b[0]);
2653 next[0] = next_index;
2657 b[0]->error = error_node->errors[error0];
2661 calc_checksums (vm, b[0]);
2663 /* Guess we are only writing on simple Ethernet header. */
2664 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2667 vlib_increment_combined_counter
2668 (&adjacency_counters,
2669 thread_index, adj_index0, 1,
2670 vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2674 if (adj0->sub_type.midchain.fixup_func)
2675 adj0->sub_type.midchain.fixup_func
2676 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2682 * copy bytes from the IP address into the MAC rewrite
2684 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2685 adj0->rewrite_header.dst_mcast_offset,
2686 &ip0->dst_address.as_u32, (u8 *) ip0);
2695 /* Need to do trace after rewrites to pick up new packet data. */
2696 if (node->flags & VLIB_NODE_FLAG_TRACE)
2697 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2699 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2700 return frame->n_vectors;
/*
 * Common entry point used by the IPv4 rewrite node functions.
 *
 * Forwards vm, node, frame and the do_counters / is_midchain / is_mcast
 * flags unchanged to ip4_rewrite_inline_with_gso() and returns its result.
 * The only decision made here is the value of the trailing do_gso argument:
 * it is 0 unless at least one GSO-enabled interface is registered in
 * vnm->interface_main (presumably 1 in that case -- TODO confirm).
 */
2704 ip4_rewrite_inline (vlib_main_t * vm,
2705 vlib_node_runtime_t * node,
2706 vlib_frame_t * frame,
2707 int do_counters, int is_midchain, int is_mcast)
2709 vnet_main_t *vnm = vnet_get_main ();
/* Having any GSO interface is hinted as the unlikely case. */
2710 if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2711 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2712 is_midchain, is_mcast,
2715 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2716 is_midchain, is_mcast,
2717 0 /* no do_gso */ );
2721 /** @brief IPv4 rewrite node.
2724 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2725 header checksum, fetch the ip adjacency, check the outbound mtu,
2726 apply the adjacency rewrite, and send pkts to the adjacency
2727 rewrite header's rewrite_next_index.
2729 @param vm vlib_main_t corresponding to the current thread
2730 @param node vlib_node_runtime_t
2731 @param frame vlib_frame_t whose contents should be dispatched
2733 @par Graph mechanics: buffer metadata, next index usage
2736 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2737 - the rewrite adjacency index
2738 - <code>adj->lookup_next_index</code>
2739 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2740 the packet will be dropped.
2741 - <code>adj->rewrite_header</code>
2742 - Rewrite string length, rewrite string, next_index
2745 - <code>b->current_data, b->current_length</code>
2746 - Updated net of applying the rewrite string
2748 <em>Next Indices:</em>
2749 - <code> adj->rewrite_header.next_index </code>
/*
 * Node function for ip4_rewrite_node: calls ip4_rewrite_inline() with
 * is_midchain = 0 and is_mcast = 0.  do_counters is 1 when
 * adj_are_counters_enabled() reports true and 0 otherwise.
 */
2753 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2754 vlib_frame_t * frame)
2756 if (adj_are_counters_enabled ())
2757 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2759 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
/*
 * Node function for ip4_rewrite_bcast_node.  It passes the same flags to
 * ip4_rewrite_inline() as ip4_rewrite_node does (is_midchain = 0,
 * is_mcast = 0); only do_counters varies with adj_are_counters_enabled().
 */
2762 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2763 vlib_node_runtime_t * node,
2764 vlib_frame_t * frame)
2766 if (adj_are_counters_enabled ())
2767 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2769 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
/*
 * Node function for ip4_midchain_node: calls ip4_rewrite_inline() with
 * is_midchain = 1 and is_mcast = 0.  do_counters is 1 when
 * adj_are_counters_enabled() reports true and 0 otherwise.
 */
2772 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2773 vlib_node_runtime_t * node,
2774 vlib_frame_t * frame)
2776 if (adj_are_counters_enabled ())
2777 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2779 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
/*
 * Node function for ip4_rewrite_mcast_node: calls ip4_rewrite_inline() with
 * is_midchain = 0 and is_mcast = 1.  do_counters is 1 when
 * adj_are_counters_enabled() reports true and 0 otherwise.
 */
2782 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2783 vlib_node_runtime_t * node,
2784 vlib_frame_t * frame)
2786 if (adj_are_counters_enabled ())
2787 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2789 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
/*
 * Node function for ip4_mcast_midchain_node: calls ip4_rewrite_inline() with
 * both is_midchain = 1 and is_mcast = 1.  do_counters is 1 when
 * adj_are_counters_enabled() reports true and 0 otherwise.
 */
2792 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2793 vlib_node_runtime_t * node,
2794 vlib_frame_t * frame)
2796 if (adj_are_counters_enabled ())
2797 return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2799 return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
/*
 * Registration of the "ip4-rewrite" graph node.  The frame vector holds
 * u32 elements, traces are rendered with format_ip4_rewrite_trace, and the
 * node declares IP4_REWRITE_N_NEXT next nodes, of which the drop,
 * icmp-error and fragment slots are named here.
 */
2803 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2804 .name = "ip4-rewrite",
2805 .vector_size = sizeof (u32),
2807 .format_trace = format_ip4_rewrite_trace,
2809 .n_next_nodes = IP4_REWRITE_N_NEXT,
2811 [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2812 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2813 [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
/*
 * Registration of the "ip4-rewrite-bcast" graph node, declared as a sibling
 * of "ip4-rewrite" (presumably so that it shares that node's next-node
 * arcs -- confirm against the vlib node documentation).
 */
2817 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2818 .name = "ip4-rewrite-bcast",
2819 .vector_size = sizeof (u32),
2821 .format_trace = format_ip4_rewrite_trace,
2822 .sibling_of = "ip4-rewrite",
/*
 * Registration of the "ip4-rewrite-mcast" graph node, declared as a sibling
 * of "ip4-rewrite" (presumably so that it shares that node's next-node
 * arcs -- confirm against the vlib node documentation).
 */
2825 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2826 .name = "ip4-rewrite-mcast",
2827 .vector_size = sizeof (u32),
2829 .format_trace = format_ip4_rewrite_trace,
2830 .sibling_of = "ip4-rewrite",
/*
 * Registration of the "ip4-mcast-midchain" graph node, declared as a sibling
 * of "ip4-rewrite" (presumably so that it shares that node's next-node
 * arcs -- confirm against the vlib node documentation).
 */
2833 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2834 .name = "ip4-mcast-midchain",
2835 .vector_size = sizeof (u32),
2837 .format_trace = format_ip4_rewrite_trace,
2838 .sibling_of = "ip4-rewrite",
/*
 * Registration of the "ip4-midchain" graph node, declared as a sibling of
 * "ip4-rewrite".
 *
 * NOTE(review): this registration uses format_ip4_forward_next_trace,
 * whereas the other rewrite node registrations in this file use
 * format_ip4_rewrite_trace -- confirm the difference is intentional.
 */
2841 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2842 .name = "ip4-midchain",
2843 .vector_size = sizeof (u32),
2844 .format_trace = format_ip4_forward_next_trace,
2845 .sibling_of = "ip4-rewrite",
/*
 * Cross-check the mtrie lookup against the FIB table lookup for one address.
 *
 * @param a          address to look up
 * @param fib_index0 index handed to ip4_fib_get() to select the FIB
 * @return non-zero when the index obtained by walking the mtrie equals the
 *         value returned by ip4_fib_table_lookup_lb() for the same address,
 *         zero when the two disagree.
 */
2850 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2852 ip4_fib_mtrie_t *mtrie0;
2853 ip4_fib_mtrie_leaf_t leaf0;
2856 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
/* Walk the mtrie: the initial step followed by the steps for 2 and 3. */
2858 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2859 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2860 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2862 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2864 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/*
 * CLI handler for "test lookup".
 *
 * Accepts "table <n>", "count <f>" and an IPv4 address in any order; any
 * other token produces an "unknown input" error.  It then runs
 * ip4_lookup_validate() on n consecutive addresses starting at the parsed
 * base address and prints either the number of errors or a "No errors"
 * line.
 *
 * NOTE(review): the value parsed for "table" is passed directly to
 * ip4_fib_get() and to ip4_lookup_validate()'s fib_index0 parameter, i.e.
 * it is used as a FIB index rather than being translated from a table id.
 *
 * NOTE(review): ip4_base_address has no initializer and is only written
 * when an address token is parsed, yet the validation loop reads it
 * regardless -- confirm callers always supply an address.
 */
2867 static clib_error_t *
2868 test_lookup_command_fn (vlib_main_t * vm,
2869 unformat_input_t * input, vlib_cli_command_t * cmd)
2876 ip4_address_t ip4_base_address;
2879 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2881 if (unformat (input, "table %d", &table_id))
2883 /* Make sure the entry exists. */
2884 fib = ip4_fib_get (table_id);
2885 if ((fib) && (fib->index != table_id))
2886 return clib_error_return (0, "<fib-index> %d does not exist",
2889 else if (unformat (input, "count %f", &count))
2892 else if (unformat (input, "%U",
2893 unformat_ip4_address, &ip4_base_address))
2896 return clib_error_return (0, "unknown input `%U'",
2897 format_unformat_error, input);
/* Validate n consecutive addresses, starting from the base address. */
2902 for (i = 0; i < n; i++)
2904 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Step to the next address: add 1 in host byte order, store in network order. */
2907 ip4_base_address.as_u32 =
2908 clib_host_to_net_u32 (1 +
2909 clib_net_to_host_u32 (ip4_base_address.as_u32));
2913 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2915 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2921 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2922 * given FIB table to determine if there is a conflict with the
2923 * adjacency table. The fib-id can be determined by using the
2924 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2927 * @todo This command uses fib-id, other commands use table-id (not
2928 * just a name, they are different indexes). Would like to change this
2929 * to table-id for consistency.
2932 * Example of how to run the test lookup command:
2933 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2934 * No errors in 2 lookups
/* Registers the "test lookup" CLI command, handled by test_lookup_command_fn. */
2938 VLIB_CLI_COMMAND (lookup_test_command, static) =
2940 .path = "test lookup",
2941 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2942 .function = test_lookup_command_fn,
2946 #ifndef CLIB_MARCH_VARIANT
/*
 * Set the flow hash configuration of the IPv4 FIB identified by table_id.
 *
 * The table id is resolved to a FIB index with fib_table_find(); when no
 * such table exists (index ~0) VNET_API_ERROR_NO_SUCH_FIB is returned,
 * otherwise fib_table_set_flow_hash_config() is called for that FIB.
 * Only compiled when CLIB_MARCH_VARIANT is not defined.
 */
2948 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2952 fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2954 if (~0 == fib_index)
2955 return VNET_API_ERROR_NO_SUCH_FIB;
2957 fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
/*
 * CLI handler for "set ip flow-hash".
 *
 * Parses an optional "table <n>" plus any of the keywords generated from
 * foreach_flow_hash_bit; each matched keyword ORs its bit into
 * flow_hash_config and sets the matched flag.  The result is applied with
 * vnet_set_ip4_flow_hash().  VNET_API_ERROR_NO_SUCH_FIB is reported as
 * "no such FIB table"; the "BUG: illegal flow hash config" warning is
 * presumably the fallback for any other non-success return value -- TODO
 * confirm.
 */
2964 static clib_error_t *
2965 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2966 unformat_input_t * input,
2967 vlib_cli_command_t * cmd)
2971 u32 flow_hash_config = 0;
2974 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2976 if (unformat (input, "table %d", &table_id))
2979 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2980 foreach_flow_hash_bit
2987 return clib_error_return (0, "unknown input `%U'",
2988 format_unformat_error, input);
2990 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2996 case VNET_API_ERROR_NO_SUCH_FIB:
2997 return clib_error_return (0, "no such FIB table %d", table_id);
3000 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3008 * Configure the set of IPv4 fields used by the flow hash.
3011 * Example of how to set the flow hash on a given table:
3012 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3013 * Example of how to display the configured flow hash:
3014 * @cliexstart{show ip fib}
3015 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3018 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3019 * [0] [@0]: dpo-drop ip6
3022 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3023 * [0] [@0]: dpo-drop ip6
3026 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3027 * [0] [@0]: dpo-drop ip6
3030 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3031 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3034 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3035 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3036 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3037 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3038 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3041 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3042 * [0] [@0]: dpo-drop ip6
3043 * 255.255.255.255/32
3045 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3046 * [0] [@0]: dpo-drop ip6
3047 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3050 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3051 * [0] [@0]: dpo-drop ip6
3054 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3055 * [0] [@0]: dpo-drop ip6
3058 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3059 * [0] [@4]: ipv4-glean: af_packet0
3062 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3063 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3066 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3067 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3070 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3071 * [0] [@4]: ipv4-glean: af_packet1
3074 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3075 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3078 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3079 * [0] [@0]: dpo-drop ip6
3082 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3083 * [0] [@0]: dpo-drop ip6
3084 * 255.255.255.255/32
3086 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3087 * [0] [@0]: dpo-drop ip6
/* Registers the "set ip flow-hash" CLI command, handled by set_ip_flow_hash_command_fn. */
3091 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3093 .path = "set ip flow-hash",
3095 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3096 .function = set_ip_flow_hash_command_fn,
3100 #ifndef CLIB_MARCH_VARIANT
/*
 * Bind (or unbind) a classifier table to an interface for IPv4.
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is not a
 * live entry in the software interface pool, and
 * VNET_API_ERROR_NO_SUCH_ENTRY when table_index is not ~0 and does not
 * refer to a live classifier table.  Otherwise the table index is recorded
 * per interface and, if the interface has an IPv4 address, a FIB entry
 * sourced from FIB_SOURCE_CLASSIFY is added for that address (table_index
 * != ~0) or removed (table_index == ~0).
 * Only compiled when CLIB_MARCH_VARIANT is not defined.
 */
3102 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3105 vnet_main_t *vnm = vnet_get_main ();
3106 vnet_interface_main_t *im = &vnm->interface_main;
3107 ip4_main_t *ipm = &ip4_main;
3108 ip_lookup_main_t *lm = &ipm->lookup_main;
3109 vnet_classify_main_t *cm = &vnet_classify_main;
3110 ip4_address_t *if_addr;
/* Reject unknown interfaces and unknown (non-~0) classifier tables. */
3112 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3113 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3115 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3116 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Grow the per-interface vector if needed, then record the binding. */
3118 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3119 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
/* The FIB is only touched when the interface has an IPv4 address. */
3121 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3123 if (NULL != if_addr)
3125 fib_prefix_t pfx = {
3127 .fp_proto = FIB_PROTOCOL_IP4,
3128 .fp_addr.ip4 = *if_addr,
3132 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3136 if (table_index != (u32) ~ 0)
3138 dpo_id_t dpo = DPO_INVALID;
3143 classify_dpo_create (DPO_PROTO_IP4, table_index));
/* Install the classify DPO for the interface address prefix. */
3145 fib_table_entry_special_dpo_add (fib_index,
3147 FIB_SOURCE_CLASSIFY,
3148 FIB_ENTRY_FLAG_NONE, &dpo);
/* Remove the classify-sourced entry for the prefix. */
3153 fib_table_entry_special_remove (fib_index,
3154 &pfx, FIB_SOURCE_CLASSIFY);
/*
 * CLI handler for "set ip classify".
 *
 * Parses "table-index <n>" and "intfc <interface>"; both are mandatory and
 * a missing one yields a descriptive error.  The binding is applied with
 * vnet_set_ip4_classify_intfc(), whose NO_MATCHING_INTERFACE and
 * NO_SUCH_ENTRY results are translated into "No such interface" and
 * "No such classifier table" errors respectively.
 */
3162 static clib_error_t *
3163 set_ip_classify_command_fn (vlib_main_t * vm,
3164 unformat_input_t * input,
3165 vlib_cli_command_t * cmd)
3167 u32 table_index = ~0;
/* Tracked separately because ~0 is itself an accepted table_index value. */
3168 int table_index_set = 0;
3169 u32 sw_if_index = ~0;
3172 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3174 if (unformat (input, "table-index %d", &table_index))
3175 table_index_set = 1;
3176 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3177 vnet_get_main (), &sw_if_index))
3183 if (table_index_set == 0)
3184 return clib_error_return (0, "classify table-index must be specified");
3186 if (sw_if_index == ~0)
3187 return clib_error_return (0, "interface / subif must be specified");
3189 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3196 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3197 return clib_error_return (0, "No such interface");
3199 case VNET_API_ERROR_NO_SUCH_ENTRY:
3200 return clib_error_return (0, "No such classifier table");
3206 * Assign a classification table to an interface. The classification
3207 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3208 * commands. Once the table is created, use this command to filter packets
3212 * Example of how to assign a classification table to an interface:
3213 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
/* Registers the "set ip classify" CLI command, handled by set_ip_classify_command_fn. */
3216 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3218 .path = "set ip classify",
3220 "set ip classify intfc <interface> table-index <classify-idx>",
3221 .function = set_ip_classify_command_fn,
/*
 * Early configuration handler for the "ip" section.
 *
 * Accepts "heap-size <size>", parsed with unformat_memory_size; any other
 * input is rejected with an "invalid heap-size parameter" error.  The
 * parsed value is stored in ip4_main.mtrie_heap_size.
 */
3225 static clib_error_t *
3226 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3228 ip4_main_t *im = &ip4_main;
3231 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3233 if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3236 return clib_error_return (0,
3237 "invalid heap-size parameter `%U'",
3238 format_unformat_error, input);
3241 im->mtrie_heap_size = heapsize;
/* Hook ip4_config up as the early config function for the "ip" section. */
3246 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3249 * fd.io coding-style-patch-verification: ON
3252 * eval: (c-set-style "gnu")