2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h> /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
60 /** @brief IPv4 lookup node.
63 This is the main IPv4 lookup dispatch node.
65 @param vm vlib_main_t corresponding to the current thread
66 @param node vlib_node_runtime_t
67 @param frame vlib_frame_t whose contents should be dispatched
69 @par Graph mechanics: buffer metadata, next index usage
72 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73 - Indicates the @c sw_if_index value of the interface that the
74 packet was received on.
75 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76 - When the value is @c ~0 then the node performs a longest prefix
77 match (LPM) for the packet destination address in the FIB attached
78 to the receive interface.
79 - Otherwise perform LPM for the packet destination address in the
80 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81 value (0, 1, ...) and not a VRF id.
84 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85 - The lookup result adjacency index.
88 - Dispatches the packet to the node index found in
89 ip_adjacency_t @c adj->lookup_next_index
90 (where @c adj is the lookup result adjacency).
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
95 return ip4_lookup_inline (vm, node, frame);
/* Forward declaration: the trace formatter is defined further down in this
 * file but is referenced by the node registrations that follow. */
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph-node registration for "ip4-lookup".  Frame vector elements are
 * u32-sized, traces are rendered by format_ip4_lookup_trace, and the set of
 * next nodes comes from IP_LOOKUP_N_NEXT / IP4_LOOKUP_NEXT_NODES. */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
103 .name = "ip4-lookup",
104 .vector_size = sizeof (u32),
105 .format_trace = format_ip4_lookup_trace,
106 .n_next_nodes = IP_LOOKUP_N_NEXT,
107 .next_nodes = IP4_LOOKUP_NEXT_NODES,
/* "ip4-load-balance" node function.
 *
 * For every buffer in the frame: read the load-balance index left in
 * vnet_buffer(b)->ip.adj_index[VLIB_TX], pick one of its buckets, store the
 * bucket's dpoi_next_node as the buffer's next index and its dpoi_index back
 * into ip.adj_index[VLIB_TX], and bump the per-load-balance "via" combined
 * counter by one packet and the buffer chain length.  Buffers are handled
 * two at a time first, then one at a time, and finally enqueued with
 * vlib_buffer_enqueue_to_next().  Returns frame->n_vectors. */
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112 vlib_node_runtime_t * node,
113 vlib_frame_t * frame)
115 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
117 u32 thread_index = vm->thread_index;
118 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119 u16 nexts[VLIB_FRAME_SIZE], *next;
121 from = vlib_frame_vector_args (frame);
122 n_left = frame->n_vectors;
/* Translate all buffer indices to buffer pointers up front. */
125 vlib_get_buffers (vm, from, bufs, n_left);
/* ---- dual loop: buffers b[0] and b[1] ---- */
129 const load_balance_t *lb0, *lb1;
130 const ip4_header_t *ip0, *ip1;
131 u32 lbi0, hc0, lbi1, hc1;
132 const dpo_id_t *dpo0, *dpo1;
134 /* Prefetch next iteration. */
136 vlib_prefetch_buffer_header (b[2], LOAD);
137 vlib_prefetch_buffer_header (b[3], LOAD);
139 CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140 CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
143 ip0 = vlib_buffer_get_current (b[0]);
144 ip1 = vlib_buffer_get_current (b[1]);
/* The load-balance index arrives in the adjacency slot. */
145 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146 lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
148 lb0 = load_balance_get (lbi0);
149 lb1 = load_balance_get (lbi1);
152 * this node is for via FIBs we can re-use the hash value from the
153 * to node if present.
154 * We don't want to use the same hash value at each level in the recursion
155 * graph as that would lead to polarisation
/* More than one bucket: select by flow hash.  A non-zero hash already stored
 * in the buffer is halved (>> 1) and written back; otherwise a fresh hash is
 * computed from the IP header with the load-balance's hash config.  The
 * bucket index is the hash masked with lb_n_buckets_minus_1.  With a single
 * bucket, bucket 0 is used directly. */
159 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
161 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
163 hc0 = vnet_buffer (b[0])->ip.flow_hash =
164 vnet_buffer (b[0])->ip.flow_hash >> 1;
168 hc0 = vnet_buffer (b[0])->ip.flow_hash =
169 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
171 dpo0 = load_balance_get_fwd_bucket
172 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176 dpo0 = load_balance_get_bucket_i (lb0, 0);
/* Same bucket selection for the second buffer. */
178 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
180 if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
182 hc1 = vnet_buffer (b[1])->ip.flow_hash =
183 vnet_buffer (b[1])->ip.flow_hash >> 1;
187 hc1 = vnet_buffer (b[1])->ip.flow_hash =
188 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
190 dpo1 = load_balance_get_fwd_bucket
191 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195 dpo1 = load_balance_get_bucket_i (lb1, 0);
/* Publish the chosen bucket: next node and the object index it carries. */
198 next[0] = dpo0->dpoi_next_node;
199 next[1] = dpo1->dpoi_next_node;
201 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Counters are indexed by the load-balance index read on entry (lbi0/lbi1),
 * not by the bucket that was chosen. */
204 vlib_increment_combined_counter
205 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206 vlib_increment_combined_counter
207 (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
/* ---- single loop: same steps for one buffer at a time ---- */
216 const load_balance_t *lb0;
217 const ip4_header_t *ip0;
218 const dpo_id_t *dpo0;
221 ip0 = vlib_buffer_get_current (b[0]);
222 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
224 lb0 = load_balance_get (lbi0);
227 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
229 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
231 hc0 = vnet_buffer (b[0])->ip.flow_hash =
232 vnet_buffer (b[0])->ip.flow_hash >> 1;
236 hc0 = vnet_buffer (b[0])->ip.flow_hash =
237 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
239 dpo0 = load_balance_get_fwd_bucket
240 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244 dpo0 = load_balance_get_bucket_i (lb0, 0);
247 next[0] = dpo0->dpoi_next_node;
248 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
250 vlib_increment_combined_counter
251 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
/* Hand every buffer to the next node recorded for it in nexts[]. */
258 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
/* Tracing runs after the rewrite of adj_index[VLIB_TX], so the trace shows
 * the chosen bucket's index. */
259 if (node->flags & VLIB_NODE_FLAG_TRACE)
260 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
262 return frame->n_vectors;
/* Graph-node registration for "ip4-load-balance".  It is declared a sibling
 * of "ip4-lookup" and reuses the lookup trace formatter. */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
268 .name = "ip4-load-balance",
269 .vector_size = sizeof (u32),
270 .sibling_of = "ip4-lookup",
271 .format_trace = format_ip4_lookup_trace,
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279 ip_interface_address_t ** result_ia)
281 ip_lookup_main_t *lm = &im->lookup_main;
282 ip_interface_address_t *ia = 0;
283 ip4_address_t *result = 0;
286 foreach_ip_interface_address
287 (lm, ia, sw_if_index,
288 1 /* honor unnumbered */ ,
291 ip_interface_address_get_address (lm, ia);
297 *result_ia = result ? ia : 0;
303 ip4_add_subnet_bcast_route (u32 fib_index,
307 vnet_sw_interface_flags_t iflags;
309 iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
311 fib_table_entry_special_remove(fib_index,
313 FIB_SOURCE_INTERFACE);
315 if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
317 fib_table_entry_update_one_path (fib_index, pfx,
318 FIB_SOURCE_INTERFACE,
321 /* No next-hop address */
327 // no out-label stack
329 FIB_ROUTE_PATH_FLAG_NONE);
333 fib_table_entry_special_add(fib_index,
335 FIB_SOURCE_INTERFACE,
336 (FIB_ENTRY_FLAG_DROP |
337 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
345 ip_interface_address_t * a)
347 ip_lookup_main_t *lm = &im->lookup_main;
348 ip_interface_prefix_t *if_prefix;
349 ip4_address_t *address = ip_interface_address_get_address (lm, a);
351 ip_interface_prefix_key_t key = {
353 .fp_len = a->address_length,
354 .fp_proto = FIB_PROTOCOL_IP4,
355 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
357 .sw_if_index = sw_if_index,
360 fib_prefix_t pfx_special = {
361 .fp_proto = FIB_PROTOCOL_IP4,
364 /* If prefix already set on interface, just increment ref count & return */
365 if_prefix = ip_get_interface_prefix (lm, &key);
368 if_prefix->ref_count += 1;
372 /* New prefix - allocate a pool entry, initialize it, add to the hash */
373 pool_get (lm->if_prefix_pool, if_prefix);
374 if_prefix->ref_count = 1;
375 if_prefix->src_ia_index = a - lm->if_address_pool;
376 clib_memcpy (&if_prefix->key, &key, sizeof (key));
377 mhash_set (&lm->prefix_to_if_prefix_index, &key,
378 if_prefix - lm->if_prefix_pool, 0 /* old value */);
380 /* length <= 30 - add glean, drop first address, maybe drop bcast address */
381 if (a->address_length <= 30)
383 pfx_special.fp_len = a->address_length;
384 pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
386 /* set the glean route for the prefix */
387 fib_table_entry_update_one_path (fib_index, &pfx_special,
388 FIB_SOURCE_INTERFACE,
389 (FIB_ENTRY_FLAG_CONNECTED |
390 FIB_ENTRY_FLAG_ATTACHED),
392 /* No next-hop address */
395 /* invalid FIB index */
398 /* no out-label stack */
400 FIB_ROUTE_PATH_FLAG_NONE);
402 /* set a drop route for the base address of the prefix */
403 pfx_special.fp_len = 32;
404 pfx_special.fp_addr.ip4.as_u32 =
405 address->as_u32 & im->fib_masks[a->address_length];
407 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408 fib_table_entry_special_add (fib_index, &pfx_special,
409 FIB_SOURCE_INTERFACE,
410 (FIB_ENTRY_FLAG_DROP |
411 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
413 /* set a route for the broadcast address of the prefix */
414 pfx_special.fp_len = 32;
415 pfx_special.fp_addr.ip4.as_u32 =
416 address->as_u32 | ~im->fib_masks[a->address_length];
417 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418 ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422 /* length == 31 - add an attached route for the other address */
423 else if (a->address_length == 31)
425 pfx_special.fp_len = 32;
426 pfx_special.fp_addr.ip4.as_u32 =
427 address->as_u32 ^ clib_host_to_net_u32(1);
429 fib_table_entry_update_one_path (fib_index, &pfx_special,
430 FIB_SOURCE_INTERFACE,
431 (FIB_ENTRY_FLAG_ATTACHED),
433 &pfx_special.fp_addr,
435 /* invalid FIB index */
439 FIB_ROUTE_PATH_FLAG_NONE);
444 ip4_add_interface_routes (u32 sw_if_index,
445 ip4_main_t * im, u32 fib_index,
446 ip_interface_address_t * a)
448 ip_lookup_main_t *lm = &im->lookup_main;
449 ip4_address_t *address = ip_interface_address_get_address (lm, a);
452 .fp_proto = FIB_PROTOCOL_IP4,
453 .fp_addr.ip4 = *address,
456 /* set special routes for the prefix if needed */
457 ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
459 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
461 u32 classify_table_index =
462 lm->classify_table_index_by_sw_if_index[sw_if_index];
463 if (classify_table_index != (u32) ~ 0)
465 dpo_id_t dpo = DPO_INVALID;
470 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
472 fib_table_entry_special_dpo_add (fib_index,
475 FIB_ENTRY_FLAG_NONE, &dpo);
480 fib_table_entry_update_one_path (fib_index, &pfx,
481 FIB_SOURCE_INTERFACE,
482 (FIB_ENTRY_FLAG_CONNECTED |
483 FIB_ENTRY_FLAG_LOCAL),
490 FIB_ROUTE_PATH_FLAG_NONE);
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
497 ip4_address_t * address,
500 ip_lookup_main_t *lm = &im->lookup_main;
501 ip_interface_prefix_t *if_prefix;
503 ip_interface_prefix_key_t key = {
505 .fp_len = address_length,
506 .fp_proto = FIB_PROTOCOL_IP4,
507 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
509 .sw_if_index = sw_if_index,
512 fib_prefix_t pfx_special = {
514 .fp_proto = FIB_PROTOCOL_IP4,
517 if_prefix = ip_get_interface_prefix (lm, &key);
520 clib_warning ("Prefix not found while deleting %U",
521 format_ip4_address_and_length, address, address_length);
525 if_prefix->ref_count -= 1;
528 * Routes need to be adjusted if:
529 * - deleting last intf addr in prefix
530 * - deleting intf addr used as default source address in glean adjacency
532 * We're done now otherwise
534 if ((if_prefix->ref_count > 0) &&
535 !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
538 /* length <= 30, delete glean route, first address, last address */
539 if (address_length <= 30)
542 /* remove glean route for prefix */
543 pfx_special.fp_addr.ip4 = *address;
544 pfx_special.fp_len = address_length;
545 fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
547 /* if no more intf addresses in prefix, remove other special routes */
548 if (!if_prefix->ref_count)
550 /* first address in prefix */
551 pfx_special.fp_addr.ip4.as_u32 =
552 address->as_u32 & im->fib_masks[address_length];
553 pfx_special.fp_len = 32;
555 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556 fib_table_entry_special_remove (fib_index,
558 FIB_SOURCE_INTERFACE);
560 /* prefix broadcast address */
561 pfx_special.fp_addr.ip4.as_u32 =
562 address->as_u32 | ~im->fib_masks[address_length];
563 pfx_special.fp_len = 32;
565 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
566 fib_table_entry_special_remove (fib_index,
568 FIB_SOURCE_INTERFACE);
571 /* default source addr just got deleted, find another */
573 ip_interface_address_t *new_src_ia = NULL;
574 ip4_address_t *new_src_addr = NULL;
577 ip4_interface_address_matching_destination
578 (im, address, sw_if_index, &new_src_ia);
580 if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
582 pfx_special.fp_len = address_length;
583 pfx_special.fp_addr.ip4 = *new_src_addr;
585 /* set new glean route for the prefix */
586 fib_table_entry_update_one_path (fib_index, &pfx_special,
587 FIB_SOURCE_INTERFACE,
588 (FIB_ENTRY_FLAG_CONNECTED |
589 FIB_ENTRY_FLAG_ATTACHED),
591 /* No next-hop address */
594 /* invalid FIB index */
597 /* no out-label stack */
599 FIB_ROUTE_PATH_FLAG_NONE);
603 /* length == 31, delete attached route for the other address */
604 else if (address_length == 31)
606 pfx_special.fp_addr.ip4.as_u32 =
607 address->as_u32 ^ clib_host_to_net_u32(1);
609 fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
612 mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
613 pool_put (lm->if_prefix_pool, if_prefix);
617 ip4_del_interface_routes (u32 sw_if_index,
620 ip4_address_t * address, u32 address_length)
623 .fp_len = address_length,
624 .fp_proto = FIB_PROTOCOL_IP4,
625 .fp_addr.ip4 = *address,
628 ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
629 address, address_length);
632 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
635 #ifndef CLIB_MARCH_VARIANT
637 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
639 ip4_main_t *im = &ip4_main;
641 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
644 * enable/disable only on the 1<->0 transition
648 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
653 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
654 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
657 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
661 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
662 sw_if_index, !is_enable, 0, 0);
665 ip4_enable_disable_interface_callback_t *cb;
666 vec_foreach (cb, im->enable_disable_interface_callbacks)
667 cb->function (im, cb->function_opaque, sw_if_index, is_enable);
671 static clib_error_t *
672 ip4_add_del_interface_address_internal (vlib_main_t * vm,
674 ip4_address_t * address,
675 u32 address_length, u32 is_del)
677 vnet_main_t *vnm = vnet_get_main ();
678 ip4_main_t *im = &ip4_main;
679 ip_lookup_main_t *lm = &im->lookup_main;
680 clib_error_t *error = 0;
681 u32 if_address_index, elts_before;
682 ip4_address_fib_t ip4_af, *addr_fib = 0;
684 /* local0 interface doesn't support IP addressing */
685 if (sw_if_index == 0)
688 clib_error_create ("local0 interface doesn't support IP addressing");
691 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
692 ip4_addr_fib_init (&ip4_af, address,
693 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
694 vec_add1 (addr_fib, ip4_af);
697 * there is no support for adj-fib handling in the presence of overlapping
698 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
704 /* When adding an address check that it does not conflict
705 with an existing address on any interface in this table. */
706 ip_interface_address_t *ia;
707 vnet_sw_interface_t *sif;
709 pool_foreach(sif, vnm->interface_main.sw_interfaces,
711 if (im->fib_index_by_sw_if_index[sw_if_index] ==
712 im->fib_index_by_sw_if_index[sif->sw_if_index])
714 foreach_ip_interface_address
715 (&im->lookup_main, ia, sif->sw_if_index,
716 0 /* honor unnumbered */ ,
719 ip_interface_address_get_address
720 (&im->lookup_main, ia);
721 if (ip4_destination_matches_route
722 (im, address, x, ia->address_length) ||
723 ip4_destination_matches_route (im,
728 /* an intf may have >1 addr from the same prefix */
729 if ((sw_if_index == sif->sw_if_index) &&
730 (ia->address_length == address_length) &&
731 (x->as_u32 != address->as_u32))
734 /* error if the length or intf was different */
735 vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
739 ("failed to add %U which conflicts with %U for interface %U",
740 format_ip4_address_and_length, address,
742 format_ip4_address_and_length, x,
744 format_vnet_sw_if_index_name, vnm,
753 elts_before = pool_elts (lm->if_address_pool);
755 error = ip_interface_address_add_del
756 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
760 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
762 /* intf addr routes are added/deleted on admin up/down */
763 if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
766 ip4_del_interface_routes (sw_if_index,
767 im, ip4_af.fib_index, address,
770 ip4_add_interface_routes (sw_if_index,
771 im, ip4_af.fib_index,
773 (lm->if_address_pool, if_address_index));
776 /* If pool did not grow/shrink: add duplicate address. */
777 if (elts_before != pool_elts (lm->if_address_pool))
779 ip4_add_del_interface_address_callback_t *cb;
780 vec_foreach (cb, im->add_del_interface_address_callbacks)
781 cb->function (im, cb->function_opaque, sw_if_index,
782 address, address_length, if_address_index, is_del);
791 ip4_add_del_interface_address (vlib_main_t * vm,
793 ip4_address_t * address,
794 u32 address_length, u32 is_del)
796 return ip4_add_del_interface_address_internal
797 (vm, sw_if_index, address, address_length, is_del);
801 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
803 ip_interface_address_t *ia;
809 * when directed broadcast is enabled, the subnet braodcast route will forward
810 * packets using an adjacency with a broadcast MAC. otherwise it drops
813 foreach_ip_interface_address(&im->lookup_main, ia,
816 if (ia->address_length <= 30)
820 ipa = ip_interface_address_get_address (&im->lookup_main, ia);
824 .fp_proto = FIB_PROTOCOL_IP4,
826 .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
830 ip4_add_subnet_bcast_route
831 (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
840 static clib_error_t *
841 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
843 ip4_main_t *im = &ip4_main;
844 ip_interface_address_t *ia;
846 u32 is_admin_up, fib_index;
848 /* Fill in lookup tables with default table (0). */
849 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
851 vec_validate_init_empty (im->
852 lookup_main.if_address_pool_index_by_sw_if_index,
855 is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
857 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
860 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
861 0 /* honor unnumbered */,
863 a = ip_interface_address_get_address (&im->lookup_main, ia);
865 ip4_add_interface_routes (sw_if_index,
869 ip4_del_interface_routes (sw_if_index,
871 a, ia->address_length);
878 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
880 /* Built-in ip4 unicast rx feature path definition */
/* The arc starts at the two ip4 input nodes and ends at "ip4-lookup"; its
 * index is stored in ip4_main.lookup_main.ucast_feature_arc_index.  Each
 * feature below names, via .runs_before, the feature(s) it must precede. */
882 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
884 .arc_name = "ip4-unicast",
885 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
886 .last_in_arc = "ip4-lookup",
887 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
890 VNET_FEATURE_INIT (ip4_flow_classify, static) =
892 .arc_name = "ip4-unicast",
893 .node_name = "ip4-flow-classify",
894 .runs_before = VNET_FEATURES ("ip4-inacl"),
897 VNET_FEATURE_INIT (ip4_inacl, static) =
899 .arc_name = "ip4-unicast",
900 .node_name = "ip4-inacl",
901 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
904 VNET_FEATURE_INIT (ip4_source_check_1, static) =
906 .arc_name = "ip4-unicast",
907 .node_name = "ip4-source-check-via-rx",
908 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
911 VNET_FEATURE_INIT (ip4_source_check_2, static) =
913 .arc_name = "ip4-unicast",
914 .node_name = "ip4-source-check-via-any",
915 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
/* Ordered only relative to "ip4-policer-classify"; no feature in this file
 * declares itself as running before this one. */
918 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
920 .arc_name = "ip4-unicast",
921 .node_name = "ip4-source-and-port-range-check-rx",
922 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
925 VNET_FEATURE_INIT (ip4_policer_classify, static) =
927 .arc_name = "ip4-unicast",
928 .node_name = "ip4-policer-classify",
929 .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
932 VNET_FEATURE_INIT (ip4_ipsec, static) =
934 .arc_name = "ip4-unicast",
935 .node_name = "ipsec4-input-feature",
936 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
939 VNET_FEATURE_INIT (ip4_vpath, static) =
941 .arc_name = "ip4-unicast",
942 .node_name = "vpath-input-ip4",
943 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
946 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
948 .arc_name = "ip4-unicast",
949 .node_name = "ip4-vxlan-bypass",
950 .runs_before = VNET_FEATURES ("ip4-lookup"),
/* "ip4-not-enabled" is the feature that ip4_sw_interface_enable_disable()
 * and ip4_sw_interface_add_del() switch per interface on this arc. */
953 VNET_FEATURE_INIT (ip4_not_enabled, static) =
955 .arc_name = "ip4-unicast",
956 .node_name = "ip4-not-enabled",
957 .runs_before = VNET_FEATURES ("ip4-lookup"),
960 VNET_FEATURE_INIT (ip4_lookup, static) =
962 .arc_name = "ip4-unicast",
963 .node_name = "ip4-lookup",
964 .runs_before = 0, /* not before any other features */
967 /* Built-in ip4 multicast rx feature path definition */
/* Same start nodes as the unicast arc; ends at "ip4-mfib-forward-lookup".
 * The arc index is stored in ip4_main.lookup_main.mcast_feature_arc_index. */
968 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
970 .arc_name = "ip4-multicast",
971 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
972 .last_in_arc = "ip4-mfib-forward-lookup",
973 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
976 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
978 .arc_name = "ip4-multicast",
979 .node_name = "vpath-input-ip4",
980 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
/* Multicast counterpart of the per-interface "ip4-not-enabled" feature. */
983 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
985 .arc_name = "ip4-multicast",
986 .node_name = "ip4-not-enabled",
987 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
990 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
992 .arc_name = "ip4-multicast",
993 .node_name = "ip4-mfib-forward-lookup",
994 .runs_before = 0, /* last feature */
997 /* Built-in ip4 tx ("ip4-output") feature arc definition; the source and port-range check tx feature is registered right after it */
/* The arc starts at the rewrite, midchain and dvr-dpo nodes and ends at
 * "interface-output"; its index is stored in
 * ip4_main.lookup_main.output_feature_arc_index. */
998 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1000 .arc_name = "ip4-output",
1001 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1002 .last_in_arc = "interface-output",
1003 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1006 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1008 .arc_name = "ip4-output",
1009 .node_name = "ip4-source-and-port-range-check-tx",
1010 .runs_before = VNET_FEATURES ("ip4-outacl"),
1013 VNET_FEATURE_INIT (ip4_outacl, static) =
1015 .arc_name = "ip4-output",
1016 .node_name = "ip4-outacl",
1017 .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1020 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1022 .arc_name = "ip4-output",
1023 .node_name = "ipsec4-output-feature",
1024 .runs_before = VNET_FEATURES ("interface-output"),
1027 /* Built-in ip4 tx feature path definition */
1028 VNET_FEATURE_INIT (ip4_interface_output, static) =
1030 .arc_name = "ip4-output",
1031 .node_name = "interface-output",
1032 .runs_before = 0, /* not before any other features */
1036 static clib_error_t *
1037 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1039 ip4_main_t *im = &ip4_main;
1041 /* Fill in lookup tables with default table (0). */
1042 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1043 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1047 ip4_main_t *im4 = &ip4_main;
1048 ip_lookup_main_t *lm4 = &im4->lookup_main;
1049 ip_interface_address_t *ia = 0;
1050 ip4_address_t *address;
1051 vlib_main_t *vm = vlib_get_main ();
1053 vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1055 foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1057 address = ip_interface_address_get_address (lm4, ia);
1058 ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1063 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1066 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1067 sw_if_index, is_add, 0, 0);
1069 return /* no error */ 0;
1072 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1074 /* Global IP4 main. */
/* Definition of the ip4_main object used throughout this file.  The guard
 * leaves the definition out of builds that define CLIB_MARCH_VARIANT. */
1075 #ifndef CLIB_MARCH_VARIANT
1076 ip4_main_t ip4_main;
1077 #endif /* CLIB_MARCH_VARIANT */
1079 static clib_error_t *
1080 ip4_lookup_init (vlib_main_t * vm)
1082 ip4_main_t *im = &ip4_main;
1083 clib_error_t *error;
1086 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1088 if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1090 if ((error = vlib_call_init_function (vm, fib_module_init)))
1092 if ((error = vlib_call_init_function (vm, mfib_module_init)))
1095 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1100 m = pow2_mask (i) << (32 - i);
1103 im->fib_masks[i] = clib_host_to_net_u32 (m);
1106 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1108 /* Create FIB with index 0 and table id of 0. */
1109 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1110 FIB_SOURCE_DEFAULT_ROUTE);
1111 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1112 MFIB_SOURCE_DEFAULT_ROUTE);
1116 pn = pg_get_node (ip4_lookup_node.index);
1117 pn->unformat_edit = unformat_pg_ip4_header;
1121 ethernet_arp_header_t h;
1123 clib_memset (&h, 0, sizeof (h));
1125 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1126 #define _8(f,v) h.f = v;
1127 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1128 _16 (l3_type, ETHERNET_TYPE_IP4);
1129 _8 (n_l2_address_bytes, 6);
1130 _8 (n_l3_address_bytes, 4);
1131 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1135 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1138 /* alloc chunk size */ 8,
1145 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record shared by the ip4 forwarding nodes in this file.
 * It is filled by ip4_forward_next_trace() and rendered by the
 * format_ip4_*_trace functions, which read dpo_index, flow_hash, fib_index
 * and packet_data from it. */
1149 /* Adjacency taken. */
1154 /* Packet data, possibly *after* rewrite. */
1155 u8 packet_data[64 - 1 * sizeof (u32)];
1157 ip4_forward_next_trace_t;
1159 #ifndef CLIB_MARCH_VARIANT
1161 format_ip4_forward_next_trace (u8 * s, va_list * args)
1163 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1164 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1165 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1166 u32 indent = format_get_indent (s);
1167 s = format (s, "%U%U",
1168 format_white_space, indent,
1169 format_ip4_header, t->packet_data, sizeof (t->packet_data));
1175 format_ip4_lookup_trace (u8 * s, va_list * args)
1177 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1178 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1179 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1180 u32 indent = format_get_indent (s);
1182 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1183 t->fib_index, t->dpo_index, t->flow_hash);
1184 s = format (s, "\n%U%U",
1185 format_white_space, indent,
1186 format_ip4_header, t->packet_data, sizeof (t->packet_data));
1191 format_ip4_rewrite_trace (u8 * s, va_list * args)
1193 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1194 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1195 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1196 u32 indent = format_get_indent (s);
1198 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1199 t->fib_index, t->dpo_index, format_ip_adjacency,
1200 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1201 s = format (s, "\n%U%U",
1202 format_white_space, indent,
1203 format_ip_adjacency_packet_data,
1204 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1208 #ifndef CLIB_MARCH_VARIANT
1209 /* Common trace function for all ip4-forward next nodes. */
/* Walks the buffer indices of a frame and, for every buffer flagged
 * VLIB_BUFFER_IS_TRACED, adds an ip4_forward_next_trace_t record holding:
 *   - dpo_index: ip.adj_index[which_adj_index] of the buffer,
 *   - flow_hash: the buffer's ip.flow_hash,
 *   - fib_index (the assignment target, per the record layout, is on an
 *     elided line): sw_if_index[VLIB_TX] when that is not ~0, otherwise the
 *     FIB index of the receive interface,
 *   - packet_data: the first sizeof (packet_data) bytes at the buffer's
 *     current data pointer.
 * which_adj_index selects the adjacency slot (VLIB_RX or VLIB_TX) to record. */
1211 ip4_forward_next_trace (vlib_main_t * vm,
1212 vlib_node_runtime_t * node,
1213 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1216 ip4_main_t *im = &ip4_main;
1218 n_left = frame->n_vectors;
1219 from = vlib_frame_vector_args (frame);
/* ---- dual loop: two buffers per iteration ---- */
1224 vlib_buffer_t *b0, *b1;
1225 ip4_forward_next_trace_t *t0, *t1;
1227 /* Prefetch next iteration. */
1228 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1229 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1234 b0 = vlib_get_buffer (vm, bi0);
1235 b1 = vlib_get_buffer (vm, bi1);
1237 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1239 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1240 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1241 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1243 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1244 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1245 vec_elt (im->fib_index_by_sw_if_index,
1246 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1248 clib_memcpy_fast (t0->packet_data,
1249 vlib_buffer_get_current (b0),
1250 sizeof (t0->packet_data));
1252 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1254 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1255 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1256 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1258 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1259 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1260 vec_elt (im->fib_index_by_sw_if_index,
1261 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1262 clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1263 sizeof (t1->packet_data));
/* ---- single loop: same record for one buffer at a time ---- */
1273 ip4_forward_next_trace_t *t0;
1277 b0 = vlib_get_buffer (vm, bi0);
1279 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1281 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1282 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1283 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1285 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1286 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1287 vec_elt (im->fib_index_by_sw_if_index,
1288 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1289 clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1290 sizeof (t0->packet_data));
/*
 * ip4_tcp_udp_compute_checksum: software L4 checksum of the packet whose
 * IPv4 header is ip0 and whose first buffer is p0.  The sum is seeded from
 * pseudo-header fields (payload length, protocol, addresses) and then run
 * over the payload, following p0->next_buffer when the payload continues
 * in chained buffers.  The result is the complement of the folded sum.
 */
1297 /* Compute TCP/UDP/ICMP4 checksum in software. */
1299 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1303 u32 ip_header_length, payload_length_host_byte_order;
1304 u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1306 void *data_this_buffer;
1308 /* Initialize checksum with ip header. */
1309 ip_header_length = ip4_header_bytes (ip0);
/* L4 payload length = IP total length minus the IP header length. */
1310 payload_length_host_byte_order =
1311 clib_net_to_host_u16 (ip0->length) - ip_header_length;
/* Seed: payload length plus (protocol << 16), converted to network order. */
1313 clib_host_to_net_u32 (payload_length_host_byte_order +
1314 (ip0->protocol << 16));
/* Add the addresses: two 32-bit adds on 32-bit hosts, otherwise a single
 * unaligned 64-bit load starting at src_address (presumably spanning
 * dst_address as well -- confirm against the ip4_header_t layout). */
1316 if (BITS (uword) == 32)
1319 ip_csum_with_carry (sum0,
1320 clib_mem_unaligned (&ip0->src_address, u32));
1322 ip_csum_with_carry (sum0,
1323 clib_mem_unaligned (&ip0->dst_address, u32));
1327 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1329 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1330 data_this_buffer = (void *) ip0 + ip_header_length;
/* Bytes available in this buffer counted from the start of the IP header. */
1331 n_ip_bytes_this_buffer =
1332 p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
/* Clamp the first chunk when the packet is longer than what this buffer
 * holds; the remainder is taken from chained buffers below. */
1333 if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1335 n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1336 n_ip_bytes_this_buffer - ip_header_length : 0;
1340 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1341 n_bytes_left -= n_this_buffer;
1342 if (n_bytes_left == 0)
/* Payload bytes are still outstanding, so a chained buffer must follow. */
1345 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1346 if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
1349 p0 = vlib_get_buffer (vm, p0->next_buffer);
1350 data_this_buffer = vlib_buffer_get_current (p0);
1351 n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1354 sum16 = ~ip_csum_fold (sum0);
1360 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1362 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1366 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1367 || ip0->protocol == IP_PROTOCOL_UDP);
1369 udp0 = (void *) (ip0 + 1);
1370 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1372 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1373 | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1377 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1379 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1380 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
/* Feature arc "ip4-local": entered from node "ip4-local", with
 * "ip4-local-end-of-arc" declared as the last node in the arc. */
1387 VNET_FEATURE_ARC_INIT (ip4_local) =
1389 .arc_name = "ip4-local",
1390 .start_nodes = VNET_FEATURES ("ip4-local"),
1391 .last_in_arc = "ip4-local-end-of-arc",
/*
 * ip4_local_l4_csum_validate: run the software checksum validation on
 * buffer p and report the outcome through *good_tcp_udp.  The visible UDP
 * part additionally compares the UDP length field with the IP total
 * length and, when the UDP length is larger, clears *good_tcp_udp and sets
 * *error to IP4_ERROR_UDP_LENGTH.
 */
1396 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1397 ip4_header_t * ip, u8 is_udp, u8 * error,
1401 flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1402 *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1406 u32 ip_len, udp_len;
1408 udp = ip4_next_header (ip);
1409 /* Verify UDP length. */
1410 ip_len = clib_net_to_host_u16 (ip->length);
1411 udp_len = clib_net_to_host_u16 (udp->length);
/* NOTE(review): ip_len is the IP total length (header included), so this
 * only rejects a UDP length that exceeds the whole IP packet. */
1413 len_diff = ip_len - udp_len;
1414 *good_tcp_udp &= len_diff >= 0;
1415 *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
/*
 * L4 checksum state helpers for ip4-local.  _b is a pointer to a buffer
 * with a 'flags' member.  Every argument and every expansion is fully
 * parenthesized so the macros can be negated, combined with other
 * operators, or given a non-trivial pointer expression without changing
 * meaning.
 */

/* Non-zero when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
  (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                      \
   || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* Non-zero when a TCP/UDP packet still needs a software checksum check:
 * the checksum has neither been computed already nor been offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
  ((is_tcp_udp)                                                         \
   && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)              \
        || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                   \
    || ip4_local_csum_is_offloaded (_b)) != 0)
1432 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1433 ip4_header_t * ih, u8 * error)
1435 u8 is_udp, is_tcp_udp, good_tcp_udp;
1437 is_udp = ih->protocol == IP_PROTOCOL_UDP;
1438 is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1440 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1441 ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1443 good_tcp_udp = ip4_local_csum_is_valid (b);
1445 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1446 *error = (is_tcp_udp && !good_tcp_udp
1447 ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
/*
 * ip4_local_check_l4_csum_x2: two-packet variant of the L4 checksum check.
 * b, ih and error are arrays of two entries.  The flag-based verdict is
 * read first for both packets; software validation is entered when either
 * packet needs it.  A bad TCP/UDP checksum replaces the packet's error
 * with IP4_ERROR_TCP_CHECKSUM + is_udp.
 */
1451 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1452 ip4_header_t ** ih, u8 * error)
1454 u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1456 is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1457 is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1459 is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1460 is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
/* Default verdict taken from the buffer flags (correct or offloaded). */
1462 good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1463 good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
/* Slow path: at least one of the two packets needs a software check. */
1465 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1466 || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1469 ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1472 ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
/* TCP and UDP checksum error codes are adjacent; is_udp picks between them. */
1476 error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1477 IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1478 error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1479 IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
/*
 * ip4_local_set_next_and_error: finalize the disposition of one packet.
 * Any error other than IP4_ERROR_UNKNOWN_PROTOCOL redirects the packet to
 * IP_LOCAL_NEXT_DROP; b->error is set from error_node's error table (or 0
 * when error is 0).  When running as head of the feature arc and no error
 * was raised, the ip4-local feature arc is started for the RX interface.
 */
1483 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1484 vlib_buffer_t * b, u16 * next, u8 error,
1485 u8 head_of_feature_arc)
1487 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
/* IP4_ERROR_UNKNOWN_PROTOCOL doubles as the "no error so far" value here. */
1490 *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1491 b->error = error ? error_node->errors[error] : 0;
1492 if (head_of_feature_arc)
1495 if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1497 vnet_feature_arc_start (arc_index,
1498 vnet_buffer (b)->sw_if_index[VLIB_RX],
/* Per-frame cache of the most recent source check; the code in this file
 * uses its members src, lbi, error and first. */
1511 } ip4_local_last_check_t;
1514 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1515 ip4_local_last_check_t * last_check, u8 * error0)
1517 ip4_fib_mtrie_leaf_t leaf0;
1518 ip4_fib_mtrie_t *mtrie0;
1519 const dpo_id_t *dpo0;
1520 load_balance_t *lb0;
1523 vnet_buffer (b)->ip.fib_index =
1524 vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1525 vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1528 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1529 * adjacency for the destination address (the local interface address).
1530 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1531 * adjacency for the source address (the remote sender's address)
1533 if (PREDICT_FALSE (last_check->first ||
1534 (last_check->src.as_u32 != ip0->src_address.as_u32)))
1536 mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1537 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1538 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1539 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1540 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1542 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1543 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1544 vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1546 lb0 = load_balance_get (lbi0);
1547 dpo0 = load_balance_get_bucket_i (lb0, 0);
1550 * Must have a route to source otherwise we drop the packet.
1551 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1554 * - the source is a recieve => it's from us => bogus, do this
1555 * first since it sets a different error code.
1556 * - uRPF check for any route to source - accept if passes.
1557 * - allow packets destined to the broadcast address from unknown sources
1560 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1561 && dpo0->dpoi_type == DPO_RECEIVE) ?
1562 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1563 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1564 && !fib_urpf_check_size (lb0->lb_urpf)
1565 && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1566 IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1568 last_check->src.as_u32 = ip0->src_address.as_u32;
1569 last_check->lbi = lbi0;
1570 last_check->error = *error0;
1574 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1575 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1576 vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1577 *error0 = last_check->error;
1578 last_check->first = 0;
1583 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1584 ip4_local_last_check_t * last_check, u8 * error)
1586 ip4_fib_mtrie_leaf_t leaf[2];
1587 ip4_fib_mtrie_t *mtrie[2];
1588 const dpo_id_t *dpo[2];
1589 load_balance_t *lb[2];
1593 not_last_hit = last_check->first;
1594 not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1595 not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1597 vnet_buffer (b[0])->ip.fib_index =
1598 vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1599 vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1600 vnet_buffer (b[0])->ip.fib_index;
1602 vnet_buffer (b[1])->ip.fib_index =
1603 vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1604 vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1605 vnet_buffer (b[1])->ip.fib_index;
1608 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1609 * adjacency for the destination address (the local interface address).
1610 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1611 * adjacency for the source address (the remote sender's address)
1613 if (PREDICT_FALSE (not_last_hit))
1615 mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1616 mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1618 leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1619 leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1621 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1622 &ip[0]->src_address, 2);
1623 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1624 &ip[1]->src_address, 2);
1626 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1627 &ip[0]->src_address, 3);
1628 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1629 &ip[1]->src_address, 3);
1631 lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1632 lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1634 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1635 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1636 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1638 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1639 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1640 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1642 lb[0] = load_balance_get (lbi[0]);
1643 lb[1] = load_balance_get (lbi[1]);
1645 dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1646 dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1648 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1649 dpo[0]->dpoi_type == DPO_RECEIVE) ?
1650 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1651 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1652 !fib_urpf_check_size (lb[0]->lb_urpf) &&
1653 ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1654 ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1656 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1657 dpo[1]->dpoi_type == DPO_RECEIVE) ?
1658 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1659 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1660 !fib_urpf_check_size (lb[1]->lb_urpf) &&
1661 ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1662 ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1664 last_check->src.as_u32 = ip[1]->src_address.as_u32;
1665 last_check->lbi = lbi[1];
1666 last_check->error = error[1];
1670 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1671 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1672 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1674 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1675 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1676 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1678 error[0] = last_check->error;
1679 error[1] = last_check->error;
1680 last_check->first = 0;
/* Packet classes returned by ip4_local_classify.  L4 is listed first, so
 * it has value 0 unless an enumerator outside this view precedes it; the
 * callers test the returned type for non-zero. */
1684 enum ip_local_packet_type_e
1686 IP_LOCAL_PACKET_TYPE_L4,
1687 IP_LOCAL_PACKET_TYPE_NAT,
1688 IP_LOCAL_PACKET_TYPE_FRAG,
1692 * Determine packet type and next node.
1694 * The expectation is that all packets that are not L4 will skip
1695 * checksums and source checks.
1698 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1700 ip_lookup_main_t *lm = &ip4_main.lookup_main;
/* Fragments go to reassembly regardless of protocol. */
1702 if (PREDICT_FALSE (ip4_is_fragment (ip)))
1704 *next = IP_LOCAL_NEXT_REASSEMBLY;
1705 return IP_LOCAL_PACKET_TYPE_FRAG;
/* NAT-ed packets use the per-protocol next node but are typed as NAT. */
1707 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1709 *next = lm->local_next_by_ip_protocol[ip->protocol];
1710 return IP_LOCAL_PACKET_TYPE_NAT;
/* Everything else: per-protocol next node, typed as plain L4. */
1713 *next = lm->local_next_by_ip_protocol[ip->protocol];
1714 return IP_LOCAL_PACKET_TYPE_L4;
/*
 * ip4_local_inline: shared body of the ip4-local nodes.  For every buffer
 * in the frame it records the L3 header offset, classifies the packet,
 * runs the L4 checksum and source-address checks, and sets next node and
 * error.  Buffers are handled in pairs while at least six remain (so two
 * later buffers can be prefetched), then one by one.  All buffers are
 * enqueued at the end from the nexts[] array; the return value is
 * frame->n_vectors.
 */
1718 ip4_local_inline (vlib_main_t * vm,
1719 vlib_node_runtime_t * node,
1720 vlib_frame_t * frame, int head_of_feature_arc)
1722 u32 *from, n_left_from;
/* Errors are reported against the ip4-input node's error table. */
1723 vlib_node_runtime_t *error_node =
1724 vlib_node_get_runtime (vm, ip4_input_node.index);
1725 u16 nexts[VLIB_FRAME_SIZE], *next;
1726 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1727 ip4_header_t *ip[2];
1730 ip4_local_last_check_t last_check = {
1732 * 0.0.0.0 can appear as the source address of an IP packet,
1733 * as can any other address, hence the need to use the 'first'
1734 * member to make sure the .lbi is initialised for the first
1737 .src = {.as_u32 = 0},
1739 .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1743 from = vlib_frame_vector_args (frame);
1744 n_left_from = frame->n_vectors;
1746 if (node->flags & VLIB_NODE_FLAG_TRACE)
1747 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1749 vlib_get_buffers (vm, from, bufs, n_left_from);
1753 while (n_left_from >= 6)
1757 /* Prefetch next iteration. */
1759 vlib_prefetch_buffer_header (b[4], LOAD);
1760 vlib_prefetch_buffer_header (b[5], LOAD);
1762 CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1763 CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
/* IP4_ERROR_UNKNOWN_PROTOCOL is the starting "no error" value. */
1766 error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1768 ip[0] = vlib_buffer_get_current (b[0]);
1769 ip[1] = vlib_buffer_get_current (b[1]);
1771 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1772 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1774 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1775 pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
/* Zero when both packets are of the same type. */
1777 not_batch = pt[0] ^ pt[1];
/* Not head of arc, or both packets share a non-L4 type: presumably the
 * checks below are skipped (the statement itself is outside this view). */
1779 if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1782 if (PREDICT_TRUE (not_batch == 0))
1784 ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1785 ip4_local_check_src_x2 (b, ip, &last_check, error);
/* Mixed types: each packet goes through the single-packet checks. */
1791 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1792 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1796 ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1797 ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1803 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1804 head_of_feature_arc);
1805 ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1806 head_of_feature_arc);
/* Single-packet loop for the remainder of the frame. */
1813 while (n_left_from > 0)
1815 error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1817 ip[0] = vlib_buffer_get_current (b[0]);
1818 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1819 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1821 if (head_of_feature_arc == 0 || pt[0])
1824 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1825 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1829 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1830 head_of_feature_arc);
1837 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1838 return frame->n_vectors;
/* ip4-local node function: runs the shared body as head of the feature arc. */
1841 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1842 vlib_frame_t * frame)
1844 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/* Registration of the "ip4-local" graph node and its static next nodes. */
1848 VLIB_REGISTER_NODE (ip4_local_node) =
1850 .name = "ip4-local",
1851 .vector_size = sizeof (u32),
1852 .format_trace = format_ip4_forward_next_trace,
1853 .n_next_nodes = IP_LOCAL_N_NEXT,
1856 [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1857 [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1858 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1859 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1860 [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
/* ip4-local-end-of-arc node function: same body as ip4-local, but invoked
 * with head_of_feature_arc == 0. */
1866 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1867 vlib_node_runtime_t * node,
1868 vlib_frame_t * frame)
1870 return ip4_local_inline (vm, node, frame, 0 /* not head of feature arc */ );
/* Registration of "ip4-local-end-of-arc", declared as a sibling of
 * "ip4-local". */
1874 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1875 .name = "ip4-local-end-of-arc",
1876 .vector_size = sizeof (u32),
1878 .format_trace = format_ip4_forward_next_trace,
1879 .sibling_of = "ip4-local",
/* Registers "ip4-local-end-of-arc" as a feature on the "ip4-local" arc. */
1882 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1883 .arc_name = "ip4-local",
1884 .node_name = "ip4-local-end-of-arc",
1885 .runs_before = 0, /* not before any other features */
1889 #ifndef CLIB_MARCH_VARIANT
1891 ip4_register_protocol (u32 protocol, u32 node_index)
1893 vlib_main_t *vm = vlib_get_main ();
1894 ip4_main_t *im = &ip4_main;
1895 ip_lookup_main_t *lm = &im->lookup_main;
1897 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1898 lm->local_next_by_ip_protocol[protocol] =
1899 vlib_node_add_next (vm, ip4_local_node.index, node_index);
1903 ip4_unregister_protocol (u32 protocol)
1905 ip4_main_t *im = &ip4_main;
1906 ip_lookup_main_t *lm = &im->lookup_main;
1908 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1909 lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1913 static clib_error_t *
1914 show_ip_local_command_fn (vlib_main_t * vm,
1915 unformat_input_t * input, vlib_cli_command_t * cmd)
1917 ip4_main_t *im = &ip4_main;
1918 ip_lookup_main_t *lm = &im->lookup_main;
1921 vlib_cli_output (vm, "Protocols handled by ip4_local");
1922 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1924 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1926 u32 node_index = vlib_get_node (vm,
1927 ip4_local_node.index)->
1928 next_nodes[lm->local_next_by_ip_protocol[i]];
1929 vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1930 format_vlib_node_name, vm, node_index);
1939 * Display the set of protocols handled by the local IPv4 stack.
1942 * Example of how to display local protocol table:
1943 * @cliexstart{show ip local}
1944 * Protocols handled by ip4_local
/* CLI registration binding "show ip local" to show_ip_local_command_fn. */
1951 VLIB_CLI_COMMAND (show_ip_local, static) =
1953 .path = "show ip local",
1954 .function = show_ip_local_command_fn,
1955 .short_help = "show ip local",
/*
 * ip4_arp_inline: shared body of the ip4-arp and ip4-glean nodes.
 * Every incoming packet is placed on the drop next (IP4_ARP_NEXT_DROP)
 * with an error code describing what happened.  Unless throttled or the
 * adjacency is in an unexpected state, an ARP request for the address to
 * resolve is built from the request template and sent via the adjacency's
 * next index.  is_glean selects glean behaviour (resolve the packet's
 * destination) over incomplete-adjacency behaviour (resolve the
 * adjacency's next hop).  Returns frame->n_vectors.
 */
1960 ip4_arp_inline (vlib_main_t * vm,
1961 vlib_node_runtime_t * node,
1962 vlib_frame_t * frame, int is_glean)
1964 vnet_main_t *vnm = vnet_get_main ();
1965 ip4_main_t *im = &ip4_main;
1966 ip_lookup_main_t *lm = &im->lookup_main;
1967 u32 *from, *to_next_drop;
1968 uword n_left_from, n_left_to_next_drop, next_index;
1969 u32 thread_index = vm->thread_index;
1972 if (node->flags & VLIB_NODE_FLAG_TRACE)
1973 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Per-thread throttle seed for this dispatch. */
1975 seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1977 from = vlib_frame_vector_args (frame);
1978 n_left_from = frame->n_vectors;
1979 next_index = node->cached_next_index;
1980 if (next_index == IP4_ARP_NEXT_DROP)
1981 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1983 while (n_left_from > 0)
1985 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1986 to_next_drop, n_left_to_next_drop);
1988 while (n_left_from > 0 && n_left_to_next_drop > 0)
1990 u32 pi0, bi0, adj_index0, sw_if_index0;
1991 ip_adjacency_t *adj0;
1992 vlib_buffer_t *p0, *b0;
1993 ip4_address_t resolve0;
1994 ethernet_arp_header_t *h0;
1995 vnet_hw_interface_t *hw_if0;
1999 p0 = vlib_get_buffer (vm, pi0);
/* The original packet always goes to the drop next. */
2003 to_next_drop[0] = pi0;
2005 n_left_to_next_drop -= 1;
2007 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2008 adj0 = adj_get (adj_index0);
2012 /* resolve the packet's destination */
2013 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
2014 resolve0 = ip0->dst_address;
2018 /* resolve the incomplete adj */
2019 resolve0 = adj0->sub_type.nbr.next_hop.ip4;
2022 /* combine the address and interface for the hash key */
2023 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2024 r0 = (u64) resolve0.data_u32 << 32;
2027 if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
2029 p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
2034 * the adj has been updated to a rewrite but the node the DPO that got
2035 * us here hasn't - yet. no big deal. we'll drop while we wait.
2037 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2039 p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
2044 * Can happen if the control-plane is programming tables
2045 * with traffic flowing; at least that's today's lame excuse.
2047 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2048 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2050 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2053 /* Send ARP request. */
2055 vlib_packet_template_get_packet (vm,
2056 &im->ip4_arp_request_packet_template,
2058 /* Seems we're out of buffers */
2059 if (PREDICT_FALSE (!h0))
2061 p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
2065 b0 = vlib_get_buffer (vm, bi0);
2067 /* copy the persistent fields from the original */
2068 clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
2070 /* Add rewrite/encap string for ARP packet. */
2071 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2073 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2075 /* Src ethernet address in ARP header. */
2076 mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
2077 hw_if0->hw_address);
2080 /* The interface's source address is stashed in the Glean Adj */
2081 h0->ip4_over_ethernet[0].ip4 =
2082 adj0->sub_type.glean.receive_addr.ip4;
2086 /* Src IP address in ARP header. */
2087 if (ip4_src_address_for_packet (lm, sw_if_index0,
2088 &h0->ip4_over_ethernet[0].ip4))
2090 /* No source address available */
2091 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
/* The freshly allocated request buffer is released again. */
2092 vlib_buffer_free (vm, &bi0, 1);
/* Target IP of the ARP request is the address being resolved. */
2096 h0->ip4_over_ethernet[1].ip4 = resolve0;
2098 p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
2100 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2101 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2102 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
/* Move the data pointer back over the rewrite that was prepended. */
2104 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2106 vlib_set_next_frame_buffer (vm, node,
2107 adj0->rewrite_header.next_index, bi0);
2110 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2113 return frame->n_vectors;
/* ip4-arp node function: shared ARP body with is_glean == 0. */
2116 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2117 vlib_frame_t * frame)
2119 return (ip4_arp_inline (vm, node, frame, 0));
/* ip4-glean node function: shared ARP body with is_glean == 1. */
2122 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2123 vlib_frame_t * frame)
2125 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter names for the IP4_ARP_ERROR_* codes, indexed by error code;
 * shared by the ip4-arp and ip4-glean node registrations. */
2128 static char *ip4_arp_error_strings[] = {
2129 [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
2130 [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
2131 [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
2132 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2133 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2134 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Registration of the ip4_arp_node graph node; its only static next
 * visible here is the drop next, mapped to "error-drop". */
2138 VLIB_REGISTER_NODE (ip4_arp_node) =
2141 .vector_size = sizeof (u32),
2142 .format_trace = format_ip4_forward_next_trace,
2143 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2144 .error_strings = ip4_arp_error_strings,
2145 .n_next_nodes = IP4_ARP_N_NEXT,
2148 [IP4_ARP_NEXT_DROP] = "error-drop",
/* Registration of the "ip4-glean" graph node; it reuses the ARP error
 * strings and maps its drop next to "error-drop". */
2152 VLIB_REGISTER_NODE (ip4_glean_node) =
2154 .name = "ip4-glean",
2155 .vector_size = sizeof (u32),
2156 .format_trace = format_ip4_forward_next_trace,
2157 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2158 .error_strings = ip4_arp_error_strings,
2159 .n_next_nodes = IP4_ARP_N_NEXT,
2161 [IP4_ARP_NEXT_DROP] = "error-drop",
/* X-macro list of ARP error names; expanded in arp_notrace_init with _()
 * defined per entry. */
2166 #define foreach_notrace_ip4_arp_error \
2172 _(NO_SOURCE_ADDRESS)
/*
 * arp_notrace_init: init-time hook that calls
 * vnet_pcap_drop_trace_filter_add_del for each error listed in
 * foreach_notrace_ip4_arp_error, using the ip4-arp node's error table.
 */
2174 static clib_error_t *
2175 arp_notrace_init (vlib_main_t * vm)
2177 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2179 /* don't trace ARP request packets */
2181 vnet_pcap_drop_trace_filter_add_del \
2182 (rt->errors[IP4_ARP_ERROR_##a], \
2184 foreach_notrace_ip4_arp_error;
2189 VLIB_INIT_FUNCTION (arp_notrace_init);
2192 #ifndef CLIB_MARCH_VARIANT
/*
 * ip4_probe_neighbor: build one ARP request for dst on sw_if_index and
 * hand it to the interface's output node.  Error returns are produced
 * when the interface is admin-down, when no interface address matches the
 * destination, when the request buffer cannot be allocated, or when the
 * hardware interface has no hw_address.  Returns 0 on success.
 */
2193 /* Send an ARP request to see if given destination is reachable on given interface. */
2195 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2198 vnet_main_t *vnm = vnet_get_main ();
2199 ip4_main_t *im = &ip4_main;
2200 ethernet_arp_header_t *h;
2202 ip_interface_address_t *ia;
2203 ip_adjacency_t *adj;
2204 vnet_hw_interface_t *hi;
2205 vnet_sw_interface_t *si;
2209 u8 unicast_rewrite = 0;
2211 si = vnet_get_sw_interface (vnm, sw_if_index);
2213 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2215 return clib_error_return (0, "%U: interface %U down",
2216 format_ip4_address, dst,
2217 format_vnet_sw_if_index_name, vnm,
/* Pick the interface address that covers dst; used as the ARP source. */
2222 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2225 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2226 return clib_error_return
2228 "no matching interface address for destination %U (interface %U)",
2229 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2233 h = vlib_packet_template_get_packet (vm,
2234 &im->ip4_arp_request_packet_template,
2238 return clib_error_return (0, "ARP request packet allocation failed");
2240 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* NOTE(review): this return happens after the request buffer was
 * allocated; no free is visible here -- confirm the buffer is not leaked. */
2241 if (PREDICT_FALSE (!hi->hw_address))
2243 return clib_error_return (0, "%U: interface %U do not support ip probe",
2244 format_ip4_address, dst,
2245 format_vnet_sw_if_index_name, vnm,
2249 mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
/* Sender IP is the matching interface address, target IP is dst. */
2251 h->ip4_over_ethernet[0].ip4 = src[0];
2252 h->ip4_over_ethernet[1].ip4 = dst[0];
2254 b = vlib_get_buffer (vm, bi);
2255 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2256 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2258 ip46_address_t nh = {
2262 ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2263 VNET_LINK_IP4, &nh, sw_if_index);
2266 /* Peer has been previously resolved, retrieve glean adj instead */
2267 if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2270 unicast_rewrite = 1;
2274 ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2275 VNET_LINK_IP4, sw_if_index, &nh);
2280 /* Add encapsulation string for software interface (e.g. ethernet header). */
2281 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* With a unicast rewrite, the two bytes just before the ARP header (the
 * ethertype slot of the rewrite) are overwritten with the ARP ethertype. */
2282 if (unicast_rewrite)
2284 u16 *etype = vlib_buffer_get_current (b) - 2;
2285 etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2287 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the request directly to the interface output node. */
2290 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2291 u32 *to_next = vlib_frame_vector_args (f);
2294 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2298 return /* no error */ 0;
/* Next-node indices used by the ip4 rewrite path: drop, ICMP error
 * generation, and fragmentation. */
2304 IP4_REWRITE_NEXT_DROP,
2305 IP4_REWRITE_NEXT_ICMP_ERROR,
2306 IP4_REWRITE_NEXT_FRAGMENT,
2307 IP4_REWRITE_N_NEXT /* Last */
2308 } ip4_rewrite_next_t;
2311 * The bits of an IPv4 address to mask to construct a multicast
/* IP4_MCAST_ADDR_MASK is given one value for big-endian hosts and a
 * different value otherwise. */
2314 #if CLIB_ARCH_IS_BIG_ENDIAN
2315 #define IP4_MCAST_ADDR_MASK 0x007fffff
2317 #define IP4_MCAST_ADDR_MASK 0xffff7f00
/*
 * ip4_mtu_check: compare packet_len with the adjacency's maximum L3
 * packet size.  When the packet is too large, *error becomes
 * IP4_ERROR_MTU_EXCEEDED and the packet is steered either to the ICMP
 * error node (destination unreachable / fragmentation needed and DF set)
 * or to the fragmentation node.  The df argument presumably selects
 * between the two; the selecting statement is outside this view.
 */
2321 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2322 u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2324 if (packet_len > adj_packet_bytes)
2326 *error = IP4_ERROR_MTU_EXCEEDED;
2329 icmp4_error_set_vnet_buffer
2330 (b, ICMP4_destination_unreachable,
2331 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2333 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2337 /* IP fragmentation */
2338 ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2339 IP4_FRAG_NEXT_IP4_REWRITE, 0);
2340 *next = IP4_REWRITE_NEXT_FRAGMENT;
/*
 * ip4_ttl_and_checksum_check: per-packet TTL handling on the rewrite path.
 * The header checksum is patched incrementally instead of being
 * recomputed.  A TTL that reaches zero sets IP4_ERROR_TIME_EXPIRED and
 * steers the packet to the ICMP error node (time exceeded in transit).
 */
2345 /* Decrement TTL & update checksum.
2346 Works either endian, so no need for byte swap. */
2347 static_always_inline void
2348 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
/* Locally originated packets only get the flag cleared in the visible
 * code; any further handling of that case is outside this view. */
2353 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2355 b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2361 /* Input node should have reject packets with ttl 0. */
2362 ASSERT (ip->ttl > 0);
/* Incremental update: add 0x0100 (network order) to the stored checksum,
 * then fold the end-around carry back in. */
2364 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2365 checksum += checksum >= 0xffff;
2367 ip->checksum = checksum;
2372 * If the ttl drops below 1 when forwarding, generate
2375 if (PREDICT_FALSE (ttl <= 0))
2377 *error = IP4_ERROR_TIME_EXPIRED;
/* TX interface is reset to ~0 before the ICMP error is generated. */
2378 vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2379 icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2380 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2382 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2385 /* Verify checksum. */
2386 ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2387 (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2392 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2393 vlib_node_runtime_t * node,
2394 vlib_frame_t * frame,
2395 int do_counters, int is_midchain, int is_mcast,
2398 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2399 u32 *from = vlib_frame_vector_args (frame);
2400 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2401 u16 nexts[VLIB_FRAME_SIZE], *next;
2403 vlib_node_runtime_t *error_node =
2404 vlib_node_get_runtime (vm, ip4_input_node.index);
2406 n_left_from = frame->n_vectors;
2407 u32 thread_index = vm->thread_index;
2409 vlib_get_buffers (vm, from, bufs, n_left_from);
2410 clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2412 if (n_left_from >= 6)
2415 for (i = 2; i < 6; i++)
2416 vlib_prefetch_buffer_header (bufs[i], LOAD);
2421 while (n_left_from >= 8)
2423 ip_adjacency_t *adj0, *adj1;
2424 ip4_header_t *ip0, *ip1;
2425 u32 rw_len0, error0, adj_index0;
2426 u32 rw_len1, error1, adj_index1;
2427 u32 tx_sw_if_index0, tx_sw_if_index1;
2430 vlib_prefetch_buffer_header (b[6], LOAD);
2431 vlib_prefetch_buffer_header (b[7], LOAD);
2433 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2434 adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2437 * pre-fetch the per-adjacency counters
2441 vlib_prefetch_combined_counter (&adjacency_counters,
2442 thread_index, adj_index0);
2443 vlib_prefetch_combined_counter (&adjacency_counters,
2444 thread_index, adj_index1);
2447 ip0 = vlib_buffer_get_current (b[0]);
2448 ip1 = vlib_buffer_get_current (b[1]);
2450 error0 = error1 = IP4_ERROR_NONE;
2452 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2453 ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2455 /* Rewrite packet header and updates lengths. */
2456 adj0 = adj_get (adj_index0);
2457 adj1 = adj_get (adj_index1);
2459 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2460 rw_len0 = adj0[0].rewrite_header.data_bytes;
2461 rw_len1 = adj1[0].rewrite_header.data_bytes;
2462 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2463 vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2465 p = vlib_buffer_get_current (b[2]);
2466 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2467 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2469 p = vlib_buffer_get_current (b[3]);
2470 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2471 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2473 /* Check MTU of outgoing interface. */
2474 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2475 u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2477 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2478 ip0_len = gso_mtu_sz (b[0]);
2479 if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2480 ip1_len = gso_mtu_sz (b[1]);
2482 ip4_mtu_check (b[0], ip0_len,
2483 adj0[0].rewrite_header.max_l3_packet_bytes,
2484 ip0->flags_and_fragment_offset &
2485 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2487 ip4_mtu_check (b[1], ip1_len,
2488 adj1[0].rewrite_header.max_l3_packet_bytes,
2489 ip1->flags_and_fragment_offset &
2490 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2495 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2496 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2497 IP4_ERROR_SAME_INTERFACE : error0);
2498 error1 = ((adj1[0].rewrite_header.sw_if_index ==
2499 vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2500 IP4_ERROR_SAME_INTERFACE : error1);
2503 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2504 * to see the IP header */
2505 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2507 u32 next_index = adj0[0].rewrite_header.next_index;
2508 vlib_buffer_advance (b[0], -(word) rw_len0);
2509 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2510 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2513 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2514 vnet_feature_arc_start (lm->output_feature_arc_index,
2515 tx_sw_if_index0, &next_index, b[0]);
2516 next[0] = next_index;
2520 b[0]->error = error_node->errors[error0];
2522 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2524 u32 next_index = adj1[0].rewrite_header.next_index;
2525 vlib_buffer_advance (b[1], -(word) rw_len1);
2527 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2528 vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2531 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2532 vnet_feature_arc_start (lm->output_feature_arc_index,
2533 tx_sw_if_index1, &next_index, b[1]);
2534 next[1] = next_index;
2538 b[1]->error = error_node->errors[error1];
2542 calc_checksums (vm, b[0]);
2543 calc_checksums (vm, b[1]);
2545 /* Guess we are only writing on simple Ethernet header. */
2546 vnet_rewrite_two_headers (adj0[0], adj1[0],
2547 ip0, ip1, sizeof (ethernet_header_t));
2550 * Bump the per-adjacency counters
2554 vlib_increment_combined_counter
2555 (&adjacency_counters,
2557 adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2559 vlib_increment_combined_counter
2560 (&adjacency_counters,
2562 adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2567 if (adj0->sub_type.midchain.fixup_func)
2568 adj0->sub_type.midchain.fixup_func
2569 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2570 if (adj1->sub_type.midchain.fixup_func)
2571 adj1->sub_type.midchain.fixup_func
2572 (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2578 * copy bytes from the IP address into the MAC rewrite
2580 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2581 adj0->rewrite_header.dst_mcast_offset,
2582 &ip0->dst_address.as_u32, (u8 *) ip0);
2583 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2584 adj1->rewrite_header.dst_mcast_offset,
2585 &ip1->dst_address.as_u32, (u8 *) ip1);
2593 while (n_left_from > 0)
2595 ip_adjacency_t *adj0;
2597 u32 rw_len0, adj_index0, error0;
2598 u32 tx_sw_if_index0;
2600 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2602 adj0 = adj_get (adj_index0);
2605 vlib_prefetch_combined_counter (&adjacency_counters,
2606 thread_index, adj_index0);
2608 ip0 = vlib_buffer_get_current (b[0]);
2610 error0 = IP4_ERROR_NONE;
2612 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2615 /* Update packet buffer attributes/set output interface. */
2616 rw_len0 = adj0[0].rewrite_header.data_bytes;
2617 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2619 /* Check MTU of outgoing interface. */
2620 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2621 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2622 ip0_len = gso_mtu_sz (b[0]);
2624 ip4_mtu_check (b[0], ip0_len,
2625 adj0[0].rewrite_header.max_l3_packet_bytes,
2626 ip0->flags_and_fragment_offset &
2627 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2632 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2633 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2634 IP4_ERROR_SAME_INTERFACE : error0);
2637 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2638 * to see the IP header */
2639 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2641 u32 next_index = adj0[0].rewrite_header.next_index;
2642 vlib_buffer_advance (b[0], -(word) rw_len0);
2643 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2644 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2647 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2648 vnet_feature_arc_start (lm->output_feature_arc_index,
2649 tx_sw_if_index0, &next_index, b[0]);
2650 next[0] = next_index;
2654 b[0]->error = error_node->errors[error0];
2658 calc_checksums (vm, b[0]);
2660 /* Guess we are only writing on simple Ethernet header. */
2661 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2664 vlib_increment_combined_counter
2665 (&adjacency_counters,
2666 thread_index, adj_index0, 1,
2667 vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2671 if (adj0->sub_type.midchain.fixup_func)
2672 adj0->sub_type.midchain.fixup_func
2673 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2679 * copy bytes from the IP address into the MAC rewrite
2681 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2682 adj0->rewrite_header.dst_mcast_offset,
2683 &ip0->dst_address.as_u32, (u8 *) ip0);
2692 /* Need to do trace after rewrites to pick up new packet data. */
2693 if (node->flags & VLIB_NODE_FLAG_TRACE)
2694 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2696 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2697 return frame->n_vectors;
2701 ip4_rewrite_inline (vlib_main_t * vm,
2702 vlib_node_runtime_t * node,
2703 vlib_frame_t * frame,
2704 int do_counters, int is_midchain, int is_mcast)
2706 vnet_main_t *vnm = vnet_get_main ();
2707 if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2708 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2709 is_midchain, is_mcast,
2712 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2713 is_midchain, is_mcast,
2714 0 /* no do_gso */ );
2718 /** @brief IPv4 rewrite node.
2721 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2722 header checksum, fetch the ip adjacency, check the outbound mtu,
2723 apply the adjacency rewrite, and send pkts to the adjacency
2724 rewrite header's rewrite_next_index.
2726 @param vm vlib_main_t corresponding to the current thread
2727 @param node vlib_node_runtime_t
2728 @param frame vlib_frame_t whose contents should be dispatched
2730 @par Graph mechanics: buffer metadata, next index usage
2733 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2734 - the rewrite adjacency index
2735 - <code>adj->lookup_next_index</code>
2736 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2737 the packet will be dropped.
2738 - <code>adj->rewrite_header</code>
2739 - Rewrite string length, rewrite string, next_index
2742 - <code>b->current_data, b->current_length</code>
2743 - Updated net of applying the rewrite string
2745 <em>Next Indices:</em>
2746 - <code> adj->rewrite_header.next_index </code>
2750 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2751 vlib_frame_t * frame)
2753 if (adj_are_counters_enabled ())
2754 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2756 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2759 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2760 vlib_node_runtime_t * node,
2761 vlib_frame_t * frame)
2763 if (adj_are_counters_enabled ())
2764 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2766 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2769 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2770 vlib_node_runtime_t * node,
2771 vlib_frame_t * frame)
2773 if (adj_are_counters_enabled ())
2774 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2776 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2779 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2780 vlib_node_runtime_t * node,
2781 vlib_frame_t * frame)
2783 if (adj_are_counters_enabled ())
2784 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2786 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2789 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2790 vlib_node_runtime_t * node,
2791 vlib_frame_t * frame)
2793 if (adj_are_counters_enabled ())
2794 return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2796 return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2800 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2801 .name = "ip4-rewrite",
2802 .vector_size = sizeof (u32),
2804 .format_trace = format_ip4_rewrite_trace,
2806 .n_next_nodes = IP4_REWRITE_N_NEXT,
2808 [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2809 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2810 [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2814 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2815 .name = "ip4-rewrite-bcast",
2816 .vector_size = sizeof (u32),
2818 .format_trace = format_ip4_rewrite_trace,
2819 .sibling_of = "ip4-rewrite",
2822 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2823 .name = "ip4-rewrite-mcast",
2824 .vector_size = sizeof (u32),
2826 .format_trace = format_ip4_rewrite_trace,
2827 .sibling_of = "ip4-rewrite",
2830 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2831 .name = "ip4-mcast-midchain",
2832 .vector_size = sizeof (u32),
2834 .format_trace = format_ip4_rewrite_trace,
2835 .sibling_of = "ip4-rewrite",
2838 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2839 .name = "ip4-midchain",
2840 .vector_size = sizeof (u32),
2841 .format_trace = format_ip4_forward_next_trace,
2842 .sibling_of = "ip4-rewrite",
2847 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2849 ip4_fib_mtrie_t *mtrie0;
2850 ip4_fib_mtrie_leaf_t leaf0;
2853 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2855 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2856 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2857 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2859 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2861 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2864 static clib_error_t *
2865 test_lookup_command_fn (vlib_main_t * vm,
2866 unformat_input_t * input, vlib_cli_command_t * cmd)
2873 ip4_address_t ip4_base_address;
2876 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2878 if (unformat (input, "table %d", &table_id))
2880 /* Make sure the entry exists. */
2881 fib = ip4_fib_get (table_id);
2882 if ((fib) && (fib->index != table_id))
2883 return clib_error_return (0, "<fib-index> %d does not exist",
2886 else if (unformat (input, "count %f", &count))
2889 else if (unformat (input, "%U",
2890 unformat_ip4_address, &ip4_base_address))
2893 return clib_error_return (0, "unknown input `%U'",
2894 format_unformat_error, input);
2899 for (i = 0; i < n; i++)
2901 if (!ip4_lookup_validate (&ip4_base_address, table_id))
2904 ip4_base_address.as_u32 =
2905 clib_host_to_net_u32 (1 +
2906 clib_net_to_host_u32 (ip4_base_address.as_u32));
2910 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2912 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2918 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2919 * given FIB table to determine if there is a conflict with the
2920 * adjacency table. The fib-id can be determined by using the
2921 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2924 * @todo This command uses fib-id, other commands use table-id (not
2925 * just a name, they are different indexes). Would like to change this
2926 * to table-id for consistency.
2929 * Example of how to run the test lookup command:
2930 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2931 * No errors in 2 lookups
2935 VLIB_CLI_COMMAND (lookup_test_command, static) =
2937 .path = "test lookup",
2938 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2939 .function = test_lookup_command_fn,
2943 #ifndef CLIB_MARCH_VARIANT
2945 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2949 fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2951 if (~0 == fib_index)
2952 return VNET_API_ERROR_NO_SUCH_FIB;
2954 fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2961 static clib_error_t *
2962 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2963 unformat_input_t * input,
2964 vlib_cli_command_t * cmd)
2968 u32 flow_hash_config = 0;
2971 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2973 if (unformat (input, "table %d", &table_id))
2976 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2977 foreach_flow_hash_bit
2984 return clib_error_return (0, "unknown input `%U'",
2985 format_unformat_error, input);
2987 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2993 case VNET_API_ERROR_NO_SUCH_FIB:
2994 return clib_error_return (0, "no such FIB table %d", table_id);
2997 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3005 * Configure the set of IPv4 fields used by the flow hash.
3008 * Example of how to set the flow hash on a given table:
3009 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3010 * Example of displaying the configured flow hash:
3011 * @cliexstart{show ip fib}
3012 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3015 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3016 * [0] [@0]: dpo-drop ip6
3019 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3020 * [0] [@0]: dpo-drop ip6
3023 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3024 * [0] [@0]: dpo-drop ip6
3027 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3028 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3031 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3032 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3033 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3034 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3035 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3038 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3039 * [0] [@0]: dpo-drop ip6
3040 * 255.255.255.255/32
3042 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3043 * [0] [@0]: dpo-drop ip6
3044 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3047 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3048 * [0] [@0]: dpo-drop ip6
3051 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3052 * [0] [@0]: dpo-drop ip6
3055 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3056 * [0] [@4]: ipv4-glean: af_packet0
3059 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3060 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3063 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3064 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3067 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3068 * [0] [@4]: ipv4-glean: af_packet1
3071 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3072 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3075 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3076 * [0] [@0]: dpo-drop ip6
3079 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3080 * [0] [@0]: dpo-drop ip6
3081 * 255.255.255.255/32
3083 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3084 * [0] [@0]: dpo-drop ip6
3088 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3090 .path = "set ip flow-hash",
3092 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3093 .function = set_ip_flow_hash_command_fn,
3097 #ifndef CLIB_MARCH_VARIANT
3099 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3102 vnet_main_t *vnm = vnet_get_main ();
3103 vnet_interface_main_t *im = &vnm->interface_main;
3104 ip4_main_t *ipm = &ip4_main;
3105 ip_lookup_main_t *lm = &ipm->lookup_main;
3106 vnet_classify_main_t *cm = &vnet_classify_main;
3107 ip4_address_t *if_addr;
3109 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3110 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3112 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3113 return VNET_API_ERROR_NO_SUCH_ENTRY;
3115 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3116 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3118 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3120 if (NULL != if_addr)
3122 fib_prefix_t pfx = {
3124 .fp_proto = FIB_PROTOCOL_IP4,
3125 .fp_addr.ip4 = *if_addr,
3129 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3133 if (table_index != (u32) ~ 0)
3135 dpo_id_t dpo = DPO_INVALID;
3140 classify_dpo_create (DPO_PROTO_IP4, table_index));
3142 fib_table_entry_special_dpo_add (fib_index,
3144 FIB_SOURCE_CLASSIFY,
3145 FIB_ENTRY_FLAG_NONE, &dpo);
3150 fib_table_entry_special_remove (fib_index,
3151 &pfx, FIB_SOURCE_CLASSIFY);
3159 static clib_error_t *
3160 set_ip_classify_command_fn (vlib_main_t * vm,
3161 unformat_input_t * input,
3162 vlib_cli_command_t * cmd)
3164 u32 table_index = ~0;
3165 int table_index_set = 0;
3166 u32 sw_if_index = ~0;
3169 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3171 if (unformat (input, "table-index %d", &table_index))
3172 table_index_set = 1;
3173 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3174 vnet_get_main (), &sw_if_index))
3180 if (table_index_set == 0)
3181 return clib_error_return (0, "classify table-index must be specified");
3183 if (sw_if_index == ~0)
3184 return clib_error_return (0, "interface / subif must be specified");
3186 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3193 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3194 return clib_error_return (0, "No such interface");
3196 case VNET_API_ERROR_NO_SUCH_ENTRY:
3197 return clib_error_return (0, "No such classifier table");
3203 * Assign a classification table to an interface. The classification
3204 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3205 * commands. Once the table is created, use this command to filter packets
3209 * Example of how to assign a classification table to an interface:
3210 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3213 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3215 .path = "set ip classify",
3217 "set ip classify intfc <interface> table-index <classify-idx>",
3218 .function = set_ip_classify_command_fn,
3222 static clib_error_t *
3223 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3225 ip4_main_t *im = &ip4_main;
3228 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3230 if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3233 return clib_error_return (0,
3234 "invalid heap-size parameter `%U'",
3235 format_unformat_error, input);
3238 im->mtrie_heap_size = heapsize;
3243 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3246 * fd.io coding-style-patch-verification: ON
3249 * eval: (c-set-style "gnu")