2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h> /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
60 /** @brief IPv4 lookup node.
63 This is the main IPv4 lookup dispatch node.
65 @param vm vlib_main_t corresponding to the current thread
66 @param node vlib_node_runtime_t
67 @param frame vlib_frame_t whose contents should be dispatched
69 @par Graph mechanics: buffer metadata, next index usage
72 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73 - Indicates the @c sw_if_index value of the interface that the
74 packet was received on.
75 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76 - When the value is @c ~0 then the node performs a longest prefix
77 match (LPM) for the packet destination address in the FIB attached
78 to the receive interface.
79 - Otherwise perform LPM for the packet destination address in the
80 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81 value (0, 1, ...) and not a VRF id.
84 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85 - The lookup result adjacency index.
88 - Dispatches the packet to the node index found in
89 ip_adjacency_t @c adj->lookup_next_index
90 (where @c adj is the lookup result adjacency).
/* Node function for ip4_lookup_node: the whole frame is handed to
   ip4_lookup_inline () and that helper's return value is returned as-is. */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
95 return ip4_lookup_inline (vm, node, frame);
/* Forward declaration: the trace formatter is defined further down in this
   file but is referenced by the node registrations below. */
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph node registration for "ip4-lookup".  The vector element size is
   sizeof (u32), traces are rendered with format_ip4_lookup_trace and the
   next-node table is IP4_LOOKUP_NEXT_NODES with IP_LOOKUP_N_NEXT entries. */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
103 .name = "ip4-lookup",
104 .vector_size = sizeof (u32),
105 .format_trace = format_ip4_lookup_trace,
106 .n_next_nodes = IP_LOOKUP_N_NEXT,
107 .next_nodes = IP4_LOOKUP_NEXT_NODES,
/* Node function for ip4_load_balance_node.

   For each buffer:
   - the load-balance index is read from ip.adj_index[VLIB_TX];
   - if the load-balance has more than one bucket, a bucket is chosen with
     the flow hash masked by lb_n_buckets_minus_1; the flow hash already
     stored in the buffer is re-used (shifted right by one) when it is
     non-zero, otherwise it is computed with ip4_compute_flow_hash () and
     stored back in the buffer;
   - with a single bucket, bucket 0 is used;
   - the chosen bucket's dpoi_next_node becomes the next node and its
     dpoi_index is written back to ip.adj_index[VLIB_TX];
   - the via counter for the original load-balance index is incremented by
     one packet and the buffer chain length.

   The first section handles buffers in pairs (b[0] / b[1]) while prefetching
   b[2] / b[3]; the second section handles one buffer at a time.  The loop
   statements themselves are not visible in this view.

   All buffers are enqueued with vlib_buffer_enqueue_to_next (), traces are
   added when the node has VLIB_NODE_FLAG_TRACE set, and frame->n_vectors is
   returned. */
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112 vlib_node_runtime_t * node,
113 vlib_frame_t * frame)
115 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
117 u32 thread_index = vm->thread_index;
118 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119 u16 nexts[VLIB_FRAME_SIZE], *next;
121 from = vlib_frame_vector_args (frame);
122 n_left = frame->n_vectors;
125 vlib_get_buffers (vm, from, bufs, n_left);
129 const load_balance_t *lb0, *lb1;
130 const ip4_header_t *ip0, *ip1;
131 u32 lbi0, hc0, lbi1, hc1;
132 const dpo_id_t *dpo0, *dpo1;
134 /* Prefetch next iteration. */
136 vlib_prefetch_buffer_header (b[2], LOAD);
137 vlib_prefetch_buffer_header (b[3], LOAD);
139 CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140 CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
143 ip0 = vlib_buffer_get_current (b[0]);
144 ip1 = vlib_buffer_get_current (b[1]);
145 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146 lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
148 lb0 = load_balance_get (lbi0);
149 lb1 = load_balance_get (lbi1);
152 * this node is for via FIBs we can re-use the hash value from the
153 * to node if present.
154 * We don't want to use the same hash value at each level in the recursion
155 * graph as that would lead to polarisation
159 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
161 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
/* Shift the stored hash so this level does not pick buckets with the same
   bits as the previous level. */
163 hc0 = vnet_buffer (b[0])->ip.flow_hash =
164 vnet_buffer (b[0])->ip.flow_hash >> 1;
168 hc0 = vnet_buffer (b[0])->ip.flow_hash =
169 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
171 dpo0 = load_balance_get_fwd_bucket
172 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176 dpo0 = load_balance_get_bucket_i (lb0, 0);
178 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
180 if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
182 hc1 = vnet_buffer (b[1])->ip.flow_hash =
183 vnet_buffer (b[1])->ip.flow_hash >> 1;
187 hc1 = vnet_buffer (b[1])->ip.flow_hash =
188 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
190 dpo1 = load_balance_get_fwd_bucket
191 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195 dpo1 = load_balance_get_bucket_i (lb1, 0);
198 next[0] = dpo0->dpoi_next_node;
199 next[1] = dpo1->dpoi_next_node;
201 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Counters are indexed by the load-balance index read on entry (lbi0/lbi1),
   not by the bucket's dpoi_index. */
204 vlib_increment_combined_counter
205 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206 vlib_increment_combined_counter
207 (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
/* Single-buffer section: same steps as above for b[0] only. */
216 const load_balance_t *lb0;
217 const ip4_header_t *ip0;
218 const dpo_id_t *dpo0;
221 ip0 = vlib_buffer_get_current (b[0]);
222 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
224 lb0 = load_balance_get (lbi0);
227 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
229 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
231 hc0 = vnet_buffer (b[0])->ip.flow_hash =
232 vnet_buffer (b[0])->ip.flow_hash >> 1;
236 hc0 = vnet_buffer (b[0])->ip.flow_hash =
237 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
239 dpo0 = load_balance_get_fwd_bucket
240 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244 dpo0 = load_balance_get_bucket_i (lb0, 0);
247 next[0] = dpo0->dpoi_next_node;
248 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
250 vlib_increment_combined_counter
251 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
258 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259 if (node->flags & VLIB_NODE_FLAG_TRACE)
260 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
262 return frame->n_vectors;
/* Graph node registration for "ip4-load-balance".  It is declared as a
   sibling of "ip4-lookup" and uses the same trace formatter,
   format_ip4_lookup_trace. */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
268 .name = "ip4-load-balance",
269 .vector_size = sizeof (u32),
270 .sibling_of = "ip4-lookup",
271 .format_trace = format_ip4_lookup_trace,
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
/* Walks the addresses of sw_if_index with foreach_ip_interface_address
   (passing 1 for "honor unnumbered") and looks each one up with
   ip_interface_address_get_address ().
   @param im          IPv4 main; its lookup_main holds the address pool.
   @param sw_if_index interface whose addresses are walked.
   @param result_ia   receives the ip_interface_address_t when an address
                      was found, otherwise 0.
   NOTE(review): the return statement and the loop body are not visible in
   this view; presumably the found ip4_address_t is returned -- confirm. */
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279 ip_interface_address_t ** result_ia)
281 ip_lookup_main_t *lm = &im->lookup_main;
282 ip_interface_address_t *ia = 0;
283 ip4_address_t *result = 0;
286 foreach_ip_interface_address
287 (lm, ia, sw_if_index,
288 1 /* honor unnumbered */ ,
291 ip_interface_address_get_address (lm, ia);
297 *result_ia = result ? ia : 0;
/* (Re)install the subnet broadcast route for an interface.

   Any existing special entry from FIB_SOURCE_INTERFACE is removed first.
   The interface flags are then read: when
   VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST is set a one-path entry is
   installed with fib_table_entry_update_one_path (); the special entry added
   at the end carries FIB_ENTRY_FLAG_DROP and is exempt from loose uRPF.
   NOTE(review): the else keyword separating the two installs is not visible
   in this view; the drop entry is presumably the flag-clear case. */
303 ip4_add_subnet_bcast_route (u32 fib_index,
307 vnet_sw_interface_flags_t iflags;
309 iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
311 fib_table_entry_special_remove(fib_index,
313 FIB_SOURCE_INTERFACE);
315 if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
317 fib_table_entry_update_one_path (fib_index, pfx,
318 FIB_SOURCE_INTERFACE,
321 /* No next-hop address */
327 // no out-label stack
329 FIB_ROUTE_PATH_FLAG_NONE);
333 fib_table_entry_special_add(fib_index,
335 FIB_SOURCE_INTERFACE,
336 (FIB_ENTRY_FLAG_DROP |
337 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
/* Install the per-prefix special routes for an interface address.

   Interface prefixes are reference counted, keyed by the masked prefix
   (address & fib_masks[length]) together with sw_if_index:
   - if the key already exists only ref_count is incremented;
   - otherwise a pool entry is allocated with ref_count 1, the index of
     address 'a' in if_address_pool is recorded as src_ia_index, and the
     entry is added to prefix_to_if_prefix_index.

   Routes added for a new prefix:
   - length <= 30: a glean route (connected | attached) for the prefix, a
     /32 drop entry for the prefix base address and a /32 subnet broadcast
     route via ip4_add_subnet_bcast_route ().  The /32 entries are skipped
     when they equal the interface address itself.
   - length == 31: an attached /32 route for the other address of the pair
     (interface address with its lowest bit flipped). */
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
345 ip_interface_address_t * a)
347 ip_lookup_main_t *lm = &im->lookup_main;
348 ip_interface_prefix_t *if_prefix;
349 ip4_address_t *address = ip_interface_address_get_address (lm, a);
351 ip_interface_prefix_key_t key = {
353 .fp_len = a->address_length,
354 .fp_proto = FIB_PROTOCOL_IP4,
355 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
357 .sw_if_index = sw_if_index,
360 fib_prefix_t pfx_special = {
361 .fp_proto = FIB_PROTOCOL_IP4,
364 /* If prefix already set on interface, just increment ref count & return */
365 if_prefix = ip_get_interface_prefix (lm, &key);
368 if_prefix->ref_count += 1;
372 /* New prefix - allocate a pool entry, initialize it, add to the hash */
373 pool_get (lm->if_prefix_pool, if_prefix);
374 if_prefix->ref_count = 1;
375 if_prefix->src_ia_index = a - lm->if_address_pool;
376 clib_memcpy (&if_prefix->key, &key, sizeof (key));
377 mhash_set (&lm->prefix_to_if_prefix_index, &key,
378 if_prefix - lm->if_prefix_pool, 0 /* old value */);
380 /* length <= 30 - add glean, drop first address, maybe drop bcast address */
381 if (a->address_length <= 30)
383 pfx_special.fp_len = a->address_length;
384 pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
386 /* set the glean route for the prefix */
387 fib_table_entry_update_one_path (fib_index, &pfx_special,
388 FIB_SOURCE_INTERFACE,
389 (FIB_ENTRY_FLAG_CONNECTED |
390 FIB_ENTRY_FLAG_ATTACHED),
392 /* No next-hop address */
395 /* invalid FIB index */
398 /* no out-label stack */
400 FIB_ROUTE_PATH_FLAG_NONE);
402 /* set a drop route for the base address of the prefix */
403 pfx_special.fp_len = 32;
404 pfx_special.fp_addr.ip4.as_u32 =
405 address->as_u32 & im->fib_masks[a->address_length];
407 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408 fib_table_entry_special_add (fib_index, &pfx_special,
409 FIB_SOURCE_INTERFACE,
410 (FIB_ENTRY_FLAG_DROP |
411 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
413 /* set a route for the broadcast address of the prefix */
414 pfx_special.fp_len = 32;
415 pfx_special.fp_addr.ip4.as_u32 =
416 address->as_u32 | ~im->fib_masks[a->address_length];
417 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418 ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
422 /* length == 31 - add an attached route for the other address */
423 else if (a->address_length == 31)
425 pfx_special.fp_len = 32;
426 pfx_special.fp_addr.ip4.as_u32 =
427 address->as_u32 ^ clib_host_to_net_u32(1);
429 fib_table_entry_update_one_path (fib_index, &pfx_special,
430 FIB_SOURCE_INTERFACE,
431 (FIB_ENTRY_FLAG_ATTACHED),
433 &pfx_special.fp_addr,
435 /* invalid FIB index */
439 FIB_ROUTE_PATH_FLAG_NONE);
/* Add the FIB state for one interface address:
   1. the per-prefix special routes, via ip4_add_interface_prefix_routes ();
   2. when the interface has a classify table configured (index != ~0 in
      classify_table_index_by_sw_if_index), a special DPO entry built from
      classify_dpo_create ();
   3. the entry for the address itself, flagged connected | local. */
444 ip4_add_interface_routes (u32 sw_if_index,
445 ip4_main_t * im, u32 fib_index,
446 ip_interface_address_t * a)
448 ip_lookup_main_t *lm = &im->lookup_main;
449 ip4_address_t *address = ip_interface_address_get_address (lm, a);
452 .fp_proto = FIB_PROTOCOL_IP4,
453 .fp_addr.ip4 = *address,
456 /* set special routes for the prefix if needed */
457 ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
459 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
461 u32 classify_table_index =
462 lm->classify_table_index_by_sw_if_index[sw_if_index];
463 if (classify_table_index != (u32) ~ 0)
465 dpo_id_t dpo = DPO_INVALID;
470 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
472 fib_table_entry_special_dpo_add (fib_index,
475 FIB_ENTRY_FLAG_NONE, &dpo);
480 fib_table_entry_update_one_path (fib_index, &pfx,
481 FIB_SOURCE_INTERFACE,
482 (FIB_ENTRY_FLAG_CONNECTED |
483 FIB_ENTRY_FLAG_LOCAL),
490 FIB_ROUTE_PATH_FLAG_NONE);
/* Remove the per-prefix special routes when an interface address is deleted.

   The interface prefix is looked up with the same key used on add (masked
   prefix + sw_if_index); a warning is logged when it is not found.  Its
   ref_count is decremented, and if references remain while the recorded
   source address (src_ia_index) is still allocated, no route changes are
   needed.

   Otherwise:
   - length <= 30: the glean route is deleted.  When ref_count reached zero
     the /32 entries for the prefix base and broadcast addresses are removed
     too (each skipped when equal to the deleted address).  When references
     remain, a replacement source address is chosen with
     ip4_interface_address_matching_destination (), recorded in src_ia_index
     and a new glean route is installed with it.
   - length == 31: the attached route for the other address of the pair is
     deleted.

   The hash entry and pool slot are released at the end of the function.
   NOTE(review): early returns and braces are not visible in this view, so
   which paths reach the final release cannot be confirmed from here. */
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
497 ip4_address_t * address,
500 ip_lookup_main_t *lm = &im->lookup_main;
501 ip_interface_prefix_t *if_prefix;
503 ip_interface_prefix_key_t key = {
505 .fp_len = address_length,
506 .fp_proto = FIB_PROTOCOL_IP4,
507 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
509 .sw_if_index = sw_if_index,
512 fib_prefix_t pfx_special = {
514 .fp_proto = FIB_PROTOCOL_IP4,
517 if_prefix = ip_get_interface_prefix (lm, &key);
520 clib_warning ("Prefix not found while deleting %U",
521 format_ip4_address_and_length, address, address_length);
525 if_prefix->ref_count -= 1;
528 * Routes need to be adjusted if:
529 * - deleting last intf addr in prefix
530 * - deleting intf addr used as default source address in glean adjacency
532 * We're done now otherwise
534 if ((if_prefix->ref_count > 0) &&
535 !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
538 /* length <= 30, delete glean route, first address, last address */
539 if (address_length <= 30)
542 /* remove glean route for prefix */
543 pfx_special.fp_addr.ip4 = *address;
544 pfx_special.fp_len = address_length;
545 fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
547 /* if no more intf addresses in prefix, remove other special routes */
548 if (!if_prefix->ref_count)
550 /* first address in prefix */
551 pfx_special.fp_addr.ip4.as_u32 =
552 address->as_u32 & im->fib_masks[address_length];
553 pfx_special.fp_len = 32;
555 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556 fib_table_entry_special_remove (fib_index,
558 FIB_SOURCE_INTERFACE);
560 /* prefix broadcast address */
561 pfx_special.fp_addr.ip4.as_u32 =
562 address->as_u32 | ~im->fib_masks[address_length];
563 pfx_special.fp_len = 32;
565 if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
566 fib_table_entry_special_remove (fib_index,
568 FIB_SOURCE_INTERFACE);
571 /* default source addr just got deleted, find another */
573 ip_interface_address_t *new_src_ia = NULL;
574 ip4_address_t *new_src_addr = NULL;
577 ip4_interface_address_matching_destination
578 (im, address, sw_if_index, &new_src_ia);
580 if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
582 pfx_special.fp_len = address_length;
583 pfx_special.fp_addr.ip4 = *new_src_addr;
585 /* set new glean route for the prefix */
586 fib_table_entry_update_one_path (fib_index, &pfx_special,
587 FIB_SOURCE_INTERFACE,
588 (FIB_ENTRY_FLAG_CONNECTED |
589 FIB_ENTRY_FLAG_ATTACHED),
591 /* No next-hop address */
594 /* invalid FIB index */
597 /* no out-label stack */
599 FIB_ROUTE_PATH_FLAG_NONE);
603 /* length == 31, delete attached route for the other address */
604 else if (address_length == 31)
606 pfx_special.fp_addr.ip4.as_u32 =
607 address->as_u32 ^ clib_host_to_net_u32(1);
609 fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
612 mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
613 pool_put (lm->if_prefix_pool, if_prefix);
/* Remove the FIB state for one interface address: first the per-prefix
   special routes via ip4_del_interface_prefix_routes (), then the
   FIB_SOURCE_INTERFACE entry for the address prefix (pfx) itself. */
617 ip4_del_interface_routes (u32 sw_if_index,
620 ip4_address_t * address, u32 address_length)
623 .fp_len = address_length,
624 .fp_proto = FIB_PROTOCOL_IP4,
625 .fp_addr.ip4 = *address,
628 ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
629 address, address_length);
632 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
/* Reference-counted enable/disable of IPv4 on an interface.

   im->ip_enabled_by_sw_if_index[sw_if_index] is incremented on enable and
   decremented on disable (asserting it was > 0).  The rest of the function
   is meant to run only on the 0 <-> 1 transition: the "ip4-not-enabled"
   feature is set to !is_enable on the ip4-unicast and ip4-multicast arcs,
   and every registered enable/disable callback is invoked with is_enable. */
635 #ifndef CLIB_MARCH_VARIANT
637 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
639 ip4_main_t *im = &ip4_main;
641 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
644 * enable/disable only on the 1<->0 transition
648 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
653 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
654 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
657 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
661 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
662 sw_if_index, !is_enable, 0, 0);
665 ip4_enable_disable_interface_callback_t *cb;
666 vec_foreach (cb, im->enable_disable_interface_callbacks)
667 cb->function (im, cb->function_opaque, sw_if_index, is_enable);
/* Add or delete an IPv4 address on an interface.

   - sw_if_index 0 (local0) is rejected with an error.
   - On add, every interface that shares the target's FIB index is scanned
     for an address whose prefix overlaps the new one (checked in both
     directions with ip4_destination_matches_route ()).  Several addresses
     from one prefix are tolerated only on the same interface, with the same
     length and a different address; any other overlap sets
     vnm->api_errno to VNET_API_ERROR_DUPLICATE_IF_ADDRESS and builds an
     error.
   - The address pool is updated through ip_interface_address_add_del ().
   - IPv4 is enabled (add) or disabled (delete) on the interface.
   - Routes are added or removed only while the interface is admin up.
   - Add/del callbacks are invoked only when the pool size changed.

   NOTE(review): the error-return paths are not visible in this view. */
671 static clib_error_t *
672 ip4_add_del_interface_address_internal (vlib_main_t * vm,
674 ip4_address_t * address,
675 u32 address_length, u32 is_del)
677 vnet_main_t *vnm = vnet_get_main ();
678 ip4_main_t *im = &ip4_main;
679 ip_lookup_main_t *lm = &im->lookup_main;
680 clib_error_t *error = 0;
681 u32 if_address_index, elts_before;
682 ip4_address_fib_t ip4_af, *addr_fib = 0;
684 /* local0 interface doesn't support IP addressing */
685 if (sw_if_index == 0)
688 clib_error_create ("local0 interface doesn't support IP addressing");
691 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
692 ip4_addr_fib_init (&ip4_af, address,
693 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
694 vec_add1 (addr_fib, ip4_af);
697 * there is no support for adj-fib handling in the presence of overlapping
698 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
704 /* When adding an address check that it does not conflict
705 with an existing address on any interface in this table. */
706 ip_interface_address_t *ia;
707 vnet_sw_interface_t *sif;
709 pool_foreach(sif, vnm->interface_main.sw_interfaces,
711 if (im->fib_index_by_sw_if_index[sw_if_index] ==
712 im->fib_index_by_sw_if_index[sif->sw_if_index])
714 foreach_ip_interface_address
715 (&im->lookup_main, ia, sif->sw_if_index,
716 0 /* honor unnumbered */ ,
719 ip_interface_address_get_address
720 (&im->lookup_main, ia);
721 if (ip4_destination_matches_route
722 (im, address, x, ia->address_length) ||
723 ip4_destination_matches_route (im,
728 /* an intf may have >1 addr from the same prefix */
729 if ((sw_if_index == sif->sw_if_index) &&
730 (ia->address_length == address_length) &&
731 (x->as_u32 != address->as_u32))
734 /* error if the length or intf was different */
735 vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
739 ("failed to add %U which conflicts with %U for interface %U",
740 format_ip4_address_and_length, address,
742 format_ip4_address_and_length, x,
744 format_vnet_sw_if_index_name, vnm,
/* Snapshot the pool size so a duplicate add can be detected afterwards. */
753 elts_before = pool_elts (lm->if_address_pool);
755 error = ip_interface_address_add_del
756 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
760 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
762 /* intf addr routes are added/deleted on admin up/down */
763 if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
766 ip4_del_interface_routes (sw_if_index,
767 im, ip4_af.fib_index, address,
770 ip4_add_interface_routes (sw_if_index,
771 im, ip4_af.fib_index,
773 (lm->if_address_pool, if_address_index));
776 /* An unchanged pool size means a duplicate address was added; only run the callbacks when the pool grew or shrank. */
777 if (elts_before != pool_elts (lm->if_address_pool))
779 ip4_add_del_interface_address_callback_t *cb;
780 vec_foreach (cb, im->add_del_interface_address_callbacks)
781 cb->function (im, cb->function_opaque, sw_if_index,
782 address, address_length, if_address_index, is_del);
/* Entry point for adding/deleting an interface address: forwards all of its
   arguments unchanged to ip4_add_del_interface_address_internal () and
   returns its result. */
791 ip4_add_del_interface_address (vlib_main_t * vm,
793 ip4_address_t * address,
794 u32 address_length, u32 is_del)
796 return ip4_add_del_interface_address_internal
797 (vm, sw_if_index, address, address_length, is_del);
/* Refresh the subnet broadcast routes of an interface after its
   directed-broadcast setting changes.

   For every address on the interface with prefix length <= 30, the /32
   subnet broadcast prefix (address | ~fib_masks[length]) is rebuilt and
   passed to ip4_add_subnet_bcast_route (), which chooses between forwarding
   and dropping based on the interface flags.
   NOTE(review): the use of 'enable' is not visible in this view. */
801 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
803 ip_interface_address_t *ia;
809 * when directed broadcast is enabled, the subnet braodcast route will forward
810 * packets using an adjacency with a broadcast MAC. otherwise it drops
813 foreach_ip_interface_address(&im->lookup_main, ia,
816 if (ia->address_length <= 30)
820 ipa = ip_interface_address_get_address (&im->lookup_main, ia);
824 .fp_proto = FIB_PROTOCOL_IP4,
826 .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
830 ip4_add_subnet_bcast_route
831 (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
/* Admin up/down hook for IPv4, registered with
   VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION.

   Makes sure the per-interface vectors cover sw_if_index, derives
   is_admin_up from VNET_SW_INTERFACE_FLAG_ADMIN_UP in 'flags', then walks
   the interface's addresses (not honoring unnumbered) and calls
   ip4_add_interface_routes () or ip4_del_interface_routes () for each one.
   NOTE(review): the if/else choosing between add and delete is not visible
   in this view; presumably add on up, delete on down. */
840 static clib_error_t *
841 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
843 ip4_main_t *im = &ip4_main;
844 ip_interface_address_t *ia;
846 u32 is_admin_up, fib_index;
848 /* Fill in lookup tables with default table (0). */
849 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
851 vec_validate_init_empty (im->
852 lookup_main.if_address_pool_index_by_sw_if_index,
855 is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
857 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
860 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
861 0 /* honor unnumbered */,
863 a = ip_interface_address_get_address (&im->lookup_main, ia);
865 ip4_add_interface_routes (sw_if_index,
869 ip4_del_interface_routes (sw_if_index,
871 a, ia->address_length);
878 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
880 /* Built-in ip4 unicast rx feature path definition */
/* The arc starts at "ip4-input" / "ip4-input-no-checksum" and ends at
   "ip4-lookup"; its index is stored in
   ip4_main.lookup_main.ucast_feature_arc_index.

   Ordering implied by the runs_before constraints below:
     ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
     ip4-source-check-via-any -> ip4-policer-classify ->
     ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass ->
     ip4-lookup
   ip4-source-and-port-range-check-rx is additionally ordered before
   ip4-policer-classify, and ip4-not-enabled before ip4-lookup. */
882 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
884 .arc_name = "ip4-unicast",
885 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
886 .last_in_arc = "ip4-lookup",
887 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
890 VNET_FEATURE_INIT (ip4_flow_classify, static) =
892 .arc_name = "ip4-unicast",
893 .node_name = "ip4-flow-classify",
894 .runs_before = VNET_FEATURES ("ip4-inacl"),
897 VNET_FEATURE_INIT (ip4_inacl, static) =
899 .arc_name = "ip4-unicast",
900 .node_name = "ip4-inacl",
901 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
904 VNET_FEATURE_INIT (ip4_source_check_1, static) =
906 .arc_name = "ip4-unicast",
907 .node_name = "ip4-source-check-via-rx",
908 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
911 VNET_FEATURE_INIT (ip4_source_check_2, static) =
913 .arc_name = "ip4-unicast",
914 .node_name = "ip4-source-check-via-any",
915 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
918 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
920 .arc_name = "ip4-unicast",
921 .node_name = "ip4-source-and-port-range-check-rx",
922 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
925 VNET_FEATURE_INIT (ip4_policer_classify, static) =
927 .arc_name = "ip4-unicast",
928 .node_name = "ip4-policer-classify",
929 .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
932 VNET_FEATURE_INIT (ip4_ipsec, static) =
934 .arc_name = "ip4-unicast",
935 .node_name = "ipsec4-input-feature",
936 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
939 VNET_FEATURE_INIT (ip4_vpath, static) =
941 .arc_name = "ip4-unicast",
942 .node_name = "vpath-input-ip4",
943 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
946 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
948 .arc_name = "ip4-unicast",
949 .node_name = "ip4-vxlan-bypass",
950 .runs_before = VNET_FEATURES ("ip4-lookup"),
953 VNET_FEATURE_INIT (ip4_not_enabled, static) =
955 .arc_name = "ip4-unicast",
956 .node_name = "ip4-not-enabled",
957 .runs_before = VNET_FEATURES ("ip4-lookup"),
960 VNET_FEATURE_INIT (ip4_lookup, static) =
962 .arc_name = "ip4-unicast",
963 .node_name = "ip4-lookup",
964 .runs_before = 0, /* not before any other features */
967 /* Built-in ip4 multicast rx feature path definition */
/* The arc starts at "ip4-input" / "ip4-input-no-checksum" and ends at
   "ip4-mfib-forward-lookup"; its index is stored in
   ip4_main.lookup_main.mcast_feature_arc_index.  Both vpath-input-ip4 and
   ip4-not-enabled are ordered before ip4-mfib-forward-lookup. */
968 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
970 .arc_name = "ip4-multicast",
971 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
972 .last_in_arc = "ip4-mfib-forward-lookup",
973 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
976 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
978 .arc_name = "ip4-multicast",
979 .node_name = "vpath-input-ip4",
980 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
983 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
985 .arc_name = "ip4-multicast",
986 .node_name = "ip4-not-enabled",
987 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
990 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
992 .arc_name = "ip4-multicast",
993 .node_name = "ip4-mfib-forward-lookup",
994 .runs_before = 0, /* last feature */
997 /* Source and port-range check ip4 tx feature path definition */
/* The arc starts at "ip4-rewrite" / "ip4-midchain" / "ip4-dvr-dpo" and ends
   at "interface-output"; its index is stored in
   ip4_main.lookup_main.output_feature_arc_index.

   Ordering implied by the runs_before constraints below:
     ip4-source-and-port-range-check-tx -> ip4-outacl ->
     ipsec4-output-feature -> interface-output */
998 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1000 .arc_name = "ip4-output",
1001 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1002 .last_in_arc = "interface-output",
1003 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1006 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1008 .arc_name = "ip4-output",
1009 .node_name = "ip4-source-and-port-range-check-tx",
1010 .runs_before = VNET_FEATURES ("ip4-outacl"),
1013 VNET_FEATURE_INIT (ip4_outacl, static) =
1015 .arc_name = "ip4-output",
1016 .node_name = "ip4-outacl",
1017 .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1020 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1022 .arc_name = "ip4-output",
1023 .node_name = "ipsec4-output-feature",
1024 .runs_before = VNET_FEATURES ("interface-output"),
1027 /* Built-in ip4 tx feature path definition */
1028 VNET_FEATURE_INIT (ip4_interface_output, static) =
1030 .arc_name = "ip4-output",
1031 .node_name = "interface-output",
1032 .runs_before = 0, /* not before any other features */
/* Interface add/delete hook for IPv4, registered with
   VNET_SW_INTERFACE_ADD_DEL_FUNCTION.

   - Grows fib_index_by_sw_if_index and mfib_index_by_sw_if_index so they
     cover sw_if_index.
   - The inner section clears the interface's unnumbered state and deletes
     every address configured on it (ip4_add_del_interface_address with
     is_del = 1).  NOTE(review): its guarding condition is not visible in
     this view; presumably it runs only when the interface is deleted.
   - The "ip4-not-enabled" feature is set to is_add on the ip4-multicast arc
     (and the ip4-unicast arc, whose trailing arguments are not visible).
   Always returns 0 (no error). */
1036 static clib_error_t *
1037 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1039 ip4_main_t *im = &ip4_main;
1041 /* Fill in lookup tables with default table (0). */
1042 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1043 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1047 ip4_main_t *im4 = &ip4_main;
1048 ip_lookup_main_t *lm4 = &im4->lookup_main;
1049 ip_interface_address_t *ia = 0;
1050 ip4_address_t *address;
1051 vlib_main_t *vm = vlib_get_main ();
1053 vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1055 foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1057 address = ip_interface_address_get_address (lm4, ia);
1058 ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1063 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1066 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1067 sw_if_index, is_add, 0, 0);
1069 return /* no error */ 0;
1072 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1074 /* Global IP4 main. */
/* The object is only defined when CLIB_MARCH_VARIANT is not set, so it is
   emitted once rather than in every march-variant build of this file. */
1075 #ifndef CLIB_MARCH_VARIANT
1076 ip4_main_t ip4_main;
1077 #endif /* CLIB_MARCH_VARIANT */
/* Init function for the IPv4 lookup code, registered with
   VLIB_INIT_FUNCTION.

   Steps visible here:
   1. run the init functions this one depends on: vnet_feature_init,
      ip4_mtrie_module_init, fib_module_init and mfib_module_init;
   2. fill im->fib_masks[i] for every prefix length i, computed as
      pow2_mask (i) << (32 - i) and stored in network byte order;
   3. initialise the generic lookup main for IPv4 (is_ip6 = 0);
   4. create and lock unicast and multicast table 0;
   5. hook unformat_pg_ip4_header as the packet-generator edit function of
      the ip4-lookup node;
   6. build the ARP request packet template: ethernet hardware type, IPv4
      protocol type, 6-byte L2 and 4-byte L3 addresses, opcode "request".

   NOTE(review): the error returns after each dependency call and the final
   return are not visible in this view. */
1079 static clib_error_t *
1080 ip4_lookup_init (vlib_main_t * vm)
1082 ip4_main_t *im = &ip4_main;
1083 clib_error_t *error;
1086 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1088 if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1090 if ((error = vlib_call_init_function (vm, fib_module_init)))
1092 if ((error = vlib_call_init_function (vm, mfib_module_init)))
1095 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1100 m = pow2_mask (i) << (32 - i);
1103 im->fib_masks[i] = clib_host_to_net_u32 (m);
1106 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1108 /* Create FIB with index 0 and table id of 0. */
1109 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1110 FIB_SOURCE_DEFAULT_ROUTE);
1111 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1112 MFIB_SOURCE_DEFAULT_ROUTE);
1116 pn = pg_get_node (ip4_lookup_node.index);
1117 pn->unformat_edit = unformat_pg_ip4_header;
1121 ethernet_arp_header_t h;
1123 clib_memset (&h, 0, sizeof (h));
/* Helper macros: _16 stores a 16-bit field in network byte order, _8 stores
   a byte as-is. */
1125 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1126 #define _8(f,v) h.f = v;
1127 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1128 _16 (l3_type, ETHERNET_TYPE_IP4);
1129 _8 (n_l2_address_bytes, 6);
1130 _8 (n_l3_address_bytes, 4);
1131 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1135 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1138 /* alloc chunk size */ 8,
1145 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Trace record shared by the IPv4 forwarding nodes in this file.  The
   formatters below read its fib_index, dpo_index, flow_hash and packet_data
   members; only packet_data's declaration is visible in this view. */
1149 /* Adjacency taken. */
1154 /* Packet data, possibly *after* rewrite. */
1155 u8 packet_data[64 - 1 * sizeof (u32)];
1157 ip4_forward_next_trace_t;
/* Trace formatter: renders the captured packet bytes as an IPv4 header,
   preceded by white space for the current indent of 's'.
   Variadic arguments, in order: vlib_main_t * (unused), vlib_node_t *
   (unused), ip4_forward_next_trace_t *. */
1159 #ifndef CLIB_MARCH_VARIANT
1161 format_ip4_forward_next_trace (u8 * s, va_list * args)
1163 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1164 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1165 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1166 u32 indent = format_get_indent (s);
1167 s = format (s, "%U%U",
1168 format_white_space, indent,
1169 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter used by the ip4-lookup and ip4-load-balance nodes.
   Prints the FIB index, DPO index and flow hash on one line, then the
   captured packet as an IPv4 header on the next line at the current indent.
   Variadic arguments, in order: vlib_main_t * (unused), vlib_node_t *
   (unused), ip4_forward_next_trace_t *. */
1175 format_ip4_lookup_trace (u8 * s, va_list * args)
1177 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1178 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1179 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1180 u32 indent = format_get_indent (s);
1182 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1183 t->fib_index, t->dpo_index, t->flow_hash);
1184 s = format (s, "\n%U%U",
1185 format_white_space, indent,
1186 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite traces.  Prints the TX interface index, the
   DPO index, the adjacency (format_ip_adjacency with
   FORMAT_IP_ADJACENCY_NONE) and the flow hash, then the packet data rendered
   by format_ip_adjacency_packet_data on the next line.
   Note that the value printed as "tx_sw_if_index" is read from the trace
   record's fib_index member.
   Variadic arguments, in order: vlib_main_t * (unused), vlib_node_t *
   (unused), ip4_forward_next_trace_t *. */
1191 format_ip4_rewrite_trace (u8 * s, va_list * args)
1193 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1194 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1195 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1196 u32 indent = format_get_indent (s);
1198 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1199 t->fib_index, t->dpo_index, format_ip_adjacency,
1200 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1201 s = format (s, "\n%U%U",
1202 format_white_space, indent,
1203 format_ip_adjacency_packet_data,
1204 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1208 #ifndef CLIB_MARCH_VARIANT
1209 /* Common trace function for all ip4-forward next nodes. */
/* For every buffer in the frame that has VLIB_BUFFER_IS_TRACED set, a trace
   record is added holding:
   - dpo_index: ip.adj_index[which_adj_index] from the buffer metadata;
   - flow_hash: ip.flow_hash from the buffer metadata;
   - an index taken from sw_if_index[VLIB_TX] when that is not ~0, otherwise
     the FIB index of the RX interface (the assignment target is not visible
     in this view; presumably fib_index);
   - packet_data: the first sizeof (packet_data) bytes at the buffer's
     current position.
   The first section handles two buffers per step while prefetching the next
   two; the second handles one buffer per step.  The loop statements are not
   visible in this view. */
1211 ip4_forward_next_trace (vlib_main_t * vm,
1212 vlib_node_runtime_t * node,
1213 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1216 ip4_main_t *im = &ip4_main;
1218 n_left = frame->n_vectors;
1219 from = vlib_frame_vector_args (frame);
1224 vlib_buffer_t *b0, *b1;
1225 ip4_forward_next_trace_t *t0, *t1;
1227 /* Prefetch next iteration. */
1228 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1229 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1234 b0 = vlib_get_buffer (vm, bi0);
1235 b1 = vlib_get_buffer (vm, bi1);
1237 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1239 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1240 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1241 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1243 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1244 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1245 vec_elt (im->fib_index_by_sw_if_index,
1246 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1248 clib_memcpy_fast (t0->packet_data,
1249 vlib_buffer_get_current (b0),
1250 sizeof (t0->packet_data));
1252 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1254 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1255 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1256 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1258 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1259 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1260 vec_elt (im->fib_index_by_sw_if_index,
1261 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1262 clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1263 sizeof (t1->packet_data));
1273 ip4_forward_next_trace_t *t0;
1277 b0 = vlib_get_buffer (vm, bi0);
1279 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1281 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1282 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1283 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1285 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1286 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1287 vec_elt (im->fib_index_by_sw_if_index,
1288 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1289 clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1290 sizeof (t0->packet_data));
1297 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * Sums the pseudo-header (payload length, protocol, source and destination
 * address) followed by the L4 payload located after the IPv4 header `ip0`.
 * When the payload is longer than what the first buffer holds, the sum
 * continues over the chained buffers reached through p0->next_buffer.
 * The folded and complemented result is placed in sum16 (presumably the
 * return value -- confirm).
 */
1299 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1303 u32 ip_header_length, payload_length_host_byte_order;
1304 u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1306 u8 *data_this_buffer;
1309 /* Initialize checksum with ip header. */
1310 ip_header_length = ip4_header_bytes (ip0);
1311 payload_length_host_byte_order =
1312 clib_net_to_host_u16 (ip0->length) - ip_header_length;
/* Pseudo-header word: payload length plus (protocol << 16), converted to
   network byte order. */
1314 clib_host_to_net_u32 (payload_length_host_byte_order +
1315 (ip0->protocol << 16));
/* With 32-bit words the two addresses are added one at a time. */
1317 if (BITS (uword) == 32)
1320 ip_csum_with_carry (sum0,
1321 clib_mem_unaligned (&ip0->src_address, u32));
1323 ip_csum_with_carry (sum0,
1324 clib_mem_unaligned (&ip0->dst_address, u32));
/* Otherwise one unaligned 64-bit read starting at src_address is added. */
1328 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1330 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1331 data_this_buffer = (u8 *) ip0 + ip_header_length;
/* Number of bytes in this buffer counted from the start of the IP header. */
1332 n_ip_bytes_this_buffer =
1333 p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
/* Header plus payload does not fit in this buffer: limit this pass to the
   payload bytes actually present (possibly none). */
1334 if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1336 n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1337 n_ip_bytes_this_buffer - ip_header_length : 0;
1342 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1343 n_bytes_left -= n_this_buffer;
1344 if (n_bytes_left == 0)
/* Payload remains, so a chained buffer is expected; the flag is asserted
   and also tested at run time. */
1347 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1348 if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
/* Remember whether the segment just summed had an odd byte count. */
1351 length_odd = (n_this_buffer & 1);
1353 p0 = vlib_get_buffer (vm, p0->next_buffer);
1354 data_this_buffer = vlib_buffer_get_current (p0);
1355 n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1357 if (PREDICT_FALSE (length_odd))
1359 /* Prepend a 0 or the resulting checksum will be incorrect. */
1363 data_this_buffer[0] = 0;
1367 sum16 = ~ip_csum_fold (sum0);
/*
 * Validate the TCP/UDP checksum of the packet whose IPv4 header sits at the
 * current data position of `p0`, and record the outcome in p0->flags:
 * VNET_BUFFER_F_L4_CHECKSUM_COMPUTED is always set, and
 * VNET_BUFFER_F_L4_CHECKSUM_CORRECT is set when the check passes.
 */
1372 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1374 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1378 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1379 || ip0->protocol == IP_PROTOCOL_UDP);
/* The L4 header is taken to start right after one ip4_header_t. */
1381 udp0 = (void *) (ip0 + 1);
/* A UDP checksum field of zero is accepted without summing the packet. */
1382 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1384 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1385 | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1389 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* A computed sum of zero means the checksum is correct. */
1391 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1392 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
/* Feature arc "ip4-local": it starts at node "ip4-local" and its last node
   is "ip4-local-end-of-arc". */
1399 VNET_FEATURE_ARC_INIT (ip4_local) =
1401 .arc_name = "ip4-local",
1402 .start_nodes = VNET_FEATURES ("ip4-local"),
1403 .last_in_arc = "ip4-local-end-of-arc",
/*
 * Run the software L4 checksum validation on `p` and store whether it
 * passed in *good_tcp_udp.  The UDP length is then compared with the IP
 * total length (presumably only when is_udp is set -- confirm): if the UDP
 * length is larger, *good_tcp_udp is cleared and *error becomes
 * IP4_ERROR_UDP_LENGTH; otherwise *error is left as it was.
 */
1408 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1409 ip4_header_t * ip, u8 is_udp, u8 * error,
1413 flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1414 *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1418 u32 ip_len, udp_len;
1420 udp = ip4_next_header (ip);
1421 /* Verify UDP length. */
1422 ip_len = clib_net_to_host_u16 (ip->length);
1423 udp_len = clib_net_to_host_u16 (udp->length);
/* Negative when the UDP header claims more bytes than the IP packet has. */
1425 len_diff = ip_len - udp_len;
1426 *good_tcp_udp &= len_diff >= 0;
1427 *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
/*
 * Non-zero when the L4 checksum of buffer `_b` is left to TCP or UDP
 * checksum offload, i.e. either offload flag is set in (_b)->flags.
 * The argument and the whole expansion are parenthesised so the macro can
 * be negated, compared, or given a pointer expression safely.
 */
#define ip4_local_csum_is_offloaded(_b)                                 \
  (((_b)->flags & (VNET_BUFFER_F_OFFLOAD_TCP_CKSUM                      \
                   | VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0)

/*
 * Non-zero when a software checksum check is still required: the packet is
 * TCP or UDP, its checksum has not been computed yet, and it is not
 * offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
  ((is_tcp_udp)                                                         \
   && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)              \
        || ip4_local_csum_is_offloaded (_b)))

/*
 * Non-zero (exactly 1) when the buffer flags already say the checksum is
 * good: either it was marked correct or it is offloaded.
 */
#define ip4_local_csum_is_valid(_b)                                     \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                   \
    || ip4_local_csum_is_offloaded (_b)) != 0)
/*
 * Check the L4 checksum of one buffer.  Software validation runs only when
 * ip4_local_need_csum_check() requests it; the buffer-flag shortcut
 * ip4_local_csum_is_valid() is presumably the alternative branch -- confirm.
 * For a TCP/UDP packet with a bad checksum *error is set to
 * IP4_ERROR_TCP_CHECKSUM or IP4_ERROR_UDP_CHECKSUM; otherwise it is kept.
 */
1444 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1445 ip4_header_t * ih, u8 * error)
1447 u8 is_udp, is_tcp_udp, good_tcp_udp;
1449 is_udp = ih->protocol == IP_PROTOCOL_UDP;
1450 is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1452 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1453 ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1455 good_tcp_udp = ip4_local_csum_is_valid (b);
/* "+ is_udp" below relies on the UDP error code directly following the
   TCP one. */
1457 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1458 *error = (is_tcp_udp && !good_tcp_udp
1459 ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
/*
 * Two-packet variant of the L4 checksum check: b[0]/ih[0] and b[1]/ih[1]
 * are processed together and their results written to error[0] and
 * error[1].
 */
1463 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1464 ip4_header_t ** ih, u8 * error)
1466 u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1468 is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1469 is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1471 is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1472 is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
/* Start from what the buffer flags already say. */
1474 good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1475 good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
/* Entered when at least one of the two packets still needs a software
   check. */
1477 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1478 || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1481 ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1484 ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
/* Bad TCP/UDP checksum -> TCP or UDP checksum error; otherwise unchanged. */
1488 error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1489 IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1490 error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1491 IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
/*
 * Finalise the disposition of one buffer.  Any error other than
 * IP4_ERROR_UNKNOWN_PROTOCOL (the value callers start from) redirects the
 * buffer to IP_LOCAL_NEXT_DROP, and b->error is taken from error_node's
 * error table.  At the head of the feature arc, a buffer still carrying
 * IP4_ERROR_UNKNOWN_PROTOCOL is started on the "ip4-local" feature arc.
 */
1495 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1496 vlib_buffer_t * b, u16 * next, u8 error,
1497 u8 head_of_feature_arc)
1499 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1502 *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
/* An error value of zero leaves b->error cleared. */
1503 b->error = error ? error_node->errors[error] : 0;
1504 if (head_of_feature_arc)
1507 if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1509 vnet_feature_arc_start (arc_index,
1510 vnet_buffer (b)->sw_if_index[VLIB_RX],
1523 } ip4_local_last_check_t;
/*
 * Source-address check for one locally destined packet.  The source is
 * looked up in the FIB mtrie unless it equals the address cached in
 * `last_check`; the result is stored in ip.adj_index[VLIB_TX] and the
 * verdict in *error0.  `last_check` is updated so that following packets
 * from the same source can reuse the result.
 */
1526 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1527 ip4_local_last_check_t * last_check, u8 * error0)
1529 ip4_fib_mtrie_leaf_t leaf0;
1530 ip4_fib_mtrie_t *mtrie0;
1531 const dpo_id_t *dpo0;
1532 load_balance_t *lb0;
/* A TX sw_if_index slot that is set (!= ~0) replaces the fib index stored
   in the buffer. */
1535 vnet_buffer (b)->ip.fib_index =
1536 vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1537 vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1540 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1541 * adjacency for the destination address (the local interface address).
1542 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1543 * adjacency for the source address (the remote sender's address)
1545 if (PREDICT_FALSE (last_check->first ||
1546 (last_check->src.as_u32 != ip0->src_address.as_u32)))
/* First packet or a new source: three mtrie steps on the source address
   give the load-balance index. */
1548 mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1549 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1550 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1551 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1552 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
/* The value previously in the TX slot moves to the RX slot; the source
   lookup result takes the TX slot. */
1554 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1555 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1556 vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1558 lb0 = load_balance_get (lbi0);
1559 dpo0 = load_balance_get_bucket_i (lb0, 0);
1562 * Must have a route to source otherwise we drop the packet.
1563 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1566 * - the source is a recieve => it's from us => bogus, do this
1567 * first since it sets a different error code.
1568 * - uRPF check for any route to source - accept if passes.
1569 * - allow packets destined to the broadcast address from unknown sources
/* Both tests only fire while *error0 still holds
   IP4_ERROR_UNKNOWN_PROTOCOL, so an earlier error is never overwritten. */
1572 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1573 && dpo0->dpoi_type == DPO_RECEIVE) ?
1574 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1575 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1576 && !fib_urpf_check_size (lb0->lb_urpf)
1577 && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1578 IP4_ERROR_SRC_LOOKUP_MISS : *error0);
/* Remember this source and its verdict for the following packets. */
1580 last_check->src.as_u32 = ip0->src_address.as_u32;
1581 last_check->lbi = lbi0;
1582 last_check->error = *error0;
/* Presumably the cache-hit branch (confirm): reuse the cached index and
   error instead of repeating the lookup. */
1586 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1587 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1588 vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1589 *error0 = last_check->error;
1590 last_check->first = 0;
/*
 * Two-packet variant of the source-address check.  Both packets are looked
 * up unless both sources equal the address cached in `last_check` (and the
 * cache is not in its initial `first` state).  After a lookup the cache is
 * refreshed from packet 1.
 */
1595 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1596 ip4_local_last_check_t * last_check, u8 * error)
1598 ip4_fib_mtrie_leaf_t leaf[2];
1599 ip4_fib_mtrie_t *mtrie[2];
1600 const dpo_id_t *dpo[2];
1601 load_balance_t *lb[2];
/* Non-zero if the cache is unusable for either packet. */
1605 not_last_hit = last_check->first;
1606 not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1607 not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
/* A TX sw_if_index slot that is set (!= ~0) replaces the stored fib index. */
1609 vnet_buffer (b[0])->ip.fib_index =
1610 vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1611 vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1612 vnet_buffer (b[0])->ip.fib_index;
1614 vnet_buffer (b[1])->ip.fib_index =
1615 vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1616 vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1617 vnet_buffer (b[1])->ip.fib_index;
1620 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1621 * adjacency for the destination address (the local interface address).
1622 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1623 * adjacency for the source address (the remote sender's address)
1625 if (PREDICT_FALSE (not_last_hit))
/* Interleaved three-step mtrie lookups of both source addresses. */
1627 mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1628 mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1630 leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1631 leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1633 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1634 &ip[0]->src_address, 2);
1635 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1636 &ip[1]->src_address, 2);
1638 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1639 &ip[0]->src_address, 3);
1640 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1641 &ip[1]->src_address, 3);
1643 lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1644 lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
/* Previous TX slot value moves to RX; the source lookup result takes TX. */
1646 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1647 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1648 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1650 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1651 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1652 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1654 lb[0] = load_balance_get (lbi[0]);
1655 lb[1] = load_balance_get (lbi[1]);
1657 dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1658 dpo[1] = load_balance_get_bucket_i (lb[1], 0);
/* Source resolves to a receive DPO -> spoofed; no uRPF entries and not a
   broadcast destination -> source lookup miss.  Errors already set are
   kept. */
1660 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1661 dpo[0]->dpoi_type == DPO_RECEIVE) ?
1662 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1663 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1664 !fib_urpf_check_size (lb[0]->lb_urpf) &&
1665 ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1666 ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1668 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1669 dpo[1]->dpoi_type == DPO_RECEIVE) ?
1670 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1671 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1672 !fib_urpf_check_size (lb[1]->lb_urpf) &&
1673 ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1674 ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
/* The cache keeps the result of the second packet only. */
1676 last_check->src.as_u32 = ip[1]->src_address.as_u32;
1677 last_check->lbi = lbi[1];
1678 last_check->error = error[1];
/* Presumably the cache-hit branch (confirm): both packets take the cached
   index and the cached error. */
1682 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1683 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1684 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1686 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1687 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1688 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1690 error[0] = last_check->error;
1691 error[1] = last_check->error;
1692 last_check->first = 0;
/* Packet classes produced by ip4_local_classify().  L4 is listed first;
   ip4_local_inline() tests the class for non-zero to detect the others. */
1696 enum ip_local_packet_type_e
1698 IP_LOCAL_PACKET_TYPE_L4,
1699 IP_LOCAL_PACKET_TYPE_NAT,
1700 IP_LOCAL_PACKET_TYPE_FRAG,
1704 * Determine packet type and next node.
1706 * The expectation is that all packets that are not L4 will skip
1707 * checksums and source checks.
/*
 * Classify one packet and choose its next node through *next:
 *   - IP fragment        -> IP_LOCAL_NEXT_REASSEMBLY, type FRAG
 *   - buffer marked NATed -> per-protocol next, type NAT
 *   - anything else      -> per-protocol next, type L4
 * The per-protocol next comes from lm->local_next_by_ip_protocol[].
 */
1710 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1712 ip_lookup_main_t *lm = &ip4_main.lookup_main;
1714 if (PREDICT_FALSE (ip4_is_fragment (ip)))
1716 *next = IP_LOCAL_NEXT_REASSEMBLY;
1717 return IP_LOCAL_PACKET_TYPE_FRAG;
1719 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1721 *next = lm->local_next_by_ip_protocol[ip->protocol];
1722 return IP_LOCAL_PACKET_TYPE_NAT;
1725 *next = lm->local_next_by_ip_protocol[ip->protocol];
1726 return IP_LOCAL_PACKET_TYPE_L4;
/*
 * Shared body of the "ip4-local" and "ip4-local-end-of-arc" nodes.  Each
 * packet is classified, optionally run through the L4 checksum and source
 * checks, and then given its next node and error through
 * ip4_local_set_next_and_error().  head_of_feature_arc is 1 for
 * "ip4-local" and 0 for "ip4-local-end-of-arc".  Returns the number of
 * packets in the frame.
 */
1730 ip4_local_inline (vlib_main_t * vm,
1731 vlib_node_runtime_t * node,
1732 vlib_frame_t * frame, int head_of_feature_arc)
1734 u32 *from, n_left_from;
/* Buffer errors are taken from the ip4-input node's error table. */
1735 vlib_node_runtime_t *error_node =
1736 vlib_node_get_runtime (vm, ip4_input_node.index);
1737 u16 nexts[VLIB_FRAME_SIZE], *next;
1738 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1739 ip4_header_t *ip[2];
1742 ip4_local_last_check_t last_check = {
1744 * 0.0.0.0 can appear as the source address of an IP packet,
1745 * as can any other address, hence the need to use the 'first'
1746 * member to make sure the .lbi is initialised for the first
1749 .src = {.as_u32 = 0},
1751 .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1755 from = vlib_frame_vector_args (frame);
1756 n_left_from = frame->n_vectors;
/* Record traces up front when tracing is enabled on this node. */
1758 if (node->flags & VLIB_NODE_FLAG_TRACE)
1759 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1761 vlib_get_buffers (vm, from, bufs, n_left_from);
/* Dual loop: needs at least six buffers left because b[4] and b[5] are
   prefetched. */
1765 while (n_left_from >= 6)
1769 /* Prefetch next iteration. */
1771 vlib_prefetch_buffer_header (b[4], LOAD);
1772 vlib_prefetch_buffer_header (b[5], LOAD);
1774 CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1775 CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
/* IP4_ERROR_UNKNOWN_PROTOCOL is the starting value meaning "no error
   found yet". */
1778 error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1780 ip[0] = vlib_buffer_get_current (b[0]);
1781 ip[1] = vlib_buffer_get_current (b[1]);
1783 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1784 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1786 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1787 pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
/* Non-zero when the two packets were classified differently. */
1789 not_batch = pt[0] ^ pt[1];
/* Presumably bypasses the checks below (confirm): taken at the end of the
   arc, or when both packets share the same non-L4 class. */
1791 if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1794 if (PREDICT_TRUE (not_batch == 0))
1796 ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1797 ip4_local_check_src_x2 (b, ip, &last_check, error);
/* Mixed classes: each packet is checked on its own. */
1803 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1804 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1808 ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1809 ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1815 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1816 head_of_feature_arc);
1817 ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1818 head_of_feature_arc);
/* Single loop for whatever is left. */
1825 while (n_left_from > 0)
1827 error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1829 ip[0] = vlib_buffer_get_current (b[0]);
1830 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1831 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1833 if (head_of_feature_arc == 0 || pt[0])
1836 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1837 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1841 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1842 head_of_feature_arc);
/* Hand every buffer to the next node recorded for it in nexts[]. */
1849 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1850 return frame->n_vectors;
/* Node function of "ip4-local": runs ip4_local_inline() as the head of the
   ip4-local feature arc. */
1853 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1854 vlib_frame_t * frame)
1856 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/* Registration of node "ip4-local" with its fixed next nodes; further
   nexts are added at run time by ip4_register_protocol(). */
1860 VLIB_REGISTER_NODE (ip4_local_node) =
1862 .name = "ip4-local",
1863 .vector_size = sizeof (u32),
1864 .format_trace = format_ip4_forward_next_trace,
1865 .n_next_nodes = IP_LOCAL_N_NEXT,
1868 [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1869 [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1870 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1871 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1872 [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
/* Node function of "ip4-local-end-of-arc": runs ip4_local_inline() with
   head_of_feature_arc set to 0. */
1878 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1879 vlib_node_runtime_t * node,
1880 vlib_frame_t * frame)
1882 return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
/* Registration of node "ip4-local-end-of-arc", declared a sibling of
   "ip4-local". */
1886 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1887 .name = "ip4-local-end-of-arc",
1888 .vector_size = sizeof (u32),
1890 .format_trace = format_ip4_forward_next_trace,
1891 .sibling_of = "ip4-local",
/* Registers node "ip4-local-end-of-arc" as a feature on the "ip4-local"
   arc. */
1894 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1895 .arc_name = "ip4-local",
1896 .node_name = "ip4-local-end-of-arc",
1897 .runs_before = 0, /* not before any other features */
1901 #ifndef CLIB_MARCH_VARIANT
/*
 * Route locally received packets of IP protocol `protocol` to the node
 * `node_index`: the node is added as a next of "ip4-local" and the
 * resulting next index is stored in local_next_by_ip_protocol[protocol].
 * `protocol` must be a valid index into that table (asserted).
 */
1903 ip4_register_protocol (u32 protocol, u32 node_index)
1905 vlib_main_t *vm = vlib_get_main ();
1906 ip4_main_t *im = &ip4_main;
1907 ip_lookup_main_t *lm = &im->lookup_main;
1909 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1910 lm->local_next_by_ip_protocol[protocol] =
1911 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * Undo a protocol registration: the table entry for `protocol` is set back
 * to IP_LOCAL_NEXT_PUNT.  `protocol` must be a valid table index (asserted).
 */
1915 ip4_unregister_protocol (u32 protocol)
1917 ip4_main_t *im = &ip4_main;
1918 ip_lookup_main_t *lm = &im->lookup_main;
1920 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1921 lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
/*
 * CLI handler: prints a heading, then one line per IP protocol whose
 * ip4-local next is something other than IP_LOCAL_NEXT_PUNT, showing the
 * protocol and the name of the node that handles it.
 */
1925 static clib_error_t *
1926 show_ip_local_command_fn (vlib_main_t * vm,
1927 unformat_input_t * input, vlib_cli_command_t * cmd)
1929 ip4_main_t *im = &ip4_main;
1930 ip_lookup_main_t *lm = &im->lookup_main;
1933 vlib_cli_output (vm, "Protocols handled by ip4_local");
1934 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1936 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
/* Translate the next index stored for protocol i into a node index via
   ip4-local's next_nodes[] table. */
1938 u32 node_index = vlib_get_node (vm,
1939 ip4_local_node.index)->
1940 next_nodes[lm->local_next_by_ip_protocol[i]];
1941 vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1942 format_vlib_node_name, vm, node_index);
1951 * Display the set of protocols handled by the local IPv4 stack.
1954 * Example of how to display local protocol table:
1955 * @cliexstart{show ip local}
1956 * Protocols handled by ip4_local
/* Binds the CLI path "show ip local" to show_ip_local_command_fn(). */
1963 VLIB_CLI_COMMAND (show_ip_local, static) =
1965 .path = "show ip local",
1966 .function = show_ip_local_command_fn,
1967 .short_help = "show ip local",
/*
 * Shared body of the "ip4-arp" and "ip4-glean" nodes (is_glean selects
 * which).  Every input packet is sent to the drop next with an error
 * describing what happened; when the request is not throttled and the
 * adjacency is still unresolved, a fresh ARP request is built from the
 * packet template and enqueued towards the adjacency's rewrite next.
 * Returns the number of packets in the frame.
 */
1972 ip4_arp_inline (vlib_main_t * vm,
1973 vlib_node_runtime_t * node,
1974 vlib_frame_t * frame, int is_glean)
1976 vnet_main_t *vnm = vnet_get_main ();
1977 ip4_main_t *im = &ip4_main;
1978 ip_lookup_main_t *lm = &im->lookup_main;
1979 u32 *from, *to_next_drop;
1980 uword n_left_from, n_left_to_next_drop, next_index;
1981 u32 thread_index = vm->thread_index;
1984 if (node->flags & VLIB_NODE_FLAG_TRACE)
1985 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Per-thread throttle seed for the current time. */
1987 seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1989 from = vlib_frame_vector_args (frame);
1990 n_left_from = frame->n_vectors;
1991 next_index = node->cached_next_index;
1992 if (next_index == IP4_ARP_NEXT_DROP)
1993 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1995 while (n_left_from > 0)
1997 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1998 to_next_drop, n_left_to_next_drop);
2000 while (n_left_from > 0 && n_left_to_next_drop > 0)
2002 u32 pi0, bi0, adj_index0, sw_if_index0;
2003 ip_adjacency_t *adj0;
2004 vlib_buffer_t *p0, *b0;
2005 ip4_address_t resolve0;
2006 ethernet_arp_header_t *h0;
2007 vnet_hw_interface_t *hw_if0;
2011 p0 = vlib_get_buffer (vm, pi0);
/* The original packet always goes to the drop next; p0->error records the
   reason. */
2015 to_next_drop[0] = pi0;
2017 n_left_to_next_drop -= 1;
2019 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2020 adj0 = adj_get (adj_index0);
2024 /* resolve the packet's destination */
2025 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
2026 resolve0 = ip0->dst_address;
2030 /* resolve the incomplete adj */
2031 resolve0 = adj0->sub_type.nbr.next_hop.ip4;
2034 /* combine the address and interface for the hash key */
2035 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2036 r0 = (u64) resolve0.data_u32 << 32;
/* Rate-limit requests for the same key. */
2039 if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
2041 p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
2046 * the adj has been updated to a rewrite but the node the DPO that got
2047 * us here hasn't - yet. no big deal. we'll drop while we wait.
2049 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2051 p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
2056 * Can happen if the control-plane is programming tables
2057 * with traffic flowing; at least that's today's lame excuse.
2059 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2060 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2062 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2065 /* Send ARP request. */
2067 vlib_packet_template_get_packet (vm,
2068 &im->ip4_arp_request_packet_template,
2070 /* Seems we're out of buffers */
2071 if (PREDICT_FALSE (!h0))
2073 p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
2077 b0 = vlib_get_buffer (vm, bi0);
2079 /* copy the persistent fields from the original */
2080 clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
2082 /* Add rewrite/encap string for ARP packet. */
2083 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
2085 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2087 /* Src ethernet address in ARP header. */
2088 mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
2089 hw_if0->hw_address);
2092 /* The interface's source address is stashed in the Glean Adj */
2093 h0->ip4_over_ethernet[0].ip4 =
2094 adj0->sub_type.glean.receive_addr.ip4;
2098 /* Src IP address in ARP header. */
2099 if (ip4_src_address_for_packet (lm, sw_if_index0,
2100 &h0->ip4_over_ethernet[0].ip4))
2102 /* No source address available */
2103 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
/* The request buffer that was just allocated is released again. */
2104 vlib_buffer_free (vm, &bi0, 1);
/* Target IP address in the ARP header. */
2108 h0->ip4_over_ethernet[1].ip4 = resolve0;
2110 p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
2112 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2113 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2114 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
/* Move the current-data pointer back so it covers the rewrite bytes. */
2116 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2118 vlib_set_next_frame_buffer (vm, node,
2119 adj0->rewrite_header.next_index, bi0);
2122 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2125 return frame->n_vectors;
/* Node function for ip4_arp_node: ip4_arp_inline() with is_glean = 0. */
2128 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2129 vlib_frame_t * frame)
2131 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function for ip4_glean_node: ip4_arp_inline() with is_glean = 1. */
2134 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2135 vlib_frame_t * frame)
2137 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter descriptions indexed by IP4_ARP_ERROR_*; shared by the ARP and
   glean node registrations. */
2140 static char *ip4_arp_error_strings[] = {
2141 [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
2142 [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
2143 [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
2144 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2145 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2146 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Registration of the ARP node: it uses the shared ARP error strings and
   has "error-drop" as its drop next. */
2150 VLIB_REGISTER_NODE (ip4_arp_node) =
2153 .vector_size = sizeof (u32),
2154 .format_trace = format_ip4_forward_next_trace,
2155 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2156 .error_strings = ip4_arp_error_strings,
2157 .n_next_nodes = IP4_ARP_N_NEXT,
2160 [IP4_ARP_NEXT_DROP] = "error-drop",
/* Registration of node "ip4-glean": same error strings and drop next as
   the ARP node. */
2164 VLIB_REGISTER_NODE (ip4_glean_node) =
2166 .name = "ip4-glean",
2167 .vector_size = sizeof (u32),
2168 .format_trace = format_ip4_forward_next_trace,
2169 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2170 .error_strings = ip4_arp_error_strings,
2171 .n_next_nodes = IP4_ARP_N_NEXT,
2173 [IP4_ARP_NEXT_DROP] = "error-drop",
/* X-macro list of IP4_ARP_ERROR_* suffixes; each entry is expanded through
   a caller-supplied _() macro. */
2178 #define foreach_notrace_ip4_arp_error \
2184 _(NO_SOURCE_ADDRESS)
/*
 * Init function: for every error in foreach_notrace_ip4_arp_error, passes
 * the matching error counter of the ARP node to
 * vnet_pcap_drop_trace_filter_add_del().
 */
2186 static clib_error_t *
2187 arp_notrace_init (vlib_main_t * vm)
2189 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2191 /* don't trace ARP request packets */
2193 vnet_pcap_drop_trace_filter_add_del \
2194 (rt->errors[IP4_ARP_ERROR_##a], \
2196 foreach_notrace_ip4_arp_error;
2201 VLIB_INIT_FUNCTION (arp_notrace_init);
2204 #ifndef CLIB_MARCH_VARIANT
2205 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 * Builds an ARP request for `dst` from the packet template and hands it to
 * the output node of the hardware interface behind `sw_if_index`.
 * Returns 0 on success, or a clib error when the interface is
 * administratively down, no interface address matches `dst`, the request
 * buffer cannot be allocated, or the interface has no hardware address.
 */
2207 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2210 vnet_main_t *vnm = vnet_get_main ();
2211 ip4_main_t *im = &ip4_main;
2212 ethernet_arp_header_t *h;
2214 ip_interface_address_t *ia;
2215 ip_adjacency_t *adj;
2216 vnet_hw_interface_t *hi;
2217 vnet_sw_interface_t *si;
2221 u8 unicast_rewrite = 0;
2223 si = vnet_get_sw_interface (vnm, sw_if_index);
2225 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2227 return clib_error_return (0, "%U: interface %U down",
2228 format_ip4_address, dst,
2229 format_vnet_sw_if_index_name, vnm,
/* Find an address on the interface whose subnet covers dst; presumably the
   result becomes the ARP sender address `src` -- confirm. */
2234 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2237 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2238 return clib_error_return
2240 "no matching interface address for destination %U (interface %U)",
2241 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2245 h = vlib_packet_template_get_packet (vm,
2246 &im->ip4_arp_request_packet_template,
2250 return clib_error_return (0, "ARP request packet allocation failed");
2252 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2253 if (PREDICT_FALSE (!hi->hw_address))
2255 return clib_error_return (0, "%U: interface %U do not support ip probe",
2256 format_ip4_address, dst,
2257 format_vnet_sw_if_index_name, vnm,
/* Sender MAC and IP, then target IP, in the ARP header. */
2261 mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2263 h->ip4_over_ethernet[0].ip4 = src[0];
2264 h->ip4_over_ethernet[1].ip4 = dst[0];
2266 b = vlib_get_buffer (vm, bi);
2267 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2268 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2270 ip46_address_t nh = {
2274 ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2275 VNET_LINK_IP4, &nh, sw_if_index);
2278 /* Peer has been previously resolved, retrieve glean adj instead */
2279 if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2282 unicast_rewrite = 1;
2286 ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2287 VNET_LINK_IP4, sw_if_index, &nh);
2292 /* Add encapsulation string for software interface (e.g. ethernet header). */
2293 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* With a unicast rewrite, the two bytes just before the ARP header (the
   ethertype position) are overwritten with ETHERNET_TYPE_ARP. */
2294 if (unicast_rewrite)
2296 u16 *etype = vlib_buffer_get_current (b) - 2;
2297 etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2299 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the request to the hardware interface's output node in a frame. */
2302 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2303 u32 *to_next = vlib_frame_vector_args (f);
2306 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2310 return /* no error */ 0;
/* Next-node indices used by the IPv4 rewrite path: drop, ICMP error
   generation and fragmentation. */
2316 IP4_REWRITE_NEXT_DROP,
2317 IP4_REWRITE_NEXT_ICMP_ERROR,
2318 IP4_REWRITE_NEXT_FRAGMENT,
2319 IP4_REWRITE_N_NEXT /* Last */
2320 } ip4_rewrite_next_t;
2323 * The bits of an IPv4 address to mask to construct a multicast
/* Two definitions of the mask, selected by host byte order; the first
   applies on big-endian hosts. */
2326 #if CLIB_ARCH_IS_BIG_ENDIAN
2327 #define IP4_MCAST_ADDR_MASK 0x007fffff
2329 #define IP4_MCAST_ADDR_MASK 0xffff7f00
/*
 * Compare packet_len with the adjacency's packet size limit.  When the
 * packet is too large, *error becomes IP4_ERROR_MTU_EXCEEDED and the packet
 * is either answered with an ICMP "fragmentation needed and DF set" error
 * (presumably when `df` is set -- confirm) or sent to the fragmentation
 * node.  Packets that fit are left untouched.
 */
2333 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2334 u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2336 if (packet_len > adj_packet_bytes)
2338 *error = IP4_ERROR_MTU_EXCEEDED;
2341 icmp4_error_set_vnet_buffer
2342 (b, ICMP4_destination_unreachable,
2343 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2345 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2349 /* IP fragmentation */
2350 ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2351 IP4_FRAG_NEXT_IP4_REWRITE, 0);
2352 *next = IP4_REWRITE_NEXT_FRAGMENT;
2357 /* Decrement TTL & update checksum.
2358 Works either endian, so no need for byte swap. */
/*
 * Locally originated packets only have their
 * VNET_BUFFER_F_LOCALLY_ORIGINATED flag cleared here.  For other packets
 * the header checksum is adjusted incrementally for the TTL change; when
 * the TTL has run out, *error becomes IP4_ERROR_TIME_EXPIRED and the packet
 * is directed to the ICMP error next with a "TTL exceeded in transit"
 * message.
 */
2359 static_always_inline void
2360 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2365 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2367 b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2373 /* Input node should have rejected packets with ttl 0. */
2374 ASSERT (ip->ttl > 0);
/* Incremental checksum update: add 0x0100 in network byte order, then fold
   the carry back in. */
2376 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2377 checksum += checksum >= 0xffff;
2379 ip->checksum = checksum;
2384 * If the ttl drops below 1 when forwarding, generate
2387 if (PREDICT_FALSE (ttl <= 0))
2389 *error = IP4_ERROR_TIME_EXPIRED;
2390 vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2391 icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2392 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2394 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2397 /* Verify checksum. */
2398 ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2399 (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2404 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2405 vlib_node_runtime_t * node,
2406 vlib_frame_t * frame,
2407 int do_counters, int is_midchain, int is_mcast,
2410 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2411 u32 *from = vlib_frame_vector_args (frame);
2412 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2413 u16 nexts[VLIB_FRAME_SIZE], *next;
2415 vlib_node_runtime_t *error_node =
2416 vlib_node_get_runtime (vm, ip4_input_node.index);
2418 n_left_from = frame->n_vectors;
2419 u32 thread_index = vm->thread_index;
2421 vlib_get_buffers (vm, from, bufs, n_left_from);
2422 clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2424 #if (CLIB_N_PREFETCHES >= 8)
2425 if (n_left_from >= 6)
2428 for (i = 2; i < 6; i++)
2429 vlib_prefetch_buffer_header (bufs[i], LOAD);
2434 while (n_left_from >= 8)
2436 ip_adjacency_t *adj0, *adj1;
2437 ip4_header_t *ip0, *ip1;
2438 u32 rw_len0, error0, adj_index0;
2439 u32 rw_len1, error1, adj_index1;
2440 u32 tx_sw_if_index0, tx_sw_if_index1;
2443 vlib_prefetch_buffer_header (b[6], LOAD);
2444 vlib_prefetch_buffer_header (b[7], LOAD);
2446 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2447 adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2450 * pre-fetch the per-adjacency counters
2454 vlib_prefetch_combined_counter (&adjacency_counters,
2455 thread_index, adj_index0);
2456 vlib_prefetch_combined_counter (&adjacency_counters,
2457 thread_index, adj_index1);
2460 ip0 = vlib_buffer_get_current (b[0]);
2461 ip1 = vlib_buffer_get_current (b[1]);
2463 error0 = error1 = IP4_ERROR_NONE;
2465 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2466 ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2468 /* Rewrite packet header and updates lengths. */
2469 adj0 = adj_get (adj_index0);
2470 adj1 = adj_get (adj_index1);
2472 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2473 rw_len0 = adj0[0].rewrite_header.data_bytes;
2474 rw_len1 = adj1[0].rewrite_header.data_bytes;
2475 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2476 vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2478 p = vlib_buffer_get_current (b[2]);
2479 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2480 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2482 p = vlib_buffer_get_current (b[3]);
2483 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2484 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2486 /* Check MTU of outgoing interface. */
2487 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2488 u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2490 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2491 ip0_len = gso_mtu_sz (b[0]);
2492 if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2493 ip1_len = gso_mtu_sz (b[1]);
2495 ip4_mtu_check (b[0], ip0_len,
2496 adj0[0].rewrite_header.max_l3_packet_bytes,
2497 ip0->flags_and_fragment_offset &
2498 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2500 ip4_mtu_check (b[1], ip1_len,
2501 adj1[0].rewrite_header.max_l3_packet_bytes,
2502 ip1->flags_and_fragment_offset &
2503 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2508 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2509 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2510 IP4_ERROR_SAME_INTERFACE : error0);
2511 error1 = ((adj1[0].rewrite_header.sw_if_index ==
2512 vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2513 IP4_ERROR_SAME_INTERFACE : error1);
2516 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2517 * to see the IP header */
2518 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2520 u32 next_index = adj0[0].rewrite_header.next_index;
2521 vlib_buffer_advance (b[0], -(word) rw_len0);
2522 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2523 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2526 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2527 vnet_feature_arc_start (lm->output_feature_arc_index,
2528 tx_sw_if_index0, &next_index, b[0]);
2529 next[0] = next_index;
2533 b[0]->error = error_node->errors[error0];
2535 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2537 u32 next_index = adj1[0].rewrite_header.next_index;
2538 vlib_buffer_advance (b[1], -(word) rw_len1);
2540 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2541 vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2544 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2545 vnet_feature_arc_start (lm->output_feature_arc_index,
2546 tx_sw_if_index1, &next_index, b[1]);
2547 next[1] = next_index;
2551 b[1]->error = error_node->errors[error1];
2555 calc_checksums (vm, b[0]);
2556 calc_checksums (vm, b[1]);
2558 /* Guess we are only writing on simple Ethernet header. */
2559 vnet_rewrite_two_headers (adj0[0], adj1[0],
2560 ip0, ip1, sizeof (ethernet_header_t));
2563 * Bump the per-adjacency counters
2567 vlib_increment_combined_counter
2568 (&adjacency_counters,
2570 adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2572 vlib_increment_combined_counter
2573 (&adjacency_counters,
2575 adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2580 if (adj0->sub_type.midchain.fixup_func)
2581 adj0->sub_type.midchain.fixup_func
2582 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2583 if (adj1->sub_type.midchain.fixup_func)
2584 adj1->sub_type.midchain.fixup_func
2585 (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2591 * copy bytes from the IP address into the MAC rewrite
2593 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2594 adj0->rewrite_header.dst_mcast_offset,
2595 &ip0->dst_address.as_u32, (u8 *) ip0);
2596 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2597 adj1->rewrite_header.dst_mcast_offset,
2598 &ip1->dst_address.as_u32, (u8 *) ip1);
2605 #elif (CLIB_N_PREFETCHES >= 4)
2608 while (n_left_from >= 1)
2610 ip_adjacency_t *adj0;
2612 u32 rw_len0, error0, adj_index0;
2613 u32 tx_sw_if_index0;
2616 /* Prefetch next iteration */
2617 if (PREDICT_TRUE (n_left_from >= 4))
2619 ip_adjacency_t *adj2;
2622 vlib_prefetch_buffer_header (b[3], LOAD);
2623 vlib_prefetch_buffer_data (b[2], LOAD);
2625 /* Prefetch adj->rewrite_header */
2626 adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2627 adj2 = adj_get (adj_index2);
2629 CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2633 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2636 * Prefetch the per-adjacency counters
2640 vlib_prefetch_combined_counter (&adjacency_counters,
2641 thread_index, adj_index0);
2644 ip0 = vlib_buffer_get_current (b[0]);
2646 error0 = IP4_ERROR_NONE;
2648 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2650 /* Rewrite packet header and updates lengths. */
2651 adj0 = adj_get (adj_index0);
2653 /* Rewrite header was prefetched. */
2654 rw_len0 = adj0[0].rewrite_header.data_bytes;
2655 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2657 /* Check MTU of outgoing interface. */
2658 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2660 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2661 ip0_len = gso_mtu_sz (b[0]);
2663 ip4_mtu_check (b[0], ip0_len,
2664 adj0[0].rewrite_header.max_l3_packet_bytes,
2665 ip0->flags_and_fragment_offset &
2666 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2671 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2672 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2673 IP4_ERROR_SAME_INTERFACE : error0);
2676 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2677 * to see the IP header */
2678 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2680 u32 next_index = adj0[0].rewrite_header.next_index;
2681 vlib_buffer_advance (b[0], -(word) rw_len0);
2682 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2683 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2686 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2687 vnet_feature_arc_start (lm->output_feature_arc_index,
2688 tx_sw_if_index0, &next_index, b[0]);
2689 next[0] = next_index;
2693 b[0]->error = error_node->errors[error0];
2697 calc_checksums (vm, b[0]);
2699 /* Guess we are only writing on simple Ethernet header. */
2700 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2703 * Bump the per-adjacency counters
2707 vlib_increment_combined_counter
2708 (&adjacency_counters,
2710 adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2715 if (adj0->sub_type.midchain.fixup_func)
2716 adj0->sub_type.midchain.fixup_func
2717 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2723 * copy bytes from the IP address into the MAC rewrite
2725 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2726 adj0->rewrite_header.dst_mcast_offset,
2727 &ip0->dst_address.as_u32, (u8 *) ip0);
2736 while (n_left_from > 0)
2738 ip_adjacency_t *adj0;
2740 u32 rw_len0, adj_index0, error0;
2741 u32 tx_sw_if_index0;
2743 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2745 adj0 = adj_get (adj_index0);
2748 vlib_prefetch_combined_counter (&adjacency_counters,
2749 thread_index, adj_index0);
2751 ip0 = vlib_buffer_get_current (b[0]);
2753 error0 = IP4_ERROR_NONE;
2755 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2758 /* Update packet buffer attributes/set output interface. */
2759 rw_len0 = adj0[0].rewrite_header.data_bytes;
2760 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2762 /* Check MTU of outgoing interface. */
2763 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2764 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2765 ip0_len = gso_mtu_sz (b[0]);
2767 ip4_mtu_check (b[0], ip0_len,
2768 adj0[0].rewrite_header.max_l3_packet_bytes,
2769 ip0->flags_and_fragment_offset &
2770 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2775 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2776 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2777 IP4_ERROR_SAME_INTERFACE : error0);
2780 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2781 * to see the IP header */
2782 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2784 u32 next_index = adj0[0].rewrite_header.next_index;
2785 vlib_buffer_advance (b[0], -(word) rw_len0);
2786 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2787 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2790 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2791 vnet_feature_arc_start (lm->output_feature_arc_index,
2792 tx_sw_if_index0, &next_index, b[0]);
2793 next[0] = next_index;
2797 b[0]->error = error_node->errors[error0];
2801 calc_checksums (vm, b[0]);
2803 /* Guess we are only writing on simple Ethernet header. */
2804 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2807 vlib_increment_combined_counter
2808 (&adjacency_counters,
2809 thread_index, adj_index0, 1,
2810 vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2814 if (adj0->sub_type.midchain.fixup_func)
2815 adj0->sub_type.midchain.fixup_func
2816 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2822 * copy bytes from the IP address into the MAC rewrite
2824 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2825 adj0->rewrite_header.dst_mcast_offset,
2826 &ip0->dst_address.as_u32, (u8 *) ip0);
2835 /* Need to do trace after rewrites to pick up new packet data. */
2836 if (node->flags & VLIB_NODE_FLAG_TRACE)
2837 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2839 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2840 return frame->n_vectors;
/*
 * Thin dispatcher used by the ip4 rewrite / midchain node functions in this
 * file. It performs no packet processing itself: vm, node, frame,
 * do_counters, is_midchain and is_mcast are forwarded unchanged to
 * ip4_rewrite_inline_with_gso and that call's result is returned.
 *
 * The first call is taken when vnm->interface_main.gso_interface_count > 0
 * (marked unlikely with PREDICT_FALSE); its last argument is not visible in
 * this excerpt, presumably do_gso = 1 - TODO confirm. The second call passes
 * 0 for do_gso.
 */
2844 ip4_rewrite_inline (vlib_main_t * vm,
2845 vlib_node_runtime_t * node,
2846 vlib_frame_t * frame,
2847 int do_counters, int is_midchain, int is_mcast)
2849 vnet_main_t *vnm = vnet_get_main ();
2850 if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2851 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2852 is_midchain, is_mcast,
2855 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2856 is_midchain, is_mcast,
2857 0 /* no do_gso */ );
2861 /** @brief IPv4 rewrite node.
2864 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2865 header checksum, fetch the ip adjacency, check the outbound mtu,
2866 apply the adjacency rewrite, and send pkts to the adjacency
2867 rewrite header's rewrite_next_index.
2869 @param vm vlib_main_t corresponding to the current thread
2870 @param node vlib_node_runtime_t
2871 @param frame vlib_frame_t whose contents should be dispatched
2873 @par Graph mechanics: buffer metadata, next index usage
2876 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2877 - the rewrite adjacency index
2878 - <code>adj->lookup_next_index</code>
2879 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2880 the packet will be dropped.
2881 - <code>adj->rewrite_header</code>
2882 - Rewrite string length, rewrite string, next_index
2885 - <code>b->current_data, b->current_length</code>
2886 - Updated net of applying the rewrite string
2888 <em>Next Indices:</em>
2889 - <code> adj->rewrite_header.next_index </code>
/*
 * Node function for ip4_rewrite_node. Calls ip4_rewrite_inline with
 * is_midchain = 0 and is_mcast = 0; do_counters is 1 when
 * adj_are_counters_enabled () is true and 0 otherwise. Returns the value
 * returned by ip4_rewrite_inline.
 */
2893 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2894 vlib_frame_t * frame)
2896 if (adj_are_counters_enabled ())
2897 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2899 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
/*
 * Node function for ip4_rewrite_bcast_node. Passes the same flag values as
 * the ip4_rewrite_node function: is_midchain = 0, is_mcast = 0, and
 * do_counters = 1 only when adj_are_counters_enabled () is true.
 */
2902 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2903 vlib_node_runtime_t * node,
2904 vlib_frame_t * frame)
2906 if (adj_are_counters_enabled ())
2907 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2909 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
/*
 * Node function for ip4_midchain_node. Calls ip4_rewrite_inline with
 * is_midchain = 1 and is_mcast = 0; do_counters is 1 only when
 * adj_are_counters_enabled () is true.
 */
2912 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2913 vlib_node_runtime_t * node,
2914 vlib_frame_t * frame)
2916 if (adj_are_counters_enabled ())
2917 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2919 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
/*
 * Node function for ip4_rewrite_mcast_node. Calls ip4_rewrite_inline with
 * is_midchain = 0 and is_mcast = 1; do_counters is 1 only when
 * adj_are_counters_enabled () is true.
 */
2922 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2923 vlib_node_runtime_t * node,
2924 vlib_frame_t * frame)
2926 if (adj_are_counters_enabled ())
2927 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2929 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
/*
 * Node function for ip4_mcast_midchain_node. Calls ip4_rewrite_inline with
 * both is_midchain = 1 and is_mcast = 1; do_counters is 1 only when
 * adj_are_counters_enabled () is true.
 */
2932 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2933 vlib_node_runtime_t * node,
2934 vlib_frame_t * frame)
2936 if (adj_are_counters_enabled ())
2937 return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2939 return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
/*
 * Graph-node registration for ip4_rewrite_node under the name "ip4-rewrite".
 * Traces are formatted with format_ip4_rewrite_trace. The node declares
 * IP4_REWRITE_N_NEXT next nodes; the entries visible here map the DROP,
 * ICMP_ERROR and FRAGMENT next indices to "ip4-drop", "ip4-icmp-error" and
 * "ip4-frag".
 */
2943 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2944 .name = "ip4-rewrite",
2945 .vector_size = sizeof (u32),
2947 .format_trace = format_ip4_rewrite_trace,
2949 .n_next_nodes = IP4_REWRITE_N_NEXT,
2951 [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2952 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2953 [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
/*
 * Graph-node registration for ip4_rewrite_bcast_node, named
 * "ip4-rewrite-bcast". It sets .sibling_of = "ip4-rewrite" and lists no
 * next nodes of its own in the lines visible here.
 */
2957 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2958 .name = "ip4-rewrite-bcast",
2959 .vector_size = sizeof (u32),
2961 .format_trace = format_ip4_rewrite_trace,
2962 .sibling_of = "ip4-rewrite",
/*
 * Graph-node registration for ip4_rewrite_mcast_node, named
 * "ip4-rewrite-mcast". It sets .sibling_of = "ip4-rewrite" and lists no
 * next nodes of its own in the lines visible here.
 */
2965 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2966 .name = "ip4-rewrite-mcast",
2967 .vector_size = sizeof (u32),
2969 .format_trace = format_ip4_rewrite_trace,
2970 .sibling_of = "ip4-rewrite",
/*
 * Graph-node registration for ip4_mcast_midchain_node, named
 * "ip4-mcast-midchain". It sets .sibling_of = "ip4-rewrite" and lists no
 * next nodes of its own in the lines visible here.
 */
2973 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2974 .name = "ip4-mcast-midchain",
2975 .vector_size = sizeof (u32),
2977 .format_trace = format_ip4_rewrite_trace,
2978 .sibling_of = "ip4-rewrite",
/*
 * Graph-node registration for ip4_midchain_node, named "ip4-midchain", with
 * .sibling_of = "ip4-rewrite".
 *
 * NOTE(review): this registration uses format_ip4_forward_next_trace while
 * the other rewrite registrations in this file use
 * format_ip4_rewrite_trace - confirm the difference is intentional.
 */
2981 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2982 .name = "ip4-midchain",
2983 .vector_size = sizeof (u32),
2984 .format_trace = format_ip4_forward_next_trace,
2985 .sibling_of = "ip4-rewrite",
/*
 * Cross-check one address lookup in the FIB selected by fib_index0.
 *
 * The address is resolved through that FIB's mtrie
 * (ip4_fib_mtrie_lookup_step_one followed by lookup steps 2 and 3), the
 * index stored in the resulting leaf is read with
 * ip4_fib_mtrie_leaf_get_adj_index, and that index is compared with the
 * value returned by ip4_fib_table_lookup_lb for the same FIB and address.
 *
 * @param a           address to look up
 * @param fib_index0  index handed to ip4_fib_get
 * @return non-zero when both lookups yield the same index, 0 otherwise
 */
2990 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2992 ip4_fib_mtrie_t *mtrie0;
2993 ip4_fib_mtrie_leaf_t leaf0;
2996 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2998 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2999 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3000 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3002 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3004 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/*
 * CLI handler for "test lookup".
 *
 * Accepts, in any order, "table <n>", "count <f>" and an IPv4 base address;
 * any other token returns an "unknown input" error. It then calls
 * ip4_lookup_validate for n addresses, starting at the base address and
 * adding 1 (in host byte order) after each call, and prints either an
 * error count or "No errors in <n> lookups".
 *
 * The assignment of n and the statement guarded by the
 * ip4_lookup_validate test are not visible in this excerpt; presumably n is
 * derived from count and a failed validation bumps errors - TODO confirm.
 *
 * NOTE(review): ip4_base_address is declared without an initializer and the
 * only visible write is the "%U" parse, so it looks like it is read
 * uninitialized when no address is given on the command line - confirm.
 */
3007 static clib_error_t *
3008 test_lookup_command_fn (vlib_main_t * vm,
3009 unformat_input_t * input, vlib_cli_command_t * cmd)
3016 ip4_address_t ip4_base_address;
3019 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3021 if (unformat (input, "table %d", &table_id))
3023 /* Make sure the entry exists. */
3024 fib = ip4_fib_get (table_id);
/* NOTE(review): table_id is handed to ip4_fib_get as a FIB index and is only
 * rejected when a FIB is returned whose index differs - confirm intended. */
3025 if ((fib) && (fib->index != table_id))
3026 return clib_error_return (0, "<fib-index> %d does not exist",
3029 else if (unformat (input, "count %f", &count))
3032 else if (unformat (input, "%U",
3033 unformat_ip4_address, &ip4_base_address))
3036 return clib_error_return (0, "unknown input `%U'",
3037 format_unformat_error, input);
3042 for (i = 0; i < n; i++)
3044 if (!ip4_lookup_validate (&ip4_base_address, table_id))
3047 ip4_base_address.as_u32 =
3048 clib_host_to_net_u32 (1 +
3049 clib_net_to_host_u32 (ip4_base_address.as_u32));
3053 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3055 vlib_cli_output (vm, "No errors in %d lookups\n", n);
3061 * Perform a lookup of an IPv4 Address (or range of addresses) in the
3062 * given FIB table to determine if there is a conflict with the
3063 * adjacency table. The fib-id can be determined by using the
3064 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3067 * @todo This command uses fib-id, other commands use table-id (not
3068 * just a name, they are different indexes). Would like to change this
3069 * to table-id for consistency.
3072 * Example of how to run the test lookup command:
3073 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3074 * No errors in 2 lookups
/* Registers the "test lookup" CLI path and binds it to
 * test_lookup_command_fn. */
3078 VLIB_CLI_COMMAND (lookup_test_command, static) =
3080 .path = "test lookup",
3081 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3082 .function = test_lookup_command_fn,
3086 #ifndef CLIB_MARCH_VARIANT
/*
 * Apply flow_hash_config to the IPv4 FIB identified by table_id.
 *
 * table_id is resolved to a FIB index with fib_table_find; when that yields
 * ~0 the function returns VNET_API_ERROR_NO_SUCH_FIB. Otherwise
 * fib_table_set_flow_hash_config is called for that FIB index. The last
 * argument of that call and the success return value are not visible in
 * this excerpt.
 */
3088 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3092 fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
3094 if (~0 == fib_index)
3095 return VNET_API_ERROR_NO_SUCH_FIB;
3097 fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
/*
 * CLI handler for "set ip flow-hash".
 *
 * Parses "table <n>" plus the keywords produced by expanding
 * foreach_flow_hash_bit: for each keyword that matches, the associated value
 * is OR-ed into flow_hash_config (which starts at 0) and matched is set.
 * The accumulated configuration is then applied with
 * vnet_set_ip4_flow_hash (table_id, flow_hash_config).
 *
 * Errors visible here: an "unknown input" error from the parse phase, and
 * "no such FIB table <id>" when vnet_set_ip4_flow_hash returns
 * VNET_API_ERROR_NO_SUCH_FIB. The clib_warning about an illegal flow hash
 * config is presumably the default case of the switch on rv - the case
 * label is not visible in this excerpt.
 */
3104 static clib_error_t *
3105 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3106 unformat_input_t * input,
3107 vlib_cli_command_t * cmd)
3111 u32 flow_hash_config = 0;
3114 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3116 if (unformat (input, "table %d", &table_id))
3119 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3120 foreach_flow_hash_bit
3127 return clib_error_return (0, "unknown input `%U'",
3128 format_unformat_error, input);
3130 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3136 case VNET_API_ERROR_NO_SUCH_FIB:
3137 return clib_error_return (0, "no such FIB table %d", table_id);
3140 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3148 * Configure the set of IPv4 fields used by the flow hash.
3151 * Example of how to set the flow hash on a given table:
3152 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3153 * Example of displaying the configured flow hash:
3154 * @cliexstart{show ip fib}
3155 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3158 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3159 * [0] [@0]: dpo-drop ip6
3162 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3163 * [0] [@0]: dpo-drop ip6
3166 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3167 * [0] [@0]: dpo-drop ip6
3170 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3171 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3174 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3175 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3176 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3177 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3178 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3181 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3182 * [0] [@0]: dpo-drop ip6
3183 * 255.255.255.255/32
3185 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3186 * [0] [@0]: dpo-drop ip6
3187 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3190 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3191 * [0] [@0]: dpo-drop ip6
3194 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3195 * [0] [@0]: dpo-drop ip6
3198 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3199 * [0] [@4]: ipv4-glean: af_packet0
3202 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3203 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3206 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3207 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3210 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3211 * [0] [@4]: ipv4-glean: af_packet1
3214 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3215 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3218 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3219 * [0] [@0]: dpo-drop ip6
3222 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3223 * [0] [@0]: dpo-drop ip6
3224 * 255.255.255.255/32
3226 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3227 * [0] [@0]: dpo-drop ip6
/* Registers the "set ip flow-hash" CLI path and binds it to
 * set_ip_flow_hash_command_fn. */
3231 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3233 .path = "set ip flow-hash",
3235 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3236 .function = set_ip_flow_hash_command_fn,
3240 #ifndef CLIB_MARCH_VARIANT
/*
 * Record classifier table table_index against interface sw_if_index for
 * IPv4 and update the interface's FIB accordingly.
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * index in the sw_interfaces pool, and VNET_API_ERROR_NO_SUCH_ENTRY when
 * table_index is not ~0 and is a free index in the classifier table pool
 * (so ~0 skips the table-existence check).
 *
 * Otherwise table_index is stored in
 * lm->classify_table_index_by_sw_if_index[sw_if_index]. If the interface
 * has a first IPv4 address, a prefix is built from it and, in the FIB
 * returned by fib_table_get_index_for_sw_if_index, a FIB_SOURCE_CLASSIFY
 * special entry carrying a classify DPO is added when table_index != ~0.
 * The fib_table_entry_special_remove call presumably belongs to the else
 * branch (table_index == ~0); the else keyword is not visible in this
 * excerpt - TODO confirm. The success return value is not visible either.
 */
3242 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3245 vnet_main_t *vnm = vnet_get_main ();
3246 vnet_interface_main_t *im = &vnm->interface_main;
3247 ip4_main_t *ipm = &ip4_main;
3248 ip_lookup_main_t *lm = &ipm->lookup_main;
3249 vnet_classify_main_t *cm = &vnet_classify_main;
3250 ip4_address_t *if_addr;
3252 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3253 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3255 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3256 return VNET_API_ERROR_NO_SUCH_ENTRY;
3258 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3259 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3261 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3263 if (NULL != if_addr)
3265 fib_prefix_t pfx = {
3267 .fp_proto = FIB_PROTOCOL_IP4,
3268 .fp_addr.ip4 = *if_addr,
3272 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3276 if (table_index != (u32) ~ 0)
3278 dpo_id_t dpo = DPO_INVALID;
3283 classify_dpo_create (DPO_PROTO_IP4, table_index));
3285 fib_table_entry_special_dpo_add (fib_index,
3287 FIB_SOURCE_CLASSIFY,
3288 FIB_ENTRY_FLAG_NONE, &dpo);
3293 fib_table_entry_special_remove (fib_index,
3294 &pfx, FIB_SOURCE_CLASSIFY);
/*
 * CLI handler for "set ip classify".
 *
 * Parses "table-index <n>" and "intfc <interface>". Both are mandatory: a
 * missing table-index or an interface left at ~0 returns a CLI error.
 * The values are then passed to vnet_set_ip4_classify_intfc, whose
 * VNET_API_ERROR_NO_MATCHING_INTERFACE and VNET_API_ERROR_NO_SUCH_ENTRY
 * results are reported as "No such interface" and "No such classifier
 * table".
 */
3302 static clib_error_t *
3303 set_ip_classify_command_fn (vlib_main_t * vm,
3304 unformat_input_t * input,
3305 vlib_cli_command_t * cmd)
3307 u32 table_index = ~0;
/* A separate flag tracks whether table-index was supplied, so ~0 remains a
 * usable table-index value. */
3308 int table_index_set = 0;
3309 u32 sw_if_index = ~0;
3312 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3314 if (unformat (input, "table-index %d", &table_index))
3315 table_index_set = 1;
3316 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3317 vnet_get_main (), &sw_if_index))
3323 if (table_index_set == 0)
3324 return clib_error_return (0, "classify table-index must be specified");
3326 if (sw_if_index == ~0)
3327 return clib_error_return (0, "interface / subif must be specified");
3329 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3336 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3337 return clib_error_return (0, "No such interface");
3339 case VNET_API_ERROR_NO_SUCH_ENTRY:
3340 return clib_error_return (0, "No such classifier table");
3346 * Assign a classification table to an interface. The classification
3347 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3348 * commands. Once the table is created, use this command to filter packets
3352 * Example of how to assign a classification table to an interface:
3353 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
/* Registers the "set ip classify" CLI path and binds it to
 * set_ip_classify_command_fn. */
3356 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3358 .path = "set ip classify",
3360 "set ip classify intfc <interface> table-index <classify-idx>",
3361 .function = set_ip_classify_command_fn,
/*
 * Early-config handler registered for the "ip" configuration section (see
 * the VLIB_EARLY_CONFIG_FUNCTION line at the end of this block).
 *
 * Parses "heap-size <size>" using unformat_memory_size and stores the
 * parsed value in ip4_main.mtrie_heap_size. The "invalid heap-size
 * parameter" error is presumably returned for any token that does not
 * match; the else keyword is not visible in this excerpt - TODO confirm.
 * The declaration of heapsize and the success return value are not
 * visible either.
 */
3365 static clib_error_t *
3366 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3368 ip4_main_t *im = &ip4_main;
3371 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3373 if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3376 return clib_error_return (0,
3377 "invalid heap-size parameter `%U'",
3378 format_unformat_error, input);
3381 im->mtrie_heap_size = heapsize;
3386 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3389 * fd.io coding-style-patch-verification: ON
3392 * eval: (c-set-style "gnu")