2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h> /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
60 /** @brief IPv4 lookup node.
63 This is the main IPv4 lookup dispatch node.
65 @param vm vlib_main_t corresponding to the current thread
66 @param node vlib_node_runtime_t
67 @param frame vlib_frame_t whose contents should be dispatched
69 @par Graph mechanics: buffer metadata, next index usage
72 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73 - Indicates the @c sw_if_index value of the interface that the
74 packet was received on.
75 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76 - When the value is @c ~0 then the node performs a longest prefix
77 match (LPM) for the packet destination address in the FIB attached
78 to the receive interface.
79 - Otherwise perform LPM for the packet destination address in the
80 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81 value (0, 1, ...) and not a VRF id.
84 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85 - The lookup result adjacency index.
88 - Dispatches the packet to the node index found in
89 ip_adjacency_t @c adj->lookup_next_index
90 (where @c adj is the lookup result adjacency).
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
/* The node function is a thin wrapper: the frame is handed unchanged to
   ip4_lookup_inline and that function's result is returned. */
95 return ip4_lookup_inline (vm, node, frame);
/* Forward declaration: the trace formatter is referenced by the node
   registrations in this file before its definition appears. */
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/* Registration of the "ip4-lookup" graph node.  Frame elements are u32
   sized, traces are rendered with format_ip4_lookup_trace, and the
   next-node table (IP_LOOKUP_N_NEXT entries) comes from
   IP4_LOOKUP_NEXT_NODES. */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
103 .name = "ip4-lookup",
104 .vector_size = sizeof (u32),
105 .format_trace = format_ip4_lookup_trace,
106 .n_next_nodes = IP_LOOKUP_N_NEXT,
107 .next_nodes = IP4_LOOKUP_NEXT_NODES,
/* ip4-load-balance node function.
 *
 * For every buffer in the frame: read the load-balance index left in
 * ip.adj_index[VLIB_TX], pick one bucket of that load-balance object,
 * record the bucket's next node in nexts[] and its DPO index back into
 * ip.adj_index[VLIB_TX], and bump the per-load-balance "via" combined
 * counter with one packet and the buffer chain length.
 *
 * Returns frame->n_vectors after enqueueing all buffers to their nexts.
 *
 * NOTE(review): the loop headers, else keywords and the b/next pointer
 * advances are not visible in this view; the comments below describe only
 * the statements that are present.
 */
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112 vlib_node_runtime_t * node,
113 vlib_frame_t * frame)
115 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
117 u32 thread_index = vm->thread_index;
118 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119 u16 nexts[VLIB_FRAME_SIZE], *next;
121 from = vlib_frame_vector_args (frame);
122 n_left = frame->n_vectors;
/* Translate all buffer indices of the frame into buffer pointers up front. */
125 vlib_get_buffers (vm, from, bufs, n_left);
/* Two-buffers-at-a-time section: works on b[0] and b[1] while b[2] and b[3]
   are prefetched. */
129 const load_balance_t *lb0, *lb1;
130 const ip4_header_t *ip0, *ip1;
131 u32 lbi0, hc0, lbi1, hc1;
132 const dpo_id_t *dpo0, *dpo1;
134 /* Prefetch next iteration. */
136 vlib_prefetch_buffer_header (b[2], LOAD);
137 vlib_prefetch_buffer_header (b[3], LOAD);
139 CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140 CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
143 ip0 = vlib_buffer_get_current (b[0]);
144 ip1 = vlib_buffer_get_current (b[1]);
/* On entry adj_index[VLIB_TX] holds a load-balance index, not yet the
   final DPO index. */
145 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146 lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
148 lb0 = load_balance_get (lbi0);
149 lb1 = load_balance_get (lbi1);
152 * this node is for via FIBs we can re-use the hash value from the
153 * to node if present.
154 * We don't want to use the same hash value at each level in the recursion
155 * graph as that would lead to polarisation
/* Multi-bucket case for buffer 0: a non-zero stored flow hash is shifted
   right by one and written back; the ip4_compute_flow_hash assignment
   below is presumably the else branch for a zero hash (else not visible).
   The resulting hash, masked with lb_n_buckets_minus_1, selects the bucket. */
159 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
161 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
163 hc0 = vnet_buffer (b[0])->ip.flow_hash =
164 vnet_buffer (b[0])->ip.flow_hash >> 1;
168 hc0 = vnet_buffer (b[0])->ip.flow_hash =
169 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
171 dpo0 = load_balance_get_fwd_bucket
172 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
/* Single-bucket case: bucket 0 is used and no hash is needed. */
176 dpo0 = load_balance_get_bucket_i (lb0, 0);
/* Same selection logic for buffer 1. */
178 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
180 if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
182 hc1 = vnet_buffer (b[1])->ip.flow_hash =
183 vnet_buffer (b[1])->ip.flow_hash >> 1;
187 hc1 = vnet_buffer (b[1])->ip.flow_hash =
188 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
190 dpo1 = load_balance_get_fwd_bucket
191 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195 dpo1 = load_balance_get_bucket_i (lb1, 0);
/* Publish the decision: next node per buffer, and the chosen DPO index
   replaces the load-balance index in the buffer metadata. */
198 next[0] = dpo0->dpoi_next_node;
199 next[1] = dpo1->dpoi_next_node;
201 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Counters are indexed by the original load-balance index. */
204 vlib_increment_combined_counter
205 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206 vlib_increment_combined_counter
207 (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
/* One-buffer-at-a-time section: identical steps for a single buffer. */
216 const load_balance_t *lb0;
217 const ip4_header_t *ip0;
218 const dpo_id_t *dpo0;
221 ip0 = vlib_buffer_get_current (b[0]);
222 lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
224 lb0 = load_balance_get (lbi0);
227 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
229 if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
231 hc0 = vnet_buffer (b[0])->ip.flow_hash =
232 vnet_buffer (b[0])->ip.flow_hash >> 1;
236 hc0 = vnet_buffer (b[0])->ip.flow_hash =
237 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
239 dpo0 = load_balance_get_fwd_bucket
240 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244 dpo0 = load_balance_get_bucket_i (lb0, 0);
247 next[0] = dpo0->dpoi_next_node;
248 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
250 vlib_increment_combined_counter
251 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
/* Hand every buffer to the next node recorded for it in nexts[]. */
258 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
/* Tracing is only attempted when the node has the trace flag set; the
   adjacency index recorded is the VLIB_TX one written above. */
259 if (node->flags & VLIB_NODE_FLAG_TRACE)
260 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
262 return frame->n_vectors;
/* Registration of the "ip4-load-balance" graph node.  It is declared a
   sibling of "ip4-lookup" and reuses the ip4-lookup trace formatter. */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
268 .name = "ip4-load-balance",
269 .vector_size = sizeof (u32),
270 .sibling_of = "ip4-lookup",
271 .format_trace = format_ip4_lookup_trace,
275 #ifndef CLIB_MARCH_VARIANT
276 /* Get the first IPv4 address configured on an interface.
 *
 * @param im          IPv4 main structure; its lookup_main is walked.
 * @param sw_if_index interface whose addresses are examined
 *                    (unnumbered interfaces are honored).
 * @param result_ia   written with the interface-address entry that
 *                    produced the result, or 0 when no address was found.
 *
 * NOTE(review): the return statement, the assignment into `result` and any
 * guard around the *result_ia store are not visible in this view.
 */
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279 ip_interface_address_t ** result_ia)
281 ip_lookup_main_t *lm = &im->lookup_main;
282 ip_interface_address_t *ia = 0;
283 ip4_address_t *result = 0;
286 foreach_ip_interface_address
287 (lm, ia, sw_if_index,
288 1 /* honor unnumbered */ ,
291 ip_interface_address_get_address (lm, ia);
/* Report the matching entry only when an address was actually found. */
297 *result_ia = result ? ia : 0;
/* (Re)install the subnet-broadcast route for an interface prefix.
 *
 * Any existing FIB_SOURCE_INTERFACE special entry is removed first.  The
 * replacement depends on the interface's software flags: with
 * VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST set a one-path entry is installed,
 * and a special entry flagged DROP | LOOSE_URPF_EXEMPT is added as well.
 *
 * NOTE(review): several argument lines and the else keyword are not visible
 * here; the special_add below appears to be the branch taken when directed
 * broadcast is NOT enabled - confirm against the full source.
 */
303 ip4_add_subnet_bcast_route (u32 fib_index,
307 vnet_sw_interface_flags_t iflags;
309 iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
/* Start from a clean slate so the entry can change kind. */
311 fib_table_entry_special_remove(fib_index,
313 FIB_SOURCE_INTERFACE);
315 if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
317 fib_table_entry_update_one_path (fib_index, pfx,
318 FIB_SOURCE_INTERFACE,
321 /* No next-hop address */
327 // no out-label stack
329 FIB_ROUTE_PATH_FLAG_NONE);
333 fib_table_entry_special_add(fib_index,
335 FIB_SOURCE_INTERFACE,
336 (FIB_ENTRY_FLAG_DROP |
337 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
/* Install the FIB entries that go with one interface address.
 *
 * @param sw_if_index interface owning the address.
 * @param im          IPv4 main structure (lookup_main and fib_masks are used).
 * @param fib_index   FIB the entries are added to.
 * @param a           interface-address entry; supplies address and length.
 *
 * Prefixes of length <= 30 get a connected/attached entry for the subnet
 * plus entries for the network and subnet-broadcast addresses; a /31 gets an
 * attached entry for the peer address.  An optional classify DPO entry and
 * the connected/local entry for the address itself follow.
 *
 * NOTE(review): a number of argument lines and intermediate statements are
 * not visible in this view (for example anything that modifies pfx between
 * the branches and the final update).
 */
342 ip4_add_interface_routes (u32 sw_if_index,
343 ip4_main_t * im, u32 fib_index,
344 ip_interface_address_t * a)
346 ip_lookup_main_t *lm = &im->lookup_main;
347 ip4_address_t *address = ip_interface_address_get_address (lm, a);
349 .fp_len = a->address_length,
350 .fp_proto = FIB_PROTOCOL_IP4,
351 .fp_addr.ip4 = *address,
354 if (pfx.fp_len <= 30)
356 /* a /30 or shorter - add a glean for the network address */
357 fib_table_entry_update_one_path (fib_index, &pfx,
358 FIB_SOURCE_INTERFACE,
359 (FIB_ENTRY_FLAG_CONNECTED |
360 FIB_ENTRY_FLAG_ATTACHED),
362 /* No next-hop address */
368 // no out-label stack
370 FIB_ROUTE_PATH_FLAG_NONE);
372 /* Add the two broadcast addresses as drop */
/* net_pfx starts as the network address: interface address ANDed with the
   mask for this prefix length. */
373 fib_prefix_t net_pfx = {
375 .fp_proto = FIB_PROTOCOL_IP4,
376 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
/* Skip the entry when it would coincide with the interface's own address. */
378 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
379 fib_table_entry_special_add(fib_index,
381 FIB_SOURCE_INTERFACE,
382 (FIB_ENTRY_FLAG_DROP |
383 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
/* Setting all host bits turns net_pfx into the subnet-broadcast address. */
384 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
385 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
386 ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
388 else if (pfx.fp_len == 31)
/* XOR with a network-order 1 flips the lowest address bit, producing the
   other address of the /31 pair. */
390 u32 mask = clib_host_to_net_u32(1);
391 fib_prefix_t net_pfx = pfx;
394 net_pfx.fp_addr.ip4.as_u32 ^= mask;
396 /* a /31 - add the other end as an attached host */
397 fib_table_entry_update_one_path (fib_index, &net_pfx,
398 FIB_SOURCE_INTERFACE,
399 (FIB_ENTRY_FLAG_ATTACHED),
407 FIB_ROUTE_PATH_FLAG_NONE);
/* If a classify table is configured for this interface (index != ~0), a
   classify DPO is created and installed as a special entry. */
411 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
413 u32 classify_table_index =
414 lm->classify_table_index_by_sw_if_index[sw_if_index];
415 if (classify_table_index != (u32) ~ 0)
417 dpo_id_t dpo = DPO_INVALID;
422 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
424 fib_table_entry_special_dpo_add (fib_index,
427 FIB_ENTRY_FLAG_NONE, &dpo);
/* Entry for pfx flagged CONNECTED | LOCAL. */
432 fib_table_entry_update_one_path (fib_index, &pfx,
433 FIB_SOURCE_INTERFACE,
434 (FIB_ENTRY_FLAG_CONNECTED |
435 FIB_ENTRY_FLAG_LOCAL),
442 FIB_ROUTE_PATH_FLAG_NONE);
/* Remove the FIB entries that ip4_add_interface_routes installs for an
 * interface address.
 *
 * @param im             IPv4 main structure (fib_masks is used).
 * @param address        the interface address being withdrawn.
 * @param address_length its prefix length.
 *
 * NOTE(review): the fib_index parameter line and some argument lines are
 * not visible in this view.
 */
446 ip4_del_interface_routes (ip4_main_t * im,
448 ip4_address_t * address, u32 address_length)
451 .fp_len = address_length,
452 .fp_proto = FIB_PROTOCOL_IP4,
453 .fp_addr.ip4 = *address,
456 if (pfx.fp_len <= 30)
/* Network address first, then (after OR-ing in the host bits) the
   subnet-broadcast address; each is removed only when it differs from the
   interface address itself, mirroring the add path. */
458 fib_prefix_t net_pfx = {
460 .fp_proto = FIB_PROTOCOL_IP4,
461 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
463 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
464 fib_table_entry_special_remove(fib_index,
466 FIB_SOURCE_INTERFACE);
467 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
468 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
469 fib_table_entry_special_remove(fib_index,
471 FIB_SOURCE_INTERFACE);
/* Drop the entry for the prefix itself. */
472 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
474 else if (pfx.fp_len == 31)
/* Flip the lowest address bit to address the other end of the /31. */
476 u32 mask = clib_host_to_net_u32(1);
477 fib_prefix_t net_pfx = pfx;
480 net_pfx.fp_addr.ip4.as_u32 ^= mask;
482 fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
486 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
489 #ifndef CLIB_MARCH_VARIANT
/* Reference-counted enable/disable of IPv4 on an interface.
 *
 * @param sw_if_index interface to act on.
 * @param is_enable   non-zero to take a reference, zero to release one.
 *
 * A per-interface counter in ip4_main.ip_enabled_by_sw_if_index tracks the
 * number of enables.  The "ip4-not-enabled" feature is toggled on the
 * ip4-unicast and ip4-multicast arcs and the registered enable/disable
 * callbacks are invoked.
 *
 * NOTE(review): the branch structure and the early returns that follow the
 * two counter tests are not visible in this view.
 */
491 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
493 ip4_main_t *im = &ip4_main;
/* Make sure a counter slot exists for this interface, initialised to 0. */
495 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
498 * enable/disable only on the 1<->0 transition
/* Enable side: the pre-incremented count is compared against 1. */
502 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
/* Disable side: the count must be positive before it is decremented. */
507 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
508 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
511 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
/* "ip4-not-enabled" is switched with the inverse of is_enable. */
515 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
516 sw_if_index, !is_enable, 0, 0);
/* Notify everyone registered in enable_disable_interface_callbacks. */
519 ip4_enable_disable_interface_callback_t *cb;
520 vec_foreach (cb, im->enable_disable_interface_callbacks)
521 cb->function (im, cb->function_opaque, sw_if_index, is_enable);
/* Add or delete an IPv4 address on an interface.
 *
 * @param vm             vlib main (not used by the statements visible here).
 * @param address        address to add or delete.
 * @param address_length prefix length of the address.
 * @param is_del         non-zero to delete, zero to add.
 * @return a clib error on failure; `error` is initialised to 0.
 *
 * Visible steps: reject sw_if_index 0 (local0), build the address/FIB pair,
 * check for conflicts with addresses on interfaces that use the same FIB,
 * update the interface address pool, adjust the IPv4 enable count, add or
 * remove interface routes when the interface is admin-up, and run the
 * add/del address callbacks.
 *
 * NOTE(review): the sw_if_index parameter line, several conditions, goto or
 * return statements and the cleanup path are not visible in this view.
 */
525 static clib_error_t *
526 ip4_add_del_interface_address_internal (vlib_main_t * vm,
528 ip4_address_t * address,
529 u32 address_length, u32 is_del)
531 vnet_main_t *vnm = vnet_get_main ();
532 ip4_main_t *im = &ip4_main;
533 ip_lookup_main_t *lm = &im->lookup_main;
534 clib_error_t *error = 0;
535 u32 if_address_index, elts_before;
536 ip4_address_fib_t ip4_af, *addr_fib = 0;
538 /* local0 interface doesn't support IP addressing */
539 if (sw_if_index == 0)
542 clib_error_create ("local0 interface doesn't support IP addressing");
/* Pair the address with the FIB index currently bound to the interface and
   keep it in a one-element vector for ip_interface_address_add_del. */
545 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
546 ip4_addr_fib_init (&ip4_af, address,
547 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
548 vec_add1 (addr_fib, ip4_af);
551 * there is no support for adj-fib handling in the presence of overlapping
552 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
558 /* When adding an address check that it does not conflict
559 with an existing address on any interface in this table. */
560 ip_interface_address_t *ia;
561 vnet_sw_interface_t *sif;
/* Walk every software interface; only those bound to the same FIB index as
   the target interface are compared. */
563 pool_foreach(sif, vnm->interface_main.sw_interfaces,
565 if (im->fib_index_by_sw_if_index[sw_if_index] ==
566 im->fib_index_by_sw_if_index[sif->sw_if_index])
568 foreach_ip_interface_address
569 (&im->lookup_main, ia, sif->sw_if_index,
570 0 /* honor unnumbered */ ,
573 ip_interface_address_get_address
574 (&im->lookup_main, ia);
/* The match is tested in both directions (new address against the existing
   prefix and, in the second call, presumably the reverse - its arguments
   are not visible here). */
575 if (ip4_destination_matches_route
576 (im, address, x, ia->address_length) ||
577 ip4_destination_matches_route (im,
/* On conflict the API errno is set and an error naming both addresses and
   the interface is built. */
582 vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
586 ("failed to add %U which conflicts with %U for interface %U",
587 format_ip4_address_and_length, address,
589 format_ip4_address_and_length, x,
591 format_vnet_sw_if_index_name, vnm,
/* Remember the pool size so a real add/delete can be told apart from a
   no-op further down. */
600 elts_before = pool_elts (lm->if_address_pool);
602 error = ip_interface_address_add_del
603 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
/* An add takes an IPv4 enable reference on the interface, a delete
   releases one. */
607 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
609 /* intf addr routes are added/deleted on admin up/down */
610 if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
613 ip4_del_interface_routes (im, ip4_af.fib_index, address,
616 ip4_add_interface_routes (sw_if_index,
617 im, ip4_af.fib_index,
619 (lm->if_address_pool, if_address_index));
623 /* Run the add/del callbacks only when the pool actually grew or shrank;
   an unchanged element count means a duplicate address was added. */
623 if (elts_before != pool_elts (lm->if_address_pool))
625 ip4_add_del_interface_address_callback_t *cb;
626 vec_foreach (cb, im->add_del_interface_address_callbacks)
627 cb->function (im, cb->function_opaque, sw_if_index,
628 address, address_length, if_address_index, is_del);
/* Public entry point for adding or deleting an interface address.  All
   arguments are forwarded unchanged to
   ip4_add_del_interface_address_internal and its result is returned. */
637 ip4_add_del_interface_address (vlib_main_t * vm,
639 ip4_address_t * address,
640 u32 address_length, u32 is_del)
642 return ip4_add_del_interface_address_internal
643 (vm, sw_if_index, address, address_length, is_del);
/* Re-evaluate the subnet-broadcast routes of an interface after its
 * directed-broadcast setting changes.
 *
 * @param sw_if_index interface to update.
 * @param enable      requested directed-broadcast state.
 *
 * For every address on the interface with prefix length <= 30 the
 * subnet-broadcast address (address with all host bits set) is computed and
 * ip4_add_subnet_bcast_route is called for it.
 *
 * NOTE(review): how `enable` is applied to the interface flags, and the
 * declarations of im and ipa, are not visible in this view.
 */
647 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
649 ip_interface_address_t *ia;
655 * when directed broadcast is enabled, the subnet braodcast route will forward
656 * packets using an adjacency with a broadcast MAC. otherwise it drops
659 foreach_ip_interface_address(&im->lookup_main, ia,
/* Only prefixes of length 30 or shorter are handled here. */
662 if (ia->address_length <= 30)
666 ipa = ip_interface_address_get_address (&im->lookup_main, ia);
670 .fp_proto = FIB_PROTOCOL_IP4,
/* OR-ing in the inverted mask sets every host bit. */
672 .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
676 ip4_add_subnet_bcast_route
677 (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
/* Admin up/down hook: adds the interface routes for every address on the
 * interface when it goes admin-up and removes them when it goes down.
 *
 * @param vnm         vnet main.
 * @param sw_if_index interface whose admin state changed.
 * @param flags       new software interface flags; only
 *                    VNET_SW_INTERFACE_FLAG_ADMIN_UP is examined.
 *
 * NOTE(review): the if/else selecting between add and delete, and the
 * return statement, are not visible in this view.
 */
686 static clib_error_t *
687 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
689 ip4_main_t *im = &ip4_main;
690 ip_interface_address_t *ia;
692 u32 is_admin_up, fib_index;
694 /* Fill in lookup tables with default table (0). */
695 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
697 vec_validate_init_empty (im->
698 lookup_main.if_address_pool_index_by_sw_if_index,
701 is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
703 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
/* Walk the addresses configured directly on this interface (the honor
   unnumbered argument is 0). */
706 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
707 0 /* honor unnumbered */,
709 a = ip_interface_address_get_address (&im->lookup_main, ia);
711 ip4_add_interface_routes (sw_if_index,
715 ip4_del_interface_routes (im, fib_index,
716 a, ia->address_length);
/* Register the hook with the interface admin up/down machinery. */
723 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
725 /* Built-in ip4 unicast rx feature path definition.
 *
 * The arc starts at "ip4-input" / "ip4-input-no-checksum" and ends at
 * "ip4-lookup"; its index is stored in lookup_main.ucast_feature_arc_index.
 * Each VNET_FEATURE_INIT below registers one node on the arc; .runs_before
 * names the feature(s) the node is ordered ahead of.
 */
727 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
729 .arc_name = "ip4-unicast",
730 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
731 .last_in_arc = "ip4-lookup",
732 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
735 VNET_FEATURE_INIT (ip4_flow_classify, static) =
737 .arc_name = "ip4-unicast",
738 .node_name = "ip4-flow-classify",
739 .runs_before = VNET_FEATURES ("ip4-inacl"),
742 VNET_FEATURE_INIT (ip4_inacl, static) =
744 .arc_name = "ip4-unicast",
745 .node_name = "ip4-inacl",
746 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
749 VNET_FEATURE_INIT (ip4_source_check_1, static) =
751 .arc_name = "ip4-unicast",
752 .node_name = "ip4-source-check-via-rx",
753 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
756 VNET_FEATURE_INIT (ip4_source_check_2, static) =
758 .arc_name = "ip4-unicast",
759 .node_name = "ip4-source-check-via-any",
760 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
/* Like ip4-source-check-via-any, this one is only constrained relative to
   ip4-policer-classify. */
763 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
765 .arc_name = "ip4-unicast",
766 .node_name = "ip4-source-and-port-range-check-rx",
767 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
770 VNET_FEATURE_INIT (ip4_policer_classify, static) =
772 .arc_name = "ip4-unicast",
773 .node_name = "ip4-policer-classify",
774 .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
777 VNET_FEATURE_INIT (ip4_ipsec, static) =
779 .arc_name = "ip4-unicast",
780 .node_name = "ipsec4-input-feature",
781 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
784 VNET_FEATURE_INIT (ip4_vpath, static) =
786 .arc_name = "ip4-unicast",
787 .node_name = "vpath-input-ip4",
788 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
791 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
793 .arc_name = "ip4-unicast",
794 .node_name = "ip4-vxlan-bypass",
795 .runs_before = VNET_FEATURES ("ip4-lookup"),
/* "ip4-not-enabled" is the feature that ip4_sw_interface_enable_disable and
   ip4_sw_interface_add_del switch on and off per interface. */
798 VNET_FEATURE_INIT (ip4_not_enabled, static) =
800 .arc_name = "ip4-unicast",
801 .node_name = "ip4-not-enabled",
802 .runs_before = VNET_FEATURES ("ip4-lookup"),
805 VNET_FEATURE_INIT (ip4_lookup, static) =
807 .arc_name = "ip4-unicast",
808 .node_name = "ip4-lookup",
809 .runs_before = 0, /* not before any other features */
812 /* Built-in ip4 multicast rx feature path definition.
 *
 * Same start nodes as the unicast arc; the arc ends at
 * "ip4-mfib-forward-lookup" and its index is stored in
 * lookup_main.mcast_feature_arc_index.
 */
813 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
815 .arc_name = "ip4-multicast",
816 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
817 .last_in_arc = "ip4-mfib-forward-lookup",
818 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
821 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
823 .arc_name = "ip4-multicast",
824 .node_name = "vpath-input-ip4",
825 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
828 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
830 .arc_name = "ip4-multicast",
831 .node_name = "ip4-not-enabled",
832 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
835 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
837 .arc_name = "ip4-multicast",
838 .node_name = "ip4-mfib-forward-lookup",
839 .runs_before = 0, /* last feature */
842 /* ip4 tx feature path definition (source and port-range check, output
 * ACL, IPsec output).
 *
 * The arc starts at "ip4-rewrite", "ip4-midchain" and "ip4-dvr-dpo", ends at
 * "interface-output", and its index is stored in
 * lookup_main.output_feature_arc_index.
 */
843 VNET_FEATURE_ARC_INIT (ip4_output, static) =
845 .arc_name = "ip4-output",
846 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
847 .last_in_arc = "interface-output",
848 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
851 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
853 .arc_name = "ip4-output",
854 .node_name = "ip4-source-and-port-range-check-tx",
855 .runs_before = VNET_FEATURES ("ip4-outacl"),
858 VNET_FEATURE_INIT (ip4_outacl, static) =
860 .arc_name = "ip4-output",
861 .node_name = "ip4-outacl",
862 .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
865 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
867 .arc_name = "ip4-output",
868 .node_name = "ipsec4-output-feature",
869 .runs_before = VNET_FEATURES ("interface-output"),
872 /* Built-in ip4 tx feature path definition */
873 VNET_FEATURE_INIT (ip4_interface_output, static) =
875 .arc_name = "ip4-output",
876 .node_name = "interface-output",
877 .runs_before = 0, /* not before any other features */
/* Interface add/delete hook for IPv4.
 *
 * @param vnm         vnet main.
 * @param sw_if_index interface being created or deleted.
 * @param is_add      non-zero on creation, zero on deletion.
 * @return always 0 (no error).
 *
 * Ensures the per-interface FIB and mFIB index vectors cover the interface,
 * clears the unnumbered setting and deletes all configured addresses, and
 * sets "ip4-not-enabled" on the unicast and multicast arcs.
 *
 * NOTE(review): the condition guarding the address clean-up section is not
 * visible here; it presumably runs only on deletion - confirm.
 */
881 static clib_error_t *
882 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
884 ip4_main_t *im = &ip4_main;
886 /* Fill in lookup tables with default table (0). */
887 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
888 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
892 ip4_main_t *im4 = &ip4_main;
893 ip_lookup_main_t *lm4 = &im4->lookup_main;
894 ip_interface_address_t *ia = 0;
895 ip4_address_t *address;
896 vlib_main_t *vm = vlib_get_main ();
/* Detach any unnumbered relationship (second argument ~0, enable 0). */
898 vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
/* Delete every address on the interface; the last argument (1) is is_del. */
900 foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
902 address = ip_interface_address_get_address (lm4, ia);
903 ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
/* The enable argument follows is_add directly: a freshly created interface
   has "ip4-not-enabled" switched on. */
908 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
911 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
912 sw_if_index, is_add, 0, 0);
914 return /* no error */ 0;
/* Register the hook with the interface add/del machinery. */
917 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
919 /* Global IP4 main. */
920 #ifndef CLIB_MARCH_VARIANT
922 #endif /* CLIB_MARCH_VARIANT */
/* Init function for the IPv4 lookup machinery.
 *
 * @param vm vlib main.
 * @return a clib error from one of the prerequisite init functions.
 *
 * Runs the feature, mtrie, FIB and mFIB init functions, fills the
 * prefix-length -> netmask table, initialises the lookup main, creates the
 * default unicast and multicast tables, hooks the packet-generator edit
 * function onto ip4-lookup and builds the ARP request packet template.
 *
 * NOTE(review): the early returns after each init call, the guard around
 * the shift for i == 0 (if any) and the final return are not visible here.
 */
924 static clib_error_t *
925 ip4_lookup_init (vlib_main_t * vm)
927 ip4_main_t *im = &ip4_main;
/* Prerequisites: each call's error is captured in `error`. */
931 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
933 if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
935 if ((error = vlib_call_init_function (vm, fib_module_init)))
937 if ((error = vlib_call_init_function (vm, mfib_module_init)))
/* fib_masks[i] is the netmask for prefix length i: i one-bits shifted to
   the top of a 32-bit word, stored in network byte order. */
940 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
945 m = pow2_mask (i) << (32 - i);
948 im->fib_masks[i] = clib_host_to_net_u32 (m);
951 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
953 /* Create FIB with index 0 and table id of 0. */
954 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
955 FIB_SOURCE_DEFAULT_ROUTE);
956 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
957 MFIB_SOURCE_DEFAULT_ROUTE);
/* Let the packet generator parse IPv4 headers for the ip4-lookup node. */
961 pn = pg_get_node (ip4_lookup_node.index);
962 pn->unformat_edit = unformat_pg_ip4_header;
/* Build a zeroed ARP header and fill in the constant fields of an
   Ethernet/IPv4 ARP request; _16 stores in network byte order, _8 stores
   the byte as is. */
966 ethernet_arp_header_t h;
968 clib_memset (&h, 0, sizeof (h));
970 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
971 #define _8(f,v) h.f = v;
972 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
973 _16 (l3_type, ETHERNET_TYPE_IP4);
974 _8 (n_l2_address_bytes, 6);
975 _8 (n_l3_address_bytes, 4);
976 _16 (opcode, ETHERNET_ARP_OPCODE_request);
/* The header becomes the template for ip4_arp_request_packet_template. */
980 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
983 /* alloc chunk size */ 8,
/* Register this function as an init function. */
990 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Trace record shared by the ip4 forwarding nodes in this file.
   NOTE(review): the typedef opener and the integer fields (dpo_index,
   flow_hash and fib_index are used by the formatters below) are not visible
   in this view. */
994 /* Adjacency taken. */
999 /* Packet data, possibly *after* rewrite. */
/* Sized as 64 bytes minus one u32. */
1000 u8 packet_data[64 - 1 * sizeof (u32)];
1002 ip4_forward_next_trace_t;
1004 #ifndef CLIB_MARCH_VARIANT
/* Format callback for a forwarding trace record: prints only the captured
 * packet bytes as an IPv4 header, indented to the current indent of `s`.
 *
 * @param s    output vector being appended to.
 * @param args va_list carrying vlib_main_t *, vlib_node_t * (both unused)
 *             and the ip4_forward_next_trace_t * to print.
 */
1006 format_ip4_forward_next_trace (u8 * s, va_list * args)
1008 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1009 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1010 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1011 u32 indent = format_get_indent (s);
1012 s = format (s, "%U%U",
1013 format_white_space, indent,
1014 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Format callback used by ip4-lookup and ip4-load-balance traces.
 *
 * Prints one line with the FIB index, the DPO index and the flow hash, then
 * the captured packet bytes as an IPv4 header on an indented second line.
 *
 * @param s    output vector being appended to.
 * @param args va_list carrying vlib_main_t *, vlib_node_t * (both unused)
 *             and the ip4_forward_next_trace_t * to print.
 */
1020 format_ip4_lookup_trace (u8 * s, va_list * args)
1022 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1023 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1024 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1025 u32 indent = format_get_indent (s);
1027 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1028 t->fib_index, t->dpo_index, t->flow_hash);
1029 s = format (s, "\n%U%U",
1030 format_white_space, indent,
1031 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Format callback for rewrite traces.
 *
 * First line: the value stored in fib_index (labelled "tx_sw_if_index" in
 * the output), the DPO index, the adjacency rendered by format_ip_adjacency
 * and the flow hash.  Second line: the captured packet bytes rendered by
 * format_ip_adjacency_packet_data for that adjacency.
 *
 * @param s    output vector being appended to.
 * @param args va_list carrying vlib_main_t *, vlib_node_t * (both unused)
 *             and the ip4_forward_next_trace_t * to print.
 */
1036 format_ip4_rewrite_trace (u8 * s, va_list * args)
1038 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1039 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1040 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1041 u32 indent = format_get_indent (s);
1043 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1044 t->fib_index, t->dpo_index, format_ip_adjacency,
1045 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1046 s = format (s, "\n%U%U",
1047 format_white_space, indent,
1048 format_ip_adjacency_packet_data,
1049 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1053 #ifndef CLIB_MARCH_VARIANT
1054 /* Common trace function for all ip4-forward next nodes.
 *
 * @param vm              vlib main.
 * @param node            runtime of the node recording the trace.
 * @param frame           frame whose buffers are examined.
 * @param which_adj_index selects which ip.adj_index[] slot (RX or TX) is
 *                        recorded as the DPO index.
 *
 * For every buffer flagged VLIB_BUFFER_IS_TRACED a trace record is added
 * holding the selected adjacency index, the flow hash, a FIB index and the
 * first sizeof (packet_data) bytes at the buffer's current position.
 *
 * NOTE(review): loop headers, the bi0/bi1 loads and the assignment target
 * of the conditional expressions (presumably t->fib_index) are not visible
 * in this view.
 */
1056 ip4_forward_next_trace (vlib_main_t * vm,
1057 vlib_node_runtime_t * node,
1058 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1061 ip4_main_t *im = &ip4_main;
1063 n_left = frame->n_vectors;
1064 from = vlib_frame_vector_args (frame);
/* Two-buffers-at-a-time section. */
1069 vlib_buffer_t *b0, *b1;
1070 ip4_forward_next_trace_t *t0, *t1;
1072 /* Prefetch next iteration. */
1073 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1074 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1079 b0 = vlib_get_buffer (vm, bi0);
1080 b1 = vlib_get_buffer (vm, bi1);
1082 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1084 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1085 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1086 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* Use sw_if_index[VLIB_TX] when it is set (not ~0); otherwise fall back to
   the FIB index bound to the receive interface. */
1088 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1089 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1090 vec_elt (im->fib_index_by_sw_if_index,
1091 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1093 clib_memcpy_fast (t0->packet_data,
1094 vlib_buffer_get_current (b0),
1095 sizeof (t0->packet_data));
1097 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1099 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1100 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1101 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1103 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1104 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1105 vec_elt (im->fib_index_by_sw_if_index,
1106 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1107 clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1108 sizeof (t1->packet_data));
/* One-buffer-at-a-time section: same record, single buffer. */
1118 ip4_forward_next_trace_t *t0;
1122 b0 = vlib_get_buffer (vm, bi0);
1124 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1126 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1127 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1128 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1130 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1131 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1132 vec_elt (im->fib_index_by_sw_if_index,
1133 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1134 clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1135 sizeof (t0->packet_data));
1142 /* Compute TCP/UDP/ICMP4 checksum in software.
 *
 * @param vm vlib main, used to follow the buffer chain.
 * @param p0 first buffer of the packet.
 *
 * The sum is seeded with pseudo-header material (payload length, protocol,
 * source and destination address), then extended over the L4 payload, which
 * may span several chained buffers.  The folded, complemented sum ends up
 * in sum16.
 *
 * NOTE(review): the ip0 parameter line, the declarations of sum0/sum16,
 * the loop header, the else keyword and the return statements are not
 * visible in this view.
 */
1144 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1148 u32 ip_header_length, payload_length_host_byte_order;
1149 u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1151 void *data_this_buffer;
1153 /* Initialize checksum with ip header. */
1154 ip_header_length = ip4_header_bytes (ip0);
/* L4 payload length = IP total length minus the IP header length. */
1155 payload_length_host_byte_order =
1156 clib_net_to_host_u16 (ip0->length) - ip_header_length;
/* Length and protocol are combined in one 32-bit word, converted to
   network byte order. */
1158 clib_host_to_net_u32 (payload_length_host_byte_order +
1159 (ip0->protocol << 16));
/* With a 32-bit uword the two addresses are added separately; the u64 read
   starting at src_address is presumably the 64-bit variant covering both
   addresses in one load (else keyword not visible). */
1161 if (BITS (uword) == 32)
1164 ip_csum_with_carry (sum0,
1165 clib_mem_unaligned (&ip0->src_address, u32));
1167 ip_csum_with_carry (sum0,
1168 clib_mem_unaligned (&ip0->dst_address, u32));
1172 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1174 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1175 data_this_buffer = (void *) ip0 + ip_header_length;
/* Bytes of this buffer from the start of the IP header onwards: the
   buffer's current length minus the header's offset from current_data. */
1176 n_ip_bytes_this_buffer =
1177 p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
/* If header plus payload does not fit in the first buffer, clamp the first
   chunk to the payload bytes actually present (0 if not even the header
   fits). */
1178 if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1180 n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1181 n_ip_bytes_this_buffer - ip_header_length : 0;
/* Accumulate one chunk, then move to the next buffer of the chain until no
   payload bytes remain. */
1185 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1186 n_bytes_left -= n_this_buffer;
1187 if (n_bytes_left == 0)
/* More payload is expected, so a next buffer must exist; the explicit test
   repeats the ASSERT's condition as a run-time check. */
1190 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1191 if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
1194 p0 = vlib_get_buffer (vm, p0->next_buffer);
1195 data_this_buffer = vlib_buffer_get_current (p0);
1196 n_this_buffer = clib_min (p0->current_length, n_bytes_left);
/* Fold the wide accumulator and take the one's complement. */
1199 sum16 = ~ip_csum_fold (sum0);
/* Validate the TCP or UDP checksum of the packet at the buffer's current
 * position and record the outcome in the buffer flags.
 *
 * @param vm vlib main.
 * @param p0 buffer whose current data starts with the IPv4 header.
 *
 * Sets VNET_BUFFER_F_L4_CHECKSUM_COMPUTED and, when the checksum is good,
 * VNET_BUFFER_F_L4_CHECKSUM_CORRECT in p0->flags.
 *
 * NOTE(review): the declarations of udp0/sum16 and the return statements
 * are not visible in this view (ip4_local_l4_csum_validate uses the
 * function's result as a flags word).
 */
1205 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1207 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1211 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1212 || ip0->protocol == IP_PROTOCOL_UDP);
/* The L4 header is taken to follow a fixed-size ip4_header_t. */
1214 udp0 = (void *) (ip0 + 1);
/* A UDP checksum field of 0 is accepted as correct without computing. */
1215 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1217 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1218 | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1222 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* A computed value of 0 means the checksum verified; the boolean is shifted
   to the bit position of the CORRECT flag. */
1224 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1225 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
/* Feature arc for locally destined IPv4 packets: starts at the "ip4-local"
   node and ends at "ip4-local-end-of-arc". */
1232 VNET_FEATURE_ARC_INIT (ip4_local) =
1234 .arc_name = "ip4-local",
1235 .start_nodes = VNET_FEATURES ("ip4-local"),
1236 .last_in_arc = "ip4-local-end-of-arc",
/* Validate the L4 checksum of a local packet and, for UDP, the UDP length.
 *
 * @param vm     vlib main.
 * @param p      buffer holding the packet.
 * @param ip     IPv4 header of the packet.
 * @param is_udp non-zero when the packet is UDP.
 * @param error  in/out error code; set to IP4_ERROR_UDP_LENGTH when the UDP
 *               length exceeds the IP length, otherwise left unchanged.
 *
 * *good_tcp_udp is set from the checksum result and cleared again when the
 * UDP length check fails.
 *
 * NOTE(review): the good_tcp_udp parameter line, the declarations of
 * flags0/udp/len_diff and the condition guarding the UDP section
 * (presumably is_udp) are not visible in this view.
 */
1241 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1242 ip4_header_t * ip, u8 is_udp, u8 * error,
1246 flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1247 *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1251 u32 ip_len, udp_len;
1253 udp = ip4_next_header (ip);
1254 /* Verify UDP length. */
1255 ip_len = clib_net_to_host_u16 (ip->length);
1256 udp_len = clib_net_to_host_u16 (udp->length);
/* A negative difference means the UDP header claims more bytes than the IP
   packet carries. */
1258 len_diff = ip_len - udp_len;
1259 *good_tcp_udp &= len_diff >= 0;
1260 *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
/*
 * Checksum-state predicates on a buffer's flags, used by the ip4-local
 * checksum checks below.  _b is a pointer to a buffer with a `flags` member.
 *
 * Every parameter and every replacement list is fully parenthesized so the
 * macros can be negated, compared, or given expression arguments (for
 * example `b + 1` or `x || y`) without the expansion re-associating.
 */

/* True when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
  (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                      \
   || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* True when the packet is TCP/UDP and its checksum has neither been
   computed already nor been flagged as offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
  ((is_tcp_udp)                                                         \
   && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)              \
	|| ip4_local_csum_is_offloaded (_b)))

/* Evaluates to 1 when the checksum is flagged correct or is offloaded,
   0 otherwise. */
#define ip4_local_csum_is_valid(_b)                                     \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                   \
    || ip4_local_csum_is_offloaded (_b)) != 0)
/* L4 checksum check for a single local packet.
 *
 * @param vm    vlib main.
 * @param b     buffer holding the packet.
 * @param ih    IPv4 header of the packet.
 * @param error in/out error code; overwritten with the TCP or UDP checksum
 *              error when the packet is TCP/UDP and the checksum is bad.
 *
 * NOTE(review): the else keyword between the validate call and the
 * flag-based shortcut is not visible in this view.
 */
1277 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1278 ip4_header_t * ih, u8 * error)
1280 u8 is_udp, is_tcp_udp, good_tcp_udp;
1282 is_udp = ih->protocol == IP_PROTOCOL_UDP;
1283 is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
/* Software validation is only needed when the checksum was neither computed
   already nor offloaded; otherwise the buffer flags decide. */
1285 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1286 ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1288 good_tcp_udp = ip4_local_csum_is_valid (b);
/* The UDP error code must directly follow the TCP one, because is_udp
   (0 or 1) is added to IP4_ERROR_TCP_CHECKSUM below. */
1290 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1291 *error = (is_tcp_udp && !good_tcp_udp
1292 ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1296 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1297 ip4_header_t ** ih, u8 * error)
1299 u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1301 is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1302 is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1304 is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1305 is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1307 good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1308 good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1310 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1311 || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1314 ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1317 ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1321 error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1322 IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1323 error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1324 IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1328 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1329 vlib_buffer_t * b, u16 * next, u8 error,
1330 u8 head_of_feature_arc)
1332 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1335 *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1336 b->error = error ? error_node->errors[error] : 0;
1337 if (head_of_feature_arc)
1340 if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1342 vnet_feature_arc_start (arc_index,
1343 vnet_buffer (b)->sw_if_index[VLIB_RX],
1356 } ip4_local_last_check_t;
1359 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1360 ip4_local_last_check_t * last_check, u8 * error0)
1362 ip4_fib_mtrie_leaf_t leaf0;
1363 ip4_fib_mtrie_t *mtrie0;
1364 const dpo_id_t *dpo0;
1365 load_balance_t *lb0;
1368 vnet_buffer (b)->ip.fib_index =
1369 vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1370 vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1373 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1374 * adjacency for the destination address (the local interface address).
1375 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1376 * adjacency for the source address (the remote sender's address)
1378 if (PREDICT_FALSE (last_check->first ||
1379 (last_check->src.as_u32 != ip0->src_address.as_u32)))
1381 mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1382 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1383 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1384 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1385 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1387 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1388 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1389 vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1391 lb0 = load_balance_get (lbi0);
1392 dpo0 = load_balance_get_bucket_i (lb0, 0);
1395 * Must have a route to source otherwise we drop the packet.
1396 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1399 * - the source is a receive => it's from us => bogus, do this
1400 * first since it sets a different error code.
1401 * - uRPF check for any route to source - accept if passes.
1402 * - allow packets destined to the broadcast address from unknown sources
1405 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1406 && dpo0->dpoi_type == DPO_RECEIVE) ?
1407 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1408 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1409 && !fib_urpf_check_size (lb0->lb_urpf)
1410 && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1411 IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1413 last_check->src.as_u32 = ip0->src_address.as_u32;
1414 last_check->lbi = lbi0;
1415 last_check->error = *error0;
1419 vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1420 vnet_buffer (b)->ip.adj_index[VLIB_TX];
1421 vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1422 *error0 = last_check->error;
1423 last_check->first = 0;
1428 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1429 ip4_local_last_check_t * last_check, u8 * error)
1431 ip4_fib_mtrie_leaf_t leaf[2];
1432 ip4_fib_mtrie_t *mtrie[2];
1433 const dpo_id_t *dpo[2];
1434 load_balance_t *lb[2];
1438 not_last_hit = last_check->first;
1439 not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1440 not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1442 vnet_buffer (b[0])->ip.fib_index =
1443 vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1444 vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1445 vnet_buffer (b[0])->ip.fib_index;
1447 vnet_buffer (b[1])->ip.fib_index =
1448 vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1449 vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1450 vnet_buffer (b[1])->ip.fib_index;
1453 * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1454 * adjacency for the destination address (the local interface address).
1455 * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1456 * adjacency for the source address (the remote sender's address)
1458 if (PREDICT_FALSE (not_last_hit))
1460 mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1461 mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1463 leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1464 leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1466 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1467 &ip[0]->src_address, 2);
1468 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1469 &ip[1]->src_address, 2);
1471 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1472 &ip[0]->src_address, 3);
1473 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1474 &ip[1]->src_address, 3);
1476 lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1477 lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1479 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1480 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1481 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1483 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1484 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1485 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1487 lb[0] = load_balance_get (lbi[0]);
1488 lb[1] = load_balance_get (lbi[1]);
1490 dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1491 dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1493 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1494 dpo[0]->dpoi_type == DPO_RECEIVE) ?
1495 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1496 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1497 !fib_urpf_check_size (lb[0]->lb_urpf) &&
1498 ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1499 ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1501 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1502 dpo[1]->dpoi_type == DPO_RECEIVE) ?
1503 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1504 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1505 !fib_urpf_check_size (lb[1]->lb_urpf) &&
1506 ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1507 ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1509 last_check->src.as_u32 = ip[1]->src_address.as_u32;
1510 last_check->lbi = lbi[1];
1511 last_check->error = error[1];
1515 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1516 vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1517 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1519 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1520 vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1521 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1523 error[0] = last_check->error;
1524 error[1] = last_check->error;
1525 last_check->first = 0;
1529 enum ip_local_packet_type_e
1531 IP_LOCAL_PACKET_TYPE_L4,
1532 IP_LOCAL_PACKET_TYPE_NAT,
1533 IP_LOCAL_PACKET_TYPE_FRAG,
1537 * Determine packet type and next node.
1539 * The expectation is that all packets that are not L4 will skip
1540 * checksums and source checks.
1543 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1545 ip_lookup_main_t *lm = &ip4_main.lookup_main;
1547 if (PREDICT_FALSE (ip4_is_fragment (ip)))
1549 *next = IP_LOCAL_NEXT_REASSEMBLY;
1550 return IP_LOCAL_PACKET_TYPE_FRAG;
1552 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1554 *next = lm->local_next_by_ip_protocol[ip->protocol];
1555 return IP_LOCAL_PACKET_TYPE_NAT;
1558 *next = lm->local_next_by_ip_protocol[ip->protocol];
1559 return IP_LOCAL_PACKET_TYPE_L4;
1563 ip4_local_inline (vlib_main_t * vm,
1564 vlib_node_runtime_t * node,
1565 vlib_frame_t * frame, int head_of_feature_arc)
1567 u32 *from, n_left_from;
1568 vlib_node_runtime_t *error_node =
1569 vlib_node_get_runtime (vm, ip4_input_node.index);
1570 u16 nexts[VLIB_FRAME_SIZE], *next;
1571 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1572 ip4_header_t *ip[2];
1575 ip4_local_last_check_t last_check = {
1577 * 0.0.0.0 can appear as the source address of an IP packet,
1578 * as can any other address, hence the need to use the 'first'
1579 * member to make sure the .lbi is initialised for the first
1582 .src = {.as_u32 = 0},
1584 .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1588 from = vlib_frame_vector_args (frame);
1589 n_left_from = frame->n_vectors;
1591 if (node->flags & VLIB_NODE_FLAG_TRACE)
1592 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1594 vlib_get_buffers (vm, from, bufs, n_left_from);
1598 while (n_left_from >= 6)
1602 /* Prefetch next iteration. */
1604 vlib_prefetch_buffer_header (b[4], LOAD);
1605 vlib_prefetch_buffer_header (b[5], LOAD);
1607 CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1608 CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1611 error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1613 ip[0] = vlib_buffer_get_current (b[0]);
1614 ip[1] = vlib_buffer_get_current (b[1]);
1616 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1617 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1619 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1620 pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1622 not_batch = pt[0] ^ pt[1];
1624 if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1627 if (PREDICT_TRUE (not_batch == 0))
1629 ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1630 ip4_local_check_src_x2 (b, ip, &last_check, error);
1636 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1637 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1641 ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1642 ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1648 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1649 head_of_feature_arc);
1650 ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1651 head_of_feature_arc);
1658 while (n_left_from > 0)
1660 error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1662 ip[0] = vlib_buffer_get_current (b[0]);
1663 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1664 pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1666 if (head_of_feature_arc == 0 || pt[0])
1669 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1670 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1674 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1675 head_of_feature_arc);
1682 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1683 return frame->n_vectors;
1686 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1687 vlib_frame_t * frame)
1689 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1693 VLIB_REGISTER_NODE (ip4_local_node) =
1695 .name = "ip4-local",
1696 .vector_size = sizeof (u32),
1697 .format_trace = format_ip4_forward_next_trace,
1698 .n_next_nodes = IP_LOCAL_N_NEXT,
1701 [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1702 [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1703 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1704 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1705 [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1711 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1712 vlib_node_runtime_t * node,
1713 vlib_frame_t * frame)
1715 return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1719 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1720 .name = "ip4-local-end-of-arc",
1721 .vector_size = sizeof (u32),
1723 .format_trace = format_ip4_forward_next_trace,
1724 .sibling_of = "ip4-local",
1727 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1728 .arc_name = "ip4-local",
1729 .node_name = "ip4-local-end-of-arc",
1730 .runs_before = 0, /* not before any other features */
1734 #ifndef CLIB_MARCH_VARIANT
1736 ip4_register_protocol (u32 protocol, u32 node_index)
1738 vlib_main_t *vm = vlib_get_main ();
1739 ip4_main_t *im = &ip4_main;
1740 ip_lookup_main_t *lm = &im->lookup_main;
1742 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1743 lm->local_next_by_ip_protocol[protocol] =
1744 vlib_node_add_next (vm, ip4_local_node.index, node_index);
1748 ip4_unregister_protocol (u32 protocol)
1750 ip4_main_t *im = &ip4_main;
1751 ip_lookup_main_t *lm = &im->lookup_main;
1753 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1754 lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1758 static clib_error_t *
1759 show_ip_local_command_fn (vlib_main_t * vm,
1760 unformat_input_t * input, vlib_cli_command_t * cmd)
1762 ip4_main_t *im = &ip4_main;
1763 ip_lookup_main_t *lm = &im->lookup_main;
1766 vlib_cli_output (vm, "Protocols handled by ip4_local");
1767 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1769 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1771 u32 node_index = vlib_get_node (vm,
1772 ip4_local_node.index)->
1773 next_nodes[lm->local_next_by_ip_protocol[i]];
1774 vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1775 format_vlib_node_name, vm, node_index);
1784 * Display the set of protocols handled by the local IPv4 stack.
1787 * Example of how to display local protocol table:
1788 * @cliexstart{show ip local}
1789 * Protocols handled by ip4_local
1796 VLIB_CLI_COMMAND (show_ip_local, static) =
1798 .path = "show ip local",
1799 .function = show_ip_local_command_fn,
1800 .short_help = "show ip local",
1805 ip4_arp_inline (vlib_main_t * vm,
1806 vlib_node_runtime_t * node,
1807 vlib_frame_t * frame, int is_glean)
1809 vnet_main_t *vnm = vnet_get_main ();
1810 ip4_main_t *im = &ip4_main;
1811 ip_lookup_main_t *lm = &im->lookup_main;
1812 u32 *from, *to_next_drop;
1813 uword n_left_from, n_left_to_next_drop, next_index;
1814 u32 thread_index = vm->thread_index;
1817 if (node->flags & VLIB_NODE_FLAG_TRACE)
1818 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1820 seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1822 from = vlib_frame_vector_args (frame);
1823 n_left_from = frame->n_vectors;
1824 next_index = node->cached_next_index;
1825 if (next_index == IP4_ARP_NEXT_DROP)
1826 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1828 while (n_left_from > 0)
1830 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1831 to_next_drop, n_left_to_next_drop);
1833 while (n_left_from > 0 && n_left_to_next_drop > 0)
1835 u32 pi0, bi0, adj_index0, sw_if_index0;
1836 ip_adjacency_t *adj0;
1837 vlib_buffer_t *p0, *b0;
1838 ip4_address_t resolve0;
1839 ethernet_arp_header_t *h0;
1840 vnet_hw_interface_t *hw_if0;
1844 p0 = vlib_get_buffer (vm, pi0);
1848 to_next_drop[0] = pi0;
1850 n_left_to_next_drop -= 1;
1852 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1853 adj0 = adj_get (adj_index0);
1857 /* resolve the packet's destination */
1858 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1859 resolve0 = ip0->dst_address;
1863 /* resolve the incomplete adj */
1864 resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1867 /* combine the address and interface for the hash key */
1868 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1869 r0 = (u64) resolve0.data_u32 << 32;
1872 if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1874 p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1879 * the adj has been updated to a rewrite but the node the DPO that got
1880 * us here hasn't - yet. no big deal. we'll drop while we wait.
1882 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1884 p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1889 * Can happen if the control-plane is programming tables
1890 * with traffic flowing; at least that's today's lame excuse.
1892 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1893 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1895 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1898 /* Send ARP request. */
1900 vlib_packet_template_get_packet (vm,
1901 &im->ip4_arp_request_packet_template,
1903 /* Seems we're out of buffers */
1904 if (PREDICT_FALSE (!h0))
1906 p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1910 b0 = vlib_get_buffer (vm, bi0);
1912 /* copy the persistent fields from the original */
1913 clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1915 /* Add rewrite/encap string for ARP packet. */
1916 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1918 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1920 /* Src ethernet address in ARP header. */
1921 mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
1922 hw_if0->hw_address);
1925 /* The interface's source address is stashed in the Glean Adj */
1926 h0->ip4_over_ethernet[0].ip4 =
1927 adj0->sub_type.glean.receive_addr.ip4;
1931 /* Src IP address in ARP header. */
1932 if (ip4_src_address_for_packet (lm, sw_if_index0,
1933 &h0->ip4_over_ethernet[0].ip4))
1935 /* No source address available */
1936 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1937 vlib_buffer_free (vm, &bi0, 1);
1941 h0->ip4_over_ethernet[1].ip4 = resolve0;
1943 p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1945 vlib_buffer_copy_trace_flag (vm, p0, bi0);
1946 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1947 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1949 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1951 vlib_set_next_frame_buffer (vm, node,
1952 adj0->rewrite_header.next_index, bi0);
1955 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1958 return frame->n_vectors;
1961 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1962 vlib_frame_t * frame)
1964 return (ip4_arp_inline (vm, node, frame, 0));
1967 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1968 vlib_frame_t * frame)
1970 return (ip4_arp_inline (vm, node, frame, 1));
1973 static char *ip4_arp_error_strings[] = {
1974 [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
1975 [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
1976 [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
1977 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1978 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1979 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1983 VLIB_REGISTER_NODE (ip4_arp_node) =
1986 .vector_size = sizeof (u32),
1987 .format_trace = format_ip4_forward_next_trace,
1988 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1989 .error_strings = ip4_arp_error_strings,
1990 .n_next_nodes = IP4_ARP_N_NEXT,
1993 [IP4_ARP_NEXT_DROP] = "error-drop",
1997 VLIB_REGISTER_NODE (ip4_glean_node) =
1999 .name = "ip4-glean",
2000 .vector_size = sizeof (u32),
2001 .format_trace = format_ip4_forward_next_trace,
2002 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2003 .error_strings = ip4_arp_error_strings,
2004 .n_next_nodes = IP4_ARP_N_NEXT,
2006 [IP4_ARP_NEXT_DROP] = "error-drop",
2011 #define foreach_notrace_ip4_arp_error \
2017 _(NO_SOURCE_ADDRESS)
2019 static clib_error_t *
2020 arp_notrace_init (vlib_main_t * vm)
2022 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2024 /* don't trace ARP request packets */
2026 vnet_pcap_drop_trace_filter_add_del \
2027 (rt->errors[IP4_ARP_ERROR_##a], \
2029 foreach_notrace_ip4_arp_error;
2034 VLIB_INIT_FUNCTION (arp_notrace_init);
2037 #ifndef CLIB_MARCH_VARIANT
2038 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 * Takes an ARP request from im->ip4_arp_request_packet_template, fills in
 * the interface hardware address (hi->hw_address), the interface address
 * matching dst as sender and dst as target, applies an adjacency rewrite
 * and puts the buffer in a frame to the interface output node
 * (hi->output_node_index).
 *
 * Returns 0 when the request was handed to the output node.  Otherwise
 * returns the result of clib_error_return () for: interface admin-down,
 * no interface address matching dst (vnm->api_errno is also set),
 * ARP packet allocation failure, or an interface without a hardware
 * address.
 */
2040 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2043 vnet_main_t *vnm = vnet_get_main ();
2044 ip4_main_t *im = &ip4_main;
2045 ethernet_arp_header_t *h;
2047 ip_interface_address_t *ia;
2048 ip_adjacency_t *adj;
2049 vnet_hw_interface_t *hi;
2050 vnet_sw_interface_t *si;
2054 u8 unicast_rewrite = 0;
2056 si = vnet_get_sw_interface (vnm, sw_if_index);
/* refuse to probe through an interface that is not admin-up */
2058 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2060 return clib_error_return (0, "%U: interface %U down",
2061 format_ip4_address, dst,
2062 format_vnet_sw_if_index_name, vnm,
/* look up an address on sw_if_index that matches dst; used as ARP sender */
2067 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2070 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2071 return clib_error_return
2073 "no matching interface address for destination %U (interface %U)",
2074 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2078 h = vlib_packet_template_get_packet (vm,
2079 &im->ip4_arp_request_packet_template,
2083 return clib_error_return (0, "ARP request packet allocation failed");
2085 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/*
 * NOTE(review): the ARP packet obtained from the template above appears
 * not to be released on this early return - no vlib_buffer_free () call is
 * visible on this path (compare the no-source-address path in
 * ip4_arp_inline, which frees its buffer).  Looks like a buffer leak;
 * confirm and free the buffer before returning.
 */
2086 if (PREDICT_FALSE (!hi->hw_address))
2088 return clib_error_return (0, "%U: interface %U do not support ip probe",
2089 format_ip4_address, dst,
2090 format_vnet_sw_if_index_name, vnm,
/* sender hardware address = interface hardware address */
2094 mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
/* sender protocol address = matching interface address, target = dst */
2096 h->ip4_over_ethernet[0].ip4 = src[0];
2097 h->ip4_over_ethernet[1].ip4 = dst[0];
2099 b = vlib_get_buffer (vm, bi);
2100 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2101 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2103 ip46_address_t nh = {
2107 ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2108 VNET_LINK_IP4, &nh, sw_if_index);
2111 /* Peer has been previously resolved, retrieve glean adj instead */
2112 if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2115 unicast_rewrite = 1;
2119 ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2120 VNET_LINK_IP4, sw_if_index, &nh);
2125 /* Add encapsulation string for software interface (e.g. ethernet header). */
2126 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/*
 * When the rewrite of an already resolved adjacency was used, overwrite
 * the two bytes just before the buffer's current data with the ARP
 * ethertype.
 */
2127 if (unicast_rewrite)
2129 u16 *etype = vlib_buffer_get_current (b) - 2;
2130 etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2132 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* hand the request to the interface output node in a frame of its own */
2135 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2136 u32 *to_next = vlib_frame_vector_args (f);
2139 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2143 return /* no error */ 0;
2149 IP4_REWRITE_NEXT_DROP,
2150 IP4_REWRITE_NEXT_ICMP_ERROR,
2151 IP4_REWRITE_NEXT_FRAGMENT,
2152 IP4_REWRITE_N_NEXT /* Last */
2153 } ip4_rewrite_next_t;
2156 * The bits of an IPv4 address to mask to construct a multicast
2159 #if CLIB_ARCH_IS_BIG_ENDIAN
2160 #define IP4_MCAST_ADDR_MASK 0x007fffff
2162 #define IP4_MCAST_ADDR_MASK 0xffff7f00
2166 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2167 u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2169 if (packet_len > adj_packet_bytes)
2171 *error = IP4_ERROR_MTU_EXCEEDED;
2174 icmp4_error_set_vnet_buffer
2175 (b, ICMP4_destination_unreachable,
2176 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2178 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2182 /* IP fragmentation */
2183 ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2184 IP4_FRAG_NEXT_IP4_REWRITE, 0);
2185 *next = IP4_REWRITE_NEXT_FRAGMENT;
2190 /* Decrement TTL & update checksum.
2191 Works either endian, so no need for byte swap. */
2192 static_always_inline void
2193 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2198 if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2200 b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2206 /* Input node should have rejected packets with ttl 0. */
2207 ASSERT (ip->ttl > 0);
2209 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2210 checksum += checksum >= 0xffff;
2212 ip->checksum = checksum;
2217 * If the ttl drops below 1 when forwarding, generate
2220 if (PREDICT_FALSE (ttl <= 0))
2222 *error = IP4_ERROR_TIME_EXPIRED;
2223 vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2224 icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2225 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2227 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2230 /* Verify checksum. */
2231 ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2232 (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2237 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2238 vlib_node_runtime_t * node,
2239 vlib_frame_t * frame,
2240 int do_counters, int is_midchain, int is_mcast,
2243 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2244 u32 *from = vlib_frame_vector_args (frame);
2245 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2246 u16 nexts[VLIB_FRAME_SIZE], *next;
2248 vlib_node_runtime_t *error_node =
2249 vlib_node_get_runtime (vm, ip4_input_node.index);
2251 n_left_from = frame->n_vectors;
2252 u32 thread_index = vm->thread_index;
2254 vlib_get_buffers (vm, from, bufs, n_left_from);
2255 clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2257 if (n_left_from >= 6)
2260 for (i = 2; i < 6; i++)
2261 vlib_prefetch_buffer_header (bufs[i], LOAD);
2266 while (n_left_from >= 8)
2268 ip_adjacency_t *adj0, *adj1;
2269 ip4_header_t *ip0, *ip1;
2270 u32 rw_len0, error0, adj_index0;
2271 u32 rw_len1, error1, adj_index1;
2272 u32 tx_sw_if_index0, tx_sw_if_index1;
2275 vlib_prefetch_buffer_header (b[6], LOAD);
2276 vlib_prefetch_buffer_header (b[7], LOAD);
2278 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2279 adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2282 * pre-fetch the per-adjacency counters
2286 vlib_prefetch_combined_counter (&adjacency_counters,
2287 thread_index, adj_index0);
2288 vlib_prefetch_combined_counter (&adjacency_counters,
2289 thread_index, adj_index1);
2292 ip0 = vlib_buffer_get_current (b[0]);
2293 ip1 = vlib_buffer_get_current (b[1]);
2295 error0 = error1 = IP4_ERROR_NONE;
2297 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2298 ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2300 /* Rewrite packet header and updates lengths. */
2301 adj0 = adj_get (adj_index0);
2302 adj1 = adj_get (adj_index1);
2304 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2305 rw_len0 = adj0[0].rewrite_header.data_bytes;
2306 rw_len1 = adj1[0].rewrite_header.data_bytes;
2307 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2308 vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2310 p = vlib_buffer_get_current (b[2]);
2311 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2312 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2314 p = vlib_buffer_get_current (b[3]);
2315 CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2316 CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2318 /* Check MTU of outgoing interface. */
2319 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2320 u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2322 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2323 ip0_len = gso_mtu_sz (b[0]);
2324 if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2325 ip1_len = gso_mtu_sz (b[1]);
2327 ip4_mtu_check (b[0], ip0_len,
2328 adj0[0].rewrite_header.max_l3_packet_bytes,
2329 ip0->flags_and_fragment_offset &
2330 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2332 ip4_mtu_check (b[1], ip1_len,
2333 adj1[0].rewrite_header.max_l3_packet_bytes,
2334 ip1->flags_and_fragment_offset &
2335 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2340 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2341 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2342 IP4_ERROR_SAME_INTERFACE : error0);
2343 error1 = ((adj1[0].rewrite_header.sw_if_index ==
2344 vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2345 IP4_ERROR_SAME_INTERFACE : error1);
2348 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2349 * to see the IP header */
2350 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2352 u32 next_index = adj0[0].rewrite_header.next_index;
2353 b[0]->current_data -= rw_len0;
2354 b[0]->current_length += rw_len0;
2355 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2356 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2359 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2360 vnet_feature_arc_start (lm->output_feature_arc_index,
2361 tx_sw_if_index0, &next_index, b[0]);
2362 next[0] = next_index;
2366 b[0]->error = error_node->errors[error0];
2368 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2370 u32 next_index = adj1[0].rewrite_header.next_index;
2371 b[1]->current_data -= rw_len1;
2372 b[1]->current_length += rw_len1;
2374 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2375 vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2378 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2379 vnet_feature_arc_start (lm->output_feature_arc_index,
2380 tx_sw_if_index1, &next_index, b[1]);
2381 next[1] = next_index;
2385 b[1]->error = error_node->errors[error1];
2389 calc_checksums (vm, b[0]);
2390 calc_checksums (vm, b[1]);
2392 /* Guess we are only writing on simple Ethernet header. */
2393 vnet_rewrite_two_headers (adj0[0], adj1[0],
2394 ip0, ip1, sizeof (ethernet_header_t));
2397 * Bump the per-adjacency counters
2401 vlib_increment_combined_counter
2402 (&adjacency_counters,
2404 adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2406 vlib_increment_combined_counter
2407 (&adjacency_counters,
2409 adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2414 if (adj0->sub_type.midchain.fixup_func)
2415 adj0->sub_type.midchain.fixup_func
2416 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2417 if (adj1->sub_type.midchain.fixup_func)
2418 adj1->sub_type.midchain.fixup_func
2419 (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2425 * copy bytes from the IP address into the MAC rewrite
2427 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2428 adj0->rewrite_header.dst_mcast_offset,
2429 &ip0->dst_address.as_u32, (u8 *) ip0);
2430 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2431 adj1->rewrite_header.dst_mcast_offset,
2432 &ip1->dst_address.as_u32, (u8 *) ip1);
2440 while (n_left_from > 0)
2442 ip_adjacency_t *adj0;
2444 u32 rw_len0, adj_index0, error0;
2445 u32 tx_sw_if_index0;
2447 adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2449 adj0 = adj_get (adj_index0);
2452 vlib_prefetch_combined_counter (&adjacency_counters,
2453 thread_index, adj_index0);
2455 ip0 = vlib_buffer_get_current (b[0]);
2457 error0 = IP4_ERROR_NONE;
2459 ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2462 /* Update packet buffer attributes/set output interface. */
2463 rw_len0 = adj0[0].rewrite_header.data_bytes;
2464 vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2466 /* Check MTU of outgoing interface. */
2467 u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2468 if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2469 ip0_len = gso_mtu_sz (b[0]);
2471 ip4_mtu_check (b[0], ip0_len,
2472 adj0[0].rewrite_header.max_l3_packet_bytes,
2473 ip0->flags_and_fragment_offset &
2474 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2479 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2480 vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2481 IP4_ERROR_SAME_INTERFACE : error0);
2484 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2485 * to see the IP header */
2486 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2488 u32 next_index = adj0[0].rewrite_header.next_index;
2489 b[0]->current_data -= rw_len0;
2490 b[0]->current_length += rw_len0;
2491 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2492 vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2495 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2496 vnet_feature_arc_start (lm->output_feature_arc_index,
2497 tx_sw_if_index0, &next_index, b[0]);
2498 next[0] = next_index;
2502 b[0]->error = error_node->errors[error0];
2506 calc_checksums (vm, b[0]);
2508 /* Guess we are only writing on simple Ethernet header. */
2509 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2512 vlib_increment_combined_counter
2513 (&adjacency_counters,
2514 thread_index, adj_index0, 1,
2515 vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2519 if (adj0->sub_type.midchain.fixup_func)
2520 adj0->sub_type.midchain.fixup_func
2521 (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2527 * copy bytes from the IP address into the MAC rewrite
2529 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2530 adj0->rewrite_header.dst_mcast_offset,
2531 &ip0->dst_address.as_u32, (u8 *) ip0);
2540 /* Need to do trace after rewrites to pick up new packet data. */
2541 if (node->flags & VLIB_NODE_FLAG_TRACE)
2542 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2544 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2545 return frame->n_vectors;
2549 ip4_rewrite_inline (vlib_main_t * vm,
2550 vlib_node_runtime_t * node,
2551 vlib_frame_t * frame,
2552 int do_counters, int is_midchain, int is_mcast)
2554 vnet_main_t *vnm = vnet_get_main ();
2555 if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2556 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2557 is_midchain, is_mcast,
2560 return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2561 is_midchain, is_mcast,
2562 0 /* no do_gso */ );
2566 /** @brief IPv4 rewrite node.
2569 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2570 header checksum, fetch the ip adjacency, check the outbound mtu,
2571 apply the adjacency rewrite, and send pkts to the adjacency
2572 rewrite header's rewrite_next_index.
2574 @param vm vlib_main_t corresponding to the current thread
2575 @param node vlib_node_runtime_t
2576 @param frame vlib_frame_t whose contents should be dispatched
2578 @par Graph mechanics: buffer metadata, next index usage
2581 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2582 - the rewrite adjacency index
2583 - <code>adj->lookup_next_index</code>
2584 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2585 the packet will be dropped.
2586 - <code>adj->rewrite_header</code>
2587 - Rewrite string length, rewrite string, next_index
2590 - <code>b->current_data, b->current_length</code>
2591 - Updated net of applying the rewrite string
2593 <em>Next Indices:</em>
2594 - <code> adj->rewrite_header.next_index </code>
2598 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2599 vlib_frame_t * frame)
2601 if (adj_are_counters_enabled ())
2602 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2604 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2607 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2608 vlib_node_runtime_t * node,
2609 vlib_frame_t * frame)
2611 if (adj_are_counters_enabled ())
2612 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2614 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2617 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2618 vlib_node_runtime_t * node,
2619 vlib_frame_t * frame)
2621 if (adj_are_counters_enabled ())
2622 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2624 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2627 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2628 vlib_node_runtime_t * node,
2629 vlib_frame_t * frame)
2631 if (adj_are_counters_enabled ())
2632 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2634 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2637 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2638 vlib_node_runtime_t * node,
2639 vlib_frame_t * frame)
2641 if (adj_are_counters_enabled ())
2642 return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2644 return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2648 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2649 .name = "ip4-rewrite",
2650 .vector_size = sizeof (u32),
2652 .format_trace = format_ip4_rewrite_trace,
2654 .n_next_nodes = IP4_REWRITE_N_NEXT,
2656 [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2657 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2658 [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2662 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2663 .name = "ip4-rewrite-bcast",
2664 .vector_size = sizeof (u32),
2666 .format_trace = format_ip4_rewrite_trace,
2667 .sibling_of = "ip4-rewrite",
2670 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2671 .name = "ip4-rewrite-mcast",
2672 .vector_size = sizeof (u32),
2674 .format_trace = format_ip4_rewrite_trace,
2675 .sibling_of = "ip4-rewrite",
2678 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2679 .name = "ip4-mcast-midchain",
2680 .vector_size = sizeof (u32),
2682 .format_trace = format_ip4_rewrite_trace,
2683 .sibling_of = "ip4-rewrite",
2686 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2687 .name = "ip4-midchain",
2688 .vector_size = sizeof (u32),
2689 .format_trace = format_ip4_forward_next_trace,
2690 .sibling_of = "ip4-rewrite",
2695 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2697 ip4_fib_mtrie_t *mtrie0;
2698 ip4_fib_mtrie_leaf_t leaf0;
2701 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2703 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2704 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2705 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2707 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2709 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2712 static clib_error_t *
2713 test_lookup_command_fn (vlib_main_t * vm,
2714 unformat_input_t * input, vlib_cli_command_t * cmd)
2721 ip4_address_t ip4_base_address;
2724 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2726 if (unformat (input, "table %d", &table_id))
2728 /* Make sure the entry exists. */
2729 fib = ip4_fib_get (table_id);
2730 if ((fib) && (fib->index != table_id))
2731 return clib_error_return (0, "<fib-index> %d does not exist",
2734 else if (unformat (input, "count %f", &count))
2737 else if (unformat (input, "%U",
2738 unformat_ip4_address, &ip4_base_address))
2741 return clib_error_return (0, "unknown input `%U'",
2742 format_unformat_error, input);
2747 for (i = 0; i < n; i++)
2749 if (!ip4_lookup_validate (&ip4_base_address, table_id))
2752 ip4_base_address.as_u32 =
2753 clib_host_to_net_u32 (1 +
2754 clib_net_to_host_u32 (ip4_base_address.as_u32));
2758 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2760 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2766 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2767 * given FIB table to determine if there is a conflict with the
2768 * adjacency table. The fib-id can be determined by using the
2769 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2772 * @todo This command uses fib-id, other commands use table-id (not
2773 * just a name, they are different indexes). Would like to change this
2774 * to table-id for consistency.
2777 * Example of how to run the test lookup command:
2778 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2779 * No errors in 2 lookups
2783 VLIB_CLI_COMMAND (lookup_test_command, static) =
2785 .path = "test lookup",
2786 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2787 .function = test_lookup_command_fn,
2791 #ifndef CLIB_MARCH_VARIANT
2793 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2797 fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2799 if (~0 == fib_index)
2800 return VNET_API_ERROR_NO_SUCH_FIB;
2802 fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
/*
 * CLI handler for "set ip flow-hash".
 *
 * Parses an optional "table %d" argument into table_id and a list of
 * flow-hash keywords.  Each entry of foreach_flow_hash_bit contributes one
 * "else if" arm: when the keyword spelled like the entry name (#a) is seen,
 * its value v is OR-ed into flow_hash_config and matched is set.  The
 * resulting mask is then applied with vnet_set_ip4_flow_hash().
 *
 * Returns a clib error for unknown input and for a table that
 * vnet_set_ip4_flow_hash() reports as VNET_API_ERROR_NO_SUCH_FIB.
 */
2809 static clib_error_t *
2810 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2811 unformat_input_t * input,
2812 vlib_cli_command_t * cmd)
/* Mask of selected hash fields; starts empty. */
2816 u32 flow_hash_config = 0;
/* Consume arguments until the input is exhausted. */
2819 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2821 if (unformat (input, "table %d", &table_id))
2824 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2825 foreach_flow_hash_bit
/* NOTE(review): presumably only reached when nothing matched -- the guarding
   condition is not visible here; confirm. */
2832 return clib_error_return (0, "unknown input `%U'",
2833 format_unformat_error, input);
/* Apply the collected configuration; rv selects the error report below. */
2835 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2841 case VNET_API_ERROR_NO_SUCH_FIB:
2842 return clib_error_return (0, "no such FIB table %d", table_id);
/* Logged as a warning only; no error is returned from this statement. */
2845 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2853 * Configure the set of IPv4 fields used by the flow hash.
2856 * Example of how to set the flow hash on a given table:
2857 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2858 * Example of how to display the configured flow hash:
2859 * @cliexstart{show ip fib}
2860 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2863 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2864 * [0] [@0]: dpo-drop ip6
2867 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2868 * [0] [@0]: dpo-drop ip6
2871 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2872 * [0] [@0]: dpo-drop ip6
2875 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2876 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2879 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2880 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2881 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2882 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2883 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2886 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2887 * [0] [@0]: dpo-drop ip6
2888 * 255.255.255.255/32
2890 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2891 * [0] [@0]: dpo-drop ip6
2892 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2895 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2896 * [0] [@0]: dpo-drop ip6
2899 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2900 * [0] [@0]: dpo-drop ip6
2903 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2904 * [0] [@4]: ipv4-glean: af_packet0
2907 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2908 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2911 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2912 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2915 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2916 * [0] [@4]: ipv4-glean: af_packet1
2919 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2920 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2923 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2924 * [0] [@0]: dpo-drop ip6
2927 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2928 * [0] [@0]: dpo-drop ip6
2929 * 255.255.255.255/32
2931 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2932 * [0] [@0]: dpo-drop ip6
2936 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2938 .path = "set ip flow-hash",
2940 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2941 .function = set_ip_flow_hash_command_fn,
2945 #ifndef CLIB_MARCH_VARIANT
/*
 * Attach a classifier table to an interface, or detach it when table_index
 * is ~0.
 *
 * Records table_index for sw_if_index in
 * lookup_main.classify_table_index_by_sw_if_index and, when the interface has
 * a first IPv4 address, adds or removes a FIB_SOURCE_CLASSIFY special entry
 * for that address in the interface's IPv4 FIB.
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * slot in the sw_interfaces pool, and VNET_API_ERROR_NO_SUCH_ENTRY when
 * table_index is not ~0 and is a free slot in the classifier table pool.
 */
2947 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2950 vnet_main_t *vnm = vnet_get_main ();
2951 vnet_interface_main_t *im = &vnm->interface_main;
2952 ip4_main_t *ipm = &ip4_main;
2953 ip_lookup_main_t *lm = &ipm->lookup_main;
2954 vnet_classify_main_t *cm = &vnet_classify_main;
2955 ip4_address_t *if_addr;
/* Reject interfaces that do not exist. */
2957 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2958 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* ~0 means "no table" and is always accepted; anything else must exist. */
2960 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2961 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Grow the per-interface vector if needed, then remember the binding. */
2963 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2964 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2966 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* The FIB entry is only touched when the interface has an address. */
2968 if (NULL != if_addr)
/* Prefix built from the interface's first address. */
2970 fib_prefix_t pfx = {
2972 .fp_proto = FIB_PROTOCOL_IP4,
2973 .fp_addr.ip4 = *if_addr,
2977 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
/* Attach case: a classify DPO for table_index is created and installed. */
2981 if (table_index != (u32) ~ 0)
2983 dpo_id_t dpo = DPO_INVALID;
2988 classify_dpo_create (DPO_PROTO_IP4, table_index));
2990 fib_table_entry_special_dpo_add (fib_index,
2992 FIB_SOURCE_CLASSIFY,
2993 FIB_ENTRY_FLAG_NONE, &dpo);
/* NOTE(review): presumably the detach (table_index == ~0) branch -- the
   "else" itself is not visible here; confirm. */
2998 fib_table_entry_special_remove (fib_index,
2999 &pfx, FIB_SOURCE_CLASSIFY);
3007 static clib_error_t *
3008 set_ip_classify_command_fn (vlib_main_t * vm,
3009 unformat_input_t * input,
3010 vlib_cli_command_t * cmd)
3012 u32 table_index = ~0;
3013 int table_index_set = 0;
3014 u32 sw_if_index = ~0;
3017 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3019 if (unformat (input, "table-index %d", &table_index))
3020 table_index_set = 1;
3021 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3022 vnet_get_main (), &sw_if_index))
3028 if (table_index_set == 0)
3029 return clib_error_return (0, "classify table-index must be specified");
3031 if (sw_if_index == ~0)
3032 return clib_error_return (0, "interface / subif must be specified");
3034 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3041 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3042 return clib_error_return (0, "No such interface");
3044 case VNET_API_ERROR_NO_SUCH_ENTRY:
3045 return clib_error_return (0, "No such classifier table");
3051 * Assign a classification table to an interface. The classification
3052 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3053 * commands. Once the table is create, use this command to filter packets
3057 * Example of how to assign a classification table to an interface:
3058 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3061 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3063 .path = "set ip classify",
3065 "set ip classify intfc <interface> table-index <classify-idx>",
3066 .function = set_ip_classify_command_fn,
3070 static clib_error_t *
3071 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3073 ip4_main_t *im = &ip4_main;
3076 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3078 if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3081 return clib_error_return (0,
3082 "invalid heap-size parameter `%U'",
3083 format_unformat_error, input);
3086 im->mtrie_heap_size = heapsize;
3091 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3094 * fd.io coding-style-patch-verification: ON
3097 * eval: (c-set-style "gnu")