2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h> /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 #include <vnet/ip/ip4_forward.h>
59 /** @brief IPv4 lookup node.
62 This is the main IPv4 lookup dispatch node.
64 @param vm vlib_main_t corresponding to the current thread
65 @param node vlib_node_runtime_t
66 @param frame vlib_frame_t whose contents should be dispatched
68 @par Graph mechanics: buffer metadata, next index usage
71 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
72 - Indicates the @c sw_if_index value of the interface that the
73 packet was received on.
74 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
75 - When the value is @c ~0 then the node performs a longest prefix
76 match (LPM) for the packet destination address in the FIB attached
77 to the receive interface.
78 - Otherwise perform LPM for the packet destination address in the
79 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
80 value (0, 1, ...) and not a VRF id.
83 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
84 - The lookup result adjacency index.
87 - Dispatches the packet to the node index found in
88 ip_adjacency_t @c adj->lookup_next_index
89 (where @c adj is the lookup result adjacency).
92 ip4_lookup (vlib_main_t * vm,
93 vlib_node_runtime_t * node, vlib_frame_t * frame)
95 return ip4_lookup_inline (vm, node, frame,
96 /* lookup_for_responses_to_locally_received_packets */
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
106 .function = ip4_lookup,
107 .name = "ip4-lookup",
108 .vector_size = sizeof (u32),
109 .format_trace = format_ip4_lookup_trace,
110 .n_next_nodes = IP_LOOKUP_N_NEXT,
111 .next_nodes = IP4_LOOKUP_NEXT_NODES,
115 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
118 ip4_load_balance (vlib_main_t * vm,
119 vlib_node_runtime_t * node, vlib_frame_t * frame)
121 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
122 u32 n_left_from, n_left_to_next, *from, *to_next;
123 ip_lookup_next_t next;
124 u32 thread_index = vm->thread_index;
126 from = vlib_frame_vector_args (frame);
127 n_left_from = frame->n_vectors;
128 next = node->cached_next_index;
130 if (node->flags & VLIB_NODE_FLAG_TRACE)
131 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
133 while (n_left_from > 0)
135 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
138 while (n_left_from >= 4 && n_left_to_next >= 2)
140 ip_lookup_next_t next0, next1;
141 const load_balance_t *lb0, *lb1;
142 vlib_buffer_t *p0, *p1;
143 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
144 const ip4_header_t *ip0, *ip1;
145 const dpo_id_t *dpo0, *dpo1;
147 /* Prefetch next iteration. */
149 vlib_buffer_t *p2, *p3;
151 p2 = vlib_get_buffer (vm, from[2]);
152 p3 = vlib_get_buffer (vm, from[3]);
154 vlib_prefetch_buffer_header (p2, STORE);
155 vlib_prefetch_buffer_header (p3, STORE);
157 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
158 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
161 pi0 = to_next[0] = from[0];
162 pi1 = to_next[1] = from[1];
169 p0 = vlib_get_buffer (vm, pi0);
170 p1 = vlib_get_buffer (vm, pi1);
172 ip0 = vlib_buffer_get_current (p0);
173 ip1 = vlib_buffer_get_current (p1);
174 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
175 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
177 lb0 = load_balance_get (lbi0);
178 lb1 = load_balance_get (lbi1);
181 * this node is for via FIBs, so we can re-use the flow hash already stored in
182 * the buffer metadata, if present (it is shifted right by one bit before use).
183 * We don't want to use the same hash value at each level in the recursion
184 * graph as that would lead to polarisation
188 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
190 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
192 hc0 = vnet_buffer (p0)->ip.flow_hash =
193 vnet_buffer (p0)->ip.flow_hash >> 1;
197 hc0 = vnet_buffer (p0)->ip.flow_hash =
198 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
200 dpo0 = load_balance_get_fwd_bucket
201 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
205 dpo0 = load_balance_get_bucket_i (lb0, 0);
207 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
209 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
211 hc1 = vnet_buffer (p1)->ip.flow_hash =
212 vnet_buffer (p1)->ip.flow_hash >> 1;
216 hc1 = vnet_buffer (p1)->ip.flow_hash =
217 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
219 dpo1 = load_balance_get_fwd_bucket
220 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
224 dpo1 = load_balance_get_bucket_i (lb1, 0);
227 next0 = dpo0->dpoi_next_node;
228 next1 = dpo1->dpoi_next_node;
230 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
231 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
233 vlib_increment_combined_counter
234 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
235 vlib_increment_combined_counter
236 (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
238 vlib_validate_buffer_enqueue_x2 (vm, node, next,
239 to_next, n_left_to_next,
240 pi0, pi1, next0, next1);
243 while (n_left_from > 0 && n_left_to_next > 0)
245 ip_lookup_next_t next0;
246 const load_balance_t *lb0;
249 const ip4_header_t *ip0;
250 const dpo_id_t *dpo0;
259 p0 = vlib_get_buffer (vm, pi0);
261 ip0 = vlib_buffer_get_current (p0);
262 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
264 lb0 = load_balance_get (lbi0);
267 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
269 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
271 hc0 = vnet_buffer (p0)->ip.flow_hash =
272 vnet_buffer (p0)->ip.flow_hash >> 1;
276 hc0 = vnet_buffer (p0)->ip.flow_hash =
277 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
279 dpo0 = load_balance_get_fwd_bucket
280 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
284 dpo0 = load_balance_get_bucket_i (lb0, 0);
287 next0 = dpo0->dpoi_next_node;
288 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
290 vlib_increment_combined_counter
291 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
293 vlib_validate_buffer_enqueue_x1 (vm, node, next,
294 to_next, n_left_to_next,
298 vlib_put_next_frame (vm, node, next, n_left_to_next);
301 return frame->n_vectors;
305 VLIB_REGISTER_NODE (ip4_load_balance_node) =
307 .function = ip4_load_balance,
308 .name = "ip4-load-balance",
309 .vector_size = sizeof (u32),
310 .sibling_of = "ip4-lookup",
312 format_ip4_lookup_trace,
316 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
318 /* get first interface address */
320 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
321 ip_interface_address_t ** result_ia)
323 ip_lookup_main_t *lm = &im->lookup_main;
324 ip_interface_address_t *ia = 0;
325 ip4_address_t *result = 0;
328 foreach_ip_interface_address
329 (lm, ia, sw_if_index,
330 1 /* honor unnumbered */ ,
333 ip_interface_address_get_address (lm, ia);
339 *result_ia = result ? ia : 0;
344 ip4_add_subnet_bcast_route (u32 fib_index,
348 vnet_sw_interface_flags_t iflags;
350 iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
352 fib_table_entry_special_remove(fib_index,
354 FIB_SOURCE_INTERFACE);
356 if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
358 fib_table_entry_update_one_path (fib_index, pfx,
359 FIB_SOURCE_INTERFACE,
362 /* No next-hop address */
368 // no out-label stack
370 FIB_ROUTE_PATH_FLAG_NONE);
374 fib_table_entry_special_add(fib_index,
376 FIB_SOURCE_INTERFACE,
377 (FIB_ENTRY_FLAG_DROP |
378 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
383 ip4_add_interface_routes (u32 sw_if_index,
384 ip4_main_t * im, u32 fib_index,
385 ip_interface_address_t * a)
387 ip_lookup_main_t *lm = &im->lookup_main;
388 ip4_address_t *address = ip_interface_address_get_address (lm, a);
390 .fp_len = a->address_length,
391 .fp_proto = FIB_PROTOCOL_IP4,
392 .fp_addr.ip4 = *address,
395 if (pfx.fp_len <= 30)
397 /* a /30 or shorter - add a glean for the network address */
398 fib_table_entry_update_one_path (fib_index, &pfx,
399 FIB_SOURCE_INTERFACE,
400 (FIB_ENTRY_FLAG_CONNECTED |
401 FIB_ENTRY_FLAG_ATTACHED),
403 /* No next-hop address */
409 // no out-label stack
411 FIB_ROUTE_PATH_FLAG_NONE);
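413 /* Add the network address as a drop; the subnet broadcast route is installed by ip4_add_subnet_bcast_route, which checks the interface's directed-broadcast flag */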
414 fib_prefix_t net_pfx = {
416 .fp_proto = FIB_PROTOCOL_IP4,
417 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
419 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
420 fib_table_entry_special_add(fib_index,
422 FIB_SOURCE_INTERFACE,
423 (FIB_ENTRY_FLAG_DROP |
424 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
425 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
426 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
427 ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
429 else if (pfx.fp_len == 31)
431 u32 mask = clib_host_to_net_u32(1);
432 fib_prefix_t net_pfx = pfx;
435 net_pfx.fp_addr.ip4.as_u32 ^= mask;
437 /* a /31 - add the other end as an attached host */
438 fib_table_entry_update_one_path (fib_index, &net_pfx,
439 FIB_SOURCE_INTERFACE,
440 (FIB_ENTRY_FLAG_ATTACHED),
448 FIB_ROUTE_PATH_FLAG_NONE);
452 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
454 u32 classify_table_index =
455 lm->classify_table_index_by_sw_if_index[sw_if_index];
456 if (classify_table_index != (u32) ~ 0)
458 dpo_id_t dpo = DPO_INVALID;
463 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
465 fib_table_entry_special_dpo_add (fib_index,
468 FIB_ENTRY_FLAG_NONE, &dpo);
473 fib_table_entry_update_one_path (fib_index, &pfx,
474 FIB_SOURCE_INTERFACE,
475 (FIB_ENTRY_FLAG_CONNECTED |
476 FIB_ENTRY_FLAG_LOCAL),
483 FIB_ROUTE_PATH_FLAG_NONE);
487 ip4_del_interface_routes (ip4_main_t * im,
489 ip4_address_t * address, u32 address_length)
492 .fp_len = address_length,
493 .fp_proto = FIB_PROTOCOL_IP4,
494 .fp_addr.ip4 = *address,
497 if (pfx.fp_len <= 30)
499 fib_prefix_t net_pfx = {
501 .fp_proto = FIB_PROTOCOL_IP4,
502 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
504 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
505 fib_table_entry_special_remove(fib_index,
507 FIB_SOURCE_INTERFACE);
508 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
509 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
510 fib_table_entry_special_remove(fib_index,
512 FIB_SOURCE_INTERFACE);
513 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
515 else if (pfx.fp_len == 31)
517 u32 mask = clib_host_to_net_u32(1);
518 fib_prefix_t net_pfx = pfx;
521 net_pfx.fp_addr.ip4.as_u32 ^= mask;
523 fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
527 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
531 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
533 ip4_main_t *im = &ip4_main;
535 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
538 * enable/disable only on the 1<->0 transition
542 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
547 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
548 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
551 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
555 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
556 sw_if_index, !is_enable, 0, 0);
559 static clib_error_t *
560 ip4_add_del_interface_address_internal (vlib_main_t * vm,
562 ip4_address_t * address,
563 u32 address_length, u32 is_del)
565 vnet_main_t *vnm = vnet_get_main ();
566 ip4_main_t *im = &ip4_main;
567 ip_lookup_main_t *lm = &im->lookup_main;
568 clib_error_t *error = 0;
569 u32 if_address_index, elts_before;
570 ip4_address_fib_t ip4_af, *addr_fib = 0;
572 /* local0 interface doesn't support IP addressing */
573 if (sw_if_index == 0)
576 clib_error_create ("local0 interface doesn't support IP addressing");
579 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
580 ip4_addr_fib_init (&ip4_af, address,
581 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
582 vec_add1 (addr_fib, ip4_af);
585 * there is no support for adj-fib handling in the presence of overlapping
586 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
592 /* When adding an address check that it does not conflict
593 with an existing address on any interface in this table. */
594 ip_interface_address_t *ia;
595 vnet_sw_interface_t *sif;
597 pool_foreach(sif, vnm->interface_main.sw_interfaces,
599 if (im->fib_index_by_sw_if_index[sw_if_index] ==
600 im->fib_index_by_sw_if_index[sif->sw_if_index])
602 foreach_ip_interface_address
603 (&im->lookup_main, ia, sif->sw_if_index,
604 0 /* honor unnumbered */ ,
607 ip_interface_address_get_address
608 (&im->lookup_main, ia);
609 if (ip4_destination_matches_route
610 (im, address, x, ia->address_length) ||
611 ip4_destination_matches_route (im,
616 vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
620 ("failed to add %U which conflicts with %U for interface %U",
621 format_ip4_address_and_length, address,
623 format_ip4_address_and_length, x,
625 format_vnet_sw_if_index_name, vnm,
634 elts_before = pool_elts (lm->if_address_pool);
636 error = ip_interface_address_add_del
637 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
641 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
644 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
646 ip4_add_interface_routes (sw_if_index,
647 im, ip4_af.fib_index,
649 (lm->if_address_pool, if_address_index));
651 /* If the pool did not grow/shrink, this was a duplicate address add: only notify the callbacks when the pool size actually changed. */
652 if (elts_before != pool_elts (lm->if_address_pool))
654 ip4_add_del_interface_address_callback_t *cb;
655 vec_foreach (cb, im->add_del_interface_address_callbacks)
656 cb->function (im, cb->function_opaque, sw_if_index,
657 address, address_length, if_address_index, is_del);
666 ip4_add_del_interface_address (vlib_main_t * vm,
668 ip4_address_t * address,
669 u32 address_length, u32 is_del)
671 return ip4_add_del_interface_address_internal
672 (vm, sw_if_index, address, address_length, is_del);
676 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
678 ip_interface_address_t *ia;
684 * when directed broadcast is enabled, the subnet broadcast route will forward
685 * packets using an adjacency with a broadcast MAC. otherwise it drops
688 foreach_ip_interface_address(&im->lookup_main, ia,
691 if (ia->address_length <= 30)
695 ipa = ip_interface_address_get_address (&im->lookup_main, ia);
699 .fp_proto = FIB_PROTOCOL_IP4,
701 .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
705 ip4_add_subnet_bcast_route
706 (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
714 /* Built-in ip4 unicast rx feature path definition */
716 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
718 .arc_name = "ip4-unicast",
719 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
720 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
723 VNET_FEATURE_INIT (ip4_flow_classify, static) =
725 .arc_name = "ip4-unicast",
726 .node_name = "ip4-flow-classify",
727 .runs_before = VNET_FEATURES ("ip4-inacl"),
730 VNET_FEATURE_INIT (ip4_inacl, static) =
732 .arc_name = "ip4-unicast",
733 .node_name = "ip4-inacl",
734 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
737 VNET_FEATURE_INIT (ip4_source_check_1, static) =
739 .arc_name = "ip4-unicast",
740 .node_name = "ip4-source-check-via-rx",
741 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
744 VNET_FEATURE_INIT (ip4_source_check_2, static) =
746 .arc_name = "ip4-unicast",
747 .node_name = "ip4-source-check-via-any",
748 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
751 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
753 .arc_name = "ip4-unicast",
754 .node_name = "ip4-source-and-port-range-check-rx",
755 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
758 VNET_FEATURE_INIT (ip4_policer_classify, static) =
760 .arc_name = "ip4-unicast",
761 .node_name = "ip4-policer-classify",
762 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
765 VNET_FEATURE_INIT (ip4_ipsec, static) =
767 .arc_name = "ip4-unicast",
768 .node_name = "ipsec-input-ip4",
769 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
772 VNET_FEATURE_INIT (ip4_vpath, static) =
774 .arc_name = "ip4-unicast",
775 .node_name = "vpath-input-ip4",
776 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
779 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
781 .arc_name = "ip4-unicast",
782 .node_name = "ip4-vxlan-bypass",
783 .runs_before = VNET_FEATURES ("ip4-lookup"),
786 VNET_FEATURE_INIT (ip4_not_enabled, static) =
788 .arc_name = "ip4-unicast",
789 .node_name = "ip4-not-enabled",
790 .runs_before = VNET_FEATURES ("ip4-lookup"),
793 VNET_FEATURE_INIT (ip4_lookup, static) =
795 .arc_name = "ip4-unicast",
796 .node_name = "ip4-lookup",
797 .runs_before = 0, /* not before any other features */
800 /* Built-in ip4 multicast rx feature path definition */
801 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
803 .arc_name = "ip4-multicast",
804 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
805 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
808 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
810 .arc_name = "ip4-multicast",
811 .node_name = "vpath-input-ip4",
812 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
815 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
817 .arc_name = "ip4-multicast",
818 .node_name = "ip4-not-enabled",
819 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
822 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
824 .arc_name = "ip4-multicast",
825 .node_name = "ip4-mfib-forward-lookup",
826 .runs_before = 0, /* last feature */
829 /* Source and port-range check ip4 tx feature path definition */
830 VNET_FEATURE_ARC_INIT (ip4_output, static) =
832 .arc_name = "ip4-output",
833 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
834 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
837 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
839 .arc_name = "ip4-output",
840 .node_name = "ip4-source-and-port-range-check-tx",
841 .runs_before = VNET_FEATURES ("ip4-outacl"),
844 VNET_FEATURE_INIT (ip4_outacl, static) =
846 .arc_name = "ip4-output",
847 .node_name = "ip4-outacl",
848 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
851 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
853 .arc_name = "ip4-output",
854 .node_name = "ipsec-output-ip4",
855 .runs_before = VNET_FEATURES ("interface-output"),
858 /* Built-in ip4 tx feature path definition */
859 VNET_FEATURE_INIT (ip4_interface_output, static) =
861 .arc_name = "ip4-output",
862 .node_name = "interface-output",
863 .runs_before = 0, /* not before any other features */
867 static clib_error_t *
868 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
870 ip4_main_t *im = &ip4_main;
872 /* Fill in lookup tables with default table (0). */
873 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
874 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
878 ip4_main_t *im4 = &ip4_main;
879 ip_lookup_main_t *lm4 = &im4->lookup_main;
880 ip_interface_address_t *ia = 0;
881 ip4_address_t *address;
882 vlib_main_t *vm = vlib_get_main ();
884 vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
886 foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
888 address = ip_interface_address_get_address (lm4, ia);
889 ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
894 vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
897 vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
898 sw_if_index, is_add, 0, 0);
900 return /* no error */ 0;
903 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
905 /* Global IP4 main. */
909 ip4_lookup_init (vlib_main_t * vm)
911 ip4_main_t *im = &ip4_main;
915 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
917 if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
919 if ((error = vlib_call_init_function (vm, fib_module_init)))
921 if ((error = vlib_call_init_function (vm, mfib_module_init)))
924 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
929 m = pow2_mask (i) << (32 - i);
932 im->fib_masks[i] = clib_host_to_net_u32 (m);
935 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
937 /* Create FIB with index 0 and table id of 0. */
938 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
939 FIB_SOURCE_DEFAULT_ROUTE);
940 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
941 MFIB_SOURCE_DEFAULT_ROUTE);
945 pn = pg_get_node (ip4_lookup_node.index);
946 pn->unformat_edit = unformat_pg_ip4_header;
950 ethernet_arp_header_t h;
952 memset (&h, 0, sizeof (h));
954 /* Set target ethernet address to all zeros. */
955 memset (h.ip4_over_ethernet[1].ethernet, 0,
956 sizeof (h.ip4_over_ethernet[1].ethernet));
958 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
959 #define _8(f,v) h.f = v;
960 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
961 _16 (l3_type, ETHERNET_TYPE_IP4);
962 _8 (n_l2_address_bytes, 6);
963 _8 (n_l3_address_bytes, 4);
964 _16 (opcode, ETHERNET_ARP_OPCODE_request);
968 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
971 /* alloc chunk size */ 8,
978 VLIB_INIT_FUNCTION (ip4_lookup_init);
982 /* Adjacency taken. */
987 /* Packet data, possibly *after* rewrite. */
988 u8 packet_data[64 - 1 * sizeof (u32)];
990 ip4_forward_next_trace_t;
993 format_ip4_forward_next_trace (u8 * s, va_list * args)
995 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
996 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
997 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
998 u32 indent = format_get_indent (s);
999 s = format (s, "%U%U",
1000 format_white_space, indent,
1001 format_ip4_header, t->packet_data, sizeof (t->packet_data));
1006 format_ip4_lookup_trace (u8 * s, va_list * args)
1008 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1009 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1010 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1011 u32 indent = format_get_indent (s);
1013 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1014 t->fib_index, t->dpo_index, t->flow_hash);
1015 s = format (s, "\n%U%U",
1016 format_white_space, indent,
1017 format_ip4_header, t->packet_data, sizeof (t->packet_data));
1022 format_ip4_rewrite_trace (u8 * s, va_list * args)
1024 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1025 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1026 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1027 u32 indent = format_get_indent (s);
1029 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1030 t->fib_index, t->dpo_index, format_ip_adjacency,
1031 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1032 s = format (s, "\n%U%U",
1033 format_white_space, indent,
1034 format_ip_adjacency_packet_data,
1035 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1039 /* Common trace function for all ip4-forward next nodes. */
1041 ip4_forward_next_trace (vlib_main_t * vm,
1042 vlib_node_runtime_t * node,
1043 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1046 ip4_main_t *im = &ip4_main;
1048 n_left = frame->n_vectors;
1049 from = vlib_frame_vector_args (frame);
1054 vlib_buffer_t *b0, *b1;
1055 ip4_forward_next_trace_t *t0, *t1;
1057 /* Prefetch next iteration. */
1058 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1059 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1064 b0 = vlib_get_buffer (vm, bi0);
1065 b1 = vlib_get_buffer (vm, bi1);
1067 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1069 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1070 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1071 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1073 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1074 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1075 vec_elt (im->fib_index_by_sw_if_index,
1076 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1078 clib_memcpy (t0->packet_data,
1079 vlib_buffer_get_current (b0),
1080 sizeof (t0->packet_data));
1082 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1084 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1085 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1086 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1088 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1089 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1090 vec_elt (im->fib_index_by_sw_if_index,
1091 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1092 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1093 sizeof (t1->packet_data));
1103 ip4_forward_next_trace_t *t0;
1107 b0 = vlib_get_buffer (vm, bi0);
1109 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1111 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1112 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1113 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1115 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1116 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1117 vec_elt (im->fib_index_by_sw_if_index,
1118 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1119 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1120 sizeof (t0->packet_data));
1127 /* Compute TCP/UDP/ICMP4 checksum in software. */
1129 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1133 u32 ip_header_length, payload_length_host_byte_order;
1134 u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1136 void *data_this_buffer;
1138 /* Initialize checksum with ip header. */
1139 ip_header_length = ip4_header_bytes (ip0);
1140 payload_length_host_byte_order =
1141 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1143 clib_host_to_net_u32 (payload_length_host_byte_order +
1144 (ip0->protocol << 16));
1146 if (BITS (uword) == 32)
1149 ip_csum_with_carry (sum0,
1150 clib_mem_unaligned (&ip0->src_address, u32));
1152 ip_csum_with_carry (sum0,
1153 clib_mem_unaligned (&ip0->dst_address, u32));
1157 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1159 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1160 data_this_buffer = (void *) ip0 + ip_header_length;
1161 n_ip_bytes_this_buffer =
1162 p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1163 if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1165 n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1166 n_ip_bytes_this_buffer - ip_header_length : 0;
1170 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1171 n_bytes_left -= n_this_buffer;
1172 if (n_bytes_left == 0)
1175 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1176 p0 = vlib_get_buffer (vm, p0->next_buffer);
1177 data_this_buffer = vlib_buffer_get_current (p0);
1178 n_this_buffer = p0->current_length;
1181 sum16 = ~ip_csum_fold (sum0);
1187 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1189 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1193 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1194 || ip0->protocol == IP_PROTOCOL_UDP);
1196 udp0 = (void *) (ip0 + 1);
1197 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1199 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1200 | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1204 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1206 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1207 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1213 VNET_FEATURE_ARC_INIT (ip4_local) =
1215 .arc_name = "ip4-local",
1216 .start_nodes = VNET_FEATURES ("ip4-local"),
1221 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1222 ip4_header_t * ip, u8 is_udp, u8 * error,
1226 flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1227 *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1231 u32 ip_len, udp_len;
1233 udp = ip4_next_header (ip);
1234 /* Verify UDP length. */
1235 ip_len = clib_net_to_host_u16 (ip->length);
1236 udp_len = clib_net_to_host_u16 (udp->length);
1238 len_diff = ip_len - udp_len;
1239 *good_tcp_udp &= len_diff >= 0;
1240 *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
/*
 * Evaluates to non-zero when either the TCP or the UDP checksum-offload flag
 * is set in the flags word of the buffer pointed to by _b.
 *
 * _b may be any pointer expression; it is evaluated twice, so it must not
 * have side effects. The argument and the whole expansion are parenthesized
 * so the macro can be negated or combined with other operators safely.
 */
#define ip4_local_csum_is_offloaded(_b)                                   \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                      \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))
/*
 * Evaluates to non-zero when a software L4 checksum check is still required:
 * is_tcp_udp is true, the buffer's L4_CHECKSUM_COMPUTED flag is not set, and
 * ip4_local_csum_is_offloaded() is false for the buffer.
 *
 * Both arguments are parenthesized so that compound expressions (for example
 * "a || b" as is_tcp_udp, or "b + 1" as _b) keep their intended meaning.
 * _b is evaluated more than once and must not have side effects.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                         \
    ((is_tcp_udp)                                                         \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)              \
          || (ip4_local_csum_is_offloaded ((_b)))))
/*
 * Evaluates to 1 when the buffer's L4_CHECKSUM_CORRECT flag is set or
 * ip4_local_csum_is_offloaded() is true for the buffer, and to 0 otherwise.
 *
 * The argument and the whole expansion are parenthesized so the result can be
 * used inside larger expressions. _b is evaluated more than once and must not
 * have side effects.
 */
#define ip4_local_csum_is_valid(_b)                                       \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                   \
      || (ip4_local_csum_is_offloaded ((_b)))) != 0)
1257 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1258 ip4_header_t * ih, u8 * error)
1260 u8 is_udp, is_tcp_udp, good_tcp_udp;
1262 is_udp = ih->protocol == IP_PROTOCOL_UDP;
1263 is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1265 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1266 ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1268 good_tcp_udp = ip4_local_csum_is_valid (b);
1270 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1271 *error = (is_tcp_udp && !good_tcp_udp
1272 ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
/*
 * Two-buffer variant of the L4 checksum check.
 *
 * b, ih and error are arrays of (at least) two entries; entry i of each
 * describes the same packet. The flag-derived verdict is computed for both
 * packets first; software validation is entered only when at least one of
 * them needs it.
 * NOTE(review): the per-packet conditions guarding the two validate calls
 * are on lines not shown in this listing.
 */
1276 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1277 ip4_header_t ** ih, u8 * error)
1279 u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1281 is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1282 is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1284 is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1285 is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
/* Default verdict from the buffer flags (checksum correct or offloaded). */
1287 good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1288 good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1290 if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1291 || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1294 ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1297 ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
/* TCP error code + is_udp selects the TCP or the UDP checksum error. */
1301 error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1302 IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1303 error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1304 IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
/*
 * Finalize the next-node index and the error of one buffer.
 *
 * error_node          - runtime whose errors[] table supplies b->error.
 * next                - in/out; forced to IP_LOCAL_NEXT_DROP for any error
 *                       other than IP4_ERROR_UNKNOWN_PROTOCOL.
 * error               - error code computed by the preceding checks;
 *                       IP4_ERROR_UNKNOWN_PROTOCOL is the "no error found"
 *                       value used by the callers in this file.
 * head_of_feature_arc - when non-zero and no error was found, the ip4-local
 *                       feature arc is started for the buffer's RX interface.
 */
1308 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1309 vlib_buffer_t * b, u16 * next, u8 error,
1310 u8 head_of_feature_arc)
1312 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1315 *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1316 b->error = error ? error_node->errors[error] : 0;
1317 if (head_of_feature_arc)
1320 if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1322 vnet_feature_arc_start (arc_index,
1323 vnet_buffer (b)->sw_if_index[VLIB_RX],
1335 } ip4_local_last_check_t;
/*
 * Source-address check for one locally destined packet.
 *
 * The source address is looked up in the FIB mtrie and the result is cached
 * in *last_check (source address, load-balance index, resulting error). A
 * packet with the same source address as the cached one reuses the cached
 * result instead of repeating the lookup.
 *
 * *error0 is only changed by the lookup path while it still holds
 * IP4_ERROR_UNKNOWN_PROTOCOL; on the cached path it is overwritten with the
 * cached error.
 */
1338 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1339 ip4_local_last_check_t * last_check, u8 * error0)
1341 ip4_fib_mtrie_leaf_t leaf0;
1342 ip4_fib_mtrie_t *mtrie0;
1343 const dpo_id_t *dpo0;
1344 load_balance_t *lb0;
1347 vnet_buffer (b)->ip.fib_index =
1348 vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1349 vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1351 if (PREDICT_FALSE (last_check->src.as_u32 != ip0->src_address.as_u32))
1353 mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1354 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1355 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1356 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1357 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1359 vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1360 vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1362 lb0 = load_balance_get (lbi0);
1363 dpo0 = load_balance_get_bucket_i (lb0, 0);
1366 * Must have a route to source otherwise we drop the packet.
1367 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1370 * - the source is a receive => it's from us => bogus, do this
1371 * first since it sets a different error code.
1372 * - uRPF check for any route to source - accept if passes.
1373 * - allow packets destined to the broadcast address from unknown sources
1376 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1377 && dpo0->dpoi_type == DPO_RECEIVE) ?
1378 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1379 *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1380 && !fib_urpf_check_size (lb0->lb_urpf)
1381 && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1382 IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1384 last_check->src.as_u32 = ip0->src_address.as_u32;
1385 last_check->lbi = lbi0;
1386 last_check->error = *error0;
1390 vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1391 vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1392 *error0 = last_check->error;
/*
 * Two-buffer variant of the source-address check.
 *
 * b, ip and error are arrays of (at least) two entries. The mtrie lookups
 * are done for both packets unless both source addresses equal the cached
 * one in *last_check, in which case both packets reuse the cached
 * load-balance index and error. After a lookup the cache is refreshed from
 * the second packet only.
 */
1397 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1398 ip4_local_last_check_t * last_check, u8 * error)
1400 ip4_fib_mtrie_leaf_t leaf[2];
1401 ip4_fib_mtrie_t *mtrie[2];
1402 const dpo_id_t *dpo[2];
1403 load_balance_t *lb[2];
1404 u32 not_last_hit = 0;
/* Non-zero as soon as either source address differs from the cached one. */
1407 not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1408 not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1410 vnet_buffer (b[0])->ip.fib_index =
1411 vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1412 vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1413 vnet_buffer (b[0])->ip.fib_index;
1415 vnet_buffer (b[1])->ip.fib_index =
1416 vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1417 vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1418 vnet_buffer (b[1])->ip.fib_index;
1420 if (PREDICT_FALSE (not_last_hit))
1422 mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1423 mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1425 leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1426 leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1428 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1429 &ip[0]->src_address, 2);
1430 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1431 &ip[1]->src_address, 2);
1433 leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1434 &ip[0]->src_address, 3);
1435 leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1436 &ip[1]->src_address, 3);
1438 lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1439 lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1441 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1442 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1444 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1445 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1447 lb[0] = load_balance_get (lbi[0]);
1448 lb[1] = load_balance_get (lbi[1]);
1450 dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1451 dpo[1] = load_balance_get_bucket_i (lb[1], 0);
/* Same rules as the single-buffer check: spoofed-local first, then uRPF. */
1453 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1454 dpo[0]->dpoi_type == DPO_RECEIVE) ?
1455 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1456 error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1457 !fib_urpf_check_size (lb[0]->lb_urpf) &&
1458 ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1459 ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1461 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1462 dpo[1]->dpoi_type == DPO_RECEIVE) ?
1463 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1464 error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1465 !fib_urpf_check_size (lb[1]->lb_urpf) &&
1466 ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1467 ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
/* Only the second packet's result is remembered for the next call. */
1469 last_check->src.as_u32 = ip[1]->src_address.as_u32;
1470 last_check->lbi = lbi[1];
1471 last_check->error = error[1];
1475 vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1476 vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1478 vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1479 vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1481 error[0] = last_check->error;
1482 error[1] = last_check->error;
/*
 * Shared body of the ip4-local and ip4-local-end-of-arc nodes.
 *
 * For every buffer in the frame: record the L3 header offset, run the L4
 * checksum and source-address checks (unless they are skipped, see below),
 * pick the next node from lm->local_next_by_ip_protocol[] and let
 * ip4_local_set_next_and_error () apply the error / feature-arc logic.
 * Packets are handled two at a time while at least six remain, then one at
 * a time. All buffers are finally enqueued with the per-packet next indices.
 *
 * head_of_feature_arc - 1 when called from ip4-local, 0 from the end-of-arc
 *                       node.
 * Returns frame->n_vectors.
 *
 * NOTE(review): the statements executed when the "skip" conditions below are
 * true are on lines not shown in this listing; presumably they bypass the
 * checksum and source checks.
 */
1487 ip4_local_inline (vlib_main_t * vm,
1488 vlib_node_runtime_t * node,
1489 vlib_frame_t * frame, int head_of_feature_arc)
1491 ip4_main_t *im = &ip4_main;
1492 ip_lookup_main_t *lm = &im->lookup_main;
1493 u32 *from, n_left_from;
1494 vlib_node_runtime_t *error_node =
1495 vlib_node_get_runtime (vm, ip4_input_node.index);
1496 u16 nexts[VLIB_FRAME_SIZE], *next;
1497 vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1498 ip4_header_t *ip[2];
1501 ip4_local_last_check_t last_check = {
1502 .src = {.as_u32 = 0},
1504 .error = IP4_ERROR_UNKNOWN_PROTOCOL
1507 from = vlib_frame_vector_args (frame);
1508 n_left_from = frame->n_vectors;
1510 if (node->flags & VLIB_NODE_FLAG_TRACE)
1511 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1513 vlib_get_buffers (vm, from, bufs, n_left_from);
1517 while (n_left_from >= 6)
1519 u32 is_nat, not_batch = 0;
1521 /* Prefetch next iteration. */
1523 vlib_prefetch_buffer_header (b[4], LOAD);
1524 vlib_prefetch_buffer_header (b[5], LOAD);
1526 CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1527 CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1530 error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1532 ip[0] = vlib_buffer_get_current (b[0]);
1533 ip[1] = vlib_buffer_get_current (b[1]);
1535 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1536 vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
/* not_batch becomes non-zero when the two packets differ in their NAT flag. */
1538 is_nat = b[0]->flags & VNET_BUFFER_F_IS_NATED;
1539 not_batch |= is_nat ^ (b[1]->flags & VNET_BUFFER_F_IS_NATED);
1541 if (head_of_feature_arc == 0 || (is_nat && not_batch == 0))
1544 if (PREDICT_TRUE (not_batch == 0))
1546 ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1547 ip4_local_check_src_x2 (b, ip, &last_check, error);
/* Mixed pair: check each packet on its own, and only if it is not NATed. */
1551 if (!(b[0]->flags & VNET_BUFFER_F_IS_NATED))
1553 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1554 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1556 if (!(b[1]->flags & VNET_BUFFER_F_IS_NATED))
1558 ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1559 ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1565 next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1566 next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1567 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1568 head_of_feature_arc);
1569 ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1570 head_of_feature_arc);
/* Remaining packets, one at a time. */
1577 while (n_left_from > 0)
1579 error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1581 ip[0] = vlib_buffer_get_current (b[0]);
1582 vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1584 if (head_of_feature_arc == 0 || (b[0]->flags & VNET_BUFFER_F_IS_NATED))
1587 ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1588 ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1592 next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1593 ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1594 head_of_feature_arc);
1601 vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1602 return frame->n_vectors;
/* ip4-local node function: runs ip4_local_inline () as head of the feature arc. */
1606 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1608 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/*
 * Graph-node registration for "ip4-local". The statically named next nodes
 * are drop, punt, UDP lookup and ICMP input; further next nodes are added at
 * run time by ip4_register_protocol ().
 */
1612 VLIB_REGISTER_NODE (ip4_local_node) =
1614 .function = ip4_local,
1615 .name = "ip4-local",
1616 .vector_size = sizeof (u32),
1617 .format_trace = format_ip4_forward_next_trace,
1618 .n_next_nodes = IP_LOCAL_N_NEXT,
1621 [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1622 [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1623 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1624 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1629 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/*
 * ip4-local-end-of-arc node function: runs ip4_local_inline () with
 * head_of_feature_arc == 0, i.e. for packets that already went through the
 * ip4-local feature arc.
 */
1632 ip4_local_end_of_arc (vlib_main_t * vm,
1633 vlib_node_runtime_t * node, vlib_frame_t * frame)
1635 return ip4_local_inline (vm, node, frame, 0 /* not head of feature arc */ );
/*
 * Registration of the "ip4-local-end-of-arc" node (a sibling of "ip4-local",
 * so it shares that node's next-node indices) and of the feature that places
 * it on the "ip4-local" arc.
 */
1639 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1640 .function = ip4_local_end_of_arc,
1641 .name = "ip4-local-end-of-arc",
1642 .vector_size = sizeof (u32),
1644 .format_trace = format_ip4_forward_next_trace,
1645 .sibling_of = "ip4-local",
1648 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1650 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1651 .arc_name = "ip4-local",
1652 .node_name = "ip4-local-end-of-arc",
1653 .runs_before = 0, /* not before any other features */
/*
 * Register the graph node that handles a given IP protocol for locally
 * destined packets.
 *
 * protocol   - IP protocol number; must index within
 *              lm->local_next_by_ip_protocol[] (checked with ASSERT only).
 * node_index - node to add as a next node of ip4-local; the resulting next
 *              index is stored in the per-protocol table.
 */
1658 ip4_register_protocol (u32 protocol, u32 node_index)
1660 vlib_main_t *vm = vlib_get_main ();
1661 ip4_main_t *im = &ip4_main;
1662 ip_lookup_main_t *lm = &im->lookup_main;
1664 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1665 lm->local_next_by_ip_protocol[protocol] =
1666 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler for "show ip local": prints, for every IP protocol whose
 * ip4-local next index is not IP_LOCAL_NEXT_PUNT, the protocol number and
 * the name of the node that handles it.
 */
1669 static clib_error_t *
1670 show_ip_local_command_fn (vlib_main_t * vm,
1671 unformat_input_t * input, vlib_cli_command_t * cmd)
1673 ip4_main_t *im = &ip4_main;
1674 ip_lookup_main_t *lm = &im->lookup_main;
1677 vlib_cli_output (vm, "Protocols handled by ip4_local");
1678 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1680 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
/* Translate the ip4-local next index into the index of the target node. */
1682 u32 node_index = vlib_get_node (vm,
1683 ip4_local_node.index)->
1684 next_nodes[lm->local_next_by_ip_protocol[i]];
1685 vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1695 * Display the set of protocols handled by the local IPv4 stack.
1698 * Example of how to display local protocol table:
1699 * @cliexstart{show ip local}
1700 * Protocols handled by ip4_local
/* Binds the "show ip local" CLI path to show_ip_local_command_fn (). */
1707 VLIB_CLI_COMMAND (show_ip_local, static) =
1709 .path = "show ip local",
1710 .function = show_ip_local_command_fn,
1711 .short_help = "show ip local",
/*
 * Shared body of the ip4-arp and ip4-glean nodes.
 *
 * Every incoming packet is put on the drop next; for packets that are not
 * throttled, an ARP request is built from the request template and sent
 * towards the adjacency's interface.
 *
 * is_glean - non-zero: ARP for the packet's destination address, using the
 *            source address stored in the glean adjacency;
 *            zero: ARP for the adjacency's next hop, using a source address
 *            chosen by ip4_src_address_for_packet ().
 * Returns frame->n_vectors.
 *
 * Throttling uses a per-thread bitmap indexed by (a function of) the address
 * being resolved; a request is suppressed when its bit is already set. The
 * bitmap is cleared and the seed advanced when more than 1e-3 time units
 * (as reported by vlib_time_now ()) have passed since the last reset.
 */
1716 ip4_arp_inline (vlib_main_t * vm,
1717 vlib_node_runtime_t * node,
1718 vlib_frame_t * frame, int is_glean)
1720 vnet_main_t *vnm = vnet_get_main ();
1721 ip4_main_t *im = &ip4_main;
1722 ip_lookup_main_t *lm = &im->lookup_main;
1723 u32 *from, *to_next_drop;
1724 uword n_left_from, n_left_to_next_drop, next_index;
1725 u32 thread_index = vm->thread_index;
1729 if (node->flags & VLIB_NODE_FLAG_TRACE)
1730 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1732 time_now = vlib_time_now (vm);
1733 if (time_now - im->arp_throttle_last_seed_change_time[thread_index] > 1e-3)
1735 (void) random_u32 (&im->arp_throttle_seeds[thread_index]);
1736 memset (im->arp_throttle_bitmaps[thread_index], 0,
1737 ARP_THROTTLE_BITS / BITS (u8));
1739 im->arp_throttle_last_seed_change_time[thread_index] = time_now;
1741 seed = im->arp_throttle_seeds[thread_index];
1743 from = vlib_frame_vector_args (frame);
1744 n_left_from = frame->n_vectors;
1745 next_index = node->cached_next_index;
1746 if (next_index == IP4_ARP_NEXT_DROP)
1747 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1749 while (n_left_from > 0)
1751 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1752 to_next_drop, n_left_to_next_drop);
1754 while (n_left_from > 0 && n_left_to_next_drop > 0)
1756 u32 pi0, adj_index0, r0, w0, sw_if_index0, drop0;
1758 ip_adjacency_t *adj0;
1764 p0 = vlib_get_buffer (vm, pi0);
1766 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1767 adj0 = adj_get (adj_index0);
1768 ip0 = vlib_buffer_get_current (p0);
1770 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1771 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1773 if (PREDICT_TRUE (is_glean))
1776 * this is the Glean case, so we are ARPing for the
1777 * packet's destination
1779 r0 = ip0->dst_address.data_u32;
1783 r0 = adj0->sub_type.nbr.next_hop.ip4.data_u32;
1787 /* Select bit number */
1788 r0 &= ARP_THROTTLE_BITS - 1;
1789 w0 = r0 / BITS (uword);
1790 m0 = (uword) 1 << (r0 % BITS (uword));
/* Test-and-set: drop0 is true when this bit was already set. */
1792 drop0 = (im->arp_throttle_bitmaps[thread_index][w0] & m0) != 0;
1793 im->arp_throttle_bitmaps[thread_index][w0] |= m0;
1797 to_next_drop[0] = pi0;
1799 n_left_to_next_drop -= 1;
1802 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
1803 IP4_ARP_ERROR_REQUEST_SENT];
1806 * the adj has been updated to a rewrite but the node the DPO that got
1807 * us here hasn't - yet. no big deal. we'll drop while we wait.
1809 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1816 * Can happen if the control-plane is programming tables
1817 * with traffic flowing; at least that's today's lame excuse.
1819 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1820 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1822 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1825 /* Send ARP request. */
1829 ethernet_arp_header_t *h0;
1830 vnet_hw_interface_t *hw_if0;
1833 vlib_packet_template_get_packet (vm,
1834 &im->ip4_arp_request_packet_template,
1837 /* Seems we're out of buffers */
1838 if (PREDICT_FALSE (!h0))
1841 /* Add rewrite/encap string for ARP packet. */
1842 vnet_rewrite_one_header (adj0[0], h0,
1843 sizeof (ethernet_header_t));
1845 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1847 /* Src ethernet address in ARP header. */
1848 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
1850 sizeof (h0->ip4_over_ethernet[0].ethernet));
1854 /* The interface's source address is stashed in the Glean Adj */
1855 h0->ip4_over_ethernet[0].ip4 =
1856 adj0->sub_type.glean.receive_addr.ip4;
1858 /* Copy in destination address we are requesting. This is the
1859 * glean case, so it's the packet's destination.*/
1860 h0->ip4_over_ethernet[1].ip4.data_u32 =
1861 ip0->dst_address.data_u32;
1865 /* Src IP address in ARP header. */
1866 if (ip4_src_address_for_packet (lm, sw_if_index0,
1868 ip4_over_ethernet[0].ip4))
1870 /* No source address available */
1872 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1873 vlib_buffer_free (vm, &bi0, 1);
1877 /* Copy in destination address we are requesting from the
1879 h0->ip4_over_ethernet[1].ip4.data_u32 =
1880 adj0->sub_type.nbr.next_hop.ip4.as_u32;
1883 vlib_buffer_copy_trace_flag (vm, p0, bi0);
1884 b0 = vlib_get_buffer (vm, bi0);
1885 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1886 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
/* Move the buffer start back so it covers the rewrite written in front. */
1888 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1890 vlib_set_next_frame_buffer (vm, node,
1891 adj0->rewrite_header.next_index,
1896 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1899 return frame->n_vectors;
/* ip4-arp node function: ip4_arp_inline () with is_glean == 0. */
1903 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1905 return (ip4_arp_inline (vm, node, frame, 0));
/* ip4-glean node function: ip4_arp_inline () with is_glean == 1. */
1909 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1911 return (ip4_arp_inline (vm, node, frame, 1));
/*
 * Human-readable counter names, indexed by IP4_ARP_ERROR_* code. The table
 * is shared by the ip4-arp and ip4-glean node registrations.
 */
1914 static char *ip4_arp_error_strings[] = {
1915 [IP4_ARP_ERROR_DROP] = "address overflow drops",
1916 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1917 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1918 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1919 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1920 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/*
 * Graph-node registration for the node driven by ip4_arp (). Only the drop
 * next is named statically here.
 */
1924 VLIB_REGISTER_NODE (ip4_arp_node) =
1926 .function = ip4_arp,
1928 .vector_size = sizeof (u32),
1929 .format_trace = format_ip4_forward_next_trace,
1930 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1931 .error_strings = ip4_arp_error_strings,
1932 .n_next_nodes = IP4_ARP_N_NEXT,
1935 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * Graph-node registration for "ip4-glean"; it uses the same error strings
 * and next-node layout as the ARP node above.
 */
1939 VLIB_REGISTER_NODE (ip4_glean_node) =
1941 .function = ip4_glean,
1942 .name = "ip4-glean",
1943 .vector_size = sizeof (u32),
1944 .format_trace = format_ip4_forward_next_trace,
1945 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1946 .error_strings = ip4_arp_error_strings,
1947 .n_next_nodes = IP4_ARP_N_NEXT,
1949 [IP4_ARP_NEXT_DROP] = "error-drop",
1954 #define foreach_notrace_ip4_arp_error \
/*
 * Init function: for each error listed in foreach_notrace_ip4_arp_error,
 * registers the corresponding ip4-arp error with the pcap drop-trace filter.
 * NOTE(review): the add/del argument of the filter call is on a line not
 * shown in this listing.
 */
1961 arp_notrace_init (vlib_main_t * vm)
1963 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1965 /* don't trace ARP request packets */
1967 vnet_pcap_drop_trace_filter_add_del \
1968 (rt->errors[IP4_ARP_ERROR_##a], \
1970 foreach_notrace_ip4_arp_error;
1975 VLIB_INIT_FUNCTION (arp_notrace_init);
1978 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 * dst         - address to probe.
 * sw_if_index - interface on which the request is sent.
 *
 * Returns 0 on success, or a clib error when the interface is admin-down,
 * has no address matching dst, has no hardware address, or when the request
 * buffer cannot be allocated.
 *
 * If the neighbour adjacency is already resolved (rewrite), its rewrite is
 * reused and the ethertype is patched to ARP, which yields a unicast probe;
 * otherwise the glean adjacency supplies the rewrite.
 */
1980 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1983 vnet_main_t *vnm = vnet_get_main ();
1984 ip4_main_t *im = &ip4_main;
1985 ethernet_arp_header_t *h;
1987 ip_interface_address_t *ia;
1988 ip_adjacency_t *adj;
1989 vnet_hw_interface_t *hi;
1990 vnet_sw_interface_t *si;
1994 u8 unicast_rewrite = 0;
1996 si = vnet_get_sw_interface (vnm, sw_if_index);
1998 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2000 return clib_error_return (0, "%U: interface %U down",
2001 format_ip4_address, dst,
2002 format_vnet_sw_if_index_name, vnm,
2007 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2010 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2011 return clib_error_return
2013 "no matching interface address for destination %U (interface %U)",
2014 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2018 h = vlib_packet_template_get_packet (vm,
2019 &im->ip4_arp_request_packet_template,
2023 return clib_error_return (0, "ARP request packet allocation failed");
2025 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2026 if (PREDICT_FALSE (!hi->hw_address))
/* The request buffer is already allocated here; release it before bailing
 * out so this error path does not leak a buffer. */
vlib_buffer_free (vm, &bi, 1);
2028 return clib_error_return (0, "%U: interface %U do not support ip probe",
2029 format_ip4_address, dst,
2030 format_vnet_sw_if_index_name, vnm,
2034 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2035 sizeof (h->ip4_over_ethernet[0].ethernet));
2037 h->ip4_over_ethernet[0].ip4 = src[0];
2038 h->ip4_over_ethernet[1].ip4 = dst[0];
2040 b = vlib_get_buffer (vm, bi);
2041 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2042 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2044 ip46_address_t nh = {
2048 ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2049 VNET_LINK_IP4, &nh, sw_if_index);
2052 /* Peer has been previously resolved, retrieve glean adj instead */
2053 if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2056 unicast_rewrite = 1;
2060 ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2061 VNET_LINK_IP4, sw_if_index, &nh);
2066 /* Add encapsulation string for software interface (e.g. ethernet header). */
2067 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2068 if (unicast_rewrite)
/* The 16-bit ethertype sits immediately before the ARP header. */
2070 u16 *etype = vlib_buffer_get_current (b) - 2;
2071 etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2073 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2076 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2077 u32 *to_next = vlib_frame_vector_args (f);
2080 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2084 return /* no error */ 0;
/* Statically known next-node indices of the rewrite nodes. */
2089 IP4_REWRITE_NEXT_DROP,
2090 IP4_REWRITE_NEXT_ICMP_ERROR,
2091 IP4_REWRITE_NEXT_FRAGMENT,
2092 IP4_REWRITE_N_NEXT /* Last */
2093 } ip4_rewrite_next_t;
2096 * The bits of an IPv4 address to mask to construct a multicast
2099 #if CLIB_ARCH_IS_BIG_ENDIAN
2100 #define IP4_MCAST_ADDR_MASK 0x007fffff
/* NOTE(review): presumably the little-endian alternative; the directive
 * separating the two definitions is on a line not shown in this listing. */
2102 #define IP4_MCAST_ADDR_MASK 0xffff7f00
/*
 * Compare an outgoing packet's length with the adjacency MTU.
 *
 * b                - buffer being forwarded.
 * packet_len       - packet length in host byte order.
 * adj_packet_bytes - maximum L3 packet size of the adjacency.
 * df               - true when the Don't Fragment flag is set.
 * next, error      - outputs, only written when the packet is too long:
 *                    *error becomes IP4_ERROR_MTU_EXCEEDED and *next selects
 *                    either the ICMP error node ("fragmentation needed and
 *                    DF set") or the fragmentation node.
 * NOTE(review): the test on df that chooses between the two outcomes is on a
 * line not shown in this listing.
 */
2106 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2107 u16 adj_packet_bytes, bool df, u32 * next, u32 * error)
2109 if (packet_len > adj_packet_bytes)
2111 *error = IP4_ERROR_MTU_EXCEEDED;
2114 icmp4_error_set_vnet_buffer
2115 (b, ICMP4_destination_unreachable,
2116 ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2118 *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2122 /* IP fragmentation */
2123 ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes,
2124 IP4_FRAG_NEXT_IP4_LOOKUP, 0);
2125 *next = IP4_REWRITE_NEXT_FRAGMENT;
/*
 * Shared body of the IPv4 rewrite nodes.
 *
 * For every packet: decrement the TTL and incrementally update the header
 * checksum (skipped for locally originated packets, whose flag is then
 * cleared), check the adjacency MTU, prepend the adjacency's rewrite string
 * and send the packet to the adjacency's next node, optionally through the
 * output feature arc. Packets with an expired TTL or an MTU problem keep
 * their buffer position and go to the ICMP-error / fragmentation / drop
 * nexts instead. Packets are handled two at a time when possible.
 *
 * do_counters - NOTE(review): presumably gates the per-adjacency counter
 *               updates; the guarding conditions are on lines not shown.
 * is_midchain - NOTE(review): presumably gates the midchain fixup calls.
 * is_mcast    - NOTE(review): presumably gates the same-interface check and
 *               the multicast MAC fixup.
 * Returns frame->n_vectors.
 */
2131 ip4_rewrite_inline (vlib_main_t * vm,
2132 vlib_node_runtime_t * node,
2133 vlib_frame_t * frame,
2134 int do_counters, int is_midchain, int is_mcast)
2136 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2137 u32 *from = vlib_frame_vector_args (frame);
2138 u32 n_left_from, n_left_to_next, *to_next, next_index;
2139 vlib_node_runtime_t *error_node =
2140 vlib_node_get_runtime (vm, ip4_input_node.index);
2142 n_left_from = frame->n_vectors;
2143 next_index = node->cached_next_index;
2144 u32 thread_index = vm->thread_index;
2146 while (n_left_from > 0)
2148 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2150 while (n_left_from >= 4 && n_left_to_next >= 2)
2152 ip_adjacency_t *adj0, *adj1;
2153 vlib_buffer_t *p0, *p1;
2154 ip4_header_t *ip0, *ip1;
2155 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2156 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2157 u32 tx_sw_if_index0, tx_sw_if_index1;
2159 /* Prefetch next iteration. */
2161 vlib_buffer_t *p2, *p3;
2163 p2 = vlib_get_buffer (vm, from[2]);
2164 p3 = vlib_get_buffer (vm, from[3]);
2166 vlib_prefetch_buffer_header (p2, STORE);
2167 vlib_prefetch_buffer_header (p3, STORE);
2169 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2170 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2173 pi0 = to_next[0] = from[0];
2174 pi1 = to_next[1] = from[1];
2179 n_left_to_next -= 2;
2181 p0 = vlib_get_buffer (vm, pi0);
2182 p1 = vlib_get_buffer (vm, pi1);
2184 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2185 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2188 * pre-fetch the per-adjacency counters
2192 vlib_prefetch_combined_counter (&adjacency_counters,
2193 thread_index, adj_index0);
2194 vlib_prefetch_combined_counter (&adjacency_counters,
2195 thread_index, adj_index1);
2198 ip0 = vlib_buffer_get_current (p0);
2199 ip1 = vlib_buffer_get_current (p1);
2201 error0 = error1 = IP4_ERROR_NONE;
2202 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2204 /* Decrement TTL & update checksum.
2205 Works either endian, so no need for byte swap. */
2206 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2208 i32 ttl0 = ip0->ttl;
2210 /* Input node should have rejected packets with ttl 0. */
2211 ASSERT (ip0->ttl > 0);
2213 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2214 checksum0 += checksum0 >= 0xffff;
2216 ip0->checksum = checksum0;
2221 * If the ttl drops below 1 when forwarding, generate
2224 if (PREDICT_FALSE (ttl0 <= 0))
2226 error0 = IP4_ERROR_TIME_EXPIRED;
2227 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2228 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2229 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2231 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2234 /* Verify checksum. */
2235 ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2236 (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2240 p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2242 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2244 i32 ttl1 = ip1->ttl;
2246 /* Input node should have rejected packets with ttl 0. */
2247 ASSERT (ip1->ttl > 0);
2249 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2250 checksum1 += checksum1 >= 0xffff;
2252 ip1->checksum = checksum1;
2257 * If the ttl drops below 1 when forwarding, generate
2260 if (PREDICT_FALSE (ttl1 <= 0))
2262 error1 = IP4_ERROR_TIME_EXPIRED;
2263 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2264 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2265 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2267 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2270 /* Verify checksum. */
2271 ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2272 (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2276 p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2279 /* Rewrite packet header and updates lengths. */
2280 adj0 = adj_get (adj_index0);
2281 adj1 = adj_get (adj_index1);
2283 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2284 rw_len0 = adj0[0].rewrite_header.data_bytes;
2285 rw_len1 = adj1[0].rewrite_header.data_bytes;
2286 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2287 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2289 /* Check MTU of outgoing interface. */
2290 ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2291 adj0[0].rewrite_header.max_l3_packet_bytes,
2292 ip0->flags_and_fragment_offset &
2293 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2295 ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length),
2296 adj1[0].rewrite_header.max_l3_packet_bytes,
2297 ip1->flags_and_fragment_offset &
2298 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2303 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2304 vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2305 IP4_ERROR_SAME_INTERFACE : error0);
2306 error1 = ((adj1[0].rewrite_header.sw_if_index ==
2307 vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2308 IP4_ERROR_SAME_INTERFACE : error1);
2311 p0->error = error_node->errors[error0];
2312 p1->error = error_node->errors[error1];
2313 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2314 * to see the IP header */
2315 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2317 next0 = adj0[0].rewrite_header.next_index;
2318 p0->current_data -= rw_len0;
2319 p0->current_length += rw_len0;
2320 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2321 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2324 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2325 vnet_feature_arc_start (lm->output_feature_arc_index,
2326 tx_sw_if_index0, &next0, p0);
2328 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2330 next1 = adj1[0].rewrite_header.next_index;
2331 p1->current_data -= rw_len1;
2332 p1->current_length += rw_len1;
2334 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2335 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2338 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2339 vnet_feature_arc_start (lm->output_feature_arc_index,
2340 tx_sw_if_index1, &next1, p1);
2343 /* Guess we are only writing on simple Ethernet header. */
2344 vnet_rewrite_two_headers (adj0[0], adj1[0],
2345 ip0, ip1, sizeof (ethernet_header_t));
2348 * Bump the per-adjacency counters
2352 vlib_increment_combined_counter
2353 (&adjacency_counters,
2356 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2358 vlib_increment_combined_counter
2359 (&adjacency_counters,
2362 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
/* Each packet's fixup must receive the fixup_data of its own adjacency. */
2367 adj0->sub_type.midchain.fixup_func
2368 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2369 adj1->sub_type.midchain.fixup_func
2370 (vm, adj1, p1, adj1->sub_type.midchain.fixup_data);
2375 * copy bytes from the IP address into the MAC rewrite
2377 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2379 rewrite_header.dst_mcast_offset,
2380 &ip0->dst_address.as_u32,
2382 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2384 rewrite_header.dst_mcast_offset,
2385 &ip1->dst_address.as_u32,
2389 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2390 to_next, n_left_to_next,
2391 pi0, pi1, next0, next1);
2394 while (n_left_from > 0 && n_left_to_next > 0)
2396 ip_adjacency_t *adj0;
2399 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2400 u32 tx_sw_if_index0;
2402 pi0 = to_next[0] = from[0];
2404 p0 = vlib_get_buffer (vm, pi0);
2406 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2408 adj0 = adj_get (adj_index0);
2410 ip0 = vlib_buffer_get_current (p0);
2412 error0 = IP4_ERROR_NONE;
2413 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2415 /* Decrement TTL & update checksum. */
2416 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2418 i32 ttl0 = ip0->ttl;
2420 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2422 checksum0 += checksum0 >= 0xffff;
2424 ip0->checksum = checksum0;
2426 ASSERT (ip0->ttl > 0);
2432 ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2433 (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2435 if (PREDICT_FALSE (ttl0 <= 0))
2438 * If the ttl drops below 1 when forwarding, generate
2441 error0 = IP4_ERROR_TIME_EXPIRED;
2442 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2443 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2444 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2445 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2451 p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2455 vlib_prefetch_combined_counter (&adjacency_counters,
2456 thread_index, adj_index0);
2458 /* Guess we are only writing on simple Ethernet header. */
2459 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2463 * copy bytes from the IP address into the MAC rewrite
2465 vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2467 rewrite_header.dst_mcast_offset,
2468 &ip0->dst_address.as_u32,
2472 /* Update packet buffer attributes/set output interface. */
2473 rw_len0 = adj0[0].rewrite_header.data_bytes;
2474 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2477 vlib_increment_combined_counter
2478 (&adjacency_counters,
2479 thread_index, adj_index0, 1,
2480 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2482 /* Check MTU of outgoing interface. */
2483 ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2484 adj0[0].rewrite_header.max_l3_packet_bytes,
2485 ip0->flags_and_fragment_offset &
2486 clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2491 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2492 vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2493 IP4_ERROR_SAME_INTERFACE : error0);
2495 p0->error = error_node->errors[error0];
2497 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2498 * to see the IP header */
2499 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2501 p0->current_data -= rw_len0;
2502 p0->current_length += rw_len0;
2503 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2505 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2506 next0 = adj0[0].rewrite_header.next_index;
2510 adj0->sub_type.midchain.fixup_func
2511 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2515 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2516 vnet_feature_arc_start (lm->output_feature_arc_index,
2517 tx_sw_if_index0, &next0, p0);
2524 n_left_to_next -= 1;
2526 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2527 to_next, n_left_to_next,
2531 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2534 /* Need to do trace after rewrites to pick up new packet data. */
2535 if (node->flags & VLIB_NODE_FLAG_TRACE)
2536 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2538 return frame->n_vectors;
2542 /** @brief IPv4 rewrite node.
2545 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2546 header checksum, fetch the ip adjacency, check the outbound mtu,
2547 apply the adjacency rewrite, and send pkts to the adjacency
2548 rewrite header's rewrite_next_index.
2550 @param vm vlib_main_t corresponding to the current thread
2551 @param node vlib_node_runtime_t
2552 @param frame vlib_frame_t whose contents should be dispatched
2554 @par Graph mechanics: buffer metadata, next index usage
2557 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2558 - the rewrite adjacency index
2559 - <code>adj->lookup_next_index</code>
2560 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2561 the packet will be dropped.
2562 - <code>adj->rewrite_header</code>
2563 - Rewrite string length, rewrite string, next_index
2566 - <code>b->current_data, b->current_length</code>
2567 - Updated net of applying the rewrite string
2569 <em>Next Indices:</em>
2570 - <code> adj->rewrite_header.next_index </code>
2574 ip4_rewrite (vlib_main_t * vm,
2575 vlib_node_runtime_t * node, vlib_frame_t * frame)
2577 if (adj_are_counters_enabled ())
2578 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2580 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2584 ip4_rewrite_bcast (vlib_main_t * vm,
2585 vlib_node_runtime_t * node, vlib_frame_t * frame)
2587 if (adj_are_counters_enabled ())
2588 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2590 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2594 ip4_midchain (vlib_main_t * vm,
2595 vlib_node_runtime_t * node, vlib_frame_t * frame)
2597 if (adj_are_counters_enabled ())
2598 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2600 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2604 ip4_rewrite_mcast (vlib_main_t * vm,
2605 vlib_node_runtime_t * node, vlib_frame_t * frame)
2607 if (adj_are_counters_enabled ())
2608 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2610 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2614 ip4_mcast_midchain (vlib_main_t * vm,
2615 vlib_node_runtime_t * node, vlib_frame_t * frame)
2617 if (adj_are_counters_enabled ())
2618 return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2620 return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2624 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2625 .function = ip4_rewrite,
2626 .name = "ip4-rewrite",
2627 .vector_size = sizeof (u32),
2629 .format_trace = format_ip4_rewrite_trace,
2631 .n_next_nodes = IP4_REWRITE_N_NEXT,
2633 [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2634 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2635 [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2639 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2640 .function = ip4_rewrite_bcast,
2641 .name = "ip4-rewrite-bcast",
2642 .vector_size = sizeof (u32),
2644 .format_trace = format_ip4_rewrite_trace,
2645 .sibling_of = "ip4-rewrite",
2647 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_bcast_node, ip4_rewrite_bcast)
2649 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2650 .function = ip4_rewrite_mcast,
2651 .name = "ip4-rewrite-mcast",
2652 .vector_size = sizeof (u32),
2654 .format_trace = format_ip4_rewrite_trace,
2655 .sibling_of = "ip4-rewrite",
2657 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2659 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2660 .function = ip4_mcast_midchain,
2661 .name = "ip4-mcast-midchain",
2662 .vector_size = sizeof (u32),
2664 .format_trace = format_ip4_rewrite_trace,
2665 .sibling_of = "ip4-rewrite",
2667 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2669 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2670 .function = ip4_midchain,
2671 .name = "ip4-midchain",
2672 .vector_size = sizeof (u32),
2673 .format_trace = format_ip4_forward_next_trace,
2674 .sibling_of = "ip4-rewrite",
2676 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2680 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2682 ip4_fib_mtrie_t *mtrie0;
2683 ip4_fib_mtrie_leaf_t leaf0;
2686 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2688 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2689 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2690 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2692 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2694 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2697 static clib_error_t *
2698 test_lookup_command_fn (vlib_main_t * vm,
2699 unformat_input_t * input, vlib_cli_command_t * cmd)
2706 ip4_address_t ip4_base_address;
2709 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2711 if (unformat (input, "table %d", &table_id))
2713 /* Make sure the entry exists. */
2714 fib = ip4_fib_get (table_id);
2715 if ((fib) && (fib->index != table_id))
2716 return clib_error_return (0, "<fib-index> %d does not exist",
2719 else if (unformat (input, "count %f", &count))
2722 else if (unformat (input, "%U",
2723 unformat_ip4_address, &ip4_base_address))
2726 return clib_error_return (0, "unknown input `%U'",
2727 format_unformat_error, input);
2732 for (i = 0; i < n; i++)
2734 if (!ip4_lookup_validate (&ip4_base_address, table_id))
2737 ip4_base_address.as_u32 =
2738 clib_host_to_net_u32 (1 +
2739 clib_net_to_host_u32 (ip4_base_address.as_u32));
2743 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2745 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2751 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2752 * given FIB table to determine if there is a conflict with the
2753 * adjacency table. The fib-id can be determined by using the
2754 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2757 * @todo This command uses fib-id, other commands use table-id (not
2758 * just a name, they are different indexes). Would like to change this
2759 * to table-id for consistency.
2762 * Example of how to run the test lookup command:
2763 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2764 * No errors in 2 lookups
2768 VLIB_CLI_COMMAND (lookup_test_command, static) =
2770 .path = "test lookup",
2771 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2772 .function = test_lookup_command_fn,
2777 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2781 fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2783 if (~0 == fib_index)
2784 return VNET_API_ERROR_NO_SUCH_FIB;
2786 fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2792 static clib_error_t *
2793 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2794 unformat_input_t * input,
2795 vlib_cli_command_t * cmd)
2799 u32 flow_hash_config = 0;
2802 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2804 if (unformat (input, "table %d", &table_id))
2807 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2808 foreach_flow_hash_bit
2815 return clib_error_return (0, "unknown input `%U'",
2816 format_unformat_error, input);
2818 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2824 case VNET_API_ERROR_NO_SUCH_FIB:
2825 return clib_error_return (0, "no such FIB table %d", table_id);
2828 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2836 * Configure the set of IPv4 fields used by the flow hash.
2839 * Example of how to set the flow hash on a given table:
2840 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2841 * Example of display the configured flow hash:
2842 * @cliexstart{show ip fib}
2843 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2846 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2847 * [0] [@0]: dpo-drop ip6
2850 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2851 * [0] [@0]: dpo-drop ip6
2854 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2855 * [0] [@0]: dpo-drop ip6
2858 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2859 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2862 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2863 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2864 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2865 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2866 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2869 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2870 * [0] [@0]: dpo-drop ip6
2871 * 255.255.255.255/32
2873 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2874 * [0] [@0]: dpo-drop ip6
2875 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2878 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2879 * [0] [@0]: dpo-drop ip6
2882 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2883 * [0] [@0]: dpo-drop ip6
2886 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2887 * [0] [@4]: ipv4-glean: af_packet0
2890 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2891 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2894 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2895 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2898 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2899 * [0] [@4]: ipv4-glean: af_packet1
2902 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2903 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2906 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2907 * [0] [@0]: dpo-drop ip6
2910 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2911 * [0] [@0]: dpo-drop ip6
2912 * 255.255.255.255/32
2914 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2915 * [0] [@0]: dpo-drop ip6
2919 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2921 .path = "set ip flow-hash",
2923 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2924 .function = set_ip_flow_hash_command_fn,
2929 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2932 vnet_main_t *vnm = vnet_get_main ();
2933 vnet_interface_main_t *im = &vnm->interface_main;
2934 ip4_main_t *ipm = &ip4_main;
2935 ip_lookup_main_t *lm = &ipm->lookup_main;
2936 vnet_classify_main_t *cm = &vnet_classify_main;
2937 ip4_address_t *if_addr;
2939 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2940 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2942 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2943 return VNET_API_ERROR_NO_SUCH_ENTRY;
2945 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2946 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2948 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2950 if (NULL != if_addr)
2952 fib_prefix_t pfx = {
2954 .fp_proto = FIB_PROTOCOL_IP4,
2955 .fp_addr.ip4 = *if_addr,
2959 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2963 if (table_index != (u32) ~ 0)
2965 dpo_id_t dpo = DPO_INVALID;
2970 classify_dpo_create (DPO_PROTO_IP4, table_index));
2972 fib_table_entry_special_dpo_add (fib_index,
2974 FIB_SOURCE_CLASSIFY,
2975 FIB_ENTRY_FLAG_NONE, &dpo);
2980 fib_table_entry_special_remove (fib_index,
2981 &pfx, FIB_SOURCE_CLASSIFY);
2988 static clib_error_t *
2989 set_ip_classify_command_fn (vlib_main_t * vm,
2990 unformat_input_t * input,
2991 vlib_cli_command_t * cmd)
2993 u32 table_index = ~0;
2994 int table_index_set = 0;
2995 u32 sw_if_index = ~0;
2998 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3000 if (unformat (input, "table-index %d", &table_index))
3001 table_index_set = 1;
3002 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3003 vnet_get_main (), &sw_if_index))
3009 if (table_index_set == 0)
3010 return clib_error_return (0, "classify table-index must be specified");
3012 if (sw_if_index == ~0)
3013 return clib_error_return (0, "interface / subif must be specified");
3015 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3022 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3023 return clib_error_return (0, "No such interface");
3025 case VNET_API_ERROR_NO_SUCH_ENTRY:
3026 return clib_error_return (0, "No such classifier table");
3032 * Assign a classification table to an interface. The classification
3033 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3034 * commands. Once the table is created, use this command to filter packets
3038 * Example of how to assign a classification table to an interface:
3039 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3042 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3044 .path = "set ip classify",
3046 "set ip classify intfc <interface> table-index <classify-idx>",
3047 .function = set_ip_classify_command_fn,
3051 static clib_error_t *
3052 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3054 ip4_main_t *im = &ip4_main;
3057 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3059 if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3062 return clib_error_return (0,
3063 "invalid heap-size parameter `%U'",
3064 format_unformat_error, input);
3067 im->mtrie_heap_size = heapsize;
3072 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3075 * fd.io coding-style-patch-verification: ON
3078 * eval: (c-set-style "gnu")