2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/ip4_fib.h>
50 #include <vnet/dpo/load_balance.h>
51 #include <vnet/dpo/classify_dpo.h>
/* Forward declaration of the common trace routine for the ip4 forwarding
   nodes; the definition appears further down in this file.  The last
   argument selects which ip.adj_index[] slot (RX or TX) gets recorded. */
54 ip4_forward_next_trace (vlib_main_t * vm,
55 vlib_node_runtime_t * node,
57 vlib_rx_or_tx_t which_adj_index);
60 ip4_lookup_inline (vlib_main_t * vm,
61 vlib_node_runtime_t * node,
63 int lookup_for_responses_to_locally_received_packets)
65 ip4_main_t * im = &ip4_main;
66 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
67 u32 n_left_from, n_left_to_next, * from, * to_next;
68 ip_lookup_next_t next;
69 u32 cpu_index = os_get_cpu_number();
71 from = vlib_frame_vector_args (frame);
72 n_left_from = frame->n_vectors;
73 next = node->cached_next_index;
75 while (n_left_from > 0)
77 vlib_get_next_frame (vm, node, next,
78 to_next, n_left_to_next);
80 while (n_left_from >= 4 && n_left_to_next >= 2)
82 vlib_buffer_t * p0, * p1;
83 ip4_header_t * ip0, * ip1;
84 __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
85 ip_lookup_next_t next0, next1;
86 const load_balance_t * lb0, * lb1;
87 ip4_fib_mtrie_t * mtrie0, * mtrie1;
88 ip4_fib_mtrie_leaf_t leaf0, leaf1;
89 ip4_address_t * dst_addr0, *dst_addr1;
90 __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
91 __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
92 flow_hash_config_t flow_hash_config0, flow_hash_config1;
95 const dpo_id_t *dpo0, *dpo1;
97 /* Prefetch next iteration. */
99 vlib_buffer_t * p2, * p3;
101 p2 = vlib_get_buffer (vm, from[2]);
102 p3 = vlib_get_buffer (vm, from[3]);
104 vlib_prefetch_buffer_header (p2, LOAD);
105 vlib_prefetch_buffer_header (p3, LOAD);
107 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
108 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
111 pi0 = to_next[0] = from[0];
112 pi1 = to_next[1] = from[1];
114 p0 = vlib_get_buffer (vm, pi0);
115 p1 = vlib_get_buffer (vm, pi1);
117 ip0 = vlib_buffer_get_current (p0);
118 ip1 = vlib_buffer_get_current (p1);
120 dst_addr0 = &ip0->dst_address;
121 dst_addr1 = &ip1->dst_address;
123 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
124 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
125 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
126 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
127 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
128 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
131 if (! lookup_for_responses_to_locally_received_packets)
133 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
134 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
136 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
138 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
139 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
142 tcp0 = (void *) (ip0 + 1);
143 tcp1 = (void *) (ip1 + 1);
145 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
146 || ip0->protocol == IP_PROTOCOL_UDP);
147 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
148 || ip1->protocol == IP_PROTOCOL_UDP);
150 if (! lookup_for_responses_to_locally_received_packets)
152 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
153 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
156 if (! lookup_for_responses_to_locally_received_packets)
158 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
159 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
162 if (! lookup_for_responses_to_locally_received_packets)
164 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
165 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
168 if (lookup_for_responses_to_locally_received_packets)
170 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
171 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
175 /* Handle default route. */
176 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
177 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
179 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
180 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
183 lb0 = load_balance_get (lb_index0);
184 lb1 = load_balance_get (lb_index1);
186 /* Use flow hash to compute multipath adjacency. */
187 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
188 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
189 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
191 flow_hash_config0 = lb0->lb_hash_config;
192 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
193 ip4_compute_flow_hash (ip0, flow_hash_config0);
195 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
197 flow_hash_config1 = lb1->lb_hash_config;
198 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
199 ip4_compute_flow_hash (ip1, flow_hash_config1);
202 ASSERT (lb0->lb_n_buckets > 0);
203 ASSERT (is_pow2 (lb0->lb_n_buckets));
204 ASSERT (lb1->lb_n_buckets > 0);
205 ASSERT (is_pow2 (lb1->lb_n_buckets));
207 dpo0 = load_balance_get_bucket_i(lb0,
209 (lb0->lb_n_buckets_minus_1)));
210 dpo1 = load_balance_get_bucket_i(lb1,
212 (lb0->lb_n_buckets_minus_1)));
214 next0 = dpo0->dpoi_next_node;
215 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
216 next1 = dpo1->dpoi_next_node;
217 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
219 vlib_increment_combined_counter
220 (cm, cpu_index, lb_index0, 1,
221 vlib_buffer_length_in_chain (vm, p0)
222 + sizeof(ethernet_header_t));
223 vlib_increment_combined_counter
224 (cm, cpu_index, lb_index1, 1,
225 vlib_buffer_length_in_chain (vm, p1)
226 + sizeof(ethernet_header_t));
233 wrong_next = (next0 != next) + 2*(next1 != next);
234 if (PREDICT_FALSE (wrong_next != 0))
243 vlib_set_next_frame_buffer (vm, node, next0, pi0);
250 vlib_set_next_frame_buffer (vm, node, next1, pi1);
257 vlib_set_next_frame_buffer (vm, node, next0, pi0);
258 vlib_set_next_frame_buffer (vm, node, next1, pi1);
262 vlib_put_next_frame (vm, node, next, n_left_to_next);
264 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
270 while (n_left_from > 0 && n_left_to_next > 0)
274 __attribute__((unused)) tcp_header_t * tcp0;
275 ip_lookup_next_t next0;
276 const load_balance_t *lb0;
277 ip4_fib_mtrie_t * mtrie0;
278 ip4_fib_mtrie_leaf_t leaf0;
279 ip4_address_t * dst_addr0;
280 __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
281 flow_hash_config_t flow_hash_config0;
282 const dpo_id_t *dpo0;
288 p0 = vlib_get_buffer (vm, pi0);
290 ip0 = vlib_buffer_get_current (p0);
292 dst_addr0 = &ip0->dst_address;
294 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
295 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
296 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
298 if (! lookup_for_responses_to_locally_received_packets)
300 mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
302 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
304 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
307 tcp0 = (void *) (ip0 + 1);
309 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
310 || ip0->protocol == IP_PROTOCOL_UDP);
312 if (! lookup_for_responses_to_locally_received_packets)
313 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
315 if (! lookup_for_responses_to_locally_received_packets)
316 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
318 if (! lookup_for_responses_to_locally_received_packets)
319 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
321 if (lookup_for_responses_to_locally_received_packets)
322 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
325 /* Handle default route. */
326 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
327 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
330 lb0 = load_balance_get (lbi0);
332 /* Use flow hash to compute multipath adjacency. */
333 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
334 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
336 flow_hash_config0 = lb0->lb_hash_config;
338 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
339 ip4_compute_flow_hash (ip0, flow_hash_config0);
342 ASSERT (lb0->lb_n_buckets > 0);
343 ASSERT (is_pow2 (lb0->lb_n_buckets));
345 dpo0 = load_balance_get_bucket_i(lb0,
347 (lb0->lb_n_buckets_minus_1)));
349 next0 = dpo0->dpoi_next_node;
350 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
352 vlib_increment_combined_counter
353 (cm, cpu_index, lbi0, 1,
354 vlib_buffer_length_in_chain (vm, p0));
361 if (PREDICT_FALSE (next0 != next))
364 vlib_put_next_frame (vm, node, next, n_left_to_next);
366 vlib_get_next_frame (vm, node, next,
367 to_next, n_left_to_next);
374 vlib_put_next_frame (vm, node, next, n_left_to_next);
377 if (node->flags & VLIB_NODE_FLAG_TRACE)
378 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
380 return frame->n_vectors;
383 /** @brief IPv4 lookup node.
386 This is the main IPv4 lookup dispatch node.
388 @param vm vlib_main_t corresponding to the current thread
389 @param node vlib_node_runtime_t
390 @param frame vlib_frame_t whose contents should be dispatched
392 @par Graph mechanics: buffer metadata, next index usage
395 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
396 - Indicates the @c sw_if_index value of the interface that the
397 packet was received on.
398 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
399 - When the value is @c ~0 then the node performs a longest prefix
400 match (LPM) for the packet destination address in the FIB attached
401 to the receive interface.
402 - Otherwise perform LPM for the packet destination address in the
403 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
404 value (0, 1, ...) and not a VRF id.
407 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
408 - The index stored in the DPO of the selected load-balance bucket
(@c dpo->dpoi_index).
- <code>vnet_buffer(b)->ip.flow_hash</code>
- Zero, or the computed flow hash when the matched load-balance
object has more than one bucket.
411 - Dispatches the packet to the next node recorded in the DPO of the
412 selected load-balance bucket, @c dpo->dpoi_next_node
413 (where @c dpo is the bucket chosen from the lookup result).
416 ip4_lookup (vlib_main_t * vm,
417 vlib_node_runtime_t * node,
418 vlib_frame_t * frame)
420 return ip4_lookup_inline (vm, node, frame,
421 /* lookup_for_responses_to_locally_received_packets */ 0);
/* Trace formatter for the lookup node; defined later in this file. */
425 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph node "ip4-lookup": runs ip4_lookup over frames of u32 buffer
   indices and uses the IP4_LOOKUP_NEXT_NODES table for its next nodes. */
427 VLIB_REGISTER_NODE (ip4_lookup_node) = {
428 .function = ip4_lookup,
429 .name = "ip4-lookup",
430 .vector_size = sizeof (u32),
432 .format_trace = format_ip4_lookup_trace,
433 .n_next_nodes = IP_LOOKUP_N_NEXT,
434 .next_nodes = IP4_LOOKUP_NEXT_NODES,
437 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
/*
 * Node function for "ip4-load-balance".
 *
 * For each buffer the load-balance index is read from
 * ip.adj_index[VLIB_TX]; a flow hash is always computed from the IP
 * header (there is no single-bucket shortcut here), a bucket is chosen
 * by masking the hash with lb_n_buckets_minus_1, and adj_index[VLIB_TX]
 * plus the next node are replaced by the bucket's DPO values.  The
 * "via" combined counter of the load-balance object is incremented.
 *
 * Returns frame->n_vectors.
 */
440 ip4_load_balance (vlib_main_t * vm,
441 vlib_node_runtime_t * node,
442 vlib_frame_t * frame)
444 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
445 u32 n_left_from, n_left_to_next, * from, * to_next;
446 ip_lookup_next_t next;
447 u32 cpu_index = os_get_cpu_number();
449 from = vlib_frame_vector_args (frame);
450 n_left_from = frame->n_vectors;
451 next = node->cached_next_index;
/* Traced before processing, so the trace records the incoming
   adj_index[VLIB_TX] (the load-balance index), not the chosen bucket. */
453 if (node->flags & VLIB_NODE_FLAG_TRACE)
454 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
456 while (n_left_from > 0)
458 vlib_get_next_frame (vm, node, next,
459 to_next, n_left_to_next);
462 while (n_left_from > 0 && n_left_to_next > 0)
464 ip_lookup_next_t next0;
465 const load_balance_t *lb0;
468 const ip4_header_t *ip0;
469 const dpo_id_t *dpo0;
474 p0 = vlib_get_buffer (vm, pi0);
476 ip0 = vlib_buffer_get_current (p0);
477 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
479 lb0 = load_balance_get(lbi0);
480 hc0 = lb0->lb_hash_config;
481 vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
483 dpo0 = load_balance_get_bucket_i(lb0,
484 vnet_buffer(p0)->ip.flow_hash &
485 (lb0->lb_n_buckets_minus_1));
487 next0 = dpo0->dpoi_next_node;
488 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
490 vlib_increment_combined_counter
491 (cm, cpu_index, lbi0, 1,
492 vlib_buffer_length_in_chain (vm, p0));
/* Speculation failed: the buffer belongs to a different next node. */
499 if (PREDICT_FALSE (next0 != next))
502 vlib_put_next_frame (vm, node, next, n_left_to_next);
504 vlib_get_next_frame (vm, node, next,
505 to_next, n_left_to_next);
512 vlib_put_next_frame (vm, node, next, n_left_to_next);
515 return frame->n_vectors;
/* Generic trace formatter shared by several nodes; defined later in
   this file. */
518 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
/* Graph node "ip4-load-balance": declared a sibling of "ip4-lookup"
   rather than listing next nodes of its own. */
520 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
521 .function = ip4_load_balance,
522 .name = "ip4-load-balance",
523 .vector_size = sizeof (u32),
524 .sibling_of = "ip4-lookup",
526 .format_trace = format_ip4_forward_next_trace,
529 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
531 /* get first interface address */
/* Walks the addresses of sw_if_index (honoring unnumbered interfaces)
   and reports the matching interface-address object through
   *result_ia, or 0 there when no address was found.
   NOTE(review): the statements that set `result` and return it are not
   visible in this excerpt; presumably the first address is returned. */
533 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
534 ip_interface_address_t ** result_ia)
536 ip_lookup_main_t * lm = &im->lookup_main;
537 ip_interface_address_t * ia = 0;
538 ip4_address_t * result = 0;
540 foreach_ip_interface_address (lm, ia, sw_if_index,
541 1 /* honor unnumbered */,
543 ip4_address_t * a = ip_interface_address_get_address (lm, ia);
548 *result_ia = result ? ia : 0;
/*
 * Installs the FIB entries that go with interface address `a` in
 * fib_index, all sourced as FIB_SOURCE_INTERFACE:
 *  - an entry flagged CONNECTED | ATTACHED, whose adjacency is saved in
 *    a->neighbor_probe_adj_index;
 *  - an entry flagged CONNECTED | LOCAL, or instead a special DPO entry
 *    built from the interface's classify table when one is configured.
 * NOTE(review): the prefixes and conditions for each entry sit on lines
 * missing from this excerpt; presumably the first covers the connected
 * subnet and the second the /32 of the address itself.
 */
553 ip4_add_interface_routes (u32 sw_if_index,
554 ip4_main_t * im, u32 fib_index,
555 ip_interface_address_t * a)
557 ip_lookup_main_t * lm = &im->lookup_main;
558 ip4_address_t * address = ip_interface_address_get_address (lm, a);
560 .fp_len = a->address_length,
561 .fp_proto = FIB_PROTOCOL_IP4,
562 .fp_addr.ip4 = *address,
/* Stays ~0 unless the connected/attached entry below is created. */
565 a->neighbor_probe_adj_index = ~0;
569 fib_node_index_t fei;
571 fei = fib_table_entry_update_one_path(fib_index,
573 FIB_SOURCE_INTERFACE,
574 (FIB_ENTRY_FLAG_CONNECTED |
575 FIB_ENTRY_FLAG_ATTACHED),
577 NULL, /* No next-hop address */
579 ~0, // invalid FIB index
582 FIB_ROUTE_PATH_FLAG_NONE);
583 a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
/* A classify table bound to the interface (index != ~0) takes
   precedence: a classify DPO is installed as a special entry. */
588 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
590 u32 classify_table_index =
591 lm->classify_table_index_by_sw_if_index [sw_if_index];
592 if (classify_table_index != (u32) ~0)
594 dpo_id_t dpo = DPO_NULL;
599 classify_dpo_create(FIB_PROTOCOL_IP4,
600 classify_table_index));
602 fib_table_entry_special_dpo_add(fib_index,
611 fib_table_entry_update_one_path(fib_index,
613 FIB_SOURCE_INTERFACE,
614 (FIB_ENTRY_FLAG_CONNECTED |
615 FIB_ENTRY_FLAG_LOCAL),
619 ~0, // invalid FIB index
622 FIB_ROUTE_PATH_FLAG_NONE);
/* Removes the FIB_SOURCE_INTERFACE entries for `address` from fib_index;
   two fib_table_entry_delete calls are issued.
   NOTE(review): the prefix passed to each call is not visible here;
   presumably these mirror the two entries added by
   ip4_add_interface_routes. */
626 ip4_del_interface_routes (ip4_main_t * im,
628 ip4_address_t * address,
632 .fp_len = address_length,
633 .fp_proto = FIB_PROTOCOL_IP4,
634 .fp_addr.ip4 = *address,
639 fib_table_entry_delete(fib_index,
641 FIB_SOURCE_INTERFACE);
645 fib_table_entry_delete(fib_index,
647 FIB_SOURCE_INTERFACE);
/*
 * Reference-counted enable/disable of IPv4 on an interface.
 *
 * im->ip_enabled_by_sw_if_index[sw_if_index] counts enables; the lookup
 * feature is added to (or removed from) the unicast and multicast rx
 * feature configurations, and the resulting config index is stored back
 * per interface.
 */
651 ip4_sw_interface_enable_disable (u32 sw_if_index,
654 vlib_main_t * vm = vlib_get_main();
655 ip4_main_t * im = &ip4_main;
656 ip_lookup_main_t * lm = &im->lookup_main;
658 u32 lookup_feature_index;
660 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
663 * enable/disable only on the 1<->0 transition
/* Enable side: count goes up; anything but the first enable is
   presumably a no-op (the early return is not visible here). */
667 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
/* Disable side: count goes down and must have been positive. */
672 ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
673 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
/* Covers the rx unicast and rx multicast feature sets only. */
677 for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
679 ip_config_main_t * cm = &lm->feature_config_mains[cast];
680 vnet_config_main_t * vcm = &cm->config_main;
682 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
683 ci = cm->config_index_by_sw_if_index[sw_if_index];
685 if (cast == VNET_IP_RX_UNICAST_FEAT)
686 lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
688 lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
691 ci = vnet_config_add_feature (vm, vcm,
693 lookup_feature_index,
695 /* # bytes of config data */ 0);
697 ci = vnet_config_del_feature (vm, vcm,
699 lookup_feature_index,
701 /* # bytes of config data */ 0);
702 cm->config_index_by_sw_if_index[sw_if_index] = ci;
/*
 * Adds or deletes an IPv4 address on an interface.
 *
 * Steps visible here: make sure the interface has a FIB index slot,
 * reject an address that overlaps one already configured on the same
 * interface, update the interface address pool, enable/disable IPv4 on
 * the interface, add or delete the matching interface routes, and
 * notify the registered add/del callbacks when the pool size changed.
 *
 * Returns 0 on success or a clib_error_t describing the failure.
 */
706 static clib_error_t *
707 ip4_add_del_interface_address_internal (vlib_main_t * vm,
709 ip4_address_t * address,
713 vnet_main_t * vnm = vnet_get_main();
714 ip4_main_t * im = &ip4_main;
715 ip_lookup_main_t * lm = &im->lookup_main;
716 clib_error_t * error = 0;
717 u32 if_address_index, elts_before;
718 ip4_address_fib_t ip4_af, * addr_fib = 0;
720 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
721 ip4_addr_fib_init (&ip4_af, address,
722 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
723 vec_add1 (addr_fib, ip4_af);
726 * there is no support for adj-fib handling in the presence of overlapping
727 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
732 /* When adding an address check that it does not conflict
733 with an existing address. */
734 ip_interface_address_t * ia;
735 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
736 0 /* honor unnumbered */,
738 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
/* Conflict when either prefix contains the other address.
   NOTE(review): this early return leaves addr_fib (allocated by
   vec_add1 above) unfreed unless cleanup happens elsewhere; verify. */
740 if (ip4_destination_matches_route (im, address, x, ia->address_length)
741 || ip4_destination_matches_route (im, x, address, address_length))
742 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
743 format_ip4_address_and_length, address, address_length,
744 format_ip4_address_and_length, x, ia->address_length,
745 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* Remember the pool size so a real add/delete can be told apart from
   a duplicate below. */
749 elts_before = pool_elts (lm->if_address_pool);
751 error = ip_interface_address_add_del
761 ip4_sw_interface_enable_disable(sw_if_index, !is_del);
764 ip4_del_interface_routes (im, ip4_af.fib_index, address,
767 ip4_add_interface_routes (sw_if_index,
768 im, ip4_af.fib_index,
770 (lm->if_address_pool, if_address_index));
772 /* Notify callbacks only when the pool actually grew/shrank, i.e. the address was not a duplicate. */
773 if (elts_before != pool_elts (lm->if_address_pool))
775 ip4_add_del_interface_address_callback_t * cb;
776 vec_foreach (cb, im->add_del_interface_address_callbacks)
777 cb->function (im, cb->function_opaque, sw_if_index,
778 address, address_length,
/* Public entry point: forwards its arguments to
   ip4_add_del_interface_address_internal and returns its result. */
789 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
790 ip4_address_t * address, u32 address_length,
793 return ip4_add_del_interface_address_internal
794 (vm, sw_if_index, address, address_length,
798 /* Built-in ip4 unicast rx feature path definition */
/* The runs_before constraints below yield this order:
   ip4-inacl -> ip4-source-check-via-rx -> ip4-source-check-via-any ->
   ip4-policer-classify -> ipsec-input-ip4 -> vpath-input-ip4 ->
   ip4-lookup -> ip4-drop.
   ip4-source-and-port-range-check-rx is only constrained to run before
   ip4-policer-classify.  Each entry stores its assigned feature index
   in the named ip4_main field. */
799 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
800 .node_name = "ip4-inacl",
801 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
802 .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
805 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
806 .node_name = "ip4-source-check-via-rx",
807 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
809 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
812 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
813 .node_name = "ip4-source-check-via-any",
814 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
816 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
820 .node_name = "ip4-source-and-port-range-check-rx",
821 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
823 &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
827 .node_name = "ip4-policer-classify",
828 .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
830 &ip4_main.ip4_unicast_rx_feature_policer_classify,
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
834 .node_name = "ipsec-input-ip4",
835 .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
836 .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
839 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
840 .node_name = "vpath-input-ip4",
841 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
842 .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
845 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
846 .node_name = "ip4-lookup",
847 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
848 .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
851 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
852 .node_name = "ip4-drop",
853 .runs_before = 0, /* not before any other features */
854 .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
858 /* Built-in ip4 multicast rx feature path definition */
/* Resulting order: vpath-input-ip4 -> ip4-lookup-multicast -> ip4-drop. */
859 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
860 .node_name = "vpath-input-ip4",
861 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
862 .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
865 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
866 .node_name = "ip4-lookup-multicast",
867 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
868 .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
871 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
872 .node_name = "ip4-drop",
873 .runs_before = 0, /* last feature */
874 .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
/* Start-node name tables handed to ip_feature_init_cast by
   ip4_feature_init: one for the rx feature sets, one for tx. */
877 static char * rx_feature_start_nodes[] =
878 { "ip4-input", "ip4-input-no-checksum"};
880 static char * tx_feature_start_nodes[] =
881 { "ip4-rewrite-transit"};
883 /* Source and port-range check ip4 tx feature path definition */
884 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
885 .node_name = "ip4-source-and-port-range-check-tx",
886 .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
888 &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
892 /* Built-in ip4 tx feature path definition */
893 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
894 .node_name = "interface-output",
895 .runs_before = 0, /* not before any other features */
896 .feature_index = &ip4_main.ip4_tx_feature_interface_output,
/* Initialises every IPv4 feature set (VNET_N_IP_FEAT of them) through
   ip_feature_init_cast, using the rx start nodes for sets below
   VNET_IP_TX_FEAT and the tx start nodes otherwise.
   NOTE(review): the handling of a non-zero error is not visible in this
   excerpt; presumably it is returned to the caller. */
899 static clib_error_t *
900 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
902 ip_lookup_main_t * lm = &im->lookup_main;
903 clib_error_t * error;
905 ip_config_main_t * cm;
906 vnet_config_main_t * vcm;
907 char **feature_start_nodes;
908 int feature_start_len;
910 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
912 cm = &lm->feature_config_mains[cast];
913 vcm = &cm->config_main;
915 if (cast < VNET_IP_TX_FEAT)
917 feature_start_nodes = rx_feature_start_nodes;
918 feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
922 feature_start_nodes = tx_feature_start_nodes;
923 feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
926 if ((error = ip_feature_init_cast (vm, cm, vcm,
930 VNET_L3_PACKET_TYPE_IP4)))
/*
 * Software-interface add/delete hook (registered at the end of this
 * block).
 *
 * For every feature set the terminal feature is added to or removed
 * from the interface's configuration: ip4-drop for rx unicast and rx
 * multicast, interface-output for tx.  On the delete path the
 * interface's IPv4 enable count is reset to 0 when it has a slot.
 * Always returns 0.
 */
937 static clib_error_t *
938 ip4_sw_interface_add_del (vnet_main_t * vnm,
942 vlib_main_t * vm = vnm->vlib_main;
943 ip4_main_t * im = &ip4_main;
944 ip_lookup_main_t * lm = &im->lookup_main;
948 /* Fill in lookup tables with default table (0). */
949 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
951 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
953 ip_config_main_t * cm = &lm->feature_config_mains[cast];
954 vnet_config_main_t * vcm = &cm->config_main;
956 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
957 ci = cm->config_index_by_sw_if_index[sw_if_index];
959 if (cast == VNET_IP_RX_UNICAST_FEAT)
960 feature_index = im->ip4_unicast_rx_feature_drop;
961 else if (cast == VNET_IP_RX_MULTICAST_FEAT)
962 feature_index = im->ip4_multicast_rx_feature_drop;
964 feature_index = im->ip4_tx_feature_interface_output;
967 ci = vnet_config_add_feature (vm, vcm,
971 /* # bytes of config data */ 0);
974 ci = vnet_config_del_feature (vm, vcm, ci,
977 /* # bytes of config data */ 0);
/* Guarded because the enable-count vector may be shorter than
   sw_if_index. */
978 if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
979 im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
981 cm->config_index_by_sw_if_index[sw_if_index] = ci;
983 * note: do not update the tx feature count here.
987 return /* no error */ 0;
990 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
992 /* Global IP4 main. */
/*
 * Init function for the IPv4 lookup machinery (registered through
 * VLIB_INIT_FUNCTION at the end of this block).
 *
 * Visible steps: fill im->fib_masks with network-byte-order masks, one
 * per index; initialise the generic lookup main for IPv4; create FIB
 * table 0; hook the packet-generator edit function onto the ip4-lookup
 * node; build the ARP request packet template; initialise the feature
 * sets.
 */
996 ip4_lookup_init (vlib_main_t * vm)
998 ip4_main_t * im = &ip4_main;
999 clib_error_t * error;
1002 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* Top i bits set: the mask for a prefix of length i.
   NOTE(review): any guard around this shift is not visible here. */
1007 m = pow2_mask (i) << (32 - i);
1010 im->fib_masks[i] = clib_host_to_net_u32 (m);
1013 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1015 /* Create FIB with index 0 and table id of 0. */
1016 fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1020 pn = pg_get_node (ip4_lookup_node.index);
1021 pn->unformat_edit = unformat_pg_ip4_header;
/* Template for outgoing ARP requests: Ethernet/IPv4, 6-byte and 4-byte
   addresses, opcode "request", everything else zeroed. */
1025 ethernet_arp_header_t h;
1027 memset (&h, 0, sizeof (h));
1029 /* Set target ethernet address to all zeros. */
1030 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1032 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1033 #define _8(f,v) h.f = v;
1034 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1035 _16 (l3_type, ETHERNET_TYPE_IP4);
1036 _8 (n_l2_address_bytes, 6);
1037 _8 (n_l3_address_bytes, 4);
1038 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1042 vlib_packet_template_init (vm,
1043 &im->ip4_arp_request_packet_template,
1046 /* alloc chunk size */ 8,
1050 error = ip4_feature_init (vm, im);
1055 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record shared by the ip4 forwarding nodes.  Besides
   packet_data, code in this file reads and writes its adj_index,
   flow_hash and fib_index members (their declarations are not visible
   in this excerpt). */
1058 /* Adjacency taken. */
1063 /* Packet data, possibly *after* rewrite. */
1064 u8 packet_data[64 - 1*sizeof(u32)];
1065 } ip4_forward_next_trace_t;
/* Trace formatter: prints the captured packet bytes as an IPv4 header,
   indented to the current indent of `s`.  The vm and node arguments
   are consumed from the va_list but not used. */
1067 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1069 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1070 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1071 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1072 uword indent = format_get_indent (s);
1073 s = format (s, "%U%U",
1074 format_white_space, indent,
1075 format_ip4_header, t->packet_data);
/* Trace formatter for the lookup node: first line shows the FIB index,
   the adjacency index (also rendered via format_ip_adjacency) and the
   flow hash; the second line shows the captured packet as an IPv4
   header. */
1079 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1081 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1082 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1083 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1084 vnet_main_t * vnm = vnet_get_main();
1085 uword indent = format_get_indent (s);
1087 s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1088 t->fib_index, t->adj_index, format_ip_adjacency,
1089 vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
1091 s = format (s, "\n%U%U",
1092 format_white_space, indent,
1093 format_ip4_header, t->packet_data);
/* Trace formatter for rewrite nodes: like the lookup formatter, but the
   trace's fib_index member is printed under the label "tx_sw_if_index"
   and the packet bytes go through format_ip_adjacency_packet_data. */
1097 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1099 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1100 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1101 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1102 vnet_main_t * vnm = vnet_get_main();
1103 uword indent = format_get_indent (s);
1105 s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1106 t->fib_index, t->adj_index, format_ip_adjacency,
1107 vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
1109 s = format (s, "\n%U%U",
1110 format_white_space, indent,
1111 format_ip_adjacency_packet_data,
1113 t->packet_data, sizeof (t->packet_data));
1117 /* Common trace function for all ip4-forward next nodes. */
/* For every buffer in the frame flagged VLIB_BUFFER_IS_TRACED, adds a
   trace record holding ip.adj_index[which_adj_index], the flow hash,
   the FIB index (sw_if_index[VLIB_TX] when it is not ~0, else the FIB
   of the receive interface) and the first sizeof(packet_data) bytes
   from the buffer's current data pointer.  Buffers are handled in
   pairs with prefetch, then one at a time. */
1119 ip4_forward_next_trace (vlib_main_t * vm,
1120 vlib_node_runtime_t * node,
1121 vlib_frame_t * frame,
1122 vlib_rx_or_tx_t which_adj_index)
1125 ip4_main_t * im = &ip4_main;
1127 n_left = frame->n_vectors;
1128 from = vlib_frame_vector_args (frame);
1133 vlib_buffer_t * b0, * b1;
1134 ip4_forward_next_trace_t * t0, * t1;
1136 /* Prefetch next iteration. */
1137 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1138 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1143 b0 = vlib_get_buffer (vm, bi0);
1144 b1 = vlib_get_buffer (vm, bi1);
1146 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1148 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1149 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1150 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1151 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1152 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1153 vec_elt (im->fib_index_by_sw_if_index,
1154 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1156 clib_memcpy (t0->packet_data,
1157 vlib_buffer_get_current (b0),
1158 sizeof (t0->packet_data));
1160 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1162 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1163 t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1164 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1165 t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1166 vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1167 vec_elt (im->fib_index_by_sw_if_index,
1168 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1169 clib_memcpy (t1->packet_data,
1170 vlib_buffer_get_current (b1),
1171 sizeof (t1->packet_data));
/* Remaining buffers, one at a time. */
1181 ip4_forward_next_trace_t * t0;
1185 b0 = vlib_get_buffer (vm, bi0);
1187 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1189 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1190 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1191 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1192 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1193 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1194 vec_elt (im->fib_index_by_sw_if_index,
1195 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1196 clib_memcpy (t0->packet_data,
1197 vlib_buffer_get_current (b0),
1198 sizeof (t0->packet_data));
/* Shared body of the drop and punt nodes: passes all buffers of the
   frame to vlib_error_drop_buffers (naming ip4_input_node's index) and,
   when tracing is enabled on the node, records traces keyed on
   adj_index[VLIB_TX].
   NOTE(review): how error_code is passed and what is returned are on
   lines missing from this excerpt. */
1206 ip4_drop_or_punt (vlib_main_t * vm,
1207 vlib_node_runtime_t * node,
1208 vlib_frame_t * frame,
1209 ip4_error_t error_code)
1211 u32 * buffers = vlib_frame_vector_args (frame);
1212 uword n_packets = frame->n_vectors;
1214 vlib_error_drop_buffers (vm, node,
1219 ip4_input_node.index,
1222 if (node->flags & VLIB_NODE_FLAG_TRACE)
1223 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function for the drop node: ip4_drop_or_punt with
   IP4_ERROR_ADJACENCY_DROP. */
1229 ip4_drop (vlib_main_t * vm,
1230 vlib_node_runtime_t * node,
1231 vlib_frame_t * frame)
1232 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
/* Node function for the punt node: ip4_drop_or_punt with
   IP4_ERROR_ADJACENCY_PUNT. */
1235 ip4_punt (vlib_main_t * vm,
1236 vlib_node_runtime_t * node,
1237 vlib_frame_t * frame)
1238 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
/* Graph node registration whose node function is ip4_drop.  Frame elements
 * are u32 sized and traces are rendered by format_ip4_forward_next_trace.
 * NOTE(review): the node name and the next-node table are on lines that are
 * missing from this listing. */
1240 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1241 .function = ip4_drop,
1243 .vector_size = sizeof (u32),
1245 .format_trace = format_ip4_forward_next_trace,
1253 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
/* Graph node registration whose node function is ip4_punt.  Frame elements
 * are u32 sized and traces are rendered by format_ip4_forward_next_trace.
 * NOTE(review): the node name and the next-node table are on lines that are
 * missing from this listing. */
1255 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1256 .function = ip4_punt,
1258 .vector_size = sizeof (u32),
1260 .format_trace = format_ip4_forward_next_trace,
1268 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1270 /* Compute TCP/UDP/ICMP4 checksum in software. */
/* The sum covers the bytes that follow the IPv4 header ip0, starting in
 * buffer p0 and continuing through chained buffers until the payload
 * length taken from the IP header has been consumed.  It is seeded with
 * the payload length plus (protocol << 16) and with the source and
 * destination addresses.  The folded and inverted sum is stored in sum16.
 * NOTE(review): the ip0 parameter line, some declarations, the loop
 * statement and the return statement are missing from this listing;
 * sum16 is presumably the return value - confirm. */
1272 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1276 u32 ip_header_length, payload_length_host_byte_order;
1277 u32 n_this_buffer, n_bytes_left;
1279 void * data_this_buffer;
1281 /* Initialize checksum with ip header. */
1282 ip_header_length = ip4_header_bytes (ip0);
1283 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1284 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
/* Add the addresses: two 32-bit reads when uword is 32 bits wide; the
 * 64-bit read starting at src_address is presumably the else branch and
 * presumably covers dst_address as well - confirm against ip4_header_t. */
1286 if (BITS (uword) == 32)
1288 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1289 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1292 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1294 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1295 data_this_buffer = (void *) ip0 + ip_header_length;
/* Clamp the first chunk to the bytes this buffer really holds after the
 * IP header (zero when the buffer ends inside the header). */
1296 if (n_this_buffer + ip_header_length > p0->current_length)
1297 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
/* Checksum the current chunk; when bytes remain, a next buffer must be
 * present and its whole current data becomes the next chunk. */
1300 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1301 n_bytes_left -= n_this_buffer;
1302 if (n_bytes_left == 0)
1305 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1306 p0 = vlib_get_buffer (vm, p0->next_buffer);
1307 data_this_buffer = vlib_buffer_get_current (p0);
1308 n_this_buffer = p0->current_length;
1311 sum16 = ~ ip_csum_fold (sum0);
/* Validate the L4 checksum of the TCP or UDP packet at the current data
 * of p0 and record the verdict in p0->flags.
 *  - UDP with a checksum field of zero: IP_BUFFER_L4_CHECKSUM_COMPUTED and
 *    IP_BUFFER_L4_CHECKSUM_CORRECT are both set without computing a sum.
 *  - Otherwise ip4_tcp_udp_compute_checksum() is called; COMPUTED is set
 *    and CORRECT is set only when the computed value is zero.
 * Any other protocol trips the ASSERT.
 * NOTE(review): the return statements are missing from this listing;
 * ip4_local assigns the result to its flags variable, so the function
 * presumably returns p0->flags - confirm.
 * NOTE(review): udp0 is taken as ip0 + 1, i.e. directly behind a
 * fixed-size ip4_header_t, while ip4_local uses ip4_next_header();
 * confirm that packets with IP options cannot reach this function. */
1317 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1319 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1320 udp_header_t * udp0;
1323 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1324 || ip0->protocol == IP_PROTOCOL_UDP);
1326 udp0 = (void *) (ip0 + 1);
1327 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1329 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1330 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1334 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1336 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1337 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/** @brief ip4-local node function.
 *
 * Validates packets addressed to this host and dispatches each one to the
 * next node registered for its IP protocol.  For every packet the visible
 * code:
 *  - looks up the source address in the mtrie of the FIB bound to the RX
 *    interface (four lookup steps interleaved with the other checks) and
 *    stores the resulting index in ip.adj_index[];
 *  - treats fragments as protocol 0xfe;
 *  - for TCP/UDP accepts a buffer already flagged checksum-correct or a
 *    UDP checksum field of zero, and otherwise validates the checksum when
 *    it has not been computed yet;
 *  - rejects UDP packets whose UDP length exceeds the IP length;
 *  - derives IP4_ERROR_SRC_LOOKUP_MISS / IP4_ERROR_SPOOFED_LOCAL_PACKETS
 *    from the type of bucket 0 of the source load-balance object;
 *  - selects lm->local_next_by_ip_protocol[proto], or IP_LOCAL_NEXT_DROP
 *    when any error other than IP4_ERROR_UNKNOWN_PROTOCOL was recorded.
 *
 * @param vm    vlib_main_t of the current thread
 * @param node  vlib_node_runtime_t of this node
 * @param frame vlib_frame_t holding the buffer indices to process
 * @return frame->n_vectors
 *
 * Fix: in the dual loop the spoofed-source test for packet 1 looked at
 * dpo0 (the source lookup result of packet 0); it now looks at dpo1.
 *
 * NOTE(review): short lines (braces, else, a few statements) are missing
 * from this listing; they are left untouched by this edit.
 */
1343 ip4_local (vlib_main_t * vm,
1344 vlib_node_runtime_t * node,
1345 vlib_frame_t * frame)
1347 ip4_main_t * im = &ip4_main;
1348 ip_lookup_main_t * lm = &im->lookup_main;
1349 ip_local_next_t next_index;
1350 u32 * from, * to_next, n_left_from, n_left_to_next;
/* Errors are accounted against the ip4-input node, not this node. */
1351 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1353 from = vlib_frame_vector_args (frame);
1354 n_left_from = frame->n_vectors;
1355 next_index = node->cached_next_index;
1357 if (node->flags & VLIB_NODE_FLAG_TRACE)
1358 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1360 while (n_left_from > 0)
1362 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: two packets per iteration. */
1364 while (n_left_from >= 4 && n_left_to_next >= 2)
1366 vlib_buffer_t * p0, * p1;
1367 ip4_header_t * ip0, * ip1;
1368 udp_header_t * udp0, * udp1;
1369 ip4_fib_mtrie_t * mtrie0, * mtrie1;
1370 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1371 const dpo_id_t *dpo0, *dpo1;
1372 const load_balance_t *lb0, *lb1;
1373 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1374 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1375 i32 len_diff0, len_diff1;
1376 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1377 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1380 pi0 = to_next[0] = from[0];
1381 pi1 = to_next[1] = from[1];
1385 n_left_to_next -= 2;
1387 p0 = vlib_get_buffer (vm, pi0);
1388 p1 = vlib_get_buffer (vm, pi1);
1390 ip0 = vlib_buffer_get_current (p0);
1391 ip1 = vlib_buffer_get_current (p1);
/* The source lookup uses the FIB bound to the receiving interface. */
1393 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1394 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1395 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1396 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1398 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1399 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1401 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1403 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1404 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1406 /* Treat IP frag packets as "experimental" protocol for now
1407 until support of IP frag reassembly is implemented */
1408 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1409 proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1410 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1411 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1412 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1413 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1418 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1419 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1421 udp0 = ip4_next_header (ip0);
1422 udp1 = ip4_next_header (ip1);
1424 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1425 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1426 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1428 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1429 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1431 /* Verify UDP length. */
1432 ip_len0 = clib_net_to_host_u16 (ip0->length);
1433 ip_len1 = clib_net_to_host_u16 (ip1->length);
1434 udp_len0 = clib_net_to_host_u16 (udp0->length);
1435 udp_len1 = clib_net_to_host_u16 (udp1->length);
1437 len_diff0 = ip_len0 - udp_len0;
1438 len_diff1 = ip_len1 - udp_len1;
/* The length check only applies to UDP; other protocols get 0. */
1440 len_diff0 = is_udp0 ? len_diff0 : 0;
1441 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: at least one packet is not TCP/UDP or is not yet known to
 * carry a good checksum. */
1443 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1444 & good_tcp_udp0 & good_tcp_udp1)))
1449 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1450 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1452 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1453 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1458 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1459 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1461 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1462 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1466 good_tcp_udp0 &= len_diff0 >= 0;
1467 good_tcp_udp1 &= len_diff1 >= 0;
1469 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1470 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* IP4_ERROR_UNKNOWN_PROTOCOL doubles as the "no error so far" value; the
 * next-node selection below keys off it. */
1472 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1474 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1475 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* TCP_CHECKSUM + is_udp selects the TCP or the UDP checksum error. */
1477 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1478 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1479 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1481 error1 = (is_tcp_udp1 && ! good_tcp_udp1
1482 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1485 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1486 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1487 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1488 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1490 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1491 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1493 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1494 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1496 lb0 = load_balance_get(lbi0);
1497 lb1 = load_balance_get(lbi1);
1498 dpo0 = load_balance_get_bucket_i(lb0, 0);
1499 dpo1 = load_balance_get_bucket_i(lb1, 0);
1502 /* Must have a route to source otherwise we drop the packet.
1503 * ip4 broadcasts are accepted, e.g. to make dhcp client work */
1505 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1506 && dpo0->dpoi_type != DPO_ADJACENCY
1507 && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1508 && ip0->dst_address.as_u32 != 0xFFFFFFFF
1509 ? IP4_ERROR_SRC_LOOKUP_MISS
1511 error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1512 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1514 error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1515 && dpo1->dpoi_type != DPO_ADJACENCY
1516 && dpo1->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1517 && ip1->dst_address.as_u32 != 0xFFFFFFFF
1518 ? IP4_ERROR_SRC_LOOKUP_MISS
/* Packet 1 must be judged by its own source lookup result (dpo1); the
 * original tested dpo0 here. */
1520 error1 = (dpo1->dpoi_type == DPO_RECEIVE ?
1521 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1524 next0 = lm->local_next_by_ip_protocol[proto0];
1525 next1 = lm->local_next_by_ip_protocol[proto1];
1527 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1528 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1530 p0->error = error0 ? error_node->errors[error0] : 0;
1531 p1->error = error1 ? error_node->errors[error1] : 0;
/* Bit 0: packet 0 leaves the speculated frame; bit 1: packet 1 does. */
1533 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1535 if (PREDICT_FALSE (enqueue_code != 0))
1537 switch (enqueue_code)
1543 n_left_to_next += 1;
1544 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1550 n_left_to_next += 1;
1551 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1555 /* A B B or A B C */
1557 n_left_to_next += 2;
1558 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1559 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1562 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1564 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Single loop: same checks, one packet per iteration. */
1571 while (n_left_from > 0 && n_left_to_next > 0)
1575 udp_header_t * udp0;
1576 ip4_fib_mtrie_t * mtrie0;
1577 ip4_fib_mtrie_leaf_t leaf0;
1578 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1580 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1581 load_balance_t *lb0;
1582 const dpo_id_t *dpo0;
1584 pi0 = to_next[0] = from[0];
1588 n_left_to_next -= 1;
1590 p0 = vlib_get_buffer (vm, pi0);
1592 ip0 = vlib_buffer_get_current (p0);
1594 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1595 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1597 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1599 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1601 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1603 /* Treat IP frag packets as "experimental" protocol for now
1604 until support of IP frag reassembly is implemented */
1605 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1606 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1607 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1611 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1613 udp0 = ip4_next_header (ip0);
1615 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1616 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1618 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1620 /* Verify UDP length. */
1621 ip_len0 = clib_net_to_host_u16 (ip0->length);
1622 udp_len0 = clib_net_to_host_u16 (udp0->length);
1624 len_diff0 = ip_len0 - udp_len0;
1626 len_diff0 = is_udp0 ? len_diff0 : 0;
1628 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1633 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1634 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1636 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1637 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1641 good_tcp_udp0 &= len_diff0 >= 0;
1643 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1645 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1647 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1649 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1650 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1651 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1654 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1655 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1657 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1658 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1660 lb0 = load_balance_get(lbi0);
1661 dpo0 = load_balance_get_bucket_i(lb0, 0);
1663 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1664 vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1667 /* Must have a route to source otherwise we drop the packet. */
1668 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1669 && dpo0->dpoi_type != DPO_ADJACENCY
1670 && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1671 && dpo0->dpoi_type != DPO_RECEIVE
1672 && ip0->dst_address.as_u32 != 0xFFFFFFFF
1673 ? IP4_ERROR_SRC_LOOKUP_MISS
1675 /* Packet originated from a local address => spoofing */
1676 error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1677 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1680 next0 = lm->local_next_by_ip_protocol[proto0];
1682 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1684 p0->error = error0? error_node->errors[error0] : 0;
1686 if (PREDICT_FALSE (next0 != next_index))
1688 n_left_to_next += 1;
1689 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1692 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1695 n_left_to_next -= 1;
1699 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1702 return frame->n_vectors;
/* Registration of the ip4-local graph node, implemented by ip4_local.
 * The static next-node table has IP_LOCAL_N_NEXT entries: error-drop,
 * error-punt, ip4-udp-lookup and ip4-icmp-input.  ip4_register_protocol()
 * adds further next nodes at run time through vlib_node_add_next(). */
1705 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1706 .function = ip4_local,
1707 .name = "ip4-local",
1708 .vector_size = sizeof (u32),
1710 .format_trace = format_ip4_forward_next_trace,
1712 .n_next_nodes = IP_LOCAL_N_NEXT,
1714 [IP_LOCAL_NEXT_DROP] = "error-drop",
1715 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1716 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1717 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1721 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
/* Make node_index the ip4-local handler for an IP protocol number.
 * The node is added as a next node of ip4_local_node and the next index
 * returned by vlib_node_add_next() is stored in
 * lm->local_next_by_ip_protocol[protocol], which ip4_local consults per
 * packet.  protocol must lie within that table; this is checked by ASSERT
 * only. */
1723 void ip4_register_protocol (u32 protocol, u32 node_index)
1725 vlib_main_t * vm = vlib_get_main();
1726 ip4_main_t * im = &ip4_main;
1727 ip_lookup_main_t * lm = &im->lookup_main;
1729 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1730 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
/* CLI handler registered for the show ip local command.  Prints a heading
 * and then the number of every protocol whose entry in
 * lm->local_next_by_ip_protocol differs from IP_LOCAL_NEXT_PUNT.
 * input and cmd are not used in the visible lines.
 * NOTE(review): the declaration of i and the return statement are on
 * lines missing from this listing. */
1733 static clib_error_t *
1734 show_ip_local_command_fn (vlib_main_t * vm,
1735 unformat_input_t * input,
1736 vlib_cli_command_t * cmd)
1738 ip4_main_t * im = &ip4_main;
1739 ip_lookup_main_t * lm = &im->lookup_main;
1742 vlib_cli_output (vm, "Protocols handled by ip4_local");
1743 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1745 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1746 vlib_cli_output (vm, "%d", i);
/* CLI registration binding the path given below to
 * show_ip_local_command_fn. */
1753 VLIB_CLI_COMMAND (show_ip_local, static) = {
1754 .path = "show ip local",
1755 .function = show_ip_local_command_fn,
1756 .short_help = "Show ip local protocol table",
1760 ip4_arp_inline (vlib_main_t * vm,
1761 vlib_node_runtime_t * node,
1762 vlib_frame_t * frame,
1765 vnet_main_t * vnm = vnet_get_main();
1766 ip4_main_t * im = &ip4_main;
1767 ip_lookup_main_t * lm = &im->lookup_main;
1768 u32 * from, * to_next_drop;
1769 uword n_left_from, n_left_to_next_drop, next_index;
1770 static f64 time_last_seed_change = -1e100;
1771 static u32 hash_seeds[3];
1772 static uword hash_bitmap[256 / BITS (uword)];
1775 if (node->flags & VLIB_NODE_FLAG_TRACE)
1776 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1778 time_now = vlib_time_now (vm);
1779 if (time_now - time_last_seed_change > 1e-3)
1782 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1783 sizeof (hash_seeds));
1784 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1785 hash_seeds[i] = r[i];
1787 /* Mark all hash keys as been no-seen before. */
1788 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1791 time_last_seed_change = time_now;
1794 from = vlib_frame_vector_args (frame);
1795 n_left_from = frame->n_vectors;
1796 next_index = node->cached_next_index;
1797 if (next_index == IP4_ARP_NEXT_DROP)
1798 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1800 while (n_left_from > 0)
1802 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1803 to_next_drop, n_left_to_next_drop);
1805 while (n_left_from > 0 && n_left_to_next_drop > 0)
1807 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1808 ip_adjacency_t * adj0;
1815 p0 = vlib_get_buffer (vm, pi0);
1817 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1818 adj0 = ip_get_adjacency (lm, adj_index0);
1819 ip0 = vlib_buffer_get_current (p0);
1822 * this is the Glean case, so we are ARPing for the
1823 * packet's destination
1829 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1830 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1834 a0 ^= ip0->dst_address.data_u32;
1838 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1842 hash_v3_finalize32 (a0, b0, c0);
1844 c0 &= BITS (hash_bitmap) - 1;
1845 c0 = c0 / BITS (uword);
1846 m0 = (uword) 1 << (c0 % BITS (uword));
1848 bm0 = hash_bitmap[c0];
1849 drop0 = (bm0 & m0) != 0;
1851 /* Mark it as seen. */
1852 hash_bitmap[c0] = bm0 | m0;
1856 to_next_drop[0] = pi0;
1858 n_left_to_next_drop -= 1;
1860 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1866 * Can happen if the control-plane is programming tables
1867 * with traffic flowing; at least that's today's lame excuse.
1869 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1870 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1872 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1875 /* Send ARP request. */
1879 ethernet_arp_header_t * h0;
1880 vnet_hw_interface_t * hw_if0;
1882 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1884 /* Add rewrite/encap string for ARP packet. */
1885 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1887 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1889 /* Src ethernet address in ARP header. */
1890 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1891 sizeof (h0->ip4_over_ethernet[0].ethernet));
1895 /* The interface's source address is stashed in the Glean Adj */
1896 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1898 /* Copy in destination address we are requesting. This is the
1899 * glean case, so it's the packet's destination.*/
1900 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1904 /* Src IP address in ARP header. */
1905 if (ip4_src_address_for_packet(lm, sw_if_index0,
1906 &h0->ip4_over_ethernet[0].ip4))
1908 /* No source address available */
1909 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1910 vlib_buffer_free(vm, &bi0, 1);
1914 /* Copy in destination address we are requesting from the
1916 h0->ip4_over_ethernet[1].ip4.data_u32 =
1917 adj0->sub_type.nbr.next_hop.ip4.as_u32;
1920 vlib_buffer_copy_trace_flag (vm, p0, bi0);
1921 b0 = vlib_get_buffer (vm, bi0);
1922 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1924 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1926 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1930 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1933 return frame->n_vectors;
// Node function of ip4_arp_node: runs ip4_arp_inline() with 0 as the last
// argument (the non-glean case) and returns its result.
1937 ip4_arp (vlib_main_t * vm,
1938 vlib_node_runtime_t * node,
1939 vlib_frame_t * frame)
1941 return (ip4_arp_inline(vm, node, frame, 0));
// Node function of ip4_glean_node: runs ip4_arp_inline() with 1 as the
// last argument (the glean case) and returns its result.
1945 ip4_glean (vlib_main_t * vm,
1946 vlib_node_runtime_t * node,
1947 vlib_frame_t * frame)
1949 return (ip4_arp_inline(vm, node, frame, 1));
// Counter descriptions indexed by the IP4_ARP_ERROR_xxx values.  Both the
// ip4_arp_node and the ip4_glean_node registration use this table as
// their error_strings.
1952 static char * ip4_arp_error_strings[] = {
1953 [IP4_ARP_ERROR_DROP] = "address overflow drops",
1954 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1955 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1956 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1957 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1958 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
// Graph node registration whose node function is ip4_arp.  Errors are
// described by ip4_arp_error_strings and the static next-node table has
// IP4_ARP_N_NEXT entries, of which error-drop is visible here.
// NOTE(review): the node name line is missing from this listing.
1961 VLIB_REGISTER_NODE (ip4_arp_node) = {
1962 .function = ip4_arp,
1964 .vector_size = sizeof (u32),
1966 .format_trace = format_ip4_forward_next_trace,
1968 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1969 .error_strings = ip4_arp_error_strings,
1971 .n_next_nodes = IP4_ARP_N_NEXT,
1973 [IP4_ARP_NEXT_DROP] = "error-drop",
// Graph node registration of ip4-glean, implemented by ip4_glean.  It
// shares ip4_arp_error_strings with ip4_arp_node and has IP4_ARP_N_NEXT
// static next nodes, of which error-drop is visible here.
1977 VLIB_REGISTER_NODE (ip4_glean_node) = {
1978 .function = ip4_glean,
1979 .name = "ip4-glean",
1980 .vector_size = sizeof (u32),
1982 .format_trace = format_ip4_forward_next_trace,
1984 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1985 .error_strings = ip4_arp_error_strings,
1987 .n_next_nodes = IP4_ARP_N_NEXT,
1989 [IP4_ARP_NEXT_DROP] = "error-drop",
// arp_notrace_init is registered with VLIB_INIT_FUNCTION.  It fetches the
// runtime of ip4_arp_node and, for each name expanded from
// foreach_notrace_ip4_arp_error, calls vnet_pcap_drop_trace_filter_add_del
// on the matching rt->errors[] entry.
// NOTE(review): the body of the foreach macro, the remaining call
// arguments and the return statement are missing from this listing, so
// which errors are filtered cannot be told from here.
1993 #define foreach_notrace_ip4_arp_error \
1999 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2001 vlib_node_runtime_t *rt =
2002 vlib_node_get_runtime (vm, ip4_arp_node.index);
2004 /* don't trace ARP request packets */
2006 vnet_pcap_drop_trace_filter_add_del \
2007 (rt->errors[IP4_ARP_ERROR_##a], \
2009 foreach_notrace_ip4_arp_error;
2014 VLIB_INIT_FUNCTION(arp_notrace_init);
2017 /* Send an ARP request to see if given destination is reachable on given interface. */
/* Parameters: vm is the vlib main, dst the address to probe, sw_if_index
 * the interface to probe on.
 * Returns 0 on success, or a clib error when the interface is not admin-up
 * or when no interface address matches dst (in that case vnm->api_errno
 * is set to VNET_API_ERROR_NO_MATCHING_INTERFACE as well).
 * The request is built from im->ip4_arp_request_packet_template, rewritten
 * with the neighbor probe adjacency of the matching interface address and
 * handed directly to the output node of the hardware interface.
 * NOTE(review): short lines (braces, the src check, the frame fill-in)
 * are missing from this listing. */
2019 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2021 vnet_main_t * vnm = vnet_get_main();
2022 ip4_main_t * im = &ip4_main;
2023 ethernet_arp_header_t * h;
2024 ip4_address_t * src;
2025 ip_interface_address_t * ia;
2026 ip_adjacency_t * adj;
2027 vnet_hw_interface_t * hi;
2028 vnet_sw_interface_t * si;
2032 si = vnet_get_sw_interface (vnm, sw_if_index);
2034 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2036 return clib_error_return (0, "%U: interface %U down",
2037 format_ip4_address, dst,
2038 format_vnet_sw_if_index_name, vnm,
/* Pick the interface address that matches dst; it becomes the sender
 * address and ia supplies the probe adjacency. */
2042 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2045 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2046 return clib_error_return
2047 (0, "no matching interface address for destination %U (interface %U)",
2048 format_ip4_address, dst,
2049 format_vnet_sw_if_index_name, vnm, sw_if_index);
2052 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2054 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2056 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Entry 0 of ip4_over_ethernet is filled with the local hardware address
 * and src, entry 1 with the probed address dst. */
2058 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2060 h->ip4_over_ethernet[0].ip4 = src[0];
2061 h->ip4_over_ethernet[1].ip4 = dst[0];
2063 b = vlib_get_buffer (vm, bi);
2064 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2066 /* Add encapsulation string for software interface (e.g. ethernet header). */
2067 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2068 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2071 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2072 u32 * to_next = vlib_frame_vector_args (f);
2075 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2078 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline: DROP is the default for a
 * packet with an error, ARP overrides the next index of ARP adjacencies on
 * the locally-received path, ICMP_ERROR is taken when the TTL expires.
 * NOTE(review): the opening line of this enum is missing from the
 * listing; ip4_rewrite_inline tests next0 for non-zero, which presumably
 * relies on IP4_REWRITE_NEXT_DROP being 0 - confirm. */
2082 IP4_REWRITE_NEXT_DROP,
2083 IP4_REWRITE_NEXT_ARP,
2084 IP4_REWRITE_NEXT_ICMP_ERROR,
2085 } ip4_rewrite_next_t;
2088 ip4_rewrite_inline (vlib_main_t * vm,
2089 vlib_node_runtime_t * node,
2090 vlib_frame_t * frame,
2091 int rewrite_for_locally_received_packets)
2093 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2094 u32 * from = vlib_frame_vector_args (frame);
2095 u32 n_left_from, n_left_to_next, * to_next, next_index;
2096 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2097 vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2098 ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2100 n_left_from = frame->n_vectors;
2101 next_index = node->cached_next_index;
2102 u32 cpu_index = os_get_cpu_number();
2104 while (n_left_from > 0)
2106 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2108 while (n_left_from >= 4 && n_left_to_next >= 2)
2110 ip_adjacency_t * adj0, * adj1;
2111 vlib_buffer_t * p0, * p1;
2112 ip4_header_t * ip0, * ip1;
2113 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2114 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2115 u32 next0_override, next1_override;
2116 u32 tx_sw_if_index0, tx_sw_if_index1;
2118 if (rewrite_for_locally_received_packets)
2119 next0_override = next1_override = 0;
2121 /* Prefetch next iteration. */
2123 vlib_buffer_t * p2, * p3;
2125 p2 = vlib_get_buffer (vm, from[2]);
2126 p3 = vlib_get_buffer (vm, from[3]);
2128 vlib_prefetch_buffer_header (p2, STORE);
2129 vlib_prefetch_buffer_header (p3, STORE);
2131 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2132 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2135 pi0 = to_next[0] = from[0];
2136 pi1 = to_next[1] = from[1];
2141 n_left_to_next -= 2;
2143 p0 = vlib_get_buffer (vm, pi0);
2144 p1 = vlib_get_buffer (vm, pi1);
2146 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2147 adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2149 /* We should never rewrite a pkt using the MISS adjacency */
2150 ASSERT(adj_index0 && adj_index1);
2152 ip0 = vlib_buffer_get_current (p0);
2153 ip1 = vlib_buffer_get_current (p1);
2155 error0 = error1 = IP4_ERROR_NONE;
2156 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2158 /* Decrement TTL & update checksum.
2159 Works either endian, so no need for byte swap. */
2160 if (! rewrite_for_locally_received_packets)
2162 i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2164 /* Input node should have reject packets with ttl 0. */
2165 ASSERT (ip0->ttl > 0);
2166 ASSERT (ip1->ttl > 0);
2168 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2169 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2171 checksum0 += checksum0 >= 0xffff;
2172 checksum1 += checksum1 >= 0xffff;
2174 ip0->checksum = checksum0;
2175 ip1->checksum = checksum1;
2184 * If the ttl drops below 1 when forwarding, generate
2187 if (PREDICT_FALSE(ttl0 <= 0))
2189 error0 = IP4_ERROR_TIME_EXPIRED;
2190 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2191 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2192 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2193 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2195 if (PREDICT_FALSE(ttl1 <= 0))
2197 error1 = IP4_ERROR_TIME_EXPIRED;
2198 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2199 icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2200 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2201 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2204 /* Verify checksum. */
2205 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2206 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2209 /* Rewrite packet header and updates lengths. */
2210 adj0 = ip_get_adjacency (lm, adj_index0);
2211 adj1 = ip_get_adjacency (lm, adj_index1);
2213 if (rewrite_for_locally_received_packets)
2215 if (PREDICT_FALSE(adj0->lookup_next_index
2216 == IP_LOOKUP_NEXT_ARP))
2217 next0_override = IP4_REWRITE_NEXT_ARP;
2218 if (PREDICT_FALSE(adj1->lookup_next_index
2219 == IP_LOOKUP_NEXT_ARP))
2220 next1_override = IP4_REWRITE_NEXT_ARP;
2223 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2224 rw_len0 = adj0[0].rewrite_header.data_bytes;
2225 rw_len1 = adj1[0].rewrite_header.data_bytes;
2226 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2227 vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2229 /* Check MTU of outgoing interface. */
2230 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2231 ? IP4_ERROR_MTU_EXCEEDED
2233 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2234 ? IP4_ERROR_MTU_EXCEEDED
2237 next0 = (error0 == IP4_ERROR_NONE)
2238 ? adj0[0].rewrite_header.next_index : next0;
2240 if (rewrite_for_locally_received_packets)
2241 next0 = next0 && next0_override ? next0_override : next0;
2243 next1 = (error1 == IP4_ERROR_NONE)
2244 ? adj1[0].rewrite_header.next_index : next1;
2246 if (rewrite_for_locally_received_packets)
2247 next1 = next1 && next1_override ? next1_override : next1;
2250 * We've already accounted for an ethernet_header_t elsewhere
2252 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2253 vlib_increment_combined_counter
2254 (&adjacency_counters,
2255 cpu_index, adj_index0,
2256 /* packet increment */ 0,
2257 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2259 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2260 vlib_increment_combined_counter
2261 (&adjacency_counters,
2262 cpu_index, adj_index1,
2263 /* packet increment */ 0,
2264 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2266 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2267 * to see the IP headerr */
2268 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2270 p0->current_data -= rw_len0;
2271 p0->current_length += rw_len0;
2272 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2273 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2277 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2280 p0->current_config_index =
2281 vec_elt (cm->config_index_by_sw_if_index,
2283 vnet_get_config_data (&cm->config_main,
2284 &p0->current_config_index,
2286 /* # bytes of config data */ 0);
2289 if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2291 p1->current_data -= rw_len1;
2292 p1->current_length += rw_len1;
2294 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2295 vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2299 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2302 p1->current_config_index =
2303 vec_elt (cm->config_index_by_sw_if_index,
2305 vnet_get_config_data (&cm->config_main,
2306 &p1->current_config_index,
2308 /* # bytes of config data */ 0);
2312 /* Guess we are only writing on simple Ethernet header. */
2313 vnet_rewrite_two_headers (adj0[0], adj1[0],
2315 sizeof (ethernet_header_t));
2317 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2318 to_next, n_left_to_next,
2319 pi0, pi1, next0, next1);
2322 while (n_left_from > 0 && n_left_to_next > 0)
2324 ip_adjacency_t * adj0;
2327 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2329 u32 tx_sw_if_index0;
2331 if (rewrite_for_locally_received_packets)
2334 pi0 = to_next[0] = from[0];
2336 p0 = vlib_get_buffer (vm, pi0);
2338 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2340 /* We should never rewrite a pkt using the MISS adjacency */
2343 adj0 = ip_get_adjacency (lm, adj_index0);
2345 ip0 = vlib_buffer_get_current (p0);
2347 error0 = IP4_ERROR_NONE;
2348 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2350 /* Decrement TTL & update checksum. */
2351 if (! rewrite_for_locally_received_packets)
2353 i32 ttl0 = ip0->ttl;
2355 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2357 checksum0 += checksum0 >= 0xffff;
2359 ip0->checksum = checksum0;
2361 ASSERT (ip0->ttl > 0);
2367 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2369 if (PREDICT_FALSE(ttl0 <= 0))
2372 * If the ttl drops below 1 when forwarding, generate
2375 error0 = IP4_ERROR_TIME_EXPIRED;
2376 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2377 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2378 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2379 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2383 if (rewrite_for_locally_received_packets)
2386 * We have to override the next_index in ARP adjacencies,
2387 * because they're set up for ip4-arp, not this node...
2389 if (PREDICT_FALSE(adj0->lookup_next_index
2390 == IP_LOOKUP_NEXT_ARP))
2391 next0_override = IP4_REWRITE_NEXT_ARP;
2394 /* Guess we are only writing on simple Ethernet header. */
2395 vnet_rewrite_one_header (adj0[0], ip0,
2396 sizeof (ethernet_header_t));
2398 /* Update packet buffer attributes/set output interface. */
2399 rw_len0 = adj0[0].rewrite_header.data_bytes;
2400 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2402 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2403 vlib_increment_combined_counter
2404 (&adjacency_counters,
2405 cpu_index, adj_index0,
2406 /* packet increment */ 0,
2407 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2409 /* Check MTU of outgoing interface. */
2410 error0 = (vlib_buffer_length_in_chain (vm, p0)
2411 > adj0[0].rewrite_header.max_l3_packet_bytes
2412 ? IP4_ERROR_MTU_EXCEEDED
2415 p0->error = error_node->errors[error0];
2417 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2418 * to see the IP header */
2419 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2421 p0->current_data -= rw_len0;
2422 p0->current_length += rw_len0;
2423 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2425 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2426 next0 = adj0[0].rewrite_header.next_index;
2429 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2432 p0->current_config_index =
2433 vec_elt (cm->config_index_by_sw_if_index,
2435 vnet_get_config_data (&cm->config_main,
2436 &p0->current_config_index,
2438 /* # bytes of config data */ 0);
2442 if (rewrite_for_locally_received_packets)
2443 next0 = next0 && next0_override ? next0_override : next0;
2448 n_left_to_next -= 1;
2450 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2451 to_next, n_left_to_next,
2455 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2458 /* Need to do trace after rewrites to pick up new packet data. */
2459 if (node->flags & VLIB_NODE_FLAG_TRACE)
2460 ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2462 return frame->n_vectors;
2466 /** @brief IPv4 transit rewrite node.
2467 @node ip4-rewrite-transit
2469 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2470 header checksum, fetch the ip adjacency, check the outbound mtu,
2471 apply the adjacency rewrite, and send pkts to the adjacency
2472 rewrite header's next_index.
2474 @param vm vlib_main_t corresponding to the current thread
2475 @param node vlib_node_runtime_t
2476 @param frame vlib_frame_t whose contents should be dispatched
2478 @par Graph mechanics: buffer metadata, next index usage
2481 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2482 - the rewrite adjacency index
2483 - <code>adj->lookup_next_index</code>
2484 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2485 the packet will be dropped.
2486 - <code>adj->rewrite_header</code>
2487 - Rewrite string length, rewrite string, next_index
2490 - <code>b->current_data, b->current_length</code>
2491 - Updated net of applying the rewrite string
2493 <em>Next Indices:</em>
2494 - <code> adj->rewrite_header.next_index </code>
/* Node function registered for "ip4-rewrite-transit": hands the frame to
 * ip4_rewrite_inline() with the locally-received flag cleared (0). */
2498 ip4_rewrite_transit (vlib_main_t * vm,
2499 vlib_node_runtime_t * node,
2500 vlib_frame_t * frame)
2502 return ip4_rewrite_inline (vm, node, frame,
2503 /* rewrite_for_locally_received_packets */ 0);
2506 /** @brief IPv4 local rewrite node.
2507 @node ip4-rewrite-local
2509 This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2510 the outbound interface mtu, apply the adjacency rewrite, and send
2511 pkts to the adjacency rewrite header's next_index. Also
2512 handles the corner case where a peer sends an icmp4 packet with
2513 src = dst = interface addr.
2515 @param vm vlib_main_t corresponding to the current thread
2516 @param node vlib_node_runtime_t
2517 @param frame vlib_frame_t whose contents should be dispatched
2519 @par Graph mechanics: buffer metadata, next index usage
2522 - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2523 - the rewrite adjacency index
2524 - <code>adj->lookup_next_index</code>
2525 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2526 the packet will be dropped.
2527 - <code>adj->rewrite_header</code>
2528 - Rewrite string length, rewrite string, next_index
2531 - <code>b->current_data, b->current_length</code>
2532 - Updated net of applying the rewrite string
2534 <em>Next Indices:</em>
2535 - <code> adj->rewrite_header.next_index </code>
/* Node function registered for "ip4-rewrite-local": hands the frame to
 * ip4_rewrite_inline() with the locally-received flag set (1). */
2540 ip4_rewrite_local (vlib_main_t * vm,
2541 vlib_node_runtime_t * node,
2542 vlib_frame_t * frame)
2544 return ip4_rewrite_inline (vm, node, frame,
2545 /* rewrite_for_locally_received_packets */ 1);
/* Node function registered for "ip4-midchain": runs the shared
 * ip4_rewrite_inline() body with the locally-received flag cleared (0),
 * i.e. the same argument ip4_rewrite_transit passes. */
2549 ip4_midchain (vlib_main_t * vm,
2550 vlib_node_runtime_t * node,
2551 vlib_frame_t * frame)
2553 return ip4_rewrite_inline (vm, node, frame,
2554 /* rewrite_for_locally_received_packets */ 0);
/* Graph node registration for "ip4-rewrite-transit". The named next nodes
 * cover the drop, ARP and ICMP-error dispositions of the rewrite path. */
2557 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2558 .function = ip4_rewrite_transit,
2559 .name = "ip4-rewrite-transit",
2560 .vector_size = sizeof (u32),
2562 .format_trace = format_ip4_rewrite_trace,
2566 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2567 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2568 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2572 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
/* Graph node registration for "ip4-midchain".
 * NOTE(review): ip4_midchain calls ip4_rewrite_inline with
 * rewrite_for_locally_received_packets = 0; in that mode the rewrite path
 * can choose IP4_REWRITE_NEXT_ICMP_ERROR when the TTL expires, and that
 * next index has no entry in this list -- confirm that next_nodes /
 * n_next_nodes cover it. */
2574 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2575 .function = ip4_midchain,
2576 .name = "ip4-midchain",
2577 .vector_size = sizeof (u32),
2579 .format_trace = format_ip4_forward_next_trace,
2583 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2584 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2588 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
/* Graph node registration for "ip4-rewrite-local"; declared as a sibling of
 * "ip4-rewrite-transit" rather than listing next nodes of its own. */
2590 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2591 .function = ip4_rewrite_local,
2592 .name = "ip4-rewrite-local",
2593 .vector_size = sizeof (u32),
2595 .sibling_of = "ip4-rewrite-transit",
2597 .format_trace = format_ip4_rewrite_trace,
2602 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
/* CLI handler for "set interface ip table": parses an interface followed by
 * a numeric table id, finds or creates (and locks) the IPv4 FIB for that id,
 * and records the resulting FIB index as the interface's FIB.
 * Parse failures are reported as "unknown interface" / "expected table id". */
2604 static clib_error_t *
2605 add_del_interface_table (vlib_main_t * vm,
2606 unformat_input_t * input,
2607 vlib_cli_command_t * cmd)
2609 vnet_main_t * vnm = vnet_get_main();
2610 clib_error_t * error = 0;
2611 u32 sw_if_index, table_id;
2615 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2617 error = clib_error_return (0, "unknown interface `%U'",
2618 format_unformat_error, input);
2622 if (unformat (input, "%d", &table_id))
2626 error = clib_error_return (0, "expected table id `%U'",
2627 format_unformat_error, input);
2632 ip4_main_t * im = &ip4_main;
2635 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2640 // changing an interface's table has consequences for any connecteds
2641 // and adj-fibs already installed.
2643 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2644 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2652 * Place the indicated interface into the supplied VRF
2655 * @cliexstart{set interface ip table}
2657 * vpp# set interface ip table GigabitEthernet2/0/0 2
2659 * Interface addresses added after setting the interface IP table end up in the indicated VRF table.
2660 * Predictable but potentially counter-intuitive results occur if you provision interface addresses in multiple FIBs.
2661 * Upon RX, packets will be processed in the last IP table ID provisioned.
2662 * It might be marginally useful to evade source RPF drops to put an interface address into multiple FIBs.
/* CLI registration for "set interface ip table". The help text mirrors the
 * argument order the handler parses: interface first, then the table id. */
2665 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2666 .path = "set interface ip table",
2667 .function = add_del_interface_table,
2668 .short_help = "set interface ip table <interface> <table-id>",
/* Node function registered for "ip4-lookup-multicast".
 *
 * For every buffer in the frame: take the FIB index of the RX interface
 * (overridden by sw_if_index[VLIB_TX] when that is not ~0), look up the
 * load-balance object with ip4_fib_table_lookup_lb(), compute the flow hash
 * with that load-balance's hash config, select a bucket, store the bucket's
 * DPO index in ip.adj_index[VLIB_TX], bump the per-load-balance combined
 * counter and enqueue the buffer to the DPO's next node.
 *
 * Buffers are handled two at a time while at least four remain, then one at
 * a time. Returns frame->n_vectors. */
2673 ip4_lookup_multicast (vlib_main_t * vm,
2674 vlib_node_runtime_t * node,
2675 vlib_frame_t * frame)
2677 ip4_main_t * im = &ip4_main;
2678 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2679 u32 n_left_from, n_left_to_next, * from, * to_next;
2680 ip_lookup_next_t next;
2681 u32 cpu_index = os_get_cpu_number();
2683 from = vlib_frame_vector_args (frame);
2684 n_left_from = frame->n_vectors;
2685 next = node->cached_next_index;
2687 while (n_left_from > 0)
2689 vlib_get_next_frame (vm, node, next,
2690 to_next, n_left_to_next);
2692 while (n_left_from >= 4 && n_left_to_next >= 2)
2694 vlib_buffer_t * p0, * p1;
2695 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2696 ip_lookup_next_t next0, next1;
2697 ip4_header_t * ip0, * ip1;
2698 u32 fib_index0, fib_index1;
2699 const dpo_id_t *dpo0, *dpo1;
2700 const load_balance_t * lb0, * lb1;
2702 /* Prefetch next iteration. */
2704 vlib_buffer_t * p2, * p3;
2706 p2 = vlib_get_buffer (vm, from[2]);
2707 p3 = vlib_get_buffer (vm, from[3]);
2709 vlib_prefetch_buffer_header (p2, LOAD);
2710 vlib_prefetch_buffer_header (p3, LOAD);
2712 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2713 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2716 pi0 = to_next[0] = from[0];
2717 pi1 = to_next[1] = from[1];
2719 p0 = vlib_get_buffer (vm, pi0);
2720 p1 = vlib_get_buffer (vm, pi1);
2722 ip0 = vlib_buffer_get_current (p0);
2723 ip1 = vlib_buffer_get_current (p1);
2725 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2726 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
/* A sw_if_index[VLIB_TX] other than ~0 replaces the RX-derived FIB index. */
2727 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2728 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2729 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2730 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2732 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2734 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2737 lb0 = load_balance_get (lb_index0);
2738 lb1 = load_balance_get (lb_index1);
2740 ASSERT (lb0->lb_n_buckets > 0);
2741 ASSERT (is_pow2 (lb0->lb_n_buckets));
2742 ASSERT (lb1->lb_n_buckets > 0);
2743 ASSERT (is_pow2 (lb1->lb_n_buckets));
2745 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2746 (ip0, lb0->lb_hash_config);
2748 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2749 (ip1, lb1->lb_hash_config);
2751 dpo0 = load_balance_get_bucket_i(lb0,
2752 (vnet_buffer (p0)->ip.flow_hash &
2753 (lb0->lb_n_buckets_minus_1)));
/* Each hash must be masked with its own load-balance's bucket mask: lb0 and
 * lb1 can have different bucket counts, so using lb0's mask for p1 could
 * index outside lb1's buckets or skew the distribution. */
2754 dpo1 = load_balance_get_bucket_i(lb1,
2755 (vnet_buffer (p1)->ip.flow_hash &
2756 (lb1->lb_n_buckets_minus_1)));
2758 next0 = dpo0->dpoi_next_node;
2759 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2760 next1 = dpo1->dpoi_next_node;
2761 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2763 if (1) /* $$$$$$ HACK FIXME */
2764 vlib_increment_combined_counter
2765 (cm, cpu_index, lb_index0, 1,
2766 vlib_buffer_length_in_chain (vm, p0));
2767 if (1) /* $$$$$$ HACK FIXME */
2768 vlib_increment_combined_counter
2769 (cm, cpu_index, lb_index1, 1,
2770 vlib_buffer_length_in_chain (vm, p1));
2774 n_left_to_next -= 2;
/* Bit 0 set: p0 goes to a different next node; bit 1 set: p1 does. */
2777 wrong_next = (next0 != next) + 2*(next1 != next);
2778 if (PREDICT_FALSE (wrong_next != 0))
2786 n_left_to_next += 1;
2787 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2793 n_left_to_next += 1;
2794 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2800 n_left_to_next += 2;
2801 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2802 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2806 vlib_put_next_frame (vm, node, next, n_left_to_next);
2808 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2814 while (n_left_from > 0 && n_left_to_next > 0)
2819 ip_lookup_next_t next0;
2821 const dpo_id_t *dpo0;
2822 const load_balance_t * lb0;
2827 p0 = vlib_get_buffer (vm, pi0);
2829 ip0 = vlib_buffer_get_current (p0);
2831 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2832 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2833 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2834 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2836 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2839 lb0 = load_balance_get (lb_index0);
2841 ASSERT (lb0->lb_n_buckets > 0);
2842 ASSERT (is_pow2 (lb0->lb_n_buckets));
2844 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2845 (ip0, lb0->lb_hash_config);
2847 dpo0 = load_balance_get_bucket_i(lb0,
2848 (vnet_buffer (p0)->ip.flow_hash &
2849 (lb0->lb_n_buckets_minus_1)));
2851 next0 = dpo0->dpoi_next_node;
2852 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2854 if (1) /* $$$$$$ HACK FIXME */
2855 vlib_increment_combined_counter
2856 (cm, cpu_index, lb_index0, 1,
2857 vlib_buffer_length_in_chain (vm, p0));
2861 n_left_to_next -= 1;
2864 if (PREDICT_FALSE (next0 != next))
2866 n_left_to_next += 1;
2867 vlib_put_next_frame (vm, node, next, n_left_to_next);
2869 vlib_get_next_frame (vm, node, next,
2870 to_next, n_left_to_next);
2873 n_left_to_next -= 1;
2877 vlib_put_next_frame (vm, node, next, n_left_to_next);
2880 if (node->flags & VLIB_NODE_FLAG_TRACE)
2881 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2883 return frame->n_vectors;
/* Graph node registration for "ip4-lookup-multicast"; declared as a sibling
 * of "ip4-lookup" and traced with format_ip4_lookup_trace. */
2886 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2887 .function = ip4_lookup_multicast,
2888 .name = "ip4-lookup-multicast",
2889 .vector_size = sizeof (u32),
2890 .sibling_of = "ip4-lookup",
2891 .format_trace = format_ip4_lookup_trace,
2896 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
/* Graph node registration for "ip4-multicast"; its node function is
 * ip4_drop. */
2898 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2899 .function = ip4_drop,
2900 .name = "ip4-multicast",
2901 .vector_size = sizeof (u32),
2903 .format_trace = format_ip4_forward_next_trace,
/* Cross-check the mtrie of FIB fib_index0 against ip4_fib_table_lookup_lb()
 * for address a: walk the four mtrie lookup steps by hand, fall back to the
 * mtrie's default leaf when the result is empty, and return non-zero when
 * the leaf's index equals the index returned by the regular lookup. */
2911 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2913 ip4_fib_mtrie_t * mtrie0;
2914 ip4_fib_mtrie_leaf_t leaf0;
2917 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2919 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2920 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2921 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2922 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2923 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2925 /* Handle default route. */
2926 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2928 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2930 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
/* CLI handler for "test lookup": accepts "table <n>", "count <f>" and an
 * IPv4 base address, runs ip4_lookup_validate() on n consecutive addresses
 * starting at the base (incrementing in host byte order), and prints either
 * the error tally or a "No errors" line.
 * NOTE(review): the parsed table id is passed straight through as the
 * fib_index0 argument of ip4_lookup_validate() -- confirm whether a
 * table-id to FIB-index translation is intended here. */
2933 static clib_error_t *
2934 test_lookup_command_fn (vlib_main_t * vm,
2935 unformat_input_t * input,
2936 vlib_cli_command_t * cmd)
2942 ip4_address_t ip4_base_address;
2945 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2946 if (unformat (input, "table %d", &table_id))
2948 else if (unformat (input, "count %f", &count))
2951 else if (unformat (input, "%U",
2952 unformat_ip4_address, &ip4_base_address))
2955 return clib_error_return (0, "unknown input `%U'",
2956 format_unformat_error, input);
2961 for (i = 0; i < n; i++)
2963 if (!ip4_lookup_validate (&ip4_base_address, table_id))
2966 ip4_base_address.as_u32 =
2967 clib_host_to_net_u32 (1 +
2968 clib_net_to_host_u32 (ip4_base_address.as_u32));
2972 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2974 vlib_cli_output (vm, "No errors in %d lookups\n", n);
/* CLI registration binding "test lookup" to test_lookup_command_fn. */
2979 VLIB_CLI_COMMAND (lookup_test_command, static) = {
2980 .path = "test lookup",
2981 .short_help = "test lookup",
2982 .function = test_lookup_command_fn,
/* Set the flow-hash configuration of the IPv4 FIB identified by table_id.
 * The table id is looked up in im4->fib_index_by_table_id and the result is
 * used as the FIB index; the VNET_API_ERROR_NO_SUCH_FIB return is presumably
 * taken when that lookup misses -- TODO confirm. */
2985 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2987 ip4_main_t * im4 = &ip4_main;
2989 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
2992 return VNET_API_ERROR_NO_SUCH_FIB;
2994 fib = ip4_fib_get (p[0]);
2996 fib->flow_hash_config = flow_hash_config;
/* CLI handler for "set ip flow-hash": parses "table <n>" plus the keywords
 * generated from foreach_flow_hash_bit, OR-ing each matched keyword's bit
 * into flow_hash_config, then applies the result with
 * vnet_set_ip4_flow_hash(). VNET_API_ERROR_NO_SUCH_FIB is reported as
 * "no such FIB table". */
3000 static clib_error_t *
3001 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3002 unformat_input_t * input,
3003 vlib_cli_command_t * cmd)
3007 u32 flow_hash_config = 0;
3010 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3011 if (unformat (input, "table %d", &table_id))
3014 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3015 foreach_flow_hash_bit
3021 return clib_error_return (0, "unknown input `%U'",
3022 format_unformat_error, input);
3024 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3030 case VNET_API_ERROR_NO_SUCH_FIB:
3031 return clib_error_return (0, "no such FIB table %d", table_id);
3034 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
/* CLI registration for "set ip flow-hash". The help text starts with the
 * registered path so that it matches what the user actually types. */
3041 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3042 .path = "set ip flow-hash",
3044 "set ip flow-hash table <fib-id> src dst sport dport proto reverse",
3045 .function = set_ip_flow_hash_command_fn,
/* Bind a classifier table to an interface for IPv4.
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * slot in the sw_interfaces pool, and VNET_API_ERROR_NO_SUCH_ENTRY when
 * table_index is not ~0 and is a free slot in the classifier table pool.
 * Otherwise stores table_index in
 * lm->classify_table_index_by_sw_if_index[sw_if_index]. */
3048 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3051 vnet_main_t * vnm = vnet_get_main();
3052 vnet_interface_main_t * im = &vnm->interface_main;
3053 ip4_main_t * ipm = &ip4_main;
3054 ip_lookup_main_t * lm = &ipm->lookup_main;
3055 vnet_classify_main_t * cm = &vnet_classify_main;
3057 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3058 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3060 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3061 return VNET_API_ERROR_NO_SUCH_ENTRY;
3063 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3064 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
/* CLI handler for "set ip classify": requires both "table-index <n>" and
 * "intfc <interface>", calls vnet_set_ip4_classify_intfc() and maps its
 * NO_MATCHING_INTERFACE / NO_SUCH_ENTRY results to CLI error messages. */
3069 static clib_error_t *
3070 set_ip_classify_command_fn (vlib_main_t * vm,
3071 unformat_input_t * input,
3072 vlib_cli_command_t * cmd)
3074 u32 table_index = ~0;
3075 int table_index_set = 0;
3076 u32 sw_if_index = ~0;
3079 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3080 if (unformat (input, "table-index %d", &table_index))
3081 table_index_set = 1;
3082 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3083 vnet_get_main(), &sw_if_index))
/* table_index defaults to ~0, so a separate flag records whether the user
 * actually supplied one. */
3089 if (table_index_set == 0)
3090 return clib_error_return (0, "classify table-index must be specified");
3092 if (sw_if_index == ~0)
3093 return clib_error_return (0, "interface / subif must be specified");
3095 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3102 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3103 return clib_error_return (0, "No such interface");
3105 case VNET_API_ERROR_NO_SUCH_ENTRY:
3106 return clib_error_return (0, "No such classifier table");
3111 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3112 .path = "set ip classify",
3114 "set ip classify intfc <int> table-index <index>",
3115 .function = set_ip_classify_command_fn,