2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/ip4_fib.h>
50 #include <vnet/dpo/load_balance.h>
51 #include <vnet/dpo/classify_dpo.h>
/* Forward declaration of the common trace helper defined later in this file.
   which_adj_index selects which ip.adj_index[] slot of the buffer metadata is
   copied into the trace record. */
54 ip4_forward_next_trace (vlib_main_t * vm,
55 vlib_node_runtime_t * node,
57 vlib_rx_or_tx_t which_adj_index);
60 ip4_lookup_inline (vlib_main_t * vm,
61 vlib_node_runtime_t * node,
63 int lookup_for_responses_to_locally_received_packets)
65 ip4_main_t * im = &ip4_main;
66 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
67 u32 n_left_from, n_left_to_next, * from, * to_next;
68 ip_lookup_next_t next;
69 u32 cpu_index = os_get_cpu_number();
71 from = vlib_frame_vector_args (frame);
72 n_left_from = frame->n_vectors;
73 next = node->cached_next_index;
75 while (n_left_from > 0)
77 vlib_get_next_frame (vm, node, next,
78 to_next, n_left_to_next);
80 while (n_left_from >= 4 && n_left_to_next >= 2)
82 vlib_buffer_t * p0, * p1;
83 ip4_header_t * ip0, * ip1;
84 __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
85 ip_lookup_next_t next0, next1;
86 const load_balance_t * lb0, * lb1;
87 ip4_fib_mtrie_t * mtrie0, * mtrie1;
88 ip4_fib_mtrie_leaf_t leaf0, leaf1;
89 ip4_address_t * dst_addr0, *dst_addr1;
90 __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
91 __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
92 flow_hash_config_t flow_hash_config0, flow_hash_config1;
95 const dpo_id_t *dpo0, *dpo1;
97 /* Prefetch next iteration. */
99 vlib_buffer_t * p2, * p3;
101 p2 = vlib_get_buffer (vm, from[2]);
102 p3 = vlib_get_buffer (vm, from[3]);
104 vlib_prefetch_buffer_header (p2, LOAD);
105 vlib_prefetch_buffer_header (p3, LOAD);
107 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
108 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
111 pi0 = to_next[0] = from[0];
112 pi1 = to_next[1] = from[1];
114 p0 = vlib_get_buffer (vm, pi0);
115 p1 = vlib_get_buffer (vm, pi1);
117 ip0 = vlib_buffer_get_current (p0);
118 ip1 = vlib_buffer_get_current (p1);
120 dst_addr0 = &ip0->dst_address;
121 dst_addr1 = &ip1->dst_address;
123 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
124 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
125 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
126 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
127 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
128 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
131 if (! lookup_for_responses_to_locally_received_packets)
133 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
134 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
136 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
138 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
139 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
142 tcp0 = (void *) (ip0 + 1);
143 tcp1 = (void *) (ip1 + 1);
145 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
146 || ip0->protocol == IP_PROTOCOL_UDP);
147 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
148 || ip1->protocol == IP_PROTOCOL_UDP);
150 if (! lookup_for_responses_to_locally_received_packets)
152 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
153 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
156 if (! lookup_for_responses_to_locally_received_packets)
158 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
159 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
162 if (! lookup_for_responses_to_locally_received_packets)
164 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
165 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
168 if (lookup_for_responses_to_locally_received_packets)
170 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
171 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
175 /* Handle default route. */
176 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
177 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
179 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
180 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
183 lb0 = load_balance_get (lb_index0);
184 lb1 = load_balance_get (lb_index1);
186 /* Use flow hash to compute multipath adjacency. */
187 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
188 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
189 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
191 flow_hash_config0 = lb0->lb_hash_config;
192 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
193 ip4_compute_flow_hash (ip0, flow_hash_config0);
195 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
197 flow_hash_config1 = lb1->lb_hash_config;
198 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
199 ip4_compute_flow_hash (ip1, flow_hash_config1);
202 ASSERT (lb0->lb_n_buckets > 0);
203 ASSERT (is_pow2 (lb0->lb_n_buckets));
204 ASSERT (lb1->lb_n_buckets > 0);
205 ASSERT (is_pow2 (lb1->lb_n_buckets));
207 dpo0 = load_balance_get_bucket_i(lb0,
209 (lb0->lb_n_buckets_minus_1)));
210 dpo1 = load_balance_get_bucket_i(lb1,
212 (lb0->lb_n_buckets_minus_1)));
214 next0 = dpo0->dpoi_next_node;
215 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
216 next1 = dpo1->dpoi_next_node;
217 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
219 vlib_increment_combined_counter
220 (cm, cpu_index, lb_index0, 1,
221 vlib_buffer_length_in_chain (vm, p0)
222 + sizeof(ethernet_header_t));
223 vlib_increment_combined_counter
224 (cm, cpu_index, lb_index1, 1,
225 vlib_buffer_length_in_chain (vm, p1)
226 + sizeof(ethernet_header_t));
233 wrong_next = (next0 != next) + 2*(next1 != next);
234 if (PREDICT_FALSE (wrong_next != 0))
243 vlib_set_next_frame_buffer (vm, node, next0, pi0);
250 vlib_set_next_frame_buffer (vm, node, next1, pi1);
257 vlib_set_next_frame_buffer (vm, node, next0, pi0);
258 vlib_set_next_frame_buffer (vm, node, next1, pi1);
262 vlib_put_next_frame (vm, node, next, n_left_to_next);
264 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
270 while (n_left_from > 0 && n_left_to_next > 0)
274 __attribute__((unused)) tcp_header_t * tcp0;
275 ip_lookup_next_t next0;
276 const load_balance_t *lb0;
277 ip4_fib_mtrie_t * mtrie0;
278 ip4_fib_mtrie_leaf_t leaf0;
279 ip4_address_t * dst_addr0;
280 __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
281 flow_hash_config_t flow_hash_config0;
282 const dpo_id_t *dpo0;
288 p0 = vlib_get_buffer (vm, pi0);
290 ip0 = vlib_buffer_get_current (p0);
292 dst_addr0 = &ip0->dst_address;
294 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
295 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
296 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
298 if (! lookup_for_responses_to_locally_received_packets)
300 mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
302 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
304 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
307 tcp0 = (void *) (ip0 + 1);
309 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
310 || ip0->protocol == IP_PROTOCOL_UDP);
312 if (! lookup_for_responses_to_locally_received_packets)
313 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
315 if (! lookup_for_responses_to_locally_received_packets)
316 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
318 if (! lookup_for_responses_to_locally_received_packets)
319 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
321 if (lookup_for_responses_to_locally_received_packets)
322 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
325 /* Handle default route. */
326 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
327 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
330 lb0 = load_balance_get (lbi0);
332 /* Use flow hash to compute multipath adjacency. */
333 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
334 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
336 flow_hash_config0 = lb0->lb_hash_config;
338 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
339 ip4_compute_flow_hash (ip0, flow_hash_config0);
342 ASSERT (lb0->lb_n_buckets > 0);
343 ASSERT (is_pow2 (lb0->lb_n_buckets));
345 dpo0 = load_balance_get_bucket_i(lb0,
347 (lb0->lb_n_buckets_minus_1)));
349 next0 = dpo0->dpoi_next_node;
350 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
352 vlib_increment_combined_counter
353 (cm, cpu_index, lbi0, 1,
354 vlib_buffer_length_in_chain (vm, p0));
361 if (PREDICT_FALSE (next0 != next))
364 vlib_put_next_frame (vm, node, next, n_left_to_next);
366 vlib_get_next_frame (vm, node, next,
367 to_next, n_left_to_next);
374 vlib_put_next_frame (vm, node, next, n_left_to_next);
377 if (node->flags & VLIB_NODE_FLAG_TRACE)
378 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
380 return frame->n_vectors;
383 /** @brief IPv4 lookup node.
386 This is the main IPv4 lookup dispatch node; it calls ip4_lookup_inline
386 with lookup_for_responses_to_locally_received_packets set to 0.
388 @param vm vlib_main_t corresponding to the current thread
389 @param node vlib_node_runtime_t
390 @param frame vlib_frame_t whose contents should be dispatched
392 @par Graph mechanics: buffer metadata, next index usage
395 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
396 - Indicates the @c sw_if_index value of the interface that the
397 packet was received on.
398 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
399 - When the value is @c ~0 then the node performs a longest prefix
400 match (LPM) for the packet destination address in the FIB attached
401 to the receive interface.
402 - Otherwise perform LPM for the packet destination address in the
403 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
404 value (0, 1, ...) and not a VRF id.
407 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
408 - The index (@c dpoi_index) of the DPO selected from the
408 load-balance bucket for this packet.
411 - Dispatches the packet to the next node stored in the selected DPO,
412 @c dpo->dpoi_next_node
413 (where @c dpo is the load-balance bucket chosen by the lookup).
416 ip4_lookup (vlib_main_t * vm,
417 vlib_node_runtime_t * node,
418 vlib_frame_t * frame)
420 return ip4_lookup_inline (vm, node, frame,
421 /* lookup_for_responses_to_locally_received_packets */ 0);
/* Trace formatter used by the node below; defined later in this file. */
425 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph node registration for "ip4-lookup": vectors of u32 buffer indices,
   IP_LOOKUP_N_NEXT next nodes taken from IP4_LOOKUP_NEXT_NODES. */
427 VLIB_REGISTER_NODE (ip4_lookup_node) = {
428 .function = ip4_lookup,
429 .name = "ip4-lookup",
430 .vector_size = sizeof (u32),
432 .format_trace = format_ip4_lookup_trace,
433 .n_next_nodes = IP_LOOKUP_N_NEXT,
434 .next_nodes = IP4_LOOKUP_NEXT_NODES,
437 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
/*
 * ip4_load_balance: node function for "ip4-load-balance".
 *
 * On entry ip.adj_index[VLIB_TX] of each buffer holds a load-balance index.
 * The function computes the flow hash from the IPv4 header using the
 * load-balance object's hash config, picks the bucket with
 * (flow_hash & lb_n_buckets_minus_1), overwrites ip.adj_index[VLIB_TX] with
 * the bucket DPO's index, bumps the lbm_via_counters combined counter and
 * enqueues to the DPO's next node.
 *
 * Returns frame->n_vectors.
 */
440 ip4_load_balance (vlib_main_t * vm,
441 vlib_node_runtime_t * node,
442 vlib_frame_t * frame)
444 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
445 u32 n_left_from, n_left_to_next, * from, * to_next;
446 ip_lookup_next_t next;
447 u32 cpu_index = os_get_cpu_number();
449 from = vlib_frame_vector_args (frame);
450 n_left_from = frame->n_vectors;
451 next = node->cached_next_index;
/* Trace before processing: ip.adj_index[VLIB_TX] still holds the incoming
   load-balance index at this point and is overwritten in the loop below. */
453 if (node->flags & VLIB_NODE_FLAG_TRACE)
454 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
456 while (n_left_from > 0)
458 vlib_get_next_frame (vm, node, next,
459 to_next, n_left_to_next);
462 while (n_left_from > 0 && n_left_to_next > 0)
464 ip_lookup_next_t next0;
465 const load_balance_t *lb0;
468 const ip4_header_t *ip0;
469 const dpo_id_t *dpo0;
474 p0 = vlib_get_buffer (vm, pi0);
476 ip0 = vlib_buffer_get_current (p0);
477 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
479 lb0 = load_balance_get(lbi0);
/* Unlike the lookup node, the hash is computed unconditionally here. */
480 hc0 = lb0->lb_hash_config;
481 vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
483 dpo0 = load_balance_get_bucket_i(lb0,
484 vnet_buffer(p0)->ip.flow_hash &
485 (lb0->lb_n_buckets_minus_1));
487 next0 = dpo0->dpoi_next_node;
488 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
490 vlib_increment_combined_counter
491 (cm, cpu_index, lbi0, 1,
492 vlib_buffer_length_in_chain (vm, p0));
499 if (PREDICT_FALSE (next0 != next))
502 vlib_put_next_frame (vm, node, next, n_left_to_next);
504 vlib_get_next_frame (vm, node, next,
505 to_next, n_left_to_next);
512 vlib_put_next_frame (vm, node, next, n_left_to_next);
515 return frame->n_vectors;
/* Trace formatter used by the node below; defined later in this file. */
518 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
/* Graph node registration for "ip4-load-balance"; declared as a sibling of
   "ip4-lookup". */
520 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
521 .function = ip4_load_balance,
522 .name = "ip4-load-balance",
523 .vector_size = sizeof (u32),
524 .sibling_of = "ip4-lookup",
526 .format_trace = format_ip4_forward_next_trace,
529 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
531 /* get first interface address */
/* Iterates the interface addresses of sw_if_index (honoring unnumbered
   interfaces) and reports the matching ip_interface_address_t through
   result_ia, or 0 when no address was found.
   NOTE(review): the statements that set result and the return are not
   visible in this excerpt; confirm against the full source. */
533 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
534 ip_interface_address_t ** result_ia)
536 ip_lookup_main_t * lm = &im->lookup_main;
537 ip_interface_address_t * ia = 0;
538 ip4_address_t * result = 0;
540 foreach_ip_interface_address (lm, ia, sw_if_index,
541 1 /* honor unnumbered */,
543 ip4_address_t * a = ip_interface_address_get_address (lm, ia);
548 *result_ia = result ? ia : 0;
/*
 * ip4_add_interface_routes: install the FIB entries for interface address a
 * in the table fib_index, all sourced as FIB_SOURCE_INTERFACE.
 *
 * Visible steps: a CONNECTED|ATTACHED entry whose adjacency is saved in
 * a->neighbor_probe_adj_index, an optional classify DPO entry when the
 * interface has a classify table configured, and a CONNECTED|LOCAL entry.
 * NOTE(review): the prefixes passed to each call are on lines not visible
 * in this excerpt.
 */
553 ip4_add_interface_routes (u32 sw_if_index,
554 ip4_main_t * im, u32 fib_index,
555 ip_interface_address_t * a)
557 ip_lookup_main_t * lm = &im->lookup_main;
558 ip4_address_t * address = ip_interface_address_get_address (lm, a);
560 .fp_len = a->address_length,
561 .fp_proto = FIB_PROTOCOL_IP4,
562 .fp_addr.ip4 = *address,
/* Start with no probe adjacency; it is filled in below from the FIB entry. */
565 a->neighbor_probe_adj_index = ~0;
569 fib_node_index_t fei;
571 fei = fib_table_entry_update_one_path(fib_index,
573 FIB_SOURCE_INTERFACE,
574 (FIB_ENTRY_FLAG_CONNECTED |
575 FIB_ENTRY_FLAG_ATTACHED),
577 NULL, /* No next-hop address */
579 ~0, // invalid FIB index
582 FIB_ROUTE_PATH_FLAG_NONE);
583 a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
/* Only consult the classify table vector when it covers this interface. */
588 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
590 u32 classify_table_index =
591 lm->classify_table_index_by_sw_if_index [sw_if_index];
592 if (classify_table_index != (u32) ~0)
594 dpo_id_t dpo = DPO_NULL;
599 classify_dpo_create(FIB_PROTOCOL_IP4,
600 classify_table_index));
602 fib_table_entry_special_dpo_add(fib_index,
611 fib_table_entry_update_one_path(fib_index,
613 FIB_SOURCE_INTERFACE,
614 (FIB_ENTRY_FLAG_CONNECTED |
615 FIB_ENTRY_FLAG_LOCAL),
619 ~0, // invalid FIB index
622 FIB_ROUTE_PATH_FLAG_NONE);
/* ip4_del_interface_routes: remove the FIB_SOURCE_INTERFACE entries for
   address/address_length from table fib_index. Two deletes are issued,
   matching the entries added by ip4_add_interface_routes.
   NOTE(review): the prefix passed to each delete is not visible here. */
626 ip4_del_interface_routes (ip4_main_t * im,
628 ip4_address_t * address,
632 .fp_len = address_length,
633 .fp_proto = FIB_PROTOCOL_IP4,
634 .fp_addr.ip4 = *address,
639 fib_table_entry_delete(fib_index,
641 FIB_SOURCE_INTERFACE);
645 fib_table_entry_delete(fib_index,
647 FIB_SOURCE_INTERFACE);
/*
 * ip4_sw_interface_enable_disable: reference-counted enable/disable of IPv4
 * on sw_if_index.
 *
 * im->ip_enabled_by_sw_if_index[sw_if_index] is incremented on enable and
 * decremented on disable. For the unicast and multicast rx feature configs
 * the respective lookup feature is added or removed, and the resulting
 * config index is stored back in config_index_by_sw_if_index.
 */
651 ip4_sw_interface_enable_disable (u32 sw_if_index,
654 vlib_main_t * vm = vlib_get_main();
655 ip4_main_t * im = &ip4_main;
656 ip_lookup_main_t * lm = &im->lookup_main;
658 u32 lookup_feature_index;
/* Grow the per-interface enable counter vector, new slots start at 0. */
660 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
663 * enable/disable only on the 1<->0 transition
667 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
672 ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
673 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
/* Covers the rx unicast and rx multicast feature configs only. */
677 for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
679 ip_config_main_t * cm = &lm->feature_config_mains[cast];
680 vnet_config_main_t * vcm = &cm->config_main;
682 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
683 ci = cm->config_index_by_sw_if_index[sw_if_index];
685 if (cast == VNET_IP_RX_UNICAST_FEAT)
686 lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
688 lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
691 ci = vnet_config_add_feature (vm, vcm,
693 lookup_feature_index,
695 /* # bytes of config data */ 0);
697 ci = vnet_config_del_feature (vm, vcm,
699 lookup_feature_index,
701 /* # bytes of config data */ 0);
702 cm->config_index_by_sw_if_index[sw_if_index] = ci;
/*
 * ip4_add_del_interface_address_internal: add or delete an IPv4 address on
 * sw_if_index.
 *
 * Visible steps: reject an address that overlaps an existing address on the
 * same interface, update the interface address pool through
 * ip_interface_address_add_del, enable or disable IPv4 on the interface,
 * add or delete the interface routes, and invoke the registered
 * add_del_interface_address callbacks when the pool size changed.
 *
 * Returns a clib_error_t on conflict; `error` is initialised to 0.
 * NOTE(review): the final return and cleanup are not visible in this excerpt.
 */
706 static clib_error_t *
707 ip4_add_del_interface_address_internal (vlib_main_t * vm,
709 ip4_address_t * address,
713 vnet_main_t * vnm = vnet_get_main();
714 ip4_main_t * im = &ip4_main;
715 ip_lookup_main_t * lm = &im->lookup_main;
716 clib_error_t * error = 0;
717 u32 if_address_index, elts_before;
718 ip4_address_fib_t ip4_af, * addr_fib = 0;
720 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
721 ip4_addr_fib_init (&ip4_af, address,
722 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
723 vec_add1 (addr_fib, ip4_af);
726 * there is no support for adj-fib handling in the presence of overlapping
727 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
732 /* When adding an address check that it does not conflict
733 with an existing address. */
734 ip_interface_address_t * ia;
735 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
736 0 /* honor unnumbered */,
738 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
/* Conflict if either prefix covers the other's address. */
740 if (ip4_destination_matches_route (im, address, x, ia->address_length)
741 || ip4_destination_matches_route (im, x, address, address_length))
742 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
743 format_ip4_address_and_length, address, address_length,
744 format_ip4_address_and_length, x, ia->address_length,
745 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* Remember the pool size so a change can be detected after the update. */
749 elts_before = pool_elts (lm->if_address_pool);
751 error = ip_interface_address_add_del
761 ip4_sw_interface_enable_disable(sw_if_index, !is_del);
764 ip4_del_interface_routes (im, ip4_af.fib_index, address,
767 ip4_add_interface_routes (sw_if_index,
768 im, ip4_af.fib_index,
770 (lm->if_address_pool, if_address_index));
772 /* If pool did not grow/shrink: add duplicate address. */
773 if (elts_before != pool_elts (lm->if_address_pool))
775 ip4_add_del_interface_address_callback_t * cb;
776 vec_foreach (cb, im->add_del_interface_address_callbacks)
777 cb->function (im, cb->function_opaque, sw_if_index,
778 address, address_length,
/* Public entry point: forwards its arguments to
   ip4_add_del_interface_address_internal and returns its result. */
789 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
790 ip4_address_t * address, u32 address_length,
793 return ip4_add_del_interface_address_internal
794 (vm, sw_if_index, address, address_length,
798 /* Built-in ip4 unicast rx feature path definition */
/* Each registration names a graph node, the feature it must run before, and
   the ip4_main field that receives its feature index. The runs_before
   constraints visible here order the features as: ip4-inacl,
   ip4-source-check-via-rx, ip4-source-check-via-any, ip4-policer-classify,
   ipsec-input-ip4, vpath-input-ip4, ip4-lookup, ip4-drop; the
   ip4-source-and-port-range-check-rx feature is only constrained to run
   before ip4-policer-classify. */
799 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
800 .node_name = "ip4-inacl",
801 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
802 .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
805 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
806 .node_name = "ip4-source-check-via-rx",
807 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
809 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
812 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
813 .node_name = "ip4-source-check-via-any",
814 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
816 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
820 .node_name = "ip4-source-and-port-range-check-rx",
821 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
823 &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
827 .node_name = "ip4-policer-classify",
828 .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
830 &ip4_main.ip4_unicast_rx_feature_policer_classify,
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
834 .node_name = "ipsec-input-ip4",
835 .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
836 .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
839 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
840 .node_name = "vpath-input-ip4",
841 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
842 .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
845 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
846 .node_name = "ip4-lookup",
847 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
848 .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
851 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
852 .node_name = "ip4-drop",
853 .runs_before = 0, /* not before any other features */
854 .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
858 /* Built-in ip4 multicast rx feature path definition */
/* Ordering given by the runs_before constraints: vpath-input-ip4,
   ip4-lookup-multicast, ip4-drop. */
859 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
860 .node_name = "vpath-input-ip4",
861 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
862 .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
865 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
866 .node_name = "ip4-lookup-multicast",
867 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
868 .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
871 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
872 .node_name = "ip4-drop",
873 .runs_before = 0, /* last feature */
874 .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
/* Names of the graph nodes from which the rx and tx feature paths start;
   ip4_feature_init passes one of these tables per feature class. */
877 static char * rx_feature_start_nodes[] =
878 { "ip4-input", "ip4-input-no-checksum"};
880 static char * tx_feature_start_nodes[] =
881 { "ip4-rewrite-transit"};
883 /* Source and port-range check ip4 tx feature path definition */
884 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
885 .node_name = "ip4-source-and-port-range-check-tx",
886 .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
888 &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
892 /* Built-in ip4 tx feature path definition */
893 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
894 .node_name = "interface-output",
895 .runs_before = 0, /* not before any other features */
896 .feature_index = &ip4_main.ip4_tx_feature_interface_output,
/* ip4_feature_init: initialise every feature config (one per cast value up
   to VNET_N_IP_FEAT) via ip_feature_init_cast. Casts below VNET_IP_TX_FEAT
   use the rx start-node table, the others use the tx start-node table.
   NOTE(review): the error return paths are not visible in this excerpt. */
899 static clib_error_t *
900 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
902 ip_lookup_main_t * lm = &im->lookup_main;
903 clib_error_t * error;
905 ip_config_main_t * cm;
906 vnet_config_main_t * vcm;
907 char **feature_start_nodes;
908 int feature_start_len;
910 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
912 cm = &lm->feature_config_mains[cast];
913 vcm = &cm->config_main;
915 if (cast < VNET_IP_TX_FEAT)
917 feature_start_nodes = rx_feature_start_nodes;
918 feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
922 feature_start_nodes = tx_feature_start_nodes;
923 feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
926 if ((error = ip_feature_init_cast (vm, cm, vcm,
930 VNET_L3_PACKET_TYPE_IP4)))
/*
 * ip4_sw_interface_add_del: software-interface add/delete callback,
 * registered through VNET_SW_INTERFACE_ADD_DEL_FUNCTION below.
 *
 * For every feature class it adds or removes the terminal feature of that
 * class on the interface: the drop feature for rx unicast and rx multicast,
 * interface-output for tx. On the delete path the IPv4 enable count of the
 * interface is reset to 0. Always returns 0 (no error).
 */
937 static clib_error_t *
938 ip4_sw_interface_add_del (vnet_main_t * vnm,
942 vlib_main_t * vm = vnm->vlib_main;
943 ip4_main_t * im = &ip4_main;
944 ip_lookup_main_t * lm = &im->lookup_main;
948 /* Fill in lookup tables with default table (0). */
949 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
951 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
953 ip_config_main_t * cm = &lm->feature_config_mains[cast];
954 vnet_config_main_t * vcm = &cm->config_main;
956 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
957 ci = cm->config_index_by_sw_if_index[sw_if_index];
959 if (cast == VNET_IP_RX_UNICAST_FEAT)
960 feature_index = im->ip4_unicast_rx_feature_drop;
961 else if (cast == VNET_IP_RX_MULTICAST_FEAT)
962 feature_index = im->ip4_multicast_rx_feature_drop;
964 feature_index = im->ip4_tx_feature_interface_output;
967 ci = vnet_config_add_feature (vm, vcm,
971 /* # bytes of config data */ 0);
974 ci = vnet_config_del_feature (vm, vcm, ci,
977 /* # bytes of config data */ 0);
/* Guarded because the enable-count vector may be shorter than sw_if_index. */
978 if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
979 im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
981 cm->config_index_by_sw_if_index[sw_if_index] = ci;
983 * note: do not update the tx feature count here.
987 return /* no error */ 0;
990 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
992 /* Global IP4 main. */
/*
 * ip4_lookup_init: init function registered through VLIB_INIT_FUNCTION below.
 *
 * Visible steps: fill im->fib_masks with network-byte-order prefix masks,
 * initialise the generic lookup main for IPv4, create and lock FIB table 0,
 * hook the packet-generator edit function for the ip4-lookup node, build the
 * ARP request packet template, and run ip4_feature_init.
 */
996 ip4_lookup_init (vlib_main_t * vm)
998 ip4_main_t * im = &ip4_main;
999 clib_error_t * error;
1002 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* NOTE(review): any guard around this shift for the extreme values of i is
   not visible in this excerpt; confirm against the full source. */
1007 m = pow2_mask (i) << (32 - i);
1010 im->fib_masks[i] = clib_host_to_net_u32 (m);
1013 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1015 /* Create FIB with index 0 and table id of 0. */
1016 fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1020 pn = pg_get_node (ip4_lookup_node.index);
1021 pn->unformat_edit = unformat_pg_ip4_header;
1025 ethernet_arp_header_t h;
1027 memset (&h, 0, sizeof (h));
1029 /* Set target ethernet address to all zeros. */
1030 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
/* Field setters: _16 stores a 16-bit value in network byte order, _8 stores
   a byte as is. */
1032 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1033 #define _8(f,v) h.f = v;
1034 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1035 _16 (l3_type, ETHERNET_TYPE_IP4);
1036 _8 (n_l2_address_bytes, 6);
1037 _8 (n_l3_address_bytes, 4);
1038 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1042 vlib_packet_template_init (vm,
1043 &im->ip4_arp_request_packet_template,
1046 /* alloc chunk size */ 8,
1050 error = ip4_feature_init (vm, im);
1055 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Trace record shared by the ip4 forwarding nodes. The code in this file
   fills its adj_index, flow_hash, fib_index and packet_data members.
   NOTE(review): the member declarations other than packet_data are not
   visible in this excerpt. */
1058 /* Adjacency taken. */
1063 /* Packet data, possibly *after* rewrite. */
1064 u8 packet_data[64 - 1*sizeof(u32)];
1065 } ip4_forward_next_trace_t;
/* Trace formatter: prints the captured packet data as an IPv4 header,
   indented to the current indent of s. The vm and node arguments are
   consumed from args but unused. */
1067 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1069 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1070 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1071 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1072 uword indent = format_get_indent (s);
1073 s = format (s, "%U%U",
1074 format_white_space, indent,
1075 format_ip4_header, t->packet_data);
/* Trace formatter for the lookup node: first line shows the FIB index, the
   recorded adj_index (formatted with format_ip_adjacency) and the flow hash;
   the second line shows the captured packet data as an IPv4 header. */
1079 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1081 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1082 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1083 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1084 vnet_main_t * vnm = vnet_get_main();
1085 uword indent = format_get_indent (s);
1087 s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1088 t->fib_index, t->adj_index, format_ip_adjacency,
1089 vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
1091 s = format (s, "\n%U%U",
1092 format_white_space, indent,
1093 format_ip4_header, t->packet_data);
/* Trace formatter for rewrite nodes: first line shows the tx sw_if_index,
   the adjacency and the flow hash; the second line shows the captured packet
   data through format_ip_adjacency_packet_data. */
1097 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1099 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1100 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1101 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1102 vnet_main_t * vnm = vnet_get_main();
1103 uword indent = format_get_indent (s);
/* The value printed as tx_sw_if_index is t->fib_index: the trace helper
   stores sw_if_index[VLIB_TX] in that field whenever it is not ~0. */
1105 s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1106 t->fib_index, t->adj_index, format_ip_adjacency,
1107 vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
1109 s = format (s, "\n%U%U",
1110 format_white_space, indent,
1111 format_ip_adjacency_packet_data,
1113 t->packet_data, sizeof (t->packet_data));
1117 /* Common trace function for all ip4-forward next nodes. */
/*
 * For every buffer in the frame that has VLIB_BUFFER_IS_TRACED set, adds a
 * trace record holding:
 *  - adj_index: ip.adj_index[which_adj_index] of the buffer;
 *  - flow_hash: ip.flow_hash of the buffer;
 *  - fib_index: sw_if_index[VLIB_TX] when it is not ~0, otherwise the FIB
 *    index bound to the RX interface;
 *  - packet_data: the first sizeof(packet_data) bytes at the buffer's
 *    current data pointer.
 * Buffers are handled two at a time with prefetch, then one at a time.
 */
1119 ip4_forward_next_trace (vlib_main_t * vm,
1120 vlib_node_runtime_t * node,
1121 vlib_frame_t * frame,
1122 vlib_rx_or_tx_t which_adj_index)
1125 ip4_main_t * im = &ip4_main;
1127 n_left = frame->n_vectors;
1128 from = vlib_frame_vector_args (frame);
1133 vlib_buffer_t * b0, * b1;
1134 ip4_forward_next_trace_t * t0, * t1;
1136 /* Prefetch next iteration. */
1137 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1138 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1143 b0 = vlib_get_buffer (vm, bi0);
1144 b1 = vlib_get_buffer (vm, bi1);
1146 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1148 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1149 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1150 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1151 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1152 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1153 vec_elt (im->fib_index_by_sw_if_index,
1154 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1156 clib_memcpy (t0->packet_data,
1157 vlib_buffer_get_current (b0),
1158 sizeof (t0->packet_data));
1160 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1162 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1163 t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1164 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1165 t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1166 vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1167 vec_elt (im->fib_index_by_sw_if_index,
1168 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1169 clib_memcpy (t1->packet_data,
1170 vlib_buffer_get_current (b1),
1171 sizeof (t1->packet_data));
1181 ip4_forward_next_trace_t * t0;
1185 b0 = vlib_get_buffer (vm, bi0);
1187 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1189 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1190 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1191 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1192 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1193 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1194 vec_elt (im->fib_index_by_sw_if_index,
1195 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1196 clib_memcpy (t0->packet_data,
1197 vlib_buffer_get_current (b0),
1198 sizeof (t0->packet_data));
/* Shared body of the drop and punt nodes: hands every buffer of the frame to
   vlib_error_drop_buffers, with errors referring to ip4_input_node, and
   traces the frame when tracing is enabled on the node.
   NOTE(review): the remaining arguments of vlib_error_drop_buffers and the
   return statement are not visible in this excerpt. */
1206 ip4_drop_or_punt (vlib_main_t * vm,
1207 vlib_node_runtime_t * node,
1208 vlib_frame_t * frame,
1209 ip4_error_t error_code)
1211 u32 * buffers = vlib_frame_vector_args (frame);
1212 uword n_packets = frame->n_vectors;
1214 vlib_error_drop_buffers (vm, node,
1219 ip4_input_node.index,
1222 if (node->flags & VLIB_NODE_FLAG_TRACE)
1223 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function for the drop node: ip4_drop_or_punt with
   IP4_ERROR_ADJACENCY_DROP. */
1229 ip4_drop (vlib_main_t * vm,
1230 vlib_node_runtime_t * node,
1231 vlib_frame_t * frame)
1232 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
/* Node function for the punt node: ip4_drop_or_punt with
   IP4_ERROR_ADJACENCY_PUNT. */
1235 ip4_punt (vlib_main_t * vm,
1236 vlib_node_runtime_t * node,
1237 vlib_frame_t * frame)
1238 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
/*
 * Graph-node registration for ip4_drop.  Frame elements are u32-sized and
 * trace records are rendered by format_ip4_forward_next_trace.
 * NOTE(review): the node name and next-node table are not visible in this
 * excerpt.
 */
1240 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1241 .function = ip4_drop,
1243 .vector_size = sizeof (u32),
1245 .format_trace = format_ip4_forward_next_trace,
/* Multiarch wrapper macro applied to the ip4_drop node function. */
1253 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
/*
 * Graph-node registration for ip4_punt.  Frame elements are u32-sized and
 * trace records are rendered by format_ip4_forward_next_trace.
 * NOTE(review): the node name and next-node table are not visible in this
 * excerpt.
 */
1255 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1256 .function = ip4_punt,
1258 .vector_size = sizeof (u32),
1260 .format_trace = format_ip4_forward_next_trace,
/* Multiarch wrapper macro applied to the ip4_punt node function. */
1268 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1270 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * The sum is seeded with the L4 payload length and the IP protocol, then
 * the source and destination addresses are added, then the payload bytes
 * are folded in buffer by buffer, following p0->next_buffer when the
 * payload extends past the current buffer.  The folded, complemented
 * result is stored in sum16.
 *
 * NOTE(review): the remaining parameter(s), the declarations of sum0 and
 * sum16, the loop construct and the return statement are not visible in
 * this excerpt; callers in this file pass (vm, p0, ip0).
 */
1272 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1276 u32 ip_header_length, payload_length_host_byte_order;
1277 u32 n_this_buffer, n_bytes_left;
1279 void * data_this_buffer;
1281 /* Initialize checksum with ip header. */
1282 ip_header_length = ip4_header_bytes (ip0);
/* L4 payload length = IP total length minus IP header length. */
1283 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1284 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
/* On 32-bit words add src and dst separately; otherwise a single u64
   load starting at src_address is added (presumably covering both
   addresses because they are adjacent in the header -- confirm). */
1286 if (BITS (uword) == 32)
1288 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1289 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1292 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1294 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1295 data_this_buffer = (void *) ip0 + ip_header_length;
/* Clamp the first chunk to what the first buffer actually holds. */
1296 if (n_this_buffer + ip_header_length > p0->current_length)
1297 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1300 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1301 n_bytes_left -= n_this_buffer;
1302 if (n_bytes_left == 0)
/* More payload expected: it must live in a chained buffer. */
1305 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1306 p0 = vlib_get_buffer (vm, p0->next_buffer);
1307 data_this_buffer = vlib_buffer_get_current (p0);
1308 n_this_buffer = p0->current_length;
/* Fold the accumulated sum and take its one's complement. */
1311 sum16 = ~ ip_csum_fold (sum0);
/*
 * Validate the TCP/UDP checksum of the packet in p0 and record the outcome
 * in p0->flags.
 *
 * A UDP packet whose checksum field is zero is marked both COMPUTED and
 * CORRECT without computing anything.  Otherwise the checksum is computed
 * in software; IP_BUFFER_L4_CHECKSUM_COMPUTED is always set and the
 * CORRECT bit is set only when the computed sum is zero.
 *
 * NOTE(review): the return statement is not visible in this excerpt;
 * callers in this file assign the result to their copy of the buffer
 * flags.
 */
1317 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1319 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1320 udp_header_t * udp0;
1323 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1324 || ip0->protocol == IP_PROTOCOL_UDP);
/* NOTE(review): takes the L4 header to start right after a fixed-size
   ip4_header_t, i.e. does not account for IP options -- confirm. */
1326 udp0 = (void *) (ip0 + 1);
1327 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1329 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1330 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1334 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* (sum16 == 0) is 0 or 1; shifting it places it at the CORRECT bit. */
1336 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1337 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1343 ip4_local (vlib_main_t * vm,
1344 vlib_node_runtime_t * node,
1345 vlib_frame_t * frame)
1347 ip4_main_t * im = &ip4_main;
1348 ip_lookup_main_t * lm = &im->lookup_main;
1349 ip_local_next_t next_index;
1350 u32 * from, * to_next, n_left_from, n_left_to_next;
1351 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1353 from = vlib_frame_vector_args (frame);
1354 n_left_from = frame->n_vectors;
1355 next_index = node->cached_next_index;
1357 if (node->flags & VLIB_NODE_FLAG_TRACE)
1358 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1360 while (n_left_from > 0)
1362 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1364 while (n_left_from >= 4 && n_left_to_next >= 2)
1366 vlib_buffer_t * p0, * p1;
1367 ip4_header_t * ip0, * ip1;
1368 udp_header_t * udp0, * udp1;
1369 ip4_fib_mtrie_t * mtrie0, * mtrie1;
1370 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1371 const dpo_id_t *dpo0, *dpo1;
1372 const load_balance_t *lb0, *lb1;
1373 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1374 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1375 i32 len_diff0, len_diff1;
1376 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1377 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1380 pi0 = to_next[0] = from[0];
1381 pi1 = to_next[1] = from[1];
1385 n_left_to_next -= 2;
1387 p0 = vlib_get_buffer (vm, pi0);
1388 p1 = vlib_get_buffer (vm, pi1);
1390 ip0 = vlib_buffer_get_current (p0);
1391 ip1 = vlib_buffer_get_current (p1);
1393 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1394 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1395 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1396 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1398 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1399 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1401 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1403 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1404 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1406 /* Treat IP frag packets as "experimental" protocol for now
1407 until support of IP frag reassembly is implemented */
1408 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1409 proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1410 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1411 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1412 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1413 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1418 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1419 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1421 udp0 = ip4_next_header (ip0);
1422 udp1 = ip4_next_header (ip1);
1424 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1425 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1426 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1428 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1429 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1431 /* Verify UDP length. */
1432 ip_len0 = clib_net_to_host_u16 (ip0->length);
1433 ip_len1 = clib_net_to_host_u16 (ip1->length);
1434 udp_len0 = clib_net_to_host_u16 (udp0->length);
1435 udp_len1 = clib_net_to_host_u16 (udp1->length);
1437 len_diff0 = ip_len0 - udp_len0;
1438 len_diff1 = ip_len1 - udp_len1;
1440 len_diff0 = is_udp0 ? len_diff0 : 0;
1441 len_diff1 = is_udp1 ? len_diff1 : 0;
1443 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1444 & good_tcp_udp0 & good_tcp_udp1)))
1449 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1450 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1452 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1453 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1458 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1459 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1461 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1462 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1466 good_tcp_udp0 &= len_diff0 >= 0;
1467 good_tcp_udp1 &= len_diff1 >= 0;
1469 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1470 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1472 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1474 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1475 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1477 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1478 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1479 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1481 error1 = (is_tcp_udp1 && ! good_tcp_udp1
1482 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1485 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1486 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1487 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1488 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1490 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1491 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1493 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1494 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1496 lb0 = load_balance_get(lbi0);
1497 lb1 = load_balance_get(lbi1);
1498 dpo0 = load_balance_get_bucket_i(lb0, 0);
1499 dpo1 = load_balance_get_bucket_i(lb1, 0);
1502 * Must have a route to source otherwise we drop the packet.
1503 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1505 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1506 && dpo0->dpoi_type != DPO_ADJACENCY
1507 && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1508 && dpo0->dpoi_type != DPO_RECEIVE
1509 && dpo0->dpoi_type != DPO_DROP
1510 && dpo0->dpoi_type != DPO_ADJACENCY_GLEAN
1511 && ip0->dst_address.as_u32 != 0xFFFFFFFF
1512 ? IP4_ERROR_SRC_LOOKUP_MISS
1514 error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1515 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1517 error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1518 && dpo1->dpoi_type != DPO_ADJACENCY
1519 && dpo1->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1520 && dpo1->dpoi_type != DPO_RECEIVE
1521 && dpo1->dpoi_type != DPO_DROP
1522 && dpo1->dpoi_type != DPO_ADJACENCY_GLEAN
1523 && ip1->dst_address.as_u32 != 0xFFFFFFFF
1524 ? IP4_ERROR_SRC_LOOKUP_MISS
1526 error1 = (dpo0->dpoi_type == DPO_RECEIVE ?
1527 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1530 next0 = lm->local_next_by_ip_protocol[proto0];
1531 next1 = lm->local_next_by_ip_protocol[proto1];
1533 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1534 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1536 p0->error = error0 ? error_node->errors[error0] : 0;
1537 p1->error = error1 ? error_node->errors[error1] : 0;
1539 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1541 if (PREDICT_FALSE (enqueue_code != 0))
1543 switch (enqueue_code)
1549 n_left_to_next += 1;
1550 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1556 n_left_to_next += 1;
1557 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1561 /* A B B or A B C */
1563 n_left_to_next += 2;
1564 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1565 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1568 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1570 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1577 while (n_left_from > 0 && n_left_to_next > 0)
1581 udp_header_t * udp0;
1582 ip4_fib_mtrie_t * mtrie0;
1583 ip4_fib_mtrie_leaf_t leaf0;
1584 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1586 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1587 load_balance_t *lb0;
1588 const dpo_id_t *dpo0;
1590 pi0 = to_next[0] = from[0];
1594 n_left_to_next -= 1;
1596 p0 = vlib_get_buffer (vm, pi0);
1598 ip0 = vlib_buffer_get_current (p0);
1600 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1601 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1603 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1605 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1607 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1609 /* Treat IP frag packets as "experimental" protocol for now
1610 until support of IP frag reassembly is implemented */
1611 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1612 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1613 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1617 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1619 udp0 = ip4_next_header (ip0);
1621 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1622 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1624 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1626 /* Verify UDP length. */
1627 ip_len0 = clib_net_to_host_u16 (ip0->length);
1628 udp_len0 = clib_net_to_host_u16 (udp0->length);
1630 len_diff0 = ip_len0 - udp_len0;
1632 len_diff0 = is_udp0 ? len_diff0 : 0;
1634 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1639 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1640 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1642 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1643 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1647 good_tcp_udp0 &= len_diff0 >= 0;
1649 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1651 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1653 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1655 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1656 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1657 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1660 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1661 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1663 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1664 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1666 lb0 = load_balance_get(lbi0);
1667 dpo0 = load_balance_get_bucket_i(lb0, 0);
1669 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1670 vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1673 /* Must have a route to source otherwise we drop the packet. */
1674 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1675 && dpo0->dpoi_type != DPO_ADJACENCY
1676 && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1677 && dpo0->dpoi_type != DPO_RECEIVE
1678 && dpo0->dpoi_type != DPO_DROP
1679 && dpo0->dpoi_type != DPO_ADJACENCY_GLEAN
1680 && ip0->dst_address.as_u32 != 0xFFFFFFFF
1681 ? IP4_ERROR_SRC_LOOKUP_MISS
1683 /* Packet originated from a local address => spoofing */
1684 error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1685 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1688 next0 = lm->local_next_by_ip_protocol[proto0];
1690 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1692 p0->error = error0? error_node->errors[error0] : 0;
1694 if (PREDICT_FALSE (next0 != next_index))
1696 n_left_to_next += 1;
1697 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1700 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1703 n_left_to_next -= 1;
1707 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1710 return frame->n_vectors;
/*
 * Graph-node registration for "ip4-local".  The node has IP_LOCAL_N_NEXT
 * next nodes; the entries visible here map drop/punt to the error nodes,
 * UDP to "ip4-udp-lookup" and ICMP to "ip4-icmp-input".
 */
1713 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1714 .function = ip4_local,
1715 .name = "ip4-local",
1716 .vector_size = sizeof (u32),
1718 .format_trace = format_ip4_forward_next_trace,
1720 .n_next_nodes = IP_LOCAL_N_NEXT,
1722 [IP_LOCAL_NEXT_DROP] = "error-drop",
1723 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1724 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1725 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
/* Multiarch wrapper macro applied to the ip4_local node function. */
1729 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
1731 void ip4_register_protocol (u32 protocol, u32 node_index)
1733 vlib_main_t * vm = vlib_get_main();
1734 ip4_main_t * im = &ip4_main;
1735 ip_lookup_main_t * lm = &im->lookup_main;
1737 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1738 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler for "show ip local".
 *
 * Prints a heading and then the number of every IP protocol whose entry in
 * lm->local_next_by_ip_protocol is something other than IP_LOCAL_NEXT_PUNT.
 * The input and cmd arguments are not used in the visible code.
 */
1741 static clib_error_t *
1742 show_ip_local_command_fn (vlib_main_t * vm,
1743 unformat_input_t * input,
1744 vlib_cli_command_t * cmd)
1746 ip4_main_t * im = &ip4_main;
1747 ip_lookup_main_t * lm = &im->lookup_main;
1750 vlib_cli_output (vm, "Protocols handled by ip4_local");
1751 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
/* A protocol counts as handled when it is not sent to punt. */
1753 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1754 vlib_cli_output (vm, "%d", i);
/* CLI registration: binds the "show ip local" path to
   show_ip_local_command_fn. */
1761 VLIB_CLI_COMMAND (show_ip_local, static) = {
1762 .path = "show ip local",
1763 .function = show_ip_local_command_fn,
1764 .short_help = "Show ip local protocol table",
1768 ip4_arp_inline (vlib_main_t * vm,
1769 vlib_node_runtime_t * node,
1770 vlib_frame_t * frame,
1773 vnet_main_t * vnm = vnet_get_main();
1774 ip4_main_t * im = &ip4_main;
1775 ip_lookup_main_t * lm = &im->lookup_main;
1776 u32 * from, * to_next_drop;
1777 uword n_left_from, n_left_to_next_drop, next_index;
1778 static f64 time_last_seed_change = -1e100;
1779 static u32 hash_seeds[3];
1780 static uword hash_bitmap[256 / BITS (uword)];
1783 if (node->flags & VLIB_NODE_FLAG_TRACE)
1784 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1786 time_now = vlib_time_now (vm);
1787 if (time_now - time_last_seed_change > 1e-3)
1790 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1791 sizeof (hash_seeds));
1792 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1793 hash_seeds[i] = r[i];
1795 /* Mark all hash keys as been no-seen before. */
1796 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1799 time_last_seed_change = time_now;
1802 from = vlib_frame_vector_args (frame);
1803 n_left_from = frame->n_vectors;
1804 next_index = node->cached_next_index;
1805 if (next_index == IP4_ARP_NEXT_DROP)
1806 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1808 while (n_left_from > 0)
1810 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1811 to_next_drop, n_left_to_next_drop);
1813 while (n_left_from > 0 && n_left_to_next_drop > 0)
1815 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1816 ip_adjacency_t * adj0;
1823 p0 = vlib_get_buffer (vm, pi0);
1825 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1826 adj0 = ip_get_adjacency (lm, adj_index0);
1827 ip0 = vlib_buffer_get_current (p0);
1830 * this is the Glean case, so we are ARPing for the
1831 * packet's destination
1837 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1838 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1842 a0 ^= ip0->dst_address.data_u32;
1846 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1850 hash_v3_finalize32 (a0, b0, c0);
1852 c0 &= BITS (hash_bitmap) - 1;
1853 c0 = c0 / BITS (uword);
1854 m0 = (uword) 1 << (c0 % BITS (uword));
1856 bm0 = hash_bitmap[c0];
1857 drop0 = (bm0 & m0) != 0;
1859 /* Mark it as seen. */
1860 hash_bitmap[c0] = bm0 | m0;
1864 to_next_drop[0] = pi0;
1866 n_left_to_next_drop -= 1;
1868 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1874 * Can happen if the control-plane is programming tables
1875 * with traffic flowing; at least that's today's lame excuse.
1877 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1878 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1880 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1883 /* Send ARP request. */
1887 ethernet_arp_header_t * h0;
1888 vnet_hw_interface_t * hw_if0;
1890 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1892 /* Add rewrite/encap string for ARP packet. */
1893 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1895 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1897 /* Src ethernet address in ARP header. */
1898 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1899 sizeof (h0->ip4_over_ethernet[0].ethernet));
1903 /* The interface's source address is stashed in the Glean Adj */
1904 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1906 /* Copy in destination address we are requesting. This is the
1907 * glean case, so it's the packet's destination.*/
1908 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1912 /* Src IP address in ARP header. */
1913 if (ip4_src_address_for_packet(lm, sw_if_index0,
1914 &h0->ip4_over_ethernet[0].ip4))
1916 /* No source address available */
1917 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1918 vlib_buffer_free(vm, &bi0, 1);
1922 /* Copy in destination address we are requesting from the
1924 h0->ip4_over_ethernet[1].ip4.data_u32 =
1925 adj0->sub_type.nbr.next_hop.ip4.as_u32;
1928 vlib_buffer_copy_trace_flag (vm, p0, bi0);
1929 b0 = vlib_get_buffer (vm, bi0);
1930 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1932 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1934 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1938 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1941 return frame->n_vectors;
1945 ip4_arp (vlib_main_t * vm,
1946 vlib_node_runtime_t * node,
1947 vlib_frame_t * frame)
1949 return (ip4_arp_inline(vm, node, frame, 0));
1953 ip4_glean (vlib_main_t * vm,
1954 vlib_node_runtime_t * node,
1955 vlib_frame_t * frame)
1957 return (ip4_arp_inline(vm, node, frame, 1));
/* Counter descriptions for the IP4_ARP_ERROR_* codes, indexed by error
   code; shared by the ip4-arp and ip4-glean node registrations. */
1960 static char * ip4_arp_error_strings[] = {
1961 [IP4_ARP_ERROR_DROP] = "address overflow drops",
1962 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1963 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1964 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1965 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1966 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/*
 * Graph-node registration for ip4_arp.  Uses the shared ARP error strings
 * and IP4_ARP_N_NEXT next nodes, of which IP4_ARP_NEXT_DROP goes to
 * "error-drop".
 * NOTE(review): the node name line is not visible in this excerpt.
 */
1969 VLIB_REGISTER_NODE (ip4_arp_node) = {
1970 .function = ip4_arp,
1972 .vector_size = sizeof (u32),
1974 .format_trace = format_ip4_forward_next_trace,
1976 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1977 .error_strings = ip4_arp_error_strings,
1979 .n_next_nodes = IP4_ARP_N_NEXT,
1981 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * Graph-node registration for "ip4-glean".  Mirrors the ip4_arp_node
 * registration: same error strings, same next-node count, with
 * IP4_ARP_NEXT_DROP going to "error-drop".
 */
1985 VLIB_REGISTER_NODE (ip4_glean_node) = {
1986 .function = ip4_glean,
1987 .name = "ip4-glean",
1988 .vector_size = sizeof (u32),
1990 .format_trace = format_ip4_forward_next_trace,
1992 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1993 .error_strings = ip4_arp_error_strings,
1995 .n_next_nodes = IP4_ARP_N_NEXT,
1997 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * foreach_notrace_ip4_arp_error lists the IP4_ARP_ERROR_* codes that
 * arp_notrace_init() registers with vnet_pcap_drop_trace_filter_add_del(),
 * using the error indices of the ip4-arp node runtime.  arp_notrace_init
 * is run as a VLIB init function.
 *
 * NOTE(review): the macro body, the per-entry helper macro definition and
 * the function's return statement are not visible in this excerpt.
 */
2001 #define foreach_notrace_ip4_arp_error \
2007 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2009 vlib_node_runtime_t *rt =
2010 vlib_node_get_runtime (vm, ip4_arp_node.index);
2012 /* don't trace ARP request packets */
2014 vnet_pcap_drop_trace_filter_add_del \
2015 (rt->errors[IP4_ARP_ERROR_##a], \
2017 foreach_notrace_ip4_arp_error;
2022 VLIB_INIT_FUNCTION(arp_notrace_init);
2025 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 * @param vm           vlib main.
 * @param dst          IPv4 address to ARP for.
 * @param sw_if_index  software interface to send the request on.
 * @return 0 on success, or a clib error when the interface is not admin-up
 *         or (per the error text) no interface address matches dst; in the
 *         latter case vnm->api_errno is also set.
 *
 * The request is built from im->ip4_arp_request_packet_template, filled
 * with the interface's hardware address and the matching interface
 * address, given the rewrite of the interface address's
 * neighbor_probe_adj_index adjacency, and handed to the hardware
 * interface's output node in a frame.
 */
2027 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2029 vnet_main_t * vnm = vnet_get_main();
2030 ip4_main_t * im = &ip4_main;
2031 ethernet_arp_header_t * h;
2032 ip4_address_t * src;
2033 ip_interface_address_t * ia;
2034 ip_adjacency_t * adj;
2035 vnet_hw_interface_t * hi;
2036 vnet_sw_interface_t * si;
2040 si = vnet_get_sw_interface (vnm, sw_if_index);
/* Refuse to probe through an interface that is administratively down. */
2042 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2044 return clib_error_return (0, "%U: interface %U down",
2045 format_ip4_address, dst,
2046 format_vnet_sw_if_index_name, vnm,
/* Find an address on the interface to use as the ARP sender address. */
2050 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2053 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2054 return clib_error_return
2055 (0, "no matching interface address for destination %U (interface %U)",
2056 format_ip4_address, dst,
2057 format_vnet_sw_if_index_name, vnm, sw_if_index);
2060 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2062 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2064 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender hardware address. */
2066 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
/* Sender IP = matching interface address, target IP = dst. */
2068 h->ip4_over_ethernet[0].ip4 = src[0];
2069 h->ip4_over_ethernet[1].ip4 = dst[0];
2071 b = vlib_get_buffer (vm, bi);
2072 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2074 /* Add encapsulation string for software interface (e.g. ethernet header). */
2075 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* Move the buffer start back over the rewrite bytes just written. */
2076 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the packet to the hardware interface's output node. */
2079 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2080 u32 * to_next = vlib_frame_vector_args (f);
2083 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2086 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline: drop on error, hand off
   for ARP, or generate an ICMP error. */
2090 IP4_REWRITE_NEXT_DROP,
2091 IP4_REWRITE_NEXT_ARP,
2092 IP4_REWRITE_NEXT_ICMP_ERROR,
2093 } ip4_rewrite_next_t;
2096 ip4_rewrite_inline (vlib_main_t * vm,
2097 vlib_node_runtime_t * node,
2098 vlib_frame_t * frame,
2099 int rewrite_for_locally_received_packets)
2101 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2102 u32 * from = vlib_frame_vector_args (frame);
2103 u32 n_left_from, n_left_to_next, * to_next, next_index;
2104 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2105 vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2106 ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2108 n_left_from = frame->n_vectors;
2109 next_index = node->cached_next_index;
2110 u32 cpu_index = os_get_cpu_number();
2112 while (n_left_from > 0)
2114 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2116 while (n_left_from >= 4 && n_left_to_next >= 2)
2118 ip_adjacency_t * adj0, * adj1;
2119 vlib_buffer_t * p0, * p1;
2120 ip4_header_t * ip0, * ip1;
2121 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2122 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2123 u32 next0_override, next1_override;
2124 u32 tx_sw_if_index0, tx_sw_if_index1;
2126 if (rewrite_for_locally_received_packets)
2127 next0_override = next1_override = 0;
2129 /* Prefetch next iteration. */
2131 vlib_buffer_t * p2, * p3;
2133 p2 = vlib_get_buffer (vm, from[2]);
2134 p3 = vlib_get_buffer (vm, from[3]);
2136 vlib_prefetch_buffer_header (p2, STORE);
2137 vlib_prefetch_buffer_header (p3, STORE);
2139 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2140 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2143 pi0 = to_next[0] = from[0];
2144 pi1 = to_next[1] = from[1];
2149 n_left_to_next -= 2;
2151 p0 = vlib_get_buffer (vm, pi0);
2152 p1 = vlib_get_buffer (vm, pi1);
2154 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2155 adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2157 /* We should never rewrite a pkt using the MISS adjacency */
2158 ASSERT(adj_index0 && adj_index1);
2160 ip0 = vlib_buffer_get_current (p0);
2161 ip1 = vlib_buffer_get_current (p1);
2163 error0 = error1 = IP4_ERROR_NONE;
2164 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2166 /* Decrement TTL & update checksum.
2167 Works either endian, so no need for byte swap. */
2168 if (! rewrite_for_locally_received_packets)
2170 i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2172 /* Input node should have reject packets with ttl 0. */
2173 ASSERT (ip0->ttl > 0);
2174 ASSERT (ip1->ttl > 0);
2176 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2177 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2179 checksum0 += checksum0 >= 0xffff;
2180 checksum1 += checksum1 >= 0xffff;
2182 ip0->checksum = checksum0;
2183 ip1->checksum = checksum1;
2192 * If the ttl drops below 1 when forwarding, generate
2195 if (PREDICT_FALSE(ttl0 <= 0))
2197 error0 = IP4_ERROR_TIME_EXPIRED;
2198 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2199 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2200 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2201 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2203 if (PREDICT_FALSE(ttl1 <= 0))
2205 error1 = IP4_ERROR_TIME_EXPIRED;
2206 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2207 icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2208 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2209 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2212 /* Verify checksum. */
2213 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2214 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2217 /* Rewrite packet header and updates lengths. */
2218 adj0 = ip_get_adjacency (lm, adj_index0);
2219 adj1 = ip_get_adjacency (lm, adj_index1);
2221 if (rewrite_for_locally_received_packets)
2223 if (PREDICT_FALSE(adj0->lookup_next_index
2224 == IP_LOOKUP_NEXT_ARP))
2225 next0_override = IP4_REWRITE_NEXT_ARP;
2226 if (PREDICT_FALSE(adj1->lookup_next_index
2227 == IP_LOOKUP_NEXT_ARP))
2228 next1_override = IP4_REWRITE_NEXT_ARP;
2231 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2232 rw_len0 = adj0[0].rewrite_header.data_bytes;
2233 rw_len1 = adj1[0].rewrite_header.data_bytes;
2234 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2235 vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2237 /* Check MTU of outgoing interface. */
2238 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2239 ? IP4_ERROR_MTU_EXCEEDED
2241 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2242 ? IP4_ERROR_MTU_EXCEEDED
2245 next0 = (error0 == IP4_ERROR_NONE)
2246 ? adj0[0].rewrite_header.next_index : next0;
2248 if (rewrite_for_locally_received_packets)
2249 next0 = next0 && next0_override ? next0_override : next0;
2251 next1 = (error1 == IP4_ERROR_NONE)
2252 ? adj1[0].rewrite_header.next_index : next1;
2254 if (rewrite_for_locally_received_packets)
2255 next1 = next1 && next1_override ? next1_override : next1;
2258 * We've already accounted for an ethernet_header_t elsewhere
2260 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2261 vlib_increment_combined_counter
2262 (&adjacency_counters,
2263 cpu_index, adj_index0,
2264 /* packet increment */ 0,
2265 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2267 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2268 vlib_increment_combined_counter
2269 (&adjacency_counters,
2270 cpu_index, adj_index1,
2271 /* packet increment */ 0,
2272 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2274 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2275 * to see the IP headerr */
2276 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2278 p0->current_data -= rw_len0;
2279 p0->current_length += rw_len0;
2280 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2281 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2285 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2288 p0->current_config_index =
2289 vec_elt (cm->config_index_by_sw_if_index,
2291 vnet_get_config_data (&cm->config_main,
2292 &p0->current_config_index,
2294 /* # bytes of config data */ 0);
2297 if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2299 p1->current_data -= rw_len1;
2300 p1->current_length += rw_len1;
2302 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2303 vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2307 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2310 p1->current_config_index =
2311 vec_elt (cm->config_index_by_sw_if_index,
2313 vnet_get_config_data (&cm->config_main,
2314 &p1->current_config_index,
2316 /* # bytes of config data */ 0);
2320 /* Guess we are only writing on simple Ethernet header. */
2321 vnet_rewrite_two_headers (adj0[0], adj1[0],
2323 sizeof (ethernet_header_t));
2325 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2326 to_next, n_left_to_next,
2327 pi0, pi1, next0, next1);
2330 while (n_left_from > 0 && n_left_to_next > 0)
2332 ip_adjacency_t * adj0;
2335 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2337 u32 tx_sw_if_index0;
2339 if (rewrite_for_locally_received_packets)
2342 pi0 = to_next[0] = from[0];
2344 p0 = vlib_get_buffer (vm, pi0);
2346 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2348 /* We should never rewrite a pkt using the MISS adjacency */
2351 adj0 = ip_get_adjacency (lm, adj_index0);
2353 ip0 = vlib_buffer_get_current (p0);
2355 error0 = IP4_ERROR_NONE;
2356 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2358 /* Decrement TTL & update checksum. */
2359 if (! rewrite_for_locally_received_packets)
2361 i32 ttl0 = ip0->ttl;
2363 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2365 checksum0 += checksum0 >= 0xffff;
2367 ip0->checksum = checksum0;
2369 ASSERT (ip0->ttl > 0);
2375 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2377 if (PREDICT_FALSE(ttl0 <= 0))
2380 * If the ttl drops below 1 when forwarding, generate
2383 error0 = IP4_ERROR_TIME_EXPIRED;
2384 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2385 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2386 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2387 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2391 if (rewrite_for_locally_received_packets)
2394 * We have to override the next_index in ARP adjacencies,
2395 * because they're set up for ip4-arp, not this node...
2397 if (PREDICT_FALSE(adj0->lookup_next_index
2398 == IP_LOOKUP_NEXT_ARP))
2399 next0_override = IP4_REWRITE_NEXT_ARP;
2402 /* Guess we are only writing on simple Ethernet header. */
2403 vnet_rewrite_one_header (adj0[0], ip0,
2404 sizeof (ethernet_header_t));
2406 /* Update packet buffer attributes/set output interface. */
2407 rw_len0 = adj0[0].rewrite_header.data_bytes;
2408 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2410 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2411 vlib_increment_combined_counter
2412 (&adjacency_counters,
2413 cpu_index, adj_index0,
2414 /* packet increment */ 0,
2415 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2417 /* Check MTU of outgoing interface. */
2418 error0 = (vlib_buffer_length_in_chain (vm, p0)
2419 > adj0[0].rewrite_header.max_l3_packet_bytes
2420 ? IP4_ERROR_MTU_EXCEEDED
2423 p0->error = error_node->errors[error0];
2425 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2426 * to see the IP header */
2427 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2429 p0->current_data -= rw_len0;
2430 p0->current_length += rw_len0;
2431 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2433 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2434 next0 = adj0[0].rewrite_header.next_index;
2437 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2440 p0->current_config_index =
2441 vec_elt (cm->config_index_by_sw_if_index,
2443 vnet_get_config_data (&cm->config_main,
2444 &p0->current_config_index,
2446 /* # bytes of config data */ 0);
2450 if (rewrite_for_locally_received_packets)
2451 next0 = next0 && next0_override ? next0_override : next0;
2456 n_left_to_next -= 1;
2458 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2459 to_next, n_left_to_next,
2463 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2466 /* Need to do trace after rewrites to pick up new packet data. */
2467 if (node->flags & VLIB_NODE_FLAG_TRACE)
2468 ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2470 return frame->n_vectors;
2474 /** @brief IPv4 transit rewrite node.
2475 @node ip4-rewrite-transit
2477 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2478 header checksum, fetch the ip adjacency, check the outbound mtu,
2479 apply the adjacency rewrite, and send pkts to the adjacency
2480 rewrite header's rewrite_next_index.
2482 @param vm vlib_main_t corresponding to the current thread
2483 @param node vlib_node_runtime_t
2484 @param frame vlib_frame_t whose contents should be dispatched
2486 @par Graph mechanics: buffer metadata, next index usage
2489 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2490 - the rewrite adjacency index
2491 - <code>adj->lookup_next_index</code>
2492 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2493 the packet will be dropped.
2494 - <code>adj->rewrite_header</code>
2495 - Rewrite string length, rewrite string, next_index
2498 - <code>b->current_data, b->current_length</code>
2499 - Updated net of applying the rewrite string
2501 <em>Next Indices:</em>
2502 - <code> adj->rewrite_header.next_index </code>
2506 ip4_rewrite_transit (vlib_main_t * vm,
2507 vlib_node_runtime_t * node,
2508 vlib_frame_t * frame)
/* Transit entry point: hand the frame to the shared inline worker with
   rewrite_for_locally_received_packets = 0 and return whatever it returns. */
2510 return ip4_rewrite_inline (vm, node, frame,
2511 /* rewrite_for_locally_received_packets */ 0);
2514 /** @brief IPv4 local rewrite node.
2515 @node ip4-rewrite-local
2517 This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2518 the outbound interface mtu, apply the adjacency rewrite, and send
2519 pkts to the adjacency rewrite header's rewrite_next_index. Also
2520 handles the corner case of an icmp4 packet sent with src =
2521 dst = interface addr.
2523 @param vm vlib_main_t corresponding to the current thread
2524 @param node vlib_node_runtime_t
2525 @param frame vlib_frame_t whose contents should be dispatched
2527 @par Graph mechanics: buffer metadata, next index usage
2530 - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2531 - the rewrite adjacency index
2532 - <code>adj->lookup_next_index</code>
2533 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2534 the packet will be dropped.
2535 - <code>adj->rewrite_header</code>
2536 - Rewrite string length, rewrite string, next_index
2539 - <code>b->current_data, b->current_length</code>
2540 - Updated net of applying the rewrite string
2542 <em>Next Indices:</em>
2543 - <code> adj->rewrite_header.next_index </code>
2548 ip4_rewrite_local (vlib_main_t * vm,
2549 vlib_node_runtime_t * node,
2550 vlib_frame_t * frame)
/* Local entry point: same shared inline worker as the transit node, but
   with rewrite_for_locally_received_packets = 1. */
2552 return ip4_rewrite_inline (vm, node, frame,
2553 /* rewrite_for_locally_received_packets */ 1);
2557 ip4_midchain (vlib_main_t * vm,
2558 vlib_node_runtime_t * node,
2559 vlib_frame_t * frame)
/* Mid-chain entry point: calls the shared inline worker with
   rewrite_for_locally_received_packets = 0, i.e. the same call that
   ip4_rewrite_transit makes. */
2561 return ip4_rewrite_inline (vm, node, frame,
2562 /* rewrite_for_locally_received_packets */ 0);
2565 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
/* Registration of the "ip4-rewrite-transit" graph node; its dispatch
   function is ip4_rewrite_transit and traces are rendered by
   format_ip4_rewrite_trace. */
2566 .function = ip4_rewrite_transit,
2567 .name = "ip4-rewrite-transit",
2568 .vector_size = sizeof (u32),
2570 .format_trace = format_ip4_rewrite_trace,
/* Next-node names, keyed by the IP4_REWRITE_NEXT_* designators. */
2574 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2575 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2576 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2580 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2582 VLIB_REGISTER_NODE (ip4_midchain_node) = {
/* Registration of the "ip4-midchain" graph node; its dispatch function is
   ip4_midchain and traces are rendered by format_ip4_forward_next_trace. */
2583 .function = ip4_midchain,
2584 .name = "ip4-midchain",
2585 .vector_size = sizeof (u32),
2587 .format_trace = format_ip4_forward_next_trace,
/* Next-node names, keyed by the IP4_REWRITE_NEXT_* designators.
   NOTE(review): ip4_midchain runs ip4_rewrite_inline in transit mode, which
   can select IP4_REWRITE_NEXT_ICMP_ERROR when the TTL expires; no entry for
   that index is visible here -- confirm whether one is needed. */
2591 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2592 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2596 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
2598 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
/* Registration of the "ip4-rewrite-local" graph node; its dispatch
   function is ip4_rewrite_local. */
2599 .function = ip4_rewrite_local,
2600 .name = "ip4-rewrite-local",
2601 .vector_size = sizeof (u32),
/* Declared as a sibling of "ip4-rewrite-transit"; presumably this makes the
   node share that node's next-node arcs -- TODO confirm. */
2603 .sibling_of = "ip4-rewrite-transit",
2605 .format_trace = format_ip4_rewrite_trace,
2610 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
/* CLI handler registered for "set interface ip table".
   Parses an interface followed by a numeric table id from the input,
   resolves a FIB index through fib_table_find_or_create_and_lock and
   records it for the interface in ip4_main.fib_index_by_sw_if_index.
   Parse failures are reported through a clib_error_t built with
   clib_error_return. */
2612 static clib_error_t *
2613 add_del_interface_table (vlib_main_t * vm,
2614 unformat_input_t * input,
2615 vlib_cli_command_t * cmd)
2617 vnet_main_t * vnm = vnet_get_main();
2618 clib_error_t * error = 0;
2619 u32 sw_if_index, table_id;
/* First token: the software interface. */
2623 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2625 error = clib_error_return (0, "unknown interface `%U'",
2626 format_unformat_error, input);
/* Second token: the table id, parsed with "%d". */
2630 if (unformat (input, "%d", &table_id))
2634 error = clib_error_return (0, "expected table id `%U'",
2635 format_unformat_error, input);
2640 ip4_main_t * im = &ip4_main;
2643 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2648 // changing an interface's table has consequences for any connecteds
2649 // and adj-fibs already installed.
/* Grow the per-interface vector if needed, then bind the interface to the
   resolved FIB index. */
2651 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2652 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2660 * Place the indicated interface into the supplied VRF
2663 * @cliexstart{set interface ip table}
2665 * vpp# set interface ip table GigabitEthernet2/0/0 2
2667 * Interface addresses added after setting the interface IP table end up in the indicated VRF table.
2668 * Predictable but potentially counter-intuitive results occur if you provision interface addresses in multiple FIBs.
2669 * Upon RX, packets will be processed in the last IP table ID provisioned.
2670 * It might be marginally useful to evade source RPF drops to put an interface address into multiple FIBs.
2673 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
/* Binds the CLI path "set interface ip table" to add_del_interface_table.
   NOTE(review): the help text mentions "delete", but no delete keyword is
   visible in the handler's parser, and the text does not show the
   "<interface> <table-id>" argument order the handler parses -- confirm and
   consider a usage-style help string. */
2674 .path = "set interface ip table",
2675 .function = add_del_interface_table,
2676 .short_help = "Add/delete FIB table id for interface",
2681 ip4_lookup_multicast (vlib_main_t * vm,
2682 vlib_node_runtime_t * node,
2683 vlib_frame_t * frame)
/* Dispatch function of the "ip4-lookup-multicast" node.
   For every buffer in the frame: choose a FIB index, obtain a load-balance
   index from ip4_fib_table_lookup_lb, compute the flow hash with that
   load-balance object's hash config, pick a bucket with the hash, store the
   bucket's dpoi_index in ip.adj_index[VLIB_TX], bump the per-load-balance
   combined counter and enqueue the buffer towards the bucket's
   dpoi_next_node.  Returns frame->n_vectors. */
2685 ip4_main_t * im = &ip4_main;
2686 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2687 u32 n_left_from, n_left_to_next, * from, * to_next;
2688 ip_lookup_next_t next;
2689 u32 cpu_index = os_get_cpu_number();
2691 from = vlib_frame_vector_args (frame);
2692 n_left_from = frame->n_vectors;
2693 next = node->cached_next_index;
2695 while (n_left_from > 0)
2697 vlib_get_next_frame (vm, node, next,
2698 to_next, n_left_to_next);
/* Dual loop: handles two buffers per iteration while two more are
   available to prefetch. */
2700 while (n_left_from >= 4 && n_left_to_next >= 2)
2702 vlib_buffer_t * p0, * p1;
2703 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2704 ip_lookup_next_t next0, next1;
2705 ip4_header_t * ip0, * ip1;
2706 u32 fib_index0, fib_index1;
2707 const dpo_id_t *dpo0, *dpo1;
2708 const load_balance_t * lb0, * lb1;
2710 /* Prefetch next iteration. */
2712 vlib_buffer_t * p2, * p3;
2714 p2 = vlib_get_buffer (vm, from[2]);
2715 p3 = vlib_get_buffer (vm, from[3]);
2717 vlib_prefetch_buffer_header (p2, LOAD);
2718 vlib_prefetch_buffer_header (p3, LOAD);
2720 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2721 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2724 pi0 = to_next[0] = from[0];
2725 pi1 = to_next[1] = from[1];
2727 p0 = vlib_get_buffer (vm, pi0);
2728 p1 = vlib_get_buffer (vm, pi1);
2730 ip0 = vlib_buffer_get_current (p0);
2731 ip1 = vlib_buffer_get_current (p1);
/* Default FIB index comes from the RX interface; a sw_if_index[VLIB_TX]
   other than ~0 replaces it. */
2733 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2734 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2735 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2736 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2737 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2738 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2740 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2742 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2745 lb0 = load_balance_get (lb_index0);
2746 lb1 = load_balance_get (lb_index1);
2748 ASSERT (lb0->lb_n_buckets > 0);
2749 ASSERT (is_pow2 (lb0->lb_n_buckets));
2750 ASSERT (lb1->lb_n_buckets > 0);
2751 ASSERT (is_pow2 (lb1->lb_n_buckets));
2753 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2754 (ip0, lb0->lb_hash_config);
2756 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2757 (ip1, lb1->lb_hash_config);
2759 dpo0 = load_balance_get_bucket_i(lb0,
2760 (vnet_buffer (p0)->ip.flow_hash &
2761 (lb0->lb_n_buckets_minus_1)));
/* The hash of p1 must be masked with lb1's own bucket mask: the bucket is
   fetched from lb1, whose bucket count may differ from lb0's. */
2762 dpo1 = load_balance_get_bucket_i(lb1,
2763 (vnet_buffer (p1)->ip.flow_hash &
2764 (lb1->lb_n_buckets_minus_1)));
2766 next0 = dpo0->dpoi_next_node;
2767 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2768 next1 = dpo1->dpoi_next_node;
2769 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2771 if (1) /* $$$$$$ HACK FIXME */
2772 vlib_increment_combined_counter
2773 (cm, cpu_index, lb_index0, 1,
2774 vlib_buffer_length_in_chain (vm, p0));
2775 if (1) /* $$$$$$ HACK FIXME */
2776 vlib_increment_combined_counter
2777 (cm, cpu_index, lb_index1, 1,
2778 vlib_buffer_length_in_chain (vm, p1));
2782 n_left_to_next -= 2;
/* Bit 0 is set when p0's next differs from the frame's current next,
   bit 1 when p1's does. */
2785 wrong_next = (next0 != next) + 2*(next1 != next);
2786 if (PREDICT_FALSE (wrong_next != 0))
2794 n_left_to_next += 1;
2795 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2801 n_left_to_next += 1;
2802 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2808 n_left_to_next += 2;
2809 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2810 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2814 vlib_put_next_frame (vm, node, next, n_left_to_next);
2816 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single loop: same per-buffer processing, one buffer at a time. */
2822 while (n_left_from > 0 && n_left_to_next > 0)
2827 ip_lookup_next_t next0;
2829 const dpo_id_t *dpo0;
2830 const load_balance_t * lb0;
2835 p0 = vlib_get_buffer (vm, pi0);
2837 ip0 = vlib_buffer_get_current (p0);
2839 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2840 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2841 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2842 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2844 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2847 lb0 = load_balance_get (lb_index0);
2849 ASSERT (lb0->lb_n_buckets > 0);
2850 ASSERT (is_pow2 (lb0->lb_n_buckets));
2852 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2853 (ip0, lb0->lb_hash_config);
2855 dpo0 = load_balance_get_bucket_i(lb0,
2856 (vnet_buffer (p0)->ip.flow_hash &
2857 (lb0->lb_n_buckets_minus_1)));
2859 next0 = dpo0->dpoi_next_node;
2860 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2862 if (1) /* $$$$$$ HACK FIXME */
2863 vlib_increment_combined_counter
2864 (cm, cpu_index, lb_index0, 1,
2865 vlib_buffer_length_in_chain (vm, p0));
2869 n_left_to_next -= 1;
2872 if (PREDICT_FALSE (next0 != next))
2874 n_left_to_next += 1;
2875 vlib_put_next_frame (vm, node, next, n_left_to_next);
2877 vlib_get_next_frame (vm, node, next,
2878 to_next, n_left_to_next);
2881 n_left_to_next -= 1;
2885 vlib_put_next_frame (vm, node, next, n_left_to_next);
2888 if (node->flags & VLIB_NODE_FLAG_TRACE)
2889 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2891 return frame->n_vectors;
2894 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
/* Registration of the "ip4-lookup-multicast" graph node; its dispatch
   function is ip4_lookup_multicast and it is declared a sibling of
   "ip4-lookup". */
2895 .function = ip4_lookup_multicast,
2896 .name = "ip4-lookup-multicast",
2897 .vector_size = sizeof (u32),
2898 .sibling_of = "ip4-lookup",
2899 .format_trace = format_ip4_lookup_trace,
2904 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
2906 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
/* Registration of the "ip4-multicast" graph node.  Its dispatch function is
   ip4_drop. */
2907 .function = ip4_drop,
2908 .name = "ip4-multicast",
2909 .vector_size = sizeof (u32),
2911 .format_trace = format_ip4_forward_next_trace,
/* Cross-check of the two lookup paths for address `a' in FIB `fib_index0'.
   Walks the FIB's mtrie in four steps (indices 0..3), falls back to the
   mtrie's default leaf when the walk ends on an empty leaf, and compares
   the index stored in the resulting leaf with the value returned by
   ip4_fib_table_lookup_lb.  Returns non-zero when both agree, 0 otherwise. */
2919 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2921 ip4_fib_mtrie_t * mtrie0;
2922 ip4_fib_mtrie_leaf_t leaf0;
2925 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2927 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2928 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2929 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2930 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2931 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2933 /* Handle default route. */
2934 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2936 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2938 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
/* CLI handler registered for "test lookup".
   Accepts, in any order, "table <n>", "count <f>" and an IPv4 base address;
   any other input yields an "unknown input" error.  It then calls
   ip4_lookup_validate n times, advancing the address by one (in host byte
   order) after each call, and prints either the error count or a
   "No errors" line. */
2941 static clib_error_t *
2942 test_lookup_command_fn (vlib_main_t * vm,
2943 unformat_input_t * input,
2944 vlib_cli_command_t * cmd)
2950 ip4_address_t ip4_base_address;
2953 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2954 if (unformat (input, "table %d", &table_id))
2956 else if (unformat (input, "count %f", &count))
2959 else if (unformat (input, "%U",
2960 unformat_ip4_address, &ip4_base_address))
2963 return clib_error_return (0, "unknown input `%U'",
2964 format_unformat_error, input);
2969 for (i = 0; i < n; i++)
/* NOTE(review): table_id is passed where ip4_lookup_validate expects a
   parameter named fib_index0 -- confirm that a table id and a FIB index
   are interchangeable here. */
2971 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Increment the address: convert to host order, add one, convert back. */
2974 ip4_base_address.as_u32 =
2975 clib_host_to_net_u32 (1 +
2976 clib_net_to_host_u32 (ip4_base_address.as_u32));
2980 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2982 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2987 VLIB_CLI_COMMAND (lookup_test_command, static) = {
/* Binds the CLI path "test lookup" to test_lookup_command_fn. */
2988 .path = "test lookup",
2989 .short_help = "test lookup",
2990 .function = test_lookup_command_fn,
/* Sets the flow-hash configuration of the IPv4 FIB identified by table_id.
   The table id is looked up in ip4_main.fib_index_by_table_id; the visible
   failure return is VNET_API_ERROR_NO_SUCH_FIB.  Otherwise
   flow_hash_config is stored in the FIB's flow_hash_config field. */
2993 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2995 ip4_main_t * im4 = &ip4_main;
2997 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3000 return VNET_API_ERROR_NO_SUCH_FIB;
/* p[0] is the value stored in the hash for table_id, used as the FIB index. */
3002 fib = ip4_fib_get (p[0]);
3004 fib->flow_hash_config = flow_hash_config;
/* CLI handler registered for "set ip flow-hash".
   Parses "table <n>" plus any of the keywords generated from
   foreach_flow_hash_bit, OR-ing each matched keyword's value into
   flow_hash_config, then applies the result with vnet_set_ip4_flow_hash
   and maps its return value to a CLI error or warning. */
3008 static clib_error_t *
3009 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3010 unformat_input_t * input,
3011 vlib_cli_command_t * cmd)
3015 u32 flow_hash_config = 0;
3018 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3019 if (unformat (input, "table %d", &table_id))
/* One "else if" per flow-hash bit: the keyword is the stringified bit name
   (#a) and v is the value OR-ed into the config. */
3022 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3023 foreach_flow_hash_bit
3029 return clib_error_return (0, "unknown input `%U'",
3030 format_unformat_error, input);
3032 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3038 case VNET_API_ERROR_NO_SUCH_FIB:
3039 return clib_error_return (0, "no such FIB table %d", table_id);
3042 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3049 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
/* Binds the CLI path "set ip flow-hash" to set_ip_flow_hash_command_fn.
   The help text starts with the registered path so that it shows the
   command a user actually has to type. */
3050 .path = "set ip flow-hash",
3052 "set ip flow-hash table <fib-id> src dst sport dport proto reverse",
3053 .function = set_ip_flow_hash_command_fn,
/* Associates a classifier table index with a software interface.
   Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
   slot in the sw_interfaces pool, and VNET_API_ERROR_NO_SUCH_ENTRY when
   table_index is not ~0 and is a free slot in the classifier table pool.
   Otherwise the index is stored in
   lookup_main.classify_table_index_by_sw_if_index[sw_if_index]. */
3056 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3059 vnet_main_t * vnm = vnet_get_main();
3060 vnet_interface_main_t * im = &vnm->interface_main;
3061 ip4_main_t * ipm = &ip4_main;
3062 ip_lookup_main_t * lm = &ipm->lookup_main;
3063 vnet_classify_main_t * cm = &vnet_classify_main;
3065 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3066 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* ~0 skips the table-existence check; presumably it means "no table" --
   TODO confirm. */
3068 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3069 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Grow the per-interface vector if needed before storing the index. */
3071 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3072 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
/* CLI handler registered for "set ip classify".
   Parses "table-index <n>" and "intfc <interface>", requires both to be
   present, then calls vnet_set_ip4_classify_intfc and turns its error
   codes into CLI error messages. */
3077 static clib_error_t *
3078 set_ip_classify_command_fn (vlib_main_t * vm,
3079 unformat_input_t * input,
3080 vlib_cli_command_t * cmd)
3082 u32 table_index = ~0;
/* Separate flag: ~0 cannot tell whether a table index was supplied, since
   it is also a value accepted by vnet_set_ip4_classify_intfc. */
3083 int table_index_set = 0;
3084 u32 sw_if_index = ~0;
3087 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3088 if (unformat (input, "table-index %d", &table_index))
3089 table_index_set = 1;
3090 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3091 vnet_get_main(), &sw_if_index))
3097 if (table_index_set == 0)
3098 return clib_error_return (0, "classify table-index must be specified");
3100 if (sw_if_index == ~0)
3101 return clib_error_return (0, "interface / subif must be specified");
3103 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3110 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3111 return clib_error_return (0, "No such interface");
3113 case VNET_API_ERROR_NO_SUCH_ENTRY:
3114 return clib_error_return (0, "No such classifier table");
3119 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
/* Binds the CLI path "set ip classify" to set_ip_classify_command_fn. */
3120 .path = "set ip classify",
3122 "set ip classify intfc <int> table-index <index>",
3123 .function = set_ip_classify_command_fn,