2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/ip4_fib.h>
50 #include <vnet/dpo/load_balance.h>
51 #include <vnet/dpo/classify_dpo.h>
/* Forward declaration of the trace helper defined later in this file.
   It is called with the node's frame and with VLIB_RX or VLIB_TX to select
   which ip.adj_index[] slot is recorded in the trace. */
54 ip4_forward_next_trace (vlib_main_t * vm,
55 vlib_node_runtime_t * node,
57 vlib_rx_or_tx_t which_adj_index);
60 ip4_lookup_inline (vlib_main_t * vm,
61 vlib_node_runtime_t * node,
63 int lookup_for_responses_to_locally_received_packets)
65 ip4_main_t * im = &ip4_main;
66 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
67 u32 n_left_from, n_left_to_next, * from, * to_next;
68 ip_lookup_next_t next;
69 u32 cpu_index = os_get_cpu_number();
71 from = vlib_frame_vector_args (frame);
72 n_left_from = frame->n_vectors;
73 next = node->cached_next_index;
75 while (n_left_from > 0)
77 vlib_get_next_frame (vm, node, next,
78 to_next, n_left_to_next);
80 while (n_left_from >= 4 && n_left_to_next >= 2)
82 vlib_buffer_t * p0, * p1;
83 ip4_header_t * ip0, * ip1;
84 __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
85 ip_lookup_next_t next0, next1;
86 const load_balance_t * lb0, * lb1;
87 ip4_fib_mtrie_t * mtrie0, * mtrie1;
88 ip4_fib_mtrie_leaf_t leaf0, leaf1;
89 ip4_address_t * dst_addr0, *dst_addr1;
90 __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
91 __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
92 flow_hash_config_t flow_hash_config0, flow_hash_config1;
95 const dpo_id_t *dpo0, *dpo1;
97 /* Prefetch next iteration. */
99 vlib_buffer_t * p2, * p3;
101 p2 = vlib_get_buffer (vm, from[2]);
102 p3 = vlib_get_buffer (vm, from[3]);
104 vlib_prefetch_buffer_header (p2, LOAD);
105 vlib_prefetch_buffer_header (p3, LOAD);
107 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
108 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
111 pi0 = to_next[0] = from[0];
112 pi1 = to_next[1] = from[1];
114 p0 = vlib_get_buffer (vm, pi0);
115 p1 = vlib_get_buffer (vm, pi1);
117 ip0 = vlib_buffer_get_current (p0);
118 ip1 = vlib_buffer_get_current (p1);
120 dst_addr0 = &ip0->dst_address;
121 dst_addr1 = &ip1->dst_address;
123 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
124 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
125 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
126 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
127 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
128 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
131 if (! lookup_for_responses_to_locally_received_packets)
133 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
134 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
136 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
138 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
139 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
142 tcp0 = (void *) (ip0 + 1);
143 tcp1 = (void *) (ip1 + 1);
145 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
146 || ip0->protocol == IP_PROTOCOL_UDP);
147 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
148 || ip1->protocol == IP_PROTOCOL_UDP);
150 if (! lookup_for_responses_to_locally_received_packets)
152 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
153 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
156 if (! lookup_for_responses_to_locally_received_packets)
158 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
159 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
162 if (! lookup_for_responses_to_locally_received_packets)
164 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
165 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
168 if (lookup_for_responses_to_locally_received_packets)
170 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
171 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
175 /* Handle default route. */
176 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
177 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
179 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
180 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
183 lb0 = load_balance_get (lb_index0);
184 lb1 = load_balance_get (lb_index1);
186 /* Use flow hash to compute multipath adjacency. */
187 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
188 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
189 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
191 flow_hash_config0 = lb0->lb_hash_config;
192 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
193 ip4_compute_flow_hash (ip0, flow_hash_config0);
195 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
197 flow_hash_config1 = lb1->lb_hash_config;
198 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
199 ip4_compute_flow_hash (ip1, flow_hash_config1);
202 ASSERT (lb0->lb_n_buckets > 0);
203 ASSERT (is_pow2 (lb0->lb_n_buckets));
204 ASSERT (lb1->lb_n_buckets > 0);
205 ASSERT (is_pow2 (lb1->lb_n_buckets));
207 dpo0 = load_balance_get_bucket_i(lb0,
209 (lb0->lb_n_buckets_minus_1)));
210 dpo1 = load_balance_get_bucket_i(lb1,
212 (lb0->lb_n_buckets_minus_1)));
214 next0 = dpo0->dpoi_next_node;
215 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
216 next1 = dpo1->dpoi_next_node;
217 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
219 vlib_increment_combined_counter
220 (cm, cpu_index, lb_index0, 1,
221 vlib_buffer_length_in_chain (vm, p0)
222 + sizeof(ethernet_header_t));
223 vlib_increment_combined_counter
224 (cm, cpu_index, lb_index1, 1,
225 vlib_buffer_length_in_chain (vm, p1)
226 + sizeof(ethernet_header_t));
233 wrong_next = (next0 != next) + 2*(next1 != next);
234 if (PREDICT_FALSE (wrong_next != 0))
243 vlib_set_next_frame_buffer (vm, node, next0, pi0);
250 vlib_set_next_frame_buffer (vm, node, next1, pi1);
257 vlib_set_next_frame_buffer (vm, node, next0, pi0);
258 vlib_set_next_frame_buffer (vm, node, next1, pi1);
262 vlib_put_next_frame (vm, node, next, n_left_to_next);
264 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
270 while (n_left_from > 0 && n_left_to_next > 0)
274 __attribute__((unused)) tcp_header_t * tcp0;
275 ip_lookup_next_t next0;
276 const load_balance_t *lb0;
277 ip4_fib_mtrie_t * mtrie0;
278 ip4_fib_mtrie_leaf_t leaf0;
279 ip4_address_t * dst_addr0;
280 __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
281 flow_hash_config_t flow_hash_config0;
282 const dpo_id_t *dpo0;
288 p0 = vlib_get_buffer (vm, pi0);
290 ip0 = vlib_buffer_get_current (p0);
292 dst_addr0 = &ip0->dst_address;
294 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
295 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
296 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
298 if (! lookup_for_responses_to_locally_received_packets)
300 mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
302 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
304 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
307 tcp0 = (void *) (ip0 + 1);
309 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
310 || ip0->protocol == IP_PROTOCOL_UDP);
312 if (! lookup_for_responses_to_locally_received_packets)
313 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
315 if (! lookup_for_responses_to_locally_received_packets)
316 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
318 if (! lookup_for_responses_to_locally_received_packets)
319 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
321 if (lookup_for_responses_to_locally_received_packets)
322 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
325 /* Handle default route. */
326 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
327 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
330 lb0 = load_balance_get (lbi0);
332 /* Use flow hash to compute multipath adjacency. */
333 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
334 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
336 flow_hash_config0 = lb0->lb_hash_config;
338 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
339 ip4_compute_flow_hash (ip0, flow_hash_config0);
342 ASSERT (lb0->lb_n_buckets > 0);
343 ASSERT (is_pow2 (lb0->lb_n_buckets));
345 dpo0 = load_balance_get_bucket_i(lb0,
347 (lb0->lb_n_buckets_minus_1)));
349 next0 = dpo0->dpoi_next_node;
350 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
352 vlib_increment_combined_counter
353 (cm, cpu_index, lbi0, 1,
354 vlib_buffer_length_in_chain (vm, p0));
361 if (PREDICT_FALSE (next0 != next))
364 vlib_put_next_frame (vm, node, next, n_left_to_next);
366 vlib_get_next_frame (vm, node, next,
367 to_next, n_left_to_next);
374 vlib_put_next_frame (vm, node, next, n_left_to_next);
377 if (node->flags & VLIB_NODE_FLAG_TRACE)
378 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
380 return frame->n_vectors;
383 /** @brief IPv4 lookup node.
386 This is the main IPv4 lookup dispatch node.
388 @param vm vlib_main_t corresponding to the current thread
389 @param node vlib_node_runtime_t
390 @param frame vlib_frame_t whose contents should be dispatched
392 @par Graph mechanics: buffer metadata, next index usage
395 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
396 - Indicates the @c sw_if_index value of the interface that the
397 packet was received on.
398 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
399 - When the value is @c ~0 then the node performs a longest prefix
400 match (LPM) for the packet destination address in the FIB attached
401 to the receive interface.
402 - Otherwise perform LPM for the packet destination address in the
403 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
404 value (0, 1, ...) and not a VRF id.
407 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
408 - The index of the DPO selected from the lookup result (@c dpo->dpoi_index).
411 - Dispatches the packet to the node index found in
412 the selected load-balance bucket, @c dpo->dpoi_next_node
413 (where @c dpo is the DPO chosen from the lookup result).
416 ip4_lookup (vlib_main_t * vm,
417 vlib_node_runtime_t * node,
418 vlib_frame_t * frame)
420 return ip4_lookup_inline (vm, node, frame,
421 /* lookup_for_responses_to_locally_received_packets */ 0);
/* Trace formatter used by the registration below; its definition appears
   later in the file. */
425 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph-node registration for "ip4-lookup": dispatch function ip4_lookup,
   frames of u32 entries, and the IP4_LOOKUP_NEXT_NODES next-node table with
   IP_LOOKUP_N_NEXT entries. */
427 VLIB_REGISTER_NODE (ip4_lookup_node) = {
428 .function = ip4_lookup,
429 .name = "ip4-lookup",
430 .vector_size = sizeof (u32),
432 .format_trace = format_ip4_lookup_trace,
433 .n_next_nodes = IP_LOOKUP_N_NEXT,
434 .next_nodes = IP4_LOOKUP_NEXT_NODES,
437 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
440 ip4_load_balance (vlib_main_t * vm,
441 vlib_node_runtime_t * node,
442 vlib_frame_t * frame)
444 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
445 u32 n_left_from, n_left_to_next, * from, * to_next;
446 ip_lookup_next_t next;
447 u32 cpu_index = os_get_cpu_number();
449 from = vlib_frame_vector_args (frame);
450 n_left_from = frame->n_vectors;
451 next = node->cached_next_index;
453 if (node->flags & VLIB_NODE_FLAG_TRACE)
454 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
456 while (n_left_from > 0)
458 vlib_get_next_frame (vm, node, next,
459 to_next, n_left_to_next);
462 while (n_left_from > 0 && n_left_to_next > 0)
464 ip_lookup_next_t next0;
465 const load_balance_t *lb0;
468 const ip4_header_t *ip0;
469 const dpo_id_t *dpo0;
474 p0 = vlib_get_buffer (vm, pi0);
476 ip0 = vlib_buffer_get_current (p0);
477 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
479 lb0 = load_balance_get(lbi0);
480 hc0 = lb0->lb_hash_config;
481 vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
483 dpo0 = load_balance_get_bucket_i(lb0,
484 vnet_buffer(p0)->ip.flow_hash &
485 (lb0->lb_n_buckets_minus_1));
487 next0 = dpo0->dpoi_next_node;
488 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
490 vlib_increment_combined_counter
491 (cm, cpu_index, lbi0, 1,
492 vlib_buffer_length_in_chain (vm, p0));
499 if (PREDICT_FALSE (next0 != next))
502 vlib_put_next_frame (vm, node, next, n_left_to_next);
504 vlib_get_next_frame (vm, node, next,
505 to_next, n_left_to_next);
512 vlib_put_next_frame (vm, node, next, n_left_to_next);
515 return frame->n_vectors;
/* Trace formatter used by the registration below; its definition appears
   later in the file. */
518 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
/* Graph-node registration for "ip4-load-balance": dispatch function
   ip4_load_balance, frames of u32 entries, declared as a sibling of
   "ip4-lookup" (presumably so both share one next-node set; confirm
   against the vlib node documentation). */
520 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
521 .function = ip4_load_balance,
522 .name = "ip4-load-balance",
523 .vector_size = sizeof (u32),
524 .sibling_of = "ip4-lookup",
526 .format_trace = format_ip4_forward_next_trace,
529 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
531 /* get first interface address */
533 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
534 ip_interface_address_t ** result_ia)
536 ip_lookup_main_t * lm = &im->lookup_main;
537 ip_interface_address_t * ia = 0;
538 ip4_address_t * result = 0;
540 foreach_ip_interface_address (lm, ia, sw_if_index,
541 1 /* honor unnumbered */,
543 ip4_address_t * a = ip_interface_address_get_address (lm, ia);
548 *result_ia = result ? ia : 0;
/* Install the FIB entries that go with one interface address:
     - an entry flagged CONNECTED|ATTACHED from FIB_SOURCE_INTERFACE, whose
       adjacency is stored in a->neighbor_probe_adj_index;
     - when a classify table is configured for the interface, a special
       entry carrying a classify DPO;
     - an entry flagged CONNECTED|LOCAL from FIB_SOURCE_INTERFACE.
   sw_if_index: interface owning the address; im: IPv4 main;
   fib_index: FIB to update; a: the interface-address record. */
553 ip4_add_interface_routes (u32 sw_if_index,
554 ip4_main_t * im, u32 fib_index,
555 ip_interface_address_t * a)
557 ip_lookup_main_t * lm = &im->lookup_main;
558 ip4_address_t * address = ip_interface_address_get_address (lm, a);
/* Prefix built from the address and its configured length. */
560 .fp_len = a->address_length,
561 .fp_proto = FIB_PROTOCOL_IP4,
562 .fp_addr.ip4 = *address,
/* Start from "no adjacency"; overwritten below once the attached entry
   has been created. */
565 a->neighbor_probe_adj_index = ~0;
569 fib_node_index_t fei;
571 fei = fib_table_entry_update_one_path(fib_index,
573 FIB_SOURCE_INTERFACE,
574 (FIB_ENTRY_FLAG_CONNECTED |
575 FIB_ENTRY_FLAG_ATTACHED),
577 NULL, /* No next-hop address */
579 ~0, // invalid FIB index
582 FIB_ROUTE_PATH_FLAG_NONE);
583 a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
/* Optional classify entry: only when the per-interface table vector covers
   this interface and holds a valid (not ~0) table index. */
588 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
590 u32 classify_table_index =
591 lm->classify_table_index_by_sw_if_index [sw_if_index];
592 if (classify_table_index != (u32) ~0)
594 dpo_id_t dpo = DPO_NULL;
599 classify_dpo_create(FIB_PROTOCOL_IP4,
600 classify_table_index));
602 fib_table_entry_special_dpo_add(fib_index,
/* Entry flagged CONNECTED|LOCAL for the address. */
611 fib_table_entry_update_one_path(fib_index,
613 FIB_SOURCE_INTERFACE,
614 (FIB_ENTRY_FLAG_CONNECTED |
615 FIB_ENTRY_FLAG_LOCAL),
619 ~0, // invalid FIB index
622 FIB_ROUTE_PATH_FLAG_NONE);
/* Remove the FIB_SOURCE_INTERFACE entries for an interface address from the
   given FIB. Two fib_table_entry_delete calls are visible; presumably one
   for the connected prefix and one for the host address, mirroring
   ip4_add_interface_routes (TODO confirm). */
626 ip4_del_interface_routes (ip4_main_t * im,
628 ip4_address_t * address,
/* Prefix built from the caller-supplied address and length. */
632 .fp_len = address_length,
633 .fp_proto = FIB_PROTOCOL_IP4,
634 .fp_addr.ip4 = *address,
639 fib_table_entry_delete(fib_index,
641 FIB_SOURCE_INTERFACE);
645 fib_table_entry_delete(fib_index,
647 FIB_SOURCE_INTERFACE);
/* Reference-counted enable/disable of IPv4 on an interface.
   im->ip_enabled_by_sw_if_index[sw_if_index] counts enables; the lookup
   feature is added to, or removed from, the unicast and multicast rx
   feature configurations, and the resulting config index is stored back
   per interface. */
651 ip4_sw_interface_enable_disable (u32 sw_if_index,
654 vlib_main_t * vm = vlib_get_main();
655 ip4_main_t * im = &ip4_main;
656 ip_lookup_main_t * lm = &im->lookup_main;
658 u32 lookup_feature_index;
660 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
663 * enable/disable only on the 1<->0 transition
667 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
672 ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
673 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
/* Covers the rx casts up to and including multicast; the tx cast is not
   touched here. */
677 for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
679 ip_config_main_t * cm = &lm->feature_config_mains[cast];
680 vnet_config_main_t * vcm = &cm->config_main;
682 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
683 ci = cm->config_index_by_sw_if_index[sw_if_index];
685 if (cast == VNET_IP_RX_UNICAST_FEAT)
686 lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
688 lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
691 ci = vnet_config_add_feature (vm, vcm,
693 lookup_feature_index,
695 /* # bytes of config data */ 0);
697 ci = vnet_config_del_feature (vm, vcm,
699 lookup_feature_index,
701 /* # bytes of config data */ 0);
702 cm->config_index_by_sw_if_index[sw_if_index] = ci;
/* Add or delete an IPv4 address on an interface.
   Visible steps: make sure the interface has a FIB index slot, reject an
   address that overlaps an address already on the interface, update the
   interface-address pool, enable/disable IPv4 on the interface, add or
   delete the matching FIB routes, and notify the registered address
   callbacks when the pool size changed.
   Returns a clib_error_t on conflict; `error` is initialised to 0. */
706 static clib_error_t *
707 ip4_add_del_interface_address_internal (vlib_main_t * vm,
709 ip4_address_t * address,
713 vnet_main_t * vnm = vnet_get_main();
714 ip4_main_t * im = &ip4_main;
715 ip_lookup_main_t * lm = &im->lookup_main;
716 clib_error_t * error = 0;
717 u32 if_address_index, elts_before;
718 ip4_address_fib_t ip4_af, * addr_fib = 0;
/* Pair the address with the interface's FIB index and keep it in a
   one-element vector. */
720 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
721 ip4_addr_fib_init (&ip4_af, address,
722 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
723 vec_add1 (addr_fib, ip4_af);
726 * there is no support for adj-fib handling in the presence of overlapping
727 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
732 /* When adding an address check that it does not conflict
733 with an existing address. */
734 ip_interface_address_t * ia;
735 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
736 0 /* honor unnumbered */,
738 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
740 if (ip4_destination_matches_route (im, address, x, ia->address_length)
741 || ip4_destination_matches_route (im, x, address, address_length))
742 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
743 format_ip4_address_and_length, address, address_length,
744 format_ip4_address_and_length, x, ia->address_length,
745 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* Remember the pool size so a real add/delete can be told apart from a
   duplicate request below. */
749 elts_before = pool_elts (lm->if_address_pool);
751 error = ip_interface_address_add_del
761 ip4_sw_interface_enable_disable(sw_if_index, !is_del);
764 ip4_del_interface_routes (im, ip4_af.fib_index, address,
767 ip4_add_interface_routes (sw_if_index,
768 im, ip4_af.fib_index,
770 (lm->if_address_pool, if_address_index));
772 /* If pool did not grow/shrink: add duplicate address. */
773 if (elts_before != pool_elts (lm->if_address_pool))
775 ip4_add_del_interface_address_callback_t * cb;
776 vec_foreach (cb, im->add_del_interface_address_callbacks)
777 cb->function (im, cb->function_opaque, sw_if_index,
778 address, address_length,
/* Public entry point: forwards its arguments to
   ip4_add_del_interface_address_internal and returns that call's result. */
789 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
790 ip4_address_t * address, u32 address_length,
793 return ip4_add_del_interface_address_internal
794 (vm, sw_if_index, address, address_length,
/* Unicast rx feature registrations. Each entry names the graph node that
   implements the feature, the feature(s) it must run before, and the
   ip4_main field that receives its feature index. The runs_before
   constraints visible here chain as:
   ip4-inacl -> ip4-source-check-via-rx -> ip4-source-check-via-any ->
   ip4-policer-classify -> ipsec-input-ip4 -> vpath-input-ip4 ->
   ip4-lookup -> ip4-drop, with ip4-source-and-port-range-check-rx also
   constrained to run before ip4-policer-classify. */
798 /* Built-in ip4 unicast rx feature path definition */
799 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
800 .node_name = "ip4-inacl",
801 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
802 .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
805 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
806 .node_name = "ip4-source-check-via-rx",
807 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
809 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
812 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
813 .node_name = "ip4-source-check-via-any",
814 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
816 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
820 .node_name = "ip4-source-and-port-range-check-rx",
821 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
823 &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
827 .node_name = "ip4-policer-classify",
828 .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
830 &ip4_main.ip4_unicast_rx_feature_policer_classify,
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
834 .node_name = "ipsec-input-ip4",
835 .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
836 .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
839 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
840 .node_name = "vpath-input-ip4",
841 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
842 .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
845 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
846 .node_name = "ip4-lookup",
847 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
848 .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
851 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
852 .node_name = "ip4-drop",
853 .runs_before = 0, /* not before any other features */
854 .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
/* Multicast rx feature registrations, in the same form as the unicast
   ones. The visible constraints order them as:
   vpath-input-ip4 -> ip4-lookup-multicast -> ip4-drop. */
858 /* Built-in ip4 multicast rx feature path definition */
859 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
860 .node_name = "vpath-input-ip4",
861 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
862 .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
865 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
866 .node_name = "ip4-lookup-multicast",
867 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
868 .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
871 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
872 .node_name = "ip4-drop",
873 .runs_before = 0, /* last feature */
874 .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
/* Names of the graph nodes at which the rx and tx feature paths start;
   ip4_feature_init selects one of these two arrays per cast. */
877 static char * rx_feature_start_nodes[] =
878 { "ip4-input", "ip4-input-no-checksum"};
880 static char * tx_feature_start_nodes[] =
881 { "ip4-rewrite-transit"};
/* Tx feature registrations: the source/port-range check is constrained to
   run before "interface-output", which is the last tx feature. */
883 /* Source and port-range check ip4 tx feature path definition */
884 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
885 .node_name = "ip4-source-and-port-range-check-tx",
886 .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
888 &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
892 /* Built-in ip4 tx feature path definition */
893 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
894 .node_name = "interface-output",
895 .runs_before = 0, /* not before any other features */
896 .feature_index = &ip4_main.ip4_tx_feature_interface_output,
/* Initialise the feature configuration for every cast (0 up to
   VNET_N_IP_FEAT - 1) by calling ip_feature_init_cast with the start-node
   list for that cast: the rx list for casts below VNET_IP_TX_FEAT,
   otherwise the tx list. An error from ip_feature_init_cast is captured in
   `error` (presumably returned to the caller; the return line is not
   visible here). */
899 static clib_error_t *
900 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
902 ip_lookup_main_t * lm = &im->lookup_main;
903 clib_error_t * error;
905 ip_config_main_t * cm;
906 vnet_config_main_t * vcm;
907 char **feature_start_nodes;
908 int feature_start_len;
910 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
912 cm = &lm->feature_config_mains[cast];
913 vcm = &cm->config_main;
915 if (cast < VNET_IP_TX_FEAT)
917 feature_start_nodes = rx_feature_start_nodes;
918 feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
922 feature_start_nodes = tx_feature_start_nodes;
923 feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
926 if ((error = ip_feature_init_cast (vm, cm, vcm,
930 VNET_L3_PACKET_TYPE_IP4)))
/* Software-interface add/delete callback (registered below).
   Ensures the interface has a fib_index_by_sw_if_index slot and, for every
   cast, adds or removes the terminal feature of that cast (the unicast drop,
   the multicast drop, or the tx interface-output feature), storing the
   resulting config index per interface. On the delete path the interface's
   ip_enabled_by_sw_if_index counter is reset to 0 when it exists.
   Always returns 0 (no error). */
937 static clib_error_t *
938 ip4_sw_interface_add_del (vnet_main_t * vnm,
942 vlib_main_t * vm = vnm->vlib_main;
943 ip4_main_t * im = &ip4_main;
944 ip_lookup_main_t * lm = &im->lookup_main;
948 /* Fill in lookup tables with default table (0). */
949 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
951 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
953 ip_config_main_t * cm = &lm->feature_config_mains[cast];
954 vnet_config_main_t * vcm = &cm->config_main;
956 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
957 ci = cm->config_index_by_sw_if_index[sw_if_index];
959 if (cast == VNET_IP_RX_UNICAST_FEAT)
960 feature_index = im->ip4_unicast_rx_feature_drop;
961 else if (cast == VNET_IP_RX_MULTICAST_FEAT)
962 feature_index = im->ip4_multicast_rx_feature_drop;
964 feature_index = im->ip4_tx_feature_interface_output;
967 ci = vnet_config_add_feature (vm, vcm,
971 /* # bytes of config data */ 0);
974 ci = vnet_config_del_feature (vm, vcm, ci,
977 /* # bytes of config data */ 0);
978 if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
979 im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
981 cm->config_index_by_sw_if_index[sw_if_index] = ci;
983 * note: do not update the tx feature count here.
987 return /* no error */ 0;
990 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
992 /* Global IP4 main. */
/* Init function for the IPv4 lookup subsystem (registered below).
   Visible steps: fill im->fib_masks[] with network-byte-order masks, one
   per index, initialise the generic lookup main for IPv4, create and lock
   the IPv4 FIB with table id 0, hook the packet-generator header parser
   onto the ip4-lookup node, build the ARP request packet template, and
   finally run ip4_feature_init. */
996 ip4_lookup_init (vlib_main_t * vm)
998 ip4_main_t * im = &ip4_main;
999 clib_error_t * error;
1002 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1007 m = pow2_mask (i) << (32 - i);
1010 im->fib_masks[i] = clib_host_to_net_u32 (m);
1013 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1015 /* Create FIB with index 0 and table id of 0. */
1016 fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1020 pn = pg_get_node (ip4_lookup_node.index);
1021 pn->unformat_edit = unformat_pg_ip4_header;
/* ARP request template: zeroed header, then Ethernet/IPv4 types, 6-byte L2
   and 4-byte L3 address sizes, and the "request" opcode. */
1025 ethernet_arp_header_t h;
1027 memset (&h, 0, sizeof (h));
1029 /* Set target ethernet address to all zeros. */
1030 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1032 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1033 #define _8(f,v) h.f = v;
1034 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1035 _16 (l3_type, ETHERNET_TYPE_IP4);
1036 _8 (n_l2_address_bytes, 6);
1037 _8 (n_l3_address_bytes, 4);
1038 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1042 vlib_packet_template_init (vm,
1043 &im->ip4_arp_request_packet_template,
1046 /* alloc chunk size */ 8,
1050 error = ip4_feature_init (vm, im);
1055 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record shared by the forwarding nodes in this file.
   The code below also reads/writes adj_index, flow_hash and fib_index
   members; packet_data holds a copy of the start of the packet. */
1058 /* Adjacency taken. */
1063 /* Packet data, possibly *after* rewrite. */
1064 u8 packet_data[64 - 1*sizeof(u32)];
1065 } ip4_forward_next_trace_t;
1067 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1069 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1070 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1071 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1072 uword indent = format_get_indent (s);
1073 s = format (s, "%U%U",
1074 format_white_space, indent,
1075 format_ip4_header, t->packet_data, sizeof (t->packet_data));
1079 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1081 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1082 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1083 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1084 uword indent = format_get_indent (s);
1086 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1087 t->fib_index, t->adj_index, t->flow_hash);
1088 s = format (s, "\n%U%U",
1089 format_white_space, indent,
1090 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite nodes: prints a summary line (index, adjacency
   index, formatted adjacency, flow hash) and then the captured packet bytes
   through format_ip_adjacency_packet_data.
   NOTE(review): the value printed under the "tx_sw_if_index" label is
   t->fib_index; confirm that the label matches what the trace stores. */
1094 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1096 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1097 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1098 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1099 vnet_main_t * vnm = vnet_get_main();
1100 uword indent = format_get_indent (s);
1102 s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1103 t->fib_index, t->adj_index, format_ip_adjacency,
1104 vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
1106 s = format (s, "\n%U%U",
1107 format_white_space, indent,
1108 format_ip_adjacency_packet_data,
1110 t->packet_data, sizeof (t->packet_data));
1114 /* Common trace function for all ip4-forward next nodes. */
1116 ip4_forward_next_trace (vlib_main_t * vm,
1117 vlib_node_runtime_t * node,
1118 vlib_frame_t * frame,
1119 vlib_rx_or_tx_t which_adj_index)
1122 ip4_main_t * im = &ip4_main;
1124 n_left = frame->n_vectors;
1125 from = vlib_frame_vector_args (frame);
1130 vlib_buffer_t * b0, * b1;
1131 ip4_forward_next_trace_t * t0, * t1;
1133 /* Prefetch next iteration. */
1134 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1135 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1140 b0 = vlib_get_buffer (vm, bi0);
1141 b1 = vlib_get_buffer (vm, bi1);
1143 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1145 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1146 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1147 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1148 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1149 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1150 vec_elt (im->fib_index_by_sw_if_index,
1151 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1153 clib_memcpy (t0->packet_data,
1154 vlib_buffer_get_current (b0),
1155 sizeof (t0->packet_data));
1157 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1159 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1160 t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1161 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1162 t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1163 vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1164 vec_elt (im->fib_index_by_sw_if_index,
1165 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1166 clib_memcpy (t1->packet_data,
1167 vlib_buffer_get_current (b1),
1168 sizeof (t1->packet_data));
1178 ip4_forward_next_trace_t * t0;
1182 b0 = vlib_get_buffer (vm, bi0);
1184 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1186 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1187 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1188 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1189 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1190 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1191 vec_elt (im->fib_index_by_sw_if_index,
1192 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1193 clib_memcpy (t0->packet_data,
1194 vlib_buffer_get_current (b0),
1195 sizeof (t0->packet_data));
1203 ip4_drop_or_punt (vlib_main_t * vm,
1204 vlib_node_runtime_t * node,
1205 vlib_frame_t * frame,
1206 ip4_error_t error_code)
1208 u32 * buffers = vlib_frame_vector_args (frame);
1209 uword n_packets = frame->n_vectors;
1211 vlib_error_drop_buffers (vm, node,
1216 ip4_input_node.index,
1219 if (node->flags & VLIB_NODE_FLAG_TRACE)
1220 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1226 ip4_drop (vlib_main_t * vm,
1227 vlib_node_runtime_t * node,
1228 vlib_frame_t * frame)
1229 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1232 ip4_punt (vlib_main_t * vm,
1233 vlib_node_runtime_t * node,
1234 vlib_frame_t * frame)
1235 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
/* Graph node registration for ip4_drop: u32-sized vector elements, traces
 * rendered by format_ip4_forward_next_trace.
 * NOTE(review): the node name and next-node table are not visible here. */
1237 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1238 .function = ip4_drop,
1240 .vector_size = sizeof (u32),
1242 .format_trace = format_ip4_forward_next_trace,
1250 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
/* Graph node registration for ip4_punt: u32-sized vector elements, traces
 * rendered by format_ip4_forward_next_trace.
 * NOTE(review): the node name and next-node table are not visible here. */
1252 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1253 .function = ip4_punt,
1255 .vector_size = sizeof (u32),
1257 .format_trace = format_ip4_forward_next_trace,
1265 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
/* Software L4 checksum over the payload that follows IP header IP0 inside
 * buffer P0, continuing through chained buffers when the payload does not
 * fit in the first one.  The sum is seeded with the payload length, the
 * protocol number and the source/destination addresses before the payload
 * bytes are added.
 * NOTE(review): the third parameter line, some declarations, the loop
 * construct and the return statement are not visible in this listing;
 * presumably the folded, complemented value sum16 is returned. */
1267 /* Compute TCP/UDP/ICMP4 checksum in software. */
1269 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1273 u32 ip_header_length, payload_length_host_byte_order;
1274 u32 n_this_buffer, n_bytes_left;
1276 void * data_this_buffer;
1278 /* Initialize checksum with ip header. */
1279 ip_header_length = ip4_header_bytes (ip0);
1280 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
/* Seed: L4 length plus the protocol number, converted to network order. */
1281 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
/* Add the addresses: two u32 loads on 32-bit machines, otherwise a single
 * u64 load starting at src_address (presumably dst_address follows it
 * directly in the header - confirm against ip4_header_t). */
1283 if (BITS (uword) == 32)
1285 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1286 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1289 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1291 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1292 data_this_buffer = (void *) ip0 + ip_header_length;
/* Limit the first pass to the payload bytes actually held by this buffer. */
1293 if (n_this_buffer + ip_header_length > p0->current_length)
1294 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1297 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1298 n_bytes_left -= n_this_buffer;
1299 if (n_bytes_left == 0)
/* Payload remains: move on to the next buffer of the chain. */
1302 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1303 p0 = vlib_get_buffer (vm, p0->next_buffer);
1304 data_this_buffer = vlib_buffer_get_current (p0);
1305 n_this_buffer = p0->current_length;
/* Fold the accumulator and take the complement. */
1308 sum16 = ~ ip_csum_fold (sum0);
/* Validate the TCP or UDP checksum of the IPv4 packet at the current
 * position of buffer P0 and record the outcome in P0->flags.
 * - UDP with an explicit zero checksum is marked computed and correct
 *   without summing anything.
 * - Otherwise the checksum is computed in software; the CORRECT flag is
 *   set only when the computed value is zero.
 * The L4 header is located with ip4_next_header so that IPv4 headers
 * carrying options (IHL > 5) are handled; this matches the checksum
 * routine and ip4_local, which both honour the real header length.
 * NOTE(review): the return statements are not visible in this listing;
 * callers assign the result to their flags variable. */
1314 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1316 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1317 udp_header_t * udp0;
1320 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1321 || ip0->protocol == IP_PROTOCOL_UDP);
1323 udp0 = ip4_next_header (ip0);
1324 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1326 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1327 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1331 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1333 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1334 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/* Node function of ip4_local_node (packets addressed to this host).
 * For every packet in FRAME:
 *  - the source address is looked up, one mtrie step at a time, in the FIB
 *    bound to the RX interface; the steps are interleaved with the other
 *    checks;
 *  - fragments are treated as protocol 0xfe;
 *  - TCP/UDP checksums are validated (in software when not yet computed)
 *    and the UDP length is checked against the IP length;
 *  - a source lookup whose first bucket is a RECEIVE DPO marks the packet
 *    as spoofed;
 *  - packets with any error go to IP_LOCAL_NEXT_DROP, all others to
 *    lm->local_next_by_ip_protocol[proto].
 * Two packets are handled per iteration while at least four remain, then
 * one at a time.  Returns frame->n_vectors.
 * NOTE(review): a number of lines (braces, some declarations, pointer
 * advances) are not visible in this listing. */
1340 ip4_local (vlib_main_t * vm,
1341 vlib_node_runtime_t * node,
1342 vlib_frame_t * frame)
1344 ip4_main_t * im = &ip4_main;
1345 ip_lookup_main_t * lm = &im->lookup_main;
1346 ip_local_next_t next_index;
1347 u32 * from, * to_next, n_left_from, n_left_to_next;
1348 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1350 from = vlib_frame_vector_args (frame);
1351 n_left_from = frame->n_vectors;
1352 next_index = node->cached_next_index;
1354 if (node->flags & VLIB_NODE_FLAG_TRACE)
1355 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1357 while (n_left_from > 0)
1359 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: two packets per iteration. */
1361 while (n_left_from >= 4 && n_left_to_next >= 2)
1363 vlib_buffer_t * p0, * p1;
1364 ip4_header_t * ip0, * ip1;
1365 udp_header_t * udp0, * udp1;
1366 ip4_fib_mtrie_t * mtrie0, * mtrie1;
1367 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1368 const dpo_id_t *dpo0, *dpo1;
1369 const load_balance_t *lb0, *lb1;
1370 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1371 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1372 i32 len_diff0, len_diff1;
1373 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1374 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1377 pi0 = to_next[0] = from[0];
1378 pi1 = to_next[1] = from[1];
1382 n_left_to_next -= 2;
1384 p0 = vlib_get_buffer (vm, pi0);
1385 p1 = vlib_get_buffer (vm, pi1);
1387 ip0 = vlib_buffer_get_current (p0);
1388 ip1 = vlib_buffer_get_current (p1);
1390 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1391 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1392 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1393 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1395 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1396 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1398 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1400 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1401 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1403 /* Treat IP frag packets as "experimental" protocol for now
1404 until support of IP frag reassembly is implemented */
1405 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1406 proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1407 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1408 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1409 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1410 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1415 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1416 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1418 udp0 = ip4_next_header (ip0);
1419 udp1 = ip4_next_header (ip1);
1421 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1422 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1423 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1425 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1426 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1428 /* Verify UDP length. */
1429 ip_len0 = clib_net_to_host_u16 (ip0->length);
1430 ip_len1 = clib_net_to_host_u16 (ip1->length);
1431 udp_len0 = clib_net_to_host_u16 (udp0->length);
1432 udp_len1 = clib_net_to_host_u16 (udp1->length);
1434 len_diff0 = ip_len0 - udp_len0;
1435 len_diff1 = ip_len1 - udp_len1;
1437 len_diff0 = is_udp0 ? len_diff0 : 0;
1438 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: at least one packet is not TCP/UDP or has no verified
 * checksum yet. */
1440 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1441 & good_tcp_udp0 & good_tcp_udp1)))
1446 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1447 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1449 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1450 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1455 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1456 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1458 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1459 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1463 good_tcp_udp0 &= len_diff0 >= 0;
1464 good_tcp_udp1 &= len_diff1 >= 0;
1466 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1467 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* IP4_ERROR_UNKNOWN_PROTOCOL doubles as the "no error so far" value. */
1469 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1471 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1472 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1474 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1475 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1476 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1478 error1 = (is_tcp_udp1 && ! good_tcp_udp1
1479 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1482 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1483 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1484 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1485 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1487 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1488 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1490 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1491 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1493 lb0 = load_balance_get(lbi0);
1494 lb1 = load_balance_get(lbi1);
1495 dpo0 = load_balance_get_bucket_i(lb0, 0);
1496 dpo1 = load_balance_get_bucket_i(lb1, 0);
1499 * Must have a route to source otherwise we drop the packet.
1500 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1502 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1503 && dpo0->dpoi_type != DPO_ADJACENCY
1504 && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1505 && dpo0->dpoi_type != DPO_RECEIVE
1506 && dpo0->dpoi_type != DPO_DROP
1507 && dpo0->dpoi_type != DPO_ADJACENCY_GLEAN
1508 && ip0->dst_address.as_u32 != 0xFFFFFFFF
1509 ? IP4_ERROR_SRC_LOOKUP_MISS
1511 error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1512 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1514 error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1515 && dpo1->dpoi_type != DPO_ADJACENCY
1516 && dpo1->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1517 && dpo1->dpoi_type != DPO_RECEIVE
1518 && dpo1->dpoi_type != DPO_DROP
1519 && dpo1->dpoi_type != DPO_ADJACENCY_GLEAN
1520 && ip1->dst_address.as_u32 != 0xFFFFFFFF
1521 ? IP4_ERROR_SRC_LOOKUP_MISS
/* Spoof check for packet 1 must use the source DPO of packet 1 (dpo1). */
1523 error1 = (dpo1->dpoi_type == DPO_RECEIVE ?
1524 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1527 next0 = lm->local_next_by_ip_protocol[proto0];
1528 next1 = lm->local_next_by_ip_protocol[proto1];
1530 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1531 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1533 p0->error = error0 ? error_node->errors[error0] : 0;
1534 p1->error = error1 ? error_node->errors[error1] : 0;
/* Bit 0: packet 0 leaves the speculated next; bit 1: packet 1 does. */
1536 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1538 if (PREDICT_FALSE (enqueue_code != 0))
1540 switch (enqueue_code)
1546 n_left_to_next += 1;
1547 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1553 n_left_to_next += 1;
1554 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1558 /* A B B or A B C */
1560 n_left_to_next += 2;
1561 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1562 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1565 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1567 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Single loop: same processing, one packet per iteration. */
1574 while (n_left_from > 0 && n_left_to_next > 0)
1578 udp_header_t * udp0;
1579 ip4_fib_mtrie_t * mtrie0;
1580 ip4_fib_mtrie_leaf_t leaf0;
1581 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1583 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1584 load_balance_t *lb0;
1585 const dpo_id_t *dpo0;
1587 pi0 = to_next[0] = from[0];
1591 n_left_to_next -= 1;
1593 p0 = vlib_get_buffer (vm, pi0);
1595 ip0 = vlib_buffer_get_current (p0);
1597 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1598 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1600 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1602 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1604 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1606 /* Treat IP frag packets as "experimental" protocol for now
1607 until support of IP frag reassembly is implemented */
1608 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1609 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1610 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1614 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1616 udp0 = ip4_next_header (ip0);
1618 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1619 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1621 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1623 /* Verify UDP length. */
1624 ip_len0 = clib_net_to_host_u16 (ip0->length);
1625 udp_len0 = clib_net_to_host_u16 (udp0->length);
1627 len_diff0 = ip_len0 - udp_len0;
1629 len_diff0 = is_udp0 ? len_diff0 : 0;
1631 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1636 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1637 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1639 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1640 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1644 good_tcp_udp0 &= len_diff0 >= 0;
1646 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1648 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1650 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1652 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1653 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1654 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1657 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1658 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1660 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1661 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1663 lb0 = load_balance_get(lbi0);
1664 dpo0 = load_balance_get_bucket_i(lb0, 0);
1666 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1667 vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1670 /* Must have a route to source otherwise we drop the packet. */
1671 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1672 && dpo0->dpoi_type != DPO_ADJACENCY
1673 && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1674 && dpo0->dpoi_type != DPO_RECEIVE
1675 && dpo0->dpoi_type != DPO_DROP
1676 && dpo0->dpoi_type != DPO_ADJACENCY_GLEAN
1677 && ip0->dst_address.as_u32 != 0xFFFFFFFF
1678 ? IP4_ERROR_SRC_LOOKUP_MISS
1680 /* Packet originated from a local address => spoofing */
1681 error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1682 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1685 next0 = lm->local_next_by_ip_protocol[proto0];
1687 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1689 p0->error = error0? error_node->errors[error0] : 0;
1691 if (PREDICT_FALSE (next0 != next_index))
1693 n_left_to_next += 1;
1694 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1697 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1700 n_left_to_next -= 1;
1704 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1707 return frame->n_vectors;
/* Graph node registration for "ip4-local".  The next-node table names the
 * drop, punt, UDP lookup and ICMP input nodes; further next nodes are added
 * at run time through ip4_register_protocol. */
1710 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1711 .function = ip4_local,
1712 .name = "ip4-local",
1713 .vector_size = sizeof (u32),
1715 .format_trace = format_ip4_forward_next_trace,
1717 .n_next_nodes = IP_LOCAL_N_NEXT,
1719 [IP_LOCAL_NEXT_DROP] = "error-drop",
1720 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1721 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1722 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1726 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
/* Register NODE_INDEX as the handler of IP protocol number PROTOCOL for
 * locally received packets: the node is added as a next node of
 * ip4_local_node and the resulting next index is stored in
 * lm->local_next_by_ip_protocol[PROTOCOL].  PROTOCOL must be within the
 * bounds of that table (checked by ASSERT only). */
1728 void ip4_register_protocol (u32 protocol, u32 node_index)
1730 vlib_main_t * vm = vlib_get_main();
1731 ip4_main_t * im = &ip4_main;
1732 ip_lookup_main_t * lm = &im->lookup_main;
1734 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1735 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
/* CLI handler for "show ip local": prints a heading followed by every IP
 * protocol number whose entry in lm->local_next_by_ip_protocol differs from
 * IP_LOCAL_NEXT_PUNT, i.e. every protocol that has a registered handler.
 * NOTE(review): the declaration of i and the return statement are not
 * visible in this listing. */
1738 static clib_error_t *
1739 show_ip_local_command_fn (vlib_main_t * vm,
1740 unformat_input_t * input,
1741 vlib_cli_command_t * cmd)
1743 ip4_main_t * im = &ip4_main;
1744 ip_lookup_main_t * lm = &im->lookup_main;
1747 vlib_cli_output (vm, "Protocols handled by ip4_local");
1748 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1750 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1751 vlib_cli_output (vm, "%d", i);
/* CLI registration binding "show ip local" to show_ip_local_command_fn. */
1758 VLIB_CLI_COMMAND (show_ip_local, static) = {
1759 .path = "show ip local",
1760 .function = show_ip_local_command_fn,
1761 .short_help = "Show ip local protocol table",
1765 ip4_arp_inline (vlib_main_t * vm,
1766 vlib_node_runtime_t * node,
1767 vlib_frame_t * frame,
1770 vnet_main_t * vnm = vnet_get_main();
1771 ip4_main_t * im = &ip4_main;
1772 ip_lookup_main_t * lm = &im->lookup_main;
1773 u32 * from, * to_next_drop;
1774 uword n_left_from, n_left_to_next_drop, next_index;
1775 static f64 time_last_seed_change = -1e100;
1776 static u32 hash_seeds[3];
1777 static uword hash_bitmap[256 / BITS (uword)];
1780 if (node->flags & VLIB_NODE_FLAG_TRACE)
1781 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1783 time_now = vlib_time_now (vm);
1784 if (time_now - time_last_seed_change > 1e-3)
1787 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1788 sizeof (hash_seeds));
1789 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1790 hash_seeds[i] = r[i];
1792 /* Mark all hash keys as been no-seen before. */
1793 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1796 time_last_seed_change = time_now;
1799 from = vlib_frame_vector_args (frame);
1800 n_left_from = frame->n_vectors;
1801 next_index = node->cached_next_index;
1802 if (next_index == IP4_ARP_NEXT_DROP)
1803 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1805 while (n_left_from > 0)
1807 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1808 to_next_drop, n_left_to_next_drop);
1810 while (n_left_from > 0 && n_left_to_next_drop > 0)
1812 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1813 ip_adjacency_t * adj0;
1820 p0 = vlib_get_buffer (vm, pi0);
1822 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1823 adj0 = ip_get_adjacency (lm, adj_index0);
1824 ip0 = vlib_buffer_get_current (p0);
1827 * this is the Glean case, so we are ARPing for the
1828 * packet's destination
1834 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1835 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1839 a0 ^= ip0->dst_address.data_u32;
1843 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1847 hash_v3_finalize32 (a0, b0, c0);
1849 c0 &= BITS (hash_bitmap) - 1;
1850 c0 = c0 / BITS (uword);
1851 m0 = (uword) 1 << (c0 % BITS (uword));
1853 bm0 = hash_bitmap[c0];
1854 drop0 = (bm0 & m0) != 0;
1856 /* Mark it as seen. */
1857 hash_bitmap[c0] = bm0 | m0;
1861 to_next_drop[0] = pi0;
1863 n_left_to_next_drop -= 1;
1865 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1871 * Can happen if the control-plane is programming tables
1872 * with traffic flowing; at least that's today's lame excuse.
1874 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1875 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1877 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1880 /* Send ARP request. */
1884 ethernet_arp_header_t * h0;
1885 vnet_hw_interface_t * hw_if0;
1887 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1889 /* Add rewrite/encap string for ARP packet. */
1890 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1892 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1894 /* Src ethernet address in ARP header. */
1895 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1896 sizeof (h0->ip4_over_ethernet[0].ethernet));
1900 /* The interface's source address is stashed in the Glean Adj */
1901 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1903 /* Copy in destination address we are requesting. This is the
1904 * glean case, so it's the packet's destination.*/
1905 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1909 /* Src IP address in ARP header. */
1910 if (ip4_src_address_for_packet(lm, sw_if_index0,
1911 &h0->ip4_over_ethernet[0].ip4))
1913 /* No source address available */
1914 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1915 vlib_buffer_free(vm, &bi0, 1);
1919 /* Copy in destination address we are requesting from the
1921 h0->ip4_over_ethernet[1].ip4.data_u32 =
1922 adj0->sub_type.nbr.next_hop.ip4.as_u32;
1925 vlib_buffer_copy_trace_flag (vm, p0, bi0);
1926 b0 = vlib_get_buffer (vm, bi0);
1927 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1929 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1931 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1935 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1938 return frame->n_vectors;
/* Node function of ip4_arp_node: ip4_arp_inline with is_glean = 0, i.e.
 * the ARP target is the next hop stored in the adjacency. */
1942 ip4_arp (vlib_main_t * vm,
1943 vlib_node_runtime_t * node,
1944 vlib_frame_t * frame)
1946 return (ip4_arp_inline(vm, node, frame, 0));
/* Node function of ip4_glean_node: ip4_arp_inline with is_glean = 1, i.e.
 * the ARP target is the destination address of the packet. */
1950 ip4_glean (vlib_main_t * vm,
1951 vlib_node_runtime_t * node,
1952 vlib_frame_t * frame)
1954 return (ip4_arp_inline(vm, node, frame, 1));
/* Counter descriptions indexed by the IP4_ARP_ERROR_* values; shared by the
 * ip4_arp_node and ip4_glean_node registrations. */
1957 static char * ip4_arp_error_strings[] = {
1958 [IP4_ARP_ERROR_DROP] = "address overflow drops",
1959 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1960 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1961 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1962 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1963 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph node registration for ip4_arp.  Uses ip4_arp_error_strings for its
 * counters; the only statically named next node is "error-drop".
 * NOTE(review): the node name line is not visible in this listing. */
1966 VLIB_REGISTER_NODE (ip4_arp_node) = {
1967 .function = ip4_arp,
1969 .vector_size = sizeof (u32),
1971 .format_trace = format_ip4_forward_next_trace,
1973 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1974 .error_strings = ip4_arp_error_strings,
1976 .n_next_nodes = IP4_ARP_N_NEXT,
1978 [IP4_ARP_NEXT_DROP] = "error-drop",
/* Graph node registration for "ip4-glean".  Uses the same error strings and
 * next-node layout as ip4_arp_node. */
1982 VLIB_REGISTER_NODE (ip4_glean_node) = {
1983 .function = ip4_glean,
1984 .name = "ip4-glean",
1985 .vector_size = sizeof (u32),
1987 .format_trace = format_ip4_forward_next_trace,
1989 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1990 .error_strings = ip4_arp_error_strings,
1992 .n_next_nodes = IP4_ARP_N_NEXT,
1994 [IP4_ARP_NEXT_DROP] = "error-drop",
/* foreach_notrace_ip4_arp_error lists the ARP errors that should not show
 * up in the pcap drop trace (the list entries are not visible in this
 * listing).  arp_notrace_init, registered as an init function below, walks
 * that list and calls vnet_pcap_drop_trace_filter_add_del for the matching
 * error counters of ip4_arp_node. */
1998 #define foreach_notrace_ip4_arp_error \
2004 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2006 vlib_node_runtime_t *rt =
2007 vlib_node_get_runtime (vm, ip4_arp_node.index);
2009 /* don't trace ARP request packets */
2011 vnet_pcap_drop_trace_filter_add_del \
2012 (rt->errors[IP4_ARP_ERROR_##a], \
2014 foreach_notrace_ip4_arp_error;
2019 VLIB_INIT_FUNCTION(arp_notrace_init);
/* Build one ARP request for DST and hand it to the output node of the
 * hardware interface underneath SW_IF_INDEX.
 * Returns an error when the interface is administratively down or when no
 * interface address matches DST (vnm->api_errno is then set to
 * VNET_API_ERROR_NO_MATCHING_INTERFACE); returns 0 on success.
 * NOTE(review): some declarations, conditions and the frame fill-in lines
 * are not visible in this listing. */
2022 /* Send an ARP request to see if given destination is reachable on given interface. */
2024 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2026 vnet_main_t * vnm = vnet_get_main();
2027 ip4_main_t * im = &ip4_main;
2028 ethernet_arp_header_t * h;
2029 ip4_address_t * src;
2030 ip_interface_address_t * ia;
2031 ip_adjacency_t * adj;
2032 vnet_hw_interface_t * hi;
2033 vnet_sw_interface_t * si;
2037 si = vnet_get_sw_interface (vnm, sw_if_index);
2039 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2041 return clib_error_return (0, "%U: interface %U down",
2042 format_ip4_address, dst,
2043 format_vnet_sw_if_index_name, vnm,
/* Pick the interface address to use as ARP sender address. */
2047 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2050 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2051 return clib_error_return
2052 (0, "no matching interface address for destination %U (interface %U)",
2053 format_ip4_address, dst,
2054 format_vnet_sw_if_index_name, vnm, sw_if_index);
2057 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2059 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2061 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender hardware address, sender IP and target IP of the request. */
2063 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2065 h->ip4_over_ethernet[0].ip4 = src[0];
2066 h->ip4_over_ethernet[1].ip4 = dst[0];
2068 b = vlib_get_buffer (vm, bi);
2069 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2071 /* Add encapsulation string for software interface (e.g. ethernet header). */
2072 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2073 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2076 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2077 u32 * to_next = vlib_frame_vector_args (f);
2080 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2083 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline: DROP is the default for
 * packets with an error, ARP overrides the next index for ARP adjacencies
 * on locally received packets, ICMP_ERROR is taken when the TTL expires.
 * NOTE(review): the opening line of the enum is not visible here. */
2087 IP4_REWRITE_NEXT_DROP,
2088 IP4_REWRITE_NEXT_ARP,
2089 IP4_REWRITE_NEXT_ICMP_ERROR,
2090 } ip4_rewrite_next_t;
2093 ip4_rewrite_inline (vlib_main_t * vm,
2094 vlib_node_runtime_t * node,
2095 vlib_frame_t * frame,
2096 int rewrite_for_locally_received_packets)
2098 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2099 u32 * from = vlib_frame_vector_args (frame);
2100 u32 n_left_from, n_left_to_next, * to_next, next_index;
2101 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2102 vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2103 ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2105 n_left_from = frame->n_vectors;
2106 next_index = node->cached_next_index;
2107 u32 cpu_index = os_get_cpu_number();
2109 while (n_left_from > 0)
2111 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2113 while (n_left_from >= 4 && n_left_to_next >= 2)
2115 ip_adjacency_t * adj0, * adj1;
2116 vlib_buffer_t * p0, * p1;
2117 ip4_header_t * ip0, * ip1;
2118 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2119 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2120 u32 next0_override, next1_override;
2121 u32 tx_sw_if_index0, tx_sw_if_index1;
2123 if (rewrite_for_locally_received_packets)
2124 next0_override = next1_override = 0;
2126 /* Prefetch next iteration. */
2128 vlib_buffer_t * p2, * p3;
2130 p2 = vlib_get_buffer (vm, from[2]);
2131 p3 = vlib_get_buffer (vm, from[3]);
2133 vlib_prefetch_buffer_header (p2, STORE);
2134 vlib_prefetch_buffer_header (p3, STORE);
2136 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2137 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2140 pi0 = to_next[0] = from[0];
2141 pi1 = to_next[1] = from[1];
2146 n_left_to_next -= 2;
2148 p0 = vlib_get_buffer (vm, pi0);
2149 p1 = vlib_get_buffer (vm, pi1);
2151 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2152 adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2154 /* We should never rewrite a pkt using the MISS adjacency */
2155 ASSERT(adj_index0 && adj_index1);
2157 ip0 = vlib_buffer_get_current (p0);
2158 ip1 = vlib_buffer_get_current (p1);
2160 error0 = error1 = IP4_ERROR_NONE;
2161 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2163 /* Decrement TTL & update checksum.
2164 Works either endian, so no need for byte swap. */
2165 if (! rewrite_for_locally_received_packets)
2167 i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2169 /* Input node should have reject packets with ttl 0. */
2170 ASSERT (ip0->ttl > 0);
2171 ASSERT (ip1->ttl > 0);
2173 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2174 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2176 checksum0 += checksum0 >= 0xffff;
2177 checksum1 += checksum1 >= 0xffff;
2179 ip0->checksum = checksum0;
2180 ip1->checksum = checksum1;
2189 * If the ttl drops below 1 when forwarding, generate
2192 if (PREDICT_FALSE(ttl0 <= 0))
2194 error0 = IP4_ERROR_TIME_EXPIRED;
2195 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2196 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2197 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2198 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2200 if (PREDICT_FALSE(ttl1 <= 0))
2202 error1 = IP4_ERROR_TIME_EXPIRED;
2203 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2204 icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2205 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2206 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2209 /* Verify checksum. */
2210 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2211 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2214 /* Rewrite packet header and updates lengths. */
2215 adj0 = ip_get_adjacency (lm, adj_index0);
2216 adj1 = ip_get_adjacency (lm, adj_index1);
2218 if (rewrite_for_locally_received_packets)
2220 if (PREDICT_FALSE(adj0->lookup_next_index
2221 == IP_LOOKUP_NEXT_ARP))
2222 next0_override = IP4_REWRITE_NEXT_ARP;
2223 if (PREDICT_FALSE(adj1->lookup_next_index
2224 == IP_LOOKUP_NEXT_ARP))
2225 next1_override = IP4_REWRITE_NEXT_ARP;
2228 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2229 rw_len0 = adj0[0].rewrite_header.data_bytes;
2230 rw_len1 = adj1[0].rewrite_header.data_bytes;
2231 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2232 vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2234 /* Check MTU of outgoing interface. */
2235 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2236 ? IP4_ERROR_MTU_EXCEEDED
2238 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2239 ? IP4_ERROR_MTU_EXCEEDED
2242 next0 = (error0 == IP4_ERROR_NONE)
2243 ? adj0[0].rewrite_header.next_index : next0;
2245 if (rewrite_for_locally_received_packets)
2246 next0 = next0 && next0_override ? next0_override : next0;
2248 next1 = (error1 == IP4_ERROR_NONE)
2249 ? adj1[0].rewrite_header.next_index : next1;
2251 if (rewrite_for_locally_received_packets)
2252 next1 = next1 && next1_override ? next1_override : next1;
2255 * We've already accounted for an ethernet_header_t elsewhere
2257 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2258 vlib_increment_combined_counter
2259 (&adjacency_counters,
2260 cpu_index, adj_index0,
2261 /* packet increment */ 0,
2262 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2264 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2265 vlib_increment_combined_counter
2266 (&adjacency_counters,
2267 cpu_index, adj_index1,
2268 /* packet increment */ 0,
2269 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2271 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2272 * to see the IP headerr */
2273 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2275 p0->current_data -= rw_len0;
2276 p0->current_length += rw_len0;
2277 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2278 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2282 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2285 p0->current_config_index =
2286 vec_elt (cm->config_index_by_sw_if_index,
2288 vnet_get_config_data (&cm->config_main,
2289 &p0->current_config_index,
2291 /* # bytes of config data */ 0);
2294 if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2296 p1->current_data -= rw_len1;
2297 p1->current_length += rw_len1;
2299 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2300 vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2304 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2307 p1->current_config_index =
2308 vec_elt (cm->config_index_by_sw_if_index,
2310 vnet_get_config_data (&cm->config_main,
2311 &p1->current_config_index,
2313 /* # bytes of config data */ 0);
2317 /* Guess we are only writing on simple Ethernet header. */
2318 vnet_rewrite_two_headers (adj0[0], adj1[0],
2320 sizeof (ethernet_header_t));
2322 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2323 to_next, n_left_to_next,
2324 pi0, pi1, next0, next1);
2327 while (n_left_from > 0 && n_left_to_next > 0)
2329 ip_adjacency_t * adj0;
2332 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2334 u32 tx_sw_if_index0;
2336 if (rewrite_for_locally_received_packets)
2339 pi0 = to_next[0] = from[0];
2341 p0 = vlib_get_buffer (vm, pi0);
2343 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2345 /* We should never rewrite a pkt using the MISS adjacency */
2348 adj0 = ip_get_adjacency (lm, adj_index0);
2350 ip0 = vlib_buffer_get_current (p0);
2352 error0 = IP4_ERROR_NONE;
2353 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2355 /* Decrement TTL & update checksum. */
2356 if (! rewrite_for_locally_received_packets)
2358 i32 ttl0 = ip0->ttl;
2360 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2362 checksum0 += checksum0 >= 0xffff;
2364 ip0->checksum = checksum0;
2366 ASSERT (ip0->ttl > 0);
2372 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2374 if (PREDICT_FALSE(ttl0 <= 0))
2377 * If the ttl drops below 1 when forwarding, generate
2380 error0 = IP4_ERROR_TIME_EXPIRED;
2381 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2382 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2383 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2384 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2388 if (rewrite_for_locally_received_packets)
2391 * We have to override the next_index in ARP adjacencies,
2392 * because they're set up for ip4-arp, not this node...
2394 if (PREDICT_FALSE(adj0->lookup_next_index
2395 == IP_LOOKUP_NEXT_ARP))
2396 next0_override = IP4_REWRITE_NEXT_ARP;
2399 /* Guess we are only writing on simple Ethernet header. */
2400 vnet_rewrite_one_header (adj0[0], ip0,
2401 sizeof (ethernet_header_t));
2403 /* Update packet buffer attributes/set output interface. */
2404 rw_len0 = adj0[0].rewrite_header.data_bytes;
2405 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2407 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2408 vlib_increment_combined_counter
2409 (&adjacency_counters,
2410 cpu_index, adj_index0,
2411 /* packet increment */ 0,
2412 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2414 /* Check MTU of outgoing interface. */
2415 error0 = (vlib_buffer_length_in_chain (vm, p0)
2416 > adj0[0].rewrite_header.max_l3_packet_bytes
2417 ? IP4_ERROR_MTU_EXCEEDED
2420 p0->error = error_node->errors[error0];
2422 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2423 * to see the IP header */
2424 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2426 p0->current_data -= rw_len0;
2427 p0->current_length += rw_len0;
2428 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2430 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2431 next0 = adj0[0].rewrite_header.next_index;
2434 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2437 p0->current_config_index =
2438 vec_elt (cm->config_index_by_sw_if_index,
2440 vnet_get_config_data (&cm->config_main,
2441 &p0->current_config_index,
2443 /* # bytes of config data */ 0);
2447 if (rewrite_for_locally_received_packets)
2448 next0 = next0 && next0_override ? next0_override : next0;
2453 n_left_to_next -= 1;
2455 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2456 to_next, n_left_to_next,
2460 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2463 /* Need to do trace after rewrites to pick up new packet data. */
2464 if (node->flags & VLIB_NODE_FLAG_TRACE)
2465 ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2467 return frame->n_vectors;
2471 /** @brief IPv4 transit rewrite node.
2472 @node ip4-rewrite-transit
2474 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2475 header checksum, fetch the ip adjacency, check the outbound mtu,
2476 apply the adjacency rewrite, and send pkts to the adjacency
2477 rewrite header's next_index.
2479 @param vm vlib_main_t corresponding to the current thread
2480 @param node vlib_node_runtime_t
2481 @param frame vlib_frame_t whose contents should be dispatched
2483 @par Graph mechanics: buffer metadata, next index usage
2486 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2487 - the rewrite adjacency index
2488 - <code>adj->lookup_next_index</code>
2489 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2490 the packet will be dropped.
2491 - <code>adj->rewrite_header</code>
2492 - Rewrite string length, rewrite string, next_index
2495 - <code>b->current_data, b->current_length</code>
2496 - Updated net of applying the rewrite string
2498 <em>Next Indices:</em>
2499 - <code> adj->rewrite_header.next_index </code>
/* Node function registered as "ip4-rewrite-transit".
 * Thin wrapper: runs the shared ip4_rewrite_inline loop with the
 * locally-received flag cleared, which selects the path that decrements
 * the TTL and patches the header checksum.
 * Returns the value returned by ip4_rewrite_inline. */
2503 ip4_rewrite_transit (vlib_main_t * vm,
2504 vlib_node_runtime_t * node,
2505 vlib_frame_t * frame)
2507 return ip4_rewrite_inline (vm, node, frame,
2508 /* rewrite_for_locally_received_packets */ 0);
2511 /** @brief IPv4 local rewrite node.
2512 @node ip4-rewrite-local
2514 This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2515 the outbound interface mtu, apply the adjacency rewrite, and send
2516 pkts to the adjacency rewrite header's next_index. Deal with the
2517 corner case of a peer sending an icmp4 packet w/ src = dst =
2518 interface addr.
2520 @param vm vlib_main_t corresponding to the current thread
2521 @param node vlib_node_runtime_t
2522 @param frame vlib_frame_t whose contents should be dispatched
2524 @par Graph mechanics: buffer metadata, next index usage
2527 - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2528 - the rewrite adjacency index
2529 - <code>adj->lookup_next_index</code>
2530 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2531 the packet will be dropped.
2532 - <code>adj->rewrite_header</code>
2533 - Rewrite string length, rewrite string, next_index
2536 - <code>b->current_data, b->current_length</code>
2537 - Updated net of applying the rewrite string
2539 <em>Next Indices:</em>
2540 - <code> adj->rewrite_header.next_index </code>
/* Node function registered as "ip4-rewrite-local".
 * Thin wrapper: runs the shared ip4_rewrite_inline loop with the
 * locally-received flag set, so the TTL-decrement step is skipped and
 * ARP adjacencies get their next index overridden to IP4_REWRITE_NEXT_ARP.
 * Returns the value returned by ip4_rewrite_inline. */
2545 ip4_rewrite_local (vlib_main_t * vm,
2546 vlib_node_runtime_t * node,
2547 vlib_frame_t * frame)
2549 return ip4_rewrite_inline (vm, node, frame,
2550 /* rewrite_for_locally_received_packets */ 1);
/* Node function registered as "ip4-midchain".
 * Calls ip4_rewrite_inline with exactly the same arguments as
 * ip4_rewrite_transit (locally-received flag cleared); only the node
 * registration differs. */
2554 ip4_midchain (vlib_main_t * vm,
2555 vlib_node_runtime_t * node,
2556 vlib_frame_t * frame)
2558 return ip4_rewrite_inline (vm, node, frame,
2559 /* rewrite_for_locally_received_packets */ 0);
/* Graph-node registration for "ip4-rewrite-transit".
 * The frame carries u32 buffer indices; traces are rendered by
 * format_ip4_rewrite_trace. */
2562 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2563 .function = ip4_rewrite_transit,
2564 .name = "ip4-rewrite-transit",
2565 .vector_size = sizeof (u32),
2567 .format_trace = format_ip4_rewrite_trace,
/* Next-node table, indexed by the IP4_REWRITE_NEXT_* values used in
 * ip4_rewrite_inline. */
2571 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2572 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2573 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2577 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
/* Graph-node registration for "ip4-midchain".
 * Uses the generic format_ip4_forward_next_trace formatter.
 * NOTE(review): only the DROP and ARP next indices are listed in the
 * visible lines, while ip4_rewrite_inline can also select
 * IP4_REWRITE_NEXT_ICMP_ERROR -- confirm against the full next-node table. */
2579 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2580 .function = ip4_midchain,
2581 .name = "ip4-midchain",
2582 .vector_size = sizeof (u32),
2584 .format_trace = format_ip4_forward_next_trace,
2588 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2589 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2593 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
/* Graph-node registration for "ip4-rewrite-local".
 * Declared as a sibling of "ip4-rewrite-transit" and lists no next nodes
 * of its own in the visible lines. */
2595 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2596 .function = ip4_rewrite_local,
2597 .name = "ip4-rewrite-local",
2598 .vector_size = sizeof (u32),
2600 .sibling_of = "ip4-rewrite-transit",
2602 .format_trace = format_ip4_rewrite_trace,
2607 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
/* CLI handler for "set interface ip table".
 * Parses an interface followed by a numeric table id, obtains a FIB index
 * from fib_table_find_or_create_and_lock for FIB_PROTOCOL_IP4, and records
 * that index as the interface's IPv4 FIB.
 *
 * @param vm     vlib main for the calling thread
 * @param input  unparsed CLI arguments: <interface> <table-id>
 * @param cmd    the CLI command being executed
 * @return a clib error describing the parse failure; `error` is
 *         initialised to 0 for the no-error case
 */
2609 static clib_error_t *
2610 add_del_interface_table (vlib_main_t * vm,
2611 unformat_input_t * input,
2612 vlib_cli_command_t * cmd)
2614 vnet_main_t * vnm = vnet_get_main();
2615 clib_error_t * error = 0;
2616 u32 sw_if_index, table_id;
/* First argument: the software interface. */
2620 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2622 error = clib_error_return (0, "unknown interface `%U'",
2623 format_unformat_error, input);
/* Second argument: the numeric table id. */
2627 if (unformat (input, "%d", &table_id))
2631 error = clib_error_return (0, "expected table id `%U'",
2632 format_unformat_error, input);
2637 ip4_main_t * im = &ip4_main;
2640 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2645 // changing an interface's table has consequences for any connecteds
2646 // and adj-fibs already installed.
/* Grow the per-interface vector if needed, then bind the interface. */
2648 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2649 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2657 * Place the indicated interface into the supplied VRF
2660 * @cliexstart{set interface ip table}
2662 * vpp# set interface ip table GigabitEthernet2/0/0 2
2664 * Interface addresses added after setting the interface IP table end up in the indicated VRF table.
2665 * Predictable but potentially counter-intuitive results occur if you provision interface addresses in multiple FIBs.
2666 * Upon RX, packets will be processed in the last IP table ID provisioned.
2667 * It might be marginally useful to evade source RPF drops to put an interface address into multiple FIBs.
/* Registers the "set interface ip table" CLI command, handled by
 * add_del_interface_table. */
2670 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2671 .path = "set interface ip table",
2672 .function = add_del_interface_table,
2673 .short_help = "Add/delete FIB table id for interface",
/* Node function registered as "ip4-lookup-multicast".
 * For every buffer in the frame: pick a FIB index, look up the load-balance
 * object for the packet with ip4_fib_table_lookup_lb, compute the flow hash,
 * select a bucket (DPO) from it, store the DPO index in
 * ip.adj_index[VLIB_TX], bump the per-load-balance combined counter and
 * enqueue the buffer to the DPO's next node.
 *
 * @param vm     vlib main for the calling thread
 * @param node   runtime of this node; supplies the cached next index
 * @param frame  frame of u32 buffer indices to process
 * @return frame->n_vectors
 */
2678 ip4_lookup_multicast (vlib_main_t * vm,
2679 vlib_node_runtime_t * node,
2680 vlib_frame_t * frame)
2682 ip4_main_t * im = &ip4_main;
2683 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2684 u32 n_left_from, n_left_to_next, * from, * to_next;
2685 ip_lookup_next_t next;
2686 u32 cpu_index = os_get_cpu_number();
2688 from = vlib_frame_vector_args (frame);
2689 n_left_from = frame->n_vectors;
2690 next = node->cached_next_index;
2692 while (n_left_from > 0)
2694 vlib_get_next_frame (vm, node, next,
2695 to_next, n_left_to_next);
/* Dual loop: handle two packets per iteration while prefetching the
 * following two, hence the requirement for at least four inputs. */
2697 while (n_left_from >= 4 && n_left_to_next >= 2)
2699 vlib_buffer_t * p0, * p1;
2700 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2701 ip_lookup_next_t next0, next1;
2702 ip4_header_t * ip0, * ip1;
2703 u32 fib_index0, fib_index1;
2704 const dpo_id_t *dpo0, *dpo1;
2705 const load_balance_t * lb0, * lb1;
2707 /* Prefetch next iteration. */
2709 vlib_buffer_t * p2, * p3;
2711 p2 = vlib_get_buffer (vm, from[2]);
2712 p3 = vlib_get_buffer (vm, from[3]);
2714 vlib_prefetch_buffer_header (p2, LOAD);
2715 vlib_prefetch_buffer_header (p3, LOAD);
2717 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2718 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
/* Speculatively enqueue both buffers to the current next frame. */
2721 pi0 = to_next[0] = from[0];
2722 pi1 = to_next[1] = from[1];
2724 p0 = vlib_get_buffer (vm, pi0);
2725 p1 = vlib_get_buffer (vm, pi1);
2727 ip0 = vlib_buffer_get_current (p0);
2728 ip1 = vlib_buffer_get_current (p1);
/* Default FIB is the RX interface's table; a VLIB_TX value other than ~0
 * is used as the FIB index instead. */
2730 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2731 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2732 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2733 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2734 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2735 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2737 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2739 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2742 lb0 = load_balance_get (lb_index0);
2743 lb1 = load_balance_get (lb_index1);
/* Bucket counts must be non-zero powers of two so that masking with
 * lb_n_buckets_minus_1 yields a valid bucket index. */
2745 ASSERT (lb0->lb_n_buckets > 0);
2746 ASSERT (is_pow2 (lb0->lb_n_buckets));
2747 ASSERT (lb1->lb_n_buckets > 0);
2748 ASSERT (is_pow2 (lb1->lb_n_buckets));
2750 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2751 (ip0, lb0->lb_hash_config);
2753 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2754 (ip1, lb1->lb_hash_config);
/* Each packet's hash is masked with the bucket mask of its OWN
 * load-balance object: lb0 and lb1 may have different bucket counts, so
 * using lb0's mask for packet 1 could index past the end of lb1. */
2756 dpo0 = load_balance_get_bucket_i(lb0,
2757 (vnet_buffer (p0)->ip.flow_hash &
2758 (lb0->lb_n_buckets_minus_1)));
2759 dpo1 = load_balance_get_bucket_i(lb1,
2760 (vnet_buffer (p1)->ip.flow_hash &
2761 (lb1->lb_n_buckets_minus_1)));
2763 next0 = dpo0->dpoi_next_node;
2764 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2765 next1 = dpo1->dpoi_next_node;
2766 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Count one packet and its chain length in bytes against each
 * load-balance index. */
2768 if (1) /* $$$$$$ HACK FIXME */
2769 vlib_increment_combined_counter
2770 (cm, cpu_index, lb_index0, 1,
2771 vlib_buffer_length_in_chain (vm, p0));
2772 if (1) /* $$$$$$ HACK FIXME */
2773 vlib_increment_combined_counter
2774 (cm, cpu_index, lb_index1, 1,
2775 vlib_buffer_length_in_chain (vm, p1));
2779 n_left_to_next -= 2;
/* wrong_next: bit 0 set when packet 0 does not go to the cached next,
 * bit 1 set when packet 1 does not; non-zero means the speculative
 * enqueue above has to be undone for the affected packet(s). */
2782 wrong_next = (next0 != next) + 2*(next1 != next);
2783 if (PREDICT_FALSE (wrong_next != 0))
2791 n_left_to_next += 1;
2792 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2798 n_left_to_next += 1;
2799 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2805 n_left_to_next += 2;
2806 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2807 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2811 vlib_put_next_frame (vm, node, next, n_left_to_next);
2813 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single loop: same processing, one packet at a time, for the remainder. */
2819 while (n_left_from > 0 && n_left_to_next > 0)
2824 ip_lookup_next_t next0;
2826 const dpo_id_t *dpo0;
2827 const load_balance_t * lb0;
2832 p0 = vlib_get_buffer (vm, pi0);
2834 ip0 = vlib_buffer_get_current (p0);
2836 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2837 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2838 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2839 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2841 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2844 lb0 = load_balance_get (lb_index0);
2846 ASSERT (lb0->lb_n_buckets > 0);
2847 ASSERT (is_pow2 (lb0->lb_n_buckets));
2849 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2850 (ip0, lb0->lb_hash_config);
2852 dpo0 = load_balance_get_bucket_i(lb0,
2853 (vnet_buffer (p0)->ip.flow_hash &
2854 (lb0->lb_n_buckets_minus_1)));
2856 next0 = dpo0->dpoi_next_node;
2857 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2859 if (1) /* $$$$$$ HACK FIXME */
2860 vlib_increment_combined_counter
2861 (cm, cpu_index, lb_index0, 1,
2862 vlib_buffer_length_in_chain (vm, p0));
2866 n_left_to_next -= 1;
/* Packet goes elsewhere: give back the slot, close the current next frame
 * and open another one before re-enqueueing. */
2869 if (PREDICT_FALSE (next0 != next))
2871 n_left_to_next += 1;
2872 vlib_put_next_frame (vm, node, next, n_left_to_next);
2874 vlib_get_next_frame (vm, node, next,
2875 to_next, n_left_to_next);
2878 n_left_to_next -= 1;
2882 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing is done after the lookup, only when enabled on this node. */
2885 if (node->flags & VLIB_NODE_FLAG_TRACE)
2886 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2888 return frame->n_vectors;
/* Graph-node registration for "ip4-lookup-multicast".
 * Declared as a sibling of "ip4-lookup"; traces are rendered by
 * format_ip4_lookup_trace. */
2891 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2892 .function = ip4_lookup_multicast,
2893 .name = "ip4-lookup-multicast",
2894 .vector_size = sizeof (u32),
2895 .sibling_of = "ip4-lookup",
2896 .format_trace = format_ip4_lookup_trace,
2901 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
/* Graph-node registration for "ip4-multicast".
 * Its node function is ip4_drop, the same function name the drop path
 * uses. */
2903 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2904 .function = ip4_drop,
2905 .name = "ip4-multicast",
2906 .vector_size = sizeof (u32),
2908 .format_trace = format_ip4_forward_next_trace,
/* Cross-checks one address lookup in the given FIB.
 * Walks the FIB's mtrie by hand in four lookup steps (indices 0..3),
 * substitutes the mtrie's default leaf when the walk ends on an empty leaf,
 * and compares the index stored in the resulting leaf with what
 * ip4_fib_table_lookup_lb returns for the same address.
 *
 * @param a           address to look up
 * @param fib_index0  index of the FIB to search
 * @return non-zero when both lookups yield the same index, 0 otherwise
 */
2916 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2918 ip4_fib_mtrie_t * mtrie0;
2919 ip4_fib_mtrie_leaf_t leaf0;
2922 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2924 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2925 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2926 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2927 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2928 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2930 /* Handle default route. */
2931 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2933 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2935 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
/* CLI handler for "test lookup".
 * Accepts "table <n>", "count <f>" and a base IPv4 address, then runs
 * ip4_lookup_validate n times, advancing the address by one (in host byte
 * order) after each probe, and reports the outcome on the CLI.
 * NOTE(review): table_id is passed straight to ip4_lookup_validate, whose
 * parameter is a FIB index -- confirm that this is intended.
 *
 * @param vm     vlib main, used for CLI output
 * @param input  unparsed CLI arguments
 * @param cmd    the CLI command being executed
 * @return an "unknown input" error for unrecognised arguments
 */
2938 static clib_error_t *
2939 test_lookup_command_fn (vlib_main_t * vm,
2940 unformat_input_t * input,
2941 vlib_cli_command_t * cmd)
2947 ip4_address_t ip4_base_address;
2950 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2951 if (unformat (input, "table %d", &table_id))
2953 else if (unformat (input, "count %f", &count))
2956 else if (unformat (input, "%U",
2957 unformat_ip4_address, &ip4_base_address))
2960 return clib_error_return (0, "unknown input `%U'",
2961 format_unformat_error, input);
2966 for (i = 0; i < n; i++)
/* A zero return means the mtrie walk and the FIB lookup disagreed. */
2968 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Step to the next address: convert to host order, add one, convert back. */
2971 ip4_base_address.as_u32 =
2972 clib_host_to_net_u32 (1 +
2973 clib_net_to_host_u32 (ip4_base_address.as_u32));
2977 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2979 vlib_cli_output (vm, "No errors in %d lookups\n", n);
/* Registers the "test lookup" CLI command, handled by
 * test_lookup_command_fn. */
2984 VLIB_CLI_COMMAND (lookup_test_command, static) = {
2985 .path = "test lookup",
2986 .short_help = "test lookup",
2987 .function = test_lookup_command_fn,
/* Sets the flow-hash configuration of the IPv4 FIB with the given table id.
 * The table id is resolved through the fib_index_by_table_id hash; the
 * flow_hash_config field of the resulting FIB is then overwritten.
 *
 * @param table_id          user-visible FIB table id
 * @param flow_hash_config  new flow-hash configuration value
 * @return VNET_API_ERROR_NO_SUCH_FIB on the failure path shown below
 *         (presumably when the hash lookup finds nothing -- the guarding
 *         condition is not visible here)
 */
2990 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2992 ip4_main_t * im4 = &ip4_main;
2994 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
2997 return VNET_API_ERROR_NO_SUCH_FIB;
/* p[0] is the value stored in the hash: the FIB index. */
2999 fib = ip4_fib_get (p[0]);
3001 fib->flow_hash_config = flow_hash_config;
/* CLI handler for "set ip flow-hash".
 * Parses "table <id>" plus any of the keywords generated from
 * foreach_flow_hash_bit, OR-ing the matching bit into flow_hash_config,
 * then applies the result with vnet_set_ip4_flow_hash and turns its
 * return code into a CLI error.
 *
 * @param vm     vlib main for the calling thread
 * @param input  unparsed CLI arguments
 * @param cmd    the CLI command being executed
 * @return "unknown input" for unrecognised arguments, "no such FIB table"
 *         when vnet_set_ip4_flow_hash reports VNET_API_ERROR_NO_SUCH_FIB
 */
3005 static clib_error_t *
3006 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3007 unformat_input_t * input,
3008 vlib_cli_command_t * cmd)
3012 u32 flow_hash_config = 0;
3015 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3016 if (unformat (input, "table %d", &table_id))
/* Expanded once per flow-hash bit: #a is the keyword, v its bit value. */
3019 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3020 foreach_flow_hash_bit
3026 return clib_error_return (0, "unknown input `%U'",
3027 format_unformat_error, input);
3029 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3035 case VNET_API_ERROR_NO_SUCH_FIB:
3036 return clib_error_return (0, "no such FIB table %d", table_id);
/* Any other return code is only logged. */
3039 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
/* Registers the "set ip flow-hash" CLI command, handled by
 * set_ip_flow_hash_command_fn. The help text starts with the registered
 * path so that it shows the command users actually have to type. */
3046 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3047 .path = "set ip flow-hash",
3049 "set ip flow-hash table <fib-id> src dst sport dport proto reverse",
3050 .function = set_ip_flow_hash_command_fn,
/* Binds (or unbinds) a classifier table to an interface for IPv4.
 * Validates both indices, records table_index for the interface, and, if
 * the interface has a first IPv4 address, updates the FIB entry for that
 * address under FIB_SOURCE_CLASSIFY.
 *
 * @param vm           vlib main for the calling thread
 * @param sw_if_index  interface to configure
 * @param table_index  classifier table index; ~0 skips the table check
 * @return VNET_API_ERROR_NO_MATCHING_INTERFACE for a free sw_if_index,
 *         VNET_API_ERROR_NO_SUCH_ENTRY for a free classifier table index
 */
3053 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3056 vnet_main_t * vnm = vnet_get_main();
3057 vnet_interface_main_t * im = &vnm->interface_main;
3058 ip4_main_t * ipm = &ip4_main;
3059 ip_lookup_main_t * lm = &ipm->lookup_main;
3060 vnet_classify_main_t * cm = &vnet_classify_main;
3061 ip4_address_t *if_addr;
3063 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3064 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3066 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3067 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Remember the binding, growing the per-interface vector if needed. */
3069 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3070 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3072 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* The FIB is only touched when the interface has an address. */
3074 if (NULL != if_addr)
3076 fib_prefix_t pfx = {
3078 .fp_proto = FIB_PROTOCOL_IP4,
3079 .fp_addr.ip4 = *if_addr,
3083 fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
/* A real table index installs a classify DPO for the prefix. */
3087 if (table_index != (u32) ~0)
3089 dpo_id_t dpo = DPO_NULL;
3094 classify_dpo_create(FIB_PROTOCOL_IP4,
3097 fib_table_entry_special_dpo_add(fib_index,
3099 FIB_SOURCE_CLASSIFY,
3100 FIB_ENTRY_FLAG_NONE,
/* NOTE(review): presumably the alternative branch for table_index == ~0;
 * the branch keyword is not visible here. */
3106 fib_table_entry_special_remove(fib_index,
3108 FIB_SOURCE_CLASSIFY);
/* CLI handler for "set ip classify".
 * Parses "table-index <n>" and "intfc <interface>", requires both to be
 * present, then calls vnet_set_ip4_classify_intfc and maps its error codes
 * to CLI error messages.
 *
 * @param vm     vlib main for the calling thread
 * @param input  unparsed CLI arguments
 * @param cmd    the CLI command being executed
 * @return an error when an argument is missing or when the interface or
 *         classifier table does not exist
 */
3115 static clib_error_t *
3116 set_ip_classify_command_fn (vlib_main_t * vm,
3117 unformat_input_t * input,
3118 vlib_cli_command_t * cmd)
3120 u32 table_index = ~0;
/* Tracked separately because ~0 is itself a table_index value that
 * vnet_set_ip4_classify_intfc treats specially. */
3121 int table_index_set = 0;
3122 u32 sw_if_index = ~0;
3125 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3126 if (unformat (input, "table-index %d", &table_index))
3127 table_index_set = 1;
3128 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3129 vnet_get_main(), &sw_if_index))
3135 if (table_index_set == 0)
3136 return clib_error_return (0, "classify table-index must be specified");
3138 if (sw_if_index == ~0)
3139 return clib_error_return (0, "interface / subif must be specified");
3141 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3148 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3149 return clib_error_return (0, "No such interface");
3151 case VNET_API_ERROR_NO_SUCH_ENTRY:
3152 return clib_error_return (0, "No such classifier table");
3157 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3158 .path = "set ip classify",
3160 "set ip classify intfc <int> table-index <index>",
3161 .function = set_ip_classify_command_fn,