2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/ip4_fib.h>
50 #include <vnet/dpo/load_balance.h>
51 #include <vnet/dpo/classify_dpo.h>
/* Forward declaration of the common trace helper for ip4-forward next
   nodes; it is defined further down in this file and is called by the
   lookup, load-balance and drop/punt nodes. */
54 ip4_forward_next_trace (vlib_main_t * vm,
55 vlib_node_runtime_t * node,
57 vlib_rx_or_tx_t which_adj_index);
60 ip4_lookup_inline (vlib_main_t * vm,
61 vlib_node_runtime_t * node,
63 int lookup_for_responses_to_locally_received_packets)
65 ip4_main_t * im = &ip4_main;
66 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
67 u32 n_left_from, n_left_to_next, * from, * to_next;
68 ip_lookup_next_t next;
69 u32 cpu_index = os_get_cpu_number();
71 from = vlib_frame_vector_args (frame);
72 n_left_from = frame->n_vectors;
73 next = node->cached_next_index;
75 while (n_left_from > 0)
77 vlib_get_next_frame (vm, node, next,
78 to_next, n_left_to_next);
80 while (n_left_from >= 4 && n_left_to_next >= 2)
82 vlib_buffer_t * p0, * p1;
83 ip4_header_t * ip0, * ip1;
84 __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
85 ip_lookup_next_t next0, next1;
86 const load_balance_t * lb0, * lb1;
87 ip4_fib_mtrie_t * mtrie0, * mtrie1;
88 ip4_fib_mtrie_leaf_t leaf0, leaf1;
89 ip4_address_t * dst_addr0, *dst_addr1;
90 __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
91 __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
92 flow_hash_config_t flow_hash_config0, flow_hash_config1;
95 const dpo_id_t *dpo0, *dpo1;
97 /* Prefetch next iteration. */
99 vlib_buffer_t * p2, * p3;
101 p2 = vlib_get_buffer (vm, from[2]);
102 p3 = vlib_get_buffer (vm, from[3]);
104 vlib_prefetch_buffer_header (p2, LOAD);
105 vlib_prefetch_buffer_header (p3, LOAD);
107 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
108 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
111 pi0 = to_next[0] = from[0];
112 pi1 = to_next[1] = from[1];
114 p0 = vlib_get_buffer (vm, pi0);
115 p1 = vlib_get_buffer (vm, pi1);
117 ip0 = vlib_buffer_get_current (p0);
118 ip1 = vlib_buffer_get_current (p1);
120 dst_addr0 = &ip0->dst_address;
121 dst_addr1 = &ip1->dst_address;
123 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
124 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
125 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
126 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
127 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
128 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
131 if (! lookup_for_responses_to_locally_received_packets)
133 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
134 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
136 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
138 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
139 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
142 tcp0 = (void *) (ip0 + 1);
143 tcp1 = (void *) (ip1 + 1);
145 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
146 || ip0->protocol == IP_PROTOCOL_UDP);
147 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
148 || ip1->protocol == IP_PROTOCOL_UDP);
150 if (! lookup_for_responses_to_locally_received_packets)
152 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
153 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
156 if (! lookup_for_responses_to_locally_received_packets)
158 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
159 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
162 if (! lookup_for_responses_to_locally_received_packets)
164 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
165 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
168 if (lookup_for_responses_to_locally_received_packets)
170 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
171 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
175 /* Handle default route. */
176 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
177 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
179 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
180 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
183 lb0 = load_balance_get (lb_index0);
184 lb1 = load_balance_get (lb_index1);
186 /* Use flow hash to compute multipath adjacency. */
187 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
188 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
189 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
191 flow_hash_config0 = lb0->lb_hash_config;
192 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
193 ip4_compute_flow_hash (ip0, flow_hash_config0);
195 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
197 flow_hash_config1 = lb1->lb_hash_config;
198 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
199 ip4_compute_flow_hash (ip1, flow_hash_config1);
202 ASSERT (lb0->lb_n_buckets > 0);
203 ASSERT (is_pow2 (lb0->lb_n_buckets));
204 ASSERT (lb1->lb_n_buckets > 0);
205 ASSERT (is_pow2 (lb1->lb_n_buckets));
207 dpo0 = load_balance_get_bucket_i(lb0,
209 (lb0->lb_n_buckets_minus_1)));
210 dpo1 = load_balance_get_bucket_i(lb1,
212 (lb0->lb_n_buckets_minus_1)));
214 next0 = dpo0->dpoi_next_node;
215 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
216 next1 = dpo1->dpoi_next_node;
217 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
219 vlib_increment_combined_counter
220 (cm, cpu_index, lb_index0, 1,
221 vlib_buffer_length_in_chain (vm, p0)
222 + sizeof(ethernet_header_t));
223 vlib_increment_combined_counter
224 (cm, cpu_index, lb_index1, 1,
225 vlib_buffer_length_in_chain (vm, p1)
226 + sizeof(ethernet_header_t));
233 wrong_next = (next0 != next) + 2*(next1 != next);
234 if (PREDICT_FALSE (wrong_next != 0))
243 vlib_set_next_frame_buffer (vm, node, next0, pi0);
250 vlib_set_next_frame_buffer (vm, node, next1, pi1);
257 vlib_set_next_frame_buffer (vm, node, next0, pi0);
258 vlib_set_next_frame_buffer (vm, node, next1, pi1);
262 vlib_put_next_frame (vm, node, next, n_left_to_next);
264 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
270 while (n_left_from > 0 && n_left_to_next > 0)
274 __attribute__((unused)) tcp_header_t * tcp0;
275 ip_lookup_next_t next0;
276 const load_balance_t *lb0;
277 ip4_fib_mtrie_t * mtrie0;
278 ip4_fib_mtrie_leaf_t leaf0;
279 ip4_address_t * dst_addr0;
280 __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
281 flow_hash_config_t flow_hash_config0;
282 const dpo_id_t *dpo0;
288 p0 = vlib_get_buffer (vm, pi0);
290 ip0 = vlib_buffer_get_current (p0);
292 dst_addr0 = &ip0->dst_address;
294 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
295 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
296 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
298 if (! lookup_for_responses_to_locally_received_packets)
300 mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
302 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
304 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
307 tcp0 = (void *) (ip0 + 1);
309 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
310 || ip0->protocol == IP_PROTOCOL_UDP);
312 if (! lookup_for_responses_to_locally_received_packets)
313 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
315 if (! lookup_for_responses_to_locally_received_packets)
316 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
318 if (! lookup_for_responses_to_locally_received_packets)
319 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
321 if (lookup_for_responses_to_locally_received_packets)
322 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
325 /* Handle default route. */
326 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
327 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
330 lb0 = load_balance_get (lbi0);
332 /* Use flow hash to compute multipath adjacency. */
333 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
334 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
336 flow_hash_config0 = lb0->lb_hash_config;
338 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
339 ip4_compute_flow_hash (ip0, flow_hash_config0);
342 ASSERT (lb0->lb_n_buckets > 0);
343 ASSERT (is_pow2 (lb0->lb_n_buckets));
345 dpo0 = load_balance_get_bucket_i(lb0,
347 (lb0->lb_n_buckets_minus_1)));
349 next0 = dpo0->dpoi_next_node;
350 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
352 vlib_increment_combined_counter
353 (cm, cpu_index, lbi0, 1,
354 vlib_buffer_length_in_chain (vm, p0));
361 if (PREDICT_FALSE (next0 != next))
364 vlib_put_next_frame (vm, node, next, n_left_to_next);
366 vlib_get_next_frame (vm, node, next,
367 to_next, n_left_to_next);
374 vlib_put_next_frame (vm, node, next, n_left_to_next);
377 if (node->flags & VLIB_NODE_FLAG_TRACE)
378 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
380 return frame->n_vectors;
383 /** @brief IPv4 lookup node.
386 This is the main IPv4 lookup dispatch node.
388 @param vm vlib_main_t corresponding to the current thread
389 @param node vlib_node_runtime_t
390 @param frame vlib_frame_t whose contents should be dispatched
392 @par Graph mechanics: buffer metadata, next index usage
395 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
396 - Indicates the @c sw_if_index value of the interface that the
397 packet was received on.
398 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
399 - When the value is @c ~0 then the node performs a longest prefix
400 match (LPM) for the packet destination address in the FIB attached
401 to the receive interface.
402 - Otherwise perform LPM for the packet destination address in the
403 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
404 value (0, 1, ...) and not a VRF id.
407 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
408 - The index (@c dpoi_index) of the DPO chosen from the lookup result load-balance.
411 - Dispatches the packet to the node index found in
412 dpo_id_t @c dpo->dpoi_next_node
413 (where @c dpo is the DPO chosen from the lookup result load-balance).
/* Node function for "ip4-lookup": runs ip4_lookup_inline with
   lookup_for_responses_to_locally_received_packets set to 0, i.e. the full
   mtrie lookup is performed for every packet.  Returns whatever
   ip4_lookup_inline returns. */
416 ip4_lookup (vlib_main_t * vm,
417 vlib_node_runtime_t * node,
418 vlib_frame_t * frame)
420 return ip4_lookup_inline (vm, node, frame,
421 /* lookup_for_responses_to_locally_received_packets */ 0);
/* Forward declaration: the trace formatter is defined later in this file
   but is needed by the node registration below. */
425 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph node registration for "ip4-lookup": the node function is
   ip4_lookup, frame elements are u32 values, traces are rendered by
   format_ip4_lookup_trace, and the next-node table has IP_LOOKUP_N_NEXT
   entries taken from IP4_LOOKUP_NEXT_NODES. */
427 VLIB_REGISTER_NODE (ip4_lookup_node) = {
428 .function = ip4_lookup,
429 .name = "ip4-lookup",
430 .vector_size = sizeof (u32),
432 .format_trace = format_ip4_lookup_trace,
433 .n_next_nodes = IP_LOOKUP_N_NEXT,
434 .next_nodes = IP4_LOOKUP_NEXT_NODES,
437 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
/*
 * ip4_load_balance: node function that resolves a load-balance object to
 * one of its buckets.
 *
 * On entry ip.adj_index[VLIB_TX] of each buffer holds a load-balance
 * index.  The flow hash is computed unconditionally from the IPv4 header
 * using that load-balance's hash config, the bucket DPO is selected with
 * (flow_hash & lb_n_buckets_minus_1), ip.adj_index[VLIB_TX] is overwritten
 * with the bucket's dpoi_index and the packet is sent to dpoi_next_node.
 * The per-load-balance "via" combined counter is incremented with the
 * buffer chain length.  Returns frame->n_vectors.
 */
440 ip4_load_balance (vlib_main_t * vm,
441 vlib_node_runtime_t * node,
442 vlib_frame_t * frame)
444 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
445 u32 n_left_from, n_left_to_next, * from, * to_next;
446 ip_lookup_next_t next;
447 u32 cpu_index = os_get_cpu_number();
449 from = vlib_frame_vector_args (frame);
450 n_left_from = frame->n_vectors;
451 next = node->cached_next_index;
/* Tracing happens before the processing loop, so the trace records the
   buffer metadata as it was on entry to this node. */
453 if (node->flags & VLIB_NODE_FLAG_TRACE)
454 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
456 while (n_left_from > 0)
458 vlib_get_next_frame (vm, node, next,
459 to_next, n_left_to_next);
462 while (n_left_from > 0 && n_left_to_next > 0)
464 ip_lookup_next_t next0;
465 const load_balance_t *lb0;
468 const ip4_header_t *ip0;
469 const dpo_id_t *dpo0;
474 p0 = vlib_get_buffer (vm, pi0);
476 ip0 = vlib_buffer_get_current (p0);
/* Input: load-balance index left in adj_index[VLIB_TX] by the previous node. */
477 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
479 lb0 = load_balance_get(lbi0);
480 hc0 = lb0->lb_hash_config;
481 vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
483 dpo0 = load_balance_get_bucket_i(lb0,
484 vnet_buffer(p0)->ip.flow_hash &
485 (lb0->lb_n_buckets_minus_1));
/* Output: next node and index of the selected bucket DPO. */
487 next0 = dpo0->dpoi_next_node;
488 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
490 vlib_increment_combined_counter
491 (cm, cpu_index, lbi0, 1,
492 vlib_buffer_length_in_chain (vm, p0));
/* Next node differs from the speculated one: switch frames. */
499 if (PREDICT_FALSE (next0 != next))
502 vlib_put_next_frame (vm, node, next, n_left_to_next);
504 vlib_get_next_frame (vm, node, next,
505 to_next, n_left_to_next);
512 vlib_put_next_frame (vm, node, next, n_left_to_next);
515 return frame->n_vectors;
/* Forward declaration of the generic trace formatter defined later in this
   file. */
518 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
/* Graph node registration for "ip4-load-balance".  It is declared a
   sibling of "ip4-lookup" and uses the generic forward-next trace
   formatter. */
520 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
521 .function = ip4_load_balance,
522 .name = "ip4-load-balance",
523 .vector_size = sizeof (u32),
524 .sibling_of = "ip4-lookup",
526 .format_trace = format_ip4_forward_next_trace,
529 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
531 /* get first interface address */
/* Walks the addresses configured on sw_if_index through
   foreach_ip_interface_address, with the honor-unnumbered argument set to
   1.  result_ia receives the matching ip_interface_address_t, or 0 when
   no address was found (result is 0). */
533 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
534 ip_interface_address_t ** result_ia)
536 ip_lookup_main_t * lm = &im->lookup_main;
537 ip_interface_address_t * ia = 0;
538 ip4_address_t * result = 0;
540 foreach_ip_interface_address (lm, ia, sw_if_index,
541 1 /* honor unnumbered */,
543 ip4_address_t * a = ip_interface_address_get_address (lm, ia);
/* NOTE(review): result_ia is dereferenced here; confirm callers may not
   pass NULL or that a guard exists. */
548 *result_ia = result ? ia : 0;
/*
 * ip4_add_interface_routes: installs the FIB entries that go with an
 * interface address.
 *
 * A prefix is built from the address stored in `a` with length
 * a->address_length.  The visible code then:
 *  - resets a->neighbor_probe_adj_index to ~0;
 *  - adds a FIB_SOURCE_INTERFACE entry flagged CONNECTED | ATTACHED with no
 *    next-hop address and records fib_entry_get_adj() of that entry in
 *    a->neighbor_probe_adj_index;
 *  - if a classify table is configured for sw_if_index, creates a classify
 *    DPO and adds it as a special DPO entry;
 *  - adds a FIB_SOURCE_INTERFACE entry flagged CONNECTED | LOCAL.
 */
553 ip4_add_interface_routes (u32 sw_if_index,
554 ip4_main_t * im, u32 fib_index,
555 ip_interface_address_t * a)
557 ip_lookup_main_t * lm = &im->lookup_main;
558 ip4_address_t * address = ip_interface_address_get_address (lm, a);
560 .fp_len = a->address_length,
561 .fp_proto = FIB_PROTOCOL_IP4,
562 .fp_addr.ip4 = *address,
/* Start from "no probe adjacency"; overwritten below when the attached
   entry is created. */
565 a->neighbor_probe_adj_index = ~0;
569 fib_node_index_t fei;
571 fei = fib_table_entry_update_one_path(fib_index,
573 FIB_SOURCE_INTERFACE,
574 (FIB_ENTRY_FLAG_CONNECTED |
575 FIB_ENTRY_FLAG_ATTACHED),
577 NULL, /* No next-hop address */
579 ~0, // invalid FIB index
582 FIB_ROUTE_PATH_FLAG_NONE);
583 a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
/* Optional classify hook: only when the per-interface table vector covers
   sw_if_index and holds a valid (non-~0) table index. */
588 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
590 u32 classify_table_index =
591 lm->classify_table_index_by_sw_if_index [sw_if_index];
592 if (classify_table_index != (u32) ~0)
594 dpo_id_t dpo = DPO_NULL;
599 classify_dpo_create(FIB_PROTOCOL_IP4,
600 classify_table_index));
602 fib_table_entry_special_dpo_add(fib_index,
/* Entry flagged CONNECTED | LOCAL. */
611 fib_table_entry_update_one_path(fib_index,
613 FIB_SOURCE_INTERFACE,
614 (FIB_ENTRY_FLAG_CONNECTED |
615 FIB_ENTRY_FLAG_LOCAL),
619 ~0, // invalid FIB index
622 FIB_ROUTE_PATH_FLAG_NONE);
/* ip4_del_interface_routes: removes the FIB_SOURCE_INTERFACE entries for
   an interface address from fib_index.  A prefix is built from `address`
   and `address_length`, and fib_table_entry_delete is called twice with
   that source (presumably once for the subnet entry and once for the
   local entry added by ip4_add_interface_routes -- confirm). */
626 ip4_del_interface_routes (ip4_main_t * im,
628 ip4_address_t * address,
632 .fp_len = address_length,
633 .fp_proto = FIB_PROTOCOL_IP4,
634 .fp_addr.ip4 = *address,
639 fib_table_entry_delete(fib_index,
641 FIB_SOURCE_INTERFACE);
645 fib_table_entry_delete(fib_index,
647 FIB_SOURCE_INTERFACE);
/*
 * ip4_sw_interface_enable_disable: reference-counted enable/disable of IPv4
 * on a software interface.
 *
 * im->ip_enabled_by_sw_if_index[sw_if_index] is used as a counter (grown
 * and zero-filled on demand).  The feature configuration is only touched
 * on the 0 -> 1 and 1 -> 0 transitions of that counter.  On a transition,
 * the lookup feature (unicast or multicast variant) is added to or removed
 * from the interface's rx feature configuration, and the resulting config
 * index is stored back.
 */
651 ip4_sw_interface_enable_disable (u32 sw_if_index,
654 vlib_main_t * vm = vlib_get_main();
655 ip4_main_t * im = &ip4_main;
656 ip_lookup_main_t * lm = &im->lookup_main;
658 u32 lookup_feature_index;
660 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
663 * enable/disable only on the 1<->0 transition
/* Enable side: bump the count; anything other than the first enable is not
   a transition. */
667 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
/* Disable side: the count must be positive; only the last disable is a
   transition. */
672 ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
673 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
/* Only the rx unicast and rx multicast feature arcs are updated here. */
677 for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
679 ip_config_main_t * cm = &lm->feature_config_mains[cast];
680 vnet_config_main_t * vcm = &cm->config_main;
682 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
683 ci = cm->config_index_by_sw_if_index[sw_if_index];
685 if (cast == VNET_IP_RX_UNICAST_FEAT)
686 lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
688 lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
691 ci = vnet_config_add_feature (vm, vcm,
693 lookup_feature_index,
695 /* # bytes of config data */ 0);
697 ci = vnet_config_del_feature (vm, vcm,
699 lookup_feature_index,
701 /* # bytes of config data */ 0);
/* Remember the new config index returned by the add/del call. */
702 cm->config_index_by_sw_if_index[sw_if_index] = ci;
/*
 * ip4_add_del_interface_address_internal: adds or deletes an IPv4 address
 * on a software interface.
 *
 * Visible steps:
 *  - make sure fib_index_by_sw_if_index covers sw_if_index and build an
 *    ip4_address_fib_t for (address, interface FIB index);
 *  - when adding, reject an address that overlaps any address already on
 *    the interface (checked in both directions with
 *    ip4_destination_matches_route);
 *  - add/delete the address in the interface address pool;
 *  - enable/disable IPv4 on the interface (enable when !is_del);
 *  - delete or add the matching interface routes;
 *  - if the address pool size changed, invoke every registered
 *    add/del-interface-address callback.
 *
 * Returns a clib_error_t * (0 is the initial "no error" value).
 */
706 static clib_error_t *
707 ip4_add_del_interface_address_internal (vlib_main_t * vm,
709 ip4_address_t * address,
713 vnet_main_t * vnm = vnet_get_main();
714 ip4_main_t * im = &ip4_main;
715 ip_lookup_main_t * lm = &im->lookup_main;
716 clib_error_t * error = 0;
717 u32 if_address_index, elts_before;
718 ip4_address_fib_t ip4_af, * addr_fib = 0;
720 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
721 ip4_addr_fib_init (&ip4_af, address,
722 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
/* addr_fib is a one-element vector allocated here. */
723 vec_add1 (addr_fib, ip4_af);
726 * there is no support for adj-fib handling in the presence of overlapping
727 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
732 /* When adding an address check that it does not conflict
733 with an existing address. */
734 ip_interface_address_t * ia;
/* The honor-unnumbered argument is 0 for this walk. */
735 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
736 0 /* honor unnumbered */,
738 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
/* Conflict if either prefix contains the other address. */
740 if (ip4_destination_matches_route (im, address, x, ia->address_length)
741 || ip4_destination_matches_route (im, x, address, address_length))
/* NOTE(review): this early return happens after vec_add1 (addr_fib, ...)
   above; confirm addr_fib is released on this path. */
742 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
743 format_ip4_address_and_length, address, address_length,
744 format_ip4_address_and_length, x, ia->address_length,
745 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* Snapshot the pool size so a real add/delete can be told apart from a
   duplicate below. */
749 elts_before = pool_elts (lm->if_address_pool);
751 error = ip_interface_address_add_del
761 ip4_sw_interface_enable_disable(sw_if_index, !is_del);
764 ip4_del_interface_routes (im, ip4_af.fib_index, address,
767 ip4_add_interface_routes (sw_if_index,
768 im, ip4_af.fib_index,
770 (lm->if_address_pool, if_address_index));
772 /* If pool did not grow/shrink: add duplicate address. */
/* Callbacks run only when the pool size actually changed. */
773 if (elts_before != pool_elts (lm->if_address_pool))
775 ip4_add_del_interface_address_callback_t * cb;
776 vec_foreach (cb, im->add_del_interface_address_callbacks)
777 cb->function (im, cb->function_opaque, sw_if_index,
778 address, address_length,
/* Public entry point for adding/deleting an interface address; forwards
   its arguments to ip4_add_del_interface_address_internal and returns its
   result. */
789 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
790 ip4_address_t * address, u32 address_length,
793 return ip4_add_del_interface_address_internal
794 (vm, sw_if_index, address, address_length,
798 /* Built-in ip4 unicast rx feature path definition */
/* Each registration names a graph node, the feature(s) it must run before,
   and the ip4_main field that receives its feature index.  The runs_before
   constraints below give the order:
     ip4-inacl -> ip4-source-check-via-rx -> ip4-source-check-via-any
       -> ip4-policer-classify -> ipsec-input-ip4 -> vpath-input-ip4
       -> ip4-lookup -> ip4-drop
   ip4-source-and-port-range-check-rx is additionally constrained to run
   before ip4-policer-classify. */
799 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
800 .node_name = "ip4-inacl",
801 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
802 .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
805 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
806 .node_name = "ip4-source-check-via-rx",
807 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
809 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
812 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
813 .node_name = "ip4-source-check-via-any",
814 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
816 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
820 .node_name = "ip4-source-and-port-range-check-rx",
821 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
823 &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
827 .node_name = "ip4-policer-classify",
828 .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
830 &ip4_main.ip4_unicast_rx_feature_policer_classify,
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
834 .node_name = "ipsec-input-ip4",
835 .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
836 .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
839 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
840 .node_name = "vpath-input-ip4",
841 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
842 .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
845 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
846 .node_name = "ip4-lookup",
847 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
848 .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
/* Terminal feature of the unicast rx arc: nothing runs after it. */
851 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
852 .node_name = "ip4-drop",
853 .runs_before = 0, /* not before any other features */
854 .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
858 /* Built-in ip4 multicast rx feature path definition */
/* Order given by the runs_before constraints:
     vpath-input-ip4 -> ip4-lookup-multicast -> ip4-drop
   Each registration stores its feature index in the named ip4_main field. */
859 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
860 .node_name = "vpath-input-ip4",
861 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
862 .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
865 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
866 .node_name = "ip4-lookup-multicast",
867 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
868 .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
871 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
872 .node_name = "ip4-drop",
873 .runs_before = 0, /* last feature */
874 .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
/* Names of the graph nodes at which the rx feature arcs start; passed to
   ip_feature_init_cast by ip4_feature_init for the rx casts. */
877 static char * rx_feature_start_nodes[] =
878 { "ip4-input", "ip4-input-no-checksum"};
/* Names of the graph nodes at which the tx feature arc starts; used by
   ip4_feature_init for the tx cast. */
880 static char * tx_feature_start_nodes[] =
882 "ip4-rewrite-transit",
886 /* Source and port-range check ip4 tx feature path definition */
/* Constrained to run before "interface-output"; its feature index is
   stored in ip4_main.ip4_unicast_tx_feature_source_and_port_range_check. */
887 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
888 .node_name = "ip4-source-and-port-range-check-tx",
889 .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
891 &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
895 /* Built-in ip4 tx feature path definition */
/* Terminal feature of the tx arc. */
896 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
897 .node_name = "interface-output",
898 .runs_before = 0, /* not before any other features */
899 .feature_index = &ip4_main.ip4_tx_feature_interface_output,
/* ip4_feature_init: initialises the feature configuration for every cast
   in [0, VNET_N_IP_FEAT).  Casts below VNET_IP_TX_FEAT use the rx start
   nodes, the rest use the tx start nodes.  Each cast is set up through
   ip_feature_init_cast; the error it returns is captured in `error`. */
902 static clib_error_t *
903 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
905 ip_lookup_main_t * lm = &im->lookup_main;
906 clib_error_t * error;
908 ip_config_main_t * cm;
909 vnet_config_main_t * vcm;
910 char **feature_start_nodes;
911 int feature_start_len;
913 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
915 cm = &lm->feature_config_mains[cast];
916 vcm = &cm->config_main;
/* Pick the start-node table matching the direction of this cast. */
918 if (cast < VNET_IP_TX_FEAT)
920 feature_start_nodes = rx_feature_start_nodes;
921 feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
925 feature_start_nodes = tx_feature_start_nodes;
926 feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
929 if ((error = ip_feature_init_cast (vm, cm, vcm,
932 im->next_feature[cast],
933 &im->feature_nodes[cast])))
/*
 * ip4_sw_interface_add_del: software-interface add/delete hook, registered
 * through VNET_SW_INTERFACE_ADD_DEL_FUNCTION below.
 *
 * For every feature cast the terminal feature is added to or removed from
 * the interface's configuration: the drop feature for rx unicast and rx
 * multicast, the interface-output feature otherwise.  On the removal path
 * the interface's ip_enabled_by_sw_if_index counter is reset to 0 when the
 * vector covers it.  Always returns 0 (no error).
 */
940 static clib_error_t *
941 ip4_sw_interface_add_del (vnet_main_t * vnm,
945 vlib_main_t * vm = vnm->vlib_main;
946 ip4_main_t * im = &ip4_main;
947 ip_lookup_main_t * lm = &im->lookup_main;
951 /* Fill in lookup tables with default table (0). */
952 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
954 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
956 ip_config_main_t * cm = &lm->feature_config_mains[cast];
957 vnet_config_main_t * vcm = &cm->config_main;
959 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
960 ci = cm->config_index_by_sw_if_index[sw_if_index];
/* Select the terminal feature for this cast. */
962 if (cast == VNET_IP_RX_UNICAST_FEAT)
963 feature_index = im->ip4_unicast_rx_feature_drop;
964 else if (cast == VNET_IP_RX_MULTICAST_FEAT)
965 feature_index = im->ip4_multicast_rx_feature_drop;
967 feature_index = im->ip4_tx_feature_interface_output;
970 ci = vnet_config_add_feature (vm, vcm,
974 /* # bytes of config data */ 0);
977 ci = vnet_config_del_feature (vm, vcm, ci,
980 /* # bytes of config data */ 0);
/* Forget any enable reference count held for this interface. */
981 if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
982 im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
984 cm->config_index_by_sw_if_index[sw_if_index] = ci;
986 * note: do not update the tx feature count here.
990 return /* no error */ 0;
993 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
995 /* Global IP4 main. */
/*
 * ip4_lookup_init: init function registered with VLIB_INIT_FUNCTION.
 *
 * Visible steps:
 *  - fill im->fib_masks[i] with the network-byte-order mask that has the
 *    top i bits set;
 *  - initialise the generic lookup main for IPv4 (is_ip6 = 0);
 *  - create (or find) and lock the IPv4 FIB with table id 0;
 *  - hook the packet-generator edit function for the ip4-lookup node;
 *  - build the ARP request packet template: zeroed header, hardware type
 *    ethernet, protocol type IP4, address sizes 6 and 4, opcode request;
 *  - run ip4_feature_init.
 */
999 ip4_lookup_init (vlib_main_t * vm)
1001 ip4_main_t * im = &ip4_main;
1002 clib_error_t * error;
1005 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* NOTE(review): for i == 0 the shift count below would be 32; confirm this
   statement is guarded by a condition not visible here. */
1010 m = pow2_mask (i) << (32 - i);
1013 im->fib_masks[i] = clib_host_to_net_u32 (m);
1016 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1018 /* Create FIB with index 0 and table id of 0. */
1019 fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1023 pn = pg_get_node (ip4_lookup_node.index);
1024 pn->unformat_edit = unformat_pg_ip4_header;
1028 ethernet_arp_header_t h;
1030 memset (&h, 0, sizeof (h));
1032 /* Set target ethernet address to all zeros. */
1033 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
/* Helper macros: _16 stores a 16-bit value in network byte order, _8
   stores a byte as-is. */
1035 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1036 #define _8(f,v) h.f = v;
1037 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1038 _16 (l3_type, ETHERNET_TYPE_IP4);
1039 _8 (n_l2_address_bytes, 6);
1040 _8 (n_l3_address_bytes, 4);
1041 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1045 vlib_packet_template_init (vm,
1046 &im->ip4_arp_request_packet_template,
1049 /* alloc chunk size */ 8,
1053 error = ip4_feature_init (vm, im);
1058 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record shared by the ip4-forward nodes.  The code in
   this file reads and writes the fields adj_index, flow_hash, fib_index
   and packet_data. */
1061 /* Adjacency taken. */
1066 /* Packet data, possibly *after* rewrite. */
/* Sized to 64 bytes minus one u32. */
1067 u8 packet_data[64 - 1*sizeof(u32)];
1068 } ip4_forward_next_trace_t;
/* Trace formatter used by the load-balance, drop and punt nodes: prints
   the captured packet bytes as an IPv4 header at the current indent.
   va_list order: vlib_main_t *, vlib_node_t * (both unused),
   ip4_forward_next_trace_t *. */
1070 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1072 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1073 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1074 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1075 uword indent = format_get_indent (s);
1076 s = format (s, "%U%U",
1077 format_white_space, indent,
1078 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for the ip4-lookup node: first line shows the FIB index,
   the DPO index (stored in the record's adj_index field) and the flow
   hash; the second line shows the captured bytes as an IPv4 header.
   va_list order: vlib_main_t *, vlib_node_t * (both unused),
   ip4_forward_next_trace_t *. */
1082 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1084 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1085 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1086 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1087 uword indent = format_get_indent (s);
1089 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1090 t->fib_index, t->adj_index, t->flow_hash);
1091 s = format (s, "\n%U%U",
1092 format_white_space, indent,
1093 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite nodes: first line shows the tx interface
   index, the adjacency index, the formatted adjacency and the flow hash;
   the second line shows the captured bytes through
   format_ip_adjacency_packet_data.
   The value printed as "tx_sw_if_index" is the record's fib_index field;
   ip4_forward_next_trace fills that field from sw_if_index[VLIB_TX] when
   it is not ~0. */
1097 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1099 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1100 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1101 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1102 vnet_main_t * vnm = vnet_get_main();
1103 uword indent = format_get_indent (s);
1105 s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1106 t->fib_index, t->adj_index, format_ip_adjacency,
1107 vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
1109 s = format (s, "\n%U%U",
1110 format_white_space, indent,
1111 format_ip_adjacency_packet_data,
1113 t->packet_data, sizeof (t->packet_data));
1117 /* Common trace function for all ip4-forward next nodes. */
/*
 * For every buffer in the frame that has VLIB_BUFFER_IS_TRACED set, adds a
 * trace record holding:
 *  - adj_index: ip.adj_index[which_adj_index] of the buffer;
 *  - flow_hash: ip.flow_hash of the buffer;
 *  - fib_index: sw_if_index[VLIB_TX] when it is not ~0, otherwise the FIB
 *    index bound to the buffer's RX interface;
 *  - packet_data: the first sizeof(packet_data) bytes at the buffer's
 *    current data pointer.
 * Buffers are handled two at a time with prefetch of the following two,
 * then one at a time.
 */
1119 ip4_forward_next_trace (vlib_main_t * vm,
1120 vlib_node_runtime_t * node,
1121 vlib_frame_t * frame,
1122 vlib_rx_or_tx_t which_adj_index)
1125 ip4_main_t * im = &ip4_main;
1127 n_left = frame->n_vectors;
1128 from = vlib_frame_vector_args (frame);
1133 vlib_buffer_t * b0, * b1;
1134 ip4_forward_next_trace_t * t0, * t1;
1136 /* Prefetch next iteration. */
1137 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1138 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1143 b0 = vlib_get_buffer (vm, bi0);
1144 b1 = vlib_get_buffer (vm, bi1);
/* First buffer of the pair. */
1146 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1148 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1149 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1150 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1151 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1152 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1153 vec_elt (im->fib_index_by_sw_if_index,
1154 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1156 clib_memcpy (t0->packet_data,
1157 vlib_buffer_get_current (b0),
1158 sizeof (t0->packet_data));
/* Second buffer of the pair. */
1160 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1162 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1163 t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1164 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1165 t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1166 vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1167 vec_elt (im->fib_index_by_sw_if_index,
1168 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1169 clib_memcpy (t1->packet_data,
1170 vlib_buffer_get_current (b1),
1171 sizeof (t1->packet_data));
/* Remaining buffers, one at a time. */
1181 ip4_forward_next_trace_t * t0;
1185 b0 = vlib_get_buffer (vm, bi0);
1187 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1189 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1190 t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1191 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1192 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1193 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1194 vec_elt (im->fib_index_by_sw_if_index,
1195 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1196 clib_memcpy (t0->packet_data,
1197 vlib_buffer_get_current (b0),
1198 sizeof (t0->packet_data));
/* Shared body of the ip4-drop and ip4-punt nodes: hands every buffer in
   the frame to vlib_error_drop_buffers (the call references
   ip4_input_node.index; error_code is the ip4 error supplied by the
   caller), then, if tracing is enabled on the node, records traces with
   ip4_forward_next_trace using the VLIB_TX adjacency slot. */
1206 ip4_drop_or_punt (vlib_main_t * vm,
1207 vlib_node_runtime_t * node,
1208 vlib_frame_t * frame,
1209 ip4_error_t error_code)
1211 u32 * buffers = vlib_frame_vector_args (frame);
1212 uword n_packets = frame->n_vectors;
1214 vlib_error_drop_buffers (vm, node,
1219 ip4_input_node.index,
1222 if (node->flags & VLIB_NODE_FLAG_TRACE)
1223 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function for the drop node: ip4_drop_or_punt with error code
   IP4_ERROR_ADJACENCY_DROP. */
1229 ip4_drop (vlib_main_t * vm,
1230 vlib_node_runtime_t * node,
1231 vlib_frame_t * frame)
1232 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
/* Node function for the punt node: ip4_drop_or_punt with error code
   IP4_ERROR_ADJACENCY_PUNT. */
1235 ip4_punt (vlib_main_t * vm,
1236 vlib_node_runtime_t * node,
1237 vlib_frame_t * frame)
1238 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
/* File-local graph node registration whose node function is ip4_drop;
   frame elements are u32 values and traces use the generic forward-next
   formatter. */
1240 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1241 .function = ip4_drop,
1243 .vector_size = sizeof (u32),
1245 .format_trace = format_ip4_forward_next_trace,
1253 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
/* Graph node registration that installs ip4_punt as the node function.
   Vector elements are sizeof (u32) bytes and traces are rendered with
   format_ip4_forward_next_trace. */
1255 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1256 .function = ip4_punt,
1258 .vector_size = sizeof (u32),
1260 .format_trace = format_ip4_forward_next_trace,
1268 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1270 /* Compute TCP/UDP/ICMP4 checksum in software. */
/* Sums the payload that follows the IPv4 header ip0, walking the buffer
   chain that starts at p0, and stores the complement of the folded sum
   in sum16.  vm is used to resolve the indices of chained buffers. */
1272 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1276 u32 ip_header_length, payload_length_host_byte_order;
1277 u32 n_this_buffer, n_bytes_left;
1279 void * data_this_buffer;
1281 /* Initialize checksum with ip header. */
1282 ip_header_length = ip4_header_bytes (ip0);
/* NOTE(review): unsigned subtraction; presumably earlier validation
   guarantees ip0->length >= header length - confirm. */
1283 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
/* Seed the sum with the payload length and the protocol number. */
1284 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
/* Add the addresses: two unaligned 32-bit reads on 32-bit hosts, otherwise
   one unaligned 64-bit read starting at src_address (presumably src and
   dst addresses are adjacent in ip4_header_t - confirm). */
1286 if (BITS (uword) == 32)
1288 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1289 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1292 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
/* First chunk starts right after the IP header and is clamped to the
   bytes this buffer actually holds beyond that header. */
1294 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1295 data_this_buffer = (void *) ip0 + ip_header_length;
1296 if (n_this_buffer + ip_header_length > p0->current_length)
1297 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
/* Add the current chunk, then either finish or move to the next buffer. */
1300 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1301 n_bytes_left -= n_this_buffer;
1302 if (n_bytes_left == 0)
1305 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1306 p0 = vlib_get_buffer (vm, p0->next_buffer);
1307 data_this_buffer = vlib_buffer_get_current (p0);
/* NOTE(review): the chunk size is taken from current_length with no
   visible clamp to n_bytes_left; confirm a chained buffer can never hold
   more than the remaining payload, otherwise n_bytes_left wraps. */
1308 n_this_buffer = p0->current_length;
1311 sum16 = ~ ip_csum_fold (sum0);
/* Validates the L4 checksum of the TCP or UDP packet at the current data
   of p0 and records the outcome in p0->flags:
   IP_BUFFER_L4_CHECKSUM_COMPUTED is always set, and
   IP_BUFFER_L4_CHECKSUM_CORRECT is set when the UDP checksum field is zero
   or when the computed checksum comes out as zero. */
1317 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1319 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1320 udp_header_t * udp0;
1323 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1324 || ip0->protocol == IP_PROTOCOL_UDP);
/* Locate the L4 header through ip4_next_header(), as ip4_local does, so
   that the zero-checksum test below reads the UDP header rather than a
   fixed offset of sizeof (ip4_header_t) past the start of the IP header. */
1326 udp0 = ip4_next_header (ip0);
/* A UDP checksum field of zero is accepted without computing anything. */
1327 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1329 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1330 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1334 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* (sum16 == 0) is shifted into the bit position of the CORRECT flag. */
1336 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1337 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/* Node function of ip4_local_node.  For every buffer in the frame it
   (a) classifies the packet by IP protocol (fragments are mapped to 0xfe),
   (b) checks the TCP/UDP checksum and the UDP length,
   (c) looks the source address up in the mtrie of the FIB bound to the
       RX interface,
   and then picks the next node from lm->local_next_by_ip_protocol, or
   IP_LOCAL_NEXT_DROP when any check produced an error.
   Buffers are handled two at a time while at least four remain in the
   frame, then one at a time.  Returns frame->n_vectors. */
1343 ip4_local (vlib_main_t * vm,
1344 vlib_node_runtime_t * node,
1345 vlib_frame_t * frame)
1347 ip4_main_t * im = &ip4_main;
1348 ip_lookup_main_t * lm = &im->lookup_main;
1349 ip_local_next_t next_index;
1350 u32 * from, * to_next, n_left_from, n_left_to_next;
/* Errors are taken from the runtime of ip4_input_node. */
1351 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1353 from = vlib_frame_vector_args (frame);
1354 n_left_from = frame->n_vectors;
1355 next_index = node->cached_next_index;
1357 if (node->flags & VLIB_NODE_FLAG_TRACE)
1358 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1360 while (n_left_from > 0)
1362 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-packet path. */
1364 while (n_left_from >= 4 && n_left_to_next >= 2)
1366 vlib_buffer_t * p0, * p1;
1367 ip4_header_t * ip0, * ip1;
1368 udp_header_t * udp0, * udp1;
1369 ip4_fib_mtrie_t * mtrie0, * mtrie1;
1370 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1371 const dpo_id_t *dpo0, *dpo1;
1372 const load_balance_t *lb0, *lb1;
1373 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1374 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1375 i32 len_diff0, len_diff1;
1376 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1377 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
/* Speculatively enqueue both buffers to the current next frame. */
1380 pi0 = to_next[0] = from[0];
1381 pi1 = to_next[1] = from[1];
1385 n_left_to_next -= 2;
1387 p0 = vlib_get_buffer (vm, pi0);
1388 p1 = vlib_get_buffer (vm, pi1);
1390 ip0 = vlib_buffer_get_current (p0);
1391 ip1 = vlib_buffer_get_current (p1);
1393 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1394 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1395 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1396 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1398 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1399 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
/* The source-address mtrie lookup runs in four steps (0..3) that are
   interleaved with the header checks below. */
1401 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1403 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1404 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1406 /* Treat IP frag packets as "experimental" protocol for now
1407 until support of IP frag reassembly is implemented */
1408 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1409 proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1410 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1411 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1412 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1413 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1418 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1419 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1421 udp0 = ip4_next_header (ip0);
1422 udp1 = ip4_next_header (ip1);
1424 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1425 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1426 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1428 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1429 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1431 /* Verify UDP length. */
1432 ip_len0 = clib_net_to_host_u16 (ip0->length);
1433 ip_len1 = clib_net_to_host_u16 (ip1->length);
1434 udp_len0 = clib_net_to_host_u16 (udp0->length);
1435 udp_len1 = clib_net_to_host_u16 (udp1->length);
1437 len_diff0 = ip_len0 - udp_len0;
1438 len_diff1 = ip_len1 - udp_len1;
/* The length comparison only applies to UDP; other protocols get 0. */
1440 len_diff0 = is_udp0 ? len_diff0 : 0;
1441 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: taken unless both packets are TCP/UDP with a checksum that
   is already known to be good.  A checksum that has not been computed
   yet is validated in software here. */
1443 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1444 & good_tcp_udp0 & good_tcp_udp1)))
1449 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1450 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1452 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1453 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1458 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1459 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1461 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1462 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1466 good_tcp_udp0 &= len_diff0 >= 0;
1467 good_tcp_udp1 &= len_diff1 >= 0;
1469 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1470 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* IP4_ERROR_UNKNOWN_PROTOCOL doubles as the no-error value in this
   function: the next index is only forced to IP_LOCAL_NEXT_DROP when the
   error differs from it. */
1472 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1474 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1475 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* Relies on the UDP checksum error following the TCP one in the enum,
   so that adding is_udp selects the right error. */
1477 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1478 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1479 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1481 error1 = (is_tcp_udp1 && ! good_tcp_udp1
1482 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1485 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1486 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1487 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1488 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
/* The result of the source lookup is stored in both adjacency slots. */
1490 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1491 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1493 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1494 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
/* Only bucket 0 of each load-balance object is inspected. */
1496 lb0 = load_balance_get(lbi0);
1497 lb1 = load_balance_get(lbi1);
1498 dpo0 = load_balance_get_bucket_i(lb0, 0);
1499 dpo1 = load_balance_get_bucket_i(lb1, 0);
1502 * Must have a route to source otherwise we drop the packet.
1503 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1505 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1506 && dpo0->dpoi_type != DPO_ADJACENCY
1507 && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1508 && dpo0->dpoi_type != DPO_RECEIVE
1509 && dpo0->dpoi_type != DPO_DROP
1510 && dpo0->dpoi_type != DPO_ADJACENCY_GLEAN
1511 && ip0->dst_address.as_u32 != 0xFFFFFFFF
1512 ? IP4_ERROR_SRC_LOOKUP_MISS
1514 error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1515 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1517 error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1518 && dpo1->dpoi_type != DPO_ADJACENCY
1519 && dpo1->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1520 && dpo1->dpoi_type != DPO_RECEIVE
1521 && dpo1->dpoi_type != DPO_DROP
1522 && dpo1->dpoi_type != DPO_ADJACENCY_GLEAN
1523 && ip1->dst_address.as_u32 != 0xFFFFFFFF
1524 ? IP4_ERROR_SRC_LOOKUP_MISS
/* The spoof check for packet 1 must test the source DPO of packet 1
   (dpo1); testing dpo0 would classify packet 1 by the source of its
   neighbour in the frame. */
1526 error1 = (dpo1->dpoi_type == DPO_RECEIVE ?
1527 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1530 next0 = lm->local_next_by_ip_protocol[proto0];
1531 next1 = lm->local_next_by_ip_protocol[proto1];
1533 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1534 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1536 p0->error = error0 ? error_node->errors[error0] : 0;
1537 p1->error = error1 ? error_node->errors[error1] : 0;
/* Bit 0 is set when packet 0 leaves next_index, bit 1 when packet 1 does;
   zero means the speculative enqueue above was right for both. */
1539 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1541 if (PREDICT_FALSE (enqueue_code != 0))
1543 switch (enqueue_code)
1549 n_left_to_next += 1;
1550 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1556 n_left_to_next += 1;
1557 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1561 /* A B B or A B C */
1563 n_left_to_next += 2;
1564 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1565 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1568 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1570 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Single-packet path: same checks as above for one buffer at a time. */
1577 while (n_left_from > 0 && n_left_to_next > 0)
1581 udp_header_t * udp0;
1582 ip4_fib_mtrie_t * mtrie0;
1583 ip4_fib_mtrie_leaf_t leaf0;
1584 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1586 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1587 load_balance_t *lb0;
1588 const dpo_id_t *dpo0;
1590 pi0 = to_next[0] = from[0];
1594 n_left_to_next -= 1;
1596 p0 = vlib_get_buffer (vm, pi0);
1598 ip0 = vlib_buffer_get_current (p0);
1600 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1601 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1603 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1605 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1607 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1609 /* Treat IP frag packets as "experimental" protocol for now
1610 until support of IP frag reassembly is implemented */
1611 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1612 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1613 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1617 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1619 udp0 = ip4_next_header (ip0);
1621 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1622 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1624 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1626 /* Verify UDP length. */
1627 ip_len0 = clib_net_to_host_u16 (ip0->length);
1628 udp_len0 = clib_net_to_host_u16 (udp0->length);
1630 len_diff0 = ip_len0 - udp_len0;
1632 len_diff0 = is_udp0 ? len_diff0 : 0;
1634 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1639 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1640 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1642 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1643 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1647 good_tcp_udp0 &= len_diff0 >= 0;
1649 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1651 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1653 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1655 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1656 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1657 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1660 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1661 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1663 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1664 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1666 lb0 = load_balance_get(lbi0);
1667 dpo0 = load_balance_get_bucket_i(lb0, 0);
1669 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1670 vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1673 /* Must have a route to source otherwise we drop the packet. */
1674 error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1675 && dpo0->dpoi_type != DPO_ADJACENCY
1676 && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1677 && dpo0->dpoi_type != DPO_RECEIVE
1678 && dpo0->dpoi_type != DPO_DROP
1679 && dpo0->dpoi_type != DPO_ADJACENCY_GLEAN
1680 && ip0->dst_address.as_u32 != 0xFFFFFFFF
1681 ? IP4_ERROR_SRC_LOOKUP_MISS
1683 /* Packet originated from a local address => spoofing */
1684 error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1685 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1688 next0 = lm->local_next_by_ip_protocol[proto0];
1690 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1692 p0->error = error0? error_node->errors[error0] : 0;
/* Wrong speculation: give the slot back, close the current next frame
   and open a new one. */
1694 if (PREDICT_FALSE (next0 != next_index))
1696 n_left_to_next += 1;
1697 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1700 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1703 n_left_to_next -= 1;
1707 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1710 return frame->n_vectors;
/* Graph node registration for "ip4-local" with ip4_local as the node
   function.  The next-node table has IP_LOCAL_N_NEXT entries; the ones
   visible here map drop, punt, UDP lookup and ICMP input. */
1713 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1714 .function = ip4_local,
1715 .name = "ip4-local",
1716 .vector_size = sizeof (u32),
1718 .format_trace = format_ip4_forward_next_trace,
1720 .n_next_nodes = IP_LOCAL_N_NEXT,
1722 [IP_LOCAL_NEXT_DROP] = "error-drop",
1723 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1724 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1725 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1729 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
/* Registers node_index as the handler for locally received packets of
   the given IP protocol: the node is added as a next node of
   ip4_local_node and the resulting next index is stored in
   lm->local_next_by_ip_protocol[protocol].
   protocol must be a valid index into that table (checked by ASSERT). */
1731 void ip4_register_protocol (u32 protocol, u32 node_index)
1733 vlib_main_t * vm = vlib_get_main();
1734 ip4_main_t * im = &ip4_main;
1735 ip_lookup_main_t * lm = &im->lookup_main;
1737 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1738 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
/* CLI handler: prints a heading followed by the number of every IP
   protocol whose entry in lm->local_next_by_ip_protocol is something
   other than IP_LOCAL_NEXT_PUNT. */
1741 static clib_error_t *
1742 show_ip_local_command_fn (vlib_main_t * vm,
1743 unformat_input_t * input,
1744 vlib_cli_command_t * cmd)
1746 ip4_main_t * im = &ip4_main;
1747 ip_lookup_main_t * lm = &im->lookup_main;
1750 vlib_cli_output (vm, "Protocols handled by ip4_local");
1751 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1753 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1754 vlib_cli_output (vm, "%d", i);
/* Binds the CLI path "show ip local" to show_ip_local_command_fn. */
1761 VLIB_CLI_COMMAND (show_ip_local, static) = {
1762 .path = "show ip local",
1763 .function = show_ip_local_command_fn,
1764 .short_help = "Show ip local protocol table",
1768 ip4_arp_inline (vlib_main_t * vm,
1769 vlib_node_runtime_t * node,
1770 vlib_frame_t * frame,
1773 vnet_main_t * vnm = vnet_get_main();
1774 ip4_main_t * im = &ip4_main;
1775 ip_lookup_main_t * lm = &im->lookup_main;
1776 u32 * from, * to_next_drop;
1777 uword n_left_from, n_left_to_next_drop, next_index;
1778 static f64 time_last_seed_change = -1e100;
1779 static u32 hash_seeds[3];
1780 static uword hash_bitmap[256 / BITS (uword)];
1783 if (node->flags & VLIB_NODE_FLAG_TRACE)
1784 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1786 time_now = vlib_time_now (vm);
1787 if (time_now - time_last_seed_change > 1e-3)
1790 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1791 sizeof (hash_seeds));
1792 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1793 hash_seeds[i] = r[i];
1795 /* Mark all hash keys as been no-seen before. */
1796 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1799 time_last_seed_change = time_now;
1802 from = vlib_frame_vector_args (frame);
1803 n_left_from = frame->n_vectors;
1804 next_index = node->cached_next_index;
1805 if (next_index == IP4_ARP_NEXT_DROP)
1806 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1808 while (n_left_from > 0)
1810 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1811 to_next_drop, n_left_to_next_drop);
1813 while (n_left_from > 0 && n_left_to_next_drop > 0)
1815 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1816 ip_adjacency_t * adj0;
1823 p0 = vlib_get_buffer (vm, pi0);
1825 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1826 adj0 = ip_get_adjacency (lm, adj_index0);
1827 ip0 = vlib_buffer_get_current (p0);
1830 * this is the Glean case, so we are ARPing for the
1831 * packet's destination
1837 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1838 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1842 a0 ^= ip0->dst_address.data_u32;
1846 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1850 hash_v3_finalize32 (a0, b0, c0);
1852 c0 &= BITS (hash_bitmap) - 1;
1853 c0 = c0 / BITS (uword);
1854 m0 = (uword) 1 << (c0 % BITS (uword));
1856 bm0 = hash_bitmap[c0];
1857 drop0 = (bm0 & m0) != 0;
1859 /* Mark it as seen. */
1860 hash_bitmap[c0] = bm0 | m0;
1864 to_next_drop[0] = pi0;
1866 n_left_to_next_drop -= 1;
1868 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1874 * Can happen if the control-plane is programming tables
1875 * with traffic flowing; at least that's today's lame excuse.
1877 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1878 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1880 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1883 /* Send ARP request. */
1887 ethernet_arp_header_t * h0;
1888 vnet_hw_interface_t * hw_if0;
1890 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1892 /* Add rewrite/encap string for ARP packet. */
1893 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1895 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1897 /* Src ethernet address in ARP header. */
1898 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1899 sizeof (h0->ip4_over_ethernet[0].ethernet));
1903 /* The interface's source address is stashed in the Glean Adj */
1904 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1906 /* Copy in destination address we are requesting. This is the
1907 * glean case, so it's the packet's destination.*/
1908 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1912 /* Src IP address in ARP header. */
1913 if (ip4_src_address_for_packet(lm, sw_if_index0,
1914 &h0->ip4_over_ethernet[0].ip4))
1916 /* No source address available */
1917 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1918 vlib_buffer_free(vm, &bi0, 1);
1922 /* Copy in destination address we are requesting from the
1924 h0->ip4_over_ethernet[1].ip4.data_u32 =
1925 adj0->sub_type.nbr.next_hop.ip4.as_u32;
1928 vlib_buffer_copy_trace_flag (vm, p0, bi0);
1929 b0 = vlib_get_buffer (vm, bi0);
1930 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1932 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1934 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1938 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1941 return frame->n_vectors;
/* Node function of ip4_arp_node: ip4_arp_inline() with the final
   argument set to 0 (non-glean case). */
1945 ip4_arp (vlib_main_t * vm,
1946 vlib_node_runtime_t * node,
1947 vlib_frame_t * frame)
1949 return (ip4_arp_inline(vm, node, frame, 0));
/* Node function of ip4_glean_node: ip4_arp_inline() with the final
   argument set to 1 (glean case). */
1953 ip4_glean (vlib_main_t * vm,
1954 vlib_node_runtime_t * node,
1955 vlib_frame_t * frame)
1957 return (ip4_arp_inline(vm, node, frame, 1));
/* Counter descriptions indexed by IP4_ARP_ERROR_* value; shared by the
   ip4_arp_node and ip4_glean_node registrations. */
1960 static char * ip4_arp_error_strings[] = {
1961 [IP4_ARP_ERROR_DROP] = "address overflow drops",
1962 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1963 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1964 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1965 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1966 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph node registration with ip4_arp as the node function.  Error
   counters come from ip4_arp_error_strings; the next-node table has
   IP4_ARP_N_NEXT entries, of which IP4_ARP_NEXT_DROP maps to
   "error-drop". */
1969 VLIB_REGISTER_NODE (ip4_arp_node) = {
1970 .function = ip4_arp,
1972 .vector_size = sizeof (u32),
1974 .format_trace = format_ip4_forward_next_trace,
1976 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1977 .error_strings = ip4_arp_error_strings,
1979 .n_next_nodes = IP4_ARP_N_NEXT,
1981 [IP4_ARP_NEXT_DROP] = "error-drop",
/* Graph node registration for "ip4-glean" with ip4_glean as the node
   function.  It reuses ip4_arp_error_strings and the IP4_ARP_* next
   indices of the ARP node. */
1985 VLIB_REGISTER_NODE (ip4_glean_node) = {
1986 .function = ip4_glean,
1987 .name = "ip4-glean",
1988 .vector_size = sizeof (u32),
1990 .format_trace = format_ip4_forward_next_trace,
1992 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1993 .error_strings = ip4_arp_error_strings,
1995 .n_next_nodes = IP4_ARP_N_NEXT,
1997 [IP4_ARP_NEXT_DROP] = "error-drop",
/* Init-time hook, registered through VLIB_INIT_FUNCTION.  It fetches the
   runtime of ip4_arp_node and, for each error named by
   foreach_notrace_ip4_arp_error, calls
   vnet_pcap_drop_trace_filter_add_del() on the matching rt->errors[]
   entry.  The members of the foreach list and the remaining arguments of
   the call are not visible here. */
2001 #define foreach_notrace_ip4_arp_error \
2007 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2009 vlib_node_runtime_t *rt =
2010 vlib_node_get_runtime (vm, ip4_arp_node.index);
2012 /* don't trace ARP request packets */
2014 vnet_pcap_drop_trace_filter_add_del \
2015 (rt->errors[IP4_ARP_ERROR_##a], \
2017 foreach_notrace_ip4_arp_error;
2022 VLIB_INIT_FUNCTION(arp_notrace_init);
2025 /* Send an ARP request to see if given destination is reachable on given interface. */
/* vm: vlib main used for buffer and frame handling.
   dst: IPv4 address to probe.
   sw_if_index: software interface the request is sent on.
   Returns 0 on success, or a clib_error_t when the interface is not
   admin-up or has no address matching dst (the latter also sets
   vnm->api_errno to VNET_API_ERROR_NO_MATCHING_INTERFACE). */
2027 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2029 vnet_main_t * vnm = vnet_get_main();
2030 ip4_main_t * im = &ip4_main;
2031 ethernet_arp_header_t * h;
2032 ip4_address_t * src;
2033 ip_interface_address_t * ia;
2034 ip_adjacency_t * adj;
2035 vnet_hw_interface_t * hi;
2036 vnet_sw_interface_t * si;
2040 si = vnet_get_sw_interface (vnm, sw_if_index);
2042 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2044 return clib_error_return (0, "%U: interface %U down",
2045 format_ip4_address, dst,
2046 format_vnet_sw_if_index_name, vnm,
/* Pick the interface address that matches dst; ia receives the matching
   interface-address entry. */
2050 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2053 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2054 return clib_error_return
2055 (0, "no matching interface address for destination %U (interface %U)",
2056 format_ip4_address, dst,
2057 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* The probe adjacency of that interface address supplies the rewrite. */
2060 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
/* NOTE(review): no check of h against NULL is visible before it is
   written below; confirm the template allocation cannot fail here. */
2062 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2064 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender hardware address, sender IP and target IP of the request. */
2066 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2068 h->ip4_over_ethernet[0].ip4 = src[0];
2069 h->ip4_over_ethernet[1].ip4 = dst[0];
2071 b = vlib_get_buffer (vm, bi);
2072 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2074 /* Add encapsulation string for software interface (e.g. ethernet header). */
2075 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* Move the buffer start back so that it covers the rewrite bytes. */
2076 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the buffer to the output node of the hardware interface. */
2079 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2080 u32 * to_next = vlib_frame_vector_args (f);
2083 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2086 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline(): DROP is the default
   next on error, ARP is the override for ARP adjacencies on locally
   received packets, and ICMP_ERROR is chosen when the TTL expires. */
2090 IP4_REWRITE_NEXT_DROP,
2091 IP4_REWRITE_NEXT_ARP,
2092 IP4_REWRITE_NEXT_ICMP_ERROR,
2093 } ip4_rewrite_next_t;
2096 ip4_rewrite_inline (vlib_main_t * vm,
2097 vlib_node_runtime_t * node,
2098 vlib_frame_t * frame,
2099 int rewrite_for_locally_received_packets,
2102 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2103 u32 * from = vlib_frame_vector_args (frame);
2104 u32 n_left_from, n_left_to_next, * to_next, next_index;
2105 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2106 vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2107 ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2109 n_left_from = frame->n_vectors;
2110 next_index = node->cached_next_index;
2111 u32 cpu_index = os_get_cpu_number();
2113 while (n_left_from > 0)
2115 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2117 while (n_left_from >= 4 && n_left_to_next >= 2)
2119 ip_adjacency_t * adj0, * adj1;
2120 vlib_buffer_t * p0, * p1;
2121 ip4_header_t * ip0, * ip1;
2122 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2123 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2124 u32 next0_override, next1_override;
2125 u32 tx_sw_if_index0, tx_sw_if_index1;
2127 if (rewrite_for_locally_received_packets)
2128 next0_override = next1_override = 0;
2130 /* Prefetch next iteration. */
2132 vlib_buffer_t * p2, * p3;
2134 p2 = vlib_get_buffer (vm, from[2]);
2135 p3 = vlib_get_buffer (vm, from[3]);
2137 vlib_prefetch_buffer_header (p2, STORE);
2138 vlib_prefetch_buffer_header (p3, STORE);
2140 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2141 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2144 pi0 = to_next[0] = from[0];
2145 pi1 = to_next[1] = from[1];
2150 n_left_to_next -= 2;
2152 p0 = vlib_get_buffer (vm, pi0);
2153 p1 = vlib_get_buffer (vm, pi1);
2155 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2156 adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2158 /* We should never rewrite a pkt using the MISS adjacency */
2159 ASSERT(adj_index0 && adj_index1);
2161 ip0 = vlib_buffer_get_current (p0);
2162 ip1 = vlib_buffer_get_current (p1);
2164 error0 = error1 = IP4_ERROR_NONE;
2165 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2167 /* Decrement TTL & update checksum.
2168 Works either endian, so no need for byte swap. */
2169 if (! rewrite_for_locally_received_packets)
2171 i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2173 /* Input node should have reject packets with ttl 0. */
2174 ASSERT (ip0->ttl > 0);
2175 ASSERT (ip1->ttl > 0);
2177 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2178 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2180 checksum0 += checksum0 >= 0xffff;
2181 checksum1 += checksum1 >= 0xffff;
2183 ip0->checksum = checksum0;
2184 ip1->checksum = checksum1;
2193 * If the ttl drops below 1 when forwarding, generate
2196 if (PREDICT_FALSE(ttl0 <= 0))
2198 error0 = IP4_ERROR_TIME_EXPIRED;
2199 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2200 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2201 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2202 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2204 if (PREDICT_FALSE(ttl1 <= 0))
2206 error1 = IP4_ERROR_TIME_EXPIRED;
2207 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2208 icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2209 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2210 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2213 /* Verify checksum. */
2214 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2215 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2218 /* Rewrite packet header and updates lengths. */
2219 adj0 = ip_get_adjacency (lm, adj_index0);
2220 adj1 = ip_get_adjacency (lm, adj_index1);
2222 if (rewrite_for_locally_received_packets)
2224 if (PREDICT_FALSE(adj0->lookup_next_index
2225 == IP_LOOKUP_NEXT_ARP))
2226 next0_override = IP4_REWRITE_NEXT_ARP;
2227 if (PREDICT_FALSE(adj1->lookup_next_index
2228 == IP_LOOKUP_NEXT_ARP))
2229 next1_override = IP4_REWRITE_NEXT_ARP;
2232 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2233 rw_len0 = adj0[0].rewrite_header.data_bytes;
2234 rw_len1 = adj1[0].rewrite_header.data_bytes;
2235 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2236 vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2238 /* Check MTU of outgoing interface. */
2239 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2240 ? IP4_ERROR_MTU_EXCEEDED
2242 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2243 ? IP4_ERROR_MTU_EXCEEDED
2246 next0 = (error0 == IP4_ERROR_NONE)
2247 ? adj0[0].rewrite_header.next_index : next0;
2249 if (rewrite_for_locally_received_packets)
2250 next0 = next0 && next0_override ? next0_override : next0;
2252 next1 = (error1 == IP4_ERROR_NONE)
2253 ? adj1[0].rewrite_header.next_index : next1;
2255 if (rewrite_for_locally_received_packets)
2256 next1 = next1 && next1_override ? next1_override : next1;
2259 * We've already accounted for an ethernet_header_t elsewhere
2261 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2262 vlib_increment_combined_counter
2263 (&adjacency_counters,
2264 cpu_index, adj_index0,
2265 /* packet increment */ 0,
2266 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2268 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2269 vlib_increment_combined_counter
2270 (&adjacency_counters,
2271 cpu_index, adj_index1,
2272 /* packet increment */ 0,
2273 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2275 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2276 * to see the IP headerr */
2277 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2279 p0->current_data -= rw_len0;
2280 p0->current_length += rw_len0;
2281 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2282 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2286 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2289 p0->current_config_index =
2290 vec_elt (cm->config_index_by_sw_if_index,
2292 vnet_get_config_data (&cm->config_main,
2293 &p0->current_config_index,
2295 /* # bytes of config data */ 0);
2298 if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2300 p1->current_data -= rw_len1;
2301 p1->current_length += rw_len1;
2303 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2304 vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2308 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2311 p1->current_config_index =
2312 vec_elt (cm->config_index_by_sw_if_index,
2314 vnet_get_config_data (&cm->config_main,
2315 &p1->current_config_index,
2317 /* # bytes of config data */ 0);
2321 /* Guess we are only writing on simple Ethernet header. */
2322 vnet_rewrite_two_headers (adj0[0], adj1[0],
2324 sizeof (ethernet_header_t));
2328 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2329 adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2332 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2333 to_next, n_left_to_next,
2334 pi0, pi1, next0, next1);
2337 while (n_left_from > 0 && n_left_to_next > 0)
2339 ip_adjacency_t * adj0;
2342 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2344 u32 tx_sw_if_index0;
2346 if (rewrite_for_locally_received_packets)
2349 pi0 = to_next[0] = from[0];
2351 p0 = vlib_get_buffer (vm, pi0);
2353 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2355 /* We should never rewrite a pkt using the MISS adjacency */
2358 adj0 = ip_get_adjacency (lm, adj_index0);
2360 ip0 = vlib_buffer_get_current (p0);
2362 error0 = IP4_ERROR_NONE;
2363 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2365 /* Decrement TTL & update checksum. */
2366 if (! rewrite_for_locally_received_packets)
2368 i32 ttl0 = ip0->ttl;
2370 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2372 checksum0 += checksum0 >= 0xffff;
2374 ip0->checksum = checksum0;
2376 ASSERT (ip0->ttl > 0);
2382 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2384 if (PREDICT_FALSE(ttl0 <= 0))
2387 * If the ttl drops below 1 when forwarding, generate
2390 error0 = IP4_ERROR_TIME_EXPIRED;
2391 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2392 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2393 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2394 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2398 if (rewrite_for_locally_received_packets)
2401 * We have to override the next_index in ARP adjacencies,
2402 * because they're set up for ip4-arp, not this node...
2404 if (PREDICT_FALSE(adj0->lookup_next_index
2405 == IP_LOOKUP_NEXT_ARP))
2406 next0_override = IP4_REWRITE_NEXT_ARP;
2409 /* Guess we are only writing on simple Ethernet header. */
2410 vnet_rewrite_one_header (adj0[0], ip0,
2411 sizeof (ethernet_header_t));
2413 /* Update packet buffer attributes/set output interface. */
2414 rw_len0 = adj0[0].rewrite_header.data_bytes;
2415 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2417 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2418 vlib_increment_combined_counter
2419 (&adjacency_counters,
2420 cpu_index, adj_index0,
2421 /* packet increment */ 0,
2422 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2424 /* Check MTU of outgoing interface. */
2425 error0 = (vlib_buffer_length_in_chain (vm, p0)
2426 > adj0[0].rewrite_header.max_l3_packet_bytes
2427 ? IP4_ERROR_MTU_EXCEEDED
2430 p0->error = error_node->errors[error0];
2432 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2433 * to see the IP header */
2434 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2436 p0->current_data -= rw_len0;
2437 p0->current_length += rw_len0;
2438 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2440 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2441 next0 = adj0[0].rewrite_header.next_index;
2445 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2449 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2452 p0->current_config_index =
2453 vec_elt (cm->config_index_by_sw_if_index,
2455 vnet_get_config_data (&cm->config_main,
2456 &p0->current_config_index,
2458 /* # bytes of config data */ 0);
2462 if (rewrite_for_locally_received_packets)
2463 next0 = next0 && next0_override ? next0_override : next0;
2468 n_left_to_next -= 1;
2470 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2471 to_next, n_left_to_next,
2475 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2478 /* Need to do trace after rewrites to pick up new packet data. */
2479 if (node->flags & VLIB_NODE_FLAG_TRACE)
2480 ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2482 return frame->n_vectors;
2486 /** @brief IPv4 transit rewrite node.
2487 @node ip4-rewrite-transit
2489 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2490 header checksum, fetch the ip adjacency, check the outbound mtu,
2491 apply the adjacency rewrite, and send pkts to the adjacency
2492 rewrite header's next_index.
2494 @param vm vlib_main_t corresponding to the current thread
2495 @param node vlib_node_runtime_t
2496 @param frame vlib_frame_t whose contents should be dispatched
2498 @par Graph mechanics: buffer metadata, next index usage
2501 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2502 - the rewrite adjacency index
2503 - <code>adj->lookup_next_index</code>
2504 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2505 the packet will be dropped.
2506 - <code>adj->rewrite_header</code>
2507 - Rewrite string length, rewrite string, next_index
2510 - <code>b->current_data, b->current_length</code>
2511 - Updated net of applying the rewrite string
2513 <em>Next Indices:</em>
2514 - <code> adj->rewrite_header.next_index </code>
2518 ip4_rewrite_transit (vlib_main_t * vm,
2519 vlib_node_runtime_t * node,
2520 vlib_frame_t * frame)
/* Node function registered as "ip4-rewrite-transit". Delegates to
 * ip4_rewrite_inline with both mode flags cleared; the TTL-decrement
 * path of the inline function is taken when the first flag is 0. */
2522 return ip4_rewrite_inline (vm, node, frame,
2523 /* rewrite_for_locally_received_packets */ 0, 0);
2526 /** @brief IPv4 local rewrite node.
2527 @node ip4-rewrite-local
2529 This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2530 the outbound interface mtu, apply the adjacency rewrite, and send
2531 pkts to the adjacency rewrite header's next_index. Also
2532 handles the corner case of an ICMPv4 packet whose source and
2533 destination addresses are both the interface address.
2535 @param vm vlib_main_t corresponding to the current thread
2536 @param node vlib_node_runtime_t
2537 @param frame vlib_frame_t whose contents should be dispatched
2539 @par Graph mechanics: buffer metadata, next index usage
2542 - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2543 - the rewrite adjacency index
2544 - <code>adj->lookup_next_index</code>
2545 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2546 the packet will be dropped.
2547 - <code>adj->rewrite_header</code>
2548 - Rewrite string length, rewrite string, next_index
2551 - <code>b->current_data, b->current_length</code>
2552 - Updated net of applying the rewrite string
2554 <em>Next Indices:</em>
2555 - <code> adj->rewrite_header.next_index </code>
2560 ip4_rewrite_local (vlib_main_t * vm,
2561 vlib_node_runtime_t * node,
2562 vlib_frame_t * frame)
/* Node function registered as "ip4-rewrite-local". Same inline worker
 * as the transit node, but with rewrite_for_locally_received_packets
 * set, which skips the TTL decrement and enables the ARP next-index
 * override inside ip4_rewrite_inline. */
2564 return ip4_rewrite_inline (vm, node, frame,
2565 /* rewrite_for_locally_received_packets */ 1, 0);
2569 ip4_midchain (vlib_main_t * vm,
2570 vlib_node_runtime_t * node,
2571 vlib_frame_t * frame)
/* Node function registered as "ip4-midchain". This is the only caller
 * in this section that passes 1 as the last argument of
 * ip4_rewrite_inline (presumably the mid-chain flag -- confirm against
 * the inline function's parameter list). */
2573 return ip4_rewrite_inline (vm, node, frame,
2574 /* rewrite_for_locally_received_packets */ 0, 1);
/* Graph node "ip4-rewrite-transit", implemented by ip4_rewrite_transit.
 * "ip4-midchain" and "ip4-rewrite-local" declare themselves siblings of
 * this node, so the next-node names listed here apply to them as well. */
2577 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2578 .function = ip4_rewrite_transit,
2579 .name = "ip4-rewrite-transit",
2580 .vector_size = sizeof (u32),
2582 .format_trace = format_ip4_rewrite_trace,
/* Next-node names indexed by the IP4_REWRITE_NEXT_* values. */
2586 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2587 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2588 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2592 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
/* Graph node "ip4-midchain", implemented by ip4_midchain. Registered as
 * a sibling of "ip4-rewrite-transit"; note that it uses the generic
 * forward-next trace formatter rather than format_ip4_rewrite_trace. */
2594 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2595 .function = ip4_midchain,
2596 .name = "ip4-midchain",
2597 .vector_size = sizeof (u32),
2599 .format_trace = format_ip4_forward_next_trace,
2601 .sibling_of = "ip4-rewrite-transit",
2604 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
/* Graph node "ip4-rewrite-local", implemented by ip4_rewrite_local and
 * registered as a sibling of "ip4-rewrite-transit". */
2606 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2607 .function = ip4_rewrite_local,
2608 .name = "ip4-rewrite-local",
2609 .vector_size = sizeof (u32),
2611 .sibling_of = "ip4-rewrite-transit",
2613 .format_trace = format_ip4_rewrite_trace,
2618 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
/* CLI handler for "set interface ip table".
 * Parses an interface followed by a numeric table id, resolves the
 * IPv4 FIB for that table id and records the FIB index for the
 * interface in ip4_main.fib_index_by_sw_if_index.
 * Returns a clib error describing the offending input when either
 * token fails to parse. */
2620 static clib_error_t *
2621 add_del_interface_table (vlib_main_t * vm,
2622 unformat_input_t * input,
2623 vlib_cli_command_t * cmd)
2625 vnet_main_t * vnm = vnet_get_main();
2626 clib_error_t * error = 0;
2627 u32 sw_if_index, table_id;
/* First token: the interface. */
2631 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2633 error = clib_error_return (0, "unknown interface `%U'",
2634 format_unformat_error, input);
/* Second token: the table id. */
2638 if (unformat (input, "%d", &table_id))
2642 error = clib_error_return (0, "expected table id `%U'",
2643 format_unformat_error, input);
2648 ip4_main_t * im = &ip4_main;
/* Map the table id to a FIB index (find-or-create, per the helper name). */
2651 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2656 // changing an interface's table has consequences for any connecteds
2657 // and adj-fibs already installed.
/* vec_validate presumably makes index sw_if_index addressable before
 * the store below -- confirm against vppinfra. */
2659 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2660 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2668 * Place the indicated interface into the supplied VRF
2671 * @cliexstart{set interface ip table}
2673 * vpp# set interface ip table GigabitEthernet2/0/0 2
2675 * Interface addresses added after setting the interface IP table end up in the indicated VRF table.
2676 * Predictable but potentially counter-intuitive results occur if you provision interface addresses in multiple FIBs.
2677 * Upon RX, packets will be processed in the last IP table ID provisioned.
2678 * It might be marginally useful to evade source RPF drops to put an interface address into multiple FIBs.
/* Registers the "set interface ip table" CLI command; input is handled
 * by add_del_interface_table, which expects an interface followed by a
 * table id. */
2681 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2682 .path = "set interface ip table",
2683 .function = add_del_interface_table,
2684 .short_help = "Add/delete FIB table id for interface",
2689 ip4_lookup_multicast (vlib_main_t * vm,
2690 vlib_node_runtime_t * node,
2691 vlib_frame_t * frame)
/* Node function registered as "ip4-lookup-multicast".
 *
 * For every buffer in the frame:
 *  - pick the FIB index from the RX interface, unless sw_if_index[VLIB_TX]
 *    is set (!= ~0), in which case that value is used as the FIB index;
 *  - look up a load-balance index in that FIB;
 *  - compute the flow hash with the load-balance object's hash config and
 *    use it, masked by lb_n_buckets_minus_1, to pick a bucket;
 *  - store the bucket's next node and index in the buffer metadata;
 *  - bump the per-load-balance combined counter.
 *
 * @param vm    vlib_main_t for the current thread
 * @param node  vlib_node_runtime_t
 * @param frame vlib_frame_t whose buffers are dispatched
 * @return frame->n_vectors
 */
2693 ip4_main_t * im = &ip4_main;
2694 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2695 u32 n_left_from, n_left_to_next, * from, * to_next;
2696 ip_lookup_next_t next;
2697 u32 cpu_index = os_get_cpu_number();
2699 from = vlib_frame_vector_args (frame);
2700 n_left_from = frame->n_vectors;
/* Start by speculating that packets go to the last-used next node. */
2701 next = node->cached_next_index;
2703 while (n_left_from > 0)
2705 vlib_get_next_frame (vm, node, next,
2706 to_next, n_left_to_next);
/* Dual loop: two packets per iteration while prefetching the next two. */
2708 while (n_left_from >= 4 && n_left_to_next >= 2)
2710 vlib_buffer_t * p0, * p1;
2711 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2712 ip_lookup_next_t next0, next1;
2713 ip4_header_t * ip0, * ip1;
2714 u32 fib_index0, fib_index1;
2715 const dpo_id_t *dpo0, *dpo1;
2716 const load_balance_t * lb0, * lb1;
2718 /* Prefetch next iteration. */
2720 vlib_buffer_t * p2, * p3;
2722 p2 = vlib_get_buffer (vm, from[2]);
2723 p3 = vlib_get_buffer (vm, from[3]);
2725 vlib_prefetch_buffer_header (p2, LOAD);
2726 vlib_prefetch_buffer_header (p3, LOAD);
2728 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2729 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
/* Speculatively enqueue both buffers to the current next frame. */
2732 pi0 = to_next[0] = from[0];
2733 pi1 = to_next[1] = from[1];
2735 p0 = vlib_get_buffer (vm, pi0);
2736 p1 = vlib_get_buffer (vm, pi1);
2738 ip0 = vlib_buffer_get_current (p0);
2739 ip1 = vlib_buffer_get_current (p1);
/* FIB index comes from the RX interface unless sw_if_index[VLIB_TX]
 * carries an explicit override (anything other than ~0). */
2741 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2742 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2743 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2744 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2745 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2746 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2748 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2750 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2753 lb0 = load_balance_get (lb_index0);
2754 lb1 = load_balance_get (lb_index1);
/* The bucket masks below rely on a non-zero power-of-two bucket count. */
2756 ASSERT (lb0->lb_n_buckets > 0);
2757 ASSERT (is_pow2 (lb0->lb_n_buckets));
2758 ASSERT (lb1->lb_n_buckets > 0);
2759 ASSERT (is_pow2 (lb1->lb_n_buckets));
2761 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2762 (ip0, lb0->lb_hash_config);
2764 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2765 (ip1, lb1->lb_hash_config);
2767 dpo0 = load_balance_get_bucket_i(lb0,
2768 (vnet_buffer (p0)->ip.flow_hash &
2769 (lb0->lb_n_buckets_minus_1)));
/* Packet 1 must be masked with lb1's own bucket mask: the two packets
 * can resolve to different load-balance objects, and lb0's mask could
 * select a bucket that lb1 does not have. */
2770 dpo1 = load_balance_get_bucket_i(lb1,
2771 (vnet_buffer (p1)->ip.flow_hash &
2772 (lb1->lb_n_buckets_minus_1)));
/* Record the chosen bucket's next node and index for downstream nodes. */
2774 next0 = dpo0->dpoi_next_node;
2775 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2776 next1 = dpo1->dpoi_next_node;
2777 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* One packet plus its chain length, counted against the lb index. */
2779 if (1) /* $$$$$$ HACK FIXME */
2780 vlib_increment_combined_counter
2781 (cm, cpu_index, lb_index0, 1,
2782 vlib_buffer_length_in_chain (vm, p0));
2783 if (1) /* $$$$$$ HACK FIXME */
2784 vlib_increment_combined_counter
2785 (cm, cpu_index, lb_index1, 1,
2786 vlib_buffer_length_in_chain (vm, p1));
2790 n_left_to_next -= 2;
/* Bit 0: packet 0 mis-speculated; bit 1: packet 1 mis-speculated. */
2793 wrong_next = (next0 != next) + 2*(next1 != next);
2794 if (PREDICT_FALSE (wrong_next != 0))
/* Give back the speculative slot(s) and enqueue each mis-speculated
 * buffer to its own next node. */
2802 n_left_to_next += 1;
2803 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2809 n_left_to_next += 1;
2810 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2816 n_left_to_next += 2;
2817 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2818 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2822 vlib_put_next_frame (vm, node, next, n_left_to_next);
2824 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single loop: same per-packet steps as above for the remaining buffers. */
2830 while (n_left_from > 0 && n_left_to_next > 0)
2835 ip_lookup_next_t next0;
2837 const dpo_id_t *dpo0;
2838 const load_balance_t * lb0;
2843 p0 = vlib_get_buffer (vm, pi0);
2845 ip0 = vlib_buffer_get_current (p0);
2847 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2848 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2849 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2850 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2852 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2855 lb0 = load_balance_get (lb_index0);
2857 ASSERT (lb0->lb_n_buckets > 0);
2858 ASSERT (is_pow2 (lb0->lb_n_buckets));
2860 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2861 (ip0, lb0->lb_hash_config);
2863 dpo0 = load_balance_get_bucket_i(lb0,
2864 (vnet_buffer (p0)->ip.flow_hash &
2865 (lb0->lb_n_buckets_minus_1)));
2867 next0 = dpo0->dpoi_next_node;
2868 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2870 if (1) /* $$$$$$ HACK FIXME */
2871 vlib_increment_combined_counter
2872 (cm, cpu_index, lb_index0, 1,
2873 vlib_buffer_length_in_chain (vm, p0));
2877 n_left_to_next -= 1;
/* Mis-speculation: close the current next frame and open another. */
2880 if (PREDICT_FALSE (next0 != next))
2882 n_left_to_next += 1;
2883 vlib_put_next_frame (vm, node, next, n_left_to_next);
2885 vlib_get_next_frame (vm, node, next,
2886 to_next, n_left_to_next);
2889 n_left_to_next -= 1;
2893 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Trace once the adjacency indices have been written to the buffers. */
2896 if (node->flags & VLIB_NODE_FLAG_TRACE)
2897 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2899 return frame->n_vectors;
/* Graph node "ip4-lookup-multicast", implemented by ip4_lookup_multicast
 * and registered as a sibling of "ip4-lookup". */
2902 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2903 .function = ip4_lookup_multicast,
2904 .name = "ip4-lookup-multicast",
2905 .vector_size = sizeof (u32),
2906 .sibling_of = "ip4-lookup",
2907 .format_trace = format_ip4_lookup_trace,
2912 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
/* Graph node "ip4-multicast". Its node function is ip4_drop, so buffers
 * reaching this node are handled by the drop function. */
2914 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2915 .function = ip4_drop,
2916 .name = "ip4-multicast",
2917 .vector_size = sizeof (u32),
2919 .format_trace = format_ip4_forward_next_trace,
/* Cross-check an IPv4 lookup in one FIB.
 * Walks the FIB's mtrie by hand for address 'a' and compares the index
 * found in the resulting leaf with what ip4_fib_table_lookup_lb returns
 * for the same FIB and address.
 * @param a          address to look up
 * @param fib_index0 FIB index, passed straight to ip4_fib_get
 * @return non-zero when both lookups agree, 0 otherwise
 */
2927 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2929 ip4_fib_mtrie_t * mtrie0;
2930 ip4_fib_mtrie_leaf_t leaf0;
2933 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
/* Four lookup steps starting from the root leaf (step indices 0..3). */
2935 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2936 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2937 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2938 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2939 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2941 /* Handle default route. */
2942 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2944 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2946 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
/* CLI handler for "test lookup".
 * Accepts "table <n>", "count <f>" and a base IPv4 address in any order,
 * then validates n consecutive addresses starting at the base address
 * with ip4_lookup_validate and prints how many lookups disagreed.
 * Returns a clib error on unrecognised input. */
2949 static clib_error_t *
2950 test_lookup_command_fn (vlib_main_t * vm,
2951 unformat_input_t * input,
2952 vlib_cli_command_t * cmd)
2958 ip4_address_t ip4_base_address;
2961 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2962 if (unformat (input, "table %d", &table_id))
2964 else if (unformat (input, "count %f", &count))
2967 else if (unformat (input, "%U",
2968 unformat_ip4_address, &ip4_base_address))
2971 return clib_error_return (0, "unknown input `%U'",
2972 format_unformat_error, input);
2977 for (i = 0; i < n; i++)
/* NOTE(review): table_id is passed where ip4_lookup_validate expects a
 * FIB index (it feeds the value to ip4_fib_get). Other code in this
 * file maps table ids through fib_index_by_table_id first -- confirm
 * whether a translation is missing here. */
2979 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Advance to the next address: increment in host byte order and
 * convert back to network order. */
2982 ip4_base_address.as_u32 =
2983 clib_host_to_net_u32 (1 +
2984 clib_net_to_host_u32 (ip4_base_address.as_u32));
2988 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2990 vlib_cli_output (vm, "No errors in %d lookups\n", n);
/* Registers the "test lookup" CLI command, handled by
 * test_lookup_command_fn (options: "table <n>", "count <f>", and a base
 * IPv4 address). */
2995 VLIB_CLI_COMMAND (lookup_test_command, static) = {
2996 .path = "test lookup",
2997 .short_help = "test lookup",
2998 .function = test_lookup_command_fn,
/* Set the flow-hash configuration of the IPv4 FIB with the given table id.
 * @param table_id         table id, looked up in ip4_main.fib_index_by_table_id
 * @param flow_hash_config value stored in the FIB's flow_hash_config
 * @return VNET_API_ERROR_NO_SUCH_FIB when the table id is not found
 *         (the success return value is not visible in this section)
 */
3001 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3003 ip4_main_t * im4 = &ip4_main;
/* Translate the table id to a FIB index. */
3005 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3008 return VNET_API_ERROR_NO_SUCH_FIB;
3010 fib = ip4_fib_get (p[0]);
3012 fib->flow_hash_config = flow_hash_config;
/* CLI handler for "set ip flow-hash".
 * Accepts "table <n>" plus any of the keywords generated from
 * foreach_flow_hash_bit; each keyword ORs its bit into flow_hash_config.
 * The result is applied with vnet_set_ip4_flow_hash.
 * Returns a clib error for unrecognised input or an unknown table. */
3016 static clib_error_t *
3017 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3018 unformat_input_t * input,
3019 vlib_cli_command_t * cmd)
3023 u32 flow_hash_config = 0;
3026 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3027 if (unformat (input, "table %d", &table_id))
3030 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3031 foreach_flow_hash_bit
3037 return clib_error_return (0, "unknown input `%U'",
3038 format_unformat_error, input);
3040 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
/* Translate the API return code into a CLI error or warning. */
3046 case VNET_API_ERROR_NO_SUCH_FIB:
3047 return clib_error_return (0, "no such FIB table %d", table_id);
3050 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
/* Registers the "set ip flow-hash" CLI command, handled by
 * set_ip_flow_hash_command_fn. The help text must spell the command the
 * same way as .path; it previously advertised "set ip table flow-hash",
 * which is not the registered path. */
3057 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3058 .path = "set ip flow-hash",
3060 "set ip flow-hash table <fib-id> src dst sport dport proto reverse",
3061 .function = set_ip_flow_hash_command_fn,
/* Bind (or unbind) a classifier table to an interface for IPv4.
 * Records table_index in lookup_main.classify_table_index_by_sw_if_index
 * and, when the interface has a first IPv4 address, adds or removes a
 * FIB_SOURCE_CLASSIFY special entry built from that address in the
 * interface's FIB.
 * A table_index of ~0 skips the table-existence check and takes the
 * removal path.
 * @return VNET_API_ERROR_NO_MATCHING_INTERFACE for a free sw_if_index,
 *         VNET_API_ERROR_NO_SUCH_ENTRY for a free classifier table index
 *         (the success return value is not visible in this section)
 */
3064 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3067 vnet_main_t * vnm = vnet_get_main();
3068 vnet_interface_main_t * im = &vnm->interface_main;
3069 ip4_main_t * ipm = &ip4_main;
3070 ip_lookup_main_t * lm = &ipm->lookup_main;
3071 vnet_classify_main_t * cm = &vnet_classify_main;
3072 ip4_address_t *if_addr;
/* Reject interfaces and classifier tables that do not exist. */
3074 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3075 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3077 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3078 return VNET_API_ERROR_NO_SUCH_ENTRY;
3080 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3081 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3083 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* Nothing to program in the FIB when the interface has no address. */
3085 if (NULL != if_addr)
3087 fib_prefix_t pfx = {
3089 .fp_proto = FIB_PROTOCOL_IP4,
3090 .fp_addr.ip4 = *if_addr,
3094 fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
/* Valid table: install a classify DPO; otherwise remove the entry. */
3098 if (table_index != (u32) ~0)
3100 dpo_id_t dpo = DPO_NULL;
3105 classify_dpo_create(FIB_PROTOCOL_IP4,
3108 fib_table_entry_special_dpo_add(fib_index,
3110 FIB_SOURCE_CLASSIFY,
3111 FIB_ENTRY_FLAG_NONE,
3117 fib_table_entry_special_remove(fib_index,
3119 FIB_SOURCE_CLASSIFY);
/* CLI handler for "set ip classify".
 * Requires both "table-index <n>" and "intfc <interface>"; passes them
 * to vnet_set_ip4_classify_intfc and maps its error codes to CLI
 * error messages. */
3126 static clib_error_t *
3127 set_ip_classify_command_fn (vlib_main_t * vm,
3128 unformat_input_t * input,
3129 vlib_cli_command_t * cmd)
3131 u32 table_index = ~0;
/* Tracked separately because ~0 is itself a meaningful table_index
 * value for vnet_set_ip4_classify_intfc. */
3132 int table_index_set = 0;
3133 u32 sw_if_index = ~0;
3136 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3137 if (unformat (input, "table-index %d", &table_index))
3138 table_index_set = 1;
3139 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3140 vnet_get_main(), &sw_if_index))
/* Both arguments are mandatory. */
3146 if (table_index_set == 0)
3147 return clib_error_return (0, "classify table-index must be specified");
3149 if (sw_if_index == ~0)
3150 return clib_error_return (0, "interface / subif must be specified");
3152 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3159 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3160 return clib_error_return (0, "No such interface");
3162 case VNET_API_ERROR_NO_SUCH_ENTRY:
3163 return clib_error_return (0, "No such classifier table");
3168 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3169 .path = "set ip classify",
3171 "set ip classify intfc <int> table-index <index>",
3172 .function = set_ip_classify_command_fn,