MPLS infrastructure improvements
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53
54 /**
55  * @file
56  * @brief IPv4 Forwarding.
57  *
58  * This file contains the source code for IPv4 forwarding.
59  */
60
61 void
62 ip4_forward_next_trace (vlib_main_t * vm,
63                         vlib_node_runtime_t * node,
64                         vlib_frame_t * frame,
65                         vlib_rx_or_tx_t which_adj_index);
66
67 always_inline uword
68 ip4_lookup_inline (vlib_main_t * vm,
69                    vlib_node_runtime_t * node,
70                    vlib_frame_t * frame,
71                    int lookup_for_responses_to_locally_received_packets)
72 {
73   ip4_main_t * im = &ip4_main;
74   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
75   u32 n_left_from, n_left_to_next, * from, * to_next;
76   ip_lookup_next_t next;
77   u32 cpu_index = os_get_cpu_number();
78
79   from = vlib_frame_vector_args (frame);
80   n_left_from = frame->n_vectors;
81   next = node->cached_next_index;
82
83   while (n_left_from > 0)
84     {
85       vlib_get_next_frame (vm, node, next,
86                            to_next, n_left_to_next);
87
88       while (n_left_from >= 8 && n_left_to_next >= 4)
89         {
90           vlib_buffer_t * p0, * p1, * p2, * p3;
91           ip4_header_t * ip0, * ip1, * ip2, * ip3;
92           __attribute__((unused)) tcp_header_t * tcp0, * tcp1, * tcp2, * tcp3;
93           ip_lookup_next_t next0, next1, next2, next3;
94           const load_balance_t * lb0, * lb1, * lb2, * lb3;
95           ip4_fib_mtrie_t * mtrie0, * mtrie1, * mtrie2, * mtrie3;
96           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97           ip4_address_t * dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98           __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
99           __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
100           __attribute__((unused)) u32 pi2, fib_index2, lb_index2, is_tcp_udp2;
101           __attribute__((unused)) u32 pi3, fib_index3, lb_index3, is_tcp_udp3;
102           flow_hash_config_t flow_hash_config0, flow_hash_config1;
103           flow_hash_config_t flow_hash_config2, flow_hash_config3;
104           u32 hash_c0, hash_c1, hash_c2, hash_c3;
105           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
106
107           /* Prefetch next iteration. */
108           {
109             vlib_buffer_t * p4, * p5, * p6, * p7;
110
111             p4 = vlib_get_buffer (vm, from[4]);
112             p5 = vlib_get_buffer (vm, from[5]);
113             p6 = vlib_get_buffer (vm, from[6]);
114             p7 = vlib_get_buffer (vm, from[7]);
115
116             vlib_prefetch_buffer_header (p4, LOAD);
117             vlib_prefetch_buffer_header (p5, LOAD);
118             vlib_prefetch_buffer_header (p6, LOAD);
119             vlib_prefetch_buffer_header (p7, LOAD);
120
121             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
122             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
123             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
124             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
125           }
126
127           pi0 = to_next[0] = from[0];
128           pi1 = to_next[1] = from[1];
129           pi2 = to_next[2] = from[2];
130           pi3 = to_next[3] = from[3];
131
132           from += 4;
133           to_next += 4;
134           n_left_to_next -= 4;
135           n_left_from -= 4;
136
137           p0 = vlib_get_buffer (vm, pi0);
138           p1 = vlib_get_buffer (vm, pi1);
139           p2 = vlib_get_buffer (vm, pi2);
140           p3 = vlib_get_buffer (vm, pi3);
141
142           ip0 = vlib_buffer_get_current (p0);
143           ip1 = vlib_buffer_get_current (p1);
144           ip2 = vlib_buffer_get_current (p2);
145           ip3 = vlib_buffer_get_current (p3);
146
147           dst_addr0 = &ip0->dst_address;
148           dst_addr1 = &ip1->dst_address;
149           dst_addr2 = &ip2->dst_address;
150           dst_addr3 = &ip3->dst_address;
151
152           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
153           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
154           fib_index2 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p2)->sw_if_index[VLIB_RX]);
155           fib_index3 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p3)->sw_if_index[VLIB_RX]);
156           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
157             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
158           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
159             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
160           fib_index2 = (vnet_buffer(p2)->sw_if_index[VLIB_TX] == (u32)~0) ?
161             fib_index2 : vnet_buffer(p2)->sw_if_index[VLIB_TX];
162           fib_index3 = (vnet_buffer(p3)->sw_if_index[VLIB_TX] == (u32)~0) ?
163             fib_index3 : vnet_buffer(p3)->sw_if_index[VLIB_TX];
164
165
166           if (! lookup_for_responses_to_locally_received_packets)
167             {
168               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
169               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
170               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
171               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
172
173               leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
174
175               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
176               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
177               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
178               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
179             }
180
181           tcp0 = (void *) (ip0 + 1);
182           tcp1 = (void *) (ip1 + 1);
183           tcp2 = (void *) (ip2 + 1);
184           tcp3 = (void *) (ip3 + 1);
185
186           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
187                          || ip0->protocol == IP_PROTOCOL_UDP);
188           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
189                          || ip1->protocol == IP_PROTOCOL_UDP);
190           is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
191                          || ip2->protocol == IP_PROTOCOL_UDP);
192           is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
193                          || ip1->protocol == IP_PROTOCOL_UDP);
194
195           if (! lookup_for_responses_to_locally_received_packets)
196             {
197               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
198               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
199               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
200               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
201             }
202
203           if (! lookup_for_responses_to_locally_received_packets)
204             {
205               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
206               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
207               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
208               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
209             }
210
211           if (! lookup_for_responses_to_locally_received_packets)
212             {
213               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
214               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
215               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
216               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
217             }
218
219           if (lookup_for_responses_to_locally_received_packets)
220             {
221               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
222               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
223               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
224               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
225             }
226           else
227             {
228               /* Handle default route. */
229               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
230               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
231               leaf2 = (leaf2 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
232               leaf3 = (leaf3 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
233               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
234               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
235               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
236               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
237             }
238
239           lb0 = load_balance_get (lb_index0);
240           lb1 = load_balance_get (lb_index1);
241           lb2 = load_balance_get (lb_index2);
242           lb3 = load_balance_get (lb_index3);
243
244           /* Use flow hash to compute multipath adjacency. */
245           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
246           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
247           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
248           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
249           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
250             {
251               flow_hash_config0 = lb0->lb_hash_config;
252               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
253                 ip4_compute_flow_hash (ip0, flow_hash_config0);
254             }
255           if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
256             {
257               flow_hash_config1 = lb1->lb_hash_config;
258               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
259                 ip4_compute_flow_hash (ip1, flow_hash_config1);
260             }
261           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
262             {
263               flow_hash_config2 = lb2->lb_hash_config;
264               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
265                 ip4_compute_flow_hash (ip2, flow_hash_config2);
266             }
267           if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
268             {
269               flow_hash_config3 = lb3->lb_hash_config;
270               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
271                 ip4_compute_flow_hash (ip3, flow_hash_config3);
272             }
273
274           ASSERT (lb0->lb_n_buckets > 0);
275           ASSERT (is_pow2 (lb0->lb_n_buckets));
276           ASSERT (lb1->lb_n_buckets > 0);
277           ASSERT (is_pow2 (lb1->lb_n_buckets));
278           ASSERT (lb2->lb_n_buckets > 0);
279           ASSERT (is_pow2 (lb2->lb_n_buckets));
280           ASSERT (lb3->lb_n_buckets > 0);
281           ASSERT (is_pow2 (lb3->lb_n_buckets));
282
283           dpo0 = load_balance_get_bucket_i(lb0,
284                                            (hash_c0 &
285                                             (lb0->lb_n_buckets_minus_1)));
286           dpo1 = load_balance_get_bucket_i(lb1,
287                                            (hash_c1 &
288                                             (lb1->lb_n_buckets_minus_1)));
289           dpo2 = load_balance_get_bucket_i(lb2,
290                                            (hash_c2 &
291                                             (lb2->lb_n_buckets_minus_1)));
292           dpo3 = load_balance_get_bucket_i(lb3,
293                                            (hash_c3 &
294                                             (lb3->lb_n_buckets_minus_1)));
295
296           next0 = dpo0->dpoi_next_node;
297           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
298           next1 = dpo1->dpoi_next_node;
299           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
300           next2 = dpo2->dpoi_next_node;
301           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
302           next3 = dpo3->dpoi_next_node;
303           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
304
305           vlib_increment_combined_counter
306               (cm, cpu_index, lb_index0, 1,
307                vlib_buffer_length_in_chain (vm, p0)
308                + sizeof(ethernet_header_t));
309           vlib_increment_combined_counter
310               (cm, cpu_index, lb_index1, 1,
311                vlib_buffer_length_in_chain (vm, p1)
312                + sizeof(ethernet_header_t));
313           vlib_increment_combined_counter
314               (cm, cpu_index, lb_index2, 1,
315                vlib_buffer_length_in_chain (vm, p2)
316                + sizeof(ethernet_header_t));
317           vlib_increment_combined_counter
318               (cm, cpu_index, lb_index3, 1,
319                vlib_buffer_length_in_chain (vm, p3)
320                + sizeof(ethernet_header_t));
321
322           vlib_validate_buffer_enqueue_x4 (vm, node, next,
323                                            to_next, n_left_to_next,
324                                            pi0, pi1, pi2, pi3,
325                                            next0, next1, next2, next3);
326         }
327
328       while (n_left_from > 0 && n_left_to_next > 0)
329         {
330           vlib_buffer_t * p0;
331           ip4_header_t * ip0;
332           __attribute__((unused)) tcp_header_t * tcp0;
333           ip_lookup_next_t next0;
334           const load_balance_t *lb0;
335           ip4_fib_mtrie_t * mtrie0;
336           ip4_fib_mtrie_leaf_t leaf0;
337           ip4_address_t * dst_addr0;
338           __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
339           flow_hash_config_t flow_hash_config0;
340           const dpo_id_t *dpo0;
341           u32 hash_c0;
342
343           pi0 = from[0];
344           to_next[0] = pi0;
345
346           p0 = vlib_get_buffer (vm, pi0);
347
348           ip0 = vlib_buffer_get_current (p0);
349
350           dst_addr0 = &ip0->dst_address;
351
352           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
353           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
354             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
355
356           if (! lookup_for_responses_to_locally_received_packets)
357             {
358               mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
359
360               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
361
362               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
363             }
364
365           tcp0 = (void *) (ip0 + 1);
366
367           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
368                          || ip0->protocol == IP_PROTOCOL_UDP);
369
370           if (! lookup_for_responses_to_locally_received_packets)
371             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
372
373           if (! lookup_for_responses_to_locally_received_packets)
374             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
375
376           if (! lookup_for_responses_to_locally_received_packets)
377             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
378
379           if (lookup_for_responses_to_locally_received_packets)
380             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
381           else
382             {
383               /* Handle default route. */
384               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
385               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
386             }
387
388           lb0 = load_balance_get (lbi0);
389
390           /* Use flow hash to compute multipath adjacency. */
391           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
392           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
393             {
394               flow_hash_config0 = lb0->lb_hash_config;
395
396               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
397                 ip4_compute_flow_hash (ip0, flow_hash_config0);
398             }
399
400           ASSERT (lb0->lb_n_buckets > 0);
401           ASSERT (is_pow2 (lb0->lb_n_buckets));
402
403           dpo0 = load_balance_get_bucket_i(lb0,
404                                            (hash_c0 &
405                                             (lb0->lb_n_buckets_minus_1)));
406
407           next0 = dpo0->dpoi_next_node;
408           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
409
410           vlib_increment_combined_counter
411               (cm, cpu_index, lbi0, 1,
412                vlib_buffer_length_in_chain (vm, p0));
413
414           from += 1;
415           to_next += 1;
416           n_left_to_next -= 1;
417           n_left_from -= 1;
418
419           if (PREDICT_FALSE (next0 != next))
420             {
421               n_left_to_next += 1;
422               vlib_put_next_frame (vm, node, next, n_left_to_next);
423               next = next0;
424               vlib_get_next_frame (vm, node, next,
425                                    to_next, n_left_to_next);
426               to_next[0] = pi0;
427               to_next += 1;
428               n_left_to_next -= 1;
429             }
430         }
431
432       vlib_put_next_frame (vm, node, next, n_left_to_next);
433     }
434
435   if (node->flags & VLIB_NODE_FLAG_TRACE)
436     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
437
438   return frame->n_vectors;
439 }
440
441 /** @brief IPv4 lookup node.
442     @node ip4-lookup
443
444     This is the main IPv4 lookup dispatch node.
445
446     @param vm vlib_main_t corresponding to the current thread
447     @param node vlib_node_runtime_t
448     @param frame vlib_frame_t whose contents should be dispatched
449
450     @par Graph mechanics: buffer metadata, next index usage
451
452     @em Uses:
453     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
454         - Indicates the @c sw_if_index value of the interface that the
455           packet was received on.
456     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
457         - When the value is @c ~0 then the node performs a longest prefix
458           match (LPM) for the packet destination address in the FIB attached
459           to the receive interface.
460         - Otherwise perform LPM for the packet destination address in the
461           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
462           value (0, 1, ...) and not a VRF id.
463
464     @em Sets:
465     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
466         - The lookup result adjacency index.
467
468     <em>Next Index:</em>
469     - Dispatches the packet to the node index found in
470       ip_adjacency_t @c adj->lookup_next_index
471       (where @c adj is the lookup result adjacency).
472 */
473 static uword
474 ip4_lookup (vlib_main_t * vm,
475             vlib_node_runtime_t * node,
476             vlib_frame_t * frame)
477 {
478   return ip4_lookup_inline (vm, node, frame,
479                             /* lookup_for_responses_to_locally_received_packets */ 0);
480
481 }
482
483 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
484
485 VLIB_REGISTER_NODE (ip4_lookup_node) = {
486   .function = ip4_lookup,
487   .name = "ip4-lookup",
488   .vector_size = sizeof (u32),
489
490   .format_trace = format_ip4_lookup_trace,
491   .n_next_nodes = IP_LOOKUP_N_NEXT,
492   .next_nodes = IP4_LOOKUP_NEXT_NODES,
493 };
494
495 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
496
497 always_inline uword
498 ip4_load_balance (vlib_main_t * vm,
499                   vlib_node_runtime_t * node,
500                   vlib_frame_t * frame)
501 {
502   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
503   u32 n_left_from, n_left_to_next, * from, * to_next;
504   ip_lookup_next_t next;
505   u32 cpu_index = os_get_cpu_number();
506
507   from = vlib_frame_vector_args (frame);
508   n_left_from = frame->n_vectors;
509   next = node->cached_next_index;
510
511   if (node->flags & VLIB_NODE_FLAG_TRACE)
512       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
513
514   while (n_left_from > 0)
515     {
516       vlib_get_next_frame (vm, node, next,
517                            to_next, n_left_to_next);
518
519
520       while (n_left_from >= 4 && n_left_to_next >= 2)
521         {
522           ip_lookup_next_t next0, next1;
523           const load_balance_t *lb0, *lb1;
524           vlib_buffer_t * p0, *p1;
525           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
526           const ip4_header_t *ip0, *ip1;
527           const dpo_id_t *dpo0, *dpo1;
528
529           /* Prefetch next iteration. */
530           {
531             vlib_buffer_t * p2, * p3;
532
533             p2 = vlib_get_buffer (vm, from[2]);
534             p3 = vlib_get_buffer (vm, from[3]);
535
536             vlib_prefetch_buffer_header (p2, STORE);
537             vlib_prefetch_buffer_header (p3, STORE);
538
539             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
540             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
541           }
542
543           pi0 = to_next[0] = from[0];
544           pi1 = to_next[1] = from[1];
545
546           from += 2;
547           n_left_from -= 2;
548           to_next += 2;
549           n_left_to_next -= 2;
550
551           p0 = vlib_get_buffer (vm, pi0);
552           p1 = vlib_get_buffer (vm, pi1);
553
554           ip0 = vlib_buffer_get_current (p0);
555           ip1 = vlib_buffer_get_current (p1);
556           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
557           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
558
559           lb0 = load_balance_get(lbi0);
560           lb1 = load_balance_get(lbi1);
561
562           /*
563            * this node is for via FIBs we can re-use the hash value from the
564            * to node if present.
565            * We don't want to use the same hash value at each level in the recursion
566            * graph as that would lead to polarisation
567            */
568           hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
569           hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
570
571           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
572           {
573               if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
574               {
575                   hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
576               }
577               else
578               {
579                   hc0 = vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
580               }
581           }
582           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
583           {
584               if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash))
585               {
586                   hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1;
587               }
588               else
589               {
590                   hc1 = vnet_buffer(p1)->ip.flow_hash = ip4_compute_flow_hash(ip1, hc1);
591               }
592           }
593
594           dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
595           dpo1 = load_balance_get_bucket_i(lb1, hc1 & (lb1->lb_n_buckets_minus_1));
596
597           next0 = dpo0->dpoi_next_node;
598           next1 = dpo1->dpoi_next_node;
599
600           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
601           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
602
603           vlib_increment_combined_counter
604               (cm, cpu_index, lbi0, 1,
605                vlib_buffer_length_in_chain (vm, p0));
606           vlib_increment_combined_counter
607               (cm, cpu_index, lbi1, 1,
608                vlib_buffer_length_in_chain (vm, p1));
609
610           vlib_validate_buffer_enqueue_x2 (vm, node, next,
611                                            to_next, n_left_to_next,
612                                            pi0, pi1, next0, next1);
613        }
614
615       while (n_left_from > 0 && n_left_to_next > 0)
616         {
617           ip_lookup_next_t next0;
618           const load_balance_t *lb0;
619           vlib_buffer_t * p0;
620           u32 pi0, lbi0, hc0;
621           const ip4_header_t *ip0;
622           const dpo_id_t *dpo0;
623
624           pi0 = from[0];
625           to_next[0] = pi0;
626           from += 1;
627           to_next += 1;
628           n_left_to_next -= 1;
629           n_left_from -= 1;
630
631           p0 = vlib_get_buffer (vm, pi0);
632
633           ip0 = vlib_buffer_get_current (p0);
634           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
635
636           lb0 = load_balance_get(lbi0);
637
638           hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
639           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
640           {
641               if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
642               {
643                   hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
644               }
645               else
646               {
647                   hc0 = vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
648               }
649           }
650
651           dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
652
653           next0 = dpo0->dpoi_next_node;
654           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
655
656           vlib_increment_combined_counter
657               (cm, cpu_index, lbi0, 1,
658                vlib_buffer_length_in_chain (vm, p0));
659
660           vlib_validate_buffer_enqueue_x1 (vm, node, next,
661                                            to_next, n_left_to_next,
662                                            pi0, next0);
663         }
664
665       vlib_put_next_frame (vm, node, next, n_left_to_next);
666     }
667
668   return frame->n_vectors;
669 }
670
671 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
672   .function = ip4_load_balance,
673   .name = "ip4-load-balance",
674   .vector_size = sizeof (u32),
675   .sibling_of = "ip4-lookup",
676
677   .format_trace = format_ip4_lookup_trace,
678 };
679
680 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
681
682 /* get first interface address */
683 ip4_address_t *
684 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
685                              ip_interface_address_t ** result_ia)
686 {
687   ip_lookup_main_t * lm = &im->lookup_main;
688   ip_interface_address_t * ia = 0;
689   ip4_address_t * result = 0;
690
691   foreach_ip_interface_address (lm, ia, sw_if_index,
692                                 1 /* honor unnumbered */,
693   ({
694     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
695     result = a;
696     break;
697   }));
698   if (result_ia)
699     *result_ia = result ? ia : 0;
700   return result;
701 }
702
703 static void
704 ip4_add_interface_routes (u32 sw_if_index,
705                           ip4_main_t * im, u32 fib_index,
706                           ip_interface_address_t * a)
707 {
708   ip_lookup_main_t * lm = &im->lookup_main;
709   ip4_address_t * address = ip_interface_address_get_address (lm, a);
710   fib_prefix_t pfx = {
711       .fp_len = a->address_length,
712       .fp_proto = FIB_PROTOCOL_IP4,
713       .fp_addr.ip4 = *address,
714   };
715
716   a->neighbor_probe_adj_index = ~0;
717
718   if (pfx.fp_len < 32)
719   {
720       fib_node_index_t fei;
721
722       fei = fib_table_entry_update_one_path(fib_index,
723                                             &pfx,
724                                             FIB_SOURCE_INTERFACE,
725                                             (FIB_ENTRY_FLAG_CONNECTED |
726                                              FIB_ENTRY_FLAG_ATTACHED),
727                                             FIB_PROTOCOL_IP4,
728                                             NULL, /* No next-hop address */
729                                             sw_if_index,
730                                             ~0, // invalid FIB index
731                                             1,
732                                             NULL, // no out-label stack
733                                             FIB_ROUTE_PATH_FLAG_NONE);
734       a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
735   }
736
737   pfx.fp_len = 32;
738
739   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
740   {
741       u32 classify_table_index =
742           lm->classify_table_index_by_sw_if_index [sw_if_index];
743       if (classify_table_index != (u32) ~0)
744       {
745           dpo_id_t dpo = DPO_INVALID;
746
747           dpo_set(&dpo,
748                   DPO_CLASSIFY,
749                   DPO_PROTO_IP4,
750                   classify_dpo_create(DPO_PROTO_IP4,
751                                       classify_table_index));
752
753           fib_table_entry_special_dpo_add(fib_index,
754                                           &pfx,
755                                           FIB_SOURCE_CLASSIFY,
756                                           FIB_ENTRY_FLAG_NONE,
757                                           &dpo);
758           dpo_reset(&dpo);
759       }
760   }
761
762   fib_table_entry_update_one_path(fib_index,
763                                   &pfx,
764                                   FIB_SOURCE_INTERFACE,
765                                   (FIB_ENTRY_FLAG_CONNECTED |
766                                    FIB_ENTRY_FLAG_LOCAL),
767                                   FIB_PROTOCOL_IP4,
768                                   &pfx.fp_addr,
769                                   sw_if_index,
770                                   ~0, // invalid FIB index
771                                   1,
772                                   NULL, // no out-label stack
773                                   FIB_ROUTE_PATH_FLAG_NONE);
774 }
775
776 static void
777 ip4_del_interface_routes (ip4_main_t * im,
778                           u32 fib_index,
779                           ip4_address_t * address,
780                           u32 address_length)
781 {
782     fib_prefix_t pfx = {
783         .fp_len = address_length,
784         .fp_proto = FIB_PROTOCOL_IP4,
785         .fp_addr.ip4 = *address,
786     };
787
788     if (pfx.fp_len < 32)
789     {
790         fib_table_entry_delete(fib_index,
791                                &pfx,
792                                FIB_SOURCE_INTERFACE);
793     }
794
795     pfx.fp_len = 32;
796     fib_table_entry_delete(fib_index,
797                            &pfx,
798                            FIB_SOURCE_INTERFACE);
799 }
800
801 void
802 ip4_sw_interface_enable_disable (u32 sw_if_index,
803                                  u32 is_enable)
804 {
805   ip4_main_t * im = &ip4_main;
806
807   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
808
809   /*
810    * enable/disable only on the 1<->0 transition
811    */
812   if (is_enable)
813     {
814       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
815         return;
816     }
817   else
818     {
819       ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
820       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
821         return;
822     }
823   vnet_feature_enable_disable ("ip4-unicast", "ip4-lookup", sw_if_index,
824                                is_enable, 0, 0);
825
826   vnet_feature_enable_disable ("ip4-multicast", "ip4-lookup-multicast", sw_if_index,
827                                is_enable, 0, 0);
828
829 }
830
831 static clib_error_t *
832 ip4_add_del_interface_address_internal (vlib_main_t * vm,
833                                         u32 sw_if_index,
834                                         ip4_address_t * address,
835                                         u32 address_length,
836                                         u32 is_del)
837 {
838   vnet_main_t * vnm = vnet_get_main();
839   ip4_main_t * im = &ip4_main;
840   ip_lookup_main_t * lm = &im->lookup_main;
841   clib_error_t * error = 0;
842   u32 if_address_index, elts_before;
843   ip4_address_fib_t ip4_af, * addr_fib = 0;
844
845   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
846   ip4_addr_fib_init (&ip4_af, address,
847                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
848   vec_add1 (addr_fib, ip4_af);
849
850   /* FIXME-LATER
851    * there is no support for adj-fib handling in the presence of overlapping
852    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
853    * most routers do.
854    */
855   if (! is_del)
856     {
857       /* When adding an address check that it does not conflict
858          with an existing address. */
859       ip_interface_address_t * ia;
860       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
861                                     0 /* honor unnumbered */,
862       ({
863         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
864
865         if (ip4_destination_matches_route (im, address, x, ia->address_length)
866             || ip4_destination_matches_route (im, x, address, address_length))
867           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
868                                     format_ip4_address_and_length, address, address_length,
869                                     format_ip4_address_and_length, x, ia->address_length,
870                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
871        }));
872     }
873
874   elts_before = pool_elts (lm->if_address_pool);
875
876   error = ip_interface_address_add_del
877     (lm,
878      sw_if_index,
879      addr_fib,
880      address_length,
881      is_del,
882      &if_address_index);
883   if (error)
884     goto done;
885
886   ip4_sw_interface_enable_disable(sw_if_index, !is_del);
887
888   if (is_del)
889       ip4_del_interface_routes (im, ip4_af.fib_index, address,
890                                 address_length);
891   else
892       ip4_add_interface_routes (sw_if_index,
893                                 im, ip4_af.fib_index,
894                                 pool_elt_at_index
895                                 (lm->if_address_pool, if_address_index));
896
897   /* If pool did not grow/shrink: add duplicate address. */
898   if (elts_before != pool_elts (lm->if_address_pool))
899     {
900       ip4_add_del_interface_address_callback_t * cb;
901       vec_foreach (cb, im->add_del_interface_address_callbacks)
902         cb->function (im, cb->function_opaque, sw_if_index,
903                       address, address_length,
904                       if_address_index,
905                       is_del);
906     }
907
908  done:
909   vec_free (addr_fib);
910   return error;
911 }
912
913 clib_error_t *
914 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
915                                ip4_address_t * address, u32 address_length,
916                                u32 is_del)
917 {
918   return ip4_add_del_interface_address_internal
919     (vm, sw_if_index, address, address_length,
920      is_del);
921 }
922
923 /* Built-in ip4 unicast rx feature path definition */
924 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
925 {
926   .arc_name  = "ip4-unicast",
927   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
928   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
929 };
930
931 VNET_FEATURE_INIT (ip4_flow_classify, static) = {
932   .arc_name = "ip4-unicast",
933   .node_name = "ip4-flow-classify",
934   .runs_before = VNET_FEATURES ("ip4-inacl"),
935 };
936
937 VNET_FEATURE_INIT (ip4_inacl, static) = {
938   .arc_name = "ip4-unicast",
939   .node_name = "ip4-inacl",
940   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
941 };
942
943 VNET_FEATURE_INIT (ip4_source_check_1, static) = {
944   .arc_name = "ip4-unicast",
945   .node_name = "ip4-source-check-via-rx",
946   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
947 };
948
949 VNET_FEATURE_INIT (ip4_source_check_2, static) = {
950   .arc_name = "ip4-unicast",
951   .node_name = "ip4-source-check-via-any",
952   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
953 };
954
955 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
956   .arc_name = "ip4-unicast",
957   .node_name = "ip4-source-and-port-range-check-rx",
958   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
959 };
960
961 VNET_FEATURE_INIT (ip4_policer_classify, static) = {
962   .arc_name = "ip4-unicast",
963   .node_name = "ip4-policer-classify",
964   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
965 };
966
967 VNET_FEATURE_INIT (ip4_ipsec, static) = {
968   .arc_name = "ip4-unicast",
969   .node_name = "ipsec-input-ip4",
970   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
971 };
972
973 VNET_FEATURE_INIT (ip4_vpath, static) = {
974   .arc_name = "ip4-unicast",
975   .node_name = "vpath-input-ip4",
976   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
977 };
978
979 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) = {
980   .arc_name = "ip4-unicast",
981   .node_name = "ip4-vxlan-bypass",
982   .runs_before = VNET_FEATURES ("ip4-lookup"),
983 };
984
985 VNET_FEATURE_INIT (ip4_lookup, static) = {
986   .arc_name = "ip4-unicast",
987   .node_name = "ip4-lookup",
988   .runs_before = VNET_FEATURES ("ip4-drop"),
989 };
990
991 VNET_FEATURE_INIT (ip4_drop, static) = {
992   .arc_name = "ip4-unicast",
993   .node_name = "ip4-drop",
994   .runs_before = 0, /* not before any other features */
995 };
996
997
998 /* Built-in ip4 multicast rx feature path definition */
999 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1000 {
1001   .arc_name  = "ip4-multicast",
1002   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1003   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1004 };
1005
1006 VNET_FEATURE_INIT (ip4_vpath_mc, static) = {
1007   .arc_name = "ip4-multicast",
1008   .node_name = "vpath-input-ip4",
1009   .runs_before = VNET_FEATURES ("ip4-lookup-multicast"),
1010 };
1011
1012 VNET_FEATURE_INIT (ip4_lookup_mc, static) = {
1013   .arc_name = "ip4-multicast",
1014   .node_name = "ip4-lookup-multicast",
1015   .runs_before = VNET_FEATURES ("ip4-drop"),
1016 };
1017
1018 VNET_FEATURE_INIT (ip4_mc_drop, static) = {
1019   .arc_name = "ip4-multicast",
1020   .node_name = "ip4-drop",
1021   .runs_before = 0, /* last feature */
1022 };
1023
1024 /* Source and port-range check ip4 tx feature path definition */
1025 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1026 {
1027   .arc_name  = "ip4-output",
1028   .start_nodes = VNET_FEATURES ("ip4-rewrite-transit", "ip4-midchain"),
1029   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1030 };
1031
1032 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
1033   .arc_name = "ip4-output",
1034   .node_name = "ip4-source-and-port-range-check-tx",
1035   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1036 };
1037
1038 VNET_FEATURE_INIT (ip4_ipsec_output, static) = {
1039   .arc_name = "ip4-output",
1040   .node_name = "ipsec-output-ip4",
1041   .runs_before = VNET_FEATURES ("interface-output"),
1042 };
1043
1044 /* Built-in ip4 tx feature path definition */
1045 VNET_FEATURE_INIT (ip4_interface_output, static) = {
1046   .arc_name = "ip4-output",
1047   .node_name = "interface-output",
1048   .runs_before = 0, /* not before any other features */
1049 };
1050
1051
1052 static clib_error_t *
1053 ip4_sw_interface_add_del (vnet_main_t * vnm,
1054                           u32 sw_if_index,
1055                           u32 is_add)
1056 {
1057   ip4_main_t * im = &ip4_main;
1058
1059   /* Fill in lookup tables with default table (0). */
1060   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1061
1062   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1063                                is_add, 0, 0);
1064
1065   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1066                                is_add, 0, 0);
1067
1068   vnet_feature_enable_disable ("ip4-output", "interface-output", sw_if_index,
1069                                is_add, 0, 0);
1070
1071   return /* no error */ 0;
1072 }
1073
1074 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1075
/* Global IPv4 main structure; functions in this file reach it through
   &ip4_main, and the feature arc registrations store their arc indices
   in its lookup_main member. */
ip4_main_t ip4_main;
1078
1079 clib_error_t *
1080 ip4_lookup_init (vlib_main_t * vm)
1081 {
1082   ip4_main_t * im = &ip4_main;
1083   clib_error_t * error;
1084   uword i;
1085
1086   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1087     return error;
1088
1089   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1090     {
1091       u32 m;
1092
1093       if (i < 32)
1094         m = pow2_mask (i) << (32 - i);
1095       else
1096         m = ~0;
1097       im->fib_masks[i] = clib_host_to_net_u32 (m);
1098     }
1099
1100   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1101
1102   /* Create FIB with index 0 and table id of 0. */
1103   fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1104
1105   {
1106     pg_node_t * pn;
1107     pn = pg_get_node (ip4_lookup_node.index);
1108     pn->unformat_edit = unformat_pg_ip4_header;
1109   }
1110
1111   {
1112     ethernet_arp_header_t h;
1113
1114     memset (&h, 0, sizeof (h));
1115
1116     /* Set target ethernet address to all zeros. */
1117     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1118
1119 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1120 #define _8(f,v) h.f = v;
1121     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1122     _16 (l3_type, ETHERNET_TYPE_IP4);
1123     _8 (n_l2_address_bytes, 6);
1124     _8 (n_l3_address_bytes, 4);
1125     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1126 #undef _16
1127 #undef _8
1128
1129     vlib_packet_template_init (vm,
1130                                &im->ip4_arp_request_packet_template,
1131                                /* data */ &h,
1132                                sizeof (h),
1133                                /* alloc chunk size */ 8,
1134                                "ip4 arp");
1135   }
1136
1137   return error;
1138 }
1139
1140 VLIB_INIT_FUNCTION (ip4_lookup_init);
1141
1142 typedef struct {
1143   /* Adjacency taken. */
1144   u32 dpo_index;
1145   u32 flow_hash;
1146   u32 fib_index;
1147
1148   /* Packet data, possibly *after* rewrite. */
1149   u8 packet_data[64 - 1*sizeof(u32)];
1150 } ip4_forward_next_trace_t;
1151
1152 u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1153 {
1154   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1155   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1156   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1157   uword indent = format_get_indent (s);
1158   s = format (s, "%U%U",
1159               format_white_space, indent,
1160               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1161   return s;
1162 }
1163
1164 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1165 {
1166   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1167   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1168   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1169   uword indent = format_get_indent (s);
1170
1171   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1172               t->fib_index, t->dpo_index, t->flow_hash);
1173   s = format (s, "\n%U%U",
1174               format_white_space, indent,
1175               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1176   return s;
1177 }
1178
1179 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1180 {
1181   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1182   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1183   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1184   vnet_main_t * vnm = vnet_get_main();
1185   uword indent = format_get_indent (s);
1186
1187   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1188               t->fib_index, t->dpo_index, format_ip_adjacency,
1189               t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1190               t->flow_hash);
1191   s = format (s, "\n%U%U",
1192               format_white_space, indent,
1193               format_ip_adjacency_packet_data,
1194               vnm, t->dpo_index,
1195               t->packet_data, sizeof (t->packet_data));
1196   return s;
1197 }
1198
1199 /* Common trace function for all ip4-forward next nodes. */
1200 void
1201 ip4_forward_next_trace (vlib_main_t * vm,
1202                         vlib_node_runtime_t * node,
1203                         vlib_frame_t * frame,
1204                         vlib_rx_or_tx_t which_adj_index)
1205 {
1206   u32 * from, n_left;
1207   ip4_main_t * im = &ip4_main;
1208
1209   n_left = frame->n_vectors;
1210   from = vlib_frame_vector_args (frame);
1211
1212   while (n_left >= 4)
1213     {
1214       u32 bi0, bi1;
1215       vlib_buffer_t * b0, * b1;
1216       ip4_forward_next_trace_t * t0, * t1;
1217
1218       /* Prefetch next iteration. */
1219       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1220       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1221
1222       bi0 = from[0];
1223       bi1 = from[1];
1224
1225       b0 = vlib_get_buffer (vm, bi0);
1226       b1 = vlib_get_buffer (vm, bi1);
1227
1228       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1229         {
1230           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1231           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1232           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1233           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1234               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1235               vec_elt (im->fib_index_by_sw_if_index,
1236                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1237
1238           clib_memcpy (t0->packet_data,
1239                   vlib_buffer_get_current (b0),
1240                   sizeof (t0->packet_data));
1241         }
1242       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1243         {
1244           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1245           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1246           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1247           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1248               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1249               vec_elt (im->fib_index_by_sw_if_index,
1250                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1251           clib_memcpy (t1->packet_data,
1252                   vlib_buffer_get_current (b1),
1253                   sizeof (t1->packet_data));
1254         }
1255       from += 2;
1256       n_left -= 2;
1257     }
1258
1259   while (n_left >= 1)
1260     {
1261       u32 bi0;
1262       vlib_buffer_t * b0;
1263       ip4_forward_next_trace_t * t0;
1264
1265       bi0 = from[0];
1266
1267       b0 = vlib_get_buffer (vm, bi0);
1268
1269       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1270         {
1271           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1272           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1273           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1274           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1275               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1276               vec_elt (im->fib_index_by_sw_if_index,
1277                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1278           clib_memcpy (t0->packet_data,
1279                   vlib_buffer_get_current (b0),
1280                   sizeof (t0->packet_data));
1281         }
1282       from += 1;
1283       n_left -= 1;
1284     }
1285 }
1286
1287 static uword
1288 ip4_drop_or_punt (vlib_main_t * vm,
1289                   vlib_node_runtime_t * node,
1290                   vlib_frame_t * frame,
1291                   ip4_error_t error_code)
1292 {
1293   u32 * buffers = vlib_frame_vector_args (frame);
1294   uword n_packets = frame->n_vectors;
1295
1296   vlib_error_drop_buffers (vm, node,
1297                            buffers,
1298                            /* stride */ 1,
1299                            n_packets,
1300                            /* next */ 0,
1301                            ip4_input_node.index,
1302                            error_code);
1303
1304   if (node->flags & VLIB_NODE_FLAG_TRACE)
1305     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1306
1307   return n_packets;
1308 }
1309
1310 static uword
1311 ip4_drop (vlib_main_t * vm,
1312           vlib_node_runtime_t * node,
1313           vlib_frame_t * frame)
1314 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1315
1316 static uword
1317 ip4_punt (vlib_main_t * vm,
1318           vlib_node_runtime_t * node,
1319           vlib_frame_t * frame)
1320 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1321
1322 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1323   .function = ip4_drop,
1324   .name = "ip4-drop",
1325   .vector_size = sizeof (u32),
1326
1327   .format_trace = format_ip4_forward_next_trace,
1328
1329   .n_next_nodes = 1,
1330   .next_nodes = {
1331     [0] = "error-drop",
1332   },
1333 };
1334
1335 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1336
1337 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1338   .function = ip4_punt,
1339   .name = "ip4-punt",
1340   .vector_size = sizeof (u32),
1341
1342   .format_trace = format_ip4_forward_next_trace,
1343
1344   .n_next_nodes = 1,
1345   .next_nodes = {
1346     [0] = "error-punt",
1347   },
1348 };
1349
1350 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1351
1352 /* Compute TCP/UDP/ICMP4 checksum in software. */
1353 u16
1354 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1355                               ip4_header_t * ip0)
1356 {
1357   ip_csum_t sum0;
1358   u32 ip_header_length, payload_length_host_byte_order;
1359   u32 n_this_buffer, n_bytes_left;
1360   u16 sum16;
1361   void * data_this_buffer;
1362
1363   /* Initialize checksum with ip header. */
1364   ip_header_length = ip4_header_bytes (ip0);
1365   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1366   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1367
1368   if (BITS (uword) == 32)
1369     {
1370       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1371       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1372     }
1373   else
1374     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1375
1376   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1377   data_this_buffer = (void *) ip0 + ip_header_length;
1378   if (n_this_buffer + ip_header_length > p0->current_length)
1379     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1380   while (1)
1381     {
1382       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1383       n_bytes_left -= n_this_buffer;
1384       if (n_bytes_left == 0)
1385         break;
1386
1387       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1388       p0 = vlib_get_buffer (vm, p0->next_buffer);
1389       data_this_buffer = vlib_buffer_get_current (p0);
1390       n_this_buffer = p0->current_length;
1391     }
1392
1393   sum16 = ~ ip_csum_fold (sum0);
1394
1395   return sum16;
1396 }
1397
1398 u32
1399 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1400 {
1401   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1402   udp_header_t * udp0;
1403   u16 sum16;
1404
1405   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1406           || ip0->protocol == IP_PROTOCOL_UDP);
1407
1408   udp0 = (void *) (ip0 + 1);
1409   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1410     {
1411       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1412                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1413       return p0->flags;
1414     }
1415
1416   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1417
1418   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1419                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1420
1421   return p0->flags;
1422 }
1423
1424 static uword
1425 ip4_local (vlib_main_t * vm,
1426            vlib_node_runtime_t * node,
1427            vlib_frame_t * frame)
1428 {
1429   ip4_main_t * im = &ip4_main;
1430   ip_lookup_main_t * lm = &im->lookup_main;
1431   ip_local_next_t next_index;
1432   u32 * from, * to_next, n_left_from, n_left_to_next;
1433   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1434
1435   from = vlib_frame_vector_args (frame);
1436   n_left_from = frame->n_vectors;
1437   next_index = node->cached_next_index;
1438
1439   if (node->flags & VLIB_NODE_FLAG_TRACE)
1440     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1441
1442   while (n_left_from > 0)
1443     {
1444       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1445
1446       while (n_left_from >= 4 && n_left_to_next >= 2)
1447         {
1448           vlib_buffer_t * p0, * p1;
1449           ip4_header_t * ip0, * ip1;
1450           udp_header_t * udp0, * udp1;
1451           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1452           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1453           const dpo_id_t *dpo0, *dpo1;
1454           const load_balance_t *lb0, *lb1;
1455           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1456           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1457           i32 len_diff0, len_diff1;
1458           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1459           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1460           u8 enqueue_code;
1461
1462           pi0 = to_next[0] = from[0];
1463           pi1 = to_next[1] = from[1];
1464           from += 2;
1465           n_left_from -= 2;
1466           to_next += 2;
1467           n_left_to_next -= 2;
1468
1469           p0 = vlib_get_buffer (vm, pi0);
1470           p1 = vlib_get_buffer (vm, pi1);
1471
1472           ip0 = vlib_buffer_get_current (p0);
1473           ip1 = vlib_buffer_get_current (p1);
1474
1475           fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1476                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1477           fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1478                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1479
1480           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1481           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1482
1483           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1484
1485           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1486           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1487
1488           /* Treat IP frag packets as "experimental" protocol for now
1489              until support of IP frag reassembly is implemented */
1490           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1491           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1492           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1493           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1494           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1495           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1496
1497           flags0 = p0->flags;
1498           flags1 = p1->flags;
1499
1500           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1501           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1502
1503           udp0 = ip4_next_header (ip0);
1504           udp1 = ip4_next_header (ip1);
1505
1506           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1507           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1508           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1509
1510           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1511           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1512
1513           /* Verify UDP length. */
1514           ip_len0 = clib_net_to_host_u16 (ip0->length);
1515           ip_len1 = clib_net_to_host_u16 (ip1->length);
1516           udp_len0 = clib_net_to_host_u16 (udp0->length);
1517           udp_len1 = clib_net_to_host_u16 (udp1->length);
1518
1519           len_diff0 = ip_len0 - udp_len0;
1520           len_diff1 = ip_len1 - udp_len1;
1521
1522           len_diff0 = is_udp0 ? len_diff0 : 0;
1523           len_diff1 = is_udp1 ? len_diff1 : 0;
1524
1525           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1526                                 & good_tcp_udp0 & good_tcp_udp1)))
1527             {
1528               if (is_tcp_udp0)
1529                 {
1530                   if (is_tcp_udp0
1531                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1532                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1533                   good_tcp_udp0 =
1534                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1535                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1536                 }
1537               if (is_tcp_udp1)
1538                 {
1539                   if (is_tcp_udp1
1540                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1541                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1542                   good_tcp_udp1 =
1543                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1544                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1545                 }
1546             }
1547
1548           good_tcp_udp0 &= len_diff0 >= 0;
1549           good_tcp_udp1 &= len_diff1 >= 0;
1550
1551           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1552           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1553
1554           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1555
1556           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1557           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1558
1559           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1560           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1561                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1562                     : error0);
1563           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1564                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1565                     : error1);
1566
1567           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1568           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1569           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1570           leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1571
1572           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1573           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1574
1575           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1576           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1577
1578           lb0 = load_balance_get(lbi0);
1579           lb1 = load_balance_get(lbi1);
1580           dpo0 = load_balance_get_bucket_i(lb0, 0);
1581           dpo1 = load_balance_get_bucket_i(lb1, 0);
1582
1583           /*
1584            * Must have a route to source otherwise we drop the packet.
1585            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1586            *
1587            * The checks are:
1588            *  - the source is a recieve => it's from us => bogus, do this
1589            *    first since it sets a different error code.
1590            *  - uRPF check for any route to source - accept if passes.
1591            *  - allow packets destined to the broadcast address from unknown sources
1592            */
1593           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1594                      dpo0->dpoi_type == DPO_RECEIVE) ?
1595                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1596                     error0);
1597           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1598                      !fib_urpf_check_size(lb0->lb_urpf) &&
1599                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1600                     ? IP4_ERROR_SRC_LOOKUP_MISS
1601                     : error0);
1602           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1603                      dpo1->dpoi_type == DPO_RECEIVE) ?
1604                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1605                     error1);
1606           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1607                      !fib_urpf_check_size(lb1->lb_urpf) &&
1608                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1609                     ? IP4_ERROR_SRC_LOOKUP_MISS
1610                     : error1);
1611
1612           next0 = lm->local_next_by_ip_protocol[proto0];
1613           next1 = lm->local_next_by_ip_protocol[proto1];
1614
1615           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1616           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1617
1618           p0->error = error0 ? error_node->errors[error0] : 0;
1619           p1->error = error1 ? error_node->errors[error1] : 0;
1620
1621           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1622
1623           if (PREDICT_FALSE (enqueue_code != 0))
1624             {
1625               switch (enqueue_code)
1626                 {
1627                 case 1:
1628                   /* A B A */
1629                   to_next[-2] = pi1;
1630                   to_next -= 1;
1631                   n_left_to_next += 1;
1632                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1633                   break;
1634
1635                 case 2:
1636                   /* A A B */
1637                   to_next -= 1;
1638                   n_left_to_next += 1;
1639                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1640                   break;
1641
1642                 case 3:
1643                   /* A B B or A B C */
1644                   to_next -= 2;
1645                   n_left_to_next += 2;
1646                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1647                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1648                   if (next0 == next1)
1649                     {
1650                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1651                       next_index = next1;
1652                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1653                     }
1654                   break;
1655                 }
1656             }
1657         }
1658
1659       while (n_left_from > 0 && n_left_to_next > 0)
1660         {
1661           vlib_buffer_t * p0;
1662           ip4_header_t * ip0;
1663           udp_header_t * udp0;
1664           ip4_fib_mtrie_t * mtrie0;
1665           ip4_fib_mtrie_leaf_t leaf0;
1666           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1667           i32 len_diff0;
1668           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1669           load_balance_t *lb0;
1670           const dpo_id_t *dpo0;
1671
1672           pi0 = to_next[0] = from[0];
1673           from += 1;
1674           n_left_from -= 1;
1675           to_next += 1;
1676           n_left_to_next -= 1;
1677
1678           p0 = vlib_get_buffer (vm, pi0);
1679
1680           ip0 = vlib_buffer_get_current (p0);
1681
1682           fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1683                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1684
1685           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1686
1687           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1688
1689           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1690
1691           /* Treat IP frag packets as "experimental" protocol for now
1692              until support of IP frag reassembly is implemented */
1693           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1694           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1695           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1696
1697           flags0 = p0->flags;
1698
1699           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1700
1701           udp0 = ip4_next_header (ip0);
1702
1703           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1704           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1705
1706           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1707
1708           /* Verify UDP length. */
1709           ip_len0 = clib_net_to_host_u16 (ip0->length);
1710           udp_len0 = clib_net_to_host_u16 (udp0->length);
1711
1712           len_diff0 = ip_len0 - udp_len0;
1713
1714           len_diff0 = is_udp0 ? len_diff0 : 0;
1715
1716           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1717             {
1718               if (is_tcp_udp0)
1719                 {
1720                   if (is_tcp_udp0
1721                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1722                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1723                   good_tcp_udp0 =
1724                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1725                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1726                 }
1727             }
1728
1729           good_tcp_udp0 &= len_diff0 >= 0;
1730
1731           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1732
1733           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1734
1735           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1736
1737           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1738           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1739                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1740                     : error0);
1741
1742           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1743           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1744
1745           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1746           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1747
1748           lb0 = load_balance_get(lbi0);
1749           dpo0 = load_balance_get_bucket_i(lb0, 0);
1750
1751           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1752               vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1753                   dpo0->dpoi_index;
1754
1755           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1756                      dpo0->dpoi_type == DPO_RECEIVE) ?
1757                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1758                     error0);
1759           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1760                      !fib_urpf_check_size(lb0->lb_urpf) &&
1761                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1762                     ? IP4_ERROR_SRC_LOOKUP_MISS
1763                     : error0);
1764
1765           next0 = lm->local_next_by_ip_protocol[proto0];
1766
1767           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1768
1769           p0->error = error0? error_node->errors[error0] : 0;
1770
1771           if (PREDICT_FALSE (next0 != next_index))
1772             {
1773               n_left_to_next += 1;
1774               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1775
1776               next_index = next0;
1777               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1778               to_next[0] = pi0;
1779               to_next += 1;
1780               n_left_to_next -= 1;
1781             }
1782         }
1783
1784       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1785     }
1786
1787   return frame->n_vectors;
1788 }
1789
1790 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1791   .function = ip4_local,
1792   .name = "ip4-local",
1793   .vector_size = sizeof (u32),
1794
1795   .format_trace = format_ip4_forward_next_trace,
1796
1797   .n_next_nodes = IP_LOCAL_N_NEXT,
1798   .next_nodes = {
1799     [IP_LOCAL_NEXT_DROP] = "error-drop",
1800     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1801     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1802     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1803   },
1804 };
1805
1806 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
1807
1808 void ip4_register_protocol (u32 protocol, u32 node_index)
1809 {
1810   vlib_main_t * vm = vlib_get_main();
1811   ip4_main_t * im = &ip4_main;
1812   ip_lookup_main_t * lm = &im->lookup_main;
1813
1814   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1815   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
1816 }
1817
1818 static clib_error_t *
1819 show_ip_local_command_fn (vlib_main_t * vm,
1820                           unformat_input_t * input,
1821                          vlib_cli_command_t * cmd)
1822 {
1823   ip4_main_t * im = &ip4_main;
1824   ip_lookup_main_t * lm = &im->lookup_main;
1825   int i;
1826
1827   vlib_cli_output (vm, "Protocols handled by ip4_local");
1828   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1829     {
1830       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1831         vlib_cli_output (vm, "%d", i);
1832     }
1833   return 0;
1834 }
1835
1836
1837
1838 /*?
1839  * Display the set of protocols handled by the local IPv4 stack.
1840  *
1841  * @cliexpar
1842  * Example of how to display local protocol table:
1843  * @cliexstart{show ip local}
1844  * Protocols handled by ip4_local
1845  * 1
1846  * 17
1847  * 47
1848  * @cliexend
1849 ?*/
1850 /* *INDENT-OFF* */
1851 VLIB_CLI_COMMAND (show_ip_local, static) = {
1852   .path = "show ip local",
1853   .function = show_ip_local_command_fn,
1854   .short_help = "show ip local",
1855 };
1856 /* *INDENT-ON* */
1857
1858 always_inline uword
1859 ip4_arp_inline (vlib_main_t * vm,
1860                 vlib_node_runtime_t * node,
1861                 vlib_frame_t * frame,
1862                 int is_glean)
1863 {
1864   vnet_main_t * vnm = vnet_get_main();
1865   ip4_main_t * im = &ip4_main;
1866   ip_lookup_main_t * lm = &im->lookup_main;
1867   u32 * from, * to_next_drop;
1868   uword n_left_from, n_left_to_next_drop, next_index;
1869   static f64 time_last_seed_change = -1e100;
1870   static u32 hash_seeds[3];
1871   static uword hash_bitmap[256 / BITS (uword)];
1872   f64 time_now;
1873
1874   if (node->flags & VLIB_NODE_FLAG_TRACE)
1875     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1876
1877   time_now = vlib_time_now (vm);
1878   if (time_now - time_last_seed_change > 1e-3)
1879     {
1880       uword i;
1881       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1882                                              sizeof (hash_seeds));
1883       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1884         hash_seeds[i] = r[i];
1885
1886       /* Mark all hash keys as been no-seen before. */
1887       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1888         hash_bitmap[i] = 0;
1889
1890       time_last_seed_change = time_now;
1891     }
1892
1893   from = vlib_frame_vector_args (frame);
1894   n_left_from = frame->n_vectors;
1895   next_index = node->cached_next_index;
1896   if (next_index == IP4_ARP_NEXT_DROP)
1897     next_index = IP4_ARP_N_NEXT; /* point to first interface */
1898
1899   while (n_left_from > 0)
1900     {
1901       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1902                            to_next_drop, n_left_to_next_drop);
1903
1904       while (n_left_from > 0 && n_left_to_next_drop > 0)
1905         {
1906           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1907           ip_adjacency_t * adj0;
1908           vlib_buffer_t * p0;
1909           ip4_header_t * ip0;
1910           uword bm0;
1911
1912           pi0 = from[0];
1913
1914           p0 = vlib_get_buffer (vm, pi0);
1915
1916           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1917           adj0 = ip_get_adjacency (lm, adj_index0);
1918           ip0 = vlib_buffer_get_current (p0);
1919
1920           a0 = hash_seeds[0];
1921           b0 = hash_seeds[1];
1922           c0 = hash_seeds[2];
1923
1924           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1925           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1926
1927           if (is_glean)
1928           {
1929               /*
1930                * this is the Glean case, so we are ARPing for the
1931                * packet's destination
1932                */
1933               a0 ^= ip0->dst_address.data_u32;
1934           }
1935           else
1936           {
1937               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1938           }
1939           b0 ^= sw_if_index0;
1940
1941           hash_v3_finalize32 (a0, b0, c0);
1942
1943           c0 &= BITS (hash_bitmap) - 1;
1944           c0 = c0 / BITS (uword);
1945           m0 = (uword) 1 << (c0 % BITS (uword));
1946
1947           bm0 = hash_bitmap[c0];
1948           drop0 = (bm0 & m0) != 0;
1949
1950           /* Mark it as seen. */
1951           hash_bitmap[c0] = bm0 | m0;
1952
1953           from += 1;
1954           n_left_from -= 1;
1955           to_next_drop[0] = pi0;
1956           to_next_drop += 1;
1957           n_left_to_next_drop -= 1;
1958
1959           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1960
1961           /*
1962            * the adj has been updated to a rewrite but the node the DPO that got
1963            * us here hasn't - yet. no big deal. we'll drop while we wait.
1964            */
1965           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1966             continue;
1967
1968           if (drop0)
1969             continue;
1970
1971           /*
1972            * Can happen if the control-plane is programming tables
1973            * with traffic flowing; at least that's today's lame excuse.
1974            */
1975           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1976               (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1977           {
1978             p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1979           }
1980           else
1981           /* Send ARP request. */
1982           {
1983             u32 bi0 = 0;
1984             vlib_buffer_t * b0;
1985             ethernet_arp_header_t * h0;
1986             vnet_hw_interface_t * hw_if0;
1987
1988             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1989
1990             /* Add rewrite/encap string for ARP packet. */
1991             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1992
1993             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1994
1995             /* Src ethernet address in ARP header. */
1996             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1997                     sizeof (h0->ip4_over_ethernet[0].ethernet));
1998
1999             if (is_glean)
2000             {
2001                 /* The interface's source address is stashed in the Glean Adj */
2002                 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
2003
2004                 /* Copy in destination address we are requesting. This is the
2005                 * glean case, so it's the packet's destination.*/
2006                 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2007             }
2008             else
2009             {
2010                 /* Src IP address in ARP header. */
2011                 if (ip4_src_address_for_packet(lm, sw_if_index0,
2012                                                &h0->ip4_over_ethernet[0].ip4))
2013                 {
2014                     /* No source address available */
2015                     p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2016                     vlib_buffer_free(vm, &bi0, 1);
2017                     continue;
2018                 }
2019
2020                 /* Copy in destination address we are requesting from the
2021                    incomplete adj */
2022                 h0->ip4_over_ethernet[1].ip4.data_u32 =
2023                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
2024             }
2025
2026             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2027             b0 = vlib_get_buffer (vm, bi0);
2028             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2029
2030             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2031
2032             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2033           }
2034         }
2035
2036       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2037     }
2038
2039   return frame->n_vectors;
2040 }
2041
2042 static uword
2043 ip4_arp (vlib_main_t * vm,
2044          vlib_node_runtime_t * node,
2045          vlib_frame_t * frame)
2046 {
2047     return (ip4_arp_inline(vm, node, frame, 0));
2048 }
2049
2050 static uword
2051 ip4_glean (vlib_main_t * vm,
2052            vlib_node_runtime_t * node,
2053            vlib_frame_t * frame)
2054 {
2055     return (ip4_arp_inline(vm, node, frame, 1));
2056 }
2057
2058 static char * ip4_arp_error_strings[] = {
2059   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2060   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2061   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2062   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2063   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2064   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2065 };
2066
2067 VLIB_REGISTER_NODE (ip4_arp_node) = {
2068   .function = ip4_arp,
2069   .name = "ip4-arp",
2070   .vector_size = sizeof (u32),
2071
2072   .format_trace = format_ip4_forward_next_trace,
2073
2074   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2075   .error_strings = ip4_arp_error_strings,
2076
2077   .n_next_nodes = IP4_ARP_N_NEXT,
2078   .next_nodes = {
2079     [IP4_ARP_NEXT_DROP] = "error-drop",
2080   },
2081 };
2082
2083 VLIB_REGISTER_NODE (ip4_glean_node) = {
2084   .function = ip4_glean,
2085   .name = "ip4-glean",
2086   .vector_size = sizeof (u32),
2087
2088   .format_trace = format_ip4_forward_next_trace,
2089
2090   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2091   .error_strings = ip4_arp_error_strings,
2092
2093   .n_next_nodes = IP4_ARP_N_NEXT,
2094   .next_nodes = {
2095     [IP4_ARP_NEXT_DROP] = "error-drop",
2096   },
2097 };
2098
2099 #define foreach_notrace_ip4_arp_error           \
2100 _(DROP)                                         \
2101 _(REQUEST_SENT)                                 \
2102 _(REPLICATE_DROP)                               \
2103 _(REPLICATE_FAIL)
2104
2105 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2106 {
2107   vlib_node_runtime_t *rt =
2108     vlib_node_get_runtime (vm, ip4_arp_node.index);
2109
2110   /* don't trace ARP request packets */
2111 #define _(a)                                    \
2112     vnet_pcap_drop_trace_filter_add_del         \
2113         (rt->errors[IP4_ARP_ERROR_##a],         \
2114          1 /* is_add */);
2115     foreach_notrace_ip4_arp_error;
2116 #undef _
2117   return 0;
2118 }
2119
2120 VLIB_INIT_FUNCTION(arp_notrace_init);
2121
2122
2123 /* Send an ARP request to see if given destination is reachable on given interface. */
2124 clib_error_t *
2125 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2126 {
2127   vnet_main_t * vnm = vnet_get_main();
2128   ip4_main_t * im = &ip4_main;
2129   ethernet_arp_header_t * h;
2130   ip4_address_t * src;
2131   ip_interface_address_t * ia;
2132   ip_adjacency_t * adj;
2133   vnet_hw_interface_t * hi;
2134   vnet_sw_interface_t * si;
2135   vlib_buffer_t * b;
2136   u32 bi = 0;
2137
2138   si = vnet_get_sw_interface (vnm, sw_if_index);
2139
2140   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2141     {
2142       return clib_error_return (0, "%U: interface %U down",
2143                                 format_ip4_address, dst,
2144                                 format_vnet_sw_if_index_name, vnm,
2145                                 sw_if_index);
2146     }
2147
2148   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2149   if (! src)
2150     {
2151       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2152       return clib_error_return
2153         (0, "no matching interface address for destination %U (interface %U)",
2154          format_ip4_address, dst,
2155          format_vnet_sw_if_index_name, vnm, sw_if_index);
2156     }
2157
2158   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2159
2160   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2161
2162   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2163
2164   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2165
2166   h->ip4_over_ethernet[0].ip4 = src[0];
2167   h->ip4_over_ethernet[1].ip4 = dst[0];
2168
2169   b = vlib_get_buffer (vm, bi);
2170   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2171
2172   /* Add encapsulation string for software interface (e.g. ethernet header). */
2173   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2174   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2175
2176   {
2177     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2178     u32 * to_next = vlib_frame_vector_args (f);
2179     to_next[0] = bi;
2180     f->n_vectors = 1;
2181     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2182   }
2183
2184   return /* no error */ 0;
2185 }
2186
/*
 * Next-node indices used by the ip4 rewrite code.
 * DROP must stay 0: ip4_rewrite_inline tests "next0 && next0_override",
 * i.e. it treats a zero next index as "drop".
 */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,
} ip4_rewrite_next_t;
2192
2193 always_inline uword
2194 ip4_rewrite_inline (vlib_main_t * vm,
2195                     vlib_node_runtime_t * node,
2196                     vlib_frame_t * frame,
2197                     int rewrite_for_locally_received_packets,
2198                     int is_midchain)
2199 {
2200   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2201   u32 * from = vlib_frame_vector_args (frame);
2202   u32 n_left_from, n_left_to_next, * to_next, next_index;
2203   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2204   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2205
2206   n_left_from = frame->n_vectors;
2207   next_index = node->cached_next_index;
2208   u32 cpu_index = os_get_cpu_number();
2209
2210   while (n_left_from > 0)
2211     {
2212       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2213
2214       while (n_left_from >= 4 && n_left_to_next >= 2)
2215         {
2216           ip_adjacency_t * adj0, * adj1;
2217           vlib_buffer_t * p0, * p1;
2218           ip4_header_t * ip0, * ip1;
2219           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2220           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2221           u32 next0_override, next1_override;
2222           u32 tx_sw_if_index0, tx_sw_if_index1;
2223
2224           if (rewrite_for_locally_received_packets)
2225               next0_override = next1_override = 0;
2226
2227           /* Prefetch next iteration. */
2228           {
2229             vlib_buffer_t * p2, * p3;
2230
2231             p2 = vlib_get_buffer (vm, from[2]);
2232             p3 = vlib_get_buffer (vm, from[3]);
2233
2234             vlib_prefetch_buffer_header (p2, STORE);
2235             vlib_prefetch_buffer_header (p3, STORE);
2236
2237             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2238             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2239           }
2240
2241           pi0 = to_next[0] = from[0];
2242           pi1 = to_next[1] = from[1];
2243
2244           from += 2;
2245           n_left_from -= 2;
2246           to_next += 2;
2247           n_left_to_next -= 2;
2248
2249           p0 = vlib_get_buffer (vm, pi0);
2250           p1 = vlib_get_buffer (vm, pi1);
2251
2252           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2253           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2254
2255           /* We should never rewrite a pkt using the MISS adjacency */
2256           ASSERT(adj_index0 && adj_index1);
2257
2258           ip0 = vlib_buffer_get_current (p0);
2259           ip1 = vlib_buffer_get_current (p1);
2260
2261           error0 = error1 = IP4_ERROR_NONE;
2262           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2263
2264           /* Decrement TTL & update checksum.
2265              Works either endian, so no need for byte swap. */
2266           if (! rewrite_for_locally_received_packets)
2267             {
2268               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2269
2270               /* Input node should have reject packets with ttl 0. */
2271               ASSERT (ip0->ttl > 0);
2272               ASSERT (ip1->ttl > 0);
2273
2274               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2275               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2276
2277               checksum0 += checksum0 >= 0xffff;
2278               checksum1 += checksum1 >= 0xffff;
2279
2280               ip0->checksum = checksum0;
2281               ip1->checksum = checksum1;
2282
2283               ttl0 -= 1;
2284               ttl1 -= 1;
2285
2286               ip0->ttl = ttl0;
2287               ip1->ttl = ttl1;
2288
2289               /*
2290                * If the ttl drops below 1 when forwarding, generate
2291                * an ICMP response.
2292                */
2293               if (PREDICT_FALSE(ttl0 <= 0))
2294                 {
2295                   error0 = IP4_ERROR_TIME_EXPIRED;
2296                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2297                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2298                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2299                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2300                 }
2301               if (PREDICT_FALSE(ttl1 <= 0))
2302                 {
2303                   error1 = IP4_ERROR_TIME_EXPIRED;
2304                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2305                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2306                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2307                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2308                 }
2309
2310               /* Verify checksum. */
2311               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2312               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2313             }
2314
2315           /* Rewrite packet header and updates lengths. */
2316           adj0 = ip_get_adjacency (lm, adj_index0);
2317           adj1 = ip_get_adjacency (lm, adj_index1);
2318
2319           if (rewrite_for_locally_received_packets)
2320             {
2321               if (PREDICT_FALSE(adj0->lookup_next_index
2322                                 == IP_LOOKUP_NEXT_ARP))
2323                 next0_override = IP4_REWRITE_NEXT_ARP;
2324               if (PREDICT_FALSE(adj1->lookup_next_index
2325                                 == IP_LOOKUP_NEXT_ARP))
2326                 next1_override = IP4_REWRITE_NEXT_ARP;
2327             }
2328
2329           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2330           rw_len0 = adj0[0].rewrite_header.data_bytes;
2331           rw_len1 = adj1[0].rewrite_header.data_bytes;
2332           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2333           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2334
2335           /* Check MTU of outgoing interface. */
2336           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2337                     ? IP4_ERROR_MTU_EXCEEDED
2338                     : error0);
2339           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2340                     ? IP4_ERROR_MTU_EXCEEDED
2341                     : error1);
2342
2343           next0 = (error0 == IP4_ERROR_NONE)
2344             ? adj0[0].rewrite_header.next_index : next0;
2345
2346           if (rewrite_for_locally_received_packets)
2347               next0 = next0 && next0_override ? next0_override : next0;
2348
2349           next1 = (error1 == IP4_ERROR_NONE)
2350             ? adj1[0].rewrite_header.next_index : next1;
2351
2352           if (rewrite_for_locally_received_packets)
2353               next1 = next1 && next1_override ? next1_override : next1;
2354
2355           /*
2356            * We've already accounted for an ethernet_header_t elsewhere
2357            */
2358           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2359               vlib_increment_combined_counter
2360                   (&adjacency_counters,
2361                    cpu_index, adj_index0,
2362                    /* packet increment */ 0,
2363                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2364
2365           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2366               vlib_increment_combined_counter
2367                   (&adjacency_counters,
2368                    cpu_index, adj_index1,
2369                    /* packet increment */ 0,
2370                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2371
2372           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2373            * to see the IP headerr */
2374           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2375             {
2376               p0->current_data -= rw_len0;
2377               p0->current_length += rw_len0;
2378               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2379               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2380                   tx_sw_if_index0;
2381
2382               vnet_feature_arc_start(lm->output_feature_arc_index,
2383                                      tx_sw_if_index0, &next0, p0);
2384             }
2385           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2386             {
2387               p1->current_data -= rw_len1;
2388               p1->current_length += rw_len1;
2389
2390               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2391               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2392                   tx_sw_if_index1;
2393
2394               vnet_feature_arc_start(lm->output_feature_arc_index,
2395                                      tx_sw_if_index1, &next1, p1);
2396             }
2397
2398           /* Guess we are only writing on simple Ethernet header. */
2399           vnet_rewrite_two_headers (adj0[0], adj1[0],
2400                                     ip0, ip1,
2401                                     sizeof (ethernet_header_t));
2402
2403           if (is_midchain)
2404           {
2405               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2406               adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2407           }
2408
2409           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2410                                            to_next, n_left_to_next,
2411                                            pi0, pi1, next0, next1);
2412         }
2413
2414       while (n_left_from > 0 && n_left_to_next > 0)
2415         {
2416           ip_adjacency_t * adj0;
2417           vlib_buffer_t * p0;
2418           ip4_header_t * ip0;
2419           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2420           u32 next0_override;
2421           u32 tx_sw_if_index0;
2422
2423           if (rewrite_for_locally_received_packets)
2424               next0_override = 0;
2425
2426           pi0 = to_next[0] = from[0];
2427
2428           p0 = vlib_get_buffer (vm, pi0);
2429
2430           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2431
2432           /* We should never rewrite a pkt using the MISS adjacency */
2433           ASSERT(adj_index0);
2434
2435           adj0 = ip_get_adjacency (lm, adj_index0);
2436
2437           ip0 = vlib_buffer_get_current (p0);
2438
2439           error0 = IP4_ERROR_NONE;
2440           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2441
2442           /* Decrement TTL & update checksum. */
2443           if (! rewrite_for_locally_received_packets)
2444             {
2445               i32 ttl0 = ip0->ttl;
2446
2447               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2448
2449               checksum0 += checksum0 >= 0xffff;
2450
2451               ip0->checksum = checksum0;
2452
2453               ASSERT (ip0->ttl > 0);
2454
2455               ttl0 -= 1;
2456
2457               ip0->ttl = ttl0;
2458
2459               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2460
2461               if (PREDICT_FALSE(ttl0 <= 0))
2462                 {
2463                   /*
2464                    * If the ttl drops below 1 when forwarding, generate
2465                    * an ICMP response.
2466                    */
2467                   error0 = IP4_ERROR_TIME_EXPIRED;
2468                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2469                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2470                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2471                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2472                 }
2473             }
2474
2475           if (rewrite_for_locally_received_packets)
2476             {
2477               /*
2478                * We have to override the next_index in ARP adjacencies,
2479                * because they're set up for ip4-arp, not this node...
2480                */
2481               if (PREDICT_FALSE(adj0->lookup_next_index
2482                                 == IP_LOOKUP_NEXT_ARP))
2483                 next0_override = IP4_REWRITE_NEXT_ARP;
2484             }
2485
2486           /* Guess we are only writing on simple Ethernet header. */
2487           vnet_rewrite_one_header (adj0[0], ip0,
2488                                    sizeof (ethernet_header_t));
2489
2490           /* Update packet buffer attributes/set output interface. */
2491           rw_len0 = adj0[0].rewrite_header.data_bytes;
2492           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2493
2494           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2495               vlib_increment_combined_counter
2496                   (&adjacency_counters,
2497                    cpu_index, adj_index0,
2498                    /* packet increment */ 0,
2499                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2500
2501           /* Check MTU of outgoing interface. */
2502           error0 = (vlib_buffer_length_in_chain (vm, p0)
2503                     > adj0[0].rewrite_header.max_l3_packet_bytes
2504                     ? IP4_ERROR_MTU_EXCEEDED
2505                     : error0);
2506
2507           p0->error = error_node->errors[error0];
2508
2509           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2510            * to see the IP headerr */
2511           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2512             {
2513               p0->current_data -= rw_len0;
2514               p0->current_length += rw_len0;
2515               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2516
2517               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2518               next0 = adj0[0].rewrite_header.next_index;
2519
2520               if (is_midchain)
2521                 {
2522                   adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2523                 }
2524
2525               vnet_feature_arc_start(lm->output_feature_arc_index,
2526                                      tx_sw_if_index0, &next0, p0);
2527
2528             }
2529
2530           if (rewrite_for_locally_received_packets)
2531               next0 = next0 && next0_override ? next0_override : next0;
2532
2533           from += 1;
2534           n_left_from -= 1;
2535           to_next += 1;
2536           n_left_to_next -= 1;
2537
2538           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2539                                            to_next, n_left_to_next,
2540                                            pi0, next0);
2541         }
2542
2543       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2544     }
2545
2546   /* Need to do trace after rewrites to pick up new packet data. */
2547   if (node->flags & VLIB_NODE_FLAG_TRACE)
2548     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2549
2550   return frame->n_vectors;
2551 }
2552
2553
2554 /** @brief IPv4 transit rewrite node.
2555     @node ip4-rewrite-transit
2556
2557     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2558     header checksum, fetch the ip adjacency, check the outbound mtu,
2559     apply the adjacency rewrite, and send pkts to the adjacency
2560     rewrite header's rewrite_next_index.
2561
2562     @param vm vlib_main_t corresponding to the current thread
2563     @param node vlib_node_runtime_t
2564     @param frame vlib_frame_t whose contents should be dispatched
2565
2566     @par Graph mechanics: buffer metadata, next index usage
2567
2568     @em Uses:
2569     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2570         - the rewrite adjacency index
2571     - <code>adj->lookup_next_index</code>
2572         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2573           the packet will be dropped.
2574     - <code>adj->rewrite_header</code>
2575         - Rewrite string length, rewrite string, next_index
2576
2577     @em Sets:
2578     - <code>b->current_data, b->current_length</code>
2579         - Updated net of applying the rewrite string
2580
2581     <em>Next Indices:</em>
2582     - <code> adj->rewrite_header.next_index </code>
2583       or @c error-drop
2584 */
2585 static uword
2586 ip4_rewrite_transit (vlib_main_t * vm,
2587                      vlib_node_runtime_t * node,
2588                      vlib_frame_t * frame)
2589 {
2590   return ip4_rewrite_inline (vm, node, frame,
2591                              /* rewrite_for_locally_received_packets */ 0, 0);
2592 }
2593
2594 /** @brief IPv4 local rewrite node.
2595     @node ip4-rewrite-local
2596
2597     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2598     the outbound interface mtu, apply the adjacency rewrite, and send
2599     pkts to the adjacency rewrite header's rewrite_next_index. Deal
2600     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
2601     dst = interface addr."
2602
2603     @param vm vlib_main_t corresponding to the current thread
2604     @param node vlib_node_runtime_t
2605     @param frame vlib_frame_t whose contents should be dispatched
2606
2607     @par Graph mechanics: buffer metadata, next index usage
2608
2609     @em Uses:
2610     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2611         - the rewrite adjacency index
2612     - <code>adj->lookup_next_index</code>
2613         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2614           the packet will be dropped.
2615     - <code>adj->rewrite_header</code>
2616         - Rewrite string length, rewrite string, next_index
2617
2618     @em Sets:
2619     - <code>b->current_data, b->current_length</code>
2620         - Updated net of applying the rewrite string
2621
2622     <em>Next Indices:</em>
2623     - <code> adj->rewrite_header.next_index </code>
2624       or @c error-drop
2625 */
2626
2627 static uword
2628 ip4_rewrite_local (vlib_main_t * vm,
2629                    vlib_node_runtime_t * node,
2630                    vlib_frame_t * frame)
2631 {
2632   return ip4_rewrite_inline (vm, node, frame,
2633                              /* rewrite_for_locally_received_packets */ 1, 0);
2634 }
2635
2636 static uword
2637 ip4_midchain (vlib_main_t * vm,
2638               vlib_node_runtime_t * node,
2639               vlib_frame_t * frame)
2640 {
2641   return ip4_rewrite_inline (vm, node, frame,
2642                              /* rewrite_for_locally_received_packets */ 0, 1);
2643 }
2644
2645 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2646   .function = ip4_rewrite_transit,
2647   .name = "ip4-rewrite-transit",
2648   .vector_size = sizeof (u32),
2649
2650   .format_trace = format_ip4_rewrite_trace,
2651
2652   .n_next_nodes = 3,
2653   .next_nodes = {
2654     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2655     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2656     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2657   },
2658 };
2659
2660 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2661
2662 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2663   .function = ip4_midchain,
2664   .name = "ip4-midchain",
2665   .vector_size = sizeof (u32),
2666
2667   .format_trace = format_ip4_forward_next_trace,
2668
2669   .sibling_of = "ip4-rewrite-transit",
2670 };
2671
2672 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
2673
2674 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2675   .function = ip4_rewrite_local,
2676   .name = "ip4-rewrite-local",
2677   .vector_size = sizeof (u32),
2678
2679   .sibling_of = "ip4-rewrite-transit",
2680
2681   .format_trace = format_ip4_rewrite_trace,
2682
2683   .n_next_nodes = 0,
2684 };
2685
2686 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2687
2688 static clib_error_t *
2689 add_del_interface_table (vlib_main_t * vm,
2690                          unformat_input_t * input,
2691                          vlib_cli_command_t * cmd)
2692 {
2693   vnet_main_t * vnm = vnet_get_main();
2694   clib_error_t * error = 0;
2695   u32 sw_if_index, table_id;
2696
2697   sw_if_index = ~0;
2698
2699   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2700     {
2701       error = clib_error_return (0, "unknown interface `%U'",
2702                                  format_unformat_error, input);
2703       goto done;
2704     }
2705
2706   if (unformat (input, "%d", &table_id))
2707     ;
2708   else
2709     {
2710       error = clib_error_return (0, "expected table id `%U'",
2711                                  format_unformat_error, input);
2712       goto done;
2713     }
2714
2715   {
2716     ip4_main_t * im = &ip4_main;
2717     u32 fib_index;
2718
2719     fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2720                                                    table_id);
2721
2722     //
2723     // FIXME-LATER
2724     //  changing an interface's table has consequences for any connecteds
2725     //  and adj-fibs already installed.
2726     //
2727     vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2728     im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2729   }
2730
2731  done:
2732   return error;
2733 }
2734
2735 /*?
2736  * Place the indicated interface into the supplied IPv4 FIB table (also known
2737  * as a VRF). If the FIB table does not exist, this command creates it. To
2738  * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2739  * FIB table will only be displayed if a route has been added to the table, or
2740  * an IP Address is assigned to an interface in the table (which adds a route
2741  * automatically).
2742  *
2743  * @note IP addresses added after setting the interface IP table end up in
2744  * the indicated FIB table. If the IP address is added prior to adding the
2745  * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2746  * but potentially counter-intuitive results occur if you provision interface
2747  * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2748  * IP table ID provisioned. It might be marginally useful to evade source RPF
2749  * drops to put an interface address into multiple FIBs.
2750  *
2751  * @cliexpar
2752  * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2753  * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2754  ?*/
2755 /* *INDENT-OFF* */
2756 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2757   .path = "set interface ip table",
2758   .function = add_del_interface_table,
2759   .short_help = "set interface ip table <interface> <table-id>",
2760 };
2761 /* *INDENT-ON* */
2762
2763
/*
 * Node function for "ip4-lookup-multicast".
 *
 * For every buffer in the frame:
 *   - pick a FIB index: the entry of im->fib_index_by_sw_if_index for the
 *     buffer's RX sw_if_index, unless sw_if_index[VLIB_TX] is not ~0, in
 *     which case that value is used as the FIB index;
 *   - look up the IPv4 destination address in that FIB to obtain a
 *     load-balance index;
 *   - compute the flow hash with the load-balance's hash config, store it
 *     in the buffer, and use it (masked by lb_n_buckets_minus_1) to pick
 *     a bucket;
 *   - store the bucket's dpoi_index in ip.adj_index[VLIB_TX] and enqueue
 *     the buffer to the bucket's dpoi_next_node;
 *   - bump the load-balance "to" combined counter by one packet and the
 *     buffer chain length.
 *
 * Buffers are enqueued speculatively to the frame for `next`; buffers
 * whose real next node differs are pulled back out and sent individually.
 *
 * Returns the number of vectors in the input frame.
 */
static uword
ip4_lookup_multicast (vlib_main_t * vm,
                      vlib_node_runtime_t * node,
                      vlib_frame_t * frame)
{
  ip4_main_t * im = &ip4_main;
  vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
  u32 n_left_from, n_left_to_next, * from, * to_next;
  ip_lookup_next_t next;
  u32 cpu_index = os_get_cpu_number();

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  /* Start by speculating that buffers go where the last ones went. */
  next = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next,
                           to_next, n_left_to_next);

      /* Dual loop: handles two buffers per iteration.  Needs four left
       * in the input because from[2] and from[3] are prefetched. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t * p0, * p1;
          u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
          ip_lookup_next_t next0, next1;
          ip4_header_t * ip0, * ip1;
          u32 fib_index0, fib_index1;
          const dpo_id_t *dpo0, *dpo1;
          const load_balance_t * lb0, * lb1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t * p2, * p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
          }

          /* Speculatively place both buffers in the current next frame. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);

          /* FIB index from the RX interface, overridden by
           * sw_if_index[VLIB_TX] when that is not ~0. */
          fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
          fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
          fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
            fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
          fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
            fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];

          /* Destination-address lookup yields a load-balance index. */
          lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
                                               &ip0->dst_address);
          lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
                                               &ip1->dst_address);

          lb0 = load_balance_get (lb_index0);
          lb1 = load_balance_get (lb_index1);

          /* The bucket mask below relies on a power-of-two bucket count. */
          ASSERT (lb0->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb0->lb_n_buckets));
          ASSERT (lb1->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb1->lb_n_buckets));

          vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
              (ip0, lb0->lb_hash_config);

          vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
              (ip1, lb1->lb_hash_config);

          /* Select the bucket from the low bits of the flow hash. */
          dpo0 = load_balance_get_bucket_i(lb0,
                                           (vnet_buffer (p0)->ip.flow_hash &
                                            (lb0->lb_n_buckets_minus_1)));
          dpo1 = load_balance_get_bucket_i(lb1,
                                           (vnet_buffer (p1)->ip.flow_hash &
                                            (lb1->lb_n_buckets_minus_1)));

          /* The chosen bucket supplies both the next node and the index
           * stored for it in the buffer. */
          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
          next1 = dpo1->dpoi_next_node;
          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;

          /* Count one packet plus its chain length against each
           * load-balance index. */
          if (1) /* $$$$$$ HACK FIXME */
          vlib_increment_combined_counter
              (cm, cpu_index, lb_index0, 1,
               vlib_buffer_length_in_chain (vm, p0));
          if (1) /* $$$$$$ HACK FIXME */
          vlib_increment_combined_counter
              (cm, cpu_index, lb_index1, 1,
               vlib_buffer_length_in_chain (vm, p1));

          from += 2;
          to_next += 2;
          n_left_to_next -= 2;
          n_left_from -= 2;

          /* Bit 0 set: p0 was mis-speculated; bit 1 set: p1 was. */
          wrong_next = (next0 != next) + 2*(next1 != next);
          if (PREDICT_FALSE (wrong_next != 0))
            {
              switch (wrong_next)
                {
                case 1:
                  /* A B A */
                  /* Only p0 is wrong: move pi1 into p0's slot, give one
                   * slot back, and send pi0 on its own. */
                  to_next[-2] = pi1;
                  to_next -= 1;
                  n_left_to_next += 1;
                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
                  break;

                case 2:
                  /* A A B */
                  /* Only p1 is wrong: give its slot back and send pi1
                   * on its own. */
                  to_next -= 1;
                  n_left_to_next += 1;
                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
                  break;

                case 3:
                  /* A B C */
                  /* Both are wrong: give both slots back and send each
                   * buffer to its own next. */
                  to_next -= 2;
                  n_left_to_next += 2;
                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
                  if (next0 == next1)
                    {
                      /* A B B */
                      /* Both went to the same other node: make that the
                       * speculated next from here on. */
                      vlib_put_next_frame (vm, node, next, n_left_to_next);
                      next = next1;
                      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
                    }
                }
            }
        }

      /* Single loop: same processing, one buffer at a time. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t * p0;
          ip4_header_t * ip0;
          u32 pi0, lb_index0;
          ip_lookup_next_t next0;
          u32 fib_index0;
          const dpo_id_t *dpo0;
          const load_balance_t * lb0;

          pi0 = from[0];
          to_next[0] = pi0;

          p0 = vlib_get_buffer (vm, pi0);

          ip0 = vlib_buffer_get_current (p0);

          /* FIB index from the RX interface, overridden by
           * sw_if_index[VLIB_TX] when that is not ~0. */
          fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
                                vnet_buffer (p0)->sw_if_index[VLIB_RX]);
          fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
              fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];

          lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
                                               &ip0->dst_address);

          lb0 = load_balance_get (lb_index0);

          ASSERT (lb0->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb0->lb_n_buckets));

          vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
              (ip0, lb0->lb_hash_config);

          dpo0 = load_balance_get_bucket_i(lb0,
                                           (vnet_buffer (p0)->ip.flow_hash &
                                            (lb0->lb_n_buckets_minus_1)));

          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

          if (1) /* $$$$$$ HACK FIXME */
              vlib_increment_combined_counter
                  (cm, cpu_index, lb_index0, 1,
                   vlib_buffer_length_in_chain (vm, p0));

          from += 1;
          to_next += 1;
          n_left_to_next -= 1;
          n_left_from -= 1;

          if (PREDICT_FALSE (next0 != next))
            {
              /* Mis-speculated: hand back the slot, close the current
               * frame, and re-enqueue pi0 on a frame for its real next,
               * which becomes the new speculation. */
              n_left_to_next += 1;
              vlib_put_next_frame (vm, node, next, n_left_to_next);
              next = next0;
              vlib_get_next_frame (vm, node, next,
                                   to_next, n_left_to_next);
              to_next[0] = pi0;
              to_next += 1;
              n_left_to_next -= 1;
            }
        }

      vlib_put_next_frame (vm, node, next, n_left_to_next);
    }

  /* Trace after processing so the trace can use adj_index[VLIB_TX]. */
  if (node->flags & VLIB_NODE_FLAG_TRACE)
      ip4_forward_next_trace(vm, node, frame, VLIB_TX);

  return frame->n_vectors;
}
2977
2978 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2979   .function = ip4_lookup_multicast,
2980   .name = "ip4-lookup-multicast",
2981   .vector_size = sizeof (u32),
2982   .sibling_of = "ip4-lookup",
2983   .format_trace = format_ip4_lookup_trace,
2984
2985   .n_next_nodes = 0,
2986 };
2987
2988 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
2989
2990 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2991   .function = ip4_drop,
2992   .name = "ip4-multicast",
2993   .vector_size = sizeof (u32),
2994
2995   .format_trace = format_ip4_forward_next_trace,
2996
2997   .n_next_nodes = 1,
2998   .next_nodes = {
2999     [0] = "error-drop",
3000   },
3001 };
3002
3003 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3004 {
3005   ip4_fib_mtrie_t * mtrie0;
3006   ip4_fib_mtrie_leaf_t leaf0;
3007   u32 lbi0;
3008
3009   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
3010
3011   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3012   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3013   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3014   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3015   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3016
3017   /* Handle default route. */
3018   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3019
3020   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3021
3022   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
3023 }
3024
3025 static clib_error_t *
3026 test_lookup_command_fn (vlib_main_t * vm,
3027                         unformat_input_t * input,
3028                         vlib_cli_command_t * cmd)
3029 {
3030   ip4_fib_t *fib;
3031   u32 table_id = 0;
3032   f64 count = 1;
3033   u32 n;
3034   int i;
3035   ip4_address_t ip4_base_address;
3036   u64 errors = 0;
3037
3038   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3039       if (unformat (input, "table %d", &table_id))
3040       {
3041           /* Make sure the entry exists. */
3042           fib = ip4_fib_get(table_id);
3043           if ((fib) && (fib->index != table_id))
3044               return clib_error_return (0, "<fib-index> %d does not exist",
3045                                         table_id);
3046       }
3047       else if (unformat (input, "count %f", &count))
3048         ;
3049
3050       else if (unformat (input, "%U",
3051                          unformat_ip4_address, &ip4_base_address))
3052         ;
3053       else
3054         return clib_error_return (0, "unknown input `%U'",
3055                                   format_unformat_error, input);
3056   }
3057
3058   n = count;
3059
3060   for (i = 0; i < n; i++)
3061     {
3062       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3063         errors++;
3064
3065       ip4_base_address.as_u32 =
3066         clib_host_to_net_u32 (1 +
3067                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3068     }
3069
3070   if (errors)
3071     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3072   else
3073     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3074
3075   return 0;
3076 }
3077
3078 /*?
3079  * Perform a lookup of an IPv4 Address (or range of addresses) in the
3080  * given FIB table to determine if there is a conflict with the
3081  * adjacency table. The fib-id can be determined by using the
3082  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3083  * of 0 is used.
3084  *
3085  * @todo This command uses fib-id, other commands use table-id (not
3086  * just a name, they are different indexes). Would like to change this
3087  * to table-id for consistency.
3088  *
3089  * @cliexpar
3090  * Example of how to run the test lookup command:
3091  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3092  * No errors in 2 lookups
3093  * @cliexend
3094 ?*/
3095 /* *INDENT-OFF* */
3096 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3097     .path = "test lookup",
3098     .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3099     .function = test_lookup_command_fn,
3100 };
3101 /* *INDENT-ON* */
3102
3103 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3104 {
3105   ip4_main_t * im4 = &ip4_main;
3106   ip4_fib_t * fib;
3107   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3108
3109   if (p == 0)
3110     return VNET_API_ERROR_NO_SUCH_FIB;
3111
3112   fib = ip4_fib_get (p[0]);
3113
3114   fib->flow_hash_config = flow_hash_config;
3115   return 0;
3116 }
3117
3118 static clib_error_t *
3119 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3120                              unformat_input_t * input,
3121                              vlib_cli_command_t * cmd)
3122 {
3123   int matched = 0;
3124   u32 table_id = 0;
3125   u32 flow_hash_config = 0;
3126   int rv;
3127
3128   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3129     if (unformat (input, "table %d", &table_id))
3130       matched = 1;
3131 #define _(a,v) \
3132     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3133     foreach_flow_hash_bit
3134 #undef _
3135     else break;
3136   }
3137
3138   if (matched == 0)
3139     return clib_error_return (0, "unknown input `%U'",
3140                               format_unformat_error, input);
3141
3142   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3143   switch (rv)
3144     {
3145     case 0:
3146       break;
3147
3148     case VNET_API_ERROR_NO_SUCH_FIB:
3149       return clib_error_return (0, "no such FIB table %d", table_id);
3150
3151     default:
3152       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3153       break;
3154     }
3155
3156   return 0;
3157 }
3158
3159 /*?
3160  * Configure the set of IPv4 fields used by the flow hash.
3161  *
3162  * @cliexpar
3163  * Example of how to set the flow hash on a given table:
3164  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3165  * Example of how to display the configured flow hash:
3166  * @cliexstart{show ip fib}
3167  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3168  * 0.0.0.0/0
3169  *   unicast-ip4-chain
3170  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3171  *     [0] [@0]: dpo-drop ip6
3172  * 0.0.0.0/32
3173  *   unicast-ip4-chain
3174  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3175  *     [0] [@0]: dpo-drop ip6
3176  * 224.0.0.0/8
3177  *   unicast-ip4-chain
3178  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3179  *     [0] [@0]: dpo-drop ip6
3180  * 6.0.1.2/32
3181  *   unicast-ip4-chain
3182  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3183  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3184  * 7.0.0.1/32
3185  *   unicast-ip4-chain
3186  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3187  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3188  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3189  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3190  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3191  * 240.0.0.0/8
3192  *   unicast-ip4-chain
3193  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3194  *     [0] [@0]: dpo-drop ip6
3195  * 255.255.255.255/32
3196  *   unicast-ip4-chain
3197  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3198  *     [0] [@0]: dpo-drop ip6
3199  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3200  * 0.0.0.0/0
3201  *   unicast-ip4-chain
3202  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3203  *     [0] [@0]: dpo-drop ip6
3204  * 0.0.0.0/32
3205  *   unicast-ip4-chain
3206  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3207  *     [0] [@0]: dpo-drop ip6
3208  * 172.16.1.0/24
3209  *   unicast-ip4-chain
3210  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3211  *     [0] [@4]: ipv4-glean: af_packet0
3212  * 172.16.1.1/32
3213  *   unicast-ip4-chain
3214  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3215  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3216  * 172.16.1.2/32
3217  *   unicast-ip4-chain
3218  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3219  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3220  * 172.16.2.0/24
3221  *   unicast-ip4-chain
3222  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3223  *     [0] [@4]: ipv4-glean: af_packet1
3224  * 172.16.2.1/32
3225  *   unicast-ip4-chain
3226  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3227  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3228  * 224.0.0.0/8
3229  *   unicast-ip4-chain
3230  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3231  *     [0] [@0]: dpo-drop ip6
3232  * 240.0.0.0/8
3233  *   unicast-ip4-chain
3234  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3235  *     [0] [@0]: dpo-drop ip6
3236  * 255.255.255.255/32
3237  *   unicast-ip4-chain
3238  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3239  *     [0] [@0]: dpo-drop ip6
3240  * @cliexend
3241 ?*/
3242 /* *INDENT-OFF* */
3243 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3244   .path = "set ip flow-hash",
3245   .short_help =
3246   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3247   .function = set_ip_flow_hash_command_fn,
3248 };
3249 /* *INDENT-ON* */
3250
3251 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3252                                  u32 table_index)
3253 {
3254   vnet_main_t * vnm = vnet_get_main();
3255   vnet_interface_main_t * im = &vnm->interface_main;
3256   ip4_main_t * ipm = &ip4_main;
3257   ip_lookup_main_t * lm = &ipm->lookup_main;
3258   vnet_classify_main_t * cm = &vnet_classify_main;
3259   ip4_address_t *if_addr;
3260
3261   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3262     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3263
3264   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3265     return VNET_API_ERROR_NO_SUCH_ENTRY;
3266
3267   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3268   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3269
3270   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3271
3272   if (NULL != if_addr)
3273   {
3274       fib_prefix_t pfx = {
3275           .fp_len = 32,
3276           .fp_proto = FIB_PROTOCOL_IP4,
3277           .fp_addr.ip4 = *if_addr,
3278       };
3279       u32 fib_index;
3280
3281       fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3282                                                       sw_if_index);
3283
3284
3285       if (table_index != (u32) ~0)
3286       {
3287           dpo_id_t dpo = DPO_INVALID;
3288
3289           dpo_set(&dpo,
3290                   DPO_CLASSIFY,
3291                   DPO_PROTO_IP4,
3292                   classify_dpo_create(DPO_PROTO_IP4, table_index));
3293
3294           fib_table_entry_special_dpo_add(fib_index,
3295                                           &pfx,
3296                                           FIB_SOURCE_CLASSIFY,
3297                                           FIB_ENTRY_FLAG_NONE,
3298                                           &dpo);
3299           dpo_reset(&dpo);
3300       }
3301       else
3302       {
3303           fib_table_entry_special_remove(fib_index,
3304                                          &pfx,
3305                                          FIB_SOURCE_CLASSIFY);
3306       }
3307   }
3308
3309   return 0;
3310 }
3311
3312 static clib_error_t *
3313 set_ip_classify_command_fn (vlib_main_t * vm,
3314                             unformat_input_t * input,
3315                             vlib_cli_command_t * cmd)
3316 {
3317   u32 table_index = ~0;
3318   int table_index_set = 0;
3319   u32 sw_if_index = ~0;
3320   int rv;
3321
3322   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3323     if (unformat (input, "table-index %d", &table_index))
3324       table_index_set = 1;
3325     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3326                        vnet_get_main(), &sw_if_index))
3327       ;
3328     else
3329       break;
3330   }
3331
3332   if (table_index_set == 0)
3333     return clib_error_return (0, "classify table-index must be specified");
3334
3335   if (sw_if_index == ~0)
3336     return clib_error_return (0, "interface / subif must be specified");
3337
3338   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3339
3340   switch (rv)
3341     {
3342     case 0:
3343       break;
3344
3345     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3346       return clib_error_return (0, "No such interface");
3347
3348     case VNET_API_ERROR_NO_SUCH_ENTRY:
3349       return clib_error_return (0, "No such classifier table");
3350     }
3351   return 0;
3352 }
3353
3354 /*?
3355  * Assign a classification table to an interface. The classification
3356  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3357  * commands. Once the table is create, use this command to filter packets
3358  * on an interface.
3359  *
3360  * @cliexpar
3361  * Example of how to assign a classification table to an interface:
3362  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3363 ?*/
3364 /* *INDENT-OFF* */
3365 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3366     .path = "set ip classify",
3367     .short_help =
3368     "set ip classify intfc <interface> table-index <classify-idx>",
3369     .function = set_ip_classify_command_fn,
3370 };
3371 /* *INDENT-ON* */
3372