bfa9f0a54f47f08665024b6642ae5808716610cb
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53
54 /**
55  * @file
56  * @brief IPv4 Forwarding.
57  *
58  * This file contains the source code for IPv4 forwarding.
59  */
60
61 void
62 ip4_forward_next_trace (vlib_main_t * vm,
63                         vlib_node_runtime_t * node,
64                         vlib_frame_t * frame,
65                         vlib_rx_or_tx_t which_adj_index);
66
67 always_inline uword
68 ip4_lookup_inline (vlib_main_t * vm,
69                    vlib_node_runtime_t * node,
70                    vlib_frame_t * frame,
71                    int lookup_for_responses_to_locally_received_packets)
72 {
73   ip4_main_t * im = &ip4_main;
74   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
75   u32 n_left_from, n_left_to_next, * from, * to_next;
76   ip_lookup_next_t next;
77   u32 cpu_index = os_get_cpu_number();
78
79   from = vlib_frame_vector_args (frame);
80   n_left_from = frame->n_vectors;
81   next = node->cached_next_index;
82
83   while (n_left_from > 0)
84     {
85       vlib_get_next_frame (vm, node, next,
86                            to_next, n_left_to_next);
87
88       while (n_left_from >= 8 && n_left_to_next >= 4)
89         {
90           vlib_buffer_t * p0, * p1, * p2, * p3;
91           ip4_header_t * ip0, * ip1, * ip2, * ip3;
92           __attribute__((unused)) tcp_header_t * tcp0, * tcp1, * tcp2, * tcp3;
93           ip_lookup_next_t next0, next1, next2, next3;
94           const load_balance_t * lb0, * lb1, * lb2, * lb3;
95           ip4_fib_mtrie_t * mtrie0, * mtrie1, * mtrie2, * mtrie3;
96           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97           ip4_address_t * dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98           __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
99           __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
100           __attribute__((unused)) u32 pi2, fib_index2, lb_index2, is_tcp_udp2;
101           __attribute__((unused)) u32 pi3, fib_index3, lb_index3, is_tcp_udp3;
102           flow_hash_config_t flow_hash_config0, flow_hash_config1;
103           flow_hash_config_t flow_hash_config2, flow_hash_config3;
104           u32 hash_c0, hash_c1, hash_c2, hash_c3;
105           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
106
107           /* Prefetch next iteration. */
108           {
109             vlib_buffer_t * p4, * p5, * p6, * p7;
110
111             p4 = vlib_get_buffer (vm, from[4]);
112             p5 = vlib_get_buffer (vm, from[5]);
113             p6 = vlib_get_buffer (vm, from[6]);
114             p7 = vlib_get_buffer (vm, from[7]);
115
116             vlib_prefetch_buffer_header (p4, LOAD);
117             vlib_prefetch_buffer_header (p5, LOAD);
118             vlib_prefetch_buffer_header (p6, LOAD);
119             vlib_prefetch_buffer_header (p7, LOAD);
120
121             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
122             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
123             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
124             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
125           }
126
127           pi0 = to_next[0] = from[0];
128           pi1 = to_next[1] = from[1];
129           pi2 = to_next[2] = from[2];
130           pi3 = to_next[3] = from[3];
131
132           from += 4;
133           to_next += 4;
134           n_left_to_next -= 4;
135           n_left_from -= 4;
136
137           p0 = vlib_get_buffer (vm, pi0);
138           p1 = vlib_get_buffer (vm, pi1);
139           p2 = vlib_get_buffer (vm, pi2);
140           p3 = vlib_get_buffer (vm, pi3);
141
142           ip0 = vlib_buffer_get_current (p0);
143           ip1 = vlib_buffer_get_current (p1);
144           ip2 = vlib_buffer_get_current (p2);
145           ip3 = vlib_buffer_get_current (p3);
146
147           dst_addr0 = &ip0->dst_address;
148           dst_addr1 = &ip1->dst_address;
149           dst_addr2 = &ip2->dst_address;
150           dst_addr3 = &ip3->dst_address;
151
152           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
153           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
154           fib_index2 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p2)->sw_if_index[VLIB_RX]);
155           fib_index3 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p3)->sw_if_index[VLIB_RX]);
156           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
157             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
158           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
159             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
160           fib_index2 = (vnet_buffer(p2)->sw_if_index[VLIB_TX] == (u32)~0) ?
161             fib_index2 : vnet_buffer(p2)->sw_if_index[VLIB_TX];
162           fib_index3 = (vnet_buffer(p3)->sw_if_index[VLIB_TX] == (u32)~0) ?
163             fib_index3 : vnet_buffer(p3)->sw_if_index[VLIB_TX];
164
165
166           if (! lookup_for_responses_to_locally_received_packets)
167             {
168               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
169               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
170               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
171               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
172
173               leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
174
175               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
176               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
177               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
178               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
179             }
180
181           tcp0 = (void *) (ip0 + 1);
182           tcp1 = (void *) (ip1 + 1);
183           tcp2 = (void *) (ip2 + 1);
184           tcp3 = (void *) (ip3 + 1);
185
186           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
187                          || ip0->protocol == IP_PROTOCOL_UDP);
188           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
189                          || ip1->protocol == IP_PROTOCOL_UDP);
190           is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
191                          || ip2->protocol == IP_PROTOCOL_UDP);
192           is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
193                          || ip1->protocol == IP_PROTOCOL_UDP);
194
195           if (! lookup_for_responses_to_locally_received_packets)
196             {
197               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
198               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
199               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
200               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
201             }
202
203           if (! lookup_for_responses_to_locally_received_packets)
204             {
205               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
206               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
207               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
208               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
209             }
210
211           if (! lookup_for_responses_to_locally_received_packets)
212             {
213               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
214               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
215               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
216               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
217             }
218
219           if (lookup_for_responses_to_locally_received_packets)
220             {
221               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
222               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
223               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
224               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
225             }
226           else
227             {
228               /* Handle default route. */
229               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
230               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
231               leaf2 = (leaf2 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
232               leaf3 = (leaf3 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
233               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
234               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
235               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
236               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
237             }
238
239           lb0 = load_balance_get (lb_index0);
240           lb1 = load_balance_get (lb_index1);
241           lb2 = load_balance_get (lb_index2);
242           lb3 = load_balance_get (lb_index3);
243
244           /* Use flow hash to compute multipath adjacency. */
245           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
246           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
247           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
248           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
249           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
250             {
251               flow_hash_config0 = lb0->lb_hash_config;
252               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
253                 ip4_compute_flow_hash (ip0, flow_hash_config0);
254             }
255           if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
256             {
257               flow_hash_config1 = lb1->lb_hash_config;
258               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
259                 ip4_compute_flow_hash (ip1, flow_hash_config1);
260             }
261           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
262             {
263               flow_hash_config2 = lb2->lb_hash_config;
264               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
265                 ip4_compute_flow_hash (ip2, flow_hash_config2);
266             }
267           if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
268             {
269               flow_hash_config3 = lb3->lb_hash_config;
270               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
271                 ip4_compute_flow_hash (ip3, flow_hash_config3);
272             }
273
274           ASSERT (lb0->lb_n_buckets > 0);
275           ASSERT (is_pow2 (lb0->lb_n_buckets));
276           ASSERT (lb1->lb_n_buckets > 0);
277           ASSERT (is_pow2 (lb1->lb_n_buckets));
278           ASSERT (lb2->lb_n_buckets > 0);
279           ASSERT (is_pow2 (lb2->lb_n_buckets));
280           ASSERT (lb3->lb_n_buckets > 0);
281           ASSERT (is_pow2 (lb3->lb_n_buckets));
282
283           dpo0 = load_balance_get_bucket_i(lb0,
284                                            (hash_c0 &
285                                             (lb0->lb_n_buckets_minus_1)));
286           dpo1 = load_balance_get_bucket_i(lb1,
287                                            (hash_c1 &
288                                             (lb1->lb_n_buckets_minus_1)));
289           dpo2 = load_balance_get_bucket_i(lb2,
290                                            (hash_c2 &
291                                             (lb2->lb_n_buckets_minus_1)));
292           dpo3 = load_balance_get_bucket_i(lb3,
293                                            (hash_c3 &
294                                             (lb3->lb_n_buckets_minus_1)));
295
296           next0 = dpo0->dpoi_next_node;
297           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
298           next1 = dpo1->dpoi_next_node;
299           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
300           next2 = dpo2->dpoi_next_node;
301           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
302           next3 = dpo3->dpoi_next_node;
303           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
304
305           vlib_increment_combined_counter
306               (cm, cpu_index, lb_index0, 1,
307                vlib_buffer_length_in_chain (vm, p0)
308                + sizeof(ethernet_header_t));
309           vlib_increment_combined_counter
310               (cm, cpu_index, lb_index1, 1,
311                vlib_buffer_length_in_chain (vm, p1)
312                + sizeof(ethernet_header_t));
313           vlib_increment_combined_counter
314               (cm, cpu_index, lb_index2, 1,
315                vlib_buffer_length_in_chain (vm, p2)
316                + sizeof(ethernet_header_t));
317           vlib_increment_combined_counter
318               (cm, cpu_index, lb_index3, 1,
319                vlib_buffer_length_in_chain (vm, p3)
320                + sizeof(ethernet_header_t));
321
322           vlib_validate_buffer_enqueue_x4 (vm, node, next,
323                                            to_next, n_left_to_next,
324                                            pi0, pi1, pi2, pi3,
325                                            next0, next1, next2, next3);
326         }
327
328       while (n_left_from > 0 && n_left_to_next > 0)
329         {
330           vlib_buffer_t * p0;
331           ip4_header_t * ip0;
332           __attribute__((unused)) tcp_header_t * tcp0;
333           ip_lookup_next_t next0;
334           const load_balance_t *lb0;
335           ip4_fib_mtrie_t * mtrie0;
336           ip4_fib_mtrie_leaf_t leaf0;
337           ip4_address_t * dst_addr0;
338           __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
339           flow_hash_config_t flow_hash_config0;
340           const dpo_id_t *dpo0;
341           u32 hash_c0;
342
343           pi0 = from[0];
344           to_next[0] = pi0;
345
346           p0 = vlib_get_buffer (vm, pi0);
347
348           ip0 = vlib_buffer_get_current (p0);
349
350           dst_addr0 = &ip0->dst_address;
351
352           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
353           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
354             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
355
356           if (! lookup_for_responses_to_locally_received_packets)
357             {
358               mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
359
360               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
361
362               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
363             }
364
365           tcp0 = (void *) (ip0 + 1);
366
367           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
368                          || ip0->protocol == IP_PROTOCOL_UDP);
369
370           if (! lookup_for_responses_to_locally_received_packets)
371             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
372
373           if (! lookup_for_responses_to_locally_received_packets)
374             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
375
376           if (! lookup_for_responses_to_locally_received_packets)
377             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
378
379           if (lookup_for_responses_to_locally_received_packets)
380             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
381           else
382             {
383               /* Handle default route. */
384               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
385               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
386             }
387
388           lb0 = load_balance_get (lbi0);
389
390           /* Use flow hash to compute multipath adjacency. */
391           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
392           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
393             {
394               flow_hash_config0 = lb0->lb_hash_config;
395
396               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
397                 ip4_compute_flow_hash (ip0, flow_hash_config0);
398             }
399
400           ASSERT (lb0->lb_n_buckets > 0);
401           ASSERT (is_pow2 (lb0->lb_n_buckets));
402
403           dpo0 = load_balance_get_bucket_i(lb0,
404                                            (hash_c0 &
405                                             (lb0->lb_n_buckets_minus_1)));
406
407           next0 = dpo0->dpoi_next_node;
408           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
409
410           vlib_increment_combined_counter
411               (cm, cpu_index, lbi0, 1,
412                vlib_buffer_length_in_chain (vm, p0));
413
414           from += 1;
415           to_next += 1;
416           n_left_to_next -= 1;
417           n_left_from -= 1;
418
419           if (PREDICT_FALSE (next0 != next))
420             {
421               n_left_to_next += 1;
422               vlib_put_next_frame (vm, node, next, n_left_to_next);
423               next = next0;
424               vlib_get_next_frame (vm, node, next,
425                                    to_next, n_left_to_next);
426               to_next[0] = pi0;
427               to_next += 1;
428               n_left_to_next -= 1;
429             }
430         }
431
432       vlib_put_next_frame (vm, node, next, n_left_to_next);
433     }
434
435   if (node->flags & VLIB_NODE_FLAG_TRACE)
436     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
437
438   return frame->n_vectors;
439 }
440
441 /** @brief IPv4 lookup node.
442     @node ip4-lookup
443
444     This is the main IPv4 lookup dispatch node.
445
446     @param vm vlib_main_t corresponding to the current thread
447     @param node vlib_node_runtime_t
448     @param frame vlib_frame_t whose contents should be dispatched
449
450     @par Graph mechanics: buffer metadata, next index usage
451
452     @em Uses:
453     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
454         - Indicates the @c sw_if_index value of the interface that the
455           packet was received on.
456     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
457         - When the value is @c ~0 then the node performs a longest prefix
458           match (LPM) for the packet destination address in the FIB attached
459           to the receive interface.
460         - Otherwise perform LPM for the packet destination address in the
461           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
462           value (0, 1, ...) and not a VRF id.
463
464     @em Sets:
465     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
466         - The lookup result adjacency index.
467
468     <em>Next Index:</em>
469     - Dispatches the packet to the node index found in
470       ip_adjacency_t @c adj->lookup_next_index
471       (where @c adj is the lookup result adjacency).
472 */
473 static uword
474 ip4_lookup (vlib_main_t * vm,
475             vlib_node_runtime_t * node,
476             vlib_frame_t * frame)
477 {
478   return ip4_lookup_inline (vm, node, frame,
479                             /* lookup_for_responses_to_locally_received_packets */ 0);
480
481 }
482
483 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
484
485 VLIB_REGISTER_NODE (ip4_lookup_node) = {
486   .function = ip4_lookup,
487   .name = "ip4-lookup",
488   .vector_size = sizeof (u32),
489
490   .format_trace = format_ip4_lookup_trace,
491   .n_next_nodes = IP_LOOKUP_N_NEXT,
492   .next_nodes = IP4_LOOKUP_NEXT_NODES,
493 };
494
495 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
496
497 always_inline uword
498 ip4_load_balance (vlib_main_t * vm,
499                   vlib_node_runtime_t * node,
500                   vlib_frame_t * frame)
501 {
502   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
503   u32 n_left_from, n_left_to_next, * from, * to_next;
504   ip_lookup_next_t next;
505   u32 cpu_index = os_get_cpu_number();
506
507   from = vlib_frame_vector_args (frame);
508   n_left_from = frame->n_vectors;
509   next = node->cached_next_index;
510
511   if (node->flags & VLIB_NODE_FLAG_TRACE)
512       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
513
514   while (n_left_from > 0)
515     {
516       vlib_get_next_frame (vm, node, next,
517                            to_next, n_left_to_next);
518
519
520       while (n_left_from > 0 && n_left_to_next > 0)
521         {
522           ip_lookup_next_t next0;
523           const load_balance_t *lb0;
524           vlib_buffer_t * p0;
525           u32 pi0, lbi0, hc0;
526           const ip4_header_t *ip0;
527           const dpo_id_t *dpo0;
528
529           pi0 = from[0];
530           to_next[0] = pi0;
531
532           p0 = vlib_get_buffer (vm, pi0);
533
534           ip0 = vlib_buffer_get_current (p0);
535           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
536
537           lb0 = load_balance_get(lbi0);
538           hc0 = lb0->lb_hash_config;
539           vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
540
541           dpo0 = load_balance_get_bucket_i(lb0,
542                                            vnet_buffer(p0)->ip.flow_hash &
543                                            (lb0->lb_n_buckets_minus_1));
544
545           next0 = dpo0->dpoi_next_node;
546           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
547
548           vlib_increment_combined_counter
549               (cm, cpu_index, lbi0, 1,
550                vlib_buffer_length_in_chain (vm, p0));
551
552           from += 1;
553           to_next += 1;
554           n_left_to_next -= 1;
555           n_left_from -= 1;
556
557           if (PREDICT_FALSE (next0 != next))
558             {
559               n_left_to_next += 1;
560               vlib_put_next_frame (vm, node, next, n_left_to_next);
561               next = next0;
562               vlib_get_next_frame (vm, node, next,
563                                    to_next, n_left_to_next);
564               to_next[0] = pi0;
565               to_next += 1;
566               n_left_to_next -= 1;
567             }
568         }
569
570       vlib_put_next_frame (vm, node, next, n_left_to_next);
571     }
572
573   return frame->n_vectors;
574 }
575
576 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
577
578 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
579   .function = ip4_load_balance,
580   .name = "ip4-load-balance",
581   .vector_size = sizeof (u32),
582   .sibling_of = "ip4-lookup",
583
584   .format_trace = format_ip4_forward_next_trace,
585 };
586
587 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
588
589 /* get first interface address */
590 ip4_address_t *
591 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
592                              ip_interface_address_t ** result_ia)
593 {
594   ip_lookup_main_t * lm = &im->lookup_main;
595   ip_interface_address_t * ia = 0;
596   ip4_address_t * result = 0;
597
598   foreach_ip_interface_address (lm, ia, sw_if_index,
599                                 1 /* honor unnumbered */,
600   ({
601     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
602     result = a;
603     break;
604   }));
605   if (result_ia)
606     *result_ia = result ? ia : 0;
607   return result;
608 }
609
610 static void
611 ip4_add_interface_routes (u32 sw_if_index,
612                           ip4_main_t * im, u32 fib_index,
613                           ip_interface_address_t * a)
614 {
615   ip_lookup_main_t * lm = &im->lookup_main;
616   ip4_address_t * address = ip_interface_address_get_address (lm, a);
617   fib_prefix_t pfx = {
618       .fp_len = a->address_length,
619       .fp_proto = FIB_PROTOCOL_IP4,
620       .fp_addr.ip4 = *address,
621   };
622
623   a->neighbor_probe_adj_index = ~0;
624
625   if (pfx.fp_len < 32)
626   {
627       fib_node_index_t fei;
628
629       fei = fib_table_entry_update_one_path(fib_index,
630                                             &pfx,
631                                             FIB_SOURCE_INTERFACE,
632                                             (FIB_ENTRY_FLAG_CONNECTED |
633                                              FIB_ENTRY_FLAG_ATTACHED),
634                                             FIB_PROTOCOL_IP4,
635                                             NULL, /* No next-hop address */
636                                             sw_if_index,
637                                             ~0, // invalid FIB index
638                                             1,
639                                             MPLS_LABEL_INVALID,
640                                             FIB_ROUTE_PATH_FLAG_NONE);
641       a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
642   }
643
644   pfx.fp_len = 32;
645
646   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
647   {
648       u32 classify_table_index =
649           lm->classify_table_index_by_sw_if_index [sw_if_index];
650       if (classify_table_index != (u32) ~0)
651       {
652           dpo_id_t dpo = DPO_INVALID;
653
654           dpo_set(&dpo,
655                   DPO_CLASSIFY,
656                   DPO_PROTO_IP4,
657                   classify_dpo_create(FIB_PROTOCOL_IP4,
658                                       classify_table_index));
659
660           fib_table_entry_special_dpo_add(fib_index,
661                                           &pfx,
662                                           FIB_SOURCE_CLASSIFY,
663                                           FIB_ENTRY_FLAG_NONE,
664                                           &dpo);
665           dpo_reset(&dpo);
666       }
667   }
668
669   fib_table_entry_update_one_path(fib_index,
670                                   &pfx,
671                                   FIB_SOURCE_INTERFACE,
672                                   (FIB_ENTRY_FLAG_CONNECTED |
673                                    FIB_ENTRY_FLAG_LOCAL),
674                                   FIB_PROTOCOL_IP4,
675                                   &pfx.fp_addr,
676                                   sw_if_index,
677                                   ~0, // invalid FIB index
678                                   1,
679                                   MPLS_LABEL_INVALID,
680                                   FIB_ROUTE_PATH_FLAG_NONE);
681 }
682
683 static void
684 ip4_del_interface_routes (ip4_main_t * im,
685                           u32 fib_index,
686                           ip4_address_t * address,
687                           u32 address_length)
688 {
689     fib_prefix_t pfx = {
690         .fp_len = address_length,
691         .fp_proto = FIB_PROTOCOL_IP4,
692         .fp_addr.ip4 = *address,
693     };
694
695     if (pfx.fp_len < 32)
696     {
697         fib_table_entry_delete(fib_index,
698                                &pfx,
699                                FIB_SOURCE_INTERFACE);
700     }
701
702     pfx.fp_len = 32;
703     fib_table_entry_delete(fib_index,
704                            &pfx,
705                            FIB_SOURCE_INTERFACE);
706 }
707
708 void
709 ip4_sw_interface_enable_disable (u32 sw_if_index,
710                                  u32 is_enable)
711 {
712   vlib_main_t * vm = vlib_get_main();
713   ip4_main_t * im = &ip4_main;
714   ip_lookup_main_t * lm = &im->lookup_main;
715   u32 ci, cast;
716   u32 lookup_feature_index;
717
718   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
719
720   /*
721    * enable/disable only on the 1<->0 transition
722    */
723   if (is_enable)
724     {
725       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
726         return;
727     }
728   else
729     {
730       ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
731       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
732         return;
733     }
734
735   for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
736     {
737       ip_config_main_t * cm = &lm->feature_config_mains[cast];
738       vnet_config_main_t * vcm = &cm->config_main;
739
740       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
741       ci = cm->config_index_by_sw_if_index[sw_if_index];
742
743       if (cast == VNET_IP_RX_UNICAST_FEAT)
744         lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
745       else
746         lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
747
748       if (is_enable)
749         ci = vnet_config_add_feature (vm, vcm,
750                                       ci,
751                                       lookup_feature_index,
752                                       /* config data */ 0,
753                                       /* # bytes of config data */ 0);
754       else
755         ci = vnet_config_del_feature (vm, vcm,
756                                       ci,
757                                       lookup_feature_index,
758                                       /* config data */ 0,
759                                       /* # bytes of config data */ 0);
760       cm->config_index_by_sw_if_index[sw_if_index] = ci;
761     }
762 }
763
764 static clib_error_t *
765 ip4_add_del_interface_address_internal (vlib_main_t * vm,
766                                         u32 sw_if_index,
767                                         ip4_address_t * address,
768                                         u32 address_length,
769                                         u32 is_del)
770 {
771   vnet_main_t * vnm = vnet_get_main();
772   ip4_main_t * im = &ip4_main;
773   ip_lookup_main_t * lm = &im->lookup_main;
774   clib_error_t * error = 0;
775   u32 if_address_index, elts_before;
776   ip4_address_fib_t ip4_af, * addr_fib = 0;
777
778   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
779   ip4_addr_fib_init (&ip4_af, address,
780                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
781   vec_add1 (addr_fib, ip4_af);
782
783   /* FIXME-LATER
784    * there is no support for adj-fib handling in the presence of overlapping
785    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
786    * most routers do.
787    */
788   if (! is_del)
789     {
790       /* When adding an address check that it does not conflict
791          with an existing address. */
792       ip_interface_address_t * ia;
793       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
794                                     0 /* honor unnumbered */,
795       ({
796         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
797
798         if (ip4_destination_matches_route (im, address, x, ia->address_length)
799             || ip4_destination_matches_route (im, x, address, address_length))
800           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
801                                     format_ip4_address_and_length, address, address_length,
802                                     format_ip4_address_and_length, x, ia->address_length,
803                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
804        }));
805     }
806
807   elts_before = pool_elts (lm->if_address_pool);
808
809   error = ip_interface_address_add_del
810     (lm,
811      sw_if_index,
812      addr_fib,
813      address_length,
814      is_del,
815      &if_address_index);
816   if (error)
817     goto done;
818
819   ip4_sw_interface_enable_disable(sw_if_index, !is_del);
820
821   if (is_del)
822       ip4_del_interface_routes (im, ip4_af.fib_index, address,
823                                 address_length);
824   else
825       ip4_add_interface_routes (sw_if_index,
826                                 im, ip4_af.fib_index,
827                                 pool_elt_at_index
828                                 (lm->if_address_pool, if_address_index));
829
830   /* If pool did not grow/shrink: add duplicate address. */
831   if (elts_before != pool_elts (lm->if_address_pool))
832     {
833       ip4_add_del_interface_address_callback_t * cb;
834       vec_foreach (cb, im->add_del_interface_address_callbacks)
835         cb->function (im, cb->function_opaque, sw_if_index,
836                       address, address_length,
837                       if_address_index,
838                       is_del);
839     }
840
841  done:
842   vec_free (addr_fib);
843   return error;
844 }
845
846 clib_error_t *
847 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
848                                ip4_address_t * address, u32 address_length,
849                                u32 is_del)
850 {
851   return ip4_add_del_interface_address_internal
852     (vm, sw_if_index, address, address_length,
853      is_del);
854 }
855
856 /* Built-in ip4 unicast rx feature path definition */
857 VNET_IP4_UNICAST_FEATURE_INIT (ip4_flow_classify, static) = {
858   .node_name = "ip4-flow-classify",
859   .runs_before = ORDER_CONSTRAINTS {"ip4-inacl", 0},
860   .feature_index = &ip4_main.ip4_unicast_rx_feature_flow_classify,
861 };
862
863 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
864   .node_name = "ip4-inacl",
865   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
866   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
867 };
868
869 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
870   .node_name = "ip4-source-check-via-rx",
871   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
872   .feature_index =
873   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
874 };
875
876 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
877   .node_name = "ip4-source-check-via-any",
878   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
879   .feature_index =
880   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
881 };
882
883 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
884   .node_name = "ip4-source-and-port-range-check-rx",
885   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
886   .feature_index =
887   &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
888 };
889
890 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
891   .node_name = "ip4-policer-classify",
892   .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
893   .feature_index =
894   &ip4_main.ip4_unicast_rx_feature_policer_classify,
895 };
896
897 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
898   .node_name = "ipsec-input-ip4",
899   .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
900   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
901 };
902
903 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
904   .node_name = "vpath-input-ip4",
905   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
906   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
907 };
908
909 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
910   .node_name = "ip4-lookup",
911   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
912   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
913 };
914
915 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
916   .node_name = "ip4-drop",
917   .runs_before = 0, /* not before any other features */
918   .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
919 };
920
921
922 /* Built-in ip4 multicast rx feature path definition */
923 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
924   .node_name = "vpath-input-ip4",
925   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
926   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
927 };
928
929 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
930   .node_name = "ip4-lookup-multicast",
931   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
932   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
933 };
934
935 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
936   .node_name = "ip4-drop",
937   .runs_before = 0, /* last feature */
938   .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
939 };
940
/* Graph nodes handed to vnet_feature_arc_init as the start of the rx arcs. */
static char * rx_feature_start_nodes[] =
{
  "ip4-input",
  "ip4-input-no-checksum",
};

/* Graph nodes handed to vnet_feature_arc_init as the start of the tx arc. */
static char * tx_feature_start_nodes[] =
{
  "ip4-rewrite-transit",
  "ip4-midchain",
};
949
950 /* Source and port-range check ip4 tx feature path definition */
951 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
952   .node_name = "ip4-source-and-port-range-check-tx",
953   .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
954   .feature_index =
955   &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
956
957 };
958
959 /* Built-in ip4 tx feature path definition */
960 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
961   .node_name = "interface-output",
962   .runs_before = 0, /* not before any other features */
963   .feature_index = &ip4_main.ip4_tx_feature_interface_output,
964 };
965
966 static clib_error_t *
967 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
968 {
969   ip_lookup_main_t * lm = &im->lookup_main;
970   clib_error_t * error;
971   vnet_cast_t cast;
972   ip_config_main_t * cm;
973   vnet_config_main_t * vcm;
974   char **feature_start_nodes;
975   int feature_start_len;
976
977   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
978     {
979       cm = &lm->feature_config_mains[cast];
980       vcm = &cm->config_main;
981
982       if (cast < VNET_IP_TX_FEAT)
983         {
984           feature_start_nodes = rx_feature_start_nodes;
985           feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
986         }
987       else
988         {
989           feature_start_nodes = tx_feature_start_nodes;
990           feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
991         }
992
993       if ((error = vnet_feature_arc_init (vm, vcm,
994                                          feature_start_nodes,
995                                          feature_start_len,
996                                          im->next_feature[cast],
997                                          &im->feature_nodes[cast])))
998         return error;
999     }
1000
1001   return 0;
1002 }
1003
1004 static clib_error_t *
1005 ip4_sw_interface_add_del (vnet_main_t * vnm,
1006                           u32 sw_if_index,
1007                           u32 is_add)
1008 {
1009   vlib_main_t * vm = vnm->vlib_main;
1010   ip4_main_t * im = &ip4_main;
1011   ip_lookup_main_t * lm = &im->lookup_main;
1012   u32 ci, cast;
1013   u32 feature_index;
1014
1015   /* Fill in lookup tables with default table (0). */
1016   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1017
1018   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
1019     {
1020       ip_config_main_t * cm = &lm->feature_config_mains[cast];
1021       vnet_config_main_t * vcm = &cm->config_main;
1022
1023       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
1024       ci = cm->config_index_by_sw_if_index[sw_if_index];
1025
1026       if (cast == VNET_IP_RX_UNICAST_FEAT)
1027         feature_index = im->ip4_unicast_rx_feature_drop;
1028       else if (cast == VNET_IP_RX_MULTICAST_FEAT)
1029         feature_index = im->ip4_multicast_rx_feature_drop;
1030       else
1031         feature_index = im->ip4_tx_feature_interface_output;
1032
1033       if (is_add)
1034         ci = vnet_config_add_feature (vm, vcm,
1035                                       ci,
1036                                       feature_index,
1037                                       /* config data */ 0,
1038                                       /* # bytes of config data */ 0);
1039       else
1040         {
1041           ci = vnet_config_del_feature (vm, vcm, ci,
1042                                         feature_index,
1043                                         /* config data */ 0,
1044                                         /* # bytes of config data */ 0);
1045           if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
1046               im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
1047         }
1048       cm->config_index_by_sw_if_index[sw_if_index] = ci;
1049       /*
1050        * note: do not update the tx feature count here.
1051        */
1052     }
1053
1054   return /* no error */ 0;
1055 }
1056
1057 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1058
1059 /* Global IP4 main. */
1060 ip4_main_t ip4_main;
1061
1062 clib_error_t *
1063 ip4_lookup_init (vlib_main_t * vm)
1064 {
1065   ip4_main_t * im = &ip4_main;
1066   clib_error_t * error;
1067   uword i;
1068
1069   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1070     {
1071       u32 m;
1072
1073       if (i < 32)
1074         m = pow2_mask (i) << (32 - i);
1075       else
1076         m = ~0;
1077       im->fib_masks[i] = clib_host_to_net_u32 (m);
1078     }
1079
1080   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1081
1082   /* Create FIB with index 0 and table id of 0. */
1083   fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1084
1085   {
1086     pg_node_t * pn;
1087     pn = pg_get_node (ip4_lookup_node.index);
1088     pn->unformat_edit = unformat_pg_ip4_header;
1089   }
1090
1091   {
1092     ethernet_arp_header_t h;
1093
1094     memset (&h, 0, sizeof (h));
1095
1096     /* Set target ethernet address to all zeros. */
1097     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1098
1099 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1100 #define _8(f,v) h.f = v;
1101     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1102     _16 (l3_type, ETHERNET_TYPE_IP4);
1103     _8 (n_l2_address_bytes, 6);
1104     _8 (n_l3_address_bytes, 4);
1105     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1106 #undef _16
1107 #undef _8
1108
1109     vlib_packet_template_init (vm,
1110                                &im->ip4_arp_request_packet_template,
1111                                /* data */ &h,
1112                                sizeof (h),
1113                                /* alloc chunk size */ 8,
1114                                "ip4 arp");
1115   }
1116
1117   error = ip4_feature_init (vm, im);
1118
1119   return error;
1120 }
1121
1122 VLIB_INIT_FUNCTION (ip4_lookup_init);
1123
1124 typedef struct {
1125   /* Adjacency taken. */
1126   u32 dpo_index;
1127   u32 flow_hash;
1128   u32 fib_index;
1129
1130   /* Packet data, possibly *after* rewrite. */
1131   u8 packet_data[64 - 1*sizeof(u32)];
1132 } ip4_forward_next_trace_t;
1133
1134 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1135 {
1136   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1137   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1138   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1139   uword indent = format_get_indent (s);
1140   s = format (s, "%U%U",
1141               format_white_space, indent,
1142               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1143   return s;
1144 }
1145
1146 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1147 {
1148   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1149   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1150   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1151   uword indent = format_get_indent (s);
1152
1153   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1154               t->fib_index, t->dpo_index, t->flow_hash);
1155   s = format (s, "\n%U%U",
1156               format_white_space, indent,
1157               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1158   return s;
1159 }
1160
1161 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1162 {
1163   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1164   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1165   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1166   vnet_main_t * vnm = vnet_get_main();
1167   uword indent = format_get_indent (s);
1168
1169   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1170               t->fib_index, t->dpo_index, format_ip_adjacency,
1171               t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1172               t->flow_hash);
1173   s = format (s, "\n%U%U",
1174               format_white_space, indent,
1175               format_ip_adjacency_packet_data,
1176               vnm, t->dpo_index,
1177               t->packet_data, sizeof (t->packet_data));
1178   return s;
1179 }
1180
1181 /* Common trace function for all ip4-forward next nodes. */
1182 void
1183 ip4_forward_next_trace (vlib_main_t * vm,
1184                         vlib_node_runtime_t * node,
1185                         vlib_frame_t * frame,
1186                         vlib_rx_or_tx_t which_adj_index)
1187 {
1188   u32 * from, n_left;
1189   ip4_main_t * im = &ip4_main;
1190
1191   n_left = frame->n_vectors;
1192   from = vlib_frame_vector_args (frame);
1193
1194   while (n_left >= 4)
1195     {
1196       u32 bi0, bi1;
1197       vlib_buffer_t * b0, * b1;
1198       ip4_forward_next_trace_t * t0, * t1;
1199
1200       /* Prefetch next iteration. */
1201       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1202       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1203
1204       bi0 = from[0];
1205       bi1 = from[1];
1206
1207       b0 = vlib_get_buffer (vm, bi0);
1208       b1 = vlib_get_buffer (vm, bi1);
1209
1210       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1211         {
1212           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1213           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1214           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1215           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1216               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1217               vec_elt (im->fib_index_by_sw_if_index,
1218                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1219
1220           clib_memcpy (t0->packet_data,
1221                   vlib_buffer_get_current (b0),
1222                   sizeof (t0->packet_data));
1223         }
1224       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1225         {
1226           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1227           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1228           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1229           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1230               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1231               vec_elt (im->fib_index_by_sw_if_index,
1232                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1233           clib_memcpy (t1->packet_data,
1234                   vlib_buffer_get_current (b1),
1235                   sizeof (t1->packet_data));
1236         }
1237       from += 2;
1238       n_left -= 2;
1239     }
1240
1241   while (n_left >= 1)
1242     {
1243       u32 bi0;
1244       vlib_buffer_t * b0;
1245       ip4_forward_next_trace_t * t0;
1246
1247       bi0 = from[0];
1248
1249       b0 = vlib_get_buffer (vm, bi0);
1250
1251       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1252         {
1253           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1254           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1255           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1256           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1257               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1258               vec_elt (im->fib_index_by_sw_if_index,
1259                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1260           clib_memcpy (t0->packet_data,
1261                   vlib_buffer_get_current (b0),
1262                   sizeof (t0->packet_data));
1263         }
1264       from += 1;
1265       n_left -= 1;
1266     }
1267 }
1268
1269 static uword
1270 ip4_drop_or_punt (vlib_main_t * vm,
1271                   vlib_node_runtime_t * node,
1272                   vlib_frame_t * frame,
1273                   ip4_error_t error_code)
1274 {
1275   u32 * buffers = vlib_frame_vector_args (frame);
1276   uword n_packets = frame->n_vectors;
1277
1278   vlib_error_drop_buffers (vm, node,
1279                            buffers,
1280                            /* stride */ 1,
1281                            n_packets,
1282                            /* next */ 0,
1283                            ip4_input_node.index,
1284                            error_code);
1285
1286   if (node->flags & VLIB_NODE_FLAG_TRACE)
1287     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1288
1289   return n_packets;
1290 }
1291
1292 static uword
1293 ip4_drop (vlib_main_t * vm,
1294           vlib_node_runtime_t * node,
1295           vlib_frame_t * frame)
1296 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1297
1298 static uword
1299 ip4_punt (vlib_main_t * vm,
1300           vlib_node_runtime_t * node,
1301           vlib_frame_t * frame)
1302 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1303
1304 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1305   .function = ip4_drop,
1306   .name = "ip4-drop",
1307   .vector_size = sizeof (u32),
1308
1309   .format_trace = format_ip4_forward_next_trace,
1310
1311   .n_next_nodes = 1,
1312   .next_nodes = {
1313     [0] = "error-drop",
1314   },
1315 };
1316
1317 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1318
1319 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1320   .function = ip4_punt,
1321   .name = "ip4-punt",
1322   .vector_size = sizeof (u32),
1323
1324   .format_trace = format_ip4_forward_next_trace,
1325
1326   .n_next_nodes = 1,
1327   .next_nodes = {
1328     [0] = "error-punt",
1329   },
1330 };
1331
1332 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1333
1334 /* Compute TCP/UDP/ICMP4 checksum in software. */
1335 u16
1336 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1337                               ip4_header_t * ip0)
1338 {
1339   ip_csum_t sum0;
1340   u32 ip_header_length, payload_length_host_byte_order;
1341   u32 n_this_buffer, n_bytes_left;
1342   u16 sum16;
1343   void * data_this_buffer;
1344
1345   /* Initialize checksum with ip header. */
1346   ip_header_length = ip4_header_bytes (ip0);
1347   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1348   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1349
1350   if (BITS (uword) == 32)
1351     {
1352       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1353       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1354     }
1355   else
1356     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1357
1358   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1359   data_this_buffer = (void *) ip0 + ip_header_length;
1360   if (n_this_buffer + ip_header_length > p0->current_length)
1361     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1362   while (1)
1363     {
1364       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1365       n_bytes_left -= n_this_buffer;
1366       if (n_bytes_left == 0)
1367         break;
1368
1369       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1370       p0 = vlib_get_buffer (vm, p0->next_buffer);
1371       data_this_buffer = vlib_buffer_get_current (p0);
1372       n_this_buffer = p0->current_length;
1373     }
1374
1375   sum16 = ~ ip_csum_fold (sum0);
1376
1377   return sum16;
1378 }
1379
1380 static u32
1381 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1382 {
1383   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1384   udp_header_t * udp0;
1385   u16 sum16;
1386
1387   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1388           || ip0->protocol == IP_PROTOCOL_UDP);
1389
1390   udp0 = (void *) (ip0 + 1);
1391   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1392     {
1393       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1394                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1395       return p0->flags;
1396     }
1397
1398   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1399
1400   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1401                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1402
1403   return p0->flags;
1404 }
1405
1406 static uword
1407 ip4_local (vlib_main_t * vm,
1408            vlib_node_runtime_t * node,
1409            vlib_frame_t * frame)
1410 {
1411   ip4_main_t * im = &ip4_main;
1412   ip_lookup_main_t * lm = &im->lookup_main;
1413   ip_local_next_t next_index;
1414   u32 * from, * to_next, n_left_from, n_left_to_next;
1415   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1416
1417   from = vlib_frame_vector_args (frame);
1418   n_left_from = frame->n_vectors;
1419   next_index = node->cached_next_index;
1420
1421   if (node->flags & VLIB_NODE_FLAG_TRACE)
1422     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1423
1424   while (n_left_from > 0)
1425     {
1426       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1427
1428       while (n_left_from >= 4 && n_left_to_next >= 2)
1429         {
1430           vlib_buffer_t * p0, * p1;
1431           ip4_header_t * ip0, * ip1;
1432           udp_header_t * udp0, * udp1;
1433           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1434           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1435           const dpo_id_t *dpo0, *dpo1;
1436           const load_balance_t *lb0, *lb1;
1437           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1438           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1439           i32 len_diff0, len_diff1;
1440           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1441           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1442           u8 enqueue_code;
1443
1444           pi0 = to_next[0] = from[0];
1445           pi1 = to_next[1] = from[1];
1446           from += 2;
1447           n_left_from -= 2;
1448           to_next += 2;
1449           n_left_to_next -= 2;
1450
1451           p0 = vlib_get_buffer (vm, pi0);
1452           p1 = vlib_get_buffer (vm, pi1);
1453
1454           ip0 = vlib_buffer_get_current (p0);
1455           ip1 = vlib_buffer_get_current (p1);
1456
1457           fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1458                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1459           fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1460                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1461
1462           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1463           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1464
1465           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1466
1467           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1468           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1469
1470           /* Treat IP frag packets as "experimental" protocol for now
1471              until support of IP frag reassembly is implemented */
1472           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1473           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1474           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1475           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1476           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1477           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1478
1479           flags0 = p0->flags;
1480           flags1 = p1->flags;
1481
1482           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1483           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1484
1485           udp0 = ip4_next_header (ip0);
1486           udp1 = ip4_next_header (ip1);
1487
1488           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1489           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1490           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1491
1492           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1493           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1494
1495           /* Verify UDP length. */
1496           ip_len0 = clib_net_to_host_u16 (ip0->length);
1497           ip_len1 = clib_net_to_host_u16 (ip1->length);
1498           udp_len0 = clib_net_to_host_u16 (udp0->length);
1499           udp_len1 = clib_net_to_host_u16 (udp1->length);
1500
1501           len_diff0 = ip_len0 - udp_len0;
1502           len_diff1 = ip_len1 - udp_len1;
1503
1504           len_diff0 = is_udp0 ? len_diff0 : 0;
1505           len_diff1 = is_udp1 ? len_diff1 : 0;
1506
1507           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1508                                 & good_tcp_udp0 & good_tcp_udp1)))
1509             {
1510               if (is_tcp_udp0)
1511                 {
1512                   if (is_tcp_udp0
1513                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1514                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1515                   good_tcp_udp0 =
1516                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1517                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1518                 }
1519               if (is_tcp_udp1)
1520                 {
1521                   if (is_tcp_udp1
1522                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1523                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1524                   good_tcp_udp1 =
1525                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1526                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1527                 }
1528             }
1529
1530           good_tcp_udp0 &= len_diff0 >= 0;
1531           good_tcp_udp1 &= len_diff1 >= 0;
1532
1533           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1534           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1535
1536           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1537
1538           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1539           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1540
1541           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1542           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1543                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1544                     : error0);
1545           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1546                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1547                     : error1);
1548
1549           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1550           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1551           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1552           leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1553
1554           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1555           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1556
1557           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1558           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1559
1560           lb0 = load_balance_get(lbi0);
1561           lb1 = load_balance_get(lbi1);
1562           dpo0 = load_balance_get_bucket_i(lb0, 0);
1563           dpo1 = load_balance_get_bucket_i(lb1, 0);
1564
1565           /*
1566            * Must have a route to source otherwise we drop the packet.
1567            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1568            *
1569            * The checks are:
1570            *  - the source is a receive => it's from us => bogus, do this
1571            *    first since it sets a different error code.
1572            *  - uRPF check for any route to source - accept if passes.
1573            *  - allow packets destined to the broadcast address from unknown sources
1574            */
1575           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1576                      dpo0->dpoi_type == DPO_RECEIVE) ?
1577                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1578                     error0);
1579           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1580                      !fib_urpf_check_size(lb0->lb_urpf) &&
1581                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1582                     ? IP4_ERROR_SRC_LOOKUP_MISS
1583                     : error0);
1584           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1585                      dpo1->dpoi_type == DPO_RECEIVE) ?
1586                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1587                     error1);
1588           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1589                      !fib_urpf_check_size(lb1->lb_urpf) &&
1590                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1591                     ? IP4_ERROR_SRC_LOOKUP_MISS
1592                     : error1);
1593
1594           next0 = lm->local_next_by_ip_protocol[proto0];
1595           next1 = lm->local_next_by_ip_protocol[proto1];
1596
1597           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1598           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1599
1600           p0->error = error0 ? error_node->errors[error0] : 0;
1601           p1->error = error1 ? error_node->errors[error1] : 0;
1602
1603           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1604
1605           if (PREDICT_FALSE (enqueue_code != 0))
1606             {
1607               switch (enqueue_code)
1608                 {
1609                 case 1:
1610                   /* A B A */
1611                   to_next[-2] = pi1;
1612                   to_next -= 1;
1613                   n_left_to_next += 1;
1614                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1615                   break;
1616
1617                 case 2:
1618                   /* A A B */
1619                   to_next -= 1;
1620                   n_left_to_next += 1;
1621                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1622                   break;
1623
1624                 case 3:
1625                   /* A B B or A B C */
1626                   to_next -= 2;
1627                   n_left_to_next += 2;
1628                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1629                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1630                   if (next0 == next1)
1631                     {
1632                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1633                       next_index = next1;
1634                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1635                     }
1636                   break;
1637                 }
1638             }
1639         }
1640
1641       while (n_left_from > 0 && n_left_to_next > 0)
1642         {
1643           vlib_buffer_t * p0;
1644           ip4_header_t * ip0;
1645           udp_header_t * udp0;
1646           ip4_fib_mtrie_t * mtrie0;
1647           ip4_fib_mtrie_leaf_t leaf0;
1648           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1649           i32 len_diff0;
1650           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1651           load_balance_t *lb0;
1652           const dpo_id_t *dpo0;
1653
1654           pi0 = to_next[0] = from[0];
1655           from += 1;
1656           n_left_from -= 1;
1657           to_next += 1;
1658           n_left_to_next -= 1;
1659
1660           p0 = vlib_get_buffer (vm, pi0);
1661
1662           ip0 = vlib_buffer_get_current (p0);
1663
1664           fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1665                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1666
1667           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1668
1669           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1670
1671           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1672
1673           /* Treat IP frag packets as "experimental" protocol for now
1674              until support of IP frag reassembly is implemented */
1675           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1676           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1677           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1678
1679           flags0 = p0->flags;
1680
1681           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1682
1683           udp0 = ip4_next_header (ip0);
1684
1685           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1686           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1687
1688           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1689
1690           /* Verify UDP length. */
1691           ip_len0 = clib_net_to_host_u16 (ip0->length);
1692           udp_len0 = clib_net_to_host_u16 (udp0->length);
1693
1694           len_diff0 = ip_len0 - udp_len0;
1695
1696           len_diff0 = is_udp0 ? len_diff0 : 0;
1697
1698           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1699             {
1700               if (is_tcp_udp0)
1701                 {
1702                   if (is_tcp_udp0
1703                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1704                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1705                   good_tcp_udp0 =
1706                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1707                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1708                 }
1709             }
1710
1711           good_tcp_udp0 &= len_diff0 >= 0;
1712
1713           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1714
1715           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1716
1717           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1718
1719           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1720           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1721                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1722                     : error0);
1723
1724           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1725           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1726
1727           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1728           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1729
1730           lb0 = load_balance_get(lbi0);
1731           dpo0 = load_balance_get_bucket_i(lb0, 0);
1732
1733           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1734               vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1735                   dpo0->dpoi_index;
1736
1737           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1738                      dpo0->dpoi_type == DPO_RECEIVE) ?
1739                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1740                     error0);
1741           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1742                      !fib_urpf_check_size(lb0->lb_urpf) &&
1743                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1744                     ? IP4_ERROR_SRC_LOOKUP_MISS
1745                     : error0);
1746
1747           next0 = lm->local_next_by_ip_protocol[proto0];
1748
1749           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1750
1751           p0->error = error0? error_node->errors[error0] : 0;
1752
1753           if (PREDICT_FALSE (next0 != next_index))
1754             {
1755               n_left_to_next += 1;
1756               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1757
1758               next_index = next0;
1759               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1760               to_next[0] = pi0;
1761               to_next += 1;
1762               n_left_to_next -= 1;
1763             }
1764         }
1765
1766       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1767     }
1768
1769   return frame->n_vectors;
1770 }
1771
1772 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1773   .function = ip4_local,
1774   .name = "ip4-local",
1775   .vector_size = sizeof (u32),
1776
1777   .format_trace = format_ip4_forward_next_trace,
1778
1779   .n_next_nodes = IP_LOCAL_N_NEXT,
1780   .next_nodes = {
1781     [IP_LOCAL_NEXT_DROP] = "error-drop",
1782     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1783     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1784     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1785   },
1786 };
1787
1788 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
1789
1790 void ip4_register_protocol (u32 protocol, u32 node_index)
1791 {
1792   vlib_main_t * vm = vlib_get_main();
1793   ip4_main_t * im = &ip4_main;
1794   ip_lookup_main_t * lm = &im->lookup_main;
1795
1796   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1797   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
1798 }
1799
1800 static clib_error_t *
1801 show_ip_local_command_fn (vlib_main_t * vm,
1802                           unformat_input_t * input,
1803                          vlib_cli_command_t * cmd)
1804 {
1805   ip4_main_t * im = &ip4_main;
1806   ip_lookup_main_t * lm = &im->lookup_main;
1807   int i;
1808
1809   vlib_cli_output (vm, "Protocols handled by ip4_local");
1810   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1811     {
1812       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1813         vlib_cli_output (vm, "%d", i);
1814     }
1815   return 0;
1816 }
1817
1818
1819
1820 /*?
1821  * Display the set of protocols handled by the local IPv4 stack.
1822  *
1823  * @cliexpar
1824  * Example of how to display local protocol table:
1825  * @cliexstart{show ip local}
1826  * Protocols handled by ip4_local
1827  * 1
1828  * 17
1829  * 47
1830  * @cliexend
1831 ?*/
1832 /* *INDENT-OFF* */
1833 VLIB_CLI_COMMAND (show_ip_local, static) = {
1834   .path = "show ip local",
1835   .function = show_ip_local_command_fn,
1836   .short_help = "show ip local",
1837 };
1838 /* *INDENT-ON* */
1839
1840 always_inline uword
1841 ip4_arp_inline (vlib_main_t * vm,
1842                 vlib_node_runtime_t * node,
1843                 vlib_frame_t * frame,
1844                 int is_glean)
1845 {
1846   vnet_main_t * vnm = vnet_get_main();
1847   ip4_main_t * im = &ip4_main;
1848   ip_lookup_main_t * lm = &im->lookup_main;
1849   u32 * from, * to_next_drop;
1850   uword n_left_from, n_left_to_next_drop, next_index;
1851   static f64 time_last_seed_change = -1e100;
1852   static u32 hash_seeds[3];
1853   static uword hash_bitmap[256 / BITS (uword)];
1854   f64 time_now;
1855
1856   if (node->flags & VLIB_NODE_FLAG_TRACE)
1857     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1858
1859   time_now = vlib_time_now (vm);
1860   if (time_now - time_last_seed_change > 1e-3)
1861     {
1862       uword i;
1863       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1864                                              sizeof (hash_seeds));
1865       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1866         hash_seeds[i] = r[i];
1867
1868       /* Mark all hash keys as been no-seen before. */
1869       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1870         hash_bitmap[i] = 0;
1871
1872       time_last_seed_change = time_now;
1873     }
1874
1875   from = vlib_frame_vector_args (frame);
1876   n_left_from = frame->n_vectors;
1877   next_index = node->cached_next_index;
1878   if (next_index == IP4_ARP_NEXT_DROP)
1879     next_index = IP4_ARP_N_NEXT; /* point to first interface */
1880
1881   while (n_left_from > 0)
1882     {
1883       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1884                            to_next_drop, n_left_to_next_drop);
1885
1886       while (n_left_from > 0 && n_left_to_next_drop > 0)
1887         {
1888           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1889           ip_adjacency_t * adj0;
1890           vlib_buffer_t * p0;
1891           ip4_header_t * ip0;
1892           uword bm0;
1893
1894           pi0 = from[0];
1895
1896           p0 = vlib_get_buffer (vm, pi0);
1897
1898           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1899           adj0 = ip_get_adjacency (lm, adj_index0);
1900           ip0 = vlib_buffer_get_current (p0);
1901
1902           a0 = hash_seeds[0];
1903           b0 = hash_seeds[1];
1904           c0 = hash_seeds[2];
1905
1906           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1907           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1908
1909           if (is_glean)
1910           {
1911               /*
1912                * this is the Glean case, so we are ARPing for the
1913                * packet's destination
1914                */
1915               a0 ^= ip0->dst_address.data_u32;
1916           }
1917           else
1918           {
1919               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1920           }
1921           b0 ^= sw_if_index0;
1922
1923           hash_v3_finalize32 (a0, b0, c0);
1924
1925           c0 &= BITS (hash_bitmap) - 1;
1926           c0 = c0 / BITS (uword);
1927           m0 = (uword) 1 << (c0 % BITS (uword));
1928
1929           bm0 = hash_bitmap[c0];
1930           drop0 = (bm0 & m0) != 0;
1931
1932           /* Mark it as seen. */
1933           hash_bitmap[c0] = bm0 | m0;
1934
1935           from += 1;
1936           n_left_from -= 1;
1937           to_next_drop[0] = pi0;
1938           to_next_drop += 1;
1939           n_left_to_next_drop -= 1;
1940
1941           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1942
1943           /*
1944            * the adj has been updated to a rewrite but the node the DPO that got
1945            * us here hasn't - yet. no big deal. we'll drop while we wait.
1946            */
1947           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1948             continue;
1949
1950           if (drop0)
1951             continue;
1952
1953           /*
1954            * Can happen if the control-plane is programming tables
1955            * with traffic flowing; at least that's today's lame excuse.
1956            */
1957           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1958               (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1959           {
1960             p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1961           }
1962           else
1963           /* Send ARP request. */
1964           {
1965             u32 bi0 = 0;
1966             vlib_buffer_t * b0;
1967             ethernet_arp_header_t * h0;
1968             vnet_hw_interface_t * hw_if0;
1969
1970             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1971
1972             /* Add rewrite/encap string for ARP packet. */
1973             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1974
1975             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1976
1977             /* Src ethernet address in ARP header. */
1978             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1979                     sizeof (h0->ip4_over_ethernet[0].ethernet));
1980
1981             if (is_glean)
1982             {
1983                 /* The interface's source address is stashed in the Glean Adj */
1984                 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1985
1986                 /* Copy in destination address we are requesting. This is the
1987                 * glean case, so it's the packet's destination.*/
1988                 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1989             }
1990             else
1991             {
1992                 /* Src IP address in ARP header. */
1993                 if (ip4_src_address_for_packet(lm, sw_if_index0,
1994                                                &h0->ip4_over_ethernet[0].ip4))
1995                 {
1996                     /* No source address available */
1997                     p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1998                     vlib_buffer_free(vm, &bi0, 1);
1999                     continue;
2000                 }
2001
2002                 /* Copy in destination address we are requesting from the
2003                    incomplete adj */
2004                 h0->ip4_over_ethernet[1].ip4.data_u32 =
2005                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
2006             }
2007
2008             vlib_buffer_copy_trace_flag (vm, p0, bi0);
2009             b0 = vlib_get_buffer (vm, bi0);
2010             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2011
2012             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2013
2014             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2015           }
2016         }
2017
2018       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2019     }
2020
2021   return frame->n_vectors;
2022 }
2023
2024 static uword
2025 ip4_arp (vlib_main_t * vm,
2026          vlib_node_runtime_t * node,
2027          vlib_frame_t * frame)
2028 {
2029     return (ip4_arp_inline(vm, node, frame, 0));
2030 }
2031
2032 static uword
2033 ip4_glean (vlib_main_t * vm,
2034            vlib_node_runtime_t * node,
2035            vlib_frame_t * frame)
2036 {
2037     return (ip4_arp_inline(vm, node, frame, 1));
2038 }
2039
2040 static char * ip4_arp_error_strings[] = {
2041   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2042   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2043   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2044   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2045   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2046   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2047 };
2048
2049 VLIB_REGISTER_NODE (ip4_arp_node) = {
2050   .function = ip4_arp,
2051   .name = "ip4-arp",
2052   .vector_size = sizeof (u32),
2053
2054   .format_trace = format_ip4_forward_next_trace,
2055
2056   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2057   .error_strings = ip4_arp_error_strings,
2058
2059   .n_next_nodes = IP4_ARP_N_NEXT,
2060   .next_nodes = {
2061     [IP4_ARP_NEXT_DROP] = "error-drop",
2062   },
2063 };
2064
2065 VLIB_REGISTER_NODE (ip4_glean_node) = {
2066   .function = ip4_glean,
2067   .name = "ip4-glean",
2068   .vector_size = sizeof (u32),
2069
2070   .format_trace = format_ip4_forward_next_trace,
2071
2072   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2073   .error_strings = ip4_arp_error_strings,
2074
2075   .n_next_nodes = IP4_ARP_N_NEXT,
2076   .next_nodes = {
2077     [IP4_ARP_NEXT_DROP] = "error-drop",
2078   },
2079 };
2080
2081 #define foreach_notrace_ip4_arp_error           \
2082 _(DROP)                                         \
2083 _(REQUEST_SENT)                                 \
2084 _(REPLICATE_DROP)                               \
2085 _(REPLICATE_FAIL)
2086
2087 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2088 {
2089   vlib_node_runtime_t *rt =
2090     vlib_node_get_runtime (vm, ip4_arp_node.index);
2091
2092   /* don't trace ARP request packets */
2093 #define _(a)                                    \
2094     vnet_pcap_drop_trace_filter_add_del         \
2095         (rt->errors[IP4_ARP_ERROR_##a],         \
2096          1 /* is_add */);
2097     foreach_notrace_ip4_arp_error;
2098 #undef _
2099   return 0;
2100 }
2101
2102 VLIB_INIT_FUNCTION(arp_notrace_init);
2103
2104
2105 /* Send an ARP request to see if given destination is reachable on given interface. */
2106 clib_error_t *
2107 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2108 {
2109   vnet_main_t * vnm = vnet_get_main();
2110   ip4_main_t * im = &ip4_main;
2111   ethernet_arp_header_t * h;
2112   ip4_address_t * src;
2113   ip_interface_address_t * ia;
2114   ip_adjacency_t * adj;
2115   vnet_hw_interface_t * hi;
2116   vnet_sw_interface_t * si;
2117   vlib_buffer_t * b;
2118   u32 bi = 0;
2119
2120   si = vnet_get_sw_interface (vnm, sw_if_index);
2121
2122   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2123     {
2124       return clib_error_return (0, "%U: interface %U down",
2125                                 format_ip4_address, dst,
2126                                 format_vnet_sw_if_index_name, vnm,
2127                                 sw_if_index);
2128     }
2129
2130   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2131   if (! src)
2132     {
2133       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2134       return clib_error_return
2135         (0, "no matching interface address for destination %U (interface %U)",
2136          format_ip4_address, dst,
2137          format_vnet_sw_if_index_name, vnm, sw_if_index);
2138     }
2139
2140   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2141
2142   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2143
2144   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2145
2146   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2147
2148   h->ip4_over_ethernet[0].ip4 = src[0];
2149   h->ip4_over_ethernet[1].ip4 = dst[0];
2150
2151   b = vlib_get_buffer (vm, bi);
2152   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2153
2154   /* Add encapsulation string for software interface (e.g. ethernet header). */
2155   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2156   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2157
2158   {
2159     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2160     u32 * to_next = vlib_frame_vector_args (f);
2161     to_next[0] = bi;
2162     f->n_vectors = 1;
2163     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2164   }
2165
2166   return /* no error */ 0;
2167 }
2168
/* Next-node arcs used by ip4_rewrite_inline(). */
typedef enum {
  /* Initial next for every packet; kept when an error is recorded. */
  IP4_REWRITE_NEXT_DROP = 0,
  /* Override for locally received packets whose adjacency is IP_LOOKUP_NEXT_ARP. */
  IP4_REWRITE_NEXT_ARP = 1,
  /* Chosen when the TTL reaches zero after the decrement. */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,
} ip4_rewrite_next_t;
2174
2175 always_inline uword
2176 ip4_rewrite_inline (vlib_main_t * vm,
2177                     vlib_node_runtime_t * node,
2178                     vlib_frame_t * frame,
2179                     int rewrite_for_locally_received_packets,
2180                     int is_midchain)
2181 {
2182   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2183   u32 * from = vlib_frame_vector_args (frame);
2184   u32 n_left_from, n_left_to_next, * to_next, next_index;
2185   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2186   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2187   ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2188
2189   n_left_from = frame->n_vectors;
2190   next_index = node->cached_next_index;
2191   u32 cpu_index = os_get_cpu_number();
2192
2193   while (n_left_from > 0)
2194     {
2195       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2196
2197       while (n_left_from >= 4 && n_left_to_next >= 2)
2198         {
2199           ip_adjacency_t * adj0, * adj1;
2200           vlib_buffer_t * p0, * p1;
2201           ip4_header_t * ip0, * ip1;
2202           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2203           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2204           u32 next0_override, next1_override;
2205           u32 tx_sw_if_index0, tx_sw_if_index1;
2206
2207           if (rewrite_for_locally_received_packets)
2208               next0_override = next1_override = 0;
2209
2210           /* Prefetch next iteration. */
2211           {
2212             vlib_buffer_t * p2, * p3;
2213
2214             p2 = vlib_get_buffer (vm, from[2]);
2215             p3 = vlib_get_buffer (vm, from[3]);
2216
2217             vlib_prefetch_buffer_header (p2, STORE);
2218             vlib_prefetch_buffer_header (p3, STORE);
2219
2220             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2221             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2222           }
2223
2224           pi0 = to_next[0] = from[0];
2225           pi1 = to_next[1] = from[1];
2226
2227           from += 2;
2228           n_left_from -= 2;
2229           to_next += 2;
2230           n_left_to_next -= 2;
2231
2232           p0 = vlib_get_buffer (vm, pi0);
2233           p1 = vlib_get_buffer (vm, pi1);
2234
2235           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2236           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2237
2238           /* We should never rewrite a pkt using the MISS adjacency */
2239           ASSERT(adj_index0 && adj_index1);
2240
2241           ip0 = vlib_buffer_get_current (p0);
2242           ip1 = vlib_buffer_get_current (p1);
2243
2244           error0 = error1 = IP4_ERROR_NONE;
2245           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2246
2247           /* Decrement TTL & update checksum.
2248              Works either endian, so no need for byte swap. */
2249           if (! rewrite_for_locally_received_packets)
2250             {
2251               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2252
2253               /* Input node should have rejected packets with ttl 0. */
2254               ASSERT (ip0->ttl > 0);
2255               ASSERT (ip1->ttl > 0);
2256
2257               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2258               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2259
2260               checksum0 += checksum0 >= 0xffff;
2261               checksum1 += checksum1 >= 0xffff;
2262
2263               ip0->checksum = checksum0;
2264               ip1->checksum = checksum1;
2265
2266               ttl0 -= 1;
2267               ttl1 -= 1;
2268
2269               ip0->ttl = ttl0;
2270               ip1->ttl = ttl1;
2271
2272               /*
2273                * If the ttl drops below 1 when forwarding, generate
2274                * an ICMP response.
2275                */
2276               if (PREDICT_FALSE(ttl0 <= 0))
2277                 {
2278                   error0 = IP4_ERROR_TIME_EXPIRED;
2279                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2280                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2281                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2282                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2283                 }
2284               if (PREDICT_FALSE(ttl1 <= 0))
2285                 {
2286                   error1 = IP4_ERROR_TIME_EXPIRED;
2287                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2288                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2289                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2290                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2291                 }
2292
2293               /* Verify checksum. */
2294               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2295               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2296             }
2297
2298           /* Rewrite packet header and updates lengths. */
2299           adj0 = ip_get_adjacency (lm, adj_index0);
2300           adj1 = ip_get_adjacency (lm, adj_index1);
2301
2302           if (rewrite_for_locally_received_packets)
2303             {
2304               if (PREDICT_FALSE(adj0->lookup_next_index
2305                                 == IP_LOOKUP_NEXT_ARP))
2306                 next0_override = IP4_REWRITE_NEXT_ARP;
2307               if (PREDICT_FALSE(adj1->lookup_next_index
2308                                 == IP_LOOKUP_NEXT_ARP))
2309                 next1_override = IP4_REWRITE_NEXT_ARP;
2310             }
2311
2312           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2313           rw_len0 = adj0[0].rewrite_header.data_bytes;
2314           rw_len1 = adj1[0].rewrite_header.data_bytes;
2315           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2316           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2317
2318           /* Check MTU of outgoing interface. */
2319           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2320                     ? IP4_ERROR_MTU_EXCEEDED
2321                     : error0);
2322           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2323                     ? IP4_ERROR_MTU_EXCEEDED
2324                     : error1);
2325
2326           next0 = (error0 == IP4_ERROR_NONE)
2327             ? adj0[0].rewrite_header.next_index : next0;
2328
2329           if (rewrite_for_locally_received_packets)
2330               next0 = next0 && next0_override ? next0_override : next0;
2331
2332           next1 = (error1 == IP4_ERROR_NONE)
2333             ? adj1[0].rewrite_header.next_index : next1;
2334
2335           if (rewrite_for_locally_received_packets)
2336               next1 = next1 && next1_override ? next1_override : next1;
2337
2338           /*
2339            * We've already accounted for an ethernet_header_t elsewhere
2340            */
2341           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2342               vlib_increment_combined_counter
2343                   (&adjacency_counters,
2344                    cpu_index, adj_index0,
2345                    /* packet increment */ 0,
2346                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2347
2348           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2349               vlib_increment_combined_counter
2350                   (&adjacency_counters,
2351                    cpu_index, adj_index1,
2352                    /* packet increment */ 0,
2353                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2354
2355           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2356            * to see the IP header */
2357           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2358             {
2359               p0->current_data -= rw_len0;
2360               p0->current_length += rw_len0;
2361               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2362               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2363                   tx_sw_if_index0;
2364
2365               if (PREDICT_FALSE
2366                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2367                                     tx_sw_if_index0)))
2368                 {
2369                   p0->current_config_index =
2370                     vec_elt (cm->config_index_by_sw_if_index,
2371                              tx_sw_if_index0);
2372                   vnet_get_config_data (&cm->config_main,
2373                                         &p0->current_config_index,
2374                                         &next0,
2375                                         /* # bytes of config data */ 0);
2376                 }
2377             }
2378           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2379             {
2380               p1->current_data -= rw_len1;
2381               p1->current_length += rw_len1;
2382
2383               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2384               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2385                   tx_sw_if_index1;
2386
2387               if (PREDICT_FALSE
2388                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2389                                     tx_sw_if_index1)))
2390                 {
2391                   p1->current_config_index =
2392                     vec_elt (cm->config_index_by_sw_if_index,
2393                              tx_sw_if_index1);
2394                   vnet_get_config_data (&cm->config_main,
2395                                         &p1->current_config_index,
2396                                         &next1,
2397                                         /* # bytes of config data */ 0);
2398                 }
2399             }
2400
2401           /* Guess we are only writing on simple Ethernet header. */
2402           vnet_rewrite_two_headers (adj0[0], adj1[0],
2403                                     ip0, ip1,
2404                                     sizeof (ethernet_header_t));
2405
2406           if (is_midchain)
2407           {
2408               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2409               adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2410           }
2411
2412           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2413                                            to_next, n_left_to_next,
2414                                            pi0, pi1, next0, next1);
2415         }
2416
2417       while (n_left_from > 0 && n_left_to_next > 0)
2418         {
2419           ip_adjacency_t * adj0;
2420           vlib_buffer_t * p0;
2421           ip4_header_t * ip0;
2422           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2423           u32 next0_override;
2424           u32 tx_sw_if_index0;
2425
2426           if (rewrite_for_locally_received_packets)
2427               next0_override = 0;
2428
2429           pi0 = to_next[0] = from[0];
2430
2431           p0 = vlib_get_buffer (vm, pi0);
2432
2433           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2434
2435           /* We should never rewrite a pkt using the MISS adjacency */
2436           ASSERT(adj_index0);
2437
2438           adj0 = ip_get_adjacency (lm, adj_index0);
2439
2440           ip0 = vlib_buffer_get_current (p0);
2441
2442           error0 = IP4_ERROR_NONE;
2443           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2444
2445           /* Decrement TTL & update checksum. */
2446           if (! rewrite_for_locally_received_packets)
2447             {
2448               i32 ttl0 = ip0->ttl;
2449
2450               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2451
2452               checksum0 += checksum0 >= 0xffff;
2453
2454               ip0->checksum = checksum0;
2455
2456               ASSERT (ip0->ttl > 0);
2457
2458               ttl0 -= 1;
2459
2460               ip0->ttl = ttl0;
2461
2462               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2463
2464               if (PREDICT_FALSE(ttl0 <= 0))
2465                 {
2466                   /*
2467                    * If the ttl drops below 1 when forwarding, generate
2468                    * an ICMP response.
2469                    */
2470                   error0 = IP4_ERROR_TIME_EXPIRED;
2471                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2472                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2473                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2474                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2475                 }
2476             }
2477
2478           if (rewrite_for_locally_received_packets)
2479             {
2480               /*
2481                * We have to override the next_index in ARP adjacencies,
2482                * because they're set up for ip4-arp, not this node...
2483                */
2484               if (PREDICT_FALSE(adj0->lookup_next_index
2485                                 == IP_LOOKUP_NEXT_ARP))
2486                 next0_override = IP4_REWRITE_NEXT_ARP;
2487             }
2488
2489           /* Guess we are only writing on simple Ethernet header. */
2490           vnet_rewrite_one_header (adj0[0], ip0,
2491                                    sizeof (ethernet_header_t));
2492
2493           /* Update packet buffer attributes/set output interface. */
2494           rw_len0 = adj0[0].rewrite_header.data_bytes;
2495           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2496
2497           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2498               vlib_increment_combined_counter
2499                   (&adjacency_counters,
2500                    cpu_index, adj_index0,
2501                    /* packet increment */ 0,
2502                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2503
2504           /* Check MTU of outgoing interface. */
2505           error0 = (vlib_buffer_length_in_chain (vm, p0)
2506                     > adj0[0].rewrite_header.max_l3_packet_bytes
2507                     ? IP4_ERROR_MTU_EXCEEDED
2508                     : error0);
2509
2510           p0->error = error_node->errors[error0];
2511
          /* Don't adjust the buffer for ttl issue; icmp-error node wants
           * to see the IP header */
2514           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2515             {
2516               p0->current_data -= rw_len0;
2517               p0->current_length += rw_len0;
2518               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2519
2520               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2521               next0 = adj0[0].rewrite_header.next_index;
2522
2523               if (is_midchain)
2524                 {
2525                   adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2526                 }
2527
2528               if (PREDICT_FALSE
2529                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2530                                     tx_sw_if_index0)))
2531                   {
2532                     p0->current_config_index =
2533                       vec_elt (cm->config_index_by_sw_if_index,
2534                                tx_sw_if_index0);
2535                     vnet_get_config_data (&cm->config_main,
2536                                           &p0->current_config_index,
2537                                           &next0,
2538                                           /* # bytes of config data */ 0);
2539                   }
2540             }
2541
2542           if (rewrite_for_locally_received_packets)
2543               next0 = next0 && next0_override ? next0_override : next0;
2544
2545           from += 1;
2546           n_left_from -= 1;
2547           to_next += 1;
2548           n_left_to_next -= 1;
2549
2550           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2551                                            to_next, n_left_to_next,
2552                                            pi0, next0);
2553         }
2554
2555       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2556     }
2557
2558   /* Need to do trace after rewrites to pick up new packet data. */
2559   if (node->flags & VLIB_NODE_FLAG_TRACE)
2560     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2561
2562   return frame->n_vectors;
2563 }
2564
2565
2566 /** @brief IPv4 transit rewrite node.
2567     @node ip4-rewrite-transit
2568
2569     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2570     header checksum, fetch the ip adjacency, check the outbound mtu,
2571     apply the adjacency rewrite, and send pkts to the adjacency
2572     rewrite header's rewrite_next_index.
2573
2574     @param vm vlib_main_t corresponding to the current thread
2575     @param node vlib_node_runtime_t
2576     @param frame vlib_frame_t whose contents should be dispatched
2577
2578     @par Graph mechanics: buffer metadata, next index usage
2579
2580     @em Uses:
2581     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2582         - the rewrite adjacency index
2583     - <code>adj->lookup_next_index</code>
2584         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2585           the packet will be dropped.
2586     - <code>adj->rewrite_header</code>
2587         - Rewrite string length, rewrite string, next_index
2588
2589     @em Sets:
2590     - <code>b->current_data, b->current_length</code>
2591         - Updated net of applying the rewrite string
2592
2593     <em>Next Indices:</em>
2594     - <code> adj->rewrite_header.next_index </code>
2595       or @c error-drop
2596 */
2597 static uword
2598 ip4_rewrite_transit (vlib_main_t * vm,
2599                      vlib_node_runtime_t * node,
2600                      vlib_frame_t * frame)
2601 {
2602   return ip4_rewrite_inline (vm, node, frame,
2603                              /* rewrite_for_locally_received_packets */ 0, 0);
2604 }
2605
2606 /** @brief IPv4 local rewrite node.
2607     @node ip4-rewrite-local
2608
2609     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2610     the outbound interface mtu, apply the adjacency rewrite, and send
    pkts to the adjacency rewrite header's rewrite_next_index. Also
    handles the corner case of an icmp4 packet whose src and dst
    addresses both equal the interface address.
2614
2615     @param vm vlib_main_t corresponding to the current thread
2616     @param node vlib_node_runtime_t
2617     @param frame vlib_frame_t whose contents should be dispatched
2618
2619     @par Graph mechanics: buffer metadata, next index usage
2620
2621     @em Uses:
2622     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2623         - the rewrite adjacency index
2624     - <code>adj->lookup_next_index</code>
2625         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2626           the packet will be dropped.
2627     - <code>adj->rewrite_header</code>
2628         - Rewrite string length, rewrite string, next_index
2629
2630     @em Sets:
2631     - <code>b->current_data, b->current_length</code>
2632         - Updated net of applying the rewrite string
2633
2634     <em>Next Indices:</em>
2635     - <code> adj->rewrite_header.next_index </code>
2636       or @c error-drop
2637 */
2638
2639 static uword
2640 ip4_rewrite_local (vlib_main_t * vm,
2641                    vlib_node_runtime_t * node,
2642                    vlib_frame_t * frame)
2643 {
2644   return ip4_rewrite_inline (vm, node, frame,
2645                              /* rewrite_for_locally_received_packets */ 1, 0);
2646 }
2647
2648 static uword
2649 ip4_midchain (vlib_main_t * vm,
2650               vlib_node_runtime_t * node,
2651               vlib_frame_t * frame)
2652 {
2653   return ip4_rewrite_inline (vm, node, frame,
2654                              /* rewrite_for_locally_received_packets */ 0, 1);
2655 }
2656
2657 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2658   .function = ip4_rewrite_transit,
2659   .name = "ip4-rewrite-transit",
2660   .vector_size = sizeof (u32),
2661
2662   .format_trace = format_ip4_rewrite_trace,
2663
2664   .n_next_nodes = 3,
2665   .next_nodes = {
2666     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2667     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2668     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2669   },
2670 };
2671
2672 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2673
2674 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2675   .function = ip4_midchain,
2676   .name = "ip4-midchain",
2677   .vector_size = sizeof (u32),
2678
2679   .format_trace = format_ip4_forward_next_trace,
2680
2681   .sibling_of = "ip4-rewrite-transit",
2682 };
2683
2684 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
2685
2686 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2687   .function = ip4_rewrite_local,
2688   .name = "ip4-rewrite-local",
2689   .vector_size = sizeof (u32),
2690
2691   .sibling_of = "ip4-rewrite-transit",
2692
2693   .format_trace = format_ip4_rewrite_trace,
2694
2695   .n_next_nodes = 0,
2696 };
2697
2698 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2699
2700 static clib_error_t *
2701 add_del_interface_table (vlib_main_t * vm,
2702                          unformat_input_t * input,
2703                          vlib_cli_command_t * cmd)
2704 {
2705   vnet_main_t * vnm = vnet_get_main();
2706   clib_error_t * error = 0;
2707   u32 sw_if_index, table_id;
2708
2709   sw_if_index = ~0;
2710
2711   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2712     {
2713       error = clib_error_return (0, "unknown interface `%U'",
2714                                  format_unformat_error, input);
2715       goto done;
2716     }
2717
2718   if (unformat (input, "%d", &table_id))
2719     ;
2720   else
2721     {
2722       error = clib_error_return (0, "expected table id `%U'",
2723                                  format_unformat_error, input);
2724       goto done;
2725     }
2726
2727   {
2728     ip4_main_t * im = &ip4_main;
2729     u32 fib_index;
2730
2731     fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2732                                                    table_id);
2733
2734     //
2735     // FIXME-LATER
2736     //  changing an interface's table has consequences for any connecteds
2737     //  and adj-fibs already installed.
2738     //
2739     vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2740     im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2741   }
2742
2743  done:
2744   return error;
2745 }
2746
2747 /*?
2748  * Place the indicated interface into the supplied IPv4 FIB table (also known
2749  * as a VRF). If the FIB table does not exist, this command creates it. To
2750  * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2751  * FIB table will only be displayed if a route has been added to the table, or
2752  * an IP Address is assigned to an interface in the table (which adds a route
2753  * automatically).
2754  *
2755  * @note IP addresses added after setting the interface IP table end up in
2756  * the indicated FIB table. If the IP address is added prior to adding the
2757  * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2758  * but potentially counter-intuitive results occur if you provision interface
2759  * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2760  * IP table ID provisioned. It might be marginally useful to evade source RPF
2761  * drops to put an interface address into multiple FIBs.
2762  *
2763  * @cliexpar
2764  * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2765  * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2766  ?*/
2767 /* *INDENT-OFF* */
2768 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2769   .path = "set interface ip table",
2770   .function = add_del_interface_table,
2771   .short_help = "set interface ip table <interface> <table-id>",
2772 };
2773 /* *INDENT-ON* */
2774
2775
/*
 * Node function for "ip4-lookup-multicast".
 *
 * For every buffer in the frame:
 *   - choose a FIB index: the one bound to the RX interface, unless
 *     sw_if_index[VLIB_TX] is not ~0, in which case that value is used
 *     as the FIB index;
 *   - look up the destination address with ip4_fib_table_lookup_lb to
 *     get a load-balance index;
 *   - compute the flow hash with the load-balance's hash config and use
 *     it (masked by lb_n_buckets_minus_1) to pick a bucket;
 *   - take the next node and adj_index[VLIB_TX] from the chosen bucket;
 *   - bump the load-balance "to" counter by one packet and the buffer
 *     chain length in bytes;
 *   - enqueue the buffer to its next node.
 *
 * Buffers are written speculatively to the frame of the current `next`
 * and the enqueue is corrected afterwards when the guess was wrong.
 *
 * Returns the number of vectors in the input frame.
 */
static uword
ip4_lookup_multicast (vlib_main_t * vm,
                      vlib_node_runtime_t * node,
                      vlib_frame_t * frame)
{
  ip4_main_t * im = &ip4_main;
  vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
  u32 n_left_from, n_left_to_next, * from, * to_next;
  ip_lookup_next_t next;
  u32 cpu_index = os_get_cpu_number();

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  /* Start by guessing the next node cached on the node runtime. */
  next = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next,
                           to_next, n_left_to_next);

      /* Dual loop: handle two buffers per iteration while prefetching
         the following two (hence the >= 4 requirement on n_left_from). */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t * p0, * p1;
          u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
          ip_lookup_next_t next0, next1;
          ip4_header_t * ip0, * ip1;
          u32 fib_index0, fib_index1;
          const dpo_id_t *dpo0, *dpo1;
          const load_balance_t * lb0, * lb1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t * p2, * p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
          }

          /* Speculatively enqueue both buffers to the current next frame. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);

          /* FIB index from the RX interface, overridden by
             sw_if_index[VLIB_TX] when that field is not ~0. */
          fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
          fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
          fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
            fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
          fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
            fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];

          lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
                                               &ip0->dst_address);
          lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
                                               &ip1->dst_address);

          lb0 = load_balance_get (lb_index0);
          lb1 = load_balance_get (lb_index1);

          /* The bucket selection below masks with lb_n_buckets_minus_1,
             so the bucket count must be a non-zero power of two. */
          ASSERT (lb0->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb0->lb_n_buckets));
          ASSERT (lb1->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb1->lb_n_buckets));

          vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
              (ip0, lb0->lb_hash_config);

          vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
              (ip1, lb1->lb_hash_config);

          dpo0 = load_balance_get_bucket_i(lb0,
                                           (vnet_buffer (p0)->ip.flow_hash &
                                            (lb0->lb_n_buckets_minus_1)));
          dpo1 = load_balance_get_bucket_i(lb1,
                                           (vnet_buffer (p1)->ip.flow_hash &
                                            (lb1->lb_n_buckets_minus_1)));

          /* The chosen bucket supplies the next node and the index
             stored in adj_index[VLIB_TX]. */
          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
          next1 = dpo1->dpoi_next_node;
          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;

          /* Count one packet and its chain length against the
             load-balance index. */
          if (1) /* $$$$$$ HACK FIXME */
          vlib_increment_combined_counter
              (cm, cpu_index, lb_index0, 1,
               vlib_buffer_length_in_chain (vm, p0));
          if (1) /* $$$$$$ HACK FIXME */
          vlib_increment_combined_counter
              (cm, cpu_index, lb_index1, 1,
               vlib_buffer_length_in_chain (vm, p1));

          from += 2;
          to_next += 2;
          n_left_to_next -= 2;
          n_left_from -= 2;

          /* Bit 0 is set when p0's next differs from the speculated
             next, bit 1 when p1's does. */
          wrong_next = (next0 != next) + 2*(next1 != next);
          if (PREDICT_FALSE (wrong_next != 0))
            {
              switch (wrong_next)
                {
                case 1:
                  /* A B A */
                  /* Only p0 was wrong: move pi1 into p0's slot, give one
                     slot back, and send pi0 to its own next. */
                  to_next[-2] = pi1;
                  to_next -= 1;
                  n_left_to_next += 1;
                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
                  break;

                case 2:
                  /* A A B */
                  /* Only p1 was wrong: give its slot back and send pi1
                     to its own next. */
                  to_next -= 1;
                  n_left_to_next += 1;
                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
                  break;

                case 3:
                  /* A B C */
                  /* Both were wrong: give both slots back and send each
                     buffer to its own next. */
                  to_next -= 2;
                  n_left_to_next += 2;
                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
                  if (next0 == next1)
                    {
                      /* A B B */
                      /* Both agree on a different next: make it the
                         speculated next from here on. */
                      vlib_put_next_frame (vm, node, next, n_left_to_next);
                      next = next1;
                      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
                    }
                }
            }
        }

      /* Single loop: same processing, one buffer at a time. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t * p0;
          ip4_header_t * ip0;
          u32 pi0, lb_index0;
          ip_lookup_next_t next0;
          u32 fib_index0;
          const dpo_id_t *dpo0;
          const load_balance_t * lb0;

          /* Speculatively enqueue to the current next frame. */
          pi0 = from[0];
          to_next[0] = pi0;

          p0 = vlib_get_buffer (vm, pi0);

          ip0 = vlib_buffer_get_current (p0);

          /* FIB index from the RX interface, overridden by
             sw_if_index[VLIB_TX] when that field is not ~0. */
          fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
                                vnet_buffer (p0)->sw_if_index[VLIB_RX]);
          fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
              fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];

          lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
                                               &ip0->dst_address);

          lb0 = load_balance_get (lb_index0);

          ASSERT (lb0->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb0->lb_n_buckets));

          vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
              (ip0, lb0->lb_hash_config);

          dpo0 = load_balance_get_bucket_i(lb0,
                                           (vnet_buffer (p0)->ip.flow_hash &
                                            (lb0->lb_n_buckets_minus_1)));

          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

          if (1) /* $$$$$$ HACK FIXME */
              vlib_increment_combined_counter
                  (cm, cpu_index, lb_index0, 1,
                   vlib_buffer_length_in_chain (vm, p0));

          from += 1;
          to_next += 1;
          n_left_to_next -= 1;
          n_left_from -= 1;

          if (PREDICT_FALSE (next0 != next))
            {
              /* Wrong guess: undo the speculative enqueue, close the
                 current frame, switch to next0 and enqueue pi0 there. */
              n_left_to_next += 1;
              vlib_put_next_frame (vm, node, next, n_left_to_next);
              next = next0;
              vlib_get_next_frame (vm, node, next,
                                   to_next, n_left_to_next);
              to_next[0] = pi0;
              to_next += 1;
              n_left_to_next -= 1;
            }
        }

      vlib_put_next_frame (vm, node, next, n_left_to_next);
    }

  /* Record traces only when tracing is enabled on this node. */
  if (node->flags & VLIB_NODE_FLAG_TRACE)
      ip4_forward_next_trace(vm, node, frame, VLIB_TX);

  return frame->n_vectors;
}
2989
2990 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2991   .function = ip4_lookup_multicast,
2992   .name = "ip4-lookup-multicast",
2993   .vector_size = sizeof (u32),
2994   .sibling_of = "ip4-lookup",
2995   .format_trace = format_ip4_lookup_trace,
2996
2997   .n_next_nodes = 0,
2998 };
2999
3000 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
3001
3002 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
3003   .function = ip4_drop,
3004   .name = "ip4-multicast",
3005   .vector_size = sizeof (u32),
3006
3007   .format_trace = format_ip4_forward_next_trace,
3008
3009   .n_next_nodes = 1,
3010   .next_nodes = {
3011     [0] = "error-drop",
3012   },
3013 };
3014
3015 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
3016 {
3017   ip4_fib_mtrie_t * mtrie0;
3018   ip4_fib_mtrie_leaf_t leaf0;
3019   u32 lbi0;
3020
3021   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
3022
3023   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
3024   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
3025   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
3026   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
3027   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
3028
3029   /* Handle default route. */
3030   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
3031
3032   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
3033
3034   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
3035 }
3036
3037 static clib_error_t *
3038 test_lookup_command_fn (vlib_main_t * vm,
3039                         unformat_input_t * input,
3040                         vlib_cli_command_t * cmd)
3041 {
3042   ip4_fib_t *fib;
3043   u32 table_id = 0;
3044   f64 count = 1;
3045   u32 n;
3046   int i;
3047   ip4_address_t ip4_base_address;
3048   u64 errors = 0;
3049
3050   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3051       if (unformat (input, "table %d", &table_id))
3052       {
3053           /* Make sure the entry exists. */
3054           fib = ip4_fib_get(table_id);
3055           if ((fib) && (fib->index != table_id))
3056               return clib_error_return (0, "<fib-index> %d does not exist",
3057                                         table_id);
3058       }
3059       else if (unformat (input, "count %f", &count))
3060         ;
3061
3062       else if (unformat (input, "%U",
3063                          unformat_ip4_address, &ip4_base_address))
3064         ;
3065       else
3066         return clib_error_return (0, "unknown input `%U'",
3067                                   format_unformat_error, input);
3068   }
3069
3070   n = count;
3071
3072   for (i = 0; i < n; i++)
3073     {
3074       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3075         errors++;
3076
3077       ip4_base_address.as_u32 =
3078         clib_host_to_net_u32 (1 +
3079                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3080     }
3081
3082   if (errors)
3083     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3084   else
3085     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3086
3087   return 0;
3088 }
3089
3090 /*?
3091  * Perform a lookup of an IPv4 Address (or range of addresses) in the
3092  * given FIB table to determine if there is a conflict with the
3093  * adjacency table. The fib-id can be determined by using the
3094  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3095  * of 0 is used.
3096  *
3097  * @todo This command uses fib-id, other commands use table-id (not
3098  * just a name, they are different indexes). Would like to change this
3099  * to table-id for consistency.
3100  *
3101  * @cliexpar
3102  * Example of how to run the test lookup command:
3103  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3104  * No errors in 2 lookups
3105  * @cliexend
3106 ?*/
3107 /* *INDENT-OFF* */
3108 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3109     .path = "test lookup",
3110     .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3111     .function = test_lookup_command_fn,
3112 };
3113 /* *INDENT-ON* */
3114
3115 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3116 {
3117   ip4_main_t * im4 = &ip4_main;
3118   ip4_fib_t * fib;
3119   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3120
3121   if (p == 0)
3122     return VNET_API_ERROR_NO_SUCH_FIB;
3123
3124   fib = ip4_fib_get (p[0]);
3125
3126   fib->flow_hash_config = flow_hash_config;
3127   return 0;
3128 }
3129
3130 static clib_error_t *
3131 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3132                              unformat_input_t * input,
3133                              vlib_cli_command_t * cmd)
3134 {
3135   int matched = 0;
3136   u32 table_id = 0;
3137   u32 flow_hash_config = 0;
3138   int rv;
3139
3140   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3141     if (unformat (input, "table %d", &table_id))
3142       matched = 1;
3143 #define _(a,v) \
3144     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3145     foreach_flow_hash_bit
3146 #undef _
3147     else break;
3148   }
3149
3150   if (matched == 0)
3151     return clib_error_return (0, "unknown input `%U'",
3152                               format_unformat_error, input);
3153
3154   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3155   switch (rv)
3156     {
3157     case 0:
3158       break;
3159
3160     case VNET_API_ERROR_NO_SUCH_FIB:
3161       return clib_error_return (0, "no such FIB table %d", table_id);
3162
3163     default:
3164       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3165       break;
3166     }
3167
3168   return 0;
3169 }
3170
3171 /*?
3172  * Configure the set of IPv4 fields used by the flow hash.
3173  *
3174  * @cliexpar
3175  * Example of how to set the flow hash on a given table:
3176  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
3178  * @cliexstart{show ip fib}
3179  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3180  * 0.0.0.0/0
3181  *   unicast-ip4-chain
3182  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3183  *     [0] [@0]: dpo-drop ip6
3184  * 0.0.0.0/32
3185  *   unicast-ip4-chain
3186  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3187  *     [0] [@0]: dpo-drop ip6
3188  * 224.0.0.0/8
3189  *   unicast-ip4-chain
3190  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3191  *     [0] [@0]: dpo-drop ip6
3192  * 6.0.1.2/32
3193  *   unicast-ip4-chain
3194  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3195  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3196  * 7.0.0.1/32
3197  *   unicast-ip4-chain
3198  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3199  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3200  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3201  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3202  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3203  * 240.0.0.0/8
3204  *   unicast-ip4-chain
3205  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3206  *     [0] [@0]: dpo-drop ip6
3207  * 255.255.255.255/32
3208  *   unicast-ip4-chain
3209  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3210  *     [0] [@0]: dpo-drop ip6
3211  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3212  * 0.0.0.0/0
3213  *   unicast-ip4-chain
3214  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3215  *     [0] [@0]: dpo-drop ip6
3216  * 0.0.0.0/32
3217  *   unicast-ip4-chain
3218  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3219  *     [0] [@0]: dpo-drop ip6
3220  * 172.16.1.0/24
3221  *   unicast-ip4-chain
3222  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3223  *     [0] [@4]: ipv4-glean: af_packet0
3224  * 172.16.1.1/32
3225  *   unicast-ip4-chain
3226  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3227  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3228  * 172.16.1.2/32
3229  *   unicast-ip4-chain
3230  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3231  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3232  * 172.16.2.0/24
3233  *   unicast-ip4-chain
3234  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3235  *     [0] [@4]: ipv4-glean: af_packet1
3236  * 172.16.2.1/32
3237  *   unicast-ip4-chain
3238  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3239  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3240  * 224.0.0.0/8
3241  *   unicast-ip4-chain
3242  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3243  *     [0] [@0]: dpo-drop ip6
3244  * 240.0.0.0/8
3245  *   unicast-ip4-chain
3246  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3247  *     [0] [@0]: dpo-drop ip6
3248  * 255.255.255.255/32
3249  *   unicast-ip4-chain
3250  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3251  *     [0] [@0]: dpo-drop ip6
3252  * @cliexend
3253 ?*/
3254 /* *INDENT-OFF* */
3255 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3256   .path = "set ip flow-hash",
3257   .short_help =
3258   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3259   .function = set_ip_flow_hash_command_fn,
3260 };
3261 /* *INDENT-ON* */
3262
3263 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3264                                  u32 table_index)
3265 {
3266   vnet_main_t * vnm = vnet_get_main();
3267   vnet_interface_main_t * im = &vnm->interface_main;
3268   ip4_main_t * ipm = &ip4_main;
3269   ip_lookup_main_t * lm = &ipm->lookup_main;
3270   vnet_classify_main_t * cm = &vnet_classify_main;
3271   ip4_address_t *if_addr;
3272
3273   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3274     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3275
3276   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3277     return VNET_API_ERROR_NO_SUCH_ENTRY;
3278
3279   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3280   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3281
3282   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3283
3284   if (NULL != if_addr)
3285   {
3286       fib_prefix_t pfx = {
3287           .fp_len = 32,
3288           .fp_proto = FIB_PROTOCOL_IP4,
3289           .fp_addr.ip4 = *if_addr,
3290       };
3291       u32 fib_index;
3292
3293       fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3294                                                       sw_if_index);
3295
3296
3297       if (table_index != (u32) ~0)
3298       {
3299           dpo_id_t dpo = DPO_INVALID;
3300
3301           dpo_set(&dpo,
3302                   DPO_CLASSIFY,
3303                   DPO_PROTO_IP4,
3304                   classify_dpo_create(FIB_PROTOCOL_IP4,
3305                                       table_index));
3306
3307           fib_table_entry_special_dpo_add(fib_index,
3308                                           &pfx,
3309                                           FIB_SOURCE_CLASSIFY,
3310                                           FIB_ENTRY_FLAG_NONE,
3311                                           &dpo);
3312           dpo_reset(&dpo);
3313       }
3314       else
3315       {
3316           fib_table_entry_special_remove(fib_index,
3317                                          &pfx,
3318                                          FIB_SOURCE_CLASSIFY);
3319       }
3320   }
3321
3322   return 0;
3323 }
3324
3325 static clib_error_t *
3326 set_ip_classify_command_fn (vlib_main_t * vm,
3327                             unformat_input_t * input,
3328                             vlib_cli_command_t * cmd)
3329 {
3330   u32 table_index = ~0;
3331   int table_index_set = 0;
3332   u32 sw_if_index = ~0;
3333   int rv;
3334
3335   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3336     if (unformat (input, "table-index %d", &table_index))
3337       table_index_set = 1;
3338     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3339                        vnet_get_main(), &sw_if_index))
3340       ;
3341     else
3342       break;
3343   }
3344
3345   if (table_index_set == 0)
3346     return clib_error_return (0, "classify table-index must be specified");
3347
3348   if (sw_if_index == ~0)
3349     return clib_error_return (0, "interface / subif must be specified");
3350
3351   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3352
3353   switch (rv)
3354     {
3355     case 0:
3356       break;
3357
3358     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3359       return clib_error_return (0, "No such interface");
3360
3361     case VNET_API_ERROR_NO_SUCH_ENTRY:
3362       return clib_error_return (0, "No such classifier table");
3363     }
3364   return 0;
3365 }
3366
3367 /*?
3368  * Assign a classification table to an interface. The classification
3369  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3370  * commands. Once the table is created, use this command to filter packets
3371  * on an interface.
3372  *
3373  * @cliexpar
3374  * Example of how to assign a classification table to an interface:
3375  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3376 ?*/
3377 /* *INDENT-OFF* */
3378 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3379     .path = "set ip classify",
3380     .short_help =
3381     "set ip classify intfc <interface> table-index <classify-idx>",
3382     .function = set_ip_classify_command_fn,
3383 };
3384 /* *INDENT-ON* */
3385