21973453af72697e9d3f6f4b61d4dd8311ff657d
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53
54 /**
55  * @file
56  * @brief IPv4 Forwarding.
57  *
58  * This file contains the source code for IPv4 forwarding.
59  */
60
61 void
62 ip4_forward_next_trace (vlib_main_t * vm,
63                         vlib_node_runtime_t * node,
64                         vlib_frame_t * frame,
65                         vlib_rx_or_tx_t which_adj_index);
66
67 always_inline uword
68 ip4_lookup_inline (vlib_main_t * vm,
69                    vlib_node_runtime_t * node,
70                    vlib_frame_t * frame,
71                    int lookup_for_responses_to_locally_received_packets)
72 {
73   ip4_main_t * im = &ip4_main;
74   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
75   u32 n_left_from, n_left_to_next, * from, * to_next;
76   ip_lookup_next_t next;
77   u32 cpu_index = os_get_cpu_number();
78
79   from = vlib_frame_vector_args (frame);
80   n_left_from = frame->n_vectors;
81   next = node->cached_next_index;
82
83   while (n_left_from > 0)
84     {
85       vlib_get_next_frame (vm, node, next,
86                            to_next, n_left_to_next);
87
88       while (n_left_from >= 4 && n_left_to_next >= 2)
89         {
90           vlib_buffer_t * p0, * p1;
91           ip4_header_t * ip0, * ip1;
92           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
93           ip_lookup_next_t next0, next1;
94           const load_balance_t * lb0, * lb1;
95           ip4_fib_mtrie_t * mtrie0, * mtrie1;
96           ip4_fib_mtrie_leaf_t leaf0, leaf1;
97           ip4_address_t * dst_addr0, *dst_addr1;
98           __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
99           __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
100           flow_hash_config_t flow_hash_config0, flow_hash_config1;
101           u32 hash_c0, hash_c1;
102           u32 wrong_next;
103           const dpo_id_t *dpo0, *dpo1;
104
105           /* Prefetch next iteration. */
106           {
107             vlib_buffer_t * p2, * p3;
108
109             p2 = vlib_get_buffer (vm, from[2]);
110             p3 = vlib_get_buffer (vm, from[3]);
111
112             vlib_prefetch_buffer_header (p2, LOAD);
113             vlib_prefetch_buffer_header (p3, LOAD);
114
115             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
116             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
117           }
118
119           pi0 = to_next[0] = from[0];
120           pi1 = to_next[1] = from[1];
121
122           p0 = vlib_get_buffer (vm, pi0);
123           p1 = vlib_get_buffer (vm, pi1);
124
125           ip0 = vlib_buffer_get_current (p0);
126           ip1 = vlib_buffer_get_current (p1);
127
128           dst_addr0 = &ip0->dst_address;
129           dst_addr1 = &ip1->dst_address;
130
131           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
132           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
133           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
134             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
135           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
136             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
137
138
139           if (! lookup_for_responses_to_locally_received_packets)
140             {
141               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
142               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
143
144               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
145
146               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
147               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
148             }
149
150           tcp0 = (void *) (ip0 + 1);
151           tcp1 = (void *) (ip1 + 1);
152
153           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
154                          || ip0->protocol == IP_PROTOCOL_UDP);
155           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
156                          || ip1->protocol == IP_PROTOCOL_UDP);
157
158           if (! lookup_for_responses_to_locally_received_packets)
159             {
160               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
161               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
162             }
163
164           if (! lookup_for_responses_to_locally_received_packets)
165             {
166               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
167               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
168             }
169
170           if (! lookup_for_responses_to_locally_received_packets)
171             {
172               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
173               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
174             }
175
176           if (lookup_for_responses_to_locally_received_packets)
177             {
178               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
179               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
180             }
181           else
182             {
183               /* Handle default route. */
184               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
185               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
186
187               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
188               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
189             }
190
191           lb0 = load_balance_get (lb_index0);
192           lb1 = load_balance_get (lb_index1);
193
194           /* Use flow hash to compute multipath adjacency. */
195           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
196           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
197           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
198             {
199               flow_hash_config0 = lb0->lb_hash_config;
200               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
201                 ip4_compute_flow_hash (ip0, flow_hash_config0);
202             }
203           if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
204             {
205               flow_hash_config1 = lb1->lb_hash_config;
206               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
207                 ip4_compute_flow_hash (ip1, flow_hash_config1);
208             }
209
210           ASSERT (lb0->lb_n_buckets > 0);
211           ASSERT (is_pow2 (lb0->lb_n_buckets));
212           ASSERT (lb1->lb_n_buckets > 0);
213           ASSERT (is_pow2 (lb1->lb_n_buckets));
214
215           dpo0 = load_balance_get_bucket_i(lb0,
216                                            (hash_c0 &
217                                             (lb0->lb_n_buckets_minus_1)));
218           dpo1 = load_balance_get_bucket_i(lb1,
219                                            (hash_c1 &
220                                             (lb0->lb_n_buckets_minus_1)));
221
222           next0 = dpo0->dpoi_next_node;
223           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
224           next1 = dpo1->dpoi_next_node;
225           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
226
227           vlib_increment_combined_counter
228               (cm, cpu_index, lb_index0, 1,
229                vlib_buffer_length_in_chain (vm, p0)
230                + sizeof(ethernet_header_t));
231           vlib_increment_combined_counter
232               (cm, cpu_index, lb_index1, 1,
233                vlib_buffer_length_in_chain (vm, p1)
234                + sizeof(ethernet_header_t));
235
236           from += 2;
237           to_next += 2;
238           n_left_to_next -= 2;
239           n_left_from -= 2;
240
241           wrong_next = (next0 != next) + 2*(next1 != next);
242           if (PREDICT_FALSE (wrong_next != 0))
243             {
244               switch (wrong_next)
245                 {
246                 case 1:
247                   /* A B A */
248                   to_next[-2] = pi1;
249                   to_next -= 1;
250                   n_left_to_next += 1;
251                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
252                   break;
253
254                 case 2:
255                   /* A A B */
256                   to_next -= 1;
257                   n_left_to_next += 1;
258                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
259                   break;
260
261                 case 3:
262                   /* A B C */
263                   to_next -= 2;
264                   n_left_to_next += 2;
265                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
266                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
267                   if (next0 == next1)
268                     {
269                       /* A B B */
270                       vlib_put_next_frame (vm, node, next, n_left_to_next);
271                       next = next1;
272                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
273                     }
274                 }
275             }
276         }
277
278       while (n_left_from > 0 && n_left_to_next > 0)
279         {
280           vlib_buffer_t * p0;
281           ip4_header_t * ip0;
282           __attribute__((unused)) tcp_header_t * tcp0;
283           ip_lookup_next_t next0;
284           const load_balance_t *lb0;
285           ip4_fib_mtrie_t * mtrie0;
286           ip4_fib_mtrie_leaf_t leaf0;
287           ip4_address_t * dst_addr0;
288           __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
289           flow_hash_config_t flow_hash_config0;
290           const dpo_id_t *dpo0;
291           u32 hash_c0;
292
293           pi0 = from[0];
294           to_next[0] = pi0;
295
296           p0 = vlib_get_buffer (vm, pi0);
297
298           ip0 = vlib_buffer_get_current (p0);
299
300           dst_addr0 = &ip0->dst_address;
301
302           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
303           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
304             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
305
306           if (! lookup_for_responses_to_locally_received_packets)
307             {
308               mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
309
310               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
311
312               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
313             }
314
315           tcp0 = (void *) (ip0 + 1);
316
317           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
318                          || ip0->protocol == IP_PROTOCOL_UDP);
319
320           if (! lookup_for_responses_to_locally_received_packets)
321             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
322
323           if (! lookup_for_responses_to_locally_received_packets)
324             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
325
326           if (! lookup_for_responses_to_locally_received_packets)
327             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
328
329           if (lookup_for_responses_to_locally_received_packets)
330             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
331           else
332             {
333               /* Handle default route. */
334               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
335               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
336             }
337
338           lb0 = load_balance_get (lbi0);
339
340           /* Use flow hash to compute multipath adjacency. */
341           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
342           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
343             {
344               flow_hash_config0 = lb0->lb_hash_config;
345
346               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
347                 ip4_compute_flow_hash (ip0, flow_hash_config0);
348             }
349
350           ASSERT (lb0->lb_n_buckets > 0);
351           ASSERT (is_pow2 (lb0->lb_n_buckets));
352
353           dpo0 = load_balance_get_bucket_i(lb0,
354                                            (hash_c0 &
355                                             (lb0->lb_n_buckets_minus_1)));
356
357           next0 = dpo0->dpoi_next_node;
358           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
359
360           vlib_increment_combined_counter
361               (cm, cpu_index, lbi0, 1,
362                vlib_buffer_length_in_chain (vm, p0));
363
364           from += 1;
365           to_next += 1;
366           n_left_to_next -= 1;
367           n_left_from -= 1;
368
369           if (PREDICT_FALSE (next0 != next))
370             {
371               n_left_to_next += 1;
372               vlib_put_next_frame (vm, node, next, n_left_to_next);
373               next = next0;
374               vlib_get_next_frame (vm, node, next,
375                                    to_next, n_left_to_next);
376               to_next[0] = pi0;
377               to_next += 1;
378               n_left_to_next -= 1;
379             }
380         }
381
382       vlib_put_next_frame (vm, node, next, n_left_to_next);
383     }
384
385   if (node->flags & VLIB_NODE_FLAG_TRACE)
386     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
387
388   return frame->n_vectors;
389 }
390
391 /** @brief IPv4 lookup node.
392     @node ip4-lookup
393
394     This is the main IPv4 lookup dispatch node.
395
396     @param vm vlib_main_t corresponding to the current thread
397     @param node vlib_node_runtime_t
398     @param frame vlib_frame_t whose contents should be dispatched
399
400     @par Graph mechanics: buffer metadata, next index usage
401
402     @em Uses:
403     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
404         - Indicates the @c sw_if_index value of the interface that the
405           packet was received on.
406     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
407         - When the value is @c ~0 then the node performs a longest prefix
408           match (LPM) for the packet destination address in the FIB attached
409           to the receive interface.
410         - Otherwise perform LPM for the packet destination address in the
411           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
412           value (0, 1, ...) and not a VRF id.
413
414     @em Sets:
415     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
416         - The lookup result adjacency index.
417
418     <em>Next Index:</em>
419     - Dispatches the packet to the node index found in
420       ip_adjacency_t @c adj->lookup_next_index
421       (where @c adj is the lookup result adjacency).
422 */
423 static uword
424 ip4_lookup (vlib_main_t * vm,
425             vlib_node_runtime_t * node,
426             vlib_frame_t * frame)
427 {
428   return ip4_lookup_inline (vm, node, frame,
429                             /* lookup_for_responses_to_locally_received_packets */ 0);
430
431 }
432
433 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
434
435 VLIB_REGISTER_NODE (ip4_lookup_node) = {
436   .function = ip4_lookup,
437   .name = "ip4-lookup",
438   .vector_size = sizeof (u32),
439
440   .format_trace = format_ip4_lookup_trace,
441   .n_next_nodes = IP_LOOKUP_N_NEXT,
442   .next_nodes = IP4_LOOKUP_NEXT_NODES,
443 };
444
445 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
446
447 always_inline uword
448 ip4_load_balance (vlib_main_t * vm,
449                   vlib_node_runtime_t * node,
450                   vlib_frame_t * frame)
451 {
452   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
453   u32 n_left_from, n_left_to_next, * from, * to_next;
454   ip_lookup_next_t next;
455   u32 cpu_index = os_get_cpu_number();
456
457   from = vlib_frame_vector_args (frame);
458   n_left_from = frame->n_vectors;
459   next = node->cached_next_index;
460
461   if (node->flags & VLIB_NODE_FLAG_TRACE)
462       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
463
464   while (n_left_from > 0)
465     {
466       vlib_get_next_frame (vm, node, next,
467                            to_next, n_left_to_next);
468
469
470       while (n_left_from > 0 && n_left_to_next > 0)
471         {
472           ip_lookup_next_t next0;
473           const load_balance_t *lb0;
474           vlib_buffer_t * p0;
475           u32 pi0, lbi0, hc0;
476           const ip4_header_t *ip0;
477           const dpo_id_t *dpo0;
478
479           pi0 = from[0];
480           to_next[0] = pi0;
481
482           p0 = vlib_get_buffer (vm, pi0);
483
484           ip0 = vlib_buffer_get_current (p0);
485           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
486
487           lb0 = load_balance_get(lbi0);
488           hc0 = lb0->lb_hash_config;
489           vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
490
491           dpo0 = load_balance_get_bucket_i(lb0,
492                                            vnet_buffer(p0)->ip.flow_hash &
493                                            (lb0->lb_n_buckets_minus_1));
494
495           next0 = dpo0->dpoi_next_node;
496           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
497
498           vlib_increment_combined_counter
499               (cm, cpu_index, lbi0, 1,
500                vlib_buffer_length_in_chain (vm, p0));
501
502           from += 1;
503           to_next += 1;
504           n_left_to_next -= 1;
505           n_left_from -= 1;
506
507           if (PREDICT_FALSE (next0 != next))
508             {
509               n_left_to_next += 1;
510               vlib_put_next_frame (vm, node, next, n_left_to_next);
511               next = next0;
512               vlib_get_next_frame (vm, node, next,
513                                    to_next, n_left_to_next);
514               to_next[0] = pi0;
515               to_next += 1;
516               n_left_to_next -= 1;
517             }
518         }
519
520       vlib_put_next_frame (vm, node, next, n_left_to_next);
521     }
522
523   return frame->n_vectors;
524 }
525
526 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
527
528 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
529   .function = ip4_load_balance,
530   .name = "ip4-load-balance",
531   .vector_size = sizeof (u32),
532   .sibling_of = "ip4-lookup",
533
534   .format_trace = format_ip4_forward_next_trace,
535 };
536
537 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
538
539 /* get first interface address */
540 ip4_address_t *
541 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
542                              ip_interface_address_t ** result_ia)
543 {
544   ip_lookup_main_t * lm = &im->lookup_main;
545   ip_interface_address_t * ia = 0;
546   ip4_address_t * result = 0;
547
548   foreach_ip_interface_address (lm, ia, sw_if_index,
549                                 1 /* honor unnumbered */,
550   ({
551     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
552     result = a;
553     break;
554   }));
555   if (result_ia)
556     *result_ia = result ? ia : 0;
557   return result;
558 }
559
560 static void
561 ip4_add_interface_routes (u32 sw_if_index,
562                           ip4_main_t * im, u32 fib_index,
563                           ip_interface_address_t * a)
564 {
565   ip_lookup_main_t * lm = &im->lookup_main;
566   ip4_address_t * address = ip_interface_address_get_address (lm, a);
567   fib_prefix_t pfx = {
568       .fp_len = a->address_length,
569       .fp_proto = FIB_PROTOCOL_IP4,
570       .fp_addr.ip4 = *address,
571   };
572
573   a->neighbor_probe_adj_index = ~0;
574
575   if (pfx.fp_len < 32)
576   {
577       fib_node_index_t fei;
578
579       fei = fib_table_entry_update_one_path(fib_index,
580                                             &pfx,
581                                             FIB_SOURCE_INTERFACE,
582                                             (FIB_ENTRY_FLAG_CONNECTED |
583                                              FIB_ENTRY_FLAG_ATTACHED),
584                                             FIB_PROTOCOL_IP4,
585                                             NULL, /* No next-hop address */
586                                             sw_if_index,
587                                             ~0, // invalid FIB index
588                                             1,
589                                             MPLS_LABEL_INVALID,
590                                             FIB_ROUTE_PATH_FLAG_NONE);
591       a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
592   }
593
594   pfx.fp_len = 32;
595
596   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
597   {
598       u32 classify_table_index =
599           lm->classify_table_index_by_sw_if_index [sw_if_index];
600       if (classify_table_index != (u32) ~0)
601       {
602           dpo_id_t dpo = DPO_NULL;
603
604           dpo_set(&dpo,
605                   DPO_CLASSIFY,
606                   DPO_PROTO_IP4,
607                   classify_dpo_create(FIB_PROTOCOL_IP4,
608                                       classify_table_index));
609
610           fib_table_entry_special_dpo_add(fib_index,
611                                           &pfx,
612                                           FIB_SOURCE_CLASSIFY,
613                                           FIB_ENTRY_FLAG_NONE,
614                                           &dpo);
615           dpo_reset(&dpo);
616       }
617   }
618
619   fib_table_entry_update_one_path(fib_index,
620                                   &pfx,
621                                   FIB_SOURCE_INTERFACE,
622                                   (FIB_ENTRY_FLAG_CONNECTED |
623                                    FIB_ENTRY_FLAG_LOCAL),
624                                   FIB_PROTOCOL_IP4,
625                                   &pfx.fp_addr,
626                                   sw_if_index,
627                                   ~0, // invalid FIB index
628                                   1,
629                                   MPLS_LABEL_INVALID,
630                                   FIB_ROUTE_PATH_FLAG_NONE);
631 }
632
633 static void
634 ip4_del_interface_routes (ip4_main_t * im,
635                           u32 fib_index,
636                           ip4_address_t * address,
637                           u32 address_length)
638 {
639     fib_prefix_t pfx = {
640         .fp_len = address_length,
641         .fp_proto = FIB_PROTOCOL_IP4,
642         .fp_addr.ip4 = *address,
643     };
644
645     if (pfx.fp_len < 32)
646     {
647         fib_table_entry_delete(fib_index,
648                                &pfx,
649                                FIB_SOURCE_INTERFACE);
650     }
651
652     pfx.fp_len = 32;
653     fib_table_entry_delete(fib_index,
654                            &pfx,
655                            FIB_SOURCE_INTERFACE);
656 }
657
658 void
659 ip4_sw_interface_enable_disable (u32 sw_if_index,
660                                  u32 is_enable)
661 {
662   vlib_main_t * vm = vlib_get_main();
663   ip4_main_t * im = &ip4_main;
664   ip_lookup_main_t * lm = &im->lookup_main;
665   u32 ci, cast;
666   u32 lookup_feature_index;
667
668   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
669
670   /*
671    * enable/disable only on the 1<->0 transition
672    */
673   if (is_enable)
674     {
675       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
676         return;
677     }
678   else
679     {
680       ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
681       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
682         return;
683     }
684
685   for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
686     {
687       ip_config_main_t * cm = &lm->feature_config_mains[cast];
688       vnet_config_main_t * vcm = &cm->config_main;
689
690       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
691       ci = cm->config_index_by_sw_if_index[sw_if_index];
692
693       if (cast == VNET_IP_RX_UNICAST_FEAT)
694         lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
695       else
696         lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
697
698       if (is_enable)
699         ci = vnet_config_add_feature (vm, vcm,
700                                       ci,
701                                       lookup_feature_index,
702                                       /* config data */ 0,
703                                       /* # bytes of config data */ 0);
704       else
705         ci = vnet_config_del_feature (vm, vcm,
706                                       ci,
707                                       lookup_feature_index,
708                                       /* config data */ 0,
709                                       /* # bytes of config data */ 0);
710       cm->config_index_by_sw_if_index[sw_if_index] = ci;
711     }
712 }
713
714 static clib_error_t *
715 ip4_add_del_interface_address_internal (vlib_main_t * vm,
716                                         u32 sw_if_index,
717                                         ip4_address_t * address,
718                                         u32 address_length,
719                                         u32 is_del)
720 {
721   vnet_main_t * vnm = vnet_get_main();
722   ip4_main_t * im = &ip4_main;
723   ip_lookup_main_t * lm = &im->lookup_main;
724   clib_error_t * error = 0;
725   u32 if_address_index, elts_before;
726   ip4_address_fib_t ip4_af, * addr_fib = 0;
727
728   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
729   ip4_addr_fib_init (&ip4_af, address,
730                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
731   vec_add1 (addr_fib, ip4_af);
732
733   /* FIXME-LATER
734    * there is no support for adj-fib handling in the presence of overlapping
735    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
736    * most routers do.
737    */
738   if (! is_del)
739     {
740       /* When adding an address check that it does not conflict
741          with an existing address. */
742       ip_interface_address_t * ia;
743       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
744                                     0 /* honor unnumbered */,
745       ({
746         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
747
748         if (ip4_destination_matches_route (im, address, x, ia->address_length)
749             || ip4_destination_matches_route (im, x, address, address_length))
750           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
751                                     format_ip4_address_and_length, address, address_length,
752                                     format_ip4_address_and_length, x, ia->address_length,
753                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
754        }));
755     }
756
757   elts_before = pool_elts (lm->if_address_pool);
758
759   error = ip_interface_address_add_del
760     (lm,
761      sw_if_index,
762      addr_fib,
763      address_length,
764      is_del,
765      &if_address_index);
766   if (error)
767     goto done;
768
769   ip4_sw_interface_enable_disable(sw_if_index, !is_del);
770
771   if (is_del)
772       ip4_del_interface_routes (im, ip4_af.fib_index, address,
773                                 address_length);
774   else
775       ip4_add_interface_routes (sw_if_index,
776                                 im, ip4_af.fib_index,
777                                 pool_elt_at_index
778                                 (lm->if_address_pool, if_address_index));
779
780   /* If pool did not grow/shrink: add duplicate address. */
781   if (elts_before != pool_elts (lm->if_address_pool))
782     {
783       ip4_add_del_interface_address_callback_t * cb;
784       vec_foreach (cb, im->add_del_interface_address_callbacks)
785         cb->function (im, cb->function_opaque, sw_if_index,
786                       address, address_length,
787                       if_address_index,
788                       is_del);
789     }
790
791  done:
792   vec_free (addr_fib);
793   return error;
794 }
795
796 clib_error_t *
797 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
798                                ip4_address_t * address, u32 address_length,
799                                u32 is_del)
800 {
801   return ip4_add_del_interface_address_internal
802     (vm, sw_if_index, address, address_length,
803      is_del);
804 }
805
806 /* Built-in ip4 unicast rx feature path definition */
807 VNET_IP4_UNICAST_FEATURE_INIT (ip4_flow_classify, static) = {
808   .node_name = "ip4-flow-classify",
809   .runs_before = ORDER_CONSTRAINTS {"ip4-inacl", 0},
810   .feature_index = &ip4_main.ip4_unicast_rx_feature_flow_classify,
811 };
812
813 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
814   .node_name = "ip4-inacl",
815   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
816   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
817 };
818
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
820   .node_name = "ip4-source-check-via-rx",
821   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
822   .feature_index =
823   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
824 };
825
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
827   .node_name = "ip4-source-check-via-any",
828   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
829   .feature_index =
830   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
831 };
832
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
834   .node_name = "ip4-source-and-port-range-check-rx",
835   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
836   .feature_index =
837   &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
838 };
839
840 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
841   .node_name = "ip4-policer-classify",
842   .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
843   .feature_index =
844   &ip4_main.ip4_unicast_rx_feature_policer_classify,
845 };
846
847 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
848   .node_name = "ipsec-input-ip4",
849   .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
850   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
851 };
852
853 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
854   .node_name = "vpath-input-ip4",
855   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
856   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
857 };
858
859 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
860   .node_name = "ip4-lookup",
861   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
862   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
863 };
864
865 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
866   .node_name = "ip4-drop",
867   .runs_before = 0, /* not before any other features */
868   .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
869 };
870
871
872 /* Built-in ip4 multicast rx feature path definition */
873 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
874   .node_name = "vpath-input-ip4",
875   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
876   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
877 };
878
879 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
880   .node_name = "ip4-lookup-multicast",
881   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
882   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
883 };
884
885 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
886   .node_name = "ip4-drop",
887   .runs_before = 0, /* last feature */
888   .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
889 };
890
/*
 * Start nodes handed to vnet_feature_arc_init() by ip4_feature_init() for
 * every cast below VNET_IP_TX_FEAT (the rx arcs).
 */
static char *rx_feature_start_nodes[] = {
  "ip4-input",
  "ip4-input-no-checksum",
};

/*
 * Start nodes handed to vnet_feature_arc_init() by ip4_feature_init() for
 * the remaining casts (the tx arc).
 */
static char *tx_feature_start_nodes[] = {
  "ip4-rewrite-transit",
  "ip4-midchain",
};
899
900 /* Source and port-range check ip4 tx feature path definition */
901 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
902   .node_name = "ip4-source-and-port-range-check-tx",
903   .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
904   .feature_index =
905   &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
906
907 };
908
909 /* Built-in ip4 tx feature path definition */
910 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
911   .node_name = "interface-output",
912   .runs_before = 0, /* not before any other features */
913   .feature_index = &ip4_main.ip4_tx_feature_interface_output,
914 };
915
916 static clib_error_t *
917 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
918 {
919   ip_lookup_main_t * lm = &im->lookup_main;
920   clib_error_t * error;
921   vnet_cast_t cast;
922   ip_config_main_t * cm;
923   vnet_config_main_t * vcm;
924   char **feature_start_nodes;
925   int feature_start_len;
926
927   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
928     {
929       cm = &lm->feature_config_mains[cast];
930       vcm = &cm->config_main;
931
932       if (cast < VNET_IP_TX_FEAT)
933         {
934           feature_start_nodes = rx_feature_start_nodes;
935           feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
936         }
937       else
938         {
939           feature_start_nodes = tx_feature_start_nodes;
940           feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
941         }
942
943       if ((error = vnet_feature_arc_init (vm, vcm,
944                                          feature_start_nodes,
945                                          feature_start_len,
946                                          im->next_feature[cast],
947                                          &im->feature_nodes[cast])))
948         return error;
949     }
950
951   return 0;
952 }
953
954 static clib_error_t *
955 ip4_sw_interface_add_del (vnet_main_t * vnm,
956                           u32 sw_if_index,
957                           u32 is_add)
958 {
959   vlib_main_t * vm = vnm->vlib_main;
960   ip4_main_t * im = &ip4_main;
961   ip_lookup_main_t * lm = &im->lookup_main;
962   u32 ci, cast;
963   u32 feature_index;
964
965   /* Fill in lookup tables with default table (0). */
966   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
967
968   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
969     {
970       ip_config_main_t * cm = &lm->feature_config_mains[cast];
971       vnet_config_main_t * vcm = &cm->config_main;
972
973       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
974       ci = cm->config_index_by_sw_if_index[sw_if_index];
975
976       if (cast == VNET_IP_RX_UNICAST_FEAT)
977         feature_index = im->ip4_unicast_rx_feature_drop;
978       else if (cast == VNET_IP_RX_MULTICAST_FEAT)
979         feature_index = im->ip4_multicast_rx_feature_drop;
980       else
981         feature_index = im->ip4_tx_feature_interface_output;
982
983       if (is_add)
984         ci = vnet_config_add_feature (vm, vcm,
985                                       ci,
986                                       feature_index,
987                                       /* config data */ 0,
988                                       /* # bytes of config data */ 0);
989       else
990         {
991           ci = vnet_config_del_feature (vm, vcm, ci,
992                                         feature_index,
993                                         /* config data */ 0,
994                                         /* # bytes of config data */ 0);
995           if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
996               im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
997         }
998       cm->config_index_by_sw_if_index[sw_if_index] = ci;
999       /*
1000        * note: do not update the tx feature count here.
1001        */
1002     }
1003
1004   return /* no error */ 0;
1005 }
1006
1007 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1008
1009 /* Global IP4 main. */
1010 ip4_main_t ip4_main;
1011
1012 clib_error_t *
1013 ip4_lookup_init (vlib_main_t * vm)
1014 {
1015   ip4_main_t * im = &ip4_main;
1016   clib_error_t * error;
1017   uword i;
1018
1019   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1020     {
1021       u32 m;
1022
1023       if (i < 32)
1024         m = pow2_mask (i) << (32 - i);
1025       else
1026         m = ~0;
1027       im->fib_masks[i] = clib_host_to_net_u32 (m);
1028     }
1029
1030   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1031
1032   /* Create FIB with index 0 and table id of 0. */
1033   fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1034
1035   {
1036     pg_node_t * pn;
1037     pn = pg_get_node (ip4_lookup_node.index);
1038     pn->unformat_edit = unformat_pg_ip4_header;
1039   }
1040
1041   {
1042     ethernet_arp_header_t h;
1043
1044     memset (&h, 0, sizeof (h));
1045
1046     /* Set target ethernet address to all zeros. */
1047     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1048
1049 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1050 #define _8(f,v) h.f = v;
1051     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1052     _16 (l3_type, ETHERNET_TYPE_IP4);
1053     _8 (n_l2_address_bytes, 6);
1054     _8 (n_l3_address_bytes, 4);
1055     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1056 #undef _16
1057 #undef _8
1058
1059     vlib_packet_template_init (vm,
1060                                &im->ip4_arp_request_packet_template,
1061                                /* data */ &h,
1062                                sizeof (h),
1063                                /* alloc chunk size */ 8,
1064                                "ip4 arp");
1065   }
1066
1067   error = ip4_feature_init (vm, im);
1068
1069   return error;
1070 }
1071
1072 VLIB_INIT_FUNCTION (ip4_lookup_init);
1073
1074 typedef struct {
1075   /* Adjacency taken. */
1076   u32 dpo_index;
1077   u32 flow_hash;
1078   u32 fib_index;
1079
1080   /* Packet data, possibly *after* rewrite. */
1081   u8 packet_data[64 - 1*sizeof(u32)];
1082 } ip4_forward_next_trace_t;
1083
1084 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1085 {
1086   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1087   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1088   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1089   uword indent = format_get_indent (s);
1090   s = format (s, "%U%U",
1091               format_white_space, indent,
1092               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1093   return s;
1094 }
1095
1096 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1097 {
1098   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1099   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1100   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1101   uword indent = format_get_indent (s);
1102
1103   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1104               t->fib_index, t->dpo_index, t->flow_hash);
1105   s = format (s, "\n%U%U",
1106               format_white_space, indent,
1107               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1108   return s;
1109 }
1110
1111 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1112 {
1113   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1114   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1115   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1116   vnet_main_t * vnm = vnet_get_main();
1117   uword indent = format_get_indent (s);
1118
1119   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1120               t->fib_index, t->dpo_index, format_ip_adjacency,
1121               vnm, t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1122               t->flow_hash);
1123   s = format (s, "\n%U%U",
1124               format_white_space, indent,
1125               format_ip_adjacency_packet_data,
1126               vnm, t->dpo_index,
1127               t->packet_data, sizeof (t->packet_data));
1128   return s;
1129 }
1130
1131 /* Common trace function for all ip4-forward next nodes. */
1132 void
1133 ip4_forward_next_trace (vlib_main_t * vm,
1134                         vlib_node_runtime_t * node,
1135                         vlib_frame_t * frame,
1136                         vlib_rx_or_tx_t which_adj_index)
1137 {
1138   u32 * from, n_left;
1139   ip4_main_t * im = &ip4_main;
1140
1141   n_left = frame->n_vectors;
1142   from = vlib_frame_vector_args (frame);
1143
1144   while (n_left >= 4)
1145     {
1146       u32 bi0, bi1;
1147       vlib_buffer_t * b0, * b1;
1148       ip4_forward_next_trace_t * t0, * t1;
1149
1150       /* Prefetch next iteration. */
1151       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1152       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1153
1154       bi0 = from[0];
1155       bi1 = from[1];
1156
1157       b0 = vlib_get_buffer (vm, bi0);
1158       b1 = vlib_get_buffer (vm, bi1);
1159
1160       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1161         {
1162           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1163           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1164           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1165           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1166               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1167               vec_elt (im->fib_index_by_sw_if_index,
1168                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1169
1170           clib_memcpy (t0->packet_data,
1171                   vlib_buffer_get_current (b0),
1172                   sizeof (t0->packet_data));
1173         }
1174       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1175         {
1176           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1177           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1178           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1179           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1180               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1181               vec_elt (im->fib_index_by_sw_if_index,
1182                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1183           clib_memcpy (t1->packet_data,
1184                   vlib_buffer_get_current (b1),
1185                   sizeof (t1->packet_data));
1186         }
1187       from += 2;
1188       n_left -= 2;
1189     }
1190
1191   while (n_left >= 1)
1192     {
1193       u32 bi0;
1194       vlib_buffer_t * b0;
1195       ip4_forward_next_trace_t * t0;
1196
1197       bi0 = from[0];
1198
1199       b0 = vlib_get_buffer (vm, bi0);
1200
1201       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1202         {
1203           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1204           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1205           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1206           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1207               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1208               vec_elt (im->fib_index_by_sw_if_index,
1209                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1210           clib_memcpy (t0->packet_data,
1211                   vlib_buffer_get_current (b0),
1212                   sizeof (t0->packet_data));
1213         }
1214       from += 1;
1215       n_left -= 1;
1216     }
1217 }
1218
1219 static uword
1220 ip4_drop_or_punt (vlib_main_t * vm,
1221                   vlib_node_runtime_t * node,
1222                   vlib_frame_t * frame,
1223                   ip4_error_t error_code)
1224 {
1225   u32 * buffers = vlib_frame_vector_args (frame);
1226   uword n_packets = frame->n_vectors;
1227
1228   vlib_error_drop_buffers (vm, node,
1229                            buffers,
1230                            /* stride */ 1,
1231                            n_packets,
1232                            /* next */ 0,
1233                            ip4_input_node.index,
1234                            error_code);
1235
1236   if (node->flags & VLIB_NODE_FLAG_TRACE)
1237     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1238
1239   return n_packets;
1240 }
1241
1242 static uword
1243 ip4_drop (vlib_main_t * vm,
1244           vlib_node_runtime_t * node,
1245           vlib_frame_t * frame)
1246 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1247
1248 static uword
1249 ip4_punt (vlib_main_t * vm,
1250           vlib_node_runtime_t * node,
1251           vlib_frame_t * frame)
1252 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1253
1254 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1255   .function = ip4_drop,
1256   .name = "ip4-drop",
1257   .vector_size = sizeof (u32),
1258
1259   .format_trace = format_ip4_forward_next_trace,
1260
1261   .n_next_nodes = 1,
1262   .next_nodes = {
1263     [0] = "error-drop",
1264   },
1265 };
1266
1267 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1268
1269 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1270   .function = ip4_punt,
1271   .name = "ip4-punt",
1272   .vector_size = sizeof (u32),
1273
1274   .format_trace = format_ip4_forward_next_trace,
1275
1276   .n_next_nodes = 1,
1277   .next_nodes = {
1278     [0] = "error-punt",
1279   },
1280 };
1281
1282 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1283
1284 /* Compute TCP/UDP/ICMP4 checksum in software. */
1285 u16
1286 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1287                               ip4_header_t * ip0)
1288 {
1289   ip_csum_t sum0;
1290   u32 ip_header_length, payload_length_host_byte_order;
1291   u32 n_this_buffer, n_bytes_left;
1292   u16 sum16;
1293   void * data_this_buffer;
1294
1295   /* Initialize checksum with ip header. */
1296   ip_header_length = ip4_header_bytes (ip0);
1297   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1298   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1299
1300   if (BITS (uword) == 32)
1301     {
1302       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1303       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1304     }
1305   else
1306     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1307
1308   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1309   data_this_buffer = (void *) ip0 + ip_header_length;
1310   if (n_this_buffer + ip_header_length > p0->current_length)
1311     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1312   while (1)
1313     {
1314       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1315       n_bytes_left -= n_this_buffer;
1316       if (n_bytes_left == 0)
1317         break;
1318
1319       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1320       p0 = vlib_get_buffer (vm, p0->next_buffer);
1321       data_this_buffer = vlib_buffer_get_current (p0);
1322       n_this_buffer = p0->current_length;
1323     }
1324
1325   sum16 = ~ ip_csum_fold (sum0);
1326
1327   return sum16;
1328 }
1329
1330 static u32
1331 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1332 {
1333   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1334   udp_header_t * udp0;
1335   u16 sum16;
1336
1337   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1338           || ip0->protocol == IP_PROTOCOL_UDP);
1339
1340   udp0 = (void *) (ip0 + 1);
1341   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1342     {
1343       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1344                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1345       return p0->flags;
1346     }
1347
1348   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1349
1350   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1351                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1352
1353   return p0->flags;
1354 }
1355
1356 static uword
1357 ip4_local (vlib_main_t * vm,
1358            vlib_node_runtime_t * node,
1359            vlib_frame_t * frame)
1360 {
1361   ip4_main_t * im = &ip4_main;
1362   ip_lookup_main_t * lm = &im->lookup_main;
1363   ip_local_next_t next_index;
1364   u32 * from, * to_next, n_left_from, n_left_to_next;
1365   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1366
1367   from = vlib_frame_vector_args (frame);
1368   n_left_from = frame->n_vectors;
1369   next_index = node->cached_next_index;
1370
1371   if (node->flags & VLIB_NODE_FLAG_TRACE)
1372     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1373
1374   while (n_left_from > 0)
1375     {
1376       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1377
1378       while (n_left_from >= 4 && n_left_to_next >= 2)
1379         {
1380           vlib_buffer_t * p0, * p1;
1381           ip4_header_t * ip0, * ip1;
1382           udp_header_t * udp0, * udp1;
1383           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1384           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1385           const dpo_id_t *dpo0, *dpo1;
1386           const load_balance_t *lb0, *lb1;
1387           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1388           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1389           i32 len_diff0, len_diff1;
1390           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1391           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1392           u8 enqueue_code;
1393
1394           pi0 = to_next[0] = from[0];
1395           pi1 = to_next[1] = from[1];
1396           from += 2;
1397           n_left_from -= 2;
1398           to_next += 2;
1399           n_left_to_next -= 2;
1400
1401           p0 = vlib_get_buffer (vm, pi0);
1402           p1 = vlib_get_buffer (vm, pi1);
1403
1404           ip0 = vlib_buffer_get_current (p0);
1405           ip1 = vlib_buffer_get_current (p1);
1406
1407           fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1408                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1409           fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1410                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1411
1412           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1413           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1414
1415           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1416
1417           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1418           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1419
1420           /* Treat IP frag packets as "experimental" protocol for now
1421              until support of IP frag reassembly is implemented */
1422           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1423           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1424           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1425           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1426           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1427           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1428
1429           flags0 = p0->flags;
1430           flags1 = p1->flags;
1431
1432           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1433           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1434
1435           udp0 = ip4_next_header (ip0);
1436           udp1 = ip4_next_header (ip1);
1437
1438           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1439           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1440           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1441
1442           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1443           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1444
1445           /* Verify UDP length. */
1446           ip_len0 = clib_net_to_host_u16 (ip0->length);
1447           ip_len1 = clib_net_to_host_u16 (ip1->length);
1448           udp_len0 = clib_net_to_host_u16 (udp0->length);
1449           udp_len1 = clib_net_to_host_u16 (udp1->length);
1450
1451           len_diff0 = ip_len0 - udp_len0;
1452           len_diff1 = ip_len1 - udp_len1;
1453
1454           len_diff0 = is_udp0 ? len_diff0 : 0;
1455           len_diff1 = is_udp1 ? len_diff1 : 0;
1456
1457           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1458                                 & good_tcp_udp0 & good_tcp_udp1)))
1459             {
1460               if (is_tcp_udp0)
1461                 {
1462                   if (is_tcp_udp0
1463                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1464                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1465                   good_tcp_udp0 =
1466                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1467                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1468                 }
1469               if (is_tcp_udp1)
1470                 {
1471                   if (is_tcp_udp1
1472                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1473                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1474                   good_tcp_udp1 =
1475                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1476                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1477                 }
1478             }
1479
1480           good_tcp_udp0 &= len_diff0 >= 0;
1481           good_tcp_udp1 &= len_diff1 >= 0;
1482
1483           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1484           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1485
1486           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1487
1488           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1489           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1490
1491           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1492           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1493                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1494                     : error0);
1495           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1496                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1497                     : error1);
1498
1499           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1500           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1501           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1502           leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1503
1504           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1505           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1506
1507           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1508           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1509
1510           lb0 = load_balance_get(lbi0);
1511           lb1 = load_balance_get(lbi1);
1512           dpo0 = load_balance_get_bucket_i(lb0, 0);
1513           dpo1 = load_balance_get_bucket_i(lb1, 0);
1514
1515           /*
1516            * Must have a route to source otherwise we drop the packet.
1517            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1518            *
1519            * The checks are:
1520            *  - the source is a recieve => it's from us => bogus, do this
1521            *    first since it sets a different error code.
1522            *  - uRPF check for any route to source - accept if passes.
1523            *  - allow packets destined to the broadcast address from unknown sources
1524            */
1525           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1526                      dpo0->dpoi_type == DPO_RECEIVE) ?
1527                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1528                     error0);
1529           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1530                      !fib_urpf_check_size(lb0->lb_urpf) &&
1531                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1532                     ? IP4_ERROR_SRC_LOOKUP_MISS
1533                     : error0);
1534           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1535                      dpo1->dpoi_type == DPO_RECEIVE) ?
1536                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1537                     error1);
1538           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1539                      !fib_urpf_check_size(lb1->lb_urpf) &&
1540                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1541                     ? IP4_ERROR_SRC_LOOKUP_MISS
1542                     : error1);
1543
1544           next0 = lm->local_next_by_ip_protocol[proto0];
1545           next1 = lm->local_next_by_ip_protocol[proto1];
1546
1547           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1548           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1549
1550           p0->error = error0 ? error_node->errors[error0] : 0;
1551           p1->error = error1 ? error_node->errors[error1] : 0;
1552
1553           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1554
1555           if (PREDICT_FALSE (enqueue_code != 0))
1556             {
1557               switch (enqueue_code)
1558                 {
1559                 case 1:
1560                   /* A B A */
1561                   to_next[-2] = pi1;
1562                   to_next -= 1;
1563                   n_left_to_next += 1;
1564                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1565                   break;
1566
1567                 case 2:
1568                   /* A A B */
1569                   to_next -= 1;
1570                   n_left_to_next += 1;
1571                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1572                   break;
1573
1574                 case 3:
1575                   /* A B B or A B C */
1576                   to_next -= 2;
1577                   n_left_to_next += 2;
1578                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1579                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1580                   if (next0 == next1)
1581                     {
1582                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1583                       next_index = next1;
1584                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1585                     }
1586                   break;
1587                 }
1588             }
1589         }
1590
1591       while (n_left_from > 0 && n_left_to_next > 0)
1592         {
1593           vlib_buffer_t * p0;
1594           ip4_header_t * ip0;
1595           udp_header_t * udp0;
1596           ip4_fib_mtrie_t * mtrie0;
1597           ip4_fib_mtrie_leaf_t leaf0;
1598           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1599           i32 len_diff0;
1600           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1601           load_balance_t *lb0;
1602           const dpo_id_t *dpo0;
1603
1604           pi0 = to_next[0] = from[0];
1605           from += 1;
1606           n_left_from -= 1;
1607           to_next += 1;
1608           n_left_to_next -= 1;
1609
1610           p0 = vlib_get_buffer (vm, pi0);
1611
1612           ip0 = vlib_buffer_get_current (p0);
1613
1614           fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1615                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1616
1617           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1618
1619           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1620
1621           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1622
1623           /* Treat IP frag packets as "experimental" protocol for now
1624              until support of IP frag reassembly is implemented */
1625           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1626           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1627           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1628
1629           flags0 = p0->flags;
1630
1631           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1632
1633           udp0 = ip4_next_header (ip0);
1634
1635           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1636           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1637
1638           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1639
1640           /* Verify UDP length. */
1641           ip_len0 = clib_net_to_host_u16 (ip0->length);
1642           udp_len0 = clib_net_to_host_u16 (udp0->length);
1643
1644           len_diff0 = ip_len0 - udp_len0;
1645
1646           len_diff0 = is_udp0 ? len_diff0 : 0;
1647
1648           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1649             {
1650               if (is_tcp_udp0)
1651                 {
1652                   if (is_tcp_udp0
1653                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1654                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1655                   good_tcp_udp0 =
1656                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1657                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1658                 }
1659             }
1660
1661           good_tcp_udp0 &= len_diff0 >= 0;
1662
1663           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1664
1665           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1666
1667           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1668
1669           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1670           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1671                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1672                     : error0);
1673
1674           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1675           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1676
1677           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1678           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1679
1680           lb0 = load_balance_get(lbi0);
1681           dpo0 = load_balance_get_bucket_i(lb0, 0);
1682
1683           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1684               vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1685                   dpo0->dpoi_index;
1686
1687           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1688                      dpo0->dpoi_type == DPO_RECEIVE) ?
1689                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1690                     error0);
1691           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1692                      !fib_urpf_check_size(lb0->lb_urpf) &&
1693                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1694                     ? IP4_ERROR_SRC_LOOKUP_MISS
1695                     : error0);
1696
1697           next0 = lm->local_next_by_ip_protocol[proto0];
1698
1699           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1700
1701           p0->error = error0? error_node->errors[error0] : 0;
1702
1703           if (PREDICT_FALSE (next0 != next_index))
1704             {
1705               n_left_to_next += 1;
1706               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1707
1708               next_index = next0;
1709               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1710               to_next[0] = pi0;
1711               to_next += 1;
1712               n_left_to_next -= 1;
1713             }
1714         }
1715
1716       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1717     }
1718
1719   return frame->n_vectors;
1720 }
1721
1722 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1723   .function = ip4_local,
1724   .name = "ip4-local",
1725   .vector_size = sizeof (u32),
1726
1727   .format_trace = format_ip4_forward_next_trace,
1728
1729   .n_next_nodes = IP_LOCAL_N_NEXT,
1730   .next_nodes = {
1731     [IP_LOCAL_NEXT_DROP] = "error-drop",
1732     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1733     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1734     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1735   },
1736 };
1737
1738 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
1739
1740 void ip4_register_protocol (u32 protocol, u32 node_index)
1741 {
1742   vlib_main_t * vm = vlib_get_main();
1743   ip4_main_t * im = &ip4_main;
1744   ip_lookup_main_t * lm = &im->lookup_main;
1745
1746   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1747   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
1748 }
1749
1750 static clib_error_t *
1751 show_ip_local_command_fn (vlib_main_t * vm,
1752                           unformat_input_t * input,
1753                          vlib_cli_command_t * cmd)
1754 {
1755   ip4_main_t * im = &ip4_main;
1756   ip_lookup_main_t * lm = &im->lookup_main;
1757   int i;
1758
1759   vlib_cli_output (vm, "Protocols handled by ip4_local");
1760   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1761     {
1762       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1763         vlib_cli_output (vm, "%d", i);
1764     }
1765   return 0;
1766 }
1767
1768
1769
1770 /*?
1771  * Display the set of protocols handled by the local IPv4 stack.
1772  *
1773  * @cliexpar
1774  * Example of how to display local protocol table:
1775  * @cliexstart{show ip local}
1776  * Protocols handled by ip4_local
1777  * 1
1778  * 17
1779  * 47
1780  * @cliexend
1781 ?*/
1782 /* *INDENT-OFF* */
1783 VLIB_CLI_COMMAND (show_ip_local, static) = {
1784   .path = "show ip local",
1785   .function = show_ip_local_command_fn,
1786   .short_help = "show ip local",
1787 };
1788 /* *INDENT-ON* */
1789
1790 always_inline uword
1791 ip4_arp_inline (vlib_main_t * vm,
1792                 vlib_node_runtime_t * node,
1793                 vlib_frame_t * frame,
1794                 int is_glean)
1795 {
1796   vnet_main_t * vnm = vnet_get_main();
1797   ip4_main_t * im = &ip4_main;
1798   ip_lookup_main_t * lm = &im->lookup_main;
1799   u32 * from, * to_next_drop;
1800   uword n_left_from, n_left_to_next_drop, next_index;
1801   static f64 time_last_seed_change = -1e100;
1802   static u32 hash_seeds[3];
1803   static uword hash_bitmap[256 / BITS (uword)];
1804   f64 time_now;
1805
1806   if (node->flags & VLIB_NODE_FLAG_TRACE)
1807     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1808
1809   time_now = vlib_time_now (vm);
1810   if (time_now - time_last_seed_change > 1e-3)
1811     {
1812       uword i;
1813       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1814                                              sizeof (hash_seeds));
1815       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1816         hash_seeds[i] = r[i];
1817
1818       /* Mark all hash keys as been no-seen before. */
1819       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1820         hash_bitmap[i] = 0;
1821
1822       time_last_seed_change = time_now;
1823     }
1824
1825   from = vlib_frame_vector_args (frame);
1826   n_left_from = frame->n_vectors;
1827   next_index = node->cached_next_index;
1828   if (next_index == IP4_ARP_NEXT_DROP)
1829     next_index = IP4_ARP_N_NEXT; /* point to first interface */
1830
1831   while (n_left_from > 0)
1832     {
1833       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1834                            to_next_drop, n_left_to_next_drop);
1835
1836       while (n_left_from > 0 && n_left_to_next_drop > 0)
1837         {
1838           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1839           ip_adjacency_t * adj0;
1840           vlib_buffer_t * p0;
1841           ip4_header_t * ip0;
1842           uword bm0;
1843
1844           pi0 = from[0];
1845
1846           p0 = vlib_get_buffer (vm, pi0);
1847
1848           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1849           adj0 = ip_get_adjacency (lm, adj_index0);
1850           ip0 = vlib_buffer_get_current (p0);
1851
1852           /*
1853            * this is the Glean case, so we are ARPing for the
1854            * packet's destination
1855            */
1856           a0 = hash_seeds[0];
1857           b0 = hash_seeds[1];
1858           c0 = hash_seeds[2];
1859
1860           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1861           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1862
1863           if (is_glean)
1864           {
1865               a0 ^= ip0->dst_address.data_u32;
1866           }
1867           else
1868           {
1869               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1870           }
1871           b0 ^= sw_if_index0;
1872
1873           hash_v3_finalize32 (a0, b0, c0);
1874
1875           c0 &= BITS (hash_bitmap) - 1;
1876           c0 = c0 / BITS (uword);
1877           m0 = (uword) 1 << (c0 % BITS (uword));
1878
1879           bm0 = hash_bitmap[c0];
1880           drop0 = (bm0 & m0) != 0;
1881
1882           /* Mark it as seen. */
1883           hash_bitmap[c0] = bm0 | m0;
1884
1885           from += 1;
1886           n_left_from -= 1;
1887           to_next_drop[0] = pi0;
1888           to_next_drop += 1;
1889           n_left_to_next_drop -= 1;
1890
1891           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1892
1893           if (drop0)
1894             continue;
1895
1896           /*
1897            * Can happen if the control-plane is programming tables
1898            * with traffic flowing; at least that's today's lame excuse.
1899            */
1900           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1901               (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1902           {
1903             p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1904           }
1905           else
1906           /* Send ARP request. */
1907           {
1908             u32 bi0 = 0;
1909             vlib_buffer_t * b0;
1910             ethernet_arp_header_t * h0;
1911             vnet_hw_interface_t * hw_if0;
1912
1913             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1914
1915             /* Add rewrite/encap string for ARP packet. */
1916             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1917
1918             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1919
1920             /* Src ethernet address in ARP header. */
1921             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1922                     sizeof (h0->ip4_over_ethernet[0].ethernet));
1923
1924             if (is_glean)
1925             {
1926                 /* The interface's source address is stashed in the Glean Adj */
1927                 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1928
1929                 /* Copy in destination address we are requesting. This is the
1930                 * glean case, so it's the packet's destination.*/
1931                 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1932             }
1933             else
1934             {
1935                 /* Src IP address in ARP header. */
1936                 if (ip4_src_address_for_packet(lm, sw_if_index0,
1937                                                &h0->ip4_over_ethernet[0].ip4))
1938                 {
1939                     /* No source address available */
1940                     p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1941                     vlib_buffer_free(vm, &bi0, 1);
1942                     continue;
1943                 }
1944
1945                 /* Copy in destination address we are requesting from the
1946                    incomplete adj */
1947                 h0->ip4_over_ethernet[1].ip4.data_u32 =
1948                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1949             }
1950
1951             vlib_buffer_copy_trace_flag (vm, p0, bi0);
1952             b0 = vlib_get_buffer (vm, bi0);
1953             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1954
1955             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1956
1957             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1958           }
1959         }
1960
1961       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1962     }
1963
1964   return frame->n_vectors;
1965 }
1966
1967 static uword
1968 ip4_arp (vlib_main_t * vm,
1969          vlib_node_runtime_t * node,
1970          vlib_frame_t * frame)
1971 {
1972     return (ip4_arp_inline(vm, node, frame, 0));
1973 }
1974
1975 static uword
1976 ip4_glean (vlib_main_t * vm,
1977            vlib_node_runtime_t * node,
1978            vlib_frame_t * frame)
1979 {
1980     return (ip4_arp_inline(vm, node, frame, 1));
1981 }
1982
1983 static char * ip4_arp_error_strings[] = {
1984   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1985   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1986   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1987   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1988   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1989   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1990 };
1991
1992 VLIB_REGISTER_NODE (ip4_arp_node) = {
1993   .function = ip4_arp,
1994   .name = "ip4-arp",
1995   .vector_size = sizeof (u32),
1996
1997   .format_trace = format_ip4_forward_next_trace,
1998
1999   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2000   .error_strings = ip4_arp_error_strings,
2001
2002   .n_next_nodes = IP4_ARP_N_NEXT,
2003   .next_nodes = {
2004     [IP4_ARP_NEXT_DROP] = "error-drop",
2005   },
2006 };
2007
2008 VLIB_REGISTER_NODE (ip4_glean_node) = {
2009   .function = ip4_glean,
2010   .name = "ip4-glean",
2011   .vector_size = sizeof (u32),
2012
2013   .format_trace = format_ip4_forward_next_trace,
2014
2015   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2016   .error_strings = ip4_arp_error_strings,
2017
2018   .n_next_nodes = IP4_ARP_N_NEXT,
2019   .next_nodes = {
2020     [IP4_ARP_NEXT_DROP] = "error-drop",
2021   },
2022 };
2023
2024 #define foreach_notrace_ip4_arp_error           \
2025 _(DROP)                                         \
2026 _(REQUEST_SENT)                                 \
2027 _(REPLICATE_DROP)                               \
2028 _(REPLICATE_FAIL)
2029
2030 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2031 {
2032   vlib_node_runtime_t *rt =
2033     vlib_node_get_runtime (vm, ip4_arp_node.index);
2034
2035   /* don't trace ARP request packets */
2036 #define _(a)                                    \
2037     vnet_pcap_drop_trace_filter_add_del         \
2038         (rt->errors[IP4_ARP_ERROR_##a],         \
2039          1 /* is_add */);
2040     foreach_notrace_ip4_arp_error;
2041 #undef _
2042   return 0;
2043 }
2044
2045 VLIB_INIT_FUNCTION(arp_notrace_init);
2046
2047
2048 /* Send an ARP request to see if given destination is reachable on given interface. */
2049 clib_error_t *
2050 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2051 {
2052   vnet_main_t * vnm = vnet_get_main();
2053   ip4_main_t * im = &ip4_main;
2054   ethernet_arp_header_t * h;
2055   ip4_address_t * src;
2056   ip_interface_address_t * ia;
2057   ip_adjacency_t * adj;
2058   vnet_hw_interface_t * hi;
2059   vnet_sw_interface_t * si;
2060   vlib_buffer_t * b;
2061   u32 bi = 0;
2062
2063   si = vnet_get_sw_interface (vnm, sw_if_index);
2064
2065   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2066     {
2067       return clib_error_return (0, "%U: interface %U down",
2068                                 format_ip4_address, dst,
2069                                 format_vnet_sw_if_index_name, vnm,
2070                                 sw_if_index);
2071     }
2072
2073   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2074   if (! src)
2075     {
2076       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2077       return clib_error_return
2078         (0, "no matching interface address for destination %U (interface %U)",
2079          format_ip4_address, dst,
2080          format_vnet_sw_if_index_name, vnm, sw_if_index);
2081     }
2082
2083   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2084
2085   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2086
2087   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2088
2089   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2090
2091   h->ip4_over_ethernet[0].ip4 = src[0];
2092   h->ip4_over_ethernet[1].ip4 = dst[0];
2093
2094   b = vlib_get_buffer (vm, bi);
2095   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2096
2097   /* Add encapsulation string for software interface (e.g. ethernet header). */
2098   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2099   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2100
2101   {
2102     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2103     u32 * to_next = vlib_frame_vector_args (f);
2104     to_next[0] = bi;
2105     f->n_vectors = 1;
2106     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2107   }
2108
2109   return /* no error */ 0;
2110 }
2111
/* Next-node slots referenced by name in ip4_rewrite_inline. */
typedef enum {
  /* Default next for a packet that failed a check. */
  IP4_REWRITE_NEXT_DROP = 0,
  /* Override used for locally received packets hitting an ARP adjacency. */
  IP4_REWRITE_NEXT_ARP = 1,
  /* Used when the TTL expires while forwarding. */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,
} ip4_rewrite_next_t;
2117
2118 always_inline uword
2119 ip4_rewrite_inline (vlib_main_t * vm,
2120                     vlib_node_runtime_t * node,
2121                     vlib_frame_t * frame,
2122                     int rewrite_for_locally_received_packets,
2123                     int is_midchain)
2124 {
2125   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2126   u32 * from = vlib_frame_vector_args (frame);
2127   u32 n_left_from, n_left_to_next, * to_next, next_index;
2128   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2129   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2130   ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2131
2132   n_left_from = frame->n_vectors;
2133   next_index = node->cached_next_index;
2134   u32 cpu_index = os_get_cpu_number();
2135
2136   while (n_left_from > 0)
2137     {
2138       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2139
2140       while (n_left_from >= 4 && n_left_to_next >= 2)
2141         {
2142           ip_adjacency_t * adj0, * adj1;
2143           vlib_buffer_t * p0, * p1;
2144           ip4_header_t * ip0, * ip1;
2145           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2146           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2147           u32 next0_override, next1_override;
2148           u32 tx_sw_if_index0, tx_sw_if_index1;
2149
2150           if (rewrite_for_locally_received_packets)
2151               next0_override = next1_override = 0;
2152
2153           /* Prefetch next iteration. */
2154           {
2155             vlib_buffer_t * p2, * p3;
2156
2157             p2 = vlib_get_buffer (vm, from[2]);
2158             p3 = vlib_get_buffer (vm, from[3]);
2159
2160             vlib_prefetch_buffer_header (p2, STORE);
2161             vlib_prefetch_buffer_header (p3, STORE);
2162
2163             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2164             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2165           }
2166
2167           pi0 = to_next[0] = from[0];
2168           pi1 = to_next[1] = from[1];
2169
2170           from += 2;
2171           n_left_from -= 2;
2172           to_next += 2;
2173           n_left_to_next -= 2;
2174
2175           p0 = vlib_get_buffer (vm, pi0);
2176           p1 = vlib_get_buffer (vm, pi1);
2177
2178           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2179           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2180
2181           /* We should never rewrite a pkt using the MISS adjacency */
2182           ASSERT(adj_index0 && adj_index1);
2183
2184           ip0 = vlib_buffer_get_current (p0);
2185           ip1 = vlib_buffer_get_current (p1);
2186
2187           error0 = error1 = IP4_ERROR_NONE;
2188           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2189
2190           /* Decrement TTL & update checksum.
2191              Works either endian, so no need for byte swap. */
2192           if (! rewrite_for_locally_received_packets)
2193             {
2194               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2195
2196               /* Input node should have reject packets with ttl 0. */
2197               ASSERT (ip0->ttl > 0);
2198               ASSERT (ip1->ttl > 0);
2199
2200               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2201               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2202
2203               checksum0 += checksum0 >= 0xffff;
2204               checksum1 += checksum1 >= 0xffff;
2205
2206               ip0->checksum = checksum0;
2207               ip1->checksum = checksum1;
2208
2209               ttl0 -= 1;
2210               ttl1 -= 1;
2211
2212               ip0->ttl = ttl0;
2213               ip1->ttl = ttl1;
2214
2215               /*
2216                * If the ttl drops below 1 when forwarding, generate
2217                * an ICMP response.
2218                */
2219               if (PREDICT_FALSE(ttl0 <= 0))
2220                 {
2221                   error0 = IP4_ERROR_TIME_EXPIRED;
2222                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2223                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2224                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2225                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2226                 }
2227               if (PREDICT_FALSE(ttl1 <= 0))
2228                 {
2229                   error1 = IP4_ERROR_TIME_EXPIRED;
2230                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2231                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2232                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2233                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2234                 }
2235
2236               /* Verify checksum. */
2237               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2238               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2239             }
2240
2241           /* Rewrite packet header and updates lengths. */
2242           adj0 = ip_get_adjacency (lm, adj_index0);
2243           adj1 = ip_get_adjacency (lm, adj_index1);
2244
2245           if (rewrite_for_locally_received_packets)
2246             {
2247               if (PREDICT_FALSE(adj0->lookup_next_index
2248                                 == IP_LOOKUP_NEXT_ARP))
2249                 next0_override = IP4_REWRITE_NEXT_ARP;
2250               if (PREDICT_FALSE(adj1->lookup_next_index
2251                                 == IP_LOOKUP_NEXT_ARP))
2252                 next1_override = IP4_REWRITE_NEXT_ARP;
2253             }
2254
2255           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2256           rw_len0 = adj0[0].rewrite_header.data_bytes;
2257           rw_len1 = adj1[0].rewrite_header.data_bytes;
2258           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2259           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2260
2261           /* Check MTU of outgoing interface. */
2262           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2263                     ? IP4_ERROR_MTU_EXCEEDED
2264                     : error0);
2265           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2266                     ? IP4_ERROR_MTU_EXCEEDED
2267                     : error1);
2268
2269           next0 = (error0 == IP4_ERROR_NONE)
2270             ? adj0[0].rewrite_header.next_index : next0;
2271
2272           if (rewrite_for_locally_received_packets)
2273               next0 = next0 && next0_override ? next0_override : next0;
2274
2275           next1 = (error1 == IP4_ERROR_NONE)
2276             ? adj1[0].rewrite_header.next_index : next1;
2277
2278           if (rewrite_for_locally_received_packets)
2279               next1 = next1 && next1_override ? next1_override : next1;
2280
2281           /*
2282            * We've already accounted for an ethernet_header_t elsewhere
2283            */
2284           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2285               vlib_increment_combined_counter
2286                   (&adjacency_counters,
2287                    cpu_index, adj_index0,
2288                    /* packet increment */ 0,
2289                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2290
2291           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2292               vlib_increment_combined_counter
2293                   (&adjacency_counters,
2294                    cpu_index, adj_index1,
2295                    /* packet increment */ 0,
2296                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2297
2298           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2299            * to see the IP headerr */
2300           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2301             {
2302               p0->current_data -= rw_len0;
2303               p0->current_length += rw_len0;
2304               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2305               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2306                   tx_sw_if_index0;
2307
2308               if (PREDICT_FALSE
2309                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2310                                     tx_sw_if_index0)))
2311                 {
2312                   p0->current_config_index =
2313                     vec_elt (cm->config_index_by_sw_if_index,
2314                              tx_sw_if_index0);
2315                   vnet_get_config_data (&cm->config_main,
2316                                         &p0->current_config_index,
2317                                         &next0,
2318                                         /* # bytes of config data */ 0);
2319                 }
2320             }
2321           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2322             {
2323               p1->current_data -= rw_len1;
2324               p1->current_length += rw_len1;
2325
2326               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2327               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2328                   tx_sw_if_index1;
2329
2330               if (PREDICT_FALSE
2331                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2332                                     tx_sw_if_index1)))
2333                 {
2334                   p1->current_config_index =
2335                     vec_elt (cm->config_index_by_sw_if_index,
2336                              tx_sw_if_index1);
2337                   vnet_get_config_data (&cm->config_main,
2338                                         &p1->current_config_index,
2339                                         &next1,
2340                                         /* # bytes of config data */ 0);
2341                 }
2342             }
2343
2344           /* Guess we are only writing on simple Ethernet header. */
2345           vnet_rewrite_two_headers (adj0[0], adj1[0],
2346                                     ip0, ip1,
2347                                     sizeof (ethernet_header_t));
2348
2349           if (is_midchain)
2350           {
2351               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2352               adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2353           }
2354
2355           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2356                                            to_next, n_left_to_next,
2357                                            pi0, pi1, next0, next1);
2358         }
2359
2360       while (n_left_from > 0 && n_left_to_next > 0)
2361         {
2362           ip_adjacency_t * adj0;
2363           vlib_buffer_t * p0;
2364           ip4_header_t * ip0;
2365           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2366           u32 next0_override;
2367           u32 tx_sw_if_index0;
2368
2369           if (rewrite_for_locally_received_packets)
2370               next0_override = 0;
2371
2372           pi0 = to_next[0] = from[0];
2373
2374           p0 = vlib_get_buffer (vm, pi0);
2375
2376           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2377
2378           /* We should never rewrite a pkt using the MISS adjacency */
2379           ASSERT(adj_index0);
2380
2381           adj0 = ip_get_adjacency (lm, adj_index0);
2382
2383           ip0 = vlib_buffer_get_current (p0);
2384
2385           error0 = IP4_ERROR_NONE;
2386           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2387
2388           /* Decrement TTL & update checksum. */
2389           if (! rewrite_for_locally_received_packets)
2390             {
2391               i32 ttl0 = ip0->ttl;
2392
2393               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2394
2395               checksum0 += checksum0 >= 0xffff;
2396
2397               ip0->checksum = checksum0;
2398
2399               ASSERT (ip0->ttl > 0);
2400
2401               ttl0 -= 1;
2402
2403               ip0->ttl = ttl0;
2404
2405               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2406
2407               if (PREDICT_FALSE(ttl0 <= 0))
2408                 {
2409                   /*
2410                    * If the ttl drops below 1 when forwarding, generate
2411                    * an ICMP response.
2412                    */
2413                   error0 = IP4_ERROR_TIME_EXPIRED;
2414                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2415                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2416                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2417                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2418                 }
2419             }
2420
2421           if (rewrite_for_locally_received_packets)
2422             {
2423               /*
2424                * We have to override the next_index in ARP adjacencies,
2425                * because they're set up for ip4-arp, not this node...
2426                */
2427               if (PREDICT_FALSE(adj0->lookup_next_index
2428                                 == IP_LOOKUP_NEXT_ARP))
2429                 next0_override = IP4_REWRITE_NEXT_ARP;
2430             }
2431
2432           /* Guess we are only writing on simple Ethernet header. */
2433           vnet_rewrite_one_header (adj0[0], ip0,
2434                                    sizeof (ethernet_header_t));
2435
2436           /* Update packet buffer attributes/set output interface. */
2437           rw_len0 = adj0[0].rewrite_header.data_bytes;
2438           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2439
2440           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2441               vlib_increment_combined_counter
2442                   (&adjacency_counters,
2443                    cpu_index, adj_index0,
2444                    /* packet increment */ 0,
2445                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2446
2447           /* Check MTU of outgoing interface. */
2448           error0 = (vlib_buffer_length_in_chain (vm, p0)
2449                     > adj0[0].rewrite_header.max_l3_packet_bytes
2450                     ? IP4_ERROR_MTU_EXCEEDED
2451                     : error0);
2452
2453           p0->error = error_node->errors[error0];
2454
2455           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2456            * to see the IP headerr */
2457           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2458             {
2459               p0->current_data -= rw_len0;
2460               p0->current_length += rw_len0;
2461               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2462
2463               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2464               next0 = adj0[0].rewrite_header.next_index;
2465
2466               if (is_midchain)
2467                 {
2468                   adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2469                 }
2470
2471               if (PREDICT_FALSE
2472                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2473                                     tx_sw_if_index0)))
2474                   {
2475                     p0->current_config_index =
2476                       vec_elt (cm->config_index_by_sw_if_index,
2477                                tx_sw_if_index0);
2478                     vnet_get_config_data (&cm->config_main,
2479                                           &p0->current_config_index,
2480                                           &next0,
2481                                           /* # bytes of config data */ 0);
2482                   }
2483             }
2484
2485           if (rewrite_for_locally_received_packets)
2486               next0 = next0 && next0_override ? next0_override : next0;
2487
2488           from += 1;
2489           n_left_from -= 1;
2490           to_next += 1;
2491           n_left_to_next -= 1;
2492
2493           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2494                                            to_next, n_left_to_next,
2495                                            pi0, next0);
2496         }
2497
2498       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2499     }
2500
2501   /* Need to do trace after rewrites to pick up new packet data. */
2502   if (node->flags & VLIB_NODE_FLAG_TRACE)
2503     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2504
2505   return frame->n_vectors;
2506 }
2507
2508
2509 /** @brief IPv4 transit rewrite node.
2510     @node ip4-rewrite-transit
2511
2512     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2513     header checksum, fetch the ip adjacency, check the outbound mtu,
2514     apply the adjacency rewrite, and send pkts to the adjacency
2515     rewrite header's rewrite_next_index.
2516
2517     @param vm vlib_main_t corresponding to the current thread
2518     @param node vlib_node_runtime_t
2519     @param frame vlib_frame_t whose contents should be dispatched
2520
2521     @par Graph mechanics: buffer metadata, next index usage
2522
2523     @em Uses:
2524     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2525         - the rewrite adjacency index
2526     - <code>adj->lookup_next_index</code>
2527         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2528           the packet will be dropped.
2529     - <code>adj->rewrite_header</code>
2530         - Rewrite string length, rewrite string, next_index
2531
2532     @em Sets:
2533     - <code>b->current_data, b->current_length</code>
2534         - Updated net of applying the rewrite string
2535
2536     <em>Next Indices:</em>
2537     - <code> adj->rewrite_header.next_index </code>
2538       or @c error-drop
2539 */
2540 static uword
2541 ip4_rewrite_transit (vlib_main_t * vm,
2542                      vlib_node_runtime_t * node,
2543                      vlib_frame_t * frame)
2544 {
2545   return ip4_rewrite_inline (vm, node, frame,
2546                              /* rewrite_for_locally_received_packets */ 0, 0);
2547 }
2548
2549 /** @brief IPv4 local rewrite node.
2550     @node ip4-rewrite-local
2551
2552     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2553     the outbound interface mtu, apply the adjacency rewrite, and send
2554     pkts to the adjacency rewrite header's rewrite_next_index. Deal
2555     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
2556     dst = interface addr."
2557
2558     @param vm vlib_main_t corresponding to the current thread
2559     @param node vlib_node_runtime_t
2560     @param frame vlib_frame_t whose contents should be dispatched
2561
2562     @par Graph mechanics: buffer metadata, next index usage
2563
2564     @em Uses:
2565     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2566         - the rewrite adjacency index
2567     - <code>adj->lookup_next_index</code>
2568         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2569           the packet will be dropped.
2570     - <code>adj->rewrite_header</code>
2571         - Rewrite string length, rewrite string, next_index
2572
2573     @em Sets:
2574     - <code>b->current_data, b->current_length</code>
2575         - Updated net of applying the rewrite string
2576
2577     <em>Next Indices:</em>
2578     - <code> adj->rewrite_header.next_index </code>
2579       or @c error-drop
2580 */
2581
2582 static uword
2583 ip4_rewrite_local (vlib_main_t * vm,
2584                    vlib_node_runtime_t * node,
2585                    vlib_frame_t * frame)
2586 {
2587   return ip4_rewrite_inline (vm, node, frame,
2588                              /* rewrite_for_locally_received_packets */ 1, 0);
2589 }
2590
2591 static uword
2592 ip4_midchain (vlib_main_t * vm,
2593               vlib_node_runtime_t * node,
2594               vlib_frame_t * frame)
2595 {
2596   return ip4_rewrite_inline (vm, node, frame,
2597                              /* rewrite_for_locally_received_packets */ 0, 1);
2598 }
2599
2600 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2601   .function = ip4_rewrite_transit,
2602   .name = "ip4-rewrite-transit",
2603   .vector_size = sizeof (u32),
2604
2605   .format_trace = format_ip4_rewrite_trace,
2606
2607   .n_next_nodes = 3,
2608   .next_nodes = {
2609     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2610     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2611     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2612   },
2613 };
2614
2615 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2616
2617 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2618   .function = ip4_midchain,
2619   .name = "ip4-midchain",
2620   .vector_size = sizeof (u32),
2621
2622   .format_trace = format_ip4_forward_next_trace,
2623
2624   .sibling_of = "ip4-rewrite-transit",
2625 };
2626
2627 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
2628
2629 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2630   .function = ip4_rewrite_local,
2631   .name = "ip4-rewrite-local",
2632   .vector_size = sizeof (u32),
2633
2634   .sibling_of = "ip4-rewrite-transit",
2635
2636   .format_trace = format_ip4_rewrite_trace,
2637
2638   .n_next_nodes = 0,
2639 };
2640
2641 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2642
2643 static clib_error_t *
2644 add_del_interface_table (vlib_main_t * vm,
2645                          unformat_input_t * input,
2646                          vlib_cli_command_t * cmd)
2647 {
2648   vnet_main_t * vnm = vnet_get_main();
2649   clib_error_t * error = 0;
2650   u32 sw_if_index, table_id;
2651
2652   sw_if_index = ~0;
2653
2654   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2655     {
2656       error = clib_error_return (0, "unknown interface `%U'",
2657                                  format_unformat_error, input);
2658       goto done;
2659     }
2660
2661   if (unformat (input, "%d", &table_id))
2662     ;
2663   else
2664     {
2665       error = clib_error_return (0, "expected table id `%U'",
2666                                  format_unformat_error, input);
2667       goto done;
2668     }
2669
2670   {
2671     ip4_main_t * im = &ip4_main;
2672     u32 fib_index;
2673
2674     fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2675                                                    table_id);
2676
2677     //
2678     // FIXME-LATER
2679     //  changing an interface's table has consequences for any connecteds
2680     //  and adj-fibs already installed.
2681     //
2682     vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2683     im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2684   }
2685
2686  done:
2687   return error;
2688 }
2689
2690 /*?
2691  * Place the indicated interface into the supplied IPv4 FIB table (also known
2692  * as a VRF). If the FIB table does not exist, this command creates it. To
2693  * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2694  * FIB table will only be displayed if a route has been added to the table, or
2695  * an IP Address is assigned to an interface in the table (which adds a route
2696  * automatically), or '<em>include-empty</em>' is included.
2697  *
2698  * @note IP addresses added after setting the interface IP table end up in
2699  * the indicated FIB table. If the IP address is added prior to adding the
2700  * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2701  * but potentially counter-intuitive results occur if you provision interface
2702  * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2703  * IP table ID provisioned. It might be marginally useful to evade source RPF
2704  * drops to put an interface address into multiple FIBs.
2705  *
2706  * @cliexpar
2707  * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2708  * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2709  ?*/
2710 /* *INDENT-OFF* */
2711 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2712   .path = "set interface ip table",
2713   .function = add_del_interface_table,
2714   .short_help = "set interface ip table <interface> <table-id>",
2715 };
2716 /* *INDENT-ON* */
2717
2718
2719 static uword
2720 ip4_lookup_multicast (vlib_main_t * vm,
2721                       vlib_node_runtime_t * node,
2722                       vlib_frame_t * frame)
2723 {
2724   ip4_main_t * im = &ip4_main;
2725   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2726   u32 n_left_from, n_left_to_next, * from, * to_next;
2727   ip_lookup_next_t next;
2728   u32 cpu_index = os_get_cpu_number();
2729
2730   from = vlib_frame_vector_args (frame);
2731   n_left_from = frame->n_vectors;
2732   next = node->cached_next_index;
2733
2734   while (n_left_from > 0)
2735     {
2736       vlib_get_next_frame (vm, node, next,
2737                            to_next, n_left_to_next);
2738
2739       while (n_left_from >= 4 && n_left_to_next >= 2)
2740         {
2741           vlib_buffer_t * p0, * p1;
2742           u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2743           ip_lookup_next_t next0, next1;
2744           ip4_header_t * ip0, * ip1;
2745           u32 fib_index0, fib_index1;
2746           const dpo_id_t *dpo0, *dpo1;
2747           const load_balance_t * lb0, * lb1;
2748
2749           /* Prefetch next iteration. */
2750           {
2751             vlib_buffer_t * p2, * p3;
2752
2753             p2 = vlib_get_buffer (vm, from[2]);
2754             p3 = vlib_get_buffer (vm, from[3]);
2755
2756             vlib_prefetch_buffer_header (p2, LOAD);
2757             vlib_prefetch_buffer_header (p3, LOAD);
2758
2759             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2760             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2761           }
2762
2763           pi0 = to_next[0] = from[0];
2764           pi1 = to_next[1] = from[1];
2765
2766           p0 = vlib_get_buffer (vm, pi0);
2767           p1 = vlib_get_buffer (vm, pi1);
2768
2769           ip0 = vlib_buffer_get_current (p0);
2770           ip1 = vlib_buffer_get_current (p1);
2771
2772           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2773           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2774           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2775             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2776           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2777             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2778
2779           lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2780                                                &ip0->dst_address);
2781           lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2782                                                &ip1->dst_address);
2783
2784           lb0 = load_balance_get (lb_index0);
2785           lb1 = load_balance_get (lb_index1);
2786
2787           ASSERT (lb0->lb_n_buckets > 0);
2788           ASSERT (is_pow2 (lb0->lb_n_buckets));
2789           ASSERT (lb1->lb_n_buckets > 0);
2790           ASSERT (is_pow2 (lb1->lb_n_buckets));
2791
2792           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2793               (ip0, lb0->lb_hash_config);
2794
2795           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2796               (ip1, lb1->lb_hash_config);
2797
2798           dpo0 = load_balance_get_bucket_i(lb0,
2799                                            (vnet_buffer (p0)->ip.flow_hash &
2800                                             (lb0->lb_n_buckets_minus_1)));
2801           dpo1 = load_balance_get_bucket_i(lb1,
2802                                            (vnet_buffer (p1)->ip.flow_hash &
2803                                             (lb0->lb_n_buckets_minus_1)));
2804
2805           next0 = dpo0->dpoi_next_node;
2806           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2807           next1 = dpo1->dpoi_next_node;
2808           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2809
2810           if (1) /* $$$$$$ HACK FIXME */
2811           vlib_increment_combined_counter
2812               (cm, cpu_index, lb_index0, 1,
2813                vlib_buffer_length_in_chain (vm, p0));
2814           if (1) /* $$$$$$ HACK FIXME */
2815           vlib_increment_combined_counter
2816               (cm, cpu_index, lb_index1, 1,
2817                vlib_buffer_length_in_chain (vm, p1));
2818
2819           from += 2;
2820           to_next += 2;
2821           n_left_to_next -= 2;
2822           n_left_from -= 2;
2823
2824           wrong_next = (next0 != next) + 2*(next1 != next);
2825           if (PREDICT_FALSE (wrong_next != 0))
2826             {
2827               switch (wrong_next)
2828                 {
2829                 case 1:
2830                   /* A B A */
2831                   to_next[-2] = pi1;
2832                   to_next -= 1;
2833                   n_left_to_next += 1;
2834                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2835                   break;
2836
2837                 case 2:
2838                   /* A A B */
2839                   to_next -= 1;
2840                   n_left_to_next += 1;
2841                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2842                   break;
2843
2844                 case 3:
2845                   /* A B C */
2846                   to_next -= 2;
2847                   n_left_to_next += 2;
2848                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2849                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2850                   if (next0 == next1)
2851                     {
2852                       /* A B B */
2853                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2854                       next = next1;
2855                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2856                     }
2857                 }
2858             }
2859         }
2860
2861       while (n_left_from > 0 && n_left_to_next > 0)
2862         {
2863           vlib_buffer_t * p0;
2864           ip4_header_t * ip0;
2865           u32 pi0, lb_index0;
2866           ip_lookup_next_t next0;
2867           u32 fib_index0;
2868           const dpo_id_t *dpo0;
2869           const load_balance_t * lb0;
2870
2871           pi0 = from[0];
2872           to_next[0] = pi0;
2873
2874           p0 = vlib_get_buffer (vm, pi0);
2875
2876           ip0 = vlib_buffer_get_current (p0);
2877
2878           fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2879                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2880           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2881               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2882
2883           lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2884                                                &ip0->dst_address);
2885
2886           lb0 = load_balance_get (lb_index0);
2887
2888           ASSERT (lb0->lb_n_buckets > 0);
2889           ASSERT (is_pow2 (lb0->lb_n_buckets));
2890
2891           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2892               (ip0, lb0->lb_hash_config);
2893
2894           dpo0 = load_balance_get_bucket_i(lb0,
2895                                            (vnet_buffer (p0)->ip.flow_hash &
2896                                             (lb0->lb_n_buckets_minus_1)));
2897
2898           next0 = dpo0->dpoi_next_node;
2899           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2900
2901           if (1) /* $$$$$$ HACK FIXME */
2902               vlib_increment_combined_counter
2903                   (cm, cpu_index, lb_index0, 1,
2904                    vlib_buffer_length_in_chain (vm, p0));
2905
2906           from += 1;
2907           to_next += 1;
2908           n_left_to_next -= 1;
2909           n_left_from -= 1;
2910
2911           if (PREDICT_FALSE (next0 != next))
2912             {
2913               n_left_to_next += 1;
2914               vlib_put_next_frame (vm, node, next, n_left_to_next);
2915               next = next0;
2916               vlib_get_next_frame (vm, node, next,
2917                                    to_next, n_left_to_next);
2918               to_next[0] = pi0;
2919               to_next += 1;
2920               n_left_to_next -= 1;
2921             }
2922         }
2923
2924       vlib_put_next_frame (vm, node, next, n_left_to_next);
2925     }
2926
2927   if (node->flags & VLIB_NODE_FLAG_TRACE)
2928       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2929
2930   return frame->n_vectors;
2931 }
2932
2933 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2934   .function = ip4_lookup_multicast,
2935   .name = "ip4-lookup-multicast",
2936   .vector_size = sizeof (u32),
2937   .sibling_of = "ip4-lookup",
2938   .format_trace = format_ip4_lookup_trace,
2939
2940   .n_next_nodes = 0,
2941 };
2942
2943 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
2944
2945 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2946   .function = ip4_drop,
2947   .name = "ip4-multicast",
2948   .vector_size = sizeof (u32),
2949
2950   .format_trace = format_ip4_forward_next_trace,
2951
2952   .n_next_nodes = 1,
2953   .next_nodes = {
2954     [0] = "error-drop",
2955   },
2956 };
2957
2958 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2959 {
2960   ip4_fib_mtrie_t * mtrie0;
2961   ip4_fib_mtrie_leaf_t leaf0;
2962   u32 lbi0;
2963
2964   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2965
2966   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2967   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2968   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2969   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2970   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2971
2972   /* Handle default route. */
2973   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2974
2975   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2976
2977   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
2978 }
2979
2980 static clib_error_t *
2981 test_lookup_command_fn (vlib_main_t * vm,
2982                         unformat_input_t * input,
2983                         vlib_cli_command_t * cmd)
2984 {
2985   u32 table_id = 0;
2986   f64 count = 1;
2987   u32 n;
2988   int i;
2989   ip4_address_t ip4_base_address;
2990   u64 errors = 0;
2991
2992   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2993       if (unformat (input, "table %d", &table_id))
2994         ;
2995       else if (unformat (input, "count %f", &count))
2996         ;
2997
2998       else if (unformat (input, "%U",
2999                          unformat_ip4_address, &ip4_base_address))
3000         ;
3001       else
3002         return clib_error_return (0, "unknown input `%U'",
3003                                   format_unformat_error, input);
3004   }
3005
3006   n = count;
3007
3008   for (i = 0; i < n; i++)
3009     {
3010       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3011         errors++;
3012
3013       ip4_base_address.as_u32 =
3014         clib_host_to_net_u32 (1 +
3015                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3016     }
3017
3018   if (errors)
3019     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3020   else
3021     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3022
3023   return 0;
3024 }
3025
3026 /*?
3027  * Perform a lookup of an IPv4 Address (or range of addresses) in the
3028  * given FIB table to determine if there is a conflict with the
3029  * adjacency table. The fib-id can be determined by using the
3030  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3031  * of 0 is used.
3032  *
3033  * @todo This command uses fib-id, other commands use table-id (not
3034  * just a name, they are different indexes). Would like to change this
3035  * to table-id for consistency.
3036  *
3037  * @cliexpar
3038  * Example of how to run the test lookup command:
3039  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3040  * No errors in 2 lookups
3041  * @cliexend
3042 ?*/
3043 /* *INDENT-OFF* */
3044 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3045     .path = "test lookup",
3046     .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3047     .function = test_lookup_command_fn,
3048 };
3049 /* *INDENT-ON* */
3050
3051 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3052 {
3053   ip4_main_t * im4 = &ip4_main;
3054   ip4_fib_t * fib;
3055   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3056
3057   if (p == 0)
3058     return VNET_API_ERROR_NO_SUCH_FIB;
3059
3060   fib = ip4_fib_get (p[0]);
3061
3062   fib->flow_hash_config = flow_hash_config;
3063   return 0;
3064 }
3065
3066 static clib_error_t *
3067 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3068                              unformat_input_t * input,
3069                              vlib_cli_command_t * cmd)
3070 {
3071   int matched = 0;
3072   u32 table_id = 0;
3073   u32 flow_hash_config = 0;
3074   int rv;
3075
3076   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3077     if (unformat (input, "table %d", &table_id))
3078       matched = 1;
3079 #define _(a,v) \
3080     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3081     foreach_flow_hash_bit
3082 #undef _
3083     else break;
3084   }
3085
3086   if (matched == 0)
3087     return clib_error_return (0, "unknown input `%U'",
3088                               format_unformat_error, input);
3089
3090   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3091   switch (rv)
3092     {
3093     case 0:
3094       break;
3095
3096     case VNET_API_ERROR_NO_SUCH_FIB:
3097       return clib_error_return (0, "no such FIB table %d", table_id);
3098
3099     default:
3100       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3101       break;
3102     }
3103
3104   return 0;
3105 }
3106
3107 /*?
3108  * Configure the set of IPv4 fields used by the flow hash.
3109  *
3110  * @cliexpar
3111  * Example of how to set the flow hash on a given table:
3112  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
3114  * @cliexstart{show ip fib}
3115  * Table 0, fib_index 0, flow hash: src dst sport dport proto
3116  *      Destination         Packets          Bytes         Adjacency
3117  * 172.16.2.0/24                      0               0 weight 1, index 5
3118  *                                                       172.16.2.1/24
3119  * 172.16.2.1/32                      0               0 weight 1, index 6
3120  *                                                       172.16.2.1/24
3121  * Table 7, fib_index 1, flow hash: dst sport dport proto
3122  *      Destination         Packets          Bytes         Adjacency
3123  * 172.16.1.0/24                      0               0 weight 1, index 3
3124  *                                                       172.16.1.1/24
3125  * 172.16.1.1/32                      1              98 weight 1, index 4
3126  *                                                       172.16.1.1/24
3127  * 172.16.1.2/32                      0               0 weight 1, index 7
3128  *                                                      GigabitEthernet2/0/0
3129  *                                                      IP4: 02:fe:6a:07:39:6f -> 16:d9:e0:91:79:86
3130  * @cliexend
3131 ?*/
3132 /* *INDENT-OFF* */
3133 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3134   .path = "set ip flow-hash",
3135   .short_help =
3136   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3137   .function = set_ip_flow_hash_command_fn,
3138 };
3139 /* *INDENT-ON* */
3140
3141 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3142                                  u32 table_index)
3143 {
3144   vnet_main_t * vnm = vnet_get_main();
3145   vnet_interface_main_t * im = &vnm->interface_main;
3146   ip4_main_t * ipm = &ip4_main;
3147   ip_lookup_main_t * lm = &ipm->lookup_main;
3148   vnet_classify_main_t * cm = &vnet_classify_main;
3149   ip4_address_t *if_addr;
3150
3151   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3152     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3153
3154   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3155     return VNET_API_ERROR_NO_SUCH_ENTRY;
3156
3157   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3158   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3159
3160   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3161
3162   if (NULL != if_addr)
3163   {
3164       fib_prefix_t pfx = {
3165           .fp_len = 32,
3166           .fp_proto = FIB_PROTOCOL_IP4,
3167           .fp_addr.ip4 = *if_addr,
3168       };
3169       u32 fib_index;
3170
3171       fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3172                                                       sw_if_index);
3173
3174
3175       if (table_index != (u32) ~0)
3176       {
3177           dpo_id_t dpo = DPO_NULL;
3178
3179           dpo_set(&dpo,
3180                   DPO_CLASSIFY,
3181                   DPO_PROTO_IP4,
3182                   classify_dpo_create(FIB_PROTOCOL_IP4,
3183                                       table_index));
3184
3185           fib_table_entry_special_dpo_add(fib_index,
3186                                           &pfx,
3187                                           FIB_SOURCE_CLASSIFY,
3188                                           FIB_ENTRY_FLAG_NONE,
3189                                           &dpo);
3190           dpo_reset(&dpo);
3191       }
3192       else
3193       {
3194           fib_table_entry_special_remove(fib_index,
3195                                          &pfx,
3196                                          FIB_SOURCE_CLASSIFY);
3197       }
3198   }
3199
3200   return 0;
3201 }
3202
3203 static clib_error_t *
3204 set_ip_classify_command_fn (vlib_main_t * vm,
3205                             unformat_input_t * input,
3206                             vlib_cli_command_t * cmd)
3207 {
3208   u32 table_index = ~0;
3209   int table_index_set = 0;
3210   u32 sw_if_index = ~0;
3211   int rv;
3212
3213   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3214     if (unformat (input, "table-index %d", &table_index))
3215       table_index_set = 1;
3216     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3217                        vnet_get_main(), &sw_if_index))
3218       ;
3219     else
3220       break;
3221   }
3222
3223   if (table_index_set == 0)
3224     return clib_error_return (0, "classify table-index must be specified");
3225
3226   if (sw_if_index == ~0)
3227     return clib_error_return (0, "interface / subif must be specified");
3228
3229   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3230
3231   switch (rv)
3232     {
3233     case 0:
3234       break;
3235
3236     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3237       return clib_error_return (0, "No such interface");
3238
3239     case VNET_API_ERROR_NO_SUCH_ENTRY:
3240       return clib_error_return (0, "No such classifier table");
3241     }
3242   return 0;
3243 }
3244
3245 /*?
3246  * Assign a classification table to an interface. The classification
3247  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3249  * on an interface.
3250  *
3251  * @cliexpar
3252  * Example of how to assign a classification table to an interface:
3253  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3254 ?*/
3255 /* *INDENT-OFF* */
3256 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3257     .path = "set ip classify",
3258     .short_help =
3259     "set ip classify intfc <interface> table-index <classify-idx>",
3260     .function = set_ip_classify_command_fn,
3261 };
3262 /* *INDENT-ON* */
3263