Fix packet trace for IP4/IP6 lookup/forwarding nodes
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/ip4_fib.h>
50 #include <vnet/dpo/load_balance.h>
51 #include <vnet/dpo/classify_dpo.h>
52
53 void
54 ip4_forward_next_trace (vlib_main_t * vm,
55                         vlib_node_runtime_t * node,
56                         vlib_frame_t * frame,
57                         vlib_rx_or_tx_t which_adj_index);
58
59 always_inline uword
60 ip4_lookup_inline (vlib_main_t * vm,
61                    vlib_node_runtime_t * node,
62                    vlib_frame_t * frame,
63                    int lookup_for_responses_to_locally_received_packets)
64 {
65   ip4_main_t * im = &ip4_main;
66   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
67   u32 n_left_from, n_left_to_next, * from, * to_next;
68   ip_lookup_next_t next;
69   u32 cpu_index = os_get_cpu_number();
70
71   from = vlib_frame_vector_args (frame);
72   n_left_from = frame->n_vectors;
73   next = node->cached_next_index;
74
75   while (n_left_from > 0)
76     {
77       vlib_get_next_frame (vm, node, next,
78                            to_next, n_left_to_next);
79
80       while (n_left_from >= 4 && n_left_to_next >= 2)
81         {
82           vlib_buffer_t * p0, * p1;
83           ip4_header_t * ip0, * ip1;
84           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
85           ip_lookup_next_t next0, next1;
86           const load_balance_t * lb0, * lb1;
87           ip4_fib_mtrie_t * mtrie0, * mtrie1;
88           ip4_fib_mtrie_leaf_t leaf0, leaf1;
89           ip4_address_t * dst_addr0, *dst_addr1;
90           __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
91           __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
92           flow_hash_config_t flow_hash_config0, flow_hash_config1;
93           u32 hash_c0, hash_c1;
94           u32 wrong_next;
95           const dpo_id_t *dpo0, *dpo1;
96
97           /* Prefetch next iteration. */
98           {
99             vlib_buffer_t * p2, * p3;
100
101             p2 = vlib_get_buffer (vm, from[2]);
102             p3 = vlib_get_buffer (vm, from[3]);
103
104             vlib_prefetch_buffer_header (p2, LOAD);
105             vlib_prefetch_buffer_header (p3, LOAD);
106
107             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
108             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
109           }
110
111           pi0 = to_next[0] = from[0];
112           pi1 = to_next[1] = from[1];
113
114           p0 = vlib_get_buffer (vm, pi0);
115           p1 = vlib_get_buffer (vm, pi1);
116
117           ip0 = vlib_buffer_get_current (p0);
118           ip1 = vlib_buffer_get_current (p1);
119
120           dst_addr0 = &ip0->dst_address;
121           dst_addr1 = &ip1->dst_address;
122
123           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
124           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
125           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
126             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
127           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
128             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
129
130
131           if (! lookup_for_responses_to_locally_received_packets)
132             {
133               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
134               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
135
136               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
137
138               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
139               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
140             }
141
142           tcp0 = (void *) (ip0 + 1);
143           tcp1 = (void *) (ip1 + 1);
144
145           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
146                          || ip0->protocol == IP_PROTOCOL_UDP);
147           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
148                          || ip1->protocol == IP_PROTOCOL_UDP);
149
150           if (! lookup_for_responses_to_locally_received_packets)
151             {
152               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
153               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
154             }
155
156           if (! lookup_for_responses_to_locally_received_packets)
157             {
158               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
159               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
160             }
161
162           if (! lookup_for_responses_to_locally_received_packets)
163             {
164               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
165               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
166             }
167
168           if (lookup_for_responses_to_locally_received_packets)
169             {
170               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
171               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
172             }
173           else
174             {
175               /* Handle default route. */
176               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
177               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
178
179               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
180               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
181             }
182
183           lb0 = load_balance_get (lb_index0);
184           lb1 = load_balance_get (lb_index1);
185
186           /* Use flow hash to compute multipath adjacency. */
187           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
188           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
189           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
190             {
191               flow_hash_config0 = lb0->lb_hash_config;
192               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
193                 ip4_compute_flow_hash (ip0, flow_hash_config0);
194             }
195           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
196             {
197               flow_hash_config1 = lb1->lb_hash_config;
198               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
199                 ip4_compute_flow_hash (ip1, flow_hash_config1);
200             }
201
202           ASSERT (lb0->lb_n_buckets > 0);
203           ASSERT (is_pow2 (lb0->lb_n_buckets));
204           ASSERT (lb1->lb_n_buckets > 0);
205           ASSERT (is_pow2 (lb1->lb_n_buckets));
206
207           dpo0 = load_balance_get_bucket_i(lb0,
208                                            (hash_c0 &
209                                             (lb0->lb_n_buckets_minus_1)));
210           dpo1 = load_balance_get_bucket_i(lb1,
211                                            (hash_c1 &
212                                             (lb0->lb_n_buckets_minus_1)));
213
214           next0 = dpo0->dpoi_next_node;
215           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
216           next1 = dpo1->dpoi_next_node;
217           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
218
219           vlib_increment_combined_counter
220               (cm, cpu_index, lb_index0, 1,
221                vlib_buffer_length_in_chain (vm, p0)
222                + sizeof(ethernet_header_t));
223           vlib_increment_combined_counter
224               (cm, cpu_index, lb_index1, 1,
225                vlib_buffer_length_in_chain (vm, p1)
226                + sizeof(ethernet_header_t));
227
228           from += 2;
229           to_next += 2;
230           n_left_to_next -= 2;
231           n_left_from -= 2;
232
233           wrong_next = (next0 != next) + 2*(next1 != next);
234           if (PREDICT_FALSE (wrong_next != 0))
235             {
236               switch (wrong_next)
237                 {
238                 case 1:
239                   /* A B A */
240                   to_next[-2] = pi1;
241                   to_next -= 1;
242                   n_left_to_next += 1;
243                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
244                   break;
245
246                 case 2:
247                   /* A A B */
248                   to_next -= 1;
249                   n_left_to_next += 1;
250                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
251                   break;
252
253                 case 3:
254                   /* A B C */
255                   to_next -= 2;
256                   n_left_to_next += 2;
257                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
258                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
259                   if (next0 == next1)
260                     {
261                       /* A B B */
262                       vlib_put_next_frame (vm, node, next, n_left_to_next);
263                       next = next1;
264                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
265                     }
266                 }
267             }
268         }
269     
270       while (n_left_from > 0 && n_left_to_next > 0)
271         {
272           vlib_buffer_t * p0;
273           ip4_header_t * ip0;
274           __attribute__((unused)) tcp_header_t * tcp0;
275           ip_lookup_next_t next0;
276           const load_balance_t *lb0;
277           ip4_fib_mtrie_t * mtrie0;
278           ip4_fib_mtrie_leaf_t leaf0;
279           ip4_address_t * dst_addr0;
280           __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
281           flow_hash_config_t flow_hash_config0;
282           const dpo_id_t *dpo0;
283           u32 hash_c0;
284
285           pi0 = from[0];
286           to_next[0] = pi0;
287
288           p0 = vlib_get_buffer (vm, pi0);
289
290           ip0 = vlib_buffer_get_current (p0);
291
292           dst_addr0 = &ip0->dst_address;
293
294           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
295           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
296             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
297
298           if (! lookup_for_responses_to_locally_received_packets)
299             {
300               mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
301
302               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
303
304               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
305             }
306
307           tcp0 = (void *) (ip0 + 1);
308
309           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
310                          || ip0->protocol == IP_PROTOCOL_UDP);
311
312           if (! lookup_for_responses_to_locally_received_packets)
313             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
314
315           if (! lookup_for_responses_to_locally_received_packets)
316             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
317
318           if (! lookup_for_responses_to_locally_received_packets)
319             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
320
321           if (lookup_for_responses_to_locally_received_packets)
322             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
323           else
324             {
325               /* Handle default route. */
326               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
327               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
328             }
329
330           lb0 = load_balance_get (lbi0);
331
332           /* Use flow hash to compute multipath adjacency. */
333           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
334           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
335             {
336               flow_hash_config0 = lb0->lb_hash_config;
337
338               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
339                 ip4_compute_flow_hash (ip0, flow_hash_config0);
340             }
341
342           ASSERT (lb0->lb_n_buckets > 0);
343           ASSERT (is_pow2 (lb0->lb_n_buckets));
344
345           dpo0 = load_balance_get_bucket_i(lb0,
346                                            (hash_c0 &
347                                             (lb0->lb_n_buckets_minus_1)));
348
349           next0 = dpo0->dpoi_next_node;
350           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
351
352           vlib_increment_combined_counter 
353               (cm, cpu_index, lbi0, 1,
354                vlib_buffer_length_in_chain (vm, p0));
355
356           from += 1;
357           to_next += 1;
358           n_left_to_next -= 1;
359           n_left_from -= 1;
360
361           if (PREDICT_FALSE (next0 != next))
362             {
363               n_left_to_next += 1;
364               vlib_put_next_frame (vm, node, next, n_left_to_next);
365               next = next0;
366               vlib_get_next_frame (vm, node, next,
367                                    to_next, n_left_to_next);
368               to_next[0] = pi0;
369               to_next += 1;
370               n_left_to_next -= 1;
371             }
372         }
373
374       vlib_put_next_frame (vm, node, next, n_left_to_next);
375     }
376
377   if (node->flags & VLIB_NODE_FLAG_TRACE)
378     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
379
380   return frame->n_vectors;
381 }
382
383 /** @brief IPv4 lookup node.
384     @node ip4-lookup
385
386     This is the main IPv4 lookup dispatch node.
387
388     @param vm vlib_main_t corresponding to the current thread
389     @param node vlib_node_runtime_t
390     @param frame vlib_frame_t whose contents should be dispatched
391
392     @par Graph mechanics: buffer metadata, next index usage
393
394     @em Uses:
395     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
396         - Indicates the @c sw_if_index value of the interface that the
397           packet was received on.
398     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
399         - When the value is @c ~0 then the node performs a longest prefix
400           match (LPM) for the packet destination address in the FIB attached
401           to the receive interface.
402         - Otherwise perform LPM for the packet destination address in the
403           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
404           value (0, 1, ...) and not a VRF id.
405
406     @em Sets:
407     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
408         - The lookup result adjacency index.
409
410     <em>Next Index:</em>
411     - Dispatches the packet to the node index found in
412       ip_adjacency_t @c adj->lookup_next_index
413       (where @c adj is the lookup result adjacency).
414 */
415 static uword
416 ip4_lookup (vlib_main_t * vm,
417             vlib_node_runtime_t * node,
418             vlib_frame_t * frame)
419 {
420   return ip4_lookup_inline (vm, node, frame,
421                             /* lookup_for_responses_to_locally_received_packets */ 0);
422
423 }
424
425 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
426
427 VLIB_REGISTER_NODE (ip4_lookup_node) = {
428   .function = ip4_lookup,
429   .name = "ip4-lookup",
430   .vector_size = sizeof (u32),
431
432   .format_trace = format_ip4_lookup_trace,
433   .n_next_nodes = IP_LOOKUP_N_NEXT,
434   .next_nodes = IP4_LOOKUP_NEXT_NODES,
435 };
436
437 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
438
439 always_inline uword
440 ip4_load_balance (vlib_main_t * vm,
441                   vlib_node_runtime_t * node,
442                   vlib_frame_t * frame)
443 {
444   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
445   u32 n_left_from, n_left_to_next, * from, * to_next;
446   ip_lookup_next_t next;
447   u32 cpu_index = os_get_cpu_number();
448
449   from = vlib_frame_vector_args (frame);
450   n_left_from = frame->n_vectors;
451   next = node->cached_next_index;
452
453   if (node->flags & VLIB_NODE_FLAG_TRACE)
454       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
455
456   while (n_left_from > 0)
457     {
458       vlib_get_next_frame (vm, node, next,
459                            to_next, n_left_to_next);
460
461     
462       while (n_left_from > 0 && n_left_to_next > 0)
463         {
464           ip_lookup_next_t next0;
465           const load_balance_t *lb0;
466           vlib_buffer_t * p0;
467           u32 pi0, lbi0, hc0;
468           const ip4_header_t *ip0;
469           const dpo_id_t *dpo0;
470
471           pi0 = from[0];
472           to_next[0] = pi0;
473
474           p0 = vlib_get_buffer (vm, pi0);
475
476           ip0 = vlib_buffer_get_current (p0);
477           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
478
479           lb0 = load_balance_get(lbi0);
480           hc0 = lb0->lb_hash_config;
481           vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
482
483           dpo0 = load_balance_get_bucket_i(lb0, 
484                                            vnet_buffer(p0)->ip.flow_hash &
485                                            (lb0->lb_n_buckets_minus_1));
486
487           next0 = dpo0->dpoi_next_node;
488           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
489
490           vlib_increment_combined_counter 
491               (cm, cpu_index, lbi0, 1,
492                vlib_buffer_length_in_chain (vm, p0));
493
494           from += 1;
495           to_next += 1;
496           n_left_to_next -= 1;
497           n_left_from -= 1;
498
499           if (PREDICT_FALSE (next0 != next))
500             {
501               n_left_to_next += 1;
502               vlib_put_next_frame (vm, node, next, n_left_to_next);
503               next = next0;
504               vlib_get_next_frame (vm, node, next,
505                                    to_next, n_left_to_next);
506               to_next[0] = pi0;
507               to_next += 1;
508               n_left_to_next -= 1;
509             }
510         }
511
512       vlib_put_next_frame (vm, node, next, n_left_to_next);
513     }
514
515   return frame->n_vectors;
516 }
517
518 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
519
520 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
521   .function = ip4_load_balance,
522   .name = "ip4-load-balance",
523   .vector_size = sizeof (u32),
524   .sibling_of = "ip4-lookup",
525
526   .format_trace = format_ip4_forward_next_trace,
527 };
528
529 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
530
531 /* get first interface address */
532 ip4_address_t *
533 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
534                              ip_interface_address_t ** result_ia)
535 {
536   ip_lookup_main_t * lm = &im->lookup_main;
537   ip_interface_address_t * ia = 0;
538   ip4_address_t * result = 0;
539
540   foreach_ip_interface_address (lm, ia, sw_if_index, 
541                                 1 /* honor unnumbered */,
542   ({
543     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
544     result = a;
545     break;
546   }));
547   if (result_ia)
548     *result_ia = result ? ia : 0;
549   return result;
550 }
551
552 static void
553 ip4_add_interface_routes (u32 sw_if_index,
554                           ip4_main_t * im, u32 fib_index,
555                           ip_interface_address_t * a)
556 {
557   ip_lookup_main_t * lm = &im->lookup_main;
558   ip4_address_t * address = ip_interface_address_get_address (lm, a);
559   fib_prefix_t pfx = {
560       .fp_len = a->address_length,
561       .fp_proto = FIB_PROTOCOL_IP4,
562       .fp_addr.ip4 = *address,
563   };
564
565   a->neighbor_probe_adj_index = ~0;
566
567   if (pfx.fp_len < 32)
568   {
569       fib_node_index_t fei;
570
571       fei = fib_table_entry_update_one_path(fib_index,
572                                             &pfx,
573                                             FIB_SOURCE_INTERFACE,
574                                             (FIB_ENTRY_FLAG_CONNECTED |
575                                              FIB_ENTRY_FLAG_ATTACHED),
576                                             FIB_PROTOCOL_IP4,
577                                             NULL, /* No next-hop address */
578                                             sw_if_index,
579                                             ~0, // invalid FIB index
580                                             1,
581                                             MPLS_LABEL_INVALID,
582                                             FIB_ROUTE_PATH_FLAG_NONE);
583       a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
584   }
585
586   pfx.fp_len = 32;
587
588   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
589   {
590       u32 classify_table_index =
591           lm->classify_table_index_by_sw_if_index [sw_if_index];
592       if (classify_table_index != (u32) ~0)
593       {
594           dpo_id_t dpo = DPO_NULL;
595
596           dpo_set(&dpo,
597                   DPO_CLASSIFY,
598                   DPO_PROTO_IP4,
599                   classify_dpo_create(FIB_PROTOCOL_IP4,
600                                       classify_table_index));
601
602           fib_table_entry_special_dpo_add(fib_index,
603                                           &pfx,
604                                           FIB_SOURCE_CLASSIFY,
605                                           FIB_ENTRY_FLAG_NONE,
606                                           &dpo);
607           dpo_reset(&dpo);
608       }
609   }
610
611   fib_table_entry_update_one_path(fib_index,
612                                   &pfx,
613                                   FIB_SOURCE_INTERFACE,
614                                   (FIB_ENTRY_FLAG_CONNECTED |
615                                    FIB_ENTRY_FLAG_LOCAL),
616                                   FIB_PROTOCOL_IP4,
617                                   &pfx.fp_addr,
618                                   sw_if_index,
619                                   ~0, // invalid FIB index
620                                   1,
621                                   MPLS_LABEL_INVALID,
622                                   FIB_ROUTE_PATH_FLAG_NONE);
623 }
624
625 static void
626 ip4_del_interface_routes (ip4_main_t * im,
627                           u32 fib_index,
628                           ip4_address_t * address,
629                           u32 address_length)
630 {
631     fib_prefix_t pfx = {
632         .fp_len = address_length,
633         .fp_proto = FIB_PROTOCOL_IP4,
634         .fp_addr.ip4 = *address,
635     };
636
637     if (pfx.fp_len < 32)
638     {
639         fib_table_entry_delete(fib_index,
640                                &pfx,
641                                FIB_SOURCE_INTERFACE);
642     }
643
644     pfx.fp_len = 32;
645     fib_table_entry_delete(fib_index,
646                            &pfx,
647                            FIB_SOURCE_INTERFACE);
648 }
649
650 void
651 ip4_sw_interface_enable_disable (u32 sw_if_index,
652                                  u32 is_enable)
653 {
654   vlib_main_t * vm = vlib_get_main();
655   ip4_main_t * im = &ip4_main;
656   ip_lookup_main_t * lm = &im->lookup_main;
657   u32 ci, cast;
658   u32 lookup_feature_index;
659
660   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
661
662   /*
663    * enable/disable only on the 1<->0 transition
664    */
665   if (is_enable)
666     {
667       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
668         return;
669     }
670   else
671     {
672       ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
673       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
674         return;
675     }
676
677   for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
678     {
679       ip_config_main_t * cm = &lm->feature_config_mains[cast];
680       vnet_config_main_t * vcm = &cm->config_main;
681
682       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
683       ci = cm->config_index_by_sw_if_index[sw_if_index];
684
685       if (cast == VNET_IP_RX_UNICAST_FEAT)
686         lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
687       else
688         lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
689
690       if (is_enable)
691         ci = vnet_config_add_feature (vm, vcm,
692                                       ci,
693                                       lookup_feature_index,
694                                       /* config data */ 0,
695                                       /* # bytes of config data */ 0);
696       else
697         ci = vnet_config_del_feature (vm, vcm,
698                                       ci,
699                                       lookup_feature_index,
700                                       /* config data */ 0,
701                                       /* # bytes of config data */ 0);
702       cm->config_index_by_sw_if_index[sw_if_index] = ci;
703     }
704 }
705
706 static clib_error_t *
707 ip4_add_del_interface_address_internal (vlib_main_t * vm,
708                                         u32 sw_if_index,
709                                         ip4_address_t * address,
710                                         u32 address_length,
711                                         u32 is_del)
712 {
713   vnet_main_t * vnm = vnet_get_main();
714   ip4_main_t * im = &ip4_main;
715   ip_lookup_main_t * lm = &im->lookup_main;
716   clib_error_t * error = 0;
717   u32 if_address_index, elts_before;
718   ip4_address_fib_t ip4_af, * addr_fib = 0;
719
720   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
721   ip4_addr_fib_init (&ip4_af, address,
722                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
723   vec_add1 (addr_fib, ip4_af);
724
725   /* FIXME-LATER
726    * there is no support for adj-fib handling in the presence of overlapping
727    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
728    * most routers do.
729    */
730   if (! is_del)
731     {
732       /* When adding an address check that it does not conflict
733          with an existing address. */
734       ip_interface_address_t * ia;
735       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
736                                     0 /* honor unnumbered */,
737       ({
738         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
739
740         if (ip4_destination_matches_route (im, address, x, ia->address_length)
741             || ip4_destination_matches_route (im, x, address, address_length))
742           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
743                                     format_ip4_address_and_length, address, address_length,
744                                     format_ip4_address_and_length, x, ia->address_length,
745                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
746        }));
747     }
748
749   elts_before = pool_elts (lm->if_address_pool);
750
751   error = ip_interface_address_add_del
752     (lm,
753      sw_if_index,
754      addr_fib,
755      address_length,
756      is_del,
757      &if_address_index);
758   if (error)
759     goto done;
760   
761   ip4_sw_interface_enable_disable(sw_if_index, !is_del);
762
763   if (is_del)
764       ip4_del_interface_routes (im, ip4_af.fib_index, address,
765                                 address_length);
766   else
767       ip4_add_interface_routes (sw_if_index,
768                                 im, ip4_af.fib_index,
769                                 pool_elt_at_index 
770                                 (lm->if_address_pool, if_address_index));
771
772   /* If pool did not grow/shrink: add duplicate address. */
773   if (elts_before != pool_elts (lm->if_address_pool))
774     {
775       ip4_add_del_interface_address_callback_t * cb;
776       vec_foreach (cb, im->add_del_interface_address_callbacks)
777         cb->function (im, cb->function_opaque, sw_if_index,
778                       address, address_length,
779                       if_address_index,
780                       is_del);
781     }
782
783  done:
784   vec_free (addr_fib);
785   return error;
786 }
787
788 clib_error_t *
789 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
790                                ip4_address_t * address, u32 address_length,
791                                u32 is_del)
792 {
793   return ip4_add_del_interface_address_internal
794     (vm, sw_if_index, address, address_length,
795      is_del);
796 }
797
798 /* Built-in ip4 unicast rx feature path definition */
799 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
800   .node_name = "ip4-inacl", 
801   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
802   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
803 };
804
805 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
806   .node_name = "ip4-source-check-via-rx",
807   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
808   .feature_index = 
809   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
810 };
811
812 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
813   .node_name = "ip4-source-check-via-any",
814   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
815   .feature_index = 
816   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
817 };
818
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
820   .node_name = "ip4-source-and-port-range-check-rx",
821   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
822   .feature_index =
823   &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
824 };
825
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
827   .node_name = "ip4-policer-classify",
828   .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
829   .feature_index =
830   &ip4_main.ip4_unicast_rx_feature_policer_classify,
831 };
832
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
834   .node_name = "ipsec-input-ip4",
835   .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
836   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
837 };
838
839 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
840   .node_name = "vpath-input-ip4",
841   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
842   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
843 };
844
845 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
846   .node_name = "ip4-lookup",
847   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
848   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
849 };
850
851 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
852   .node_name = "ip4-drop",
853   .runs_before = 0, /* not before any other features */
854   .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
855 };
856
857
858 /* Built-in ip4 multicast rx feature path definition */
859 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
860   .node_name = "vpath-input-ip4",
861   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
862   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
863 };
864
865 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
866   .node_name = "ip4-lookup-multicast",
867   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
868   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
869 };
870
871 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
872   .node_name = "ip4-drop",
873   .runs_before = 0, /* last feature */
874   .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
875 };
876
/*
 * Start-node name tables handed to ip_feature_init_cast() by
 * ip4_feature_init(): the rx table is used for casts below
 * VNET_IP_TX_FEAT, the tx table for the remaining casts.
 */
static char * rx_feature_start_nodes[] = {
  "ip4-input",
  "ip4-input-no-checksum",
};

static char * tx_feature_start_nodes[] = {
  "ip4-rewrite-transit",
};
882
883 /* Source and port-range check ip4 tx feature path definition */
884 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
885   .node_name = "ip4-source-and-port-range-check-tx",
886   .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
887   .feature_index =
888   &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
889
890 };
891
892 /* Built-in ip4 tx feature path definition */
893 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
894   .node_name = "interface-output",
895   .runs_before = 0, /* not before any other features */
896   .feature_index = &ip4_main.ip4_tx_feature_interface_output,
897 };
898
899 static clib_error_t *
900 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
901 {
902   ip_lookup_main_t * lm = &im->lookup_main;
903   clib_error_t * error;
904   vnet_cast_t cast;
905   ip_config_main_t * cm;
906   vnet_config_main_t * vcm;
907   char **feature_start_nodes;
908   int feature_start_len;
909
910   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
911     {
912       cm = &lm->feature_config_mains[cast];
913       vcm = &cm->config_main;
914
915       if (cast < VNET_IP_TX_FEAT)
916         {
917           feature_start_nodes = rx_feature_start_nodes;
918           feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
919         }
920       else
921         {
922           feature_start_nodes = tx_feature_start_nodes;
923           feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
924         }
925       
926       if ((error = ip_feature_init_cast (vm, cm, vcm, 
927                                          feature_start_nodes,
928                                          feature_start_len,
929                                          cast,
930                                          VNET_L3_PACKET_TYPE_IP4)))
931         return error;
932     }
933
934   return 0;
935 }
936
937 static clib_error_t *
938 ip4_sw_interface_add_del (vnet_main_t * vnm,
939                           u32 sw_if_index,
940                           u32 is_add)
941 {
942   vlib_main_t * vm = vnm->vlib_main;
943   ip4_main_t * im = &ip4_main;
944   ip_lookup_main_t * lm = &im->lookup_main;
945   u32 ci, cast;
946   u32 feature_index;
947
948   /* Fill in lookup tables with default table (0). */
949   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
950
951   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
952     {
953       ip_config_main_t * cm = &lm->feature_config_mains[cast];
954       vnet_config_main_t * vcm = &cm->config_main;
955
956       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
957       ci = cm->config_index_by_sw_if_index[sw_if_index];
958
959       if (cast == VNET_IP_RX_UNICAST_FEAT)
960         feature_index = im->ip4_unicast_rx_feature_drop;
961       else if (cast == VNET_IP_RX_MULTICAST_FEAT)
962         feature_index = im->ip4_multicast_rx_feature_drop;
963       else
964         feature_index = im->ip4_tx_feature_interface_output;
965
966       if (is_add)
967         ci = vnet_config_add_feature (vm, vcm, 
968                                       ci,
969                                       feature_index,
970                                       /* config data */ 0,
971                                       /* # bytes of config data */ 0);
972       else
973         {
974           ci = vnet_config_del_feature (vm, vcm, ci,
975                                         feature_index,
976                                         /* config data */ 0,
977                                         /* # bytes of config data */ 0);
978           if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
979               im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
980         }
981       cm->config_index_by_sw_if_index[sw_if_index] = ci;
982       /*
983        * note: do not update the tx feature count here.
984        */
985     }
986
987   return /* no error */ 0;
988 }
989
990 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
991
992 /* Global IP4 main. */
993 ip4_main_t ip4_main;
994
995 clib_error_t *
996 ip4_lookup_init (vlib_main_t * vm)
997 {
998   ip4_main_t * im = &ip4_main;
999   clib_error_t * error;
1000   uword i;
1001
1002   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1003     {
1004       u32 m;
1005
1006       if (i < 32)
1007         m = pow2_mask (i) << (32 - i);
1008       else 
1009         m = ~0;
1010       im->fib_masks[i] = clib_host_to_net_u32 (m);
1011     }
1012
1013   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1014
1015   /* Create FIB with index 0 and table id of 0. */
1016   fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1017
1018   {
1019     pg_node_t * pn;
1020     pn = pg_get_node (ip4_lookup_node.index);
1021     pn->unformat_edit = unformat_pg_ip4_header;
1022   }
1023
1024   {
1025     ethernet_arp_header_t h;
1026
1027     memset (&h, 0, sizeof (h));
1028
1029     /* Set target ethernet address to all zeros. */
1030     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1031
1032 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1033 #define _8(f,v) h.f = v;
1034     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1035     _16 (l3_type, ETHERNET_TYPE_IP4);
1036     _8 (n_l2_address_bytes, 6);
1037     _8 (n_l3_address_bytes, 4);
1038     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1039 #undef _16
1040 #undef _8
1041
1042     vlib_packet_template_init (vm,
1043                                &im->ip4_arp_request_packet_template,
1044                                /* data */ &h,
1045                                sizeof (h),
1046                                /* alloc chunk size */ 8,
1047                                "ip4 arp");
1048   }
1049
1050   error = ip4_feature_init (vm, im);
1051
1052   return error;
1053 }
1054
1055 VLIB_INIT_FUNCTION (ip4_lookup_init);
1056
1057 typedef struct {
1058   /* Adjacency taken. */
1059   u32 adj_index;
1060   u32 flow_hash;
1061   u32 fib_index;
1062
1063   /* Packet data, possibly *after* rewrite. */
1064   u8 packet_data[64 - 1*sizeof(u32)];
1065 } ip4_forward_next_trace_t;
1066
1067 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1068 {
1069   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1070   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1071   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1072   uword indent = format_get_indent (s);
1073   s = format (s, "%U%U",
1074               format_white_space, indent,
1075               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1076   return s;
1077 }
1078
1079 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1080 {
1081   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1082   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1083   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1084   uword indent = format_get_indent (s);
1085
1086   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1087               t->fib_index, t->adj_index, t->flow_hash);
1088   s = format (s, "\n%U%U",
1089               format_white_space, indent,
1090               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1091   return s;
1092 }
1093
1094 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1095 {
1096   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1097   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1098   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1099   vnet_main_t * vnm = vnet_get_main();
1100   uword indent = format_get_indent (s);
1101
1102   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1103               t->fib_index, t->adj_index, format_ip_adjacency,
1104               vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
1105               t->flow_hash);
1106   s = format (s, "\n%U%U",
1107               format_white_space, indent,
1108               format_ip_adjacency_packet_data,
1109               vnm, t->adj_index,
1110               t->packet_data, sizeof (t->packet_data));
1111   return s;
1112 }
1113
1114 /* Common trace function for all ip4-forward next nodes. */
1115 void
1116 ip4_forward_next_trace (vlib_main_t * vm,
1117                         vlib_node_runtime_t * node,
1118                         vlib_frame_t * frame,
1119                         vlib_rx_or_tx_t which_adj_index)
1120 {
1121   u32 * from, n_left;
1122   ip4_main_t * im = &ip4_main;
1123
1124   n_left = frame->n_vectors;
1125   from = vlib_frame_vector_args (frame);
1126   
1127   while (n_left >= 4)
1128     {
1129       u32 bi0, bi1;
1130       vlib_buffer_t * b0, * b1;
1131       ip4_forward_next_trace_t * t0, * t1;
1132
1133       /* Prefetch next iteration. */
1134       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1135       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1136
1137       bi0 = from[0];
1138       bi1 = from[1];
1139
1140       b0 = vlib_get_buffer (vm, bi0);
1141       b1 = vlib_get_buffer (vm, bi1);
1142
1143       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1144         {
1145           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1146           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1147           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1148           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1149               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1150               vec_elt (im->fib_index_by_sw_if_index,
1151                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1152
1153           clib_memcpy (t0->packet_data,
1154                   vlib_buffer_get_current (b0),
1155                   sizeof (t0->packet_data));
1156         }
1157       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1158         {
1159           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1160           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1161           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1162           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1163               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1164               vec_elt (im->fib_index_by_sw_if_index,
1165                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1166           clib_memcpy (t1->packet_data,
1167                   vlib_buffer_get_current (b1),
1168                   sizeof (t1->packet_data));
1169         }
1170       from += 2;
1171       n_left -= 2;
1172     }
1173
1174   while (n_left >= 1)
1175     {
1176       u32 bi0;
1177       vlib_buffer_t * b0;
1178       ip4_forward_next_trace_t * t0;
1179
1180       bi0 = from[0];
1181
1182       b0 = vlib_get_buffer (vm, bi0);
1183
1184       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1185         {
1186           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1187           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1188           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1189           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1190               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1191               vec_elt (im->fib_index_by_sw_if_index,
1192                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1193           clib_memcpy (t0->packet_data,
1194                   vlib_buffer_get_current (b0),
1195                   sizeof (t0->packet_data));
1196         }
1197       from += 1;
1198       n_left -= 1;
1199     }
1200 }
1201
1202 static uword
1203 ip4_drop_or_punt (vlib_main_t * vm,
1204                   vlib_node_runtime_t * node,
1205                   vlib_frame_t * frame,
1206                   ip4_error_t error_code)
1207 {
1208   u32 * buffers = vlib_frame_vector_args (frame);
1209   uword n_packets = frame->n_vectors;
1210
1211   vlib_error_drop_buffers (vm, node,
1212                            buffers,
1213                            /* stride */ 1,
1214                            n_packets,
1215                            /* next */ 0,
1216                            ip4_input_node.index,
1217                            error_code);
1218
1219   if (node->flags & VLIB_NODE_FLAG_TRACE)
1220     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1221
1222   return n_packets;
1223 }
1224
1225 static uword
1226 ip4_drop (vlib_main_t * vm,
1227           vlib_node_runtime_t * node,
1228           vlib_frame_t * frame)
1229 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1230
1231 static uword
1232 ip4_punt (vlib_main_t * vm,
1233           vlib_node_runtime_t * node,
1234           vlib_frame_t * frame)
1235 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1236
1237 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1238   .function = ip4_drop,
1239   .name = "ip4-drop",
1240   .vector_size = sizeof (u32),
1241
1242   .format_trace = format_ip4_forward_next_trace,
1243
1244   .n_next_nodes = 1,
1245   .next_nodes = {
1246     [0] = "error-drop",
1247   },
1248 };
1249
1250 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1251
1252 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1253   .function = ip4_punt,
1254   .name = "ip4-punt",
1255   .vector_size = sizeof (u32),
1256
1257   .format_trace = format_ip4_forward_next_trace,
1258
1259   .n_next_nodes = 1,
1260   .next_nodes = {
1261     [0] = "error-punt",
1262   },
1263 };
1264
1265 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1266
1267 /* Compute TCP/UDP/ICMP4 checksum in software. */
1268 u16
1269 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1270                               ip4_header_t * ip0)
1271 {
1272   ip_csum_t sum0;
1273   u32 ip_header_length, payload_length_host_byte_order;
1274   u32 n_this_buffer, n_bytes_left;
1275   u16 sum16;
1276   void * data_this_buffer;
1277   
1278   /* Initialize checksum with ip header. */
1279   ip_header_length = ip4_header_bytes (ip0);
1280   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1281   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1282
1283   if (BITS (uword) == 32)
1284     {
1285       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1286       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1287     }
1288   else
1289     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1290
1291   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1292   data_this_buffer = (void *) ip0 + ip_header_length;
1293   if (n_this_buffer + ip_header_length > p0->current_length)
1294     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1295   while (1)
1296     {
1297       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1298       n_bytes_left -= n_this_buffer;
1299       if (n_bytes_left == 0)
1300         break;
1301
1302       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1303       p0 = vlib_get_buffer (vm, p0->next_buffer);
1304       data_this_buffer = vlib_buffer_get_current (p0);
1305       n_this_buffer = p0->current_length;
1306     }
1307
1308   sum16 = ~ ip_csum_fold (sum0);
1309
1310   return sum16;
1311 }
1312
1313 static u32
1314 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1315 {
1316   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1317   udp_header_t * udp0;
1318   u16 sum16;
1319
1320   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1321           || ip0->protocol == IP_PROTOCOL_UDP);
1322
1323   udp0 = (void *) (ip0 + 1);
1324   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1325     {
1326       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1327                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1328       return p0->flags;
1329     }
1330
1331   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1332
1333   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1334                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1335
1336   return p0->flags;
1337 }
1338
1339 static uword
1340 ip4_local (vlib_main_t * vm,
1341            vlib_node_runtime_t * node,
1342            vlib_frame_t * frame)
1343 {
1344   ip4_main_t * im = &ip4_main;
1345   ip_lookup_main_t * lm = &im->lookup_main;
1346   ip_local_next_t next_index;
1347   u32 * from, * to_next, n_left_from, n_left_to_next;
1348   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1349
1350   from = vlib_frame_vector_args (frame);
1351   n_left_from = frame->n_vectors;
1352   next_index = node->cached_next_index;
1353   
1354   if (node->flags & VLIB_NODE_FLAG_TRACE)
1355     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1356
1357   while (n_left_from > 0)
1358     {
1359       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1360
1361       while (n_left_from >= 4 && n_left_to_next >= 2)
1362         {
1363           vlib_buffer_t * p0, * p1;
1364           ip4_header_t * ip0, * ip1;
1365           udp_header_t * udp0, * udp1;
1366           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1367           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1368           const dpo_id_t *dpo0, *dpo1;
1369           const load_balance_t *lb0, *lb1;
1370           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1371           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1372           i32 len_diff0, len_diff1;
1373           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1374           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1375           u8 enqueue_code;
1376       
1377           pi0 = to_next[0] = from[0];
1378           pi1 = to_next[1] = from[1];
1379           from += 2;
1380           n_left_from -= 2;
1381           to_next += 2;
1382           n_left_to_next -= 2;
1383       
1384           p0 = vlib_get_buffer (vm, pi0);
1385           p1 = vlib_get_buffer (vm, pi1);
1386
1387           ip0 = vlib_buffer_get_current (p0);
1388           ip1 = vlib_buffer_get_current (p1);
1389
1390           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1391                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1392           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1393                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1394
1395           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1396           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1397
1398           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1399
1400           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1401           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1402
1403           /* Treat IP frag packets as "experimental" protocol for now
1404              until support of IP frag reassembly is implemented */
1405           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1406           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1407           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1408           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1409           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1410           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1411
1412           flags0 = p0->flags;
1413           flags1 = p1->flags;
1414
1415           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1416           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1417
1418           udp0 = ip4_next_header (ip0);
1419           udp1 = ip4_next_header (ip1);
1420
1421           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1422           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1423           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1424
1425           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1426           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1427
1428           /* Verify UDP length. */
1429           ip_len0 = clib_net_to_host_u16 (ip0->length);
1430           ip_len1 = clib_net_to_host_u16 (ip1->length);
1431           udp_len0 = clib_net_to_host_u16 (udp0->length);
1432           udp_len1 = clib_net_to_host_u16 (udp1->length);
1433
1434           len_diff0 = ip_len0 - udp_len0;
1435           len_diff1 = ip_len1 - udp_len1;
1436
1437           len_diff0 = is_udp0 ? len_diff0 : 0;
1438           len_diff1 = is_udp1 ? len_diff1 : 0;
1439
1440           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1441                                 & good_tcp_udp0 & good_tcp_udp1)))
1442             {
1443               if (is_tcp_udp0)
1444                 {
1445                   if (is_tcp_udp0
1446                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1447                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1448                   good_tcp_udp0 =
1449                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1450                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1451                 }
1452               if (is_tcp_udp1)
1453                 {
1454                   if (is_tcp_udp1
1455                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1456                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1457                   good_tcp_udp1 =
1458                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1459                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1460                 }
1461             }
1462
1463           good_tcp_udp0 &= len_diff0 >= 0;
1464           good_tcp_udp1 &= len_diff1 >= 0;
1465
1466           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1467           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1468
1469           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1470
1471           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1472           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1473
1474           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1475           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1476                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1477                     : error0);
1478           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1479                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1480                     : error1);
1481
1482           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1483           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1484           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1485           leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1486
1487           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1488           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1489
1490           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1491           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1492
1493           lb0 = load_balance_get(lbi0);
1494           lb1 = load_balance_get(lbi1);
1495           dpo0 = load_balance_get_bucket_i(lb0, 0);
1496           dpo1 = load_balance_get_bucket_i(lb1, 0);
1497
1498           /* 
1499            * Must have a route to source otherwise we drop the packet.
1500            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1501            */
1502           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1503                     && dpo0->dpoi_type != DPO_ADJACENCY
1504                     && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1505                     && dpo0->dpoi_type != DPO_RECEIVE
1506                     && dpo0->dpoi_type != DPO_DROP
1507                     && dpo0->dpoi_type != DPO_ADJACENCY_GLEAN
1508                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1509                     ? IP4_ERROR_SRC_LOOKUP_MISS
1510                     : error0);
1511           error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1512                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : 
1513                     error0);
1514           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1515                     && dpo1->dpoi_type != DPO_ADJACENCY
1516                     && dpo1->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1517                     && dpo1->dpoi_type != DPO_RECEIVE
1518                     && dpo1->dpoi_type != DPO_DROP
1519                     && dpo1->dpoi_type != DPO_ADJACENCY_GLEAN
1520                     && ip1->dst_address.as_u32 != 0xFFFFFFFF
1521                     ? IP4_ERROR_SRC_LOOKUP_MISS
1522                     : error1);
1523           error1 = (dpo0->dpoi_type == DPO_RECEIVE ?
1524                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : 
1525                     error1);
1526
1527           next0 = lm->local_next_by_ip_protocol[proto0];
1528           next1 = lm->local_next_by_ip_protocol[proto1];
1529
1530           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1531           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1532
1533           p0->error = error0 ? error_node->errors[error0] : 0;
1534           p1->error = error1 ? error_node->errors[error1] : 0;
1535
1536           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1537
1538           if (PREDICT_FALSE (enqueue_code != 0))
1539             {
1540               switch (enqueue_code)
1541                 {
1542                 case 1:
1543                   /* A B A */
1544                   to_next[-2] = pi1;
1545                   to_next -= 1;
1546                   n_left_to_next += 1;
1547                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1548                   break;
1549
1550                 case 2:
1551                   /* A A B */
1552                   to_next -= 1;
1553                   n_left_to_next += 1;
1554                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1555                   break;
1556
1557                 case 3:
1558                   /* A B B or A B C */
1559                   to_next -= 2;
1560                   n_left_to_next += 2;
1561                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1562                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1563                   if (next0 == next1)
1564                     {
1565                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1566                       next_index = next1;
1567                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1568                     }
1569                   break;
1570                 }
1571             }
1572         }
1573
1574       while (n_left_from > 0 && n_left_to_next > 0)
1575         {
1576           vlib_buffer_t * p0;
1577           ip4_header_t * ip0;
1578           udp_header_t * udp0;
1579           ip4_fib_mtrie_t * mtrie0;
1580           ip4_fib_mtrie_leaf_t leaf0;
1581           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1582           i32 len_diff0;
1583           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1584           load_balance_t *lb0;
1585           const dpo_id_t *dpo0;
1586
1587           pi0 = to_next[0] = from[0];
1588           from += 1;
1589           n_left_from -= 1;
1590           to_next += 1;
1591           n_left_to_next -= 1;
1592       
1593           p0 = vlib_get_buffer (vm, pi0);
1594
1595           ip0 = vlib_buffer_get_current (p0);
1596
1597           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1598                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1599
1600           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1601
1602           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1603
1604           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1605
1606           /* Treat IP frag packets as "experimental" protocol for now
1607              until support of IP frag reassembly is implemented */
1608           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1609           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1610           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1611
1612           flags0 = p0->flags;
1613
1614           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1615
1616           udp0 = ip4_next_header (ip0);
1617
1618           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1619           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1620
1621           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1622
1623           /* Verify UDP length. */
1624           ip_len0 = clib_net_to_host_u16 (ip0->length);
1625           udp_len0 = clib_net_to_host_u16 (udp0->length);
1626
1627           len_diff0 = ip_len0 - udp_len0;
1628
1629           len_diff0 = is_udp0 ? len_diff0 : 0;
1630
1631           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1632             {
1633               if (is_tcp_udp0)
1634                 {
1635                   if (is_tcp_udp0
1636                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1637                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1638                   good_tcp_udp0 =
1639                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1640                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1641                 }
1642             }
1643
1644           good_tcp_udp0 &= len_diff0 >= 0;
1645
1646           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1647
1648           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1649
1650           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1651
1652           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1653           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1654                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1655                     : error0);
1656
1657           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1658           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1659
1660           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1661           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1662
1663           lb0 = load_balance_get(lbi0);
1664           dpo0 = load_balance_get_bucket_i(lb0, 0);
1665
1666           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1667               vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1668                   dpo0->dpoi_index;
1669
1670           /* Must have a route to source otherwise we drop the packet. */
1671           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1672                     && dpo0->dpoi_type != DPO_ADJACENCY
1673                     && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1674                     && dpo0->dpoi_type != DPO_RECEIVE
1675                     && dpo0->dpoi_type != DPO_DROP
1676                     && dpo0->dpoi_type != DPO_ADJACENCY_GLEAN
1677                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1678                     ? IP4_ERROR_SRC_LOOKUP_MISS
1679                     : error0);
1680           /* Packet originated from a local address => spoofing */
1681           error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1682                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : 
1683                     error0);
1684
1685           next0 = lm->local_next_by_ip_protocol[proto0];
1686
1687           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1688
1689           p0->error = error0? error_node->errors[error0] : 0;
1690
1691           if (PREDICT_FALSE (next0 != next_index))
1692             {
1693               n_left_to_next += 1;
1694               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1695
1696               next_index = next0;
1697               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1698               to_next[0] = pi0;
1699               to_next += 1;
1700               n_left_to_next -= 1;
1701             }
1702         }
1703   
1704       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1705     }
1706
1707   return frame->n_vectors;
1708 }
1709
1710 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1711   .function = ip4_local,
1712   .name = "ip4-local",
1713   .vector_size = sizeof (u32),
1714
1715   .format_trace = format_ip4_forward_next_trace,
1716
1717   .n_next_nodes = IP_LOCAL_N_NEXT,
1718   .next_nodes = {
1719     [IP_LOCAL_NEXT_DROP] = "error-drop",
1720     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1721     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1722     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1723   },
1724 };
1725
1726 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
1727
1728 void ip4_register_protocol (u32 protocol, u32 node_index)
1729 {
1730   vlib_main_t * vm = vlib_get_main();
1731   ip4_main_t * im = &ip4_main;
1732   ip_lookup_main_t * lm = &im->lookup_main;
1733
1734   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1735   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
1736 }
1737
1738 static clib_error_t *
1739 show_ip_local_command_fn (vlib_main_t * vm,
1740                           unformat_input_t * input,
1741                          vlib_cli_command_t * cmd)
1742 {
1743   ip4_main_t * im = &ip4_main;
1744   ip_lookup_main_t * lm = &im->lookup_main;
1745   int i;
1746
1747   vlib_cli_output (vm, "Protocols handled by ip4_local");
1748   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1749     {
1750       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1751         vlib_cli_output (vm, "%d", i);
1752     }
1753   return 0;
1754 }
1755
1756
1757
1758 VLIB_CLI_COMMAND (show_ip_local, static) = {
1759   .path = "show ip local",
1760   .function = show_ip_local_command_fn,
1761   .short_help = "Show ip local protocol table",
1762 };
1763
1764 always_inline uword
1765 ip4_arp_inline (vlib_main_t * vm,
1766                 vlib_node_runtime_t * node,
1767                 vlib_frame_t * frame,
1768                 int is_glean)
1769 {
1770   vnet_main_t * vnm = vnet_get_main();
1771   ip4_main_t * im = &ip4_main;
1772   ip_lookup_main_t * lm = &im->lookup_main;
1773   u32 * from, * to_next_drop;
1774   uword n_left_from, n_left_to_next_drop, next_index;
1775   static f64 time_last_seed_change = -1e100;
1776   static u32 hash_seeds[3];
1777   static uword hash_bitmap[256 / BITS (uword)]; 
1778   f64 time_now;
1779
1780   if (node->flags & VLIB_NODE_FLAG_TRACE)
1781     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1782
1783   time_now = vlib_time_now (vm);
1784   if (time_now - time_last_seed_change > 1e-3)
1785     {
1786       uword i;
1787       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1788                                              sizeof (hash_seeds));
1789       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1790         hash_seeds[i] = r[i];
1791
1792       /* Mark all hash keys as been no-seen before. */
1793       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1794         hash_bitmap[i] = 0;
1795
1796       time_last_seed_change = time_now;
1797     }
1798
1799   from = vlib_frame_vector_args (frame);
1800   n_left_from = frame->n_vectors;
1801   next_index = node->cached_next_index;
1802   if (next_index == IP4_ARP_NEXT_DROP)
1803     next_index = IP4_ARP_N_NEXT; /* point to first interface */
1804
1805   while (n_left_from > 0)
1806     {
1807       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1808                            to_next_drop, n_left_to_next_drop);
1809
1810       while (n_left_from > 0 && n_left_to_next_drop > 0)
1811         {
1812           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1813           ip_adjacency_t * adj0;
1814           vlib_buffer_t * p0;
1815           ip4_header_t * ip0;
1816           uword bm0;
1817
1818           pi0 = from[0];
1819
1820           p0 = vlib_get_buffer (vm, pi0);
1821
1822           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1823           adj0 = ip_get_adjacency (lm, adj_index0);
1824           ip0 = vlib_buffer_get_current (p0);
1825
1826           /*
1827            * this is the Glean case, so we are ARPing for the
1828            * packet's destination 
1829            */
1830           a0 = hash_seeds[0];
1831           b0 = hash_seeds[1];
1832           c0 = hash_seeds[2];
1833
1834           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1835           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1836
1837           if (is_glean)
1838           {
1839               a0 ^= ip0->dst_address.data_u32;
1840           }
1841           else
1842           {
1843               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1844           }
1845           b0 ^= sw_if_index0;
1846
1847           hash_v3_finalize32 (a0, b0, c0);
1848
1849           c0 &= BITS (hash_bitmap) - 1;
1850           c0 = c0 / BITS (uword);
1851           m0 = (uword) 1 << (c0 % BITS (uword));
1852
1853           bm0 = hash_bitmap[c0];
1854           drop0 = (bm0 & m0) != 0;
1855
1856           /* Mark it as seen. */
1857           hash_bitmap[c0] = bm0 | m0;
1858
1859           from += 1;
1860           n_left_from -= 1;
1861           to_next_drop[0] = pi0;
1862           to_next_drop += 1;
1863           n_left_to_next_drop -= 1;
1864
1865           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1866
1867           if (drop0)
1868             continue;
1869
1870           /* 
1871            * Can happen if the control-plane is programming tables
1872            * with traffic flowing; at least that's today's lame excuse.
1873            */
1874           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1875               (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1876           {
1877             p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1878           }
1879           else
1880           /* Send ARP request. */
1881           {
1882             u32 bi0 = 0;
1883             vlib_buffer_t * b0;
1884             ethernet_arp_header_t * h0;
1885             vnet_hw_interface_t * hw_if0;
1886
1887             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1888
1889             /* Add rewrite/encap string for ARP packet. */
1890             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1891
1892             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1893
1894             /* Src ethernet address in ARP header. */
1895             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1896                     sizeof (h0->ip4_over_ethernet[0].ethernet));
1897
1898             if (is_glean)
1899             {
1900                 /* The interface's source address is stashed in the Glean Adj */
1901                 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1902
1903                 /* Copy in destination address we are requesting. This is the
1904                 * glean case, so it's the packet's destination.*/
1905                 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1906             }
1907             else
1908             {
1909                 /* Src IP address in ARP header. */
1910                 if (ip4_src_address_for_packet(lm, sw_if_index0,
1911                                                &h0->ip4_over_ethernet[0].ip4))
1912                 {
1913                     /* No source address available */
1914                     p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1915                     vlib_buffer_free(vm, &bi0, 1);
1916                     continue;
1917                 }
1918
1919                 /* Copy in destination address we are requesting from the
1920                    incomplete adj */
1921                 h0->ip4_over_ethernet[1].ip4.data_u32 =
1922                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1923             }
1924
1925             vlib_buffer_copy_trace_flag (vm, p0, bi0);
1926             b0 = vlib_get_buffer (vm, bi0);
1927             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1928
1929             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1930
1931             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1932           }
1933         }
1934
1935       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1936     }
1937
1938   return frame->n_vectors;
1939 }
1940
1941 static uword
1942 ip4_arp (vlib_main_t * vm,
1943          vlib_node_runtime_t * node,
1944          vlib_frame_t * frame)
1945 {
1946     return (ip4_arp_inline(vm, node, frame, 0));
1947 }
1948
1949 static uword
1950 ip4_glean (vlib_main_t * vm,
1951            vlib_node_runtime_t * node,
1952            vlib_frame_t * frame)
1953 {
1954     return (ip4_arp_inline(vm, node, frame, 1));
1955 }
1956
1957 static char * ip4_arp_error_strings[] = {
1958   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1959   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1960   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1961   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1962   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1963   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1964 };
1965
1966 VLIB_REGISTER_NODE (ip4_arp_node) = {
1967   .function = ip4_arp,
1968   .name = "ip4-arp",
1969   .vector_size = sizeof (u32),
1970
1971   .format_trace = format_ip4_forward_next_trace,
1972
1973   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1974   .error_strings = ip4_arp_error_strings,
1975
1976   .n_next_nodes = IP4_ARP_N_NEXT,
1977   .next_nodes = {
1978     [IP4_ARP_NEXT_DROP] = "error-drop",
1979   },
1980 };
1981
1982 VLIB_REGISTER_NODE (ip4_glean_node) = {
1983   .function = ip4_glean,
1984   .name = "ip4-glean",
1985   .vector_size = sizeof (u32),
1986
1987   .format_trace = format_ip4_forward_next_trace,
1988
1989   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1990   .error_strings = ip4_arp_error_strings,
1991
1992   .n_next_nodes = IP4_ARP_N_NEXT,
1993   .next_nodes = {
1994     [IP4_ARP_NEXT_DROP] = "error-drop",
1995   },
1996 };
1997
1998 #define foreach_notrace_ip4_arp_error           \
1999 _(DROP)                                         \
2000 _(REQUEST_SENT)                                 \
2001 _(REPLICATE_DROP)                               \
2002 _(REPLICATE_FAIL)
2003
2004 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2005 {
2006   vlib_node_runtime_t *rt = 
2007     vlib_node_get_runtime (vm, ip4_arp_node.index);
2008
2009   /* don't trace ARP request packets */
2010 #define _(a)                                    \
2011     vnet_pcap_drop_trace_filter_add_del         \
2012         (rt->errors[IP4_ARP_ERROR_##a],         \
2013          1 /* is_add */);
2014     foreach_notrace_ip4_arp_error;
2015 #undef _
2016   return 0;
2017 }
2018
2019 VLIB_INIT_FUNCTION(arp_notrace_init);
2020
2021
2022 /* Send an ARP request to see if given destination is reachable on given interface. */
2023 clib_error_t *
2024 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2025 {
2026   vnet_main_t * vnm = vnet_get_main();
2027   ip4_main_t * im = &ip4_main;
2028   ethernet_arp_header_t * h;
2029   ip4_address_t * src;
2030   ip_interface_address_t * ia;
2031   ip_adjacency_t * adj;
2032   vnet_hw_interface_t * hi;
2033   vnet_sw_interface_t * si;
2034   vlib_buffer_t * b;
2035   u32 bi = 0;
2036
2037   si = vnet_get_sw_interface (vnm, sw_if_index);
2038
2039   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2040     {
2041       return clib_error_return (0, "%U: interface %U down",
2042                                 format_ip4_address, dst, 
2043                                 format_vnet_sw_if_index_name, vnm, 
2044                                 sw_if_index);
2045     }
2046
2047   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2048   if (! src)
2049     {
2050       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2051       return clib_error_return 
2052         (0, "no matching interface address for destination %U (interface %U)",
2053          format_ip4_address, dst,
2054          format_vnet_sw_if_index_name, vnm, sw_if_index);
2055     }
2056
2057   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2058
2059   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2060
2061   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2062
2063   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2064
2065   h->ip4_over_ethernet[0].ip4 = src[0];
2066   h->ip4_over_ethernet[1].ip4 = dst[0];
2067
2068   b = vlib_get_buffer (vm, bi);
2069   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2070
2071   /* Add encapsulation string for software interface (e.g. ethernet header). */
2072   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2073   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2074
2075   {
2076     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2077     u32 * to_next = vlib_frame_vector_args (f);
2078     to_next[0] = bi;
2079     f->n_vectors = 1;
2080     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2081   }
2082
2083   return /* no error */ 0;
2084 }
2085
/* Next-node indices used by the ip4 rewrite code in this file */
typedef enum {
  /* Default next when a packet fails a rewrite check */
  IP4_REWRITE_NEXT_DROP = 0,
  /* Override for locally received packets that hit an ARP adjacency */
  IP4_REWRITE_NEXT_ARP = 1,
  /* Used when the TTL expires while forwarding */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,
} ip4_rewrite_next_t;
2091
2092 always_inline uword
2093 ip4_rewrite_inline (vlib_main_t * vm,
2094                     vlib_node_runtime_t * node,
2095                     vlib_frame_t * frame,
2096                     int rewrite_for_locally_received_packets)
2097 {
2098   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2099   u32 * from = vlib_frame_vector_args (frame);
2100   u32 n_left_from, n_left_to_next, * to_next, next_index;
2101   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2102   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2103   ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2104
2105   n_left_from = frame->n_vectors;
2106   next_index = node->cached_next_index;
2107   u32 cpu_index = os_get_cpu_number();
2108   
2109   while (n_left_from > 0)
2110     {
2111       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2112
2113       while (n_left_from >= 4 && n_left_to_next >= 2)
2114         {
2115           ip_adjacency_t * adj0, * adj1;
2116           vlib_buffer_t * p0, * p1;
2117           ip4_header_t * ip0, * ip1;
2118           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2119           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2120           u32 next0_override, next1_override;
2121           u32 tx_sw_if_index0, tx_sw_if_index1;
2122
2123           if (rewrite_for_locally_received_packets)
2124               next0_override = next1_override = 0;
2125
2126           /* Prefetch next iteration. */
2127           {
2128             vlib_buffer_t * p2, * p3;
2129
2130             p2 = vlib_get_buffer (vm, from[2]);
2131             p3 = vlib_get_buffer (vm, from[3]);
2132
2133             vlib_prefetch_buffer_header (p2, STORE);
2134             vlib_prefetch_buffer_header (p3, STORE);
2135
2136             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2137             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2138           }
2139
2140           pi0 = to_next[0] = from[0];
2141           pi1 = to_next[1] = from[1];
2142
2143           from += 2;
2144           n_left_from -= 2;
2145           to_next += 2;
2146           n_left_to_next -= 2;
2147       
2148           p0 = vlib_get_buffer (vm, pi0);
2149           p1 = vlib_get_buffer (vm, pi1);
2150
2151           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2152           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2153
2154           /* We should never rewrite a pkt using the MISS adjacency */
2155           ASSERT(adj_index0 && adj_index1);
2156
2157           ip0 = vlib_buffer_get_current (p0);
2158           ip1 = vlib_buffer_get_current (p1);
2159
2160           error0 = error1 = IP4_ERROR_NONE;
2161           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2162
2163           /* Decrement TTL & update checksum.
2164              Works either endian, so no need for byte swap. */
2165           if (! rewrite_for_locally_received_packets)
2166             {
2167               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2168
2169               /* Input node should have reject packets with ttl 0. */
2170               ASSERT (ip0->ttl > 0);
2171               ASSERT (ip1->ttl > 0);
2172
2173               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2174               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2175
2176               checksum0 += checksum0 >= 0xffff;
2177               checksum1 += checksum1 >= 0xffff;
2178
2179               ip0->checksum = checksum0;
2180               ip1->checksum = checksum1;
2181
2182               ttl0 -= 1;
2183               ttl1 -= 1;
2184
2185               ip0->ttl = ttl0;
2186               ip1->ttl = ttl1;
2187
2188               /*
2189                * If the ttl drops below 1 when forwarding, generate
2190                * an ICMP response.
2191                */
2192               if (PREDICT_FALSE(ttl0 <= 0))
2193                 {
2194                   error0 = IP4_ERROR_TIME_EXPIRED;
2195                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2196                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2197                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2198                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2199                 }
2200               if (PREDICT_FALSE(ttl1 <= 0))
2201                 {
2202                   error1 = IP4_ERROR_TIME_EXPIRED;
2203                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2204                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2205                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2206                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2207                 }
2208
2209               /* Verify checksum. */
2210               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2211               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2212             }
2213
2214           /* Rewrite packet header and updates lengths. */
2215           adj0 = ip_get_adjacency (lm, adj_index0);
2216           adj1 = ip_get_adjacency (lm, adj_index1);
2217       
2218           if (rewrite_for_locally_received_packets)
2219             {
2220               if (PREDICT_FALSE(adj0->lookup_next_index
2221                                 == IP_LOOKUP_NEXT_ARP))
2222                 next0_override = IP4_REWRITE_NEXT_ARP;
2223               if (PREDICT_FALSE(adj1->lookup_next_index
2224                                 == IP_LOOKUP_NEXT_ARP))
2225                 next1_override = IP4_REWRITE_NEXT_ARP;
2226             }
2227
2228           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2229           rw_len0 = adj0[0].rewrite_header.data_bytes;
2230           rw_len1 = adj1[0].rewrite_header.data_bytes;
2231           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2232           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2233
2234           /* Check MTU of outgoing interface. */
2235           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2236                     ? IP4_ERROR_MTU_EXCEEDED
2237                     : error0);
2238           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2239                     ? IP4_ERROR_MTU_EXCEEDED
2240                     : error1);
2241
2242           next0 = (error0 == IP4_ERROR_NONE)
2243             ? adj0[0].rewrite_header.next_index : next0;
2244
2245           if (rewrite_for_locally_received_packets)
2246               next0 = next0 && next0_override ? next0_override : next0;
2247
2248           next1 = (error1 == IP4_ERROR_NONE)
2249             ? adj1[0].rewrite_header.next_index : next1;
2250
2251           if (rewrite_for_locally_received_packets)
2252               next1 = next1 && next1_override ? next1_override : next1;
2253
2254           /* 
2255            * We've already accounted for an ethernet_header_t elsewhere
2256            */
2257           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2258               vlib_increment_combined_counter 
2259                   (&adjacency_counters,
2260                    cpu_index, adj_index0, 
2261                    /* packet increment */ 0,
2262                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2263
2264           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2265               vlib_increment_combined_counter 
2266                   (&adjacency_counters,
2267                    cpu_index, adj_index1, 
2268                    /* packet increment */ 0,
2269                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2270
2271           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2272            * to see the IP headerr */
2273           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2274             {
2275               p0->current_data -= rw_len0;
2276               p0->current_length += rw_len0;
2277               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2278               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2279                   tx_sw_if_index0;
2280
2281               if (PREDICT_FALSE 
2282                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2283                                     tx_sw_if_index0)))
2284                 {
2285                   p0->current_config_index = 
2286                     vec_elt (cm->config_index_by_sw_if_index, 
2287                              tx_sw_if_index0);
2288                   vnet_get_config_data (&cm->config_main,
2289                                         &p0->current_config_index,
2290                                         &next0,
2291                                         /* # bytes of config data */ 0);
2292                 }
2293             }
2294           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2295             {
2296               p1->current_data -= rw_len1;
2297               p1->current_length += rw_len1;
2298
2299               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2300               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2301                   tx_sw_if_index1;
2302
2303               if (PREDICT_FALSE 
2304                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2305                                     tx_sw_if_index1)))
2306                 {
2307                   p1->current_config_index = 
2308                     vec_elt (cm->config_index_by_sw_if_index, 
2309                              tx_sw_if_index1);
2310                   vnet_get_config_data (&cm->config_main,
2311                                         &p1->current_config_index,
2312                                         &next1,
2313                                         /* # bytes of config data */ 0);
2314                 }
2315             }
2316
2317           /* Guess we are only writing on simple Ethernet header. */
2318           vnet_rewrite_two_headers (adj0[0], adj1[0],
2319                                     ip0, ip1,
2320                                     sizeof (ethernet_header_t));
2321       
2322           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2323                                            to_next, n_left_to_next,
2324                                            pi0, pi1, next0, next1);
2325         }
2326
2327       while (n_left_from > 0 && n_left_to_next > 0)
2328         {
2329           ip_adjacency_t * adj0;
2330           vlib_buffer_t * p0;
2331           ip4_header_t * ip0;
2332           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2333           u32 next0_override;
2334           u32 tx_sw_if_index0;
2335
2336           if (rewrite_for_locally_received_packets)
2337               next0_override = 0;
2338
2339           pi0 = to_next[0] = from[0];
2340
2341           p0 = vlib_get_buffer (vm, pi0);
2342
2343           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2344
2345           /* We should never rewrite a pkt using the MISS adjacency */
2346           ASSERT(adj_index0);
2347
2348           adj0 = ip_get_adjacency (lm, adj_index0);
2349       
2350           ip0 = vlib_buffer_get_current (p0);
2351
2352           error0 = IP4_ERROR_NONE;
2353           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2354
2355           /* Decrement TTL & update checksum. */
2356           if (! rewrite_for_locally_received_packets)
2357             {
2358               i32 ttl0 = ip0->ttl;
2359
2360               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2361
2362               checksum0 += checksum0 >= 0xffff;
2363
2364               ip0->checksum = checksum0;
2365
2366               ASSERT (ip0->ttl > 0);
2367
2368               ttl0 -= 1;
2369
2370               ip0->ttl = ttl0;
2371
2372               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2373
2374               if (PREDICT_FALSE(ttl0 <= 0))
2375                 {
2376                   /*
2377                    * If the ttl drops below 1 when forwarding, generate
2378                    * an ICMP response.
2379                    */
2380                   error0 = IP4_ERROR_TIME_EXPIRED;
2381                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2382                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2383                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2384                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2385                 }
2386             }
2387
2388           if (rewrite_for_locally_received_packets)
2389             {
2390               /* 
2391                * We have to override the next_index in ARP adjacencies,
2392                * because they're set up for ip4-arp, not this node...
2393                */
2394               if (PREDICT_FALSE(adj0->lookup_next_index
2395                                 == IP_LOOKUP_NEXT_ARP))
2396                 next0_override = IP4_REWRITE_NEXT_ARP;
2397             }
2398
2399           /* Guess we are only writing on simple Ethernet header. */
2400           vnet_rewrite_one_header (adj0[0], ip0, 
2401                                    sizeof (ethernet_header_t));
2402           
2403           /* Update packet buffer attributes/set output interface. */
2404           rw_len0 = adj0[0].rewrite_header.data_bytes;
2405           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2406           
2407           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2408               vlib_increment_combined_counter 
2409                   (&adjacency_counters,
2410                    cpu_index, adj_index0, 
2411                    /* packet increment */ 0,
2412                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2413           
2414           /* Check MTU of outgoing interface. */
2415           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2416                     > adj0[0].rewrite_header.max_l3_packet_bytes
2417                     ? IP4_ERROR_MTU_EXCEEDED
2418                     : error0);
2419
2420           p0->error = error_node->errors[error0];
2421
2422           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2423            * to see the IP headerr */
2424           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2425             {
2426               p0->current_data -= rw_len0;
2427               p0->current_length += rw_len0;
2428               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2429
2430               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2431               next0 = adj0[0].rewrite_header.next_index;
2432
2433               if (PREDICT_FALSE 
2434                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2435                                     tx_sw_if_index0)))
2436                   {
2437                     p0->current_config_index = 
2438                       vec_elt (cm->config_index_by_sw_if_index, 
2439                                tx_sw_if_index0);
2440                     vnet_get_config_data (&cm->config_main,
2441                                           &p0->current_config_index,
2442                                           &next0,
2443                                           /* # bytes of config data */ 0);
2444                   }
2445             }
2446
2447           if (rewrite_for_locally_received_packets)
2448               next0 = next0 && next0_override ? next0_override : next0;
2449
2450           from += 1;
2451           n_left_from -= 1;
2452           to_next += 1;
2453           n_left_to_next -= 1;
2454       
2455           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2456                                            to_next, n_left_to_next,
2457                                            pi0, next0);
2458         }
2459   
2460       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2461     }
2462
2463   /* Need to do trace after rewrites to pick up new packet data. */
2464   if (node->flags & VLIB_NODE_FLAG_TRACE)
2465     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2466
2467   return frame->n_vectors;
2468 }
2469
2470
2471 /** @brief IPv4 transit rewrite node.
2472     @node ip4-rewrite-transit
2473
2474     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2475     header checksum, fetch the ip adjacency, check the outbound mtu,
2476     apply the adjacency rewrite, and send pkts to the adjacency
2477     rewrite header's rewrite_next_index.
2478
2479     @param vm vlib_main_t corresponding to the current thread
2480     @param node vlib_node_runtime_t
2481     @param frame vlib_frame_t whose contents should be dispatched
2482
2483     @par Graph mechanics: buffer metadata, next index usage
2484
2485     @em Uses:
2486     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2487         - the rewrite adjacency index
2488     - <code>adj->lookup_next_index</code>
2489         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2490           the packet will be dropped. 
2491     - <code>adj->rewrite_header</code>
2492         - Rewrite string length, rewrite string, next_index
2493
2494     @em Sets:
2495     - <code>b->current_data, b->current_length</code>
2496         - Updated net of applying the rewrite string
2497
2498     <em>Next Indices:</em>
2499     - <code> adj->rewrite_header.next_index </code>
2500       or @c error-drop 
2501 */
2502 static uword
2503 ip4_rewrite_transit (vlib_main_t * vm,
2504                      vlib_node_runtime_t * node,
2505                      vlib_frame_t * frame)
2506 {
2507   return ip4_rewrite_inline (vm, node, frame,
2508                              /* rewrite_for_locally_received_packets */ 0);
2509 }
2510
2511 /** @brief IPv4 local rewrite node.
2512     @node ip4-rewrite-local
2513
2514     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2515     the outbound interface mtu, apply the adjacency rewrite, and send
2516     pkts to the adjacency rewrite header's rewrite_next_index. Deal
2517     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
2518     dst = interface addr."
2519
2520     @param vm vlib_main_t corresponding to the current thread
2521     @param node vlib_node_runtime_t
2522     @param frame vlib_frame_t whose contents should be dispatched
2523
2524     @par Graph mechanics: buffer metadata, next index usage
2525
2526     @em Uses:
2527     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2528         - the rewrite adjacency index
2529     - <code>adj->lookup_next_index</code>
2530         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2531           the packet will be dropped. 
2532     - <code>adj->rewrite_header</code>
2533         - Rewrite string length, rewrite string, next_index
2534
2535     @em Sets:
2536     - <code>b->current_data, b->current_length</code>
2537         - Updated net of applying the rewrite string
2538
2539     <em>Next Indices:</em>
2540     - <code> adj->rewrite_header.next_index </code>
2541       or @c error-drop 
2542 */
2543
2544 static uword
2545 ip4_rewrite_local (vlib_main_t * vm,
2546                    vlib_node_runtime_t * node,
2547                    vlib_frame_t * frame)
2548 {
2549   return ip4_rewrite_inline (vm, node, frame,
2550                              /* rewrite_for_locally_received_packets */ 1);
2551 }
2552
2553 static uword
2554 ip4_midchain (vlib_main_t * vm,
2555               vlib_node_runtime_t * node,
2556               vlib_frame_t * frame)
2557 {
2558   return ip4_rewrite_inline (vm, node, frame,
2559                              /* rewrite_for_locally_received_packets */ 0);
2560 }
2561
2562 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2563   .function = ip4_rewrite_transit,
2564   .name = "ip4-rewrite-transit",
2565   .vector_size = sizeof (u32),
2566
2567   .format_trace = format_ip4_rewrite_trace,
2568
2569   .n_next_nodes = 3,
2570   .next_nodes = {
2571     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2572     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2573     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2574   },
2575 };
2576
2577 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2578
2579 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2580   .function = ip4_midchain,
2581   .name = "ip4-midchain",
2582   .vector_size = sizeof (u32),
2583
2584   .format_trace = format_ip4_forward_next_trace,
2585
2586   .n_next_nodes = 2,
2587   .next_nodes = {
2588     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2589     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2590   },
2591 };
2592
2593 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
2594
2595 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2596   .function = ip4_rewrite_local,
2597   .name = "ip4-rewrite-local",
2598   .vector_size = sizeof (u32),
2599
2600   .sibling_of = "ip4-rewrite-transit",
2601
2602   .format_trace = format_ip4_rewrite_trace,
2603
2604   .n_next_nodes = 0,
2605 };
2606
2607 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2608
2609 static clib_error_t *
2610 add_del_interface_table (vlib_main_t * vm,
2611                          unformat_input_t * input,
2612                          vlib_cli_command_t * cmd)
2613 {
2614   vnet_main_t * vnm = vnet_get_main();
2615   clib_error_t * error = 0;
2616   u32 sw_if_index, table_id;
2617
2618   sw_if_index = ~0;
2619
2620   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2621     {
2622       error = clib_error_return (0, "unknown interface `%U'",
2623                                  format_unformat_error, input);
2624       goto done;
2625     }
2626
2627   if (unformat (input, "%d", &table_id))
2628     ;
2629   else
2630     {
2631       error = clib_error_return (0, "expected table id `%U'",
2632                                  format_unformat_error, input);
2633       goto done;
2634     }
2635
2636   {
2637     ip4_main_t * im = &ip4_main;
2638     u32 fib_index;
2639
2640     fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2641                                                    table_id);
2642
2643     //
2644     // FIXME-LATER
2645     //  changing an interface's table has consequences for any connecteds
2646     //  and adj-fibs already installed.
2647     //
2648     vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2649     im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2650   }
2651
2652  done:
2653   return error;
2654 }
2655
2656 /*?
2657  * Place the indicated interface into the supplied VRF
2658  *
2659  * @cliexpar
2660  * @cliexstart{set interface ip table}
2661  *
2662  *  vpp# set interface ip table GigabitEthernet2/0/0 2
2663  *
2664  * Interface addresses added after setting the interface IP table end up in the indicated VRF table.
2665  * Predictable but potentially counter-intuitive results occur if you provision interface addresses in multiple FIBs.
2666  * Upon RX, packets will be processed in the last IP table ID provisioned.
2667  * It might be marginally useful to evade source RPF drops to put an interface address into multiple FIBs.
2668  * @cliexend
2669  ?*/
2670 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2671   .path = "set interface ip table",
2672   .function = add_del_interface_table,
2673   .short_help = "Add/delete FIB table id for interface",
2674 };
2675
2676
2677 static uword
2678 ip4_lookup_multicast (vlib_main_t * vm,
2679                       vlib_node_runtime_t * node,
2680                       vlib_frame_t * frame)
2681 {
2682   ip4_main_t * im = &ip4_main;
2683   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2684   u32 n_left_from, n_left_to_next, * from, * to_next;
2685   ip_lookup_next_t next;
2686   u32 cpu_index = os_get_cpu_number();
2687
2688   from = vlib_frame_vector_args (frame);
2689   n_left_from = frame->n_vectors;
2690   next = node->cached_next_index;
2691
2692   while (n_left_from > 0)
2693     {
2694       vlib_get_next_frame (vm, node, next,
2695                            to_next, n_left_to_next);
2696
2697       while (n_left_from >= 4 && n_left_to_next >= 2)
2698         {
2699           vlib_buffer_t * p0, * p1;
2700           u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2701           ip_lookup_next_t next0, next1;
2702           ip4_header_t * ip0, * ip1;
2703           u32 fib_index0, fib_index1;
2704           const dpo_id_t *dpo0, *dpo1;
2705           const load_balance_t * lb0, * lb1;
2706
2707           /* Prefetch next iteration. */
2708           {
2709             vlib_buffer_t * p2, * p3;
2710
2711             p2 = vlib_get_buffer (vm, from[2]);
2712             p3 = vlib_get_buffer (vm, from[3]);
2713
2714             vlib_prefetch_buffer_header (p2, LOAD);
2715             vlib_prefetch_buffer_header (p3, LOAD);
2716
2717             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2718             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2719           }
2720
2721           pi0 = to_next[0] = from[0];
2722           pi1 = to_next[1] = from[1];
2723
2724           p0 = vlib_get_buffer (vm, pi0);
2725           p1 = vlib_get_buffer (vm, pi1);
2726
2727           ip0 = vlib_buffer_get_current (p0);
2728           ip1 = vlib_buffer_get_current (p1);
2729
2730           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2731           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2732           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2733             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2734           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2735             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2736
2737           lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2738                                                &ip0->dst_address);
2739           lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2740                                                &ip1->dst_address);
2741
2742           lb0 = load_balance_get (lb_index0);
2743           lb1 = load_balance_get (lb_index1);
2744
2745           ASSERT (lb0->lb_n_buckets > 0);
2746           ASSERT (is_pow2 (lb0->lb_n_buckets));
2747           ASSERT (lb1->lb_n_buckets > 0);
2748           ASSERT (is_pow2 (lb1->lb_n_buckets));
2749
2750           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2751               (ip0, lb0->lb_hash_config);
2752                                                                   
2753           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2754               (ip1, lb1->lb_hash_config);
2755
2756           dpo0 = load_balance_get_bucket_i(lb0,
2757                                            (vnet_buffer (p0)->ip.flow_hash &
2758                                             (lb0->lb_n_buckets_minus_1)));
2759           dpo1 = load_balance_get_bucket_i(lb1,
2760                                            (vnet_buffer (p1)->ip.flow_hash &
2761                                             (lb0->lb_n_buckets_minus_1)));
2762
2763           next0 = dpo0->dpoi_next_node;
2764           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2765           next1 = dpo1->dpoi_next_node;
2766           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2767
2768           if (1) /* $$$$$$ HACK FIXME */
2769           vlib_increment_combined_counter 
2770               (cm, cpu_index, lb_index0, 1,
2771                vlib_buffer_length_in_chain (vm, p0));
2772           if (1) /* $$$$$$ HACK FIXME */
2773           vlib_increment_combined_counter 
2774               (cm, cpu_index, lb_index1, 1,
2775                vlib_buffer_length_in_chain (vm, p1));
2776
2777           from += 2;
2778           to_next += 2;
2779           n_left_to_next -= 2;
2780           n_left_from -= 2;
2781
2782           wrong_next = (next0 != next) + 2*(next1 != next);
2783           if (PREDICT_FALSE (wrong_next != 0))
2784             {
2785               switch (wrong_next)
2786                 {
2787                 case 1:
2788                   /* A B A */
2789                   to_next[-2] = pi1;
2790                   to_next -= 1;
2791                   n_left_to_next += 1;
2792                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2793                   break;
2794
2795                 case 2:
2796                   /* A A B */
2797                   to_next -= 1;
2798                   n_left_to_next += 1;
2799                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2800                   break;
2801
2802                 case 3:
2803                   /* A B C */
2804                   to_next -= 2;
2805                   n_left_to_next += 2;
2806                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2807                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2808                   if (next0 == next1)
2809                     {
2810                       /* A B B */
2811                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2812                       next = next1;
2813                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2814                     }
2815                 }
2816             }
2817         }
2818     
2819       while (n_left_from > 0 && n_left_to_next > 0)
2820         {
2821           vlib_buffer_t * p0;
2822           ip4_header_t * ip0;
2823           u32 pi0, lb_index0;
2824           ip_lookup_next_t next0;
2825           u32 fib_index0;
2826           const dpo_id_t *dpo0;
2827           const load_balance_t * lb0;
2828
2829           pi0 = from[0];
2830           to_next[0] = pi0;
2831
2832           p0 = vlib_get_buffer (vm, pi0);
2833
2834           ip0 = vlib_buffer_get_current (p0);
2835
2836           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2837                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2838           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2839               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2840           
2841           lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2842                                                &ip0->dst_address);
2843
2844           lb0 = load_balance_get (lb_index0);
2845
2846           ASSERT (lb0->lb_n_buckets > 0);
2847           ASSERT (is_pow2 (lb0->lb_n_buckets));
2848
2849           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2850               (ip0, lb0->lb_hash_config);
2851
2852           dpo0 = load_balance_get_bucket_i(lb0,
2853                                            (vnet_buffer (p0)->ip.flow_hash &
2854                                             (lb0->lb_n_buckets_minus_1)));
2855
2856           next0 = dpo0->dpoi_next_node;
2857           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2858
2859           if (1) /* $$$$$$ HACK FIXME */
2860               vlib_increment_combined_counter 
2861                   (cm, cpu_index, lb_index0, 1,
2862                    vlib_buffer_length_in_chain (vm, p0));
2863
2864           from += 1;
2865           to_next += 1;
2866           n_left_to_next -= 1;
2867           n_left_from -= 1;
2868
2869           if (PREDICT_FALSE (next0 != next))
2870             {
2871               n_left_to_next += 1;
2872               vlib_put_next_frame (vm, node, next, n_left_to_next);
2873               next = next0;
2874               vlib_get_next_frame (vm, node, next,
2875                                    to_next, n_left_to_next);
2876               to_next[0] = pi0;
2877               to_next += 1;
2878               n_left_to_next -= 1;
2879             }
2880         }
2881
2882       vlib_put_next_frame (vm, node, next, n_left_to_next);
2883     }
2884
2885   if (node->flags & VLIB_NODE_FLAG_TRACE)
2886       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2887
2888   return frame->n_vectors;
2889 }
2890
2891 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2892   .function = ip4_lookup_multicast,
2893   .name = "ip4-lookup-multicast",
2894   .vector_size = sizeof (u32),
2895   .sibling_of = "ip4-lookup",
2896   .format_trace = format_ip4_lookup_trace,
2897
2898   .n_next_nodes = 0,
2899 };
2900
2901 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
2902
2903 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2904   .function = ip4_drop,
2905   .name = "ip4-multicast",
2906   .vector_size = sizeof (u32),
2907
2908   .format_trace = format_ip4_forward_next_trace,
2909
2910   .n_next_nodes = 1,
2911   .next_nodes = {
2912     [0] = "error-drop",
2913   },
2914 };
2915
2916 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2917 {
2918   ip4_fib_mtrie_t * mtrie0;
2919   ip4_fib_mtrie_leaf_t leaf0;
2920   u32 lbi0;
2921     
2922   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2923
2924   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2925   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2926   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2927   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2928   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2929   
2930   /* Handle default route. */
2931   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2932   
2933   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2934   
2935   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
2936 }
2937  
2938 static clib_error_t *
2939 test_lookup_command_fn (vlib_main_t * vm,
2940                         unformat_input_t * input,
2941                         vlib_cli_command_t * cmd)
2942 {
2943   u32 table_id = 0;
2944   f64 count = 1;
2945   u32 n;
2946   int i;
2947   ip4_address_t ip4_base_address;
2948   u64 errors = 0;
2949
2950   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2951       if (unformat (input, "table %d", &table_id))
2952         ;
2953       else if (unformat (input, "count %f", &count))
2954         ;
2955
2956       else if (unformat (input, "%U",
2957                          unformat_ip4_address, &ip4_base_address))
2958         ;
2959       else
2960         return clib_error_return (0, "unknown input `%U'",
2961                                   format_unformat_error, input);
2962   }
2963
2964   n = count;
2965
2966   for (i = 0; i < n; i++)
2967     {
2968       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2969         errors++;
2970
2971       ip4_base_address.as_u32 = 
2972         clib_host_to_net_u32 (1 + 
2973                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2974     }
2975
2976   if (errors) 
2977     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2978   else
2979     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2980
2981   return 0;
2982 }
2983
2984 VLIB_CLI_COMMAND (lookup_test_command, static) = {
2985     .path = "test lookup",
2986     .short_help = "test lookup",
2987     .function = test_lookup_command_fn,
2988 };
2989
2990 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2991 {
2992   ip4_main_t * im4 = &ip4_main;
2993   ip4_fib_t * fib;
2994   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
2995
2996   if (p == 0)
2997     return VNET_API_ERROR_NO_SUCH_FIB;
2998
2999   fib = ip4_fib_get (p[0]);
3000
3001   fib->flow_hash_config = flow_hash_config;
3002   return 0;
3003 }
3004  
3005 static clib_error_t *
3006 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3007                              unformat_input_t * input,
3008                              vlib_cli_command_t * cmd)
3009 {
3010   int matched = 0;
3011   u32 table_id = 0;
3012   u32 flow_hash_config = 0;
3013   int rv;
3014
3015   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3016     if (unformat (input, "table %d", &table_id))
3017       matched = 1;
3018 #define _(a,v) \
3019     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3020     foreach_flow_hash_bit
3021 #undef _
3022     else break;
3023   }
3024   
3025   if (matched == 0)
3026     return clib_error_return (0, "unknown input `%U'",
3027                               format_unformat_error, input);
3028   
3029   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3030   switch (rv)
3031     {
3032     case 0:
3033       break;
3034       
3035     case VNET_API_ERROR_NO_SUCH_FIB:
3036       return clib_error_return (0, "no such FIB table %d", table_id);
3037       
3038     default:
3039       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3040       break;
3041     }
3042   
3043   return 0;
3044 }
3045  
3046 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3047   .path = "set ip flow-hash",
3048   .short_help = 
3049   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3050   .function = set_ip_flow_hash_command_fn,
3051 };
3052  
3053 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3054                                  u32 table_index)
3055 {
3056   vnet_main_t * vnm = vnet_get_main();
3057   vnet_interface_main_t * im = &vnm->interface_main;
3058   ip4_main_t * ipm = &ip4_main;
3059   ip_lookup_main_t * lm = &ipm->lookup_main;
3060   vnet_classify_main_t * cm = &vnet_classify_main;
3061
3062   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3063     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3064
3065   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3066     return VNET_API_ERROR_NO_SUCH_ENTRY;
3067
3068   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3069   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3070
3071   return 0;
3072 }
3073
3074 static clib_error_t *
3075 set_ip_classify_command_fn (vlib_main_t * vm,
3076                             unformat_input_t * input,
3077                             vlib_cli_command_t * cmd)
3078 {
3079   u32 table_index = ~0;
3080   int table_index_set = 0;
3081   u32 sw_if_index = ~0;
3082   int rv;
3083   
3084   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3085     if (unformat (input, "table-index %d", &table_index))
3086       table_index_set = 1;
3087     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3088                        vnet_get_main(), &sw_if_index))
3089       ;
3090     else
3091       break;
3092   }
3093       
3094   if (table_index_set == 0)
3095     return clib_error_return (0, "classify table-index must be specified");
3096
3097   if (sw_if_index == ~0)
3098     return clib_error_return (0, "interface / subif must be specified");
3099
3100   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3101
3102   switch (rv)
3103     {
3104     case 0:
3105       break;
3106
3107     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3108       return clib_error_return (0, "No such interface");
3109
3110     case VNET_API_ERROR_NO_SUCH_ENTRY:
3111       return clib_error_return (0, "No such classifier table");
3112     }
3113   return 0;
3114 }
3115
3116 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3117     .path = "set ip classify",
3118     .short_help = 
3119     "set ip classify intfc <int> table-index <index>",
3120     .function = set_ip_classify_command_fn,
3121 };
3122