Fix crash caused by "show trace" due to incorrect usage of IP adjacency.
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/ip4_fib.h>
50 #include <vnet/dpo/load_balance.h>
51 #include <vnet/dpo/classify_dpo.h>
52
53 void
54 ip4_forward_next_trace (vlib_main_t * vm,
55                         vlib_node_runtime_t * node,
56                         vlib_frame_t * frame,
57                         vlib_rx_or_tx_t which_adj_index);
58
59 always_inline uword
60 ip4_lookup_inline (vlib_main_t * vm,
61                    vlib_node_runtime_t * node,
62                    vlib_frame_t * frame,
63                    int lookup_for_responses_to_locally_received_packets)
64 {
65   ip4_main_t * im = &ip4_main;
66   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
67   u32 n_left_from, n_left_to_next, * from, * to_next;
68   ip_lookup_next_t next;
69   u32 cpu_index = os_get_cpu_number();
70
71   from = vlib_frame_vector_args (frame);
72   n_left_from = frame->n_vectors;
73   next = node->cached_next_index;
74
75   while (n_left_from > 0)
76     {
77       vlib_get_next_frame (vm, node, next,
78                            to_next, n_left_to_next);
79
80       while (n_left_from >= 4 && n_left_to_next >= 2)
81         {
82           vlib_buffer_t * p0, * p1;
83           ip4_header_t * ip0, * ip1;
84           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
85           ip_lookup_next_t next0, next1;
86           const load_balance_t * lb0, * lb1;
87           ip4_fib_mtrie_t * mtrie0, * mtrie1;
88           ip4_fib_mtrie_leaf_t leaf0, leaf1;
89           ip4_address_t * dst_addr0, *dst_addr1;
90           __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
91           __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
92           flow_hash_config_t flow_hash_config0, flow_hash_config1;
93           u32 hash_c0, hash_c1;
94           u32 wrong_next;
95           const dpo_id_t *dpo0, *dpo1;
96
97           /* Prefetch next iteration. */
98           {
99             vlib_buffer_t * p2, * p3;
100
101             p2 = vlib_get_buffer (vm, from[2]);
102             p3 = vlib_get_buffer (vm, from[3]);
103
104             vlib_prefetch_buffer_header (p2, LOAD);
105             vlib_prefetch_buffer_header (p3, LOAD);
106
107             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
108             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
109           }
110
111           pi0 = to_next[0] = from[0];
112           pi1 = to_next[1] = from[1];
113
114           p0 = vlib_get_buffer (vm, pi0);
115           p1 = vlib_get_buffer (vm, pi1);
116
117           ip0 = vlib_buffer_get_current (p0);
118           ip1 = vlib_buffer_get_current (p1);
119
120           dst_addr0 = &ip0->dst_address;
121           dst_addr1 = &ip1->dst_address;
122
123           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
124           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
125           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
126             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
127           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
128             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
129
130
131           if (! lookup_for_responses_to_locally_received_packets)
132             {
133               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
134               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
135
136               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
137
138               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
139               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
140             }
141
142           tcp0 = (void *) (ip0 + 1);
143           tcp1 = (void *) (ip1 + 1);
144
145           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
146                          || ip0->protocol == IP_PROTOCOL_UDP);
147           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
148                          || ip1->protocol == IP_PROTOCOL_UDP);
149
150           if (! lookup_for_responses_to_locally_received_packets)
151             {
152               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
153               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
154             }
155
156           if (! lookup_for_responses_to_locally_received_packets)
157             {
158               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
159               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
160             }
161
162           if (! lookup_for_responses_to_locally_received_packets)
163             {
164               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
165               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
166             }
167
168           if (lookup_for_responses_to_locally_received_packets)
169             {
170               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
171               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
172             }
173           else
174             {
175               /* Handle default route. */
176               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
177               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
178
179               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
180               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
181             }
182
183           lb0 = load_balance_get (lb_index0);
184           lb1 = load_balance_get (lb_index1);
185
186           /* Use flow hash to compute multipath adjacency. */
187           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
188           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
189           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
190             {
191               flow_hash_config0 = lb0->lb_hash_config;
192               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
193                 ip4_compute_flow_hash (ip0, flow_hash_config0);
194             }
195           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
196             {
197               flow_hash_config1 = lb1->lb_hash_config;
198               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
199                 ip4_compute_flow_hash (ip1, flow_hash_config1);
200             }
201
202           ASSERT (lb0->lb_n_buckets > 0);
203           ASSERT (is_pow2 (lb0->lb_n_buckets));
204           ASSERT (lb1->lb_n_buckets > 0);
205           ASSERT (is_pow2 (lb1->lb_n_buckets));
206
207           dpo0 = load_balance_get_bucket_i(lb0,
208                                            (hash_c0 &
209                                             (lb0->lb_n_buckets_minus_1)));
210           dpo1 = load_balance_get_bucket_i(lb1,
211                                            (hash_c1 &
212                                             (lb0->lb_n_buckets_minus_1)));
213
214           next0 = dpo0->dpoi_next_node;
215           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
216           next1 = dpo1->dpoi_next_node;
217           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
218
219           vlib_increment_combined_counter
220               (cm, cpu_index, lb_index0, 1,
221                vlib_buffer_length_in_chain (vm, p0)
222                + sizeof(ethernet_header_t));
223           vlib_increment_combined_counter
224               (cm, cpu_index, lb_index1, 1,
225                vlib_buffer_length_in_chain (vm, p1)
226                + sizeof(ethernet_header_t));
227
228           from += 2;
229           to_next += 2;
230           n_left_to_next -= 2;
231           n_left_from -= 2;
232
233           wrong_next = (next0 != next) + 2*(next1 != next);
234           if (PREDICT_FALSE (wrong_next != 0))
235             {
236               switch (wrong_next)
237                 {
238                 case 1:
239                   /* A B A */
240                   to_next[-2] = pi1;
241                   to_next -= 1;
242                   n_left_to_next += 1;
243                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
244                   break;
245
246                 case 2:
247                   /* A A B */
248                   to_next -= 1;
249                   n_left_to_next += 1;
250                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
251                   break;
252
253                 case 3:
254                   /* A B C */
255                   to_next -= 2;
256                   n_left_to_next += 2;
257                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
258                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
259                   if (next0 == next1)
260                     {
261                       /* A B B */
262                       vlib_put_next_frame (vm, node, next, n_left_to_next);
263                       next = next1;
264                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
265                     }
266                 }
267             }
268         }
269     
270       while (n_left_from > 0 && n_left_to_next > 0)
271         {
272           vlib_buffer_t * p0;
273           ip4_header_t * ip0;
274           __attribute__((unused)) tcp_header_t * tcp0;
275           ip_lookup_next_t next0;
276           const load_balance_t *lb0;
277           ip4_fib_mtrie_t * mtrie0;
278           ip4_fib_mtrie_leaf_t leaf0;
279           ip4_address_t * dst_addr0;
280           __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
281           flow_hash_config_t flow_hash_config0;
282           const dpo_id_t *dpo0;
283           u32 hash_c0;
284
285           pi0 = from[0];
286           to_next[0] = pi0;
287
288           p0 = vlib_get_buffer (vm, pi0);
289
290           ip0 = vlib_buffer_get_current (p0);
291
292           dst_addr0 = &ip0->dst_address;
293
294           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
295           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
296             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
297
298           if (! lookup_for_responses_to_locally_received_packets)
299             {
300               mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
301
302               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
303
304               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
305             }
306
307           tcp0 = (void *) (ip0 + 1);
308
309           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
310                          || ip0->protocol == IP_PROTOCOL_UDP);
311
312           if (! lookup_for_responses_to_locally_received_packets)
313             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
314
315           if (! lookup_for_responses_to_locally_received_packets)
316             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
317
318           if (! lookup_for_responses_to_locally_received_packets)
319             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
320
321           if (lookup_for_responses_to_locally_received_packets)
322             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
323           else
324             {
325               /* Handle default route. */
326               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
327               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
328             }
329
330           lb0 = load_balance_get (lbi0);
331
332           /* Use flow hash to compute multipath adjacency. */
333           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
334           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
335             {
336               flow_hash_config0 = lb0->lb_hash_config;
337
338               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
339                 ip4_compute_flow_hash (ip0, flow_hash_config0);
340             }
341
342           ASSERT (lb0->lb_n_buckets > 0);
343           ASSERT (is_pow2 (lb0->lb_n_buckets));
344
345           dpo0 = load_balance_get_bucket_i(lb0,
346                                            (hash_c0 &
347                                             (lb0->lb_n_buckets_minus_1)));
348
349           next0 = dpo0->dpoi_next_node;
350           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
351
352           vlib_increment_combined_counter 
353               (cm, cpu_index, lbi0, 1,
354                vlib_buffer_length_in_chain (vm, p0));
355
356           from += 1;
357           to_next += 1;
358           n_left_to_next -= 1;
359           n_left_from -= 1;
360
361           if (PREDICT_FALSE (next0 != next))
362             {
363               n_left_to_next += 1;
364               vlib_put_next_frame (vm, node, next, n_left_to_next);
365               next = next0;
366               vlib_get_next_frame (vm, node, next,
367                                    to_next, n_left_to_next);
368               to_next[0] = pi0;
369               to_next += 1;
370               n_left_to_next -= 1;
371             }
372         }
373
374       vlib_put_next_frame (vm, node, next, n_left_to_next);
375     }
376
377   if (node->flags & VLIB_NODE_FLAG_TRACE)
378     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
379
380   return frame->n_vectors;
381 }
382
383 /** @brief IPv4 lookup node.
384     @node ip4-lookup
385
386     This is the main IPv4 lookup dispatch node.
387
388     @param vm vlib_main_t corresponding to the current thread
389     @param node vlib_node_runtime_t
390     @param frame vlib_frame_t whose contents should be dispatched
391
392     @par Graph mechanics: buffer metadata, next index usage
393
394     @em Uses:
395     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
396         - Indicates the @c sw_if_index value of the interface that the
397           packet was received on.
398     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
399         - When the value is @c ~0 then the node performs a longest prefix
400           match (LPM) for the packet destination address in the FIB attached
401           to the receive interface.
402         - Otherwise perform LPM for the packet destination address in the
403           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
404           value (0, 1, ...) and not a VRF id.
405
406     @em Sets:
407     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
408         - The lookup result adjacency index.
409
410     <em>Next Index:</em>
411     - Dispatches the packet to the node index found in
412       ip_adjacency_t @c adj->lookup_next_index
413       (where @c adj is the lookup result adjacency).
414 */
415 static uword
416 ip4_lookup (vlib_main_t * vm,
417             vlib_node_runtime_t * node,
418             vlib_frame_t * frame)
419 {
420   return ip4_lookup_inline (vm, node, frame,
421                             /* lookup_for_responses_to_locally_received_packets */ 0);
422
423 }
424
425 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
426
427 VLIB_REGISTER_NODE (ip4_lookup_node) = {
428   .function = ip4_lookup,
429   .name = "ip4-lookup",
430   .vector_size = sizeof (u32),
431
432   .format_trace = format_ip4_lookup_trace,
433   .n_next_nodes = IP_LOOKUP_N_NEXT,
434   .next_nodes = IP4_LOOKUP_NEXT_NODES,
435 };
436
437 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
438
439 always_inline uword
440 ip4_load_balance (vlib_main_t * vm,
441                   vlib_node_runtime_t * node,
442                   vlib_frame_t * frame)
443 {
444   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
445   u32 n_left_from, n_left_to_next, * from, * to_next;
446   ip_lookup_next_t next;
447   u32 cpu_index = os_get_cpu_number();
448
449   from = vlib_frame_vector_args (frame);
450   n_left_from = frame->n_vectors;
451   next = node->cached_next_index;
452
453   if (node->flags & VLIB_NODE_FLAG_TRACE)
454       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
455
456   while (n_left_from > 0)
457     {
458       vlib_get_next_frame (vm, node, next,
459                            to_next, n_left_to_next);
460
461     
462       while (n_left_from > 0 && n_left_to_next > 0)
463         {
464           ip_lookup_next_t next0;
465           const load_balance_t *lb0;
466           vlib_buffer_t * p0;
467           u32 pi0, lbi0, hc0;
468           const ip4_header_t *ip0;
469           const dpo_id_t *dpo0;
470
471           pi0 = from[0];
472           to_next[0] = pi0;
473
474           p0 = vlib_get_buffer (vm, pi0);
475
476           ip0 = vlib_buffer_get_current (p0);
477           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
478
479           lb0 = load_balance_get(lbi0);
480           hc0 = lb0->lb_hash_config;
481           vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
482
483           dpo0 = load_balance_get_bucket_i(lb0, 
484                                            vnet_buffer(p0)->ip.flow_hash &
485                                            (lb0->lb_n_buckets_minus_1));
486
487           next0 = dpo0->dpoi_next_node;
488           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
489
490           vlib_increment_combined_counter 
491               (cm, cpu_index, lbi0, 1,
492                vlib_buffer_length_in_chain (vm, p0));
493
494           from += 1;
495           to_next += 1;
496           n_left_to_next -= 1;
497           n_left_from -= 1;
498
499           if (PREDICT_FALSE (next0 != next))
500             {
501               n_left_to_next += 1;
502               vlib_put_next_frame (vm, node, next, n_left_to_next);
503               next = next0;
504               vlib_get_next_frame (vm, node, next,
505                                    to_next, n_left_to_next);
506               to_next[0] = pi0;
507               to_next += 1;
508               n_left_to_next -= 1;
509             }
510         }
511
512       vlib_put_next_frame (vm, node, next, n_left_to_next);
513     }
514
515   return frame->n_vectors;
516 }
517
518 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
519
520 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
521   .function = ip4_load_balance,
522   .name = "ip4-load-balance",
523   .vector_size = sizeof (u32),
524   .sibling_of = "ip4-lookup",
525
526   .format_trace = format_ip4_forward_next_trace,
527 };
528
529 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
530
531 /* get first interface address */
532 ip4_address_t *
533 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
534                              ip_interface_address_t ** result_ia)
535 {
536   ip_lookup_main_t * lm = &im->lookup_main;
537   ip_interface_address_t * ia = 0;
538   ip4_address_t * result = 0;
539
540   foreach_ip_interface_address (lm, ia, sw_if_index, 
541                                 1 /* honor unnumbered */,
542   ({
543     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
544     result = a;
545     break;
546   }));
547   if (result_ia)
548     *result_ia = result ? ia : 0;
549   return result;
550 }
551
552 static void
553 ip4_add_interface_routes (u32 sw_if_index,
554                           ip4_main_t * im, u32 fib_index,
555                           ip_interface_address_t * a)
556 {
557   ip_lookup_main_t * lm = &im->lookup_main;
558   ip4_address_t * address = ip_interface_address_get_address (lm, a);
559   fib_prefix_t pfx = {
560       .fp_len = a->address_length,
561       .fp_proto = FIB_PROTOCOL_IP4,
562       .fp_addr.ip4 = *address,
563   };
564
565   a->neighbor_probe_adj_index = ~0;
566
567   if (pfx.fp_len < 32)
568   {
569       fib_node_index_t fei;
570
571       fei = fib_table_entry_update_one_path(fib_index,
572                                             &pfx,
573                                             FIB_SOURCE_INTERFACE,
574                                             (FIB_ENTRY_FLAG_CONNECTED |
575                                              FIB_ENTRY_FLAG_ATTACHED),
576                                             FIB_PROTOCOL_IP4,
577                                             NULL, /* No next-hop address */
578                                             sw_if_index,
579                                             ~0, // invalid FIB index
580                                             1,
581                                             MPLS_LABEL_INVALID,
582                                             FIB_ROUTE_PATH_FLAG_NONE);
583       a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
584   }
585
586   pfx.fp_len = 32;
587
588   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
589   {
590       u32 classify_table_index =
591           lm->classify_table_index_by_sw_if_index [sw_if_index];
592       if (classify_table_index != (u32) ~0)
593       {
594           dpo_id_t dpo = DPO_NULL;
595
596           dpo_set(&dpo,
597                   DPO_CLASSIFY,
598                   DPO_PROTO_IP4,
599                   classify_dpo_create(FIB_PROTOCOL_IP4,
600                                       classify_table_index));
601
602           fib_table_entry_special_dpo_add(fib_index,
603                                           &pfx,
604                                           FIB_SOURCE_CLASSIFY,
605                                           FIB_ENTRY_FLAG_NONE,
606                                           &dpo);
607           dpo_reset(&dpo);
608       }
609   }
610
611   fib_table_entry_update_one_path(fib_index,
612                                   &pfx,
613                                   FIB_SOURCE_INTERFACE,
614                                   (FIB_ENTRY_FLAG_CONNECTED |
615                                    FIB_ENTRY_FLAG_LOCAL),
616                                   FIB_PROTOCOL_IP4,
617                                   &pfx.fp_addr,
618                                   sw_if_index,
619                                   ~0, // invalid FIB index
620                                   1,
621                                   MPLS_LABEL_INVALID,
622                                   FIB_ROUTE_PATH_FLAG_NONE);
623 }
624
625 static void
626 ip4_del_interface_routes (ip4_main_t * im,
627                           u32 fib_index,
628                           ip4_address_t * address,
629                           u32 address_length)
630 {
631     fib_prefix_t pfx = {
632         .fp_len = address_length,
633         .fp_proto = FIB_PROTOCOL_IP4,
634         .fp_addr.ip4 = *address,
635     };
636
637     if (pfx.fp_len < 32)
638     {
639         fib_table_entry_delete(fib_index,
640                                &pfx,
641                                FIB_SOURCE_INTERFACE);
642     }
643
644     pfx.fp_len = 32;
645     fib_table_entry_delete(fib_index,
646                            &pfx,
647                            FIB_SOURCE_INTERFACE);
648 }
649
650 void
651 ip4_sw_interface_enable_disable (u32 sw_if_index,
652                                  u32 is_enable)
653 {
654   vlib_main_t * vm = vlib_get_main();
655   ip4_main_t * im = &ip4_main;
656   ip_lookup_main_t * lm = &im->lookup_main;
657   u32 ci, cast;
658   u32 lookup_feature_index;
659
660   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
661
662   /*
663    * enable/disable only on the 1<->0 transition
664    */
665   if (is_enable)
666     {
667       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
668         return;
669     }
670   else
671     {
672       ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
673       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
674         return;
675     }
676
677   for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
678     {
679       ip_config_main_t * cm = &lm->feature_config_mains[cast];
680       vnet_config_main_t * vcm = &cm->config_main;
681
682       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
683       ci = cm->config_index_by_sw_if_index[sw_if_index];
684
685       if (cast == VNET_IP_RX_UNICAST_FEAT)
686         lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
687       else
688         lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
689
690       if (is_enable)
691         ci = vnet_config_add_feature (vm, vcm,
692                                       ci,
693                                       lookup_feature_index,
694                                       /* config data */ 0,
695                                       /* # bytes of config data */ 0);
696       else
697         ci = vnet_config_del_feature (vm, vcm,
698                                       ci,
699                                       lookup_feature_index,
700                                       /* config data */ 0,
701                                       /* # bytes of config data */ 0);
702       cm->config_index_by_sw_if_index[sw_if_index] = ci;
703     }
704 }
705
706 static clib_error_t *
707 ip4_add_del_interface_address_internal (vlib_main_t * vm,
708                                         u32 sw_if_index,
709                                         ip4_address_t * address,
710                                         u32 address_length,
711                                         u32 is_del)
712 {
713   vnet_main_t * vnm = vnet_get_main();
714   ip4_main_t * im = &ip4_main;
715   ip_lookup_main_t * lm = &im->lookup_main;
716   clib_error_t * error = 0;
717   u32 if_address_index, elts_before;
718   ip4_address_fib_t ip4_af, * addr_fib = 0;
719
720   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
721   ip4_addr_fib_init (&ip4_af, address,
722                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
723   vec_add1 (addr_fib, ip4_af);
724
725   /* FIXME-LATER
726    * there is no support for adj-fib handling in the presence of overlapping
727    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
728    * most routers do.
729    */
730   if (! is_del)
731     {
732       /* When adding an address check that it does not conflict
733          with an existing address. */
734       ip_interface_address_t * ia;
735       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
736                                     0 /* honor unnumbered */,
737       ({
738         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
739
740         if (ip4_destination_matches_route (im, address, x, ia->address_length)
741             || ip4_destination_matches_route (im, x, address, address_length))
742           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
743                                     format_ip4_address_and_length, address, address_length,
744                                     format_ip4_address_and_length, x, ia->address_length,
745                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
746        }));
747     }
748
749   elts_before = pool_elts (lm->if_address_pool);
750
751   error = ip_interface_address_add_del
752     (lm,
753      sw_if_index,
754      addr_fib,
755      address_length,
756      is_del,
757      &if_address_index);
758   if (error)
759     goto done;
760   
761   ip4_sw_interface_enable_disable(sw_if_index, !is_del);
762
763   if (is_del)
764       ip4_del_interface_routes (im, ip4_af.fib_index, address,
765                                 address_length);
766   else
767       ip4_add_interface_routes (sw_if_index,
768                                 im, ip4_af.fib_index,
769                                 pool_elt_at_index 
770                                 (lm->if_address_pool, if_address_index));
771
772   /* If pool did not grow/shrink: add duplicate address. */
773   if (elts_before != pool_elts (lm->if_address_pool))
774     {
775       ip4_add_del_interface_address_callback_t * cb;
776       vec_foreach (cb, im->add_del_interface_address_callbacks)
777         cb->function (im, cb->function_opaque, sw_if_index,
778                       address, address_length,
779                       if_address_index,
780                       is_del);
781     }
782
783  done:
784   vec_free (addr_fib);
785   return error;
786 }
787
788 clib_error_t *
789 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
790                                ip4_address_t * address, u32 address_length,
791                                u32 is_del)
792 {
793   return ip4_add_del_interface_address_internal
794     (vm, sw_if_index, address, address_length,
795      is_del);
796 }
797
798 /* Built-in ip4 unicast rx feature path definition */
799 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
800   .node_name = "ip4-inacl", 
801   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
802   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
803 };
804
805 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
806   .node_name = "ip4-source-check-via-rx",
807   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
808   .feature_index = 
809   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
810 };
811
812 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
813   .node_name = "ip4-source-check-via-any",
814   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
815   .feature_index = 
816   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
817 };
818
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
820   .node_name = "ip4-source-and-port-range-check-rx",
821   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
822   .feature_index =
823   &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
824 };
825
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
827   .node_name = "ip4-policer-classify",
828   .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
829   .feature_index =
830   &ip4_main.ip4_unicast_rx_feature_policer_classify,
831 };
832
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
834   .node_name = "ipsec-input-ip4",
835   .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
836   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
837 };
838
839 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
840   .node_name = "vpath-input-ip4",
841   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
842   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
843 };
844
845 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
846   .node_name = "ip4-lookup",
847   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
848   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
849 };
850
851 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
852   .node_name = "ip4-drop",
853   .runs_before = 0, /* not before any other features */
854   .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
855 };
856
857
858 /* Built-in ip4 multicast rx feature path definition */
859 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
860   .node_name = "vpath-input-ip4",
861   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
862   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
863 };
864
865 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
866   .node_name = "ip4-lookup-multicast",
867   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
868   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
869 };
870
871 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
872   .node_name = "ip4-drop",
873   .runs_before = 0, /* last feature */
874   .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
875 };
876
/*
 * Start-node name tables handed to ip_feature_init_cast() by
 * ip4_feature_init(): the rx table is used for every cast below
 * VNET_IP_TX_FEAT, the tx table for the remaining cast(s).
 */
static char * rx_feature_start_nodes[] = {
  "ip4-input",
  "ip4-input-no-checksum",
};

static char * tx_feature_start_nodes[] = {
  "ip4-rewrite-transit",
  "ip4-midchain",
};
885
886 /* Source and port-range check ip4 tx feature path definition */
887 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
888   .node_name = "ip4-source-and-port-range-check-tx",
889   .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
890   .feature_index =
891   &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
892
893 };
894
895 /* Built-in ip4 tx feature path definition */
896 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
897   .node_name = "interface-output",
898   .runs_before = 0, /* not before any other features */
899   .feature_index = &ip4_main.ip4_tx_feature_interface_output,
900 };
901
902 static clib_error_t *
903 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
904 {
905   ip_lookup_main_t * lm = &im->lookup_main;
906   clib_error_t * error;
907   vnet_cast_t cast;
908   ip_config_main_t * cm;
909   vnet_config_main_t * vcm;
910   char **feature_start_nodes;
911   int feature_start_len;
912
913   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
914     {
915       cm = &lm->feature_config_mains[cast];
916       vcm = &cm->config_main;
917
918       if (cast < VNET_IP_TX_FEAT)
919         {
920           feature_start_nodes = rx_feature_start_nodes;
921           feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
922         }
923       else
924         {
925           feature_start_nodes = tx_feature_start_nodes;
926           feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
927         }
928       
929       if ((error = ip_feature_init_cast (vm, cm, vcm, 
930                                          feature_start_nodes,
931                                          feature_start_len,
932                                          im->next_feature[cast],
933                                          &im->feature_nodes[cast])))
934         return error;
935     }
936
937   return 0;
938 }
939
940 static clib_error_t *
941 ip4_sw_interface_add_del (vnet_main_t * vnm,
942                           u32 sw_if_index,
943                           u32 is_add)
944 {
945   vlib_main_t * vm = vnm->vlib_main;
946   ip4_main_t * im = &ip4_main;
947   ip_lookup_main_t * lm = &im->lookup_main;
948   u32 ci, cast;
949   u32 feature_index;
950
951   /* Fill in lookup tables with default table (0). */
952   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
953
954   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
955     {
956       ip_config_main_t * cm = &lm->feature_config_mains[cast];
957       vnet_config_main_t * vcm = &cm->config_main;
958
959       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
960       ci = cm->config_index_by_sw_if_index[sw_if_index];
961
962       if (cast == VNET_IP_RX_UNICAST_FEAT)
963         feature_index = im->ip4_unicast_rx_feature_drop;
964       else if (cast == VNET_IP_RX_MULTICAST_FEAT)
965         feature_index = im->ip4_multicast_rx_feature_drop;
966       else
967         feature_index = im->ip4_tx_feature_interface_output;
968
969       if (is_add)
970         ci = vnet_config_add_feature (vm, vcm, 
971                                       ci,
972                                       feature_index,
973                                       /* config data */ 0,
974                                       /* # bytes of config data */ 0);
975       else
976         {
977           ci = vnet_config_del_feature (vm, vcm, ci,
978                                         feature_index,
979                                         /* config data */ 0,
980                                         /* # bytes of config data */ 0);
981           if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
982               im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
983         }
984       cm->config_index_by_sw_if_index[sw_if_index] = ci;
985       /*
986        * note: do not update the tx feature count here.
987        */
988     }
989
990   return /* no error */ 0;
991 }
992
993 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
994
995 /* Global IP4 main. */
996 ip4_main_t ip4_main;
997
998 clib_error_t *
999 ip4_lookup_init (vlib_main_t * vm)
1000 {
1001   ip4_main_t * im = &ip4_main;
1002   clib_error_t * error;
1003   uword i;
1004
1005   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1006     {
1007       u32 m;
1008
1009       if (i < 32)
1010         m = pow2_mask (i) << (32 - i);
1011       else 
1012         m = ~0;
1013       im->fib_masks[i] = clib_host_to_net_u32 (m);
1014     }
1015
1016   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1017
1018   /* Create FIB with index 0 and table id of 0. */
1019   fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1020
1021   {
1022     pg_node_t * pn;
1023     pn = pg_get_node (ip4_lookup_node.index);
1024     pn->unformat_edit = unformat_pg_ip4_header;
1025   }
1026
1027   {
1028     ethernet_arp_header_t h;
1029
1030     memset (&h, 0, sizeof (h));
1031
1032     /* Set target ethernet address to all zeros. */
1033     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1034
1035 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1036 #define _8(f,v) h.f = v;
1037     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1038     _16 (l3_type, ETHERNET_TYPE_IP4);
1039     _8 (n_l2_address_bytes, 6);
1040     _8 (n_l3_address_bytes, 4);
1041     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1042 #undef _16
1043 #undef _8
1044
1045     vlib_packet_template_init (vm,
1046                                &im->ip4_arp_request_packet_template,
1047                                /* data */ &h,
1048                                sizeof (h),
1049                                /* alloc chunk size */ 8,
1050                                "ip4 arp");
1051   }
1052
1053   error = ip4_feature_init (vm, im);
1054
1055   return error;
1056 }
1057
1058 VLIB_INIT_FUNCTION (ip4_lookup_init);
1059
1060 typedef struct {
1061   /* Adjacency taken. */
1062   u32 dpo_index;
1063   u32 flow_hash;
1064   u32 fib_index;
1065
1066   /* Packet data, possibly *after* rewrite. */
1067   u8 packet_data[64 - 1*sizeof(u32)];
1068 } ip4_forward_next_trace_t;
1069
1070 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1071 {
1072   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1073   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1074   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1075   uword indent = format_get_indent (s);
1076   s = format (s, "%U%U",
1077               format_white_space, indent,
1078               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1079   return s;
1080 }
1081
1082 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1083 {
1084   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1085   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1086   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1087   uword indent = format_get_indent (s);
1088
1089   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1090               t->fib_index, t->dpo_index, t->flow_hash);
1091   s = format (s, "\n%U%U",
1092               format_white_space, indent,
1093               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1094   return s;
1095 }
1096
1097 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1098 {
1099   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1100   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1101   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1102   vnet_main_t * vnm = vnet_get_main();
1103   uword indent = format_get_indent (s);
1104
1105   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1106               t->fib_index, t->dpo_index, format_ip_adjacency,
1107               vnm, t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1108               t->flow_hash);
1109   s = format (s, "\n%U%U",
1110               format_white_space, indent,
1111               format_ip_adjacency_packet_data,
1112               vnm, t->dpo_index,
1113               t->packet_data, sizeof (t->packet_data));
1114   return s;
1115 }
1116
1117 /* Common trace function for all ip4-forward next nodes. */
1118 void
1119 ip4_forward_next_trace (vlib_main_t * vm,
1120                         vlib_node_runtime_t * node,
1121                         vlib_frame_t * frame,
1122                         vlib_rx_or_tx_t which_adj_index)
1123 {
1124   u32 * from, n_left;
1125   ip4_main_t * im = &ip4_main;
1126
1127   n_left = frame->n_vectors;
1128   from = vlib_frame_vector_args (frame);
1129   
1130   while (n_left >= 4)
1131     {
1132       u32 bi0, bi1;
1133       vlib_buffer_t * b0, * b1;
1134       ip4_forward_next_trace_t * t0, * t1;
1135
1136       /* Prefetch next iteration. */
1137       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1138       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1139
1140       bi0 = from[0];
1141       bi1 = from[1];
1142
1143       b0 = vlib_get_buffer (vm, bi0);
1144       b1 = vlib_get_buffer (vm, bi1);
1145
1146       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1147         {
1148           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1149           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1150           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1151           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1152               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1153               vec_elt (im->fib_index_by_sw_if_index,
1154                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1155
1156           clib_memcpy (t0->packet_data,
1157                   vlib_buffer_get_current (b0),
1158                   sizeof (t0->packet_data));
1159         }
1160       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1161         {
1162           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1163           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1164           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1165           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1166               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1167               vec_elt (im->fib_index_by_sw_if_index,
1168                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1169           clib_memcpy (t1->packet_data,
1170                   vlib_buffer_get_current (b1),
1171                   sizeof (t1->packet_data));
1172         }
1173       from += 2;
1174       n_left -= 2;
1175     }
1176
1177   while (n_left >= 1)
1178     {
1179       u32 bi0;
1180       vlib_buffer_t * b0;
1181       ip4_forward_next_trace_t * t0;
1182
1183       bi0 = from[0];
1184
1185       b0 = vlib_get_buffer (vm, bi0);
1186
1187       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1188         {
1189           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1190           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1191           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1192           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1193               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1194               vec_elt (im->fib_index_by_sw_if_index,
1195                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1196           clib_memcpy (t0->packet_data,
1197                   vlib_buffer_get_current (b0),
1198                   sizeof (t0->packet_data));
1199         }
1200       from += 1;
1201       n_left -= 1;
1202     }
1203 }
1204
1205 static uword
1206 ip4_drop_or_punt (vlib_main_t * vm,
1207                   vlib_node_runtime_t * node,
1208                   vlib_frame_t * frame,
1209                   ip4_error_t error_code)
1210 {
1211   u32 * buffers = vlib_frame_vector_args (frame);
1212   uword n_packets = frame->n_vectors;
1213
1214   vlib_error_drop_buffers (vm, node,
1215                            buffers,
1216                            /* stride */ 1,
1217                            n_packets,
1218                            /* next */ 0,
1219                            ip4_input_node.index,
1220                            error_code);
1221
1222   if (node->flags & VLIB_NODE_FLAG_TRACE)
1223     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1224
1225   return n_packets;
1226 }
1227
1228 static uword
1229 ip4_drop (vlib_main_t * vm,
1230           vlib_node_runtime_t * node,
1231           vlib_frame_t * frame)
1232 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1233
1234 static uword
1235 ip4_punt (vlib_main_t * vm,
1236           vlib_node_runtime_t * node,
1237           vlib_frame_t * frame)
1238 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1239
1240 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1241   .function = ip4_drop,
1242   .name = "ip4-drop",
1243   .vector_size = sizeof (u32),
1244
1245   .format_trace = format_ip4_forward_next_trace,
1246
1247   .n_next_nodes = 1,
1248   .next_nodes = {
1249     [0] = "error-drop",
1250   },
1251 };
1252
1253 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1254
1255 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1256   .function = ip4_punt,
1257   .name = "ip4-punt",
1258   .vector_size = sizeof (u32),
1259
1260   .format_trace = format_ip4_forward_next_trace,
1261
1262   .n_next_nodes = 1,
1263   .next_nodes = {
1264     [0] = "error-punt",
1265   },
1266 };
1267
1268 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1269
1270 /* Compute TCP/UDP/ICMP4 checksum in software. */
1271 u16
1272 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1273                               ip4_header_t * ip0)
1274 {
1275   ip_csum_t sum0;
1276   u32 ip_header_length, payload_length_host_byte_order;
1277   u32 n_this_buffer, n_bytes_left;
1278   u16 sum16;
1279   void * data_this_buffer;
1280   
1281   /* Initialize checksum with ip header. */
1282   ip_header_length = ip4_header_bytes (ip0);
1283   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1284   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1285
1286   if (BITS (uword) == 32)
1287     {
1288       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1289       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1290     }
1291   else
1292     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1293
1294   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1295   data_this_buffer = (void *) ip0 + ip_header_length;
1296   if (n_this_buffer + ip_header_length > p0->current_length)
1297     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1298   while (1)
1299     {
1300       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1301       n_bytes_left -= n_this_buffer;
1302       if (n_bytes_left == 0)
1303         break;
1304
1305       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1306       p0 = vlib_get_buffer (vm, p0->next_buffer);
1307       data_this_buffer = vlib_buffer_get_current (p0);
1308       n_this_buffer = p0->current_length;
1309     }
1310
1311   sum16 = ~ ip_csum_fold (sum0);
1312
1313   return sum16;
1314 }
1315
1316 static u32
1317 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1318 {
1319   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1320   udp_header_t * udp0;
1321   u16 sum16;
1322
1323   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1324           || ip0->protocol == IP_PROTOCOL_UDP);
1325
1326   udp0 = (void *) (ip0 + 1);
1327   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1328     {
1329       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1330                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1331       return p0->flags;
1332     }
1333
1334   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1335
1336   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1337                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1338
1339   return p0->flags;
1340 }
1341
1342 static uword
1343 ip4_local (vlib_main_t * vm,
1344            vlib_node_runtime_t * node,
1345            vlib_frame_t * frame)
1346 {
1347   ip4_main_t * im = &ip4_main;
1348   ip_lookup_main_t * lm = &im->lookup_main;
1349   ip_local_next_t next_index;
1350   u32 * from, * to_next, n_left_from, n_left_to_next;
1351   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1352
1353   from = vlib_frame_vector_args (frame);
1354   n_left_from = frame->n_vectors;
1355   next_index = node->cached_next_index;
1356   
1357   if (node->flags & VLIB_NODE_FLAG_TRACE)
1358     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1359
1360   while (n_left_from > 0)
1361     {
1362       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1363
1364       while (n_left_from >= 4 && n_left_to_next >= 2)
1365         {
1366           vlib_buffer_t * p0, * p1;
1367           ip4_header_t * ip0, * ip1;
1368           udp_header_t * udp0, * udp1;
1369           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1370           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1371           const dpo_id_t *dpo0, *dpo1;
1372           const load_balance_t *lb0, *lb1;
1373           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1374           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1375           i32 len_diff0, len_diff1;
1376           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1377           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1378           u8 enqueue_code;
1379       
1380           pi0 = to_next[0] = from[0];
1381           pi1 = to_next[1] = from[1];
1382           from += 2;
1383           n_left_from -= 2;
1384           to_next += 2;
1385           n_left_to_next -= 2;
1386       
1387           p0 = vlib_get_buffer (vm, pi0);
1388           p1 = vlib_get_buffer (vm, pi1);
1389
1390           ip0 = vlib_buffer_get_current (p0);
1391           ip1 = vlib_buffer_get_current (p1);
1392
1393           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1394                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1395           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1396                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1397
1398           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1399           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1400
1401           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1402
1403           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1404           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1405
1406           /* Treat IP frag packets as "experimental" protocol for now
1407              until support of IP frag reassembly is implemented */
1408           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1409           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1410           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1411           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1412           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1413           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1414
1415           flags0 = p0->flags;
1416           flags1 = p1->flags;
1417
1418           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1419           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1420
1421           udp0 = ip4_next_header (ip0);
1422           udp1 = ip4_next_header (ip1);
1423
1424           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1425           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1426           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1427
1428           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1429           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1430
1431           /* Verify UDP length. */
1432           ip_len0 = clib_net_to_host_u16 (ip0->length);
1433           ip_len1 = clib_net_to_host_u16 (ip1->length);
1434           udp_len0 = clib_net_to_host_u16 (udp0->length);
1435           udp_len1 = clib_net_to_host_u16 (udp1->length);
1436
1437           len_diff0 = ip_len0 - udp_len0;
1438           len_diff1 = ip_len1 - udp_len1;
1439
1440           len_diff0 = is_udp0 ? len_diff0 : 0;
1441           len_diff1 = is_udp1 ? len_diff1 : 0;
1442
1443           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1444                                 & good_tcp_udp0 & good_tcp_udp1)))
1445             {
1446               if (is_tcp_udp0)
1447                 {
1448                   if (is_tcp_udp0
1449                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1450                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1451                   good_tcp_udp0 =
1452                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1453                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1454                 }
1455               if (is_tcp_udp1)
1456                 {
1457                   if (is_tcp_udp1
1458                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1459                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1460                   good_tcp_udp1 =
1461                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1462                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1463                 }
1464             }
1465
1466           good_tcp_udp0 &= len_diff0 >= 0;
1467           good_tcp_udp1 &= len_diff1 >= 0;
1468
1469           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1470           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1471
1472           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1473
1474           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1475           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1476
1477           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1478           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1479                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1480                     : error0);
1481           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1482                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1483                     : error1);
1484
1485           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1486           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1487           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1488           leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1489
1490           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1491           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1492
1493           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1494           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1495
1496           lb0 = load_balance_get(lbi0);
1497           lb1 = load_balance_get(lbi1);
1498           dpo0 = load_balance_get_bucket_i(lb0, 0);
1499           dpo1 = load_balance_get_bucket_i(lb1, 0);
1500
1501           /* 
1502            * Must have a route to source otherwise we drop the packet.
1503            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1504            */
1505           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1506                     && dpo0->dpoi_type != DPO_ADJACENCY
1507                     && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1508                     && dpo0->dpoi_type != DPO_RECEIVE
1509                     && dpo0->dpoi_type != DPO_DROP
1510                     && dpo0->dpoi_type != DPO_ADJACENCY_GLEAN
1511                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1512                     ? IP4_ERROR_SRC_LOOKUP_MISS
1513                     : error0);
1514           error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1515                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : 
1516                     error0);
1517           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1518                     && dpo1->dpoi_type != DPO_ADJACENCY
1519                     && dpo1->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1520                     && dpo1->dpoi_type != DPO_RECEIVE
1521                     && dpo1->dpoi_type != DPO_DROP
1522                     && dpo1->dpoi_type != DPO_ADJACENCY_GLEAN
1523                     && ip1->dst_address.as_u32 != 0xFFFFFFFF
1524                     ? IP4_ERROR_SRC_LOOKUP_MISS
1525                     : error1);
1526           error1 = (dpo0->dpoi_type == DPO_RECEIVE ?
1527                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : 
1528                     error1);
1529
1530           next0 = lm->local_next_by_ip_protocol[proto0];
1531           next1 = lm->local_next_by_ip_protocol[proto1];
1532
1533           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1534           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1535
1536           p0->error = error0 ? error_node->errors[error0] : 0;
1537           p1->error = error1 ? error_node->errors[error1] : 0;
1538
1539           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1540
1541           if (PREDICT_FALSE (enqueue_code != 0))
1542             {
1543               switch (enqueue_code)
1544                 {
1545                 case 1:
1546                   /* A B A */
1547                   to_next[-2] = pi1;
1548                   to_next -= 1;
1549                   n_left_to_next += 1;
1550                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1551                   break;
1552
1553                 case 2:
1554                   /* A A B */
1555                   to_next -= 1;
1556                   n_left_to_next += 1;
1557                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1558                   break;
1559
1560                 case 3:
1561                   /* A B B or A B C */
1562                   to_next -= 2;
1563                   n_left_to_next += 2;
1564                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1565                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1566                   if (next0 == next1)
1567                     {
1568                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1569                       next_index = next1;
1570                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1571                     }
1572                   break;
1573                 }
1574             }
1575         }
1576
1577       while (n_left_from > 0 && n_left_to_next > 0)
1578         {
1579           vlib_buffer_t * p0;
1580           ip4_header_t * ip0;
1581           udp_header_t * udp0;
1582           ip4_fib_mtrie_t * mtrie0;
1583           ip4_fib_mtrie_leaf_t leaf0;
1584           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1585           i32 len_diff0;
1586           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1587           load_balance_t *lb0;
1588           const dpo_id_t *dpo0;
1589
1590           pi0 = to_next[0] = from[0];
1591           from += 1;
1592           n_left_from -= 1;
1593           to_next += 1;
1594           n_left_to_next -= 1;
1595       
1596           p0 = vlib_get_buffer (vm, pi0);
1597
1598           ip0 = vlib_buffer_get_current (p0);
1599
1600           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1601                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1602
1603           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1604
1605           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1606
1607           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1608
1609           /* Treat IP frag packets as "experimental" protocol for now
1610              until support of IP frag reassembly is implemented */
1611           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1612           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1613           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1614
1615           flags0 = p0->flags;
1616
1617           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1618
1619           udp0 = ip4_next_header (ip0);
1620
1621           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1622           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1623
1624           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1625
1626           /* Verify UDP length. */
1627           ip_len0 = clib_net_to_host_u16 (ip0->length);
1628           udp_len0 = clib_net_to_host_u16 (udp0->length);
1629
1630           len_diff0 = ip_len0 - udp_len0;
1631
1632           len_diff0 = is_udp0 ? len_diff0 : 0;
1633
1634           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1635             {
1636               if (is_tcp_udp0)
1637                 {
1638                   if (is_tcp_udp0
1639                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1640                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1641                   good_tcp_udp0 =
1642                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1643                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1644                 }
1645             }
1646
1647           good_tcp_udp0 &= len_diff0 >= 0;
1648
1649           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1650
1651           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1652
1653           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1654
1655           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1656           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1657                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1658                     : error0);
1659
1660           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1661           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1662
1663           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1664           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1665
1666           lb0 = load_balance_get(lbi0);
1667           dpo0 = load_balance_get_bucket_i(lb0, 0);
1668
1669           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1670               vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1671                   dpo0->dpoi_index;
1672
1673           /* Must have a route to source otherwise we drop the packet. */
1674           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1675                     && dpo0->dpoi_type != DPO_ADJACENCY
1676                     && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1677                     && dpo0->dpoi_type != DPO_RECEIVE
1678                     && dpo0->dpoi_type != DPO_DROP
1679                     && dpo0->dpoi_type != DPO_ADJACENCY_GLEAN
1680                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1681                     ? IP4_ERROR_SRC_LOOKUP_MISS
1682                     : error0);
1683           /* Packet originated from a local address => spoofing */
1684           error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1685                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : 
1686                     error0);
1687
1688           next0 = lm->local_next_by_ip_protocol[proto0];
1689
1690           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1691
1692           p0->error = error0? error_node->errors[error0] : 0;
1693
1694           if (PREDICT_FALSE (next0 != next_index))
1695             {
1696               n_left_to_next += 1;
1697               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1698
1699               next_index = next0;
1700               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1701               to_next[0] = pi0;
1702               to_next += 1;
1703               n_left_to_next -= 1;
1704             }
1705         }
1706   
1707       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1708     }
1709
1710   return frame->n_vectors;
1711 }
1712
1713 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1714   .function = ip4_local,
1715   .name = "ip4-local",
1716   .vector_size = sizeof (u32),
1717
1718   .format_trace = format_ip4_forward_next_trace,
1719
1720   .n_next_nodes = IP_LOCAL_N_NEXT,
1721   .next_nodes = {
1722     [IP_LOCAL_NEXT_DROP] = "error-drop",
1723     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1724     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1725     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1726   },
1727 };
1728
1729 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
1730
1731 void ip4_register_protocol (u32 protocol, u32 node_index)
1732 {
1733   vlib_main_t * vm = vlib_get_main();
1734   ip4_main_t * im = &ip4_main;
1735   ip_lookup_main_t * lm = &im->lookup_main;
1736
1737   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1738   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
1739 }
1740
1741 static clib_error_t *
1742 show_ip_local_command_fn (vlib_main_t * vm,
1743                           unformat_input_t * input,
1744                          vlib_cli_command_t * cmd)
1745 {
1746   ip4_main_t * im = &ip4_main;
1747   ip_lookup_main_t * lm = &im->lookup_main;
1748   int i;
1749
1750   vlib_cli_output (vm, "Protocols handled by ip4_local");
1751   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1752     {
1753       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1754         vlib_cli_output (vm, "%d", i);
1755     }
1756   return 0;
1757 }
1758
1759
1760
1761 VLIB_CLI_COMMAND (show_ip_local, static) = {
1762   .path = "show ip local",
1763   .function = show_ip_local_command_fn,
1764   .short_help = "Show ip local protocol table",
1765 };
1766
1767 always_inline uword
1768 ip4_arp_inline (vlib_main_t * vm,
1769                 vlib_node_runtime_t * node,
1770                 vlib_frame_t * frame,
1771                 int is_glean)
1772 {
1773   vnet_main_t * vnm = vnet_get_main();
1774   ip4_main_t * im = &ip4_main;
1775   ip_lookup_main_t * lm = &im->lookup_main;
1776   u32 * from, * to_next_drop;
1777   uword n_left_from, n_left_to_next_drop, next_index;
1778   static f64 time_last_seed_change = -1e100;
1779   static u32 hash_seeds[3];
1780   static uword hash_bitmap[256 / BITS (uword)]; 
1781   f64 time_now;
1782
1783   if (node->flags & VLIB_NODE_FLAG_TRACE)
1784     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1785
1786   time_now = vlib_time_now (vm);
1787   if (time_now - time_last_seed_change > 1e-3)
1788     {
1789       uword i;
1790       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1791                                              sizeof (hash_seeds));
1792       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1793         hash_seeds[i] = r[i];
1794
1795       /* Mark all hash keys as been no-seen before. */
1796       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1797         hash_bitmap[i] = 0;
1798
1799       time_last_seed_change = time_now;
1800     }
1801
1802   from = vlib_frame_vector_args (frame);
1803   n_left_from = frame->n_vectors;
1804   next_index = node->cached_next_index;
1805   if (next_index == IP4_ARP_NEXT_DROP)
1806     next_index = IP4_ARP_N_NEXT; /* point to first interface */
1807
1808   while (n_left_from > 0)
1809     {
1810       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1811                            to_next_drop, n_left_to_next_drop);
1812
1813       while (n_left_from > 0 && n_left_to_next_drop > 0)
1814         {
1815           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1816           ip_adjacency_t * adj0;
1817           vlib_buffer_t * p0;
1818           ip4_header_t * ip0;
1819           uword bm0;
1820
1821           pi0 = from[0];
1822
1823           p0 = vlib_get_buffer (vm, pi0);
1824
1825           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1826           adj0 = ip_get_adjacency (lm, adj_index0);
1827           ip0 = vlib_buffer_get_current (p0);
1828
1829           /*
1830            * this is the Glean case, so we are ARPing for the
1831            * packet's destination 
1832            */
1833           a0 = hash_seeds[0];
1834           b0 = hash_seeds[1];
1835           c0 = hash_seeds[2];
1836
1837           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1838           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1839
1840           if (is_glean)
1841           {
1842               a0 ^= ip0->dst_address.data_u32;
1843           }
1844           else
1845           {
1846               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1847           }
1848           b0 ^= sw_if_index0;
1849
1850           hash_v3_finalize32 (a0, b0, c0);
1851
1852           c0 &= BITS (hash_bitmap) - 1;
1853           c0 = c0 / BITS (uword);
1854           m0 = (uword) 1 << (c0 % BITS (uword));
1855
1856           bm0 = hash_bitmap[c0];
1857           drop0 = (bm0 & m0) != 0;
1858
1859           /* Mark it as seen. */
1860           hash_bitmap[c0] = bm0 | m0;
1861
1862           from += 1;
1863           n_left_from -= 1;
1864           to_next_drop[0] = pi0;
1865           to_next_drop += 1;
1866           n_left_to_next_drop -= 1;
1867
1868           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1869
1870           if (drop0)
1871             continue;
1872
1873           /* 
1874            * Can happen if the control-plane is programming tables
1875            * with traffic flowing; at least that's today's lame excuse.
1876            */
1877           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1878               (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1879           {
1880             p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1881           }
1882           else
1883           /* Send ARP request. */
1884           {
1885             u32 bi0 = 0;
1886             vlib_buffer_t * b0;
1887             ethernet_arp_header_t * h0;
1888             vnet_hw_interface_t * hw_if0;
1889
1890             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1891
1892             /* Add rewrite/encap string for ARP packet. */
1893             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1894
1895             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1896
1897             /* Src ethernet address in ARP header. */
1898             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1899                     sizeof (h0->ip4_over_ethernet[0].ethernet));
1900
1901             if (is_glean)
1902             {
1903                 /* The interface's source address is stashed in the Glean Adj */
1904                 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1905
1906                 /* Copy in destination address we are requesting. This is the
1907                 * glean case, so it's the packet's destination.*/
1908                 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1909             }
1910             else
1911             {
1912                 /* Src IP address in ARP header. */
1913                 if (ip4_src_address_for_packet(lm, sw_if_index0,
1914                                                &h0->ip4_over_ethernet[0].ip4))
1915                 {
1916                     /* No source address available */
1917                     p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1918                     vlib_buffer_free(vm, &bi0, 1);
1919                     continue;
1920                 }
1921
1922                 /* Copy in destination address we are requesting from the
1923                    incomplete adj */
1924                 h0->ip4_over_ethernet[1].ip4.data_u32 =
1925                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1926             }
1927
1928             vlib_buffer_copy_trace_flag (vm, p0, bi0);
1929             b0 = vlib_get_buffer (vm, bi0);
1930             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1931
1932             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1933
1934             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1935           }
1936         }
1937
1938       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1939     }
1940
1941   return frame->n_vectors;
1942 }
1943
1944 static uword
1945 ip4_arp (vlib_main_t * vm,
1946          vlib_node_runtime_t * node,
1947          vlib_frame_t * frame)
1948 {
1949     return (ip4_arp_inline(vm, node, frame, 0));
1950 }
1951
1952 static uword
1953 ip4_glean (vlib_main_t * vm,
1954            vlib_node_runtime_t * node,
1955            vlib_frame_t * frame)
1956 {
1957     return (ip4_arp_inline(vm, node, frame, 1));
1958 }
1959
1960 static char * ip4_arp_error_strings[] = {
1961   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1962   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1963   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1964   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1965   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1966   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1967 };
1968
1969 VLIB_REGISTER_NODE (ip4_arp_node) = {
1970   .function = ip4_arp,
1971   .name = "ip4-arp",
1972   .vector_size = sizeof (u32),
1973
1974   .format_trace = format_ip4_forward_next_trace,
1975
1976   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1977   .error_strings = ip4_arp_error_strings,
1978
1979   .n_next_nodes = IP4_ARP_N_NEXT,
1980   .next_nodes = {
1981     [IP4_ARP_NEXT_DROP] = "error-drop",
1982   },
1983 };
1984
1985 VLIB_REGISTER_NODE (ip4_glean_node) = {
1986   .function = ip4_glean,
1987   .name = "ip4-glean",
1988   .vector_size = sizeof (u32),
1989
1990   .format_trace = format_ip4_forward_next_trace,
1991
1992   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1993   .error_strings = ip4_arp_error_strings,
1994
1995   .n_next_nodes = IP4_ARP_N_NEXT,
1996   .next_nodes = {
1997     [IP4_ARP_NEXT_DROP] = "error-drop",
1998   },
1999 };
2000
2001 #define foreach_notrace_ip4_arp_error           \
2002 _(DROP)                                         \
2003 _(REQUEST_SENT)                                 \
2004 _(REPLICATE_DROP)                               \
2005 _(REPLICATE_FAIL)
2006
2007 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2008 {
2009   vlib_node_runtime_t *rt = 
2010     vlib_node_get_runtime (vm, ip4_arp_node.index);
2011
2012   /* don't trace ARP request packets */
2013 #define _(a)                                    \
2014     vnet_pcap_drop_trace_filter_add_del         \
2015         (rt->errors[IP4_ARP_ERROR_##a],         \
2016          1 /* is_add */);
2017     foreach_notrace_ip4_arp_error;
2018 #undef _
2019   return 0;
2020 }
2021
2022 VLIB_INIT_FUNCTION(arp_notrace_init);
2023
2024
2025 /* Send an ARP request to see if given destination is reachable on given interface. */
2026 clib_error_t *
2027 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2028 {
2029   vnet_main_t * vnm = vnet_get_main();
2030   ip4_main_t * im = &ip4_main;
2031   ethernet_arp_header_t * h;
2032   ip4_address_t * src;
2033   ip_interface_address_t * ia;
2034   ip_adjacency_t * adj;
2035   vnet_hw_interface_t * hi;
2036   vnet_sw_interface_t * si;
2037   vlib_buffer_t * b;
2038   u32 bi = 0;
2039
2040   si = vnet_get_sw_interface (vnm, sw_if_index);
2041
2042   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2043     {
2044       return clib_error_return (0, "%U: interface %U down",
2045                                 format_ip4_address, dst, 
2046                                 format_vnet_sw_if_index_name, vnm, 
2047                                 sw_if_index);
2048     }
2049
2050   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2051   if (! src)
2052     {
2053       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2054       return clib_error_return 
2055         (0, "no matching interface address for destination %U (interface %U)",
2056          format_ip4_address, dst,
2057          format_vnet_sw_if_index_name, vnm, sw_if_index);
2058     }
2059
2060   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2061
2062   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2063
2064   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2065
2066   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2067
2068   h->ip4_over_ethernet[0].ip4 = src[0];
2069   h->ip4_over_ethernet[1].ip4 = dst[0];
2070
2071   b = vlib_get_buffer (vm, bi);
2072   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2073
2074   /* Add encapsulation string for software interface (e.g. ethernet header). */
2075   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2076   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2077
2078   {
2079     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2080     u32 * to_next = vlib_frame_vector_args (f);
2081     to_next[0] = bi;
2082     f->n_vectors = 1;
2083     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2084   }
2085
2086   return /* no error */ 0;
2087 }
2088
/*
 * Next-node slots used by the ip4 rewrite code.  DROP is deliberately 0:
 * the rewrite loop tests "next0 && next0_override", which treats a zero
 * next as "being dropped, do not override".
 */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,       /* default next when an error is set */
  IP4_REWRITE_NEXT_ARP = 1,        /* override for ARP adjacencies on locally received packets */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2, /* TTL expired while forwarding */
} ip4_rewrite_next_t;
2094
2095 always_inline uword
2096 ip4_rewrite_inline (vlib_main_t * vm,
2097                     vlib_node_runtime_t * node,
2098                     vlib_frame_t * frame,
2099                     int rewrite_for_locally_received_packets,
2100                     int is_midchain)
2101 {
2102   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2103   u32 * from = vlib_frame_vector_args (frame);
2104   u32 n_left_from, n_left_to_next, * to_next, next_index;
2105   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2106   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2107   ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2108
2109   n_left_from = frame->n_vectors;
2110   next_index = node->cached_next_index;
2111   u32 cpu_index = os_get_cpu_number();
2112   
2113   while (n_left_from > 0)
2114     {
2115       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2116
2117       while (n_left_from >= 4 && n_left_to_next >= 2)
2118         {
2119           ip_adjacency_t * adj0, * adj1;
2120           vlib_buffer_t * p0, * p1;
2121           ip4_header_t * ip0, * ip1;
2122           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2123           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2124           u32 next0_override, next1_override;
2125           u32 tx_sw_if_index0, tx_sw_if_index1;
2126
2127           if (rewrite_for_locally_received_packets)
2128               next0_override = next1_override = 0;
2129
2130           /* Prefetch next iteration. */
2131           {
2132             vlib_buffer_t * p2, * p3;
2133
2134             p2 = vlib_get_buffer (vm, from[2]);
2135             p3 = vlib_get_buffer (vm, from[3]);
2136
2137             vlib_prefetch_buffer_header (p2, STORE);
2138             vlib_prefetch_buffer_header (p3, STORE);
2139
2140             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2141             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2142           }
2143
2144           pi0 = to_next[0] = from[0];
2145           pi1 = to_next[1] = from[1];
2146
2147           from += 2;
2148           n_left_from -= 2;
2149           to_next += 2;
2150           n_left_to_next -= 2;
2151       
2152           p0 = vlib_get_buffer (vm, pi0);
2153           p1 = vlib_get_buffer (vm, pi1);
2154
2155           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2156           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2157
2158           /* We should never rewrite a pkt using the MISS adjacency */
2159           ASSERT(adj_index0 && adj_index1);
2160
2161           ip0 = vlib_buffer_get_current (p0);
2162           ip1 = vlib_buffer_get_current (p1);
2163
2164           error0 = error1 = IP4_ERROR_NONE;
2165           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2166
2167           /* Decrement TTL & update checksum.
2168              Works either endian, so no need for byte swap. */
2169           if (! rewrite_for_locally_received_packets)
2170             {
2171               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2172
2173               /* Input node should have reject packets with ttl 0. */
2174               ASSERT (ip0->ttl > 0);
2175               ASSERT (ip1->ttl > 0);
2176
2177               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2178               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2179
2180               checksum0 += checksum0 >= 0xffff;
2181               checksum1 += checksum1 >= 0xffff;
2182
2183               ip0->checksum = checksum0;
2184               ip1->checksum = checksum1;
2185
2186               ttl0 -= 1;
2187               ttl1 -= 1;
2188
2189               ip0->ttl = ttl0;
2190               ip1->ttl = ttl1;
2191
2192               /*
2193                * If the ttl drops below 1 when forwarding, generate
2194                * an ICMP response.
2195                */
2196               if (PREDICT_FALSE(ttl0 <= 0))
2197                 {
2198                   error0 = IP4_ERROR_TIME_EXPIRED;
2199                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2200                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2201                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2202                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2203                 }
2204               if (PREDICT_FALSE(ttl1 <= 0))
2205                 {
2206                   error1 = IP4_ERROR_TIME_EXPIRED;
2207                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2208                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2209                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2210                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2211                 }
2212
2213               /* Verify checksum. */
2214               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2215               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2216             }
2217
2218           /* Rewrite packet header and updates lengths. */
2219           adj0 = ip_get_adjacency (lm, adj_index0);
2220           adj1 = ip_get_adjacency (lm, adj_index1);
2221       
2222           if (rewrite_for_locally_received_packets)
2223             {
2224               if (PREDICT_FALSE(adj0->lookup_next_index
2225                                 == IP_LOOKUP_NEXT_ARP))
2226                 next0_override = IP4_REWRITE_NEXT_ARP;
2227               if (PREDICT_FALSE(adj1->lookup_next_index
2228                                 == IP_LOOKUP_NEXT_ARP))
2229                 next1_override = IP4_REWRITE_NEXT_ARP;
2230             }
2231
2232           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2233           rw_len0 = adj0[0].rewrite_header.data_bytes;
2234           rw_len1 = adj1[0].rewrite_header.data_bytes;
2235           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2236           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2237
2238           /* Check MTU of outgoing interface. */
2239           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2240                     ? IP4_ERROR_MTU_EXCEEDED
2241                     : error0);
2242           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2243                     ? IP4_ERROR_MTU_EXCEEDED
2244                     : error1);
2245
2246           next0 = (error0 == IP4_ERROR_NONE)
2247             ? adj0[0].rewrite_header.next_index : next0;
2248
2249           if (rewrite_for_locally_received_packets)
2250               next0 = next0 && next0_override ? next0_override : next0;
2251
2252           next1 = (error1 == IP4_ERROR_NONE)
2253             ? adj1[0].rewrite_header.next_index : next1;
2254
2255           if (rewrite_for_locally_received_packets)
2256               next1 = next1 && next1_override ? next1_override : next1;
2257
2258           /* 
2259            * We've already accounted for an ethernet_header_t elsewhere
2260            */
2261           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2262               vlib_increment_combined_counter 
2263                   (&adjacency_counters,
2264                    cpu_index, adj_index0, 
2265                    /* packet increment */ 0,
2266                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2267
2268           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2269               vlib_increment_combined_counter 
2270                   (&adjacency_counters,
2271                    cpu_index, adj_index1, 
2272                    /* packet increment */ 0,
2273                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2274
2275           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2276            * to see the IP headerr */
2277           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2278             {
2279               p0->current_data -= rw_len0;
2280               p0->current_length += rw_len0;
2281               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2282               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2283                   tx_sw_if_index0;
2284
2285               if (PREDICT_FALSE 
2286                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2287                                     tx_sw_if_index0)))
2288                 {
2289                   p0->current_config_index = 
2290                     vec_elt (cm->config_index_by_sw_if_index, 
2291                              tx_sw_if_index0);
2292                   vnet_get_config_data (&cm->config_main,
2293                                         &p0->current_config_index,
2294                                         &next0,
2295                                         /* # bytes of config data */ 0);
2296                 }
2297             }
2298           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2299             {
2300               p1->current_data -= rw_len1;
2301               p1->current_length += rw_len1;
2302
2303               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2304               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2305                   tx_sw_if_index1;
2306
2307               if (PREDICT_FALSE 
2308                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2309                                     tx_sw_if_index1)))
2310                 {
2311                   p1->current_config_index = 
2312                     vec_elt (cm->config_index_by_sw_if_index, 
2313                              tx_sw_if_index1);
2314                   vnet_get_config_data (&cm->config_main,
2315                                         &p1->current_config_index,
2316                                         &next1,
2317                                         /* # bytes of config data */ 0);
2318                 }
2319             }
2320
2321           /* Guess we are only writing on simple Ethernet header. */
2322           vnet_rewrite_two_headers (adj0[0], adj1[0],
2323                                     ip0, ip1,
2324                                     sizeof (ethernet_header_t));
2325
2326           if (is_midchain)
2327           {
2328               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2329               adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2330           }
2331       
2332           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2333                                            to_next, n_left_to_next,
2334                                            pi0, pi1, next0, next1);
2335         }
2336
2337       while (n_left_from > 0 && n_left_to_next > 0)
2338         {
2339           ip_adjacency_t * adj0;
2340           vlib_buffer_t * p0;
2341           ip4_header_t * ip0;
2342           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2343           u32 next0_override;
2344           u32 tx_sw_if_index0;
2345
2346           if (rewrite_for_locally_received_packets)
2347               next0_override = 0;
2348
2349           pi0 = to_next[0] = from[0];
2350
2351           p0 = vlib_get_buffer (vm, pi0);
2352
2353           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2354
2355           /* We should never rewrite a pkt using the MISS adjacency */
2356           ASSERT(adj_index0);
2357
2358           adj0 = ip_get_adjacency (lm, adj_index0);
2359       
2360           ip0 = vlib_buffer_get_current (p0);
2361
2362           error0 = IP4_ERROR_NONE;
2363           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2364
2365           /* Decrement TTL & update checksum. */
2366           if (! rewrite_for_locally_received_packets)
2367             {
2368               i32 ttl0 = ip0->ttl;
2369
2370               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2371
2372               checksum0 += checksum0 >= 0xffff;
2373
2374               ip0->checksum = checksum0;
2375
2376               ASSERT (ip0->ttl > 0);
2377
2378               ttl0 -= 1;
2379
2380               ip0->ttl = ttl0;
2381
2382               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2383
2384               if (PREDICT_FALSE(ttl0 <= 0))
2385                 {
2386                   /*
2387                    * If the ttl drops below 1 when forwarding, generate
2388                    * an ICMP response.
2389                    */
2390                   error0 = IP4_ERROR_TIME_EXPIRED;
2391                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2392                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2393                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2394                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2395                 }
2396             }
2397
2398           if (rewrite_for_locally_received_packets)
2399             {
2400               /* 
2401                * We have to override the next_index in ARP adjacencies,
2402                * because they're set up for ip4-arp, not this node...
2403                */
2404               if (PREDICT_FALSE(adj0->lookup_next_index
2405                                 == IP_LOOKUP_NEXT_ARP))
2406                 next0_override = IP4_REWRITE_NEXT_ARP;
2407             }
2408
2409           /* Guess we are only writing on simple Ethernet header. */
2410           vnet_rewrite_one_header (adj0[0], ip0, 
2411                                    sizeof (ethernet_header_t));
2412           
2413           /* Update packet buffer attributes/set output interface. */
2414           rw_len0 = adj0[0].rewrite_header.data_bytes;
2415           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2416           
2417           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2418               vlib_increment_combined_counter 
2419                   (&adjacency_counters,
2420                    cpu_index, adj_index0, 
2421                    /* packet increment */ 0,
2422                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2423           
2424           /* Check MTU of outgoing interface. */
2425           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2426                     > adj0[0].rewrite_header.max_l3_packet_bytes
2427                     ? IP4_ERROR_MTU_EXCEEDED
2428                     : error0);
2429
2430           p0->error = error_node->errors[error0];
2431
2432           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2433            * to see the IP headerr */
2434           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2435             {
2436               p0->current_data -= rw_len0;
2437               p0->current_length += rw_len0;
2438               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2439
2440               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2441               next0 = adj0[0].rewrite_header.next_index;
2442
2443               if (is_midchain)
2444                 {
2445                   adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2446                 }
2447
2448               if (PREDICT_FALSE 
2449                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2450                                     tx_sw_if_index0)))
2451                   {
2452                     p0->current_config_index = 
2453                       vec_elt (cm->config_index_by_sw_if_index, 
2454                                tx_sw_if_index0);
2455                     vnet_get_config_data (&cm->config_main,
2456                                           &p0->current_config_index,
2457                                           &next0,
2458                                           /* # bytes of config data */ 0);
2459                   }
2460             }
2461
2462           if (rewrite_for_locally_received_packets)
2463               next0 = next0 && next0_override ? next0_override : next0;
2464
2465           from += 1;
2466           n_left_from -= 1;
2467           to_next += 1;
2468           n_left_to_next -= 1;
2469       
2470           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2471                                            to_next, n_left_to_next,
2472                                            pi0, next0);
2473         }
2474   
2475       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2476     }
2477
2478   /* Need to do trace after rewrites to pick up new packet data. */
2479   if (node->flags & VLIB_NODE_FLAG_TRACE)
2480     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2481
2482   return frame->n_vectors;
2483 }
2484
2485
2486 /** @brief IPv4 transit rewrite node.
2487     @node ip4-rewrite-transit
2488
2489     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2490     header checksum, fetch the ip adjacency, check the outbound mtu,
2491     apply the adjacency rewrite, and send pkts to the adjacency
2492     rewrite header's rewrite_next_index.
2493
2494     @param vm vlib_main_t corresponding to the current thread
2495     @param node vlib_node_runtime_t
2496     @param frame vlib_frame_t whose contents should be dispatched
2497
2498     @par Graph mechanics: buffer metadata, next index usage
2499
2500     @em Uses:
2501     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2502         - the rewrite adjacency index
2503     - <code>adj->lookup_next_index</code>
2504         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2505           the packet will be dropped. 
2506     - <code>adj->rewrite_header</code>
2507         - Rewrite string length, rewrite string, next_index
2508
2509     @em Sets:
2510     - <code>b->current_data, b->current_length</code>
2511         - Updated net of applying the rewrite string
2512
2513     <em>Next Indices:</em>
2514     - <code> adj->rewrite_header.next_index </code>
2515       or @c error-drop 
2516 */
2517 static uword
2518 ip4_rewrite_transit (vlib_main_t * vm,
2519                      vlib_node_runtime_t * node,
2520                      vlib_frame_t * frame)
2521 {
2522   return ip4_rewrite_inline (vm, node, frame,
2523                              /* rewrite_for_locally_received_packets */ 0, 0);
2524 }
2525
2526 /** @brief IPv4 local rewrite node.
2527     @node ip4-rewrite-local
2528
2529     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2530     the outbound interface mtu, apply the adjacency rewrite, and send
2531     pkts to the adjacency rewrite header's rewrite_next_index. Deal
2532     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
2533     dst = interface addr."
2534
2535     @param vm vlib_main_t corresponding to the current thread
2536     @param node vlib_node_runtime_t
2537     @param frame vlib_frame_t whose contents should be dispatched
2538
2539     @par Graph mechanics: buffer metadata, next index usage
2540
2541     @em Uses:
2542     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2543         - the rewrite adjacency index
2544     - <code>adj->lookup_next_index</code>
2545         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2546           the packet will be dropped. 
2547     - <code>adj->rewrite_header</code>
2548         - Rewrite string length, rewrite string, next_index
2549
2550     @em Sets:
2551     - <code>b->current_data, b->current_length</code>
2552         - Updated net of applying the rewrite string
2553
2554     <em>Next Indices:</em>
2555     - <code> adj->rewrite_header.next_index </code>
2556       or @c error-drop 
2557 */
2558
2559 static uword
2560 ip4_rewrite_local (vlib_main_t * vm,
2561                    vlib_node_runtime_t * node,
2562                    vlib_frame_t * frame)
2563 {
2564   return ip4_rewrite_inline (vm, node, frame,
2565                              /* rewrite_for_locally_received_packets */ 1, 0);
2566 }
2567
2568 static uword
2569 ip4_midchain (vlib_main_t * vm,
2570               vlib_node_runtime_t * node,
2571               vlib_frame_t * frame)
2572 {
2573   return ip4_rewrite_inline (vm, node, frame,
2574                              /* rewrite_for_locally_received_packets */ 0, 1);
2575 }
2576
2577 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2578   .function = ip4_rewrite_transit,
2579   .name = "ip4-rewrite-transit",
2580   .vector_size = sizeof (u32),
2581
2582   .format_trace = format_ip4_rewrite_trace,
2583
2584   .n_next_nodes = 3,
2585   .next_nodes = {
2586     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2587     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2588     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2589   },
2590 };
2591
2592 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2593
2594 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2595   .function = ip4_midchain,
2596   .name = "ip4-midchain",
2597   .vector_size = sizeof (u32),
2598
2599   .format_trace = format_ip4_forward_next_trace,
2600
2601   .sibling_of = "ip4-rewrite-transit",
2602 };
2603
2604 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
2605
2606 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2607   .function = ip4_rewrite_local,
2608   .name = "ip4-rewrite-local",
2609   .vector_size = sizeof (u32),
2610
2611   .sibling_of = "ip4-rewrite-transit",
2612
2613   .format_trace = format_ip4_rewrite_trace,
2614
2615   .n_next_nodes = 0,
2616 };
2617
2618 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2619
2620 static clib_error_t *
2621 add_del_interface_table (vlib_main_t * vm,
2622                          unformat_input_t * input,
2623                          vlib_cli_command_t * cmd)
2624 {
2625   vnet_main_t * vnm = vnet_get_main();
2626   clib_error_t * error = 0;
2627   u32 sw_if_index, table_id;
2628
2629   sw_if_index = ~0;
2630
2631   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2632     {
2633       error = clib_error_return (0, "unknown interface `%U'",
2634                                  format_unformat_error, input);
2635       goto done;
2636     }
2637
2638   if (unformat (input, "%d", &table_id))
2639     ;
2640   else
2641     {
2642       error = clib_error_return (0, "expected table id `%U'",
2643                                  format_unformat_error, input);
2644       goto done;
2645     }
2646
2647   {
2648     ip4_main_t * im = &ip4_main;
2649     u32 fib_index;
2650
2651     fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2652                                                    table_id);
2653
2654     //
2655     // FIXME-LATER
2656     //  changing an interface's table has consequences for any connecteds
2657     //  and adj-fibs already installed.
2658     //
2659     vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2660     im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2661   }
2662
2663  done:
2664   return error;
2665 }
2666
2667 /*?
2668  * Place the indicated interface into the supplied VRF
2669  *
2670  * @cliexpar
2671  * @cliexstart{set interface ip table}
2672  *
2673  *  vpp# set interface ip table GigabitEthernet2/0/0 2
2674  *
2675  * Interface addresses added after setting the interface IP table end up in the indicated VRF table.
2676  * Predictable but potentially counter-intuitive results occur if you provision interface addresses in multiple FIBs.
2677  * Upon RX, packets will be processed in the last IP table ID provisioned.
2678  * It might be marginally useful to evade source RPF drops to put an interface address into multiple FIBs.
2679  * @cliexend
2680  ?*/
2681 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2682   .path = "set interface ip table",
2683   .function = add_del_interface_table,
2684   .short_help = "Add/delete FIB table id for interface",
2685 };
2686
2687
2688 static uword
2689 ip4_lookup_multicast (vlib_main_t * vm,
2690                       vlib_node_runtime_t * node,
2691                       vlib_frame_t * frame)
2692 {
2693   ip4_main_t * im = &ip4_main;
2694   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2695   u32 n_left_from, n_left_to_next, * from, * to_next;
2696   ip_lookup_next_t next;
2697   u32 cpu_index = os_get_cpu_number();
2698
2699   from = vlib_frame_vector_args (frame);
2700   n_left_from = frame->n_vectors;
2701   next = node->cached_next_index;
2702
2703   while (n_left_from > 0)
2704     {
2705       vlib_get_next_frame (vm, node, next,
2706                            to_next, n_left_to_next);
2707
2708       while (n_left_from >= 4 && n_left_to_next >= 2)
2709         {
2710           vlib_buffer_t * p0, * p1;
2711           u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2712           ip_lookup_next_t next0, next1;
2713           ip4_header_t * ip0, * ip1;
2714           u32 fib_index0, fib_index1;
2715           const dpo_id_t *dpo0, *dpo1;
2716           const load_balance_t * lb0, * lb1;
2717
2718           /* Prefetch next iteration. */
2719           {
2720             vlib_buffer_t * p2, * p3;
2721
2722             p2 = vlib_get_buffer (vm, from[2]);
2723             p3 = vlib_get_buffer (vm, from[3]);
2724
2725             vlib_prefetch_buffer_header (p2, LOAD);
2726             vlib_prefetch_buffer_header (p3, LOAD);
2727
2728             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2729             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2730           }
2731
2732           pi0 = to_next[0] = from[0];
2733           pi1 = to_next[1] = from[1];
2734
2735           p0 = vlib_get_buffer (vm, pi0);
2736           p1 = vlib_get_buffer (vm, pi1);
2737
2738           ip0 = vlib_buffer_get_current (p0);
2739           ip1 = vlib_buffer_get_current (p1);
2740
2741           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2742           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2743           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2744             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2745           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2746             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2747
2748           lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2749                                                &ip0->dst_address);
2750           lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2751                                                &ip1->dst_address);
2752
2753           lb0 = load_balance_get (lb_index0);
2754           lb1 = load_balance_get (lb_index1);
2755
2756           ASSERT (lb0->lb_n_buckets > 0);
2757           ASSERT (is_pow2 (lb0->lb_n_buckets));
2758           ASSERT (lb1->lb_n_buckets > 0);
2759           ASSERT (is_pow2 (lb1->lb_n_buckets));
2760
2761           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2762               (ip0, lb0->lb_hash_config);
2763                                                                   
2764           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2765               (ip1, lb1->lb_hash_config);
2766
2767           dpo0 = load_balance_get_bucket_i(lb0,
2768                                            (vnet_buffer (p0)->ip.flow_hash &
2769                                             (lb0->lb_n_buckets_minus_1)));
2770           dpo1 = load_balance_get_bucket_i(lb1,
2771                                            (vnet_buffer (p1)->ip.flow_hash &
2772                                             (lb0->lb_n_buckets_minus_1)));
2773
2774           next0 = dpo0->dpoi_next_node;
2775           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2776           next1 = dpo1->dpoi_next_node;
2777           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2778
2779           if (1) /* $$$$$$ HACK FIXME */
2780           vlib_increment_combined_counter 
2781               (cm, cpu_index, lb_index0, 1,
2782                vlib_buffer_length_in_chain (vm, p0));
2783           if (1) /* $$$$$$ HACK FIXME */
2784           vlib_increment_combined_counter 
2785               (cm, cpu_index, lb_index1, 1,
2786                vlib_buffer_length_in_chain (vm, p1));
2787
2788           from += 2;
2789           to_next += 2;
2790           n_left_to_next -= 2;
2791           n_left_from -= 2;
2792
2793           wrong_next = (next0 != next) + 2*(next1 != next);
2794           if (PREDICT_FALSE (wrong_next != 0))
2795             {
2796               switch (wrong_next)
2797                 {
2798                 case 1:
2799                   /* A B A */
2800                   to_next[-2] = pi1;
2801                   to_next -= 1;
2802                   n_left_to_next += 1;
2803                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2804                   break;
2805
2806                 case 2:
2807                   /* A A B */
2808                   to_next -= 1;
2809                   n_left_to_next += 1;
2810                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2811                   break;
2812
2813                 case 3:
2814                   /* A B C */
2815                   to_next -= 2;
2816                   n_left_to_next += 2;
2817                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2818                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2819                   if (next0 == next1)
2820                     {
2821                       /* A B B */
2822                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2823                       next = next1;
2824                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2825                     }
2826                 }
2827             }
2828         }
2829     
2830       while (n_left_from > 0 && n_left_to_next > 0)
2831         {
2832           vlib_buffer_t * p0;
2833           ip4_header_t * ip0;
2834           u32 pi0, lb_index0;
2835           ip_lookup_next_t next0;
2836           u32 fib_index0;
2837           const dpo_id_t *dpo0;
2838           const load_balance_t * lb0;
2839
2840           pi0 = from[0];
2841           to_next[0] = pi0;
2842
2843           p0 = vlib_get_buffer (vm, pi0);
2844
2845           ip0 = vlib_buffer_get_current (p0);
2846
2847           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2848                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2849           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2850               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2851           
2852           lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2853                                                &ip0->dst_address);
2854
2855           lb0 = load_balance_get (lb_index0);
2856
2857           ASSERT (lb0->lb_n_buckets > 0);
2858           ASSERT (is_pow2 (lb0->lb_n_buckets));
2859
2860           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2861               (ip0, lb0->lb_hash_config);
2862
2863           dpo0 = load_balance_get_bucket_i(lb0,
2864                                            (vnet_buffer (p0)->ip.flow_hash &
2865                                             (lb0->lb_n_buckets_minus_1)));
2866
2867           next0 = dpo0->dpoi_next_node;
2868           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2869
2870           if (1) /* $$$$$$ HACK FIXME */
2871               vlib_increment_combined_counter 
2872                   (cm, cpu_index, lb_index0, 1,
2873                    vlib_buffer_length_in_chain (vm, p0));
2874
2875           from += 1;
2876           to_next += 1;
2877           n_left_to_next -= 1;
2878           n_left_from -= 1;
2879
2880           if (PREDICT_FALSE (next0 != next))
2881             {
2882               n_left_to_next += 1;
2883               vlib_put_next_frame (vm, node, next, n_left_to_next);
2884               next = next0;
2885               vlib_get_next_frame (vm, node, next,
2886                                    to_next, n_left_to_next);
2887               to_next[0] = pi0;
2888               to_next += 1;
2889               n_left_to_next -= 1;
2890             }
2891         }
2892
2893       vlib_put_next_frame (vm, node, next, n_left_to_next);
2894     }
2895
2896   if (node->flags & VLIB_NODE_FLAG_TRACE)
2897       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2898
2899   return frame->n_vectors;
2900 }
2901
2902 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2903   .function = ip4_lookup_multicast,
2904   .name = "ip4-lookup-multicast",
2905   .vector_size = sizeof (u32),
2906   .sibling_of = "ip4-lookup",
2907   .format_trace = format_ip4_lookup_trace,
2908
2909   .n_next_nodes = 0,
2910 };
2911
2912 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
2913
2914 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2915   .function = ip4_drop,
2916   .name = "ip4-multicast",
2917   .vector_size = sizeof (u32),
2918
2919   .format_trace = format_ip4_forward_next_trace,
2920
2921   .n_next_nodes = 1,
2922   .next_nodes = {
2923     [0] = "error-drop",
2924   },
2925 };
2926
2927 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2928 {
2929   ip4_fib_mtrie_t * mtrie0;
2930   ip4_fib_mtrie_leaf_t leaf0;
2931   u32 lbi0;
2932     
2933   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2934
2935   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2936   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2937   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2938   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2939   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2940   
2941   /* Handle default route. */
2942   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2943   
2944   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2945   
2946   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
2947 }
2948  
2949 static clib_error_t *
2950 test_lookup_command_fn (vlib_main_t * vm,
2951                         unformat_input_t * input,
2952                         vlib_cli_command_t * cmd)
2953 {
2954   u32 table_id = 0;
2955   f64 count = 1;
2956   u32 n;
2957   int i;
2958   ip4_address_t ip4_base_address;
2959   u64 errors = 0;
2960
2961   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2962       if (unformat (input, "table %d", &table_id))
2963         ;
2964       else if (unformat (input, "count %f", &count))
2965         ;
2966
2967       else if (unformat (input, "%U",
2968                          unformat_ip4_address, &ip4_base_address))
2969         ;
2970       else
2971         return clib_error_return (0, "unknown input `%U'",
2972                                   format_unformat_error, input);
2973   }
2974
2975   n = count;
2976
2977   for (i = 0; i < n; i++)
2978     {
2979       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2980         errors++;
2981
2982       ip4_base_address.as_u32 = 
2983         clib_host_to_net_u32 (1 + 
2984                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2985     }
2986
2987   if (errors) 
2988     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2989   else
2990     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2991
2992   return 0;
2993 }
2994
2995 VLIB_CLI_COMMAND (lookup_test_command, static) = {
2996     .path = "test lookup",
2997     .short_help = "test lookup",
2998     .function = test_lookup_command_fn,
2999 };
3000
3001 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3002 {
3003   ip4_main_t * im4 = &ip4_main;
3004   ip4_fib_t * fib;
3005   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3006
3007   if (p == 0)
3008     return VNET_API_ERROR_NO_SUCH_FIB;
3009
3010   fib = ip4_fib_get (p[0]);
3011
3012   fib->flow_hash_config = flow_hash_config;
3013   return 0;
3014 }
3015  
3016 static clib_error_t *
3017 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3018                              unformat_input_t * input,
3019                              vlib_cli_command_t * cmd)
3020 {
3021   int matched = 0;
3022   u32 table_id = 0;
3023   u32 flow_hash_config = 0;
3024   int rv;
3025
3026   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3027     if (unformat (input, "table %d", &table_id))
3028       matched = 1;
3029 #define _(a,v) \
3030     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3031     foreach_flow_hash_bit
3032 #undef _
3033     else break;
3034   }
3035   
3036   if (matched == 0)
3037     return clib_error_return (0, "unknown input `%U'",
3038                               format_unformat_error, input);
3039   
3040   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3041   switch (rv)
3042     {
3043     case 0:
3044       break;
3045       
3046     case VNET_API_ERROR_NO_SUCH_FIB:
3047       return clib_error_return (0, "no such FIB table %d", table_id);
3048       
3049     default:
3050       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3051       break;
3052     }
3053   
3054   return 0;
3055 }
3056  
3057 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3058   .path = "set ip flow-hash",
3059   .short_help = 
3060   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3061   .function = set_ip_flow_hash_command_fn,
3062 };
3063  
3064 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3065                                  u32 table_index)
3066 {
3067   vnet_main_t * vnm = vnet_get_main();
3068   vnet_interface_main_t * im = &vnm->interface_main;
3069   ip4_main_t * ipm = &ip4_main;
3070   ip_lookup_main_t * lm = &ipm->lookup_main;
3071   vnet_classify_main_t * cm = &vnet_classify_main;
3072   ip4_address_t *if_addr;
3073
3074   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3075     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3076
3077   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3078     return VNET_API_ERROR_NO_SUCH_ENTRY;
3079
3080   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3081   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3082
3083   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3084
3085   if (NULL != if_addr)
3086   {
3087       fib_prefix_t pfx = {
3088           .fp_len = 32,
3089           .fp_proto = FIB_PROTOCOL_IP4,
3090           .fp_addr.ip4 = *if_addr,
3091       };
3092       u32 fib_index;
3093
3094       fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3095                                                       sw_if_index);
3096
3097
3098       if (table_index != (u32) ~0)
3099       {
3100           dpo_id_t dpo = DPO_NULL;
3101
3102           dpo_set(&dpo,
3103                   DPO_CLASSIFY,
3104                   DPO_PROTO_IP4,
3105                   classify_dpo_create(FIB_PROTOCOL_IP4,
3106                                       table_index));
3107
3108           fib_table_entry_special_dpo_add(fib_index,
3109                                           &pfx,
3110                                           FIB_SOURCE_CLASSIFY,
3111                                           FIB_ENTRY_FLAG_NONE,
3112                                           &dpo);
3113           dpo_reset(&dpo);
3114       }
3115       else
3116       {
3117           fib_table_entry_special_remove(fib_index,
3118                                          &pfx,
3119                                          FIB_SOURCE_CLASSIFY);
3120       }
3121   }
3122
3123   return 0;
3124 }
3125
3126 static clib_error_t *
3127 set_ip_classify_command_fn (vlib_main_t * vm,
3128                             unformat_input_t * input,
3129                             vlib_cli_command_t * cmd)
3130 {
3131   u32 table_index = ~0;
3132   int table_index_set = 0;
3133   u32 sw_if_index = ~0;
3134   int rv;
3135   
3136   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3137     if (unformat (input, "table-index %d", &table_index))
3138       table_index_set = 1;
3139     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3140                        vnet_get_main(), &sw_if_index))
3141       ;
3142     else
3143       break;
3144   }
3145       
3146   if (table_index_set == 0)
3147     return clib_error_return (0, "classify table-index must be specified");
3148
3149   if (sw_if_index == ~0)
3150     return clib_error_return (0, "interface / subif must be specified");
3151
3152   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3153
3154   switch (rv)
3155     {
3156     case 0:
3157       break;
3158
3159     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3160       return clib_error_return (0, "No such interface");
3161
3162     case VNET_API_ERROR_NO_SUCH_ENTRY:
3163       return clib_error_return (0, "No such classifier table");
3164     }
3165   return 0;
3166 }
3167
3168 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3169     .path = "set ip classify",
3170     .short_help = 
3171     "set ip classify intfc <int> table-index <index>",
3172     .function = set_ip_classify_command_fn,
3173 };
3174