A Protocol Independent Hierarchical FIB (VPP-352)
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/ip4_fib.h>
50 #include <vnet/dpo/load_balance.h>
51 #include <vnet/dpo/classify_dpo.h>
52
53 void
54 ip4_forward_next_trace (vlib_main_t * vm,
55                         vlib_node_runtime_t * node,
56                         vlib_frame_t * frame,
57                         vlib_rx_or_tx_t which_adj_index);
58
59 always_inline uword
60 ip4_lookup_inline (vlib_main_t * vm,
61                    vlib_node_runtime_t * node,
62                    vlib_frame_t * frame,
63                    int lookup_for_responses_to_locally_received_packets)
64 {
65   ip4_main_t * im = &ip4_main;
66   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
67   u32 n_left_from, n_left_to_next, * from, * to_next;
68   ip_lookup_next_t next;
69   u32 cpu_index = os_get_cpu_number();
70
71   from = vlib_frame_vector_args (frame);
72   n_left_from = frame->n_vectors;
73   next = node->cached_next_index;
74
75   while (n_left_from > 0)
76     {
77       vlib_get_next_frame (vm, node, next,
78                            to_next, n_left_to_next);
79
80       while (n_left_from >= 4 && n_left_to_next >= 2)
81         {
82           vlib_buffer_t * p0, * p1;
83           ip4_header_t * ip0, * ip1;
84           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
85           ip_lookup_next_t next0, next1;
86           const load_balance_t * lb0, * lb1;
87           ip4_fib_mtrie_t * mtrie0, * mtrie1;
88           ip4_fib_mtrie_leaf_t leaf0, leaf1;
89           ip4_address_t * dst_addr0, *dst_addr1;
90           __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
91           __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
92           flow_hash_config_t flow_hash_config0, flow_hash_config1;
93           u32 hash_c0, hash_c1;
94           u32 wrong_next;
95           const dpo_id_t *dpo0, *dpo1;
96
97           /* Prefetch next iteration. */
98           {
99             vlib_buffer_t * p2, * p3;
100
101             p2 = vlib_get_buffer (vm, from[2]);
102             p3 = vlib_get_buffer (vm, from[3]);
103
104             vlib_prefetch_buffer_header (p2, LOAD);
105             vlib_prefetch_buffer_header (p3, LOAD);
106
107             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
108             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
109           }
110
111           pi0 = to_next[0] = from[0];
112           pi1 = to_next[1] = from[1];
113
114           p0 = vlib_get_buffer (vm, pi0);
115           p1 = vlib_get_buffer (vm, pi1);
116
117           ip0 = vlib_buffer_get_current (p0);
118           ip1 = vlib_buffer_get_current (p1);
119
120           dst_addr0 = &ip0->dst_address;
121           dst_addr1 = &ip1->dst_address;
122
123           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
124           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
125           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
126             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
127           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
128             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
129
130
131           if (! lookup_for_responses_to_locally_received_packets)
132             {
133               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
134               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
135
136               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
137
138               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
139               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
140             }
141
142           tcp0 = (void *) (ip0 + 1);
143           tcp1 = (void *) (ip1 + 1);
144
145           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
146                          || ip0->protocol == IP_PROTOCOL_UDP);
147           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
148                          || ip1->protocol == IP_PROTOCOL_UDP);
149
150           if (! lookup_for_responses_to_locally_received_packets)
151             {
152               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
153               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
154             }
155
156           if (! lookup_for_responses_to_locally_received_packets)
157             {
158               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
159               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
160             }
161
162           if (! lookup_for_responses_to_locally_received_packets)
163             {
164               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
165               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
166             }
167
168           if (lookup_for_responses_to_locally_received_packets)
169             {
170               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
171               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
172             }
173           else
174             {
175               /* Handle default route. */
176               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
177               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
178
179               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
180               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
181             }
182
183           lb0 = load_balance_get (lb_index0);
184           lb1 = load_balance_get (lb_index1);
185
186           /* Use flow hash to compute multipath adjacency. */
187           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
188           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
189           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
190             {
191               flow_hash_config0 = lb0->lb_hash_config;
192               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
193                 ip4_compute_flow_hash (ip0, flow_hash_config0);
194             }
195           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
196             {
197               flow_hash_config1 = lb1->lb_hash_config;
198               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
199                 ip4_compute_flow_hash (ip1, flow_hash_config1);
200             }
201
202           ASSERT (lb0->lb_n_buckets > 0);
203           ASSERT (is_pow2 (lb0->lb_n_buckets));
204           ASSERT (lb1->lb_n_buckets > 0);
205           ASSERT (is_pow2 (lb1->lb_n_buckets));
206
207           dpo0 = load_balance_get_bucket_i(lb0,
208                                            (hash_c0 &
209                                             (lb0->lb_n_buckets_minus_1)));
210           dpo1 = load_balance_get_bucket_i(lb1,
211                                            (hash_c1 &
212                                             (lb0->lb_n_buckets_minus_1)));
213
214           next0 = dpo0->dpoi_next_node;
215           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
216           next1 = dpo1->dpoi_next_node;
217           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
218
219           vlib_increment_combined_counter
220               (cm, cpu_index, lb_index0, 1,
221                vlib_buffer_length_in_chain (vm, p0)
222                + sizeof(ethernet_header_t));
223           vlib_increment_combined_counter
224               (cm, cpu_index, lb_index1, 1,
225                vlib_buffer_length_in_chain (vm, p1)
226                + sizeof(ethernet_header_t));
227
228           from += 2;
229           to_next += 2;
230           n_left_to_next -= 2;
231           n_left_from -= 2;
232
233           wrong_next = (next0 != next) + 2*(next1 != next);
234           if (PREDICT_FALSE (wrong_next != 0))
235             {
236               switch (wrong_next)
237                 {
238                 case 1:
239                   /* A B A */
240                   to_next[-2] = pi1;
241                   to_next -= 1;
242                   n_left_to_next += 1;
243                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
244                   break;
245
246                 case 2:
247                   /* A A B */
248                   to_next -= 1;
249                   n_left_to_next += 1;
250                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
251                   break;
252
253                 case 3:
254                   /* A B C */
255                   to_next -= 2;
256                   n_left_to_next += 2;
257                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
258                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
259                   if (next0 == next1)
260                     {
261                       /* A B B */
262                       vlib_put_next_frame (vm, node, next, n_left_to_next);
263                       next = next1;
264                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
265                     }
266                 }
267             }
268         }
269     
270       while (n_left_from > 0 && n_left_to_next > 0)
271         {
272           vlib_buffer_t * p0;
273           ip4_header_t * ip0;
274           __attribute__((unused)) tcp_header_t * tcp0;
275           ip_lookup_next_t next0;
276           const load_balance_t *lb0;
277           ip4_fib_mtrie_t * mtrie0;
278           ip4_fib_mtrie_leaf_t leaf0;
279           ip4_address_t * dst_addr0;
280           __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
281           flow_hash_config_t flow_hash_config0;
282           const dpo_id_t *dpo0;
283           u32 hash_c0;
284
285           pi0 = from[0];
286           to_next[0] = pi0;
287
288           p0 = vlib_get_buffer (vm, pi0);
289
290           ip0 = vlib_buffer_get_current (p0);
291
292           dst_addr0 = &ip0->dst_address;
293
294           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
295           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
296             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
297
298           if (! lookup_for_responses_to_locally_received_packets)
299             {
300               mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
301
302               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
303
304               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
305             }
306
307           tcp0 = (void *) (ip0 + 1);
308
309           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
310                          || ip0->protocol == IP_PROTOCOL_UDP);
311
312           if (! lookup_for_responses_to_locally_received_packets)
313             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
314
315           if (! lookup_for_responses_to_locally_received_packets)
316             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
317
318           if (! lookup_for_responses_to_locally_received_packets)
319             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
320
321           if (lookup_for_responses_to_locally_received_packets)
322             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
323           else
324             {
325               /* Handle default route. */
326               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
327               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
328             }
329
330           lb0 = load_balance_get (lbi0);
331
332           /* Use flow hash to compute multipath adjacency. */
333           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
334           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
335             {
336               flow_hash_config0 = lb0->lb_hash_config;
337
338               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
339                 ip4_compute_flow_hash (ip0, flow_hash_config0);
340             }
341
342           ASSERT (lb0->lb_n_buckets > 0);
343           ASSERT (is_pow2 (lb0->lb_n_buckets));
344
345           dpo0 = load_balance_get_bucket_i(lb0,
346                                            (hash_c0 &
347                                             (lb0->lb_n_buckets_minus_1)));
348
349           next0 = dpo0->dpoi_next_node;
350           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
351
352           vlib_increment_combined_counter 
353               (cm, cpu_index, lbi0, 1,
354                vlib_buffer_length_in_chain (vm, p0));
355
356           from += 1;
357           to_next += 1;
358           n_left_to_next -= 1;
359           n_left_from -= 1;
360
361           if (PREDICT_FALSE (next0 != next))
362             {
363               n_left_to_next += 1;
364               vlib_put_next_frame (vm, node, next, n_left_to_next);
365               next = next0;
366               vlib_get_next_frame (vm, node, next,
367                                    to_next, n_left_to_next);
368               to_next[0] = pi0;
369               to_next += 1;
370               n_left_to_next -= 1;
371             }
372         }
373
374       vlib_put_next_frame (vm, node, next, n_left_to_next);
375     }
376
377   if (node->flags & VLIB_NODE_FLAG_TRACE)
378     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
379
380   return frame->n_vectors;
381 }
382
383 /** @brief IPv4 lookup node.
384     @node ip4-lookup
385
386     This is the main IPv4 lookup dispatch node.
387
388     @param vm vlib_main_t corresponding to the current thread
389     @param node vlib_node_runtime_t
390     @param frame vlib_frame_t whose contents should be dispatched
391
392     @par Graph mechanics: buffer metadata, next index usage
393
394     @em Uses:
395     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
396         - Indicates the @c sw_if_index value of the interface that the
397           packet was received on.
398     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
399         - When the value is @c ~0 then the node performs a longest prefix
400           match (LPM) for the packet destination address in the FIB attached
401           to the receive interface.
402         - Otherwise perform LPM for the packet destination address in the
403           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
404           value (0, 1, ...) and not a VRF id.
405
406     @em Sets:
407     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
408         - The lookup result adjacency index.
409
410     <em>Next Index:</em>
411     - Dispatches the packet to the node index found in
412       ip_adjacency_t @c adj->lookup_next_index
413       (where @c adj is the lookup result adjacency).
414 */
415 static uword
416 ip4_lookup (vlib_main_t * vm,
417             vlib_node_runtime_t * node,
418             vlib_frame_t * frame)
419 {
420   return ip4_lookup_inline (vm, node, frame,
421                             /* lookup_for_responses_to_locally_received_packets */ 0);
422
423 }
424
425 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
426
427 VLIB_REGISTER_NODE (ip4_lookup_node) = {
428   .function = ip4_lookup,
429   .name = "ip4-lookup",
430   .vector_size = sizeof (u32),
431
432   .format_trace = format_ip4_lookup_trace,
433   .n_next_nodes = IP_LOOKUP_N_NEXT,
434   .next_nodes = IP4_LOOKUP_NEXT_NODES,
435 };
436
437 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
438
439 always_inline uword
440 ip4_load_balance (vlib_main_t * vm,
441                   vlib_node_runtime_t * node,
442                   vlib_frame_t * frame)
443 {
444   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
445   u32 n_left_from, n_left_to_next, * from, * to_next;
446   ip_lookup_next_t next;
447   u32 cpu_index = os_get_cpu_number();
448
449   from = vlib_frame_vector_args (frame);
450   n_left_from = frame->n_vectors;
451   next = node->cached_next_index;
452
453   if (node->flags & VLIB_NODE_FLAG_TRACE)
454       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
455
456   while (n_left_from > 0)
457     {
458       vlib_get_next_frame (vm, node, next,
459                            to_next, n_left_to_next);
460
461     
462       while (n_left_from > 0 && n_left_to_next > 0)
463         {
464           ip_lookup_next_t next0;
465           const load_balance_t *lb0;
466           vlib_buffer_t * p0;
467           u32 pi0, lbi0, hc0;
468           const ip4_header_t *ip0;
469           const dpo_id_t *dpo0;
470
471           pi0 = from[0];
472           to_next[0] = pi0;
473
474           p0 = vlib_get_buffer (vm, pi0);
475
476           ip0 = vlib_buffer_get_current (p0);
477           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
478
479           lb0 = load_balance_get(lbi0);
480           hc0 = lb0->lb_hash_config;
481           vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
482
483           dpo0 = load_balance_get_bucket_i(lb0, 
484                                            vnet_buffer(p0)->ip.flow_hash &
485                                            (lb0->lb_n_buckets_minus_1));
486
487           next0 = dpo0->dpoi_next_node;
488           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
489
490           vlib_increment_combined_counter 
491               (cm, cpu_index, lbi0, 1,
492                vlib_buffer_length_in_chain (vm, p0));
493
494           from += 1;
495           to_next += 1;
496           n_left_to_next -= 1;
497           n_left_from -= 1;
498
499           if (PREDICT_FALSE (next0 != next))
500             {
501               n_left_to_next += 1;
502               vlib_put_next_frame (vm, node, next, n_left_to_next);
503               next = next0;
504               vlib_get_next_frame (vm, node, next,
505                                    to_next, n_left_to_next);
506               to_next[0] = pi0;
507               to_next += 1;
508               n_left_to_next -= 1;
509             }
510         }
511
512       vlib_put_next_frame (vm, node, next, n_left_to_next);
513     }
514
515   return frame->n_vectors;
516 }
517
518 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
519
520 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
521   .function = ip4_load_balance,
522   .name = "ip4-load-balance",
523   .vector_size = sizeof (u32),
524   .sibling_of = "ip4-lookup",
525
526   .format_trace = format_ip4_forward_next_trace,
527 };
528
529 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
530
531 /* get first interface address */
532 ip4_address_t *
533 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
534                              ip_interface_address_t ** result_ia)
535 {
536   ip_lookup_main_t * lm = &im->lookup_main;
537   ip_interface_address_t * ia = 0;
538   ip4_address_t * result = 0;
539
540   foreach_ip_interface_address (lm, ia, sw_if_index, 
541                                 1 /* honor unnumbered */,
542   ({
543     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
544     result = a;
545     break;
546   }));
547   if (result_ia)
548     *result_ia = result ? ia : 0;
549   return result;
550 }
551
552 static void
553 ip4_add_interface_routes (u32 sw_if_index,
554                           ip4_main_t * im, u32 fib_index,
555                           ip_interface_address_t * a)
556 {
557   ip_lookup_main_t * lm = &im->lookup_main;
558   ip4_address_t * address = ip_interface_address_get_address (lm, a);
559   fib_prefix_t pfx = {
560       .fp_len = a->address_length,
561       .fp_proto = FIB_PROTOCOL_IP4,
562       .fp_addr.ip4 = *address,
563   };
564
565   a->neighbor_probe_adj_index = ~0;
566
567   if (pfx.fp_len < 32)
568   {
569       fib_node_index_t fei;
570
571       fei = fib_table_entry_update_one_path(fib_index,
572                                             &pfx,
573                                             FIB_SOURCE_INTERFACE,
574                                             (FIB_ENTRY_FLAG_CONNECTED |
575                                              FIB_ENTRY_FLAG_ATTACHED),
576                                             FIB_PROTOCOL_IP4,
577                                             NULL, /* No next-hop address */
578                                             sw_if_index,
579                                             ~0, // invalid FIB index
580                                             1,
581                                             MPLS_LABEL_INVALID,
582                                             FIB_ROUTE_PATH_FLAG_NONE);
583       a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
584   }
585
586   pfx.fp_len = 32;
587
588   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
589   {
590       u32 classify_table_index =
591           lm->classify_table_index_by_sw_if_index [sw_if_index];
592       if (classify_table_index != (u32) ~0)
593       {
594           dpo_id_t dpo = DPO_NULL;
595
596           dpo_set(&dpo,
597                   DPO_CLASSIFY,
598                   DPO_PROTO_IP4,
599                   classify_dpo_create(FIB_PROTOCOL_IP4,
600                                       classify_table_index));
601
602           fib_table_entry_special_dpo_add(fib_index,
603                                           &pfx,
604                                           FIB_SOURCE_CLASSIFY,
605                                           FIB_ENTRY_FLAG_NONE,
606                                           &dpo);
607           dpo_reset(&dpo);
608       }
609   }
610
611   fib_table_entry_update_one_path(fib_index,
612                                   &pfx,
613                                   FIB_SOURCE_INTERFACE,
614                                   (FIB_ENTRY_FLAG_CONNECTED |
615                                    FIB_ENTRY_FLAG_LOCAL),
616                                   FIB_PROTOCOL_IP4,
617                                   &pfx.fp_addr,
618                                   sw_if_index,
619                                   ~0, // invalid FIB index
620                                   1,
621                                   MPLS_LABEL_INVALID,
622                                   FIB_ROUTE_PATH_FLAG_NONE);
623 }
624
625 static void
626 ip4_del_interface_routes (ip4_main_t * im,
627                           u32 fib_index,
628                           ip4_address_t * address,
629                           u32 address_length)
630 {
631     fib_prefix_t pfx = {
632         .fp_len = address_length,
633         .fp_proto = FIB_PROTOCOL_IP4,
634         .fp_addr.ip4 = *address,
635     };
636
637     if (pfx.fp_len < 32)
638     {
639         fib_table_entry_delete(fib_index,
640                                &pfx,
641                                FIB_SOURCE_INTERFACE);
642     }
643
644     pfx.fp_len = 32;
645     fib_table_entry_delete(fib_index,
646                            &pfx,
647                            FIB_SOURCE_INTERFACE);
648 }
649
650 void
651 ip4_sw_interface_enable_disable (u32 sw_if_index,
652                                  u32 is_enable)
653 {
654   vlib_main_t * vm = vlib_get_main();
655   ip4_main_t * im = &ip4_main;
656   ip_lookup_main_t * lm = &im->lookup_main;
657   u32 ci, cast;
658   u32 lookup_feature_index;
659
660   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
661
662   /*
663    * enable/disable only on the 1<->0 transition
664    */
665   if (is_enable)
666     {
667       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
668         return;
669     }
670   else
671     {
672       ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
673       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
674         return;
675     }
676
677   for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
678     {
679       ip_config_main_t * cm = &lm->feature_config_mains[cast];
680       vnet_config_main_t * vcm = &cm->config_main;
681
682       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
683       ci = cm->config_index_by_sw_if_index[sw_if_index];
684
685       if (cast == VNET_IP_RX_UNICAST_FEAT)
686         lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
687       else
688         lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
689
690       if (is_enable)
691         ci = vnet_config_add_feature (vm, vcm,
692                                       ci,
693                                       lookup_feature_index,
694                                       /* config data */ 0,
695                                       /* # bytes of config data */ 0);
696       else
697         ci = vnet_config_del_feature (vm, vcm,
698                                       ci,
699                                       lookup_feature_index,
700                                       /* config data */ 0,
701                                       /* # bytes of config data */ 0);
702       cm->config_index_by_sw_if_index[sw_if_index] = ci;
703     }
704 }
705
706 static clib_error_t *
707 ip4_add_del_interface_address_internal (vlib_main_t * vm,
708                                         u32 sw_if_index,
709                                         ip4_address_t * address,
710                                         u32 address_length,
711                                         u32 is_del)
712 {
713   vnet_main_t * vnm = vnet_get_main();
714   ip4_main_t * im = &ip4_main;
715   ip_lookup_main_t * lm = &im->lookup_main;
716   clib_error_t * error = 0;
717   u32 if_address_index, elts_before;
718   ip4_address_fib_t ip4_af, * addr_fib = 0;
719
720   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
721   ip4_addr_fib_init (&ip4_af, address,
722                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
723   vec_add1 (addr_fib, ip4_af);
724
725   /* FIXME-LATER
726    * there is no support for adj-fib handling in the presence of overlapping
727    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
728    * most routers do.
729    */
730   if (! is_del)
731     {
732       /* When adding an address check that it does not conflict
733          with an existing address. */
734       ip_interface_address_t * ia;
735       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
736                                     0 /* honor unnumbered */,
737       ({
738         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
739
740         if (ip4_destination_matches_route (im, address, x, ia->address_length)
741             || ip4_destination_matches_route (im, x, address, address_length))
742           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
743                                     format_ip4_address_and_length, address, address_length,
744                                     format_ip4_address_and_length, x, ia->address_length,
745                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
746        }));
747     }
748
749   elts_before = pool_elts (lm->if_address_pool);
750
751   error = ip_interface_address_add_del
752     (lm,
753      sw_if_index,
754      addr_fib,
755      address_length,
756      is_del,
757      &if_address_index);
758   if (error)
759     goto done;
760   
761   ip4_sw_interface_enable_disable(sw_if_index, !is_del);
762
763   if (is_del)
764       ip4_del_interface_routes (im, ip4_af.fib_index, address,
765                                 address_length);
766   else
767       ip4_add_interface_routes (sw_if_index,
768                                 im, ip4_af.fib_index,
769                                 pool_elt_at_index 
770                                 (lm->if_address_pool, if_address_index));
771
772   /* If pool did not grow/shrink: add duplicate address. */
773   if (elts_before != pool_elts (lm->if_address_pool))
774     {
775       ip4_add_del_interface_address_callback_t * cb;
776       vec_foreach (cb, im->add_del_interface_address_callbacks)
777         cb->function (im, cb->function_opaque, sw_if_index,
778                       address, address_length,
779                       if_address_index,
780                       is_del);
781     }
782
783  done:
784   vec_free (addr_fib);
785   return error;
786 }
787
788 clib_error_t *
789 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
790                                ip4_address_t * address, u32 address_length,
791                                u32 is_del)
792 {
793   return ip4_add_del_interface_address_internal
794     (vm, sw_if_index, address, address_length,
795      is_del);
796 }
797
798 /* Built-in ip4 unicast rx feature path definition */
799 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
800   .node_name = "ip4-inacl", 
801   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
802   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
803 };
804
805 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
806   .node_name = "ip4-source-check-via-rx",
807   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
808   .feature_index = 
809   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
810 };
811
812 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
813   .node_name = "ip4-source-check-via-any",
814   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
815   .feature_index = 
816   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
817 };
818
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
820   .node_name = "ip4-source-and-port-range-check-rx",
821   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
822   .feature_index =
823   &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
824 };
825
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
827   .node_name = "ip4-policer-classify",
828   .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
829   .feature_index =
830   &ip4_main.ip4_unicast_rx_feature_policer_classify,
831 };
832
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
834   .node_name = "ipsec-input-ip4",
835   .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
836   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
837 };
838
839 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
840   .node_name = "vpath-input-ip4",
841   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
842   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
843 };
844
845 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
846   .node_name = "ip4-lookup",
847   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
848   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
849 };
850
851 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
852   .node_name = "ip4-drop",
853   .runs_before = 0, /* not before any other features */
854   .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
855 };
856
857
858 /* Built-in ip4 multicast rx feature path definition */
859 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
860   .node_name = "vpath-input-ip4",
861   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
862   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
863 };
864
865 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
866   .node_name = "ip4-lookup-multicast",
867   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
868   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
869 };
870
871 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
872   .node_name = "ip4-drop",
873   .runs_before = 0, /* last feature */
874   .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
875 };
876
/*
 * Start-node name tables handed to ip_feature_init_cast() by
 * ip4_feature_init(): the rx table for casts below VNET_IP_TX_FEAT,
 * the tx table otherwise.
 */
static char *rx_feature_start_nodes[] = {
  "ip4-input",
  "ip4-input-no-checksum",
};

static char *tx_feature_start_nodes[] = {
  "ip4-rewrite-transit",
};
882
883 /* Source and port-range check ip4 tx feature path definition */
884 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
885   .node_name = "ip4-source-and-port-range-check-tx",
886   .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
887   .feature_index =
888   &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
889
890 };
891
892 /* Built-in ip4 tx feature path definition */
893 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
894   .node_name = "interface-output",
895   .runs_before = 0, /* not before any other features */
896   .feature_index = &ip4_main.ip4_tx_feature_interface_output,
897 };
898
899 static clib_error_t *
900 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
901 {
902   ip_lookup_main_t * lm = &im->lookup_main;
903   clib_error_t * error;
904   vnet_cast_t cast;
905   ip_config_main_t * cm;
906   vnet_config_main_t * vcm;
907   char **feature_start_nodes;
908   int feature_start_len;
909
910   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
911     {
912       cm = &lm->feature_config_mains[cast];
913       vcm = &cm->config_main;
914
915       if (cast < VNET_IP_TX_FEAT)
916         {
917           feature_start_nodes = rx_feature_start_nodes;
918           feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
919         }
920       else
921         {
922           feature_start_nodes = tx_feature_start_nodes;
923           feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
924         }
925       
926       if ((error = ip_feature_init_cast (vm, cm, vcm, 
927                                          feature_start_nodes,
928                                          feature_start_len,
929                                          cast,
930                                          VNET_L3_PACKET_TYPE_IP4)))
931         return error;
932     }
933
934   return 0;
935 }
936
937 static clib_error_t *
938 ip4_sw_interface_add_del (vnet_main_t * vnm,
939                           u32 sw_if_index,
940                           u32 is_add)
941 {
942   vlib_main_t * vm = vnm->vlib_main;
943   ip4_main_t * im = &ip4_main;
944   ip_lookup_main_t * lm = &im->lookup_main;
945   u32 ci, cast;
946   u32 feature_index;
947
948   /* Fill in lookup tables with default table (0). */
949   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
950
951   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
952     {
953       ip_config_main_t * cm = &lm->feature_config_mains[cast];
954       vnet_config_main_t * vcm = &cm->config_main;
955
956       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
957       ci = cm->config_index_by_sw_if_index[sw_if_index];
958
959       if (cast == VNET_IP_RX_UNICAST_FEAT)
960         feature_index = im->ip4_unicast_rx_feature_drop;
961       else if (cast == VNET_IP_RX_MULTICAST_FEAT)
962         feature_index = im->ip4_multicast_rx_feature_drop;
963       else
964         feature_index = im->ip4_tx_feature_interface_output;
965
966       if (is_add)
967         ci = vnet_config_add_feature (vm, vcm, 
968                                       ci,
969                                       feature_index,
970                                       /* config data */ 0,
971                                       /* # bytes of config data */ 0);
972       else
973         {
974           ci = vnet_config_del_feature (vm, vcm, ci,
975                                         feature_index,
976                                         /* config data */ 0,
977                                         /* # bytes of config data */ 0);
978           if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
979               im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
980         }
981       cm->config_index_by_sw_if_index[sw_if_index] = ci;
982       /*
983        * note: do not update the tx feature count here.
984        */
985     }
986
987   return /* no error */ 0;
988 }
989
990 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
991
992 /* Global IP4 main. */
993 ip4_main_t ip4_main;
994
995 clib_error_t *
996 ip4_lookup_init (vlib_main_t * vm)
997 {
998   ip4_main_t * im = &ip4_main;
999   clib_error_t * error;
1000   uword i;
1001
1002   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1003     {
1004       u32 m;
1005
1006       if (i < 32)
1007         m = pow2_mask (i) << (32 - i);
1008       else 
1009         m = ~0;
1010       im->fib_masks[i] = clib_host_to_net_u32 (m);
1011     }
1012
1013   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1014
1015   /* Create FIB with index 0 and table id of 0. */
1016   fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1017
1018   {
1019     pg_node_t * pn;
1020     pn = pg_get_node (ip4_lookup_node.index);
1021     pn->unformat_edit = unformat_pg_ip4_header;
1022   }
1023
1024   {
1025     ethernet_arp_header_t h;
1026
1027     memset (&h, 0, sizeof (h));
1028
1029     /* Set target ethernet address to all zeros. */
1030     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1031
1032 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1033 #define _8(f,v) h.f = v;
1034     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1035     _16 (l3_type, ETHERNET_TYPE_IP4);
1036     _8 (n_l2_address_bytes, 6);
1037     _8 (n_l3_address_bytes, 4);
1038     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1039 #undef _16
1040 #undef _8
1041
1042     vlib_packet_template_init (vm,
1043                                &im->ip4_arp_request_packet_template,
1044                                /* data */ &h,
1045                                sizeof (h),
1046                                /* alloc chunk size */ 8,
1047                                "ip4 arp");
1048   }
1049
1050   error = ip4_feature_init (vm, im);
1051
1052   return error;
1053 }
1054
1055 VLIB_INIT_FUNCTION (ip4_lookup_init);
1056
1057 typedef struct {
1058   /* Adjacency taken. */
1059   u32 adj_index;
1060   u32 flow_hash;
1061   u32 fib_index;
1062
1063   /* Packet data, possibly *after* rewrite. */
1064   u8 packet_data[64 - 1*sizeof(u32)];
1065 } ip4_forward_next_trace_t;
1066
1067 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1068 {
1069   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1070   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1071   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1072   uword indent = format_get_indent (s);
1073   s = format (s, "%U%U",
1074                 format_white_space, indent,
1075                 format_ip4_header, t->packet_data);
1076   return s;
1077 }
1078
1079 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1080 {
1081   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1082   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1083   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1084   vnet_main_t * vnm = vnet_get_main();
1085   uword indent = format_get_indent (s);
1086
1087   s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x",
1088               t->fib_index, t->adj_index, format_ip_adjacency,
1089               vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE, 
1090               t->flow_hash);
1091   s = format (s, "\n%U%U",
1092               format_white_space, indent,
1093               format_ip4_header, t->packet_data);
1094   return s;
1095 }
1096
1097 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1098 {
1099   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1100   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1101   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1102   vnet_main_t * vnm = vnet_get_main();
1103   uword indent = format_get_indent (s);
1104
1105   s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
1106               t->fib_index, t->adj_index, format_ip_adjacency,
1107               vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
1108               t->flow_hash);
1109   s = format (s, "\n%U%U",
1110               format_white_space, indent,
1111               format_ip_adjacency_packet_data,
1112               vnm, t->adj_index,
1113               t->packet_data, sizeof (t->packet_data));
1114   return s;
1115 }
1116
1117 /* Common trace function for all ip4-forward next nodes. */
1118 void
1119 ip4_forward_next_trace (vlib_main_t * vm,
1120                         vlib_node_runtime_t * node,
1121                         vlib_frame_t * frame,
1122                         vlib_rx_or_tx_t which_adj_index)
1123 {
1124   u32 * from, n_left;
1125   ip4_main_t * im = &ip4_main;
1126
1127   n_left = frame->n_vectors;
1128   from = vlib_frame_vector_args (frame);
1129   
1130   while (n_left >= 4)
1131     {
1132       u32 bi0, bi1;
1133       vlib_buffer_t * b0, * b1;
1134       ip4_forward_next_trace_t * t0, * t1;
1135
1136       /* Prefetch next iteration. */
1137       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1138       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1139
1140       bi0 = from[0];
1141       bi1 = from[1];
1142
1143       b0 = vlib_get_buffer (vm, bi0);
1144       b1 = vlib_get_buffer (vm, bi1);
1145
1146       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1147         {
1148           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1149           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1150           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1151           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1152               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1153               vec_elt (im->fib_index_by_sw_if_index,
1154                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1155
1156           clib_memcpy (t0->packet_data,
1157                   vlib_buffer_get_current (b0),
1158                   sizeof (t0->packet_data));
1159         }
1160       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1161         {
1162           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1163           t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1164           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1165           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1166               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1167               vec_elt (im->fib_index_by_sw_if_index,
1168                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1169           clib_memcpy (t1->packet_data,
1170                   vlib_buffer_get_current (b1),
1171                   sizeof (t1->packet_data));
1172         }
1173       from += 2;
1174       n_left -= 2;
1175     }
1176
1177   while (n_left >= 1)
1178     {
1179       u32 bi0;
1180       vlib_buffer_t * b0;
1181       ip4_forward_next_trace_t * t0;
1182
1183       bi0 = from[0];
1184
1185       b0 = vlib_get_buffer (vm, bi0);
1186
1187       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1188         {
1189           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1190           t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1191           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1192           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1193               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1194               vec_elt (im->fib_index_by_sw_if_index,
1195                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1196           clib_memcpy (t0->packet_data,
1197                   vlib_buffer_get_current (b0),
1198                   sizeof (t0->packet_data));
1199         }
1200       from += 1;
1201       n_left -= 1;
1202     }
1203 }
1204
1205 static uword
1206 ip4_drop_or_punt (vlib_main_t * vm,
1207                   vlib_node_runtime_t * node,
1208                   vlib_frame_t * frame,
1209                   ip4_error_t error_code)
1210 {
1211   u32 * buffers = vlib_frame_vector_args (frame);
1212   uword n_packets = frame->n_vectors;
1213
1214   vlib_error_drop_buffers (vm, node,
1215                            buffers,
1216                            /* stride */ 1,
1217                            n_packets,
1218                            /* next */ 0,
1219                            ip4_input_node.index,
1220                            error_code);
1221
1222   if (node->flags & VLIB_NODE_FLAG_TRACE)
1223     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1224
1225   return n_packets;
1226 }
1227
1228 static uword
1229 ip4_drop (vlib_main_t * vm,
1230           vlib_node_runtime_t * node,
1231           vlib_frame_t * frame)
1232 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1233
1234 static uword
1235 ip4_punt (vlib_main_t * vm,
1236           vlib_node_runtime_t * node,
1237           vlib_frame_t * frame)
1238 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1239
1240 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1241   .function = ip4_drop,
1242   .name = "ip4-drop",
1243   .vector_size = sizeof (u32),
1244
1245   .format_trace = format_ip4_forward_next_trace,
1246
1247   .n_next_nodes = 1,
1248   .next_nodes = {
1249     [0] = "error-drop",
1250   },
1251 };
1252
1253 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1254
1255 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1256   .function = ip4_punt,
1257   .name = "ip4-punt",
1258   .vector_size = sizeof (u32),
1259
1260   .format_trace = format_ip4_forward_next_trace,
1261
1262   .n_next_nodes = 1,
1263   .next_nodes = {
1264     [0] = "error-punt",
1265   },
1266 };
1267
1268 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1269
1270 /* Compute TCP/UDP/ICMP4 checksum in software. */
1271 u16
1272 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1273                               ip4_header_t * ip0)
1274 {
1275   ip_csum_t sum0;
1276   u32 ip_header_length, payload_length_host_byte_order;
1277   u32 n_this_buffer, n_bytes_left;
1278   u16 sum16;
1279   void * data_this_buffer;
1280   
1281   /* Initialize checksum with ip header. */
1282   ip_header_length = ip4_header_bytes (ip0);
1283   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1284   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1285
1286   if (BITS (uword) == 32)
1287     {
1288       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1289       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1290     }
1291   else
1292     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1293
1294   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1295   data_this_buffer = (void *) ip0 + ip_header_length;
1296   if (n_this_buffer + ip_header_length > p0->current_length)
1297     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1298   while (1)
1299     {
1300       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1301       n_bytes_left -= n_this_buffer;
1302       if (n_bytes_left == 0)
1303         break;
1304
1305       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1306       p0 = vlib_get_buffer (vm, p0->next_buffer);
1307       data_this_buffer = vlib_buffer_get_current (p0);
1308       n_this_buffer = p0->current_length;
1309     }
1310
1311   sum16 = ~ ip_csum_fold (sum0);
1312
1313   return sum16;
1314 }
1315
1316 static u32
1317 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1318 {
1319   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1320   udp_header_t * udp0;
1321   u16 sum16;
1322
1323   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1324           || ip0->protocol == IP_PROTOCOL_UDP);
1325
1326   udp0 = (void *) (ip0 + 1);
1327   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1328     {
1329       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1330                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1331       return p0->flags;
1332     }
1333
1334   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1335
1336   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1337                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1338
1339   return p0->flags;
1340 }
1341
1342 static uword
1343 ip4_local (vlib_main_t * vm,
1344            vlib_node_runtime_t * node,
1345            vlib_frame_t * frame)
1346 {
1347   ip4_main_t * im = &ip4_main;
1348   ip_lookup_main_t * lm = &im->lookup_main;
1349   ip_local_next_t next_index;
1350   u32 * from, * to_next, n_left_from, n_left_to_next;
1351   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1352
1353   from = vlib_frame_vector_args (frame);
1354   n_left_from = frame->n_vectors;
1355   next_index = node->cached_next_index;
1356   
1357   if (node->flags & VLIB_NODE_FLAG_TRACE)
1358     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1359
1360   while (n_left_from > 0)
1361     {
1362       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1363
1364       while (n_left_from >= 4 && n_left_to_next >= 2)
1365         {
1366           vlib_buffer_t * p0, * p1;
1367           ip4_header_t * ip0, * ip1;
1368           udp_header_t * udp0, * udp1;
1369           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1370           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1371           const dpo_id_t *dpo0, *dpo1;
1372           const load_balance_t *lb0, *lb1;
1373           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1374           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1375           i32 len_diff0, len_diff1;
1376           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1377           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1378           u8 enqueue_code;
1379       
1380           pi0 = to_next[0] = from[0];
1381           pi1 = to_next[1] = from[1];
1382           from += 2;
1383           n_left_from -= 2;
1384           to_next += 2;
1385           n_left_to_next -= 2;
1386       
1387           p0 = vlib_get_buffer (vm, pi0);
1388           p1 = vlib_get_buffer (vm, pi1);
1389
1390           ip0 = vlib_buffer_get_current (p0);
1391           ip1 = vlib_buffer_get_current (p1);
1392
1393           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1394                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1395           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1396                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1397
1398           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1399           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1400
1401           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1402
1403           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1404           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1405
1406           /* Treat IP frag packets as "experimental" protocol for now
1407              until support of IP frag reassembly is implemented */
1408           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1409           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1410           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1411           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1412           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1413           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1414
1415           flags0 = p0->flags;
1416           flags1 = p1->flags;
1417
1418           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1419           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1420
1421           udp0 = ip4_next_header (ip0);
1422           udp1 = ip4_next_header (ip1);
1423
1424           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1425           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1426           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1427
1428           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1429           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1430
1431           /* Verify UDP length. */
1432           ip_len0 = clib_net_to_host_u16 (ip0->length);
1433           ip_len1 = clib_net_to_host_u16 (ip1->length);
1434           udp_len0 = clib_net_to_host_u16 (udp0->length);
1435           udp_len1 = clib_net_to_host_u16 (udp1->length);
1436
1437           len_diff0 = ip_len0 - udp_len0;
1438           len_diff1 = ip_len1 - udp_len1;
1439
1440           len_diff0 = is_udp0 ? len_diff0 : 0;
1441           len_diff1 = is_udp1 ? len_diff1 : 0;
1442
1443           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1444                                 & good_tcp_udp0 & good_tcp_udp1)))
1445             {
1446               if (is_tcp_udp0)
1447                 {
1448                   if (is_tcp_udp0
1449                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1450                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1451                   good_tcp_udp0 =
1452                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1453                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1454                 }
1455               if (is_tcp_udp1)
1456                 {
1457                   if (is_tcp_udp1
1458                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1459                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1460                   good_tcp_udp1 =
1461                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1462                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1463                 }
1464             }
1465
1466           good_tcp_udp0 &= len_diff0 >= 0;
1467           good_tcp_udp1 &= len_diff1 >= 0;
1468
1469           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1470           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1471
1472           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1473
1474           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1475           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1476
1477           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1478           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1479                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1480                     : error0);
1481           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1482                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1483                     : error1);
1484
1485           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1486           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1487           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1488           leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1489
1490           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1491           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1492
1493           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1494           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1495
1496           lb0 = load_balance_get(lbi0);
1497           lb1 = load_balance_get(lbi1);
1498           dpo0 = load_balance_get_bucket_i(lb0, 0);
1499           dpo1 = load_balance_get_bucket_i(lb1, 0);
1500
1501           /* 
1502            * Must have a route to source otherwise we drop the packet.
1503            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1504            */
1505           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1506                     && dpo0->dpoi_type != DPO_ADJACENCY
1507                     && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1508                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1509                     ? IP4_ERROR_SRC_LOOKUP_MISS
1510                     : error0);
1511           error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1512                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : 
1513                     error0);
1514           error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL
1515                     && dpo1->dpoi_type != DPO_ADJACENCY
1516                     && dpo1->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1517                     && ip1->dst_address.as_u32 != 0xFFFFFFFF
1518                     ? IP4_ERROR_SRC_LOOKUP_MISS
1519                     : error1);
1520           error1 = (dpo0->dpoi_type == DPO_RECEIVE ?
1521                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : 
1522                     error1);
1523
1524           next0 = lm->local_next_by_ip_protocol[proto0];
1525           next1 = lm->local_next_by_ip_protocol[proto1];
1526
1527           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1528           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1529
1530           p0->error = error0 ? error_node->errors[error0] : 0;
1531           p1->error = error1 ? error_node->errors[error1] : 0;
1532
1533           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1534
1535           if (PREDICT_FALSE (enqueue_code != 0))
1536             {
1537               switch (enqueue_code)
1538                 {
1539                 case 1:
1540                   /* A B A */
1541                   to_next[-2] = pi1;
1542                   to_next -= 1;
1543                   n_left_to_next += 1;
1544                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1545                   break;
1546
1547                 case 2:
1548                   /* A A B */
1549                   to_next -= 1;
1550                   n_left_to_next += 1;
1551                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1552                   break;
1553
1554                 case 3:
1555                   /* A B B or A B C */
1556                   to_next -= 2;
1557                   n_left_to_next += 2;
1558                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1559                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1560                   if (next0 == next1)
1561                     {
1562                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1563                       next_index = next1;
1564                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1565                     }
1566                   break;
1567                 }
1568             }
1569         }
1570
1571       while (n_left_from > 0 && n_left_to_next > 0)
1572         {
1573           vlib_buffer_t * p0;
1574           ip4_header_t * ip0;
1575           udp_header_t * udp0;
1576           ip4_fib_mtrie_t * mtrie0;
1577           ip4_fib_mtrie_leaf_t leaf0;
1578           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1579           i32 len_diff0;
1580           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1581           load_balance_t *lb0;
1582           const dpo_id_t *dpo0;
1583
1584           pi0 = to_next[0] = from[0];
1585           from += 1;
1586           n_left_from -= 1;
1587           to_next += 1;
1588           n_left_to_next -= 1;
1589       
1590           p0 = vlib_get_buffer (vm, pi0);
1591
1592           ip0 = vlib_buffer_get_current (p0);
1593
1594           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1595                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1596
1597           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1598
1599           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1600
1601           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1602
1603           /* Treat IP frag packets as "experimental" protocol for now
1604              until support of IP frag reassembly is implemented */
1605           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1606           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1607           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1608
1609           flags0 = p0->flags;
1610
1611           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1612
1613           udp0 = ip4_next_header (ip0);
1614
1615           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1616           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1617
1618           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1619
1620           /* Verify UDP length. */
1621           ip_len0 = clib_net_to_host_u16 (ip0->length);
1622           udp_len0 = clib_net_to_host_u16 (udp0->length);
1623
1624           len_diff0 = ip_len0 - udp_len0;
1625
1626           len_diff0 = is_udp0 ? len_diff0 : 0;
1627
1628           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1629             {
1630               if (is_tcp_udp0)
1631                 {
1632                   if (is_tcp_udp0
1633                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1634                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1635                   good_tcp_udp0 =
1636                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1637                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1638                 }
1639             }
1640
1641           good_tcp_udp0 &= len_diff0 >= 0;
1642
1643           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1644
1645           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1646
1647           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1648
1649           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1650           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1651                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1652                     : error0);
1653
1654           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1655           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1656
1657           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1658           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1659
1660           lb0 = load_balance_get(lbi0);
1661           dpo0 = load_balance_get_bucket_i(lb0, 0);
1662
1663           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1664               vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1665                   dpo0->dpoi_index;
1666
1667           /* Must have a route to source otherwise we drop the packet. */
1668           error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1669                     && dpo0->dpoi_type != DPO_ADJACENCY
1670                     && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE
1671                     && dpo0->dpoi_type != DPO_RECEIVE
1672                     && ip0->dst_address.as_u32 != 0xFFFFFFFF
1673                     ? IP4_ERROR_SRC_LOOKUP_MISS
1674                     : error0);
1675           /* Packet originated from a local address => spoofing */
1676           error0 = (dpo0->dpoi_type == DPO_RECEIVE ?
1677                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : 
1678                     error0);
1679
1680           next0 = lm->local_next_by_ip_protocol[proto0];
1681
1682           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1683
1684           p0->error = error0? error_node->errors[error0] : 0;
1685
1686           if (PREDICT_FALSE (next0 != next_index))
1687             {
1688               n_left_to_next += 1;
1689               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1690
1691               next_index = next0;
1692               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1693               to_next[0] = pi0;
1694               to_next += 1;
1695               n_left_to_next -= 1;
1696             }
1697         }
1698   
1699       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1700     }
1701
1702   return frame->n_vectors;
1703 }
1704
1705 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1706   .function = ip4_local,
1707   .name = "ip4-local",
1708   .vector_size = sizeof (u32),
1709
1710   .format_trace = format_ip4_forward_next_trace,
1711
1712   .n_next_nodes = IP_LOCAL_N_NEXT,
1713   .next_nodes = {
1714     [IP_LOCAL_NEXT_DROP] = "error-drop",
1715     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1716     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1717     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1718   },
1719 };
1720
1721 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
1722
1723 void ip4_register_protocol (u32 protocol, u32 node_index)
1724 {
1725   vlib_main_t * vm = vlib_get_main();
1726   ip4_main_t * im = &ip4_main;
1727   ip_lookup_main_t * lm = &im->lookup_main;
1728
1729   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1730   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
1731 }
1732
1733 static clib_error_t *
1734 show_ip_local_command_fn (vlib_main_t * vm,
1735                           unformat_input_t * input,
1736                          vlib_cli_command_t * cmd)
1737 {
1738   ip4_main_t * im = &ip4_main;
1739   ip_lookup_main_t * lm = &im->lookup_main;
1740   int i;
1741
1742   vlib_cli_output (vm, "Protocols handled by ip4_local");
1743   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1744     {
1745       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1746         vlib_cli_output (vm, "%d", i);
1747     }
1748   return 0;
1749 }
1750
1751
1752
1753 VLIB_CLI_COMMAND (show_ip_local, static) = {
1754   .path = "show ip local",
1755   .function = show_ip_local_command_fn,
1756   .short_help = "Show ip local protocol table",
1757 };
1758
1759 always_inline uword
1760 ip4_arp_inline (vlib_main_t * vm,
1761                 vlib_node_runtime_t * node,
1762                 vlib_frame_t * frame,
1763                 int is_glean)
1764 {
1765   vnet_main_t * vnm = vnet_get_main();
1766   ip4_main_t * im = &ip4_main;
1767   ip_lookup_main_t * lm = &im->lookup_main;
1768   u32 * from, * to_next_drop;
1769   uword n_left_from, n_left_to_next_drop, next_index;
1770   static f64 time_last_seed_change = -1e100;
1771   static u32 hash_seeds[3];
1772   static uword hash_bitmap[256 / BITS (uword)]; 
1773   f64 time_now;
1774
1775   if (node->flags & VLIB_NODE_FLAG_TRACE)
1776     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1777
1778   time_now = vlib_time_now (vm);
1779   if (time_now - time_last_seed_change > 1e-3)
1780     {
1781       uword i;
1782       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1783                                              sizeof (hash_seeds));
1784       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1785         hash_seeds[i] = r[i];
1786
1787       /* Mark all hash keys as been no-seen before. */
1788       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1789         hash_bitmap[i] = 0;
1790
1791       time_last_seed_change = time_now;
1792     }
1793
1794   from = vlib_frame_vector_args (frame);
1795   n_left_from = frame->n_vectors;
1796   next_index = node->cached_next_index;
1797   if (next_index == IP4_ARP_NEXT_DROP)
1798     next_index = IP4_ARP_N_NEXT; /* point to first interface */
1799
1800   while (n_left_from > 0)
1801     {
1802       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1803                            to_next_drop, n_left_to_next_drop);
1804
1805       while (n_left_from > 0 && n_left_to_next_drop > 0)
1806         {
1807           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1808           ip_adjacency_t * adj0;
1809           vlib_buffer_t * p0;
1810           ip4_header_t * ip0;
1811           uword bm0;
1812
1813           pi0 = from[0];
1814
1815           p0 = vlib_get_buffer (vm, pi0);
1816
1817           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1818           adj0 = ip_get_adjacency (lm, adj_index0);
1819           ip0 = vlib_buffer_get_current (p0);
1820
1821           /*
1822            * this is the Glean case, so we are ARPing for the
1823            * packet's destination 
1824            */
1825           a0 = hash_seeds[0];
1826           b0 = hash_seeds[1];
1827           c0 = hash_seeds[2];
1828
1829           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1830           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1831
1832           if (is_glean)
1833           {
1834               a0 ^= ip0->dst_address.data_u32;
1835           }
1836           else
1837           {
1838               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1839           }
1840           b0 ^= sw_if_index0;
1841
1842           hash_v3_finalize32 (a0, b0, c0);
1843
1844           c0 &= BITS (hash_bitmap) - 1;
1845           c0 = c0 / BITS (uword);
1846           m0 = (uword) 1 << (c0 % BITS (uword));
1847
1848           bm0 = hash_bitmap[c0];
1849           drop0 = (bm0 & m0) != 0;
1850
1851           /* Mark it as seen. */
1852           hash_bitmap[c0] = bm0 | m0;
1853
1854           from += 1;
1855           n_left_from -= 1;
1856           to_next_drop[0] = pi0;
1857           to_next_drop += 1;
1858           n_left_to_next_drop -= 1;
1859
1860           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1861
1862           if (drop0)
1863             continue;
1864
1865           /* 
1866            * Can happen if the control-plane is programming tables
1867            * with traffic flowing; at least that's today's lame excuse.
1868            */
1869           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1870               (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1871           {
1872             p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1873           }
1874           else
1875           /* Send ARP request. */
1876           {
1877             u32 bi0 = 0;
1878             vlib_buffer_t * b0;
1879             ethernet_arp_header_t * h0;
1880             vnet_hw_interface_t * hw_if0;
1881
1882             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1883
1884             /* Add rewrite/encap string for ARP packet. */
1885             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1886
1887             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1888
1889             /* Src ethernet address in ARP header. */
1890             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1891                     sizeof (h0->ip4_over_ethernet[0].ethernet));
1892
1893             if (is_glean)
1894             {
1895                 /* The interface's source address is stashed in the Glean Adj */
1896                 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1897
1898                 /* Copy in destination address we are requesting. This is the
1899                 * glean case, so it's the packet's destination.*/
1900                 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1901             }
1902             else
1903             {
1904                 /* Src IP address in ARP header. */
1905                 if (ip4_src_address_for_packet(lm, sw_if_index0,
1906                                                &h0->ip4_over_ethernet[0].ip4))
1907                 {
1908                     /* No source address available */
1909                     p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1910                     vlib_buffer_free(vm, &bi0, 1);
1911                     continue;
1912                 }
1913
1914                 /* Copy in destination address we are requesting from the
1915                    incomplete adj */
1916                 h0->ip4_over_ethernet[1].ip4.data_u32 =
1917                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1918             }
1919
1920             vlib_buffer_copy_trace_flag (vm, p0, bi0);
1921             b0 = vlib_get_buffer (vm, bi0);
1922             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1923
1924             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1925
1926             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1927           }
1928         }
1929
1930       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1931     }
1932
1933   return frame->n_vectors;
1934 }
1935
1936 static uword
1937 ip4_arp (vlib_main_t * vm,
1938          vlib_node_runtime_t * node,
1939          vlib_frame_t * frame)
1940 {
1941     return (ip4_arp_inline(vm, node, frame, 0));
1942 }
1943
1944 static uword
1945 ip4_glean (vlib_main_t * vm,
1946            vlib_node_runtime_t * node,
1947            vlib_frame_t * frame)
1948 {
1949     return (ip4_arp_inline(vm, node, frame, 1));
1950 }
1951
1952 static char * ip4_arp_error_strings[] = {
1953   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1954   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1955   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1956   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1957   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1958   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1959 };
1960
1961 VLIB_REGISTER_NODE (ip4_arp_node) = {
1962   .function = ip4_arp,
1963   .name = "ip4-arp",
1964   .vector_size = sizeof (u32),
1965
1966   .format_trace = format_ip4_forward_next_trace,
1967
1968   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1969   .error_strings = ip4_arp_error_strings,
1970
1971   .n_next_nodes = IP4_ARP_N_NEXT,
1972   .next_nodes = {
1973     [IP4_ARP_NEXT_DROP] = "error-drop",
1974   },
1975 };
1976
1977 VLIB_REGISTER_NODE (ip4_glean_node) = {
1978   .function = ip4_glean,
1979   .name = "ip4-glean",
1980   .vector_size = sizeof (u32),
1981
1982   .format_trace = format_ip4_forward_next_trace,
1983
1984   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1985   .error_strings = ip4_arp_error_strings,
1986
1987   .n_next_nodes = IP4_ARP_N_NEXT,
1988   .next_nodes = {
1989     [IP4_ARP_NEXT_DROP] = "error-drop",
1990   },
1991 };
1992
1993 #define foreach_notrace_ip4_arp_error           \
1994 _(DROP)                                         \
1995 _(REQUEST_SENT)                                 \
1996 _(REPLICATE_DROP)                               \
1997 _(REPLICATE_FAIL)
1998
1999 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2000 {
2001   vlib_node_runtime_t *rt = 
2002     vlib_node_get_runtime (vm, ip4_arp_node.index);
2003
2004   /* don't trace ARP request packets */
2005 #define _(a)                                    \
2006     vnet_pcap_drop_trace_filter_add_del         \
2007         (rt->errors[IP4_ARP_ERROR_##a],         \
2008          1 /* is_add */);
2009     foreach_notrace_ip4_arp_error;
2010 #undef _
2011   return 0;
2012 }
2013
2014 VLIB_INIT_FUNCTION(arp_notrace_init);
2015
2016
2017 /* Send an ARP request to see if given destination is reachable on given interface. */
2018 clib_error_t *
2019 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2020 {
2021   vnet_main_t * vnm = vnet_get_main();
2022   ip4_main_t * im = &ip4_main;
2023   ethernet_arp_header_t * h;
2024   ip4_address_t * src;
2025   ip_interface_address_t * ia;
2026   ip_adjacency_t * adj;
2027   vnet_hw_interface_t * hi;
2028   vnet_sw_interface_t * si;
2029   vlib_buffer_t * b;
2030   u32 bi = 0;
2031
2032   si = vnet_get_sw_interface (vnm, sw_if_index);
2033
2034   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2035     {
2036       return clib_error_return (0, "%U: interface %U down",
2037                                 format_ip4_address, dst, 
2038                                 format_vnet_sw_if_index_name, vnm, 
2039                                 sw_if_index);
2040     }
2041
2042   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2043   if (! src)
2044     {
2045       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2046       return clib_error_return 
2047         (0, "no matching interface address for destination %U (interface %U)",
2048          format_ip4_address, dst,
2049          format_vnet_sw_if_index_name, vnm, sw_if_index);
2050     }
2051
2052   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2053
2054   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2055
2056   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2057
2058   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2059
2060   h->ip4_over_ethernet[0].ip4 = src[0];
2061   h->ip4_over_ethernet[1].ip4 = dst[0];
2062
2063   b = vlib_get_buffer (vm, bi);
2064   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2065
2066   /* Add encapsulation string for software interface (e.g. ethernet header). */
2067   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2068   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2069
2070   {
2071     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2072     u32 * to_next = vlib_frame_vector_args (f);
2073     to_next[0] = bi;
2074     f->n_vectors = 1;
2075     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2076   }
2077
2078   return /* no error */ 0;
2079 }
2080
/*
 * Fixed next-node indices used by the ip4 rewrite worker.  DROP is 0; the
 * worker uses 0 both as "no override" and, in its `next0 && override`
 * test, as "packet is being dropped".
 */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ARP = 1,
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,
} ip4_rewrite_next_t;
2086
2087 always_inline uword
2088 ip4_rewrite_inline (vlib_main_t * vm,
2089                     vlib_node_runtime_t * node,
2090                     vlib_frame_t * frame,
2091                     int rewrite_for_locally_received_packets)
2092 {
2093   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2094   u32 * from = vlib_frame_vector_args (frame);
2095   u32 n_left_from, n_left_to_next, * to_next, next_index;
2096   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2097   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2098   ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2099
2100   n_left_from = frame->n_vectors;
2101   next_index = node->cached_next_index;
2102   u32 cpu_index = os_get_cpu_number();
2103   
2104   while (n_left_from > 0)
2105     {
2106       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2107
2108       while (n_left_from >= 4 && n_left_to_next >= 2)
2109         {
2110           ip_adjacency_t * adj0, * adj1;
2111           vlib_buffer_t * p0, * p1;
2112           ip4_header_t * ip0, * ip1;
2113           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2114           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2115           u32 next0_override, next1_override;
2116           u32 tx_sw_if_index0, tx_sw_if_index1;
2117
2118           if (rewrite_for_locally_received_packets)
2119               next0_override = next1_override = 0;
2120
2121           /* Prefetch next iteration. */
2122           {
2123             vlib_buffer_t * p2, * p3;
2124
2125             p2 = vlib_get_buffer (vm, from[2]);
2126             p3 = vlib_get_buffer (vm, from[3]);
2127
2128             vlib_prefetch_buffer_header (p2, STORE);
2129             vlib_prefetch_buffer_header (p3, STORE);
2130
2131             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2132             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2133           }
2134
2135           pi0 = to_next[0] = from[0];
2136           pi1 = to_next[1] = from[1];
2137
2138           from += 2;
2139           n_left_from -= 2;
2140           to_next += 2;
2141           n_left_to_next -= 2;
2142       
2143           p0 = vlib_get_buffer (vm, pi0);
2144           p1 = vlib_get_buffer (vm, pi1);
2145
2146           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2147           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2148
2149           /* We should never rewrite a pkt using the MISS adjacency */
2150           ASSERT(adj_index0 && adj_index1);
2151
2152           ip0 = vlib_buffer_get_current (p0);
2153           ip1 = vlib_buffer_get_current (p1);
2154
2155           error0 = error1 = IP4_ERROR_NONE;
2156           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2157
2158           /* Decrement TTL & update checksum.
2159              Works either endian, so no need for byte swap. */
2160           if (! rewrite_for_locally_received_packets)
2161             {
2162               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2163
2164               /* Input node should have reject packets with ttl 0. */
2165               ASSERT (ip0->ttl > 0);
2166               ASSERT (ip1->ttl > 0);
2167
2168               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2169               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2170
2171               checksum0 += checksum0 >= 0xffff;
2172               checksum1 += checksum1 >= 0xffff;
2173
2174               ip0->checksum = checksum0;
2175               ip1->checksum = checksum1;
2176
2177               ttl0 -= 1;
2178               ttl1 -= 1;
2179
2180               ip0->ttl = ttl0;
2181               ip1->ttl = ttl1;
2182
2183               /*
2184                * If the ttl drops below 1 when forwarding, generate
2185                * an ICMP response.
2186                */
2187               if (PREDICT_FALSE(ttl0 <= 0))
2188                 {
2189                   error0 = IP4_ERROR_TIME_EXPIRED;
2190                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2191                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2192                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2193                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2194                 }
2195               if (PREDICT_FALSE(ttl1 <= 0))
2196                 {
2197                   error1 = IP4_ERROR_TIME_EXPIRED;
2198                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2199                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2200                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2201                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2202                 }
2203
2204               /* Verify checksum. */
2205               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2206               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2207             }
2208
2209           /* Rewrite packet header and updates lengths. */
2210           adj0 = ip_get_adjacency (lm, adj_index0);
2211           adj1 = ip_get_adjacency (lm, adj_index1);
2212       
2213           if (rewrite_for_locally_received_packets)
2214             {
2215               if (PREDICT_FALSE(adj0->lookup_next_index
2216                                 == IP_LOOKUP_NEXT_ARP))
2217                 next0_override = IP4_REWRITE_NEXT_ARP;
2218               if (PREDICT_FALSE(adj1->lookup_next_index
2219                                 == IP_LOOKUP_NEXT_ARP))
2220                 next1_override = IP4_REWRITE_NEXT_ARP;
2221             }
2222
2223           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2224           rw_len0 = adj0[0].rewrite_header.data_bytes;
2225           rw_len1 = adj1[0].rewrite_header.data_bytes;
2226           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2227           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2228
2229           /* Check MTU of outgoing interface. */
2230           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2231                     ? IP4_ERROR_MTU_EXCEEDED
2232                     : error0);
2233           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2234                     ? IP4_ERROR_MTU_EXCEEDED
2235                     : error1);
2236
2237           next0 = (error0 == IP4_ERROR_NONE)
2238             ? adj0[0].rewrite_header.next_index : next0;
2239
2240           if (rewrite_for_locally_received_packets)
2241               next0 = next0 && next0_override ? next0_override : next0;
2242
2243           next1 = (error1 == IP4_ERROR_NONE)
2244             ? adj1[0].rewrite_header.next_index : next1;
2245
2246           if (rewrite_for_locally_received_packets)
2247               next1 = next1 && next1_override ? next1_override : next1;
2248
2249           /* 
2250            * We've already accounted for an ethernet_header_t elsewhere
2251            */
2252           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2253               vlib_increment_combined_counter 
2254                   (&adjacency_counters,
2255                    cpu_index, adj_index0, 
2256                    /* packet increment */ 0,
2257                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2258
2259           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2260               vlib_increment_combined_counter 
2261                   (&adjacency_counters,
2262                    cpu_index, adj_index1, 
2263                    /* packet increment */ 0,
2264                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2265
2266           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2267            * to see the IP headerr */
2268           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2269             {
2270               p0->current_data -= rw_len0;
2271               p0->current_length += rw_len0;
2272               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2273               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2274                   tx_sw_if_index0;
2275
2276               if (PREDICT_FALSE 
2277                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2278                                     tx_sw_if_index0)))
2279                 {
2280                   p0->current_config_index = 
2281                     vec_elt (cm->config_index_by_sw_if_index, 
2282                              tx_sw_if_index0);
2283                   vnet_get_config_data (&cm->config_main,
2284                                         &p0->current_config_index,
2285                                         &next0,
2286                                         /* # bytes of config data */ 0);
2287                 }
2288             }
2289           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2290             {
2291               p1->current_data -= rw_len1;
2292               p1->current_length += rw_len1;
2293
2294               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2295               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2296                   tx_sw_if_index1;
2297
2298               if (PREDICT_FALSE 
2299                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2300                                     tx_sw_if_index1)))
2301                 {
2302                   p1->current_config_index = 
2303                     vec_elt (cm->config_index_by_sw_if_index, 
2304                              tx_sw_if_index1);
2305                   vnet_get_config_data (&cm->config_main,
2306                                         &p1->current_config_index,
2307                                         &next1,
2308                                         /* # bytes of config data */ 0);
2309                 }
2310             }
2311
2312           /* Guess we are only writing on simple Ethernet header. */
2313           vnet_rewrite_two_headers (adj0[0], adj1[0],
2314                                     ip0, ip1,
2315                                     sizeof (ethernet_header_t));
2316       
2317           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2318                                            to_next, n_left_to_next,
2319                                            pi0, pi1, next0, next1);
2320         }
2321
2322       while (n_left_from > 0 && n_left_to_next > 0)
2323         {
2324           ip_adjacency_t * adj0;
2325           vlib_buffer_t * p0;
2326           ip4_header_t * ip0;
2327           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2328           u32 next0_override;
2329           u32 tx_sw_if_index0;
2330
2331           if (rewrite_for_locally_received_packets)
2332               next0_override = 0;
2333
2334           pi0 = to_next[0] = from[0];
2335
2336           p0 = vlib_get_buffer (vm, pi0);
2337
2338           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2339
2340           /* We should never rewrite a pkt using the MISS adjacency */
2341           ASSERT(adj_index0);
2342
2343           adj0 = ip_get_adjacency (lm, adj_index0);
2344       
2345           ip0 = vlib_buffer_get_current (p0);
2346
2347           error0 = IP4_ERROR_NONE;
2348           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2349
2350           /* Decrement TTL & update checksum. */
2351           if (! rewrite_for_locally_received_packets)
2352             {
2353               i32 ttl0 = ip0->ttl;
2354
2355               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2356
2357               checksum0 += checksum0 >= 0xffff;
2358
2359               ip0->checksum = checksum0;
2360
2361               ASSERT (ip0->ttl > 0);
2362
2363               ttl0 -= 1;
2364
2365               ip0->ttl = ttl0;
2366
2367               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2368
2369               if (PREDICT_FALSE(ttl0 <= 0))
2370                 {
2371                   /*
2372                    * If the ttl drops below 1 when forwarding, generate
2373                    * an ICMP response.
2374                    */
2375                   error0 = IP4_ERROR_TIME_EXPIRED;
2376                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2377                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2378                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2379                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2380                 }
2381             }
2382
2383           if (rewrite_for_locally_received_packets)
2384             {
2385               /* 
2386                * We have to override the next_index in ARP adjacencies,
2387                * because they're set up for ip4-arp, not this node...
2388                */
2389               if (PREDICT_FALSE(adj0->lookup_next_index
2390                                 == IP_LOOKUP_NEXT_ARP))
2391                 next0_override = IP4_REWRITE_NEXT_ARP;
2392             }
2393
2394           /* Guess we are only writing on simple Ethernet header. */
2395           vnet_rewrite_one_header (adj0[0], ip0, 
2396                                    sizeof (ethernet_header_t));
2397           
2398           /* Update packet buffer attributes/set output interface. */
2399           rw_len0 = adj0[0].rewrite_header.data_bytes;
2400           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2401           
2402           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2403               vlib_increment_combined_counter 
2404                   (&adjacency_counters,
2405                    cpu_index, adj_index0, 
2406                    /* packet increment */ 0,
2407                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2408           
2409           /* Check MTU of outgoing interface. */
2410           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2411                     > adj0[0].rewrite_header.max_l3_packet_bytes
2412                     ? IP4_ERROR_MTU_EXCEEDED
2413                     : error0);
2414
2415           p0->error = error_node->errors[error0];
2416
2417           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2418            * to see the IP headerr */
2419           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2420             {
2421               p0->current_data -= rw_len0;
2422               p0->current_length += rw_len0;
2423               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2424
2425               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2426               next0 = adj0[0].rewrite_header.next_index;
2427
2428               if (PREDICT_FALSE 
2429                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2430                                     tx_sw_if_index0)))
2431                   {
2432                     p0->current_config_index = 
2433                       vec_elt (cm->config_index_by_sw_if_index, 
2434                                tx_sw_if_index0);
2435                     vnet_get_config_data (&cm->config_main,
2436                                           &p0->current_config_index,
2437                                           &next0,
2438                                           /* # bytes of config data */ 0);
2439                   }
2440             }
2441
2442           if (rewrite_for_locally_received_packets)
2443               next0 = next0 && next0_override ? next0_override : next0;
2444
2445           from += 1;
2446           n_left_from -= 1;
2447           to_next += 1;
2448           n_left_to_next -= 1;
2449       
2450           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2451                                            to_next, n_left_to_next,
2452                                            pi0, next0);
2453         }
2454   
2455       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2456     }
2457
2458   /* Need to do trace after rewrites to pick up new packet data. */
2459   if (node->flags & VLIB_NODE_FLAG_TRACE)
2460     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2461
2462   return frame->n_vectors;
2463 }
2464
2465
2466 /** @brief IPv4 transit rewrite node.
2467     @node ip4-rewrite-transit
2468
2469     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2470     header checksum, fetch the ip adjacency, check the outbound mtu,
2471     apply the adjacency rewrite, and send pkts to the adjacency
2472     rewrite header's rewrite_next_index.
2473
2474     @param vm vlib_main_t corresponding to the current thread
2475     @param node vlib_node_runtime_t
2476     @param frame vlib_frame_t whose contents should be dispatched
2477
2478     @par Graph mechanics: buffer metadata, next index usage
2479
2480     @em Uses:
2481     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2482         - the rewrite adjacency index
2483     - <code>adj->lookup_next_index</code>
2484         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2485           the packet will be dropped. 
2486     - <code>adj->rewrite_header</code>
2487         - Rewrite string length, rewrite string, next_index
2488
2489     @em Sets:
2490     - <code>b->current_data, b->current_length</code>
2491         - Updated net of applying the rewrite string
2492
2493     <em>Next Indices:</em>
2494     - <code> adj->rewrite_header.next_index </code>
2495       or @c error-drop 
2496 */
2497 static uword
2498 ip4_rewrite_transit (vlib_main_t * vm,
2499                      vlib_node_runtime_t * node,
2500                      vlib_frame_t * frame)
2501 {
2502   return ip4_rewrite_inline (vm, node, frame,
2503                              /* rewrite_for_locally_received_packets */ 0);
2504 }
2505
2506 /** @brief IPv4 local rewrite node.
2507     @node ip4-rewrite-local
2508
2509     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2510     the outbound interface mtu, apply the adjacency rewrite, and send
2511     pkts to the adjacency rewrite header's rewrite_next_index. Deal
2512     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
2513     dst = interface addr."
2514
2515     @param vm vlib_main_t corresponding to the current thread
2516     @param node vlib_node_runtime_t
2517     @param frame vlib_frame_t whose contents should be dispatched
2518
2519     @par Graph mechanics: buffer metadata, next index usage
2520
2521     @em Uses:
2522     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2523         - the rewrite adjacency index
2524     - <code>adj->lookup_next_index</code>
2525         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2526           the packet will be dropped. 
2527     - <code>adj->rewrite_header</code>
2528         - Rewrite string length, rewrite string, next_index
2529
2530     @em Sets:
2531     - <code>b->current_data, b->current_length</code>
2532         - Updated net of applying the rewrite string
2533
2534     <em>Next Indices:</em>
2535     - <code> adj->rewrite_header.next_index </code>
2536       or @c error-drop 
2537 */
2538
2539 static uword
2540 ip4_rewrite_local (vlib_main_t * vm,
2541                    vlib_node_runtime_t * node,
2542                    vlib_frame_t * frame)
2543 {
2544   return ip4_rewrite_inline (vm, node, frame,
2545                              /* rewrite_for_locally_received_packets */ 1);
2546 }
2547
2548 static uword
2549 ip4_midchain (vlib_main_t * vm,
2550               vlib_node_runtime_t * node,
2551               vlib_frame_t * frame)
2552 {
2553   return ip4_rewrite_inline (vm, node, frame,
2554                              /* rewrite_for_locally_received_packets */ 0);
2555 }
2556
2557 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2558   .function = ip4_rewrite_transit,
2559   .name = "ip4-rewrite-transit",
2560   .vector_size = sizeof (u32),
2561
2562   .format_trace = format_ip4_rewrite_trace,
2563
2564   .n_next_nodes = 3,
2565   .next_nodes = {
2566     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2567     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2568     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2569   },
2570 };
2571
2572 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2573
2574 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2575   .function = ip4_midchain,
2576   .name = "ip4-midchain",
2577   .vector_size = sizeof (u32),
2578
2579   .format_trace = format_ip4_forward_next_trace,
2580
2581   .n_next_nodes = 2,
2582   .next_nodes = {
2583     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2584     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2585   },
2586 };
2587
2588 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
2589
2590 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2591   .function = ip4_rewrite_local,
2592   .name = "ip4-rewrite-local",
2593   .vector_size = sizeof (u32),
2594
2595   .sibling_of = "ip4-rewrite-transit",
2596
2597   .format_trace = format_ip4_rewrite_trace,
2598
2599   .n_next_nodes = 0,
2600 };
2601
2602 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2603
2604 static clib_error_t *
2605 add_del_interface_table (vlib_main_t * vm,
2606                          unformat_input_t * input,
2607                          vlib_cli_command_t * cmd)
2608 {
2609   vnet_main_t * vnm = vnet_get_main();
2610   clib_error_t * error = 0;
2611   u32 sw_if_index, table_id;
2612
2613   sw_if_index = ~0;
2614
2615   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2616     {
2617       error = clib_error_return (0, "unknown interface `%U'",
2618                                  format_unformat_error, input);
2619       goto done;
2620     }
2621
2622   if (unformat (input, "%d", &table_id))
2623     ;
2624   else
2625     {
2626       error = clib_error_return (0, "expected table id `%U'",
2627                                  format_unformat_error, input);
2628       goto done;
2629     }
2630
2631   {
2632     ip4_main_t * im = &ip4_main;
2633     u32 fib_index;
2634
2635     fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2636                                                    table_id);
2637
2638     //
2639     // FIXME-LATER
2640     //  changing an interface's table has consequences for any connecteds
2641     //  and adj-fibs already installed.
2642     //
2643     vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2644     im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2645   }
2646
2647  done:
2648   return error;
2649 }
2650
2651 /*?
2652  * Place the indicated interface into the supplied VRF
2653  *
2654  * @cliexpar
2655  * @cliexstart{set interface ip table}
2656  *
2657  *  vpp# set interface ip table GigabitEthernet2/0/0 2
2658  *
2659  * Interface addresses added after setting the interface IP table end up in the indicated VRF table.
2660  * Predictable but potentially counter-intuitive results occur if you provision interface addresses in multiple FIBs.
2661  * Upon RX, packets will be processed in the last IP table ID provisioned.
2662  * It might be marginally useful to evade source RPF drops to put an interface address into multiple FIBs.
2663  * @cliexend
2664  ?*/
2665 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2666   .path = "set interface ip table",
2667   .function = add_del_interface_table,
2668   .short_help = "Add/delete FIB table id for interface",
2669 };
2670
2671
2672 static uword
2673 ip4_lookup_multicast (vlib_main_t * vm,
2674                       vlib_node_runtime_t * node,
2675                       vlib_frame_t * frame)
2676 {
2677   ip4_main_t * im = &ip4_main;
2678   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2679   u32 n_left_from, n_left_to_next, * from, * to_next;
2680   ip_lookup_next_t next;
2681   u32 cpu_index = os_get_cpu_number();
2682
2683   from = vlib_frame_vector_args (frame);
2684   n_left_from = frame->n_vectors;
2685   next = node->cached_next_index;
2686
2687   while (n_left_from > 0)
2688     {
2689       vlib_get_next_frame (vm, node, next,
2690                            to_next, n_left_to_next);
2691
2692       while (n_left_from >= 4 && n_left_to_next >= 2)
2693         {
2694           vlib_buffer_t * p0, * p1;
2695           u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2696           ip_lookup_next_t next0, next1;
2697           ip4_header_t * ip0, * ip1;
2698           u32 fib_index0, fib_index1;
2699           const dpo_id_t *dpo0, *dpo1;
2700           const load_balance_t * lb0, * lb1;
2701
2702           /* Prefetch next iteration. */
2703           {
2704             vlib_buffer_t * p2, * p3;
2705
2706             p2 = vlib_get_buffer (vm, from[2]);
2707             p3 = vlib_get_buffer (vm, from[3]);
2708
2709             vlib_prefetch_buffer_header (p2, LOAD);
2710             vlib_prefetch_buffer_header (p3, LOAD);
2711
2712             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2713             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2714           }
2715
2716           pi0 = to_next[0] = from[0];
2717           pi1 = to_next[1] = from[1];
2718
2719           p0 = vlib_get_buffer (vm, pi0);
2720           p1 = vlib_get_buffer (vm, pi1);
2721
2722           ip0 = vlib_buffer_get_current (p0);
2723           ip1 = vlib_buffer_get_current (p1);
2724
2725           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2726           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2727           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2728             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2729           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2730             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2731
2732           lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2733                                                &ip0->dst_address);
2734           lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2735                                                &ip1->dst_address);
2736
2737           lb0 = load_balance_get (lb_index0);
2738           lb1 = load_balance_get (lb_index1);
2739
2740           ASSERT (lb0->lb_n_buckets > 0);
2741           ASSERT (is_pow2 (lb0->lb_n_buckets));
2742           ASSERT (lb1->lb_n_buckets > 0);
2743           ASSERT (is_pow2 (lb1->lb_n_buckets));
2744
2745           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2746               (ip0, lb0->lb_hash_config);
2747                                                                   
2748           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2749               (ip1, lb1->lb_hash_config);
2750
2751           dpo0 = load_balance_get_bucket_i(lb0,
2752                                            (vnet_buffer (p0)->ip.flow_hash &
2753                                             (lb0->lb_n_buckets_minus_1)));
2754           dpo1 = load_balance_get_bucket_i(lb1,
2755                                            (vnet_buffer (p1)->ip.flow_hash &
2756                                             (lb0->lb_n_buckets_minus_1)));
2757
2758           next0 = dpo0->dpoi_next_node;
2759           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2760           next1 = dpo1->dpoi_next_node;
2761           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2762
2763           if (1) /* $$$$$$ HACK FIXME */
2764           vlib_increment_combined_counter 
2765               (cm, cpu_index, lb_index0, 1,
2766                vlib_buffer_length_in_chain (vm, p0));
2767           if (1) /* $$$$$$ HACK FIXME */
2768           vlib_increment_combined_counter 
2769               (cm, cpu_index, lb_index1, 1,
2770                vlib_buffer_length_in_chain (vm, p1));
2771
2772           from += 2;
2773           to_next += 2;
2774           n_left_to_next -= 2;
2775           n_left_from -= 2;
2776
2777           wrong_next = (next0 != next) + 2*(next1 != next);
2778           if (PREDICT_FALSE (wrong_next != 0))
2779             {
2780               switch (wrong_next)
2781                 {
2782                 case 1:
2783                   /* A B A */
2784                   to_next[-2] = pi1;
2785                   to_next -= 1;
2786                   n_left_to_next += 1;
2787                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2788                   break;
2789
2790                 case 2:
2791                   /* A A B */
2792                   to_next -= 1;
2793                   n_left_to_next += 1;
2794                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2795                   break;
2796
2797                 case 3:
2798                   /* A B C */
2799                   to_next -= 2;
2800                   n_left_to_next += 2;
2801                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2802                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2803                   if (next0 == next1)
2804                     {
2805                       /* A B B */
2806                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2807                       next = next1;
2808                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2809                     }
2810                 }
2811             }
2812         }
2813     
2814       while (n_left_from > 0 && n_left_to_next > 0)
2815         {
2816           vlib_buffer_t * p0;
2817           ip4_header_t * ip0;
2818           u32 pi0, lb_index0;
2819           ip_lookup_next_t next0;
2820           u32 fib_index0;
2821           const dpo_id_t *dpo0;
2822           const load_balance_t * lb0;
2823
2824           pi0 = from[0];
2825           to_next[0] = pi0;
2826
2827           p0 = vlib_get_buffer (vm, pi0);
2828
2829           ip0 = vlib_buffer_get_current (p0);
2830
2831           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2832                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2833           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2834               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2835           
2836           lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2837                                                &ip0->dst_address);
2838
2839           lb0 = load_balance_get (lb_index0);
2840
2841           ASSERT (lb0->lb_n_buckets > 0);
2842           ASSERT (is_pow2 (lb0->lb_n_buckets));
2843
2844           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2845               (ip0, lb0->lb_hash_config);
2846
2847           dpo0 = load_balance_get_bucket_i(lb0,
2848                                            (vnet_buffer (p0)->ip.flow_hash &
2849                                             (lb0->lb_n_buckets_minus_1)));
2850
2851           next0 = dpo0->dpoi_next_node;
2852           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2853
2854           if (1) /* $$$$$$ HACK FIXME */
2855               vlib_increment_combined_counter 
2856                   (cm, cpu_index, lb_index0, 1,
2857                    vlib_buffer_length_in_chain (vm, p0));
2858
2859           from += 1;
2860           to_next += 1;
2861           n_left_to_next -= 1;
2862           n_left_from -= 1;
2863
2864           if (PREDICT_FALSE (next0 != next))
2865             {
2866               n_left_to_next += 1;
2867               vlib_put_next_frame (vm, node, next, n_left_to_next);
2868               next = next0;
2869               vlib_get_next_frame (vm, node, next,
2870                                    to_next, n_left_to_next);
2871               to_next[0] = pi0;
2872               to_next += 1;
2873               n_left_to_next -= 1;
2874             }
2875         }
2876
2877       vlib_put_next_frame (vm, node, next, n_left_to_next);
2878     }
2879
2880   if (node->flags & VLIB_NODE_FLAG_TRACE)
2881       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2882
2883   return frame->n_vectors;
2884 }
2885
2886 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2887   .function = ip4_lookup_multicast,
2888   .name = "ip4-lookup-multicast",
2889   .vector_size = sizeof (u32),
2890   .sibling_of = "ip4-lookup",
2891   .format_trace = format_ip4_lookup_trace,
2892
2893   .n_next_nodes = 0,
2894 };
2895
2896 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
2897
2898 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2899   .function = ip4_drop,
2900   .name = "ip4-multicast",
2901   .vector_size = sizeof (u32),
2902
2903   .format_trace = format_ip4_forward_next_trace,
2904
2905   .n_next_nodes = 1,
2906   .next_nodes = {
2907     [0] = "error-drop",
2908   },
2909 };
2910
2911 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2912 {
2913   ip4_fib_mtrie_t * mtrie0;
2914   ip4_fib_mtrie_leaf_t leaf0;
2915   u32 lbi0;
2916     
2917   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2918
2919   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2920   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2921   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2922   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2923   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2924   
2925   /* Handle default route. */
2926   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2927   
2928   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2929   
2930   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
2931 }
2932  
2933 static clib_error_t *
2934 test_lookup_command_fn (vlib_main_t * vm,
2935                         unformat_input_t * input,
2936                         vlib_cli_command_t * cmd)
2937 {
2938   u32 table_id = 0;
2939   f64 count = 1;
2940   u32 n;
2941   int i;
2942   ip4_address_t ip4_base_address;
2943   u64 errors = 0;
2944
2945   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2946       if (unformat (input, "table %d", &table_id))
2947         ;
2948       else if (unformat (input, "count %f", &count))
2949         ;
2950
2951       else if (unformat (input, "%U",
2952                          unformat_ip4_address, &ip4_base_address))
2953         ;
2954       else
2955         return clib_error_return (0, "unknown input `%U'",
2956                                   format_unformat_error, input);
2957   }
2958
2959   n = count;
2960
2961   for (i = 0; i < n; i++)
2962     {
2963       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2964         errors++;
2965
2966       ip4_base_address.as_u32 = 
2967         clib_host_to_net_u32 (1 + 
2968                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2969     }
2970
2971   if (errors) 
2972     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2973   else
2974     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2975
2976   return 0;
2977 }
2978
2979 VLIB_CLI_COMMAND (lookup_test_command, static) = {
2980     .path = "test lookup",
2981     .short_help = "test lookup",
2982     .function = test_lookup_command_fn,
2983 };
2984
2985 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2986 {
2987   ip4_main_t * im4 = &ip4_main;
2988   ip4_fib_t * fib;
2989   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
2990
2991   if (p == 0)
2992     return VNET_API_ERROR_NO_SUCH_FIB;
2993
2994   fib = ip4_fib_get (p[0]);
2995
2996   fib->flow_hash_config = flow_hash_config;
2997   return 0;
2998 }
2999  
3000 static clib_error_t *
3001 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3002                              unformat_input_t * input,
3003                              vlib_cli_command_t * cmd)
3004 {
3005   int matched = 0;
3006   u32 table_id = 0;
3007   u32 flow_hash_config = 0;
3008   int rv;
3009
3010   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3011     if (unformat (input, "table %d", &table_id))
3012       matched = 1;
3013 #define _(a,v) \
3014     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3015     foreach_flow_hash_bit
3016 #undef _
3017     else break;
3018   }
3019   
3020   if (matched == 0)
3021     return clib_error_return (0, "unknown input `%U'",
3022                               format_unformat_error, input);
3023   
3024   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3025   switch (rv)
3026     {
3027     case 0:
3028       break;
3029       
3030     case VNET_API_ERROR_NO_SUCH_FIB:
3031       return clib_error_return (0, "no such FIB table %d", table_id);
3032       
3033     default:
3034       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3035       break;
3036     }
3037   
3038   return 0;
3039 }
3040  
3041 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3042   .path = "set ip flow-hash",
3043   .short_help = 
3044   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3045   .function = set_ip_flow_hash_command_fn,
3046 };
3047  
3048 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3049                                  u32 table_index)
3050 {
3051   vnet_main_t * vnm = vnet_get_main();
3052   vnet_interface_main_t * im = &vnm->interface_main;
3053   ip4_main_t * ipm = &ip4_main;
3054   ip_lookup_main_t * lm = &ipm->lookup_main;
3055   vnet_classify_main_t * cm = &vnet_classify_main;
3056
3057   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3058     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3059
3060   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3061     return VNET_API_ERROR_NO_SUCH_ENTRY;
3062
3063   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3064   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3065
3066   return 0;
3067 }
3068
3069 static clib_error_t *
3070 set_ip_classify_command_fn (vlib_main_t * vm,
3071                             unformat_input_t * input,
3072                             vlib_cli_command_t * cmd)
3073 {
3074   u32 table_index = ~0;
3075   int table_index_set = 0;
3076   u32 sw_if_index = ~0;
3077   int rv;
3078   
3079   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3080     if (unformat (input, "table-index %d", &table_index))
3081       table_index_set = 1;
3082     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3083                        vnet_get_main(), &sw_if_index))
3084       ;
3085     else
3086       break;
3087   }
3088       
3089   if (table_index_set == 0)
3090     return clib_error_return (0, "classify table-index must be specified");
3091
3092   if (sw_if_index == ~0)
3093     return clib_error_return (0, "interface / subif must be specified");
3094
3095   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3096
3097   switch (rv)
3098     {
3099     case 0:
3100       break;
3101
3102     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3103       return clib_error_return (0, "No such interface");
3104
3105     case VNET_API_ERROR_NO_SUCH_ENTRY:
3106       return clib_error_return (0, "No such classifier table");
3107     }
3108   return 0;
3109 }
3110
3111 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3112     .path = "set ip classify",
3113     .short_help = 
3114     "set ip classify intfc <int> table-index <index>",
3115     .function = set_ip_classify_command_fn,
3116 };
3117