e997366c9834004f579a62ea9116d8876937d9af
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53
54 void
55 ip4_forward_next_trace (vlib_main_t * vm,
56                         vlib_node_runtime_t * node,
57                         vlib_frame_t * frame,
58                         vlib_rx_or_tx_t which_adj_index);
59
60 always_inline uword
61 ip4_lookup_inline (vlib_main_t * vm,
62                    vlib_node_runtime_t * node,
63                    vlib_frame_t * frame,
64                    int lookup_for_responses_to_locally_received_packets)
65 {
66   ip4_main_t * im = &ip4_main;
67   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
68   u32 n_left_from, n_left_to_next, * from, * to_next;
69   ip_lookup_next_t next;
70   u32 cpu_index = os_get_cpu_number();
71
72   from = vlib_frame_vector_args (frame);
73   n_left_from = frame->n_vectors;
74   next = node->cached_next_index;
75
76   while (n_left_from > 0)
77     {
78       vlib_get_next_frame (vm, node, next,
79                            to_next, n_left_to_next);
80
81       while (n_left_from >= 4 && n_left_to_next >= 2)
82         {
83           vlib_buffer_t * p0, * p1;
84           ip4_header_t * ip0, * ip1;
85           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
86           ip_lookup_next_t next0, next1;
87           const load_balance_t * lb0, * lb1;
88           ip4_fib_mtrie_t * mtrie0, * mtrie1;
89           ip4_fib_mtrie_leaf_t leaf0, leaf1;
90           ip4_address_t * dst_addr0, *dst_addr1;
91           __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
92           __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
93           flow_hash_config_t flow_hash_config0, flow_hash_config1;
94           u32 hash_c0, hash_c1;
95           u32 wrong_next;
96           const dpo_id_t *dpo0, *dpo1;
97
98           /* Prefetch next iteration. */
99           {
100             vlib_buffer_t * p2, * p3;
101
102             p2 = vlib_get_buffer (vm, from[2]);
103             p3 = vlib_get_buffer (vm, from[3]);
104
105             vlib_prefetch_buffer_header (p2, LOAD);
106             vlib_prefetch_buffer_header (p3, LOAD);
107
108             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
109             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
110           }
111
112           pi0 = to_next[0] = from[0];
113           pi1 = to_next[1] = from[1];
114
115           p0 = vlib_get_buffer (vm, pi0);
116           p1 = vlib_get_buffer (vm, pi1);
117
118           ip0 = vlib_buffer_get_current (p0);
119           ip1 = vlib_buffer_get_current (p1);
120
121           dst_addr0 = &ip0->dst_address;
122           dst_addr1 = &ip1->dst_address;
123
124           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
125           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
126           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
127             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
128           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
129             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
130
131
132           if (! lookup_for_responses_to_locally_received_packets)
133             {
134               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
135               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
136
137               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
138
139               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
140               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
141             }
142
143           tcp0 = (void *) (ip0 + 1);
144           tcp1 = (void *) (ip1 + 1);
145
146           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
147                          || ip0->protocol == IP_PROTOCOL_UDP);
148           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
149                          || ip1->protocol == IP_PROTOCOL_UDP);
150
151           if (! lookup_for_responses_to_locally_received_packets)
152             {
153               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
154               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
155             }
156
157           if (! lookup_for_responses_to_locally_received_packets)
158             {
159               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
160               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
161             }
162
163           if (! lookup_for_responses_to_locally_received_packets)
164             {
165               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
166               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
167             }
168
169           if (lookup_for_responses_to_locally_received_packets)
170             {
171               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
172               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
173             }
174           else
175             {
176               /* Handle default route. */
177               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
178               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
179
180               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
181               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
182             }
183
184           lb0 = load_balance_get (lb_index0);
185           lb1 = load_balance_get (lb_index1);
186
187           /* Use flow hash to compute multipath adjacency. */
188           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
189           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
190           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
191             {
192               flow_hash_config0 = lb0->lb_hash_config;
193               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
194                 ip4_compute_flow_hash (ip0, flow_hash_config0);
195             }
196           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
197             {
198               flow_hash_config1 = lb1->lb_hash_config;
199               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
200                 ip4_compute_flow_hash (ip1, flow_hash_config1);
201             }
202
203           ASSERT (lb0->lb_n_buckets > 0);
204           ASSERT (is_pow2 (lb0->lb_n_buckets));
205           ASSERT (lb1->lb_n_buckets > 0);
206           ASSERT (is_pow2 (lb1->lb_n_buckets));
207
208           dpo0 = load_balance_get_bucket_i(lb0,
209                                            (hash_c0 &
210                                             (lb0->lb_n_buckets_minus_1)));
211           dpo1 = load_balance_get_bucket_i(lb1,
212                                            (hash_c1 &
213                                             (lb0->lb_n_buckets_minus_1)));
214
215           next0 = dpo0->dpoi_next_node;
216           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
217           next1 = dpo1->dpoi_next_node;
218           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
219
220           vlib_increment_combined_counter
221               (cm, cpu_index, lb_index0, 1,
222                vlib_buffer_length_in_chain (vm, p0)
223                + sizeof(ethernet_header_t));
224           vlib_increment_combined_counter
225               (cm, cpu_index, lb_index1, 1,
226                vlib_buffer_length_in_chain (vm, p1)
227                + sizeof(ethernet_header_t));
228
229           from += 2;
230           to_next += 2;
231           n_left_to_next -= 2;
232           n_left_from -= 2;
233
234           wrong_next = (next0 != next) + 2*(next1 != next);
235           if (PREDICT_FALSE (wrong_next != 0))
236             {
237               switch (wrong_next)
238                 {
239                 case 1:
240                   /* A B A */
241                   to_next[-2] = pi1;
242                   to_next -= 1;
243                   n_left_to_next += 1;
244                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
245                   break;
246
247                 case 2:
248                   /* A A B */
249                   to_next -= 1;
250                   n_left_to_next += 1;
251                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
252                   break;
253
254                 case 3:
255                   /* A B C */
256                   to_next -= 2;
257                   n_left_to_next += 2;
258                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
259                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
260                   if (next0 == next1)
261                     {
262                       /* A B B */
263                       vlib_put_next_frame (vm, node, next, n_left_to_next);
264                       next = next1;
265                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
266                     }
267                 }
268             }
269         }
270     
271       while (n_left_from > 0 && n_left_to_next > 0)
272         {
273           vlib_buffer_t * p0;
274           ip4_header_t * ip0;
275           __attribute__((unused)) tcp_header_t * tcp0;
276           ip_lookup_next_t next0;
277           const load_balance_t *lb0;
278           ip4_fib_mtrie_t * mtrie0;
279           ip4_fib_mtrie_leaf_t leaf0;
280           ip4_address_t * dst_addr0;
281           __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
282           flow_hash_config_t flow_hash_config0;
283           const dpo_id_t *dpo0;
284           u32 hash_c0;
285
286           pi0 = from[0];
287           to_next[0] = pi0;
288
289           p0 = vlib_get_buffer (vm, pi0);
290
291           ip0 = vlib_buffer_get_current (p0);
292
293           dst_addr0 = &ip0->dst_address;
294
295           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
296           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
297             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
298
299           if (! lookup_for_responses_to_locally_received_packets)
300             {
301               mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
302
303               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
304
305               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
306             }
307
308           tcp0 = (void *) (ip0 + 1);
309
310           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
311                          || ip0->protocol == IP_PROTOCOL_UDP);
312
313           if (! lookup_for_responses_to_locally_received_packets)
314             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
315
316           if (! lookup_for_responses_to_locally_received_packets)
317             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
318
319           if (! lookup_for_responses_to_locally_received_packets)
320             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
321
322           if (lookup_for_responses_to_locally_received_packets)
323             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
324           else
325             {
326               /* Handle default route. */
327               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
328               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
329             }
330
331           lb0 = load_balance_get (lbi0);
332
333           /* Use flow hash to compute multipath adjacency. */
334           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
335           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
336             {
337               flow_hash_config0 = lb0->lb_hash_config;
338
339               hash_c0 = vnet_buffer (p0)->ip.flow_hash = 
340                 ip4_compute_flow_hash (ip0, flow_hash_config0);
341             }
342
343           ASSERT (lb0->lb_n_buckets > 0);
344           ASSERT (is_pow2 (lb0->lb_n_buckets));
345
346           dpo0 = load_balance_get_bucket_i(lb0,
347                                            (hash_c0 &
348                                             (lb0->lb_n_buckets_minus_1)));
349
350           next0 = dpo0->dpoi_next_node;
351           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
352
353           vlib_increment_combined_counter 
354               (cm, cpu_index, lbi0, 1,
355                vlib_buffer_length_in_chain (vm, p0));
356
357           from += 1;
358           to_next += 1;
359           n_left_to_next -= 1;
360           n_left_from -= 1;
361
362           if (PREDICT_FALSE (next0 != next))
363             {
364               n_left_to_next += 1;
365               vlib_put_next_frame (vm, node, next, n_left_to_next);
366               next = next0;
367               vlib_get_next_frame (vm, node, next,
368                                    to_next, n_left_to_next);
369               to_next[0] = pi0;
370               to_next += 1;
371               n_left_to_next -= 1;
372             }
373         }
374
375       vlib_put_next_frame (vm, node, next, n_left_to_next);
376     }
377
378   if (node->flags & VLIB_NODE_FLAG_TRACE)
379     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
380
381   return frame->n_vectors;
382 }
383
384 /** @brief IPv4 lookup node.
385     @node ip4-lookup
386
387     This is the main IPv4 lookup dispatch node.
388
389     @param vm vlib_main_t corresponding to the current thread
390     @param node vlib_node_runtime_t
391     @param frame vlib_frame_t whose contents should be dispatched
392
393     @par Graph mechanics: buffer metadata, next index usage
394
395     @em Uses:
396     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
397         - Indicates the @c sw_if_index value of the interface that the
398           packet was received on.
399     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
400         - When the value is @c ~0 then the node performs a longest prefix
401           match (LPM) for the packet destination address in the FIB attached
402           to the receive interface.
403         - Otherwise perform LPM for the packet destination address in the
404           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
405           value (0, 1, ...) and not a VRF id.
406
407     @em Sets:
408     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
409         - The lookup result adjacency index.
410
411     <em>Next Index:</em>
412     - Dispatches the packet to the node index found in
413       ip_adjacency_t @c adj->lookup_next_index
414       (where @c adj is the lookup result adjacency).
415 */
416 static uword
417 ip4_lookup (vlib_main_t * vm,
418             vlib_node_runtime_t * node,
419             vlib_frame_t * frame)
420 {
421   return ip4_lookup_inline (vm, node, frame,
422                             /* lookup_for_responses_to_locally_received_packets */ 0);
423
424 }
425
426 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
427
428 VLIB_REGISTER_NODE (ip4_lookup_node) = {
429   .function = ip4_lookup,
430   .name = "ip4-lookup",
431   .vector_size = sizeof (u32),
432
433   .format_trace = format_ip4_lookup_trace,
434   .n_next_nodes = IP_LOOKUP_N_NEXT,
435   .next_nodes = IP4_LOOKUP_NEXT_NODES,
436 };
437
438 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
439
440 always_inline uword
441 ip4_load_balance (vlib_main_t * vm,
442                   vlib_node_runtime_t * node,
443                   vlib_frame_t * frame)
444 {
445   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
446   u32 n_left_from, n_left_to_next, * from, * to_next;
447   ip_lookup_next_t next;
448   u32 cpu_index = os_get_cpu_number();
449
450   from = vlib_frame_vector_args (frame);
451   n_left_from = frame->n_vectors;
452   next = node->cached_next_index;
453
454   if (node->flags & VLIB_NODE_FLAG_TRACE)
455       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
456
457   while (n_left_from > 0)
458     {
459       vlib_get_next_frame (vm, node, next,
460                            to_next, n_left_to_next);
461
462     
463       while (n_left_from > 0 && n_left_to_next > 0)
464         {
465           ip_lookup_next_t next0;
466           const load_balance_t *lb0;
467           vlib_buffer_t * p0;
468           u32 pi0, lbi0, hc0;
469           const ip4_header_t *ip0;
470           const dpo_id_t *dpo0;
471
472           pi0 = from[0];
473           to_next[0] = pi0;
474
475           p0 = vlib_get_buffer (vm, pi0);
476
477           ip0 = vlib_buffer_get_current (p0);
478           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
479
480           lb0 = load_balance_get(lbi0);
481           hc0 = lb0->lb_hash_config;
482           vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
483
484           dpo0 = load_balance_get_bucket_i(lb0, 
485                                            vnet_buffer(p0)->ip.flow_hash &
486                                            (lb0->lb_n_buckets_minus_1));
487
488           next0 = dpo0->dpoi_next_node;
489           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
490
491           vlib_increment_combined_counter 
492               (cm, cpu_index, lbi0, 1,
493                vlib_buffer_length_in_chain (vm, p0));
494
495           from += 1;
496           to_next += 1;
497           n_left_to_next -= 1;
498           n_left_from -= 1;
499
500           if (PREDICT_FALSE (next0 != next))
501             {
502               n_left_to_next += 1;
503               vlib_put_next_frame (vm, node, next, n_left_to_next);
504               next = next0;
505               vlib_get_next_frame (vm, node, next,
506                                    to_next, n_left_to_next);
507               to_next[0] = pi0;
508               to_next += 1;
509               n_left_to_next -= 1;
510             }
511         }
512
513       vlib_put_next_frame (vm, node, next, n_left_to_next);
514     }
515
516   return frame->n_vectors;
517 }
518
519 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
520
521 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
522   .function = ip4_load_balance,
523   .name = "ip4-load-balance",
524   .vector_size = sizeof (u32),
525   .sibling_of = "ip4-lookup",
526
527   .format_trace = format_ip4_forward_next_trace,
528 };
529
530 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
531
532 /* get first interface address */
533 ip4_address_t *
534 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
535                              ip_interface_address_t ** result_ia)
536 {
537   ip_lookup_main_t * lm = &im->lookup_main;
538   ip_interface_address_t * ia = 0;
539   ip4_address_t * result = 0;
540
541   foreach_ip_interface_address (lm, ia, sw_if_index, 
542                                 1 /* honor unnumbered */,
543   ({
544     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
545     result = a;
546     break;
547   }));
548   if (result_ia)
549     *result_ia = result ? ia : 0;
550   return result;
551 }
552
553 static void
554 ip4_add_interface_routes (u32 sw_if_index,
555                           ip4_main_t * im, u32 fib_index,
556                           ip_interface_address_t * a)
557 {
558   ip_lookup_main_t * lm = &im->lookup_main;
559   ip4_address_t * address = ip_interface_address_get_address (lm, a);
560   fib_prefix_t pfx = {
561       .fp_len = a->address_length,
562       .fp_proto = FIB_PROTOCOL_IP4,
563       .fp_addr.ip4 = *address,
564   };
565
566   a->neighbor_probe_adj_index = ~0;
567
568   if (pfx.fp_len < 32)
569   {
570       fib_node_index_t fei;
571
572       fei = fib_table_entry_update_one_path(fib_index,
573                                             &pfx,
574                                             FIB_SOURCE_INTERFACE,
575                                             (FIB_ENTRY_FLAG_CONNECTED |
576                                              FIB_ENTRY_FLAG_ATTACHED),
577                                             FIB_PROTOCOL_IP4,
578                                             NULL, /* No next-hop address */
579                                             sw_if_index,
580                                             ~0, // invalid FIB index
581                                             1,
582                                             MPLS_LABEL_INVALID,
583                                             FIB_ROUTE_PATH_FLAG_NONE);
584       a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
585   }
586
587   pfx.fp_len = 32;
588
589   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
590   {
591       u32 classify_table_index =
592           lm->classify_table_index_by_sw_if_index [sw_if_index];
593       if (classify_table_index != (u32) ~0)
594       {
595           dpo_id_t dpo = DPO_NULL;
596
597           dpo_set(&dpo,
598                   DPO_CLASSIFY,
599                   DPO_PROTO_IP4,
600                   classify_dpo_create(FIB_PROTOCOL_IP4,
601                                       classify_table_index));
602
603           fib_table_entry_special_dpo_add(fib_index,
604                                           &pfx,
605                                           FIB_SOURCE_CLASSIFY,
606                                           FIB_ENTRY_FLAG_NONE,
607                                           &dpo);
608           dpo_reset(&dpo);
609       }
610   }
611
612   fib_table_entry_update_one_path(fib_index,
613                                   &pfx,
614                                   FIB_SOURCE_INTERFACE,
615                                   (FIB_ENTRY_FLAG_CONNECTED |
616                                    FIB_ENTRY_FLAG_LOCAL),
617                                   FIB_PROTOCOL_IP4,
618                                   &pfx.fp_addr,
619                                   sw_if_index,
620                                   ~0, // invalid FIB index
621                                   1,
622                                   MPLS_LABEL_INVALID,
623                                   FIB_ROUTE_PATH_FLAG_NONE);
624 }
625
626 static void
627 ip4_del_interface_routes (ip4_main_t * im,
628                           u32 fib_index,
629                           ip4_address_t * address,
630                           u32 address_length)
631 {
632     fib_prefix_t pfx = {
633         .fp_len = address_length,
634         .fp_proto = FIB_PROTOCOL_IP4,
635         .fp_addr.ip4 = *address,
636     };
637
638     if (pfx.fp_len < 32)
639     {
640         fib_table_entry_delete(fib_index,
641                                &pfx,
642                                FIB_SOURCE_INTERFACE);
643     }
644
645     pfx.fp_len = 32;
646     fib_table_entry_delete(fib_index,
647                            &pfx,
648                            FIB_SOURCE_INTERFACE);
649 }
650
651 void
652 ip4_sw_interface_enable_disable (u32 sw_if_index,
653                                  u32 is_enable)
654 {
655   vlib_main_t * vm = vlib_get_main();
656   ip4_main_t * im = &ip4_main;
657   ip_lookup_main_t * lm = &im->lookup_main;
658   u32 ci, cast;
659   u32 lookup_feature_index;
660
661   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
662
663   /*
664    * enable/disable only on the 1<->0 transition
665    */
666   if (is_enable)
667     {
668       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
669         return;
670     }
671   else
672     {
673       ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
674       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
675         return;
676     }
677
678   for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
679     {
680       ip_config_main_t * cm = &lm->feature_config_mains[cast];
681       vnet_config_main_t * vcm = &cm->config_main;
682
683       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
684       ci = cm->config_index_by_sw_if_index[sw_if_index];
685
686       if (cast == VNET_IP_RX_UNICAST_FEAT)
687         lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
688       else
689         lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
690
691       if (is_enable)
692         ci = vnet_config_add_feature (vm, vcm,
693                                       ci,
694                                       lookup_feature_index,
695                                       /* config data */ 0,
696                                       /* # bytes of config data */ 0);
697       else
698         ci = vnet_config_del_feature (vm, vcm,
699                                       ci,
700                                       lookup_feature_index,
701                                       /* config data */ 0,
702                                       /* # bytes of config data */ 0);
703       cm->config_index_by_sw_if_index[sw_if_index] = ci;
704     }
705 }
706
707 static clib_error_t *
708 ip4_add_del_interface_address_internal (vlib_main_t * vm,
709                                         u32 sw_if_index,
710                                         ip4_address_t * address,
711                                         u32 address_length,
712                                         u32 is_del)
713 {
714   vnet_main_t * vnm = vnet_get_main();
715   ip4_main_t * im = &ip4_main;
716   ip_lookup_main_t * lm = &im->lookup_main;
717   clib_error_t * error = 0;
718   u32 if_address_index, elts_before;
719   ip4_address_fib_t ip4_af, * addr_fib = 0;
720
721   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
722   ip4_addr_fib_init (&ip4_af, address,
723                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
724   vec_add1 (addr_fib, ip4_af);
725
726   /* FIXME-LATER
727    * there is no support for adj-fib handling in the presence of overlapping
728    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
729    * most routers do.
730    */
731   if (! is_del)
732     {
733       /* When adding an address check that it does not conflict
734          with an existing address. */
735       ip_interface_address_t * ia;
736       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 
737                                     0 /* honor unnumbered */,
738       ({
739         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
740
741         if (ip4_destination_matches_route (im, address, x, ia->address_length)
742             || ip4_destination_matches_route (im, x, address, address_length))
743           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
744                                     format_ip4_address_and_length, address, address_length,
745                                     format_ip4_address_and_length, x, ia->address_length,
746                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
747        }));
748     }
749
750   elts_before = pool_elts (lm->if_address_pool);
751
752   error = ip_interface_address_add_del
753     (lm,
754      sw_if_index,
755      addr_fib,
756      address_length,
757      is_del,
758      &if_address_index);
759   if (error)
760     goto done;
761   
762   ip4_sw_interface_enable_disable(sw_if_index, !is_del);
763
764   if (is_del)
765       ip4_del_interface_routes (im, ip4_af.fib_index, address,
766                                 address_length);
767   else
768       ip4_add_interface_routes (sw_if_index,
769                                 im, ip4_af.fib_index,
770                                 pool_elt_at_index 
771                                 (lm->if_address_pool, if_address_index));
772
773   /* If pool did not grow/shrink: add duplicate address. */
774   if (elts_before != pool_elts (lm->if_address_pool))
775     {
776       ip4_add_del_interface_address_callback_t * cb;
777       vec_foreach (cb, im->add_del_interface_address_callbacks)
778         cb->function (im, cb->function_opaque, sw_if_index,
779                       address, address_length,
780                       if_address_index,
781                       is_del);
782     }
783
784  done:
785   vec_free (addr_fib);
786   return error;
787 }
788
789 clib_error_t *
790 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
791                                ip4_address_t * address, u32 address_length,
792                                u32 is_del)
793 {
794   return ip4_add_del_interface_address_internal
795     (vm, sw_if_index, address, address_length,
796      is_del);
797 }
798
799 /* Built-in ip4 unicast rx feature path definition */
800 VNET_IP4_UNICAST_FEATURE_INIT (ip4_flow_classify, static) = {
801   .node_name = "ip4-flow-classify",
802   .runs_before = ORDER_CONSTRAINTS {"ip4-inacl", 0},
803   .feature_index = &ip4_main.ip4_unicast_rx_feature_flow_classify,
804 };
805
806 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
807   .node_name = "ip4-inacl", 
808   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
809   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
810 };
811
812 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
813   .node_name = "ip4-source-check-via-rx",
814   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
815   .feature_index = 
816   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
817 };
818
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
820   .node_name = "ip4-source-check-via-any",
821   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
822   .feature_index = 
823   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
824 };
825
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
827   .node_name = "ip4-source-and-port-range-check-rx",
828   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
829   .feature_index =
830   &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
831 };
832
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
834   .node_name = "ip4-policer-classify",
835   .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
836   .feature_index =
837   &ip4_main.ip4_unicast_rx_feature_policer_classify,
838 };
839
840 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
841   .node_name = "ipsec-input-ip4",
842   .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
843   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
844 };
845
846 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
847   .node_name = "vpath-input-ip4",
848   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
849   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
850 };
851
852 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
853   .node_name = "ip4-lookup",
854   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
855   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
856 };
857
858 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
859   .node_name = "ip4-drop",
860   .runs_before = 0, /* not before any other features */
861   .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
862 };
863
864
865 /* Built-in ip4 multicast rx feature path definition */
866 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
867   .node_name = "vpath-input-ip4",
868   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
869   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
870 };
871
872 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
873   .node_name = "ip4-lookup-multicast",
874   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
875   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
876 };
877
878 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
879   .node_name = "ip4-drop",
880   .runs_before = 0, /* last feature */
881   .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
882 };
883
/* Start nodes passed to ip_feature_init_cast for the rx feature casts. */
static char *rx_feature_start_nodes[] = {
  "ip4-input",
  "ip4-input-no-checksum",
};

/* Start nodes passed to ip_feature_init_cast for the tx feature cast. */
static char *tx_feature_start_nodes[] = {
  "ip4-rewrite-transit",
  "ip4-midchain",
};
892
893 /* Source and port-range check ip4 tx feature path definition */
894 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
895   .node_name = "ip4-source-and-port-range-check-tx",
896   .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
897   .feature_index =
898   &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
899
900 };
901
902 /* Built-in ip4 tx feature path definition */
903 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
904   .node_name = "interface-output",
905   .runs_before = 0, /* not before any other features */
906   .feature_index = &ip4_main.ip4_tx_feature_interface_output,
907 };
908
909 static clib_error_t *
910 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
911 {
912   ip_lookup_main_t * lm = &im->lookup_main;
913   clib_error_t * error;
914   vnet_cast_t cast;
915   ip_config_main_t * cm;
916   vnet_config_main_t * vcm;
917   char **feature_start_nodes;
918   int feature_start_len;
919
920   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
921     {
922       cm = &lm->feature_config_mains[cast];
923       vcm = &cm->config_main;
924
925       if (cast < VNET_IP_TX_FEAT)
926         {
927           feature_start_nodes = rx_feature_start_nodes;
928           feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
929         }
930       else
931         {
932           feature_start_nodes = tx_feature_start_nodes;
933           feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
934         }
935       
936       if ((error = ip_feature_init_cast (vm, cm, vcm, 
937                                          feature_start_nodes,
938                                          feature_start_len,
939                                          im->next_feature[cast],
940                                          &im->feature_nodes[cast])))
941         return error;
942     }
943
944   return 0;
945 }
946
947 static clib_error_t *
948 ip4_sw_interface_add_del (vnet_main_t * vnm,
949                           u32 sw_if_index,
950                           u32 is_add)
951 {
952   vlib_main_t * vm = vnm->vlib_main;
953   ip4_main_t * im = &ip4_main;
954   ip_lookup_main_t * lm = &im->lookup_main;
955   u32 ci, cast;
956   u32 feature_index;
957
958   /* Fill in lookup tables with default table (0). */
959   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
960
961   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
962     {
963       ip_config_main_t * cm = &lm->feature_config_mains[cast];
964       vnet_config_main_t * vcm = &cm->config_main;
965
966       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
967       ci = cm->config_index_by_sw_if_index[sw_if_index];
968
969       if (cast == VNET_IP_RX_UNICAST_FEAT)
970         feature_index = im->ip4_unicast_rx_feature_drop;
971       else if (cast == VNET_IP_RX_MULTICAST_FEAT)
972         feature_index = im->ip4_multicast_rx_feature_drop;
973       else
974         feature_index = im->ip4_tx_feature_interface_output;
975
976       if (is_add)
977         ci = vnet_config_add_feature (vm, vcm, 
978                                       ci,
979                                       feature_index,
980                                       /* config data */ 0,
981                                       /* # bytes of config data */ 0);
982       else
983         {
984           ci = vnet_config_del_feature (vm, vcm, ci,
985                                         feature_index,
986                                         /* config data */ 0,
987                                         /* # bytes of config data */ 0);
988           if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
989               im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
990         }
991       cm->config_index_by_sw_if_index[sw_if_index] = ci;
992       /*
993        * note: do not update the tx feature count here.
994        */
995     }
996
997   return /* no error */ 0;
998 }
999
1000 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1001
1002 /* Global IP4 main. */
1003 ip4_main_t ip4_main;
1004
1005 clib_error_t *
1006 ip4_lookup_init (vlib_main_t * vm)
1007 {
1008   ip4_main_t * im = &ip4_main;
1009   clib_error_t * error;
1010   uword i;
1011
1012   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1013     {
1014       u32 m;
1015
1016       if (i < 32)
1017         m = pow2_mask (i) << (32 - i);
1018       else 
1019         m = ~0;
1020       im->fib_masks[i] = clib_host_to_net_u32 (m);
1021     }
1022
1023   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1024
1025   /* Create FIB with index 0 and table id of 0. */
1026   fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1027
1028   {
1029     pg_node_t * pn;
1030     pn = pg_get_node (ip4_lookup_node.index);
1031     pn->unformat_edit = unformat_pg_ip4_header;
1032   }
1033
1034   {
1035     ethernet_arp_header_t h;
1036
1037     memset (&h, 0, sizeof (h));
1038
1039     /* Set target ethernet address to all zeros. */
1040     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1041
1042 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1043 #define _8(f,v) h.f = v;
1044     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1045     _16 (l3_type, ETHERNET_TYPE_IP4);
1046     _8 (n_l2_address_bytes, 6);
1047     _8 (n_l3_address_bytes, 4);
1048     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1049 #undef _16
1050 #undef _8
1051
1052     vlib_packet_template_init (vm,
1053                                &im->ip4_arp_request_packet_template,
1054                                /* data */ &h,
1055                                sizeof (h),
1056                                /* alloc chunk size */ 8,
1057                                "ip4 arp");
1058   }
1059
1060   error = ip4_feature_init (vm, im);
1061
1062   return error;
1063 }
1064
1065 VLIB_INIT_FUNCTION (ip4_lookup_init);
1066
1067 typedef struct {
1068   /* Adjacency taken. */
1069   u32 dpo_index;
1070   u32 flow_hash;
1071   u32 fib_index;
1072
1073   /* Packet data, possibly *after* rewrite. */
1074   u8 packet_data[64 - 1*sizeof(u32)];
1075 } ip4_forward_next_trace_t;
1076
1077 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1078 {
1079   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1080   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1081   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1082   uword indent = format_get_indent (s);
1083   s = format (s, "%U%U",
1084               format_white_space, indent,
1085               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1086   return s;
1087 }
1088
1089 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1090 {
1091   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1092   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1093   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1094   uword indent = format_get_indent (s);
1095
1096   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1097               t->fib_index, t->dpo_index, t->flow_hash);
1098   s = format (s, "\n%U%U",
1099               format_white_space, indent,
1100               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1101   return s;
1102 }
1103
1104 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1105 {
1106   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1107   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1108   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1109   vnet_main_t * vnm = vnet_get_main();
1110   uword indent = format_get_indent (s);
1111
1112   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1113               t->fib_index, t->dpo_index, format_ip_adjacency,
1114               vnm, t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1115               t->flow_hash);
1116   s = format (s, "\n%U%U",
1117               format_white_space, indent,
1118               format_ip_adjacency_packet_data,
1119               vnm, t->dpo_index,
1120               t->packet_data, sizeof (t->packet_data));
1121   return s;
1122 }
1123
1124 /* Common trace function for all ip4-forward next nodes. */
1125 void
1126 ip4_forward_next_trace (vlib_main_t * vm,
1127                         vlib_node_runtime_t * node,
1128                         vlib_frame_t * frame,
1129                         vlib_rx_or_tx_t which_adj_index)
1130 {
1131   u32 * from, n_left;
1132   ip4_main_t * im = &ip4_main;
1133
1134   n_left = frame->n_vectors;
1135   from = vlib_frame_vector_args (frame);
1136   
1137   while (n_left >= 4)
1138     {
1139       u32 bi0, bi1;
1140       vlib_buffer_t * b0, * b1;
1141       ip4_forward_next_trace_t * t0, * t1;
1142
1143       /* Prefetch next iteration. */
1144       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1145       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1146
1147       bi0 = from[0];
1148       bi1 = from[1];
1149
1150       b0 = vlib_get_buffer (vm, bi0);
1151       b1 = vlib_get_buffer (vm, bi1);
1152
1153       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1154         {
1155           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1156           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1157           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1158           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1159               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1160               vec_elt (im->fib_index_by_sw_if_index,
1161                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1162
1163           clib_memcpy (t0->packet_data,
1164                   vlib_buffer_get_current (b0),
1165                   sizeof (t0->packet_data));
1166         }
1167       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1168         {
1169           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1170           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1171           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1172           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1173               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1174               vec_elt (im->fib_index_by_sw_if_index,
1175                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1176           clib_memcpy (t1->packet_data,
1177                   vlib_buffer_get_current (b1),
1178                   sizeof (t1->packet_data));
1179         }
1180       from += 2;
1181       n_left -= 2;
1182     }
1183
1184   while (n_left >= 1)
1185     {
1186       u32 bi0;
1187       vlib_buffer_t * b0;
1188       ip4_forward_next_trace_t * t0;
1189
1190       bi0 = from[0];
1191
1192       b0 = vlib_get_buffer (vm, bi0);
1193
1194       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1195         {
1196           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1197           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1198           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1199           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1200               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1201               vec_elt (im->fib_index_by_sw_if_index,
1202                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1203           clib_memcpy (t0->packet_data,
1204                   vlib_buffer_get_current (b0),
1205                   sizeof (t0->packet_data));
1206         }
1207       from += 1;
1208       n_left -= 1;
1209     }
1210 }
1211
1212 static uword
1213 ip4_drop_or_punt (vlib_main_t * vm,
1214                   vlib_node_runtime_t * node,
1215                   vlib_frame_t * frame,
1216                   ip4_error_t error_code)
1217 {
1218   u32 * buffers = vlib_frame_vector_args (frame);
1219   uword n_packets = frame->n_vectors;
1220
1221   vlib_error_drop_buffers (vm, node,
1222                            buffers,
1223                            /* stride */ 1,
1224                            n_packets,
1225                            /* next */ 0,
1226                            ip4_input_node.index,
1227                            error_code);
1228
1229   if (node->flags & VLIB_NODE_FLAG_TRACE)
1230     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1231
1232   return n_packets;
1233 }
1234
1235 static uword
1236 ip4_drop (vlib_main_t * vm,
1237           vlib_node_runtime_t * node,
1238           vlib_frame_t * frame)
1239 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1240
1241 static uword
1242 ip4_punt (vlib_main_t * vm,
1243           vlib_node_runtime_t * node,
1244           vlib_frame_t * frame)
1245 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1246
1247 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1248   .function = ip4_drop,
1249   .name = "ip4-drop",
1250   .vector_size = sizeof (u32),
1251
1252   .format_trace = format_ip4_forward_next_trace,
1253
1254   .n_next_nodes = 1,
1255   .next_nodes = {
1256     [0] = "error-drop",
1257   },
1258 };
1259
1260 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1261
1262 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1263   .function = ip4_punt,
1264   .name = "ip4-punt",
1265   .vector_size = sizeof (u32),
1266
1267   .format_trace = format_ip4_forward_next_trace,
1268
1269   .n_next_nodes = 1,
1270   .next_nodes = {
1271     [0] = "error-punt",
1272   },
1273 };
1274
1275 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1276
1277 /* Compute TCP/UDP/ICMP4 checksum in software. */
1278 u16
1279 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1280                               ip4_header_t * ip0)
1281 {
1282   ip_csum_t sum0;
1283   u32 ip_header_length, payload_length_host_byte_order;
1284   u32 n_this_buffer, n_bytes_left;
1285   u16 sum16;
1286   void * data_this_buffer;
1287   
1288   /* Initialize checksum with ip header. */
1289   ip_header_length = ip4_header_bytes (ip0);
1290   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1291   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1292
1293   if (BITS (uword) == 32)
1294     {
1295       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1296       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1297     }
1298   else
1299     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1300
1301   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1302   data_this_buffer = (void *) ip0 + ip_header_length;
1303   if (n_this_buffer + ip_header_length > p0->current_length)
1304     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1305   while (1)
1306     {
1307       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1308       n_bytes_left -= n_this_buffer;
1309       if (n_bytes_left == 0)
1310         break;
1311
1312       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1313       p0 = vlib_get_buffer (vm, p0->next_buffer);
1314       data_this_buffer = vlib_buffer_get_current (p0);
1315       n_this_buffer = p0->current_length;
1316     }
1317
1318   sum16 = ~ ip_csum_fold (sum0);
1319
1320   return sum16;
1321 }
1322
1323 static u32
1324 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1325 {
1326   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1327   udp_header_t * udp0;
1328   u16 sum16;
1329
1330   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1331           || ip0->protocol == IP_PROTOCOL_UDP);
1332
1333   udp0 = (void *) (ip0 + 1);
1334   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1335     {
1336       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1337                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1338       return p0->flags;
1339     }
1340
1341   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1342
1343   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1344                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1345
1346   return p0->flags;
1347 }
1348
1349 static uword
1350 ip4_local (vlib_main_t * vm,
1351            vlib_node_runtime_t * node,
1352            vlib_frame_t * frame)
1353 {
1354   ip4_main_t * im = &ip4_main;
1355   ip_lookup_main_t * lm = &im->lookup_main;
1356   ip_local_next_t next_index;
1357   u32 * from, * to_next, n_left_from, n_left_to_next;
1358   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1359
1360   from = vlib_frame_vector_args (frame);
1361   n_left_from = frame->n_vectors;
1362   next_index = node->cached_next_index;
1363   
1364   if (node->flags & VLIB_NODE_FLAG_TRACE)
1365     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1366
1367   while (n_left_from > 0)
1368     {
1369       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1370
1371       while (n_left_from >= 4 && n_left_to_next >= 2)
1372         {
1373           vlib_buffer_t * p0, * p1;
1374           ip4_header_t * ip0, * ip1;
1375           udp_header_t * udp0, * udp1;
1376           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1377           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1378           const dpo_id_t *dpo0, *dpo1;
1379           const load_balance_t *lb0, *lb1;
1380           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1381           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1382           i32 len_diff0, len_diff1;
1383           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1384           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1385           u8 enqueue_code;
1386       
1387           pi0 = to_next[0] = from[0];
1388           pi1 = to_next[1] = from[1];
1389           from += 2;
1390           n_left_from -= 2;
1391           to_next += 2;
1392           n_left_to_next -= 2;
1393       
1394           p0 = vlib_get_buffer (vm, pi0);
1395           p1 = vlib_get_buffer (vm, pi1);
1396
1397           ip0 = vlib_buffer_get_current (p0);
1398           ip1 = vlib_buffer_get_current (p1);
1399
1400           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1401                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1402           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, 
1403                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1404
1405           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1406           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1407
1408           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1409
1410           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1411           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1412
1413           /* Treat IP frag packets as "experimental" protocol for now
1414              until support of IP frag reassembly is implemented */
1415           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1416           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1417           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1418           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1419           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1420           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1421
1422           flags0 = p0->flags;
1423           flags1 = p1->flags;
1424
1425           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1426           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1427
1428           udp0 = ip4_next_header (ip0);
1429           udp1 = ip4_next_header (ip1);
1430
1431           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1432           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1433           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1434
1435           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1436           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1437
1438           /* Verify UDP length. */
1439           ip_len0 = clib_net_to_host_u16 (ip0->length);
1440           ip_len1 = clib_net_to_host_u16 (ip1->length);
1441           udp_len0 = clib_net_to_host_u16 (udp0->length);
1442           udp_len1 = clib_net_to_host_u16 (udp1->length);
1443
1444           len_diff0 = ip_len0 - udp_len0;
1445           len_diff1 = ip_len1 - udp_len1;
1446
1447           len_diff0 = is_udp0 ? len_diff0 : 0;
1448           len_diff1 = is_udp1 ? len_diff1 : 0;
1449
1450           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1451                                 & good_tcp_udp0 & good_tcp_udp1)))
1452             {
1453               if (is_tcp_udp0)
1454                 {
1455                   if (is_tcp_udp0
1456                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1457                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1458                   good_tcp_udp0 =
1459                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1460                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1461                 }
1462               if (is_tcp_udp1)
1463                 {
1464                   if (is_tcp_udp1
1465                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1466                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1467                   good_tcp_udp1 =
1468                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1469                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1470                 }
1471             }
1472
1473           good_tcp_udp0 &= len_diff0 >= 0;
1474           good_tcp_udp1 &= len_diff1 >= 0;
1475
1476           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1477           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1478
1479           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1480
1481           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1482           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1483
1484           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1485           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1486                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1487                     : error0);
1488           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1489                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1490                     : error1);
1491
1492           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1493           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1494           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1495           leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1496
1497           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1498           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1499
1500           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1501           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1502
1503           lb0 = load_balance_get(lbi0);
1504           lb1 = load_balance_get(lbi1);
1505           dpo0 = load_balance_get_bucket_i(lb0, 0);
1506           dpo1 = load_balance_get_bucket_i(lb1, 0);
1507
1508           /* 
1509            * Must have a route to source otherwise we drop the packet.
1510            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1511            *
1512            * The checks are:
1513            *  - the source is a recieve => it's from us => bogus, do this
1514            *    first since it sets a different error code.
1515            *  - uRPF check for any route to source - accept if passes.
1516            *  - allow packets destined to the broadcast address from unknown sources
1517            */
1518           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1519                      dpo0->dpoi_type == DPO_RECEIVE) ?
1520                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : 
1521                     error0);
1522           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1523                      !fib_urpf_check_size(lb0->lb_urpf) &&
1524                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1525                     ? IP4_ERROR_SRC_LOOKUP_MISS
1526                     : error0);
1527           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1528                      dpo1->dpoi_type == DPO_RECEIVE) ?
1529                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : 
1530                     error1);
1531           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1532                      !fib_urpf_check_size(lb1->lb_urpf) &&
1533                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1534                     ? IP4_ERROR_SRC_LOOKUP_MISS
1535                     : error1);
1536
1537           next0 = lm->local_next_by_ip_protocol[proto0];
1538           next1 = lm->local_next_by_ip_protocol[proto1];
1539
1540           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1541           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1542
1543           p0->error = error0 ? error_node->errors[error0] : 0;
1544           p1->error = error1 ? error_node->errors[error1] : 0;
1545
1546           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1547
1548           if (PREDICT_FALSE (enqueue_code != 0))
1549             {
1550               switch (enqueue_code)
1551                 {
1552                 case 1:
1553                   /* A B A */
1554                   to_next[-2] = pi1;
1555                   to_next -= 1;
1556                   n_left_to_next += 1;
1557                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1558                   break;
1559
1560                 case 2:
1561                   /* A A B */
1562                   to_next -= 1;
1563                   n_left_to_next += 1;
1564                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1565                   break;
1566
1567                 case 3:
1568                   /* A B B or A B C */
1569                   to_next -= 2;
1570                   n_left_to_next += 2;
1571                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1572                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1573                   if (next0 == next1)
1574                     {
1575                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1576                       next_index = next1;
1577                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1578                     }
1579                   break;
1580                 }
1581             }
1582         }
1583
1584       while (n_left_from > 0 && n_left_to_next > 0)
1585         {
1586           vlib_buffer_t * p0;
1587           ip4_header_t * ip0;
1588           udp_header_t * udp0;
1589           ip4_fib_mtrie_t * mtrie0;
1590           ip4_fib_mtrie_leaf_t leaf0;
1591           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1592           i32 len_diff0;
1593           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1594           load_balance_t *lb0;
1595           const dpo_id_t *dpo0;
1596
1597           pi0 = to_next[0] = from[0];
1598           from += 1;
1599           n_left_from -= 1;
1600           to_next += 1;
1601           n_left_to_next -= 1;
1602       
1603           p0 = vlib_get_buffer (vm, pi0);
1604
1605           ip0 = vlib_buffer_get_current (p0);
1606
1607           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
1608                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1609
1610           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1611
1612           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1613
1614           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1615
1616           /* Treat IP frag packets as "experimental" protocol for now
1617              until support of IP frag reassembly is implemented */
1618           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1619           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1620           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1621
1622           flags0 = p0->flags;
1623
1624           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1625
1626           udp0 = ip4_next_header (ip0);
1627
1628           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1629           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1630
1631           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1632
1633           /* Verify UDP length. */
1634           ip_len0 = clib_net_to_host_u16 (ip0->length);
1635           udp_len0 = clib_net_to_host_u16 (udp0->length);
1636
1637           len_diff0 = ip_len0 - udp_len0;
1638
1639           len_diff0 = is_udp0 ? len_diff0 : 0;
1640
1641           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1642             {
1643               if (is_tcp_udp0)
1644                 {
1645                   if (is_tcp_udp0
1646                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1647                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1648                   good_tcp_udp0 =
1649                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1650                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1651                 }
1652             }
1653
1654           good_tcp_udp0 &= len_diff0 >= 0;
1655
1656           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1657
1658           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1659
1660           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1661
1662           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1663           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1664                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1665                     : error0);
1666
1667           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1668           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1669
1670           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1671           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1672
1673           lb0 = load_balance_get(lbi0);
1674           dpo0 = load_balance_get_bucket_i(lb0, 0);
1675
1676           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1677               vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1678                   dpo0->dpoi_index;
1679
1680           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1681                      dpo0->dpoi_type == DPO_RECEIVE) ?
1682                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : 
1683                     error0);
1684           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1685                      !fib_urpf_check_size(lb0->lb_urpf) &&
1686                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1687                     ? IP4_ERROR_SRC_LOOKUP_MISS
1688                     : error0);
1689
1690           next0 = lm->local_next_by_ip_protocol[proto0];
1691
1692           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1693
1694           p0->error = error0? error_node->errors[error0] : 0;
1695
1696           if (PREDICT_FALSE (next0 != next_index))
1697             {
1698               n_left_to_next += 1;
1699               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1700
1701               next_index = next0;
1702               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1703               to_next[0] = pi0;
1704               to_next += 1;
1705               n_left_to_next -= 1;
1706             }
1707         }
1708   
1709       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1710     }
1711
1712   return frame->n_vectors;
1713 }
1714
1715 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1716   .function = ip4_local,
1717   .name = "ip4-local",
1718   .vector_size = sizeof (u32),
1719
1720   .format_trace = format_ip4_forward_next_trace,
1721
1722   .n_next_nodes = IP_LOCAL_N_NEXT,
1723   .next_nodes = {
1724     [IP_LOCAL_NEXT_DROP] = "error-drop",
1725     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1726     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1727     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1728   },
1729 };
1730
1731 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
1732
1733 void ip4_register_protocol (u32 protocol, u32 node_index)
1734 {
1735   vlib_main_t * vm = vlib_get_main();
1736   ip4_main_t * im = &ip4_main;
1737   ip_lookup_main_t * lm = &im->lookup_main;
1738
1739   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1740   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
1741 }
1742
1743 static clib_error_t *
1744 show_ip_local_command_fn (vlib_main_t * vm,
1745                           unformat_input_t * input,
1746                          vlib_cli_command_t * cmd)
1747 {
1748   ip4_main_t * im = &ip4_main;
1749   ip_lookup_main_t * lm = &im->lookup_main;
1750   int i;
1751
1752   vlib_cli_output (vm, "Protocols handled by ip4_local");
1753   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1754     {
1755       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1756         vlib_cli_output (vm, "%d", i);
1757     }
1758   return 0;
1759 }
1760
1761
1762
1763 VLIB_CLI_COMMAND (show_ip_local, static) = {
1764   .path = "show ip local",
1765   .function = show_ip_local_command_fn,
1766   .short_help = "Show ip local protocol table",
1767 };
1768
1769 always_inline uword
1770 ip4_arp_inline (vlib_main_t * vm,
1771                 vlib_node_runtime_t * node,
1772                 vlib_frame_t * frame,
1773                 int is_glean)
1774 {
1775   vnet_main_t * vnm = vnet_get_main();
1776   ip4_main_t * im = &ip4_main;
1777   ip_lookup_main_t * lm = &im->lookup_main;
1778   u32 * from, * to_next_drop;
1779   uword n_left_from, n_left_to_next_drop, next_index;
1780   static f64 time_last_seed_change = -1e100;
1781   static u32 hash_seeds[3];
1782   static uword hash_bitmap[256 / BITS (uword)]; 
1783   f64 time_now;
1784
1785   if (node->flags & VLIB_NODE_FLAG_TRACE)
1786     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1787
1788   time_now = vlib_time_now (vm);
1789   if (time_now - time_last_seed_change > 1e-3)
1790     {
1791       uword i;
1792       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1793                                              sizeof (hash_seeds));
1794       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1795         hash_seeds[i] = r[i];
1796
1797       /* Mark all hash keys as been no-seen before. */
1798       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1799         hash_bitmap[i] = 0;
1800
1801       time_last_seed_change = time_now;
1802     }
1803
1804   from = vlib_frame_vector_args (frame);
1805   n_left_from = frame->n_vectors;
1806   next_index = node->cached_next_index;
1807   if (next_index == IP4_ARP_NEXT_DROP)
1808     next_index = IP4_ARP_N_NEXT; /* point to first interface */
1809
1810   while (n_left_from > 0)
1811     {
1812       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1813                            to_next_drop, n_left_to_next_drop);
1814
1815       while (n_left_from > 0 && n_left_to_next_drop > 0)
1816         {
1817           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1818           ip_adjacency_t * adj0;
1819           vlib_buffer_t * p0;
1820           ip4_header_t * ip0;
1821           uword bm0;
1822
1823           pi0 = from[0];
1824
1825           p0 = vlib_get_buffer (vm, pi0);
1826
1827           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1828           adj0 = ip_get_adjacency (lm, adj_index0);
1829           ip0 = vlib_buffer_get_current (p0);
1830
1831           /*
1832            * this is the Glean case, so we are ARPing for the
1833            * packet's destination 
1834            */
1835           a0 = hash_seeds[0];
1836           b0 = hash_seeds[1];
1837           c0 = hash_seeds[2];
1838
1839           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1840           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1841
1842           if (is_glean)
1843           {
1844               a0 ^= ip0->dst_address.data_u32;
1845           }
1846           else
1847           {
1848               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1849           }
1850           b0 ^= sw_if_index0;
1851
1852           hash_v3_finalize32 (a0, b0, c0);
1853
1854           c0 &= BITS (hash_bitmap) - 1;
1855           c0 = c0 / BITS (uword);
1856           m0 = (uword) 1 << (c0 % BITS (uword));
1857
1858           bm0 = hash_bitmap[c0];
1859           drop0 = (bm0 & m0) != 0;
1860
1861           /* Mark it as seen. */
1862           hash_bitmap[c0] = bm0 | m0;
1863
1864           from += 1;
1865           n_left_from -= 1;
1866           to_next_drop[0] = pi0;
1867           to_next_drop += 1;
1868           n_left_to_next_drop -= 1;
1869
1870           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1871
1872           if (drop0)
1873             continue;
1874
1875           /* 
1876            * Can happen if the control-plane is programming tables
1877            * with traffic flowing; at least that's today's lame excuse.
1878            */
1879           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1880               (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1881           {
1882             p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1883           }
1884           else
1885           /* Send ARP request. */
1886           {
1887             u32 bi0 = 0;
1888             vlib_buffer_t * b0;
1889             ethernet_arp_header_t * h0;
1890             vnet_hw_interface_t * hw_if0;
1891
1892             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1893
1894             /* Add rewrite/encap string for ARP packet. */
1895             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1896
1897             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1898
1899             /* Src ethernet address in ARP header. */
1900             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1901                     sizeof (h0->ip4_over_ethernet[0].ethernet));
1902
1903             if (is_glean)
1904             {
1905                 /* The interface's source address is stashed in the Glean Adj */
1906                 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1907
1908                 /* Copy in destination address we are requesting. This is the
1909                 * glean case, so it's the packet's destination.*/
1910                 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1911             }
1912             else
1913             {
1914                 /* Src IP address in ARP header. */
1915                 if (ip4_src_address_for_packet(lm, sw_if_index0,
1916                                                &h0->ip4_over_ethernet[0].ip4))
1917                 {
1918                     /* No source address available */
1919                     p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1920                     vlib_buffer_free(vm, &bi0, 1);
1921                     continue;
1922                 }
1923
1924                 /* Copy in destination address we are requesting from the
1925                    incomplete adj */
1926                 h0->ip4_over_ethernet[1].ip4.data_u32 =
1927                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1928             }
1929
1930             vlib_buffer_copy_trace_flag (vm, p0, bi0);
1931             b0 = vlib_get_buffer (vm, bi0);
1932             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1933
1934             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1935
1936             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1937           }
1938         }
1939
1940       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1941     }
1942
1943   return frame->n_vectors;
1944 }
1945
1946 static uword
1947 ip4_arp (vlib_main_t * vm,
1948          vlib_node_runtime_t * node,
1949          vlib_frame_t * frame)
1950 {
1951     return (ip4_arp_inline(vm, node, frame, 0));
1952 }
1953
1954 static uword
1955 ip4_glean (vlib_main_t * vm,
1956            vlib_node_runtime_t * node,
1957            vlib_frame_t * frame)
1958 {
1959     return (ip4_arp_inline(vm, node, frame, 1));
1960 }
1961
1962 static char * ip4_arp_error_strings[] = {
1963   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1964   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1965   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1966   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1967   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1968   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1969 };
1970
1971 VLIB_REGISTER_NODE (ip4_arp_node) = {
1972   .function = ip4_arp,
1973   .name = "ip4-arp",
1974   .vector_size = sizeof (u32),
1975
1976   .format_trace = format_ip4_forward_next_trace,
1977
1978   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1979   .error_strings = ip4_arp_error_strings,
1980
1981   .n_next_nodes = IP4_ARP_N_NEXT,
1982   .next_nodes = {
1983     [IP4_ARP_NEXT_DROP] = "error-drop",
1984   },
1985 };
1986
1987 VLIB_REGISTER_NODE (ip4_glean_node) = {
1988   .function = ip4_glean,
1989   .name = "ip4-glean",
1990   .vector_size = sizeof (u32),
1991
1992   .format_trace = format_ip4_forward_next_trace,
1993
1994   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1995   .error_strings = ip4_arp_error_strings,
1996
1997   .n_next_nodes = IP4_ARP_N_NEXT,
1998   .next_nodes = {
1999     [IP4_ARP_NEXT_DROP] = "error-drop",
2000   },
2001 };
2002
2003 #define foreach_notrace_ip4_arp_error           \
2004 _(DROP)                                         \
2005 _(REQUEST_SENT)                                 \
2006 _(REPLICATE_DROP)                               \
2007 _(REPLICATE_FAIL)
2008
2009 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2010 {
2011   vlib_node_runtime_t *rt = 
2012     vlib_node_get_runtime (vm, ip4_arp_node.index);
2013
2014   /* don't trace ARP request packets */
2015 #define _(a)                                    \
2016     vnet_pcap_drop_trace_filter_add_del         \
2017         (rt->errors[IP4_ARP_ERROR_##a],         \
2018          1 /* is_add */);
2019     foreach_notrace_ip4_arp_error;
2020 #undef _
2021   return 0;
2022 }
2023
2024 VLIB_INIT_FUNCTION(arp_notrace_init);
2025
2026
2027 /* Send an ARP request to see if given destination is reachable on given interface. */
2028 clib_error_t *
2029 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2030 {
2031   vnet_main_t * vnm = vnet_get_main();
2032   ip4_main_t * im = &ip4_main;
2033   ethernet_arp_header_t * h;
2034   ip4_address_t * src;
2035   ip_interface_address_t * ia;
2036   ip_adjacency_t * adj;
2037   vnet_hw_interface_t * hi;
2038   vnet_sw_interface_t * si;
2039   vlib_buffer_t * b;
2040   u32 bi = 0;
2041
2042   si = vnet_get_sw_interface (vnm, sw_if_index);
2043
2044   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2045     {
2046       return clib_error_return (0, "%U: interface %U down",
2047                                 format_ip4_address, dst, 
2048                                 format_vnet_sw_if_index_name, vnm, 
2049                                 sw_if_index);
2050     }
2051
2052   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2053   if (! src)
2054     {
2055       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2056       return clib_error_return 
2057         (0, "no matching interface address for destination %U (interface %U)",
2058          format_ip4_address, dst,
2059          format_vnet_sw_if_index_name, vnm, sw_if_index);
2060     }
2061
2062   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2063
2064   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2065
2066   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2067
2068   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2069
2070   h->ip4_over_ethernet[0].ip4 = src[0];
2071   h->ip4_over_ethernet[1].ip4 = dst[0];
2072
2073   b = vlib_get_buffer (vm, bi);
2074   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2075
2076   /* Add encapsulation string for software interface (e.g. ethernet header). */
2077   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2078   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2079
2080   {
2081     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2082     u32 * to_next = vlib_frame_vector_args (f);
2083     to_next[0] = bi;
2084     f->n_vectors = 1;
2085     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2086   }
2087
2088   return /* no error */ 0;
2089 }
2090
/*
 * Next-node slots used by ip4_rewrite_inline().
 *
 * DROP must stay 0: the rewrite code tests "next0 && next0_override", i.e.
 * it treats a zero next index as "dropping, do not override".
 */
typedef enum {
  IP4_REWRITE_NEXT_DROP = 0,        /* packet is dropped */
  IP4_REWRITE_NEXT_ARP = 1,         /* override for adjacencies still in ARP state */
  IP4_REWRITE_NEXT_ICMP_ERROR = 2,  /* TTL expired: generate an ICMP error */
} ip4_rewrite_next_t;
2096
2097 always_inline uword
2098 ip4_rewrite_inline (vlib_main_t * vm,
2099                     vlib_node_runtime_t * node,
2100                     vlib_frame_t * frame,
2101                     int rewrite_for_locally_received_packets,
2102                     int is_midchain)
2103 {
2104   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2105   u32 * from = vlib_frame_vector_args (frame);
2106   u32 n_left_from, n_left_to_next, * to_next, next_index;
2107   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2108   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2109   ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2110
2111   n_left_from = frame->n_vectors;
2112   next_index = node->cached_next_index;
2113   u32 cpu_index = os_get_cpu_number();
2114   
2115   while (n_left_from > 0)
2116     {
2117       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2118
2119       while (n_left_from >= 4 && n_left_to_next >= 2)
2120         {
2121           ip_adjacency_t * adj0, * adj1;
2122           vlib_buffer_t * p0, * p1;
2123           ip4_header_t * ip0, * ip1;
2124           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2125           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2126           u32 next0_override, next1_override;
2127           u32 tx_sw_if_index0, tx_sw_if_index1;
2128
2129           if (rewrite_for_locally_received_packets)
2130               next0_override = next1_override = 0;
2131
2132           /* Prefetch next iteration. */
2133           {
2134             vlib_buffer_t * p2, * p3;
2135
2136             p2 = vlib_get_buffer (vm, from[2]);
2137             p3 = vlib_get_buffer (vm, from[3]);
2138
2139             vlib_prefetch_buffer_header (p2, STORE);
2140             vlib_prefetch_buffer_header (p3, STORE);
2141
2142             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2143             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2144           }
2145
2146           pi0 = to_next[0] = from[0];
2147           pi1 = to_next[1] = from[1];
2148
2149           from += 2;
2150           n_left_from -= 2;
2151           to_next += 2;
2152           n_left_to_next -= 2;
2153       
2154           p0 = vlib_get_buffer (vm, pi0);
2155           p1 = vlib_get_buffer (vm, pi1);
2156
2157           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2158           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2159
2160           /* We should never rewrite a pkt using the MISS adjacency */
2161           ASSERT(adj_index0 && adj_index1);
2162
2163           ip0 = vlib_buffer_get_current (p0);
2164           ip1 = vlib_buffer_get_current (p1);
2165
2166           error0 = error1 = IP4_ERROR_NONE;
2167           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2168
2169           /* Decrement TTL & update checksum.
2170              Works either endian, so no need for byte swap. */
2171           if (! rewrite_for_locally_received_packets)
2172             {
2173               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2174
2175               /* Input node should have reject packets with ttl 0. */
2176               ASSERT (ip0->ttl > 0);
2177               ASSERT (ip1->ttl > 0);
2178
2179               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2180               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2181
2182               checksum0 += checksum0 >= 0xffff;
2183               checksum1 += checksum1 >= 0xffff;
2184
2185               ip0->checksum = checksum0;
2186               ip1->checksum = checksum1;
2187
2188               ttl0 -= 1;
2189               ttl1 -= 1;
2190
2191               ip0->ttl = ttl0;
2192               ip1->ttl = ttl1;
2193
2194               /*
2195                * If the ttl drops below 1 when forwarding, generate
2196                * an ICMP response.
2197                */
2198               if (PREDICT_FALSE(ttl0 <= 0))
2199                 {
2200                   error0 = IP4_ERROR_TIME_EXPIRED;
2201                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2202                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2203                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2204                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2205                 }
2206               if (PREDICT_FALSE(ttl1 <= 0))
2207                 {
2208                   error1 = IP4_ERROR_TIME_EXPIRED;
2209                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2210                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2211                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2212                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2213                 }
2214
2215               /* Verify checksum. */
2216               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2217               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2218             }
2219
2220           /* Rewrite packet header and updates lengths. */
2221           adj0 = ip_get_adjacency (lm, adj_index0);
2222           adj1 = ip_get_adjacency (lm, adj_index1);
2223       
2224           if (rewrite_for_locally_received_packets)
2225             {
2226               if (PREDICT_FALSE(adj0->lookup_next_index
2227                                 == IP_LOOKUP_NEXT_ARP))
2228                 next0_override = IP4_REWRITE_NEXT_ARP;
2229               if (PREDICT_FALSE(adj1->lookup_next_index
2230                                 == IP_LOOKUP_NEXT_ARP))
2231                 next1_override = IP4_REWRITE_NEXT_ARP;
2232             }
2233
2234           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2235           rw_len0 = adj0[0].rewrite_header.data_bytes;
2236           rw_len1 = adj1[0].rewrite_header.data_bytes;
2237           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2238           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2239
2240           /* Check MTU of outgoing interface. */
2241           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2242                     ? IP4_ERROR_MTU_EXCEEDED
2243                     : error0);
2244           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2245                     ? IP4_ERROR_MTU_EXCEEDED
2246                     : error1);
2247
2248           next0 = (error0 == IP4_ERROR_NONE)
2249             ? adj0[0].rewrite_header.next_index : next0;
2250
2251           if (rewrite_for_locally_received_packets)
2252               next0 = next0 && next0_override ? next0_override : next0;
2253
2254           next1 = (error1 == IP4_ERROR_NONE)
2255             ? adj1[0].rewrite_header.next_index : next1;
2256
2257           if (rewrite_for_locally_received_packets)
2258               next1 = next1 && next1_override ? next1_override : next1;
2259
2260           /* 
2261            * We've already accounted for an ethernet_header_t elsewhere
2262            */
2263           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2264               vlib_increment_combined_counter 
2265                   (&adjacency_counters,
2266                    cpu_index, adj_index0, 
2267                    /* packet increment */ 0,
2268                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2269
2270           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2271               vlib_increment_combined_counter 
2272                   (&adjacency_counters,
2273                    cpu_index, adj_index1, 
2274                    /* packet increment */ 0,
2275                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2276
2277           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2278            * to see the IP headerr */
2279           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2280             {
2281               p0->current_data -= rw_len0;
2282               p0->current_length += rw_len0;
2283               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2284               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2285                   tx_sw_if_index0;
2286
2287               if (PREDICT_FALSE 
2288                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2289                                     tx_sw_if_index0)))
2290                 {
2291                   p0->current_config_index = 
2292                     vec_elt (cm->config_index_by_sw_if_index, 
2293                              tx_sw_if_index0);
2294                   vnet_get_config_data (&cm->config_main,
2295                                         &p0->current_config_index,
2296                                         &next0,
2297                                         /* # bytes of config data */ 0);
2298                 }
2299             }
2300           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2301             {
2302               p1->current_data -= rw_len1;
2303               p1->current_length += rw_len1;
2304
2305               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2306               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2307                   tx_sw_if_index1;
2308
2309               if (PREDICT_FALSE 
2310                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2311                                     tx_sw_if_index1)))
2312                 {
2313                   p1->current_config_index = 
2314                     vec_elt (cm->config_index_by_sw_if_index, 
2315                              tx_sw_if_index1);
2316                   vnet_get_config_data (&cm->config_main,
2317                                         &p1->current_config_index,
2318                                         &next1,
2319                                         /* # bytes of config data */ 0);
2320                 }
2321             }
2322
2323           /* Guess we are only writing on simple Ethernet header. */
2324           vnet_rewrite_two_headers (adj0[0], adj1[0],
2325                                     ip0, ip1,
2326                                     sizeof (ethernet_header_t));
2327
2328           if (is_midchain)
2329           {
2330               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2331               adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2332           }
2333       
2334           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2335                                            to_next, n_left_to_next,
2336                                            pi0, pi1, next0, next1);
2337         }
2338
2339       while (n_left_from > 0 && n_left_to_next > 0)
2340         {
2341           ip_adjacency_t * adj0;
2342           vlib_buffer_t * p0;
2343           ip4_header_t * ip0;
2344           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2345           u32 next0_override;
2346           u32 tx_sw_if_index0;
2347
2348           if (rewrite_for_locally_received_packets)
2349               next0_override = 0;
2350
2351           pi0 = to_next[0] = from[0];
2352
2353           p0 = vlib_get_buffer (vm, pi0);
2354
2355           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2356
2357           /* We should never rewrite a pkt using the MISS adjacency */
2358           ASSERT(adj_index0);
2359
2360           adj0 = ip_get_adjacency (lm, adj_index0);
2361       
2362           ip0 = vlib_buffer_get_current (p0);
2363
2364           error0 = IP4_ERROR_NONE;
2365           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2366
2367           /* Decrement TTL & update checksum. */
2368           if (! rewrite_for_locally_received_packets)
2369             {
2370               i32 ttl0 = ip0->ttl;
2371
2372               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2373
2374               checksum0 += checksum0 >= 0xffff;
2375
2376               ip0->checksum = checksum0;
2377
2378               ASSERT (ip0->ttl > 0);
2379
2380               ttl0 -= 1;
2381
2382               ip0->ttl = ttl0;
2383
2384               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2385
2386               if (PREDICT_FALSE(ttl0 <= 0))
2387                 {
2388                   /*
2389                    * If the ttl drops below 1 when forwarding, generate
2390                    * an ICMP response.
2391                    */
2392                   error0 = IP4_ERROR_TIME_EXPIRED;
2393                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2394                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2395                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2396                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2397                 }
2398             }
2399
2400           if (rewrite_for_locally_received_packets)
2401             {
2402               /* 
2403                * We have to override the next_index in ARP adjacencies,
2404                * because they're set up for ip4-arp, not this node...
2405                */
2406               if (PREDICT_FALSE(adj0->lookup_next_index
2407                                 == IP_LOOKUP_NEXT_ARP))
2408                 next0_override = IP4_REWRITE_NEXT_ARP;
2409             }
2410
2411           /* Guess we are only writing on simple Ethernet header. */
2412           vnet_rewrite_one_header (adj0[0], ip0, 
2413                                    sizeof (ethernet_header_t));
2414           
2415           /* Update packet buffer attributes/set output interface. */
2416           rw_len0 = adj0[0].rewrite_header.data_bytes;
2417           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2418           
2419           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2420               vlib_increment_combined_counter 
2421                   (&adjacency_counters,
2422                    cpu_index, adj_index0, 
2423                    /* packet increment */ 0,
2424                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2425           
2426           /* Check MTU of outgoing interface. */
2427           error0 = (vlib_buffer_length_in_chain (vm, p0) 
2428                     > adj0[0].rewrite_header.max_l3_packet_bytes
2429                     ? IP4_ERROR_MTU_EXCEEDED
2430                     : error0);
2431
2432           p0->error = error_node->errors[error0];
2433
2434           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2435            * to see the IP headerr */
2436           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2437             {
2438               p0->current_data -= rw_len0;
2439               p0->current_length += rw_len0;
2440               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2441
2442               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2443               next0 = adj0[0].rewrite_header.next_index;
2444
2445               if (is_midchain)
2446                 {
2447                   adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2448                 }
2449
2450               if (PREDICT_FALSE 
2451                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features, 
2452                                     tx_sw_if_index0)))
2453                   {
2454                     p0->current_config_index = 
2455                       vec_elt (cm->config_index_by_sw_if_index, 
2456                                tx_sw_if_index0);
2457                     vnet_get_config_data (&cm->config_main,
2458                                           &p0->current_config_index,
2459                                           &next0,
2460                                           /* # bytes of config data */ 0);
2461                   }
2462             }
2463
2464           if (rewrite_for_locally_received_packets)
2465               next0 = next0 && next0_override ? next0_override : next0;
2466
2467           from += 1;
2468           n_left_from -= 1;
2469           to_next += 1;
2470           n_left_to_next -= 1;
2471       
2472           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2473                                            to_next, n_left_to_next,
2474                                            pi0, next0);
2475         }
2476   
2477       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2478     }
2479
2480   /* Need to do trace after rewrites to pick up new packet data. */
2481   if (node->flags & VLIB_NODE_FLAG_TRACE)
2482     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2483
2484   return frame->n_vectors;
2485 }
2486
2487
2488 /** @brief IPv4 transit rewrite node.
2489     @node ip4-rewrite-transit
2490
2491     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2492     header checksum, fetch the ip adjacency, check the outbound mtu,
2493     apply the adjacency rewrite, and send pkts to the adjacency
2494     rewrite header's rewrite_next_index.
2495
2496     @param vm vlib_main_t corresponding to the current thread
2497     @param node vlib_node_runtime_t
2498     @param frame vlib_frame_t whose contents should be dispatched
2499
2500     @par Graph mechanics: buffer metadata, next index usage
2501
2502     @em Uses:
2503     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2504         - the rewrite adjacency index
2505     - <code>adj->lookup_next_index</code>
2506         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2507           the packet will be dropped. 
2508     - <code>adj->rewrite_header</code>
2509         - Rewrite string length, rewrite string, next_index
2510
2511     @em Sets:
2512     - <code>b->current_data, b->current_length</code>
2513         - Updated net of applying the rewrite string
2514
2515     <em>Next Indices:</em>
2516     - <code> adj->rewrite_header.next_index </code>
2517       or @c error-drop 
2518 */
2519 static uword
2520 ip4_rewrite_transit (vlib_main_t * vm,
2521                      vlib_node_runtime_t * node,
2522                      vlib_frame_t * frame)
2523 {
2524   return ip4_rewrite_inline (vm, node, frame,
2525                              /* rewrite_for_locally_received_packets */ 0, 0);
2526 }
2527
2528 /** @brief IPv4 local rewrite node.
2529     @node ip4-rewrite-local
2530
2531     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2532     the outbound interface mtu, apply the adjacency rewrite, and send
    pkts to the adjacency rewrite header's rewrite_next_index. Also
    copes with pathological packets such as an icmp4 w/ src = dst =
    interface addr.
2536
2537     @param vm vlib_main_t corresponding to the current thread
2538     @param node vlib_node_runtime_t
2539     @param frame vlib_frame_t whose contents should be dispatched
2540
2541     @par Graph mechanics: buffer metadata, next index usage
2542
2543     @em Uses:
2544     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2545         - the rewrite adjacency index
2546     - <code>adj->lookup_next_index</code>
2547         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2548           the packet will be dropped. 
2549     - <code>adj->rewrite_header</code>
2550         - Rewrite string length, rewrite string, next_index
2551
2552     @em Sets:
2553     - <code>b->current_data, b->current_length</code>
2554         - Updated net of applying the rewrite string
2555
2556     <em>Next Indices:</em>
2557     - <code> adj->rewrite_header.next_index </code>
2558       or @c error-drop 
2559 */
2560
2561 static uword
2562 ip4_rewrite_local (vlib_main_t * vm,
2563                    vlib_node_runtime_t * node,
2564                    vlib_frame_t * frame)
2565 {
2566   return ip4_rewrite_inline (vm, node, frame,
2567                              /* rewrite_for_locally_received_packets */ 1, 0);
2568 }
2569
2570 static uword
2571 ip4_midchain (vlib_main_t * vm,
2572               vlib_node_runtime_t * node,
2573               vlib_frame_t * frame)
2574 {
2575   return ip4_rewrite_inline (vm, node, frame,
2576                              /* rewrite_for_locally_received_packets */ 0, 1);
2577 }
2578
2579 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2580   .function = ip4_rewrite_transit,
2581   .name = "ip4-rewrite-transit",
2582   .vector_size = sizeof (u32),
2583
2584   .format_trace = format_ip4_rewrite_trace,
2585
2586   .n_next_nodes = 3,
2587   .next_nodes = {
2588     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2589     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2590     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2591   },
2592 };
2593
2594 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2595
2596 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2597   .function = ip4_midchain,
2598   .name = "ip4-midchain",
2599   .vector_size = sizeof (u32),
2600
2601   .format_trace = format_ip4_forward_next_trace,
2602
2603   .sibling_of = "ip4-rewrite-transit",
2604 };
2605
2606 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
2607
2608 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2609   .function = ip4_rewrite_local,
2610   .name = "ip4-rewrite-local",
2611   .vector_size = sizeof (u32),
2612
2613   .sibling_of = "ip4-rewrite-transit",
2614
2615   .format_trace = format_ip4_rewrite_trace,
2616
2617   .n_next_nodes = 0,
2618 };
2619
2620 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2621
2622 static clib_error_t *
2623 add_del_interface_table (vlib_main_t * vm,
2624                          unformat_input_t * input,
2625                          vlib_cli_command_t * cmd)
2626 {
2627   vnet_main_t * vnm = vnet_get_main();
2628   clib_error_t * error = 0;
2629   u32 sw_if_index, table_id;
2630
2631   sw_if_index = ~0;
2632
2633   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2634     {
2635       error = clib_error_return (0, "unknown interface `%U'",
2636                                  format_unformat_error, input);
2637       goto done;
2638     }
2639
2640   if (unformat (input, "%d", &table_id))
2641     ;
2642   else
2643     {
2644       error = clib_error_return (0, "expected table id `%U'",
2645                                  format_unformat_error, input);
2646       goto done;
2647     }
2648
2649   {
2650     ip4_main_t * im = &ip4_main;
2651     u32 fib_index;
2652
2653     fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2654                                                    table_id);
2655
2656     //
2657     // FIXME-LATER
2658     //  changing an interface's table has consequences for any connecteds
2659     //  and adj-fibs already installed.
2660     //
2661     vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2662     im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2663   }
2664
2665  done:
2666   return error;
2667 }
2668
2669 /*?
2670  * Place the indicated interface into the supplied VRF
2671  *
2672  * @cliexpar
2673  * @cliexstart{set interface ip table}
2674  *
2675  *  vpp# set interface ip table GigabitEthernet2/0/0 2
2676  *
2677  * Interface addresses added after setting the interface IP table end up in the indicated VRF table.
2678  * Predictable but potentially counter-intuitive results occur if you provision interface addresses in multiple FIBs.
2679  * Upon RX, packets will be processed in the last IP table ID provisioned.
2680  * It might be marginally useful to evade source RPF drops to put an interface address into multiple FIBs.
2681  * @cliexend
2682  ?*/
2683 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2684   .path = "set interface ip table",
2685   .function = add_del_interface_table,
2686   .short_help = "Add/delete FIB table id for interface",
2687 };
2688
2689
2690 static uword
2691 ip4_lookup_multicast (vlib_main_t * vm,
2692                       vlib_node_runtime_t * node,
2693                       vlib_frame_t * frame)
2694 {
2695   ip4_main_t * im = &ip4_main;
2696   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2697   u32 n_left_from, n_left_to_next, * from, * to_next;
2698   ip_lookup_next_t next;
2699   u32 cpu_index = os_get_cpu_number();
2700
2701   from = vlib_frame_vector_args (frame);
2702   n_left_from = frame->n_vectors;
2703   next = node->cached_next_index;
2704
2705   while (n_left_from > 0)
2706     {
2707       vlib_get_next_frame (vm, node, next,
2708                            to_next, n_left_to_next);
2709
2710       while (n_left_from >= 4 && n_left_to_next >= 2)
2711         {
2712           vlib_buffer_t * p0, * p1;
2713           u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2714           ip_lookup_next_t next0, next1;
2715           ip4_header_t * ip0, * ip1;
2716           u32 fib_index0, fib_index1;
2717           const dpo_id_t *dpo0, *dpo1;
2718           const load_balance_t * lb0, * lb1;
2719
2720           /* Prefetch next iteration. */
2721           {
2722             vlib_buffer_t * p2, * p3;
2723
2724             p2 = vlib_get_buffer (vm, from[2]);
2725             p3 = vlib_get_buffer (vm, from[3]);
2726
2727             vlib_prefetch_buffer_header (p2, LOAD);
2728             vlib_prefetch_buffer_header (p3, LOAD);
2729
2730             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2731             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2732           }
2733
2734           pi0 = to_next[0] = from[0];
2735           pi1 = to_next[1] = from[1];
2736
2737           p0 = vlib_get_buffer (vm, pi0);
2738           p1 = vlib_get_buffer (vm, pi1);
2739
2740           ip0 = vlib_buffer_get_current (p0);
2741           ip1 = vlib_buffer_get_current (p1);
2742
2743           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2744           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2745           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2746             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2747           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2748             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2749
2750           lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2751                                                &ip0->dst_address);
2752           lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2753                                                &ip1->dst_address);
2754
2755           lb0 = load_balance_get (lb_index0);
2756           lb1 = load_balance_get (lb_index1);
2757
2758           ASSERT (lb0->lb_n_buckets > 0);
2759           ASSERT (is_pow2 (lb0->lb_n_buckets));
2760           ASSERT (lb1->lb_n_buckets > 0);
2761           ASSERT (is_pow2 (lb1->lb_n_buckets));
2762
2763           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2764               (ip0, lb0->lb_hash_config);
2765                                                                   
2766           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash 
2767               (ip1, lb1->lb_hash_config);
2768
2769           dpo0 = load_balance_get_bucket_i(lb0,
2770                                            (vnet_buffer (p0)->ip.flow_hash &
2771                                             (lb0->lb_n_buckets_minus_1)));
2772           dpo1 = load_balance_get_bucket_i(lb1,
2773                                            (vnet_buffer (p1)->ip.flow_hash &
2774                                             (lb0->lb_n_buckets_minus_1)));
2775
2776           next0 = dpo0->dpoi_next_node;
2777           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2778           next1 = dpo1->dpoi_next_node;
2779           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2780
2781           if (1) /* $$$$$$ HACK FIXME */
2782           vlib_increment_combined_counter 
2783               (cm, cpu_index, lb_index0, 1,
2784                vlib_buffer_length_in_chain (vm, p0));
2785           if (1) /* $$$$$$ HACK FIXME */
2786           vlib_increment_combined_counter 
2787               (cm, cpu_index, lb_index1, 1,
2788                vlib_buffer_length_in_chain (vm, p1));
2789
2790           from += 2;
2791           to_next += 2;
2792           n_left_to_next -= 2;
2793           n_left_from -= 2;
2794
2795           wrong_next = (next0 != next) + 2*(next1 != next);
2796           if (PREDICT_FALSE (wrong_next != 0))
2797             {
2798               switch (wrong_next)
2799                 {
2800                 case 1:
2801                   /* A B A */
2802                   to_next[-2] = pi1;
2803                   to_next -= 1;
2804                   n_left_to_next += 1;
2805                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2806                   break;
2807
2808                 case 2:
2809                   /* A A B */
2810                   to_next -= 1;
2811                   n_left_to_next += 1;
2812                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2813                   break;
2814
2815                 case 3:
2816                   /* A B C */
2817                   to_next -= 2;
2818                   n_left_to_next += 2;
2819                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2820                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2821                   if (next0 == next1)
2822                     {
2823                       /* A B B */
2824                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2825                       next = next1;
2826                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2827                     }
2828                 }
2829             }
2830         }
2831     
2832       while (n_left_from > 0 && n_left_to_next > 0)
2833         {
2834           vlib_buffer_t * p0;
2835           ip4_header_t * ip0;
2836           u32 pi0, lb_index0;
2837           ip_lookup_next_t next0;
2838           u32 fib_index0;
2839           const dpo_id_t *dpo0;
2840           const load_balance_t * lb0;
2841
2842           pi0 = from[0];
2843           to_next[0] = pi0;
2844
2845           p0 = vlib_get_buffer (vm, pi0);
2846
2847           ip0 = vlib_buffer_get_current (p0);
2848
2849           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, 
2850                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2851           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2852               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2853           
2854           lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2855                                                &ip0->dst_address);
2856
2857           lb0 = load_balance_get (lb_index0);
2858
2859           ASSERT (lb0->lb_n_buckets > 0);
2860           ASSERT (is_pow2 (lb0->lb_n_buckets));
2861
2862           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash 
2863               (ip0, lb0->lb_hash_config);
2864
2865           dpo0 = load_balance_get_bucket_i(lb0,
2866                                            (vnet_buffer (p0)->ip.flow_hash &
2867                                             (lb0->lb_n_buckets_minus_1)));
2868
2869           next0 = dpo0->dpoi_next_node;
2870           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2871
2872           if (1) /* $$$$$$ HACK FIXME */
2873               vlib_increment_combined_counter 
2874                   (cm, cpu_index, lb_index0, 1,
2875                    vlib_buffer_length_in_chain (vm, p0));
2876
2877           from += 1;
2878           to_next += 1;
2879           n_left_to_next -= 1;
2880           n_left_from -= 1;
2881
2882           if (PREDICT_FALSE (next0 != next))
2883             {
2884               n_left_to_next += 1;
2885               vlib_put_next_frame (vm, node, next, n_left_to_next);
2886               next = next0;
2887               vlib_get_next_frame (vm, node, next,
2888                                    to_next, n_left_to_next);
2889               to_next[0] = pi0;
2890               to_next += 1;
2891               n_left_to_next -= 1;
2892             }
2893         }
2894
2895       vlib_put_next_frame (vm, node, next, n_left_to_next);
2896     }
2897
2898   if (node->flags & VLIB_NODE_FLAG_TRACE)
2899       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2900
2901   return frame->n_vectors;
2902 }
2903
2904 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2905   .function = ip4_lookup_multicast,
2906   .name = "ip4-lookup-multicast",
2907   .vector_size = sizeof (u32),
2908   .sibling_of = "ip4-lookup",
2909   .format_trace = format_ip4_lookup_trace,
2910
2911   .n_next_nodes = 0,
2912 };
2913
2914 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
2915
2916 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2917   .function = ip4_drop,
2918   .name = "ip4-multicast",
2919   .vector_size = sizeof (u32),
2920
2921   .format_trace = format_ip4_forward_next_trace,
2922
2923   .n_next_nodes = 1,
2924   .next_nodes = {
2925     [0] = "error-drop",
2926   },
2927 };
2928
2929 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2930 {
2931   ip4_fib_mtrie_t * mtrie0;
2932   ip4_fib_mtrie_leaf_t leaf0;
2933   u32 lbi0;
2934     
2935   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2936
2937   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2938   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2939   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2940   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2941   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2942   
2943   /* Handle default route. */
2944   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2945   
2946   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2947   
2948   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
2949 }
2950  
2951 static clib_error_t *
2952 test_lookup_command_fn (vlib_main_t * vm,
2953                         unformat_input_t * input,
2954                         vlib_cli_command_t * cmd)
2955 {
2956   u32 table_id = 0;
2957   f64 count = 1;
2958   u32 n;
2959   int i;
2960   ip4_address_t ip4_base_address;
2961   u64 errors = 0;
2962
2963   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2964       if (unformat (input, "table %d", &table_id))
2965         ;
2966       else if (unformat (input, "count %f", &count))
2967         ;
2968
2969       else if (unformat (input, "%U",
2970                          unformat_ip4_address, &ip4_base_address))
2971         ;
2972       else
2973         return clib_error_return (0, "unknown input `%U'",
2974                                   format_unformat_error, input);
2975   }
2976
2977   n = count;
2978
2979   for (i = 0; i < n; i++)
2980     {
2981       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2982         errors++;
2983
2984       ip4_base_address.as_u32 = 
2985         clib_host_to_net_u32 (1 + 
2986                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2987     }
2988
2989   if (errors) 
2990     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2991   else
2992     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2993
2994   return 0;
2995 }
2996
2997 VLIB_CLI_COMMAND (lookup_test_command, static) = {
2998     .path = "test lookup",
2999     .short_help = "test lookup",
3000     .function = test_lookup_command_fn,
3001 };
3002
3003 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3004 {
3005   ip4_main_t * im4 = &ip4_main;
3006   ip4_fib_t * fib;
3007   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3008
3009   if (p == 0)
3010     return VNET_API_ERROR_NO_SUCH_FIB;
3011
3012   fib = ip4_fib_get (p[0]);
3013
3014   fib->flow_hash_config = flow_hash_config;
3015   return 0;
3016 }
3017  
3018 static clib_error_t *
3019 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3020                              unformat_input_t * input,
3021                              vlib_cli_command_t * cmd)
3022 {
3023   int matched = 0;
3024   u32 table_id = 0;
3025   u32 flow_hash_config = 0;
3026   int rv;
3027
3028   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3029     if (unformat (input, "table %d", &table_id))
3030       matched = 1;
3031 #define _(a,v) \
3032     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3033     foreach_flow_hash_bit
3034 #undef _
3035     else break;
3036   }
3037   
3038   if (matched == 0)
3039     return clib_error_return (0, "unknown input `%U'",
3040                               format_unformat_error, input);
3041   
3042   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3043   switch (rv)
3044     {
3045     case 0:
3046       break;
3047       
3048     case VNET_API_ERROR_NO_SUCH_FIB:
3049       return clib_error_return (0, "no such FIB table %d", table_id);
3050       
3051     default:
3052       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3053       break;
3054     }
3055   
3056   return 0;
3057 }
3058  
3059 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3060   .path = "set ip flow-hash",
3061   .short_help = 
3062   "set ip table flow-hash table <fib-id> src dst sport dport proto reverse",
3063   .function = set_ip_flow_hash_command_fn,
3064 };
3065  
3066 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, 
3067                                  u32 table_index)
3068 {
3069   vnet_main_t * vnm = vnet_get_main();
3070   vnet_interface_main_t * im = &vnm->interface_main;
3071   ip4_main_t * ipm = &ip4_main;
3072   ip_lookup_main_t * lm = &ipm->lookup_main;
3073   vnet_classify_main_t * cm = &vnet_classify_main;
3074   ip4_address_t *if_addr;
3075
3076   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3077     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3078
3079   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3080     return VNET_API_ERROR_NO_SUCH_ENTRY;
3081
3082   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3083   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3084
3085   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3086
3087   if (NULL != if_addr)
3088   {
3089       fib_prefix_t pfx = {
3090           .fp_len = 32,
3091           .fp_proto = FIB_PROTOCOL_IP4,
3092           .fp_addr.ip4 = *if_addr,
3093       };
3094       u32 fib_index;
3095
3096       fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3097                                                       sw_if_index);
3098
3099
3100       if (table_index != (u32) ~0)
3101       {
3102           dpo_id_t dpo = DPO_NULL;
3103
3104           dpo_set(&dpo,
3105                   DPO_CLASSIFY,
3106                   DPO_PROTO_IP4,
3107                   classify_dpo_create(FIB_PROTOCOL_IP4,
3108                                       table_index));
3109
3110           fib_table_entry_special_dpo_add(fib_index,
3111                                           &pfx,
3112                                           FIB_SOURCE_CLASSIFY,
3113                                           FIB_ENTRY_FLAG_NONE,
3114                                           &dpo);
3115           dpo_reset(&dpo);
3116       }
3117       else
3118       {
3119           fib_table_entry_special_remove(fib_index,
3120                                          &pfx,
3121                                          FIB_SOURCE_CLASSIFY);
3122       }
3123   }
3124
3125   return 0;
3126 }
3127
3128 static clib_error_t *
3129 set_ip_classify_command_fn (vlib_main_t * vm,
3130                             unformat_input_t * input,
3131                             vlib_cli_command_t * cmd)
3132 {
3133   u32 table_index = ~0;
3134   int table_index_set = 0;
3135   u32 sw_if_index = ~0;
3136   int rv;
3137   
3138   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3139     if (unformat (input, "table-index %d", &table_index))
3140       table_index_set = 1;
3141     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, 
3142                        vnet_get_main(), &sw_if_index))
3143       ;
3144     else
3145       break;
3146   }
3147       
3148   if (table_index_set == 0)
3149     return clib_error_return (0, "classify table-index must be specified");
3150
3151   if (sw_if_index == ~0)
3152     return clib_error_return (0, "interface / subif must be specified");
3153
3154   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3155
3156   switch (rv)
3157     {
3158     case 0:
3159       break;
3160
3161     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3162       return clib_error_return (0, "No such interface");
3163
3164     case VNET_API_ERROR_NO_SUCH_ENTRY:
3165       return clib_error_return (0, "No such classifier table");
3166     }
3167   return 0;
3168 }
3169
3170 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3171     .path = "set ip classify",
3172     .short_help = 
3173     "set ip classify intfc <int> table-index <index>",
3174     .function = set_ip_classify_command_fn,
3175 };
3176