VPP-491: Update CLI Command documentation for "show ip fib" and "show ip6 fib".
[vpp.git] / vnet / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53
54 /**
55  * @file
56  * @brief IPv4 Forwarding.
57  *
58  * This file contains the source code for IPv4 forwarding.
59  */
60
61 void
62 ip4_forward_next_trace (vlib_main_t * vm,
63                         vlib_node_runtime_t * node,
64                         vlib_frame_t * frame,
65                         vlib_rx_or_tx_t which_adj_index);
66
67 always_inline uword
68 ip4_lookup_inline (vlib_main_t * vm,
69                    vlib_node_runtime_t * node,
70                    vlib_frame_t * frame,
71                    int lookup_for_responses_to_locally_received_packets)
72 {
73   ip4_main_t * im = &ip4_main;
74   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
75   u32 n_left_from, n_left_to_next, * from, * to_next;
76   ip_lookup_next_t next;
77   u32 cpu_index = os_get_cpu_number();
78
79   from = vlib_frame_vector_args (frame);
80   n_left_from = frame->n_vectors;
81   next = node->cached_next_index;
82
83   while (n_left_from > 0)
84     {
85       vlib_get_next_frame (vm, node, next,
86                            to_next, n_left_to_next);
87
88       while (n_left_from >= 4 && n_left_to_next >= 2)
89         {
90           vlib_buffer_t * p0, * p1;
91           ip4_header_t * ip0, * ip1;
92           __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
93           ip_lookup_next_t next0, next1;
94           const load_balance_t * lb0, * lb1;
95           ip4_fib_mtrie_t * mtrie0, * mtrie1;
96           ip4_fib_mtrie_leaf_t leaf0, leaf1;
97           ip4_address_t * dst_addr0, *dst_addr1;
98           __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
99           __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
100           flow_hash_config_t flow_hash_config0, flow_hash_config1;
101           u32 hash_c0, hash_c1;
102           u32 wrong_next;
103           const dpo_id_t *dpo0, *dpo1;
104
105           /* Prefetch next iteration. */
106           {
107             vlib_buffer_t * p2, * p3;
108
109             p2 = vlib_get_buffer (vm, from[2]);
110             p3 = vlib_get_buffer (vm, from[3]);
111
112             vlib_prefetch_buffer_header (p2, LOAD);
113             vlib_prefetch_buffer_header (p3, LOAD);
114
115             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
116             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
117           }
118
119           pi0 = to_next[0] = from[0];
120           pi1 = to_next[1] = from[1];
121
122           p0 = vlib_get_buffer (vm, pi0);
123           p1 = vlib_get_buffer (vm, pi1);
124
125           ip0 = vlib_buffer_get_current (p0);
126           ip1 = vlib_buffer_get_current (p1);
127
128           dst_addr0 = &ip0->dst_address;
129           dst_addr1 = &ip1->dst_address;
130
131           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
132           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
133           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
134             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
135           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
136             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
137
138
139           if (! lookup_for_responses_to_locally_received_packets)
140             {
141               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
142               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
143
144               leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
145
146               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
147               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
148             }
149
150           tcp0 = (void *) (ip0 + 1);
151           tcp1 = (void *) (ip1 + 1);
152
153           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
154                          || ip0->protocol == IP_PROTOCOL_UDP);
155           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
156                          || ip1->protocol == IP_PROTOCOL_UDP);
157
158           if (! lookup_for_responses_to_locally_received_packets)
159             {
160               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
161               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
162             }
163
164           if (! lookup_for_responses_to_locally_received_packets)
165             {
166               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
167               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
168             }
169
170           if (! lookup_for_responses_to_locally_received_packets)
171             {
172               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
173               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
174             }
175
176           if (lookup_for_responses_to_locally_received_packets)
177             {
178               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
179               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
180             }
181           else
182             {
183               /* Handle default route. */
184               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
185               leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
186
187               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
188               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
189             }
190
191           lb0 = load_balance_get (lb_index0);
192           lb1 = load_balance_get (lb_index1);
193
194           /* Use flow hash to compute multipath adjacency. */
195           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
196           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
197           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
198             {
199               flow_hash_config0 = lb0->lb_hash_config;
200               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
201                 ip4_compute_flow_hash (ip0, flow_hash_config0);
202             }
203           if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
204             {
205               flow_hash_config1 = lb1->lb_hash_config;
206               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
207                 ip4_compute_flow_hash (ip1, flow_hash_config1);
208             }
209
210           ASSERT (lb0->lb_n_buckets > 0);
211           ASSERT (is_pow2 (lb0->lb_n_buckets));
212           ASSERT (lb1->lb_n_buckets > 0);
213           ASSERT (is_pow2 (lb1->lb_n_buckets));
214
215           dpo0 = load_balance_get_bucket_i(lb0,
216                                            (hash_c0 &
217                                             (lb0->lb_n_buckets_minus_1)));
218           dpo1 = load_balance_get_bucket_i(lb1,
219                                            (hash_c1 &
220                                             (lb0->lb_n_buckets_minus_1)));
221
222           next0 = dpo0->dpoi_next_node;
223           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
224           next1 = dpo1->dpoi_next_node;
225           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
226
227           vlib_increment_combined_counter
228               (cm, cpu_index, lb_index0, 1,
229                vlib_buffer_length_in_chain (vm, p0)
230                + sizeof(ethernet_header_t));
231           vlib_increment_combined_counter
232               (cm, cpu_index, lb_index1, 1,
233                vlib_buffer_length_in_chain (vm, p1)
234                + sizeof(ethernet_header_t));
235
236           from += 2;
237           to_next += 2;
238           n_left_to_next -= 2;
239           n_left_from -= 2;
240
241           wrong_next = (next0 != next) + 2*(next1 != next);
242           if (PREDICT_FALSE (wrong_next != 0))
243             {
244               switch (wrong_next)
245                 {
246                 case 1:
247                   /* A B A */
248                   to_next[-2] = pi1;
249                   to_next -= 1;
250                   n_left_to_next += 1;
251                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
252                   break;
253
254                 case 2:
255                   /* A A B */
256                   to_next -= 1;
257                   n_left_to_next += 1;
258                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
259                   break;
260
261                 case 3:
262                   /* A B C */
263                   to_next -= 2;
264                   n_left_to_next += 2;
265                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
266                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
267                   if (next0 == next1)
268                     {
269                       /* A B B */
270                       vlib_put_next_frame (vm, node, next, n_left_to_next);
271                       next = next1;
272                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
273                     }
274                 }
275             }
276         }
277
278       while (n_left_from > 0 && n_left_to_next > 0)
279         {
280           vlib_buffer_t * p0;
281           ip4_header_t * ip0;
282           __attribute__((unused)) tcp_header_t * tcp0;
283           ip_lookup_next_t next0;
284           const load_balance_t *lb0;
285           ip4_fib_mtrie_t * mtrie0;
286           ip4_fib_mtrie_leaf_t leaf0;
287           ip4_address_t * dst_addr0;
288           __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
289           flow_hash_config_t flow_hash_config0;
290           const dpo_id_t *dpo0;
291           u32 hash_c0;
292
293           pi0 = from[0];
294           to_next[0] = pi0;
295
296           p0 = vlib_get_buffer (vm, pi0);
297
298           ip0 = vlib_buffer_get_current (p0);
299
300           dst_addr0 = &ip0->dst_address;
301
302           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
303           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
304             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
305
306           if (! lookup_for_responses_to_locally_received_packets)
307             {
308               mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
309
310               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
311
312               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
313             }
314
315           tcp0 = (void *) (ip0 + 1);
316
317           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
318                          || ip0->protocol == IP_PROTOCOL_UDP);
319
320           if (! lookup_for_responses_to_locally_received_packets)
321             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
322
323           if (! lookup_for_responses_to_locally_received_packets)
324             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
325
326           if (! lookup_for_responses_to_locally_received_packets)
327             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
328
329           if (lookup_for_responses_to_locally_received_packets)
330             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
331           else
332             {
333               /* Handle default route. */
334               leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
335               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
336             }
337
338           lb0 = load_balance_get (lbi0);
339
340           /* Use flow hash to compute multipath adjacency. */
341           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
342           if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
343             {
344               flow_hash_config0 = lb0->lb_hash_config;
345
346               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
347                 ip4_compute_flow_hash (ip0, flow_hash_config0);
348             }
349
350           ASSERT (lb0->lb_n_buckets > 0);
351           ASSERT (is_pow2 (lb0->lb_n_buckets));
352
353           dpo0 = load_balance_get_bucket_i(lb0,
354                                            (hash_c0 &
355                                             (lb0->lb_n_buckets_minus_1)));
356
357           next0 = dpo0->dpoi_next_node;
358           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
359
360           vlib_increment_combined_counter
361               (cm, cpu_index, lbi0, 1,
362                vlib_buffer_length_in_chain (vm, p0));
363
364           from += 1;
365           to_next += 1;
366           n_left_to_next -= 1;
367           n_left_from -= 1;
368
369           if (PREDICT_FALSE (next0 != next))
370             {
371               n_left_to_next += 1;
372               vlib_put_next_frame (vm, node, next, n_left_to_next);
373               next = next0;
374               vlib_get_next_frame (vm, node, next,
375                                    to_next, n_left_to_next);
376               to_next[0] = pi0;
377               to_next += 1;
378               n_left_to_next -= 1;
379             }
380         }
381
382       vlib_put_next_frame (vm, node, next, n_left_to_next);
383     }
384
385   if (node->flags & VLIB_NODE_FLAG_TRACE)
386     ip4_forward_next_trace(vm, node, frame, VLIB_TX);
387
388   return frame->n_vectors;
389 }
390
391 /** @brief IPv4 lookup node.
392     @node ip4-lookup
393
394     This is the main IPv4 lookup dispatch node.
395
396     @param vm vlib_main_t corresponding to the current thread
397     @param node vlib_node_runtime_t
398     @param frame vlib_frame_t whose contents should be dispatched
399
400     @par Graph mechanics: buffer metadata, next index usage
401
402     @em Uses:
403     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
404         - Indicates the @c sw_if_index value of the interface that the
405           packet was received on.
406     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
407         - When the value is @c ~0 then the node performs a longest prefix
408           match (LPM) for the packet destination address in the FIB attached
409           to the receive interface.
410         - Otherwise perform LPM for the packet destination address in the
411           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
412           value (0, 1, ...) and not a VRF id.
413
414     @em Sets:
415     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
416         - The lookup result adjacency index.
417
418     <em>Next Index:</em>
419     - Dispatches the packet to the node index found in
420       ip_adjacency_t @c adj->lookup_next_index
421       (where @c adj is the lookup result adjacency).
422 */
423 static uword
424 ip4_lookup (vlib_main_t * vm,
425             vlib_node_runtime_t * node,
426             vlib_frame_t * frame)
427 {
428   return ip4_lookup_inline (vm, node, frame,
429                             /* lookup_for_responses_to_locally_received_packets */ 0);
430
431 }
432
433 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
434
435 VLIB_REGISTER_NODE (ip4_lookup_node) = {
436   .function = ip4_lookup,
437   .name = "ip4-lookup",
438   .vector_size = sizeof (u32),
439
440   .format_trace = format_ip4_lookup_trace,
441   .n_next_nodes = IP_LOOKUP_N_NEXT,
442   .next_nodes = IP4_LOOKUP_NEXT_NODES,
443 };
444
445 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
446
447 always_inline uword
448 ip4_load_balance (vlib_main_t * vm,
449                   vlib_node_runtime_t * node,
450                   vlib_frame_t * frame)
451 {
452   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
453   u32 n_left_from, n_left_to_next, * from, * to_next;
454   ip_lookup_next_t next;
455   u32 cpu_index = os_get_cpu_number();
456
457   from = vlib_frame_vector_args (frame);
458   n_left_from = frame->n_vectors;
459   next = node->cached_next_index;
460
461   if (node->flags & VLIB_NODE_FLAG_TRACE)
462       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
463
464   while (n_left_from > 0)
465     {
466       vlib_get_next_frame (vm, node, next,
467                            to_next, n_left_to_next);
468
469
470       while (n_left_from > 0 && n_left_to_next > 0)
471         {
472           ip_lookup_next_t next0;
473           const load_balance_t *lb0;
474           vlib_buffer_t * p0;
475           u32 pi0, lbi0, hc0;
476           const ip4_header_t *ip0;
477           const dpo_id_t *dpo0;
478
479           pi0 = from[0];
480           to_next[0] = pi0;
481
482           p0 = vlib_get_buffer (vm, pi0);
483
484           ip0 = vlib_buffer_get_current (p0);
485           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
486
487           lb0 = load_balance_get(lbi0);
488           hc0 = lb0->lb_hash_config;
489           vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
490
491           dpo0 = load_balance_get_bucket_i(lb0,
492                                            vnet_buffer(p0)->ip.flow_hash &
493                                            (lb0->lb_n_buckets_minus_1));
494
495           next0 = dpo0->dpoi_next_node;
496           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
497
498           vlib_increment_combined_counter
499               (cm, cpu_index, lbi0, 1,
500                vlib_buffer_length_in_chain (vm, p0));
501
502           from += 1;
503           to_next += 1;
504           n_left_to_next -= 1;
505           n_left_from -= 1;
506
507           if (PREDICT_FALSE (next0 != next))
508             {
509               n_left_to_next += 1;
510               vlib_put_next_frame (vm, node, next, n_left_to_next);
511               next = next0;
512               vlib_get_next_frame (vm, node, next,
513                                    to_next, n_left_to_next);
514               to_next[0] = pi0;
515               to_next += 1;
516               n_left_to_next -= 1;
517             }
518         }
519
520       vlib_put_next_frame (vm, node, next, n_left_to_next);
521     }
522
523   return frame->n_vectors;
524 }
525
526 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
527
528 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
529   .function = ip4_load_balance,
530   .name = "ip4-load-balance",
531   .vector_size = sizeof (u32),
532   .sibling_of = "ip4-lookup",
533
534   .format_trace = format_ip4_forward_next_trace,
535 };
536
537 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
538
539 /* get first interface address */
540 ip4_address_t *
541 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
542                              ip_interface_address_t ** result_ia)
543 {
544   ip_lookup_main_t * lm = &im->lookup_main;
545   ip_interface_address_t * ia = 0;
546   ip4_address_t * result = 0;
547
548   foreach_ip_interface_address (lm, ia, sw_if_index,
549                                 1 /* honor unnumbered */,
550   ({
551     ip4_address_t * a = ip_interface_address_get_address (lm, ia);
552     result = a;
553     break;
554   }));
555   if (result_ia)
556     *result_ia = result ? ia : 0;
557   return result;
558 }
559
560 static void
561 ip4_add_interface_routes (u32 sw_if_index,
562                           ip4_main_t * im, u32 fib_index,
563                           ip_interface_address_t * a)
564 {
565   ip_lookup_main_t * lm = &im->lookup_main;
566   ip4_address_t * address = ip_interface_address_get_address (lm, a);
567   fib_prefix_t pfx = {
568       .fp_len = a->address_length,
569       .fp_proto = FIB_PROTOCOL_IP4,
570       .fp_addr.ip4 = *address,
571   };
572
573   a->neighbor_probe_adj_index = ~0;
574
575   if (pfx.fp_len < 32)
576   {
577       fib_node_index_t fei;
578
579       fei = fib_table_entry_update_one_path(fib_index,
580                                             &pfx,
581                                             FIB_SOURCE_INTERFACE,
582                                             (FIB_ENTRY_FLAG_CONNECTED |
583                                              FIB_ENTRY_FLAG_ATTACHED),
584                                             FIB_PROTOCOL_IP4,
585                                             NULL, /* No next-hop address */
586                                             sw_if_index,
587                                             ~0, // invalid FIB index
588                                             1,
589                                             MPLS_LABEL_INVALID,
590                                             FIB_ROUTE_PATH_FLAG_NONE);
591       a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
592   }
593
594   pfx.fp_len = 32;
595
596   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
597   {
598       u32 classify_table_index =
599           lm->classify_table_index_by_sw_if_index [sw_if_index];
600       if (classify_table_index != (u32) ~0)
601       {
602           dpo_id_t dpo = DPO_NULL;
603
604           dpo_set(&dpo,
605                   DPO_CLASSIFY,
606                   DPO_PROTO_IP4,
607                   classify_dpo_create(FIB_PROTOCOL_IP4,
608                                       classify_table_index));
609
610           fib_table_entry_special_dpo_add(fib_index,
611                                           &pfx,
612                                           FIB_SOURCE_CLASSIFY,
613                                           FIB_ENTRY_FLAG_NONE,
614                                           &dpo);
615           dpo_reset(&dpo);
616       }
617   }
618
619   fib_table_entry_update_one_path(fib_index,
620                                   &pfx,
621                                   FIB_SOURCE_INTERFACE,
622                                   (FIB_ENTRY_FLAG_CONNECTED |
623                                    FIB_ENTRY_FLAG_LOCAL),
624                                   FIB_PROTOCOL_IP4,
625                                   &pfx.fp_addr,
626                                   sw_if_index,
627                                   ~0, // invalid FIB index
628                                   1,
629                                   MPLS_LABEL_INVALID,
630                                   FIB_ROUTE_PATH_FLAG_NONE);
631 }
632
633 static void
634 ip4_del_interface_routes (ip4_main_t * im,
635                           u32 fib_index,
636                           ip4_address_t * address,
637                           u32 address_length)
638 {
639     fib_prefix_t pfx = {
640         .fp_len = address_length,
641         .fp_proto = FIB_PROTOCOL_IP4,
642         .fp_addr.ip4 = *address,
643     };
644
645     if (pfx.fp_len < 32)
646     {
647         fib_table_entry_delete(fib_index,
648                                &pfx,
649                                FIB_SOURCE_INTERFACE);
650     }
651
652     pfx.fp_len = 32;
653     fib_table_entry_delete(fib_index,
654                            &pfx,
655                            FIB_SOURCE_INTERFACE);
656 }
657
658 void
659 ip4_sw_interface_enable_disable (u32 sw_if_index,
660                                  u32 is_enable)
661 {
662   vlib_main_t * vm = vlib_get_main();
663   ip4_main_t * im = &ip4_main;
664   ip_lookup_main_t * lm = &im->lookup_main;
665   u32 ci, cast;
666   u32 lookup_feature_index;
667
668   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
669
670   /*
671    * enable/disable only on the 1<->0 transition
672    */
673   if (is_enable)
674     {
675       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
676         return;
677     }
678   else
679     {
680       ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
681       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
682         return;
683     }
684
685   for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
686     {
687       ip_config_main_t * cm = &lm->feature_config_mains[cast];
688       vnet_config_main_t * vcm = &cm->config_main;
689
690       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
691       ci = cm->config_index_by_sw_if_index[sw_if_index];
692
693       if (cast == VNET_IP_RX_UNICAST_FEAT)
694         lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
695       else
696         lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
697
698       if (is_enable)
699         ci = vnet_config_add_feature (vm, vcm,
700                                       ci,
701                                       lookup_feature_index,
702                                       /* config data */ 0,
703                                       /* # bytes of config data */ 0);
704       else
705         ci = vnet_config_del_feature (vm, vcm,
706                                       ci,
707                                       lookup_feature_index,
708                                       /* config data */ 0,
709                                       /* # bytes of config data */ 0);
710       cm->config_index_by_sw_if_index[sw_if_index] = ci;
711     }
712 }
713
714 static clib_error_t *
715 ip4_add_del_interface_address_internal (vlib_main_t * vm,
716                                         u32 sw_if_index,
717                                         ip4_address_t * address,
718                                         u32 address_length,
719                                         u32 is_del)
720 {
721   vnet_main_t * vnm = vnet_get_main();
722   ip4_main_t * im = &ip4_main;
723   ip_lookup_main_t * lm = &im->lookup_main;
724   clib_error_t * error = 0;
725   u32 if_address_index, elts_before;
726   ip4_address_fib_t ip4_af, * addr_fib = 0;
727
728   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
729   ip4_addr_fib_init (&ip4_af, address,
730                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
731   vec_add1 (addr_fib, ip4_af);
732
733   /* FIXME-LATER
734    * there is no support for adj-fib handling in the presence of overlapping
735    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
736    * most routers do.
737    */
738   if (! is_del)
739     {
740       /* When adding an address check that it does not conflict
741          with an existing address. */
742       ip_interface_address_t * ia;
743       foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
744                                     0 /* honor unnumbered */,
745       ({
746         ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
747
748         if (ip4_destination_matches_route (im, address, x, ia->address_length)
749             || ip4_destination_matches_route (im, x, address, address_length))
750           return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
751                                     format_ip4_address_and_length, address, address_length,
752                                     format_ip4_address_and_length, x, ia->address_length,
753                                     format_vnet_sw_if_index_name, vnm, sw_if_index);
754        }));
755     }
756
757   elts_before = pool_elts (lm->if_address_pool);
758
759   error = ip_interface_address_add_del
760     (lm,
761      sw_if_index,
762      addr_fib,
763      address_length,
764      is_del,
765      &if_address_index);
766   if (error)
767     goto done;
768
769   ip4_sw_interface_enable_disable(sw_if_index, !is_del);
770
771   if (is_del)
772       ip4_del_interface_routes (im, ip4_af.fib_index, address,
773                                 address_length);
774   else
775       ip4_add_interface_routes (sw_if_index,
776                                 im, ip4_af.fib_index,
777                                 pool_elt_at_index
778                                 (lm->if_address_pool, if_address_index));
779
780   /* If pool did not grow/shrink: add duplicate address. */
781   if (elts_before != pool_elts (lm->if_address_pool))
782     {
783       ip4_add_del_interface_address_callback_t * cb;
784       vec_foreach (cb, im->add_del_interface_address_callbacks)
785         cb->function (im, cb->function_opaque, sw_if_index,
786                       address, address_length,
787                       if_address_index,
788                       is_del);
789     }
790
791  done:
792   vec_free (addr_fib);
793   return error;
794 }
795
796 clib_error_t *
797 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
798                                ip4_address_t * address, u32 address_length,
799                                u32 is_del)
800 {
801   return ip4_add_del_interface_address_internal
802     (vm, sw_if_index, address, address_length,
803      is_del);
804 }
805
806 /* Built-in ip4 unicast rx feature path definition */
807 VNET_IP4_UNICAST_FEATURE_INIT (ip4_flow_classify, static) = {
808   .node_name = "ip4-flow-classify",
809   .runs_before = ORDER_CONSTRAINTS {"ip4-inacl", 0},
810   .feature_index = &ip4_main.ip4_unicast_rx_feature_flow_classify,
811 };
812
813 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
814   .node_name = "ip4-inacl",
815   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
816   .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
817 };
818
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
820   .node_name = "ip4-source-check-via-rx",
821   .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
822   .feature_index =
823   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
824 };
825
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
827   .node_name = "ip4-source-check-via-any",
828   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
829   .feature_index =
830   &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
831 };
832
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
834   .node_name = "ip4-source-and-port-range-check-rx",
835   .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
836   .feature_index =
837   &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
838 };
839
840 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
841   .node_name = "ip4-policer-classify",
842   .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
843   .feature_index =
844   &ip4_main.ip4_unicast_rx_feature_policer_classify,
845 };
846
847 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
848   .node_name = "ipsec-input-ip4",
849   .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
850   .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
851 };
852
853 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
854   .node_name = "vpath-input-ip4",
855   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
856   .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
857 };
858
859 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
860   .node_name = "ip4-lookup",
861   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
862   .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
863 };
864
865 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
866   .node_name = "ip4-drop",
867   .runs_before = 0, /* not before any other features */
868   .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
869 };
870
871
872 /* Built-in ip4 multicast rx feature path definition */
873 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
874   .node_name = "vpath-input-ip4",
875   .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
876   .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
877 };
878
879 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
880   .node_name = "ip4-lookup-multicast",
881   .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
882   .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
883 };
884
885 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
886   .node_name = "ip4-drop",
887   .runs_before = 0, /* last feature */
888   .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
889 };
890
/* Start nodes handed to vnet_feature_arc_init() for the rx feature arcs. */
static char * rx_feature_start_nodes[] = {
  "ip4-input",
  "ip4-input-no-checksum",
};

/* Start nodes handed to vnet_feature_arc_init() for the tx feature arc. */
static char * tx_feature_start_nodes[] = {
  "ip4-rewrite-transit",
  "ip4-midchain",
};
899
900 /* Source and port-range check ip4 tx feature path definition */
901 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
902   .node_name = "ip4-source-and-port-range-check-tx",
903   .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
904   .feature_index =
905   &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
906
907 };
908
909 /* Built-in ip4 tx feature path definition */
910 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
911   .node_name = "interface-output",
912   .runs_before = 0, /* not before any other features */
913   .feature_index = &ip4_main.ip4_tx_feature_interface_output,
914 };
915
916 static clib_error_t *
917 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
918 {
919   ip_lookup_main_t * lm = &im->lookup_main;
920   clib_error_t * error;
921   vnet_cast_t cast;
922   ip_config_main_t * cm;
923   vnet_config_main_t * vcm;
924   char **feature_start_nodes;
925   int feature_start_len;
926
927   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
928     {
929       cm = &lm->feature_config_mains[cast];
930       vcm = &cm->config_main;
931
932       if (cast < VNET_IP_TX_FEAT)
933         {
934           feature_start_nodes = rx_feature_start_nodes;
935           feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
936         }
937       else
938         {
939           feature_start_nodes = tx_feature_start_nodes;
940           feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
941         }
942
943       if ((error = vnet_feature_arc_init (vm, vcm,
944                                          feature_start_nodes,
945                                          feature_start_len,
946                                          im->next_feature[cast],
947                                          &im->feature_nodes[cast])))
948         return error;
949     }
950
951   return 0;
952 }
953
954 static clib_error_t *
955 ip4_sw_interface_add_del (vnet_main_t * vnm,
956                           u32 sw_if_index,
957                           u32 is_add)
958 {
959   vlib_main_t * vm = vnm->vlib_main;
960   ip4_main_t * im = &ip4_main;
961   ip_lookup_main_t * lm = &im->lookup_main;
962   u32 ci, cast;
963   u32 feature_index;
964
965   /* Fill in lookup tables with default table (0). */
966   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
967
968   for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
969     {
970       ip_config_main_t * cm = &lm->feature_config_mains[cast];
971       vnet_config_main_t * vcm = &cm->config_main;
972
973       vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
974       ci = cm->config_index_by_sw_if_index[sw_if_index];
975
976       if (cast == VNET_IP_RX_UNICAST_FEAT)
977         feature_index = im->ip4_unicast_rx_feature_drop;
978       else if (cast == VNET_IP_RX_MULTICAST_FEAT)
979         feature_index = im->ip4_multicast_rx_feature_drop;
980       else
981         feature_index = im->ip4_tx_feature_interface_output;
982
983       if (is_add)
984         ci = vnet_config_add_feature (vm, vcm,
985                                       ci,
986                                       feature_index,
987                                       /* config data */ 0,
988                                       /* # bytes of config data */ 0);
989       else
990         {
991           ci = vnet_config_del_feature (vm, vcm, ci,
992                                         feature_index,
993                                         /* config data */ 0,
994                                         /* # bytes of config data */ 0);
995           if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
996               im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
997         }
998       cm->config_index_by_sw_if_index[sw_if_index] = ci;
999       /*
1000        * note: do not update the tx feature count here.
1001        */
1002     }
1003
1004   return /* no error */ 0;
1005 }
1006
1007 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1008
/* Global IP4 main: the single ip4_main_t instance that the functions in
   this file reach through &ip4_main. */
ip4_main_t ip4_main;
1011
1012 clib_error_t *
1013 ip4_lookup_init (vlib_main_t * vm)
1014 {
1015   ip4_main_t * im = &ip4_main;
1016   clib_error_t * error;
1017   uword i;
1018
1019   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1020     {
1021       u32 m;
1022
1023       if (i < 32)
1024         m = pow2_mask (i) << (32 - i);
1025       else
1026         m = ~0;
1027       im->fib_masks[i] = clib_host_to_net_u32 (m);
1028     }
1029
1030   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1031
1032   /* Create FIB with index 0 and table id of 0. */
1033   fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1034
1035   {
1036     pg_node_t * pn;
1037     pn = pg_get_node (ip4_lookup_node.index);
1038     pn->unformat_edit = unformat_pg_ip4_header;
1039   }
1040
1041   {
1042     ethernet_arp_header_t h;
1043
1044     memset (&h, 0, sizeof (h));
1045
1046     /* Set target ethernet address to all zeros. */
1047     memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1048
1049 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1050 #define _8(f,v) h.f = v;
1051     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1052     _16 (l3_type, ETHERNET_TYPE_IP4);
1053     _8 (n_l2_address_bytes, 6);
1054     _8 (n_l3_address_bytes, 4);
1055     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1056 #undef _16
1057 #undef _8
1058
1059     vlib_packet_template_init (vm,
1060                                &im->ip4_arp_request_packet_template,
1061                                /* data */ &h,
1062                                sizeof (h),
1063                                /* alloc chunk size */ 8,
1064                                "ip4 arp");
1065   }
1066
1067   error = ip4_feature_init (vm, im);
1068
1069   return error;
1070 }
1071
1072 VLIB_INIT_FUNCTION (ip4_lookup_init);
1073
1074 typedef struct {
1075   /* Adjacency taken. */
1076   u32 dpo_index;
1077   u32 flow_hash;
1078   u32 fib_index;
1079
1080   /* Packet data, possibly *after* rewrite. */
1081   u8 packet_data[64 - 1*sizeof(u32)];
1082 } ip4_forward_next_trace_t;
1083
1084 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1085 {
1086   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1087   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1088   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1089   uword indent = format_get_indent (s);
1090   s = format (s, "%U%U",
1091               format_white_space, indent,
1092               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1093   return s;
1094 }
1095
1096 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1097 {
1098   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1099   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1100   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1101   uword indent = format_get_indent (s);
1102
1103   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1104               t->fib_index, t->dpo_index, t->flow_hash);
1105   s = format (s, "\n%U%U",
1106               format_white_space, indent,
1107               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1108   return s;
1109 }
1110
1111 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1112 {
1113   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1114   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1115   ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1116   vnet_main_t * vnm = vnet_get_main();
1117   uword indent = format_get_indent (s);
1118
1119   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1120               t->fib_index, t->dpo_index, format_ip_adjacency,
1121               t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1122               t->flow_hash);
1123   s = format (s, "\n%U%U",
1124               format_white_space, indent,
1125               format_ip_adjacency_packet_data,
1126               vnm, t->dpo_index,
1127               t->packet_data, sizeof (t->packet_data));
1128   return s;
1129 }
1130
1131 /* Common trace function for all ip4-forward next nodes. */
1132 void
1133 ip4_forward_next_trace (vlib_main_t * vm,
1134                         vlib_node_runtime_t * node,
1135                         vlib_frame_t * frame,
1136                         vlib_rx_or_tx_t which_adj_index)
1137 {
1138   u32 * from, n_left;
1139   ip4_main_t * im = &ip4_main;
1140
1141   n_left = frame->n_vectors;
1142   from = vlib_frame_vector_args (frame);
1143
1144   while (n_left >= 4)
1145     {
1146       u32 bi0, bi1;
1147       vlib_buffer_t * b0, * b1;
1148       ip4_forward_next_trace_t * t0, * t1;
1149
1150       /* Prefetch next iteration. */
1151       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1152       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1153
1154       bi0 = from[0];
1155       bi1 = from[1];
1156
1157       b0 = vlib_get_buffer (vm, bi0);
1158       b1 = vlib_get_buffer (vm, bi1);
1159
1160       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1161         {
1162           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1163           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1164           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1165           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1166               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1167               vec_elt (im->fib_index_by_sw_if_index,
1168                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1169
1170           clib_memcpy (t0->packet_data,
1171                   vlib_buffer_get_current (b0),
1172                   sizeof (t0->packet_data));
1173         }
1174       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1175         {
1176           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1177           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1178           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1179           t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1180               vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1181               vec_elt (im->fib_index_by_sw_if_index,
1182                        vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1183           clib_memcpy (t1->packet_data,
1184                   vlib_buffer_get_current (b1),
1185                   sizeof (t1->packet_data));
1186         }
1187       from += 2;
1188       n_left -= 2;
1189     }
1190
1191   while (n_left >= 1)
1192     {
1193       u32 bi0;
1194       vlib_buffer_t * b0;
1195       ip4_forward_next_trace_t * t0;
1196
1197       bi0 = from[0];
1198
1199       b0 = vlib_get_buffer (vm, bi0);
1200
1201       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1202         {
1203           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1204           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1205           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1206           t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1207               vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1208               vec_elt (im->fib_index_by_sw_if_index,
1209                        vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1210           clib_memcpy (t0->packet_data,
1211                   vlib_buffer_get_current (b0),
1212                   sizeof (t0->packet_data));
1213         }
1214       from += 1;
1215       n_left -= 1;
1216     }
1217 }
1218
1219 static uword
1220 ip4_drop_or_punt (vlib_main_t * vm,
1221                   vlib_node_runtime_t * node,
1222                   vlib_frame_t * frame,
1223                   ip4_error_t error_code)
1224 {
1225   u32 * buffers = vlib_frame_vector_args (frame);
1226   uword n_packets = frame->n_vectors;
1227
1228   vlib_error_drop_buffers (vm, node,
1229                            buffers,
1230                            /* stride */ 1,
1231                            n_packets,
1232                            /* next */ 0,
1233                            ip4_input_node.index,
1234                            error_code);
1235
1236   if (node->flags & VLIB_NODE_FLAG_TRACE)
1237     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1238
1239   return n_packets;
1240 }
1241
1242 static uword
1243 ip4_drop (vlib_main_t * vm,
1244           vlib_node_runtime_t * node,
1245           vlib_frame_t * frame)
1246 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
1247
1248 static uword
1249 ip4_punt (vlib_main_t * vm,
1250           vlib_node_runtime_t * node,
1251           vlib_frame_t * frame)
1252 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1253
1254 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1255   .function = ip4_drop,
1256   .name = "ip4-drop",
1257   .vector_size = sizeof (u32),
1258
1259   .format_trace = format_ip4_forward_next_trace,
1260
1261   .n_next_nodes = 1,
1262   .next_nodes = {
1263     [0] = "error-drop",
1264   },
1265 };
1266
1267 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1268
1269 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1270   .function = ip4_punt,
1271   .name = "ip4-punt",
1272   .vector_size = sizeof (u32),
1273
1274   .format_trace = format_ip4_forward_next_trace,
1275
1276   .n_next_nodes = 1,
1277   .next_nodes = {
1278     [0] = "error-punt",
1279   },
1280 };
1281
1282 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1283
1284 /* Compute TCP/UDP/ICMP4 checksum in software. */
1285 u16
1286 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1287                               ip4_header_t * ip0)
1288 {
1289   ip_csum_t sum0;
1290   u32 ip_header_length, payload_length_host_byte_order;
1291   u32 n_this_buffer, n_bytes_left;
1292   u16 sum16;
1293   void * data_this_buffer;
1294
1295   /* Initialize checksum with ip header. */
1296   ip_header_length = ip4_header_bytes (ip0);
1297   payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1298   sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1299
1300   if (BITS (uword) == 32)
1301     {
1302       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1303       sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1304     }
1305   else
1306     sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1307
1308   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1309   data_this_buffer = (void *) ip0 + ip_header_length;
1310   if (n_this_buffer + ip_header_length > p0->current_length)
1311     n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1312   while (1)
1313     {
1314       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1315       n_bytes_left -= n_this_buffer;
1316       if (n_bytes_left == 0)
1317         break;
1318
1319       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1320       p0 = vlib_get_buffer (vm, p0->next_buffer);
1321       data_this_buffer = vlib_buffer_get_current (p0);
1322       n_this_buffer = p0->current_length;
1323     }
1324
1325   sum16 = ~ ip_csum_fold (sum0);
1326
1327   return sum16;
1328 }
1329
1330 static u32
1331 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1332 {
1333   ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1334   udp_header_t * udp0;
1335   u16 sum16;
1336
1337   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1338           || ip0->protocol == IP_PROTOCOL_UDP);
1339
1340   udp0 = (void *) (ip0 + 1);
1341   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1342     {
1343       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1344                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1345       return p0->flags;
1346     }
1347
1348   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1349
1350   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1351                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1352
1353   return p0->flags;
1354 }
1355
1356 static uword
1357 ip4_local (vlib_main_t * vm,
1358            vlib_node_runtime_t * node,
1359            vlib_frame_t * frame)
1360 {
1361   ip4_main_t * im = &ip4_main;
1362   ip_lookup_main_t * lm = &im->lookup_main;
1363   ip_local_next_t next_index;
1364   u32 * from, * to_next, n_left_from, n_left_to_next;
1365   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1366
1367   from = vlib_frame_vector_args (frame);
1368   n_left_from = frame->n_vectors;
1369   next_index = node->cached_next_index;
1370
1371   if (node->flags & VLIB_NODE_FLAG_TRACE)
1372     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1373
1374   while (n_left_from > 0)
1375     {
1376       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1377
1378       while (n_left_from >= 4 && n_left_to_next >= 2)
1379         {
1380           vlib_buffer_t * p0, * p1;
1381           ip4_header_t * ip0, * ip1;
1382           udp_header_t * udp0, * udp1;
1383           ip4_fib_mtrie_t * mtrie0, * mtrie1;
1384           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1385           const dpo_id_t *dpo0, *dpo1;
1386           const load_balance_t *lb0, *lb1;
1387           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1388           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1389           i32 len_diff0, len_diff1;
1390           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1391           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1392           u8 enqueue_code;
1393
1394           pi0 = to_next[0] = from[0];
1395           pi1 = to_next[1] = from[1];
1396           from += 2;
1397           n_left_from -= 2;
1398           to_next += 2;
1399           n_left_to_next -= 2;
1400
1401           p0 = vlib_get_buffer (vm, pi0);
1402           p1 = vlib_get_buffer (vm, pi1);
1403
1404           ip0 = vlib_buffer_get_current (p0);
1405           ip1 = vlib_buffer_get_current (p1);
1406
1407           fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1408                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1409           fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1410                                 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1411
1412           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1413           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1414
1415           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1416
1417           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1418           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1419
1420           /* Treat IP frag packets as "experimental" protocol for now
1421              until support of IP frag reassembly is implemented */
1422           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1423           proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1424           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1425           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1426           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1427           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1428
1429           flags0 = p0->flags;
1430           flags1 = p1->flags;
1431
1432           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1433           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1434
1435           udp0 = ip4_next_header (ip0);
1436           udp1 = ip4_next_header (ip1);
1437
1438           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1439           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1440           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1441
1442           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1443           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1444
1445           /* Verify UDP length. */
1446           ip_len0 = clib_net_to_host_u16 (ip0->length);
1447           ip_len1 = clib_net_to_host_u16 (ip1->length);
1448           udp_len0 = clib_net_to_host_u16 (udp0->length);
1449           udp_len1 = clib_net_to_host_u16 (udp1->length);
1450
1451           len_diff0 = ip_len0 - udp_len0;
1452           len_diff1 = ip_len1 - udp_len1;
1453
1454           len_diff0 = is_udp0 ? len_diff0 : 0;
1455           len_diff1 = is_udp1 ? len_diff1 : 0;
1456
1457           if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1458                                 & good_tcp_udp0 & good_tcp_udp1)))
1459             {
1460               if (is_tcp_udp0)
1461                 {
1462                   if (is_tcp_udp0
1463                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1464                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1465                   good_tcp_udp0 =
1466                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1467                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1468                 }
1469               if (is_tcp_udp1)
1470                 {
1471                   if (is_tcp_udp1
1472                       && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1473                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1474                   good_tcp_udp1 =
1475                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1476                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1477                 }
1478             }
1479
1480           good_tcp_udp0 &= len_diff0 >= 0;
1481           good_tcp_udp1 &= len_diff1 >= 0;
1482
1483           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1484           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1485
1486           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1487
1488           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1489           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1490
1491           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1492           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1493                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1494                     : error0);
1495           error1 = (is_tcp_udp1 && ! good_tcp_udp1
1496                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1497                     : error1);
1498
1499           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1500           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1501           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1502           leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1503
1504           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1505           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1506
1507           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1508           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1509
1510           lb0 = load_balance_get(lbi0);
1511           lb1 = load_balance_get(lbi1);
1512           dpo0 = load_balance_get_bucket_i(lb0, 0);
1513           dpo1 = load_balance_get_bucket_i(lb1, 0);
1514
1515           /*
1516            * Must have a route to source otherwise we drop the packet.
1517            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1518            *
1519            * The checks are:
1520            *  - the source is a recieve => it's from us => bogus, do this
1521            *    first since it sets a different error code.
1522            *  - uRPF check for any route to source - accept if passes.
1523            *  - allow packets destined to the broadcast address from unknown sources
1524            */
1525           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1526                      dpo0->dpoi_type == DPO_RECEIVE) ?
1527                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1528                     error0);
1529           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1530                      !fib_urpf_check_size(lb0->lb_urpf) &&
1531                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1532                     ? IP4_ERROR_SRC_LOOKUP_MISS
1533                     : error0);
1534           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1535                      dpo1->dpoi_type == DPO_RECEIVE) ?
1536                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1537                     error1);
1538           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1539                      !fib_urpf_check_size(lb1->lb_urpf) &&
1540                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1541                     ? IP4_ERROR_SRC_LOOKUP_MISS
1542                     : error1);
1543
1544           next0 = lm->local_next_by_ip_protocol[proto0];
1545           next1 = lm->local_next_by_ip_protocol[proto1];
1546
1547           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1548           next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1549
1550           p0->error = error0 ? error_node->errors[error0] : 0;
1551           p1->error = error1 ? error_node->errors[error1] : 0;
1552
1553           enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1554
1555           if (PREDICT_FALSE (enqueue_code != 0))
1556             {
1557               switch (enqueue_code)
1558                 {
1559                 case 1:
1560                   /* A B A */
1561                   to_next[-2] = pi1;
1562                   to_next -= 1;
1563                   n_left_to_next += 1;
1564                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1565                   break;
1566
1567                 case 2:
1568                   /* A A B */
1569                   to_next -= 1;
1570                   n_left_to_next += 1;
1571                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1572                   break;
1573
1574                 case 3:
1575                   /* A B B or A B C */
1576                   to_next -= 2;
1577                   n_left_to_next += 2;
1578                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
1579                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
1580                   if (next0 == next1)
1581                     {
1582                       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1583                       next_index = next1;
1584                       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1585                     }
1586                   break;
1587                 }
1588             }
1589         }
1590
1591       while (n_left_from > 0 && n_left_to_next > 0)
1592         {
1593           vlib_buffer_t * p0;
1594           ip4_header_t * ip0;
1595           udp_header_t * udp0;
1596           ip4_fib_mtrie_t * mtrie0;
1597           ip4_fib_mtrie_leaf_t leaf0;
1598           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1599           i32 len_diff0;
1600           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1601           load_balance_t *lb0;
1602           const dpo_id_t *dpo0;
1603
1604           pi0 = to_next[0] = from[0];
1605           from += 1;
1606           n_left_from -= 1;
1607           to_next += 1;
1608           n_left_to_next -= 1;
1609
1610           p0 = vlib_get_buffer (vm, pi0);
1611
1612           ip0 = vlib_buffer_get_current (p0);
1613
1614           fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1615                                 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1616
1617           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1618
1619           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1620
1621           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1622
1623           /* Treat IP frag packets as "experimental" protocol for now
1624              until support of IP frag reassembly is implemented */
1625           proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1626           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1627           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1628
1629           flags0 = p0->flags;
1630
1631           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1632
1633           udp0 = ip4_next_header (ip0);
1634
1635           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1636           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1637
1638           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1639
1640           /* Verify UDP length. */
1641           ip_len0 = clib_net_to_host_u16 (ip0->length);
1642           udp_len0 = clib_net_to_host_u16 (udp0->length);
1643
1644           len_diff0 = ip_len0 - udp_len0;
1645
1646           len_diff0 = is_udp0 ? len_diff0 : 0;
1647
1648           if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1649             {
1650               if (is_tcp_udp0)
1651                 {
1652                   if (is_tcp_udp0
1653                       && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1654                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1655                   good_tcp_udp0 =
1656                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1657                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1658                 }
1659             }
1660
1661           good_tcp_udp0 &= len_diff0 >= 0;
1662
1663           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1664
1665           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1666
1667           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1668
1669           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1670           error0 = (is_tcp_udp0 && ! good_tcp_udp0
1671                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1672                     : error0);
1673
1674           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1675           leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1676
1677           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1678           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1679
1680           lb0 = load_balance_get(lbi0);
1681           dpo0 = load_balance_get_bucket_i(lb0, 0);
1682
1683           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1684               vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1685                   dpo0->dpoi_index;
1686
1687           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1688                      dpo0->dpoi_type == DPO_RECEIVE) ?
1689                     IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1690                     error0);
1691           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1692                      !fib_urpf_check_size(lb0->lb_urpf) &&
1693                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1694                     ? IP4_ERROR_SRC_LOOKUP_MISS
1695                     : error0);
1696
1697           next0 = lm->local_next_by_ip_protocol[proto0];
1698
1699           next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1700
1701           p0->error = error0? error_node->errors[error0] : 0;
1702
1703           if (PREDICT_FALSE (next0 != next_index))
1704             {
1705               n_left_to_next += 1;
1706               vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1707
1708               next_index = next0;
1709               vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1710               to_next[0] = pi0;
1711               to_next += 1;
1712               n_left_to_next -= 1;
1713             }
1714         }
1715
1716       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1717     }
1718
1719   return frame->n_vectors;
1720 }
1721
/*
 * Graph node registration for "ip4-local", implemented by ip4_local().
 * The frame carries u32 elements. Only the next nodes listed below are
 * declared statically; ip4_register_protocol() appends further next nodes
 * to this node at run time via vlib_node_add_next().
 */
1722 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1723   .function = ip4_local,
1724   .name = "ip4-local",
1725   .vector_size = sizeof (u32),
1726
1727   .format_trace = format_ip4_forward_next_trace,
1728
1729   .n_next_nodes = IP_LOCAL_N_NEXT,
1730   .next_nodes = {
1731     [IP_LOCAL_NEXT_DROP] = "error-drop",
1732     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1733     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1734     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1735   },
1736 };
1737
/* NOTE(review): presumably generates per-CPU-architecture variants of
 * ip4_local for this node -- confirm against the macro definition. */
1738 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
1739
1740 void ip4_register_protocol (u32 protocol, u32 node_index)
1741 {
1742   vlib_main_t * vm = vlib_get_main();
1743   ip4_main_t * im = &ip4_main;
1744   ip_lookup_main_t * lm = &im->lookup_main;
1745
1746   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1747   lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
1748 }
1749
1750 static clib_error_t *
1751 show_ip_local_command_fn (vlib_main_t * vm,
1752                           unformat_input_t * input,
1753                          vlib_cli_command_t * cmd)
1754 {
1755   ip4_main_t * im = &ip4_main;
1756   ip_lookup_main_t * lm = &im->lookup_main;
1757   int i;
1758
1759   vlib_cli_output (vm, "Protocols handled by ip4_local");
1760   for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1761     {
1762       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1763         vlib_cli_output (vm, "%d", i);
1764     }
1765   return 0;
1766 }
1767
1768
1769
1770 /*?
1771  * Display the set of protocols handled by the local IPv4 stack.
 * Each line of output is the decimal IP protocol number of an entry in the
 * local next-node table that does not point at the punt node.
1772  *
1773  * @cliexpar
1774  * Example of how to display local protocol table:
1775  * @cliexstart{show ip local}
1776  * Protocols handled by ip4_local
1777  * 1
1778  * 17
1779  * 47
1780  * @cliexend
1781 ?*/
1782 /* *INDENT-OFF* */
/* Binds the "show ip local" CLI path to show_ip_local_command_fn(). */
1783 VLIB_CLI_COMMAND (show_ip_local, static) = {
1784   .path = "show ip local",
1785   .function = show_ip_local_command_fn,
1786   .short_help = "show ip local",
1787 };
1788 /* *INDENT-ON* */
1789
1790 always_inline uword
1791 ip4_arp_inline (vlib_main_t * vm,
1792                 vlib_node_runtime_t * node,
1793                 vlib_frame_t * frame,
1794                 int is_glean)
1795 {
1796   vnet_main_t * vnm = vnet_get_main();
1797   ip4_main_t * im = &ip4_main;
1798   ip_lookup_main_t * lm = &im->lookup_main;
1799   u32 * from, * to_next_drop;
1800   uword n_left_from, n_left_to_next_drop, next_index;
1801   static f64 time_last_seed_change = -1e100;
1802   static u32 hash_seeds[3];
1803   static uword hash_bitmap[256 / BITS (uword)];
1804   f64 time_now;
1805
1806   if (node->flags & VLIB_NODE_FLAG_TRACE)
1807     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1808
1809   time_now = vlib_time_now (vm);
1810   if (time_now - time_last_seed_change > 1e-3)
1811     {
1812       uword i;
1813       u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1814                                              sizeof (hash_seeds));
1815       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1816         hash_seeds[i] = r[i];
1817
1818       /* Mark all hash keys as been no-seen before. */
1819       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1820         hash_bitmap[i] = 0;
1821
1822       time_last_seed_change = time_now;
1823     }
1824
1825   from = vlib_frame_vector_args (frame);
1826   n_left_from = frame->n_vectors;
1827   next_index = node->cached_next_index;
1828   if (next_index == IP4_ARP_NEXT_DROP)
1829     next_index = IP4_ARP_N_NEXT; /* point to first interface */
1830
1831   while (n_left_from > 0)
1832     {
1833       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1834                            to_next_drop, n_left_to_next_drop);
1835
1836       while (n_left_from > 0 && n_left_to_next_drop > 0)
1837         {
1838           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1839           ip_adjacency_t * adj0;
1840           vlib_buffer_t * p0;
1841           ip4_header_t * ip0;
1842           uword bm0;
1843
1844           pi0 = from[0];
1845
1846           p0 = vlib_get_buffer (vm, pi0);
1847
1848           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1849           adj0 = ip_get_adjacency (lm, adj_index0);
1850           ip0 = vlib_buffer_get_current (p0);
1851
1852           /*
1853            * this is the Glean case, so we are ARPing for the
1854            * packet's destination
1855            */
1856           a0 = hash_seeds[0];
1857           b0 = hash_seeds[1];
1858           c0 = hash_seeds[2];
1859
1860           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1861           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1862
1863           if (is_glean)
1864           {
1865               a0 ^= ip0->dst_address.data_u32;
1866           }
1867           else
1868           {
1869               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1870           }
1871           b0 ^= sw_if_index0;
1872
1873           hash_v3_finalize32 (a0, b0, c0);
1874
1875           c0 &= BITS (hash_bitmap) - 1;
1876           c0 = c0 / BITS (uword);
1877           m0 = (uword) 1 << (c0 % BITS (uword));
1878
1879           bm0 = hash_bitmap[c0];
1880           drop0 = (bm0 & m0) != 0;
1881
1882           /* Mark it as seen. */
1883           hash_bitmap[c0] = bm0 | m0;
1884
1885           from += 1;
1886           n_left_from -= 1;
1887           to_next_drop[0] = pi0;
1888           to_next_drop += 1;
1889           n_left_to_next_drop -= 1;
1890
1891           p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1892
1893           /*
1894            * the adj has been updated to a rewrite but the node the DPO that got
1895            * us here hasn't - yet. no big deal. we'll drop while we wait.
1896            */
1897           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1898             continue;
1899
1900           if (drop0)
1901             continue;
1902
1903           /*
1904            * Can happen if the control-plane is programming tables
1905            * with traffic flowing; at least that's today's lame excuse.
1906            */
1907           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1908               (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1909           {
1910             p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1911           }
1912           else
1913           /* Send ARP request. */
1914           {
1915             u32 bi0 = 0;
1916             vlib_buffer_t * b0;
1917             ethernet_arp_header_t * h0;
1918             vnet_hw_interface_t * hw_if0;
1919
1920             h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1921
1922             /* Add rewrite/encap string for ARP packet. */
1923             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1924
1925             hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1926
1927             /* Src ethernet address in ARP header. */
1928             clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1929                     sizeof (h0->ip4_over_ethernet[0].ethernet));
1930
1931             if (is_glean)
1932             {
1933                 /* The interface's source address is stashed in the Glean Adj */
1934                 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1935
1936                 /* Copy in destination address we are requesting. This is the
1937                 * glean case, so it's the packet's destination.*/
1938                 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1939             }
1940             else
1941             {
1942                 /* Src IP address in ARP header. */
1943                 if (ip4_src_address_for_packet(lm, sw_if_index0,
1944                                                &h0->ip4_over_ethernet[0].ip4))
1945                 {
1946                     /* No source address available */
1947                     p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1948                     vlib_buffer_free(vm, &bi0, 1);
1949                     continue;
1950                 }
1951
1952                 /* Copy in destination address we are requesting from the
1953                    incomplete adj */
1954                 h0->ip4_over_ethernet[1].ip4.data_u32 =
1955                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1956             }
1957
1958             vlib_buffer_copy_trace_flag (vm, p0, bi0);
1959             b0 = vlib_get_buffer (vm, bi0);
1960             vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1961
1962             vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1963
1964             vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1965           }
1966         }
1967
1968       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1969     }
1970
1971   return frame->n_vectors;
1972 }
1973
1974 static uword
1975 ip4_arp (vlib_main_t * vm,
1976          vlib_node_runtime_t * node,
1977          vlib_frame_t * frame)
1978 {
1979     return (ip4_arp_inline(vm, node, frame, 0));
1980 }
1981
1982 static uword
1983 ip4_glean (vlib_main_t * vm,
1984            vlib_node_runtime_t * node,
1985            vlib_frame_t * frame)
1986 {
1987     return (ip4_arp_inline(vm, node, frame, 1));
1988 }
1989
/*
 * Counter names indexed by IP4_ARP_ERROR_*. The same table is used by both
 * the ip4-arp and the ip4-glean node registrations.
 */
1990 static char * ip4_arp_error_strings[] = {
1991   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1992   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1993   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1994   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1995   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1996   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1997 };
1998
/*
 * Graph node registration for "ip4-arp", implemented by ip4_arp().
 * Only the drop next node is declared here; ARP requests themselves are
 * enqueued by ip4_arp_inline() to the next index stored in the adjacency's
 * rewrite header.
 */
1999 VLIB_REGISTER_NODE (ip4_arp_node) = {
2000   .function = ip4_arp,
2001   .name = "ip4-arp",
2002   .vector_size = sizeof (u32),
2003
2004   .format_trace = format_ip4_forward_next_trace,
2005
2006   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2007   .error_strings = ip4_arp_error_strings,
2008
2009   .n_next_nodes = IP4_ARP_N_NEXT,
2010   .next_nodes = {
2011     [IP4_ARP_NEXT_DROP] = "error-drop",
2012   },
2013 };
2014
/*
 * Graph node registration for "ip4-glean", implemented by ip4_glean().
 * Shares the error strings and the next-node layout with ip4-arp.
 */
2015 VLIB_REGISTER_NODE (ip4_glean_node) = {
2016   .function = ip4_glean,
2017   .name = "ip4-glean",
2018   .vector_size = sizeof (u32),
2019
2020   .format_trace = format_ip4_forward_next_trace,
2021
2022   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2023   .error_strings = ip4_arp_error_strings,
2024
2025   .n_next_nodes = IP4_ARP_N_NEXT,
2026   .next_nodes = {
2027     [IP4_ARP_NEXT_DROP] = "error-drop",
2028   },
2029 };
2030
/*
 * X-macro listing the IP4_ARP_ERROR_* suffixes that arp_notrace_init()
 * passes to vnet_pcap_drop_trace_filter_add_del(). The caller defines _(a)
 * before expanding this list.
 */
2031 #define foreach_notrace_ip4_arp_error           \
2032 _(DROP)                                         \
2033 _(REQUEST_SENT)                                 \
2034 _(REPLICATE_DROP)                               \
2035 _(REPLICATE_FAIL)
2036
2037 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2038 {
2039   vlib_node_runtime_t *rt =
2040     vlib_node_get_runtime (vm, ip4_arp_node.index);
2041
2042   /* don't trace ARP request packets */
2043 #define _(a)                                    \
2044     vnet_pcap_drop_trace_filter_add_del         \
2045         (rt->errors[IP4_ARP_ERROR_##a],         \
2046          1 /* is_add */);
2047     foreach_notrace_ip4_arp_error;
2048 #undef _
2049   return 0;
2050 }
2051
2052 VLIB_INIT_FUNCTION(arp_notrace_init);
2053
2054
2055 /* Send an ARP request to see if given destination is reachable on given interface. */
2056 clib_error_t *
2057 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2058 {
2059   vnet_main_t * vnm = vnet_get_main();
2060   ip4_main_t * im = &ip4_main;
2061   ethernet_arp_header_t * h;
2062   ip4_address_t * src;
2063   ip_interface_address_t * ia;
2064   ip_adjacency_t * adj;
2065   vnet_hw_interface_t * hi;
2066   vnet_sw_interface_t * si;
2067   vlib_buffer_t * b;
2068   u32 bi = 0;
2069
2070   si = vnet_get_sw_interface (vnm, sw_if_index);
2071
2072   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2073     {
2074       return clib_error_return (0, "%U: interface %U down",
2075                                 format_ip4_address, dst,
2076                                 format_vnet_sw_if_index_name, vnm,
2077                                 sw_if_index);
2078     }
2079
2080   src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2081   if (! src)
2082     {
2083       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2084       return clib_error_return
2085         (0, "no matching interface address for destination %U (interface %U)",
2086          format_ip4_address, dst,
2087          format_vnet_sw_if_index_name, vnm, sw_if_index);
2088     }
2089
2090   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2091
2092   h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2093
2094   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2095
2096   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2097
2098   h->ip4_over_ethernet[0].ip4 = src[0];
2099   h->ip4_over_ethernet[1].ip4 = dst[0];
2100
2101   b = vlib_get_buffer (vm, bi);
2102   vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2103
2104   /* Add encapsulation string for software interface (e.g. ethernet header). */
2105   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2106   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2107
2108   {
2109     vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2110     u32 * to_next = vlib_frame_vector_args (f);
2111     to_next[0] = bi;
2112     f->n_vectors = 1;
2113     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2114   }
2115
2116   return /* no error */ 0;
2117 }
2118
/* Next-node slots used by ip4_rewrite_inline(). */
2119 typedef enum {
/* Initial next for every packet; kept when an error is set. */
2120   IP4_REWRITE_NEXT_DROP,
/* Override applied to locally received packets whose adjacency is still
 * IP_LOOKUP_NEXT_ARP. */
2121   IP4_REWRITE_NEXT_ARP,
/* Chosen when the TTL reaches zero after being decremented. */
2122   IP4_REWRITE_NEXT_ICMP_ERROR,
2123 } ip4_rewrite_next_t;
2124
2125 always_inline uword
2126 ip4_rewrite_inline (vlib_main_t * vm,
2127                     vlib_node_runtime_t * node,
2128                     vlib_frame_t * frame,
2129                     int rewrite_for_locally_received_packets,
2130                     int is_midchain)
2131 {
2132   ip_lookup_main_t * lm = &ip4_main.lookup_main;
2133   u32 * from = vlib_frame_vector_args (frame);
2134   u32 n_left_from, n_left_to_next, * to_next, next_index;
2135   vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2136   vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2137   ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2138
2139   n_left_from = frame->n_vectors;
2140   next_index = node->cached_next_index;
2141   u32 cpu_index = os_get_cpu_number();
2142
2143   while (n_left_from > 0)
2144     {
2145       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2146
2147       while (n_left_from >= 4 && n_left_to_next >= 2)
2148         {
2149           ip_adjacency_t * adj0, * adj1;
2150           vlib_buffer_t * p0, * p1;
2151           ip4_header_t * ip0, * ip1;
2152           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2153           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2154           u32 next0_override, next1_override;
2155           u32 tx_sw_if_index0, tx_sw_if_index1;
2156
2157           if (rewrite_for_locally_received_packets)
2158               next0_override = next1_override = 0;
2159
2160           /* Prefetch next iteration. */
2161           {
2162             vlib_buffer_t * p2, * p3;
2163
2164             p2 = vlib_get_buffer (vm, from[2]);
2165             p3 = vlib_get_buffer (vm, from[3]);
2166
2167             vlib_prefetch_buffer_header (p2, STORE);
2168             vlib_prefetch_buffer_header (p3, STORE);
2169
2170             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2171             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2172           }
2173
2174           pi0 = to_next[0] = from[0];
2175           pi1 = to_next[1] = from[1];
2176
2177           from += 2;
2178           n_left_from -= 2;
2179           to_next += 2;
2180           n_left_to_next -= 2;
2181
2182           p0 = vlib_get_buffer (vm, pi0);
2183           p1 = vlib_get_buffer (vm, pi1);
2184
2185           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2186           adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2187
2188           /* We should never rewrite a pkt using the MISS adjacency */
2189           ASSERT(adj_index0 && adj_index1);
2190
2191           ip0 = vlib_buffer_get_current (p0);
2192           ip1 = vlib_buffer_get_current (p1);
2193
2194           error0 = error1 = IP4_ERROR_NONE;
2195           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2196
2197           /* Decrement TTL & update checksum.
2198              Works either endian, so no need for byte swap. */
2199           if (! rewrite_for_locally_received_packets)
2200             {
2201               i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2202
2203               /* Input node should have rejected packets with ttl 0. */
2204               ASSERT (ip0->ttl > 0);
2205               ASSERT (ip1->ttl > 0);
2206
2207               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2208               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2209
2210               checksum0 += checksum0 >= 0xffff;
2211               checksum1 += checksum1 >= 0xffff;
2212
2213               ip0->checksum = checksum0;
2214               ip1->checksum = checksum1;
2215
2216               ttl0 -= 1;
2217               ttl1 -= 1;
2218
2219               ip0->ttl = ttl0;
2220               ip1->ttl = ttl1;
2221
2222               /*
2223                * If the ttl drops below 1 when forwarding, generate
2224                * an ICMP response.
2225                */
2226               if (PREDICT_FALSE(ttl0 <= 0))
2227                 {
2228                   error0 = IP4_ERROR_TIME_EXPIRED;
2229                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2230                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2231                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2232                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2233                 }
2234               if (PREDICT_FALSE(ttl1 <= 0))
2235                 {
2236                   error1 = IP4_ERROR_TIME_EXPIRED;
2237                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2238                   icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2239                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2240                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2241                 }
2242
2243               /* Verify checksum. */
2244               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2245               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2246             }
2247
2248           /* Rewrite packet header and updates lengths. */
2249           adj0 = ip_get_adjacency (lm, adj_index0);
2250           adj1 = ip_get_adjacency (lm, adj_index1);
2251
2252           if (rewrite_for_locally_received_packets)
2253             {
2254               if (PREDICT_FALSE(adj0->lookup_next_index
2255                                 == IP_LOOKUP_NEXT_ARP))
2256                 next0_override = IP4_REWRITE_NEXT_ARP;
2257               if (PREDICT_FALSE(adj1->lookup_next_index
2258                                 == IP_LOOKUP_NEXT_ARP))
2259                 next1_override = IP4_REWRITE_NEXT_ARP;
2260             }
2261
2262           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2263           rw_len0 = adj0[0].rewrite_header.data_bytes;
2264           rw_len1 = adj1[0].rewrite_header.data_bytes;
2265           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2266           vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2267
2268           /* Check MTU of outgoing interface. */
2269           error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2270                     ? IP4_ERROR_MTU_EXCEEDED
2271                     : error0);
2272           error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2273                     ? IP4_ERROR_MTU_EXCEEDED
2274                     : error1);
2275
2276           next0 = (error0 == IP4_ERROR_NONE)
2277             ? adj0[0].rewrite_header.next_index : next0;
2278
2279           if (rewrite_for_locally_received_packets)
2280               next0 = next0 && next0_override ? next0_override : next0;
2281
2282           next1 = (error1 == IP4_ERROR_NONE)
2283             ? adj1[0].rewrite_header.next_index : next1;
2284
2285           if (rewrite_for_locally_received_packets)
2286               next1 = next1 && next1_override ? next1_override : next1;
2287
2288           /*
2289            * We've already accounted for an ethernet_header_t elsewhere
2290            */
2291           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2292               vlib_increment_combined_counter
2293                   (&adjacency_counters,
2294                    cpu_index, adj_index0,
2295                    /* packet increment */ 0,
2296                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2297
2298           if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2299               vlib_increment_combined_counter
2300                   (&adjacency_counters,
2301                    cpu_index, adj_index1,
2302                    /* packet increment */ 0,
2303                    /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2304
2305           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2306            * to see the IP header */
2307           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2308             {
2309               p0->current_data -= rw_len0;
2310               p0->current_length += rw_len0;
2311               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2312               vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2313                   tx_sw_if_index0;
2314
2315               if (PREDICT_FALSE
2316                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2317                                     tx_sw_if_index0)))
2318                 {
2319                   p0->current_config_index =
2320                     vec_elt (cm->config_index_by_sw_if_index,
2321                              tx_sw_if_index0);
2322                   vnet_get_config_data (&cm->config_main,
2323                                         &p0->current_config_index,
2324                                         &next0,
2325                                         /* # bytes of config data */ 0);
2326                 }
2327             }
2328           if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2329             {
2330               p1->current_data -= rw_len1;
2331               p1->current_length += rw_len1;
2332
2333               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2334               vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2335                   tx_sw_if_index1;
2336
2337               if (PREDICT_FALSE
2338                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2339                                     tx_sw_if_index1)))
2340                 {
2341                   p1->current_config_index =
2342                     vec_elt (cm->config_index_by_sw_if_index,
2343                              tx_sw_if_index1);
2344                   vnet_get_config_data (&cm->config_main,
2345                                         &p1->current_config_index,
2346                                         &next1,
2347                                         /* # bytes of config data */ 0);
2348                 }
2349             }
2350
2351           /* Guess we are only writing on simple Ethernet header. */
2352           vnet_rewrite_two_headers (adj0[0], adj1[0],
2353                                     ip0, ip1,
2354                                     sizeof (ethernet_header_t));
2355
2356           if (is_midchain)
2357           {
2358               adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2359               adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2360           }
2361
2362           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2363                                            to_next, n_left_to_next,
2364                                            pi0, pi1, next0, next1);
2365         }
2366
2367       while (n_left_from > 0 && n_left_to_next > 0)
2368         {
2369           ip_adjacency_t * adj0;
2370           vlib_buffer_t * p0;
2371           ip4_header_t * ip0;
2372           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2373           u32 next0_override;
2374           u32 tx_sw_if_index0;
2375
2376           if (rewrite_for_locally_received_packets)
2377               next0_override = 0;
2378
2379           pi0 = to_next[0] = from[0];
2380
2381           p0 = vlib_get_buffer (vm, pi0);
2382
2383           adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2384
2385           /* We should never rewrite a pkt using the MISS adjacency */
2386           ASSERT(adj_index0);
2387
2388           adj0 = ip_get_adjacency (lm, adj_index0);
2389
2390           ip0 = vlib_buffer_get_current (p0);
2391
2392           error0 = IP4_ERROR_NONE;
2393           next0 = IP4_REWRITE_NEXT_DROP;            /* drop on error */
2394
2395           /* Decrement TTL & update checksum. */
2396           if (! rewrite_for_locally_received_packets)
2397             {
2398               i32 ttl0 = ip0->ttl;
2399
2400               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2401
2402               checksum0 += checksum0 >= 0xffff;
2403
2404               ip0->checksum = checksum0;
2405
2406               ASSERT (ip0->ttl > 0);
2407
2408               ttl0 -= 1;
2409
2410               ip0->ttl = ttl0;
2411
2412               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2413
2414               if (PREDICT_FALSE(ttl0 <= 0))
2415                 {
2416                   /*
2417                    * If the ttl drops below 1 when forwarding, generate
2418                    * an ICMP response.
2419                    */
2420                   error0 = IP4_ERROR_TIME_EXPIRED;
2421                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2422                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2423                   icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2424                               ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2425                 }
2426             }
2427
2428           if (rewrite_for_locally_received_packets)
2429             {
2430               /*
2431                * We have to override the next_index in ARP adjacencies,
2432                * because they're set up for ip4-arp, not this node...
2433                */
2434               if (PREDICT_FALSE(adj0->lookup_next_index
2435                                 == IP_LOOKUP_NEXT_ARP))
2436                 next0_override = IP4_REWRITE_NEXT_ARP;
2437             }
2438
2439           /* Guess we are only writing on simple Ethernet header. */
2440           vnet_rewrite_one_header (adj0[0], ip0,
2441                                    sizeof (ethernet_header_t));
2442
2443           /* Update packet buffer attributes/set output interface. */
2444           rw_len0 = adj0[0].rewrite_header.data_bytes;
2445           vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2446
2447           if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2448               vlib_increment_combined_counter
2449                   (&adjacency_counters,
2450                    cpu_index, adj_index0,
2451                    /* packet increment */ 0,
2452                    /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2453
2454           /* Check MTU of outgoing interface. */
2455           error0 = (vlib_buffer_length_in_chain (vm, p0)
2456                     > adj0[0].rewrite_header.max_l3_packet_bytes
2457                     ? IP4_ERROR_MTU_EXCEEDED
2458                     : error0);
2459
2460           p0->error = error_node->errors[error0];
2461
2462           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2463            * to see the IP headerr */
2464           if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2465             {
2466               p0->current_data -= rw_len0;
2467               p0->current_length += rw_len0;
2468               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2469
2470               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2471               next0 = adj0[0].rewrite_header.next_index;
2472
2473               if (is_midchain)
2474                 {
2475                   adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2476                 }
2477
2478               if (PREDICT_FALSE
2479                   (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2480                                     tx_sw_if_index0)))
2481                   {
2482                     p0->current_config_index =
2483                       vec_elt (cm->config_index_by_sw_if_index,
2484                                tx_sw_if_index0);
2485                     vnet_get_config_data (&cm->config_main,
2486                                           &p0->current_config_index,
2487                                           &next0,
2488                                           /* # bytes of config data */ 0);
2489                   }
2490             }
2491
2492           if (rewrite_for_locally_received_packets)
2493               next0 = next0 && next0_override ? next0_override : next0;
2494
2495           from += 1;
2496           n_left_from -= 1;
2497           to_next += 1;
2498           n_left_to_next -= 1;
2499
2500           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2501                                            to_next, n_left_to_next,
2502                                            pi0, next0);
2503         }
2504
2505       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2506     }
2507
2508   /* Need to do trace after rewrites to pick up new packet data. */
2509   if (node->flags & VLIB_NODE_FLAG_TRACE)
2510     ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2511
2512   return frame->n_vectors;
2513 }
2514
2515
2516 /** @brief IPv4 transit rewrite node.
2517     @node ip4-rewrite-transit
2518
2519     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2520     header checksum, fetch the ip adjacency, check the outbound mtu,
2521     apply the adjacency rewrite, and send pkts to the adjacency
2522     rewrite header's rewrite_next_index.
2523
2524     @param vm vlib_main_t corresponding to the current thread
2525     @param node vlib_node_runtime_t
2526     @param frame vlib_frame_t whose contents should be dispatched
2527
2528     @par Graph mechanics: buffer metadata, next index usage
2529
2530     @em Uses:
2531     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2532         - the rewrite adjacency index
2533     - <code>adj->lookup_next_index</code>
2534         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2535           the packet will be dropped.
2536     - <code>adj->rewrite_header</code>
2537         - Rewrite string length, rewrite string, next_index
2538
2539     @em Sets:
2540     - <code>b->current_data, b->current_length</code>
2541         - Updated net of applying the rewrite string
2542
2543     <em>Next Indices:</em>
2544     - <code> adj->rewrite_header.next_index </code>
2545       or @c error-drop
2546 */
2547 static uword
2548 ip4_rewrite_transit (vlib_main_t * vm,
2549                      vlib_node_runtime_t * node,
2550                      vlib_frame_t * frame)
2551 {
2552   return ip4_rewrite_inline (vm, node, frame,
2553                              /* rewrite_for_locally_received_packets */ 0, 0);
2554 }
2555
2556 /** @brief IPv4 local rewrite node.
2557     @node ip4-rewrite-local
2558
2559     This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2560     the outbound interface mtu, apply the adjacency rewrite, and send
2561     pkts to the adjacency rewrite header's rewrite_next_index. Deal
2562     with hemorrhoids of the form "some clown sends an icmp4 w/ src =
2563     dst = interface addr."
2564
2565     @param vm vlib_main_t corresponding to the current thread
2566     @param node vlib_node_runtime_t
2567     @param frame vlib_frame_t whose contents should be dispatched
2568
2569     @par Graph mechanics: buffer metadata, next index usage
2570
2571     @em Uses:
2572     - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2573         - the rewrite adjacency index
2574     - <code>adj->lookup_next_index</code>
2575         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2576           the packet will be dropped.
2577     - <code>adj->rewrite_header</code>
2578         - Rewrite string length, rewrite string, next_index
2579
2580     @em Sets:
2581     - <code>b->current_data, b->current_length</code>
2582         - Updated net of applying the rewrite string
2583
2584     <em>Next Indices:</em>
2585     - <code> adj->rewrite_header.next_index </code>
2586       or @c error-drop
2587 */
2588
2589 static uword
2590 ip4_rewrite_local (vlib_main_t * vm,
2591                    vlib_node_runtime_t * node,
2592                    vlib_frame_t * frame)
2593 {
2594   return ip4_rewrite_inline (vm, node, frame,
2595                              /* rewrite_for_locally_received_packets */ 1, 0);
2596 }
2597
2598 static uword
2599 ip4_midchain (vlib_main_t * vm,
2600               vlib_node_runtime_t * node,
2601               vlib_frame_t * frame)
2602 {
2603   return ip4_rewrite_inline (vm, node, frame,
2604                              /* rewrite_for_locally_received_packets */ 0, 1);
2605 }
2606
2607 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2608   .function = ip4_rewrite_transit,
2609   .name = "ip4-rewrite-transit",
2610   .vector_size = sizeof (u32),
2611
2612   .format_trace = format_ip4_rewrite_trace,
2613
2614   .n_next_nodes = 3,
2615   .next_nodes = {
2616     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2617     [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2618     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2619   },
2620 };
2621
2622 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2623
2624 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2625   .function = ip4_midchain,
2626   .name = "ip4-midchain",
2627   .vector_size = sizeof (u32),
2628
2629   .format_trace = format_ip4_forward_next_trace,
2630
2631   .sibling_of = "ip4-rewrite-transit",
2632 };
2633
2634 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
2635
2636 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2637   .function = ip4_rewrite_local,
2638   .name = "ip4-rewrite-local",
2639   .vector_size = sizeof (u32),
2640
2641   .sibling_of = "ip4-rewrite-transit",
2642
2643   .format_trace = format_ip4_rewrite_trace,
2644
2645   .n_next_nodes = 0,
2646 };
2647
2648 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2649
2650 static clib_error_t *
2651 add_del_interface_table (vlib_main_t * vm,
2652                          unformat_input_t * input,
2653                          vlib_cli_command_t * cmd)
2654 {
2655   vnet_main_t * vnm = vnet_get_main();
2656   clib_error_t * error = 0;
2657   u32 sw_if_index, table_id;
2658
2659   sw_if_index = ~0;
2660
2661   if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2662     {
2663       error = clib_error_return (0, "unknown interface `%U'",
2664                                  format_unformat_error, input);
2665       goto done;
2666     }
2667
2668   if (unformat (input, "%d", &table_id))
2669     ;
2670   else
2671     {
2672       error = clib_error_return (0, "expected table id `%U'",
2673                                  format_unformat_error, input);
2674       goto done;
2675     }
2676
2677   {
2678     ip4_main_t * im = &ip4_main;
2679     u32 fib_index;
2680
2681     fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2682                                                    table_id);
2683
2684     //
2685     // FIXME-LATER
2686     //  changing an interface's table has consequences for any connecteds
2687     //  and adj-fibs already installed.
2688     //
2689     vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2690     im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2691   }
2692
2693  done:
2694   return error;
2695 }
2696
2697 /*?
2698  * Place the indicated interface into the supplied IPv4 FIB table (also known
2699  * as a VRF). If the FIB table does not exist, this command creates it. To
2700  * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2701  * FIB table will only be displayed if a route has been added to the table, or
2702  * an IP Address is assigned to an interface in the table (which adds a route
2703  * automatically).
2704  *
2705  * @note IP addresses added after setting the interface IP table end up in
2706  * the indicated FIB table. If the IP address is added prior to adding the
2707  * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2708  * but potentially counter-intuitive results occur if you provision interface
2709  * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2710  * IP table ID provisioned. It might be marginally useful to evade source RPF
2711  * drops to put an interface address into multiple FIBs.
2712  *
2713  * @cliexpar
2714  * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2715  * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2716  ?*/
2717 /* *INDENT-OFF* */
2718 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2719   .path = "set interface ip table",
2720   .function = add_del_interface_table,
2721   .short_help = "set interface ip table <interface> <table-id>",
2722 };
2723 /* *INDENT-ON* */
2724
2725
2726 static uword
2727 ip4_lookup_multicast (vlib_main_t * vm,
2728                       vlib_node_runtime_t * node,
2729                       vlib_frame_t * frame)
2730 {
2731   ip4_main_t * im = &ip4_main;
2732   vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2733   u32 n_left_from, n_left_to_next, * from, * to_next;
2734   ip_lookup_next_t next;
2735   u32 cpu_index = os_get_cpu_number();
2736
2737   from = vlib_frame_vector_args (frame);
2738   n_left_from = frame->n_vectors;
2739   next = node->cached_next_index;
2740
2741   while (n_left_from > 0)
2742     {
2743       vlib_get_next_frame (vm, node, next,
2744                            to_next, n_left_to_next);
2745
2746       while (n_left_from >= 4 && n_left_to_next >= 2)
2747         {
2748           vlib_buffer_t * p0, * p1;
2749           u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2750           ip_lookup_next_t next0, next1;
2751           ip4_header_t * ip0, * ip1;
2752           u32 fib_index0, fib_index1;
2753           const dpo_id_t *dpo0, *dpo1;
2754           const load_balance_t * lb0, * lb1;
2755
2756           /* Prefetch next iteration. */
2757           {
2758             vlib_buffer_t * p2, * p3;
2759
2760             p2 = vlib_get_buffer (vm, from[2]);
2761             p3 = vlib_get_buffer (vm, from[3]);
2762
2763             vlib_prefetch_buffer_header (p2, LOAD);
2764             vlib_prefetch_buffer_header (p3, LOAD);
2765
2766             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2767             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2768           }
2769
2770           pi0 = to_next[0] = from[0];
2771           pi1 = to_next[1] = from[1];
2772
2773           p0 = vlib_get_buffer (vm, pi0);
2774           p1 = vlib_get_buffer (vm, pi1);
2775
2776           ip0 = vlib_buffer_get_current (p0);
2777           ip1 = vlib_buffer_get_current (p1);
2778
2779           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2780           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2781           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2782             fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2783           fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2784             fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2785
2786           lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2787                                                &ip0->dst_address);
2788           lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2789                                                &ip1->dst_address);
2790
2791           lb0 = load_balance_get (lb_index0);
2792           lb1 = load_balance_get (lb_index1);
2793
2794           ASSERT (lb0->lb_n_buckets > 0);
2795           ASSERT (is_pow2 (lb0->lb_n_buckets));
2796           ASSERT (lb1->lb_n_buckets > 0);
2797           ASSERT (is_pow2 (lb1->lb_n_buckets));
2798
2799           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2800               (ip0, lb0->lb_hash_config);
2801
2802           vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2803               (ip1, lb1->lb_hash_config);
2804
2805           dpo0 = load_balance_get_bucket_i(lb0,
2806                                            (vnet_buffer (p0)->ip.flow_hash &
2807                                             (lb0->lb_n_buckets_minus_1)));
2808           dpo1 = load_balance_get_bucket_i(lb1,
2809                                            (vnet_buffer (p1)->ip.flow_hash &
2810                                             (lb0->lb_n_buckets_minus_1)));
2811
2812           next0 = dpo0->dpoi_next_node;
2813           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2814           next1 = dpo1->dpoi_next_node;
2815           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2816
2817           if (1) /* $$$$$$ HACK FIXME */
2818           vlib_increment_combined_counter
2819               (cm, cpu_index, lb_index0, 1,
2820                vlib_buffer_length_in_chain (vm, p0));
2821           if (1) /* $$$$$$ HACK FIXME */
2822           vlib_increment_combined_counter
2823               (cm, cpu_index, lb_index1, 1,
2824                vlib_buffer_length_in_chain (vm, p1));
2825
2826           from += 2;
2827           to_next += 2;
2828           n_left_to_next -= 2;
2829           n_left_from -= 2;
2830
2831           wrong_next = (next0 != next) + 2*(next1 != next);
2832           if (PREDICT_FALSE (wrong_next != 0))
2833             {
2834               switch (wrong_next)
2835                 {
2836                 case 1:
2837                   /* A B A */
2838                   to_next[-2] = pi1;
2839                   to_next -= 1;
2840                   n_left_to_next += 1;
2841                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2842                   break;
2843
2844                 case 2:
2845                   /* A A B */
2846                   to_next -= 1;
2847                   n_left_to_next += 1;
2848                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2849                   break;
2850
2851                 case 3:
2852                   /* A B C */
2853                   to_next -= 2;
2854                   n_left_to_next += 2;
2855                   vlib_set_next_frame_buffer (vm, node, next0, pi0);
2856                   vlib_set_next_frame_buffer (vm, node, next1, pi1);
2857                   if (next0 == next1)
2858                     {
2859                       /* A B B */
2860                       vlib_put_next_frame (vm, node, next, n_left_to_next);
2861                       next = next1;
2862                       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2863                     }
2864                 }
2865             }
2866         }
2867
2868       while (n_left_from > 0 && n_left_to_next > 0)
2869         {
2870           vlib_buffer_t * p0;
2871           ip4_header_t * ip0;
2872           u32 pi0, lb_index0;
2873           ip_lookup_next_t next0;
2874           u32 fib_index0;
2875           const dpo_id_t *dpo0;
2876           const load_balance_t * lb0;
2877
2878           pi0 = from[0];
2879           to_next[0] = pi0;
2880
2881           p0 = vlib_get_buffer (vm, pi0);
2882
2883           ip0 = vlib_buffer_get_current (p0);
2884
2885           fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2886                                 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2887           fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2888               fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2889
2890           lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2891                                                &ip0->dst_address);
2892
2893           lb0 = load_balance_get (lb_index0);
2894
2895           ASSERT (lb0->lb_n_buckets > 0);
2896           ASSERT (is_pow2 (lb0->lb_n_buckets));
2897
2898           vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2899               (ip0, lb0->lb_hash_config);
2900
2901           dpo0 = load_balance_get_bucket_i(lb0,
2902                                            (vnet_buffer (p0)->ip.flow_hash &
2903                                             (lb0->lb_n_buckets_minus_1)));
2904
2905           next0 = dpo0->dpoi_next_node;
2906           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2907
2908           if (1) /* $$$$$$ HACK FIXME */
2909               vlib_increment_combined_counter
2910                   (cm, cpu_index, lb_index0, 1,
2911                    vlib_buffer_length_in_chain (vm, p0));
2912
2913           from += 1;
2914           to_next += 1;
2915           n_left_to_next -= 1;
2916           n_left_from -= 1;
2917
2918           if (PREDICT_FALSE (next0 != next))
2919             {
2920               n_left_to_next += 1;
2921               vlib_put_next_frame (vm, node, next, n_left_to_next);
2922               next = next0;
2923               vlib_get_next_frame (vm, node, next,
2924                                    to_next, n_left_to_next);
2925               to_next[0] = pi0;
2926               to_next += 1;
2927               n_left_to_next -= 1;
2928             }
2929         }
2930
2931       vlib_put_next_frame (vm, node, next, n_left_to_next);
2932     }
2933
2934   if (node->flags & VLIB_NODE_FLAG_TRACE)
2935       ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2936
2937   return frame->n_vectors;
2938 }
2939
2940 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2941   .function = ip4_lookup_multicast,
2942   .name = "ip4-lookup-multicast",
2943   .vector_size = sizeof (u32),
2944   .sibling_of = "ip4-lookup",
2945   .format_trace = format_ip4_lookup_trace,
2946
2947   .n_next_nodes = 0,
2948 };
2949
2950 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
2951
2952 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2953   .function = ip4_drop,
2954   .name = "ip4-multicast",
2955   .vector_size = sizeof (u32),
2956
2957   .format_trace = format_ip4_forward_next_trace,
2958
2959   .n_next_nodes = 1,
2960   .next_nodes = {
2961     [0] = "error-drop",
2962   },
2963 };
2964
2965 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2966 {
2967   ip4_fib_mtrie_t * mtrie0;
2968   ip4_fib_mtrie_leaf_t leaf0;
2969   u32 lbi0;
2970
2971   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2972
2973   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2974   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2975   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2976   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2977   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2978
2979   /* Handle default route. */
2980   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2981
2982   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2983
2984   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
2985 }
2986
2987 static clib_error_t *
2988 test_lookup_command_fn (vlib_main_t * vm,
2989                         unformat_input_t * input,
2990                         vlib_cli_command_t * cmd)
2991 {
2992   ip4_fib_t *fib;
2993   u32 table_id = 0;
2994   f64 count = 1;
2995   u32 n;
2996   int i;
2997   ip4_address_t ip4_base_address;
2998   u64 errors = 0;
2999
3000   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3001       if (unformat (input, "table %d", &table_id))
3002       {
3003           /* Make sure the entry exists. */
3004           fib = ip4_fib_get(table_id);
3005           if ((fib) && (fib->index != table_id))
3006               return clib_error_return (0, "<fib-index> %d does not exist",
3007                                         table_id);
3008       }
3009       else if (unformat (input, "count %f", &count))
3010         ;
3011
3012       else if (unformat (input, "%U",
3013                          unformat_ip4_address, &ip4_base_address))
3014         ;
3015       else
3016         return clib_error_return (0, "unknown input `%U'",
3017                                   format_unformat_error, input);
3018   }
3019
3020   n = count;
3021
3022   for (i = 0; i < n; i++)
3023     {
3024       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3025         errors++;
3026
3027       ip4_base_address.as_u32 =
3028         clib_host_to_net_u32 (1 +
3029                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3030     }
3031
3032   if (errors)
3033     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3034   else
3035     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3036
3037   return 0;
3038 }
3039
3040 /*?
3041  * Perform a lookup of an IPv4 Address (or range of addresses) in the
3042  * given FIB table to determine if there is a conflict with the
3043  * adjacency table. The fib-id can be determined by using the
3044  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3045  * of 0 is used.
3046  *
3047  * @todo This command uses fib-id, other commands use table-id (not
3048  * just a name, they are different indexes). Would like to change this
3049  * to table-id for consistency.
3050  *
3051  * @cliexpar
3052  * Example of how to run the test lookup command:
3053  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3054  * No errors in 2 lookups
3055  * @cliexend
3056 ?*/
3057 /* *INDENT-OFF* */
3058 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3059     .path = "test lookup",
3060     .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3061     .function = test_lookup_command_fn,
3062 };
3063 /* *INDENT-ON* */
3064
3065 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3066 {
3067   ip4_main_t * im4 = &ip4_main;
3068   ip4_fib_t * fib;
3069   uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3070
3071   if (p == 0)
3072     return VNET_API_ERROR_NO_SUCH_FIB;
3073
3074   fib = ip4_fib_get (p[0]);
3075
3076   fib->flow_hash_config = flow_hash_config;
3077   return 0;
3078 }
3079
3080 static clib_error_t *
3081 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3082                              unformat_input_t * input,
3083                              vlib_cli_command_t * cmd)
3084 {
3085   int matched = 0;
3086   u32 table_id = 0;
3087   u32 flow_hash_config = 0;
3088   int rv;
3089
3090   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3091     if (unformat (input, "table %d", &table_id))
3092       matched = 1;
3093 #define _(a,v) \
3094     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3095     foreach_flow_hash_bit
3096 #undef _
3097     else break;
3098   }
3099
3100   if (matched == 0)
3101     return clib_error_return (0, "unknown input `%U'",
3102                               format_unformat_error, input);
3103
3104   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3105   switch (rv)
3106     {
3107     case 0:
3108       break;
3109
3110     case VNET_API_ERROR_NO_SUCH_FIB:
3111       return clib_error_return (0, "no such FIB table %d", table_id);
3112
3113     default:
3114       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3115       break;
3116     }
3117
3118   return 0;
3119 }
3120
3121 /*?
3122  * Configure the set of IPv4 fields used by the flow hash.
3123  *
3124  * @cliexpar
3125  * Example of how to set the flow hash on a given table:
3126  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3127  * Example of how to display the configured flow hash:
3128  * @cliexstart{show ip fib}
3129  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3130  * 0.0.0.0/0
3131  *   unicast-ip4-chain
3132  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3133  *     [0] [@0]: dpo-drop ip6
3134  * 0.0.0.0/32
3135  *   unicast-ip4-chain
3136  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3137  *     [0] [@0]: dpo-drop ip6
3138  * 224.0.0.0/8
3139  *   unicast-ip4-chain
3140  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3141  *     [0] [@0]: dpo-drop ip6
3142  * 6.0.1.2/32
3143  *   unicast-ip4-chain
3144  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3145  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3146  * 7.0.0.1/32
3147  *   unicast-ip4-chain
3148  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3149  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3150  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3151  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3152  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3153  * 240.0.0.0/8
3154  *   unicast-ip4-chain
3155  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3156  *     [0] [@0]: dpo-drop ip6
3157  * 255.255.255.255/32
3158  *   unicast-ip4-chain
3159  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3160  *     [0] [@0]: dpo-drop ip6
3161  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3162  * 0.0.0.0/0
3163  *   unicast-ip4-chain
3164  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3165  *     [0] [@0]: dpo-drop ip6
3166  * 0.0.0.0/32
3167  *   unicast-ip4-chain
3168  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3169  *     [0] [@0]: dpo-drop ip6
3170  * 172.16.1.0/24
3171  *   unicast-ip4-chain
3172  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3173  *     [0] [@4]: ipv4-glean: af_packet0
3174  * 172.16.1.1/32
3175  *   unicast-ip4-chain
3176  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3177  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3178  * 172.16.1.2/32
3179  *   unicast-ip4-chain
3180  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3181  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3182  * 172.16.2.0/24
3183  *   unicast-ip4-chain
3184  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3185  *     [0] [@4]: ipv4-glean: af_packet1
3186  * 172.16.2.1/32
3187  *   unicast-ip4-chain
3188  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3189  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3190  * 224.0.0.0/8
3191  *   unicast-ip4-chain
3192  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3193  *     [0] [@0]: dpo-drop ip6
3194  * 240.0.0.0/8
3195  *   unicast-ip4-chain
3196  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3197  *     [0] [@0]: dpo-drop ip6
3198  * 255.255.255.255/32
3199  *   unicast-ip4-chain
3200  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3201  *     [0] [@0]: dpo-drop ip6
3202  * @cliexend
3203 ?*/
3204 /* *INDENT-OFF* */
3205 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3206   .path = "set ip flow-hash",
3207   .short_help =
3208   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3209   .function = set_ip_flow_hash_command_fn,
3210 };
3211 /* *INDENT-ON* */
3212
3213 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3214                                  u32 table_index)
3215 {
3216   vnet_main_t * vnm = vnet_get_main();
3217   vnet_interface_main_t * im = &vnm->interface_main;
3218   ip4_main_t * ipm = &ip4_main;
3219   ip_lookup_main_t * lm = &ipm->lookup_main;
3220   vnet_classify_main_t * cm = &vnet_classify_main;
3221   ip4_address_t *if_addr;
3222
3223   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3224     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3225
3226   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3227     return VNET_API_ERROR_NO_SUCH_ENTRY;
3228
3229   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3230   lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3231
3232   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3233
3234   if (NULL != if_addr)
3235   {
3236       fib_prefix_t pfx = {
3237           .fp_len = 32,
3238           .fp_proto = FIB_PROTOCOL_IP4,
3239           .fp_addr.ip4 = *if_addr,
3240       };
3241       u32 fib_index;
3242
3243       fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3244                                                       sw_if_index);
3245
3246
3247       if (table_index != (u32) ~0)
3248       {
3249           dpo_id_t dpo = DPO_NULL;
3250
3251           dpo_set(&dpo,
3252                   DPO_CLASSIFY,
3253                   DPO_PROTO_IP4,
3254                   classify_dpo_create(FIB_PROTOCOL_IP4,
3255                                       table_index));
3256
3257           fib_table_entry_special_dpo_add(fib_index,
3258                                           &pfx,
3259                                           FIB_SOURCE_CLASSIFY,
3260                                           FIB_ENTRY_FLAG_NONE,
3261                                           &dpo);
3262           dpo_reset(&dpo);
3263       }
3264       else
3265       {
3266           fib_table_entry_special_remove(fib_index,
3267                                          &pfx,
3268                                          FIB_SOURCE_CLASSIFY);
3269       }
3270   }
3271
3272   return 0;
3273 }
3274
3275 static clib_error_t *
3276 set_ip_classify_command_fn (vlib_main_t * vm,
3277                             unformat_input_t * input,
3278                             vlib_cli_command_t * cmd)
3279 {
3280   u32 table_index = ~0;
3281   int table_index_set = 0;
3282   u32 sw_if_index = ~0;
3283   int rv;
3284
3285   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3286     if (unformat (input, "table-index %d", &table_index))
3287       table_index_set = 1;
3288     else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3289                        vnet_get_main(), &sw_if_index))
3290       ;
3291     else
3292       break;
3293   }
3294
3295   if (table_index_set == 0)
3296     return clib_error_return (0, "classify table-index must be specified");
3297
3298   if (sw_if_index == ~0)
3299     return clib_error_return (0, "interface / subif must be specified");
3300
3301   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3302
3303   switch (rv)
3304     {
3305     case 0:
3306       break;
3307
3308     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3309       return clib_error_return (0, "No such interface");
3310
3311     case VNET_API_ERROR_NO_SUCH_ENTRY:
3312       return clib_error_return (0, "No such classifier table");
3313     }
3314   return 0;
3315 }
3316
3317 /*?
3318  * Assign a classification table to an interface. The classification
3319  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3320  * commands. Once the table is created, use this command to filter packets
3321  * on an interface.
3322  *
3323  * @cliexpar
3324  * Example of how to assign a classification table to an interface:
3325  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3326 ?*/
3327 /* *INDENT-OFF* */
3328 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3329     .path = "set ip classify",
3330     .short_help =
3331     "set ip classify intfc <interface> table-index <classify-idx>",
3332     .function = set_ip_classify_command_fn,
3333 };
3334 /* *INDENT-ON* */
3335