6d5f07bda2c162c1dcc36e216c382cb00d68c2c6
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/load_balance_map.h>
53 #include <vnet/dpo/classify_dpo.h>
54 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
55
56 #include <vnet/ip/ip4_forward.h>
57
58 /** @brief IPv4 lookup node.
59     @node ip4-lookup
60
61     This is the main IPv4 lookup dispatch node.
62
63     @param vm vlib_main_t corresponding to the current thread
64     @param node vlib_node_runtime_t
65     @param frame vlib_frame_t whose contents should be dispatched
66
67     @par Graph mechanics: buffer metadata, next index usage
68
69     @em Uses:
70     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
71         - Indicates the @c sw_if_index value of the interface that the
72           packet was received on.
73     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
74         - When the value is @c ~0 then the node performs a longest prefix
75           match (LPM) for the packet destination address in the FIB attached
76           to the receive interface.
77         - Otherwise perform LPM for the packet destination address in the
78           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
79           value (0, 1, ...) and not a VRF id.
80
81     @em Sets:
82     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
83         - The lookup result adjacency index.
84
85     <em>Next Index:</em>
86     - Dispatches the packet to the node index found in
87       ip_adjacency_t @c adj->lookup_next_index
88       (where @c adj is the lookup result adjacency).
89 */
90 static uword
91 ip4_lookup (vlib_main_t * vm,
92             vlib_node_runtime_t * node, vlib_frame_t * frame)
93 {
94   return ip4_lookup_inline (vm, node, frame,
95                             /* lookup_for_responses_to_locally_received_packets */
96                             0);
97
98 }
99
/* Forward declaration: the node registrations in this file reference the
   trace formatter before its definition appears. */
100 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
101
102 /* *INDENT-OFF* */
/* Graph node registration for "ip4-lookup".  The node dispatches through
   ip4_lookup(), its frame vector elements are u32-sized, traces are
   rendered by format_ip4_lookup_trace, and its next-node table is given
   by IP_LOOKUP_N_NEXT / IP4_LOOKUP_NEXT_NODES. */
103 VLIB_REGISTER_NODE (ip4_lookup_node) =
104 {
105   .function = ip4_lookup,
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),
108   .format_trace = format_ip4_lookup_trace,
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,
111 };
112 /* *INDENT-ON* */
113
/* NOTE(review): presumably emits per-CPU-architecture variants of
   ip4_lookup for this node -- confirm against the vlib macro. */
114 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
115
116 always_inline uword
117 ip4_load_balance (vlib_main_t * vm,
118                   vlib_node_runtime_t * node, vlib_frame_t * frame)
119 {
120   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
121   u32 n_left_from, n_left_to_next, *from, *to_next;
122   ip_lookup_next_t next;
123   u32 thread_index = vlib_get_thread_index ();
124
125   from = vlib_frame_vector_args (frame);
126   n_left_from = frame->n_vectors;
127   next = node->cached_next_index;
128
129   if (node->flags & VLIB_NODE_FLAG_TRACE)
130     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
131
132   while (n_left_from > 0)
133     {
134       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
135
136
137       while (n_left_from >= 4 && n_left_to_next >= 2)
138         {
139           ip_lookup_next_t next0, next1;
140           const load_balance_t *lb0, *lb1;
141           vlib_buffer_t *p0, *p1;
142           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
143           const ip4_header_t *ip0, *ip1;
144           const dpo_id_t *dpo0, *dpo1;
145
146           /* Prefetch next iteration. */
147           {
148             vlib_buffer_t *p2, *p3;
149
150             p2 = vlib_get_buffer (vm, from[2]);
151             p3 = vlib_get_buffer (vm, from[3]);
152
153             vlib_prefetch_buffer_header (p2, STORE);
154             vlib_prefetch_buffer_header (p3, STORE);
155
156             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
157             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
158           }
159
160           pi0 = to_next[0] = from[0];
161           pi1 = to_next[1] = from[1];
162
163           from += 2;
164           n_left_from -= 2;
165           to_next += 2;
166           n_left_to_next -= 2;
167
168           p0 = vlib_get_buffer (vm, pi0);
169           p1 = vlib_get_buffer (vm, pi1);
170
171           ip0 = vlib_buffer_get_current (p0);
172           ip1 = vlib_buffer_get_current (p1);
173           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
174           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
175
176           lb0 = load_balance_get (lbi0);
177           lb1 = load_balance_get (lbi1);
178
179           /*
180            * this node is for via FIBs we can re-use the hash value from the
181            * to node if present.
182            * We don't want to use the same hash value at each level in the recursion
183            * graph as that would lead to polarisation
184            */
185           hc0 = hc1 = 0;
186
187           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
188             {
189               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
190                 {
191                   hc0 = vnet_buffer (p0)->ip.flow_hash =
192                     vnet_buffer (p0)->ip.flow_hash >> 1;
193                 }
194               else
195                 {
196                   hc0 = vnet_buffer (p0)->ip.flow_hash =
197                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
198                 }
199               dpo0 = load_balance_get_fwd_bucket
200                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
201             }
202           else
203             {
204               dpo0 = load_balance_get_bucket_i (lb0, 0);
205             }
206           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
207             {
208               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
209                 {
210                   hc1 = vnet_buffer (p1)->ip.flow_hash =
211                     vnet_buffer (p1)->ip.flow_hash >> 1;
212                 }
213               else
214                 {
215                   hc1 = vnet_buffer (p1)->ip.flow_hash =
216                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
217                 }
218               dpo1 = load_balance_get_fwd_bucket
219                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
220             }
221           else
222             {
223               dpo1 = load_balance_get_bucket_i (lb1, 0);
224             }
225
226           next0 = dpo0->dpoi_next_node;
227           next1 = dpo1->dpoi_next_node;
228
229           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
230           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
231
232           vlib_increment_combined_counter
233             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
234           vlib_increment_combined_counter
235             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
236
237           vlib_validate_buffer_enqueue_x2 (vm, node, next,
238                                            to_next, n_left_to_next,
239                                            pi0, pi1, next0, next1);
240         }
241
242       while (n_left_from > 0 && n_left_to_next > 0)
243         {
244           ip_lookup_next_t next0;
245           const load_balance_t *lb0;
246           vlib_buffer_t *p0;
247           u32 pi0, lbi0, hc0;
248           const ip4_header_t *ip0;
249           const dpo_id_t *dpo0;
250
251           pi0 = from[0];
252           to_next[0] = pi0;
253           from += 1;
254           to_next += 1;
255           n_left_to_next -= 1;
256           n_left_from -= 1;
257
258           p0 = vlib_get_buffer (vm, pi0);
259
260           ip0 = vlib_buffer_get_current (p0);
261           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
262
263           lb0 = load_balance_get (lbi0);
264
265           hc0 = 0;
266           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
267             {
268               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
269                 {
270                   hc0 = vnet_buffer (p0)->ip.flow_hash =
271                     vnet_buffer (p0)->ip.flow_hash >> 1;
272                 }
273               else
274                 {
275                   hc0 = vnet_buffer (p0)->ip.flow_hash =
276                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
277                 }
278               dpo0 = load_balance_get_fwd_bucket
279                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
280             }
281           else
282             {
283               dpo0 = load_balance_get_bucket_i (lb0, 0);
284             }
285
286           next0 = dpo0->dpoi_next_node;
287           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
288
289           vlib_increment_combined_counter
290             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
291
292           vlib_validate_buffer_enqueue_x1 (vm, node, next,
293                                            to_next, n_left_to_next,
294                                            pi0, next0);
295         }
296
297       vlib_put_next_frame (vm, node, next, n_left_to_next);
298     }
299
300   return frame->n_vectors;
301 }
302
303 /* *INDENT-OFF* */
/* Graph node registration for "ip4-load-balance".  No next-node table is
   listed here; the node is declared a sibling of "ip4-lookup" instead.
   NOTE(review): presumably sibling_of makes it share ip4-lookup's next
   nodes -- confirm against the vlib node registration documentation.
   Traces reuse the ip4-lookup trace formatter. */
304 VLIB_REGISTER_NODE (ip4_load_balance_node) =
305 {
306   .function = ip4_load_balance,
307   .name = "ip4-load-balance",
308   .vector_size = sizeof (u32),
309   .sibling_of = "ip4-lookup",
310   .format_trace =
311   format_ip4_lookup_trace,
312 };
313 /* *INDENT-ON* */
314
315 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
316
317 /* get first interface address */
318 ip4_address_t *
319 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
320                              ip_interface_address_t ** result_ia)
321 {
322   ip_lookup_main_t *lm = &im->lookup_main;
323   ip_interface_address_t *ia = 0;
324   ip4_address_t *result = 0;
325
326   /* *INDENT-OFF* */
327   foreach_ip_interface_address
328     (lm, ia, sw_if_index,
329      1 /* honor unnumbered */ ,
330      ({
331        ip4_address_t * a =
332          ip_interface_address_get_address (lm, ia);
333        result = a;
334        break;
335      }));
336   /* *INDENT-OFF* */
337   if (result_ia)
338     *result_ia = result ? ia : 0;
339   return result;
340 }
341
342 static void
343 ip4_add_interface_routes (u32 sw_if_index,
344                           ip4_main_t * im, u32 fib_index,
345                           ip_interface_address_t * a)
346 {
347   ip_lookup_main_t *lm = &im->lookup_main;
348   ip4_address_t *address = ip_interface_address_get_address (lm, a);
349   fib_prefix_t pfx = {
350     .fp_len = a->address_length,
351     .fp_proto = FIB_PROTOCOL_IP4,
352     .fp_addr.ip4 = *address,
353   };
354
355   if (pfx.fp_len <= 30)
356     {
357       /* a /30 or shorter - add a glean for the network address */
358       fib_table_entry_update_one_path (fib_index, &pfx,
359                                        FIB_SOURCE_INTERFACE,
360                                        (FIB_ENTRY_FLAG_CONNECTED |
361                                         FIB_ENTRY_FLAG_ATTACHED),
362                                        DPO_PROTO_IP4,
363                                        /* No next-hop address */
364                                        NULL,
365                                        sw_if_index,
366                                        // invalid FIB index
367                                        ~0,
368                                        1,
369                                        // no out-label stack
370                                        NULL,
371                                        FIB_ROUTE_PATH_FLAG_NONE);
372
373       /* Add the two broadcast addresses as drop */
374       fib_prefix_t net_pfx = {
375         .fp_len = 32,
376         .fp_proto = FIB_PROTOCOL_IP4,
377         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
378       };
379       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
380         fib_table_entry_special_add(fib_index,
381                                     &net_pfx,
382                                     FIB_SOURCE_INTERFACE,
383                                     (FIB_ENTRY_FLAG_DROP |
384                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
385       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
386       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
387         fib_table_entry_special_add(fib_index,
388                                     &net_pfx,
389                                     FIB_SOURCE_INTERFACE,
390                                     (FIB_ENTRY_FLAG_DROP |
391                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
392     }
393   else if (pfx.fp_len == 31)
394     {
395       u32 mask = clib_host_to_net_u32(1);
396       fib_prefix_t net_pfx = pfx;
397
398       net_pfx.fp_len = 32;
399       net_pfx.fp_addr.ip4.as_u32 ^= mask;
400
401       /* a /31 - add the other end as an attached host */
402       fib_table_entry_update_one_path (fib_index, &net_pfx,
403                                        FIB_SOURCE_INTERFACE,
404                                        (FIB_ENTRY_FLAG_ATTACHED),
405                                        DPO_PROTO_IP4,
406                                        &net_pfx.fp_addr,
407                                        sw_if_index,
408                                        // invalid FIB index
409                                        ~0,
410                                        1,
411                                        NULL,
412                                        FIB_ROUTE_PATH_FLAG_NONE);
413     }
414   pfx.fp_len = 32;
415
416   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
417     {
418       u32 classify_table_index =
419         lm->classify_table_index_by_sw_if_index[sw_if_index];
420       if (classify_table_index != (u32) ~ 0)
421         {
422           dpo_id_t dpo = DPO_INVALID;
423
424           dpo_set (&dpo,
425                    DPO_CLASSIFY,
426                    DPO_PROTO_IP4,
427                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
428
429           fib_table_entry_special_dpo_add (fib_index,
430                                            &pfx,
431                                            FIB_SOURCE_CLASSIFY,
432                                            FIB_ENTRY_FLAG_NONE, &dpo);
433           dpo_reset (&dpo);
434         }
435     }
436
437   fib_table_entry_update_one_path (fib_index, &pfx,
438                                    FIB_SOURCE_INTERFACE,
439                                    (FIB_ENTRY_FLAG_CONNECTED |
440                                     FIB_ENTRY_FLAG_LOCAL),
441                                    DPO_PROTO_IP4,
442                                    &pfx.fp_addr,
443                                    sw_if_index,
444                                    // invalid FIB index
445                                    ~0,
446                                    1, NULL,
447                                    FIB_ROUTE_PATH_FLAG_NONE);
448 }
449
450 static void
451 ip4_del_interface_routes (ip4_main_t * im,
452                           u32 fib_index,
453                           ip4_address_t * address, u32 address_length)
454 {
455   fib_prefix_t pfx = {
456     .fp_len = address_length,
457     .fp_proto = FIB_PROTOCOL_IP4,
458     .fp_addr.ip4 = *address,
459   };
460
461   if (pfx.fp_len <= 30)
462     {
463       fib_prefix_t net_pfx = {
464         .fp_len = 32,
465         .fp_proto = FIB_PROTOCOL_IP4,
466         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
467       };
468       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
469         fib_table_entry_special_remove(fib_index,
470                                        &net_pfx,
471                                        FIB_SOURCE_INTERFACE);
472       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
473       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
474         fib_table_entry_special_remove(fib_index,
475                                        &net_pfx,
476                                        FIB_SOURCE_INTERFACE);
477       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
478     }
479     else if (pfx.fp_len == 31)
480     {
481       u32 mask = clib_host_to_net_u32(1);
482       fib_prefix_t net_pfx = pfx;
483
484       net_pfx.fp_len = 32;
485       net_pfx.fp_addr.ip4.as_u32 ^= mask;
486
487       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
488     }
489
490   pfx.fp_len = 32;
491   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
492 }
493
494 void
495 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
496 {
497   ip4_main_t *im = &ip4_main;
498
499   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
500
501   /*
502    * enable/disable only on the 1<->0 transition
503    */
504   if (is_enable)
505     {
506       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
507         return;
508     }
509   else
510     {
511       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
512       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
513         return;
514     }
515   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
516                                !is_enable, 0, 0);
517
518
519   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
520                                sw_if_index, !is_enable, 0, 0);
521 }
522
523 static clib_error_t *
524 ip4_add_del_interface_address_internal (vlib_main_t * vm,
525                                         u32 sw_if_index,
526                                         ip4_address_t * address,
527                                         u32 address_length, u32 is_del)
528 {
529   vnet_main_t *vnm = vnet_get_main ();
530   ip4_main_t *im = &ip4_main;
531   ip_lookup_main_t *lm = &im->lookup_main;
532   clib_error_t *error = 0;
533   u32 if_address_index, elts_before;
534   ip4_address_fib_t ip4_af, *addr_fib = 0;
535
536   /* local0 interface doesn't support IP addressing  */
537   if (sw_if_index == 0)
538     {
539       return
540        clib_error_create ("local0 interface doesn't support IP addressing");
541     }
542
543   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
544   ip4_addr_fib_init (&ip4_af, address,
545                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
546   vec_add1 (addr_fib, ip4_af);
547
548   /*
549    * there is no support for adj-fib handling in the presence of overlapping
550    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
551    * most routers do.
552    */
553   /* *INDENT-OFF* */
554   if (!is_del)
555     {
556       /* When adding an address check that it does not conflict
557          with an existing address on any interface in this table. */
558       ip_interface_address_t *ia;
559       vnet_sw_interface_t *sif;
560
561       pool_foreach(sif, vnm->interface_main.sw_interfaces,
562       ({
563           if (im->fib_index_by_sw_if_index[sw_if_index] ==
564               im->fib_index_by_sw_if_index[sif->sw_if_index])
565             {
566               foreach_ip_interface_address
567                 (&im->lookup_main, ia, sif->sw_if_index,
568                  0 /* honor unnumbered */ ,
569                  ({
570                    ip4_address_t * x =
571                      ip_interface_address_get_address
572                      (&im->lookup_main, ia);
573                    if (ip4_destination_matches_route
574                        (im, address, x, ia->address_length) ||
575                        ip4_destination_matches_route (im,
576                                                       x,
577                                                       address,
578                                                       address_length))
579                      {
580                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
581
582                        return
583                          clib_error_create
584                          ("failed to add %U which conflicts with %U for interface %U",
585                           format_ip4_address_and_length, address,
586                           address_length,
587                           format_ip4_address_and_length, x,
588                           ia->address_length,
589                           format_vnet_sw_if_index_name, vnm,
590                           sif->sw_if_index);
591                      }
592                  }));
593             }
594       }));
595     }
596   /* *INDENT-ON* */
597
598   elts_before = pool_elts (lm->if_address_pool);
599
600   error = ip_interface_address_add_del
601     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
602   if (error)
603     goto done;
604
605   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
606
607   if (is_del)
608     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
609   else
610     ip4_add_interface_routes (sw_if_index,
611                               im, ip4_af.fib_index,
612                               pool_elt_at_index
613                               (lm->if_address_pool, if_address_index));
614
615   /* If pool did not grow/shrink: add duplicate address. */
616   if (elts_before != pool_elts (lm->if_address_pool))
617     {
618       ip4_add_del_interface_address_callback_t *cb;
619       vec_foreach (cb, im->add_del_interface_address_callbacks)
620         cb->function (im, cb->function_opaque, sw_if_index,
621                       address, address_length, if_address_index, is_del);
622     }
623
624 done:
625   vec_free (addr_fib);
626   return error;
627 }
628
629 clib_error_t *
630 ip4_add_del_interface_address (vlib_main_t * vm,
631                                u32 sw_if_index,
632                                ip4_address_t * address,
633                                u32 address_length, u32 is_del)
634 {
635   return ip4_add_del_interface_address_internal
636     (vm, sw_if_index, address, address_length, is_del);
637 }
638
639 /* Built-in ip4 unicast rx feature arc.  The arc starts at the
 *  "ip4-input" / "ip4-input-no-checksum" nodes and its index is stored in
 *  ip4_main.lookup_main.ucast_feature_arc_index.  Each VNET_FEATURE_INIT
 *  below registers one node on the arc and lists in runs_before the
 *  feature(s) it is ordered ahead of. */
640 /* *INDENT-OFF* */
641 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
642 {
643   .arc_name = "ip4-unicast",
644   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
645   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
646 };
647
648 VNET_FEATURE_INIT (ip4_flow_classify, static) =
649 {
650   .arc_name = "ip4-unicast",
651   .node_name = "ip4-flow-classify",
652   .runs_before = VNET_FEATURES ("ip4-inacl"),
653 };
654
655 VNET_FEATURE_INIT (ip4_inacl, static) =
656 {
657   .arc_name = "ip4-unicast",
658   .node_name = "ip4-inacl",
659   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
660 };
661
662 VNET_FEATURE_INIT (ip4_source_check_1, static) =
663 {
664   .arc_name = "ip4-unicast",
665   .node_name = "ip4-source-check-via-rx",
666   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
667 };
668
669 VNET_FEATURE_INIT (ip4_source_check_2, static) =
670 {
671   .arc_name = "ip4-unicast",
672   .node_name = "ip4-source-check-via-any",
673   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
674 };
675
/* Like ip4-source-check-via-any above, this feature is only ordered
   relative to ip4-policer-classify. */
676 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
677 {
678   .arc_name = "ip4-unicast",
679   .node_name = "ip4-source-and-port-range-check-rx",
680   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
681 };
682
683 VNET_FEATURE_INIT (ip4_policer_classify, static) =
684 {
685   .arc_name = "ip4-unicast",
686   .node_name = "ip4-policer-classify",
687   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
688 };
689
690 VNET_FEATURE_INIT (ip4_ipsec, static) =
691 {
692   .arc_name = "ip4-unicast",
693   .node_name = "ipsec-input-ip4",
694   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
695 };
696
697 VNET_FEATURE_INIT (ip4_vpath, static) =
698 {
699   .arc_name = "ip4-unicast",
700   .node_name = "vpath-input-ip4",
701   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
702 };
703
704 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
705 {
706   .arc_name = "ip4-unicast",
707   .node_name = "ip4-vxlan-bypass",
708   .runs_before = VNET_FEATURES ("ip4-lookup"),
709 };
710
/* "ip4-not-enabled" is toggled per interface by
   ip4_sw_interface_enable_disable() and ip4_sw_interface_add_del() in this
   file; it is ordered only relative to ip4-lookup. */
711 VNET_FEATURE_INIT (ip4_not_enabled, static) =
712 {
713   .arc_name = "ip4-unicast",
714   .node_name = "ip4-not-enabled",
715   .runs_before = VNET_FEATURES ("ip4-lookup"),
716 };
717
718 VNET_FEATURE_INIT (ip4_lookup, static) =
719 {
720   .arc_name = "ip4-unicast",
721   .node_name = "ip4-lookup",
722   .runs_before = 0,     /* not before any other features */
723 };
724
725 /* Built-in ip4 multicast rx feature arc.  It starts at the same input
 *  nodes as the unicast arc; its index is stored in
 *  ip4_main.lookup_main.mcast_feature_arc_index.  Both registered features
 *  are ordered ahead of "ip4-mfib-forward-lookup", which is last. */
726 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
727 {
728   .arc_name = "ip4-multicast",
729   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
730   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
731 };
732
733 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
734 {
735   .arc_name = "ip4-multicast",
736   .node_name = "vpath-input-ip4",
737   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
738 };
739
/* Multicast counterpart of the unicast "ip4-not-enabled" feature; the
   enable/disable functions in this file toggle both together. */
740 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
741 {
742   .arc_name = "ip4-multicast",
743   .node_name = "ip4-not-enabled",
744   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
745 };
746
747 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
748 {
749   .arc_name = "ip4-multicast",
750   .node_name = "ip4-mfib-forward-lookup",
751   .runs_before = 0,     /* last feature */
752 };
753
754 /* ip4 output (tx) feature arc.  It starts at "ip4-rewrite",
 *  "ip4-midchain" and "ip4-dvr-dpo"; its index is stored in
 *  ip4_main.lookup_main.output_feature_arc_index.  The source and
 *  port-range tx check is the first feature in the ordering chain below. */
755 VNET_FEATURE_ARC_INIT (ip4_output, static) =
756 {
757   .arc_name = "ip4-output",
758   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
759   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
760 };
761
762 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
763 {
764   .arc_name = "ip4-output",
765   .node_name = "ip4-source-and-port-range-check-tx",
766   .runs_before = VNET_FEATURES ("ip4-outacl"),
767 };
768
769 VNET_FEATURE_INIT (ip4_outacl, static) =
770 {
771   .arc_name = "ip4-output",
772   .node_name = "ip4-outacl",
773   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
774 };
775
776 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
777 {
778   .arc_name = "ip4-output",
779   .node_name = "ipsec-output-ip4",
780   .runs_before = VNET_FEATURES ("interface-output"),
781 };
782
783 /* Built-in ip4 tx feature path definition: "interface-output" is the
 *  final feature on the arc. */
784 VNET_FEATURE_INIT (ip4_interface_output, static) =
785 {
786   .arc_name = "ip4-output",
787   .node_name = "interface-output",
788   .runs_before = 0,     /* not before any other features */
789 };
790 /* *INDENT-ON* */
791
792 static clib_error_t *
793 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
794 {
795   ip4_main_t *im = &ip4_main;
796
797   /* Fill in lookup tables with default table (0). */
798   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
799   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
800
801   if (!is_add)
802     {
803       ip4_main_t *im4 = &ip4_main;
804       ip_lookup_main_t *lm4 = &im4->lookup_main;
805       ip_interface_address_t *ia = 0;
806       ip4_address_t *address;
807       vlib_main_t *vm = vlib_get_main ();
808
809       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
810       /* *INDENT-OFF* */
811       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
812       ({
813         address = ip_interface_address_get_address (lm4, ia);
814         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
815       }));
816       /* *INDENT-ON* */
817     }
818
819   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
820                                is_add, 0, 0);
821
822   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
823                                sw_if_index, is_add, 0, 0);
824
825   return /* no error */ 0;
826 }
827
828 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
829
830 /* Global IP4 main: the single ip4_main_t instance.  Functions in this
 *  file reach it through &ip4_main, and the feature arc definitions store
 *  their arc indices in its lookup_main. */
831 ip4_main_t ip4_main;
832
833 clib_error_t *
834 ip4_lookup_init (vlib_main_t * vm)
835 {
836   ip4_main_t *im = &ip4_main;
837   clib_error_t *error;
838   uword i;
839
840   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
841     return error;
842   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
843     return (error);
844   if ((error = vlib_call_init_function (vm, fib_module_init)))
845     return error;
846   if ((error = vlib_call_init_function (vm, mfib_module_init)))
847     return error;
848
849   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
850     {
851       u32 m;
852
853       if (i < 32)
854         m = pow2_mask (i) << (32 - i);
855       else
856         m = ~0;
857       im->fib_masks[i] = clib_host_to_net_u32 (m);
858     }
859
860   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
861
862   /* Create FIB with index 0 and table id of 0. */
863   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
864                                      FIB_SOURCE_DEFAULT_ROUTE);
865   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
866                                       MFIB_SOURCE_DEFAULT_ROUTE);
867
868   {
869     pg_node_t *pn;
870     pn = pg_get_node (ip4_lookup_node.index);
871     pn->unformat_edit = unformat_pg_ip4_header;
872   }
873
874   {
875     ethernet_arp_header_t h;
876
877     memset (&h, 0, sizeof (h));
878
879     /* Set target ethernet address to all zeros. */
880     memset (h.ip4_over_ethernet[1].ethernet, 0,
881             sizeof (h.ip4_over_ethernet[1].ethernet));
882
883 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
884 #define _8(f,v) h.f = v;
885     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
886     _16 (l3_type, ETHERNET_TYPE_IP4);
887     _8 (n_l2_address_bytes, 6);
888     _8 (n_l3_address_bytes, 4);
889     _16 (opcode, ETHERNET_ARP_OPCODE_request);
890 #undef _16
891 #undef _8
892
893     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
894                                /* data */ &h,
895                                sizeof (h),
896                                /* alloc chunk size */ 8,
897                                "ip4 arp");
898   }
899
900   return error;
901 }
902
903 VLIB_INIT_FUNCTION (ip4_lookup_init);
904
905 typedef struct
906 {
907   /* Adjacency taken. */
908   u32 dpo_index;
909   u32 flow_hash;
910   u32 fib_index;
911
912   /* Packet data, possibly *after* rewrite. */
913   u8 packet_data[64 - 1 * sizeof (u32)];
914 }
915 ip4_forward_next_trace_t;
916
917 u8 *
918 format_ip4_forward_next_trace (u8 * s, va_list * args)
919 {
920   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
921   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
922   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
923   u32 indent = format_get_indent (s);
924   s = format (s, "%U%U",
925               format_white_space, indent,
926               format_ip4_header, t->packet_data, sizeof (t->packet_data));
927   return s;
928 }
929
930 static u8 *
931 format_ip4_lookup_trace (u8 * s, va_list * args)
932 {
933   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
934   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
935   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
936   u32 indent = format_get_indent (s);
937
938   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
939               t->fib_index, t->dpo_index, t->flow_hash);
940   s = format (s, "\n%U%U",
941               format_white_space, indent,
942               format_ip4_header, t->packet_data, sizeof (t->packet_data));
943   return s;
944 }
945
946 static u8 *
947 format_ip4_rewrite_trace (u8 * s, va_list * args)
948 {
949   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
950   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
951   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
952   u32 indent = format_get_indent (s);
953
954   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
955               t->fib_index, t->dpo_index, format_ip_adjacency,
956               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
957   s = format (s, "\n%U%U",
958               format_white_space, indent,
959               format_ip_adjacency_packet_data,
960               t->dpo_index, t->packet_data, sizeof (t->packet_data));
961   return s;
962 }
963
964 /* Common trace function for all ip4-forward next nodes. */
965 void
966 ip4_forward_next_trace (vlib_main_t * vm,
967                         vlib_node_runtime_t * node,
968                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
969 {
970   u32 *from, n_left;
971   ip4_main_t *im = &ip4_main;
972
973   n_left = frame->n_vectors;
974   from = vlib_frame_vector_args (frame);
975
976   while (n_left >= 4)
977     {
978       u32 bi0, bi1;
979       vlib_buffer_t *b0, *b1;
980       ip4_forward_next_trace_t *t0, *t1;
981
982       /* Prefetch next iteration. */
983       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
984       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
985
986       bi0 = from[0];
987       bi1 = from[1];
988
989       b0 = vlib_get_buffer (vm, bi0);
990       b1 = vlib_get_buffer (vm, bi1);
991
992       if (b0->flags & VLIB_BUFFER_IS_TRACED)
993         {
994           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
995           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
996           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
997           t0->fib_index =
998             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
999              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1000             vec_elt (im->fib_index_by_sw_if_index,
1001                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1002
1003           clib_memcpy (t0->packet_data,
1004                        vlib_buffer_get_current (b0),
1005                        sizeof (t0->packet_data));
1006         }
1007       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1008         {
1009           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1010           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1011           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1012           t1->fib_index =
1013             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1014              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1015             vec_elt (im->fib_index_by_sw_if_index,
1016                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1017           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1018                        sizeof (t1->packet_data));
1019         }
1020       from += 2;
1021       n_left -= 2;
1022     }
1023
1024   while (n_left >= 1)
1025     {
1026       u32 bi0;
1027       vlib_buffer_t *b0;
1028       ip4_forward_next_trace_t *t0;
1029
1030       bi0 = from[0];
1031
1032       b0 = vlib_get_buffer (vm, bi0);
1033
1034       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1035         {
1036           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1037           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1038           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1039           t0->fib_index =
1040             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1041              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1042             vec_elt (im->fib_index_by_sw_if_index,
1043                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1044           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1045                        sizeof (t0->packet_data));
1046         }
1047       from += 1;
1048       n_left -= 1;
1049     }
1050 }
1051
1052 /* Compute TCP/UDP/ICMP4 checksum in software. */
1053 u16
1054 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1055                               ip4_header_t * ip0)
1056 {
1057   ip_csum_t sum0;
1058   u32 ip_header_length, payload_length_host_byte_order;
1059   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1060   u16 sum16;
1061   void *data_this_buffer;
1062
1063   /* Initialize checksum with ip header. */
1064   ip_header_length = ip4_header_bytes (ip0);
1065   payload_length_host_byte_order =
1066     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1067   sum0 =
1068     clib_host_to_net_u32 (payload_length_host_byte_order +
1069                           (ip0->protocol << 16));
1070
1071   if (BITS (uword) == 32)
1072     {
1073       sum0 =
1074         ip_csum_with_carry (sum0,
1075                             clib_mem_unaligned (&ip0->src_address, u32));
1076       sum0 =
1077         ip_csum_with_carry (sum0,
1078                             clib_mem_unaligned (&ip0->dst_address, u32));
1079     }
1080   else
1081     sum0 =
1082       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1083
1084   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1085   data_this_buffer = (void *) ip0 + ip_header_length;
1086   n_ip_bytes_this_buffer =
1087     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1088   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1089     {
1090       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1091         n_ip_bytes_this_buffer - ip_header_length : 0;
1092     }
1093   while (1)
1094     {
1095       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1096       n_bytes_left -= n_this_buffer;
1097       if (n_bytes_left == 0)
1098         break;
1099
1100       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1101       p0 = vlib_get_buffer (vm, p0->next_buffer);
1102       data_this_buffer = vlib_buffer_get_current (p0);
1103       n_this_buffer = p0->current_length;
1104     }
1105
1106   sum16 = ~ip_csum_fold (sum0);
1107
1108   return sum16;
1109 }
1110
1111 u32
1112 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1113 {
1114   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1115   udp_header_t *udp0;
1116   u16 sum16;
1117
1118   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1119           || ip0->protocol == IP_PROTOCOL_UDP);
1120
1121   udp0 = (void *) (ip0 + 1);
1122   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1123     {
1124       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1125                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1126       return p0->flags;
1127     }
1128
1129   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1130
1131   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1132                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1133
1134   return p0->flags;
1135 }
1136
1137 /* *INDENT-OFF* */
1138 VNET_FEATURE_ARC_INIT (ip4_local) =
1139 {
1140   .arc_name  = "ip4-local",
1141   .start_nodes = VNET_FEATURES ("ip4-local"),
1142 };
1143 /* *INDENT-ON* */
1144
1145 static inline void
1146 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1147                             ip4_header_t * ip, u8 is_udp, u8 * error,
1148                             u8 * good_tcp_udp)
1149 {
1150   u32 flags0;
1151   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1152   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1153   if (is_udp)
1154     {
1155       udp_header_t *udp;
1156       u32 ip_len, udp_len;
1157       i32 len_diff;
1158       udp = ip4_next_header (ip);
1159       /* Verify UDP length. */
1160       ip_len = clib_net_to_host_u16 (ip->length);
1161       udp_len = clib_net_to_host_u16 (udp->length);
1162
1163       len_diff = ip_len - udp_len;
1164       *good_tcp_udp &= len_diff >= 0;
1165       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1166     }
1167 }
1168
/*
 * Checksum-state predicates on a buffer pointer _b.
 *
 * Each argument and each expansion is fully parenthesised so the macros
 * can be negated, combined with other operators, or given a pointer
 * expression (e.g. bufs + 1) without changing meaning. All three evaluate
 * to 0 or 1. _b is evaluated more than once: pass an expression without
 * side effects.
 */

/* 1 when either the TCP or the UDP checksum-offload flag is set on _b. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    ((((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM) != 0)            \
     || (((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM) != 0))

/* 1 when the packet is TCP/UDP and its checksum has neither been computed
 * already nor been marked for offload. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) != 0)    \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is marked for offload. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0)          \
     || ip4_local_csum_is_offloaded (_b))
1180
1181 static inline void
1182 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1183                          ip4_header_t * ih, u8 * error)
1184 {
1185   u8 is_udp, is_tcp_udp, good_tcp_udp;
1186
1187   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1188   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1189
1190   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1191     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1192   else
1193     good_tcp_udp = ip4_local_csum_is_valid (b);
1194
1195   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1196   *error = (is_tcp_udp && !good_tcp_udp
1197             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1198 }
1199
1200 static inline void
1201 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1202                             ip4_header_t ** ih, u8 * error)
1203 {
1204   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1205
1206   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1207   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1208
1209   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1210   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1211
1212   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1213   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1214
1215   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1216                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1217     {
1218       if (is_tcp_udp[0])
1219         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1220                                     &good_tcp_udp[0]);
1221       if (is_tcp_udp[1])
1222         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1223                                     &good_tcp_udp[1]);
1224     }
1225
1226   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1227               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1228   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1229               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1230 }
1231
1232 static inline void
1233 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1234                               vlib_buffer_t * b, u16 * next, u8 error,
1235                               u8 head_of_feature_arc)
1236 {
1237   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1238   u32 next_index;
1239
1240   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1241   b->error = error ? error_node->errors[error] : 0;
1242   if (head_of_feature_arc)
1243     {
1244       next_index = *next;
1245       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1246         {
1247           vnet_feature_arc_start (arc_index,
1248                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1249                                   &next_index, b);
1250           *next = next_index;
1251         }
1252     }
1253 }
1254
1255 typedef struct
1256 {
1257   ip4_address_t src;
1258   u32 lbi;
1259   u8 error;
1260 } ip4_local_last_check_t;
1261
1262 static inline void
1263 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1264                      ip4_local_last_check_t * last_check, u8 * error0)
1265 {
1266   ip4_fib_mtrie_leaf_t leaf0;
1267   ip4_fib_mtrie_t *mtrie0;
1268   const dpo_id_t *dpo0;
1269   load_balance_t *lb0;
1270   u32 lbi0;
1271
1272   vnet_buffer (b)->ip.fib_index =
1273     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1274     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1275
1276   if (PREDICT_FALSE (last_check->src.as_u32 != ip0->src_address.as_u32))
1277     {
1278       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1279       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1280       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1281       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1282       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1283
1284       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1285       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1286
1287       lb0 = load_balance_get (lbi0);
1288       dpo0 = load_balance_get_bucket_i (lb0, 0);
1289
1290       /*
1291        * Must have a route to source otherwise we drop the packet.
1292        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1293        *
1294        * The checks are:
1295        *  - the source is a recieve => it's from us => bogus, do this
1296        *    first since it sets a different error code.
1297        *  - uRPF check for any route to source - accept if passes.
1298        *  - allow packets destined to the broadcast address from unknown sources
1299        */
1300
1301       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1302                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1303                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1304       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1305                   && !fib_urpf_check_size (lb0->lb_urpf)
1306                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1307                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1308
1309       last_check->src.as_u32 = ip0->src_address.as_u32;
1310       last_check->lbi = lbi0;
1311       last_check->error = *error0;
1312     }
1313   else
1314     {
1315       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1316       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1317       *error0 = last_check->error;
1318     }
1319 }
1320
1321 static inline void
1322 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1323                         ip4_local_last_check_t * last_check, u8 * error)
1324 {
1325   ip4_fib_mtrie_leaf_t leaf[2];
1326   ip4_fib_mtrie_t *mtrie[2];
1327   const dpo_id_t *dpo[2];
1328   load_balance_t *lb[2];
1329   u32 not_last_hit = 0;
1330   u32 lbi[2];
1331
1332   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1333   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1334
1335   vnet_buffer (b[0])->ip.fib_index =
1336     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1337     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1338     vnet_buffer (b[0])->ip.fib_index;
1339
1340   vnet_buffer (b[1])->ip.fib_index =
1341     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1342     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1343     vnet_buffer (b[1])->ip.fib_index;
1344
1345   if (PREDICT_FALSE (not_last_hit))
1346     {
1347       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1348       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1349
1350       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1351       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1352
1353       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1354                                            &ip[0]->src_address, 2);
1355       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1356                                            &ip[1]->src_address, 2);
1357
1358       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1359                                            &ip[0]->src_address, 3);
1360       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1361                                            &ip[1]->src_address, 3);
1362
1363       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1364       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1365
1366       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1367       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1368
1369       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1370       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1371
1372       lb[0] = load_balance_get (lbi[0]);
1373       lb[1] = load_balance_get (lbi[1]);
1374
1375       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1376       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1377
1378       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1379                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1380                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1381       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1382                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1383                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1384                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1385
1386       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1387                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1388                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1389       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1390                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1391                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1392                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1393
1394       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1395       last_check->lbi = lbi[1];
1396       last_check->error = error[1];
1397     }
1398   else
1399     {
1400       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1401       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1402
1403       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1404       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1405
1406       error[0] = last_check->error;
1407       error[1] = last_check->error;
1408     }
1409 }
1410
1411 static inline uword
1412 ip4_local_inline (vlib_main_t * vm,
1413                   vlib_node_runtime_t * node,
1414                   vlib_frame_t * frame, int head_of_feature_arc)
1415 {
1416   ip4_main_t *im = &ip4_main;
1417   ip_lookup_main_t *lm = &im->lookup_main;
1418   u32 *from, n_left_from;
1419   vlib_node_runtime_t *error_node =
1420     vlib_node_get_runtime (vm, ip4_input_node.index);
1421   u16 nexts[VLIB_FRAME_SIZE], *next;
1422   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1423   ip4_header_t *ip[2];
1424   u8 error[2];
1425
1426   ip4_local_last_check_t last_check = {
1427     .src = {.as_u32 = 0},
1428     .lbi = ~0,
1429     .error = IP4_ERROR_UNKNOWN_PROTOCOL
1430   };
1431
1432   from = vlib_frame_vector_args (frame);
1433   n_left_from = frame->n_vectors;
1434
1435   if (node->flags & VLIB_NODE_FLAG_TRACE)
1436     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1437
1438   vlib_get_buffers (vm, from, bufs, n_left_from);
1439   b = bufs;
1440   next = nexts;
1441
1442   while (n_left_from >= 6)
1443     {
1444       u32 is_nat, not_batch = 0;
1445
1446       /* Prefetch next iteration. */
1447       {
1448         vlib_prefetch_buffer_header (b[4], LOAD);
1449         vlib_prefetch_buffer_header (b[5], LOAD);
1450
1451         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1452         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1453       }
1454
1455       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1456
1457       ip[0] = vlib_buffer_get_current (b[0]);
1458       ip[1] = vlib_buffer_get_current (b[1]);
1459
1460       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1461       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1462
1463       is_nat = b[0]->flags & VNET_BUFFER_F_IS_NATED;
1464       not_batch |= is_nat ^ (b[1]->flags & VNET_BUFFER_F_IS_NATED);
1465
1466       if (head_of_feature_arc == 0 || (is_nat && not_batch == 0))
1467         goto skip_checks;
1468
1469       if (PREDICT_TRUE (not_batch == 0))
1470         {
1471           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1472           ip4_local_check_src_x2 (b, ip, &last_check, error);
1473         }
1474       else
1475         {
1476           if (!(b[0]->flags & VNET_BUFFER_F_IS_NATED))
1477             {
1478               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1479               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1480             }
1481           if (!(b[1]->flags & VNET_BUFFER_F_IS_NATED))
1482             {
1483               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1484               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1485             }
1486         }
1487
1488     skip_checks:
1489
1490       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1491       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1492       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1493                                     head_of_feature_arc);
1494       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1495                                     head_of_feature_arc);
1496
1497       b += 2;
1498       next += 2;
1499       n_left_from -= 2;
1500     }
1501
1502   while (n_left_from > 0)
1503     {
1504       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1505
1506       ip[0] = vlib_buffer_get_current (b[0]);
1507       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1508
1509       if (head_of_feature_arc == 0 || (b[0]->flags & VNET_BUFFER_F_IS_NATED))
1510         goto skip_check;
1511
1512       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1513       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1514
1515     skip_check:
1516
1517       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1518       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1519                                     head_of_feature_arc);
1520
1521       b += 1;
1522       next += 1;
1523       n_left_from -= 1;
1524     }
1525
1526   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1527   return frame->n_vectors;
1528 }
1529
1530 static uword
1531 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1532 {
1533   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1534 }
1535
1536 /* *INDENT-OFF* */
1537 VLIB_REGISTER_NODE (ip4_local_node) =
1538 {
1539   .function = ip4_local,
1540   .name = "ip4-local",
1541   .vector_size = sizeof (u32),
1542   .format_trace = format_ip4_forward_next_trace,
1543   .n_next_nodes = IP_LOCAL_N_NEXT,
1544   .next_nodes =
1545   {
1546     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1547     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1548     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1549     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1550   },
1551 };
1552 /* *INDENT-ON* */
1553
1554 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1555
1556 static uword
1557 ip4_local_end_of_arc (vlib_main_t * vm,
1558                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1559 {
1560   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1561 }
1562
1563 /* *INDENT-OFF* */
1564 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1565   .function = ip4_local_end_of_arc,
1566   .name = "ip4-local-end-of-arc",
1567   .vector_size = sizeof (u32),
1568
1569   .format_trace = format_ip4_forward_next_trace,
1570   .sibling_of = "ip4-local",
1571 };
1572
1573 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1574
1575 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1576   .arc_name = "ip4-local",
1577   .node_name = "ip4-local-end-of-arc",
1578   .runs_before = 0, /* not before any other features */
1579 };
1580 /* *INDENT-ON* */
1581
1582 void
1583 ip4_register_protocol (u32 protocol, u32 node_index)
1584 {
1585   vlib_main_t *vm = vlib_get_main ();
1586   ip4_main_t *im = &ip4_main;
1587   ip_lookup_main_t *lm = &im->lookup_main;
1588
1589   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1590   lm->local_next_by_ip_protocol[protocol] =
1591     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1592 }
1593
1594 static clib_error_t *
1595 show_ip_local_command_fn (vlib_main_t * vm,
1596                           unformat_input_t * input, vlib_cli_command_t * cmd)
1597 {
1598   ip4_main_t *im = &ip4_main;
1599   ip_lookup_main_t *lm = &im->lookup_main;
1600   int i;
1601
1602   vlib_cli_output (vm, "Protocols handled by ip4_local");
1603   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1604     {
1605       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1606         {
1607           u32 node_index = vlib_get_node (vm,
1608                                           ip4_local_node.index)->
1609             next_nodes[lm->local_next_by_ip_protocol[i]];
1610           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1611                            node_index);
1612         }
1613     }
1614   return 0;
1615 }
1616
1617
1618
1619 /*?
1620  * Display the set of protocols handled by the local IPv4 stack.
1621  *
1622  * @cliexpar
1623  * Example of how to display local protocol table:
1624  * @cliexstart{show ip local}
1625  * Protocols handled by ip4_local
1626  * 1
1627  * 17
1628  * 47
1629  * @cliexend
1630 ?*/
1631 /* *INDENT-OFF* */
1632 VLIB_CLI_COMMAND (show_ip_local, static) =
1633 {
1634   .path = "show ip local",
1635   .function = show_ip_local_command_fn,
1636   .short_help = "show ip local",
1637 };
1638 /* *INDENT-ON* */
1639
1640 always_inline uword
1641 ip4_arp_inline (vlib_main_t * vm,
1642                 vlib_node_runtime_t * node,
1643                 vlib_frame_t * frame, int is_glean)
1644 {
1645   vnet_main_t *vnm = vnet_get_main ();
1646   ip4_main_t *im = &ip4_main;
1647   ip_lookup_main_t *lm = &im->lookup_main;
1648   u32 *from, *to_next_drop;
1649   uword n_left_from, n_left_to_next_drop, next_index;
1650   static f64 time_last_seed_change = -1e100;
1651   static u32 hash_seeds[3];
1652   static uword hash_bitmap[256 / BITS (uword)];
1653   f64 time_now;
1654
1655   if (node->flags & VLIB_NODE_FLAG_TRACE)
1656     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1657
1658   time_now = vlib_time_now (vm);
1659   if (time_now - time_last_seed_change > 1e-3)
1660     {
1661       uword i;
1662       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1663                                             sizeof (hash_seeds));
1664       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1665         hash_seeds[i] = r[i];
1666
1667       /* Mark all hash keys as been no-seen before. */
1668       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1669         hash_bitmap[i] = 0;
1670
1671       time_last_seed_change = time_now;
1672     }
1673
1674   from = vlib_frame_vector_args (frame);
1675   n_left_from = frame->n_vectors;
1676   next_index = node->cached_next_index;
1677   if (next_index == IP4_ARP_NEXT_DROP)
1678     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1679
1680   while (n_left_from > 0)
1681     {
1682       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1683                            to_next_drop, n_left_to_next_drop);
1684
1685       while (n_left_from > 0 && n_left_to_next_drop > 0)
1686         {
1687           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1688           ip_adjacency_t *adj0;
1689           vlib_buffer_t *p0;
1690           ip4_header_t *ip0;
1691           uword bm0;
1692
1693           pi0 = from[0];
1694
1695           p0 = vlib_get_buffer (vm, pi0);
1696
1697           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1698           adj0 = adj_get (adj_index0);
1699           ip0 = vlib_buffer_get_current (p0);
1700
1701           a0 = hash_seeds[0];
1702           b0 = hash_seeds[1];
1703           c0 = hash_seeds[2];
1704
1705           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1706           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1707
1708           if (is_glean)
1709             {
1710               /*
1711                * this is the Glean case, so we are ARPing for the
1712                * packet's destination
1713                */
1714               a0 ^= ip0->dst_address.data_u32;
1715             }
1716           else
1717             {
1718               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1719             }
1720           b0 ^= sw_if_index0;
1721
1722           hash_v3_mix32 (a0, b0, c0);
1723           hash_v3_finalize32 (a0, b0, c0);
1724
1725           c0 &= BITS (hash_bitmap) - 1;
1726           m0 = (uword) 1 << (c0 % BITS (uword));
1727           c0 = c0 / BITS (uword);
1728
1729           bm0 = hash_bitmap[c0];
1730           drop0 = (bm0 & m0) != 0;
1731
1732           /* Mark it as seen. */
1733           hash_bitmap[c0] = bm0 | m0;
1734
1735           from += 1;
1736           n_left_from -= 1;
1737           to_next_drop[0] = pi0;
1738           to_next_drop += 1;
1739           n_left_to_next_drop -= 1;
1740
1741           p0->error =
1742             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
1743                          IP4_ARP_ERROR_REQUEST_SENT];
1744
1745           /*
1746            * the adj has been updated to a rewrite but the node the DPO that got
1747            * us here hasn't - yet. no big deal. we'll drop while we wait.
1748            */
1749           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1750             continue;
1751
1752           if (drop0)
1753             continue;
1754
1755           /*
1756            * Can happen if the control-plane is programming tables
1757            * with traffic flowing; at least that's today's lame excuse.
1758            */
1759           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1760               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1761             {
1762               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1763             }
1764           else
1765             /* Send ARP request. */
1766             {
1767               u32 bi0 = 0;
1768               vlib_buffer_t *b0;
1769               ethernet_arp_header_t *h0;
1770               vnet_hw_interface_t *hw_if0;
1771
1772               h0 =
1773                 vlib_packet_template_get_packet (vm,
1774                                                  &im->ip4_arp_request_packet_template,
1775                                                  &bi0);
1776
1777               /* Seems we're out of buffers */
1778               if (PREDICT_FALSE (!h0))
1779                 continue;
1780
1781               /* Add rewrite/encap string for ARP packet. */
1782               vnet_rewrite_one_header (adj0[0], h0,
1783                                        sizeof (ethernet_header_t));
1784
1785               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1786
1787               /* Src ethernet address in ARP header. */
1788               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
1789                            hw_if0->hw_address,
1790                            sizeof (h0->ip4_over_ethernet[0].ethernet));
1791
1792               if (is_glean)
1793                 {
1794                   /* The interface's source address is stashed in the Glean Adj */
1795                   h0->ip4_over_ethernet[0].ip4 =
1796                     adj0->sub_type.glean.receive_addr.ip4;
1797
1798                   /* Copy in destination address we are requesting. This is the
1799                    * glean case, so it's the packet's destination.*/
1800                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1801                     ip0->dst_address.data_u32;
1802                 }
1803               else
1804                 {
1805                   /* Src IP address in ARP header. */
1806                   if (ip4_src_address_for_packet (lm, sw_if_index0,
1807                                                   &h0->
1808                                                   ip4_over_ethernet[0].ip4))
1809                     {
1810                       /* No source address available */
1811                       p0->error =
1812                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1813                       vlib_buffer_free (vm, &bi0, 1);
1814                       continue;
1815                     }
1816
1817                   /* Copy in destination address we are requesting from the
1818                      incomplete adj */
1819                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1820                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1821                 }
1822
1823               vlib_buffer_copy_trace_flag (vm, p0, bi0);
1824               b0 = vlib_get_buffer (vm, bi0);
1825               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1826               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1827
1828               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1829
1830               vlib_set_next_frame_buffer (vm, node,
1831                                           adj0->rewrite_header.next_index,
1832                                           bi0);
1833             }
1834         }
1835
1836       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1837     }
1838
1839   return frame->n_vectors;
1840 }
1841
1842 static uword
1843 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1844 {
1845   return (ip4_arp_inline (vm, node, frame, 0));
1846 }
1847
1848 static uword
1849 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1850 {
1851   return (ip4_arp_inline (vm, node, frame, 1));
1852 }
1853
1854 static char *ip4_arp_error_strings[] = {
1855   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1856   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1857   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1858   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1859   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1860   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1861 };
1862
1863 /* *INDENT-OFF* */
1864 VLIB_REGISTER_NODE (ip4_arp_node) =
1865 {
1866   .function = ip4_arp,
1867   .name = "ip4-arp",
1868   .vector_size = sizeof (u32),
1869   .format_trace = format_ip4_forward_next_trace,
1870   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1871   .error_strings = ip4_arp_error_strings,
1872   .n_next_nodes = IP4_ARP_N_NEXT,
1873   .next_nodes =
1874   {
1875     [IP4_ARP_NEXT_DROP] = "error-drop",
1876   },
1877 };
1878
1879 VLIB_REGISTER_NODE (ip4_glean_node) =
1880 {
1881   .function = ip4_glean,
1882   .name = "ip4-glean",
1883   .vector_size = sizeof (u32),
1884   .format_trace = format_ip4_forward_next_trace,
1885   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1886   .error_strings = ip4_arp_error_strings,
1887   .n_next_nodes = IP4_ARP_N_NEXT,
1888   .next_nodes = {
1889   [IP4_ARP_NEXT_DROP] = "error-drop",
1890   },
1891 };
1892 /* *INDENT-ON* */
1893
1894 #define foreach_notrace_ip4_arp_error           \
1895 _(DROP)                                         \
1896 _(REQUEST_SENT)                                 \
1897 _(REPLICATE_DROP)                               \
1898 _(REPLICATE_FAIL)
1899
1900 clib_error_t *
1901 arp_notrace_init (vlib_main_t * vm)
1902 {
1903   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1904
1905   /* don't trace ARP request packets */
1906 #define _(a)                                    \
1907     vnet_pcap_drop_trace_filter_add_del         \
1908         (rt->errors[IP4_ARP_ERROR_##a],         \
1909          1 /* is_add */);
1910   foreach_notrace_ip4_arp_error;
1911 #undef _
1912   return 0;
1913 }
1914
1915 VLIB_INIT_FUNCTION (arp_notrace_init);
1916
1917
1918 /* Send an ARP request to see if given destination is reachable on given interface. */
1919 clib_error_t *
1920 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
1921 {
1922   vnet_main_t *vnm = vnet_get_main ();
1923   ip4_main_t *im = &ip4_main;
1924   ethernet_arp_header_t *h;
1925   ip4_address_t *src;
1926   ip_interface_address_t *ia;
1927   ip_adjacency_t *adj;
1928   vnet_hw_interface_t *hi;
1929   vnet_sw_interface_t *si;
1930   vlib_buffer_t *b;
1931   adj_index_t ai;
1932   u32 bi = 0;
1933
1934   si = vnet_get_sw_interface (vnm, sw_if_index);
1935
1936   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1937     {
1938       return clib_error_return (0, "%U: interface %U down",
1939                                 format_ip4_address, dst,
1940                                 format_vnet_sw_if_index_name, vnm,
1941                                 sw_if_index);
1942     }
1943
1944   src =
1945     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1946   if (!src)
1947     {
1948       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1949       return clib_error_return
1950         (0,
1951          "no matching interface address for destination %U (interface %U)",
1952          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
1953          sw_if_index);
1954     }
1955
1956   h = vlib_packet_template_get_packet (vm,
1957                                        &im->ip4_arp_request_packet_template,
1958                                        &bi);
1959
1960   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1961   if (PREDICT_FALSE (!hi->hw_address))
1962     {
1963       return clib_error_return (0, "%U: interface %U do not support ip probe",
1964                                 format_ip4_address, dst,
1965                                 format_vnet_sw_if_index_name, vnm,
1966                                 sw_if_index);
1967     }
1968
1969   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
1970                sizeof (h->ip4_over_ethernet[0].ethernet));
1971
1972   h->ip4_over_ethernet[0].ip4 = src[0];
1973   h->ip4_over_ethernet[1].ip4 = dst[0];
1974
1975   b = vlib_get_buffer (vm, bi);
1976   vnet_buffer (b)->sw_if_index[VLIB_RX] =
1977     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1978
1979   ip46_address_t nh = {
1980     .ip4 = *dst,
1981   };
1982
1983   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
1984                             VNET_LINK_IP4, &nh, sw_if_index);
1985   adj = adj_get (ai);
1986
1987   /* Peer has been previously resolved, retrieve glean adj instead */
1988   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
1989     {
1990       adj_unlock (ai);
1991       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
1992                                   VNET_LINK_IP4, sw_if_index, &nh);
1993       adj = adj_get (ai);
1994     }
1995
1996   /* Add encapsulation string for software interface (e.g. ethernet header). */
1997   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
1998   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
1999
2000   {
2001     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2002     u32 *to_next = vlib_frame_vector_args (f);
2003     to_next[0] = bi;
2004     f->n_vectors = 1;
2005     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2006   }
2007
2008   adj_unlock (ai);
2009   return /* no error */ 0;
2010 }
2011
/*
 * Static next-node slots of ip4-rewrite; the values index that node's
 * next_nodes[] table.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,        /* "ip4-drop" */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,  /* "ip4-icmp-error" */
} ip4_rewrite_next_t;
2017
/**
 * The bits of an IPv4 address that are masked out to construct a
 * multicast MAC address: the low-order 23 bits of the address, expressed
 * for a u32 that holds the address in network byte order.
 */
#if !(CLIB_ARCH_IS_BIG_ENDIAN)
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2027
2028 always_inline void
2029 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2030                u16 adj_packet_bytes, bool df, u32 * next, u32 * error)
2031 {
2032   if (packet_len > adj_packet_bytes)
2033     {
2034       *error = IP4_ERROR_MTU_EXCEEDED;
2035       if (df)
2036         {
2037           icmp4_error_set_vnet_buffer
2038             (b, ICMP4_destination_unreachable,
2039              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2040              adj_packet_bytes);
2041           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2042         }
2043       else
2044         {
2045           /* Add support for fragmentation here */
2046           *next = IP4_REWRITE_NEXT_DROP;
2047         }
2048     }
2049 }
2050
2051 always_inline uword
2052 ip4_rewrite_inline (vlib_main_t * vm,
2053                     vlib_node_runtime_t * node,
2054                     vlib_frame_t * frame,
2055                     int do_counters, int is_midchain, int is_mcast)
2056 {
2057   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2058   u32 *from = vlib_frame_vector_args (frame);
2059   u32 n_left_from, n_left_to_next, *to_next, next_index;
2060   vlib_node_runtime_t *error_node =
2061     vlib_node_get_runtime (vm, ip4_input_node.index);
2062
2063   n_left_from = frame->n_vectors;
2064   next_index = node->cached_next_index;
2065   u32 thread_index = vlib_get_thread_index ();
2066
2067   while (n_left_from > 0)
2068     {
2069       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2070
2071       while (n_left_from >= 4 && n_left_to_next >= 2)
2072         {
2073           ip_adjacency_t *adj0, *adj1;
2074           vlib_buffer_t *p0, *p1;
2075           ip4_header_t *ip0, *ip1;
2076           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2077           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2078           u32 tx_sw_if_index0, tx_sw_if_index1;
2079
2080           /* Prefetch next iteration. */
2081           {
2082             vlib_buffer_t *p2, *p3;
2083
2084             p2 = vlib_get_buffer (vm, from[2]);
2085             p3 = vlib_get_buffer (vm, from[3]);
2086
2087             vlib_prefetch_buffer_header (p2, STORE);
2088             vlib_prefetch_buffer_header (p3, STORE);
2089
2090             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2091             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2092           }
2093
2094           pi0 = to_next[0] = from[0];
2095           pi1 = to_next[1] = from[1];
2096
2097           from += 2;
2098           n_left_from -= 2;
2099           to_next += 2;
2100           n_left_to_next -= 2;
2101
2102           p0 = vlib_get_buffer (vm, pi0);
2103           p1 = vlib_get_buffer (vm, pi1);
2104
2105           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2106           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2107
2108           /*
2109            * pre-fetch the per-adjacency counters
2110            */
2111           if (do_counters)
2112             {
2113               vlib_prefetch_combined_counter (&adjacency_counters,
2114                                               thread_index, adj_index0);
2115               vlib_prefetch_combined_counter (&adjacency_counters,
2116                                               thread_index, adj_index1);
2117             }
2118
2119           ip0 = vlib_buffer_get_current (p0);
2120           ip1 = vlib_buffer_get_current (p1);
2121
2122           error0 = error1 = IP4_ERROR_NONE;
2123           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2124
2125           /* Decrement TTL & update checksum.
2126              Works either endian, so no need for byte swap. */
2127           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2128             {
2129               i32 ttl0 = ip0->ttl;
2130
2131               /* Input node should have reject packets with ttl 0. */
2132               ASSERT (ip0->ttl > 0);
2133
2134               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2135               checksum0 += checksum0 >= 0xffff;
2136
2137               ip0->checksum = checksum0;
2138               ttl0 -= 1;
2139               ip0->ttl = ttl0;
2140
2141               /*
2142                * If the ttl drops below 1 when forwarding, generate
2143                * an ICMP response.
2144                */
2145               if (PREDICT_FALSE (ttl0 <= 0))
2146                 {
2147                   error0 = IP4_ERROR_TIME_EXPIRED;
2148                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2149                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2150                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2151                                                0);
2152                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2153                 }
2154
2155               /* Verify checksum. */
2156               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2157                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2158             }
2159           else
2160             {
2161               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2162             }
2163           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2164             {
2165               i32 ttl1 = ip1->ttl;
2166
2167               /* Input node should have reject packets with ttl 0. */
2168               ASSERT (ip1->ttl > 0);
2169
2170               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2171               checksum1 += checksum1 >= 0xffff;
2172
2173               ip1->checksum = checksum1;
2174               ttl1 -= 1;
2175               ip1->ttl = ttl1;
2176
2177               /*
2178                * If the ttl drops below 1 when forwarding, generate
2179                * an ICMP response.
2180                */
2181               if (PREDICT_FALSE (ttl1 <= 0))
2182                 {
2183                   error1 = IP4_ERROR_TIME_EXPIRED;
2184                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2185                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2186                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2187                                                0);
2188                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2189                 }
2190
2191               /* Verify checksum. */
2192               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2193                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2194             }
2195           else
2196             {
2197               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2198             }
2199
2200           /* Rewrite packet header and updates lengths. */
2201           adj0 = adj_get (adj_index0);
2202           adj1 = adj_get (adj_index1);
2203
2204           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2205           rw_len0 = adj0[0].rewrite_header.data_bytes;
2206           rw_len1 = adj1[0].rewrite_header.data_bytes;
2207           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2208           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2209
2210           /* Check MTU of outgoing interface. */
2211           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2212                          adj0[0].rewrite_header.max_l3_packet_bytes,
2213                          ip0->flags_and_fragment_offset &
2214                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2215                          &next0, &error0);
2216           ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length),
2217                          adj1[0].rewrite_header.max_l3_packet_bytes,
2218                          ip1->flags_and_fragment_offset &
2219                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2220                          &next1, &error1);
2221
2222           if (is_mcast)
2223             {
2224               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2225                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2226                         IP4_ERROR_SAME_INTERFACE : error0);
2227               error1 = ((adj1[0].rewrite_header.sw_if_index ==
2228                          vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2229                         IP4_ERROR_SAME_INTERFACE : error1);
2230             }
2231
2232           p0->error = error_node->errors[error0];
2233           p1->error = error_node->errors[error1];
2234           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2235            * to see the IP headerr */
2236           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2237             {
2238               next0 = adj0[0].rewrite_header.next_index;
2239               p0->current_data -= rw_len0;
2240               p0->current_length += rw_len0;
2241               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2242               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2243
2244               if (PREDICT_FALSE
2245                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2246                 vnet_feature_arc_start (lm->output_feature_arc_index,
2247                                         tx_sw_if_index0, &next0, p0);
2248             }
2249           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2250             {
2251               next1 = adj1[0].rewrite_header.next_index;
2252               p1->current_data -= rw_len1;
2253               p1->current_length += rw_len1;
2254
2255               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2256               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2257
2258               if (PREDICT_FALSE
2259                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2260                 vnet_feature_arc_start (lm->output_feature_arc_index,
2261                                         tx_sw_if_index1, &next1, p1);
2262             }
2263
2264           /* Guess we are only writing on simple Ethernet header. */
2265           vnet_rewrite_two_headers (adj0[0], adj1[0],
2266                                     ip0, ip1, sizeof (ethernet_header_t));
2267
2268           /*
2269            * Bump the per-adjacency counters
2270            */
2271           if (do_counters)
2272             {
2273               vlib_increment_combined_counter
2274                 (&adjacency_counters,
2275                  thread_index,
2276                  adj_index0, 1,
2277                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2278
2279               vlib_increment_combined_counter
2280                 (&adjacency_counters,
2281                  thread_index,
2282                  adj_index1, 1,
2283                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2284             }
2285
2286           if (is_midchain)
2287             {
2288               adj0->sub_type.midchain.fixup_func
2289                 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2290               adj1->sub_type.midchain.fixup_func
2291                 (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
2292             }
2293           if (is_mcast)
2294             {
2295               /*
2296                * copy bytes from the IP address into the MAC rewrite
2297                */
2298               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2299                                           adj0->
2300                                           rewrite_header.dst_mcast_offset,
2301                                           &ip0->dst_address.as_u32,
2302                                           (u8 *) ip0);
2303               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2304                                           adj0->
2305                                           rewrite_header.dst_mcast_offset,
2306                                           &ip1->dst_address.as_u32,
2307                                           (u8 *) ip1);
2308             }
2309
2310           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2311                                            to_next, n_left_to_next,
2312                                            pi0, pi1, next0, next1);
2313         }
2314
2315       while (n_left_from > 0 && n_left_to_next > 0)
2316         {
2317           ip_adjacency_t *adj0;
2318           vlib_buffer_t *p0;
2319           ip4_header_t *ip0;
2320           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2321           u32 tx_sw_if_index0;
2322
2323           pi0 = to_next[0] = from[0];
2324
2325           p0 = vlib_get_buffer (vm, pi0);
2326
2327           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2328
2329           adj0 = adj_get (adj_index0);
2330
2331           ip0 = vlib_buffer_get_current (p0);
2332
2333           error0 = IP4_ERROR_NONE;
2334           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2335
2336           /* Decrement TTL & update checksum. */
2337           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2338             {
2339               i32 ttl0 = ip0->ttl;
2340
2341               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2342
2343               checksum0 += checksum0 >= 0xffff;
2344
2345               ip0->checksum = checksum0;
2346
2347               ASSERT (ip0->ttl > 0);
2348
2349               ttl0 -= 1;
2350
2351               ip0->ttl = ttl0;
2352
2353               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2354                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2355
2356               if (PREDICT_FALSE (ttl0 <= 0))
2357                 {
2358                   /*
2359                    * If the ttl drops below 1 when forwarding, generate
2360                    * an ICMP response.
2361                    */
2362                   error0 = IP4_ERROR_TIME_EXPIRED;
2363                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2364                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2365                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2366                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2367                                                0);
2368                 }
2369             }
2370           else
2371             {
2372               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2373             }
2374
2375           if (do_counters)
2376             vlib_prefetch_combined_counter (&adjacency_counters,
2377                                             thread_index, adj_index0);
2378
2379           /* Guess we are only writing on simple Ethernet header. */
2380           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2381           if (is_mcast)
2382             {
2383               /*
2384                * copy bytes from the IP address into the MAC rewrite
2385                */
2386               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2387                                           adj0->
2388                                           rewrite_header.dst_mcast_offset,
2389                                           &ip0->dst_address.as_u32,
2390                                           (u8 *) ip0);
2391             }
2392
2393           /* Update packet buffer attributes/set output interface. */
2394           rw_len0 = adj0[0].rewrite_header.data_bytes;
2395           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2396
2397           if (do_counters)
2398             vlib_increment_combined_counter
2399               (&adjacency_counters,
2400                thread_index, adj_index0, 1,
2401                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2402
2403           /* Check MTU of outgoing interface. */
2404           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2405                          adj0[0].rewrite_header.max_l3_packet_bytes,
2406                          ip0->flags_and_fragment_offset &
2407                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2408                          &next0, &error0);
2409
2410           if (is_mcast)
2411             {
2412               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2413                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2414                         IP4_ERROR_SAME_INTERFACE : error0);
2415             }
2416           p0->error = error_node->errors[error0];
2417
2418           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2419            * to see the IP headerr */
2420           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2421             {
2422               p0->current_data -= rw_len0;
2423               p0->current_length += rw_len0;
2424               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2425
2426               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2427               next0 = adj0[0].rewrite_header.next_index;
2428
2429               if (is_midchain)
2430                 {
2431                   adj0->sub_type.midchain.fixup_func
2432                     (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2433                 }
2434
2435               if (PREDICT_FALSE
2436                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2437                 vnet_feature_arc_start (lm->output_feature_arc_index,
2438                                         tx_sw_if_index0, &next0, p0);
2439
2440             }
2441
2442           from += 1;
2443           n_left_from -= 1;
2444           to_next += 1;
2445           n_left_to_next -= 1;
2446
2447           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2448                                            to_next, n_left_to_next,
2449                                            pi0, next0);
2450         }
2451
2452       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2453     }
2454
2455   /* Need to do trace after rewrites to pick up new packet data. */
2456   if (node->flags & VLIB_NODE_FLAG_TRACE)
2457     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2458
2459   return frame->n_vectors;
2460 }
2461
2462
2463 /** @brief IPv4 rewrite node.
2464     @node ip4-rewrite
2465
2466     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2467     header checksum, fetch the ip adjacency, check the outbound mtu,
2468     apply the adjacency rewrite, and send pkts to the adjacency
2469     rewrite header's rewrite_next_index.
2470
2471     @param vm vlib_main_t corresponding to the current thread
2472     @param node vlib_node_runtime_t
2473     @param frame vlib_frame_t whose contents should be dispatched
2474
2475     @par Graph mechanics: buffer metadata, next index usage
2476
2477     @em Uses:
2478     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2479         - the rewrite adjacency index
2480     - <code>adj->lookup_next_index</code>
2481         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2482           the packet will be dropped.
2483     - <code>adj->rewrite_header</code>
2484         - Rewrite string length, rewrite string, next_index
2485
2486     @em Sets:
2487     - <code>b->current_data, b->current_length</code>
2488         - Updated net of applying the rewrite string
2489
2490     <em>Next Indices:</em>
2491     - <code> adj->rewrite_header.next_index </code>
2492       or @c ip4-drop
2493 */
2494 static uword
2495 ip4_rewrite (vlib_main_t * vm,
2496              vlib_node_runtime_t * node, vlib_frame_t * frame)
2497 {
2498   if (adj_are_counters_enabled ())
2499     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2500   else
2501     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2502 }
2503
2504 static uword
2505 ip4_midchain (vlib_main_t * vm,
2506               vlib_node_runtime_t * node, vlib_frame_t * frame)
2507 {
2508   if (adj_are_counters_enabled ())
2509     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2510   else
2511     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2512 }
2513
2514 static uword
2515 ip4_rewrite_mcast (vlib_main_t * vm,
2516                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2517 {
2518   if (adj_are_counters_enabled ())
2519     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2520   else
2521     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2522 }
2523
2524 static uword
2525 ip4_mcast_midchain (vlib_main_t * vm,
2526                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2527 {
2528   if (adj_are_counters_enabled ())
2529     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2530   else
2531     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2532 }
2533
2534 /* *INDENT-OFF* */
2535 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2536   .function = ip4_rewrite,
2537   .name = "ip4-rewrite",
2538   .vector_size = sizeof (u32),
2539
2540   .format_trace = format_ip4_rewrite_trace,
2541
2542   .n_next_nodes = 2,
2543   .next_nodes = {
2544     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2545     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2546   },
2547 };
2548 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2549
2550 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2551   .function = ip4_rewrite_mcast,
2552   .name = "ip4-rewrite-mcast",
2553   .vector_size = sizeof (u32),
2554
2555   .format_trace = format_ip4_rewrite_trace,
2556   .sibling_of = "ip4-rewrite",
2557 };
2558 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2559
2560 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2561   .function = ip4_mcast_midchain,
2562   .name = "ip4-mcast-midchain",
2563   .vector_size = sizeof (u32),
2564
2565   .format_trace = format_ip4_rewrite_trace,
2566   .sibling_of = "ip4-rewrite",
2567 };
2568 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2569
2570 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2571   .function = ip4_midchain,
2572   .name = "ip4-midchain",
2573   .vector_size = sizeof (u32),
2574   .format_trace = format_ip4_forward_next_trace,
2575   .sibling_of =  "ip4-rewrite",
2576 };
2577 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2578 /* *INDENT-ON* */
2579
2580 int
2581 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2582 {
2583   ip4_fib_mtrie_t *mtrie0;
2584   ip4_fib_mtrie_leaf_t leaf0;
2585   u32 lbi0;
2586
2587   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2588
2589   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2590   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2591   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2592
2593   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2594
2595   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2596 }
2597
2598 static clib_error_t *
2599 test_lookup_command_fn (vlib_main_t * vm,
2600                         unformat_input_t * input, vlib_cli_command_t * cmd)
2601 {
2602   ip4_fib_t *fib;
2603   u32 table_id = 0;
2604   f64 count = 1;
2605   u32 n;
2606   int i;
2607   ip4_address_t ip4_base_address;
2608   u64 errors = 0;
2609
2610   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2611     {
2612       if (unformat (input, "table %d", &table_id))
2613         {
2614           /* Make sure the entry exists. */
2615           fib = ip4_fib_get (table_id);
2616           if ((fib) && (fib->index != table_id))
2617             return clib_error_return (0, "<fib-index> %d does not exist",
2618                                       table_id);
2619         }
2620       else if (unformat (input, "count %f", &count))
2621         ;
2622
2623       else if (unformat (input, "%U",
2624                          unformat_ip4_address, &ip4_base_address))
2625         ;
2626       else
2627         return clib_error_return (0, "unknown input `%U'",
2628                                   format_unformat_error, input);
2629     }
2630
2631   n = count;
2632
2633   for (i = 0; i < n; i++)
2634     {
2635       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2636         errors++;
2637
2638       ip4_base_address.as_u32 =
2639         clib_host_to_net_u32 (1 +
2640                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2641     }
2642
2643   if (errors)
2644     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2645   else
2646     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2647
2648   return 0;
2649 }
2650
2651 /*?
2652  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2653  * given FIB table to determine if there is a conflict with the
2654  * adjacency table. The fib-id can be determined by using the
2655  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2656  * of 0 is used.
2657  *
2658  * @todo This command uses fib-id, other commands use table-id (not
2659  * just a name, they are different indexes). Would like to change this
2660  * to table-id for consistency.
2661  *
2662  * @cliexpar
2663  * Example of how to run the test lookup command:
2664  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2665  * No errors in 2 lookups
2666  * @cliexend
2667 ?*/
2668 /* *INDENT-OFF* */
2669 VLIB_CLI_COMMAND (lookup_test_command, static) =
2670 {
2671   .path = "test lookup",
2672   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2673   .function = test_lookup_command_fn,
2674 };
2675 /* *INDENT-ON* */
2676
2677 int
2678 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2679 {
2680   u32 fib_index;
2681
2682   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2683
2684   if (~0 == fib_index)
2685     return VNET_API_ERROR_NO_SUCH_FIB;
2686
2687   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2688                                   flow_hash_config);
2689
2690   return 0;
2691 }
2692
2693 static clib_error_t *
2694 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2695                              unformat_input_t * input,
2696                              vlib_cli_command_t * cmd)
2697 {
2698   int matched = 0;
2699   u32 table_id = 0;
2700   u32 flow_hash_config = 0;
2701   int rv;
2702
2703   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2704     {
2705       if (unformat (input, "table %d", &table_id))
2706         matched = 1;
2707 #define _(a,v) \
2708     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2709       foreach_flow_hash_bit
2710 #undef _
2711         else
2712         break;
2713     }
2714
2715   if (matched == 0)
2716     return clib_error_return (0, "unknown input `%U'",
2717                               format_unformat_error, input);
2718
2719   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2720   switch (rv)
2721     {
2722     case 0:
2723       break;
2724
2725     case VNET_API_ERROR_NO_SUCH_FIB:
2726       return clib_error_return (0, "no such FIB table %d", table_id);
2727
2728     default:
2729       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2730       break;
2731     }
2732
2733   return 0;
2734 }
2735
2736 /*?
2737  * Configure the set of IPv4 fields used by the flow hash.
2738  *
2739  * @cliexpar
2740  * Example of how to set the flow hash on a given table:
2741  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2742  * Example of how to display the configured flow hash:
2743  * @cliexstart{show ip fib}
2744  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2745  * 0.0.0.0/0
2746  *   unicast-ip4-chain
2747  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2748  *     [0] [@0]: dpo-drop ip6
2749  * 0.0.0.0/32
2750  *   unicast-ip4-chain
2751  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2752  *     [0] [@0]: dpo-drop ip6
2753  * 224.0.0.0/8
2754  *   unicast-ip4-chain
2755  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2756  *     [0] [@0]: dpo-drop ip6
2757  * 6.0.1.2/32
2758  *   unicast-ip4-chain
2759  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2760  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2761  * 7.0.0.1/32
2762  *   unicast-ip4-chain
2763  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2764  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2765  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2766  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2767  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2768  * 240.0.0.0/8
2769  *   unicast-ip4-chain
2770  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2771  *     [0] [@0]: dpo-drop ip6
2772  * 255.255.255.255/32
2773  *   unicast-ip4-chain
2774  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2775  *     [0] [@0]: dpo-drop ip6
2776  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2777  * 0.0.0.0/0
2778  *   unicast-ip4-chain
2779  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2780  *     [0] [@0]: dpo-drop ip6
2781  * 0.0.0.0/32
2782  *   unicast-ip4-chain
2783  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2784  *     [0] [@0]: dpo-drop ip6
2785  * 172.16.1.0/24
2786  *   unicast-ip4-chain
2787  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2788  *     [0] [@4]: ipv4-glean: af_packet0
2789  * 172.16.1.1/32
2790  *   unicast-ip4-chain
2791  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2792  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2793  * 172.16.1.2/32
2794  *   unicast-ip4-chain
2795  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2796  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2797  * 172.16.2.0/24
2798  *   unicast-ip4-chain
2799  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2800  *     [0] [@4]: ipv4-glean: af_packet1
2801  * 172.16.2.1/32
2802  *   unicast-ip4-chain
2803  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2804  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2805  * 224.0.0.0/8
2806  *   unicast-ip4-chain
2807  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2808  *     [0] [@0]: dpo-drop ip6
2809  * 240.0.0.0/8
2810  *   unicast-ip4-chain
2811  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2812  *     [0] [@0]: dpo-drop ip6
2813  * 255.255.255.255/32
2814  *   unicast-ip4-chain
2815  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2816  *     [0] [@0]: dpo-drop ip6
2817  * @cliexend
2818 ?*/
2819 /* *INDENT-OFF* */
2820 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2821 {
2822   .path = "set ip flow-hash",
2823   .short_help =
2824   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2825   .function = set_ip_flow_hash_command_fn,
2826 };
2827 /* *INDENT-ON* */
2828
2829 int
2830 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2831                              u32 table_index)
2832 {
2833   vnet_main_t *vnm = vnet_get_main ();
2834   vnet_interface_main_t *im = &vnm->interface_main;
2835   ip4_main_t *ipm = &ip4_main;
2836   ip_lookup_main_t *lm = &ipm->lookup_main;
2837   vnet_classify_main_t *cm = &vnet_classify_main;
2838   ip4_address_t *if_addr;
2839
2840   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2841     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2842
2843   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2844     return VNET_API_ERROR_NO_SUCH_ENTRY;
2845
2846   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2847   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2848
2849   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2850
2851   if (NULL != if_addr)
2852     {
2853       fib_prefix_t pfx = {
2854         .fp_len = 32,
2855         .fp_proto = FIB_PROTOCOL_IP4,
2856         .fp_addr.ip4 = *if_addr,
2857       };
2858       u32 fib_index;
2859
2860       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2861                                                        sw_if_index);
2862
2863
2864       if (table_index != (u32) ~ 0)
2865         {
2866           dpo_id_t dpo = DPO_INVALID;
2867
2868           dpo_set (&dpo,
2869                    DPO_CLASSIFY,
2870                    DPO_PROTO_IP4,
2871                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2872
2873           fib_table_entry_special_dpo_add (fib_index,
2874                                            &pfx,
2875                                            FIB_SOURCE_CLASSIFY,
2876                                            FIB_ENTRY_FLAG_NONE, &dpo);
2877           dpo_reset (&dpo);
2878         }
2879       else
2880         {
2881           fib_table_entry_special_remove (fib_index,
2882                                           &pfx, FIB_SOURCE_CLASSIFY);
2883         }
2884     }
2885
2886   return 0;
2887 }
2888
2889 static clib_error_t *
2890 set_ip_classify_command_fn (vlib_main_t * vm,
2891                             unformat_input_t * input,
2892                             vlib_cli_command_t * cmd)
2893 {
2894   u32 table_index = ~0;
2895   int table_index_set = 0;
2896   u32 sw_if_index = ~0;
2897   int rv;
2898
2899   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2900     {
2901       if (unformat (input, "table-index %d", &table_index))
2902         table_index_set = 1;
2903       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2904                          vnet_get_main (), &sw_if_index))
2905         ;
2906       else
2907         break;
2908     }
2909
2910   if (table_index_set == 0)
2911     return clib_error_return (0, "classify table-index must be specified");
2912
2913   if (sw_if_index == ~0)
2914     return clib_error_return (0, "interface / subif must be specified");
2915
2916   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2917
2918   switch (rv)
2919     {
2920     case 0:
2921       break;
2922
2923     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2924       return clib_error_return (0, "No such interface");
2925
2926     case VNET_API_ERROR_NO_SUCH_ENTRY:
2927       return clib_error_return (0, "No such classifier table");
2928     }
2929   return 0;
2930 }
2931
2932 /*?
2933  * Assign a classification table to an interface. The classification
2934  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2935  * commands. Once the table is created, use this command to filter packets
2936  * on an interface.
2937  *
2938  * @cliexpar
2939  * Example of how to assign a classification table to an interface:
2940  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2941 ?*/
2942 /* *INDENT-OFF* */
2943 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2944 {
2945     .path = "set ip classify",
2946     .short_help =
2947     "set ip classify intfc <interface> table-index <classify-idx>",
2948     .function = set_ip_classify_command_fn,
2949 };
2950 /* *INDENT-ON* */
2951
2952 static clib_error_t *
2953 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2954 {
2955   ip4_main_t *im = &ip4_main;
2956   uword heapsize = 0;
2957
2958   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2959     {
2960       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
2961         ;
2962       else
2963         return clib_error_return (0,
2964                                   "invalid heap-size parameter `%U'",
2965                                   format_unformat_error, input);
2966     }
2967
2968   im->mtrie_heap_size = heapsize;
2969
2970   return 0;
2971 }
2972
2973 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
2974
2975 /*
2976  * fd.io coding-style-patch-verification: ON
2977  *
2978  * Local Variables:
2979  * eval: (c-set-style "gnu")
2980  * End:
2981  */