d556f0a9310a79582ce3d26264a7561ea7d4aabc
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58
59 /** @brief IPv4 lookup node.
60     @node ip4-lookup
61
62     This is the main IPv4 lookup dispatch node.
63
64     @param vm vlib_main_t corresponding to the current thread
65     @param node vlib_node_runtime_t
66     @param frame vlib_frame_t whose contents should be dispatched
67
68     @par Graph mechanics: buffer metadata, next index usage
69
70     @em Uses:
71     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
72         - Indicates the @c sw_if_index value of the interface that the
73           packet was received on.
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
75         - When the value is @c ~0 then the node performs a longest prefix
76           match (LPM) for the packet destination address in the FIB attached
77           to the receive interface.
78         - Otherwise perform LPM for the packet destination address in the
79           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
80           value (0, 1, ...) and not a VRF id.
81
82     @em Sets:
83     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
84         - The lookup result adjacency index.
85
86     <em>Next Index:</em>
87     - Dispatches the packet to the node index found in
88       ip_adjacency_t @c adj->lookup_next_index
89       (where @c adj is the lookup result adjacency).
90 */
91 static uword
92 ip4_lookup (vlib_main_t * vm,
93             vlib_node_runtime_t * node, vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame,
96                             /* lookup_for_responses_to_locally_received_packets */
97                             0);
98
99 }
100
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .function = ip4_lookup,
107   .name = "ip4-lookup",
108   .vector_size = sizeof (u32),
109   .format_trace = format_ip4_lookup_trace,
110   .n_next_nodes = IP_LOOKUP_N_NEXT,
111   .next_nodes = IP4_LOOKUP_NEXT_NODES,
112 };
113 /* *INDENT-ON* */
114
115 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
116
117 static uword
118 ip4_load_balance (vlib_main_t * vm,
119                   vlib_node_runtime_t * node, vlib_frame_t * frame)
120 {
121   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
122   u32 n_left_from, n_left_to_next, *from, *to_next;
123   ip_lookup_next_t next;
124   u32 thread_index = vlib_get_thread_index ();
125
126   from = vlib_frame_vector_args (frame);
127   n_left_from = frame->n_vectors;
128   next = node->cached_next_index;
129
130   if (node->flags & VLIB_NODE_FLAG_TRACE)
131     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
132
133   while (n_left_from > 0)
134     {
135       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
136
137
138       while (n_left_from >= 4 && n_left_to_next >= 2)
139         {
140           ip_lookup_next_t next0, next1;
141           const load_balance_t *lb0, *lb1;
142           vlib_buffer_t *p0, *p1;
143           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
144           const ip4_header_t *ip0, *ip1;
145           const dpo_id_t *dpo0, *dpo1;
146
147           /* Prefetch next iteration. */
148           {
149             vlib_buffer_t *p2, *p3;
150
151             p2 = vlib_get_buffer (vm, from[2]);
152             p3 = vlib_get_buffer (vm, from[3]);
153
154             vlib_prefetch_buffer_header (p2, STORE);
155             vlib_prefetch_buffer_header (p3, STORE);
156
157             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
158             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
159           }
160
161           pi0 = to_next[0] = from[0];
162           pi1 = to_next[1] = from[1];
163
164           from += 2;
165           n_left_from -= 2;
166           to_next += 2;
167           n_left_to_next -= 2;
168
169           p0 = vlib_get_buffer (vm, pi0);
170           p1 = vlib_get_buffer (vm, pi1);
171
172           ip0 = vlib_buffer_get_current (p0);
173           ip1 = vlib_buffer_get_current (p1);
174           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
175           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
176
177           lb0 = load_balance_get (lbi0);
178           lb1 = load_balance_get (lbi1);
179
180           /*
181            * this node is for via FIBs we can re-use the hash value from the
182            * to node if present.
183            * We don't want to use the same hash value at each level in the recursion
184            * graph as that would lead to polarisation
185            */
186           hc0 = hc1 = 0;
187
188           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
189             {
190               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
191                 {
192                   hc0 = vnet_buffer (p0)->ip.flow_hash =
193                     vnet_buffer (p0)->ip.flow_hash >> 1;
194                 }
195               else
196                 {
197                   hc0 = vnet_buffer (p0)->ip.flow_hash =
198                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
199                 }
200               dpo0 = load_balance_get_fwd_bucket
201                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
202             }
203           else
204             {
205               dpo0 = load_balance_get_bucket_i (lb0, 0);
206             }
207           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
208             {
209               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
210                 {
211                   hc1 = vnet_buffer (p1)->ip.flow_hash =
212                     vnet_buffer (p1)->ip.flow_hash >> 1;
213                 }
214               else
215                 {
216                   hc1 = vnet_buffer (p1)->ip.flow_hash =
217                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
218                 }
219               dpo1 = load_balance_get_fwd_bucket
220                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
221             }
222           else
223             {
224               dpo1 = load_balance_get_bucket_i (lb1, 0);
225             }
226
227           next0 = dpo0->dpoi_next_node;
228           next1 = dpo1->dpoi_next_node;
229
230           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
231           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
232
233           vlib_increment_combined_counter
234             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
235           vlib_increment_combined_counter
236             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
237
238           vlib_validate_buffer_enqueue_x2 (vm, node, next,
239                                            to_next, n_left_to_next,
240                                            pi0, pi1, next0, next1);
241         }
242
243       while (n_left_from > 0 && n_left_to_next > 0)
244         {
245           ip_lookup_next_t next0;
246           const load_balance_t *lb0;
247           vlib_buffer_t *p0;
248           u32 pi0, lbi0, hc0;
249           const ip4_header_t *ip0;
250           const dpo_id_t *dpo0;
251
252           pi0 = from[0];
253           to_next[0] = pi0;
254           from += 1;
255           to_next += 1;
256           n_left_to_next -= 1;
257           n_left_from -= 1;
258
259           p0 = vlib_get_buffer (vm, pi0);
260
261           ip0 = vlib_buffer_get_current (p0);
262           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
263
264           lb0 = load_balance_get (lbi0);
265
266           hc0 = 0;
267           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
268             {
269               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
270                 {
271                   hc0 = vnet_buffer (p0)->ip.flow_hash =
272                     vnet_buffer (p0)->ip.flow_hash >> 1;
273                 }
274               else
275                 {
276                   hc0 = vnet_buffer (p0)->ip.flow_hash =
277                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
278                 }
279               dpo0 = load_balance_get_fwd_bucket
280                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
281             }
282           else
283             {
284               dpo0 = load_balance_get_bucket_i (lb0, 0);
285             }
286
287           next0 = dpo0->dpoi_next_node;
288           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
289
290           vlib_increment_combined_counter
291             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
292
293           vlib_validate_buffer_enqueue_x1 (vm, node, next,
294                                            to_next, n_left_to_next,
295                                            pi0, next0);
296         }
297
298       vlib_put_next_frame (vm, node, next, n_left_to_next);
299     }
300
301   return frame->n_vectors;
302 }
303
304 /* *INDENT-OFF* */
305 VLIB_REGISTER_NODE (ip4_load_balance_node) =
306 {
307   .function = ip4_load_balance,
308   .name = "ip4-load-balance",
309   .vector_size = sizeof (u32),
310   .sibling_of = "ip4-lookup",
311   .format_trace =
312   format_ip4_lookup_trace,
313 };
314 /* *INDENT-ON* */
315
316 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
317
318 /* get first interface address */
319 ip4_address_t *
320 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
321                              ip_interface_address_t ** result_ia)
322 {
323   ip_lookup_main_t *lm = &im->lookup_main;
324   ip_interface_address_t *ia = 0;
325   ip4_address_t *result = 0;
326
327   /* *INDENT-OFF* */
328   foreach_ip_interface_address
329     (lm, ia, sw_if_index,
330      1 /* honor unnumbered */ ,
331      ({
332        ip4_address_t * a =
333          ip_interface_address_get_address (lm, ia);
334        result = a;
335        break;
336      }));
337   /* *INDENT-OFF* */
338   if (result_ia)
339     *result_ia = result ? ia : 0;
340   return result;
341 }
342
343 static void
344 ip4_add_interface_routes (u32 sw_if_index,
345                           ip4_main_t * im, u32 fib_index,
346                           ip_interface_address_t * a)
347 {
348   ip_lookup_main_t *lm = &im->lookup_main;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350   fib_prefix_t pfx = {
351     .fp_len = a->address_length,
352     .fp_proto = FIB_PROTOCOL_IP4,
353     .fp_addr.ip4 = *address,
354   };
355
356   if (pfx.fp_len <= 30)
357     {
358       /* a /30 or shorter - add a glean for the network address */
359       fib_table_entry_update_one_path (fib_index, &pfx,
360                                        FIB_SOURCE_INTERFACE,
361                                        (FIB_ENTRY_FLAG_CONNECTED |
362                                         FIB_ENTRY_FLAG_ATTACHED),
363                                        DPO_PROTO_IP4,
364                                        /* No next-hop address */
365                                        NULL,
366                                        sw_if_index,
367                                        // invalid FIB index
368                                        ~0,
369                                        1,
370                                        // no out-label stack
371                                        NULL,
372                                        FIB_ROUTE_PATH_FLAG_NONE);
373
374       /* Add the two broadcast addresses as drop */
375       fib_prefix_t net_pfx = {
376         .fp_len = 32,
377         .fp_proto = FIB_PROTOCOL_IP4,
378         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
379       };
380       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
381         fib_table_entry_special_add(fib_index,
382                                     &net_pfx,
383                                     FIB_SOURCE_INTERFACE,
384                                     (FIB_ENTRY_FLAG_DROP |
385                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
386       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
387       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
388         fib_table_entry_special_add(fib_index,
389                                     &net_pfx,
390                                     FIB_SOURCE_INTERFACE,
391                                     (FIB_ENTRY_FLAG_DROP |
392                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
393     }
394   else if (pfx.fp_len == 31)
395     {
396       u32 mask = clib_host_to_net_u32(1);
397       fib_prefix_t net_pfx = pfx;
398
399       net_pfx.fp_len = 32;
400       net_pfx.fp_addr.ip4.as_u32 ^= mask;
401
402       /* a /31 - add the other end as an attached host */
403       fib_table_entry_update_one_path (fib_index, &net_pfx,
404                                        FIB_SOURCE_INTERFACE,
405                                        (FIB_ENTRY_FLAG_ATTACHED),
406                                        DPO_PROTO_IP4,
407                                        &net_pfx.fp_addr,
408                                        sw_if_index,
409                                        // invalid FIB index
410                                        ~0,
411                                        1,
412                                        NULL,
413                                        FIB_ROUTE_PATH_FLAG_NONE);
414     }
415   pfx.fp_len = 32;
416
417   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
418     {
419       u32 classify_table_index =
420         lm->classify_table_index_by_sw_if_index[sw_if_index];
421       if (classify_table_index != (u32) ~ 0)
422         {
423           dpo_id_t dpo = DPO_INVALID;
424
425           dpo_set (&dpo,
426                    DPO_CLASSIFY,
427                    DPO_PROTO_IP4,
428                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
429
430           fib_table_entry_special_dpo_add (fib_index,
431                                            &pfx,
432                                            FIB_SOURCE_CLASSIFY,
433                                            FIB_ENTRY_FLAG_NONE, &dpo);
434           dpo_reset (&dpo);
435         }
436     }
437
438   fib_table_entry_update_one_path (fib_index, &pfx,
439                                    FIB_SOURCE_INTERFACE,
440                                    (FIB_ENTRY_FLAG_CONNECTED |
441                                     FIB_ENTRY_FLAG_LOCAL),
442                                    DPO_PROTO_IP4,
443                                    &pfx.fp_addr,
444                                    sw_if_index,
445                                    // invalid FIB index
446                                    ~0,
447                                    1, NULL,
448                                    FIB_ROUTE_PATH_FLAG_NONE);
449 }
450
451 static void
452 ip4_del_interface_routes (ip4_main_t * im,
453                           u32 fib_index,
454                           ip4_address_t * address, u32 address_length)
455 {
456   fib_prefix_t pfx = {
457     .fp_len = address_length,
458     .fp_proto = FIB_PROTOCOL_IP4,
459     .fp_addr.ip4 = *address,
460   };
461
462   if (pfx.fp_len <= 30)
463     {
464       fib_prefix_t net_pfx = {
465         .fp_len = 32,
466         .fp_proto = FIB_PROTOCOL_IP4,
467         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
468       };
469       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
470         fib_table_entry_special_remove(fib_index,
471                                        &net_pfx,
472                                        FIB_SOURCE_INTERFACE);
473       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
474       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
475         fib_table_entry_special_remove(fib_index,
476                                        &net_pfx,
477                                        FIB_SOURCE_INTERFACE);
478       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
479     }
480     else if (pfx.fp_len == 31)
481     {
482       u32 mask = clib_host_to_net_u32(1);
483       fib_prefix_t net_pfx = pfx;
484
485       net_pfx.fp_len = 32;
486       net_pfx.fp_addr.ip4.as_u32 ^= mask;
487
488       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
489     }
490
491   pfx.fp_len = 32;
492   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
493 }
494
495 void
496 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
497 {
498   ip4_main_t *im = &ip4_main;
499
500   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
501
502   /*
503    * enable/disable only on the 1<->0 transition
504    */
505   if (is_enable)
506     {
507       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
508         return;
509     }
510   else
511     {
512       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
513       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
514         return;
515     }
516   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
517                                !is_enable, 0, 0);
518
519
520   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
521                                sw_if_index, !is_enable, 0, 0);
522 }
523
524 static clib_error_t *
525 ip4_add_del_interface_address_internal (vlib_main_t * vm,
526                                         u32 sw_if_index,
527                                         ip4_address_t * address,
528                                         u32 address_length, u32 is_del)
529 {
530   vnet_main_t *vnm = vnet_get_main ();
531   ip4_main_t *im = &ip4_main;
532   ip_lookup_main_t *lm = &im->lookup_main;
533   clib_error_t *error = 0;
534   u32 if_address_index, elts_before;
535   ip4_address_fib_t ip4_af, *addr_fib = 0;
536
537   /* local0 interface doesn't support IP addressing  */
538   if (sw_if_index == 0)
539     {
540       return
541        clib_error_create ("local0 interface doesn't support IP addressing");
542     }
543
544   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
545   ip4_addr_fib_init (&ip4_af, address,
546                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
547   vec_add1 (addr_fib, ip4_af);
548
549   /*
550    * there is no support for adj-fib handling in the presence of overlapping
551    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
552    * most routers do.
553    */
554   /* *INDENT-OFF* */
555   if (!is_del)
556     {
557       /* When adding an address check that it does not conflict
558          with an existing address on any interface in this table. */
559       ip_interface_address_t *ia;
560       vnet_sw_interface_t *sif;
561
562       pool_foreach(sif, vnm->interface_main.sw_interfaces,
563       ({
564           if (im->fib_index_by_sw_if_index[sw_if_index] ==
565               im->fib_index_by_sw_if_index[sif->sw_if_index])
566             {
567               foreach_ip_interface_address
568                 (&im->lookup_main, ia, sif->sw_if_index,
569                  0 /* honor unnumbered */ ,
570                  ({
571                    ip4_address_t * x =
572                      ip_interface_address_get_address
573                      (&im->lookup_main, ia);
574                    if (ip4_destination_matches_route
575                        (im, address, x, ia->address_length) ||
576                        ip4_destination_matches_route (im,
577                                                       x,
578                                                       address,
579                                                       address_length))
580                      {
581                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
582
583                        return
584                          clib_error_create
585                          ("failed to add %U which conflicts with %U for interface %U",
586                           format_ip4_address_and_length, address,
587                           address_length,
588                           format_ip4_address_and_length, x,
589                           ia->address_length,
590                           format_vnet_sw_if_index_name, vnm,
591                           sif->sw_if_index);
592                      }
593                  }));
594             }
595       }));
596     }
597   /* *INDENT-ON* */
598
599   elts_before = pool_elts (lm->if_address_pool);
600
601   error = ip_interface_address_add_del
602     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
603   if (error)
604     goto done;
605
606   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
607
608   if (is_del)
609     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
610   else
611     ip4_add_interface_routes (sw_if_index,
612                               im, ip4_af.fib_index,
613                               pool_elt_at_index
614                               (lm->if_address_pool, if_address_index));
615
616   /* If pool did not grow/shrink: add duplicate address. */
617   if (elts_before != pool_elts (lm->if_address_pool))
618     {
619       ip4_add_del_interface_address_callback_t *cb;
620       vec_foreach (cb, im->add_del_interface_address_callbacks)
621         cb->function (im, cb->function_opaque, sw_if_index,
622                       address, address_length, if_address_index, is_del);
623     }
624
625 done:
626   vec_free (addr_fib);
627   return error;
628 }
629
630 clib_error_t *
631 ip4_add_del_interface_address (vlib_main_t * vm,
632                                u32 sw_if_index,
633                                ip4_address_t * address,
634                                u32 address_length, u32 is_del)
635 {
636   return ip4_add_del_interface_address_internal
637     (vm, sw_if_index, address, address_length, is_del);
638 }
639
640 /* Built-in ip4 unicast rx feature path definition */
641 /* *INDENT-OFF* */
642 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
643 {
644   .arc_name = "ip4-unicast",
645   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
646   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
647 };
648
649 VNET_FEATURE_INIT (ip4_flow_classify, static) =
650 {
651   .arc_name = "ip4-unicast",
652   .node_name = "ip4-flow-classify",
653   .runs_before = VNET_FEATURES ("ip4-inacl"),
654 };
655
656 VNET_FEATURE_INIT (ip4_inacl, static) =
657 {
658   .arc_name = "ip4-unicast",
659   .node_name = "ip4-inacl",
660   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
661 };
662
663 VNET_FEATURE_INIT (ip4_source_check_1, static) =
664 {
665   .arc_name = "ip4-unicast",
666   .node_name = "ip4-source-check-via-rx",
667   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
668 };
669
670 VNET_FEATURE_INIT (ip4_source_check_2, static) =
671 {
672   .arc_name = "ip4-unicast",
673   .node_name = "ip4-source-check-via-any",
674   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
675 };
676
677 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
678 {
679   .arc_name = "ip4-unicast",
680   .node_name = "ip4-source-and-port-range-check-rx",
681   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
682 };
683
684 VNET_FEATURE_INIT (ip4_policer_classify, static) =
685 {
686   .arc_name = "ip4-unicast",
687   .node_name = "ip4-policer-classify",
688   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
689 };
690
691 VNET_FEATURE_INIT (ip4_ipsec, static) =
692 {
693   .arc_name = "ip4-unicast",
694   .node_name = "ipsec-input-ip4",
695   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
696 };
697
698 VNET_FEATURE_INIT (ip4_vpath, static) =
699 {
700   .arc_name = "ip4-unicast",
701   .node_name = "vpath-input-ip4",
702   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
703 };
704
705 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
706 {
707   .arc_name = "ip4-unicast",
708   .node_name = "ip4-vxlan-bypass",
709   .runs_before = VNET_FEATURES ("ip4-lookup"),
710 };
711
712 VNET_FEATURE_INIT (ip4_not_enabled, static) =
713 {
714   .arc_name = "ip4-unicast",
715   .node_name = "ip4-not-enabled",
716   .runs_before = VNET_FEATURES ("ip4-lookup"),
717 };
718
719 VNET_FEATURE_INIT (ip4_lookup, static) =
720 {
721   .arc_name = "ip4-unicast",
722   .node_name = "ip4-lookup",
723   .runs_before = 0,     /* not before any other features */
724 };
725
726 /* Built-in ip4 multicast rx feature path definition */
727 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
728 {
729   .arc_name = "ip4-multicast",
730   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
731   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
732 };
733
734 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
735 {
736   .arc_name = "ip4-multicast",
737   .node_name = "vpath-input-ip4",
738   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
739 };
740
741 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
742 {
743   .arc_name = "ip4-multicast",
744   .node_name = "ip4-not-enabled",
745   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
746 };
747
748 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
749 {
750   .arc_name = "ip4-multicast",
751   .node_name = "ip4-mfib-forward-lookup",
752   .runs_before = 0,     /* last feature */
753 };
754
755 /* Source and port-range check ip4 tx feature path definition */
756 VNET_FEATURE_ARC_INIT (ip4_output, static) =
757 {
758   .arc_name = "ip4-output",
759   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
760   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
761 };
762
763 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
764 {
765   .arc_name = "ip4-output",
766   .node_name = "ip4-source-and-port-range-check-tx",
767   .runs_before = VNET_FEATURES ("ip4-outacl"),
768 };
769
770 VNET_FEATURE_INIT (ip4_outacl, static) =
771 {
772   .arc_name = "ip4-output",
773   .node_name = "ip4-outacl",
774   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
775 };
776
777 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
778 {
779   .arc_name = "ip4-output",
780   .node_name = "ipsec-output-ip4",
781   .runs_before = VNET_FEATURES ("interface-output"),
782 };
783
784 /* Built-in ip4 tx feature path definition */
785 VNET_FEATURE_INIT (ip4_interface_output, static) =
786 {
787   .arc_name = "ip4-output",
788   .node_name = "interface-output",
789   .runs_before = 0,     /* not before any other features */
790 };
791 /* *INDENT-ON* */
792
793 static clib_error_t *
794 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
795 {
796   ip4_main_t *im = &ip4_main;
797
798   /* Fill in lookup tables with default table (0). */
799   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
800   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
801
802   if (!is_add)
803     {
804       ip4_main_t *im4 = &ip4_main;
805       ip_lookup_main_t *lm4 = &im4->lookup_main;
806       ip_interface_address_t *ia = 0;
807       ip4_address_t *address;
808       vlib_main_t *vm = vlib_get_main ();
809
810       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
811       /* *INDENT-OFF* */
812       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
813       ({
814         address = ip_interface_address_get_address (lm4, ia);
815         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
816       }));
817       /* *INDENT-ON* */
818     }
819
820   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
821                                is_add, 0, 0);
822
823   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
824                                sw_if_index, is_add, 0, 0);
825
826   return /* no error */ 0;
827 }
828
829 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
830
831 /* Global IP4 main. */
832 ip4_main_t ip4_main;
833
834 clib_error_t *
835 ip4_lookup_init (vlib_main_t * vm)
836 {
837   ip4_main_t *im = &ip4_main;
838   clib_error_t *error;
839   uword i;
840
841   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
842     return error;
843   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
844     return (error);
845   if ((error = vlib_call_init_function (vm, fib_module_init)))
846     return error;
847   if ((error = vlib_call_init_function (vm, mfib_module_init)))
848     return error;
849
850   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
851     {
852       u32 m;
853
854       if (i < 32)
855         m = pow2_mask (i) << (32 - i);
856       else
857         m = ~0;
858       im->fib_masks[i] = clib_host_to_net_u32 (m);
859     }
860
861   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
862
863   /* Create FIB with index 0 and table id of 0. */
864   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
865                                      FIB_SOURCE_DEFAULT_ROUTE);
866   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
867                                       MFIB_SOURCE_DEFAULT_ROUTE);
868
869   {
870     pg_node_t *pn;
871     pn = pg_get_node (ip4_lookup_node.index);
872     pn->unformat_edit = unformat_pg_ip4_header;
873   }
874
875   {
876     ethernet_arp_header_t h;
877
878     memset (&h, 0, sizeof (h));
879
880     /* Set target ethernet address to all zeros. */
881     memset (h.ip4_over_ethernet[1].ethernet, 0,
882             sizeof (h.ip4_over_ethernet[1].ethernet));
883
884 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
885 #define _8(f,v) h.f = v;
886     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
887     _16 (l3_type, ETHERNET_TYPE_IP4);
888     _8 (n_l2_address_bytes, 6);
889     _8 (n_l3_address_bytes, 4);
890     _16 (opcode, ETHERNET_ARP_OPCODE_request);
891 #undef _16
892 #undef _8
893
894     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
895                                /* data */ &h,
896                                sizeof (h),
897                                /* alloc chunk size */ 8,
898                                "ip4 arp");
899   }
900
901   return error;
902 }
903
904 VLIB_INIT_FUNCTION (ip4_lookup_init);
905
906 typedef struct
907 {
908   /* Adjacency taken. */
909   u32 dpo_index;
910   u32 flow_hash;
911   u32 fib_index;
912
913   /* Packet data, possibly *after* rewrite. */
914   u8 packet_data[64 - 1 * sizeof (u32)];
915 }
916 ip4_forward_next_trace_t;
917
918 u8 *
919 format_ip4_forward_next_trace (u8 * s, va_list * args)
920 {
921   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
922   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
923   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
924   u32 indent = format_get_indent (s);
925   s = format (s, "%U%U",
926               format_white_space, indent,
927               format_ip4_header, t->packet_data, sizeof (t->packet_data));
928   return s;
929 }
930
931 static u8 *
932 format_ip4_lookup_trace (u8 * s, va_list * args)
933 {
934   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
935   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
936   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
937   u32 indent = format_get_indent (s);
938
939   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
940               t->fib_index, t->dpo_index, t->flow_hash);
941   s = format (s, "\n%U%U",
942               format_white_space, indent,
943               format_ip4_header, t->packet_data, sizeof (t->packet_data));
944   return s;
945 }
946
947 static u8 *
948 format_ip4_rewrite_trace (u8 * s, va_list * args)
949 {
950   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
951   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
952   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
953   u32 indent = format_get_indent (s);
954
955   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
956               t->fib_index, t->dpo_index, format_ip_adjacency,
957               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
958   s = format (s, "\n%U%U",
959               format_white_space, indent,
960               format_ip_adjacency_packet_data,
961               t->dpo_index, t->packet_data, sizeof (t->packet_data));
962   return s;
963 }
964
965 /* Common trace function for all ip4-forward next nodes. */
966 void
967 ip4_forward_next_trace (vlib_main_t * vm,
968                         vlib_node_runtime_t * node,
969                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
970 {
971   u32 *from, n_left;
972   ip4_main_t *im = &ip4_main;
973
974   n_left = frame->n_vectors;
975   from = vlib_frame_vector_args (frame);
976
977   while (n_left >= 4)
978     {
979       u32 bi0, bi1;
980       vlib_buffer_t *b0, *b1;
981       ip4_forward_next_trace_t *t0, *t1;
982
983       /* Prefetch next iteration. */
984       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
985       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
986
987       bi0 = from[0];
988       bi1 = from[1];
989
990       b0 = vlib_get_buffer (vm, bi0);
991       b1 = vlib_get_buffer (vm, bi1);
992
993       if (b0->flags & VLIB_BUFFER_IS_TRACED)
994         {
995           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
996           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
997           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
998           t0->fib_index =
999             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1000              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1001             vec_elt (im->fib_index_by_sw_if_index,
1002                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1003
1004           clib_memcpy (t0->packet_data,
1005                        vlib_buffer_get_current (b0),
1006                        sizeof (t0->packet_data));
1007         }
1008       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1009         {
1010           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1011           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1012           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1013           t1->fib_index =
1014             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1015              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1016             vec_elt (im->fib_index_by_sw_if_index,
1017                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1018           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1019                        sizeof (t1->packet_data));
1020         }
1021       from += 2;
1022       n_left -= 2;
1023     }
1024
1025   while (n_left >= 1)
1026     {
1027       u32 bi0;
1028       vlib_buffer_t *b0;
1029       ip4_forward_next_trace_t *t0;
1030
1031       bi0 = from[0];
1032
1033       b0 = vlib_get_buffer (vm, bi0);
1034
1035       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1036         {
1037           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1038           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1039           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1040           t0->fib_index =
1041             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1042              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1043             vec_elt (im->fib_index_by_sw_if_index,
1044                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1045           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1046                        sizeof (t0->packet_data));
1047         }
1048       from += 1;
1049       n_left -= 1;
1050     }
1051 }
1052
1053 /* Compute TCP/UDP/ICMP4 checksum in software. */
1054 u16
1055 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1056                               ip4_header_t * ip0)
1057 {
1058   ip_csum_t sum0;
1059   u32 ip_header_length, payload_length_host_byte_order;
1060   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1061   u16 sum16;
1062   void *data_this_buffer;
1063
1064   /* Initialize checksum with ip header. */
1065   ip_header_length = ip4_header_bytes (ip0);
1066   payload_length_host_byte_order =
1067     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1068   sum0 =
1069     clib_host_to_net_u32 (payload_length_host_byte_order +
1070                           (ip0->protocol << 16));
1071
1072   if (BITS (uword) == 32)
1073     {
1074       sum0 =
1075         ip_csum_with_carry (sum0,
1076                             clib_mem_unaligned (&ip0->src_address, u32));
1077       sum0 =
1078         ip_csum_with_carry (sum0,
1079                             clib_mem_unaligned (&ip0->dst_address, u32));
1080     }
1081   else
1082     sum0 =
1083       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1084
1085   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1086   data_this_buffer = (void *) ip0 + ip_header_length;
1087   n_ip_bytes_this_buffer =
1088     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1089   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1090     {
1091       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1092         n_ip_bytes_this_buffer - ip_header_length : 0;
1093     }
1094   while (1)
1095     {
1096       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1097       n_bytes_left -= n_this_buffer;
1098       if (n_bytes_left == 0)
1099         break;
1100
1101       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1102       p0 = vlib_get_buffer (vm, p0->next_buffer);
1103       data_this_buffer = vlib_buffer_get_current (p0);
1104       n_this_buffer = p0->current_length;
1105     }
1106
1107   sum16 = ~ip_csum_fold (sum0);
1108
1109   return sum16;
1110 }
1111
1112 u32
1113 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1114 {
1115   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1116   udp_header_t *udp0;
1117   u16 sum16;
1118
1119   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1120           || ip0->protocol == IP_PROTOCOL_UDP);
1121
1122   udp0 = (void *) (ip0 + 1);
1123   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1124     {
1125       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1126                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1127       return p0->flags;
1128     }
1129
1130   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1131
1132   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1133                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1134
1135   return p0->flags;
1136 }
1137
1138 /* *INDENT-OFF* */
1139 VNET_FEATURE_ARC_INIT (ip4_local) =
1140 {
1141   .arc_name  = "ip4-local",
1142   .start_nodes = VNET_FEATURES ("ip4-local"),
1143 };
1144 /* *INDENT-ON* */
1145
1146 static inline void
1147 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1148                             ip4_header_t * ip, u8 is_udp, u8 * error,
1149                             u8 * good_tcp_udp)
1150 {
1151   u32 flags0;
1152   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1153   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1154   if (is_udp)
1155     {
1156       udp_header_t *udp;
1157       u32 ip_len, udp_len;
1158       i32 len_diff;
1159       udp = ip4_next_header (ip);
1160       /* Verify UDP length. */
1161       ip_len = clib_net_to_host_u16 (ip->length);
1162       udp_len = clib_net_to_host_u16 (udp->length);
1163
1164       len_diff = ip_len - udp_len;
1165       *good_tcp_udp &= len_diff >= 0;
1166       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1167     }
1168 }
1169
/*
 * Checksum-state predicates on a buffer pointer. Every argument and every
 * expansion is parenthesised so the macros can be negated, combined with
 * other operators, or handed a pointer expression without changing meaning.
 */

/* Non-zero when either the TCP or the UDP checksum-offload flag is set. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* Non-zero when a TCP/UDP packet has neither a computed nor an offloaded
 * checksum and so must be validated in software. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)         \
            || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1181
1182 static inline void
1183 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1184                          ip4_header_t * ih, u8 * error)
1185 {
1186   u8 is_udp, is_tcp_udp, good_tcp_udp;
1187
1188   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1189   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1190
1191   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1192     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1193   else
1194     good_tcp_udp = ip4_local_csum_is_valid (b);
1195
1196   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1197   *error = (is_tcp_udp && !good_tcp_udp
1198             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1199 }
1200
1201 static inline void
1202 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1203                             ip4_header_t ** ih, u8 * error)
1204 {
1205   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1206
1207   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1208   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1209
1210   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1211   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1212
1213   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1214   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1215
1216   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1217                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1218     {
1219       if (is_tcp_udp[0])
1220         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1221                                     &good_tcp_udp[0]);
1222       if (is_tcp_udp[1])
1223         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1224                                     &good_tcp_udp[1]);
1225     }
1226
1227   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1228               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1229   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1230               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1231 }
1232
1233 static inline void
1234 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1235                               vlib_buffer_t * b, u16 * next, u8 error,
1236                               u8 head_of_feature_arc)
1237 {
1238   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1239   u32 next_index;
1240
1241   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1242   b->error = error ? error_node->errors[error] : 0;
1243   if (head_of_feature_arc)
1244     {
1245       next_index = *next;
1246       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1247         {
1248           vnet_feature_arc_start (arc_index,
1249                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1250                                   &next_index, b);
1251           *next = next_index;
1252         }
1253     }
1254 }
1255
1256 typedef struct
1257 {
1258   ip4_address_t src;
1259   u32 lbi;
1260   u8 error;
1261 } ip4_local_last_check_t;
1262
1263 static inline void
1264 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1265                      ip4_local_last_check_t * last_check, u8 * error0)
1266 {
1267   ip4_fib_mtrie_leaf_t leaf0;
1268   ip4_fib_mtrie_t *mtrie0;
1269   const dpo_id_t *dpo0;
1270   load_balance_t *lb0;
1271   u32 lbi0;
1272
1273   vnet_buffer (b)->ip.fib_index =
1274     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1275     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1276
1277   if (PREDICT_FALSE (last_check->src.as_u32 != ip0->src_address.as_u32))
1278     {
1279       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1280       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1281       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1282       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1283       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1284
1285       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1286       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1287
1288       lb0 = load_balance_get (lbi0);
1289       dpo0 = load_balance_get_bucket_i (lb0, 0);
1290
1291       /*
1292        * Must have a route to source otherwise we drop the packet.
1293        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1294        *
1295        * The checks are:
1296        *  - the source is a recieve => it's from us => bogus, do this
1297        *    first since it sets a different error code.
1298        *  - uRPF check for any route to source - accept if passes.
1299        *  - allow packets destined to the broadcast address from unknown sources
1300        */
1301
1302       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1303                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1304                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1305       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1306                   && !fib_urpf_check_size (lb0->lb_urpf)
1307                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1308                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1309
1310       last_check->src.as_u32 = ip0->src_address.as_u32;
1311       last_check->lbi = lbi0;
1312       last_check->error = *error0;
1313     }
1314   else
1315     {
1316       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1317       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1318       *error0 = last_check->error;
1319     }
1320 }
1321
1322 static inline void
1323 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1324                         ip4_local_last_check_t * last_check, u8 * error)
1325 {
1326   ip4_fib_mtrie_leaf_t leaf[2];
1327   ip4_fib_mtrie_t *mtrie[2];
1328   const dpo_id_t *dpo[2];
1329   load_balance_t *lb[2];
1330   u32 not_last_hit = 0;
1331   u32 lbi[2];
1332
1333   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1334   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1335
1336   vnet_buffer (b[0])->ip.fib_index =
1337     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1338     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1339     vnet_buffer (b[0])->ip.fib_index;
1340
1341   vnet_buffer (b[1])->ip.fib_index =
1342     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1343     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1344     vnet_buffer (b[1])->ip.fib_index;
1345
1346   if (PREDICT_FALSE (not_last_hit))
1347     {
1348       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1349       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1350
1351       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1352       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1353
1354       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1355                                            &ip[0]->src_address, 2);
1356       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1357                                            &ip[1]->src_address, 2);
1358
1359       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1360                                            &ip[0]->src_address, 3);
1361       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1362                                            &ip[1]->src_address, 3);
1363
1364       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1365       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1366
1367       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1368       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1369
1370       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1371       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1372
1373       lb[0] = load_balance_get (lbi[0]);
1374       lb[1] = load_balance_get (lbi[1]);
1375
1376       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1377       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1378
1379       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1380                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1381                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1382       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1383                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1384                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1385                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1386
1387       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1388                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1389                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1390       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1391                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1392                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1393                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1394
1395       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1396       last_check->lbi = lbi[1];
1397       last_check->error = error[1];
1398     }
1399   else
1400     {
1401       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1402       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1403
1404       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1405       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1406
1407       error[0] = last_check->error;
1408       error[1] = last_check->error;
1409     }
1410 }
1411
1412 static inline uword
1413 ip4_local_inline (vlib_main_t * vm,
1414                   vlib_node_runtime_t * node,
1415                   vlib_frame_t * frame, int head_of_feature_arc)
1416 {
1417   ip4_main_t *im = &ip4_main;
1418   ip_lookup_main_t *lm = &im->lookup_main;
1419   u32 *from, n_left_from;
1420   vlib_node_runtime_t *error_node =
1421     vlib_node_get_runtime (vm, ip4_input_node.index);
1422   u16 nexts[VLIB_FRAME_SIZE], *next;
1423   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1424   ip4_header_t *ip[2];
1425   u8 error[2];
1426
1427   ip4_local_last_check_t last_check = {
1428     .src = {.as_u32 = 0},
1429     .lbi = ~0,
1430     .error = IP4_ERROR_UNKNOWN_PROTOCOL
1431   };
1432
1433   from = vlib_frame_vector_args (frame);
1434   n_left_from = frame->n_vectors;
1435
1436   if (node->flags & VLIB_NODE_FLAG_TRACE)
1437     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1438
1439   vlib_get_buffers (vm, from, bufs, n_left_from);
1440   b = bufs;
1441   next = nexts;
1442
1443   while (n_left_from >= 6)
1444     {
1445       u32 is_nat, not_batch = 0;
1446
1447       /* Prefetch next iteration. */
1448       {
1449         vlib_prefetch_buffer_header (b[4], LOAD);
1450         vlib_prefetch_buffer_header (b[5], LOAD);
1451
1452         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1453         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1454       }
1455
1456       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1457
1458       ip[0] = vlib_buffer_get_current (b[0]);
1459       ip[1] = vlib_buffer_get_current (b[1]);
1460
1461       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1462       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1463
1464       is_nat = b[0]->flags & VNET_BUFFER_F_IS_NATED;
1465       not_batch |= is_nat ^ (b[1]->flags & VNET_BUFFER_F_IS_NATED);
1466
1467       if (head_of_feature_arc == 0 || (is_nat && not_batch == 0))
1468         goto skip_checks;
1469
1470       if (PREDICT_TRUE (not_batch == 0))
1471         {
1472           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1473           ip4_local_check_src_x2 (b, ip, &last_check, error);
1474         }
1475       else
1476         {
1477           if (!(b[0]->flags & VNET_BUFFER_F_IS_NATED))
1478             {
1479               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1480               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1481             }
1482           if (!(b[1]->flags & VNET_BUFFER_F_IS_NATED))
1483             {
1484               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1485               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1486             }
1487         }
1488
1489     skip_checks:
1490
1491       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1492       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1493       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1494                                     head_of_feature_arc);
1495       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1496                                     head_of_feature_arc);
1497
1498       b += 2;
1499       next += 2;
1500       n_left_from -= 2;
1501     }
1502
1503   while (n_left_from > 0)
1504     {
1505       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1506
1507       ip[0] = vlib_buffer_get_current (b[0]);
1508       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1509
1510       if (head_of_feature_arc == 0 || (b[0]->flags & VNET_BUFFER_F_IS_NATED))
1511         goto skip_check;
1512
1513       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1514       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1515
1516     skip_check:
1517
1518       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1519       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1520                                     head_of_feature_arc);
1521
1522       b += 1;
1523       next += 1;
1524       n_left_from -= 1;
1525     }
1526
1527   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1528   return frame->n_vectors;
1529 }
1530
1531 static uword
1532 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1533 {
1534   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1535 }
1536
1537 /* *INDENT-OFF* */
1538 VLIB_REGISTER_NODE (ip4_local_node) =
1539 {
1540   .function = ip4_local,
1541   .name = "ip4-local",
1542   .vector_size = sizeof (u32),
1543   .format_trace = format_ip4_forward_next_trace,
1544   .n_next_nodes = IP_LOCAL_N_NEXT,
1545   .next_nodes =
1546   {
1547     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1548     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1549     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1550     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1551   },
1552 };
1553 /* *INDENT-ON* */
1554
1555 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1556
1557 static uword
1558 ip4_local_end_of_arc (vlib_main_t * vm,
1559                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1560 {
1561   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1562 }
1563
1564 /* *INDENT-OFF* */
1565 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1566   .function = ip4_local_end_of_arc,
1567   .name = "ip4-local-end-of-arc",
1568   .vector_size = sizeof (u32),
1569
1570   .format_trace = format_ip4_forward_next_trace,
1571   .sibling_of = "ip4-local",
1572 };
1573
1574 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1575
1576 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1577   .arc_name = "ip4-local",
1578   .node_name = "ip4-local-end-of-arc",
1579   .runs_before = 0, /* not before any other features */
1580 };
1581 /* *INDENT-ON* */
1582
1583 void
1584 ip4_register_protocol (u32 protocol, u32 node_index)
1585 {
1586   vlib_main_t *vm = vlib_get_main ();
1587   ip4_main_t *im = &ip4_main;
1588   ip_lookup_main_t *lm = &im->lookup_main;
1589
1590   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1591   lm->local_next_by_ip_protocol[protocol] =
1592     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1593 }
1594
1595 static clib_error_t *
1596 show_ip_local_command_fn (vlib_main_t * vm,
1597                           unformat_input_t * input, vlib_cli_command_t * cmd)
1598 {
1599   ip4_main_t *im = &ip4_main;
1600   ip_lookup_main_t *lm = &im->lookup_main;
1601   int i;
1602
1603   vlib_cli_output (vm, "Protocols handled by ip4_local");
1604   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1605     {
1606       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1607         {
1608           u32 node_index = vlib_get_node (vm,
1609                                           ip4_local_node.index)->
1610             next_nodes[lm->local_next_by_ip_protocol[i]];
1611           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1612                            node_index);
1613         }
1614     }
1615   return 0;
1616 }
1617
1618
1619
1620 /*?
1621  * Display the set of protocols handled by the local IPv4 stack.
1622  *
1623  * @cliexpar
1624  * Example of how to display local protocol table:
1625  * @cliexstart{show ip local}
1626  * Protocols handled by ip4_local
1627  * 1
1628  * 17
1629  * 47
1630  * @cliexend
1631 ?*/
1632 /* *INDENT-OFF* */
1633 VLIB_CLI_COMMAND (show_ip_local, static) =
1634 {
1635   .path = "show ip local",
1636   .function = show_ip_local_command_fn,
1637   .short_help = "show ip local",
1638 };
1639 /* *INDENT-ON* */
1640
1641 always_inline uword
1642 ip4_arp_inline (vlib_main_t * vm,
1643                 vlib_node_runtime_t * node,
1644                 vlib_frame_t * frame, int is_glean)
1645 {
1646   vnet_main_t *vnm = vnet_get_main ();
1647   ip4_main_t *im = &ip4_main;
1648   ip_lookup_main_t *lm = &im->lookup_main;
1649   u32 *from, *to_next_drop;
1650   uword n_left_from, n_left_to_next_drop, next_index;
1651   static f64 time_last_seed_change = -1e100;
1652   static u32 hash_seeds[3];
1653   static uword hash_bitmap[256 / BITS (uword)];
1654   f64 time_now;
1655
1656   if (node->flags & VLIB_NODE_FLAG_TRACE)
1657     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1658
1659   time_now = vlib_time_now (vm);
1660   if (time_now - time_last_seed_change > 1e-3)
1661     {
1662       uword i;
1663       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1664                                             sizeof (hash_seeds));
1665       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1666         hash_seeds[i] = r[i];
1667
1668       /* Mark all hash keys as been no-seen before. */
1669       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1670         hash_bitmap[i] = 0;
1671
1672       time_last_seed_change = time_now;
1673     }
1674
1675   from = vlib_frame_vector_args (frame);
1676   n_left_from = frame->n_vectors;
1677   next_index = node->cached_next_index;
1678   if (next_index == IP4_ARP_NEXT_DROP)
1679     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1680
1681   while (n_left_from > 0)
1682     {
1683       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1684                            to_next_drop, n_left_to_next_drop);
1685
1686       while (n_left_from > 0 && n_left_to_next_drop > 0)
1687         {
1688           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1689           ip_adjacency_t *adj0;
1690           vlib_buffer_t *p0;
1691           ip4_header_t *ip0;
1692           uword bm0;
1693
1694           pi0 = from[0];
1695
1696           p0 = vlib_get_buffer (vm, pi0);
1697
1698           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1699           adj0 = adj_get (adj_index0);
1700           ip0 = vlib_buffer_get_current (p0);
1701
1702           a0 = hash_seeds[0];
1703           b0 = hash_seeds[1];
1704           c0 = hash_seeds[2];
1705
1706           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1707           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1708
1709           if (is_glean)
1710             {
1711               /*
1712                * this is the Glean case, so we are ARPing for the
1713                * packet's destination
1714                */
1715               a0 ^= ip0->dst_address.data_u32;
1716             }
1717           else
1718             {
1719               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1720             }
1721           b0 ^= sw_if_index0;
1722
1723           hash_v3_mix32 (a0, b0, c0);
1724           hash_v3_finalize32 (a0, b0, c0);
1725
1726           c0 &= BITS (hash_bitmap) - 1;
1727           m0 = (uword) 1 << (c0 % BITS (uword));
1728           c0 = c0 / BITS (uword);
1729
1730           bm0 = hash_bitmap[c0];
1731           drop0 = (bm0 & m0) != 0;
1732
1733           /* Mark it as seen. */
1734           hash_bitmap[c0] = bm0 | m0;
1735
1736           from += 1;
1737           n_left_from -= 1;
1738           to_next_drop[0] = pi0;
1739           to_next_drop += 1;
1740           n_left_to_next_drop -= 1;
1741
1742           p0->error =
1743             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
1744                          IP4_ARP_ERROR_REQUEST_SENT];
1745
1746           /*
1747            * the adj has been updated to a rewrite but the node the DPO that got
1748            * us here hasn't - yet. no big deal. we'll drop while we wait.
1749            */
1750           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1751             continue;
1752
1753           if (drop0)
1754             continue;
1755
1756           /*
1757            * Can happen if the control-plane is programming tables
1758            * with traffic flowing; at least that's today's lame excuse.
1759            */
1760           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1761               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1762             {
1763               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1764             }
1765           else
1766             /* Send ARP request. */
1767             {
1768               u32 bi0 = 0;
1769               vlib_buffer_t *b0;
1770               ethernet_arp_header_t *h0;
1771               vnet_hw_interface_t *hw_if0;
1772
1773               h0 =
1774                 vlib_packet_template_get_packet (vm,
1775                                                  &im->ip4_arp_request_packet_template,
1776                                                  &bi0);
1777
1778               /* Seems we're out of buffers */
1779               if (PREDICT_FALSE (!h0))
1780                 continue;
1781
1782               /* Add rewrite/encap string for ARP packet. */
1783               vnet_rewrite_one_header (adj0[0], h0,
1784                                        sizeof (ethernet_header_t));
1785
1786               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1787
1788               /* Src ethernet address in ARP header. */
1789               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
1790                            hw_if0->hw_address,
1791                            sizeof (h0->ip4_over_ethernet[0].ethernet));
1792
1793               if (is_glean)
1794                 {
1795                   /* The interface's source address is stashed in the Glean Adj */
1796                   h0->ip4_over_ethernet[0].ip4 =
1797                     adj0->sub_type.glean.receive_addr.ip4;
1798
1799                   /* Copy in destination address we are requesting. This is the
1800                    * glean case, so it's the packet's destination.*/
1801                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1802                     ip0->dst_address.data_u32;
1803                 }
1804               else
1805                 {
1806                   /* Src IP address in ARP header. */
1807                   if (ip4_src_address_for_packet (lm, sw_if_index0,
1808                                                   &h0->
1809                                                   ip4_over_ethernet[0].ip4))
1810                     {
1811                       /* No source address available */
1812                       p0->error =
1813                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1814                       vlib_buffer_free (vm, &bi0, 1);
1815                       continue;
1816                     }
1817
1818                   /* Copy in destination address we are requesting from the
1819                      incomplete adj */
1820                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1821                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1822                 }
1823
1824               vlib_buffer_copy_trace_flag (vm, p0, bi0);
1825               b0 = vlib_get_buffer (vm, bi0);
1826               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1827               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1828
1829               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1830
1831               vlib_set_next_frame_buffer (vm, node,
1832                                           adj0->rewrite_header.next_index,
1833                                           bi0);
1834             }
1835         }
1836
1837       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1838     }
1839
1840   return frame->n_vectors;
1841 }
1842
1843 static uword
1844 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1845 {
1846   return (ip4_arp_inline (vm, node, frame, 0));
1847 }
1848
1849 static uword
1850 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1851 {
1852   return (ip4_arp_inline (vm, node, frame, 1));
1853 }
1854
1855 static char *ip4_arp_error_strings[] = {
1856   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1857   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1858   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1859   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1860   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1861   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1862 };
1863
1864 /* *INDENT-OFF* */
1865 VLIB_REGISTER_NODE (ip4_arp_node) =
1866 {
1867   .function = ip4_arp,
1868   .name = "ip4-arp",
1869   .vector_size = sizeof (u32),
1870   .format_trace = format_ip4_forward_next_trace,
1871   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1872   .error_strings = ip4_arp_error_strings,
1873   .n_next_nodes = IP4_ARP_N_NEXT,
1874   .next_nodes =
1875   {
1876     [IP4_ARP_NEXT_DROP] = "error-drop",
1877   },
1878 };
1879
1880 VLIB_REGISTER_NODE (ip4_glean_node) =
1881 {
1882   .function = ip4_glean,
1883   .name = "ip4-glean",
1884   .vector_size = sizeof (u32),
1885   .format_trace = format_ip4_forward_next_trace,
1886   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1887   .error_strings = ip4_arp_error_strings,
1888   .n_next_nodes = IP4_ARP_N_NEXT,
1889   .next_nodes = {
1890   [IP4_ARP_NEXT_DROP] = "error-drop",
1891   },
1892 };
1893 /* *INDENT-ON* */
1894
/*
 * X-macro list of the IP4_ARP_ERROR_* suffixes that arp_notrace_init()
 * registers with the pcap drop-trace filter.  The user supplies _(name).
 */
#define foreach_notrace_ip4_arp_error \
  _(DROP) \
  _(REQUEST_SENT) \
  _(REPLICATE_DROP) \
  _(REPLICATE_FAIL)
1900
1901 clib_error_t *
1902 arp_notrace_init (vlib_main_t * vm)
1903 {
1904   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1905
1906   /* don't trace ARP request packets */
1907 #define _(a)                                    \
1908     vnet_pcap_drop_trace_filter_add_del         \
1909         (rt->errors[IP4_ARP_ERROR_##a],         \
1910          1 /* is_add */);
1911   foreach_notrace_ip4_arp_error;
1912 #undef _
1913   return 0;
1914 }
1915
1916 VLIB_INIT_FUNCTION (arp_notrace_init);
1917
1918
1919 /* Send an ARP request to see if given destination is reachable on given interface. */
1920 clib_error_t *
1921 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1922                     u8 refresh)
1923 {
1924   vnet_main_t *vnm = vnet_get_main ();
1925   ip4_main_t *im = &ip4_main;
1926   ethernet_arp_header_t *h;
1927   ip4_address_t *src;
1928   ip_interface_address_t *ia;
1929   ip_adjacency_t *adj;
1930   vnet_hw_interface_t *hi;
1931   vnet_sw_interface_t *si;
1932   vlib_buffer_t *b;
1933   adj_index_t ai;
1934   u32 bi = 0;
1935   u8 unicast_rewrite = 0;
1936
1937   si = vnet_get_sw_interface (vnm, sw_if_index);
1938
1939   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1940     {
1941       return clib_error_return (0, "%U: interface %U down",
1942                                 format_ip4_address, dst,
1943                                 format_vnet_sw_if_index_name, vnm,
1944                                 sw_if_index);
1945     }
1946
1947   src =
1948     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1949   if (!src)
1950     {
1951       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1952       return clib_error_return
1953         (0,
1954          "no matching interface address for destination %U (interface %U)",
1955          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
1956          sw_if_index);
1957     }
1958
1959   h = vlib_packet_template_get_packet (vm,
1960                                        &im->ip4_arp_request_packet_template,
1961                                        &bi);
1962
1963   if (!h)
1964     return clib_error_return (0, "ARP request packet allocation failed");
1965
1966   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1967   if (PREDICT_FALSE (!hi->hw_address))
1968     {
1969       return clib_error_return (0, "%U: interface %U do not support ip probe",
1970                                 format_ip4_address, dst,
1971                                 format_vnet_sw_if_index_name, vnm,
1972                                 sw_if_index);
1973     }
1974
1975   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
1976                sizeof (h->ip4_over_ethernet[0].ethernet));
1977
1978   h->ip4_over_ethernet[0].ip4 = src[0];
1979   h->ip4_over_ethernet[1].ip4 = dst[0];
1980
1981   b = vlib_get_buffer (vm, bi);
1982   vnet_buffer (b)->sw_if_index[VLIB_RX] =
1983     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1984
1985   ip46_address_t nh = {
1986     .ip4 = *dst,
1987   };
1988
1989   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
1990                             VNET_LINK_IP4, &nh, sw_if_index);
1991   adj = adj_get (ai);
1992
1993   /* Peer has been previously resolved, retrieve glean adj instead */
1994   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
1995     {
1996       if (refresh)
1997         unicast_rewrite = 1;
1998       else
1999         {
2000           adj_unlock (ai);
2001           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2002                                       VNET_LINK_IP4, sw_if_index, &nh);
2003           adj = adj_get (ai);
2004         }
2005     }
2006
2007   /* Add encapsulation string for software interface (e.g. ethernet header). */
2008   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2009   if (unicast_rewrite)
2010     {
2011       u16 *etype = vlib_buffer_get_current (b) - 2;
2012       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2013     }
2014   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2015
2016   {
2017     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2018     u32 *to_next = vlib_frame_vector_args (f);
2019     to_next[0] = bi;
2020     f->n_vectors = 1;
2021     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2022   }
2023
2024   adj_unlock (ai);
2025   return /* no error */ 0;
2026 }
2027
/*
 * Statically registered next nodes of the ip4-rewrite node family.
 * The names in the comments are those registered by ip4_rewrite_node.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,    /* "ip4-drop" */
  IP4_REWRITE_NEXT_ICMP_ERROR,  /* "ip4-icmp-error" */
  IP4_REWRITE_NEXT_FRAGMENT,    /* "ip4-frag" */
  IP4_REWRITE_N_NEXT            /* count of the above; must stay last */
} ip4_rewrite_next_t;
2035
/**
 * The bits of an IPv4 address, held as a u32 in network byte order,
 * that are copied into a multicast MAC address: the low-order 23 bits
 * of the address.
 */
#if !(CLIB_ARCH_IS_BIG_ENDIAN)
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2045
2046 always_inline void
2047 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2048                u16 adj_packet_bytes, bool df, u32 * next, u32 * error)
2049 {
2050   if (packet_len > adj_packet_bytes)
2051     {
2052       *error = IP4_ERROR_MTU_EXCEEDED;
2053       if (df)
2054         {
2055           icmp4_error_set_vnet_buffer
2056             (b, ICMP4_destination_unreachable,
2057              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2058              adj_packet_bytes);
2059           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2060         }
2061       else
2062         {
2063           /* IP fragmentation */
2064           ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes,
2065                                    IP4_FRAG_NEXT_IP4_LOOKUP, 0);
2066           *next = IP4_REWRITE_NEXT_FRAGMENT;
2067         }
2068     }
2069 }
2070
2071 always_inline uword
2072 ip4_rewrite_inline (vlib_main_t * vm,
2073                     vlib_node_runtime_t * node,
2074                     vlib_frame_t * frame,
2075                     int do_counters, int is_midchain, int is_mcast)
2076 {
2077   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2078   u32 *from = vlib_frame_vector_args (frame);
2079   u32 n_left_from, n_left_to_next, *to_next, next_index;
2080   vlib_node_runtime_t *error_node =
2081     vlib_node_get_runtime (vm, ip4_input_node.index);
2082
2083   n_left_from = frame->n_vectors;
2084   next_index = node->cached_next_index;
2085   u32 thread_index = vlib_get_thread_index ();
2086
2087   while (n_left_from > 0)
2088     {
2089       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2090
2091       while (n_left_from >= 4 && n_left_to_next >= 2)
2092         {
2093           ip_adjacency_t *adj0, *adj1;
2094           vlib_buffer_t *p0, *p1;
2095           ip4_header_t *ip0, *ip1;
2096           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2097           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2098           u32 tx_sw_if_index0, tx_sw_if_index1;
2099
2100           /* Prefetch next iteration. */
2101           {
2102             vlib_buffer_t *p2, *p3;
2103
2104             p2 = vlib_get_buffer (vm, from[2]);
2105             p3 = vlib_get_buffer (vm, from[3]);
2106
2107             vlib_prefetch_buffer_header (p2, STORE);
2108             vlib_prefetch_buffer_header (p3, STORE);
2109
2110             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2111             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2112           }
2113
2114           pi0 = to_next[0] = from[0];
2115           pi1 = to_next[1] = from[1];
2116
2117           from += 2;
2118           n_left_from -= 2;
2119           to_next += 2;
2120           n_left_to_next -= 2;
2121
2122           p0 = vlib_get_buffer (vm, pi0);
2123           p1 = vlib_get_buffer (vm, pi1);
2124
2125           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2126           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2127
2128           /*
2129            * pre-fetch the per-adjacency counters
2130            */
2131           if (do_counters)
2132             {
2133               vlib_prefetch_combined_counter (&adjacency_counters,
2134                                               thread_index, adj_index0);
2135               vlib_prefetch_combined_counter (&adjacency_counters,
2136                                               thread_index, adj_index1);
2137             }
2138
2139           ip0 = vlib_buffer_get_current (p0);
2140           ip1 = vlib_buffer_get_current (p1);
2141
2142           error0 = error1 = IP4_ERROR_NONE;
2143           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2144
2145           /* Decrement TTL & update checksum.
2146              Works either endian, so no need for byte swap. */
2147           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2148             {
2149               i32 ttl0 = ip0->ttl;
2150
2151               /* Input node should have reject packets with ttl 0. */
2152               ASSERT (ip0->ttl > 0);
2153
2154               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2155               checksum0 += checksum0 >= 0xffff;
2156
2157               ip0->checksum = checksum0;
2158               ttl0 -= 1;
2159               ip0->ttl = ttl0;
2160
2161               /*
2162                * If the ttl drops below 1 when forwarding, generate
2163                * an ICMP response.
2164                */
2165               if (PREDICT_FALSE (ttl0 <= 0))
2166                 {
2167                   error0 = IP4_ERROR_TIME_EXPIRED;
2168                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2169                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2170                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2171                                                0);
2172                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2173                 }
2174
2175               /* Verify checksum. */
2176               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2177                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2178             }
2179           else
2180             {
2181               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2182             }
2183           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2184             {
2185               i32 ttl1 = ip1->ttl;
2186
2187               /* Input node should have reject packets with ttl 0. */
2188               ASSERT (ip1->ttl > 0);
2189
2190               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2191               checksum1 += checksum1 >= 0xffff;
2192
2193               ip1->checksum = checksum1;
2194               ttl1 -= 1;
2195               ip1->ttl = ttl1;
2196
2197               /*
2198                * If the ttl drops below 1 when forwarding, generate
2199                * an ICMP response.
2200                */
2201               if (PREDICT_FALSE (ttl1 <= 0))
2202                 {
2203                   error1 = IP4_ERROR_TIME_EXPIRED;
2204                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2205                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2206                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2207                                                0);
2208                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2209                 }
2210
2211               /* Verify checksum. */
2212               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2213                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2214             }
2215           else
2216             {
2217               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2218             }
2219
2220           /* Rewrite packet header and updates lengths. */
2221           adj0 = adj_get (adj_index0);
2222           adj1 = adj_get (adj_index1);
2223
2224           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2225           rw_len0 = adj0[0].rewrite_header.data_bytes;
2226           rw_len1 = adj1[0].rewrite_header.data_bytes;
2227           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2228           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2229
2230           /* Check MTU of outgoing interface. */
2231           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2232                          adj0[0].rewrite_header.max_l3_packet_bytes,
2233                          ip0->flags_and_fragment_offset &
2234                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2235                          &next0, &error0);
2236           ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length),
2237                          adj1[0].rewrite_header.max_l3_packet_bytes,
2238                          ip1->flags_and_fragment_offset &
2239                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2240                          &next1, &error1);
2241
2242           if (is_mcast)
2243             {
2244               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2245                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2246                         IP4_ERROR_SAME_INTERFACE : error0);
2247               error1 = ((adj1[0].rewrite_header.sw_if_index ==
2248                          vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2249                         IP4_ERROR_SAME_INTERFACE : error1);
2250             }
2251
2252           p0->error = error_node->errors[error0];
2253           p1->error = error_node->errors[error1];
2254           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2255            * to see the IP headerr */
2256           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2257             {
2258               next0 = adj0[0].rewrite_header.next_index;
2259               p0->current_data -= rw_len0;
2260               p0->current_length += rw_len0;
2261               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2262               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2263
2264               if (PREDICT_FALSE
2265                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2266                 vnet_feature_arc_start (lm->output_feature_arc_index,
2267                                         tx_sw_if_index0, &next0, p0);
2268             }
2269           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2270             {
2271               next1 = adj1[0].rewrite_header.next_index;
2272               p1->current_data -= rw_len1;
2273               p1->current_length += rw_len1;
2274
2275               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2276               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2277
2278               if (PREDICT_FALSE
2279                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2280                 vnet_feature_arc_start (lm->output_feature_arc_index,
2281                                         tx_sw_if_index1, &next1, p1);
2282             }
2283
2284           /* Guess we are only writing on simple Ethernet header. */
2285           vnet_rewrite_two_headers (adj0[0], adj1[0],
2286                                     ip0, ip1, sizeof (ethernet_header_t));
2287
2288           /*
2289            * Bump the per-adjacency counters
2290            */
2291           if (do_counters)
2292             {
2293               vlib_increment_combined_counter
2294                 (&adjacency_counters,
2295                  thread_index,
2296                  adj_index0, 1,
2297                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2298
2299               vlib_increment_combined_counter
2300                 (&adjacency_counters,
2301                  thread_index,
2302                  adj_index1, 1,
2303                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2304             }
2305
2306           if (is_midchain)
2307             {
2308               adj0->sub_type.midchain.fixup_func
2309                 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2310               adj1->sub_type.midchain.fixup_func
2311                 (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
2312             }
2313           if (is_mcast)
2314             {
2315               /*
2316                * copy bytes from the IP address into the MAC rewrite
2317                */
2318               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2319                                           adj0->
2320                                           rewrite_header.dst_mcast_offset,
2321                                           &ip0->dst_address.as_u32,
2322                                           (u8 *) ip0);
2323               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2324                                           adj0->
2325                                           rewrite_header.dst_mcast_offset,
2326                                           &ip1->dst_address.as_u32,
2327                                           (u8 *) ip1);
2328             }
2329
2330           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2331                                            to_next, n_left_to_next,
2332                                            pi0, pi1, next0, next1);
2333         }
2334
2335       while (n_left_from > 0 && n_left_to_next > 0)
2336         {
2337           ip_adjacency_t *adj0;
2338           vlib_buffer_t *p0;
2339           ip4_header_t *ip0;
2340           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2341           u32 tx_sw_if_index0;
2342
2343           pi0 = to_next[0] = from[0];
2344
2345           p0 = vlib_get_buffer (vm, pi0);
2346
2347           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2348
2349           adj0 = adj_get (adj_index0);
2350
2351           ip0 = vlib_buffer_get_current (p0);
2352
2353           error0 = IP4_ERROR_NONE;
2354           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2355
2356           /* Decrement TTL & update checksum. */
2357           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2358             {
2359               i32 ttl0 = ip0->ttl;
2360
2361               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2362
2363               checksum0 += checksum0 >= 0xffff;
2364
2365               ip0->checksum = checksum0;
2366
2367               ASSERT (ip0->ttl > 0);
2368
2369               ttl0 -= 1;
2370
2371               ip0->ttl = ttl0;
2372
2373               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2374                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2375
2376               if (PREDICT_FALSE (ttl0 <= 0))
2377                 {
2378                   /*
2379                    * If the ttl drops below 1 when forwarding, generate
2380                    * an ICMP response.
2381                    */
2382                   error0 = IP4_ERROR_TIME_EXPIRED;
2383                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2384                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2385                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2386                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2387                                                0);
2388                 }
2389             }
2390           else
2391             {
2392               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2393             }
2394
2395           if (do_counters)
2396             vlib_prefetch_combined_counter (&adjacency_counters,
2397                                             thread_index, adj_index0);
2398
2399           /* Guess we are only writing on simple Ethernet header. */
2400           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2401           if (is_mcast)
2402             {
2403               /*
2404                * copy bytes from the IP address into the MAC rewrite
2405                */
2406               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2407                                           adj0->
2408                                           rewrite_header.dst_mcast_offset,
2409                                           &ip0->dst_address.as_u32,
2410                                           (u8 *) ip0);
2411             }
2412
2413           /* Update packet buffer attributes/set output interface. */
2414           rw_len0 = adj0[0].rewrite_header.data_bytes;
2415           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2416
2417           if (do_counters)
2418             vlib_increment_combined_counter
2419               (&adjacency_counters,
2420                thread_index, adj_index0, 1,
2421                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2422
2423           /* Check MTU of outgoing interface. */
2424           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2425                          adj0[0].rewrite_header.max_l3_packet_bytes,
2426                          ip0->flags_and_fragment_offset &
2427                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2428                          &next0, &error0);
2429
2430           if (is_mcast)
2431             {
2432               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2433                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2434                         IP4_ERROR_SAME_INTERFACE : error0);
2435             }
2436           p0->error = error_node->errors[error0];
2437
2438           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2439            * to see the IP headerr */
2440           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2441             {
2442               p0->current_data -= rw_len0;
2443               p0->current_length += rw_len0;
2444               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2445
2446               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2447               next0 = adj0[0].rewrite_header.next_index;
2448
2449               if (is_midchain)
2450                 {
2451                   adj0->sub_type.midchain.fixup_func
2452                     (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2453                 }
2454
2455               if (PREDICT_FALSE
2456                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2457                 vnet_feature_arc_start (lm->output_feature_arc_index,
2458                                         tx_sw_if_index0, &next0, p0);
2459
2460             }
2461
2462           from += 1;
2463           n_left_from -= 1;
2464           to_next += 1;
2465           n_left_to_next -= 1;
2466
2467           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2468                                            to_next, n_left_to_next,
2469                                            pi0, next0);
2470         }
2471
2472       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2473     }
2474
2475   /* Need to do trace after rewrites to pick up new packet data. */
2476   if (node->flags & VLIB_NODE_FLAG_TRACE)
2477     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2478
2479   return frame->n_vectors;
2480 }
2481
2482
2483 /** @brief IPv4 rewrite node.
2484     @node ip4-rewrite
2485
2486     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2487     header checksum, fetch the ip adjacency, check the outbound mtu,
2488     apply the adjacency rewrite, and send pkts to the adjacency
2489     rewrite header's rewrite_next_index.
2490
2491     @param vm vlib_main_t corresponding to the current thread
2492     @param node vlib_node_runtime_t
2493     @param frame vlib_frame_t whose contents should be dispatched
2494
2495     @par Graph mechanics: buffer metadata, next index usage
2496
2497     @em Uses:
2498     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2499         - the rewrite adjacency index
2500     - <code>adj->lookup_next_index</code>
2501         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2502           the packet will be dropped.
2503     - <code>adj->rewrite_header</code>
2504         - Rewrite string length, rewrite string, next_index
2505
2506     @em Sets:
2507     - <code>b->current_data, b->current_length</code>
2508         - Updated net of applying the rewrite string
2509
2510     <em>Next Indices:</em>
2511     - <code> adj->rewrite_header.next_index </code>
2512       or @c ip4-drop
2513 */
2514 static uword
2515 ip4_rewrite (vlib_main_t * vm,
2516              vlib_node_runtime_t * node, vlib_frame_t * frame)
2517 {
2518   if (adj_are_counters_enabled ())
2519     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2520   else
2521     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2522 }
2523
2524 static uword
2525 ip4_midchain (vlib_main_t * vm,
2526               vlib_node_runtime_t * node, vlib_frame_t * frame)
2527 {
2528   if (adj_are_counters_enabled ())
2529     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2530   else
2531     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2532 }
2533
2534 static uword
2535 ip4_rewrite_mcast (vlib_main_t * vm,
2536                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2537 {
2538   if (adj_are_counters_enabled ())
2539     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2540   else
2541     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2542 }
2543
2544 static uword
2545 ip4_mcast_midchain (vlib_main_t * vm,
2546                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2547 {
2548   if (adj_are_counters_enabled ())
2549     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2550   else
2551     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2552 }
2553
2554 /* *INDENT-OFF* */
2555 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2556   .function = ip4_rewrite,
2557   .name = "ip4-rewrite",
2558   .vector_size = sizeof (u32),
2559
2560   .format_trace = format_ip4_rewrite_trace,
2561
2562   .n_next_nodes = IP4_REWRITE_N_NEXT,
2563   .next_nodes = {
2564     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2565     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2566     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2567   },
2568 };
2569 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2570
2571 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2572   .function = ip4_rewrite_mcast,
2573   .name = "ip4-rewrite-mcast",
2574   .vector_size = sizeof (u32),
2575
2576   .format_trace = format_ip4_rewrite_trace,
2577   .sibling_of = "ip4-rewrite",
2578 };
2579 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2580
2581 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2582   .function = ip4_mcast_midchain,
2583   .name = "ip4-mcast-midchain",
2584   .vector_size = sizeof (u32),
2585
2586   .format_trace = format_ip4_rewrite_trace,
2587   .sibling_of = "ip4-rewrite",
2588 };
2589 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2590
2591 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2592   .function = ip4_midchain,
2593   .name = "ip4-midchain",
2594   .vector_size = sizeof (u32),
2595   .format_trace = format_ip4_forward_next_trace,
2596   .sibling_of =  "ip4-rewrite",
2597 };
2598 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/* *INDENT-ON* */
2600
2601 int
2602 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2603 {
2604   ip4_fib_mtrie_t *mtrie0;
2605   ip4_fib_mtrie_leaf_t leaf0;
2606   u32 lbi0;
2607
2608   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2609
2610   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2611   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2612   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2613
2614   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2615
2616   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2617 }
2618
2619 static clib_error_t *
2620 test_lookup_command_fn (vlib_main_t * vm,
2621                         unformat_input_t * input, vlib_cli_command_t * cmd)
2622 {
2623   ip4_fib_t *fib;
2624   u32 table_id = 0;
2625   f64 count = 1;
2626   u32 n;
2627   int i;
2628   ip4_address_t ip4_base_address;
2629   u64 errors = 0;
2630
2631   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2632     {
2633       if (unformat (input, "table %d", &table_id))
2634         {
2635           /* Make sure the entry exists. */
2636           fib = ip4_fib_get (table_id);
2637           if ((fib) && (fib->index != table_id))
2638             return clib_error_return (0, "<fib-index> %d does not exist",
2639                                       table_id);
2640         }
2641       else if (unformat (input, "count %f", &count))
2642         ;
2643
2644       else if (unformat (input, "%U",
2645                          unformat_ip4_address, &ip4_base_address))
2646         ;
2647       else
2648         return clib_error_return (0, "unknown input `%U'",
2649                                   format_unformat_error, input);
2650     }
2651
2652   n = count;
2653
2654   for (i = 0; i < n; i++)
2655     {
2656       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2657         errors++;
2658
2659       ip4_base_address.as_u32 =
2660         clib_host_to_net_u32 (1 +
2661                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2662     }
2663
2664   if (errors)
2665     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2666   else
2667     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2668
2669   return 0;
2670 }
2671
2672 /*?
2673  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2674  * given FIB table to determine if there is a conflict with the
2675  * adjacency table. The fib-id can be determined by using the
2676  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2677  * of 0 is used.
2678  *
2679  * @todo This command uses fib-id, other commands use table-id (not
2680  * just a name, they are different indexes). Would like to change this
2681  * to table-id for consistency.
2682  *
2683  * @cliexpar
2684  * Example of how to run the test lookup command:
2685  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2686  * No errors in 2 lookups
2687  * @cliexend
2688 ?*/
2689 /* *INDENT-OFF* */
2690 VLIB_CLI_COMMAND (lookup_test_command, static) =
2691 {
2692   .path = "test lookup",
2693   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2694   .function = test_lookup_command_fn,
2695 };
2696 /* *INDENT-ON* */
2697
2698 int
2699 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2700 {
2701   u32 fib_index;
2702
2703   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2704
2705   if (~0 == fib_index)
2706     return VNET_API_ERROR_NO_SUCH_FIB;
2707
2708   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2709                                   flow_hash_config);
2710
2711   return 0;
2712 }
2713
2714 static clib_error_t *
2715 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2716                              unformat_input_t * input,
2717                              vlib_cli_command_t * cmd)
2718 {
2719   int matched = 0;
2720   u32 table_id = 0;
2721   u32 flow_hash_config = 0;
2722   int rv;
2723
2724   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2725     {
2726       if (unformat (input, "table %d", &table_id))
2727         matched = 1;
2728 #define _(a,v) \
2729     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2730       foreach_flow_hash_bit
2731 #undef _
2732         else
2733         break;
2734     }
2735
2736   if (matched == 0)
2737     return clib_error_return (0, "unknown input `%U'",
2738                               format_unformat_error, input);
2739
2740   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2741   switch (rv)
2742     {
2743     case 0:
2744       break;
2745
2746     case VNET_API_ERROR_NO_SUCH_FIB:
2747       return clib_error_return (0, "no such FIB table %d", table_id);
2748
2749     default:
2750       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2751       break;
2752     }
2753
2754   return 0;
2755 }
2756
2757 /*?
2758  * Configure the set of IPv4 fields used by the flow hash.
2759  *
2760  * @cliexpar
2761  * Example of how to set the flow hash on a given table:
2762  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2764  * @cliexstart{show ip fib}
2765  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2766  * 0.0.0.0/0
2767  *   unicast-ip4-chain
2768  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2769  *     [0] [@0]: dpo-drop ip6
2770  * 0.0.0.0/32
2771  *   unicast-ip4-chain
2772  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2773  *     [0] [@0]: dpo-drop ip6
2774  * 224.0.0.0/8
2775  *   unicast-ip4-chain
2776  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2777  *     [0] [@0]: dpo-drop ip6
2778  * 6.0.1.2/32
2779  *   unicast-ip4-chain
2780  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2781  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2782  * 7.0.0.1/32
2783  *   unicast-ip4-chain
2784  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2785  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2786  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2787  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2788  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2789  * 240.0.0.0/8
2790  *   unicast-ip4-chain
2791  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2792  *     [0] [@0]: dpo-drop ip6
2793  * 255.255.255.255/32
2794  *   unicast-ip4-chain
2795  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2796  *     [0] [@0]: dpo-drop ip6
2797  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2798  * 0.0.0.0/0
2799  *   unicast-ip4-chain
2800  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2801  *     [0] [@0]: dpo-drop ip6
2802  * 0.0.0.0/32
2803  *   unicast-ip4-chain
2804  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2805  *     [0] [@0]: dpo-drop ip6
2806  * 172.16.1.0/24
2807  *   unicast-ip4-chain
2808  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2809  *     [0] [@4]: ipv4-glean: af_packet0
2810  * 172.16.1.1/32
2811  *   unicast-ip4-chain
2812  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2813  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2814  * 172.16.1.2/32
2815  *   unicast-ip4-chain
2816  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2817  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2818  * 172.16.2.0/24
2819  *   unicast-ip4-chain
2820  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2821  *     [0] [@4]: ipv4-glean: af_packet1
2822  * 172.16.2.1/32
2823  *   unicast-ip4-chain
2824  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2825  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2826  * 224.0.0.0/8
2827  *   unicast-ip4-chain
2828  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2829  *     [0] [@0]: dpo-drop ip6
2830  * 240.0.0.0/8
2831  *   unicast-ip4-chain
2832  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2833  *     [0] [@0]: dpo-drop ip6
2834  * 255.255.255.255/32
2835  *   unicast-ip4-chain
2836  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2837  *     [0] [@0]: dpo-drop ip6
2838  * @cliexend
2839 ?*/
2840 /* *INDENT-OFF* */
2841 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2842 {
2843   .path = "set ip flow-hash",
2844   .short_help =
2845   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2846   .function = set_ip_flow_hash_command_fn,
2847 };
2848 /* *INDENT-ON* */
2849
2850 int
2851 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2852                              u32 table_index)
2853 {
2854   vnet_main_t *vnm = vnet_get_main ();
2855   vnet_interface_main_t *im = &vnm->interface_main;
2856   ip4_main_t *ipm = &ip4_main;
2857   ip_lookup_main_t *lm = &ipm->lookup_main;
2858   vnet_classify_main_t *cm = &vnet_classify_main;
2859   ip4_address_t *if_addr;
2860
2861   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2862     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2863
2864   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2865     return VNET_API_ERROR_NO_SUCH_ENTRY;
2866
2867   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2868   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2869
2870   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2871
2872   if (NULL != if_addr)
2873     {
2874       fib_prefix_t pfx = {
2875         .fp_len = 32,
2876         .fp_proto = FIB_PROTOCOL_IP4,
2877         .fp_addr.ip4 = *if_addr,
2878       };
2879       u32 fib_index;
2880
2881       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2882                                                        sw_if_index);
2883
2884
2885       if (table_index != (u32) ~ 0)
2886         {
2887           dpo_id_t dpo = DPO_INVALID;
2888
2889           dpo_set (&dpo,
2890                    DPO_CLASSIFY,
2891                    DPO_PROTO_IP4,
2892                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2893
2894           fib_table_entry_special_dpo_add (fib_index,
2895                                            &pfx,
2896                                            FIB_SOURCE_CLASSIFY,
2897                                            FIB_ENTRY_FLAG_NONE, &dpo);
2898           dpo_reset (&dpo);
2899         }
2900       else
2901         {
2902           fib_table_entry_special_remove (fib_index,
2903                                           &pfx, FIB_SOURCE_CLASSIFY);
2904         }
2905     }
2906
2907   return 0;
2908 }
2909
2910 static clib_error_t *
2911 set_ip_classify_command_fn (vlib_main_t * vm,
2912                             unformat_input_t * input,
2913                             vlib_cli_command_t * cmd)
2914 {
2915   u32 table_index = ~0;
2916   int table_index_set = 0;
2917   u32 sw_if_index = ~0;
2918   int rv;
2919
2920   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2921     {
2922       if (unformat (input, "table-index %d", &table_index))
2923         table_index_set = 1;
2924       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2925                          vnet_get_main (), &sw_if_index))
2926         ;
2927       else
2928         break;
2929     }
2930
2931   if (table_index_set == 0)
2932     return clib_error_return (0, "classify table-index must be specified");
2933
2934   if (sw_if_index == ~0)
2935     return clib_error_return (0, "interface / subif must be specified");
2936
2937   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2938
2939   switch (rv)
2940     {
2941     case 0:
2942       break;
2943
2944     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2945       return clib_error_return (0, "No such interface");
2946
2947     case VNET_API_ERROR_NO_SUCH_ENTRY:
2948       return clib_error_return (0, "No such classifier table");
2949     }
2950   return 0;
2951 }
2952
2953 /*?
2954  * Assign a classification table to an interface. The classification
2955  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2957  * on an interface.
2958  *
2959  * @cliexpar
2960  * Example of how to assign a classification table to an interface:
2961  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2962 ?*/
2963 /* *INDENT-OFF* */
2964 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2965 {
2966     .path = "set ip classify",
2967     .short_help =
2968     "set ip classify intfc <interface> table-index <classify-idx>",
2969     .function = set_ip_classify_command_fn,
2970 };
2971 /* *INDENT-ON* */
2972
2973 static clib_error_t *
2974 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2975 {
2976   ip4_main_t *im = &ip4_main;
2977   uword heapsize = 0;
2978
2979   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2980     {
2981       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
2982         ;
2983       else
2984         return clib_error_return (0,
2985                                   "invalid heap-size parameter `%U'",
2986                                   format_unformat_error, input);
2987     }
2988
2989   im->mtrie_heap_size = heapsize;
2990
2991   return 0;
2992 }
2993
2994 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
2995
2996 /*
2997  * fd.io coding-style-patch-verification: ON
2998  *
2999  * Local Variables:
3000  * eval: (c-set-style "gnu")
3001  * End:
3002  */