MTU: IP fragmentation added to ip4-rewrite and ip6-rewrite
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58
59 /** @brief IPv4 lookup node.
60     @node ip4-lookup
61
62     This is the main IPv4 lookup dispatch node.
63
64     @param vm vlib_main_t corresponding to the current thread
65     @param node vlib_node_runtime_t
66     @param frame vlib_frame_t whose contents should be dispatched
67
68     @par Graph mechanics: buffer metadata, next index usage
69
70     @em Uses:
71     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
72         - Indicates the @c sw_if_index value of the interface that the
73           packet was received on.
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
75         - When the value is @c ~0 then the node performs a longest prefix
76           match (LPM) for the packet destination address in the FIB attached
77           to the receive interface.
78         - Otherwise perform LPM for the packet destination address in the
79           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
80           value (0, 1, ...) and not a VRF id.
81
82     @em Sets:
83     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
84         - The lookup result adjacency index.
85
86     <em>Next Index:</em>
87     - Dispatches the packet to the node index found in
88       ip_adjacency_t @c adj->lookup_next_index
89       (where @c adj is the lookup result adjacency).
90 */
91 static uword
92 ip4_lookup (vlib_main_t * vm,
93             vlib_node_runtime_t * node, vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame,
96                             /* lookup_for_responses_to_locally_received_packets */
97                             0);
98
99 }
100
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .function = ip4_lookup,
107   .name = "ip4-lookup",
108   .vector_size = sizeof (u32),
109   .format_trace = format_ip4_lookup_trace,
110   .n_next_nodes = IP_LOOKUP_N_NEXT,
111   .next_nodes = IP4_LOOKUP_NEXT_NODES,
112 };
113 /* *INDENT-ON* */
114
115 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
116
117 always_inline uword
118 ip4_load_balance (vlib_main_t * vm,
119                   vlib_node_runtime_t * node, vlib_frame_t * frame)
120 {
121   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
122   u32 n_left_from, n_left_to_next, *from, *to_next;
123   ip_lookup_next_t next;
124   u32 thread_index = vlib_get_thread_index ();
125
126   from = vlib_frame_vector_args (frame);
127   n_left_from = frame->n_vectors;
128   next = node->cached_next_index;
129
130   if (node->flags & VLIB_NODE_FLAG_TRACE)
131     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
132
133   while (n_left_from > 0)
134     {
135       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
136
137
138       while (n_left_from >= 4 && n_left_to_next >= 2)
139         {
140           ip_lookup_next_t next0, next1;
141           const load_balance_t *lb0, *lb1;
142           vlib_buffer_t *p0, *p1;
143           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
144           const ip4_header_t *ip0, *ip1;
145           const dpo_id_t *dpo0, *dpo1;
146
147           /* Prefetch next iteration. */
148           {
149             vlib_buffer_t *p2, *p3;
150
151             p2 = vlib_get_buffer (vm, from[2]);
152             p3 = vlib_get_buffer (vm, from[3]);
153
154             vlib_prefetch_buffer_header (p2, STORE);
155             vlib_prefetch_buffer_header (p3, STORE);
156
157             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
158             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
159           }
160
161           pi0 = to_next[0] = from[0];
162           pi1 = to_next[1] = from[1];
163
164           from += 2;
165           n_left_from -= 2;
166           to_next += 2;
167           n_left_to_next -= 2;
168
169           p0 = vlib_get_buffer (vm, pi0);
170           p1 = vlib_get_buffer (vm, pi1);
171
172           ip0 = vlib_buffer_get_current (p0);
173           ip1 = vlib_buffer_get_current (p1);
174           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
175           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
176
177           lb0 = load_balance_get (lbi0);
178           lb1 = load_balance_get (lbi1);
179
180           /*
181            * this node is for via FIBs we can re-use the hash value from the
182            * to node if present.
183            * We don't want to use the same hash value at each level in the recursion
184            * graph as that would lead to polarisation
185            */
186           hc0 = hc1 = 0;
187
188           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
189             {
190               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
191                 {
192                   hc0 = vnet_buffer (p0)->ip.flow_hash =
193                     vnet_buffer (p0)->ip.flow_hash >> 1;
194                 }
195               else
196                 {
197                   hc0 = vnet_buffer (p0)->ip.flow_hash =
198                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
199                 }
200               dpo0 = load_balance_get_fwd_bucket
201                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
202             }
203           else
204             {
205               dpo0 = load_balance_get_bucket_i (lb0, 0);
206             }
207           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
208             {
209               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
210                 {
211                   hc1 = vnet_buffer (p1)->ip.flow_hash =
212                     vnet_buffer (p1)->ip.flow_hash >> 1;
213                 }
214               else
215                 {
216                   hc1 = vnet_buffer (p1)->ip.flow_hash =
217                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
218                 }
219               dpo1 = load_balance_get_fwd_bucket
220                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
221             }
222           else
223             {
224               dpo1 = load_balance_get_bucket_i (lb1, 0);
225             }
226
227           next0 = dpo0->dpoi_next_node;
228           next1 = dpo1->dpoi_next_node;
229
230           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
231           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
232
233           vlib_increment_combined_counter
234             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
235           vlib_increment_combined_counter
236             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
237
238           vlib_validate_buffer_enqueue_x2 (vm, node, next,
239                                            to_next, n_left_to_next,
240                                            pi0, pi1, next0, next1);
241         }
242
243       while (n_left_from > 0 && n_left_to_next > 0)
244         {
245           ip_lookup_next_t next0;
246           const load_balance_t *lb0;
247           vlib_buffer_t *p0;
248           u32 pi0, lbi0, hc0;
249           const ip4_header_t *ip0;
250           const dpo_id_t *dpo0;
251
252           pi0 = from[0];
253           to_next[0] = pi0;
254           from += 1;
255           to_next += 1;
256           n_left_to_next -= 1;
257           n_left_from -= 1;
258
259           p0 = vlib_get_buffer (vm, pi0);
260
261           ip0 = vlib_buffer_get_current (p0);
262           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
263
264           lb0 = load_balance_get (lbi0);
265
266           hc0 = 0;
267           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
268             {
269               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
270                 {
271                   hc0 = vnet_buffer (p0)->ip.flow_hash =
272                     vnet_buffer (p0)->ip.flow_hash >> 1;
273                 }
274               else
275                 {
276                   hc0 = vnet_buffer (p0)->ip.flow_hash =
277                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
278                 }
279               dpo0 = load_balance_get_fwd_bucket
280                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
281             }
282           else
283             {
284               dpo0 = load_balance_get_bucket_i (lb0, 0);
285             }
286
287           next0 = dpo0->dpoi_next_node;
288           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
289
290           vlib_increment_combined_counter
291             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
292
293           vlib_validate_buffer_enqueue_x1 (vm, node, next,
294                                            to_next, n_left_to_next,
295                                            pi0, next0);
296         }
297
298       vlib_put_next_frame (vm, node, next, n_left_to_next);
299     }
300
301   return frame->n_vectors;
302 }
303
304 /* *INDENT-OFF* */
305 VLIB_REGISTER_NODE (ip4_load_balance_node) =
306 {
307   .function = ip4_load_balance,
308   .name = "ip4-load-balance",
309   .vector_size = sizeof (u32),
310   .sibling_of = "ip4-lookup",
311   .format_trace =
312   format_ip4_lookup_trace,
313 };
314 /* *INDENT-ON* */
315
316 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
317
318 /* get first interface address */
319 ip4_address_t *
320 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
321                              ip_interface_address_t ** result_ia)
322 {
323   ip_lookup_main_t *lm = &im->lookup_main;
324   ip_interface_address_t *ia = 0;
325   ip4_address_t *result = 0;
326
327   /* *INDENT-OFF* */
328   foreach_ip_interface_address
329     (lm, ia, sw_if_index,
330      1 /* honor unnumbered */ ,
331      ({
332        ip4_address_t * a =
333          ip_interface_address_get_address (lm, ia);
334        result = a;
335        break;
336      }));
337   /* *INDENT-OFF* */
338   if (result_ia)
339     *result_ia = result ? ia : 0;
340   return result;
341 }
342
343 static void
344 ip4_add_interface_routes (u32 sw_if_index,
345                           ip4_main_t * im, u32 fib_index,
346                           ip_interface_address_t * a)
347 {
348   ip_lookup_main_t *lm = &im->lookup_main;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350   fib_prefix_t pfx = {
351     .fp_len = a->address_length,
352     .fp_proto = FIB_PROTOCOL_IP4,
353     .fp_addr.ip4 = *address,
354   };
355
356   if (pfx.fp_len <= 30)
357     {
358       /* a /30 or shorter - add a glean for the network address */
359       fib_table_entry_update_one_path (fib_index, &pfx,
360                                        FIB_SOURCE_INTERFACE,
361                                        (FIB_ENTRY_FLAG_CONNECTED |
362                                         FIB_ENTRY_FLAG_ATTACHED),
363                                        DPO_PROTO_IP4,
364                                        /* No next-hop address */
365                                        NULL,
366                                        sw_if_index,
367                                        // invalid FIB index
368                                        ~0,
369                                        1,
370                                        // no out-label stack
371                                        NULL,
372                                        FIB_ROUTE_PATH_FLAG_NONE);
373
374       /* Add the two broadcast addresses as drop */
375       fib_prefix_t net_pfx = {
376         .fp_len = 32,
377         .fp_proto = FIB_PROTOCOL_IP4,
378         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
379       };
380       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
381         fib_table_entry_special_add(fib_index,
382                                     &net_pfx,
383                                     FIB_SOURCE_INTERFACE,
384                                     (FIB_ENTRY_FLAG_DROP |
385                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
386       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
387       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
388         fib_table_entry_special_add(fib_index,
389                                     &net_pfx,
390                                     FIB_SOURCE_INTERFACE,
391                                     (FIB_ENTRY_FLAG_DROP |
392                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
393     }
394   else if (pfx.fp_len == 31)
395     {
396       u32 mask = clib_host_to_net_u32(1);
397       fib_prefix_t net_pfx = pfx;
398
399       net_pfx.fp_len = 32;
400       net_pfx.fp_addr.ip4.as_u32 ^= mask;
401
402       /* a /31 - add the other end as an attached host */
403       fib_table_entry_update_one_path (fib_index, &net_pfx,
404                                        FIB_SOURCE_INTERFACE,
405                                        (FIB_ENTRY_FLAG_ATTACHED),
406                                        DPO_PROTO_IP4,
407                                        &net_pfx.fp_addr,
408                                        sw_if_index,
409                                        // invalid FIB index
410                                        ~0,
411                                        1,
412                                        NULL,
413                                        FIB_ROUTE_PATH_FLAG_NONE);
414     }
415   pfx.fp_len = 32;
416
417   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
418     {
419       u32 classify_table_index =
420         lm->classify_table_index_by_sw_if_index[sw_if_index];
421       if (classify_table_index != (u32) ~ 0)
422         {
423           dpo_id_t dpo = DPO_INVALID;
424
425           dpo_set (&dpo,
426                    DPO_CLASSIFY,
427                    DPO_PROTO_IP4,
428                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
429
430           fib_table_entry_special_dpo_add (fib_index,
431                                            &pfx,
432                                            FIB_SOURCE_CLASSIFY,
433                                            FIB_ENTRY_FLAG_NONE, &dpo);
434           dpo_reset (&dpo);
435         }
436     }
437
438   fib_table_entry_update_one_path (fib_index, &pfx,
439                                    FIB_SOURCE_INTERFACE,
440                                    (FIB_ENTRY_FLAG_CONNECTED |
441                                     FIB_ENTRY_FLAG_LOCAL),
442                                    DPO_PROTO_IP4,
443                                    &pfx.fp_addr,
444                                    sw_if_index,
445                                    // invalid FIB index
446                                    ~0,
447                                    1, NULL,
448                                    FIB_ROUTE_PATH_FLAG_NONE);
449 }
450
451 static void
452 ip4_del_interface_routes (ip4_main_t * im,
453                           u32 fib_index,
454                           ip4_address_t * address, u32 address_length)
455 {
456   fib_prefix_t pfx = {
457     .fp_len = address_length,
458     .fp_proto = FIB_PROTOCOL_IP4,
459     .fp_addr.ip4 = *address,
460   };
461
462   if (pfx.fp_len <= 30)
463     {
464       fib_prefix_t net_pfx = {
465         .fp_len = 32,
466         .fp_proto = FIB_PROTOCOL_IP4,
467         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
468       };
469       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
470         fib_table_entry_special_remove(fib_index,
471                                        &net_pfx,
472                                        FIB_SOURCE_INTERFACE);
473       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
474       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
475         fib_table_entry_special_remove(fib_index,
476                                        &net_pfx,
477                                        FIB_SOURCE_INTERFACE);
478       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
479     }
480     else if (pfx.fp_len == 31)
481     {
482       u32 mask = clib_host_to_net_u32(1);
483       fib_prefix_t net_pfx = pfx;
484
485       net_pfx.fp_len = 32;
486       net_pfx.fp_addr.ip4.as_u32 ^= mask;
487
488       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
489     }
490
491   pfx.fp_len = 32;
492   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
493 }
494
495 void
496 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
497 {
498   ip4_main_t *im = &ip4_main;
499
500   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
501
502   /*
503    * enable/disable only on the 1<->0 transition
504    */
505   if (is_enable)
506     {
507       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
508         return;
509     }
510   else
511     {
512       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
513       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
514         return;
515     }
516   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
517                                !is_enable, 0, 0);
518
519
520   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
521                                sw_if_index, !is_enable, 0, 0);
522 }
523
524 static clib_error_t *
525 ip4_add_del_interface_address_internal (vlib_main_t * vm,
526                                         u32 sw_if_index,
527                                         ip4_address_t * address,
528                                         u32 address_length, u32 is_del)
529 {
530   vnet_main_t *vnm = vnet_get_main ();
531   ip4_main_t *im = &ip4_main;
532   ip_lookup_main_t *lm = &im->lookup_main;
533   clib_error_t *error = 0;
534   u32 if_address_index, elts_before;
535   ip4_address_fib_t ip4_af, *addr_fib = 0;
536
537   /* local0 interface doesn't support IP addressing  */
538   if (sw_if_index == 0)
539     {
540       return
541        clib_error_create ("local0 interface doesn't support IP addressing");
542     }
543
544   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
545   ip4_addr_fib_init (&ip4_af, address,
546                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
547   vec_add1 (addr_fib, ip4_af);
548
549   /*
550    * there is no support for adj-fib handling in the presence of overlapping
551    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
552    * most routers do.
553    */
554   /* *INDENT-OFF* */
555   if (!is_del)
556     {
557       /* When adding an address check that it does not conflict
558          with an existing address on any interface in this table. */
559       ip_interface_address_t *ia;
560       vnet_sw_interface_t *sif;
561
562       pool_foreach(sif, vnm->interface_main.sw_interfaces,
563       ({
564           if (im->fib_index_by_sw_if_index[sw_if_index] ==
565               im->fib_index_by_sw_if_index[sif->sw_if_index])
566             {
567               foreach_ip_interface_address
568                 (&im->lookup_main, ia, sif->sw_if_index,
569                  0 /* honor unnumbered */ ,
570                  ({
571                    ip4_address_t * x =
572                      ip_interface_address_get_address
573                      (&im->lookup_main, ia);
574                    if (ip4_destination_matches_route
575                        (im, address, x, ia->address_length) ||
576                        ip4_destination_matches_route (im,
577                                                       x,
578                                                       address,
579                                                       address_length))
580                      {
581                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
582
583                        return
584                          clib_error_create
585                          ("failed to add %U which conflicts with %U for interface %U",
586                           format_ip4_address_and_length, address,
587                           address_length,
588                           format_ip4_address_and_length, x,
589                           ia->address_length,
590                           format_vnet_sw_if_index_name, vnm,
591                           sif->sw_if_index);
592                      }
593                  }));
594             }
595       }));
596     }
597   /* *INDENT-ON* */
598
599   elts_before = pool_elts (lm->if_address_pool);
600
601   error = ip_interface_address_add_del
602     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
603   if (error)
604     goto done;
605
606   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
607
608   if (is_del)
609     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
610   else
611     ip4_add_interface_routes (sw_if_index,
612                               im, ip4_af.fib_index,
613                               pool_elt_at_index
614                               (lm->if_address_pool, if_address_index));
615
616   /* If pool did not grow/shrink: add duplicate address. */
617   if (elts_before != pool_elts (lm->if_address_pool))
618     {
619       ip4_add_del_interface_address_callback_t *cb;
620       vec_foreach (cb, im->add_del_interface_address_callbacks)
621         cb->function (im, cb->function_opaque, sw_if_index,
622                       address, address_length, if_address_index, is_del);
623     }
624
625 done:
626   vec_free (addr_fib);
627   return error;
628 }
629
630 clib_error_t *
631 ip4_add_del_interface_address (vlib_main_t * vm,
632                                u32 sw_if_index,
633                                ip4_address_t * address,
634                                u32 address_length, u32 is_del)
635 {
636   return ip4_add_del_interface_address_internal
637     (vm, sw_if_index, address, address_length, is_del);
638 }
639
640 /* Built-in ip4 unicast rx feature path definition */
641 /* *INDENT-OFF* */
642 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
643 {
644   .arc_name = "ip4-unicast",
645   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
646   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
647 };
648
649 VNET_FEATURE_INIT (ip4_flow_classify, static) =
650 {
651   .arc_name = "ip4-unicast",
652   .node_name = "ip4-flow-classify",
653   .runs_before = VNET_FEATURES ("ip4-inacl"),
654 };
655
656 VNET_FEATURE_INIT (ip4_inacl, static) =
657 {
658   .arc_name = "ip4-unicast",
659   .node_name = "ip4-inacl",
660   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
661 };
662
663 VNET_FEATURE_INIT (ip4_source_check_1, static) =
664 {
665   .arc_name = "ip4-unicast",
666   .node_name = "ip4-source-check-via-rx",
667   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
668 };
669
670 VNET_FEATURE_INIT (ip4_source_check_2, static) =
671 {
672   .arc_name = "ip4-unicast",
673   .node_name = "ip4-source-check-via-any",
674   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
675 };
676
677 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
678 {
679   .arc_name = "ip4-unicast",
680   .node_name = "ip4-source-and-port-range-check-rx",
681   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
682 };
683
684 VNET_FEATURE_INIT (ip4_policer_classify, static) =
685 {
686   .arc_name = "ip4-unicast",
687   .node_name = "ip4-policer-classify",
688   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
689 };
690
691 VNET_FEATURE_INIT (ip4_ipsec, static) =
692 {
693   .arc_name = "ip4-unicast",
694   .node_name = "ipsec-input-ip4",
695   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
696 };
697
698 VNET_FEATURE_INIT (ip4_vpath, static) =
699 {
700   .arc_name = "ip4-unicast",
701   .node_name = "vpath-input-ip4",
702   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
703 };
704
705 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
706 {
707   .arc_name = "ip4-unicast",
708   .node_name = "ip4-vxlan-bypass",
709   .runs_before = VNET_FEATURES ("ip4-lookup"),
710 };
711
712 VNET_FEATURE_INIT (ip4_not_enabled, static) =
713 {
714   .arc_name = "ip4-unicast",
715   .node_name = "ip4-not-enabled",
716   .runs_before = VNET_FEATURES ("ip4-lookup"),
717 };
718
719 VNET_FEATURE_INIT (ip4_lookup, static) =
720 {
721   .arc_name = "ip4-unicast",
722   .node_name = "ip4-lookup",
723   .runs_before = 0,     /* not before any other features */
724 };
725
726 /* Built-in ip4 multicast rx feature path definition */
727 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
728 {
729   .arc_name = "ip4-multicast",
730   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
731   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
732 };
733
734 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
735 {
736   .arc_name = "ip4-multicast",
737   .node_name = "vpath-input-ip4",
738   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
739 };
740
741 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
742 {
743   .arc_name = "ip4-multicast",
744   .node_name = "ip4-not-enabled",
745   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
746 };
747
748 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
749 {
750   .arc_name = "ip4-multicast",
751   .node_name = "ip4-mfib-forward-lookup",
752   .runs_before = 0,     /* last feature */
753 };
754
755 /* Source and port-range check ip4 tx feature path definition */
756 VNET_FEATURE_ARC_INIT (ip4_output, static) =
757 {
758   .arc_name = "ip4-output",
759   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
760   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
761 };
762
763 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
764 {
765   .arc_name = "ip4-output",
766   .node_name = "ip4-source-and-port-range-check-tx",
767   .runs_before = VNET_FEATURES ("ip4-outacl"),
768 };
769
770 VNET_FEATURE_INIT (ip4_outacl, static) =
771 {
772   .arc_name = "ip4-output",
773   .node_name = "ip4-outacl",
774   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
775 };
776
777 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
778 {
779   .arc_name = "ip4-output",
780   .node_name = "ipsec-output-ip4",
781   .runs_before = VNET_FEATURES ("interface-output"),
782 };
783
784 /* Built-in ip4 tx feature path definition */
785 VNET_FEATURE_INIT (ip4_interface_output, static) =
786 {
787   .arc_name = "ip4-output",
788   .node_name = "interface-output",
789   .runs_before = 0,     /* not before any other features */
790 };
791 /* *INDENT-ON* */
792
793 static clib_error_t *
794 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
795 {
796   ip4_main_t *im = &ip4_main;
797
798   /* Fill in lookup tables with default table (0). */
799   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
800   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
801
802   if (!is_add)
803     {
804       ip4_main_t *im4 = &ip4_main;
805       ip_lookup_main_t *lm4 = &im4->lookup_main;
806       ip_interface_address_t *ia = 0;
807       ip4_address_t *address;
808       vlib_main_t *vm = vlib_get_main ();
809
810       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
811       /* *INDENT-OFF* */
812       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
813       ({
814         address = ip_interface_address_get_address (lm4, ia);
815         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
816       }));
817       /* *INDENT-ON* */
818     }
819
820   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
821                                is_add, 0, 0);
822
823   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
824                                sw_if_index, is_add, 0, 0);
825
826   return /* no error */ 0;
827 }
828
829 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
830
831 /* Global IP4 main. */
832 ip4_main_t ip4_main;
833
834 clib_error_t *
835 ip4_lookup_init (vlib_main_t * vm)
836 {
837   ip4_main_t *im = &ip4_main;
838   clib_error_t *error;
839   uword i;
840
841   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
842     return error;
843   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
844     return (error);
845   if ((error = vlib_call_init_function (vm, fib_module_init)))
846     return error;
847   if ((error = vlib_call_init_function (vm, mfib_module_init)))
848     return error;
849
850   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
851     {
852       u32 m;
853
854       if (i < 32)
855         m = pow2_mask (i) << (32 - i);
856       else
857         m = ~0;
858       im->fib_masks[i] = clib_host_to_net_u32 (m);
859     }
860
861   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
862
863   /* Create FIB with index 0 and table id of 0. */
864   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
865                                      FIB_SOURCE_DEFAULT_ROUTE);
866   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
867                                       MFIB_SOURCE_DEFAULT_ROUTE);
868
869   {
870     pg_node_t *pn;
871     pn = pg_get_node (ip4_lookup_node.index);
872     pn->unformat_edit = unformat_pg_ip4_header;
873   }
874
875   {
876     ethernet_arp_header_t h;
877
878     memset (&h, 0, sizeof (h));
879
880     /* Set target ethernet address to all zeros. */
881     memset (h.ip4_over_ethernet[1].ethernet, 0,
882             sizeof (h.ip4_over_ethernet[1].ethernet));
883
884 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
885 #define _8(f,v) h.f = v;
886     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
887     _16 (l3_type, ETHERNET_TYPE_IP4);
888     _8 (n_l2_address_bytes, 6);
889     _8 (n_l3_address_bytes, 4);
890     _16 (opcode, ETHERNET_ARP_OPCODE_request);
891 #undef _16
892 #undef _8
893
894     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
895                                /* data */ &h,
896                                sizeof (h),
897                                /* alloc chunk size */ 8,
898                                "ip4 arp");
899   }
900
901   return error;
902 }
903
904 VLIB_INIT_FUNCTION (ip4_lookup_init);
905
/* Per-packet trace record shared by the ip4 forwarding nodes in this file.
   It is filled in by ip4_forward_next_trace() and printed by the
   format_ip4_*_trace() functions. */
906 typedef struct
907 {
908   /* Adjacency taken. */
      /* Copied from vnet_buffer (b)->ip.adj_index[which_adj_index]. */
909   u32 dpo_index;
      /* Copied from vnet_buffer (b)->ip.flow_hash. */
910   u32 flow_hash;
      /* The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
         FIB index bound to the RX interface. */
911   u32 fib_index;
912
913   /* Packet data, possibly *after* rewrite. */
      /* 64 - sizeof (u32) bytes, copied from the buffer's current data. */
914   u8 packet_data[64 - 1 * sizeof (u32)];
915 }
916 ip4_forward_next_trace_t;
917
918 u8 *
919 format_ip4_forward_next_trace (u8 * s, va_list * args)
920 {
921   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
922   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
923   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
924   u32 indent = format_get_indent (s);
925   s = format (s, "%U%U",
926               format_white_space, indent,
927               format_ip4_header, t->packet_data, sizeof (t->packet_data));
928   return s;
929 }
930
931 static u8 *
932 format_ip4_lookup_trace (u8 * s, va_list * args)
933 {
934   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
935   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
936   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
937   u32 indent = format_get_indent (s);
938
939   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
940               t->fib_index, t->dpo_index, t->flow_hash);
941   s = format (s, "\n%U%U",
942               format_white_space, indent,
943               format_ip4_header, t->packet_data, sizeof (t->packet_data));
944   return s;
945 }
946
947 static u8 *
948 format_ip4_rewrite_trace (u8 * s, va_list * args)
949 {
950   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
951   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
952   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
953   u32 indent = format_get_indent (s);
954
955   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
956               t->fib_index, t->dpo_index, format_ip_adjacency,
957               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
958   s = format (s, "\n%U%U",
959               format_white_space, indent,
960               format_ip_adjacency_packet_data,
961               t->dpo_index, t->packet_data, sizeof (t->packet_data));
962   return s;
963 }
964
965 /* Common trace function for all ip4-forward next nodes. */
966 void
967 ip4_forward_next_trace (vlib_main_t * vm,
968                         vlib_node_runtime_t * node,
969                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
970 {
971   u32 *from, n_left;
972   ip4_main_t *im = &ip4_main;
973
974   n_left = frame->n_vectors;
975   from = vlib_frame_vector_args (frame);
976
977   while (n_left >= 4)
978     {
979       u32 bi0, bi1;
980       vlib_buffer_t *b0, *b1;
981       ip4_forward_next_trace_t *t0, *t1;
982
983       /* Prefetch next iteration. */
984       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
985       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
986
987       bi0 = from[0];
988       bi1 = from[1];
989
990       b0 = vlib_get_buffer (vm, bi0);
991       b1 = vlib_get_buffer (vm, bi1);
992
993       if (b0->flags & VLIB_BUFFER_IS_TRACED)
994         {
995           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
996           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
997           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
998           t0->fib_index =
999             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1000              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1001             vec_elt (im->fib_index_by_sw_if_index,
1002                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1003
1004           clib_memcpy (t0->packet_data,
1005                        vlib_buffer_get_current (b0),
1006                        sizeof (t0->packet_data));
1007         }
1008       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1009         {
1010           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1011           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1012           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1013           t1->fib_index =
1014             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1015              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1016             vec_elt (im->fib_index_by_sw_if_index,
1017                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1018           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1019                        sizeof (t1->packet_data));
1020         }
1021       from += 2;
1022       n_left -= 2;
1023     }
1024
1025   while (n_left >= 1)
1026     {
1027       u32 bi0;
1028       vlib_buffer_t *b0;
1029       ip4_forward_next_trace_t *t0;
1030
1031       bi0 = from[0];
1032
1033       b0 = vlib_get_buffer (vm, bi0);
1034
1035       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1036         {
1037           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1038           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1039           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1040           t0->fib_index =
1041             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1042              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1043             vec_elt (im->fib_index_by_sw_if_index,
1044                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1045           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1046                        sizeof (t0->packet_data));
1047         }
1048       from += 1;
1049       n_left -= 1;
1050     }
1051 }
1052
1053 /* Compute TCP/UDP/ICMP4 checksum in software. */
1054 u16
1055 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1056                               ip4_header_t * ip0)
1057 {
1058   ip_csum_t sum0;
1059   u32 ip_header_length, payload_length_host_byte_order;
1060   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1061   u16 sum16;
1062   void *data_this_buffer;
1063
1064   /* Initialize checksum with ip header. */
1065   ip_header_length = ip4_header_bytes (ip0);
1066   payload_length_host_byte_order =
1067     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1068   sum0 =
1069     clib_host_to_net_u32 (payload_length_host_byte_order +
1070                           (ip0->protocol << 16));
1071
1072   if (BITS (uword) == 32)
1073     {
1074       sum0 =
1075         ip_csum_with_carry (sum0,
1076                             clib_mem_unaligned (&ip0->src_address, u32));
1077       sum0 =
1078         ip_csum_with_carry (sum0,
1079                             clib_mem_unaligned (&ip0->dst_address, u32));
1080     }
1081   else
1082     sum0 =
1083       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1084
1085   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1086   data_this_buffer = (void *) ip0 + ip_header_length;
1087   n_ip_bytes_this_buffer =
1088     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1089   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1090     {
1091       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1092         n_ip_bytes_this_buffer - ip_header_length : 0;
1093     }
1094   while (1)
1095     {
1096       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1097       n_bytes_left -= n_this_buffer;
1098       if (n_bytes_left == 0)
1099         break;
1100
1101       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1102       p0 = vlib_get_buffer (vm, p0->next_buffer);
1103       data_this_buffer = vlib_buffer_get_current (p0);
1104       n_this_buffer = p0->current_length;
1105     }
1106
1107   sum16 = ~ip_csum_fold (sum0);
1108
1109   return sum16;
1110 }
1111
1112 u32
1113 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1114 {
1115   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1116   udp_header_t *udp0;
1117   u16 sum16;
1118
1119   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1120           || ip0->protocol == IP_PROTOCOL_UDP);
1121
1122   udp0 = (void *) (ip0 + 1);
1123   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1124     {
1125       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1126                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1127       return p0->flags;
1128     }
1129
1130   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1131
1132   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1133                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1134
1135   return p0->flags;
1136 }
1137
/* Declare the "ip4-local" feature arc, whose start node is "ip4-local".
   The generated vnet_feat_arc_ip4_local.feature_arc_index is what
   ip4_local_set_next_and_error() passes to vnet_feature_arc_start(). */
1138 /* *INDENT-OFF* */
1139 VNET_FEATURE_ARC_INIT (ip4_local) =
1140 {
1141   .arc_name  = "ip4-local",
1142   .start_nodes = VNET_FEATURES ("ip4-local"),
1143 };
1144 /* *INDENT-ON* */
1145
1146 static inline void
1147 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1148                             ip4_header_t * ip, u8 is_udp, u8 * error,
1149                             u8 * good_tcp_udp)
1150 {
1151   u32 flags0;
1152   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1153   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1154   if (is_udp)
1155     {
1156       udp_header_t *udp;
1157       u32 ip_len, udp_len;
1158       i32 len_diff;
1159       udp = ip4_next_header (ip);
1160       /* Verify UDP length. */
1161       ip_len = clib_net_to_host_u16 (ip->length);
1162       udp_len = clib_net_to_host_u16 (udp->length);
1163
1164       len_diff = ip_len - udp_len;
1165       *good_tcp_udp &= len_diff >= 0;
1166       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1167     }
1168 }
1169
/*
 * Checksum-state predicates on a buffer's flags.  _b is a vlib_buffer_t
 * pointer expression.  Every argument and every expansion is fully
 * parenthesized so the macros compose safely with !, &&, == and with
 * non-trivial argument expressions.  Each evaluates to 0 or 1.
 */

/* True when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* True when a TCP/UDP packet still needs a software checksum check:
   neither already computed nor offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* True when the checksum is flagged correct or is offloaded. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1181
1182 static inline void
1183 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1184                          ip4_header_t * ih, u8 * error)
1185 {
1186   u8 is_udp, is_tcp_udp, good_tcp_udp;
1187
1188   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1189   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1190
1191   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1192     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1193   else
1194     good_tcp_udp = ip4_local_csum_is_valid (b);
1195
1196   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1197   *error = (is_tcp_udp && !good_tcp_udp
1198             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1199 }
1200
1201 static inline void
1202 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1203                             ip4_header_t ** ih, u8 * error)
1204 {
1205   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1206
1207   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1208   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1209
1210   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1211   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1212
1213   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1214   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1215
1216   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1217                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1218     {
1219       if (is_tcp_udp[0])
1220         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1221                                     &good_tcp_udp[0]);
1222       if (is_tcp_udp[1])
1223         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1224                                     &good_tcp_udp[1]);
1225     }
1226
1227   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1228               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1229   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1230               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1231 }
1232
1233 static inline void
1234 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1235                               vlib_buffer_t * b, u16 * next, u8 error,
1236                               u8 head_of_feature_arc)
1237 {
1238   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1239   u32 next_index;
1240
1241   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1242   b->error = error ? error_node->errors[error] : 0;
1243   if (head_of_feature_arc)
1244     {
1245       next_index = *next;
1246       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1247         {
1248           vnet_feature_arc_start (arc_index,
1249                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1250                                   &next_index, b);
1251           *next = next_index;
1252         }
1253     }
1254 }
1255
/* One-entry cache of the most recent source-address check, kept on the
   stack of ip4_local_inline() for the duration of one frame and updated
   by ip4_local_check_src() / ip4_local_check_src_x2(). */
1256 typedef struct
1257 {
       /* Source address of the last packet that went through a lookup. */
1258   ip4_address_t src;
       /* Load-balance index found for that source; initialised to ~0. */
1259   u32 lbi;
       /* Error code stored after that lookup; reused on a cache hit. */
1260   u8 error;
1261 } ip4_local_last_check_t;
1262
1263 static inline void
1264 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1265                      ip4_local_last_check_t * last_check, u8 * error0)
1266 {
1267   ip4_fib_mtrie_leaf_t leaf0;
1268   ip4_fib_mtrie_t *mtrie0;
1269   const dpo_id_t *dpo0;
1270   load_balance_t *lb0;
1271   u32 lbi0;
1272
1273   vnet_buffer (b)->ip.fib_index =
1274     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1275     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1276
1277   if (PREDICT_FALSE (last_check->src.as_u32 != ip0->src_address.as_u32))
1278     {
1279       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1280       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1281       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1282       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1283       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1284
1285       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1286       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1287
1288       lb0 = load_balance_get (lbi0);
1289       dpo0 = load_balance_get_bucket_i (lb0, 0);
1290
1291       /*
1292        * Must have a route to source otherwise we drop the packet.
1293        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1294        *
1295        * The checks are:
1296        *  - the source is a recieve => it's from us => bogus, do this
1297        *    first since it sets a different error code.
1298        *  - uRPF check for any route to source - accept if passes.
1299        *  - allow packets destined to the broadcast address from unknown sources
1300        */
1301
1302       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1303                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1304                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1305       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1306                   && !fib_urpf_check_size (lb0->lb_urpf)
1307                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1308                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1309
1310       last_check->src.as_u32 = ip0->src_address.as_u32;
1311       last_check->lbi = lbi0;
1312       last_check->error = *error0;
1313     }
1314   else
1315     {
1316       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1317       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1318       *error0 = last_check->error;
1319     }
1320 }
1321
1322 static inline void
1323 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1324                         ip4_local_last_check_t * last_check, u8 * error)
1325 {
1326   ip4_fib_mtrie_leaf_t leaf[2];
1327   ip4_fib_mtrie_t *mtrie[2];
1328   const dpo_id_t *dpo[2];
1329   load_balance_t *lb[2];
1330   u32 not_last_hit = 0;
1331   u32 lbi[2];
1332
1333   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1334   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1335
1336   vnet_buffer (b[0])->ip.fib_index =
1337     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1338     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1339     vnet_buffer (b[0])->ip.fib_index;
1340
1341   vnet_buffer (b[1])->ip.fib_index =
1342     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1343     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1344     vnet_buffer (b[1])->ip.fib_index;
1345
1346   if (PREDICT_FALSE (not_last_hit))
1347     {
1348       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1349       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1350
1351       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1352       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1353
1354       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1355                                            &ip[0]->src_address, 2);
1356       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1357                                            &ip[1]->src_address, 2);
1358
1359       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1360                                            &ip[0]->src_address, 3);
1361       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1362                                            &ip[1]->src_address, 3);
1363
1364       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1365       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1366
1367       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1368       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1369
1370       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1371       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1372
1373       lb[0] = load_balance_get (lbi[0]);
1374       lb[1] = load_balance_get (lbi[1]);
1375
1376       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1377       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1378
1379       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1380                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1381                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1382       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1383                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1384                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1385                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1386
1387       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1388                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1389                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1390       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1391                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1392                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1393                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1394
1395       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1396       last_check->lbi = lbi[1];
1397       last_check->error = error[1];
1398     }
1399   else
1400     {
1401       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1402       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1403
1404       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1405       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1406
1407       error[0] = last_check->error;
1408       error[1] = last_check->error;
1409     }
1410 }
1411
/*
 * Shared body of the ip4-local and ip4-local-end-of-arc nodes.
 *
 * For every buffer in frame: record the L3 header offset, optionally run
 * the L4 checksum and source-address checks, choose the next node from
 * lm->local_next_by_ip_protocol[] and let ip4_local_set_next_and_error()
 * turn any error into a drop.  The checks run only when
 * head_of_feature_arc is non-zero and the buffer does not carry
 * VNET_BUFFER_F_IS_NATED.
 *
 * Returns the number of buffers in the frame.
 */
1412 static inline uword
1413 ip4_local_inline (vlib_main_t * vm,
1414                   vlib_node_runtime_t * node,
1415                   vlib_frame_t * frame, int head_of_feature_arc)
1416 {
1417   ip4_main_t *im = &ip4_main;
1418   ip_lookup_main_t *lm = &im->lookup_main;
1419   u32 *from, n_left_from;
       /* Buffer errors are taken from the ip4-input node's error table. */
1420   vlib_node_runtime_t *error_node =
1421     vlib_node_get_runtime (vm, ip4_input_node.index);
1422   u16 nexts[VLIB_FRAME_SIZE], *next;
1423   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1424   ip4_header_t *ip[2];
1425   u8 error[2];
1426
       /* One-entry source-lookup cache, private to this frame. */
1427   ip4_local_last_check_t last_check = {
1428     .src = {.as_u32 = 0},
1429     .lbi = ~0,
1430     .error = IP4_ERROR_UNKNOWN_PROTOCOL
1431   };
1432
1433   from = vlib_frame_vector_args (frame);
1434   n_left_from = frame->n_vectors;
1435
1436   if (node->flags & VLIB_NODE_FLAG_TRACE)
1437     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1438
       /* Resolve all buffer indices to pointers up front. */
1439   vlib_get_buffers (vm, from, bufs, n_left_from);
1440   b = bufs;
1441   next = nexts;
1442
       /* Dual loop: two packets per iteration.  Six must remain because
          b[4] and b[5] are prefetched below. */
1443   while (n_left_from >= 6)
1444     {
1445       u32 is_nat, not_batch = 0;
1446
1447       /* Prefetch next iteration. */
1448       {
1449         vlib_prefetch_buffer_header (b[4], LOAD);
1450         vlib_prefetch_buffer_header (b[5], LOAD);
1451
1452         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1453         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1454       }
1455
           /* IP4_ERROR_UNKNOWN_PROTOCOL doubles as "no error so far". */
1456       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1457
1458       ip[0] = vlib_buffer_get_current (b[0]);
1459       ip[1] = vlib_buffer_get_current (b[1]);
1460
1461       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1462       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1463
           /* not_batch becomes non-zero when the two buffers disagree on
              the NATed flag and so cannot share the x2 code path. */
1464       is_nat = b[0]->flags & VNET_BUFFER_F_IS_NATED;
1465       not_batch |= is_nat ^ (b[1]->flags & VNET_BUFFER_F_IS_NATED);
1466
           /* No checks when not at the head of the arc, or when both
              buffers are NATed. */
1467       if (head_of_feature_arc == 0 || (is_nat && not_batch == 0))
1468         goto skip_checks;
1469
1470       if (PREDICT_TRUE (not_batch == 0))
1471         {
1472           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1473           ip4_local_check_src_x2 (b, ip, &last_check, error);
1474         }
1475       else
1476         {
               /* Mixed pair: check only the buffer that is not NATed. */
1477           if (!(b[0]->flags & VNET_BUFFER_F_IS_NATED))
1478             {
1479               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1480               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1481             }
1482           if (!(b[1]->flags & VNET_BUFFER_F_IS_NATED))
1483             {
1484               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1485               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1486             }
1487         }
1488
1489     skip_checks:
1490
           /* Next node by IP protocol; may be replaced by drop or by the
              feature arc inside ip4_local_set_next_and_error(). */
1491       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1492       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1493       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1494                                     head_of_feature_arc);
1495       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1496                                     head_of_feature_arc);
1497
1498       b += 2;
1499       next += 2;
1500       n_left_from -= 2;
1501     }
1502
       /* Single loop: remaining packets, same steps one at a time. */
1503   while (n_left_from > 0)
1504     {
1505       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1506
1507       ip[0] = vlib_buffer_get_current (b[0]);
1508       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1509
1510       if (head_of_feature_arc == 0 || (b[0]->flags & VNET_BUFFER_F_IS_NATED))
1511         goto skip_check;
1512
1513       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1514       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1515
1516     skip_check:
1517
1518       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1519       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1520                                     head_of_feature_arc);
1521
1522       b += 1;
1523       next += 1;
1524       n_left_from -= 1;
1525     }
1526
       /* Hand every buffer to the next node recorded for it in nexts[]. */
1527   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1528   return frame->n_vectors;
1529 }
1530
1531 static uword
1532 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1533 {
1534   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1535 }
1536
/* Graph node "ip4-local": dispatches through ip4_local() and declares the
   fixed next nodes addressed by the IP_LOCAL_NEXT_* indices.  Further next
   nodes are appended at run time by ip4_register_protocol(). */
1537 /* *INDENT-OFF* */
1538 VLIB_REGISTER_NODE (ip4_local_node) =
1539 {
1540   .function = ip4_local,
1541   .name = "ip4-local",
1542   .vector_size = sizeof (u32),
1543   .format_trace = format_ip4_forward_next_trace,
1544   .n_next_nodes = IP_LOCAL_N_NEXT,
1545   .next_nodes =
1546   {
1547     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1548     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1549     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1550     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1551   },
1552 };
1553 /* *INDENT-ON* */
1554
/* NOTE(review): presumably generates per-CPU-architecture variants of
   ip4_local; the macro definition is not visible here. */
1555 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1556
1557 static uword
1558 ip4_local_end_of_arc (vlib_main_t * vm,
1559                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1560 {
1561   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1562 }
1563
/* Graph node "ip4-local-end-of-arc": dispatches through
   ip4_local_end_of_arc() and is declared a sibling of "ip4-local".  It is
   also registered as a feature on the "ip4-local" arc with no runs_before
   constraint. */
1564 /* *INDENT-OFF* */
1565 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1566   .function = ip4_local_end_of_arc,
1567   .name = "ip4-local-end-of-arc",
1568   .vector_size = sizeof (u32),
1569
1570   .format_trace = format_ip4_forward_next_trace,
1571   .sibling_of = "ip4-local",
1572 };
1573
1574 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1575
1576 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1577   .arc_name = "ip4-local",
1578   .node_name = "ip4-local-end-of-arc",
1579   .runs_before = 0, /* not before any other features */
1580 };
1581 /* *INDENT-ON* */
1582
1583 void
1584 ip4_register_protocol (u32 protocol, u32 node_index)
1585 {
1586   vlib_main_t *vm = vlib_get_main ();
1587   ip4_main_t *im = &ip4_main;
1588   ip_lookup_main_t *lm = &im->lookup_main;
1589
1590   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1591   lm->local_next_by_ip_protocol[protocol] =
1592     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1593 }
1594
1595 static clib_error_t *
1596 show_ip_local_command_fn (vlib_main_t * vm,
1597                           unformat_input_t * input, vlib_cli_command_t * cmd)
1598 {
1599   ip4_main_t *im = &ip4_main;
1600   ip_lookup_main_t *lm = &im->lookup_main;
1601   int i;
1602
1603   vlib_cli_output (vm, "Protocols handled by ip4_local");
1604   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1605     {
1606       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1607         {
1608           u32 node_index = vlib_get_node (vm,
1609                                           ip4_local_node.index)->
1610             next_nodes[lm->local_next_by_ip_protocol[i]];
1611           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1612                            node_index);
1613         }
1614     }
1615   return 0;
1616 }
1617
1618
1619
1620 /*?
1621  * Display the set of protocols handled by the local IPv4 stack.
1622  *
 * Every protocol whose next node is not the punt node is listed. Each
 * entry is printed as '<em>protocol-number</em>: <em>node-name</em>';
 * the example below abbreviates the entries to the protocol number.
 *
1623  * @cliexpar
1624  * Example of how to display local protocol table:
1625  * @cliexstart{show ip local}
1626  * Protocols handled by ip4_local
1627  * 1
1628  * 17
1629  * 47
1630  * @cliexend
1631 ?*/
1632 /* *INDENT-OFF* */
1633 VLIB_CLI_COMMAND (show_ip_local, static) =
1634 {
1635   .path = "show ip local",
1636   .function = show_ip_local_command_fn,
1637   .short_help = "show ip local",
1638 };
1639 /* *INDENT-ON* */
1640
1641 always_inline uword
1642 ip4_arp_inline (vlib_main_t * vm,
1643                 vlib_node_runtime_t * node,
1644                 vlib_frame_t * frame, int is_glean)
1645 {
1646   vnet_main_t *vnm = vnet_get_main ();
1647   ip4_main_t *im = &ip4_main;
1648   ip_lookup_main_t *lm = &im->lookup_main;
1649   u32 *from, *to_next_drop;
1650   uword n_left_from, n_left_to_next_drop, next_index;
1651   static f64 time_last_seed_change = -1e100;
1652   static u32 hash_seeds[3];
1653   static uword hash_bitmap[256 / BITS (uword)];
1654   f64 time_now;
1655
1656   if (node->flags & VLIB_NODE_FLAG_TRACE)
1657     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1658
1659   time_now = vlib_time_now (vm);
1660   if (time_now - time_last_seed_change > 1e-3)
1661     {
1662       uword i;
1663       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1664                                             sizeof (hash_seeds));
1665       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1666         hash_seeds[i] = r[i];
1667
1668       /* Mark all hash keys as been no-seen before. */
1669       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1670         hash_bitmap[i] = 0;
1671
1672       time_last_seed_change = time_now;
1673     }
1674
1675   from = vlib_frame_vector_args (frame);
1676   n_left_from = frame->n_vectors;
1677   next_index = node->cached_next_index;
1678   if (next_index == IP4_ARP_NEXT_DROP)
1679     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1680
1681   while (n_left_from > 0)
1682     {
1683       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1684                            to_next_drop, n_left_to_next_drop);
1685
1686       while (n_left_from > 0 && n_left_to_next_drop > 0)
1687         {
1688           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1689           ip_adjacency_t *adj0;
1690           vlib_buffer_t *p0;
1691           ip4_header_t *ip0;
1692           uword bm0;
1693
1694           pi0 = from[0];
1695
1696           p0 = vlib_get_buffer (vm, pi0);
1697
1698           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1699           adj0 = adj_get (adj_index0);
1700           ip0 = vlib_buffer_get_current (p0);
1701
1702           a0 = hash_seeds[0];
1703           b0 = hash_seeds[1];
1704           c0 = hash_seeds[2];
1705
1706           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1707           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1708
1709           if (is_glean)
1710             {
1711               /*
1712                * this is the Glean case, so we are ARPing for the
1713                * packet's destination
1714                */
1715               a0 ^= ip0->dst_address.data_u32;
1716             }
1717           else
1718             {
1719               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1720             }
1721           b0 ^= sw_if_index0;
1722
1723           hash_v3_mix32 (a0, b0, c0);
1724           hash_v3_finalize32 (a0, b0, c0);
1725
1726           c0 &= BITS (hash_bitmap) - 1;
1727           m0 = (uword) 1 << (c0 % BITS (uword));
1728           c0 = c0 / BITS (uword);
1729
1730           bm0 = hash_bitmap[c0];
1731           drop0 = (bm0 & m0) != 0;
1732
1733           /* Mark it as seen. */
1734           hash_bitmap[c0] = bm0 | m0;
1735
1736           from += 1;
1737           n_left_from -= 1;
1738           to_next_drop[0] = pi0;
1739           to_next_drop += 1;
1740           n_left_to_next_drop -= 1;
1741
1742           p0->error =
1743             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
1744                          IP4_ARP_ERROR_REQUEST_SENT];
1745
1746           /*
1747            * the adj has been updated to a rewrite but the node the DPO that got
1748            * us here hasn't - yet. no big deal. we'll drop while we wait.
1749            */
1750           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1751             continue;
1752
1753           if (drop0)
1754             continue;
1755
1756           /*
1757            * Can happen if the control-plane is programming tables
1758            * with traffic flowing; at least that's today's lame excuse.
1759            */
1760           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1761               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1762             {
1763               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1764             }
1765           else
1766             /* Send ARP request. */
1767             {
1768               u32 bi0 = 0;
1769               vlib_buffer_t *b0;
1770               ethernet_arp_header_t *h0;
1771               vnet_hw_interface_t *hw_if0;
1772
1773               h0 =
1774                 vlib_packet_template_get_packet (vm,
1775                                                  &im->ip4_arp_request_packet_template,
1776                                                  &bi0);
1777
1778               /* Seems we're out of buffers */
1779               if (PREDICT_FALSE (!h0))
1780                 continue;
1781
1782               /* Add rewrite/encap string for ARP packet. */
1783               vnet_rewrite_one_header (adj0[0], h0,
1784                                        sizeof (ethernet_header_t));
1785
1786               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1787
1788               /* Src ethernet address in ARP header. */
1789               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
1790                            hw_if0->hw_address,
1791                            sizeof (h0->ip4_over_ethernet[0].ethernet));
1792
1793               if (is_glean)
1794                 {
1795                   /* The interface's source address is stashed in the Glean Adj */
1796                   h0->ip4_over_ethernet[0].ip4 =
1797                     adj0->sub_type.glean.receive_addr.ip4;
1798
1799                   /* Copy in destination address we are requesting. This is the
1800                    * glean case, so it's the packet's destination.*/
1801                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1802                     ip0->dst_address.data_u32;
1803                 }
1804               else
1805                 {
1806                   /* Src IP address in ARP header. */
1807                   if (ip4_src_address_for_packet (lm, sw_if_index0,
1808                                                   &h0->
1809                                                   ip4_over_ethernet[0].ip4))
1810                     {
1811                       /* No source address available */
1812                       p0->error =
1813                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1814                       vlib_buffer_free (vm, &bi0, 1);
1815                       continue;
1816                     }
1817
1818                   /* Copy in destination address we are requesting from the
1819                      incomplete adj */
1820                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1821                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1822                 }
1823
1824               vlib_buffer_copy_trace_flag (vm, p0, bi0);
1825               b0 = vlib_get_buffer (vm, bi0);
1826               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1827               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1828
1829               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1830
1831               vlib_set_next_frame_buffer (vm, node,
1832                                           adj0->rewrite_header.next_index,
1833                                           bi0);
1834             }
1835         }
1836
1837       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1838     }
1839
1840   return frame->n_vectors;
1841 }
1842
1843 static uword
1844 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1845 {
1846   return (ip4_arp_inline (vm, node, frame, 0));
1847 }
1848
1849 static uword
1850 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1851 {
1852   return (ip4_arp_inline (vm, node, frame, 1));
1853 }
1854
/*
 * Human-readable descriptions of the IP4_ARP_ERROR_* counters, indexed by
 * error code.  The table is shared by the ip4-arp and ip4-glean node
 * registrations below.
 */
static char *ip4_arp_error_strings[] = {
  [IP4_ARP_ERROR_DROP] = "address overflow drops",
  [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
  [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
  [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
  [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
  [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
};
1863
1864 /* *INDENT-OFF* */
/*
 * Registration of the "ip4-arp" graph node (dispatch function ip4_arp).
 * The only statically declared next node is error-drop; the node function
 * enqueues the generated ARP requests to the adjacency's
 * rewrite_header.next_index.
 */
VLIB_REGISTER_NODE (ip4_arp_node) =
{
  .function = ip4_arp,
  .name = "ip4-arp",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
  .error_strings = ip4_arp_error_strings,
  .n_next_nodes = IP4_ARP_N_NEXT,
  .next_nodes =
  {
    [IP4_ARP_NEXT_DROP] = "error-drop",
  },
};
1879
/*
 * Registration of the "ip4-glean" graph node (dispatch function
 * ip4_glean).  It uses the same error strings, trace formatter and
 * next-node layout as the ip4-arp registration.
 */
VLIB_REGISTER_NODE (ip4_glean_node) =
{
  .function = ip4_glean,
  .name = "ip4-glean",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
  .error_strings = ip4_arp_error_strings,
  .n_next_nodes = IP4_ARP_N_NEXT,
  .next_nodes = {
  [IP4_ARP_NEXT_DROP] = "error-drop",
  },
};
1893 /* *INDENT-ON* */
1894
/*
 * X-macro listing the IP4_ARP_ERROR_* suffixes whose drops should not be
 * captured by the pcap drop trace.  The user must define _(a) before
 * expanding it; see arp_notrace_init.
 */
#define foreach_notrace_ip4_arp_error           \
_(DROP)                                         \
_(REQUEST_SENT)                                 \
_(REPLICATE_DROP)                               \
_(REPLICATE_FAIL)
1900
1901 clib_error_t *
1902 arp_notrace_init (vlib_main_t * vm)
1903 {
1904   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1905
1906   /* don't trace ARP request packets */
1907 #define _(a)                                    \
1908     vnet_pcap_drop_trace_filter_add_del         \
1909         (rt->errors[IP4_ARP_ERROR_##a],         \
1910          1 /* is_add */);
1911   foreach_notrace_ip4_arp_error;
1912 #undef _
1913   return 0;
1914 }
1915
1916 VLIB_INIT_FUNCTION (arp_notrace_init);
1917
1918
1919 /* Send an ARP request to see if given destination is reachable on given interface. */
1920 clib_error_t *
1921 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
1922 {
1923   vnet_main_t *vnm = vnet_get_main ();
1924   ip4_main_t *im = &ip4_main;
1925   ethernet_arp_header_t *h;
1926   ip4_address_t *src;
1927   ip_interface_address_t *ia;
1928   ip_adjacency_t *adj;
1929   vnet_hw_interface_t *hi;
1930   vnet_sw_interface_t *si;
1931   vlib_buffer_t *b;
1932   adj_index_t ai;
1933   u32 bi = 0;
1934
1935   si = vnet_get_sw_interface (vnm, sw_if_index);
1936
1937   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1938     {
1939       return clib_error_return (0, "%U: interface %U down",
1940                                 format_ip4_address, dst,
1941                                 format_vnet_sw_if_index_name, vnm,
1942                                 sw_if_index);
1943     }
1944
1945   src =
1946     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1947   if (!src)
1948     {
1949       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1950       return clib_error_return
1951         (0,
1952          "no matching interface address for destination %U (interface %U)",
1953          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
1954          sw_if_index);
1955     }
1956
1957   h = vlib_packet_template_get_packet (vm,
1958                                        &im->ip4_arp_request_packet_template,
1959                                        &bi);
1960
1961   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1962   if (PREDICT_FALSE (!hi->hw_address))
1963     {
1964       return clib_error_return (0, "%U: interface %U do not support ip probe",
1965                                 format_ip4_address, dst,
1966                                 format_vnet_sw_if_index_name, vnm,
1967                                 sw_if_index);
1968     }
1969
1970   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
1971                sizeof (h->ip4_over_ethernet[0].ethernet));
1972
1973   h->ip4_over_ethernet[0].ip4 = src[0];
1974   h->ip4_over_ethernet[1].ip4 = dst[0];
1975
1976   b = vlib_get_buffer (vm, bi);
1977   vnet_buffer (b)->sw_if_index[VLIB_RX] =
1978     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1979
1980   ip46_address_t nh = {
1981     .ip4 = *dst,
1982   };
1983
1984   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
1985                             VNET_LINK_IP4, &nh, sw_if_index);
1986   adj = adj_get (ai);
1987
1988   /* Peer has been previously resolved, retrieve glean adj instead */
1989   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
1990     {
1991       adj_unlock (ai);
1992       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
1993                                   VNET_LINK_IP4, sw_if_index, &nh);
1994       adj = adj_get (ai);
1995     }
1996
1997   /* Add encapsulation string for software interface (e.g. ethernet header). */
1998   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
1999   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2000
2001   {
2002     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2003     u32 *to_next = vlib_frame_vector_args (f);
2004     to_next[0] = bi;
2005     f->n_vectors = 1;
2006     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2007   }
2008
2009   adj_unlock (ai);
2010   return /* no error */ 0;
2011 }
2012
/*
 * Static next-node indices of the ip4-rewrite node family.  Packets that
 * are rewritten without error leave via the adjacency's
 * rewrite_header.next_index instead.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP,        /* default disposition on error */
  IP4_REWRITE_NEXT_ICMP_ERROR,  /* TTL expired, or too big with DF set */
  IP4_REWRITE_NEXT_FRAGMENT,    /* too big for the adjacency, DF clear */
  IP4_REWRITE_N_NEXT            /* Last */
} ip4_rewrite_next_t;
2020
/**
 * The bits of an IPv4 address that are kept when constructing a
 * multicast MAC address: the low 23 bits of the address as it sits in
 * memory (network byte order), hence the endian-dependent constant.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif
2030
2031 always_inline void
2032 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2033                u16 adj_packet_bytes, bool df, u32 * next, u32 * error)
2034 {
2035   if (packet_len > adj_packet_bytes)
2036     {
2037       *error = IP4_ERROR_MTU_EXCEEDED;
2038       if (df)
2039         {
2040           icmp4_error_set_vnet_buffer
2041             (b, ICMP4_destination_unreachable,
2042              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2043              adj_packet_bytes);
2044           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2045         }
2046       else
2047         {
2048           /* IP fragmentation */
2049           ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes,
2050                                    IP4_FRAG_NEXT_IP4_LOOKUP, 0);
2051           *next = IP4_REWRITE_NEXT_FRAGMENT;
2052         }
2053     }
2054 }
2055
2056 always_inline uword
2057 ip4_rewrite_inline (vlib_main_t * vm,
2058                     vlib_node_runtime_t * node,
2059                     vlib_frame_t * frame,
2060                     int do_counters, int is_midchain, int is_mcast)
2061 {
2062   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2063   u32 *from = vlib_frame_vector_args (frame);
2064   u32 n_left_from, n_left_to_next, *to_next, next_index;
2065   vlib_node_runtime_t *error_node =
2066     vlib_node_get_runtime (vm, ip4_input_node.index);
2067
2068   n_left_from = frame->n_vectors;
2069   next_index = node->cached_next_index;
2070   u32 thread_index = vlib_get_thread_index ();
2071
2072   while (n_left_from > 0)
2073     {
2074       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2075
2076       while (n_left_from >= 4 && n_left_to_next >= 2)
2077         {
2078           ip_adjacency_t *adj0, *adj1;
2079           vlib_buffer_t *p0, *p1;
2080           ip4_header_t *ip0, *ip1;
2081           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2082           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2083           u32 tx_sw_if_index0, tx_sw_if_index1;
2084
2085           /* Prefetch next iteration. */
2086           {
2087             vlib_buffer_t *p2, *p3;
2088
2089             p2 = vlib_get_buffer (vm, from[2]);
2090             p3 = vlib_get_buffer (vm, from[3]);
2091
2092             vlib_prefetch_buffer_header (p2, STORE);
2093             vlib_prefetch_buffer_header (p3, STORE);
2094
2095             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2096             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2097           }
2098
2099           pi0 = to_next[0] = from[0];
2100           pi1 = to_next[1] = from[1];
2101
2102           from += 2;
2103           n_left_from -= 2;
2104           to_next += 2;
2105           n_left_to_next -= 2;
2106
2107           p0 = vlib_get_buffer (vm, pi0);
2108           p1 = vlib_get_buffer (vm, pi1);
2109
2110           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2111           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2112
2113           /*
2114            * pre-fetch the per-adjacency counters
2115            */
2116           if (do_counters)
2117             {
2118               vlib_prefetch_combined_counter (&adjacency_counters,
2119                                               thread_index, adj_index0);
2120               vlib_prefetch_combined_counter (&adjacency_counters,
2121                                               thread_index, adj_index1);
2122             }
2123
2124           ip0 = vlib_buffer_get_current (p0);
2125           ip1 = vlib_buffer_get_current (p1);
2126
2127           error0 = error1 = IP4_ERROR_NONE;
2128           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2129
2130           /* Decrement TTL & update checksum.
2131              Works either endian, so no need for byte swap. */
2132           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2133             {
2134               i32 ttl0 = ip0->ttl;
2135
2136               /* Input node should have reject packets with ttl 0. */
2137               ASSERT (ip0->ttl > 0);
2138
2139               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2140               checksum0 += checksum0 >= 0xffff;
2141
2142               ip0->checksum = checksum0;
2143               ttl0 -= 1;
2144               ip0->ttl = ttl0;
2145
2146               /*
2147                * If the ttl drops below 1 when forwarding, generate
2148                * an ICMP response.
2149                */
2150               if (PREDICT_FALSE (ttl0 <= 0))
2151                 {
2152                   error0 = IP4_ERROR_TIME_EXPIRED;
2153                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2154                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2155                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2156                                                0);
2157                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2158                 }
2159
2160               /* Verify checksum. */
2161               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2162                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2163             }
2164           else
2165             {
2166               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2167             }
2168           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2169             {
2170               i32 ttl1 = ip1->ttl;
2171
2172               /* Input node should have reject packets with ttl 0. */
2173               ASSERT (ip1->ttl > 0);
2174
2175               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2176               checksum1 += checksum1 >= 0xffff;
2177
2178               ip1->checksum = checksum1;
2179               ttl1 -= 1;
2180               ip1->ttl = ttl1;
2181
2182               /*
2183                * If the ttl drops below 1 when forwarding, generate
2184                * an ICMP response.
2185                */
2186               if (PREDICT_FALSE (ttl1 <= 0))
2187                 {
2188                   error1 = IP4_ERROR_TIME_EXPIRED;
2189                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2190                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2191                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2192                                                0);
2193                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2194                 }
2195
2196               /* Verify checksum. */
2197               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2198                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2199             }
2200           else
2201             {
2202               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2203             }
2204
2205           /* Rewrite packet header and updates lengths. */
2206           adj0 = adj_get (adj_index0);
2207           adj1 = adj_get (adj_index1);
2208
2209           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2210           rw_len0 = adj0[0].rewrite_header.data_bytes;
2211           rw_len1 = adj1[0].rewrite_header.data_bytes;
2212           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2213           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2214
2215           /* Check MTU of outgoing interface. */
2216           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2217                          adj0[0].rewrite_header.max_l3_packet_bytes,
2218                          ip0->flags_and_fragment_offset &
2219                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2220                          &next0, &error0);
2221           ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length),
2222                          adj1[0].rewrite_header.max_l3_packet_bytes,
2223                          ip1->flags_and_fragment_offset &
2224                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2225                          &next1, &error1);
2226
2227           if (is_mcast)
2228             {
2229               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2230                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2231                         IP4_ERROR_SAME_INTERFACE : error0);
2232               error1 = ((adj1[0].rewrite_header.sw_if_index ==
2233                          vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2234                         IP4_ERROR_SAME_INTERFACE : error1);
2235             }
2236
2237           p0->error = error_node->errors[error0];
2238           p1->error = error_node->errors[error1];
2239           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2240            * to see the IP headerr */
2241           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2242             {
2243               next0 = adj0[0].rewrite_header.next_index;
2244               p0->current_data -= rw_len0;
2245               p0->current_length += rw_len0;
2246               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2247               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2248
2249               if (PREDICT_FALSE
2250                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2251                 vnet_feature_arc_start (lm->output_feature_arc_index,
2252                                         tx_sw_if_index0, &next0, p0);
2253             }
2254           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2255             {
2256               next1 = adj1[0].rewrite_header.next_index;
2257               p1->current_data -= rw_len1;
2258               p1->current_length += rw_len1;
2259
2260               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2261               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2262
2263               if (PREDICT_FALSE
2264                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2265                 vnet_feature_arc_start (lm->output_feature_arc_index,
2266                                         tx_sw_if_index1, &next1, p1);
2267             }
2268
2269           /* Guess we are only writing on simple Ethernet header. */
2270           vnet_rewrite_two_headers (adj0[0], adj1[0],
2271                                     ip0, ip1, sizeof (ethernet_header_t));
2272
2273           /*
2274            * Bump the per-adjacency counters
2275            */
2276           if (do_counters)
2277             {
2278               vlib_increment_combined_counter
2279                 (&adjacency_counters,
2280                  thread_index,
2281                  adj_index0, 1,
2282                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2283
2284               vlib_increment_combined_counter
2285                 (&adjacency_counters,
2286                  thread_index,
2287                  adj_index1, 1,
2288                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2289             }
2290
2291           if (is_midchain)
2292             {
2293               adj0->sub_type.midchain.fixup_func
2294                 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2295               adj1->sub_type.midchain.fixup_func
2296                 (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
2297             }
2298           if (is_mcast)
2299             {
2300               /*
2301                * copy bytes from the IP address into the MAC rewrite
2302                */
2303               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2304                                           adj0->
2305                                           rewrite_header.dst_mcast_offset,
2306                                           &ip0->dst_address.as_u32,
2307                                           (u8 *) ip0);
2308               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2309                                           adj0->
2310                                           rewrite_header.dst_mcast_offset,
2311                                           &ip1->dst_address.as_u32,
2312                                           (u8 *) ip1);
2313             }
2314
2315           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2316                                            to_next, n_left_to_next,
2317                                            pi0, pi1, next0, next1);
2318         }
2319
2320       while (n_left_from > 0 && n_left_to_next > 0)
2321         {
2322           ip_adjacency_t *adj0;
2323           vlib_buffer_t *p0;
2324           ip4_header_t *ip0;
2325           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2326           u32 tx_sw_if_index0;
2327
2328           pi0 = to_next[0] = from[0];
2329
2330           p0 = vlib_get_buffer (vm, pi0);
2331
2332           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2333
2334           adj0 = adj_get (adj_index0);
2335
2336           ip0 = vlib_buffer_get_current (p0);
2337
2338           error0 = IP4_ERROR_NONE;
2339           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2340
2341           /* Decrement TTL & update checksum. */
2342           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2343             {
2344               i32 ttl0 = ip0->ttl;
2345
2346               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2347
2348               checksum0 += checksum0 >= 0xffff;
2349
2350               ip0->checksum = checksum0;
2351
2352               ASSERT (ip0->ttl > 0);
2353
2354               ttl0 -= 1;
2355
2356               ip0->ttl = ttl0;
2357
2358               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2359                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2360
2361               if (PREDICT_FALSE (ttl0 <= 0))
2362                 {
2363                   /*
2364                    * If the ttl drops below 1 when forwarding, generate
2365                    * an ICMP response.
2366                    */
2367                   error0 = IP4_ERROR_TIME_EXPIRED;
2368                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2369                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2370                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2371                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2372                                                0);
2373                 }
2374             }
2375           else
2376             {
2377               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2378             }
2379
2380           if (do_counters)
2381             vlib_prefetch_combined_counter (&adjacency_counters,
2382                                             thread_index, adj_index0);
2383
2384           /* Guess we are only writing on simple Ethernet header. */
2385           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2386           if (is_mcast)
2387             {
2388               /*
2389                * copy bytes from the IP address into the MAC rewrite
2390                */
2391               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2392                                           adj0->
2393                                           rewrite_header.dst_mcast_offset,
2394                                           &ip0->dst_address.as_u32,
2395                                           (u8 *) ip0);
2396             }
2397
2398           /* Update packet buffer attributes/set output interface. */
2399           rw_len0 = adj0[0].rewrite_header.data_bytes;
2400           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2401
2402           if (do_counters)
2403             vlib_increment_combined_counter
2404               (&adjacency_counters,
2405                thread_index, adj_index0, 1,
2406                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2407
2408           /* Check MTU of outgoing interface. */
2409           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2410                          adj0[0].rewrite_header.max_l3_packet_bytes,
2411                          ip0->flags_and_fragment_offset &
2412                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2413                          &next0, &error0);
2414
2415           if (is_mcast)
2416             {
2417               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2418                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2419                         IP4_ERROR_SAME_INTERFACE : error0);
2420             }
2421           p0->error = error_node->errors[error0];
2422
2423           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2424            * to see the IP headerr */
2425           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2426             {
2427               p0->current_data -= rw_len0;
2428               p0->current_length += rw_len0;
2429               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2430
2431               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2432               next0 = adj0[0].rewrite_header.next_index;
2433
2434               if (is_midchain)
2435                 {
2436                   adj0->sub_type.midchain.fixup_func
2437                     (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2438                 }
2439
2440               if (PREDICT_FALSE
2441                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2442                 vnet_feature_arc_start (lm->output_feature_arc_index,
2443                                         tx_sw_if_index0, &next0, p0);
2444
2445             }
2446
2447           from += 1;
2448           n_left_from -= 1;
2449           to_next += 1;
2450           n_left_to_next -= 1;
2451
2452           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2453                                            to_next, n_left_to_next,
2454                                            pi0, next0);
2455         }
2456
2457       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2458     }
2459
2460   /* Need to do trace after rewrites to pick up new packet data. */
2461   if (node->flags & VLIB_NODE_FLAG_TRACE)
2462     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2463
2464   return frame->n_vectors;
2465 }
2466
2467
2468 /** @brief IPv4 rewrite node.
2469     @node ip4-rewrite
2470
2471     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2472     header checksum, fetch the ip adjacency, check the outbound mtu,
2473     apply the adjacency rewrite, and send pkts to the adjacency
2474     rewrite header's rewrite_next_index.
2475
2476     @param vm vlib_main_t corresponding to the current thread
2477     @param node vlib_node_runtime_t
2478     @param frame vlib_frame_t whose contents should be dispatched
2479
2480     @par Graph mechanics: buffer metadata, next index usage
2481
2482     @em Uses:
2483     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2484         - the rewrite adjacency index
2485     - <code>adj->lookup_next_index</code>
2486         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2487           the packet will be dropped.
2488     - <code>adj->rewrite_header</code>
2489         - Rewrite string length, rewrite string, next_index
2490
2491     @em Sets:
2492     - <code>b->current_data, b->current_length</code>
2493         - Updated net of applying the rewrite string
2494
2495     <em>Next Indices:</em>
2496     - <code> adj->rewrite_header.next_index </code>
2497       or @c ip4-drop
2498 */
2499 static uword
2500 ip4_rewrite (vlib_main_t * vm,
2501              vlib_node_runtime_t * node, vlib_frame_t * frame)
2502 {
2503   if (adj_are_counters_enabled ())
2504     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2505   else
2506     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2507 }
2508
2509 static uword
2510 ip4_midchain (vlib_main_t * vm,
2511               vlib_node_runtime_t * node, vlib_frame_t * frame)
2512 {
2513   if (adj_are_counters_enabled ())
2514     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2515   else
2516     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2517 }
2518
2519 static uword
2520 ip4_rewrite_mcast (vlib_main_t * vm,
2521                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2522 {
2523   if (adj_are_counters_enabled ())
2524     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2525   else
2526     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2527 }
2528
2529 static uword
2530 ip4_mcast_midchain (vlib_main_t * vm,
2531                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2532 {
2533   if (adj_are_counters_enabled ())
2534     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2535   else
2536     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2537 }
2538
2539 /* *INDENT-OFF* */
2540 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2541   .function = ip4_rewrite,
2542   .name = "ip4-rewrite",
2543   .vector_size = sizeof (u32),
2544
2545   .format_trace = format_ip4_rewrite_trace,
2546
2547   .n_next_nodes = IP4_REWRITE_N_NEXT,
2548   .next_nodes = {
2549     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2550     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2551     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2552   },
2553 };
2554 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2555
2556 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2557   .function = ip4_rewrite_mcast,
2558   .name = "ip4-rewrite-mcast",
2559   .vector_size = sizeof (u32),
2560
2561   .format_trace = format_ip4_rewrite_trace,
2562   .sibling_of = "ip4-rewrite",
2563 };
2564 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2565
2566 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2567   .function = ip4_mcast_midchain,
2568   .name = "ip4-mcast-midchain",
2569   .vector_size = sizeof (u32),
2570
2571   .format_trace = format_ip4_rewrite_trace,
2572   .sibling_of = "ip4-rewrite",
2573 };
2574 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2575
2576 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2577   .function = ip4_midchain,
2578   .name = "ip4-midchain",
2579   .vector_size = sizeof (u32),
2580   .format_trace = format_ip4_forward_next_trace,
2581   .sibling_of =  "ip4-rewrite",
2582 };
2583 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2584 /* *INDENT-ON* */
2585
2586 int
2587 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2588 {
2589   ip4_fib_mtrie_t *mtrie0;
2590   ip4_fib_mtrie_leaf_t leaf0;
2591   u32 lbi0;
2592
2593   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2594
2595   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2596   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2597   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2598
2599   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2600
2601   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2602 }
2603
2604 static clib_error_t *
2605 test_lookup_command_fn (vlib_main_t * vm,
2606                         unformat_input_t * input, vlib_cli_command_t * cmd)
2607 {
2608   ip4_fib_t *fib;
2609   u32 table_id = 0;
2610   f64 count = 1;
2611   u32 n;
2612   int i;
2613   ip4_address_t ip4_base_address;
2614   u64 errors = 0;
2615
2616   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2617     {
2618       if (unformat (input, "table %d", &table_id))
2619         {
2620           /* Make sure the entry exists. */
2621           fib = ip4_fib_get (table_id);
2622           if ((fib) && (fib->index != table_id))
2623             return clib_error_return (0, "<fib-index> %d does not exist",
2624                                       table_id);
2625         }
2626       else if (unformat (input, "count %f", &count))
2627         ;
2628
2629       else if (unformat (input, "%U",
2630                          unformat_ip4_address, &ip4_base_address))
2631         ;
2632       else
2633         return clib_error_return (0, "unknown input `%U'",
2634                                   format_unformat_error, input);
2635     }
2636
2637   n = count;
2638
2639   for (i = 0; i < n; i++)
2640     {
2641       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2642         errors++;
2643
2644       ip4_base_address.as_u32 =
2645         clib_host_to_net_u32 (1 +
2646                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2647     }
2648
2649   if (errors)
2650     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2651   else
2652     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2653
2654   return 0;
2655 }
2656
2657 /*?
2658  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2659  * given FIB table to determine if there is a conflict with the
2660  * adjacency table. The fib-id can be determined by using the
2661  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2662  * of 0 is used.
2663  *
2664  * @todo This command uses fib-id, other commands use table-id (not
2665  * just a name, they are different indexes). Would like to change this
2666  * to table-id for consistency.
2667  *
2668  * @cliexpar
2669  * Example of how to run the test lookup command:
2670  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2671  * No errors in 2 lookups
2672  * @cliexend
2673 ?*/
2674 /* *INDENT-OFF* */
2675 VLIB_CLI_COMMAND (lookup_test_command, static) =
2676 {
2677   .path = "test lookup",
2678   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2679   .function = test_lookup_command_fn,
2680 };
2681 /* *INDENT-ON* */
2682
2683 int
2684 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2685 {
2686   u32 fib_index;
2687
2688   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2689
2690   if (~0 == fib_index)
2691     return VNET_API_ERROR_NO_SUCH_FIB;
2692
2693   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2694                                   flow_hash_config);
2695
2696   return 0;
2697 }
2698
2699 static clib_error_t *
2700 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2701                              unformat_input_t * input,
2702                              vlib_cli_command_t * cmd)
2703 {
2704   int matched = 0;
2705   u32 table_id = 0;
2706   u32 flow_hash_config = 0;
2707   int rv;
2708
2709   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2710     {
2711       if (unformat (input, "table %d", &table_id))
2712         matched = 1;
2713 #define _(a,v) \
2714     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2715       foreach_flow_hash_bit
2716 #undef _
2717         else
2718         break;
2719     }
2720
2721   if (matched == 0)
2722     return clib_error_return (0, "unknown input `%U'",
2723                               format_unformat_error, input);
2724
2725   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2726   switch (rv)
2727     {
2728     case 0:
2729       break;
2730
2731     case VNET_API_ERROR_NO_SUCH_FIB:
2732       return clib_error_return (0, "no such FIB table %d", table_id);
2733
2734     default:
2735       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2736       break;
2737     }
2738
2739   return 0;
2740 }
2741
2742 /*?
2743  * Configure the set of IPv4 fields used by the flow hash.
2744  *
2745  * @cliexpar
2746  * Example of how to set the flow hash on a given table:
2747  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2748  * Example of how to display the configured flow hash:
2749  * @cliexstart{show ip fib}
2750  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2751  * 0.0.0.0/0
2752  *   unicast-ip4-chain
2753  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2754  *     [0] [@0]: dpo-drop ip6
2755  * 0.0.0.0/32
2756  *   unicast-ip4-chain
2757  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2758  *     [0] [@0]: dpo-drop ip6
2759  * 224.0.0.0/8
2760  *   unicast-ip4-chain
2761  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2762  *     [0] [@0]: dpo-drop ip6
2763  * 6.0.1.2/32
2764  *   unicast-ip4-chain
2765  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2766  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2767  * 7.0.0.1/32
2768  *   unicast-ip4-chain
2769  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2770  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2771  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2772  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2773  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2774  * 240.0.0.0/8
2775  *   unicast-ip4-chain
2776  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2777  *     [0] [@0]: dpo-drop ip6
2778  * 255.255.255.255/32
2779  *   unicast-ip4-chain
2780  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2781  *     [0] [@0]: dpo-drop ip6
2782  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2783  * 0.0.0.0/0
2784  *   unicast-ip4-chain
2785  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2786  *     [0] [@0]: dpo-drop ip6
2787  * 0.0.0.0/32
2788  *   unicast-ip4-chain
2789  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2790  *     [0] [@0]: dpo-drop ip6
2791  * 172.16.1.0/24
2792  *   unicast-ip4-chain
2793  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2794  *     [0] [@4]: ipv4-glean: af_packet0
2795  * 172.16.1.1/32
2796  *   unicast-ip4-chain
2797  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2798  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2799  * 172.16.1.2/32
2800  *   unicast-ip4-chain
2801  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2802  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2803  * 172.16.2.0/24
2804  *   unicast-ip4-chain
2805  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2806  *     [0] [@4]: ipv4-glean: af_packet1
2807  * 172.16.2.1/32
2808  *   unicast-ip4-chain
2809  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2810  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2811  * 224.0.0.0/8
2812  *   unicast-ip4-chain
2813  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2814  *     [0] [@0]: dpo-drop ip6
2815  * 240.0.0.0/8
2816  *   unicast-ip4-chain
2817  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2818  *     [0] [@0]: dpo-drop ip6
2819  * 255.255.255.255/32
2820  *   unicast-ip4-chain
2821  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2822  *     [0] [@0]: dpo-drop ip6
2823  * @cliexend
2824 ?*/
2825 /* *INDENT-OFF* */
2826 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2827 {
2828   .path = "set ip flow-hash",
2829   .short_help =
2830   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2831   .function = set_ip_flow_hash_command_fn,
2832 };
2833 /* *INDENT-ON* */
2834
2835 int
2836 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2837                              u32 table_index)
2838 {
2839   vnet_main_t *vnm = vnet_get_main ();
2840   vnet_interface_main_t *im = &vnm->interface_main;
2841   ip4_main_t *ipm = &ip4_main;
2842   ip_lookup_main_t *lm = &ipm->lookup_main;
2843   vnet_classify_main_t *cm = &vnet_classify_main;
2844   ip4_address_t *if_addr;
2845
2846   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2847     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2848
2849   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2850     return VNET_API_ERROR_NO_SUCH_ENTRY;
2851
2852   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2853   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2854
2855   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2856
2857   if (NULL != if_addr)
2858     {
2859       fib_prefix_t pfx = {
2860         .fp_len = 32,
2861         .fp_proto = FIB_PROTOCOL_IP4,
2862         .fp_addr.ip4 = *if_addr,
2863       };
2864       u32 fib_index;
2865
2866       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2867                                                        sw_if_index);
2868
2869
2870       if (table_index != (u32) ~ 0)
2871         {
2872           dpo_id_t dpo = DPO_INVALID;
2873
2874           dpo_set (&dpo,
2875                    DPO_CLASSIFY,
2876                    DPO_PROTO_IP4,
2877                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2878
2879           fib_table_entry_special_dpo_add (fib_index,
2880                                            &pfx,
2881                                            FIB_SOURCE_CLASSIFY,
2882                                            FIB_ENTRY_FLAG_NONE, &dpo);
2883           dpo_reset (&dpo);
2884         }
2885       else
2886         {
2887           fib_table_entry_special_remove (fib_index,
2888                                           &pfx, FIB_SOURCE_CLASSIFY);
2889         }
2890     }
2891
2892   return 0;
2893 }
2894
2895 static clib_error_t *
2896 set_ip_classify_command_fn (vlib_main_t * vm,
2897                             unformat_input_t * input,
2898                             vlib_cli_command_t * cmd)
2899 {
2900   u32 table_index = ~0;
2901   int table_index_set = 0;
2902   u32 sw_if_index = ~0;
2903   int rv;
2904
2905   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2906     {
2907       if (unformat (input, "table-index %d", &table_index))
2908         table_index_set = 1;
2909       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2910                          vnet_get_main (), &sw_if_index))
2911         ;
2912       else
2913         break;
2914     }
2915
2916   if (table_index_set == 0)
2917     return clib_error_return (0, "classify table-index must be specified");
2918
2919   if (sw_if_index == ~0)
2920     return clib_error_return (0, "interface / subif must be specified");
2921
2922   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2923
2924   switch (rv)
2925     {
2926     case 0:
2927       break;
2928
2929     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2930       return clib_error_return (0, "No such interface");
2931
2932     case VNET_API_ERROR_NO_SUCH_ENTRY:
2933       return clib_error_return (0, "No such classifier table");
2934     }
2935   return 0;
2936 }
2937
2938 /*?
2939  * Assign a classification table to an interface. The classification
2940  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2941  * commands. Once the table is created, use this command to filter packets
2942  * on an interface.
2943  *
2944  * @cliexpar
2945  * Example of how to assign a classification table to an interface:
2946  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2947 ?*/
2948 /* *INDENT-OFF* */
2949 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2950 {
2951     .path = "set ip classify",
2952     .short_help =
2953     "set ip classify intfc <interface> table-index <classify-idx>",
2954     .function = set_ip_classify_command_fn,
2955 };
2956 /* *INDENT-ON* */
2957
2958 static clib_error_t *
2959 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2960 {
2961   ip4_main_t *im = &ip4_main;
2962   uword heapsize = 0;
2963
2964   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2965     {
2966       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
2967         ;
2968       else
2969         return clib_error_return (0,
2970                                   "invalid heap-size parameter `%U'",
2971                                   format_unformat_error, input);
2972     }
2973
2974   im->mtrie_heap_size = heapsize;
2975
2976   return 0;
2977 }
2978
2979 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
2980
2981 /*
2982  * fd.io coding-style-patch-verification: ON
2983  *
2984  * Local Variables:
2985  * eval: (c-set-style "gnu")
2986  * End:
2987  */