ARP: add feature arc
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame,
96                             /* lookup_for_responses_to_locally_received_packets */
97                             0);
98
99 }
100
/* Forward declaration of the trace formatter that the node registration
   below installs as its .format_trace callback. */
static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);

/* Graph registration for "ip4-lookup": frames carry u32 elements and the
   next-node table comes from IP4_LOOKUP_NEXT_NODES (IP_LOOKUP_N_NEXT
   entries). */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_lookup_node) =
{
  .name = "ip4-lookup",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_lookup_trace,
  .n_next_nodes = IP_LOOKUP_N_NEXT,
  .next_nodes = IP4_LOOKUP_NEXT_NODES,
};
/* *INDENT-ON* */
113
/*
 * ip4-load-balance node function.
 *
 * For every buffer in the frame:
 *  - read the load-balance index from ip.adj_index[VLIB_TX],
 *  - choose one bucket of that load-balance object (by flow hash when the
 *    object has more than one bucket, bucket 0 otherwise),
 *  - write the chosen bucket's DPO index back into ip.adj_index[VLIB_TX]
 *    and use the DPO's next node as the buffer's next index,
 *  - bump the per-load-balance "via" combined counter by one packet and
 *    the buffer chain length.
 *
 * Returns the number of vectors in the frame.
 */
VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
                                      vlib_node_runtime_t * node,
                                      vlib_frame_t * frame)
{
  vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
  u32 n_left, *from;
  u32 thread_index = vm->thread_index;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
  u16 nexts[VLIB_FRAME_SIZE], *next;

  from = vlib_frame_vector_args (frame);
  n_left = frame->n_vectors;
  next = nexts;

  /* Translate all buffer indices to pointers up front. */
  vlib_get_buffers (vm, from, bufs, n_left);

  /*
   * Dual loop: two buffers are handled per iteration. The loop requires at
   * least four remaining buffers so that b[2] and b[3], which are
   * prefetched below, are valid entries.
   */
  while (n_left >= 4)
    {
      const load_balance_t *lb0, *lb1;
      const ip4_header_t *ip0, *ip1;
      u32 lbi0, hc0, lbi1, hc1;
      const dpo_id_t *dpo0, *dpo1;

      /* Prefetch next iteration. */
      {
        vlib_prefetch_buffer_header (b[2], LOAD);
        vlib_prefetch_buffer_header (b[3], LOAD);

        CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
        CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
      }

      ip0 = vlib_buffer_get_current (b[0]);
      ip1 = vlib_buffer_get_current (b[1]);
      lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];

      lb0 = load_balance_get (lbi0);
      lb1 = load_balance_get (lbi1);

      /*
       * This node serves via-FIB entries, so a flow hash already stored in
       * the buffer can be re-used when present. The stored hash is shifted
       * right by one before use: using the same hash value at each level
       * of the recursion graph would lead to polarisation.
       */
      hc0 = hc1 = 0;

      if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
            {
              /* Re-use the stored hash, shifted; store the new value. */
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                vnet_buffer (b[0])->ip.flow_hash >> 1;
            }
          else
            {
              /* No hash stored yet: compute one from the IP header. */
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
            }
          dpo0 = load_balance_get_fwd_bucket
            (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
        }
      else
        {
          /* Single bucket: no hashing needed. */
          dpo0 = load_balance_get_bucket_i (lb0, 0);
        }
      if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
            {
              hc1 = vnet_buffer (b[1])->ip.flow_hash =
                vnet_buffer (b[1])->ip.flow_hash >> 1;
            }
          else
            {
              hc1 = vnet_buffer (b[1])->ip.flow_hash =
                ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
            }
          dpo1 = load_balance_get_fwd_bucket
            (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
        }
      else
        {
          dpo1 = load_balance_get_bucket_i (lb1, 0);
        }

      next[0] = dpo0->dpoi_next_node;
      next[1] = dpo1->dpoi_next_node;

      /* Hand the selected bucket's DPO index to the next node. */
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;

      /* Count against the load-balance object that was looked up. */
      vlib_increment_combined_counter
        (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
      vlib_increment_combined_counter
        (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));

      b += 2;
      next += 2;
      n_left -= 2;
    }

  /* Single loop: same processing, one buffer at a time, for the rest. */
  while (n_left > 0)
    {
      const load_balance_t *lb0;
      const ip4_header_t *ip0;
      const dpo_id_t *dpo0;
      u32 lbi0, hc0;

      ip0 = vlib_buffer_get_current (b[0]);
      lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];

      lb0 = load_balance_get (lbi0);

      hc0 = 0;
      if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                vnet_buffer (b[0])->ip.flow_hash >> 1;
            }
          else
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
            }
          dpo0 = load_balance_get_fwd_bucket
            (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
        }
      else
        {
          dpo0 = load_balance_get_bucket_i (lb0, 0);
        }

      next[0] = dpo0->dpoi_next_node;
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

      vlib_increment_combined_counter
        (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));

      b += 1;
      next += 1;
      n_left -= 1;
    }

  /* Enqueue every buffer to the next index recorded for it. */
  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  return frame->n_vectors;
}
267
/* Graph registration for "ip4-load-balance". It is declared a sibling of
   "ip4-lookup" and uses the same trace formatter as that node. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_load_balance_node) =
{
  .name = "ip4-load-balance",
  .vector_size = sizeof (u32),
  .sibling_of = "ip4-lookup",
  .format_trace = format_ip4_lookup_trace,
};
/* *INDENT-ON* */
277
278 #ifndef CLIB_MARCH_VARIANT
279 /* get first interface address */
280 ip4_address_t *
281 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
282                              ip_interface_address_t ** result_ia)
283 {
284   ip_lookup_main_t *lm = &im->lookup_main;
285   ip_interface_address_t *ia = 0;
286   ip4_address_t *result = 0;
287
288   /* *INDENT-OFF* */
289   foreach_ip_interface_address
290     (lm, ia, sw_if_index,
291      1 /* honor unnumbered */ ,
292      ({
293        ip4_address_t * a =
294          ip_interface_address_get_address (lm, ia);
295        result = a;
296        break;
297      }));
298   /* *INDENT-OFF* */
299   if (result_ia)
300     *result_ia = result ? ia : 0;
301   return result;
302 }
303
304 static void
305 ip4_add_subnet_bcast_route (u32 fib_index,
306                             fib_prefix_t *pfx,
307                             u32 sw_if_index)
308 {
309   vnet_sw_interface_flags_t iflags;
310
311   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
312
313   fib_table_entry_special_remove(fib_index,
314                                  pfx,
315                                  FIB_SOURCE_INTERFACE);
316
317   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
318     {
319       fib_table_entry_update_one_path (fib_index, pfx,
320                                        FIB_SOURCE_INTERFACE,
321                                        FIB_ENTRY_FLAG_NONE,
322                                        DPO_PROTO_IP4,
323                                        /* No next-hop address */
324                                        &ADJ_BCAST_ADDR,
325                                        sw_if_index,
326                                        // invalid FIB index
327                                        ~0,
328                                        1,
329                                        // no out-label stack
330                                        NULL,
331                                        FIB_ROUTE_PATH_FLAG_NONE);
332     }
333   else
334     {
335         fib_table_entry_special_add(fib_index,
336                                     pfx,
337                                     FIB_SOURCE_INTERFACE,
338                                     (FIB_ENTRY_FLAG_DROP |
339                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
340     }
341 }
342
343 static void
344 ip4_add_interface_routes (u32 sw_if_index,
345                           ip4_main_t * im, u32 fib_index,
346                           ip_interface_address_t * a)
347 {
348   ip_lookup_main_t *lm = &im->lookup_main;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350   fib_prefix_t pfx = {
351     .fp_len = a->address_length,
352     .fp_proto = FIB_PROTOCOL_IP4,
353     .fp_addr.ip4 = *address,
354   };
355
356   if (pfx.fp_len <= 30)
357     {
358       /* a /30 or shorter - add a glean for the network address */
359       fib_table_entry_update_one_path (fib_index, &pfx,
360                                        FIB_SOURCE_INTERFACE,
361                                        (FIB_ENTRY_FLAG_CONNECTED |
362                                         FIB_ENTRY_FLAG_ATTACHED),
363                                        DPO_PROTO_IP4,
364                                        /* No next-hop address */
365                                        NULL,
366                                        sw_if_index,
367                                        // invalid FIB index
368                                        ~0,
369                                        1,
370                                        // no out-label stack
371                                        NULL,
372                                        FIB_ROUTE_PATH_FLAG_NONE);
373
374       /* Add the two broadcast addresses as drop */
375       fib_prefix_t net_pfx = {
376         .fp_len = 32,
377         .fp_proto = FIB_PROTOCOL_IP4,
378         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
379       };
380       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
381         fib_table_entry_special_add(fib_index,
382                                     &net_pfx,
383                                     FIB_SOURCE_INTERFACE,
384                                     (FIB_ENTRY_FLAG_DROP |
385                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
386       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
387       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
388         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
389     }
390   else if (pfx.fp_len == 31)
391     {
392       u32 mask = clib_host_to_net_u32(1);
393       fib_prefix_t net_pfx = pfx;
394
395       net_pfx.fp_len = 32;
396       net_pfx.fp_addr.ip4.as_u32 ^= mask;
397
398       /* a /31 - add the other end as an attached host */
399       fib_table_entry_update_one_path (fib_index, &net_pfx,
400                                        FIB_SOURCE_INTERFACE,
401                                        (FIB_ENTRY_FLAG_ATTACHED),
402                                        DPO_PROTO_IP4,
403                                        &net_pfx.fp_addr,
404                                        sw_if_index,
405                                        // invalid FIB index
406                                        ~0,
407                                        1,
408                                        NULL,
409                                        FIB_ROUTE_PATH_FLAG_NONE);
410     }
411   pfx.fp_len = 32;
412
413   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
414     {
415       u32 classify_table_index =
416         lm->classify_table_index_by_sw_if_index[sw_if_index];
417       if (classify_table_index != (u32) ~ 0)
418         {
419           dpo_id_t dpo = DPO_INVALID;
420
421           dpo_set (&dpo,
422                    DPO_CLASSIFY,
423                    DPO_PROTO_IP4,
424                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
425
426           fib_table_entry_special_dpo_add (fib_index,
427                                            &pfx,
428                                            FIB_SOURCE_CLASSIFY,
429                                            FIB_ENTRY_FLAG_NONE, &dpo);
430           dpo_reset (&dpo);
431         }
432     }
433
434   fib_table_entry_update_one_path (fib_index, &pfx,
435                                    FIB_SOURCE_INTERFACE,
436                                    (FIB_ENTRY_FLAG_CONNECTED |
437                                     FIB_ENTRY_FLAG_LOCAL),
438                                    DPO_PROTO_IP4,
439                                    &pfx.fp_addr,
440                                    sw_if_index,
441                                    // invalid FIB index
442                                    ~0,
443                                    1, NULL,
444                                    FIB_ROUTE_PATH_FLAG_NONE);
445 }
446
447 static void
448 ip4_del_interface_routes (ip4_main_t * im,
449                           u32 fib_index,
450                           ip4_address_t * address, u32 address_length)
451 {
452   fib_prefix_t pfx = {
453     .fp_len = address_length,
454     .fp_proto = FIB_PROTOCOL_IP4,
455     .fp_addr.ip4 = *address,
456   };
457
458   if (pfx.fp_len <= 30)
459     {
460       fib_prefix_t net_pfx = {
461         .fp_len = 32,
462         .fp_proto = FIB_PROTOCOL_IP4,
463         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
464       };
465       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
466         fib_table_entry_special_remove(fib_index,
467                                        &net_pfx,
468                                        FIB_SOURCE_INTERFACE);
469       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
470       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
471         fib_table_entry_special_remove(fib_index,
472                                        &net_pfx,
473                                        FIB_SOURCE_INTERFACE);
474       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
475     }
476     else if (pfx.fp_len == 31)
477     {
478       u32 mask = clib_host_to_net_u32(1);
479       fib_prefix_t net_pfx = pfx;
480
481       net_pfx.fp_len = 32;
482       net_pfx.fp_addr.ip4.as_u32 ^= mask;
483
484       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
485     }
486
487   pfx.fp_len = 32;
488   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
489 }
490
491 void
492 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
493 {
494   ip4_main_t *im = &ip4_main;
495
496   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
497
498   /*
499    * enable/disable only on the 1<->0 transition
500    */
501   if (is_enable)
502     {
503       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
504         return;
505     }
506   else
507     {
508       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
509       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
510         return;
511     }
512   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
513                                !is_enable, 0, 0);
514
515
516   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
517                                sw_if_index, !is_enable, 0, 0);
518
519   {
520     ip4_enable_disable_interface_callback_t *cb;
521     vec_foreach (cb, im->enable_disable_interface_callbacks)
522       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
523   }
524 }
525
526 static clib_error_t *
527 ip4_add_del_interface_address_internal (vlib_main_t * vm,
528                                         u32 sw_if_index,
529                                         ip4_address_t * address,
530                                         u32 address_length, u32 is_del)
531 {
532   vnet_main_t *vnm = vnet_get_main ();
533   ip4_main_t *im = &ip4_main;
534   ip_lookup_main_t *lm = &im->lookup_main;
535   clib_error_t *error = 0;
536   u32 if_address_index, elts_before;
537   ip4_address_fib_t ip4_af, *addr_fib = 0;
538
539   /* local0 interface doesn't support IP addressing  */
540   if (sw_if_index == 0)
541     {
542       return
543        clib_error_create ("local0 interface doesn't support IP addressing");
544     }
545
546   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
547   ip4_addr_fib_init (&ip4_af, address,
548                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
549   vec_add1 (addr_fib, ip4_af);
550
551   /*
552    * there is no support for adj-fib handling in the presence of overlapping
553    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
554    * most routers do.
555    */
556   /* *INDENT-OFF* */
557   if (!is_del)
558     {
559       /* When adding an address check that it does not conflict
560          with an existing address on any interface in this table. */
561       ip_interface_address_t *ia;
562       vnet_sw_interface_t *sif;
563
564       pool_foreach(sif, vnm->interface_main.sw_interfaces,
565       ({
566           if (im->fib_index_by_sw_if_index[sw_if_index] ==
567               im->fib_index_by_sw_if_index[sif->sw_if_index])
568             {
569               foreach_ip_interface_address
570                 (&im->lookup_main, ia, sif->sw_if_index,
571                  0 /* honor unnumbered */ ,
572                  ({
573                    ip4_address_t * x =
574                      ip_interface_address_get_address
575                      (&im->lookup_main, ia);
576                    if (ip4_destination_matches_route
577                        (im, address, x, ia->address_length) ||
578                        ip4_destination_matches_route (im,
579                                                       x,
580                                                       address,
581                                                       address_length))
582                      {
583                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
584
585                        return
586                          clib_error_create
587                          ("failed to add %U which conflicts with %U for interface %U",
588                           format_ip4_address_and_length, address,
589                           address_length,
590                           format_ip4_address_and_length, x,
591                           ia->address_length,
592                           format_vnet_sw_if_index_name, vnm,
593                           sif->sw_if_index);
594                      }
595                  }));
596             }
597       }));
598     }
599   /* *INDENT-ON* */
600
601   elts_before = pool_elts (lm->if_address_pool);
602
603   error = ip_interface_address_add_del
604     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
605   if (error)
606     goto done;
607
608   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
609
610   if (is_del)
611     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
612   else
613     ip4_add_interface_routes (sw_if_index,
614                               im, ip4_af.fib_index,
615                               pool_elt_at_index
616                               (lm->if_address_pool, if_address_index));
617
618   /* If pool did not grow/shrink: add duplicate address. */
619   if (elts_before != pool_elts (lm->if_address_pool))
620     {
621       ip4_add_del_interface_address_callback_t *cb;
622       vec_foreach (cb, im->add_del_interface_address_callbacks)
623         cb->function (im, cb->function_opaque, sw_if_index,
624                       address, address_length, if_address_index, is_del);
625     }
626
627 done:
628   vec_free (addr_fib);
629   return error;
630 }
631
632 clib_error_t *
633 ip4_add_del_interface_address (vlib_main_t * vm,
634                                u32 sw_if_index,
635                                ip4_address_t * address,
636                                u32 address_length, u32 is_del)
637 {
638   return ip4_add_del_interface_address_internal
639     (vm, sw_if_index, address, address_length, is_del);
640 }
641
642 void
643 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
644 {
645   ip_interface_address_t *ia;
646   ip4_main_t *im;
647
648   im = &ip4_main;
649
650   /*
651    * when directed broadcast is enabled, the subnet braodcast route will forward
652    * packets using an adjacency with a broadcast MAC. otherwise it drops
653    */
654   /* *INDENT-OFF* */
655   foreach_ip_interface_address(&im->lookup_main, ia,
656                                sw_if_index, 0,
657      ({
658        if (ia->address_length <= 30)
659          {
660            ip4_address_t *ipa;
661
662            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
663
664            fib_prefix_t pfx = {
665              .fp_len = 32,
666              .fp_proto = FIB_PROTOCOL_IP4,
667              .fp_addr = {
668                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
669              },
670            };
671
672            ip4_add_subnet_bcast_route
673              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
674                                                   sw_if_index),
675               &pfx, sw_if_index);
676          }
677      }));
678   /* *INDENT-ON* */
679 }
680 #endif
681
/* Built-in ip4 unicast rx feature path definition */
/* *INDENT-OFF* */
/* The "ip4-unicast" arc starts at the two ip4-input nodes and ends at
   "ip4-lookup"; its index is stored in the IPv4 lookup main. */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

/* The features below form an ordering chain on the arc through their
   .runs_before constraints:
   flow-classify -> inacl -> source-check-via-rx -> source-check-via-any
   -> policer-classify -> ipsec4-input -> vpath-input -> vxlan-bypass
   -> ip4-lookup. */
VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
};

VNET_FEATURE_INIT (ip4_source_check_1, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-rx",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
};

VNET_FEATURE_INIT (ip4_source_check_2, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-any",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

/* Constrained only against policer-classify, i.e. not ordered relative to
   the source-check features above. */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec4-input-feature",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* "ip4-not-enabled" is the feature toggled by the interface
   enable/disable code in this file. */
VNET_FEATURE_INIT (ip4_not_enabled, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* Terminal feature of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};
768
/* Built-in ip4 multicast rx feature path definition */
/* The "ip4-multicast" arc starts at the two ip4-input nodes and ends at
   "ip4-mfib-forward-lookup"; its index is stored in the IPv4 lookup
   main. */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-mfib-forward-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

/* Both optional features are constrained to run before the mfib lookup. */
VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/* Terminal feature of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};
798
/* Source and port-range check ip4 tx feature path definition */
/* The "ip4-output" arc starts at the rewrite, midchain and dvr-dpo nodes
   and ends at "interface-output"; its index is stored in the IPv4 lookup
   main. */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  .last_in_arc = "interface-output",
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

/* Ordering chain through .runs_before:
   source-and-port-range-check-tx -> outacl -> ipsec4-output
   -> interface-output. */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ip4-outacl"),
};

VNET_FEATURE_INIT (ip4_outacl, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-outacl",
  .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec4-output-feature",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition */
/* Terminal feature of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
/* *INDENT-ON* */
837
838 static clib_error_t *
839 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
840 {
841   ip4_main_t *im = &ip4_main;
842
843   /* Fill in lookup tables with default table (0). */
844   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
845   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
846
847   if (!is_add)
848     {
849       ip4_main_t *im4 = &ip4_main;
850       ip_lookup_main_t *lm4 = &im4->lookup_main;
851       ip_interface_address_t *ia = 0;
852       ip4_address_t *address;
853       vlib_main_t *vm = vlib_get_main ();
854
855       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
856       /* *INDENT-OFF* */
857       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
858       ({
859         address = ip_interface_address_get_address (lm4, ia);
860         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
861       }));
862       /* *INDENT-ON* */
863     }
864
865   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
866                                is_add, 0, 0);
867
868   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
869                                sw_if_index, is_add, 0, 0);
870
871   return /* no error */ 0;
872 }
873
874 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
875
/*
 * Global IP4 main: the single ip4_main_t instance referenced throughout
 * this file.  The definition is compiled only when CLIB_MARCH_VARIANT is
 * not defined (presumably so march-variant builds of this file do not
 * emit a second copy -- confirm against the build system).
 */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
880
881 static clib_error_t *
882 ip4_lookup_init (vlib_main_t * vm)
883 {
884   ip4_main_t *im = &ip4_main;
885   clib_error_t *error;
886   uword i;
887
888   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
889     return error;
890   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
891     return (error);
892   if ((error = vlib_call_init_function (vm, fib_module_init)))
893     return error;
894   if ((error = vlib_call_init_function (vm, mfib_module_init)))
895     return error;
896
897   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
898     {
899       u32 m;
900
901       if (i < 32)
902         m = pow2_mask (i) << (32 - i);
903       else
904         m = ~0;
905       im->fib_masks[i] = clib_host_to_net_u32 (m);
906     }
907
908   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
909
910   /* Create FIB with index 0 and table id of 0. */
911   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
912                                      FIB_SOURCE_DEFAULT_ROUTE);
913   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
914                                       MFIB_SOURCE_DEFAULT_ROUTE);
915
916   {
917     pg_node_t *pn;
918     pn = pg_get_node (ip4_lookup_node.index);
919     pn->unformat_edit = unformat_pg_ip4_header;
920   }
921
922   {
923     ethernet_arp_header_t h;
924
925     clib_memset (&h, 0, sizeof (h));
926
927 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
928 #define _8(f,v) h.f = v;
929     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
930     _16 (l3_type, ETHERNET_TYPE_IP4);
931     _8 (n_l2_address_bytes, 6);
932     _8 (n_l3_address_bytes, 4);
933     _16 (opcode, ETHERNET_ARP_OPCODE_request);
934 #undef _16
935 #undef _8
936
937     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
938                                /* data */ &h,
939                                sizeof (h),
940                                /* alloc chunk size */ 8,
941                                "ip4 arp");
942   }
943
944   return error;
945 }
946
947 VLIB_INIT_FUNCTION (ip4_lookup_init);
948
/*
 * Per-packet trace record shared by the ip4 forwarding nodes in this file
 * (filled in by ip4_forward_next_trace, printed by the format_ip4_*_trace
 * functions).
 */
typedef struct
{
  /* Adjacency taken: copied from the buffer's ip.adj_index[]. */
  u32 dpo_index;
  /* Copied from the buffer's ip.flow_hash. */
  u32 flow_hash;
  /* The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
     FIB index bound to the RX interface. */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
960
961 #ifndef CLIB_MARCH_VARIANT
962 u8 *
963 format_ip4_forward_next_trace (u8 * s, va_list * args)
964 {
965   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
966   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
967   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
968   u32 indent = format_get_indent (s);
969   s = format (s, "%U%U",
970               format_white_space, indent,
971               format_ip4_header, t->packet_data, sizeof (t->packet_data));
972   return s;
973 }
974 #endif
975
976 static u8 *
977 format_ip4_lookup_trace (u8 * s, va_list * args)
978 {
979   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
980   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
981   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
982   u32 indent = format_get_indent (s);
983
984   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
985               t->fib_index, t->dpo_index, t->flow_hash);
986   s = format (s, "\n%U%U",
987               format_white_space, indent,
988               format_ip4_header, t->packet_data, sizeof (t->packet_data));
989   return s;
990 }
991
992 static u8 *
993 format_ip4_rewrite_trace (u8 * s, va_list * args)
994 {
995   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
996   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
997   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
998   u32 indent = format_get_indent (s);
999
1000   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1001               t->fib_index, t->dpo_index, format_ip_adjacency,
1002               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1003   s = format (s, "\n%U%U",
1004               format_white_space, indent,
1005               format_ip_adjacency_packet_data,
1006               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1007   return s;
1008 }
1009
1010 #ifndef CLIB_MARCH_VARIANT
1011 /* Common trace function for all ip4-forward next nodes. */
1012 void
1013 ip4_forward_next_trace (vlib_main_t * vm,
1014                         vlib_node_runtime_t * node,
1015                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1016 {
1017   u32 *from, n_left;
1018   ip4_main_t *im = &ip4_main;
1019
1020   n_left = frame->n_vectors;
1021   from = vlib_frame_vector_args (frame);
1022
1023   while (n_left >= 4)
1024     {
1025       u32 bi0, bi1;
1026       vlib_buffer_t *b0, *b1;
1027       ip4_forward_next_trace_t *t0, *t1;
1028
1029       /* Prefetch next iteration. */
1030       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1031       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1032
1033       bi0 = from[0];
1034       bi1 = from[1];
1035
1036       b0 = vlib_get_buffer (vm, bi0);
1037       b1 = vlib_get_buffer (vm, bi1);
1038
1039       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1040         {
1041           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1042           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1043           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1044           t0->fib_index =
1045             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1046              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1047             vec_elt (im->fib_index_by_sw_if_index,
1048                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1049
1050           clib_memcpy_fast (t0->packet_data,
1051                             vlib_buffer_get_current (b0),
1052                             sizeof (t0->packet_data));
1053         }
1054       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1055         {
1056           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1057           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1058           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1059           t1->fib_index =
1060             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1061              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1062             vec_elt (im->fib_index_by_sw_if_index,
1063                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1064           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1065                             sizeof (t1->packet_data));
1066         }
1067       from += 2;
1068       n_left -= 2;
1069     }
1070
1071   while (n_left >= 1)
1072     {
1073       u32 bi0;
1074       vlib_buffer_t *b0;
1075       ip4_forward_next_trace_t *t0;
1076
1077       bi0 = from[0];
1078
1079       b0 = vlib_get_buffer (vm, bi0);
1080
1081       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1082         {
1083           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1084           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1085           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1086           t0->fib_index =
1087             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1088              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1089             vec_elt (im->fib_index_by_sw_if_index,
1090                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1091           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1092                             sizeof (t0->packet_data));
1093         }
1094       from += 1;
1095       n_left -= 1;
1096     }
1097 }
1098
1099 /* Compute TCP/UDP/ICMP4 checksum in software. */
1100 u16
1101 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1102                               ip4_header_t * ip0)
1103 {
1104   ip_csum_t sum0;
1105   u32 ip_header_length, payload_length_host_byte_order;
1106   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1107   u16 sum16;
1108   void *data_this_buffer;
1109
1110   /* Initialize checksum with ip header. */
1111   ip_header_length = ip4_header_bytes (ip0);
1112   payload_length_host_byte_order =
1113     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1114   sum0 =
1115     clib_host_to_net_u32 (payload_length_host_byte_order +
1116                           (ip0->protocol << 16));
1117
1118   if (BITS (uword) == 32)
1119     {
1120       sum0 =
1121         ip_csum_with_carry (sum0,
1122                             clib_mem_unaligned (&ip0->src_address, u32));
1123       sum0 =
1124         ip_csum_with_carry (sum0,
1125                             clib_mem_unaligned (&ip0->dst_address, u32));
1126     }
1127   else
1128     sum0 =
1129       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1130
1131   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1132   data_this_buffer = (void *) ip0 + ip_header_length;
1133   n_ip_bytes_this_buffer =
1134     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1135   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1136     {
1137       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1138         n_ip_bytes_this_buffer - ip_header_length : 0;
1139     }
1140   while (1)
1141     {
1142       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1143       n_bytes_left -= n_this_buffer;
1144       if (n_bytes_left == 0)
1145         break;
1146
1147       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1148       p0 = vlib_get_buffer (vm, p0->next_buffer);
1149       data_this_buffer = vlib_buffer_get_current (p0);
1150       n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1151     }
1152
1153   sum16 = ~ip_csum_fold (sum0);
1154
1155   return sum16;
1156 }
1157
1158 u32
1159 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1160 {
1161   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1162   udp_header_t *udp0;
1163   u16 sum16;
1164
1165   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1166           || ip0->protocol == IP_PROTOCOL_UDP);
1167
1168   udp0 = (void *) (ip0 + 1);
1169   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1170     {
1171       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1172                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1173       return p0->flags;
1174     }
1175
1176   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1177
1178   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1179                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1180
1181   return p0->flags;
1182 }
1183 #endif
1184
1185 /* *INDENT-OFF* */
1186 VNET_FEATURE_ARC_INIT (ip4_local) =
1187 {
1188   .arc_name  = "ip4-local",
1189   .start_nodes = VNET_FEATURES ("ip4-local"),
1190   .last_in_arc = "ip4-local-end-of-arc",
1191 };
1192 /* *INDENT-ON* */
1193
1194 static inline void
1195 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1196                             ip4_header_t * ip, u8 is_udp, u8 * error,
1197                             u8 * good_tcp_udp)
1198 {
1199   u32 flags0;
1200   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1201   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1202   if (is_udp)
1203     {
1204       udp_header_t *udp;
1205       u32 ip_len, udp_len;
1206       i32 len_diff;
1207       udp = ip4_next_header (ip);
1208       /* Verify UDP length. */
1209       ip_len = clib_net_to_host_u16 (ip->length);
1210       udp_len = clib_net_to_host_u16 (udp->length);
1211
1212       len_diff = ip_len - udp_len;
1213       *good_tcp_udp &= len_diff >= 0;
1214       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1215     }
1216 }
1217
/*
 * Checksum-state predicates on a buffer pointer _b.  Arguments and the
 * whole expansion are parenthesized so the macros can be negated,
 * compared or given expression arguments without precedence surprises.
 */

/* Non-zero when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* Non-zero when a TCP/UDP packet still needs a software checksum check:
   the checksum is neither already computed nor offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1229
1230 static inline void
1231 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1232                          ip4_header_t * ih, u8 * error)
1233 {
1234   u8 is_udp, is_tcp_udp, good_tcp_udp;
1235
1236   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1237   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1238
1239   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1240     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1241   else
1242     good_tcp_udp = ip4_local_csum_is_valid (b);
1243
1244   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1245   *error = (is_tcp_udp && !good_tcp_udp
1246             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1247 }
1248
1249 static inline void
1250 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1251                             ip4_header_t ** ih, u8 * error)
1252 {
1253   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1254
1255   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1256   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1257
1258   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1259   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1260
1261   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1262   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1263
1264   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1265                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1266     {
1267       if (is_tcp_udp[0])
1268         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1269                                     &good_tcp_udp[0]);
1270       if (is_tcp_udp[1])
1271         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1272                                     &good_tcp_udp[1]);
1273     }
1274
1275   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1276               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1277   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1278               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1279 }
1280
1281 static inline void
1282 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1283                               vlib_buffer_t * b, u16 * next, u8 error,
1284                               u8 head_of_feature_arc)
1285 {
1286   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1287   u32 next_index;
1288
1289   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1290   b->error = error ? error_node->errors[error] : 0;
1291   if (head_of_feature_arc)
1292     {
1293       next_index = *next;
1294       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1295         {
1296           vnet_feature_arc_start (arc_index,
1297                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1298                                   &next_index, b);
1299           *next = next_index;
1300         }
1301     }
1302 }
1303
1304 typedef struct
1305 {
1306   ip4_address_t src;
1307   u32 lbi;
1308   u8 error;
1309   u8 first;
1310 } ip4_local_last_check_t;
1311
1312 static inline void
1313 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1314                      ip4_local_last_check_t * last_check, u8 * error0)
1315 {
1316   ip4_fib_mtrie_leaf_t leaf0;
1317   ip4_fib_mtrie_t *mtrie0;
1318   const dpo_id_t *dpo0;
1319   load_balance_t *lb0;
1320   u32 lbi0;
1321
1322   vnet_buffer (b)->ip.fib_index =
1323     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1324     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1325
1326   if (PREDICT_FALSE (last_check->first ||
1327                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1328     {
1329       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1330       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1331       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1332       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1333       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1334
1335       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1336       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1337
1338       lb0 = load_balance_get (lbi0);
1339       dpo0 = load_balance_get_bucket_i (lb0, 0);
1340
1341       /*
1342        * Must have a route to source otherwise we drop the packet.
1343        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1344        *
1345        * The checks are:
1346        *  - the source is a recieve => it's from us => bogus, do this
1347        *    first since it sets a different error code.
1348        *  - uRPF check for any route to source - accept if passes.
1349        *  - allow packets destined to the broadcast address from unknown sources
1350        */
1351
1352       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1353                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1354                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1355       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1356                   && !fib_urpf_check_size (lb0->lb_urpf)
1357                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1358                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1359
1360       last_check->src.as_u32 = ip0->src_address.as_u32;
1361       last_check->lbi = lbi0;
1362       last_check->error = *error0;
1363     }
1364   else
1365     {
1366       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1367       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1368       *error0 = last_check->error;
1369       last_check->first = 0;
1370     }
1371 }
1372
1373 static inline void
1374 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1375                         ip4_local_last_check_t * last_check, u8 * error)
1376 {
1377   ip4_fib_mtrie_leaf_t leaf[2];
1378   ip4_fib_mtrie_t *mtrie[2];
1379   const dpo_id_t *dpo[2];
1380   load_balance_t *lb[2];
1381   u32 not_last_hit;
1382   u32 lbi[2];
1383
1384   not_last_hit = last_check->first;
1385   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1386   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1387
1388   vnet_buffer (b[0])->ip.fib_index =
1389     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1390     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1391     vnet_buffer (b[0])->ip.fib_index;
1392
1393   vnet_buffer (b[1])->ip.fib_index =
1394     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1395     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1396     vnet_buffer (b[1])->ip.fib_index;
1397
1398   if (PREDICT_FALSE (not_last_hit))
1399     {
1400       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1401       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1402
1403       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1404       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1405
1406       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1407                                            &ip[0]->src_address, 2);
1408       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1409                                            &ip[1]->src_address, 2);
1410
1411       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1412                                            &ip[0]->src_address, 3);
1413       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1414                                            &ip[1]->src_address, 3);
1415
1416       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1417       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1418
1419       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1420       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1421
1422       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1423       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1424
1425       lb[0] = load_balance_get (lbi[0]);
1426       lb[1] = load_balance_get (lbi[1]);
1427
1428       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1429       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1430
1431       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1432                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1433                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1434       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1435                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1436                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1437                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1438
1439       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1440                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1441                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1442       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1443                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1444                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1445                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1446
1447       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1448       last_check->lbi = lbi[1];
1449       last_check->error = error[1];
1450     }
1451   else
1452     {
1453       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1454       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1455
1456       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1457       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1458
1459       error[0] = last_check->error;
1460       error[1] = last_check->error;
1461       last_check->first = 0;
1462     }
1463 }
1464
/*
 * Packet classes returned by ip4_local_classify.  IP_LOCAL_PACKET_TYPE_L4
 * is the zero value; ip4_local_inline treats any non-zero type as "skip
 * the checksum and source checks".
 */
enum ip_local_packet_type_e
{
  /* Neither a fragment nor flagged as NATed. */
  IP_LOCAL_PACKET_TYPE_L4,
  /* Buffer carries VNET_BUFFER_F_IS_NATED. */
  IP_LOCAL_PACKET_TYPE_NAT,
  /* IP fragment; classified towards the reassembly next. */
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1471
1472 /**
1473  * Determine packet type and next node.
1474  *
1475  * The expectation is that all packets that are not L4 will skip
1476  * checksums and source checks.
1477  */
1478 always_inline u8
1479 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1480 {
1481   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1482
1483   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1484     {
1485       *next = IP_LOCAL_NEXT_REASSEMBLY;
1486       return IP_LOCAL_PACKET_TYPE_FRAG;
1487     }
1488   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1489     {
1490       *next = lm->local_next_by_ip_protocol[ip->protocol];
1491       return IP_LOCAL_PACKET_TYPE_NAT;
1492     }
1493
1494   *next = lm->local_next_by_ip_protocol[ip->protocol];
1495   return IP_LOCAL_PACKET_TYPE_L4;
1496 }
1497
1498 static inline uword
1499 ip4_local_inline (vlib_main_t * vm,
1500                   vlib_node_runtime_t * node,
1501                   vlib_frame_t * frame, int head_of_feature_arc)
1502 {
1503   u32 *from, n_left_from;
1504   vlib_node_runtime_t *error_node =
1505     vlib_node_get_runtime (vm, ip4_input_node.index);
1506   u16 nexts[VLIB_FRAME_SIZE], *next;
1507   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1508   ip4_header_t *ip[2];
1509   u8 error[2], pt[2];
1510
1511   ip4_local_last_check_t last_check = {
1512     /*
1513      * 0.0.0.0 can appear as the source address of an IP packet,
1514      * as can any other address, hence the need to use the 'first'
1515      * member to make sure the .lbi is initialised for the first
1516      * packet.
1517      */
1518     .src = {.as_u32 = 0},
1519     .lbi = ~0,
1520     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1521     .first = 1,
1522   };
1523
1524   from = vlib_frame_vector_args (frame);
1525   n_left_from = frame->n_vectors;
1526
1527   if (node->flags & VLIB_NODE_FLAG_TRACE)
1528     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1529
1530   vlib_get_buffers (vm, from, bufs, n_left_from);
1531   b = bufs;
1532   next = nexts;
1533
1534   while (n_left_from >= 6)
1535     {
1536       u8 not_batch = 0;
1537
1538       /* Prefetch next iteration. */
1539       {
1540         vlib_prefetch_buffer_header (b[4], LOAD);
1541         vlib_prefetch_buffer_header (b[5], LOAD);
1542
1543         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1544         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1545       }
1546
1547       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1548
1549       ip[0] = vlib_buffer_get_current (b[0]);
1550       ip[1] = vlib_buffer_get_current (b[1]);
1551
1552       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1553       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1554
1555       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1556       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1557
1558       not_batch = pt[0] ^ pt[1];
1559
1560       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1561         goto skip_checks;
1562
1563       if (PREDICT_TRUE (not_batch == 0))
1564         {
1565           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1566           ip4_local_check_src_x2 (b, ip, &last_check, error);
1567         }
1568       else
1569         {
1570           if (!pt[0])
1571             {
1572               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1573               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1574             }
1575           if (!pt[1])
1576             {
1577               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1578               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1579             }
1580         }
1581
1582     skip_checks:
1583
1584       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1585                                     head_of_feature_arc);
1586       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1587                                     head_of_feature_arc);
1588
1589       b += 2;
1590       next += 2;
1591       n_left_from -= 2;
1592     }
1593
1594   while (n_left_from > 0)
1595     {
1596       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1597
1598       ip[0] = vlib_buffer_get_current (b[0]);
1599       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1600       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1601
1602       if (head_of_feature_arc == 0 || pt[0])
1603         goto skip_check;
1604
1605       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1606       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1607
1608     skip_check:
1609
1610       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1611                                     head_of_feature_arc);
1612
1613       b += 1;
1614       next += 1;
1615       n_left_from -= 1;
1616     }
1617
1618   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1619   return frame->n_vectors;
1620 }
1621
1622 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1623                                vlib_frame_t * frame)
1624 {
1625   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1626 }
1627
1628 /* *INDENT-OFF* */
1629 VLIB_REGISTER_NODE (ip4_local_node) =
1630 {
1631   .name = "ip4-local",
1632   .vector_size = sizeof (u32),
1633   .format_trace = format_ip4_forward_next_trace,
1634   .n_next_nodes = IP_LOCAL_N_NEXT,
1635   .next_nodes =
1636   {
1637     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1638     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1639     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1640     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1641     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1642   },
1643 };
1644 /* *INDENT-ON* */
1645
1646
1647 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1648                                           vlib_node_runtime_t * node,
1649                                           vlib_frame_t * frame)
1650 {
1651   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1652 }
1653
1654 /* *INDENT-OFF* */
1655 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1656   .name = "ip4-local-end-of-arc",
1657   .vector_size = sizeof (u32),
1658
1659   .format_trace = format_ip4_forward_next_trace,
1660   .sibling_of = "ip4-local",
1661 };
1662
1663 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1664   .arc_name = "ip4-local",
1665   .node_name = "ip4-local-end-of-arc",
1666   .runs_before = 0, /* not before any other features */
1667 };
1668 /* *INDENT-ON* */
1669
1670 #ifndef CLIB_MARCH_VARIANT
1671 void
1672 ip4_register_protocol (u32 protocol, u32 node_index)
1673 {
1674   vlib_main_t *vm = vlib_get_main ();
1675   ip4_main_t *im = &ip4_main;
1676   ip_lookup_main_t *lm = &im->lookup_main;
1677
1678   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1679   lm->local_next_by_ip_protocol[protocol] =
1680     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1681 }
1682 #endif
1683
1684 static clib_error_t *
1685 show_ip_local_command_fn (vlib_main_t * vm,
1686                           unformat_input_t * input, vlib_cli_command_t * cmd)
1687 {
1688   ip4_main_t *im = &ip4_main;
1689   ip_lookup_main_t *lm = &im->lookup_main;
1690   int i;
1691
1692   vlib_cli_output (vm, "Protocols handled by ip4_local");
1693   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1694     {
1695       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1696         {
1697           u32 node_index = vlib_get_node (vm,
1698                                           ip4_local_node.index)->
1699             next_nodes[lm->local_next_by_ip_protocol[i]];
1700           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1701                            node_index);
1702         }
1703     }
1704   return 0;
1705 }
1706
1707
1708
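/*?
 * Display the set of protocols handled by the local IPv4 stack.
 * Each line shows an IP protocol number followed by the name of the
 * graph node that handles it.
 *
 * @cliexpar
 * Example of how to display local protocol table (illustrative; the
 * entries depend on which protocols have been registered):
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * 1: ip4-icmp-input
 * 17: ip4-udp-lookup
 * 47: gre4-input
 * @cliexend
?*/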
1721 /* *INDENT-OFF* */
1722 VLIB_CLI_COMMAND (show_ip_local, static) =
1723 {
1724   .path = "show ip local",
1725   .function = show_ip_local_command_fn,
1726   .short_help = "show ip local",
1727 };
1728 /* *INDENT-ON* */
1729
1730 always_inline uword
1731 ip4_arp_inline (vlib_main_t * vm,
1732                 vlib_node_runtime_t * node,
1733                 vlib_frame_t * frame, int is_glean)
1734 {
1735   vnet_main_t *vnm = vnet_get_main ();
1736   ip4_main_t *im = &ip4_main;
1737   ip_lookup_main_t *lm = &im->lookup_main;
1738   u32 *from, *to_next_drop;
1739   uword n_left_from, n_left_to_next_drop, next_index;
1740   u32 thread_index = vm->thread_index;
1741   u64 seed;
1742
1743   if (node->flags & VLIB_NODE_FLAG_TRACE)
1744     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1745
1746   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1747
1748   from = vlib_frame_vector_args (frame);
1749   n_left_from = frame->n_vectors;
1750   next_index = node->cached_next_index;
1751   if (next_index == IP4_ARP_NEXT_DROP)
1752     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1753
1754   while (n_left_from > 0)
1755     {
1756       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1757                            to_next_drop, n_left_to_next_drop);
1758
1759       while (n_left_from > 0 && n_left_to_next_drop > 0)
1760         {
1761           u32 pi0, bi0, adj_index0, sw_if_index0;
1762           ip_adjacency_t *adj0;
1763           vlib_buffer_t *p0, *b0;
1764           ip4_address_t resolve0;
1765           ethernet_arp_header_t *h0;
1766           vnet_hw_interface_t *hw_if0;
1767           u64 r0;
1768
1769           pi0 = from[0];
1770           p0 = vlib_get_buffer (vm, pi0);
1771
1772           from += 1;
1773           n_left_from -= 1;
1774           to_next_drop[0] = pi0;
1775           to_next_drop += 1;
1776           n_left_to_next_drop -= 1;
1777
1778           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1779           adj0 = adj_get (adj_index0);
1780
1781           if (is_glean)
1782             {
1783               /* resolve the packet's destination */
1784               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1785               resolve0 = ip0->dst_address;
1786             }
1787           else
1788             {
1789               /* resolve the incomplete adj */
1790               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1791             }
1792
1793           /* combine the address and interface for the hash key */
1794           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1795           r0 = (u64) resolve0.data_u32 << 32;
1796           r0 |= sw_if_index0;
1797
1798           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1799             {
1800               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1801               continue;
1802             }
1803
1804           /*
1805            * the adj has been updated to a rewrite but the node the DPO that got
1806            * us here hasn't - yet. no big deal. we'll drop while we wait.
1807            */
1808           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1809             {
1810               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1811               continue;
1812             }
1813
1814           /*
1815            * Can happen if the control-plane is programming tables
1816            * with traffic flowing; at least that's today's lame excuse.
1817            */
1818           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1819               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1820             {
1821               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1822               continue;
1823             }
1824           /* Send ARP request. */
1825           h0 =
1826             vlib_packet_template_get_packet (vm,
1827                                              &im->ip4_arp_request_packet_template,
1828                                              &bi0);
1829           /* Seems we're out of buffers */
1830           if (PREDICT_FALSE (!h0))
1831             {
1832               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1833               continue;
1834             }
1835
1836           b0 = vlib_get_buffer (vm, bi0);
1837
1838           /* copy the persistent fields from the original */
1839           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1840
1841           /* Add rewrite/encap string for ARP packet. */
1842           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1843
1844           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1845
1846           /* Src ethernet address in ARP header. */
1847           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
1848                                   hw_if0->hw_address);
1849           if (is_glean)
1850             {
1851               /* The interface's source address is stashed in the Glean Adj */
1852               h0->ip4_over_ethernet[0].ip4 =
1853                 adj0->sub_type.glean.receive_addr.ip4;
1854             }
1855           else
1856             {
1857               /* Src IP address in ARP header. */
1858               if (ip4_src_address_for_packet (lm, sw_if_index0,
1859                                               &h0->ip4_over_ethernet[0].ip4))
1860                 {
1861                   /* No source address available */
1862                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1863                   vlib_buffer_free (vm, &bi0, 1);
1864                   continue;
1865                 }
1866             }
1867           h0->ip4_over_ethernet[1].ip4 = resolve0;
1868
1869           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1870
1871           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1872           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1873           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1874
1875           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1876
1877           vlib_set_next_frame_buffer (vm, node,
1878                                       adj0->rewrite_header.next_index, bi0);
1879         }
1880
1881       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1882     }
1883
1884   return frame->n_vectors;
1885 }
1886
1887 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1888                              vlib_frame_t * frame)
1889 {
1890   return (ip4_arp_inline (vm, node, frame, 0));
1891 }
1892
1893 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1894                                vlib_frame_t * frame)
1895 {
1896   return (ip4_arp_inline (vm, node, frame, 1));
1897 }
1898
/*
 * Counter descriptions, indexed by IP4_ARP_ERROR_*.  The same table is
 * registered for both the ip4-arp and the ip4-glean node.
 */
static char *ip4_arp_error_strings[] = {
  [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
  [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
  [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
  [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
  [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
  [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
};
1907
/* *INDENT-OFF* */
/*
 * Graph node registration for "ip4-arp".  The only next node declared
 * here is the drop next; errors use ip4_arp_error_strings.
 */
VLIB_REGISTER_NODE (ip4_arp_node) =
{
  .name = "ip4-arp",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
  .error_strings = ip4_arp_error_strings,
  .n_next_nodes = IP4_ARP_N_NEXT,
  .next_nodes =
  {
    [IP4_ARP_NEXT_DROP] = "error-drop",
  },
};

/*
 * Graph node registration for "ip4-glean".  Same trace formatter, error
 * strings and next nodes as "ip4-arp".
 */
VLIB_REGISTER_NODE (ip4_glean_node) =
{
  .name = "ip4-glean",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
  .error_strings = ip4_arp_error_strings,
  .n_next_nodes = IP4_ARP_N_NEXT,
  .next_nodes = {
  [IP4_ARP_NEXT_DROP] = "error-drop",
  },
};
/* *INDENT-ON* */
1936
/*
 * X-macro list of IP4_ARP_ERROR_* suffixes.  The user defines _(name)
 * before expanding the list; arp_notrace_init() uses it to add each
 * error to the pcap drop-trace filter.
 */
#define foreach_notrace_ip4_arp_error           \
_(THROTTLED)                                    \
_(RESOLVED)                                     \
_(NO_BUFFERS)                                   \
_(REQUEST_SENT)                                 \
_(NON_ARP_ADJ)                                  \
_(NO_SOURCE_ADDRESS)
1944
1945 static clib_error_t *
1946 arp_notrace_init (vlib_main_t * vm)
1947 {
1948   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1949
1950   /* don't trace ARP request packets */
1951 #define _(a)                                    \
1952     vnet_pcap_drop_trace_filter_add_del         \
1953         (rt->errors[IP4_ARP_ERROR_##a],         \
1954          1 /* is_add */);
1955   foreach_notrace_ip4_arp_error;
1956 #undef _
1957   return 0;
1958 }
1959
1960 VLIB_INIT_FUNCTION (arp_notrace_init);
1961
1962
1963 #ifndef CLIB_MARCH_VARIANT
1964 /* Send an ARP request to see if given destination is reachable on given interface. */
1965 clib_error_t *
1966 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1967                     u8 refresh)
1968 {
1969   vnet_main_t *vnm = vnet_get_main ();
1970   ip4_main_t *im = &ip4_main;
1971   ethernet_arp_header_t *h;
1972   ip4_address_t *src;
1973   ip_interface_address_t *ia;
1974   ip_adjacency_t *adj;
1975   vnet_hw_interface_t *hi;
1976   vnet_sw_interface_t *si;
1977   vlib_buffer_t *b;
1978   adj_index_t ai;
1979   u32 bi = 0;
1980   u8 unicast_rewrite = 0;
1981
1982   si = vnet_get_sw_interface (vnm, sw_if_index);
1983
1984   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1985     {
1986       return clib_error_return (0, "%U: interface %U down",
1987                                 format_ip4_address, dst,
1988                                 format_vnet_sw_if_index_name, vnm,
1989                                 sw_if_index);
1990     }
1991
1992   src =
1993     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1994   if (!src)
1995     {
1996       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1997       return clib_error_return
1998         (0,
1999          "no matching interface address for destination %U (interface %U)",
2000          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2001          sw_if_index);
2002     }
2003
2004   h = vlib_packet_template_get_packet (vm,
2005                                        &im->ip4_arp_request_packet_template,
2006                                        &bi);
2007
2008   if (!h)
2009     return clib_error_return (0, "ARP request packet allocation failed");
2010
2011   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2012   if (PREDICT_FALSE (!hi->hw_address))
2013     {
2014       return clib_error_return (0, "%U: interface %U do not support ip probe",
2015                                 format_ip4_address, dst,
2016                                 format_vnet_sw_if_index_name, vnm,
2017                                 sw_if_index);
2018     }
2019
2020   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2021
2022   h->ip4_over_ethernet[0].ip4 = src[0];
2023   h->ip4_over_ethernet[1].ip4 = dst[0];
2024
2025   b = vlib_get_buffer (vm, bi);
2026   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2027     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2028
2029   ip46_address_t nh = {
2030     .ip4 = *dst,
2031   };
2032
2033   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2034                             VNET_LINK_IP4, &nh, sw_if_index);
2035   adj = adj_get (ai);
2036
2037   /* Peer has been previously resolved, retrieve glean adj instead */
2038   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2039     {
2040       if (refresh)
2041         unicast_rewrite = 1;
2042       else
2043         {
2044           adj_unlock (ai);
2045           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2046                                       VNET_LINK_IP4, sw_if_index, &nh);
2047           adj = adj_get (ai);
2048         }
2049     }
2050
2051   /* Add encapsulation string for software interface (e.g. ethernet header). */
2052   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2053   if (unicast_rewrite)
2054     {
2055       u16 *etype = vlib_buffer_get_current (b) - 2;
2056       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2057     }
2058   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2059
2060   {
2061     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2062     u32 *to_next = vlib_frame_vector_args (f);
2063     to_next[0] = bi;
2064     f->n_vectors = 1;
2065     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2066   }
2067
2068   adj_unlock (ai);
2069   return /* no error */ 0;
2070 }
2071 #endif
2072
/**
 * Static next indices of the ip4-rewrite node; the node registration maps
 * them to graph node names.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP,        /* registered as "ip4-drop" */
  IP4_REWRITE_NEXT_ICMP_ERROR,  /* registered as "ip4-icmp-error" */
  IP4_REWRITE_NEXT_FRAGMENT,    /* registered as "ip4-frag" */
  IP4_REWRITE_N_NEXT            /* Last */
} ip4_rewrite_next_t;
2080
/**
 * The bits of an IPv4 address to keep (mask in) when constructing a
 * multicast MAC address: the low-order 23 bits of the address.  The
 * little-endian value is the byte-swapped form of the big-endian one.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif
2090
2091 always_inline void
2092 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2093                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2094 {
2095   if (packet_len > adj_packet_bytes)
2096     {
2097       *error = IP4_ERROR_MTU_EXCEEDED;
2098       if (df)
2099         {
2100           icmp4_error_set_vnet_buffer
2101             (b, ICMP4_destination_unreachable,
2102              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2103              adj_packet_bytes);
2104           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2105         }
2106       else
2107         {
2108           /* IP fragmentation */
2109           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2110                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2111           *next = IP4_REWRITE_NEXT_FRAGMENT;
2112         }
2113     }
2114 }
2115
2116 /* Decrement TTL & update checksum.
2117    Works either endian, so no need for byte swap. */
2118 static_always_inline void
2119 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2120                             u32 * error)
2121 {
2122   i32 ttl;
2123   u32 checksum;
2124   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2125     {
2126       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2127       return;
2128     }
2129
2130   ttl = ip->ttl;
2131
2132   /* Input node should have reject packets with ttl 0. */
2133   ASSERT (ip->ttl > 0);
2134
2135   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2136   checksum += checksum >= 0xffff;
2137
2138   ip->checksum = checksum;
2139   ttl -= 1;
2140   ip->ttl = ttl;
2141
2142   /*
2143    * If the ttl drops below 1 when forwarding, generate
2144    * an ICMP response.
2145    */
2146   if (PREDICT_FALSE (ttl <= 0))
2147     {
2148       *error = IP4_ERROR_TIME_EXPIRED;
2149       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2150       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2151                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2152                                    0);
2153       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2154     }
2155
2156   /* Verify checksum. */
2157   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2158           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2159 }
2160
2161
/**
 * @brief Common body of the ip4 rewrite / midchain / mcast nodes.
 *
 * For every buffer in @c frame: run the TTL/checksum helper, check the
 * packet length against the adjacency's max_l3_packet_bytes, and, when no
 * error was raised, grow the buffer by the adjacency's rewrite length, set
 * the TX interface and pick the adjacency's next node (or start the output
 * feature arc).  The adjacency rewrite is then applied, counters are
 * bumped and the midchain / multicast fix-ups are run as selected by the
 * flags.  Finally all buffers are enqueued according to nexts[].
 *
 * @param vm          vlib main of the calling thread
 * @param node        runtime of the calling node
 * @param frame       frame of buffer indices to process
 * @param do_counters non-zero: prefetch and increment the per-adjacency
 *                    combined counters
 * @param is_midchain non-zero: call calc_checksums() and the adjacency's
 *                    midchain fixup_func (when set)
 * @param is_mcast    non-zero: flag packets leaving on their RX interface
 *                    with IP4_ERROR_SAME_INTERFACE and apply the multicast
 *                    MAC fix-up
 * @param do_gso      non-zero: for buffers flagged VNET_BUFFER_F_GSO use
 *                    gso_mtu_sz() instead of the IP header length for the
 *                    MTU check
 * @return the number of buffers in @c frame
 */
always_inline uword
ip4_rewrite_inline_with_gso (vlib_main_t * vm,
                             vlib_node_runtime_t * node,
                             vlib_frame_t * frame,
                             int do_counters, int is_midchain, int is_mcast,
                             int do_gso)
{
  ip_lookup_main_t *lm = &ip4_main.lookup_main;
  u32 *from = vlib_frame_vector_args (frame);
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u16 nexts[VLIB_FRAME_SIZE], *next;
  u32 n_left_from;
  /* Errors raised here are taken from ip4_input_node's error table. */
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_input_node.index);

  n_left_from = frame->n_vectors;
  u32 thread_index = vm->thread_index;

  vlib_get_buffers (vm, from, bufs, n_left_from);
  /* Every buffer defaults to the drop next until proven forwardable. */
  clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);

  if (n_left_from >= 6)
    {
      int i;
      for (i = 2; i < 6; i++)
        vlib_prefetch_buffer_header (bufs[i], LOAD);
    }

  next = nexts;
  b = bufs;
  /*
   * Dual loop: two buffers per iteration.  At least 8 must remain because
   * the headers of b[6] and b[7] are prefetched below.
   */
  while (n_left_from >= 8)
    {
      ip_adjacency_t *adj0, *adj1;
      ip4_header_t *ip0, *ip1;
      u32 rw_len0, error0, adj_index0;
      u32 rw_len1, error1, adj_index1;
      u32 tx_sw_if_index0, tx_sw_if_index1;
      u8 *p;

      vlib_prefetch_buffer_header (b[6], LOAD);
      vlib_prefetch_buffer_header (b[7], LOAD);

      adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];

      /*
       * pre-fetch the per-adjacency counters
       */
      if (do_counters)
        {
          vlib_prefetch_combined_counter (&adjacency_counters,
                                          thread_index, adj_index0);
          vlib_prefetch_combined_counter (&adjacency_counters,
                                          thread_index, adj_index1);
        }

      ip0 = vlib_buffer_get_current (b[0]);
      ip1 = vlib_buffer_get_current (b[1]);

      error0 = error1 = IP4_ERROR_NONE;

      /* May set next[]/error to the ICMP error path on TTL expiry. */
      ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
      ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);

      /* Rewrite packet header and updates lengths. */
      adj0 = adj_get (adj_index0);
      adj1 = adj_get (adj_index1);

      /* Worth pipelining. No guarantee that adj0,1 are hot... */
      rw_len0 = adj0[0].rewrite_header.data_bytes;
      rw_len1 = adj1[0].rewrite_header.data_bytes;
      vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
      vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;

      /*
       * Prefetch packet data of the next pair: the cache line before the
       * current data for store, the current one for load.
       */
      p = vlib_buffer_get_current (b[2]);
      CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
      CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);

      p = vlib_buffer_get_current (b[3]);
      CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
      CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);

      /* Check MTU of outgoing interface. */
      u16 ip0_len = clib_net_to_host_u16 (ip0->length);
      u16 ip1_len = clib_net_to_host_u16 (ip1->length);

      /* GSO buffers are checked against gso_mtu_sz() instead. */
      if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
        ip0_len = gso_mtu_sz (b[0]);
      if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
        ip1_len = gso_mtu_sz (b[1]);

      ip4_mtu_check (b[0], ip0_len,
                     adj0[0].rewrite_header.max_l3_packet_bytes,
                     ip0->flags_and_fragment_offset &
                     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
                     next + 0, &error0);
      ip4_mtu_check (b[1], ip1_len,
                     adj1[0].rewrite_header.max_l3_packet_bytes,
                     ip1->flags_and_fragment_offset &
                     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
                     next + 1, &error1);

      if (is_mcast)
        {
          /* Flag packets that would leave on the interface they came in on. */
          error0 = ((adj0[0].rewrite_header.sw_if_index ==
                     vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
                    IP4_ERROR_SAME_INTERFACE : error0);
          error1 = ((adj1[0].rewrite_header.sw_if_index ==
                     vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
                    IP4_ERROR_SAME_INTERFACE : error1);
        }

      /* Don't adjust the buffer for ttl issue; icmp-error node wants
       * to see the IP header */
      if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
        {
          u32 next_index = adj0[0].rewrite_header.next_index;
          b[0]->current_data -= rw_len0;
          b[0]->current_length += rw_len0;
          tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
          vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;

          /* Output features on this adjacency may override next_index. */
          if (PREDICT_FALSE
              (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
            vnet_feature_arc_start (lm->output_feature_arc_index,
                                    tx_sw_if_index0, &next_index, b[0]);
          next[0] = next_index;
        }
      else
        {
          b[0]->error = error_node->errors[error0];
        }
      if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
        {
          u32 next_index = adj1[0].rewrite_header.next_index;
          b[1]->current_data -= rw_len1;
          b[1]->current_length += rw_len1;

          tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
          vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;

          if (PREDICT_FALSE
              (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
            vnet_feature_arc_start (lm->output_feature_arc_index,
                                    tx_sw_if_index1, &next_index, b[1]);
          next[1] = next_index;
        }
      else
        {
          b[1]->error = error_node->errors[error1];
        }
      if (is_midchain)
        {
          calc_checksums (vm, b[0]);
          calc_checksums (vm, b[1]);
        }
      /* Guess we are only writing on simple Ethernet header. */
      /* NOTE: called for both packets regardless of error0/error1. */
      vnet_rewrite_two_headers (adj0[0], adj1[0],
                                ip0, ip1, sizeof (ethernet_header_t));

      /*
       * Bump the per-adjacency counters
       */
      if (do_counters)
        {
          vlib_increment_combined_counter
            (&adjacency_counters,
             thread_index,
             adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);

          vlib_increment_combined_counter
            (&adjacency_counters,
             thread_index,
             adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
        }

      if (is_midchain)
        {
          /* Optional per-adjacency fix-up hook. */
          if (adj0->sub_type.midchain.fixup_func)
            adj0->sub_type.midchain.fixup_func
              (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
          if (adj1->sub_type.midchain.fixup_func)
            adj1->sub_type.midchain.fixup_func
              (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
        }

      if (is_mcast)
        {
          /*
           * copy bytes from the IP address into the MAC rewrite
           */
          vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
                                      adj0->rewrite_header.dst_mcast_offset,
                                      &ip0->dst_address.as_u32, (u8 *) ip0);
          vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
                                      adj1->rewrite_header.dst_mcast_offset,
                                      &ip1->dst_address.as_u32, (u8 *) ip1);
        }

      next += 2;
      b += 2;
      n_left_from -= 2;
    }

  /* Single loop: same steps as above, one buffer at a time. */
  while (n_left_from > 0)
    {
      ip_adjacency_t *adj0;
      ip4_header_t *ip0;
      u32 rw_len0, adj_index0, error0;
      u32 tx_sw_if_index0;

      adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];

      adj0 = adj_get (adj_index0);

      if (do_counters)
        vlib_prefetch_combined_counter (&adjacency_counters,
                                        thread_index, adj_index0);

      ip0 = vlib_buffer_get_current (b[0]);

      error0 = IP4_ERROR_NONE;

      ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);


      /* Update packet buffer attributes/set output interface. */
      rw_len0 = adj0[0].rewrite_header.data_bytes;
      vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;

      /* Check MTU of outgoing interface. */
      u16 ip0_len = clib_net_to_host_u16 (ip0->length);
      if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
        ip0_len = gso_mtu_sz (b[0]);

      ip4_mtu_check (b[0], ip0_len,
                     adj0[0].rewrite_header.max_l3_packet_bytes,
                     ip0->flags_and_fragment_offset &
                     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
                     next + 0, &error0);

      if (is_mcast)
        {
          error0 = ((adj0[0].rewrite_header.sw_if_index ==
                     vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
                    IP4_ERROR_SAME_INTERFACE : error0);
        }

      /* Don't adjust the buffer for ttl issue; icmp-error node wants
       * to see the IP header */
      if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
        {
          u32 next_index = adj0[0].rewrite_header.next_index;
          b[0]->current_data -= rw_len0;
          b[0]->current_length += rw_len0;
          tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
          vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;

          if (PREDICT_FALSE
              (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
            vnet_feature_arc_start (lm->output_feature_arc_index,
                                    tx_sw_if_index0, &next_index, b[0]);
          next[0] = next_index;
        }
      else
        {
          b[0]->error = error_node->errors[error0];
        }
      if (is_midchain)
        {
          calc_checksums (vm, b[0]);
        }
      /* Guess we are only writing on simple Ethernet header. */
      vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));

      if (do_counters)
        vlib_increment_combined_counter
          (&adjacency_counters,
           thread_index, adj_index0, 1,
           vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);

      if (is_midchain)
        {
          if (adj0->sub_type.midchain.fixup_func)
            adj0->sub_type.midchain.fixup_func
              (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
        }

      if (is_mcast)
        {
          /*
           * copy bytes from the IP address into the MAC rewrite
           */
          vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
                                      adj0->rewrite_header.dst_mcast_offset,
                                      &ip0->dst_address.as_u32, (u8 *) ip0);
        }

      next += 1;
      b += 1;
      n_left_from -= 1;
    }


  /* Need to do trace after rewrites to pick up new packet data. */
  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  /* Dispatch every buffer to the next index recorded in nexts[]. */
  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  return frame->n_vectors;
}
2473
2474 always_inline uword
2475 ip4_rewrite_inline (vlib_main_t * vm,
2476                     vlib_node_runtime_t * node,
2477                     vlib_frame_t * frame,
2478                     int do_counters, int is_midchain, int is_mcast)
2479 {
2480   vnet_main_t *vnm = vnet_get_main ();
2481   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2482     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2483                                         is_midchain, is_mcast,
2484                                         1 /* do_gso */ );
2485   else
2486     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2487                                         is_midchain, is_mcast,
2488                                         0 /* no do_gso */ );
2489 }
2490
2491
2492 /** @brief IPv4 rewrite node.
2493     @node ip4-rewrite
2494
2495     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2496     header checksum, fetch the ip adjacency, check the outbound mtu,
2497     apply the adjacency rewrite, and send pkts to the adjacency
2498     rewrite header's rewrite_next_index.
2499
2500     @param vm vlib_main_t corresponding to the current thread
2501     @param node vlib_node_runtime_t
2502     @param frame vlib_frame_t whose contents should be dispatched
2503
2504     @par Graph mechanics: buffer metadata, next index usage
2505
2506     @em Uses:
2507     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2508         - the rewrite adjacency index
2509     - <code>adj->lookup_next_index</code>
2510         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2511           the packet will be dropped.
2512     - <code>adj->rewrite_header</code>
2513         - Rewrite string length, rewrite string, next_index
2514
2515     @em Sets:
2516     - <code>b->current_data, b->current_length</code>
2517         - Updated net of applying the rewrite string
2518
2519     <em>Next Indices:</em>
2520     - <code> adj->rewrite_header.next_index </code>
2521       or @c ip4-drop
2522 */
2523
2524 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2525                                  vlib_frame_t * frame)
2526 {
2527   if (adj_are_counters_enabled ())
2528     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2529   else
2530     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2531 }
2532
2533 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2534                                        vlib_node_runtime_t * node,
2535                                        vlib_frame_t * frame)
2536 {
2537   if (adj_are_counters_enabled ())
2538     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2539   else
2540     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2541 }
2542
2543 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2544                                   vlib_node_runtime_t * node,
2545                                   vlib_frame_t * frame)
2546 {
2547   if (adj_are_counters_enabled ())
2548     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2549   else
2550     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2551 }
2552
2553 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2554                                        vlib_node_runtime_t * node,
2555                                        vlib_frame_t * frame)
2556 {
2557   if (adj_are_counters_enabled ())
2558     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2559   else
2560     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2561 }
2562
2563 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2564                                         vlib_node_runtime_t * node,
2565                                         vlib_frame_t * frame)
2566 {
2567   if (adj_are_counters_enabled ())
2568     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2569   else
2570     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2571 }
2572
2573 /* *INDENT-OFF* */
2574 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2575   .name = "ip4-rewrite",
2576   .vector_size = sizeof (u32),
2577
2578   .format_trace = format_ip4_rewrite_trace,
2579
2580   .n_next_nodes = IP4_REWRITE_N_NEXT,
2581   .next_nodes = {
2582     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2583     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2584     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2585   },
2586 };
2587
2588 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2589   .name = "ip4-rewrite-bcast",
2590   .vector_size = sizeof (u32),
2591
2592   .format_trace = format_ip4_rewrite_trace,
2593   .sibling_of = "ip4-rewrite",
2594 };
2595
2596 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2597   .name = "ip4-rewrite-mcast",
2598   .vector_size = sizeof (u32),
2599
2600   .format_trace = format_ip4_rewrite_trace,
2601   .sibling_of = "ip4-rewrite",
2602 };
2603
2604 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2605   .name = "ip4-mcast-midchain",
2606   .vector_size = sizeof (u32),
2607
2608   .format_trace = format_ip4_rewrite_trace,
2609   .sibling_of = "ip4-rewrite",
2610 };
2611
2612 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2613   .name = "ip4-midchain",
2614   .vector_size = sizeof (u32),
2615   .format_trace = format_ip4_forward_next_trace,
2616   .sibling_of =  "ip4-rewrite",
2617 };
/* *INDENT-ON* */
2619
2620 static int
2621 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2622 {
2623   ip4_fib_mtrie_t *mtrie0;
2624   ip4_fib_mtrie_leaf_t leaf0;
2625   u32 lbi0;
2626
2627   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2628
2629   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2630   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2631   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2632
2633   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2634
2635   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2636 }
2637
2638 static clib_error_t *
2639 test_lookup_command_fn (vlib_main_t * vm,
2640                         unformat_input_t * input, vlib_cli_command_t * cmd)
2641 {
2642   ip4_fib_t *fib;
2643   u32 table_id = 0;
2644   f64 count = 1;
2645   u32 n;
2646   int i;
2647   ip4_address_t ip4_base_address;
2648   u64 errors = 0;
2649
2650   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2651     {
2652       if (unformat (input, "table %d", &table_id))
2653         {
2654           /* Make sure the entry exists. */
2655           fib = ip4_fib_get (table_id);
2656           if ((fib) && (fib->index != table_id))
2657             return clib_error_return (0, "<fib-index> %d does not exist",
2658                                       table_id);
2659         }
2660       else if (unformat (input, "count %f", &count))
2661         ;
2662
2663       else if (unformat (input, "%U",
2664                          unformat_ip4_address, &ip4_base_address))
2665         ;
2666       else
2667         return clib_error_return (0, "unknown input `%U'",
2668                                   format_unformat_error, input);
2669     }
2670
2671   n = count;
2672
2673   for (i = 0; i < n; i++)
2674     {
2675       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2676         errors++;
2677
2678       ip4_base_address.as_u32 =
2679         clib_host_to_net_u32 (1 +
2680                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2681     }
2682
2683   if (errors)
2684     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2685   else
2686     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2687
2688   return 0;
2689 }
2690
2691 /*?
2692  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2693  * given FIB table to determine if there is a conflict with the
2694  * adjacency table. The fib-id can be determined by using the
2695  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2696  * of 0 is used.
2697  *
2698  * @todo This command uses fib-id, other commands use table-id (not
2699  * just a name, they are different indexes). Would like to change this
2700  * to table-id for consistency.
2701  *
2702  * @cliexpar
2703  * Example of how to run the test lookup command:
2704  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2705  * No errors in 2 lookups
2706  * @cliexend
2707 ?*/
2708 /* *INDENT-OFF* */
2709 VLIB_CLI_COMMAND (lookup_test_command, static) =
2710 {
2711   .path = "test lookup",
2712   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2713   .function = test_lookup_command_fn,
2714 };
2715 /* *INDENT-ON* */
2716
2717 #ifndef CLIB_MARCH_VARIANT
2718 int
2719 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2720 {
2721   u32 fib_index;
2722
2723   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2724
2725   if (~0 == fib_index)
2726     return VNET_API_ERROR_NO_SUCH_FIB;
2727
2728   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2729                                   flow_hash_config);
2730
2731   return 0;
2732 }
2733 #endif
2734
2735 static clib_error_t *
2736 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2737                              unformat_input_t * input,
2738                              vlib_cli_command_t * cmd)
2739 {
2740   int matched = 0;
2741   u32 table_id = 0;
2742   u32 flow_hash_config = 0;
2743   int rv;
2744
2745   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2746     {
2747       if (unformat (input, "table %d", &table_id))
2748         matched = 1;
2749 #define _(a,v) \
2750     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2751       foreach_flow_hash_bit
2752 #undef _
2753         else
2754         break;
2755     }
2756
2757   if (matched == 0)
2758     return clib_error_return (0, "unknown input `%U'",
2759                               format_unformat_error, input);
2760
2761   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2762   switch (rv)
2763     {
2764     case 0:
2765       break;
2766
2767     case VNET_API_ERROR_NO_SUCH_FIB:
2768       return clib_error_return (0, "no such FIB table %d", table_id);
2769
2770     default:
2771       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2772       break;
2773     }
2774
2775   return 0;
2776 }
2777
2778 /*?
2779  * Configure the set of IPv4 fields used by the flow hash.
2780  *
2781  * @cliexpar
2782  * Example of how to set the flow hash on a given table:
2783  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2785  * @cliexstart{show ip fib}
2786  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2787  * 0.0.0.0/0
2788  *   unicast-ip4-chain
2789  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2790  *     [0] [@0]: dpo-drop ip6
2791  * 0.0.0.0/32
2792  *   unicast-ip4-chain
2793  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2794  *     [0] [@0]: dpo-drop ip6
2795  * 224.0.0.0/8
2796  *   unicast-ip4-chain
2797  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2798  *     [0] [@0]: dpo-drop ip6
2799  * 6.0.1.2/32
2800  *   unicast-ip4-chain
2801  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2802  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2803  * 7.0.0.1/32
2804  *   unicast-ip4-chain
2805  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2806  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2807  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2808  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2809  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2810  * 240.0.0.0/8
2811  *   unicast-ip4-chain
2812  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2813  *     [0] [@0]: dpo-drop ip6
2814  * 255.255.255.255/32
2815  *   unicast-ip4-chain
2816  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2817  *     [0] [@0]: dpo-drop ip6
2818  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2819  * 0.0.0.0/0
2820  *   unicast-ip4-chain
2821  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2822  *     [0] [@0]: dpo-drop ip6
2823  * 0.0.0.0/32
2824  *   unicast-ip4-chain
2825  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2826  *     [0] [@0]: dpo-drop ip6
2827  * 172.16.1.0/24
2828  *   unicast-ip4-chain
2829  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2830  *     [0] [@4]: ipv4-glean: af_packet0
2831  * 172.16.1.1/32
2832  *   unicast-ip4-chain
2833  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2834  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2835  * 172.16.1.2/32
2836  *   unicast-ip4-chain
2837  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2838  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2839  * 172.16.2.0/24
2840  *   unicast-ip4-chain
2841  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2842  *     [0] [@4]: ipv4-glean: af_packet1
2843  * 172.16.2.1/32
2844  *   unicast-ip4-chain
2845  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2846  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2847  * 224.0.0.0/8
2848  *   unicast-ip4-chain
2849  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2850  *     [0] [@0]: dpo-drop ip6
2851  * 240.0.0.0/8
2852  *   unicast-ip4-chain
2853  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2854  *     [0] [@0]: dpo-drop ip6
2855  * 255.255.255.255/32
2856  *   unicast-ip4-chain
2857  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2858  *     [0] [@0]: dpo-drop ip6
2859  * @cliexend
2860 ?*/
2861 /* *INDENT-OFF* */
2862 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2863 {
2864   .path = "set ip flow-hash",
2865   .short_help =
2866   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2867   .function = set_ip_flow_hash_command_fn,
2868 };
2869 /* *INDENT-ON* */
2870
2871 #ifndef CLIB_MARCH_VARIANT
2872 int
2873 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2874                              u32 table_index)
2875 {
2876   vnet_main_t *vnm = vnet_get_main ();
2877   vnet_interface_main_t *im = &vnm->interface_main;
2878   ip4_main_t *ipm = &ip4_main;
2879   ip_lookup_main_t *lm = &ipm->lookup_main;
2880   vnet_classify_main_t *cm = &vnet_classify_main;
2881   ip4_address_t *if_addr;
2882
2883   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2884     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2885
2886   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2887     return VNET_API_ERROR_NO_SUCH_ENTRY;
2888
2889   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2890   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2891
2892   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2893
2894   if (NULL != if_addr)
2895     {
2896       fib_prefix_t pfx = {
2897         .fp_len = 32,
2898         .fp_proto = FIB_PROTOCOL_IP4,
2899         .fp_addr.ip4 = *if_addr,
2900       };
2901       u32 fib_index;
2902
2903       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2904                                                        sw_if_index);
2905
2906
2907       if (table_index != (u32) ~ 0)
2908         {
2909           dpo_id_t dpo = DPO_INVALID;
2910
2911           dpo_set (&dpo,
2912                    DPO_CLASSIFY,
2913                    DPO_PROTO_IP4,
2914                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2915
2916           fib_table_entry_special_dpo_add (fib_index,
2917                                            &pfx,
2918                                            FIB_SOURCE_CLASSIFY,
2919                                            FIB_ENTRY_FLAG_NONE, &dpo);
2920           dpo_reset (&dpo);
2921         }
2922       else
2923         {
2924           fib_table_entry_special_remove (fib_index,
2925                                           &pfx, FIB_SOURCE_CLASSIFY);
2926         }
2927     }
2928
2929   return 0;
2930 }
2931 #endif
2932
2933 static clib_error_t *
2934 set_ip_classify_command_fn (vlib_main_t * vm,
2935                             unformat_input_t * input,
2936                             vlib_cli_command_t * cmd)
2937 {
2938   u32 table_index = ~0;
2939   int table_index_set = 0;
2940   u32 sw_if_index = ~0;
2941   int rv;
2942
2943   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2944     {
2945       if (unformat (input, "table-index %d", &table_index))
2946         table_index_set = 1;
2947       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2948                          vnet_get_main (), &sw_if_index))
2949         ;
2950       else
2951         break;
2952     }
2953
2954   if (table_index_set == 0)
2955     return clib_error_return (0, "classify table-index must be specified");
2956
2957   if (sw_if_index == ~0)
2958     return clib_error_return (0, "interface / subif must be specified");
2959
2960   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2961
2962   switch (rv)
2963     {
2964     case 0:
2965       break;
2966
2967     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2968       return clib_error_return (0, "No such interface");
2969
2970     case VNET_API_ERROR_NO_SUCH_ENTRY:
2971       return clib_error_return (0, "No such classifier table");
2972     }
2973   return 0;
2974 }
2975
2976 /*?
2977  * Assign a classification table to an interface. The classification
2978  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2980  * on an interface.
2981  *
2982  * @cliexpar
2983  * Example of how to assign a classification table to an interface:
2984  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2985 ?*/
2986 /* *INDENT-OFF* */
2987 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2988 {
2989     .path = "set ip classify",
2990     .short_help =
2991     "set ip classify intfc <interface> table-index <classify-idx>",
2992     .function = set_ip_classify_command_fn,
2993 };
2994 /* *INDENT-ON* */
2995
2996 static clib_error_t *
2997 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2998 {
2999   ip4_main_t *im = &ip4_main;
3000   uword heapsize = 0;
3001
3002   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3003     {
3004       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3005         ;
3006       else
3007         return clib_error_return (0,
3008                                   "invalid heap-size parameter `%U'",
3009                                   format_unformat_error, input);
3010     }
3011
3012   im->mtrie_heap_size = heapsize;
3013
3014   return 0;
3015 }
3016
3017 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3018
3019 /*
3020  * fd.io coding-style-patch-verification: ON
3021  *
3022  * Local Variables:
3023  * eval: (c-set-style "gnu")
3024  * End:
3025  */