Punt: specify packets by IP protocol Type
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame,
96                             /* lookup_for_responses_to_locally_received_packets */
97                             0);
98
99 }
100
/* Forward declaration of the trace formatter so that the node
   registrations below can reference it. */
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
/* Graph-node registration for "ip4-lookup": frame elements are u32-sized,
   traces are rendered by format_ip4_lookup_trace, and the next-node table
   is IP4_LOOKUP_NEXT_NODES with IP_LOOKUP_N_NEXT entries. */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),
108   .format_trace = format_ip4_lookup_trace,
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,
111 };
112 /* *INDENT-ON* */
113
/**
 * ip4-load-balance node function.
 *
 * For every buffer in the frame, ip.adj_index[VLIB_TX] is read as a
 * load-balance index. One bucket of that load-balance object is chosen
 * (bucket 0 when there is a single bucket, otherwise by flow hash), the
 * bucket's DPO index is written back to ip.adj_index[VLIB_TX] and the
 * bucket's dpoi_next_node becomes the buffer's next node. The per
 * load-balance "via" counters are bumped by one packet and by the length
 * of the buffer chain.
 *
 * @param vm    vlib main for the current thread
 * @param node  runtime data of this node
 * @param frame frame of buffer indices to process
 * @return the number of buffers in the frame (frame->n_vectors)
 */
114 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
115                                       vlib_node_runtime_t * node,
116                                       vlib_frame_t * frame)
117 {
118   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
119   u32 n_left, *from;
120   u32 thread_index = vm->thread_index;
121   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
122   u16 nexts[VLIB_FRAME_SIZE], *next;
123
124   from = vlib_frame_vector_args (frame);
125   n_left = frame->n_vectors;
126   next = nexts;
127
  /* Translate all buffer indices to buffer pointers up front. */
128   vlib_get_buffers (vm, from, bufs, n_left);
129
  /* Dual loop: two buffers are handled per iteration. The loop only runs
     while at least four remain, so b[2] and b[3] (the pair for the next
     iteration) are always valid prefetch targets. */
130   while (n_left >= 4)
131     {
132       const load_balance_t *lb0, *lb1;
133       const ip4_header_t *ip0, *ip1;
134       u32 lbi0, hc0, lbi1, hc1;
135       const dpo_id_t *dpo0, *dpo1;
136
137       /* Prefetch next iteration. */
138       {
139         vlib_prefetch_buffer_header (b[2], LOAD);
140         vlib_prefetch_buffer_header (b[3], LOAD);
141
142         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
143         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
144       }
145
146       ip0 = vlib_buffer_get_current (b[0]);
147       ip1 = vlib_buffer_get_current (b[1]);
148       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
149       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
150
151       lb0 = load_balance_get (lbi0);
152       lb1 = load_balance_get (lbi1);
153
154       /*
155        * this node is for via FIBs; we can re-use the flow hash already
156        * stored in the buffer, if present (non-zero).
157        * We don't want to use the same hash value at each level in the recursion
158        * graph as that would lead to polarisation, hence the shift by one bit.
159        */
160       hc0 = hc1 = 0;
161
162       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
163         {
164           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
165             {
166               hc0 = vnet_buffer (b[0])->ip.flow_hash =
167                 vnet_buffer (b[0])->ip.flow_hash >> 1;
168             }
169           else
170             {
171               hc0 = vnet_buffer (b[0])->ip.flow_hash =
172                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
173             }
174           dpo0 = load_balance_get_fwd_bucket
175             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176         }
177       else
178         {
179           dpo0 = load_balance_get_bucket_i (lb0, 0);
180         }
181       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
182         {
183           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
184             {
185               hc1 = vnet_buffer (b[1])->ip.flow_hash =
186                 vnet_buffer (b[1])->ip.flow_hash >> 1;
187             }
188           else
189             {
190               hc1 = vnet_buffer (b[1])->ip.flow_hash =
191                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
192             }
193           dpo1 = load_balance_get_fwd_bucket
194             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195         }
196       else
197         {
198           dpo1 = load_balance_get_bucket_i (lb1, 0);
199         }
200
201       next[0] = dpo0->dpoi_next_node;
202       next[1] = dpo1->dpoi_next_node;
203
      /* Replace the load-balance index with the chosen bucket's DPO index. */
204       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
205       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
206
      /* Counters are keyed by the original load-balance index. */
207       vlib_increment_combined_counter
208         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
209       vlib_increment_combined_counter
210         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
211
212       b += 2;
213       next += 2;
214       n_left -= 2;
215     }
216
  /* Single loop: same processing, one buffer at a time, for the remainder. */
217   while (n_left > 0)
218     {
219       const load_balance_t *lb0;
220       const ip4_header_t *ip0;
221       const dpo_id_t *dpo0;
222       u32 lbi0, hc0;
223
224       ip0 = vlib_buffer_get_current (b[0]);
225       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
226
227       lb0 = load_balance_get (lbi0);
228
229       hc0 = 0;
230       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
231         {
232           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
233             {
234               hc0 = vnet_buffer (b[0])->ip.flow_hash =
235                 vnet_buffer (b[0])->ip.flow_hash >> 1;
236             }
237           else
238             {
239               hc0 = vnet_buffer (b[0])->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
241             }
242           dpo0 = load_balance_get_fwd_bucket
243             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244         }
245       else
246         {
247           dpo0 = load_balance_get_bucket_i (lb0, 0);
248         }
249
250       next[0] = dpo0->dpoi_next_node;
251       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
252
253       vlib_increment_combined_counter
254         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
255
256       b += 1;
257       next += 1;
258       n_left -= 1;
259     }
260
  /* Hand every buffer to the next node recorded for it in nexts[]. */
261   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
262   if (node->flags & VLIB_NODE_FLAG_TRACE)
263     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
264
265   return frame->n_vectors;
266 }
267
268 /* *INDENT-OFF* */
/* Graph-node registration for "ip4-load-balance". It is declared a sibling
   of "ip4-lookup" (NOTE(review): presumably so that it shares that node's
   next-node arcs -- confirm against the vlib node documentation) and it
   reuses the ip4-lookup trace formatter. */
269 VLIB_REGISTER_NODE (ip4_load_balance_node) =
270 {
271   .name = "ip4-load-balance",
272   .vector_size = sizeof (u32),
273   .sibling_of = "ip4-lookup",
274   .format_trace = format_ip4_lookup_trace,
275 };
276 /* *INDENT-ON* */
277
278 #ifndef CLIB_MARCH_VARIANT
279 /* get first interface address */
280 ip4_address_t *
281 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
282                              ip_interface_address_t ** result_ia)
283 {
284   ip_lookup_main_t *lm = &im->lookup_main;
285   ip_interface_address_t *ia = 0;
286   ip4_address_t *result = 0;
287
288   /* *INDENT-OFF* */
289   foreach_ip_interface_address
290     (lm, ia, sw_if_index,
291      1 /* honor unnumbered */ ,
292      ({
293        ip4_address_t * a =
294          ip_interface_address_get_address (lm, ia);
295        result = a;
296        break;
297      }));
298   /* *INDENT-OFF* */
299   if (result_ia)
300     *result_ia = result ? ia : 0;
301   return result;
302 }
303
304 static void
305 ip4_add_subnet_bcast_route (u32 fib_index,
306                             fib_prefix_t *pfx,
307                             u32 sw_if_index)
308 {
309   vnet_sw_interface_flags_t iflags;
310
311   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
312
313   fib_table_entry_special_remove(fib_index,
314                                  pfx,
315                                  FIB_SOURCE_INTERFACE);
316
317   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
318     {
319       fib_table_entry_update_one_path (fib_index, pfx,
320                                        FIB_SOURCE_INTERFACE,
321                                        FIB_ENTRY_FLAG_NONE,
322                                        DPO_PROTO_IP4,
323                                        /* No next-hop address */
324                                        &ADJ_BCAST_ADDR,
325                                        sw_if_index,
326                                        // invalid FIB index
327                                        ~0,
328                                        1,
329                                        // no out-label stack
330                                        NULL,
331                                        FIB_ROUTE_PATH_FLAG_NONE);
332     }
333   else
334     {
335         fib_table_entry_special_add(fib_index,
336                                     pfx,
337                                     FIB_SOURCE_INTERFACE,
338                                     (FIB_ENTRY_FLAG_DROP |
339                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
340     }
341 }
342
343 static void
344 ip4_add_interface_routes (u32 sw_if_index,
345                           ip4_main_t * im, u32 fib_index,
346                           ip_interface_address_t * a)
347 {
348   ip_lookup_main_t *lm = &im->lookup_main;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350   fib_prefix_t pfx = {
351     .fp_len = a->address_length,
352     .fp_proto = FIB_PROTOCOL_IP4,
353     .fp_addr.ip4 = *address,
354   };
355
356   if (pfx.fp_len <= 30)
357     {
358       /* a /30 or shorter - add a glean for the network address */
359       fib_table_entry_update_one_path (fib_index, &pfx,
360                                        FIB_SOURCE_INTERFACE,
361                                        (FIB_ENTRY_FLAG_CONNECTED |
362                                         FIB_ENTRY_FLAG_ATTACHED),
363                                        DPO_PROTO_IP4,
364                                        /* No next-hop address */
365                                        NULL,
366                                        sw_if_index,
367                                        // invalid FIB index
368                                        ~0,
369                                        1,
370                                        // no out-label stack
371                                        NULL,
372                                        FIB_ROUTE_PATH_FLAG_NONE);
373
374       /* Add the two broadcast addresses as drop */
375       fib_prefix_t net_pfx = {
376         .fp_len = 32,
377         .fp_proto = FIB_PROTOCOL_IP4,
378         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
379       };
380       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
381         fib_table_entry_special_add(fib_index,
382                                     &net_pfx,
383                                     FIB_SOURCE_INTERFACE,
384                                     (FIB_ENTRY_FLAG_DROP |
385                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
386       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
387       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
388         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
389     }
390   else if (pfx.fp_len == 31)
391     {
392       u32 mask = clib_host_to_net_u32(1);
393       fib_prefix_t net_pfx = pfx;
394
395       net_pfx.fp_len = 32;
396       net_pfx.fp_addr.ip4.as_u32 ^= mask;
397
398       /* a /31 - add the other end as an attached host */
399       fib_table_entry_update_one_path (fib_index, &net_pfx,
400                                        FIB_SOURCE_INTERFACE,
401                                        (FIB_ENTRY_FLAG_ATTACHED),
402                                        DPO_PROTO_IP4,
403                                        &net_pfx.fp_addr,
404                                        sw_if_index,
405                                        // invalid FIB index
406                                        ~0,
407                                        1,
408                                        NULL,
409                                        FIB_ROUTE_PATH_FLAG_NONE);
410     }
411   pfx.fp_len = 32;
412
413   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
414     {
415       u32 classify_table_index =
416         lm->classify_table_index_by_sw_if_index[sw_if_index];
417       if (classify_table_index != (u32) ~ 0)
418         {
419           dpo_id_t dpo = DPO_INVALID;
420
421           dpo_set (&dpo,
422                    DPO_CLASSIFY,
423                    DPO_PROTO_IP4,
424                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
425
426           fib_table_entry_special_dpo_add (fib_index,
427                                            &pfx,
428                                            FIB_SOURCE_CLASSIFY,
429                                            FIB_ENTRY_FLAG_NONE, &dpo);
430           dpo_reset (&dpo);
431         }
432     }
433
434   fib_table_entry_update_one_path (fib_index, &pfx,
435                                    FIB_SOURCE_INTERFACE,
436                                    (FIB_ENTRY_FLAG_CONNECTED |
437                                     FIB_ENTRY_FLAG_LOCAL),
438                                    DPO_PROTO_IP4,
439                                    &pfx.fp_addr,
440                                    sw_if_index,
441                                    // invalid FIB index
442                                    ~0,
443                                    1, NULL,
444                                    FIB_ROUTE_PATH_FLAG_NONE);
445 }
446
447 static void
448 ip4_del_interface_routes (ip4_main_t * im,
449                           u32 fib_index,
450                           ip4_address_t * address, u32 address_length)
451 {
452   fib_prefix_t pfx = {
453     .fp_len = address_length,
454     .fp_proto = FIB_PROTOCOL_IP4,
455     .fp_addr.ip4 = *address,
456   };
457
458   if (pfx.fp_len <= 30)
459     {
460       fib_prefix_t net_pfx = {
461         .fp_len = 32,
462         .fp_proto = FIB_PROTOCOL_IP4,
463         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
464       };
465       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
466         fib_table_entry_special_remove(fib_index,
467                                        &net_pfx,
468                                        FIB_SOURCE_INTERFACE);
469       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
470       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
471         fib_table_entry_special_remove(fib_index,
472                                        &net_pfx,
473                                        FIB_SOURCE_INTERFACE);
474       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
475     }
476     else if (pfx.fp_len == 31)
477     {
478       u32 mask = clib_host_to_net_u32(1);
479       fib_prefix_t net_pfx = pfx;
480
481       net_pfx.fp_len = 32;
482       net_pfx.fp_addr.ip4.as_u32 ^= mask;
483
484       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
485     }
486
487   pfx.fp_len = 32;
488   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
489 }
490
491 void
492 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
493 {
494   ip4_main_t *im = &ip4_main;
495
496   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
497
498   /*
499    * enable/disable only on the 1<->0 transition
500    */
501   if (is_enable)
502     {
503       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
504         return;
505     }
506   else
507     {
508       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
509       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
510         return;
511     }
512   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
513                                !is_enable, 0, 0);
514
515
516   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
517                                sw_if_index, !is_enable, 0, 0);
518
519   {
520     ip4_enable_disable_interface_callback_t *cb;
521     vec_foreach (cb, im->enable_disable_interface_callbacks)
522       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
523   }
524 }
525
526 static clib_error_t *
527 ip4_add_del_interface_address_internal (vlib_main_t * vm,
528                                         u32 sw_if_index,
529                                         ip4_address_t * address,
530                                         u32 address_length, u32 is_del)
531 {
532   vnet_main_t *vnm = vnet_get_main ();
533   ip4_main_t *im = &ip4_main;
534   ip_lookup_main_t *lm = &im->lookup_main;
535   clib_error_t *error = 0;
536   u32 if_address_index, elts_before;
537   ip4_address_fib_t ip4_af, *addr_fib = 0;
538
539   /* local0 interface doesn't support IP addressing  */
540   if (sw_if_index == 0)
541     {
542       return
543        clib_error_create ("local0 interface doesn't support IP addressing");
544     }
545
546   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
547   ip4_addr_fib_init (&ip4_af, address,
548                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
549   vec_add1 (addr_fib, ip4_af);
550
551   /*
552    * there is no support for adj-fib handling in the presence of overlapping
553    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
554    * most routers do.
555    */
556   /* *INDENT-OFF* */
557   if (!is_del)
558     {
559       /* When adding an address check that it does not conflict
560          with an existing address on any interface in this table. */
561       ip_interface_address_t *ia;
562       vnet_sw_interface_t *sif;
563
564       pool_foreach(sif, vnm->interface_main.sw_interfaces,
565       ({
566           if (im->fib_index_by_sw_if_index[sw_if_index] ==
567               im->fib_index_by_sw_if_index[sif->sw_if_index])
568             {
569               foreach_ip_interface_address
570                 (&im->lookup_main, ia, sif->sw_if_index,
571                  0 /* honor unnumbered */ ,
572                  ({
573                    ip4_address_t * x =
574                      ip_interface_address_get_address
575                      (&im->lookup_main, ia);
576                    if (ip4_destination_matches_route
577                        (im, address, x, ia->address_length) ||
578                        ip4_destination_matches_route (im,
579                                                       x,
580                                                       address,
581                                                       address_length))
582                      {
583                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
584
585                        return
586                          clib_error_create
587                          ("failed to add %U which conflicts with %U for interface %U",
588                           format_ip4_address_and_length, address,
589                           address_length,
590                           format_ip4_address_and_length, x,
591                           ia->address_length,
592                           format_vnet_sw_if_index_name, vnm,
593                           sif->sw_if_index);
594                      }
595                  }));
596             }
597       }));
598     }
599   /* *INDENT-ON* */
600
601   elts_before = pool_elts (lm->if_address_pool);
602
603   error = ip_interface_address_add_del
604     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
605   if (error)
606     goto done;
607
608   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
609
610   if (is_del)
611     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
612   else
613     ip4_add_interface_routes (sw_if_index,
614                               im, ip4_af.fib_index,
615                               pool_elt_at_index
616                               (lm->if_address_pool, if_address_index));
617
618   /* If pool did not grow/shrink: add duplicate address. */
619   if (elts_before != pool_elts (lm->if_address_pool))
620     {
621       ip4_add_del_interface_address_callback_t *cb;
622       vec_foreach (cb, im->add_del_interface_address_callbacks)
623         cb->function (im, cb->function_opaque, sw_if_index,
624                       address, address_length, if_address_index, is_del);
625     }
626
627 done:
628   vec_free (addr_fib);
629   return error;
630 }
631
632 clib_error_t *
633 ip4_add_del_interface_address (vlib_main_t * vm,
634                                u32 sw_if_index,
635                                ip4_address_t * address,
636                                u32 address_length, u32 is_del)
637 {
638   return ip4_add_del_interface_address_internal
639     (vm, sw_if_index, address, address_length, is_del);
640 }
641
642 void
643 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
644 {
645   ip_interface_address_t *ia;
646   ip4_main_t *im;
647
648   im = &ip4_main;
649
650   /*
651    * when directed broadcast is enabled, the subnet braodcast route will forward
652    * packets using an adjacency with a broadcast MAC. otherwise it drops
653    */
654   /* *INDENT-OFF* */
655   foreach_ip_interface_address(&im->lookup_main, ia,
656                                sw_if_index, 0,
657      ({
658        if (ia->address_length <= 30)
659          {
660            ip4_address_t *ipa;
661
662            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
663
664            fib_prefix_t pfx = {
665              .fp_len = 32,
666              .fp_proto = FIB_PROTOCOL_IP4,
667              .fp_addr = {
668                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
669              },
670            };
671
672            ip4_add_subnet_bcast_route
673              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
674                                                   sw_if_index),
675               &pfx, sw_if_index);
676          }
677      }));
678   /* *INDENT-ON* */
679 }
680 #endif
681
682 /* Built-in ip4 unicast rx feature path definition */
683 /* *INDENT-OFF* */
/* The "ip4-unicast" arc: entered from ip4-input / ip4-input-no-checksum,
   with ip4-lookup declared as the last node. Its index is stored in
   ip4_main.lookup_main.ucast_feature_arc_index. Each VNET_FEATURE_INIT
   below names one node on this arc together with the node(s) it must run
   before. */
684 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
685 {
686   .arc_name = "ip4-unicast",
687   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
688   .last_in_arc = "ip4-lookup",
689   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
690 };
691
692 VNET_FEATURE_INIT (ip4_flow_classify, static) =
693 {
694   .arc_name = "ip4-unicast",
695   .node_name = "ip4-flow-classify",
696   .runs_before = VNET_FEATURES ("ip4-inacl"),
697 };
698
699 VNET_FEATURE_INIT (ip4_inacl, static) =
700 {
701   .arc_name = "ip4-unicast",
702   .node_name = "ip4-inacl",
703   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
704 };
705
706 VNET_FEATURE_INIT (ip4_source_check_1, static) =
707 {
708   .arc_name = "ip4-unicast",
709   .node_name = "ip4-source-check-via-rx",
710   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
711 };
712
713 VNET_FEATURE_INIT (ip4_source_check_2, static) =
714 {
715   .arc_name = "ip4-unicast",
716   .node_name = "ip4-source-check-via-any",
717   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
718 };
719
/* Same runs_before target as ip4-source-check-via-any above; no relative
   order between those two nodes is declared here. */
720 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
721 {
722   .arc_name = "ip4-unicast",
723   .node_name = "ip4-source-and-port-range-check-rx",
724   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
725 };
726
727 VNET_FEATURE_INIT (ip4_policer_classify, static) =
728 {
729   .arc_name = "ip4-unicast",
730   .node_name = "ip4-policer-classify",
731   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
732 };
733
734 VNET_FEATURE_INIT (ip4_ipsec, static) =
735 {
736   .arc_name = "ip4-unicast",
737   .node_name = "ipsec4-input-feature",
738   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
739 };
740
741 VNET_FEATURE_INIT (ip4_vpath, static) =
742 {
743   .arc_name = "ip4-unicast",
744   .node_name = "vpath-input-ip4",
745   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
746 };
747
748 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
749 {
750   .arc_name = "ip4-unicast",
751   .node_name = "ip4-vxlan-bypass",
752   .runs_before = VNET_FEATURES ("ip4-lookup"),
753 };
754
/* "ip4-not-enabled" is switched per interface by
   ip4_sw_interface_enable_disable() and ip4_sw_interface_add_del()
   in this file. */
755 VNET_FEATURE_INIT (ip4_not_enabled, static) =
756 {
757   .arc_name = "ip4-unicast",
758   .node_name = "ip4-not-enabled",
759   .runs_before = VNET_FEATURES ("ip4-lookup"),
760 };
761
762 VNET_FEATURE_INIT (ip4_lookup, static) =
763 {
764   .arc_name = "ip4-unicast",
765   .node_name = "ip4-lookup",
766   .runs_before = 0,     /* not before any other features */
767 };
768
769 /* Built-in ip4 multicast rx feature path definition */
/* The "ip4-multicast" arc: entered from ip4-input / ip4-input-no-checksum,
   with ip4-mfib-forward-lookup declared as the last node. Its index is
   stored in ip4_main.lookup_main.mcast_feature_arc_index. */
770 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
771 {
772   .arc_name = "ip4-multicast",
773   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
774   .last_in_arc = "ip4-mfib-forward-lookup",
775   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
776 };
777
778 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
779 {
780   .arc_name = "ip4-multicast",
781   .node_name = "vpath-input-ip4",
782   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
783 };
784
/* Multicast counterpart of the unicast "ip4-not-enabled" feature; both are
   switched together by ip4_sw_interface_enable_disable() in this file. */
785 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
786 {
787   .arc_name = "ip4-multicast",
788   .node_name = "ip4-not-enabled",
789   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
790 };
791
792 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
793 {
794   .arc_name = "ip4-multicast",
795   .node_name = "ip4-mfib-forward-lookup",
796   .runs_before = 0,     /* last feature */
797 };
798
799 /* Built-in ip4 tx (output) feature path definition */
/* The "ip4-output" arc: entered from ip4-rewrite, ip4-midchain and
   ip4-dvr-dpo, with interface-output declared as the last node. Its index
   is stored in ip4_main.lookup_main.output_feature_arc_index. */
800 VNET_FEATURE_ARC_INIT (ip4_output, static) =
801 {
802   .arc_name = "ip4-output",
803   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
804   .last_in_arc = "interface-output",
805   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
806 };
807
/* Source and port-range check on tx; ordered ahead of ip4-outacl. */
808 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
809 {
810   .arc_name = "ip4-output",
811   .node_name = "ip4-source-and-port-range-check-tx",
812   .runs_before = VNET_FEATURES ("ip4-outacl"),
813 };
814
815 VNET_FEATURE_INIT (ip4_outacl, static) =
816 {
817   .arc_name = "ip4-output",
818   .node_name = "ip4-outacl",
819   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
820 };
821
822 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
823 {
824   .arc_name = "ip4-output",
825   .node_name = "ipsec4-output-feature",
826   .runs_before = VNET_FEATURES ("interface-output"),
827 };
828
829 /* Built-in ip4 tx feature path definition */
830 VNET_FEATURE_INIT (ip4_interface_output, static) =
831 {
832   .arc_name = "ip4-output",
833   .node_name = "interface-output",
834   .runs_before = 0,     /* not before any other features */
835 };
836 /* *INDENT-ON* */
837
838 static clib_error_t *
839 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
840 {
841   ip4_main_t *im = &ip4_main;
842
843   /* Fill in lookup tables with default table (0). */
844   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
845   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
846
847   if (!is_add)
848     {
849       ip4_main_t *im4 = &ip4_main;
850       ip_lookup_main_t *lm4 = &im4->lookup_main;
851       ip_interface_address_t *ia = 0;
852       ip4_address_t *address;
853       vlib_main_t *vm = vlib_get_main ();
854
855       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
856       /* *INDENT-OFF* */
857       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
858       ({
859         address = ip_interface_address_get_address (lm4, ia);
860         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
861       }));
862       /* *INDENT-ON* */
863     }
864
865   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
866                                is_add, 0, 0);
867
868   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
869                                sw_if_index, is_add, 0, 0);
870
871   return /* no error */ 0;
872 }
873
874 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
875
/* Global IP4 main.
 * Defined only when CLIB_MARCH_VARIANT is not set, so the object is
 * emitted once rather than in every march-variant compilation. */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
880
881 static clib_error_t *
882 ip4_lookup_init (vlib_main_t * vm)
883 {
884   ip4_main_t *im = &ip4_main;
885   clib_error_t *error;
886   uword i;
887
888   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
889     return error;
890   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
891     return (error);
892   if ((error = vlib_call_init_function (vm, fib_module_init)))
893     return error;
894   if ((error = vlib_call_init_function (vm, mfib_module_init)))
895     return error;
896
897   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
898     {
899       u32 m;
900
901       if (i < 32)
902         m = pow2_mask (i) << (32 - i);
903       else
904         m = ~0;
905       im->fib_masks[i] = clib_host_to_net_u32 (m);
906     }
907
908   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
909
910   /* Create FIB with index 0 and table id of 0. */
911   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
912                                      FIB_SOURCE_DEFAULT_ROUTE);
913   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
914                                       MFIB_SOURCE_DEFAULT_ROUTE);
915
916   {
917     pg_node_t *pn;
918     pn = pg_get_node (ip4_lookup_node.index);
919     pn->unformat_edit = unformat_pg_ip4_header;
920   }
921
922   {
923     ethernet_arp_header_t h;
924
925     clib_memset (&h, 0, sizeof (h));
926
927 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
928 #define _8(f,v) h.f = v;
929     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
930     _16 (l3_type, ETHERNET_TYPE_IP4);
931     _8 (n_l2_address_bytes, 6);
932     _8 (n_l3_address_bytes, 4);
933     _16 (opcode, ETHERNET_ARP_OPCODE_request);
934 #undef _16
935 #undef _8
936
937     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
938                                /* data */ &h,
939                                sizeof (h),
940                                /* alloc chunk size */ 8,
941                                "ip4 arp");
942   }
943
944   return error;
945 }
946
947 VLIB_INIT_FUNCTION (ip4_lookup_init);
948
/* Trace record shared by the ip4 forwarding nodes in this file; it is
 * filled in by ip4_forward_next_trace() and rendered by the format_ip4_*
 * trace functions below. */
typedef struct
{
  /* Adjacency taken. */
  u32 dpo_index;
  /* Flow hash copied from the buffer metadata. */
  u32 flow_hash;
  /* The buffer's TX sw_if_index when that is set, otherwise the FIB index
   * bound to the RX interface (see ip4_forward_next_trace). */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
960
961 #ifndef CLIB_MARCH_VARIANT
962 u8 *
963 format_ip4_forward_next_trace (u8 * s, va_list * args)
964 {
965   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
966   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
967   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
968   u32 indent = format_get_indent (s);
969   s = format (s, "%U%U",
970               format_white_space, indent,
971               format_ip4_header, t->packet_data, sizeof (t->packet_data));
972   return s;
973 }
974 #endif
975
976 static u8 *
977 format_ip4_lookup_trace (u8 * s, va_list * args)
978 {
979   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
980   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
981   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
982   u32 indent = format_get_indent (s);
983
984   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
985               t->fib_index, t->dpo_index, t->flow_hash);
986   s = format (s, "\n%U%U",
987               format_white_space, indent,
988               format_ip4_header, t->packet_data, sizeof (t->packet_data));
989   return s;
990 }
991
992 static u8 *
993 format_ip4_rewrite_trace (u8 * s, va_list * args)
994 {
995   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
996   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
997   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
998   u32 indent = format_get_indent (s);
999
1000   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1001               t->fib_index, t->dpo_index, format_ip_adjacency,
1002               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1003   s = format (s, "\n%U%U",
1004               format_white_space, indent,
1005               format_ip_adjacency_packet_data,
1006               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1007   return s;
1008 }
1009
1010 #ifndef CLIB_MARCH_VARIANT
1011 /* Common trace function for all ip4-forward next nodes. */
1012 void
1013 ip4_forward_next_trace (vlib_main_t * vm,
1014                         vlib_node_runtime_t * node,
1015                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1016 {
1017   u32 *from, n_left;
1018   ip4_main_t *im = &ip4_main;
1019
1020   n_left = frame->n_vectors;
1021   from = vlib_frame_vector_args (frame);
1022
1023   while (n_left >= 4)
1024     {
1025       u32 bi0, bi1;
1026       vlib_buffer_t *b0, *b1;
1027       ip4_forward_next_trace_t *t0, *t1;
1028
1029       /* Prefetch next iteration. */
1030       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1031       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1032
1033       bi0 = from[0];
1034       bi1 = from[1];
1035
1036       b0 = vlib_get_buffer (vm, bi0);
1037       b1 = vlib_get_buffer (vm, bi1);
1038
1039       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1040         {
1041           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1042           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1043           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1044           t0->fib_index =
1045             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1046              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1047             vec_elt (im->fib_index_by_sw_if_index,
1048                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1049
1050           clib_memcpy_fast (t0->packet_data,
1051                             vlib_buffer_get_current (b0),
1052                             sizeof (t0->packet_data));
1053         }
1054       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1055         {
1056           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1057           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1058           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1059           t1->fib_index =
1060             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1061              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1062             vec_elt (im->fib_index_by_sw_if_index,
1063                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1064           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1065                             sizeof (t1->packet_data));
1066         }
1067       from += 2;
1068       n_left -= 2;
1069     }
1070
1071   while (n_left >= 1)
1072     {
1073       u32 bi0;
1074       vlib_buffer_t *b0;
1075       ip4_forward_next_trace_t *t0;
1076
1077       bi0 = from[0];
1078
1079       b0 = vlib_get_buffer (vm, bi0);
1080
1081       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1082         {
1083           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1084           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1085           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1086           t0->fib_index =
1087             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1088              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1089             vec_elt (im->fib_index_by_sw_if_index,
1090                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1091           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1092                             sizeof (t0->packet_data));
1093         }
1094       from += 1;
1095       n_left -= 1;
1096     }
1097 }
1098
1099 /* Compute TCP/UDP/ICMP4 checksum in software. */
1100 u16
1101 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1102                               ip4_header_t * ip0)
1103 {
1104   ip_csum_t sum0;
1105   u32 ip_header_length, payload_length_host_byte_order;
1106   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1107   u16 sum16;
1108   void *data_this_buffer;
1109
1110   /* Initialize checksum with ip header. */
1111   ip_header_length = ip4_header_bytes (ip0);
1112   payload_length_host_byte_order =
1113     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1114   sum0 =
1115     clib_host_to_net_u32 (payload_length_host_byte_order +
1116                           (ip0->protocol << 16));
1117
1118   if (BITS (uword) == 32)
1119     {
1120       sum0 =
1121         ip_csum_with_carry (sum0,
1122                             clib_mem_unaligned (&ip0->src_address, u32));
1123       sum0 =
1124         ip_csum_with_carry (sum0,
1125                             clib_mem_unaligned (&ip0->dst_address, u32));
1126     }
1127   else
1128     sum0 =
1129       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1130
1131   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1132   data_this_buffer = (void *) ip0 + ip_header_length;
1133   n_ip_bytes_this_buffer =
1134     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1135   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1136     {
1137       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1138         n_ip_bytes_this_buffer - ip_header_length : 0;
1139     }
1140   while (1)
1141     {
1142       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1143       n_bytes_left -= n_this_buffer;
1144       if (n_bytes_left == 0)
1145         break;
1146
1147       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1148       p0 = vlib_get_buffer (vm, p0->next_buffer);
1149       data_this_buffer = vlib_buffer_get_current (p0);
1150       n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1151     }
1152
1153   sum16 = ~ip_csum_fold (sum0);
1154
1155   return sum16;
1156 }
1157
1158 u32
1159 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1160 {
1161   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1162   udp_header_t *udp0;
1163   u16 sum16;
1164
1165   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1166           || ip0->protocol == IP_PROTOCOL_UDP);
1167
1168   udp0 = (void *) (ip0 + 1);
1169   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1170     {
1171       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1172                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1173       return p0->flags;
1174     }
1175
1176   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1177
1178   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1179                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1180
1181   return p0->flags;
1182 }
1183 #endif
1184
1185 /* *INDENT-OFF* */
/* The "ip4-local" feature arc: it starts at the "ip4-local" node and its
 * last feature is "ip4-local-end-of-arc". */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
  .last_in_arc = "ip4-local-end-of-arc",
};
1192 /* *INDENT-ON* */
1193
1194 static inline void
1195 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1196                             ip4_header_t * ip, u8 is_udp, u8 * error,
1197                             u8 * good_tcp_udp)
1198 {
1199   u32 flags0;
1200   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1201   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1202   if (is_udp)
1203     {
1204       udp_header_t *udp;
1205       u32 ip_len, udp_len;
1206       i32 len_diff;
1207       udp = ip4_next_header (ip);
1208       /* Verify UDP length. */
1209       ip_len = clib_net_to_host_u16 (ip->length);
1210       udp_len = clib_net_to_host_u16 (udp->length);
1211
1212       len_diff = ip_len - udp_len;
1213       *good_tcp_udp &= len_diff >= 0;
1214       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1215     }
1216 }
1217
/*
 * Checksum-state predicates on a buffer pointer _b.
 *
 * Every argument and every expansion is fully parenthesized so the macros
 * can be negated, compared, or handed a pointer expression without the
 * surrounding operators re-associating with their internals.
 * Note: _b is evaluated more than once; pass an expression without side
 * effects.
 */

/* Non-zero when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    ((((_b)->flags) & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                  \
        || (((_b)->flags) & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* Non-zero when a TCP/UDP packet still needs a software checksum check:
 * the checksum has neither been computed already nor been offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !((((_b)->flags) & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)       \
             || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    (((((_b)->flags) & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)               \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1229
1230 static inline void
1231 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1232                          ip4_header_t * ih, u8 * error)
1233 {
1234   u8 is_udp, is_tcp_udp, good_tcp_udp;
1235
1236   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1237   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1238
1239   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1240     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1241   else
1242     good_tcp_udp = ip4_local_csum_is_valid (b);
1243
1244   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1245   *error = (is_tcp_udp && !good_tcp_udp
1246             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1247 }
1248
1249 static inline void
1250 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1251                             ip4_header_t ** ih, u8 * error)
1252 {
1253   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1254
1255   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1256   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1257
1258   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1259   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1260
1261   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1262   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1263
1264   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1265                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1266     {
1267       if (is_tcp_udp[0])
1268         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1269                                     &good_tcp_udp[0]);
1270       if (is_tcp_udp[1])
1271         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1272                                     &good_tcp_udp[1]);
1273     }
1274
1275   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1276               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1277   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1278               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1279 }
1280
1281 static inline void
1282 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1283                               vlib_buffer_t * b, u16 * next, u8 error,
1284                               u8 head_of_feature_arc)
1285 {
1286   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1287   u32 next_index;
1288
1289   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1290   b->error = error ? error_node->errors[error] : 0;
1291   if (head_of_feature_arc)
1292     {
1293       next_index = *next;
1294       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1295         {
1296           vnet_feature_arc_start (arc_index,
1297                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1298                                   &next_index, b);
1299           *next = next_index;
1300         }
1301     }
1302 }
1303
/* Result of the most recent source-address check, carried across the
 * packets of one frame by ip4_local_inline() so that consecutive packets
 * from the same source can reuse it. */
typedef struct
{
  /* Source address the cached result belongs to. */
  ip4_address_t src;
  /* Load-balance index found for that source. */
  u32 lbi;
  /* Error value the check ended with. */
  u8 error;
  /* Non-zero until the cache holds a usable result; needed because
   * 0.0.0.0 (the initial value of src) is a legitimate source address. */
  u8 first;
} ip4_local_last_check_t;
1311
/*
 * Source-address check for a single buffer.
 *
 * b          - the buffer; its ip.fib_index and ip.adj_index[] metadata
 *              are updated.
 * ip0        - IPv4 header of the packet.
 * last_check - cache of the previous lookup (see ip4_local_last_check_t).
 * error0     - in/out: may be changed from IP4_ERROR_UNKNOWN_PROTOCOL to
 *              IP4_ERROR_SPOOFED_LOCAL_PACKETS or
 *              IP4_ERROR_SRC_LOOKUP_MISS.
 *
 * NOTE(review): last_check->first is only cleared in the else branch,
 * which can only be reached when first is already 0.  With the
 * initialiser used by ip4_local_inline() (first = 1) the cached branch
 * therefore looks unreachable and every packet takes the full lookup.
 * Confirm intent before enabling the cache: the cached branch overwrites
 * *error0 (including an earlier checksum error) and the cache key does
 * not include the FIB index or the destination address.
 */
static inline void
ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
                     ip4_local_last_check_t * last_check, u8 * error0)
{
  ip4_fib_mtrie_leaf_t leaf0;
  ip4_fib_mtrie_t *mtrie0;
  const dpo_id_t *dpo0;
  load_balance_t *lb0;
  u32 lbi0;

  /* A TX sw_if_index set on the buffer overrides the FIB index. */
  vnet_buffer (b)->ip.fib_index =
    vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;

  if (PREDICT_FALSE (last_check->first ||
                     (last_check->src.as_u32 != ip0->src_address.as_u32)))
    {
      /* Look the source address up in the mtrie of the packet's FIB. */
      mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
      leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
      lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);

      vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
      vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;

      lb0 = load_balance_get (lbi0);
      dpo0 = load_balance_get_bucket_i (lb0, 0);

      /*
       * Must have a route to source otherwise we drop the packet.
       * ip4 broadcasts are accepted, e.g. to make dhcp client work
       *
       * The checks are:
       *  - the source is a receive => it's from us => bogus, do this
       *    first since it sets a different error code.
       *  - uRPF check for any route to source - accept if passes.
       *  - allow packets destined to the broadcast address from unknown sources
       */

      *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
                  && dpo0->dpoi_type == DPO_RECEIVE) ?
                 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
      *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
                  && !fib_urpf_check_size (lb0->lb_urpf)
                  && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
                 IP4_ERROR_SRC_LOOKUP_MISS : *error0);

      /* Remember the outcome for the next packet. */
      last_check->src.as_u32 = ip0->src_address.as_u32;
      last_check->lbi = lbi0;
      last_check->error = *error0;
    }
  else
    {
      /* Same source as the cached entry: reuse its result. */
      vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
      vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
      *error0 = last_check->error;
      last_check->first = 0;
    }
}
1372
/*
 * Source-address check for a pair of buffers; two-at-a-time counterpart
 * of ip4_local_check_src().
 *
 * b          - the two buffers; ip.fib_index and ip.adj_index[] metadata
 *              are updated on both.
 * ip         - the two matching IPv4 headers.
 * last_check - cache of the previous lookup; reused only when both
 *              packets have the cached source address.
 * error      - in/out, two entries: may be changed from
 *              IP4_ERROR_UNKNOWN_PROTOCOL to
 *              IP4_ERROR_SPOOFED_LOCAL_PACKETS or
 *              IP4_ERROR_SRC_LOOKUP_MISS.
 *
 * NOTE(review): as in ip4_local_check_src(), last_check->first is cleared
 * only in the else branch, which requires first to be 0 already, so the
 * cached branch looks unreachable with the initialiser used by
 * ip4_local_inline().  Confirm intent.
 */
static inline void
ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
                        ip4_local_last_check_t * last_check, u8 * error)
{
  ip4_fib_mtrie_leaf_t leaf[2];
  ip4_fib_mtrie_t *mtrie[2];
  const dpo_id_t *dpo[2];
  load_balance_t *lb[2];
  u32 not_last_hit;
  u32 lbi[2];

  /* Non-zero when the cache is unusable: nothing cached yet, or either
   * source address differs from the cached one. */
  not_last_hit = last_check->first;
  not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
  not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;

  /* A TX sw_if_index set on the buffer overrides the FIB index. */
  vnet_buffer (b[0])->ip.fib_index =
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[0])->ip.fib_index;

  vnet_buffer (b[1])->ip.fib_index =
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[1])->ip.fib_index;

  if (PREDICT_FALSE (not_last_hit))
    {
      /* Look both source addresses up, interleaving the mtrie steps. */
      mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
      mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;

      leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
      leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);

      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
                                           &ip[0]->src_address, 2);
      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
                                           &ip[1]->src_address, 2);

      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
                                           &ip[0]->src_address, 3);
      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
                                           &ip[1]->src_address, 3);

      lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
      lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);

      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];

      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];

      lb[0] = load_balance_get (lbi[0]);
      lb[1] = load_balance_get (lbi[1]);

      dpo[0] = load_balance_get_bucket_i (lb[0], 0);
      dpo[1] = load_balance_get_bucket_i (lb[1], 0);

      /* Same three rules as ip4_local_check_src(): a source that resolves
       * to a receive DPO is spoofed; otherwise a source with an empty
       * uRPF list is a lookup miss unless the destination is the
       * broadcast address. */
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   dpo[0]->dpoi_type == DPO_RECEIVE) ?
                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   !fib_urpf_check_size (lb[0]->lb_urpf) &&
                   ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
                  ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);

      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   dpo[1]->dpoi_type == DPO_RECEIVE) ?
                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   !fib_urpf_check_size (lb[1]->lb_urpf) &&
                   ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
                  ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);

      /* Only the second packet's result is remembered. */
      last_check->src.as_u32 = ip[1]->src_address.as_u32;
      last_check->lbi = lbi[1];
      last_check->error = error[1];
    }
  else
    {
      /* Both packets match the cached source: reuse its result. */
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;

      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;

      error[0] = last_check->error;
      error[1] = last_check->error;
      last_check->first = 0;
    }
}
1464
/* Packet classes returned by ip4_local_classify().
 * IP_LOCAL_PACKET_TYPE_L4 must stay first (value 0): ip4_local_inline()
 * tests the type as a boolean, treating any non-zero value as "skip the
 * checksum and source checks". */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4,
  IP_LOCAL_PACKET_TYPE_NAT,
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1471
1472 /**
1473  * Determine packet type and next node.
1474  *
1475  * The expectation is that all packets that are not L4 will skip
1476  * checksums and source checks.
1477  */
1478 always_inline u8
1479 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1480 {
1481   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1482
1483   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1484     {
1485       *next = IP_LOCAL_NEXT_REASSEMBLY;
1486       return IP_LOCAL_PACKET_TYPE_FRAG;
1487     }
1488   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1489     {
1490       *next = lm->local_next_by_ip_protocol[ip->protocol];
1491       return IP_LOCAL_PACKET_TYPE_NAT;
1492     }
1493
1494   *next = lm->local_next_by_ip_protocol[ip->protocol];
1495   return IP_LOCAL_PACKET_TYPE_L4;
1496 }
1497
/*
 * Worker shared by the "ip4-local" and "ip4-local-end-of-arc" nodes.
 *
 * vm                  - vlib main.
 * node                - runtime of the calling node.
 * frame               - frame of buffer indices to process.
 * head_of_feature_arc - non-zero for the node that starts the ip4-local
 *                       arc: packets are then checksum- and
 *                       source-checked and handed to the feature arc;
 *                       zero skips all checks.
 *
 * Every buffer is classified, optionally checked, given a next index and
 * error, and the whole frame is enqueued in one call.  Returns the number
 * of buffers in the frame.
 */
static inline uword
ip4_local_inline (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
                  vlib_frame_t * frame, int head_of_feature_arc)
{
  u32 *from, n_left_from;
  /* Error codes are translated through ip4-input's error table. */
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_input_node.index);
  u16 nexts[VLIB_FRAME_SIZE], *next;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  ip4_header_t *ip[2];
  u8 error[2], pt[2];

  ip4_local_last_check_t last_check = {
    /*
     * 0.0.0.0 can appear as the source address of an IP packet,
     * as can any other address, hence the need to use the 'first'
     * member to make sure the .lbi is initialised for the first
     * packet.
     */
    .src = {.as_u32 = 0},
    .lbi = ~0,
    .error = IP4_ERROR_UNKNOWN_PROTOCOL,
    .first = 1,
  };

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  vlib_get_buffers (vm, from, bufs, n_left_from);
  b = bufs;
  next = nexts;

  /* Dual loop: two buffers per iteration.  It runs only while at least
   * six remain because b[4] and b[5] are prefetched. */
  while (n_left_from >= 6)
    {
      u8 not_batch = 0;

      /* Prefetch next iteration. */
      {
        vlib_prefetch_buffer_header (b[4], LOAD);
        vlib_prefetch_buffer_header (b[5], LOAD);

        CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
        CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
      }

      /* IP4_ERROR_UNKNOWN_PROTOCOL doubles as "no error so far". */
      error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;

      ip[0] = vlib_buffer_get_current (b[0]);
      ip[1] = vlib_buffer_get_current (b[1]);

      vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
      vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;

      pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
      pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);

      /* Non-zero when the two packets are of different types. */
      not_batch = pt[0] ^ pt[1];

      /* No checks when not at the head of the arc, or when both packets
       * share the same non-L4 type. */
      if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
        goto skip_checks;

      if (PREDICT_TRUE (not_batch == 0))
        {
          /* Both are L4: use the paired checks. */
          ip4_local_check_l4_csum_x2 (vm, b, ip, error);
          ip4_local_check_src_x2 (b, ip, &last_check, error);
        }
      else
        {
          /* Mixed pair: check only the packet(s) classified as L4. */
          if (!pt[0])
            {
              ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
              ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
            }
          if (!pt[1])
            {
              ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
              ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
            }
        }

    skip_checks:

      ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
                                    head_of_feature_arc);
      ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
                                    head_of_feature_arc);

      b += 2;
      next += 2;
      n_left_from -= 2;
    }

  /* Single loop for the remaining buffers. */
  while (n_left_from > 0)
    {
      error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;

      ip[0] = vlib_buffer_get_current (b[0]);
      vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
      pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);

      /* No checks when not at the head of the arc or for non-L4 types. */
      if (head_of_feature_arc == 0 || pt[0])
        goto skip_check;

      ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
      ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);

    skip_check:

      ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
                                    head_of_feature_arc);

      b += 1;
      next += 1;
      n_left_from -= 1;
    }

  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  return frame->n_vectors;
}
1621
/* Node function for "ip4-local": runs ip4_local_inline() as the head of
 * the ip4-local feature arc, i.e. with checksum and source checks. */
VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                               vlib_frame_t * frame)
{
  return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
}
1627
1628 /* *INDENT-OFF* */
/* Graph-node registration for "ip4-local".  The next-node table lists the
 * built-in dispositions; further next nodes are added at run time by
 * ip4_register_protocol(). */
VLIB_REGISTER_NODE (ip4_local_node) =
{
  .name = "ip4-local",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_next_nodes = IP_LOCAL_N_NEXT,
  .next_nodes =
  {
    [IP_LOCAL_NEXT_DROP] = "ip4-drop",
    [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
    [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
    [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
    [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
  },
};
1644 /* *INDENT-ON* */
1645
1646
/* Node function for "ip4-local-end-of-arc": runs ip4_local_inline() with
 * head_of_feature_arc = 0, so packets are only classified and dispatched;
 * the checksum and source checks are not repeated. */
VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
                                          vlib_node_runtime_t * node,
                                          vlib_frame_t * frame)
{
  return ip4_local_inline (vm, node, frame, 0 /* not head of feature arc */ );
}
1653
1654 /* *INDENT-OFF* */
/* Graph-node registration for "ip4-local-end-of-arc"; declared a sibling
 * of "ip4-local", whose next-node table is indexed by the shared
 * IP_LOCAL_NEXT_* values. */
VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
  .name = "ip4-local-end-of-arc",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,
  .sibling_of = "ip4-local",
};
1662
/* Registers "ip4-local-end-of-arc" as a feature on the "ip4-local" arc;
 * it declares no feature that must run after it. */
VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
  .arc_name = "ip4-local",
  .node_name = "ip4-local-end-of-arc",
  .runs_before = 0, /* not before any other features */
};
1668 /* *INDENT-ON* */
1669
1670 #ifndef CLIB_MARCH_VARIANT
1671 void
1672 ip4_register_protocol (u32 protocol, u32 node_index)
1673 {
1674   vlib_main_t *vm = vlib_get_main ();
1675   ip4_main_t *im = &ip4_main;
1676   ip_lookup_main_t *lm = &im->lookup_main;
1677
1678   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1679   lm->local_next_by_ip_protocol[protocol] =
1680     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1681 }
1682
1683 void
1684 ip4_unregister_protocol (u32 protocol)
1685 {
1686   ip4_main_t *im = &ip4_main;
1687   ip_lookup_main_t *lm = &im->lookup_main;
1688
1689   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1690   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1691 }
1692 #endif
1693
1694 static clib_error_t *
1695 show_ip_local_command_fn (vlib_main_t * vm,
1696                           unformat_input_t * input, vlib_cli_command_t * cmd)
1697 {
1698   ip4_main_t *im = &ip4_main;
1699   ip_lookup_main_t *lm = &im->lookup_main;
1700   int i;
1701
1702   vlib_cli_output (vm, "Protocols handled by ip4_local");
1703   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1704     {
1705       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1706         {
1707           u32 node_index = vlib_get_node (vm,
1708                                           ip4_local_node.index)->
1709             next_nodes[lm->local_next_by_ip_protocol[i]];
1710           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1711                            format_vlib_node_name, vm, node_index);
1712         }
1713     }
1714   return 0;
1715 }
1716
1717
1718
/*?
 * Display the set of protocols handled by the local IPv4 stack.
 *
 * Each line has the form '<em>protocol</em>: <em>node name</em>', naming
 * the graph node that receives local packets of that protocol.
 *
 * @cliexpar
 * Example of how to display local protocol table (illustrative; the
 * exact protocol and node names depend on what has been registered):
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * ICMP: ip4-icmp-input
 * UDP: ip4-udp-lookup
 * @cliexend
?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_local, static) =
{
  .path = "show ip local",
  .function = show_ip_local_command_fn,
  .short_help = "show ip local",
};
1738 /* *INDENT-ON* */
1739
1740 always_inline uword
1741 ip4_arp_inline (vlib_main_t * vm,
1742                 vlib_node_runtime_t * node,
1743                 vlib_frame_t * frame, int is_glean)
1744 {
1745   vnet_main_t *vnm = vnet_get_main ();
1746   ip4_main_t *im = &ip4_main;
1747   ip_lookup_main_t *lm = &im->lookup_main;
1748   u32 *from, *to_next_drop;
1749   uword n_left_from, n_left_to_next_drop, next_index;
1750   u32 thread_index = vm->thread_index;
1751   u64 seed;
1752
1753   if (node->flags & VLIB_NODE_FLAG_TRACE)
1754     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1755
1756   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1757
1758   from = vlib_frame_vector_args (frame);
1759   n_left_from = frame->n_vectors;
1760   next_index = node->cached_next_index;
1761   if (next_index == IP4_ARP_NEXT_DROP)
1762     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1763
1764   while (n_left_from > 0)
1765     {
1766       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1767                            to_next_drop, n_left_to_next_drop);
1768
1769       while (n_left_from > 0 && n_left_to_next_drop > 0)
1770         {
1771           u32 pi0, bi0, adj_index0, sw_if_index0;
1772           ip_adjacency_t *adj0;
1773           vlib_buffer_t *p0, *b0;
1774           ip4_address_t resolve0;
1775           ethernet_arp_header_t *h0;
1776           vnet_hw_interface_t *hw_if0;
1777           u64 r0;
1778
1779           pi0 = from[0];
1780           p0 = vlib_get_buffer (vm, pi0);
1781
1782           from += 1;
1783           n_left_from -= 1;
1784           to_next_drop[0] = pi0;
1785           to_next_drop += 1;
1786           n_left_to_next_drop -= 1;
1787
1788           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1789           adj0 = adj_get (adj_index0);
1790
1791           if (is_glean)
1792             {
1793               /* resolve the packet's destination */
1794               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1795               resolve0 = ip0->dst_address;
1796             }
1797           else
1798             {
1799               /* resolve the incomplete adj */
1800               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1801             }
1802
1803           /* combine the address and interface for the hash key */
1804           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1805           r0 = (u64) resolve0.data_u32 << 32;
1806           r0 |= sw_if_index0;
1807
1808           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1809             {
1810               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1811               continue;
1812             }
1813
1814           /*
1815            * the adj has been updated to a rewrite but the node the DPO that got
1816            * us here hasn't - yet. no big deal. we'll drop while we wait.
1817            */
1818           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1819             {
1820               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1821               continue;
1822             }
1823
1824           /*
1825            * Can happen if the control-plane is programming tables
1826            * with traffic flowing; at least that's today's lame excuse.
1827            */
1828           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1829               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1830             {
1831               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1832               continue;
1833             }
1834           /* Send ARP request. */
1835           h0 =
1836             vlib_packet_template_get_packet (vm,
1837                                              &im->ip4_arp_request_packet_template,
1838                                              &bi0);
1839           /* Seems we're out of buffers */
1840           if (PREDICT_FALSE (!h0))
1841             {
1842               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1843               continue;
1844             }
1845
1846           b0 = vlib_get_buffer (vm, bi0);
1847
1848           /* copy the persistent fields from the original */
1849           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1850
1851           /* Add rewrite/encap string for ARP packet. */
1852           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1853
1854           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1855
1856           /* Src ethernet address in ARP header. */
1857           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
1858                                   hw_if0->hw_address);
1859           if (is_glean)
1860             {
1861               /* The interface's source address is stashed in the Glean Adj */
1862               h0->ip4_over_ethernet[0].ip4 =
1863                 adj0->sub_type.glean.receive_addr.ip4;
1864             }
1865           else
1866             {
1867               /* Src IP address in ARP header. */
1868               if (ip4_src_address_for_packet (lm, sw_if_index0,
1869                                               &h0->ip4_over_ethernet[0].ip4))
1870                 {
1871                   /* No source address available */
1872                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1873                   vlib_buffer_free (vm, &bi0, 1);
1874                   continue;
1875                 }
1876             }
1877           h0->ip4_over_ethernet[1].ip4 = resolve0;
1878
1879           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1880
1881           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1882           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1883           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1884
1885           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1886
1887           vlib_set_next_frame_buffer (vm, node,
1888                                       adj0->rewrite_header.next_index, bi0);
1889         }
1890
1891       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1892     }
1893
1894   return frame->n_vectors;
1895 }
1896
1897 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1898                              vlib_frame_t * frame)
1899 {
1900   return (ip4_arp_inline (vm, node, frame, 0));
1901 }
1902
1903 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1904                                vlib_frame_t * frame)
1905 {
1906   return (ip4_arp_inline (vm, node, frame, 1));
1907 }
1908
/*
 * Counter descriptions for the ip4-arp and ip4-glean nodes, indexed by
 * IP4_ARP_ERROR_* code. Both node registrations reference this table.
 */
static char *ip4_arp_error_strings[] = {
  [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
  [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
  [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
  [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
  [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
  [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
};
1917
1918 /* *INDENT-OFF* */
/*
 * Registration of the "ip4-arp" graph node. Only the drop next is
 * declared statically; ip4_arp_inline() also sends generated requests
 * to the adjacency's rewrite_header.next_index.
 */
VLIB_REGISTER_NODE (ip4_arp_node) =
{
  .name = "ip4-arp",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
  .error_strings = ip4_arp_error_strings,
  .n_next_nodes = IP4_ARP_N_NEXT,
  .next_nodes =
  {
    [IP4_ARP_NEXT_DROP] = "error-drop",
  },
};
1932
/*
 * Registration of the "ip4-glean" graph node. It shares the error
 * strings and the static drop next with "ip4-arp".
 */
VLIB_REGISTER_NODE (ip4_glean_node) =
{
  .name = "ip4-glean",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
  .error_strings = ip4_arp_error_strings,
  .n_next_nodes = IP4_ARP_N_NEXT,
  .next_nodes = {
  [IP4_ARP_NEXT_DROP] = "error-drop",
  },
};
1945 /* *INDENT-ON* */
1946
/*
 * X-macro listing the IP4_ARP_ERROR_* suffixes that arp_notrace_init()
 * registers with the pcap drop-trace filter. The caller defines _(a)
 * before expanding it.
 */
#define foreach_notrace_ip4_arp_error           \
_(THROTTLED)                                    \
_(RESOLVED)                                     \
_(NO_BUFFERS)                                   \
_(REQUEST_SENT)                                 \
_(NON_ARP_ADJ)                                  \
_(NO_SOURCE_ADDRESS)
1954
1955 static clib_error_t *
1956 arp_notrace_init (vlib_main_t * vm)
1957 {
1958   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1959
1960   /* don't trace ARP request packets */
1961 #define _(a)                                    \
1962     vnet_pcap_drop_trace_filter_add_del         \
1963         (rt->errors[IP4_ARP_ERROR_##a],         \
1964          1 /* is_add */);
1965   foreach_notrace_ip4_arp_error;
1966 #undef _
1967   return 0;
1968 }
1969
1970 VLIB_INIT_FUNCTION (arp_notrace_init);
1971
1972
1973 #ifndef CLIB_MARCH_VARIANT
1974 /* Send an ARP request to see if given destination is reachable on given interface. */
1975 clib_error_t *
1976 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1977                     u8 refresh)
1978 {
1979   vnet_main_t *vnm = vnet_get_main ();
1980   ip4_main_t *im = &ip4_main;
1981   ethernet_arp_header_t *h;
1982   ip4_address_t *src;
1983   ip_interface_address_t *ia;
1984   ip_adjacency_t *adj;
1985   vnet_hw_interface_t *hi;
1986   vnet_sw_interface_t *si;
1987   vlib_buffer_t *b;
1988   adj_index_t ai;
1989   u32 bi = 0;
1990   u8 unicast_rewrite = 0;
1991
1992   si = vnet_get_sw_interface (vnm, sw_if_index);
1993
1994   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1995     {
1996       return clib_error_return (0, "%U: interface %U down",
1997                                 format_ip4_address, dst,
1998                                 format_vnet_sw_if_index_name, vnm,
1999                                 sw_if_index);
2000     }
2001
2002   src =
2003     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2004   if (!src)
2005     {
2006       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2007       return clib_error_return
2008         (0,
2009          "no matching interface address for destination %U (interface %U)",
2010          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2011          sw_if_index);
2012     }
2013
2014   h = vlib_packet_template_get_packet (vm,
2015                                        &im->ip4_arp_request_packet_template,
2016                                        &bi);
2017
2018   if (!h)
2019     return clib_error_return (0, "ARP request packet allocation failed");
2020
2021   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2022   if (PREDICT_FALSE (!hi->hw_address))
2023     {
2024       return clib_error_return (0, "%U: interface %U do not support ip probe",
2025                                 format_ip4_address, dst,
2026                                 format_vnet_sw_if_index_name, vnm,
2027                                 sw_if_index);
2028     }
2029
2030   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2031
2032   h->ip4_over_ethernet[0].ip4 = src[0];
2033   h->ip4_over_ethernet[1].ip4 = dst[0];
2034
2035   b = vlib_get_buffer (vm, bi);
2036   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2037     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2038
2039   ip46_address_t nh = {
2040     .ip4 = *dst,
2041   };
2042
2043   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2044                             VNET_LINK_IP4, &nh, sw_if_index);
2045   adj = adj_get (ai);
2046
2047   /* Peer has been previously resolved, retrieve glean adj instead */
2048   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2049     {
2050       if (refresh)
2051         unicast_rewrite = 1;
2052       else
2053         {
2054           adj_unlock (ai);
2055           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2056                                       VNET_LINK_IP4, sw_if_index, &nh);
2057           adj = adj_get (ai);
2058         }
2059     }
2060
2061   /* Add encapsulation string for software interface (e.g. ethernet header). */
2062   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2063   if (unicast_rewrite)
2064     {
2065       u16 *etype = vlib_buffer_get_current (b) - 2;
2066       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2067     }
2068   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2069
2070   {
2071     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2072     u32 *to_next = vlib_frame_vector_args (f);
2073     to_next[0] = bi;
2074     f->n_vectors = 1;
2075     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2076   }
2077
2078   adj_unlock (ai);
2079   return /* no error */ 0;
2080 }
2081 #endif
2082
/* Static next-node slots of the ip4-rewrite node family. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  /* Last: number of static slots */
  IP4_REWRITE_N_NEXT = 3
} ip4_rewrite_next_t;
2090
/**
 * The bits of an IPv4 address to mask to construct a multicast
 * MAC address. The constant is chosen per CLIB_ARCH_IS_BIG_ENDIAN.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif
2100
2101 always_inline void
2102 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2103                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2104 {
2105   if (packet_len > adj_packet_bytes)
2106     {
2107       *error = IP4_ERROR_MTU_EXCEEDED;
2108       if (df)
2109         {
2110           icmp4_error_set_vnet_buffer
2111             (b, ICMP4_destination_unreachable,
2112              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2113              adj_packet_bytes);
2114           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2115         }
2116       else
2117         {
2118           /* IP fragmentation */
2119           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2120                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2121           *next = IP4_REWRITE_NEXT_FRAGMENT;
2122         }
2123     }
2124 }
2125
2126 /* Decrement TTL & update checksum.
2127    Works either endian, so no need for byte swap. */
2128 static_always_inline void
2129 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2130                             u32 * error)
2131 {
2132   i32 ttl;
2133   u32 checksum;
2134   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2135     {
2136       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2137       return;
2138     }
2139
2140   ttl = ip->ttl;
2141
2142   /* Input node should have reject packets with ttl 0. */
2143   ASSERT (ip->ttl > 0);
2144
2145   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2146   checksum += checksum >= 0xffff;
2147
2148   ip->checksum = checksum;
2149   ttl -= 1;
2150   ip->ttl = ttl;
2151
2152   /*
2153    * If the ttl drops below 1 when forwarding, generate
2154    * an ICMP response.
2155    */
2156   if (PREDICT_FALSE (ttl <= 0))
2157     {
2158       *error = IP4_ERROR_TIME_EXPIRED;
2159       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2160       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2161                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2162                                    0);
2163       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2164     }
2165
2166   /* Verify checksum. */
2167   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2168           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2169 }
2170
2171
/**
 * Common body of the IPv4 rewrite nodes.
 *
 * For each buffer of the frame: run the TTL/checksum step, check the
 * packet length against the adjacency's max_l3_packet_bytes and, if no
 * error was raised, grow the buffer by the adjacency's rewrite length,
 * set the TX interface and take the next index from the adjacency (or
 * from the output feature arc when the adjacency has features). On
 * error the buffer's error is taken from the ip4_input_node runtime and
 * the next index is whatever the checks set, defaulting to
 * IP4_REWRITE_NEXT_DROP. The rewrite string is copied in front of the
 * IP header in both cases.
 *
 * @param vm          vlib main of the current thread
 * @param node        runtime of the calling node
 * @param frame       frame of buffer indices to process
 * @param do_counters non-zero to update the per-adjacency counters
 * @param is_midchain non-zero to run calc_checksums() and the
 *                    adjacency's midchain fixup function
 * @param is_mcast    non-zero to flag packets whose TX interface equals
 *                    their RX interface and to patch the multicast MAC
 * @param do_gso      non-zero to use gso_mtu_sz() instead of the IP
 *                    length for buffers flagged VNET_BUFFER_F_GSO
 * @return number of buffers in the frame
 */
always_inline uword
ip4_rewrite_inline_with_gso (vlib_main_t * vm,
                             vlib_node_runtime_t * node,
                             vlib_frame_t * frame,
                             int do_counters, int is_midchain, int is_mcast,
                             int do_gso)
{
  ip_lookup_main_t *lm = &ip4_main.lookup_main;
  u32 *from = vlib_frame_vector_args (frame);
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u16 nexts[VLIB_FRAME_SIZE], *next;
  u32 n_left_from;
  /* IP4_ERROR_* codes are resolved against the ip4_input_node runtime */
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_input_node.index);

  n_left_from = frame->n_vectors;
  u32 thread_index = vm->thread_index;

  vlib_get_buffers (vm, from, bufs, n_left_from);
  /* default every buffer to the drop next; success paths overwrite it */
  clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);

  if (n_left_from >= 6)
    {
      int i;
      for (i = 2; i < 6; i++)
        vlib_prefetch_buffer_header (bufs[i], LOAD);
    }

  next = nexts;
  b = bufs;
  /*
   * Dual loop: two buffers per iteration. It only runs while at least
   * eight buffers remain because it prefetches b[6] and b[7].
   */
  while (n_left_from >= 8)
    {
      ip_adjacency_t *adj0, *adj1;
      ip4_header_t *ip0, *ip1;
      u32 rw_len0, error0, adj_index0;
      u32 rw_len1, error1, adj_index1;
      u32 tx_sw_if_index0, tx_sw_if_index1;
      u8 *p;

      vlib_prefetch_buffer_header (b[6], LOAD);
      vlib_prefetch_buffer_header (b[7], LOAD);

      adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];

      /*
       * pre-fetch the per-adjacency counters
       */
      if (do_counters)
        {
          vlib_prefetch_combined_counter (&adjacency_counters,
                                          thread_index, adj_index0);
          vlib_prefetch_combined_counter (&adjacency_counters,
                                          thread_index, adj_index1);
        }

      ip0 = vlib_buffer_get_current (b[0]);
      ip1 = vlib_buffer_get_current (b[1]);

      error0 = error1 = IP4_ERROR_NONE;

      ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
      ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);

      /* Rewrite packet header and updates lengths. */
      adj0 = adj_get (adj_index0);
      adj1 = adj_get (adj_index1);

      /* Worth pipelining. No guarantee that adj0,1 are hot... */
      rw_len0 = adj0[0].rewrite_header.data_bytes;
      rw_len1 = adj1[0].rewrite_header.data_bytes;
      vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
      vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;

      /*
       * Prefetch packet data of the next pair: the cache line before the
       * current data for store and the line at current data for load.
       */
      p = vlib_buffer_get_current (b[2]);
      CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
      CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);

      p = vlib_buffer_get_current (b[3]);
      CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
      CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);

      /* Check MTU of outgoing interface. */
      u16 ip0_len = clib_net_to_host_u16 (ip0->length);
      u16 ip1_len = clib_net_to_host_u16 (ip1->length);

      /* GSO buffers are checked against gso_mtu_sz(), not the IP length */
      if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
        ip0_len = gso_mtu_sz (b[0]);
      if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
        ip1_len = gso_mtu_sz (b[1]);

      ip4_mtu_check (b[0], ip0_len,
                     adj0[0].rewrite_header.max_l3_packet_bytes,
                     ip0->flags_and_fragment_offset &
                     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
                     next + 0, &error0);
      ip4_mtu_check (b[1], ip1_len,
                     adj1[0].rewrite_header.max_l3_packet_bytes,
                     ip1->flags_and_fragment_offset &
                     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
                     next + 1, &error1);

      if (is_mcast)
        {
          /* TX interface equal to RX interface replaces any earlier error */
          error0 = ((adj0[0].rewrite_header.sw_if_index ==
                     vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
                    IP4_ERROR_SAME_INTERFACE : error0);
          error1 = ((adj1[0].rewrite_header.sw_if_index ==
                     vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
                    IP4_ERROR_SAME_INTERFACE : error1);
        }

      /* Don't adjust the buffer for ttl issue; icmp-error node wants
       * to see the IP header */
      if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
        {
          u32 next_index = adj0[0].rewrite_header.next_index;
          b[0]->current_data -= rw_len0;
          b[0]->current_length += rw_len0;
          tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
          vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;

          /* output features may replace the adjacency's next index */
          if (PREDICT_FALSE
              (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
            vnet_feature_arc_start (lm->output_feature_arc_index,
                                    tx_sw_if_index0, &next_index, b[0]);
          next[0] = next_index;
        }
      else
        {
          b[0]->error = error_node->errors[error0];
        }
      if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
        {
          u32 next_index = adj1[0].rewrite_header.next_index;
          b[1]->current_data -= rw_len1;
          b[1]->current_length += rw_len1;

          tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
          vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;

          if (PREDICT_FALSE
              (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
            vnet_feature_arc_start (lm->output_feature_arc_index,
                                    tx_sw_if_index1, &next_index, b[1]);
          next[1] = next_index;
        }
      else
        {
          b[1]->error = error_node->errors[error1];
        }
      if (is_midchain)
        {
          calc_checksums (vm, b[0]);
          calc_checksums (vm, b[1]);
        }
      /* Guess we are only writing on simple Ethernet header. */
      vnet_rewrite_two_headers (adj0[0], adj1[0],
                                ip0, ip1, sizeof (ethernet_header_t));

      /*
       * Bump the per-adjacency counters
       */
      if (do_counters)
        {
          vlib_increment_combined_counter
            (&adjacency_counters,
             thread_index,
             adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);

          vlib_increment_combined_counter
            (&adjacency_counters,
             thread_index,
             adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
        }

      if (is_midchain)
        {
          /* optional per-adjacency fixup, run after the rewrite is in place */
          if (adj0->sub_type.midchain.fixup_func)
            adj0->sub_type.midchain.fixup_func
              (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
          if (adj1->sub_type.midchain.fixup_func)
            adj1->sub_type.midchain.fixup_func
              (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
        }

      if (is_mcast)
        {
          /*
           * copy bytes from the IP address into the MAC rewrite
           */
          vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
                                      adj0->rewrite_header.dst_mcast_offset,
                                      &ip0->dst_address.as_u32, (u8 *) ip0);
          vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
                                      adj1->rewrite_header.dst_mcast_offset,
                                      &ip1->dst_address.as_u32, (u8 *) ip1);
        }

      next += 2;
      b += 2;
      n_left_from -= 2;
    }

  /* Single loop: same steps as above, one buffer at a time, no prefetch
   * of later buffers. */
  while (n_left_from > 0)
    {
      ip_adjacency_t *adj0;
      ip4_header_t *ip0;
      u32 rw_len0, adj_index0, error0;
      u32 tx_sw_if_index0;

      adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];

      adj0 = adj_get (adj_index0);

      if (do_counters)
        vlib_prefetch_combined_counter (&adjacency_counters,
                                        thread_index, adj_index0);

      ip0 = vlib_buffer_get_current (b[0]);

      error0 = IP4_ERROR_NONE;

      ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);


      /* Update packet buffer attributes/set output interface. */
      rw_len0 = adj0[0].rewrite_header.data_bytes;
      vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;

      /* Check MTU of outgoing interface. */
      u16 ip0_len = clib_net_to_host_u16 (ip0->length);
      if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
        ip0_len = gso_mtu_sz (b[0]);

      ip4_mtu_check (b[0], ip0_len,
                     adj0[0].rewrite_header.max_l3_packet_bytes,
                     ip0->flags_and_fragment_offset &
                     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
                     next + 0, &error0);

      if (is_mcast)
        {
          error0 = ((adj0[0].rewrite_header.sw_if_index ==
                     vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
                    IP4_ERROR_SAME_INTERFACE : error0);
        }

      /* Don't adjust the buffer for ttl issue; icmp-error node wants
       * to see the IP header */
      if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
        {
          u32 next_index = adj0[0].rewrite_header.next_index;
          b[0]->current_data -= rw_len0;
          b[0]->current_length += rw_len0;
          tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
          vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;

          if (PREDICT_FALSE
              (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
            vnet_feature_arc_start (lm->output_feature_arc_index,
                                    tx_sw_if_index0, &next_index, b[0]);
          next[0] = next_index;
        }
      else
        {
          b[0]->error = error_node->errors[error0];
        }
      if (is_midchain)
        {
          calc_checksums (vm, b[0]);
        }
      /* Guess we are only writing on simple Ethernet header. */
      vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));

      if (do_counters)
        vlib_increment_combined_counter
          (&adjacency_counters,
           thread_index, adj_index0, 1,
           vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);

      if (is_midchain)
        {
          if (adj0->sub_type.midchain.fixup_func)
            adj0->sub_type.midchain.fixup_func
              (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
        }

      if (is_mcast)
        {
          /*
           * copy bytes from the IP address into the MAC rewrite
           */
          vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
                                      adj0->rewrite_header.dst_mcast_offset,
                                      &ip0->dst_address.as_u32, (u8 *) ip0);
        }

      next += 1;
      b += 1;
      n_left_from -= 1;
    }


  /* Need to do trace after rewrites to pick up new packet data. */
  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  /* dispatch every buffer to the next index recorded for it in nexts[] */
  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  return frame->n_vectors;
}
2483
2484 always_inline uword
2485 ip4_rewrite_inline (vlib_main_t * vm,
2486                     vlib_node_runtime_t * node,
2487                     vlib_frame_t * frame,
2488                     int do_counters, int is_midchain, int is_mcast)
2489 {
2490   vnet_main_t *vnm = vnet_get_main ();
2491   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2492     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2493                                         is_midchain, is_mcast,
2494                                         1 /* do_gso */ );
2495   else
2496     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2497                                         is_midchain, is_mcast,
2498                                         0 /* no do_gso */ );
2499 }
2500
2501
2502 /** @brief IPv4 rewrite node.
2503     @node ip4-rewrite
2504
2505     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2506     header checksum, fetch the ip adjacency, check the outbound mtu,
2507     apply the adjacency rewrite, and send pkts to the adjacency
2508     rewrite header's rewrite_next_index.
2509
2510     @param vm vlib_main_t corresponding to the current thread
2511     @param node vlib_node_runtime_t
2512     @param frame vlib_frame_t whose contents should be dispatched
2513
2514     @par Graph mechanics: buffer metadata, next index usage
2515
2516     @em Uses:
2517     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2518         - the rewrite adjacency index
2519     - <code>adj->lookup_next_index</code>
2520         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2521           the packet will be dropped.
2522     - <code>adj->rewrite_header</code>
2523         - Rewrite string length, rewrite string, next_index
2524
2525     @em Sets:
2526     - <code>b->current_data, b->current_length</code>
2527         - Updated net of applying the rewrite string
2528
2529     <em>Next Indices:</em>
2530     - <code> adj->rewrite_header.next_index </code>
2531       or @c ip4-drop
2532 */
2533
2534 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2535                                  vlib_frame_t * frame)
2536 {
2537   if (adj_are_counters_enabled ())
2538     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2539   else
2540     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2541 }
2542
2543 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2544                                        vlib_node_runtime_t * node,
2545                                        vlib_frame_t * frame)
2546 {
2547   if (adj_are_counters_enabled ())
2548     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2549   else
2550     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2551 }
2552
2553 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2554                                   vlib_node_runtime_t * node,
2555                                   vlib_frame_t * frame)
2556 {
2557   if (adj_are_counters_enabled ())
2558     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2559   else
2560     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2561 }
2562
2563 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2564                                        vlib_node_runtime_t * node,
2565                                        vlib_frame_t * frame)
2566 {
2567   if (adj_are_counters_enabled ())
2568     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2569   else
2570     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2571 }
2572
2573 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2574                                         vlib_node_runtime_t * node,
2575                                         vlib_frame_t * frame)
2576 {
2577   if (adj_are_counters_enabled ())
2578     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2579   else
2580     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2581 }
2582
2583 /* *INDENT-OFF* */
/*
 * Registration of "ip4-rewrite". It declares the static next nodes
 * indexed by ip4_rewrite_next_t; the other rewrite nodes in this file
 * are registered as siblings of this one.
 */
VLIB_REGISTER_NODE (ip4_rewrite_node) = {
  .name = "ip4-rewrite",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_rewrite_trace,

  .n_next_nodes = IP4_REWRITE_N_NEXT,
  .next_nodes = {
    [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
    [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
  },
};
2597
/*
 * Registration of "ip4-rewrite-bcast" as a sibling of "ip4-rewrite"
 * (presumably sharing its next nodes - confirm against the vlib node
 * documentation).
 */
VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
  .name = "ip4-rewrite-bcast",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_rewrite_trace,
  .sibling_of = "ip4-rewrite",
};
2605
2606 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2607   .name = "ip4-rewrite-mcast",
2608   .vector_size = sizeof (u32),
2609
2610   .format_trace = format_ip4_rewrite_trace,
2611   .sibling_of = "ip4-rewrite",
2612 };
2613
2614 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2615   .name = "ip4-mcast-midchain",
2616   .vector_size = sizeof (u32),
2617
2618   .format_trace = format_ip4_rewrite_trace,
2619   .sibling_of = "ip4-rewrite",
2620 };
2621
2622 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2623   .name = "ip4-midchain",
2624   .vector_size = sizeof (u32),
2625   .format_trace = format_ip4_forward_next_trace,
2626   .sibling_of =  "ip4-rewrite",
2627 };
/* *INDENT-ON* */
2629
2630 static int
2631 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2632 {
2633   ip4_fib_mtrie_t *mtrie0;
2634   ip4_fib_mtrie_leaf_t leaf0;
2635   u32 lbi0;
2636
2637   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2638
2639   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2640   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2641   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2642
2643   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2644
2645   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2646 }
2647
2648 static clib_error_t *
2649 test_lookup_command_fn (vlib_main_t * vm,
2650                         unformat_input_t * input, vlib_cli_command_t * cmd)
2651 {
2652   ip4_fib_t *fib;
2653   u32 table_id = 0;
2654   f64 count = 1;
2655   u32 n;
2656   int i;
2657   ip4_address_t ip4_base_address;
2658   u64 errors = 0;
2659
2660   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2661     {
2662       if (unformat (input, "table %d", &table_id))
2663         {
2664           /* Make sure the entry exists. */
2665           fib = ip4_fib_get (table_id);
2666           if ((fib) && (fib->index != table_id))
2667             return clib_error_return (0, "<fib-index> %d does not exist",
2668                                       table_id);
2669         }
2670       else if (unformat (input, "count %f", &count))
2671         ;
2672
2673       else if (unformat (input, "%U",
2674                          unformat_ip4_address, &ip4_base_address))
2675         ;
2676       else
2677         return clib_error_return (0, "unknown input `%U'",
2678                                   format_unformat_error, input);
2679     }
2680
2681   n = count;
2682
2683   for (i = 0; i < n; i++)
2684     {
2685       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2686         errors++;
2687
2688       ip4_base_address.as_u32 =
2689         clib_host_to_net_u32 (1 +
2690                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2691     }
2692
2693   if (errors)
2694     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2695   else
2696     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2697
2698   return 0;
2699 }
2700
2701 /*?
2702  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2703  * given FIB table to determine if there is a conflict with the
2704  * adjacency table. The fib-id can be determined by using the
2705  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2706  * of 0 is used.
2707  *
2708  * @todo This command uses fib-id, other commands use table-id (not
2709  * just a name, they are different indexes). Would like to change this
2710  * to table-id for consistency.
2711  *
2712  * @cliexpar
2713  * Example of how to run the test lookup command:
2714  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2715  * No errors in 2 lookups
2716  * @cliexend
2717 ?*/
2718 /* *INDENT-OFF* */
2719 VLIB_CLI_COMMAND (lookup_test_command, static) =
2720 {
2721   .path = "test lookup",
2722   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2723   .function = test_lookup_command_fn,
2724 };
2725 /* *INDENT-ON* */
2726
2727 #ifndef CLIB_MARCH_VARIANT
2728 int
2729 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2730 {
2731   u32 fib_index;
2732
2733   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2734
2735   if (~0 == fib_index)
2736     return VNET_API_ERROR_NO_SUCH_FIB;
2737
2738   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2739                                   flow_hash_config);
2740
2741   return 0;
2742 }
2743 #endif
2744
2745 static clib_error_t *
2746 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2747                              unformat_input_t * input,
2748                              vlib_cli_command_t * cmd)
2749 {
2750   int matched = 0;
2751   u32 table_id = 0;
2752   u32 flow_hash_config = 0;
2753   int rv;
2754
2755   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2756     {
2757       if (unformat (input, "table %d", &table_id))
2758         matched = 1;
2759 #define _(a,v) \
2760     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2761       foreach_flow_hash_bit
2762 #undef _
2763         else
2764         break;
2765     }
2766
2767   if (matched == 0)
2768     return clib_error_return (0, "unknown input `%U'",
2769                               format_unformat_error, input);
2770
2771   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2772   switch (rv)
2773     {
2774     case 0:
2775       break;
2776
2777     case VNET_API_ERROR_NO_SUCH_FIB:
2778       return clib_error_return (0, "no such FIB table %d", table_id);
2779
2780     default:
2781       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2782       break;
2783     }
2784
2785   return 0;
2786 }
2787
2788 /*?
2789  * Configure the set of IPv4 fields used by the flow hash.
2790  *
2791  * @cliexpar
2792  * Example of how to set the flow hash on a given table:
2793  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of displaying the configured flow hash:
2795  * @cliexstart{show ip fib}
2796  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2797  * 0.0.0.0/0
2798  *   unicast-ip4-chain
2799  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2800  *     [0] [@0]: dpo-drop ip6
2801  * 0.0.0.0/32
2802  *   unicast-ip4-chain
2803  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2804  *     [0] [@0]: dpo-drop ip6
2805  * 224.0.0.0/8
2806  *   unicast-ip4-chain
2807  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2808  *     [0] [@0]: dpo-drop ip6
2809  * 6.0.1.2/32
2810  *   unicast-ip4-chain
2811  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2812  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2813  * 7.0.0.1/32
2814  *   unicast-ip4-chain
2815  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2816  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2817  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2818  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2819  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2820  * 240.0.0.0/8
2821  *   unicast-ip4-chain
2822  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2823  *     [0] [@0]: dpo-drop ip6
2824  * 255.255.255.255/32
2825  *   unicast-ip4-chain
2826  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2827  *     [0] [@0]: dpo-drop ip6
2828  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2829  * 0.0.0.0/0
2830  *   unicast-ip4-chain
2831  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2832  *     [0] [@0]: dpo-drop ip6
2833  * 0.0.0.0/32
2834  *   unicast-ip4-chain
2835  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2836  *     [0] [@0]: dpo-drop ip6
2837  * 172.16.1.0/24
2838  *   unicast-ip4-chain
2839  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2840  *     [0] [@4]: ipv4-glean: af_packet0
2841  * 172.16.1.1/32
2842  *   unicast-ip4-chain
2843  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2844  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2845  * 172.16.1.2/32
2846  *   unicast-ip4-chain
2847  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2848  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2849  * 172.16.2.0/24
2850  *   unicast-ip4-chain
2851  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2852  *     [0] [@4]: ipv4-glean: af_packet1
2853  * 172.16.2.1/32
2854  *   unicast-ip4-chain
2855  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2856  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2857  * 224.0.0.0/8
2858  *   unicast-ip4-chain
2859  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2860  *     [0] [@0]: dpo-drop ip6
2861  * 240.0.0.0/8
2862  *   unicast-ip4-chain
2863  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2864  *     [0] [@0]: dpo-drop ip6
2865  * 255.255.255.255/32
2866  *   unicast-ip4-chain
2867  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2868  *     [0] [@0]: dpo-drop ip6
2869  * @cliexend
2870 ?*/
2871 /* *INDENT-OFF* */
2872 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2873 {
2874   .path = "set ip flow-hash",
2875   .short_help =
2876   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2877   .function = set_ip_flow_hash_command_fn,
2878 };
2879 /* *INDENT-ON* */
2880
2881 #ifndef CLIB_MARCH_VARIANT
2882 int
2883 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2884                              u32 table_index)
2885 {
2886   vnet_main_t *vnm = vnet_get_main ();
2887   vnet_interface_main_t *im = &vnm->interface_main;
2888   ip4_main_t *ipm = &ip4_main;
2889   ip_lookup_main_t *lm = &ipm->lookup_main;
2890   vnet_classify_main_t *cm = &vnet_classify_main;
2891   ip4_address_t *if_addr;
2892
2893   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2894     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2895
2896   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2897     return VNET_API_ERROR_NO_SUCH_ENTRY;
2898
2899   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2900   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2901
2902   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2903
2904   if (NULL != if_addr)
2905     {
2906       fib_prefix_t pfx = {
2907         .fp_len = 32,
2908         .fp_proto = FIB_PROTOCOL_IP4,
2909         .fp_addr.ip4 = *if_addr,
2910       };
2911       u32 fib_index;
2912
2913       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2914                                                        sw_if_index);
2915
2916
2917       if (table_index != (u32) ~ 0)
2918         {
2919           dpo_id_t dpo = DPO_INVALID;
2920
2921           dpo_set (&dpo,
2922                    DPO_CLASSIFY,
2923                    DPO_PROTO_IP4,
2924                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2925
2926           fib_table_entry_special_dpo_add (fib_index,
2927                                            &pfx,
2928                                            FIB_SOURCE_CLASSIFY,
2929                                            FIB_ENTRY_FLAG_NONE, &dpo);
2930           dpo_reset (&dpo);
2931         }
2932       else
2933         {
2934           fib_table_entry_special_remove (fib_index,
2935                                           &pfx, FIB_SOURCE_CLASSIFY);
2936         }
2937     }
2938
2939   return 0;
2940 }
2941 #endif
2942
2943 static clib_error_t *
2944 set_ip_classify_command_fn (vlib_main_t * vm,
2945                             unformat_input_t * input,
2946                             vlib_cli_command_t * cmd)
2947 {
2948   u32 table_index = ~0;
2949   int table_index_set = 0;
2950   u32 sw_if_index = ~0;
2951   int rv;
2952
2953   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2954     {
2955       if (unformat (input, "table-index %d", &table_index))
2956         table_index_set = 1;
2957       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2958                          vnet_get_main (), &sw_if_index))
2959         ;
2960       else
2961         break;
2962     }
2963
2964   if (table_index_set == 0)
2965     return clib_error_return (0, "classify table-index must be specified");
2966
2967   if (sw_if_index == ~0)
2968     return clib_error_return (0, "interface / subif must be specified");
2969
2970   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2971
2972   switch (rv)
2973     {
2974     case 0:
2975       break;
2976
2977     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2978       return clib_error_return (0, "No such interface");
2979
2980     case VNET_API_ERROR_NO_SUCH_ENTRY:
2981       return clib_error_return (0, "No such classifier table");
2982     }
2983   return 0;
2984 }
2985
2986 /*?
2987  * Assign a classification table to an interface. The classification
2988  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2990  * on an interface.
2991  *
2992  * @cliexpar
2993  * Example of how to assign a classification table to an interface:
2994  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2995 ?*/
2996 /* *INDENT-OFF* */
2997 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2998 {
2999     .path = "set ip classify",
3000     .short_help =
3001     "set ip classify intfc <interface> table-index <classify-idx>",
3002     .function = set_ip_classify_command_fn,
3003 };
3004 /* *INDENT-ON* */
3005
3006 static clib_error_t *
3007 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3008 {
3009   ip4_main_t *im = &ip4_main;
3010   uword heapsize = 0;
3011
3012   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3013     {
3014       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3015         ;
3016       else
3017         return clib_error_return (0,
3018                                   "invalid heap-size parameter `%U'",
3019                                   format_unformat_error, input);
3020     }
3021
3022   im->mtrie_heap_size = heapsize;
3023
3024   return 0;
3025 }
3026
3027 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3028
3029 /*
3030  * fd.io coding-style-patch-verification: ON
3031  *
3032  * Local Variables:
3033  * eval: (c-set-style "gnu")
3034  * End:
3035  */