952915f5317c9c36804cfa7261ecac960ae2d426
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57
58 #include <vnet/ip/ip4_forward.h>
59 #include <vnet/interface_output.h>
60 #include <vnet/classify/vnet_classify.h>
61
62 /** @brief IPv4 lookup node.
63     @node ip4-lookup
64
65     This is the main IPv4 lookup dispatch node.
66
67     @param vm vlib_main_t corresponding to the current thread
68     @param node vlib_node_runtime_t
69     @param frame vlib_frame_t whose contents should be dispatched
70
71     @par Graph mechanics: buffer metadata, next index usage
72
73     @em Uses:
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
75         - Indicates the @c sw_if_index value of the interface that the
76           packet was received on.
77     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
78         - When the value is @c ~0 then the node performs a longest prefix
79           match (LPM) for the packet destination address in the FIB attached
80           to the receive interface.
81         - Otherwise perform LPM for the packet destination address in the
82           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
83           value (0, 1, ...) and not a VRF id.
84
85     @em Sets:
86     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
87         - The lookup result adjacency index.
88
89     <em>Next Index:</em>
90     - Dispatches the packet to the node index found in
91       ip_adjacency_t @c adj->lookup_next_index
92       (where @c adj is the lookup result adjacency).
93 */
94 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
95                                 vlib_frame_t * frame)
96 {
97   return ip4_lookup_inline (vm, node, frame);
98 }
99
100 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
101
102 /* *INDENT-OFF* */
103 VLIB_REGISTER_NODE (ip4_lookup_node) =
104 {
105   .name = "ip4-lookup",
106   .vector_size = sizeof (u32),
107   .format_trace = format_ip4_lookup_trace,
108   .n_next_nodes = IP_LOOKUP_N_NEXT,
109   .next_nodes = IP4_LOOKUP_NEXT_NODES,
110 };
111 /* *INDENT-ON* */
112
113 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
114                                       vlib_node_runtime_t * node,
115                                       vlib_frame_t * frame)
116 {
117   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
118   u32 n_left, *from;
119   u32 thread_index = vm->thread_index;
120   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
121   u16 nexts[VLIB_FRAME_SIZE], *next;
122
123   from = vlib_frame_vector_args (frame);
124   n_left = frame->n_vectors;
125   next = nexts;
126
127   vlib_get_buffers (vm, from, bufs, n_left);
128
129   while (n_left >= 4)
130     {
131       const load_balance_t *lb0, *lb1;
132       const ip4_header_t *ip0, *ip1;
133       u32 lbi0, hc0, lbi1, hc1;
134       const dpo_id_t *dpo0, *dpo1;
135
136       /* Prefetch next iteration. */
137       {
138         vlib_prefetch_buffer_header (b[2], LOAD);
139         vlib_prefetch_buffer_header (b[3], LOAD);
140
141         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
142         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
143       }
144
145       ip0 = vlib_buffer_get_current (b[0]);
146       ip1 = vlib_buffer_get_current (b[1]);
147       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
148       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
149
150       lb0 = load_balance_get (lbi0);
151       lb1 = load_balance_get (lbi1);
152
153       /*
154        * this node is for via FIBs we can re-use the hash value from the
155        * to node if present.
156        * We don't want to use the same hash value at each level in the recursion
157        * graph as that would lead to polarisation
158        */
159       hc0 = hc1 = 0;
160
161       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
162         {
163           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
164             {
165               hc0 = vnet_buffer (b[0])->ip.flow_hash =
166                 vnet_buffer (b[0])->ip.flow_hash >> 1;
167             }
168           else
169             {
170               hc0 = vnet_buffer (b[0])->ip.flow_hash =
171                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
172             }
173           dpo0 = load_balance_get_fwd_bucket
174             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
175         }
176       else
177         {
178           dpo0 = load_balance_get_bucket_i (lb0, 0);
179         }
180       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
181         {
182           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
183             {
184               hc1 = vnet_buffer (b[1])->ip.flow_hash =
185                 vnet_buffer (b[1])->ip.flow_hash >> 1;
186             }
187           else
188             {
189               hc1 = vnet_buffer (b[1])->ip.flow_hash =
190                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
191             }
192           dpo1 = load_balance_get_fwd_bucket
193             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
194         }
195       else
196         {
197           dpo1 = load_balance_get_bucket_i (lb1, 0);
198         }
199
200       next[0] = dpo0->dpoi_next_node;
201       next[1] = dpo1->dpoi_next_node;
202
203       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
204       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
205
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
208       vlib_increment_combined_counter
209         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
210
211       b += 2;
212       next += 2;
213       n_left -= 2;
214     }
215
216   while (n_left > 0)
217     {
218       const load_balance_t *lb0;
219       const ip4_header_t *ip0;
220       const dpo_id_t *dpo0;
221       u32 lbi0, hc0;
222
223       ip0 = vlib_buffer_get_current (b[0]);
224       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
225
226       lb0 = load_balance_get (lbi0);
227
228       hc0 = 0;
229       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
230         {
231           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
232             {
233               hc0 = vnet_buffer (b[0])->ip.flow_hash =
234                 vnet_buffer (b[0])->ip.flow_hash >> 1;
235             }
236           else
237             {
238               hc0 = vnet_buffer (b[0])->ip.flow_hash =
239                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
240             }
241           dpo0 = load_balance_get_fwd_bucket
242             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
243         }
244       else
245         {
246           dpo0 = load_balance_get_bucket_i (lb0, 0);
247         }
248
249       next[0] = dpo0->dpoi_next_node;
250       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
251
252       vlib_increment_combined_counter
253         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
254
255       b += 1;
256       next += 1;
257       n_left -= 1;
258     }
259
260   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
261   if (node->flags & VLIB_NODE_FLAG_TRACE)
262     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
263
264   return frame->n_vectors;
265 }
266
267 /* *INDENT-OFF* */
268 VLIB_REGISTER_NODE (ip4_load_balance_node) =
269 {
270   .name = "ip4-load-balance",
271   .vector_size = sizeof (u32),
272   .sibling_of = "ip4-lookup",
273   .format_trace = format_ip4_lookup_trace,
274 };
275 /* *INDENT-ON* */
276
277 #ifndef CLIB_MARCH_VARIANT
278 /* get first interface address */
279 ip4_address_t *
280 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
281                              ip_interface_address_t ** result_ia)
282 {
283   ip_lookup_main_t *lm = &im->lookup_main;
284   ip_interface_address_t *ia = 0;
285   ip4_address_t *result = 0;
286
287   /* *INDENT-OFF* */
288   foreach_ip_interface_address
289     (lm, ia, sw_if_index,
290      1 /* honor unnumbered */ ,
291      ({
292        ip4_address_t * a =
293          ip_interface_address_get_address (lm, ia);
294        result = a;
295        break;
296      }));
297   /* *INDENT-OFF* */
298   if (result_ia)
299     *result_ia = result ? ia : 0;
300   return result;
301 }
302 #endif
303
304 static void
305 ip4_add_subnet_bcast_route (u32 fib_index,
306                             fib_prefix_t *pfx,
307                             u32 sw_if_index)
308 {
309   vnet_sw_interface_flags_t iflags;
310
311   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
312
313   fib_table_entry_special_remove(fib_index,
314                                  pfx,
315                                  FIB_SOURCE_INTERFACE);
316
317   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
318     {
319       fib_table_entry_update_one_path (fib_index, pfx,
320                                        FIB_SOURCE_INTERFACE,
321                                        FIB_ENTRY_FLAG_NONE,
322                                        DPO_PROTO_IP4,
323                                        /* No next-hop address */
324                                        &ADJ_BCAST_ADDR,
325                                        sw_if_index,
326                                        // invalid FIB index
327                                        ~0,
328                                        1,
329                                        // no out-label stack
330                                        NULL,
331                                        FIB_ROUTE_PATH_FLAG_NONE);
332     }
333   else
334     {
335         fib_table_entry_special_add(fib_index,
336                                     pfx,
337                                     FIB_SOURCE_INTERFACE,
338                                     (FIB_ENTRY_FLAG_DROP |
339                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
340     }
341 }
342
343 static void
344 ip4_add_interface_prefix_routes (ip4_main_t *im,
345                                  u32 sw_if_index,
346                                  u32 fib_index,
347                                  ip_interface_address_t * a)
348 {
349   ip_lookup_main_t *lm = &im->lookup_main;
350   ip_interface_prefix_t *if_prefix;
351   ip4_address_t *address = ip_interface_address_get_address (lm, a);
352
353   ip_interface_prefix_key_t key = {
354     .prefix = {
355       .fp_len = a->address_length,
356       .fp_proto = FIB_PROTOCOL_IP4,
357       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
358     },
359     .sw_if_index = sw_if_index,
360   };
361
362   fib_prefix_t pfx_special = {
363     .fp_proto = FIB_PROTOCOL_IP4,
364   };
365
366   /* If prefix already set on interface, just increment ref count & return */
367   if_prefix = ip_get_interface_prefix (lm, &key);
368   if (if_prefix)
369     {
370       if_prefix->ref_count += 1;
371       return;
372     }
373
374   /* New prefix - allocate a pool entry, initialize it, add to the hash */
375   pool_get (lm->if_prefix_pool, if_prefix);
376   if_prefix->ref_count = 1;
377   if_prefix->src_ia_index = a - lm->if_address_pool;
378   clib_memcpy (&if_prefix->key, &key, sizeof (key));
379   mhash_set (&lm->prefix_to_if_prefix_index, &key,
380              if_prefix - lm->if_prefix_pool, 0 /* old value */);
381
382   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
383   if (a->address_length <= 30)
384     {
385       pfx_special.fp_len = a->address_length;
386       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
387
388       /* set the glean route for the prefix */
389       fib_table_entry_update_one_path (fib_index, &pfx_special,
390                                        FIB_SOURCE_INTERFACE,
391                                        (FIB_ENTRY_FLAG_CONNECTED |
392                                         FIB_ENTRY_FLAG_ATTACHED),
393                                        DPO_PROTO_IP4,
394                                        /* No next-hop address */
395                                        NULL,
396                                        sw_if_index,
397                                        /* invalid FIB index */
398                                        ~0,
399                                        1,
400                                        /* no out-label stack */
401                                        NULL,
402                                        FIB_ROUTE_PATH_FLAG_NONE);
403
404       /* set a drop route for the base address of the prefix */
405       pfx_special.fp_len = 32;
406       pfx_special.fp_addr.ip4.as_u32 =
407         address->as_u32 & im->fib_masks[a->address_length];
408
409       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
410         fib_table_entry_special_add (fib_index, &pfx_special,
411                                      FIB_SOURCE_INTERFACE,
412                                      (FIB_ENTRY_FLAG_DROP |
413                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
414
415       /* set a route for the broadcast address of the prefix */
416       pfx_special.fp_len = 32;
417       pfx_special.fp_addr.ip4.as_u32 =
418         address->as_u32 | ~im->fib_masks[a->address_length];
419       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
420         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
421
422
423     }
424   /* length == 31 - add an attached route for the other address */
425   else if (a->address_length == 31)
426     {
427       pfx_special.fp_len = 32;
428       pfx_special.fp_addr.ip4.as_u32 =
429         address->as_u32 ^ clib_host_to_net_u32(1);
430
431       fib_table_entry_update_one_path (fib_index, &pfx_special,
432                                        FIB_SOURCE_INTERFACE,
433                                        (FIB_ENTRY_FLAG_ATTACHED),
434                                        DPO_PROTO_IP4,
435                                        &pfx_special.fp_addr,
436                                        sw_if_index,
437                                        /* invalid FIB index */
438                                        ~0,
439                                        1,
440                                        NULL,
441                                        FIB_ROUTE_PATH_FLAG_NONE);
442     }
443 }
444
445 static void
446 ip4_add_interface_routes (u32 sw_if_index,
447                           ip4_main_t * im, u32 fib_index,
448                           ip_interface_address_t * a)
449 {
450   ip_lookup_main_t *lm = &im->lookup_main;
451   ip4_address_t *address = ip_interface_address_get_address (lm, a);
452   fib_prefix_t pfx = {
453     .fp_len = 32,
454     .fp_proto = FIB_PROTOCOL_IP4,
455     .fp_addr.ip4 = *address,
456   };
457
458   /* set special routes for the prefix if needed */
459   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
460
461   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
462     {
463       u32 classify_table_index =
464         lm->classify_table_index_by_sw_if_index[sw_if_index];
465       if (classify_table_index != (u32) ~ 0)
466         {
467           dpo_id_t dpo = DPO_INVALID;
468
469           dpo_set (&dpo,
470                    DPO_CLASSIFY,
471                    DPO_PROTO_IP4,
472                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
473
474           fib_table_entry_special_dpo_add (fib_index,
475                                            &pfx,
476                                            FIB_SOURCE_CLASSIFY,
477                                            FIB_ENTRY_FLAG_NONE, &dpo);
478           dpo_reset (&dpo);
479         }
480     }
481
482   fib_table_entry_update_one_path (fib_index, &pfx,
483                                    FIB_SOURCE_INTERFACE,
484                                    (FIB_ENTRY_FLAG_CONNECTED |
485                                     FIB_ENTRY_FLAG_LOCAL),
486                                    DPO_PROTO_IP4,
487                                    &pfx.fp_addr,
488                                    sw_if_index,
489                                    // invalid FIB index
490                                    ~0,
491                                    1, NULL,
492                                    FIB_ROUTE_PATH_FLAG_NONE);
493 }
494
495 static void
496 ip4_del_interface_prefix_routes (ip4_main_t * im,
497                                  u32 sw_if_index,
498                                  u32 fib_index,
499                                  ip4_address_t * address,
500                                  u32 address_length)
501 {
502   ip_lookup_main_t *lm = &im->lookup_main;
503   ip_interface_prefix_t *if_prefix;
504
505   ip_interface_prefix_key_t key = {
506     .prefix = {
507       .fp_len = address_length,
508       .fp_proto = FIB_PROTOCOL_IP4,
509       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
510     },
511     .sw_if_index = sw_if_index,
512   };
513
514   fib_prefix_t pfx_special = {
515     .fp_len = 32,
516     .fp_proto = FIB_PROTOCOL_IP4,
517   };
518
519   if_prefix = ip_get_interface_prefix (lm, &key);
520   if (!if_prefix)
521     {
522       clib_warning ("Prefix not found while deleting %U",
523                     format_ip4_address_and_length, address, address_length);
524       return;
525     }
526
527   if_prefix->ref_count -= 1;
528
529   /*
530    * Routes need to be adjusted if:
531    * - deleting last intf addr in prefix
532    * - deleting intf addr used as default source address in glean adjacency
533    *
534    * We're done now otherwise
535    */
536   if ((if_prefix->ref_count > 0) &&
537       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
538     return;
539
540   /* length <= 30, delete glean route, first address, last address */
541   if (address_length <= 30)
542     {
543
544       /* remove glean route for prefix */
545       pfx_special.fp_addr.ip4 = *address;
546       pfx_special.fp_len = address_length;
547       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
548
549       /* if no more intf addresses in prefix, remove other special routes */
550       if (!if_prefix->ref_count)
551         {
552           /* first address in prefix */
553           pfx_special.fp_addr.ip4.as_u32 =
554             address->as_u32 & im->fib_masks[address_length];
555           pfx_special.fp_len = 32;
556
557           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
558           fib_table_entry_special_remove (fib_index,
559                                           &pfx_special,
560                                           FIB_SOURCE_INTERFACE);
561
562           /* prefix broadcast address */
563           pfx_special.fp_addr.ip4.as_u32 =
564             address->as_u32 | ~im->fib_masks[address_length];
565           pfx_special.fp_len = 32;
566
567           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
568           fib_table_entry_special_remove (fib_index,
569                                           &pfx_special,
570                                           FIB_SOURCE_INTERFACE);
571         }
572       else
573         /* default source addr just got deleted, find another */
574         {
575           ip_interface_address_t *new_src_ia = NULL;
576           ip4_address_t *new_src_addr = NULL;
577
578           new_src_addr =
579             ip4_interface_address_matching_destination
580               (im, address, sw_if_index, &new_src_ia);
581
582           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
583
584           pfx_special.fp_len = address_length;
585           pfx_special.fp_addr.ip4 = *new_src_addr;
586
587           /* set new glean route for the prefix */
588           fib_table_entry_update_one_path (fib_index, &pfx_special,
589                                            FIB_SOURCE_INTERFACE,
590                                            (FIB_ENTRY_FLAG_CONNECTED |
591                                             FIB_ENTRY_FLAG_ATTACHED),
592                                            DPO_PROTO_IP4,
593                                            /* No next-hop address */
594                                            NULL,
595                                            sw_if_index,
596                                            /* invalid FIB index */
597                                            ~0,
598                                            1,
599                                            /* no out-label stack */
600                                            NULL,
601                                            FIB_ROUTE_PATH_FLAG_NONE);
602           return;
603         }
604     }
605   /* length == 31, delete attached route for the other address */
606   else if (address_length == 31)
607     {
608       pfx_special.fp_addr.ip4.as_u32 =
609         address->as_u32 ^ clib_host_to_net_u32(1);
610
611       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
612     }
613
614   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
615   pool_put (lm->if_prefix_pool, if_prefix);
616 }
617
618 static void
619 ip4_del_interface_routes (u32 sw_if_index,
620                           ip4_main_t * im,
621                           u32 fib_index,
622                           ip4_address_t * address, u32 address_length)
623 {
624   fib_prefix_t pfx = {
625     .fp_len = address_length,
626     .fp_proto = FIB_PROTOCOL_IP4,
627     .fp_addr.ip4 = *address,
628   };
629
630   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
631                                    address, address_length);
632
633   pfx.fp_len = 32;
634   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
635 }
636
637 #ifndef CLIB_MARCH_VARIANT
638 void
639 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
640 {
641   ip4_main_t *im = &ip4_main;
642
643   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
644
645   /*
646    * enable/disable only on the 1<->0 transition
647    */
648   if (is_enable)
649     {
650       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
651         return;
652     }
653   else
654     {
655       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
656       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
657         return;
658     }
659   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
660                                !is_enable, 0, 0);
661
662
663   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
664                                sw_if_index, !is_enable, 0, 0);
665
666   {
667     ip4_enable_disable_interface_callback_t *cb;
668     vec_foreach (cb, im->enable_disable_interface_callbacks)
669       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
670   }
671 }
672
673 static clib_error_t *
674 ip4_add_del_interface_address_internal (vlib_main_t * vm,
675                                         u32 sw_if_index,
676                                         ip4_address_t * address,
677                                         u32 address_length, u32 is_del)
678 {
679   vnet_main_t *vnm = vnet_get_main ();
680   ip4_main_t *im = &ip4_main;
681   ip_lookup_main_t *lm = &im->lookup_main;
682   clib_error_t *error = 0;
683   u32 if_address_index, elts_before;
684   ip4_address_fib_t ip4_af, *addr_fib = 0;
685
686   /* local0 interface doesn't support IP addressing  */
687   if (sw_if_index == 0)
688     {
689       return
690        clib_error_create ("local0 interface doesn't support IP addressing");
691     }
692
693   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
694   ip4_addr_fib_init (&ip4_af, address,
695                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
696   vec_add1 (addr_fib, ip4_af);
697
698   /*
699    * there is no support for adj-fib handling in the presence of overlapping
700    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
701    * most routers do.
702    */
703   /* *INDENT-OFF* */
704   if (!is_del)
705     {
706       /* When adding an address check that it does not conflict
707          with an existing address on any interface in this table. */
708       ip_interface_address_t *ia;
709       vnet_sw_interface_t *sif;
710
711       pool_foreach(sif, vnm->interface_main.sw_interfaces,
712       ({
713           if (im->fib_index_by_sw_if_index[sw_if_index] ==
714               im->fib_index_by_sw_if_index[sif->sw_if_index])
715             {
716               foreach_ip_interface_address
717                 (&im->lookup_main, ia, sif->sw_if_index,
718                  0 /* honor unnumbered */ ,
719                  ({
720                    ip4_address_t * x =
721                      ip_interface_address_get_address
722                      (&im->lookup_main, ia);
723                    if (ip4_destination_matches_route
724                        (im, address, x, ia->address_length) ||
725                        ip4_destination_matches_route (im,
726                                                       x,
727                                                       address,
728                                                       address_length))
729                      {
730                        /* an intf may have >1 addr from the same prefix */
731                        if ((sw_if_index == sif->sw_if_index) &&
732                            (ia->address_length == address_length) &&
733                            (x->as_u32 != address->as_u32))
734                          continue;
735
736                        /* error if the length or intf was different */
737                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
738
739                        return
740                          clib_error_create
741                          ("failed to add %U on %U which conflicts with %U for interface %U",
742                           format_ip4_address_and_length, address,
743                           address_length,
744                           format_vnet_sw_if_index_name, vnm,
745                           sw_if_index,
746                           format_ip4_address_and_length, x,
747                           ia->address_length,
748                           format_vnet_sw_if_index_name, vnm,
749                           sif->sw_if_index);
750                      }
751                  }));
752             }
753       }));
754     }
755   /* *INDENT-ON* */
756
757   elts_before = pool_elts (lm->if_address_pool);
758
759   error = ip_interface_address_add_del
760     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
761   if (error)
762     goto done;
763
764   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
765   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
766
767   /* intf addr routes are added/deleted on admin up/down */
768   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
769     {
770       if (is_del)
771         ip4_del_interface_routes (sw_if_index,
772                                   im, ip4_af.fib_index, address,
773                                   address_length);
774       else
775         ip4_add_interface_routes (sw_if_index,
776                                   im, ip4_af.fib_index,
777                                   pool_elt_at_index
778                                   (lm->if_address_pool, if_address_index));
779     }
780
781   /* If pool did not grow/shrink: add duplicate address. */
782   if (elts_before != pool_elts (lm->if_address_pool))
783     {
784       ip4_add_del_interface_address_callback_t *cb;
785       vec_foreach (cb, im->add_del_interface_address_callbacks)
786         cb->function (im, cb->function_opaque, sw_if_index,
787                       address, address_length, if_address_index, is_del);
788     }
789
790 done:
791   vec_free (addr_fib);
792   return error;
793 }
794
795 clib_error_t *
796 ip4_add_del_interface_address (vlib_main_t * vm,
797                                u32 sw_if_index,
798                                ip4_address_t * address,
799                                u32 address_length, u32 is_del)
800 {
801   return ip4_add_del_interface_address_internal
802     (vm, sw_if_index, address, address_length, is_del);
803 }
804
805 void
806 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
807 {
808   ip_interface_address_t *ia;
809   ip4_main_t *im;
810
811   im = &ip4_main;
812
813   /*
814    * when directed broadcast is enabled, the subnet braodcast route will forward
815    * packets using an adjacency with a broadcast MAC. otherwise it drops
816    */
817   /* *INDENT-OFF* */
818   foreach_ip_interface_address(&im->lookup_main, ia,
819                                sw_if_index, 0,
820      ({
821        if (ia->address_length <= 30)
822          {
823            ip4_address_t *ipa;
824
825            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
826
827            fib_prefix_t pfx = {
828              .fp_len = 32,
829              .fp_proto = FIB_PROTOCOL_IP4,
830              .fp_addr = {
831                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
832              },
833            };
834
835            ip4_add_subnet_bcast_route
836              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
837                                                   sw_if_index),
838               &pfx, sw_if_index);
839          }
840      }));
841   /* *INDENT-ON* */
842 }
843 #endif
844
845 static clib_error_t *
846 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
847 {
848   ip4_main_t *im = &ip4_main;
849   ip_interface_address_t *ia;
850   ip4_address_t *a;
851   u32 is_admin_up, fib_index;
852
853   /* Fill in lookup tables with default table (0). */
854   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
855
856   vec_validate_init_empty (im->
857                            lookup_main.if_address_pool_index_by_sw_if_index,
858                            sw_if_index, ~0);
859
860   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
861
862   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
863
864   /* *INDENT-OFF* */
865   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
866                                 0 /* honor unnumbered */,
867   ({
868     a = ip_interface_address_get_address (&im->lookup_main, ia);
869     if (is_admin_up)
870       ip4_add_interface_routes (sw_if_index,
871                                 im, fib_index,
872                                 ia);
873     else
874       ip4_del_interface_routes (sw_if_index,
875                                 im, fib_index,
876                                 a, ia->address_length);
877   }));
878   /* *INDENT-ON* */
879
880   return 0;
881 }
882
883 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
884
885 /* Built-in ip4 unicast rx feature path definition */
886 /* *INDENT-OFF* */
887 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
888 {
889   .arc_name = "ip4-unicast",
890   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
891   .last_in_arc = "ip4-lookup",
892   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
893 };
894
895 VNET_FEATURE_INIT (ip4_flow_classify, static) =
896 {
897   .arc_name = "ip4-unicast",
898   .node_name = "ip4-flow-classify",
899   .runs_before = VNET_FEATURES ("ip4-inacl"),
900 };
901
902 VNET_FEATURE_INIT (ip4_inacl, static) =
903 {
904   .arc_name = "ip4-unicast",
905   .node_name = "ip4-inacl",
906   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
907 };
908
909 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
910 {
911   .arc_name = "ip4-unicast",
912   .node_name = "ip4-source-and-port-range-check-rx",
913   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
914 };
915
916 VNET_FEATURE_INIT (ip4_policer_classify, static) =
917 {
918   .arc_name = "ip4-unicast",
919   .node_name = "ip4-policer-classify",
920   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
921 };
922
923 VNET_FEATURE_INIT (ip4_ipsec, static) =
924 {
925   .arc_name = "ip4-unicast",
926   .node_name = "ipsec4-input-feature",
927   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
928 };
929
930 VNET_FEATURE_INIT (ip4_vpath, static) =
931 {
932   .arc_name = "ip4-unicast",
933   .node_name = "vpath-input-ip4",
934   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
935 };
936
937 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
938 {
939   .arc_name = "ip4-unicast",
940   .node_name = "ip4-vxlan-bypass",
941   .runs_before = VNET_FEATURES ("ip4-lookup"),
942 };
943
944 VNET_FEATURE_INIT (ip4_not_enabled, static) =
945 {
946   .arc_name = "ip4-unicast",
947   .node_name = "ip4-not-enabled",
948   .runs_before = VNET_FEATURES ("ip4-lookup"),
949 };
950
951 VNET_FEATURE_INIT (ip4_lookup, static) =
952 {
953   .arc_name = "ip4-unicast",
954   .node_name = "ip4-lookup",
955   .runs_before = 0,     /* not before any other features */
956 };
957
958 /* Built-in ip4 multicast rx feature path definition */
959 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
960 {
961   .arc_name = "ip4-multicast",
962   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
963   .last_in_arc = "ip4-mfib-forward-lookup",
964   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
965 };
966
967 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
968 {
969   .arc_name = "ip4-multicast",
970   .node_name = "vpath-input-ip4",
971   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
972 };
973
974 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
975 {
976   .arc_name = "ip4-multicast",
977   .node_name = "ip4-not-enabled",
978   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
979 };
980
981 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
982 {
983   .arc_name = "ip4-multicast",
984   .node_name = "ip4-mfib-forward-lookup",
985   .runs_before = 0,     /* last feature */
986 };
987
988 /* Source and port-range check ip4 tx feature path definition */
989 VNET_FEATURE_ARC_INIT (ip4_output, static) =
990 {
991   .arc_name = "ip4-output",
992   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
993   .last_in_arc = "interface-output",
994   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
995 };
996
997 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
998 {
999   .arc_name = "ip4-output",
1000   .node_name = "ip4-source-and-port-range-check-tx",
1001   .runs_before = VNET_FEATURES ("ip4-outacl"),
1002 };
1003
1004 VNET_FEATURE_INIT (ip4_outacl, static) =
1005 {
1006   .arc_name = "ip4-output",
1007   .node_name = "ip4-outacl",
1008   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1009 };
1010
1011 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1012 {
1013   .arc_name = "ip4-output",
1014   .node_name = "ipsec4-output-feature",
1015   .runs_before = VNET_FEATURES ("interface-output"),
1016 };
1017
1018 /* Built-in ip4 tx feature path definition */
1019 VNET_FEATURE_INIT (ip4_interface_output, static) =
1020 {
1021   .arc_name = "ip4-output",
1022   .node_name = "interface-output",
1023   .runs_before = 0,     /* not before any other features */
1024 };
1025 /* *INDENT-ON* */
1026
1027 static clib_error_t *
1028 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1029 {
1030   ip4_main_t *im = &ip4_main;
1031
1032   /* Fill in lookup tables with default table (0). */
1033   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1034   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1035
1036   if (!is_add)
1037     {
1038       ip4_main_t *im4 = &ip4_main;
1039       ip_lookup_main_t *lm4 = &im4->lookup_main;
1040       ip_interface_address_t *ia = 0;
1041       ip4_address_t *address;
1042       vlib_main_t *vm = vlib_get_main ();
1043
1044       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1045       /* *INDENT-OFF* */
1046       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1047       ({
1048         address = ip_interface_address_get_address (lm4, ia);
1049         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1050       }));
1051       /* *INDENT-ON* */
1052       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1053     }
1054
1055   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1056                                is_add, 0, 0);
1057
1058   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1059                                sw_if_index, is_add, 0, 0);
1060
1061   return /* no error */ 0;
1062 }
1063
1064 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1065
1066 /* Global IP4 main. */
1067 #ifndef CLIB_MARCH_VARIANT
1068 ip4_main_t ip4_main;
1069 #endif /* CLIB_MARCH_VARIANT */
1070
1071 static clib_error_t *
1072 ip4_lookup_init (vlib_main_t * vm)
1073 {
1074   ip4_main_t *im = &ip4_main;
1075   clib_error_t *error;
1076   uword i;
1077
1078   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1079     return error;
1080   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1081     return (error);
1082   if ((error = vlib_call_init_function (vm, fib_module_init)))
1083     return error;
1084   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1085     return error;
1086
1087   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1088     {
1089       u32 m;
1090
1091       if (i < 32)
1092         m = pow2_mask (i) << (32 - i);
1093       else
1094         m = ~0;
1095       im->fib_masks[i] = clib_host_to_net_u32 (m);
1096     }
1097
1098   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1099
1100   /* Create FIB with index 0 and table id of 0. */
1101   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1102                                      FIB_SOURCE_DEFAULT_ROUTE);
1103   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1104                                       MFIB_SOURCE_DEFAULT_ROUTE);
1105
1106   {
1107     pg_node_t *pn;
1108     pn = pg_get_node (ip4_lookup_node.index);
1109     pn->unformat_edit = unformat_pg_ip4_header;
1110   }
1111
1112   {
1113     ethernet_arp_header_t h;
1114
1115     clib_memset (&h, 0, sizeof (h));
1116
1117 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1118 #define _8(f,v) h.f = v;
1119     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1120     _16 (l3_type, ETHERNET_TYPE_IP4);
1121     _8 (n_l2_address_bytes, 6);
1122     _8 (n_l3_address_bytes, 4);
1123     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1124 #undef _16
1125 #undef _8
1126
1127     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1128                                /* data */ &h,
1129                                sizeof (h),
1130                                /* alloc chunk size */ 8,
1131                                "ip4 arp");
1132   }
1133
1134   return error;
1135 }
1136
1137 VLIB_INIT_FUNCTION (ip4_lookup_init);
1138
1139 typedef struct
1140 {
1141   /* Adjacency taken. */
1142   u32 dpo_index;
1143   u32 flow_hash;
1144   u32 fib_index;
1145
1146   /* Packet data, possibly *after* rewrite. */
1147   u8 packet_data[64 - 1 * sizeof (u32)];
1148 }
1149 ip4_forward_next_trace_t;
1150
1151 #ifndef CLIB_MARCH_VARIANT
1152 u8 *
1153 format_ip4_forward_next_trace (u8 * s, va_list * args)
1154 {
1155   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1156   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1157   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1158   u32 indent = format_get_indent (s);
1159   s = format (s, "%U%U",
1160               format_white_space, indent,
1161               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1162   return s;
1163 }
1164 #endif
1165
1166 static u8 *
1167 format_ip4_lookup_trace (u8 * s, va_list * args)
1168 {
1169   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1170   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1171   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1172   u32 indent = format_get_indent (s);
1173
1174   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1175               t->fib_index, t->dpo_index, t->flow_hash);
1176   s = format (s, "\n%U%U",
1177               format_white_space, indent,
1178               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1179   return s;
1180 }
1181
1182 static u8 *
1183 format_ip4_rewrite_trace (u8 * s, va_list * args)
1184 {
1185   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1186   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1187   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1188   u32 indent = format_get_indent (s);
1189
1190   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1191               t->fib_index, t->dpo_index, format_ip_adjacency,
1192               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1193   s = format (s, "\n%U%U",
1194               format_white_space, indent,
1195               format_ip_adjacency_packet_data,
1196               t->packet_data, sizeof (t->packet_data));
1197   return s;
1198 }
1199
1200 #ifndef CLIB_MARCH_VARIANT
1201 /* Common trace function for all ip4-forward next nodes. */
1202 void
1203 ip4_forward_next_trace (vlib_main_t * vm,
1204                         vlib_node_runtime_t * node,
1205                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1206 {
1207   u32 *from, n_left;
1208   ip4_main_t *im = &ip4_main;
1209
1210   n_left = frame->n_vectors;
1211   from = vlib_frame_vector_args (frame);
1212
1213   while (n_left >= 4)
1214     {
1215       u32 bi0, bi1;
1216       vlib_buffer_t *b0, *b1;
1217       ip4_forward_next_trace_t *t0, *t1;
1218
1219       /* Prefetch next iteration. */
1220       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1221       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1222
1223       bi0 = from[0];
1224       bi1 = from[1];
1225
1226       b0 = vlib_get_buffer (vm, bi0);
1227       b1 = vlib_get_buffer (vm, bi1);
1228
1229       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1230         {
1231           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1232           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1233           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1234           t0->fib_index =
1235             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1236              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1237             vec_elt (im->fib_index_by_sw_if_index,
1238                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1239
1240           clib_memcpy_fast (t0->packet_data,
1241                             vlib_buffer_get_current (b0),
1242                             sizeof (t0->packet_data));
1243         }
1244       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1245         {
1246           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1247           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1248           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1249           t1->fib_index =
1250             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1251              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1252             vec_elt (im->fib_index_by_sw_if_index,
1253                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1254           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1255                             sizeof (t1->packet_data));
1256         }
1257       from += 2;
1258       n_left -= 2;
1259     }
1260
1261   while (n_left >= 1)
1262     {
1263       u32 bi0;
1264       vlib_buffer_t *b0;
1265       ip4_forward_next_trace_t *t0;
1266
1267       bi0 = from[0];
1268
1269       b0 = vlib_get_buffer (vm, bi0);
1270
1271       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1272         {
1273           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1274           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1275           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1276           t0->fib_index =
1277             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1278              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1279             vec_elt (im->fib_index_by_sw_if_index,
1280                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1281           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1282                             sizeof (t0->packet_data));
1283         }
1284       from += 1;
1285       n_left -= 1;
1286     }
1287 }
1288
1289 /* Compute TCP/UDP/ICMP4 checksum in software. */
1290 u16
1291 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1292                               ip4_header_t * ip0)
1293 {
1294   ip_csum_t sum0;
1295   u32 ip_header_length, payload_length_host_byte_order;
1296
1297   /* Initialize checksum with ip header. */
1298   ip_header_length = ip4_header_bytes (ip0);
1299   payload_length_host_byte_order =
1300     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1301   sum0 =
1302     clib_host_to_net_u32 (payload_length_host_byte_order +
1303                           (ip0->protocol << 16));
1304
1305   if (BITS (uword) == 32)
1306     {
1307       sum0 =
1308         ip_csum_with_carry (sum0,
1309                             clib_mem_unaligned (&ip0->src_address, u32));
1310       sum0 =
1311         ip_csum_with_carry (sum0,
1312                             clib_mem_unaligned (&ip0->dst_address, u32));
1313     }
1314   else
1315     sum0 =
1316       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1317
1318   return ip_calculate_l4_checksum (vm, p0, sum0,
1319                                    payload_length_host_byte_order, (u8 *) ip0,
1320                                    ip_header_length, NULL);
1321 }
1322
1323 u32
1324 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1325 {
1326   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1327   udp_header_t *udp0;
1328   u16 sum16;
1329
1330   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1331           || ip0->protocol == IP_PROTOCOL_UDP);
1332
1333   udp0 = (void *) (ip0 + 1);
1334   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1335     {
1336       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1337                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1338       return p0->flags;
1339     }
1340
1341   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1342
1343   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1344                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1345
1346   return p0->flags;
1347 }
1348 #endif
1349
1350 /* *INDENT-OFF* */
1351 VNET_FEATURE_ARC_INIT (ip4_local) =
1352 {
1353   .arc_name  = "ip4-local",
1354   .start_nodes = VNET_FEATURES ("ip4-local"),
1355   .last_in_arc = "ip4-local-end-of-arc",
1356 };
1357 /* *INDENT-ON* */
1358
1359 static inline void
1360 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1361                             ip4_header_t * ip, u8 is_udp, u8 * error,
1362                             u8 * good_tcp_udp)
1363 {
1364   u32 flags0;
1365   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1366   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1367   if (is_udp)
1368     {
1369       udp_header_t *udp;
1370       u32 ip_len, udp_len;
1371       i32 len_diff;
1372       udp = ip4_next_header (ip);
1373       /* Verify UDP length. */
1374       ip_len = clib_net_to_host_u16 (ip->length);
1375       udp_len = clib_net_to_host_u16 (udp->length);
1376
1377       len_diff = ip_len - udp_len;
1378       *good_tcp_udp &= len_diff >= 0;
1379       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1380     }
1381 }
1382
/*
 * Checksum state predicates on a buffer pointer _b.
 *
 * Every argument and every expansion is fully parenthesised so the macros
 * can be negated, combined with other operators, or given compound
 * expressions as arguments without changing their meaning.  _b is
 * evaluated more than once, so it must not have side effects.
 */

/* True when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * True when the packet is TCP/UDP and its checksum is neither already
 * computed nor offloaded, i.e. it still has to be validated in software.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1394
1395 static inline void
1396 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1397                          ip4_header_t * ih, u8 * error)
1398 {
1399   u8 is_udp, is_tcp_udp, good_tcp_udp;
1400
1401   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1402   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1403
1404   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1405     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1406   else
1407     good_tcp_udp = ip4_local_csum_is_valid (b);
1408
1409   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1410   *error = (is_tcp_udp && !good_tcp_udp
1411             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1412 }
1413
1414 static inline void
1415 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1416                             ip4_header_t ** ih, u8 * error)
1417 {
1418   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1419
1420   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1421   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1422
1423   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1424   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1425
1426   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1427   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1428
1429   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1430                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1431     {
1432       if (is_tcp_udp[0])
1433         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1434                                     &good_tcp_udp[0]);
1435       if (is_tcp_udp[1])
1436         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1437                                     &good_tcp_udp[1]);
1438     }
1439
1440   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1441               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1442   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1443               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1444 }
1445
1446 static inline void
1447 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1448                               vlib_buffer_t * b, u16 * next, u8 error,
1449                               u8 head_of_feature_arc)
1450 {
1451   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1452   u32 next_index;
1453
1454   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1455   b->error = error ? error_node->errors[error] : 0;
1456   if (head_of_feature_arc)
1457     {
1458       next_index = *next;
1459       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1460         {
1461           vnet_feature_arc_start (arc_index,
1462                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1463                                   &next_index, b);
1464           *next = next_index;
1465         }
1466     }
1467 }
1468
1469 typedef struct
1470 {
1471   ip4_address_t src;
1472   u32 lbi;
1473   u8 error;
1474   u8 first;
1475 } ip4_local_last_check_t;
1476
1477 static inline void
1478 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1479                      ip4_local_last_check_t * last_check, u8 * error0)
1480 {
1481   ip4_fib_mtrie_leaf_t leaf0;
1482   ip4_fib_mtrie_t *mtrie0;
1483   const dpo_id_t *dpo0;
1484   load_balance_t *lb0;
1485   u32 lbi0;
1486
1487   vnet_buffer (b)->ip.fib_index =
1488     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1489     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1490
1491   /*
1492    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1493    *  adjacency for the destination address (the local interface address).
1494    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1495    *  adjacency for the source address (the remote sender's address)
1496    */
1497   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1498       last_check->first)
1499     {
1500       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1501       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1502       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1503       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1504       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1505
1506       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1507         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1508       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1509
1510       lb0 = load_balance_get (lbi0);
1511       dpo0 = load_balance_get_bucket_i (lb0, 0);
1512
1513       /*
1514        * Must have a route to source otherwise we drop the packet.
1515        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1516        *
1517        * The checks are:
1518        *  - the source is a recieve => it's from us => bogus, do this
1519        *    first since it sets a different error code.
1520        *  - uRPF check for any route to source - accept if passes.
1521        *  - allow packets destined to the broadcast address from unknown sources
1522        */
1523
1524       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1525                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1526                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1527       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1528                   && !fib_urpf_check_size (lb0->lb_urpf)
1529                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1530                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1531
1532       last_check->src.as_u32 = ip0->src_address.as_u32;
1533       last_check->lbi = lbi0;
1534       last_check->error = *error0;
1535       last_check->first = 0;
1536     }
1537   else
1538     {
1539       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1540         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1541       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1542       *error0 = last_check->error;
1543     }
1544 }
1545
1546 static inline void
1547 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1548                         ip4_local_last_check_t * last_check, u8 * error)
1549 {
1550   ip4_fib_mtrie_leaf_t leaf[2];
1551   ip4_fib_mtrie_t *mtrie[2];
1552   const dpo_id_t *dpo[2];
1553   load_balance_t *lb[2];
1554   u32 not_last_hit;
1555   u32 lbi[2];
1556
1557   not_last_hit = last_check->first;
1558   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1559   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1560
1561   vnet_buffer (b[0])->ip.fib_index =
1562     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1563     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1564     vnet_buffer (b[0])->ip.fib_index;
1565
1566   vnet_buffer (b[1])->ip.fib_index =
1567     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1568     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1569     vnet_buffer (b[1])->ip.fib_index;
1570
1571   /*
1572    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1573    *  adjacency for the destination address (the local interface address).
1574    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1575    *  adjacency for the source address (the remote sender's address)
1576    */
1577   if (PREDICT_TRUE (not_last_hit))
1578     {
1579       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1580       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1581
1582       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1583       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1584
1585       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1586                                            &ip[0]->src_address, 2);
1587       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1588                                            &ip[1]->src_address, 2);
1589
1590       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1591                                            &ip[0]->src_address, 3);
1592       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1593                                            &ip[1]->src_address, 3);
1594
1595       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1596       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1597
1598       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1599         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1600       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1601
1602       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1603         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1604       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1605
1606       lb[0] = load_balance_get (lbi[0]);
1607       lb[1] = load_balance_get (lbi[1]);
1608
1609       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1610       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1611
1612       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1613                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1614                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1615       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1616                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1617                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1618                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1619
1620       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1621                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1622                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1623       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1624                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1625                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1626                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1627
1628       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1629       last_check->lbi = lbi[1];
1630       last_check->error = error[1];
1631       last_check->first = 0;
1632     }
1633   else
1634     {
1635       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1636         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1637       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1638
1639       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1640         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1641       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1642
1643       error[0] = last_check->error;
1644       error[1] = last_check->error;
1645     }
1646 }
1647
1648 enum ip_local_packet_type_e
1649 {
1650   IP_LOCAL_PACKET_TYPE_L4,
1651   IP_LOCAL_PACKET_TYPE_NAT,
1652   IP_LOCAL_PACKET_TYPE_FRAG,
1653 };
1654
1655 /**
1656  * Determine packet type and next node.
1657  *
1658  * The expectation is that all packets that are not L4 will skip
1659  * checksums and source checks.
1660  */
1661 always_inline u8
1662 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1663 {
1664   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1665
1666   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1667     {
1668       *next = IP_LOCAL_NEXT_REASSEMBLY;
1669       return IP_LOCAL_PACKET_TYPE_FRAG;
1670     }
1671   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1672     {
1673       *next = lm->local_next_by_ip_protocol[ip->protocol];
1674       return IP_LOCAL_PACKET_TYPE_NAT;
1675     }
1676
1677   *next = lm->local_next_by_ip_protocol[ip->protocol];
1678   return IP_LOCAL_PACKET_TYPE_L4;
1679 }
1680
1681 static inline uword
1682 ip4_local_inline (vlib_main_t * vm,
1683                   vlib_node_runtime_t * node,
1684                   vlib_frame_t * frame, int head_of_feature_arc)
1685 {
1686   u32 *from, n_left_from;
1687   vlib_node_runtime_t *error_node =
1688     vlib_node_get_runtime (vm, ip4_local_node.index);
1689   u16 nexts[VLIB_FRAME_SIZE], *next;
1690   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1691   ip4_header_t *ip[2];
1692   u8 error[2], pt[2];
1693
1694   ip4_local_last_check_t last_check = {
1695     /*
1696      * 0.0.0.0 can appear as the source address of an IP packet,
1697      * as can any other address, hence the need to use the 'first'
1698      * member to make sure the .lbi is initialised for the first
1699      * packet.
1700      */
1701     .src = {.as_u32 = 0},
1702     .lbi = ~0,
1703     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1704     .first = 1,
1705   };
1706
1707   from = vlib_frame_vector_args (frame);
1708   n_left_from = frame->n_vectors;
1709
1710   if (node->flags & VLIB_NODE_FLAG_TRACE)
1711     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1712
1713   vlib_get_buffers (vm, from, bufs, n_left_from);
1714   b = bufs;
1715   next = nexts;
1716
1717   while (n_left_from >= 6)
1718     {
1719       u8 not_batch = 0;
1720
1721       /* Prefetch next iteration. */
1722       {
1723         vlib_prefetch_buffer_header (b[4], LOAD);
1724         vlib_prefetch_buffer_header (b[5], LOAD);
1725
1726         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1727         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1728       }
1729
1730       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1731
1732       ip[0] = vlib_buffer_get_current (b[0]);
1733       ip[1] = vlib_buffer_get_current (b[1]);
1734
1735       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1736       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1737
1738       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1739       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1740
1741       not_batch = pt[0] ^ pt[1];
1742
1743       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1744         goto skip_checks;
1745
1746       if (PREDICT_TRUE (not_batch == 0))
1747         {
1748           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1749           ip4_local_check_src_x2 (b, ip, &last_check, error);
1750         }
1751       else
1752         {
1753           if (!pt[0])
1754             {
1755               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1756               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1757             }
1758           if (!pt[1])
1759             {
1760               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1761               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1762             }
1763         }
1764
1765     skip_checks:
1766
1767       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1768                                     head_of_feature_arc);
1769       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1770                                     head_of_feature_arc);
1771
1772       b += 2;
1773       next += 2;
1774       n_left_from -= 2;
1775     }
1776
1777   while (n_left_from > 0)
1778     {
1779       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1780
1781       ip[0] = vlib_buffer_get_current (b[0]);
1782       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1783       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1784
1785       if (head_of_feature_arc == 0 || pt[0])
1786         goto skip_check;
1787
1788       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1789       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1790
1791     skip_check:
1792
1793       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1794                                     head_of_feature_arc);
1795
1796       b += 1;
1797       next += 1;
1798       n_left_from -= 1;
1799     }
1800
1801   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1802   return frame->n_vectors;
1803 }
1804
1805 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1806                                vlib_frame_t * frame)
1807 {
1808   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1809 }
1810
1811 /* *INDENT-OFF* */
1812 VLIB_REGISTER_NODE (ip4_local_node) =
1813 {
1814   .name = "ip4-local",
1815   .vector_size = sizeof (u32),
1816   .format_trace = format_ip4_forward_next_trace,
1817   .n_errors = IP4_N_ERROR,
1818   .error_strings = ip4_error_strings,
1819   .n_next_nodes = IP_LOCAL_N_NEXT,
1820   .next_nodes =
1821   {
1822     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1823     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1824     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1825     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1826     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1827   },
1828 };
1829 /* *INDENT-ON* */
1830
1831
1832 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1833                                           vlib_node_runtime_t * node,
1834                                           vlib_frame_t * frame)
1835 {
1836   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1837 }
1838
1839 /* *INDENT-OFF* */
1840 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1841   .name = "ip4-local-end-of-arc",
1842   .vector_size = sizeof (u32),
1843
1844   .format_trace = format_ip4_forward_next_trace,
1845   .sibling_of = "ip4-local",
1846 };
1847
1848 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1849   .arc_name = "ip4-local",
1850   .node_name = "ip4-local-end-of-arc",
1851   .runs_before = 0, /* not before any other features */
1852 };
1853 /* *INDENT-ON* */
1854
1855 #ifndef CLIB_MARCH_VARIANT
1856 void
1857 ip4_register_protocol (u32 protocol, u32 node_index)
1858 {
1859   vlib_main_t *vm = vlib_get_main ();
1860   ip4_main_t *im = &ip4_main;
1861   ip_lookup_main_t *lm = &im->lookup_main;
1862
1863   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1864   lm->local_next_by_ip_protocol[protocol] =
1865     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1866 }
1867
1868 void
1869 ip4_unregister_protocol (u32 protocol)
1870 {
1871   ip4_main_t *im = &ip4_main;
1872   ip_lookup_main_t *lm = &im->lookup_main;
1873
1874   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1875   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1876 }
1877 #endif
1878
1879 static clib_error_t *
1880 show_ip_local_command_fn (vlib_main_t * vm,
1881                           unformat_input_t * input, vlib_cli_command_t * cmd)
1882 {
1883   ip4_main_t *im = &ip4_main;
1884   ip_lookup_main_t *lm = &im->lookup_main;
1885   int i;
1886
1887   vlib_cli_output (vm, "Protocols handled by ip4_local");
1888   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1889     {
1890       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1891         {
1892           u32 node_index = vlib_get_node (vm,
1893                                           ip4_local_node.index)->
1894             next_nodes[lm->local_next_by_ip_protocol[i]];
1895           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1896                            format_vlib_node_name, vm, node_index);
1897         }
1898     }
1899   return 0;
1900 }
1901
1902
1903
1904 /*?
1905  * Display the set of protocols handled by the local IPv4 stack.
1906  *
1907  * @cliexpar
1908  * Example of how to display local protocol table:
1909  * @cliexstart{show ip local}
1910  * Protocols handled by ip4_local
1911  * 1
1912  * 17
1913  * 47
1914  * @cliexend
1915 ?*/
1916 /* *INDENT-OFF* */
1917 VLIB_CLI_COMMAND (show_ip_local, static) =
1918 {
1919   .path = "show ip local",
1920   .function = show_ip_local_command_fn,
1921   .short_help = "show ip local",
1922 };
1923 /* *INDENT-ON* */
1924
/**
 * Static next-node slots of the ip4-rewrite family of nodes.  The values
 * index the .next_nodes table of the "ip4-rewrite" registration.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT = 3	/* Last: number of static next slots */
} ip4_rewrite_next_t;
1932
/**
 * The bits of an IPv4 address (held in network byte order in a u32)
 * that are copied into the MAC rewrite to construct a multicast MAC
 * address.
 */
#if !(CLIB_ARCH_IS_BIG_ENDIAN)
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1942
1943 always_inline void
1944 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1945                u16 adj_packet_bytes, bool df, u16 * next,
1946                u8 is_midchain, u32 * error)
1947 {
1948   if (packet_len > adj_packet_bytes)
1949     {
1950       *error = IP4_ERROR_MTU_EXCEEDED;
1951       if (df)
1952         {
1953           icmp4_error_set_vnet_buffer
1954             (b, ICMP4_destination_unreachable,
1955              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1956              adj_packet_bytes);
1957           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1958         }
1959       else
1960         {
1961           /* IP fragmentation */
1962           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1963                                    (is_midchain ?
1964                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1965                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1966           *next = IP4_REWRITE_NEXT_FRAGMENT;
1967         }
1968     }
1969 }
1970
1971 /* increment TTL & update checksum.
1972    Works either endian, so no need for byte swap. */
1973 static_always_inline void
1974 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
1975 {
1976   i32 ttl;
1977   u32 checksum;
1978   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
1979     return;
1980
1981   ttl = ip->ttl;
1982
1983   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
1984   checksum += checksum >= 0xffff;
1985
1986   ip->checksum = checksum;
1987   ttl += 1;
1988   ip->ttl = ttl;
1989
1990   ASSERT (ip->checksum == ip4_header_checksum (ip));
1991 }
1992
1993 /* Decrement TTL & update checksum.
1994    Works either endian, so no need for byte swap. */
1995 static_always_inline void
1996 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
1997                             u32 * error)
1998 {
1999   i32 ttl;
2000   u32 checksum;
2001   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2002     return;
2003
2004   ttl = ip->ttl;
2005
2006   /* Input node should have reject packets with ttl 0. */
2007   ASSERT (ip->ttl > 0);
2008
2009   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2010   checksum += checksum >= 0xffff;
2011
2012   ip->checksum = checksum;
2013   ttl -= 1;
2014   ip->ttl = ttl;
2015
2016   /*
2017    * If the ttl drops below 1 when forwarding, generate
2018    * an ICMP response.
2019    */
2020   if (PREDICT_FALSE (ttl <= 0))
2021     {
2022       *error = IP4_ERROR_TIME_EXPIRED;
2023       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2024       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2025                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2026                                    0);
2027       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2028     }
2029
2030   /* Verify checksum. */
2031   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2032           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2033 }
2034
2035
2036 always_inline uword
2037 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2038                              vlib_node_runtime_t * node,
2039                              vlib_frame_t * frame,
2040                              int do_counters, int is_midchain, int is_mcast)
2041 {
2042   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2043   u32 *from = vlib_frame_vector_args (frame);
2044   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2045   u16 nexts[VLIB_FRAME_SIZE], *next;
2046   u32 n_left_from;
2047   vlib_node_runtime_t *error_node =
2048     vlib_node_get_runtime (vm, ip4_input_node.index);
2049
2050   n_left_from = frame->n_vectors;
2051   u32 thread_index = vm->thread_index;
2052
2053   vlib_get_buffers (vm, from, bufs, n_left_from);
2054   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2055
2056 #if (CLIB_N_PREFETCHES >= 8)
2057   if (n_left_from >= 6)
2058     {
2059       int i;
2060       for (i = 2; i < 6; i++)
2061         vlib_prefetch_buffer_header (bufs[i], LOAD);
2062     }
2063
2064   next = nexts;
2065   b = bufs;
2066   while (n_left_from >= 8)
2067     {
2068       const ip_adjacency_t *adj0, *adj1;
2069       ip4_header_t *ip0, *ip1;
2070       u32 rw_len0, error0, adj_index0;
2071       u32 rw_len1, error1, adj_index1;
2072       u32 tx_sw_if_index0, tx_sw_if_index1;
2073       u8 *p;
2074
2075       vlib_prefetch_buffer_header (b[6], LOAD);
2076       vlib_prefetch_buffer_header (b[7], LOAD);
2077
2078       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2079       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2080
2081       /*
2082        * pre-fetch the per-adjacency counters
2083        */
2084       if (do_counters)
2085         {
2086           vlib_prefetch_combined_counter (&adjacency_counters,
2087                                           thread_index, adj_index0);
2088           vlib_prefetch_combined_counter (&adjacency_counters,
2089                                           thread_index, adj_index1);
2090         }
2091
2092       ip0 = vlib_buffer_get_current (b[0]);
2093       ip1 = vlib_buffer_get_current (b[1]);
2094
2095       error0 = error1 = IP4_ERROR_NONE;
2096
2097       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2098       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2099
2100       /* Rewrite packet header and updates lengths. */
2101       adj0 = adj_get (adj_index0);
2102       adj1 = adj_get (adj_index1);
2103
2104       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2105       rw_len0 = adj0[0].rewrite_header.data_bytes;
2106       rw_len1 = adj1[0].rewrite_header.data_bytes;
2107       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2108       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2109
2110       p = vlib_buffer_get_current (b[2]);
2111       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2112       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2113
2114       p = vlib_buffer_get_current (b[3]);
2115       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2116       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2117
2118       /* Check MTU of outgoing interface. */
2119       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2120       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2121
2122       if (b[0]->flags & VNET_BUFFER_F_GSO)
2123         ip0_len = gso_mtu_sz (b[0]);
2124       if (b[1]->flags & VNET_BUFFER_F_GSO)
2125         ip1_len = gso_mtu_sz (b[1]);
2126
2127       ip4_mtu_check (b[0], ip0_len,
2128                      adj0[0].rewrite_header.max_l3_packet_bytes,
2129                      ip0->flags_and_fragment_offset &
2130                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2131                      next + 0, is_midchain, &error0);
2132       ip4_mtu_check (b[1], ip1_len,
2133                      adj1[0].rewrite_header.max_l3_packet_bytes,
2134                      ip1->flags_and_fragment_offset &
2135                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2136                      next + 1, is_midchain, &error1);
2137
2138       if (is_mcast)
2139         {
2140           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2141                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2142                     IP4_ERROR_SAME_INTERFACE : error0);
2143           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2144                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2145                     IP4_ERROR_SAME_INTERFACE : error1);
2146         }
2147
2148       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2149        * to see the IP header */
2150       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2151         {
2152           u32 next_index = adj0[0].rewrite_header.next_index;
2153           vlib_buffer_advance (b[0], -(word) rw_len0);
2154
2155           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2156           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2157
2158           if (PREDICT_FALSE
2159               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2160             vnet_feature_arc_start (lm->output_feature_arc_index,
2161                                     tx_sw_if_index0, &next_index, b[0]);
2162           next[0] = next_index;
2163           if (is_midchain)
2164             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2165                                         0 /* is_ip6 */ ,
2166                                         0 /* with gso */ );
2167         }
2168       else
2169         {
2170           b[0]->error = error_node->errors[error0];
2171           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2172             ip4_ttl_inc (b[0], ip0);
2173         }
2174       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2175         {
2176           u32 next_index = adj1[0].rewrite_header.next_index;
2177           vlib_buffer_advance (b[1], -(word) rw_len1);
2178
2179           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2180           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2181
2182           if (PREDICT_FALSE
2183               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2184             vnet_feature_arc_start (lm->output_feature_arc_index,
2185                                     tx_sw_if_index1, &next_index, b[1]);
2186           next[1] = next_index;
2187           if (is_midchain)
2188             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2189                                         0 /* is_ip6 */ ,
2190                                         0 /* with gso */ );
2191         }
2192       else
2193         {
2194           b[1]->error = error_node->errors[error1];
2195           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2196             ip4_ttl_inc (b[1], ip1);
2197         }
2198
2199       /* Guess we are only writing on simple Ethernet header. */
2200       vnet_rewrite_two_headers (adj0[0], adj1[0],
2201                                 ip0, ip1, sizeof (ethernet_header_t));
2202
2203       if (do_counters)
2204         {
2205           if (error0 == IP4_ERROR_NONE)
2206             vlib_increment_combined_counter
2207               (&adjacency_counters,
2208                thread_index,
2209                adj_index0, 1,
2210                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2211
2212           if (error1 == IP4_ERROR_NONE)
2213             vlib_increment_combined_counter
2214               (&adjacency_counters,
2215                thread_index,
2216                adj_index1, 1,
2217                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2218         }
2219
2220       if (is_midchain)
2221         {
2222           if (error0 == IP4_ERROR_NONE && adj0->sub_type.midchain.fixup_func)
2223             adj0->sub_type.midchain.fixup_func
2224               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2225           if (error1 == IP4_ERROR_NONE && adj1->sub_type.midchain.fixup_func)
2226             adj1->sub_type.midchain.fixup_func
2227               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2228         }
2229
2230       if (is_mcast)
2231         {
2232           /* copy bytes from the IP address into the MAC rewrite */
2233           if (error0 == IP4_ERROR_NONE)
2234             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2235                                         adj0->rewrite_header.dst_mcast_offset,
2236                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2237           if (error1 == IP4_ERROR_NONE)
2238             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2239                                         adj1->rewrite_header.dst_mcast_offset,
2240                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2241         }
2242
2243       next += 2;
2244       b += 2;
2245       n_left_from -= 2;
2246     }
2247 #elif (CLIB_N_PREFETCHES >= 4)
2248   next = nexts;
2249   b = bufs;
2250   while (n_left_from >= 1)
2251     {
2252       ip_adjacency_t *adj0;
2253       ip4_header_t *ip0;
2254       u32 rw_len0, error0, adj_index0;
2255       u32 tx_sw_if_index0;
2256       u8 *p;
2257
2258       /* Prefetch next iteration */
2259       if (PREDICT_TRUE (n_left_from >= 4))
2260         {
2261           ip_adjacency_t *adj2;
2262           u32 adj_index2;
2263
2264           vlib_prefetch_buffer_header (b[3], LOAD);
2265           vlib_prefetch_buffer_data (b[2], LOAD);
2266
2267           /* Prefetch adj->rewrite_header */
2268           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2269           adj2 = adj_get (adj_index2);
2270           p = (u8 *) adj2;
2271           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2272                          LOAD);
2273         }
2274
2275       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2276
2277       /*
2278        * Prefetch the per-adjacency counters
2279        */
2280       if (do_counters)
2281         {
2282           vlib_prefetch_combined_counter (&adjacency_counters,
2283                                           thread_index, adj_index0);
2284         }
2285
2286       ip0 = vlib_buffer_get_current (b[0]);
2287
2288       error0 = IP4_ERROR_NONE;
2289
2290       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2291
2292       /* Rewrite packet header and updates lengths. */
2293       adj0 = adj_get (adj_index0);
2294
2295       /* Rewrite header was prefetched. */
2296       rw_len0 = adj0[0].rewrite_header.data_bytes;
2297       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2298
2299       /* Check MTU of outgoing interface. */
2300       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2301
2302       if (b[0]->flags & VNET_BUFFER_F_GSO)
2303         ip0_len = gso_mtu_sz (b[0]);
2304
2305       ip4_mtu_check (b[0], ip0_len,
2306                      adj0[0].rewrite_header.max_l3_packet_bytes,
2307                      ip0->flags_and_fragment_offset &
2308                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2309                      next + 0, is_midchain, &error0);
2310
2311       if (is_mcast)
2312         {
2313           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2314                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2315                     IP4_ERROR_SAME_INTERFACE : error0);
2316         }
2317
2318       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2319        * to see the IP header */
2320       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2321         {
2322           u32 next_index = adj0[0].rewrite_header.next_index;
2323           vlib_buffer_advance (b[0], -(word) rw_len0);
2324           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2325           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2326
2327           if (PREDICT_FALSE
2328               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2329             vnet_feature_arc_start (lm->output_feature_arc_index,
2330                                     tx_sw_if_index0, &next_index, b[0]);
2331           next[0] = next_index;
2332
2333           if (is_midchain)
2334             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2335                                         0 /* is_ip6 */ ,
2336                                         0 /* with gso */ );
2337
2338           /* Guess we are only writing on simple Ethernet header. */
2339           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2340
2341           /*
2342            * Bump the per-adjacency counters
2343            */
2344           if (do_counters)
2345             vlib_increment_combined_counter
2346               (&adjacency_counters,
2347                thread_index,
2348                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2349                                                            b[0]) + rw_len0);
2350
2351           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2352             adj0->sub_type.midchain.fixup_func
2353               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2354
2355           if (is_mcast)
2356             /* copy bytes from the IP address into the MAC rewrite */
2357             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2358                                         adj0->rewrite_header.dst_mcast_offset,
2359                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2360         }
2361       else
2362         {
2363           b[0]->error = error_node->errors[error0];
2364           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2365             ip4_ttl_inc (b[0], ip0);
2366         }
2367
2368       next += 1;
2369       b += 1;
2370       n_left_from -= 1;
2371     }
2372 #endif
2373
2374   while (n_left_from > 0)
2375     {
2376       ip_adjacency_t *adj0;
2377       ip4_header_t *ip0;
2378       u32 rw_len0, adj_index0, error0;
2379       u32 tx_sw_if_index0;
2380
2381       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2382
2383       adj0 = adj_get (adj_index0);
2384
2385       if (do_counters)
2386         vlib_prefetch_combined_counter (&adjacency_counters,
2387                                         thread_index, adj_index0);
2388
2389       ip0 = vlib_buffer_get_current (b[0]);
2390
2391       error0 = IP4_ERROR_NONE;
2392
2393       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2394
2395
2396       /* Update packet buffer attributes/set output interface. */
2397       rw_len0 = adj0[0].rewrite_header.data_bytes;
2398       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2399
2400       /* Check MTU of outgoing interface. */
2401       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2402       if (b[0]->flags & VNET_BUFFER_F_GSO)
2403         ip0_len = gso_mtu_sz (b[0]);
2404
2405       ip4_mtu_check (b[0], ip0_len,
2406                      adj0[0].rewrite_header.max_l3_packet_bytes,
2407                      ip0->flags_and_fragment_offset &
2408                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2409                      next + 0, is_midchain, &error0);
2410
2411       if (is_mcast)
2412         {
2413           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2414                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2415                     IP4_ERROR_SAME_INTERFACE : error0);
2416         }
2417
2418       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2419        * to see the IP header */
2420       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2421         {
2422           u32 next_index = adj0[0].rewrite_header.next_index;
2423           vlib_buffer_advance (b[0], -(word) rw_len0);
2424           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2425           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2426
2427           if (PREDICT_FALSE
2428               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2429             vnet_feature_arc_start (lm->output_feature_arc_index,
2430                                     tx_sw_if_index0, &next_index, b[0]);
2431           next[0] = next_index;
2432
2433           if (is_midchain)
2434             /* this acts on the packet that is about to be encapped */
2435             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2436                                         0 /* is_ip6 */ ,
2437                                         0 /* with gso */ );
2438
2439           /* Guess we are only writing on simple Ethernet header. */
2440           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2441
2442           if (do_counters)
2443             vlib_increment_combined_counter
2444               (&adjacency_counters,
2445                thread_index, adj_index0, 1,
2446                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2447
2448           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2449             adj0->sub_type.midchain.fixup_func
2450               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2451
2452           if (is_mcast)
2453             /* copy bytes from the IP address into the MAC rewrite */
2454             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2455                                         adj0->rewrite_header.dst_mcast_offset,
2456                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2457         }
2458       else
2459         {
2460           b[0]->error = error_node->errors[error0];
2461           /* undo the TTL decrement - we'll be back to do it again */
2462           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2463             ip4_ttl_inc (b[0], ip0);
2464         }
2465
2466       next += 1;
2467       b += 1;
2468       n_left_from -= 1;
2469     }
2470
2471
2472   /* Need to do trace after rewrites to pick up new packet data. */
2473   if (node->flags & VLIB_NODE_FLAG_TRACE)
2474     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2475
2476   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2477   return frame->n_vectors;
2478 }
2479
2480 always_inline uword
2481 ip4_rewrite_inline (vlib_main_t * vm,
2482                     vlib_node_runtime_t * node,
2483                     vlib_frame_t * frame,
2484                     int do_counters, int is_midchain, int is_mcast)
2485 {
2486   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2487                                       is_midchain, is_mcast);
2488 }
2489
2490
2491 /** @brief IPv4 rewrite node.
2492     @node ip4-rewrite
2493
2494     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2495     header checksum, fetch the ip adjacency, check the outbound mtu,
2496     apply the adjacency rewrite, and send pkts to the adjacency
2497     rewrite header's rewrite_next_index.
2498
2499     @param vm vlib_main_t corresponding to the current thread
2500     @param node vlib_node_runtime_t
2501     @param frame vlib_frame_t whose contents should be dispatched
2502
2503     @par Graph mechanics: buffer metadata, next index usage
2504
2505     @em Uses:
2506     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2507         - the rewrite adjacency index
2508     - <code>adj->lookup_next_index</code>
2509         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2510           the packet will be dropped.
2511     - <code>adj->rewrite_header</code>
2512         - Rewrite string length, rewrite string, next_index
2513
2514     @em Sets:
2515     - <code>b->current_data, b->current_length</code>
2516         - Updated net of applying the rewrite string
2517
2518     <em>Next Indices:</em>
2519     - <code> adj->rewrite_header.next_index </code>
2520       or @c ip4-drop
2521 */
2522
2523 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2524                                  vlib_frame_t * frame)
2525 {
2526   if (adj_are_counters_enabled ())
2527     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2528   else
2529     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2530 }
2531
2532 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2533                                        vlib_node_runtime_t * node,
2534                                        vlib_frame_t * frame)
2535 {
2536   if (adj_are_counters_enabled ())
2537     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2538   else
2539     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2540 }
2541
2542 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2543                                   vlib_node_runtime_t * node,
2544                                   vlib_frame_t * frame)
2545 {
2546   if (adj_are_counters_enabled ())
2547     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2548   else
2549     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2550 }
2551
2552 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2553                                        vlib_node_runtime_t * node,
2554                                        vlib_frame_t * frame)
2555 {
2556   if (adj_are_counters_enabled ())
2557     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2558   else
2559     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2560 }
2561
2562 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2563                                         vlib_node_runtime_t * node,
2564                                         vlib_frame_t * frame)
2565 {
2566   if (adj_are_counters_enabled ())
2567     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2568   else
2569     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2570 }
2571
2572 /* *INDENT-OFF* */
2573 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2574   .name = "ip4-rewrite",
2575   .vector_size = sizeof (u32),
2576
2577   .format_trace = format_ip4_rewrite_trace,
2578
2579   .n_next_nodes = IP4_REWRITE_N_NEXT,
2580   .next_nodes = {
2581     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2582     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2583     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2584   },
2585 };
2586
2587 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2588   .name = "ip4-rewrite-bcast",
2589   .vector_size = sizeof (u32),
2590
2591   .format_trace = format_ip4_rewrite_trace,
2592   .sibling_of = "ip4-rewrite",
2593 };
2594
2595 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2596   .name = "ip4-rewrite-mcast",
2597   .vector_size = sizeof (u32),
2598
2599   .format_trace = format_ip4_rewrite_trace,
2600   .sibling_of = "ip4-rewrite",
2601 };
2602
2603 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2604   .name = "ip4-mcast-midchain",
2605   .vector_size = sizeof (u32),
2606
2607   .format_trace = format_ip4_rewrite_trace,
2608   .sibling_of = "ip4-rewrite",
2609 };
2610
2611 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2612   .name = "ip4-midchain",
2613   .vector_size = sizeof (u32),
2614   .format_trace = format_ip4_rewrite_trace,
2615   .sibling_of = "ip4-rewrite",
2616 };
/* *INDENT-ON* */
2618
2619 static int
2620 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2621 {
2622   ip4_fib_mtrie_t *mtrie0;
2623   ip4_fib_mtrie_leaf_t leaf0;
2624   u32 lbi0;
2625
2626   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2627
2628   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2629   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2630   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2631
2632   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2633
2634   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2635 }
2636
2637 static clib_error_t *
2638 test_lookup_command_fn (vlib_main_t * vm,
2639                         unformat_input_t * input, vlib_cli_command_t * cmd)
2640 {
2641   ip4_fib_t *fib;
2642   u32 table_id = 0;
2643   f64 count = 1;
2644   u32 n;
2645   int i;
2646   ip4_address_t ip4_base_address;
2647   u64 errors = 0;
2648
2649   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2650     {
2651       if (unformat (input, "table %d", &table_id))
2652         {
2653           /* Make sure the entry exists. */
2654           fib = ip4_fib_get (table_id);
2655           if ((fib) && (fib->index != table_id))
2656             return clib_error_return (0, "<fib-index> %d does not exist",
2657                                       table_id);
2658         }
2659       else if (unformat (input, "count %f", &count))
2660         ;
2661
2662       else if (unformat (input, "%U",
2663                          unformat_ip4_address, &ip4_base_address))
2664         ;
2665       else
2666         return clib_error_return (0, "unknown input `%U'",
2667                                   format_unformat_error, input);
2668     }
2669
2670   n = count;
2671
2672   for (i = 0; i < n; i++)
2673     {
2674       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2675         errors++;
2676
2677       ip4_base_address.as_u32 =
2678         clib_host_to_net_u32 (1 +
2679                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2680     }
2681
2682   if (errors)
2683     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2684   else
2685     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2686
2687   return 0;
2688 }
2689
2690 /*?
2691  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2692  * given FIB table to determine if there is a conflict with the
2693  * adjacency table. The fib-id can be determined by using the
 * '<em>show ip fib</em>' command. If fib-id is not entered, a default value
2695  * of 0 is used.
2696  *
2697  * @todo This command uses fib-id, other commands use table-id (not
2698  * just a name, they are different indexes). Would like to change this
2699  * to table-id for consistency.
2700  *
2701  * @cliexpar
2702  * Example of how to run the test lookup command:
2703  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2704  * No errors in 2 lookups
2705  * @cliexend
2706 ?*/
2707 /* *INDENT-OFF* */
2708 VLIB_CLI_COMMAND (lookup_test_command, static) =
2709 {
2710   .path = "test lookup",
2711   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2712   .function = test_lookup_command_fn,
2713 };
2714 /* *INDENT-ON* */
2715
2716 #ifndef CLIB_MARCH_VARIANT
2717 int
2718 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2719 {
2720   u32 fib_index;
2721
2722   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2723
2724   if (~0 == fib_index)
2725     return VNET_API_ERROR_NO_SUCH_FIB;
2726
2727   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2728                                   flow_hash_config);
2729
2730   return 0;
2731 }
2732 #endif
2733
2734 static clib_error_t *
2735 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2736                              unformat_input_t * input,
2737                              vlib_cli_command_t * cmd)
2738 {
2739   int matched = 0;
2740   u32 table_id = 0;
2741   u32 flow_hash_config = 0;
2742   int rv;
2743
2744   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2745     {
2746       if (unformat (input, "table %d", &table_id))
2747         matched = 1;
2748 #define _(a,v) \
2749     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2750       foreach_flow_hash_bit
2751 #undef _
2752         else
2753         break;
2754     }
2755
2756   if (matched == 0)
2757     return clib_error_return (0, "unknown input `%U'",
2758                               format_unformat_error, input);
2759
2760   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2761   switch (rv)
2762     {
2763     case 0:
2764       break;
2765
2766     case VNET_API_ERROR_NO_SUCH_FIB:
2767       return clib_error_return (0, "no such FIB table %d", table_id);
2768
2769     default:
2770       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2771       break;
2772     }
2773
2774   return 0;
2775 }
2776
2777 /*?
2778  * Configure the set of IPv4 fields used by the flow hash.
2779  *
2780  * @cliexpar
2781  * Example of how to set the flow hash on a given table:
2782  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2784  * @cliexstart{show ip fib}
2785  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2786  * 0.0.0.0/0
2787  *   unicast-ip4-chain
2788  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2789  *     [0] [@0]: dpo-drop ip6
2790  * 0.0.0.0/32
2791  *   unicast-ip4-chain
2792  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2793  *     [0] [@0]: dpo-drop ip6
2794  * 224.0.0.0/8
2795  *   unicast-ip4-chain
2796  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2797  *     [0] [@0]: dpo-drop ip6
2798  * 6.0.1.2/32
2799  *   unicast-ip4-chain
2800  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2801  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2802  * 7.0.0.1/32
2803  *   unicast-ip4-chain
2804  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2805  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2806  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2807  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2808  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2809  * 240.0.0.0/8
2810  *   unicast-ip4-chain
2811  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2812  *     [0] [@0]: dpo-drop ip6
2813  * 255.255.255.255/32
2814  *   unicast-ip4-chain
2815  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2816  *     [0] [@0]: dpo-drop ip6
2817  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2818  * 0.0.0.0/0
2819  *   unicast-ip4-chain
2820  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2821  *     [0] [@0]: dpo-drop ip6
2822  * 0.0.0.0/32
2823  *   unicast-ip4-chain
2824  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2825  *     [0] [@0]: dpo-drop ip6
2826  * 172.16.1.0/24
2827  *   unicast-ip4-chain
2828  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2829  *     [0] [@4]: ipv4-glean: af_packet0
2830  * 172.16.1.1/32
2831  *   unicast-ip4-chain
2832  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2833  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2834  * 172.16.1.2/32
2835  *   unicast-ip4-chain
2836  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2837  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2838  * 172.16.2.0/24
2839  *   unicast-ip4-chain
2840  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2841  *     [0] [@4]: ipv4-glean: af_packet1
2842  * 172.16.2.1/32
2843  *   unicast-ip4-chain
2844  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2845  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2846  * 224.0.0.0/8
2847  *   unicast-ip4-chain
2848  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2849  *     [0] [@0]: dpo-drop ip6
2850  * 240.0.0.0/8
2851  *   unicast-ip4-chain
2852  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2853  *     [0] [@0]: dpo-drop ip6
2854  * 255.255.255.255/32
2855  *   unicast-ip4-chain
2856  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2857  *     [0] [@0]: dpo-drop ip6
2858  * @cliexend
2859 ?*/
2860 /* *INDENT-OFF* */
2861 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2862 {
2863   .path = "set ip flow-hash",
2864   .short_help =
2865   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2866   .function = set_ip_flow_hash_command_fn,
2867 };
2868 /* *INDENT-ON* */
2869
2870 #ifndef CLIB_MARCH_VARIANT
2871 int
2872 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2873                              u32 table_index)
2874 {
2875   vnet_main_t *vnm = vnet_get_main ();
2876   vnet_interface_main_t *im = &vnm->interface_main;
2877   ip4_main_t *ipm = &ip4_main;
2878   ip_lookup_main_t *lm = &ipm->lookup_main;
2879   vnet_classify_main_t *cm = &vnet_classify_main;
2880   ip4_address_t *if_addr;
2881
2882   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2883     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2884
2885   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2886     return VNET_API_ERROR_NO_SUCH_ENTRY;
2887
2888   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2889   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2890
2891   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2892
2893   if (NULL != if_addr)
2894     {
2895       fib_prefix_t pfx = {
2896         .fp_len = 32,
2897         .fp_proto = FIB_PROTOCOL_IP4,
2898         .fp_addr.ip4 = *if_addr,
2899       };
2900       u32 fib_index;
2901
2902       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2903                                                        sw_if_index);
2904
2905
2906       if (table_index != (u32) ~ 0)
2907         {
2908           dpo_id_t dpo = DPO_INVALID;
2909
2910           dpo_set (&dpo,
2911                    DPO_CLASSIFY,
2912                    DPO_PROTO_IP4,
2913                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2914
2915           fib_table_entry_special_dpo_add (fib_index,
2916                                            &pfx,
2917                                            FIB_SOURCE_CLASSIFY,
2918                                            FIB_ENTRY_FLAG_NONE, &dpo);
2919           dpo_reset (&dpo);
2920         }
2921       else
2922         {
2923           fib_table_entry_special_remove (fib_index,
2924                                           &pfx, FIB_SOURCE_CLASSIFY);
2925         }
2926     }
2927
2928   return 0;
2929 }
2930 #endif
2931
2932 static clib_error_t *
2933 set_ip_classify_command_fn (vlib_main_t * vm,
2934                             unformat_input_t * input,
2935                             vlib_cli_command_t * cmd)
2936 {
2937   u32 table_index = ~0;
2938   int table_index_set = 0;
2939   u32 sw_if_index = ~0;
2940   int rv;
2941
2942   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2943     {
2944       if (unformat (input, "table-index %d", &table_index))
2945         table_index_set = 1;
2946       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2947                          vnet_get_main (), &sw_if_index))
2948         ;
2949       else
2950         break;
2951     }
2952
2953   if (table_index_set == 0)
2954     return clib_error_return (0, "classify table-index must be specified");
2955
2956   if (sw_if_index == ~0)
2957     return clib_error_return (0, "interface / subif must be specified");
2958
2959   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2960
2961   switch (rv)
2962     {
2963     case 0:
2964       break;
2965
2966     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2967       return clib_error_return (0, "No such interface");
2968
2969     case VNET_API_ERROR_NO_SUCH_ENTRY:
2970       return clib_error_return (0, "No such classifier table");
2971     }
2972   return 0;
2973 }
2974
2975 /*?
2976  * Assign a classification table to an interface. The classification
2977  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2979  * on an interface.
2980  *
2981  * @cliexpar
2982  * Example of how to assign a classification table to an interface:
2983  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2984 ?*/
2985 /* *INDENT-OFF* */
2986 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2987 {
2988     .path = "set ip classify",
2989     .short_help =
2990     "set ip classify intfc <interface> table-index <classify-idx>",
2991     .function = set_ip_classify_command_fn,
2992 };
2993 /* *INDENT-ON* */
2994
2995 static clib_error_t *
2996 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2997 {
2998   ip4_main_t *im = &ip4_main;
2999   uword heapsize = 0;
3000
3001   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3002     {
3003       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3004         ;
3005       else
3006         return clib_error_return (0,
3007                                   "invalid heap-size parameter `%U'",
3008                                   format_unformat_error, input);
3009     }
3010
3011   im->mtrie_heap_size = heapsize;
3012
3013   return 0;
3014 }
3015
3016 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3017
3018 /*
3019  * fd.io coding-style-patch-verification: ON
3020  *
3021  * Local Variables:
3022  * eval: (c-set-style "gnu")
3023  * End:
3024  */