ip: do not clear the locally-originated flag
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57
58 #include <vnet/ip/ip4_forward.h>
59 #include <vnet/interface_output.h>
60 #include <vnet/classify/vnet_classify.h>
61
62 /** @brief IPv4 lookup node.
63     @node ip4-lookup
64
65     This is the main IPv4 lookup dispatch node.
66
67     @param vm vlib_main_t corresponding to the current thread
68     @param node vlib_node_runtime_t
69     @param frame vlib_frame_t whose contents should be dispatched
70
71     @par Graph mechanics: buffer metadata, next index usage
72
73     @em Uses:
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
75         - Indicates the @c sw_if_index value of the interface that the
76           packet was received on.
77     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
78         - When the value is @c ~0 then the node performs a longest prefix
79           match (LPM) for the packet destination address in the FIB attached
80           to the receive interface.
81         - Otherwise perform LPM for the packet destination address in the
82           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
83           value (0, 1, ...) and not a VRF id.
84
85     @em Sets:
86     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
87         - The lookup result adjacency index.
88
89     <em>Next Index:</em>
90     - Dispatches the packet to the node index found in
91       ip_adjacency_t @c adj->lookup_next_index
92       (where @c adj is the lookup result adjacency).
93 */
94 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
95                                 vlib_frame_t * frame)
96 {
97   return ip4_lookup_inline (vm, node, frame);
98 }
99
100 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
101
102 /* *INDENT-OFF* */
103 VLIB_REGISTER_NODE (ip4_lookup_node) =
104 {
105   .name = "ip4-lookup",
106   .vector_size = sizeof (u32),
107   .format_trace = format_ip4_lookup_trace,
108   .n_next_nodes = IP_LOOKUP_N_NEXT,
109   .next_nodes = IP4_LOOKUP_NEXT_NODES,
110 };
111 /* *INDENT-ON* */
112
113 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
114                                       vlib_node_runtime_t * node,
115                                       vlib_frame_t * frame)
116 {
117   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
118   u32 n_left, *from;
119   u32 thread_index = vm->thread_index;
120   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
121   u16 nexts[VLIB_FRAME_SIZE], *next;
122
123   from = vlib_frame_vector_args (frame);
124   n_left = frame->n_vectors;
125   next = nexts;
126
127   vlib_get_buffers (vm, from, bufs, n_left);
128
129   while (n_left >= 4)
130     {
131       const load_balance_t *lb0, *lb1;
132       const ip4_header_t *ip0, *ip1;
133       u32 lbi0, hc0, lbi1, hc1;
134       const dpo_id_t *dpo0, *dpo1;
135
136       /* Prefetch next iteration. */
137       {
138         vlib_prefetch_buffer_header (b[2], LOAD);
139         vlib_prefetch_buffer_header (b[3], LOAD);
140
141         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
142         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
143       }
144
145       ip0 = vlib_buffer_get_current (b[0]);
146       ip1 = vlib_buffer_get_current (b[1]);
147       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
148       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
149
150       lb0 = load_balance_get (lbi0);
151       lb1 = load_balance_get (lbi1);
152
153       /*
154        * this node is for via FIBs we can re-use the hash value from the
155        * to node if present.
156        * We don't want to use the same hash value at each level in the recursion
157        * graph as that would lead to polarisation
158        */
159       hc0 = hc1 = 0;
160
161       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
162         {
163           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
164             {
165               hc0 = vnet_buffer (b[0])->ip.flow_hash =
166                 vnet_buffer (b[0])->ip.flow_hash >> 1;
167             }
168           else
169             {
170               hc0 = vnet_buffer (b[0])->ip.flow_hash =
171                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
172             }
173           dpo0 = load_balance_get_fwd_bucket
174             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
175         }
176       else
177         {
178           dpo0 = load_balance_get_bucket_i (lb0, 0);
179         }
180       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
181         {
182           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
183             {
184               hc1 = vnet_buffer (b[1])->ip.flow_hash =
185                 vnet_buffer (b[1])->ip.flow_hash >> 1;
186             }
187           else
188             {
189               hc1 = vnet_buffer (b[1])->ip.flow_hash =
190                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
191             }
192           dpo1 = load_balance_get_fwd_bucket
193             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
194         }
195       else
196         {
197           dpo1 = load_balance_get_bucket_i (lb1, 0);
198         }
199
200       next[0] = dpo0->dpoi_next_node;
201       next[1] = dpo1->dpoi_next_node;
202
203       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
204       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
205
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
208       vlib_increment_combined_counter
209         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
210
211       b += 2;
212       next += 2;
213       n_left -= 2;
214     }
215
216   while (n_left > 0)
217     {
218       const load_balance_t *lb0;
219       const ip4_header_t *ip0;
220       const dpo_id_t *dpo0;
221       u32 lbi0, hc0;
222
223       ip0 = vlib_buffer_get_current (b[0]);
224       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
225
226       lb0 = load_balance_get (lbi0);
227
228       hc0 = 0;
229       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
230         {
231           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
232             {
233               hc0 = vnet_buffer (b[0])->ip.flow_hash =
234                 vnet_buffer (b[0])->ip.flow_hash >> 1;
235             }
236           else
237             {
238               hc0 = vnet_buffer (b[0])->ip.flow_hash =
239                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
240             }
241           dpo0 = load_balance_get_fwd_bucket
242             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
243         }
244       else
245         {
246           dpo0 = load_balance_get_bucket_i (lb0, 0);
247         }
248
249       next[0] = dpo0->dpoi_next_node;
250       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
251
252       vlib_increment_combined_counter
253         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
254
255       b += 1;
256       next += 1;
257       n_left -= 1;
258     }
259
260   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
261   if (node->flags & VLIB_NODE_FLAG_TRACE)
262     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
263
264   return frame->n_vectors;
265 }
266
267 /* *INDENT-OFF* */
268 VLIB_REGISTER_NODE (ip4_load_balance_node) =
269 {
270   .name = "ip4-load-balance",
271   .vector_size = sizeof (u32),
272   .sibling_of = "ip4-lookup",
273   .format_trace = format_ip4_lookup_trace,
274 };
275 /* *INDENT-ON* */
276
277 #ifndef CLIB_MARCH_VARIANT
278 /* get first interface address */
279 ip4_address_t *
280 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
281                              ip_interface_address_t ** result_ia)
282 {
283   ip_lookup_main_t *lm = &im->lookup_main;
284   ip_interface_address_t *ia = 0;
285   ip4_address_t *result = 0;
286
287   /* *INDENT-OFF* */
288   foreach_ip_interface_address
289     (lm, ia, sw_if_index,
290      1 /* honor unnumbered */ ,
291      ({
292        ip4_address_t * a =
293          ip_interface_address_get_address (lm, ia);
294        result = a;
295        break;
296      }));
297   /* *INDENT-OFF* */
298   if (result_ia)
299     *result_ia = result ? ia : 0;
300   return result;
301 }
302 #endif
303
304 static void
305 ip4_add_subnet_bcast_route (u32 fib_index,
306                             fib_prefix_t *pfx,
307                             u32 sw_if_index)
308 {
309   vnet_sw_interface_flags_t iflags;
310
311   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
312
313   fib_table_entry_special_remove(fib_index,
314                                  pfx,
315                                  FIB_SOURCE_INTERFACE);
316
317   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
318     {
319       fib_table_entry_update_one_path (fib_index, pfx,
320                                        FIB_SOURCE_INTERFACE,
321                                        FIB_ENTRY_FLAG_NONE,
322                                        DPO_PROTO_IP4,
323                                        /* No next-hop address */
324                                        &ADJ_BCAST_ADDR,
325                                        sw_if_index,
326                                        // invalid FIB index
327                                        ~0,
328                                        1,
329                                        // no out-label stack
330                                        NULL,
331                                        FIB_ROUTE_PATH_FLAG_NONE);
332     }
333   else
334     {
335         fib_table_entry_special_add(fib_index,
336                                     pfx,
337                                     FIB_SOURCE_INTERFACE,
338                                     (FIB_ENTRY_FLAG_DROP |
339                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
340     }
341 }
342
343 static void
344 ip4_add_interface_prefix_routes (ip4_main_t *im,
345                                  u32 sw_if_index,
346                                  u32 fib_index,
347                                  ip_interface_address_t * a)
348 {
349   ip_lookup_main_t *lm = &im->lookup_main;
350   ip_interface_prefix_t *if_prefix;
351   ip4_address_t *address = ip_interface_address_get_address (lm, a);
352
353   ip_interface_prefix_key_t key = {
354     .prefix = {
355       .fp_len = a->address_length,
356       .fp_proto = FIB_PROTOCOL_IP4,
357       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
358     },
359     .sw_if_index = sw_if_index,
360   };
361
362   fib_prefix_t pfx_special = {
363     .fp_proto = FIB_PROTOCOL_IP4,
364   };
365
366   /* If prefix already set on interface, just increment ref count & return */
367   if_prefix = ip_get_interface_prefix (lm, &key);
368   if (if_prefix)
369     {
370       if_prefix->ref_count += 1;
371       return;
372     }
373
374   /* New prefix - allocate a pool entry, initialize it, add to the hash */
375   pool_get (lm->if_prefix_pool, if_prefix);
376   if_prefix->ref_count = 1;
377   if_prefix->src_ia_index = a - lm->if_address_pool;
378   clib_memcpy (&if_prefix->key, &key, sizeof (key));
379   mhash_set (&lm->prefix_to_if_prefix_index, &key,
380              if_prefix - lm->if_prefix_pool, 0 /* old value */);
381
382   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
383   if (a->address_length <= 30)
384     {
385       pfx_special.fp_len = a->address_length;
386       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
387
388       /* set the glean route for the prefix */
389       fib_table_entry_update_one_path (fib_index, &pfx_special,
390                                        FIB_SOURCE_INTERFACE,
391                                        (FIB_ENTRY_FLAG_CONNECTED |
392                                         FIB_ENTRY_FLAG_ATTACHED),
393                                        DPO_PROTO_IP4,
394                                        /* No next-hop address */
395                                        NULL,
396                                        sw_if_index,
397                                        /* invalid FIB index */
398                                        ~0,
399                                        1,
400                                        /* no out-label stack */
401                                        NULL,
402                                        FIB_ROUTE_PATH_FLAG_NONE);
403
404       /* set a drop route for the base address of the prefix */
405       pfx_special.fp_len = 32;
406       pfx_special.fp_addr.ip4.as_u32 =
407         address->as_u32 & im->fib_masks[a->address_length];
408
409       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
410         fib_table_entry_special_add (fib_index, &pfx_special,
411                                      FIB_SOURCE_INTERFACE,
412                                      (FIB_ENTRY_FLAG_DROP |
413                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
414
415       /* set a route for the broadcast address of the prefix */
416       pfx_special.fp_len = 32;
417       pfx_special.fp_addr.ip4.as_u32 =
418         address->as_u32 | ~im->fib_masks[a->address_length];
419       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
420         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
421
422
423     }
424   /* length == 31 - add an attached route for the other address */
425   else if (a->address_length == 31)
426     {
427       pfx_special.fp_len = 32;
428       pfx_special.fp_addr.ip4.as_u32 =
429         address->as_u32 ^ clib_host_to_net_u32(1);
430
431       fib_table_entry_update_one_path (fib_index, &pfx_special,
432                                        FIB_SOURCE_INTERFACE,
433                                        (FIB_ENTRY_FLAG_ATTACHED),
434                                        DPO_PROTO_IP4,
435                                        &pfx_special.fp_addr,
436                                        sw_if_index,
437                                        /* invalid FIB index */
438                                        ~0,
439                                        1,
440                                        NULL,
441                                        FIB_ROUTE_PATH_FLAG_NONE);
442     }
443 }
444
445 static void
446 ip4_add_interface_routes (u32 sw_if_index,
447                           ip4_main_t * im, u32 fib_index,
448                           ip_interface_address_t * a)
449 {
450   ip_lookup_main_t *lm = &im->lookup_main;
451   ip4_address_t *address = ip_interface_address_get_address (lm, a);
452   fib_prefix_t pfx = {
453     .fp_len = 32,
454     .fp_proto = FIB_PROTOCOL_IP4,
455     .fp_addr.ip4 = *address,
456   };
457
458   /* set special routes for the prefix if needed */
459   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
460
461   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
462     {
463       u32 classify_table_index =
464         lm->classify_table_index_by_sw_if_index[sw_if_index];
465       if (classify_table_index != (u32) ~ 0)
466         {
467           dpo_id_t dpo = DPO_INVALID;
468
469           dpo_set (&dpo,
470                    DPO_CLASSIFY,
471                    DPO_PROTO_IP4,
472                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
473
474           fib_table_entry_special_dpo_add (fib_index,
475                                            &pfx,
476                                            FIB_SOURCE_CLASSIFY,
477                                            FIB_ENTRY_FLAG_NONE, &dpo);
478           dpo_reset (&dpo);
479         }
480     }
481
482   fib_table_entry_update_one_path (fib_index, &pfx,
483                                    FIB_SOURCE_INTERFACE,
484                                    (FIB_ENTRY_FLAG_CONNECTED |
485                                     FIB_ENTRY_FLAG_LOCAL),
486                                    DPO_PROTO_IP4,
487                                    &pfx.fp_addr,
488                                    sw_if_index,
489                                    // invalid FIB index
490                                    ~0,
491                                    1, NULL,
492                                    FIB_ROUTE_PATH_FLAG_NONE);
493 }
494
495 static void
496 ip4_del_interface_prefix_routes (ip4_main_t * im,
497                                  u32 sw_if_index,
498                                  u32 fib_index,
499                                  ip4_address_t * address,
500                                  u32 address_length)
501 {
502   ip_lookup_main_t *lm = &im->lookup_main;
503   ip_interface_prefix_t *if_prefix;
504
505   ip_interface_prefix_key_t key = {
506     .prefix = {
507       .fp_len = address_length,
508       .fp_proto = FIB_PROTOCOL_IP4,
509       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
510     },
511     .sw_if_index = sw_if_index,
512   };
513
514   fib_prefix_t pfx_special = {
515     .fp_len = 32,
516     .fp_proto = FIB_PROTOCOL_IP4,
517   };
518
519   if_prefix = ip_get_interface_prefix (lm, &key);
520   if (!if_prefix)
521     {
522       clib_warning ("Prefix not found while deleting %U",
523                     format_ip4_address_and_length, address, address_length);
524       return;
525     }
526
527   if_prefix->ref_count -= 1;
528
529   /*
530    * Routes need to be adjusted if:
531    * - deleting last intf addr in prefix
532    * - deleting intf addr used as default source address in glean adjacency
533    *
534    * We're done now otherwise
535    */
536   if ((if_prefix->ref_count > 0) &&
537       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
538     return;
539
540   /* length <= 30, delete glean route, first address, last address */
541   if (address_length <= 30)
542     {
543
544       /* remove glean route for prefix */
545       pfx_special.fp_addr.ip4 = *address;
546       pfx_special.fp_len = address_length;
547       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
548
549       /* if no more intf addresses in prefix, remove other special routes */
550       if (!if_prefix->ref_count)
551         {
552           /* first address in prefix */
553           pfx_special.fp_addr.ip4.as_u32 =
554             address->as_u32 & im->fib_masks[address_length];
555           pfx_special.fp_len = 32;
556
557           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
558           fib_table_entry_special_remove (fib_index,
559                                           &pfx_special,
560                                           FIB_SOURCE_INTERFACE);
561
562           /* prefix broadcast address */
563           pfx_special.fp_addr.ip4.as_u32 =
564             address->as_u32 | ~im->fib_masks[address_length];
565           pfx_special.fp_len = 32;
566
567           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
568           fib_table_entry_special_remove (fib_index,
569                                           &pfx_special,
570                                           FIB_SOURCE_INTERFACE);
571         }
572       else
573         /* default source addr just got deleted, find another */
574         {
575           ip_interface_address_t *new_src_ia = NULL;
576           ip4_address_t *new_src_addr = NULL;
577
578           new_src_addr =
579             ip4_interface_address_matching_destination
580               (im, address, sw_if_index, &new_src_ia);
581
582           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
583
584           pfx_special.fp_len = address_length;
585           pfx_special.fp_addr.ip4 = *new_src_addr;
586
587           /* set new glean route for the prefix */
588           fib_table_entry_update_one_path (fib_index, &pfx_special,
589                                            FIB_SOURCE_INTERFACE,
590                                            (FIB_ENTRY_FLAG_CONNECTED |
591                                             FIB_ENTRY_FLAG_ATTACHED),
592                                            DPO_PROTO_IP4,
593                                            /* No next-hop address */
594                                            NULL,
595                                            sw_if_index,
596                                            /* invalid FIB index */
597                                            ~0,
598                                            1,
599                                            /* no out-label stack */
600                                            NULL,
601                                            FIB_ROUTE_PATH_FLAG_NONE);
602           return;
603         }
604     }
605   /* length == 31, delete attached route for the other address */
606   else if (address_length == 31)
607     {
608       pfx_special.fp_addr.ip4.as_u32 =
609         address->as_u32 ^ clib_host_to_net_u32(1);
610
611       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
612     }
613
614   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
615   pool_put (lm->if_prefix_pool, if_prefix);
616 }
617
618 static void
619 ip4_del_interface_routes (u32 sw_if_index,
620                           ip4_main_t * im,
621                           u32 fib_index,
622                           ip4_address_t * address, u32 address_length)
623 {
624   fib_prefix_t pfx = {
625     .fp_len = address_length,
626     .fp_proto = FIB_PROTOCOL_IP4,
627     .fp_addr.ip4 = *address,
628   };
629
630   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
631                                    address, address_length);
632
633   pfx.fp_len = 32;
634   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
635 }
636
637 #ifndef CLIB_MARCH_VARIANT
638 void
639 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
640 {
641   ip4_main_t *im = &ip4_main;
642
643   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
644
645   /*
646    * enable/disable only on the 1<->0 transition
647    */
648   if (is_enable)
649     {
650       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
651         return;
652     }
653   else
654     {
655       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
656       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
657         return;
658     }
659   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
660                                !is_enable, 0, 0);
661
662
663   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
664                                sw_if_index, !is_enable, 0, 0);
665
666   {
667     ip4_enable_disable_interface_callback_t *cb;
668     vec_foreach (cb, im->enable_disable_interface_callbacks)
669       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
670   }
671 }
672
673 static clib_error_t *
674 ip4_add_del_interface_address_internal (vlib_main_t * vm,
675                                         u32 sw_if_index,
676                                         ip4_address_t * address,
677                                         u32 address_length, u32 is_del)
678 {
679   vnet_main_t *vnm = vnet_get_main ();
680   ip4_main_t *im = &ip4_main;
681   ip_lookup_main_t *lm = &im->lookup_main;
682   clib_error_t *error = 0;
683   u32 if_address_index, elts_before;
684   ip4_address_fib_t ip4_af, *addr_fib = 0;
685
686   /* local0 interface doesn't support IP addressing  */
687   if (sw_if_index == 0)
688     {
689       return
690        clib_error_create ("local0 interface doesn't support IP addressing");
691     }
692
693   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
694   ip4_addr_fib_init (&ip4_af, address,
695                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
696   vec_add1 (addr_fib, ip4_af);
697
698   /*
699    * there is no support for adj-fib handling in the presence of overlapping
700    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
701    * most routers do.
702    */
703   /* *INDENT-OFF* */
704   if (!is_del)
705     {
706       /* When adding an address check that it does not conflict
707          with an existing address on any interface in this table. */
708       ip_interface_address_t *ia;
709       vnet_sw_interface_t *sif;
710
711       pool_foreach(sif, vnm->interface_main.sw_interfaces,
712       ({
713           if (im->fib_index_by_sw_if_index[sw_if_index] ==
714               im->fib_index_by_sw_if_index[sif->sw_if_index])
715             {
716               foreach_ip_interface_address
717                 (&im->lookup_main, ia, sif->sw_if_index,
718                  0 /* honor unnumbered */ ,
719                  ({
720                    ip4_address_t * x =
721                      ip_interface_address_get_address
722                      (&im->lookup_main, ia);
723                    if (ip4_destination_matches_route
724                        (im, address, x, ia->address_length) ||
725                        ip4_destination_matches_route (im,
726                                                       x,
727                                                       address,
728                                                       address_length))
729                      {
730                        /* an intf may have >1 addr from the same prefix */
731                        if ((sw_if_index == sif->sw_if_index) &&
732                            (ia->address_length == address_length) &&
733                            (x->as_u32 != address->as_u32))
734                          continue;
735
736                        /* error if the length or intf was different */
737                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
738
739                        return
740                          clib_error_create
741                          ("failed to add %U on %U which conflicts with %U for interface %U",
742                           format_ip4_address_and_length, address,
743                           address_length,
744                           format_vnet_sw_if_index_name, vnm,
745                           sw_if_index,
746                           format_ip4_address_and_length, x,
747                           ia->address_length,
748                           format_vnet_sw_if_index_name, vnm,
749                           sif->sw_if_index);
750                      }
751                  }));
752             }
753       }));
754     }
755   /* *INDENT-ON* */
756
757   elts_before = pool_elts (lm->if_address_pool);
758
759   error = ip_interface_address_add_del
760     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
761   if (error)
762     goto done;
763
764   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
765   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
766
767   /* intf addr routes are added/deleted on admin up/down */
768   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
769     {
770       if (is_del)
771         ip4_del_interface_routes (sw_if_index,
772                                   im, ip4_af.fib_index, address,
773                                   address_length);
774       else
775         ip4_add_interface_routes (sw_if_index,
776                                   im, ip4_af.fib_index,
777                                   pool_elt_at_index
778                                   (lm->if_address_pool, if_address_index));
779     }
780
781   /* If pool did not grow/shrink: add duplicate address. */
782   if (elts_before != pool_elts (lm->if_address_pool))
783     {
784       ip4_add_del_interface_address_callback_t *cb;
785       vec_foreach (cb, im->add_del_interface_address_callbacks)
786         cb->function (im, cb->function_opaque, sw_if_index,
787                       address, address_length, if_address_index, is_del);
788     }
789
790 done:
791   vec_free (addr_fib);
792   return error;
793 }
794
795 clib_error_t *
796 ip4_add_del_interface_address (vlib_main_t * vm,
797                                u32 sw_if_index,
798                                ip4_address_t * address,
799                                u32 address_length, u32 is_del)
800 {
801   return ip4_add_del_interface_address_internal
802     (vm, sw_if_index, address, address_length, is_del);
803 }
804
805 void
806 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
807 {
808   ip_interface_address_t *ia;
809   ip4_main_t *im;
810
811   im = &ip4_main;
812
813   /*
814    * when directed broadcast is enabled, the subnet braodcast route will forward
815    * packets using an adjacency with a broadcast MAC. otherwise it drops
816    */
817   /* *INDENT-OFF* */
818   foreach_ip_interface_address(&im->lookup_main, ia,
819                                sw_if_index, 0,
820      ({
821        if (ia->address_length <= 30)
822          {
823            ip4_address_t *ipa;
824
825            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
826
827            fib_prefix_t pfx = {
828              .fp_len = 32,
829              .fp_proto = FIB_PROTOCOL_IP4,
830              .fp_addr = {
831                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
832              },
833            };
834
835            ip4_add_subnet_bcast_route
836              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
837                                                   sw_if_index),
838               &pfx, sw_if_index);
839          }
840      }));
841   /* *INDENT-ON* */
842 }
843 #endif
844
845 static clib_error_t *
846 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
847 {
848   ip4_main_t *im = &ip4_main;
849   ip_interface_address_t *ia;
850   ip4_address_t *a;
851   u32 is_admin_up, fib_index;
852
853   /* Fill in lookup tables with default table (0). */
854   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
855
856   vec_validate_init_empty (im->
857                            lookup_main.if_address_pool_index_by_sw_if_index,
858                            sw_if_index, ~0);
859
860   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
861
862   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
863
864   /* *INDENT-OFF* */
865   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
866                                 0 /* honor unnumbered */,
867   ({
868     a = ip_interface_address_get_address (&im->lookup_main, ia);
869     if (is_admin_up)
870       ip4_add_interface_routes (sw_if_index,
871                                 im, fib_index,
872                                 ia);
873     else
874       ip4_del_interface_routes (sw_if_index,
875                                 im, fib_index,
876                                 a, ia->address_length);
877   }));
878   /* *INDENT-ON* */
879
880   return 0;
881 }
882
883 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
884
/*
 * Built-in ip4 unicast rx feature path definition.
 *
 * The arc is entered from "ip4-input" / "ip4-input-no-checksum" and always
 * terminates at "ip4-lookup".  Each VNET_FEATURE_INIT below registers one
 * node on the arc; .runs_before names the feature(s) it must precede, so
 * the entries taken together describe the ordering of the arc.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
};

VNET_FEATURE_INIT (ip4_source_check_1, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-rx",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
};

VNET_FEATURE_INIT (ip4_source_check_2, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-any",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

/* Shares its successor with ip4-source-check-via-any; no ordering is
 * declared between the two. */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec4-input-feature",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* "ip4-not-enabled" is the feature that ip4_sw_interface_add_del() turns
 * on for an interface when it is added and off when it is deleted. */
VNET_FEATURE_INIT (ip4_not_enabled, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};
971
/*
 * Built-in ip4 multicast rx feature path definition.
 *
 * Entered from the same input nodes as the unicast arc, but terminates at
 * "ip4-mfib-forward-lookup".  Both non-terminal features declare only that
 * they run before the final lookup node.
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-mfib-forward-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/* Multicast counterpart of the unicast "ip4-not-enabled" feature; toggled
 * by ip4_sw_interface_add_del() together with it. */
VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};
1001
/*
 * ip4 tx (output) feature path definition.
 *
 * Entered from "ip4-rewrite", "ip4-midchain" and "ip4-dvr-dpo"; terminates
 * at "interface-output".  Declared order of the features:
 *   source-and-port-range-check-tx -> outacl -> ipsec4-output-feature
 *   -> interface-output.
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  .last_in_arc = "interface-output",
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

/* Source and port-range check on the tx path. */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ip4-outacl"),
};

VNET_FEATURE_INIT (ip4_outacl, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-outacl",
  .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec4-output-feature",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
/* *INDENT-ON* */
1040
1041 static clib_error_t *
1042 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1043 {
1044   ip4_main_t *im = &ip4_main;
1045
1046   /* Fill in lookup tables with default table (0). */
1047   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1048   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1049
1050   if (!is_add)
1051     {
1052       ip4_main_t *im4 = &ip4_main;
1053       ip_lookup_main_t *lm4 = &im4->lookup_main;
1054       ip_interface_address_t *ia = 0;
1055       ip4_address_t *address;
1056       vlib_main_t *vm = vlib_get_main ();
1057
1058       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1059       /* *INDENT-OFF* */
1060       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1061       ({
1062         address = ip_interface_address_get_address (lm4, ia);
1063         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1064       }));
1065       /* *INDENT-ON* */
1066       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1067     }
1068
1069   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1070                                is_add, 0, 0);
1071
1072   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1073                                sw_if_index, is_add, 0, 0);
1074
1075   return /* no error */ 0;
1076 }
1077
1078 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1079
/*
 * Global IP4 main: the single ip4_main_t instance the functions in this
 * file operate on.  It is defined only when CLIB_MARCH_VARIANT is not set.
 */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
1084
1085 static clib_error_t *
1086 ip4_lookup_init (vlib_main_t * vm)
1087 {
1088   ip4_main_t *im = &ip4_main;
1089   clib_error_t *error;
1090   uword i;
1091
1092   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1093     return error;
1094   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1095     return (error);
1096   if ((error = vlib_call_init_function (vm, fib_module_init)))
1097     return error;
1098   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1099     return error;
1100
1101   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1102     {
1103       u32 m;
1104
1105       if (i < 32)
1106         m = pow2_mask (i) << (32 - i);
1107       else
1108         m = ~0;
1109       im->fib_masks[i] = clib_host_to_net_u32 (m);
1110     }
1111
1112   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1113
1114   /* Create FIB with index 0 and table id of 0. */
1115   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1116                                      FIB_SOURCE_DEFAULT_ROUTE);
1117   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1118                                       MFIB_SOURCE_DEFAULT_ROUTE);
1119
1120   {
1121     pg_node_t *pn;
1122     pn = pg_get_node (ip4_lookup_node.index);
1123     pn->unformat_edit = unformat_pg_ip4_header;
1124   }
1125
1126   {
1127     ethernet_arp_header_t h;
1128
1129     clib_memset (&h, 0, sizeof (h));
1130
1131 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1132 #define _8(f,v) h.f = v;
1133     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1134     _16 (l3_type, ETHERNET_TYPE_IP4);
1135     _8 (n_l2_address_bytes, 6);
1136     _8 (n_l3_address_bytes, 4);
1137     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1138 #undef _16
1139 #undef _8
1140
1141     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1142                                /* data */ &h,
1143                                sizeof (h),
1144                                /* alloc chunk size */ 8,
1145                                "ip4 arp");
1146   }
1147
1148   return error;
1149 }
1150
1151 VLIB_INIT_FUNCTION (ip4_lookup_init);
1152
1153 typedef struct
1154 {
1155   /* Adjacency taken. */
1156   u32 dpo_index;
1157   u32 flow_hash;
1158   u32 fib_index;
1159
1160   /* Packet data, possibly *after* rewrite. */
1161   u8 packet_data[64 - 1 * sizeof (u32)];
1162 }
1163 ip4_forward_next_trace_t;
1164
1165 #ifndef CLIB_MARCH_VARIANT
1166 u8 *
1167 format_ip4_forward_next_trace (u8 * s, va_list * args)
1168 {
1169   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1170   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1171   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1172   u32 indent = format_get_indent (s);
1173   s = format (s, "%U%U",
1174               format_white_space, indent,
1175               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1176   return s;
1177 }
1178 #endif
1179
1180 static u8 *
1181 format_ip4_lookup_trace (u8 * s, va_list * args)
1182 {
1183   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1184   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1185   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1186   u32 indent = format_get_indent (s);
1187
1188   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1189               t->fib_index, t->dpo_index, t->flow_hash);
1190   s = format (s, "\n%U%U",
1191               format_white_space, indent,
1192               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1193   return s;
1194 }
1195
1196 static u8 *
1197 format_ip4_rewrite_trace (u8 * s, va_list * args)
1198 {
1199   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1200   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1201   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1202   u32 indent = format_get_indent (s);
1203
1204   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1205               t->fib_index, t->dpo_index, format_ip_adjacency,
1206               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1207   s = format (s, "\n%U%U",
1208               format_white_space, indent,
1209               format_ip_adjacency_packet_data,
1210               t->packet_data, sizeof (t->packet_data));
1211   return s;
1212 }
1213
1214 #ifndef CLIB_MARCH_VARIANT
1215 /* Common trace function for all ip4-forward next nodes. */
1216 void
1217 ip4_forward_next_trace (vlib_main_t * vm,
1218                         vlib_node_runtime_t * node,
1219                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1220 {
1221   u32 *from, n_left;
1222   ip4_main_t *im = &ip4_main;
1223
1224   n_left = frame->n_vectors;
1225   from = vlib_frame_vector_args (frame);
1226
1227   while (n_left >= 4)
1228     {
1229       u32 bi0, bi1;
1230       vlib_buffer_t *b0, *b1;
1231       ip4_forward_next_trace_t *t0, *t1;
1232
1233       /* Prefetch next iteration. */
1234       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1235       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1236
1237       bi0 = from[0];
1238       bi1 = from[1];
1239
1240       b0 = vlib_get_buffer (vm, bi0);
1241       b1 = vlib_get_buffer (vm, bi1);
1242
1243       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1244         {
1245           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1246           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1247           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1248           t0->fib_index =
1249             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1250              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1251             vec_elt (im->fib_index_by_sw_if_index,
1252                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1253
1254           clib_memcpy_fast (t0->packet_data,
1255                             vlib_buffer_get_current (b0),
1256                             sizeof (t0->packet_data));
1257         }
1258       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1259         {
1260           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1261           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1262           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1263           t1->fib_index =
1264             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1265              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1266             vec_elt (im->fib_index_by_sw_if_index,
1267                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1268           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1269                             sizeof (t1->packet_data));
1270         }
1271       from += 2;
1272       n_left -= 2;
1273     }
1274
1275   while (n_left >= 1)
1276     {
1277       u32 bi0;
1278       vlib_buffer_t *b0;
1279       ip4_forward_next_trace_t *t0;
1280
1281       bi0 = from[0];
1282
1283       b0 = vlib_get_buffer (vm, bi0);
1284
1285       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1286         {
1287           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1288           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1289           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1290           t0->fib_index =
1291             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1292              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1293             vec_elt (im->fib_index_by_sw_if_index,
1294                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1295           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1296                             sizeof (t0->packet_data));
1297         }
1298       from += 1;
1299       n_left -= 1;
1300     }
1301 }
1302
1303 /* Compute TCP/UDP/ICMP4 checksum in software. */
1304 u16
1305 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1306                               ip4_header_t * ip0)
1307 {
1308   ip_csum_t sum0;
1309   u32 ip_header_length, payload_length_host_byte_order;
1310
1311   /* Initialize checksum with ip header. */
1312   ip_header_length = ip4_header_bytes (ip0);
1313   payload_length_host_byte_order =
1314     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1315   sum0 =
1316     clib_host_to_net_u32 (payload_length_host_byte_order +
1317                           (ip0->protocol << 16));
1318
1319   if (BITS (uword) == 32)
1320     {
1321       sum0 =
1322         ip_csum_with_carry (sum0,
1323                             clib_mem_unaligned (&ip0->src_address, u32));
1324       sum0 =
1325         ip_csum_with_carry (sum0,
1326                             clib_mem_unaligned (&ip0->dst_address, u32));
1327     }
1328   else
1329     sum0 =
1330       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1331
1332   return ip_calculate_l4_checksum (vm, p0, sum0,
1333                                    payload_length_host_byte_order, (u8 *) ip0,
1334                                    ip_header_length, NULL);
1335 }
1336
1337 u32
1338 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1339 {
1340   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1341   udp_header_t *udp0;
1342   u16 sum16;
1343
1344   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1345           || ip0->protocol == IP_PROTOCOL_UDP);
1346
1347   udp0 = (void *) (ip0 + 1);
1348   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1349     {
1350       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1351                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1352       return p0->flags;
1353     }
1354
1355   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1356
1357   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1358                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1359
1360   return p0->flags;
1361 }
1362 #endif
1363
/*
 * Feature arc for the "ip4-local" node: entered from "ip4-local" and
 * terminated by "ip4-local-end-of-arc".  Unlike the arcs registered with
 * an arc_index_ptr, this one is not declared static and its index is read
 * directly from vnet_feat_arc_ip4_local.feature_arc_index (see
 * ip4_local_set_next_and_error()).
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
  .last_in_arc = "ip4-local-end-of-arc",
};
/* *INDENT-ON* */
1372
1373 static inline void
1374 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1375                             ip4_header_t * ip, u8 is_udp, u8 * error,
1376                             u8 * good_tcp_udp)
1377 {
1378   u32 flags0;
1379   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1380   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1381   if (is_udp)
1382     {
1383       udp_header_t *udp;
1384       u32 ip_len, udp_len;
1385       i32 len_diff;
1386       udp = ip4_next_header (ip);
1387       /* Verify UDP length. */
1388       ip_len = clib_net_to_host_u16 (ip->length);
1389       udp_len = clib_net_to_host_u16 (udp->length);
1390
1391       len_diff = ip_len - udp_len;
1392       *good_tcp_udp &= len_diff >= 0;
1393       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1394     }
1395 }
1396
/*
 * Checksum-state predicates on a buffer pointer _b.
 *
 * Every macro argument and every expansion is fully parenthesised so the
 * macros compose safely with surrounding operators (e.g. a leading '!')
 * and accept any pointer expression as _b.  Note that _b is still
 * evaluated more than once: do not pass an expression with side effects.
 */

/* Non-zero when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    ((((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM) != 0)             \
        || (((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM) != 0))

/* Non-zero when a TCP/UDP packet still needs a software checksum check:
 * the checksum has neither been computed already nor been offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) != 0)  \
             || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is already known correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    (((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0)          \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1408
1409 static inline void
1410 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1411                          ip4_header_t * ih, u8 * error)
1412 {
1413   u8 is_udp, is_tcp_udp, good_tcp_udp;
1414
1415   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1416   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1417
1418   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1419     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1420   else
1421     good_tcp_udp = ip4_local_csum_is_valid (b);
1422
1423   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1424   *error = (is_tcp_udp && !good_tcp_udp
1425             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1426 }
1427
1428 static inline void
1429 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1430                             ip4_header_t ** ih, u8 * error)
1431 {
1432   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1433
1434   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1435   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1436
1437   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1438   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1439
1440   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1441   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1442
1443   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1444                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1445     {
1446       if (is_tcp_udp[0])
1447         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1448                                     &good_tcp_udp[0]);
1449       if (is_tcp_udp[1])
1450         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1451                                     &good_tcp_udp[1]);
1452     }
1453
1454   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1455               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1456   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1457               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1458 }
1459
1460 static inline void
1461 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1462                               vlib_buffer_t * b, u16 * next, u8 error,
1463                               u8 head_of_feature_arc)
1464 {
1465   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1466   u32 next_index;
1467
1468   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1469   b->error = error ? error_node->errors[error] : 0;
1470   if (head_of_feature_arc)
1471     {
1472       next_index = *next;
1473       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1474         {
1475           vnet_feature_arc_start (arc_index,
1476                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1477                                   &next_index, b);
1478           *next = next_index;
1479         }
1480     }
1481 }
1482
/*
 * One-entry cache of the most recent source-address check, used by
 * ip4_local_check_src() / ip4_local_check_src_x2() to skip the FIB lookup
 * when consecutive packets share a source address.
 */
typedef struct
{
  /* Source address of the last packet that was looked up. */
  ip4_address_t src;
  /* Load-balance index the lookup of src returned. */
  u32 lbi;
  /* Error code that resulted from that check. */
  u8 error;
  /* Non-zero until the first lookup is done; forces that first lookup. */
  u8 first;
} ip4_local_last_check_t;
1490
1491 static inline void
1492 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1493                      ip4_local_last_check_t * last_check, u8 * error0)
1494 {
1495   ip4_fib_mtrie_leaf_t leaf0;
1496   ip4_fib_mtrie_t *mtrie0;
1497   const dpo_id_t *dpo0;
1498   load_balance_t *lb0;
1499   u32 lbi0;
1500
1501   vnet_buffer (b)->ip.fib_index =
1502     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1503     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1504
1505   /*
1506    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1507    *  adjacency for the destination address (the local interface address).
1508    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1509    *  adjacency for the source address (the remote sender's address)
1510    */
1511   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1512       last_check->first)
1513     {
1514       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1515       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1516       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1517       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1518       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1519
1520       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1521         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1522       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1523
1524       lb0 = load_balance_get (lbi0);
1525       dpo0 = load_balance_get_bucket_i (lb0, 0);
1526
1527       /*
1528        * Must have a route to source otherwise we drop the packet.
1529        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1530        *
1531        * The checks are:
1532        *  - the source is a recieve => it's from us => bogus, do this
1533        *    first since it sets a different error code.
1534        *  - uRPF check for any route to source - accept if passes.
1535        *  - allow packets destined to the broadcast address from unknown sources
1536        */
1537
1538       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1539                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1540                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1541       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1542                   && !fib_urpf_check_size (lb0->lb_urpf)
1543                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1544                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1545
1546       last_check->src.as_u32 = ip0->src_address.as_u32;
1547       last_check->lbi = lbi0;
1548       last_check->error = *error0;
1549       last_check->first = 0;
1550     }
1551   else
1552     {
1553       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1554         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1555       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1556       *error0 = last_check->error;
1557     }
1558 }
1559
1560 static inline void
1561 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1562                         ip4_local_last_check_t * last_check, u8 * error)
1563 {
1564   ip4_fib_mtrie_leaf_t leaf[2];
1565   ip4_fib_mtrie_t *mtrie[2];
1566   const dpo_id_t *dpo[2];
1567   load_balance_t *lb[2];
1568   u32 not_last_hit;
1569   u32 lbi[2];
1570
1571   not_last_hit = last_check->first;
1572   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1573   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1574
1575   vnet_buffer (b[0])->ip.fib_index =
1576     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1577     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1578     vnet_buffer (b[0])->ip.fib_index;
1579
1580   vnet_buffer (b[1])->ip.fib_index =
1581     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1582     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1583     vnet_buffer (b[1])->ip.fib_index;
1584
1585   /*
1586    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1587    *  adjacency for the destination address (the local interface address).
1588    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1589    *  adjacency for the source address (the remote sender's address)
1590    */
1591   if (PREDICT_TRUE (not_last_hit))
1592     {
1593       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1594       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1595
1596       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1597       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1598
1599       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1600                                            &ip[0]->src_address, 2);
1601       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1602                                            &ip[1]->src_address, 2);
1603
1604       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1605                                            &ip[0]->src_address, 3);
1606       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1607                                            &ip[1]->src_address, 3);
1608
1609       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1610       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1611
1612       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1613         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1614       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1615
1616       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1617         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1618       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1619
1620       lb[0] = load_balance_get (lbi[0]);
1621       lb[1] = load_balance_get (lbi[1]);
1622
1623       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1624       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1625
1626       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1627                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1628                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1629       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1630                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1631                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1632                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1633
1634       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1635                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1636                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1637       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1638                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1639                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1640                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1641
1642       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1643       last_check->lbi = lbi[1];
1644       last_check->error = error[1];
1645       last_check->first = 0;
1646     }
1647   else
1648     {
1649       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1650         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1651       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1652
1653       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1654         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1655       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1656
1657       error[0] = last_check->error;
1658       error[1] = last_check->error;
1659     }
1660 }
1661
/*
 * Classification of a locally-destined packet, as returned by
 * ip4_local_classify().  The values are spelled out because callers test
 * the type as a boolean: L4 must stay 0 so that "non-zero" means "not L4".
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1668
1669 /**
1670  * Determine packet type and next node.
1671  *
1672  * The expectation is that all packets that are not L4 will skip
1673  * checksums and source checks.
1674  */
1675 always_inline u8
1676 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1677 {
1678   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1679
1680   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1681     {
1682       *next = IP_LOCAL_NEXT_REASSEMBLY;
1683       return IP_LOCAL_PACKET_TYPE_FRAG;
1684     }
1685   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1686     {
1687       *next = lm->local_next_by_ip_protocol[ip->protocol];
1688       return IP_LOCAL_PACKET_TYPE_NAT;
1689     }
1690
1691   *next = lm->local_next_by_ip_protocol[ip->protocol];
1692   return IP_LOCAL_PACKET_TYPE_L4;
1693 }
1694
1695 static inline uword
1696 ip4_local_inline (vlib_main_t * vm,
1697                   vlib_node_runtime_t * node,
1698                   vlib_frame_t * frame, int head_of_feature_arc)
1699 {
1700   u32 *from, n_left_from;
1701   vlib_node_runtime_t *error_node =
1702     vlib_node_get_runtime (vm, ip4_local_node.index);
1703   u16 nexts[VLIB_FRAME_SIZE], *next;
1704   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1705   ip4_header_t *ip[2];
1706   u8 error[2], pt[2];
1707
1708   ip4_local_last_check_t last_check = {
1709     /*
1710      * 0.0.0.0 can appear as the source address of an IP packet,
1711      * as can any other address, hence the need to use the 'first'
1712      * member to make sure the .lbi is initialised for the first
1713      * packet.
1714      */
1715     .src = {.as_u32 = 0},
1716     .lbi = ~0,
1717     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1718     .first = 1,
1719   };
1720
1721   from = vlib_frame_vector_args (frame);
1722   n_left_from = frame->n_vectors;
1723
1724   if (node->flags & VLIB_NODE_FLAG_TRACE)
1725     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1726
1727   vlib_get_buffers (vm, from, bufs, n_left_from);
1728   b = bufs;
1729   next = nexts;
1730
1731   while (n_left_from >= 6)
1732     {
1733       u8 not_batch = 0;
1734
1735       /* Prefetch next iteration. */
1736       {
1737         vlib_prefetch_buffer_header (b[4], LOAD);
1738         vlib_prefetch_buffer_header (b[5], LOAD);
1739
1740         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1741         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1742       }
1743
1744       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1745
1746       ip[0] = vlib_buffer_get_current (b[0]);
1747       ip[1] = vlib_buffer_get_current (b[1]);
1748
1749       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1750       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1751
1752       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1753       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1754
1755       not_batch = pt[0] ^ pt[1];
1756
1757       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1758         goto skip_checks;
1759
1760       if (PREDICT_TRUE (not_batch == 0))
1761         {
1762           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1763           ip4_local_check_src_x2 (b, ip, &last_check, error);
1764         }
1765       else
1766         {
1767           if (!pt[0])
1768             {
1769               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1770               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1771             }
1772           if (!pt[1])
1773             {
1774               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1775               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1776             }
1777         }
1778
1779     skip_checks:
1780
1781       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1782                                     head_of_feature_arc);
1783       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1784                                     head_of_feature_arc);
1785
1786       b += 2;
1787       next += 2;
1788       n_left_from -= 2;
1789     }
1790
1791   while (n_left_from > 0)
1792     {
1793       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1794
1795       ip[0] = vlib_buffer_get_current (b[0]);
1796       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1797       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1798
1799       if (head_of_feature_arc == 0 || pt[0])
1800         goto skip_check;
1801
1802       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1803       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1804
1805     skip_check:
1806
1807       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1808                                     head_of_feature_arc);
1809
1810       b += 1;
1811       next += 1;
1812       n_left_from -= 1;
1813     }
1814
1815   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1816   return frame->n_vectors;
1817 }
1818
1819 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1820                                vlib_frame_t * frame)
1821 {
1822   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1823 }
1824
1825 /* *INDENT-OFF* */
1826 VLIB_REGISTER_NODE (ip4_local_node) =
1827 {
1828   .name = "ip4-local",
1829   .vector_size = sizeof (u32),
1830   .format_trace = format_ip4_forward_next_trace,
1831   .n_errors = IP4_N_ERROR,
1832   .error_strings = ip4_error_strings,
1833   .n_next_nodes = IP_LOCAL_N_NEXT,
1834   .next_nodes =
1835   {
1836     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1837     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1838     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1839     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1840     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1841   },
1842 };
1843 /* *INDENT-ON* */
1844
1845
1846 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1847                                           vlib_node_runtime_t * node,
1848                                           vlib_frame_t * frame)
1849 {
1850   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1851 }
1852
1853 /* *INDENT-OFF* */
1854 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1855   .name = "ip4-local-end-of-arc",
1856   .vector_size = sizeof (u32),
1857
1858   .format_trace = format_ip4_forward_next_trace,
1859   .sibling_of = "ip4-local",
1860 };
1861
1862 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1863   .arc_name = "ip4-local",
1864   .node_name = "ip4-local-end-of-arc",
1865   .runs_before = 0, /* not before any other features */
1866 };
1867 /* *INDENT-ON* */
1868
1869 #ifndef CLIB_MARCH_VARIANT
1870 void
1871 ip4_register_protocol (u32 protocol, u32 node_index)
1872 {
1873   vlib_main_t *vm = vlib_get_main ();
1874   ip4_main_t *im = &ip4_main;
1875   ip_lookup_main_t *lm = &im->lookup_main;
1876
1877   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1878   lm->local_next_by_ip_protocol[protocol] =
1879     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1880 }
1881
1882 void
1883 ip4_unregister_protocol (u32 protocol)
1884 {
1885   ip4_main_t *im = &ip4_main;
1886   ip_lookup_main_t *lm = &im->lookup_main;
1887
1888   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1889   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1890 }
1891 #endif
1892
1893 static clib_error_t *
1894 show_ip_local_command_fn (vlib_main_t * vm,
1895                           unformat_input_t * input, vlib_cli_command_t * cmd)
1896 {
1897   ip4_main_t *im = &ip4_main;
1898   ip_lookup_main_t *lm = &im->lookup_main;
1899   int i;
1900
1901   vlib_cli_output (vm, "Protocols handled by ip4_local");
1902   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1903     {
1904       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1905         {
1906           u32 node_index = vlib_get_node (vm,
1907                                           ip4_local_node.index)->
1908             next_nodes[lm->local_next_by_ip_protocol[i]];
1909           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1910                            format_vlib_node_name, vm, node_index);
1911         }
1912     }
1913   return 0;
1914 }
1915
1916
1917
1918 /*?
1919  * Display the set of protocols handled by the local IPv4 stack.
1920  *
1921  * @cliexpar
1922  * Example of how to display local protocol table:
1923  * @cliexstart{show ip local}
1924  * Protocols handled by ip4_local
1925  * 1
1926  * 17
1927  * 47
1928  * @cliexend
1929 ?*/
1930 /* *INDENT-OFF* */
1931 VLIB_CLI_COMMAND (show_ip_local, static) =
1932 {
1933   .path = "show ip local",
1934   .function = show_ip_local_command_fn,
1935   .short_help = "show ip local",
1936 };
1937 /* *INDENT-ON* */
1938
/* Next-node indices common to the ip4 rewrite family of nodes. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  /* number of entries above; keep last */
  IP4_REWRITE_N_NEXT
} ip4_rewrite_next_t;
1946
/**
 * The bits of an IPv4 address that are kept (masked in) when
 * constructing a multicast MAC address.  The constant depends on the
 * byte order of the host.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1956
1957 always_inline void
1958 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1959                u16 adj_packet_bytes, bool df, u16 * next,
1960                u8 is_midchain, u32 * error)
1961 {
1962   if (packet_len > adj_packet_bytes)
1963     {
1964       *error = IP4_ERROR_MTU_EXCEEDED;
1965       if (df)
1966         {
1967           icmp4_error_set_vnet_buffer
1968             (b, ICMP4_destination_unreachable,
1969              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1970              adj_packet_bytes);
1971           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1972         }
1973       else
1974         {
1975           /* IP fragmentation */
1976           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1977                                    (is_midchain ?
1978                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1979                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1980           *next = IP4_REWRITE_NEXT_FRAGMENT;
1981         }
1982     }
1983 }
1984
1985 /* increment TTL & update checksum.
1986    Works either endian, so no need for byte swap. */
1987 static_always_inline void
1988 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
1989 {
1990   i32 ttl;
1991   u32 checksum;
1992   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
1993     return;
1994
1995   ttl = ip->ttl;
1996
1997   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
1998   checksum += checksum >= 0xffff;
1999
2000   ip->checksum = checksum;
2001   ttl += 1;
2002   ip->ttl = ttl;
2003
2004   ASSERT (ip->checksum == ip4_header_checksum (ip));
2005 }
2006
2007 /* Decrement TTL & update checksum.
2008    Works either endian, so no need for byte swap. */
2009 static_always_inline void
2010 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2011                             u32 * error)
2012 {
2013   i32 ttl;
2014   u32 checksum;
2015   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2016     return;
2017
2018   ttl = ip->ttl;
2019
2020   /* Input node should have reject packets with ttl 0. */
2021   ASSERT (ip->ttl > 0);
2022
2023   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2024   checksum += checksum >= 0xffff;
2025
2026   ip->checksum = checksum;
2027   ttl -= 1;
2028   ip->ttl = ttl;
2029
2030   /*
2031    * If the ttl drops below 1 when forwarding, generate
2032    * an ICMP response.
2033    */
2034   if (PREDICT_FALSE (ttl <= 0))
2035     {
2036       *error = IP4_ERROR_TIME_EXPIRED;
2037       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2038       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2039                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2040                                    0);
2041       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2042     }
2043
2044   /* Verify checksum. */
2045   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2046           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2047 }
2048
2049
2050 always_inline uword
2051 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2052                              vlib_node_runtime_t * node,
2053                              vlib_frame_t * frame,
2054                              int do_counters, int is_midchain, int is_mcast)
2055 {
2056   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2057   u32 *from = vlib_frame_vector_args (frame);
2058   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2059   u16 nexts[VLIB_FRAME_SIZE], *next;
2060   u32 n_left_from;
2061   vlib_node_runtime_t *error_node =
2062     vlib_node_get_runtime (vm, ip4_input_node.index);
2063
2064   n_left_from = frame->n_vectors;
2065   u32 thread_index = vm->thread_index;
2066
2067   vlib_get_buffers (vm, from, bufs, n_left_from);
2068   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2069
2070 #if (CLIB_N_PREFETCHES >= 8)
2071   if (n_left_from >= 6)
2072     {
2073       int i;
2074       for (i = 2; i < 6; i++)
2075         vlib_prefetch_buffer_header (bufs[i], LOAD);
2076     }
2077
2078   next = nexts;
2079   b = bufs;
2080   while (n_left_from >= 8)
2081     {
2082       const ip_adjacency_t *adj0, *adj1;
2083       ip4_header_t *ip0, *ip1;
2084       u32 rw_len0, error0, adj_index0;
2085       u32 rw_len1, error1, adj_index1;
2086       u32 tx_sw_if_index0, tx_sw_if_index1;
2087       u8 *p;
2088
2089       vlib_prefetch_buffer_header (b[6], LOAD);
2090       vlib_prefetch_buffer_header (b[7], LOAD);
2091
2092       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2093       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2094
2095       /*
2096        * pre-fetch the per-adjacency counters
2097        */
2098       if (do_counters)
2099         {
2100           vlib_prefetch_combined_counter (&adjacency_counters,
2101                                           thread_index, adj_index0);
2102           vlib_prefetch_combined_counter (&adjacency_counters,
2103                                           thread_index, adj_index1);
2104         }
2105
2106       ip0 = vlib_buffer_get_current (b[0]);
2107       ip1 = vlib_buffer_get_current (b[1]);
2108
2109       error0 = error1 = IP4_ERROR_NONE;
2110
2111       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2112       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2113
2114       /* Rewrite packet header and updates lengths. */
2115       adj0 = adj_get (adj_index0);
2116       adj1 = adj_get (adj_index1);
2117
2118       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2119       rw_len0 = adj0[0].rewrite_header.data_bytes;
2120       rw_len1 = adj1[0].rewrite_header.data_bytes;
2121       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2122       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2123
2124       p = vlib_buffer_get_current (b[2]);
2125       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2126       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2127
2128       p = vlib_buffer_get_current (b[3]);
2129       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2130       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2131
2132       /* Check MTU of outgoing interface. */
2133       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2134       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2135
2136       if (b[0]->flags & VNET_BUFFER_F_GSO)
2137         ip0_len = gso_mtu_sz (b[0]);
2138       if (b[1]->flags & VNET_BUFFER_F_GSO)
2139         ip1_len = gso_mtu_sz (b[1]);
2140
2141       ip4_mtu_check (b[0], ip0_len,
2142                      adj0[0].rewrite_header.max_l3_packet_bytes,
2143                      ip0->flags_and_fragment_offset &
2144                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2145                      next + 0, is_midchain, &error0);
2146       ip4_mtu_check (b[1], ip1_len,
2147                      adj1[0].rewrite_header.max_l3_packet_bytes,
2148                      ip1->flags_and_fragment_offset &
2149                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2150                      next + 1, is_midchain, &error1);
2151
2152       if (is_mcast)
2153         {
2154           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2155                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2156                     IP4_ERROR_SAME_INTERFACE : error0);
2157           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2158                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2159                     IP4_ERROR_SAME_INTERFACE : error1);
2160         }
2161
2162       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2163        * to see the IP header */
2164       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2165         {
2166           u32 next_index = adj0[0].rewrite_header.next_index;
2167           vlib_buffer_advance (b[0], -(word) rw_len0);
2168
2169           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2170           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2171
2172           if (PREDICT_FALSE
2173               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2174             vnet_feature_arc_start (lm->output_feature_arc_index,
2175                                     tx_sw_if_index0, &next_index, b[0]);
2176           next[0] = next_index;
2177           if (is_midchain)
2178             calc_checksums (vm, b[0]);
2179         }
2180       else
2181         {
2182           b[0]->error = error_node->errors[error0];
2183           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2184             ip4_ttl_inc (b[0], ip0);
2185         }
2186       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2187         {
2188           u32 next_index = adj1[0].rewrite_header.next_index;
2189           vlib_buffer_advance (b[1], -(word) rw_len1);
2190
2191           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2192           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2193
2194           if (PREDICT_FALSE
2195               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2196             vnet_feature_arc_start (lm->output_feature_arc_index,
2197                                     tx_sw_if_index1, &next_index, b[1]);
2198           next[1] = next_index;
2199           if (is_midchain)
2200             calc_checksums (vm, b[1]);
2201         }
2202       else
2203         {
2204           b[1]->error = error_node->errors[error1];
2205           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2206             ip4_ttl_inc (b[1], ip1);
2207         }
2208
2209       /* Guess we are only writing on simple Ethernet header. */
2210       vnet_rewrite_two_headers (adj0[0], adj1[0],
2211                                 ip0, ip1, sizeof (ethernet_header_t));
2212
2213       if (do_counters)
2214         {
2215           if (error0 == IP4_ERROR_NONE)
2216             vlib_increment_combined_counter
2217               (&adjacency_counters,
2218                thread_index,
2219                adj_index0, 1,
2220                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2221
2222           if (error1 == IP4_ERROR_NONE)
2223             vlib_increment_combined_counter
2224               (&adjacency_counters,
2225                thread_index,
2226                adj_index1, 1,
2227                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2228         }
2229
2230       if (is_midchain)
2231         {
2232           if (error0 == IP4_ERROR_NONE && adj0->sub_type.midchain.fixup_func)
2233             adj0->sub_type.midchain.fixup_func
2234               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2235           if (error1 == IP4_ERROR_NONE && adj1->sub_type.midchain.fixup_func)
2236             adj1->sub_type.midchain.fixup_func
2237               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2238         }
2239
2240       if (is_mcast)
2241         {
2242           /* copy bytes from the IP address into the MAC rewrite */
2243           if (error0 == IP4_ERROR_NONE)
2244             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2245                                         adj0->rewrite_header.dst_mcast_offset,
2246                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2247           if (error1 == IP4_ERROR_NONE)
2248             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2249                                         adj1->rewrite_header.dst_mcast_offset,
2250                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2251         }
2252
2253       next += 2;
2254       b += 2;
2255       n_left_from -= 2;
2256     }
2257 #elif (CLIB_N_PREFETCHES >= 4)
2258   next = nexts;
2259   b = bufs;
2260   while (n_left_from >= 1)
2261     {
2262       ip_adjacency_t *adj0;
2263       ip4_header_t *ip0;
2264       u32 rw_len0, error0, adj_index0;
2265       u32 tx_sw_if_index0;
2266       u8 *p;
2267
2268       /* Prefetch next iteration */
2269       if (PREDICT_TRUE (n_left_from >= 4))
2270         {
2271           ip_adjacency_t *adj2;
2272           u32 adj_index2;
2273
2274           vlib_prefetch_buffer_header (b[3], LOAD);
2275           vlib_prefetch_buffer_data (b[2], LOAD);
2276
2277           /* Prefetch adj->rewrite_header */
2278           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2279           adj2 = adj_get (adj_index2);
2280           p = (u8 *) adj2;
2281           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2282                          LOAD);
2283         }
2284
2285       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2286
2287       /*
2288        * Prefetch the per-adjacency counters
2289        */
2290       if (do_counters)
2291         {
2292           vlib_prefetch_combined_counter (&adjacency_counters,
2293                                           thread_index, adj_index0);
2294         }
2295
2296       ip0 = vlib_buffer_get_current (b[0]);
2297
2298       error0 = IP4_ERROR_NONE;
2299
2300       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2301
2302       /* Rewrite packet header and updates lengths. */
2303       adj0 = adj_get (adj_index0);
2304
2305       /* Rewrite header was prefetched. */
2306       rw_len0 = adj0[0].rewrite_header.data_bytes;
2307       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2308
2309       /* Check MTU of outgoing interface. */
2310       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2311
2312       if (b[0]->flags & VNET_BUFFER_F_GSO)
2313         ip0_len = gso_mtu_sz (b[0]);
2314
2315       ip4_mtu_check (b[0], ip0_len,
2316                      adj0[0].rewrite_header.max_l3_packet_bytes,
2317                      ip0->flags_and_fragment_offset &
2318                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2319                      next + 0, is_midchain, &error0);
2320
2321       if (is_mcast)
2322         {
2323           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2324                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2325                     IP4_ERROR_SAME_INTERFACE : error0);
2326         }
2327
2328       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2329        * to see the IP header */
2330       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2331         {
2332           u32 next_index = adj0[0].rewrite_header.next_index;
2333           vlib_buffer_advance (b[0], -(word) rw_len0);
2334           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2335           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2336
2337           if (PREDICT_FALSE
2338               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2339             vnet_feature_arc_start (lm->output_feature_arc_index,
2340                                     tx_sw_if_index0, &next_index, b[0]);
2341           next[0] = next_index;
2342
2343           if (is_midchain)
2344             calc_checksums (vm, b[0]);
2345
2346           /* Guess we are only writing on simple Ethernet header. */
2347           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2348
2349           /*
2350            * Bump the per-adjacency counters
2351            */
2352           if (do_counters)
2353             vlib_increment_combined_counter
2354               (&adjacency_counters,
2355                thread_index,
2356                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2357                                                            b[0]) + rw_len0);
2358
2359           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2360             adj0->sub_type.midchain.fixup_func
2361               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2362
2363           if (is_mcast)
2364             /* copy bytes from the IP address into the MAC rewrite */
2365             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2366                                         adj0->rewrite_header.dst_mcast_offset,
2367                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2368         }
2369       else
2370         {
2371           b[0]->error = error_node->errors[error0];
2372           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2373             ip4_ttl_inc (b[0], ip0);
2374         }
2375
2376       next += 1;
2377       b += 1;
2378       n_left_from -= 1;
2379     }
2380 #endif
2381
2382   while (n_left_from > 0)
2383     {
2384       ip_adjacency_t *adj0;
2385       ip4_header_t *ip0;
2386       u32 rw_len0, adj_index0, error0;
2387       u32 tx_sw_if_index0;
2388
2389       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2390
2391       adj0 = adj_get (adj_index0);
2392
2393       if (do_counters)
2394         vlib_prefetch_combined_counter (&adjacency_counters,
2395                                         thread_index, adj_index0);
2396
2397       ip0 = vlib_buffer_get_current (b[0]);
2398
2399       error0 = IP4_ERROR_NONE;
2400
2401       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2402
2403
2404       /* Update packet buffer attributes/set output interface. */
2405       rw_len0 = adj0[0].rewrite_header.data_bytes;
2406       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2407
2408       /* Check MTU of outgoing interface. */
2409       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2410       if (b[0]->flags & VNET_BUFFER_F_GSO)
2411         ip0_len = gso_mtu_sz (b[0]);
2412
2413       ip4_mtu_check (b[0], ip0_len,
2414                      adj0[0].rewrite_header.max_l3_packet_bytes,
2415                      ip0->flags_and_fragment_offset &
2416                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2417                      next + 0, is_midchain, &error0);
2418
2419       if (is_mcast)
2420         {
2421           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2422                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2423                     IP4_ERROR_SAME_INTERFACE : error0);
2424         }
2425
2426       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2427        * to see the IP header */
2428       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2429         {
2430           u32 next_index = adj0[0].rewrite_header.next_index;
2431           vlib_buffer_advance (b[0], -(word) rw_len0);
2432           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2433           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2434
2435           if (PREDICT_FALSE
2436               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2437             vnet_feature_arc_start (lm->output_feature_arc_index,
2438                                     tx_sw_if_index0, &next_index, b[0]);
2439           next[0] = next_index;
2440
2441           if (is_midchain)
2442             /* this acts on the packet that is about to be encapped */
2443             calc_checksums (vm, b[0]);
2444
2445           /* Guess we are only writing on simple Ethernet header. */
2446           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2447
2448           if (do_counters)
2449             vlib_increment_combined_counter
2450               (&adjacency_counters,
2451                thread_index, adj_index0, 1,
2452                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2453
2454           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2455             adj0->sub_type.midchain.fixup_func
2456               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2457
2458           if (is_mcast)
2459             /* copy bytes from the IP address into the MAC rewrite */
2460             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2461                                         adj0->rewrite_header.dst_mcast_offset,
2462                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2463         }
2464       else
2465         {
2466           b[0]->error = error_node->errors[error0];
2467           /* undo the TTL decrement - we'll be back to do it again */
2468           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2469             ip4_ttl_inc (b[0], ip0);
2470         }
2471
2472       next += 1;
2473       b += 1;
2474       n_left_from -= 1;
2475     }
2476
2477
2478   /* Need to do trace after rewrites to pick up new packet data. */
2479   if (node->flags & VLIB_NODE_FLAG_TRACE)
2480     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2481
2482   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2483   return frame->n_vectors;
2484 }
2485
2486 always_inline uword
2487 ip4_rewrite_inline (vlib_main_t * vm,
2488                     vlib_node_runtime_t * node,
2489                     vlib_frame_t * frame,
2490                     int do_counters, int is_midchain, int is_mcast)
2491 {
2492   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2493                                       is_midchain, is_mcast);
2494 }
2495
2496
2497 /** @brief IPv4 rewrite node.
2498     @node ip4-rewrite
2499
2500     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2501     header checksum, fetch the ip adjacency, check the outbound mtu,
2502     apply the adjacency rewrite, and send pkts to the adjacency
2503     rewrite header's rewrite_next_index.
2504
2505     @param vm vlib_main_t corresponding to the current thread
2506     @param node vlib_node_runtime_t
2507     @param frame vlib_frame_t whose contents should be dispatched
2508
2509     @par Graph mechanics: buffer metadata, next index usage
2510
2511     @em Uses:
2512     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2513         - the rewrite adjacency index
2514     - <code>adj->lookup_next_index</code>
2515         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2516           the packet will be dropped.
2517     - <code>adj->rewrite_header</code>
2518         - Rewrite string length, rewrite string, next_index
2519
2520     @em Sets:
2521     - <code>b->current_data, b->current_length</code>
2522         - Updated net of applying the rewrite string
2523
2524     <em>Next Indices:</em>
2525     - <code> adj->rewrite_header.next_index </code>
2526       or @c ip4-drop
2527 */
2528
2529 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2530                                  vlib_frame_t * frame)
2531 {
2532   if (adj_are_counters_enabled ())
2533     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2534   else
2535     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2536 }
2537
2538 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2539                                        vlib_node_runtime_t * node,
2540                                        vlib_frame_t * frame)
2541 {
2542   if (adj_are_counters_enabled ())
2543     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2544   else
2545     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2546 }
2547
2548 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2549                                   vlib_node_runtime_t * node,
2550                                   vlib_frame_t * frame)
2551 {
2552   if (adj_are_counters_enabled ())
2553     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2554   else
2555     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2556 }
2557
2558 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2559                                        vlib_node_runtime_t * node,
2560                                        vlib_frame_t * frame)
2561 {
2562   if (adj_are_counters_enabled ())
2563     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2564   else
2565     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2566 }
2567
2568 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2569                                         vlib_node_runtime_t * node,
2570                                         vlib_frame_t * frame)
2571 {
2572   if (adj_are_counters_enabled ())
2573     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2574   else
2575     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2576 }
2577
2578 /* *INDENT-OFF* */
2579 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2580   .name = "ip4-rewrite",
2581   .vector_size = sizeof (u32),
2582
2583   .format_trace = format_ip4_rewrite_trace,
2584
2585   .n_next_nodes = IP4_REWRITE_N_NEXT,
2586   .next_nodes = {
2587     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2588     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2589     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2590   },
2591 };
2592
2593 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2594   .name = "ip4-rewrite-bcast",
2595   .vector_size = sizeof (u32),
2596
2597   .format_trace = format_ip4_rewrite_trace,
2598   .sibling_of = "ip4-rewrite",
2599 };
2600
2601 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2602   .name = "ip4-rewrite-mcast",
2603   .vector_size = sizeof (u32),
2604
2605   .format_trace = format_ip4_rewrite_trace,
2606   .sibling_of = "ip4-rewrite",
2607 };
2608
2609 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2610   .name = "ip4-mcast-midchain",
2611   .vector_size = sizeof (u32),
2612
2613   .format_trace = format_ip4_rewrite_trace,
2614   .sibling_of = "ip4-rewrite",
2615 };
2616
2617 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2618   .name = "ip4-midchain",
2619   .vector_size = sizeof (u32),
2620   .format_trace = format_ip4_rewrite_trace,
2621   .sibling_of = "ip4-rewrite",
2622 };
/* *INDENT-ON* */
2624
2625 static int
2626 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2627 {
2628   ip4_fib_mtrie_t *mtrie0;
2629   ip4_fib_mtrie_leaf_t leaf0;
2630   u32 lbi0;
2631
2632   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2633
2634   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2635   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2636   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2637
2638   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2639
2640   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2641 }
2642
2643 static clib_error_t *
2644 test_lookup_command_fn (vlib_main_t * vm,
2645                         unformat_input_t * input, vlib_cli_command_t * cmd)
2646 {
2647   ip4_fib_t *fib;
2648   u32 table_id = 0;
2649   f64 count = 1;
2650   u32 n;
2651   int i;
2652   ip4_address_t ip4_base_address;
2653   u64 errors = 0;
2654
2655   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2656     {
2657       if (unformat (input, "table %d", &table_id))
2658         {
2659           /* Make sure the entry exists. */
2660           fib = ip4_fib_get (table_id);
2661           if ((fib) && (fib->index != table_id))
2662             return clib_error_return (0, "<fib-index> %d does not exist",
2663                                       table_id);
2664         }
2665       else if (unformat (input, "count %f", &count))
2666         ;
2667
2668       else if (unformat (input, "%U",
2669                          unformat_ip4_address, &ip4_base_address))
2670         ;
2671       else
2672         return clib_error_return (0, "unknown input `%U'",
2673                                   format_unformat_error, input);
2674     }
2675
2676   n = count;
2677
2678   for (i = 0; i < n; i++)
2679     {
2680       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2681         errors++;
2682
2683       ip4_base_address.as_u32 =
2684         clib_host_to_net_u32 (1 +
2685                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2686     }
2687
2688   if (errors)
2689     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2690   else
2691     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2692
2693   return 0;
2694 }
2695
2696 /*?
2697  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2698  * given FIB table to determine if there is a conflict with the
2699  * adjacency table. The fib-id can be determined by using the
2700  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2701  * of 0 is used.
2702  *
2703  * @todo This command uses fib-id, other commands use table-id (not
2704  * just a name, they are different indexes). Would like to change this
2705  * to table-id for consistency.
2706  *
2707  * @cliexpar
2708  * Example of how to run the test lookup command:
2709  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2710  * No errors in 2 lookups
2711  * @cliexend
2712 ?*/
2713 /* *INDENT-OFF* */
2714 VLIB_CLI_COMMAND (lookup_test_command, static) =
2715 {
2716   .path = "test lookup",
2717   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2718   .function = test_lookup_command_fn,
2719 };
2720 /* *INDENT-ON* */
2721
2722 #ifndef CLIB_MARCH_VARIANT
2723 int
2724 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2725 {
2726   u32 fib_index;
2727
2728   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2729
2730   if (~0 == fib_index)
2731     return VNET_API_ERROR_NO_SUCH_FIB;
2732
2733   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2734                                   flow_hash_config);
2735
2736   return 0;
2737 }
2738 #endif
2739
2740 static clib_error_t *
2741 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2742                              unformat_input_t * input,
2743                              vlib_cli_command_t * cmd)
2744 {
2745   int matched = 0;
2746   u32 table_id = 0;
2747   u32 flow_hash_config = 0;
2748   int rv;
2749
2750   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2751     {
2752       if (unformat (input, "table %d", &table_id))
2753         matched = 1;
2754 #define _(a,v) \
2755     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2756       foreach_flow_hash_bit
2757 #undef _
2758         else
2759         break;
2760     }
2761
2762   if (matched == 0)
2763     return clib_error_return (0, "unknown input `%U'",
2764                               format_unformat_error, input);
2765
2766   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2767   switch (rv)
2768     {
2769     case 0:
2770       break;
2771
2772     case VNET_API_ERROR_NO_SUCH_FIB:
2773       return clib_error_return (0, "no such FIB table %d", table_id);
2774
2775     default:
2776       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2777       break;
2778     }
2779
2780   return 0;
2781 }
2782
2783 /*?
2784  * Configure the set of IPv4 fields used by the flow hash.
2785  *
2786  * @cliexpar
2787  * Example of how to set the flow hash on a given table:
2788  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2790  * @cliexstart{show ip fib}
2791  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2792  * 0.0.0.0/0
2793  *   unicast-ip4-chain
2794  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2795  *     [0] [@0]: dpo-drop ip6
2796  * 0.0.0.0/32
2797  *   unicast-ip4-chain
2798  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2799  *     [0] [@0]: dpo-drop ip6
2800  * 224.0.0.0/8
2801  *   unicast-ip4-chain
2802  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2803  *     [0] [@0]: dpo-drop ip6
2804  * 6.0.1.2/32
2805  *   unicast-ip4-chain
2806  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2807  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2808  * 7.0.0.1/32
2809  *   unicast-ip4-chain
2810  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2811  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2812  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2813  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2814  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2815  * 240.0.0.0/8
2816  *   unicast-ip4-chain
2817  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2818  *     [0] [@0]: dpo-drop ip6
2819  * 255.255.255.255/32
2820  *   unicast-ip4-chain
2821  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2822  *     [0] [@0]: dpo-drop ip6
2823  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2824  * 0.0.0.0/0
2825  *   unicast-ip4-chain
2826  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2827  *     [0] [@0]: dpo-drop ip6
2828  * 0.0.0.0/32
2829  *   unicast-ip4-chain
2830  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2831  *     [0] [@0]: dpo-drop ip6
2832  * 172.16.1.0/24
2833  *   unicast-ip4-chain
2834  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2835  *     [0] [@4]: ipv4-glean: af_packet0
2836  * 172.16.1.1/32
2837  *   unicast-ip4-chain
2838  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2839  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2840  * 172.16.1.2/32
2841  *   unicast-ip4-chain
2842  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2843  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2844  * 172.16.2.0/24
2845  *   unicast-ip4-chain
2846  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2847  *     [0] [@4]: ipv4-glean: af_packet1
2848  * 172.16.2.1/32
2849  *   unicast-ip4-chain
2850  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2851  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2852  * 224.0.0.0/8
2853  *   unicast-ip4-chain
2854  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2855  *     [0] [@0]: dpo-drop ip6
2856  * 240.0.0.0/8
2857  *   unicast-ip4-chain
2858  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2859  *     [0] [@0]: dpo-drop ip6
2860  * 255.255.255.255/32
2861  *   unicast-ip4-chain
2862  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2863  *     [0] [@0]: dpo-drop ip6
2864  * @cliexend
2865 ?*/
2866 /* *INDENT-OFF* */
2867 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2868 {
2869   .path = "set ip flow-hash",
2870   .short_help =
2871   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2872   .function = set_ip_flow_hash_command_fn,
2873 };
2874 /* *INDENT-ON* */
2875
2876 #ifndef CLIB_MARCH_VARIANT
2877 int
2878 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2879                              u32 table_index)
2880 {
2881   vnet_main_t *vnm = vnet_get_main ();
2882   vnet_interface_main_t *im = &vnm->interface_main;
2883   ip4_main_t *ipm = &ip4_main;
2884   ip_lookup_main_t *lm = &ipm->lookup_main;
2885   vnet_classify_main_t *cm = &vnet_classify_main;
2886   ip4_address_t *if_addr;
2887
2888   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2889     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2890
2891   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2892     return VNET_API_ERROR_NO_SUCH_ENTRY;
2893
2894   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2895   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2896
2897   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2898
2899   if (NULL != if_addr)
2900     {
2901       fib_prefix_t pfx = {
2902         .fp_len = 32,
2903         .fp_proto = FIB_PROTOCOL_IP4,
2904         .fp_addr.ip4 = *if_addr,
2905       };
2906       u32 fib_index;
2907
2908       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2909                                                        sw_if_index);
2910
2911
2912       if (table_index != (u32) ~ 0)
2913         {
2914           dpo_id_t dpo = DPO_INVALID;
2915
2916           dpo_set (&dpo,
2917                    DPO_CLASSIFY,
2918                    DPO_PROTO_IP4,
2919                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2920
2921           fib_table_entry_special_dpo_add (fib_index,
2922                                            &pfx,
2923                                            FIB_SOURCE_CLASSIFY,
2924                                            FIB_ENTRY_FLAG_NONE, &dpo);
2925           dpo_reset (&dpo);
2926         }
2927       else
2928         {
2929           fib_table_entry_special_remove (fib_index,
2930                                           &pfx, FIB_SOURCE_CLASSIFY);
2931         }
2932     }
2933
2934   return 0;
2935 }
2936 #endif
2937
2938 static clib_error_t *
2939 set_ip_classify_command_fn (vlib_main_t * vm,
2940                             unformat_input_t * input,
2941                             vlib_cli_command_t * cmd)
2942 {
2943   u32 table_index = ~0;
2944   int table_index_set = 0;
2945   u32 sw_if_index = ~0;
2946   int rv;
2947
2948   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2949     {
2950       if (unformat (input, "table-index %d", &table_index))
2951         table_index_set = 1;
2952       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2953                          vnet_get_main (), &sw_if_index))
2954         ;
2955       else
2956         break;
2957     }
2958
2959   if (table_index_set == 0)
2960     return clib_error_return (0, "classify table-index must be specified");
2961
2962   if (sw_if_index == ~0)
2963     return clib_error_return (0, "interface / subif must be specified");
2964
2965   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2966
2967   switch (rv)
2968     {
2969     case 0:
2970       break;
2971
2972     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2973       return clib_error_return (0, "No such interface");
2974
2975     case VNET_API_ERROR_NO_SUCH_ENTRY:
2976       return clib_error_return (0, "No such classifier table");
2977     }
2978   return 0;
2979 }
2980
2981 /*?
2982  * Assign a classification table to an interface. The classification
2983  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
 * on an interface.
2986  *
2987  * @cliexpar
2988  * Example of how to assign a classification table to an interface:
2989  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2990 ?*/
2991 /* *INDENT-OFF* */
2992 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2993 {
2994     .path = "set ip classify",
2995     .short_help =
2996     "set ip classify intfc <interface> table-index <classify-idx>",
2997     .function = set_ip_classify_command_fn,
2998 };
2999 /* *INDENT-ON* */
3000
3001 static clib_error_t *
3002 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3003 {
3004   ip4_main_t *im = &ip4_main;
3005   uword heapsize = 0;
3006
3007   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3008     {
3009       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3010         ;
3011       else
3012         return clib_error_return (0,
3013                                   "invalid heap-size parameter `%U'",
3014                                   format_unformat_error, input);
3015     }
3016
3017   im->mtrie_heap_size = heapsize;
3018
3019   return 0;
3020 }
3021
3022 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3023
3024 /*
3025  * fd.io coding-style-patch-verification: ON
3026  *
3027  * Local Variables:
3028  * eval: (c-set-style "gnu")
3029  * End:
3030  */