ip: Protocol Independent IP Neighbors
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame);
96 }
97
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
99
100 /* *INDENT-OFF* */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
102 {
103   .name = "ip4-lookup",
104   .vector_size = sizeof (u32),
105   .format_trace = format_ip4_lookup_trace,
106   .n_next_nodes = IP_LOOKUP_N_NEXT,
107   .next_nodes = IP4_LOOKUP_NEXT_NODES,
108 };
109 /* *INDENT-ON* */
110
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112                                       vlib_node_runtime_t * node,
113                                       vlib_frame_t * frame)
114 {
115   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
116   u32 n_left, *from;
117   u32 thread_index = vm->thread_index;
118   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119   u16 nexts[VLIB_FRAME_SIZE], *next;
120
121   from = vlib_frame_vector_args (frame);
122   n_left = frame->n_vectors;
123   next = nexts;
124
125   vlib_get_buffers (vm, from, bufs, n_left);
126
127   while (n_left >= 4)
128     {
129       const load_balance_t *lb0, *lb1;
130       const ip4_header_t *ip0, *ip1;
131       u32 lbi0, hc0, lbi1, hc1;
132       const dpo_id_t *dpo0, *dpo1;
133
134       /* Prefetch next iteration. */
135       {
136         vlib_prefetch_buffer_header (b[2], LOAD);
137         vlib_prefetch_buffer_header (b[3], LOAD);
138
139         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
141       }
142
143       ip0 = vlib_buffer_get_current (b[0]);
144       ip1 = vlib_buffer_get_current (b[1]);
145       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
147
148       lb0 = load_balance_get (lbi0);
149       lb1 = load_balance_get (lbi1);
150
151       /*
152        * this node is for via FIBs we can re-use the hash value from the
153        * to node if present.
154        * We don't want to use the same hash value at each level in the recursion
155        * graph as that would lead to polarisation
156        */
157       hc0 = hc1 = 0;
158
159       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
160         {
161           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
162             {
163               hc0 = vnet_buffer (b[0])->ip.flow_hash =
164                 vnet_buffer (b[0])->ip.flow_hash >> 1;
165             }
166           else
167             {
168               hc0 = vnet_buffer (b[0])->ip.flow_hash =
169                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
170             }
171           dpo0 = load_balance_get_fwd_bucket
172             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
173         }
174       else
175         {
176           dpo0 = load_balance_get_bucket_i (lb0, 0);
177         }
178       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
179         {
180           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
181             {
182               hc1 = vnet_buffer (b[1])->ip.flow_hash =
183                 vnet_buffer (b[1])->ip.flow_hash >> 1;
184             }
185           else
186             {
187               hc1 = vnet_buffer (b[1])->ip.flow_hash =
188                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
189             }
190           dpo1 = load_balance_get_fwd_bucket
191             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
192         }
193       else
194         {
195           dpo1 = load_balance_get_bucket_i (lb1, 0);
196         }
197
198       next[0] = dpo0->dpoi_next_node;
199       next[1] = dpo1->dpoi_next_node;
200
201       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
203
204       vlib_increment_combined_counter
205         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
208
209       b += 2;
210       next += 2;
211       n_left -= 2;
212     }
213
214   while (n_left > 0)
215     {
216       const load_balance_t *lb0;
217       const ip4_header_t *ip0;
218       const dpo_id_t *dpo0;
219       u32 lbi0, hc0;
220
221       ip0 = vlib_buffer_get_current (b[0]);
222       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
223
224       lb0 = load_balance_get (lbi0);
225
226       hc0 = 0;
227       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
228         {
229           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
230             {
231               hc0 = vnet_buffer (b[0])->ip.flow_hash =
232                 vnet_buffer (b[0])->ip.flow_hash >> 1;
233             }
234           else
235             {
236               hc0 = vnet_buffer (b[0])->ip.flow_hash =
237                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
238             }
239           dpo0 = load_balance_get_fwd_bucket
240             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
241         }
242       else
243         {
244           dpo0 = load_balance_get_bucket_i (lb0, 0);
245         }
246
247       next[0] = dpo0->dpoi_next_node;
248       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
249
250       vlib_increment_combined_counter
251         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
252
253       b += 1;
254       next += 1;
255       n_left -= 1;
256     }
257
258   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259   if (node->flags & VLIB_NODE_FLAG_TRACE)
260     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
261
262   return frame->n_vectors;
263 }
264
265 /* *INDENT-OFF* */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
267 {
268   .name = "ip4-load-balance",
269   .vector_size = sizeof (u32),
270   .sibling_of = "ip4-lookup",
271   .format_trace = format_ip4_lookup_trace,
272 };
273 /* *INDENT-ON* */
274
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
277 ip4_address_t *
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279                              ip_interface_address_t ** result_ia)
280 {
281   ip_lookup_main_t *lm = &im->lookup_main;
282   ip_interface_address_t *ia = 0;
283   ip4_address_t *result = 0;
284
285   /* *INDENT-OFF* */
286   foreach_ip_interface_address
287     (lm, ia, sw_if_index,
288      1 /* honor unnumbered */ ,
289      ({
290        ip4_address_t * a =
291          ip_interface_address_get_address (lm, ia);
292        result = a;
293        break;
294      }));
295   /* *INDENT-OFF* */
296   if (result_ia)
297     *result_ia = result ? ia : 0;
298   return result;
299 }
300 #endif
301
302 static void
303 ip4_add_subnet_bcast_route (u32 fib_index,
304                             fib_prefix_t *pfx,
305                             u32 sw_if_index)
306 {
307   vnet_sw_interface_flags_t iflags;
308
309   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
310
311   fib_table_entry_special_remove(fib_index,
312                                  pfx,
313                                  FIB_SOURCE_INTERFACE);
314
315   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
316     {
317       fib_table_entry_update_one_path (fib_index, pfx,
318                                        FIB_SOURCE_INTERFACE,
319                                        FIB_ENTRY_FLAG_NONE,
320                                        DPO_PROTO_IP4,
321                                        /* No next-hop address */
322                                        &ADJ_BCAST_ADDR,
323                                        sw_if_index,
324                                        // invalid FIB index
325                                        ~0,
326                                        1,
327                                        // no out-label stack
328                                        NULL,
329                                        FIB_ROUTE_PATH_FLAG_NONE);
330     }
331   else
332     {
333         fib_table_entry_special_add(fib_index,
334                                     pfx,
335                                     FIB_SOURCE_INTERFACE,
336                                     (FIB_ENTRY_FLAG_DROP |
337                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
338     }
339 }
340
341 static void
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
343                                  u32 sw_if_index,
344                                  u32 fib_index,
345                                  ip_interface_address_t * a)
346 {
347   ip_lookup_main_t *lm = &im->lookup_main;
348   ip_interface_prefix_t *if_prefix;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350
351   ip_interface_prefix_key_t key = {
352     .prefix = {
353       .fp_len = a->address_length,
354       .fp_proto = FIB_PROTOCOL_IP4,
355       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
356     },
357     .sw_if_index = sw_if_index,
358   };
359
360   fib_prefix_t pfx_special = {
361     .fp_proto = FIB_PROTOCOL_IP4,
362   };
363
364   /* If prefix already set on interface, just increment ref count & return */
365   if_prefix = ip_get_interface_prefix (lm, &key);
366   if (if_prefix)
367     {
368       if_prefix->ref_count += 1;
369       return;
370     }
371
372   /* New prefix - allocate a pool entry, initialize it, add to the hash */
373   pool_get (lm->if_prefix_pool, if_prefix);
374   if_prefix->ref_count = 1;
375   if_prefix->src_ia_index = a - lm->if_address_pool;
376   clib_memcpy (&if_prefix->key, &key, sizeof (key));
377   mhash_set (&lm->prefix_to_if_prefix_index, &key,
378              if_prefix - lm->if_prefix_pool, 0 /* old value */);
379
380   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
381   if (a->address_length <= 30)
382     {
383       pfx_special.fp_len = a->address_length;
384       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
385
386       /* set the glean route for the prefix */
387       fib_table_entry_update_one_path (fib_index, &pfx_special,
388                                        FIB_SOURCE_INTERFACE,
389                                        (FIB_ENTRY_FLAG_CONNECTED |
390                                         FIB_ENTRY_FLAG_ATTACHED),
391                                        DPO_PROTO_IP4,
392                                        /* No next-hop address */
393                                        NULL,
394                                        sw_if_index,
395                                        /* invalid FIB index */
396                                        ~0,
397                                        1,
398                                        /* no out-label stack */
399                                        NULL,
400                                        FIB_ROUTE_PATH_FLAG_NONE);
401
402       /* set a drop route for the base address of the prefix */
403       pfx_special.fp_len = 32;
404       pfx_special.fp_addr.ip4.as_u32 =
405         address->as_u32 & im->fib_masks[a->address_length];
406
407       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408         fib_table_entry_special_add (fib_index, &pfx_special,
409                                      FIB_SOURCE_INTERFACE,
410                                      (FIB_ENTRY_FLAG_DROP |
411                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
412
413       /* set a route for the broadcast address of the prefix */
414       pfx_special.fp_len = 32;
415       pfx_special.fp_addr.ip4.as_u32 =
416         address->as_u32 | ~im->fib_masks[a->address_length];
417       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
419
420
421     }
422   /* length == 31 - add an attached route for the other address */
423   else if (a->address_length == 31)
424     {
425       pfx_special.fp_len = 32;
426       pfx_special.fp_addr.ip4.as_u32 =
427         address->as_u32 ^ clib_host_to_net_u32(1);
428
429       fib_table_entry_update_one_path (fib_index, &pfx_special,
430                                        FIB_SOURCE_INTERFACE,
431                                        (FIB_ENTRY_FLAG_ATTACHED),
432                                        DPO_PROTO_IP4,
433                                        &pfx_special.fp_addr,
434                                        sw_if_index,
435                                        /* invalid FIB index */
436                                        ~0,
437                                        1,
438                                        NULL,
439                                        FIB_ROUTE_PATH_FLAG_NONE);
440     }
441 }
442
443 static void
444 ip4_add_interface_routes (u32 sw_if_index,
445                           ip4_main_t * im, u32 fib_index,
446                           ip_interface_address_t * a)
447 {
448   ip_lookup_main_t *lm = &im->lookup_main;
449   ip4_address_t *address = ip_interface_address_get_address (lm, a);
450   fib_prefix_t pfx = {
451     .fp_len = 32,
452     .fp_proto = FIB_PROTOCOL_IP4,
453     .fp_addr.ip4 = *address,
454   };
455
456   /* set special routes for the prefix if needed */
457   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
458
459   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
460     {
461       u32 classify_table_index =
462         lm->classify_table_index_by_sw_if_index[sw_if_index];
463       if (classify_table_index != (u32) ~ 0)
464         {
465           dpo_id_t dpo = DPO_INVALID;
466
467           dpo_set (&dpo,
468                    DPO_CLASSIFY,
469                    DPO_PROTO_IP4,
470                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
471
472           fib_table_entry_special_dpo_add (fib_index,
473                                            &pfx,
474                                            FIB_SOURCE_CLASSIFY,
475                                            FIB_ENTRY_FLAG_NONE, &dpo);
476           dpo_reset (&dpo);
477         }
478     }
479
480   fib_table_entry_update_one_path (fib_index, &pfx,
481                                    FIB_SOURCE_INTERFACE,
482                                    (FIB_ENTRY_FLAG_CONNECTED |
483                                     FIB_ENTRY_FLAG_LOCAL),
484                                    DPO_PROTO_IP4,
485                                    &pfx.fp_addr,
486                                    sw_if_index,
487                                    // invalid FIB index
488                                    ~0,
489                                    1, NULL,
490                                    FIB_ROUTE_PATH_FLAG_NONE);
491 }
492
493 static void
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
495                                  u32 sw_if_index,
496                                  u32 fib_index,
497                                  ip4_address_t * address,
498                                  u32 address_length)
499 {
500   ip_lookup_main_t *lm = &im->lookup_main;
501   ip_interface_prefix_t *if_prefix;
502
503   ip_interface_prefix_key_t key = {
504     .prefix = {
505       .fp_len = address_length,
506       .fp_proto = FIB_PROTOCOL_IP4,
507       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
508     },
509     .sw_if_index = sw_if_index,
510   };
511
512   fib_prefix_t pfx_special = {
513     .fp_len = 32,
514     .fp_proto = FIB_PROTOCOL_IP4,
515   };
516
517   if_prefix = ip_get_interface_prefix (lm, &key);
518   if (!if_prefix)
519     {
520       clib_warning ("Prefix not found while deleting %U",
521                     format_ip4_address_and_length, address, address_length);
522       return;
523     }
524
525   if_prefix->ref_count -= 1;
526
527   /*
528    * Routes need to be adjusted if:
529    * - deleting last intf addr in prefix
530    * - deleting intf addr used as default source address in glean adjacency
531    *
532    * We're done now otherwise
533    */
534   if ((if_prefix->ref_count > 0) &&
535       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
536     return;
537
538   /* length <= 30, delete glean route, first address, last address */
539   if (address_length <= 30)
540     {
541
542       /* remove glean route for prefix */
543       pfx_special.fp_addr.ip4 = *address;
544       pfx_special.fp_len = address_length;
545       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
546
547       /* if no more intf addresses in prefix, remove other special routes */
548       if (!if_prefix->ref_count)
549         {
550           /* first address in prefix */
551           pfx_special.fp_addr.ip4.as_u32 =
552             address->as_u32 & im->fib_masks[address_length];
553           pfx_special.fp_len = 32;
554
555           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556           fib_table_entry_special_remove (fib_index,
557                                           &pfx_special,
558                                           FIB_SOURCE_INTERFACE);
559
560           /* prefix broadcast address */
561           pfx_special.fp_addr.ip4.as_u32 =
562             address->as_u32 | ~im->fib_masks[address_length];
563           pfx_special.fp_len = 32;
564
565           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
566           fib_table_entry_special_remove (fib_index,
567                                           &pfx_special,
568                                           FIB_SOURCE_INTERFACE);
569         }
570       else
571         /* default source addr just got deleted, find another */
572         {
573           ip_interface_address_t *new_src_ia = NULL;
574           ip4_address_t *new_src_addr = NULL;
575
576           new_src_addr =
577             ip4_interface_address_matching_destination
578               (im, address, sw_if_index, &new_src_ia);
579
580           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
581
582           pfx_special.fp_len = address_length;
583           pfx_special.fp_addr.ip4 = *new_src_addr;
584
585           /* set new glean route for the prefix */
586           fib_table_entry_update_one_path (fib_index, &pfx_special,
587                                            FIB_SOURCE_INTERFACE,
588                                            (FIB_ENTRY_FLAG_CONNECTED |
589                                             FIB_ENTRY_FLAG_ATTACHED),
590                                            DPO_PROTO_IP4,
591                                            /* No next-hop address */
592                                            NULL,
593                                            sw_if_index,
594                                            /* invalid FIB index */
595                                            ~0,
596                                            1,
597                                            /* no out-label stack */
598                                            NULL,
599                                            FIB_ROUTE_PATH_FLAG_NONE);
600           return;
601         }
602     }
603   /* length == 31, delete attached route for the other address */
604   else if (address_length == 31)
605     {
606       pfx_special.fp_addr.ip4.as_u32 =
607         address->as_u32 ^ clib_host_to_net_u32(1);
608
609       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
610     }
611
612   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
613   pool_put (lm->if_prefix_pool, if_prefix);
614 }
615
616 static void
617 ip4_del_interface_routes (u32 sw_if_index,
618                           ip4_main_t * im,
619                           u32 fib_index,
620                           ip4_address_t * address, u32 address_length)
621 {
622   fib_prefix_t pfx = {
623     .fp_len = address_length,
624     .fp_proto = FIB_PROTOCOL_IP4,
625     .fp_addr.ip4 = *address,
626   };
627
628   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
629                                    address, address_length);
630
631   pfx.fp_len = 32;
632   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
633 }
634
635 #ifndef CLIB_MARCH_VARIANT
636 void
637 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
638 {
639   ip4_main_t *im = &ip4_main;
640
641   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
642
643   /*
644    * enable/disable only on the 1<->0 transition
645    */
646   if (is_enable)
647     {
648       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
649         return;
650     }
651   else
652     {
653       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
654       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
655         return;
656     }
657   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
658                                !is_enable, 0, 0);
659
660
661   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
662                                sw_if_index, !is_enable, 0, 0);
663
664   {
665     ip4_enable_disable_interface_callback_t *cb;
666     vec_foreach (cb, im->enable_disable_interface_callbacks)
667       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
668   }
669 }
670
671 static clib_error_t *
672 ip4_add_del_interface_address_internal (vlib_main_t * vm,
673                                         u32 sw_if_index,
674                                         ip4_address_t * address,
675                                         u32 address_length, u32 is_del)
676 {
677   vnet_main_t *vnm = vnet_get_main ();
678   ip4_main_t *im = &ip4_main;
679   ip_lookup_main_t *lm = &im->lookup_main;
680   clib_error_t *error = 0;
681   u32 if_address_index, elts_before;
682   ip4_address_fib_t ip4_af, *addr_fib = 0;
683
684   /* local0 interface doesn't support IP addressing  */
685   if (sw_if_index == 0)
686     {
687       return
688        clib_error_create ("local0 interface doesn't support IP addressing");
689     }
690
691   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
692   ip4_addr_fib_init (&ip4_af, address,
693                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
694   vec_add1 (addr_fib, ip4_af);
695
696   /*
697    * there is no support for adj-fib handling in the presence of overlapping
698    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
699    * most routers do.
700    */
701   /* *INDENT-OFF* */
702   if (!is_del)
703     {
704       /* When adding an address check that it does not conflict
705          with an existing address on any interface in this table. */
706       ip_interface_address_t *ia;
707       vnet_sw_interface_t *sif;
708
709       pool_foreach(sif, vnm->interface_main.sw_interfaces,
710       ({
711           if (im->fib_index_by_sw_if_index[sw_if_index] ==
712               im->fib_index_by_sw_if_index[sif->sw_if_index])
713             {
714               foreach_ip_interface_address
715                 (&im->lookup_main, ia, sif->sw_if_index,
716                  0 /* honor unnumbered */ ,
717                  ({
718                    ip4_address_t * x =
719                      ip_interface_address_get_address
720                      (&im->lookup_main, ia);
721                    if (ip4_destination_matches_route
722                        (im, address, x, ia->address_length) ||
723                        ip4_destination_matches_route (im,
724                                                       x,
725                                                       address,
726                                                       address_length))
727                      {
728                        /* an intf may have >1 addr from the same prefix */
729                        if ((sw_if_index == sif->sw_if_index) &&
730                            (ia->address_length == address_length) &&
731                            (x->as_u32 != address->as_u32))
732                          continue;
733
734                        /* error if the length or intf was different */
735                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
736
737                        return
738                          clib_error_create
739                          ("failed to add %U on %U which conflicts with %U for interface %U",
740                           format_ip4_address_and_length, address,
741                           address_length,
742                           format_vnet_sw_if_index_name, vnm,
743                           sw_if_index,
744                           format_ip4_address_and_length, x,
745                           ia->address_length,
746                           format_vnet_sw_if_index_name, vnm,
747                           sif->sw_if_index);
748                      }
749                  }));
750             }
751       }));
752     }
753   /* *INDENT-ON* */
754
755   elts_before = pool_elts (lm->if_address_pool);
756
757   error = ip_interface_address_add_del
758     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
759   if (error)
760     goto done;
761
762   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
763
764   /* intf addr routes are added/deleted on admin up/down */
765   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
766     {
767       if (is_del)
768         ip4_del_interface_routes (sw_if_index,
769                                   im, ip4_af.fib_index, address,
770                                   address_length);
771       else
772         ip4_add_interface_routes (sw_if_index,
773                                   im, ip4_af.fib_index,
774                                   pool_elt_at_index
775                                   (lm->if_address_pool, if_address_index));
776     }
777
778   /* If pool did not grow/shrink: add duplicate address. */
779   if (elts_before != pool_elts (lm->if_address_pool))
780     {
781       ip4_add_del_interface_address_callback_t *cb;
782       vec_foreach (cb, im->add_del_interface_address_callbacks)
783         cb->function (im, cb->function_opaque, sw_if_index,
784                       address, address_length, if_address_index, is_del);
785     }
786
787 done:
788   vec_free (addr_fib);
789   return error;
790 }
791
792 clib_error_t *
793 ip4_add_del_interface_address (vlib_main_t * vm,
794                                u32 sw_if_index,
795                                ip4_address_t * address,
796                                u32 address_length, u32 is_del)
797 {
798   return ip4_add_del_interface_address_internal
799     (vm, sw_if_index, address, address_length, is_del);
800 }
801
802 void
803 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
804 {
805   ip_interface_address_t *ia;
806   ip4_main_t *im;
807
808   im = &ip4_main;
809
810   /*
811    * when directed broadcast is enabled, the subnet braodcast route will forward
812    * packets using an adjacency with a broadcast MAC. otherwise it drops
813    */
814   /* *INDENT-OFF* */
815   foreach_ip_interface_address(&im->lookup_main, ia,
816                                sw_if_index, 0,
817      ({
818        if (ia->address_length <= 30)
819          {
820            ip4_address_t *ipa;
821
822            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
823
824            fib_prefix_t pfx = {
825              .fp_len = 32,
826              .fp_proto = FIB_PROTOCOL_IP4,
827              .fp_addr = {
828                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
829              },
830            };
831
832            ip4_add_subnet_bcast_route
833              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
834                                                   sw_if_index),
835               &pfx, sw_if_index);
836          }
837      }));
838   /* *INDENT-ON* */
839 }
840 #endif
841
842 static clib_error_t *
843 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
844 {
845   ip4_main_t *im = &ip4_main;
846   ip_interface_address_t *ia;
847   ip4_address_t *a;
848   u32 is_admin_up, fib_index;
849
850   /* Fill in lookup tables with default table (0). */
851   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
852
853   vec_validate_init_empty (im->
854                            lookup_main.if_address_pool_index_by_sw_if_index,
855                            sw_if_index, ~0);
856
857   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
858
859   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
860
861   /* *INDENT-OFF* */
862   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
863                                 0 /* honor unnumbered */,
864   ({
865     a = ip_interface_address_get_address (&im->lookup_main, ia);
866     if (is_admin_up)
867       ip4_add_interface_routes (sw_if_index,
868                                 im, fib_index,
869                                 ia);
870     else
871       ip4_del_interface_routes (sw_if_index,
872                                 im, fib_index,
873                                 a, ia->address_length);
874   }));
875   /* *INDENT-ON* */
876
877   return 0;
878 }
879
880 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
881
882 /* Built-in ip4 unicast rx feature path definition */
883 /* *INDENT-OFF* */
884 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
885 {
886   .arc_name = "ip4-unicast",
887   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
888   .last_in_arc = "ip4-lookup",
889   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
890 };
891
892 VNET_FEATURE_INIT (ip4_flow_classify, static) =
893 {
894   .arc_name = "ip4-unicast",
895   .node_name = "ip4-flow-classify",
896   .runs_before = VNET_FEATURES ("ip4-inacl"),
897 };
898
899 VNET_FEATURE_INIT (ip4_inacl, static) =
900 {
901   .arc_name = "ip4-unicast",
902   .node_name = "ip4-inacl",
903   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
904 };
905
906 VNET_FEATURE_INIT (ip4_source_check_1, static) =
907 {
908   .arc_name = "ip4-unicast",
909   .node_name = "ip4-source-check-via-rx",
910   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
911 };
912
913 VNET_FEATURE_INIT (ip4_source_check_2, static) =
914 {
915   .arc_name = "ip4-unicast",
916   .node_name = "ip4-source-check-via-any",
917   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
918 };
919
920 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
921 {
922   .arc_name = "ip4-unicast",
923   .node_name = "ip4-source-and-port-range-check-rx",
924   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
925 };
926
927 VNET_FEATURE_INIT (ip4_policer_classify, static) =
928 {
929   .arc_name = "ip4-unicast",
930   .node_name = "ip4-policer-classify",
931   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
932 };
933
934 VNET_FEATURE_INIT (ip4_ipsec, static) =
935 {
936   .arc_name = "ip4-unicast",
937   .node_name = "ipsec4-input-feature",
938   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
939 };
940
941 VNET_FEATURE_INIT (ip4_vpath, static) =
942 {
943   .arc_name = "ip4-unicast",
944   .node_name = "vpath-input-ip4",
945   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
946 };
947
948 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
949 {
950   .arc_name = "ip4-unicast",
951   .node_name = "ip4-vxlan-bypass",
952   .runs_before = VNET_FEATURES ("ip4-lookup"),
953 };
954
955 VNET_FEATURE_INIT (ip4_not_enabled, static) =
956 {
957   .arc_name = "ip4-unicast",
958   .node_name = "ip4-not-enabled",
959   .runs_before = VNET_FEATURES ("ip4-lookup"),
960 };
961
962 VNET_FEATURE_INIT (ip4_lookup, static) =
963 {
964   .arc_name = "ip4-unicast",
965   .node_name = "ip4-lookup",
966   .runs_before = 0,     /* not before any other features */
967 };
968
969 /* Built-in ip4 multicast rx feature path definition */
970 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
971 {
972   .arc_name = "ip4-multicast",
973   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
974   .last_in_arc = "ip4-mfib-forward-lookup",
975   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
976 };
977
978 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
979 {
980   .arc_name = "ip4-multicast",
981   .node_name = "vpath-input-ip4",
982   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
983 };
984
985 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
986 {
987   .arc_name = "ip4-multicast",
988   .node_name = "ip4-not-enabled",
989   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
990 };
991
992 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
993 {
994   .arc_name = "ip4-multicast",
995   .node_name = "ip4-mfib-forward-lookup",
996   .runs_before = 0,     /* last feature */
997 };
998
999 /* Source and port-range check ip4 tx feature path definition */
1000 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1001 {
1002   .arc_name = "ip4-output",
1003   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1004   .last_in_arc = "interface-output",
1005   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1006 };
1007
1008 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1009 {
1010   .arc_name = "ip4-output",
1011   .node_name = "ip4-source-and-port-range-check-tx",
1012   .runs_before = VNET_FEATURES ("ip4-outacl"),
1013 };
1014
1015 VNET_FEATURE_INIT (ip4_outacl, static) =
1016 {
1017   .arc_name = "ip4-output",
1018   .node_name = "ip4-outacl",
1019   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1020 };
1021
1022 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1023 {
1024   .arc_name = "ip4-output",
1025   .node_name = "ipsec4-output-feature",
1026   .runs_before = VNET_FEATURES ("interface-output"),
1027 };
1028
1029 /* Built-in ip4 tx feature path definition */
1030 VNET_FEATURE_INIT (ip4_interface_output, static) =
1031 {
1032   .arc_name = "ip4-output",
1033   .node_name = "interface-output",
1034   .runs_before = 0,     /* not before any other features */
1035 };
1036 /* *INDENT-ON* */
1037
1038 static clib_error_t *
1039 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1040 {
1041   ip4_main_t *im = &ip4_main;
1042
1043   /* Fill in lookup tables with default table (0). */
1044   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1045   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1046
1047   if (!is_add)
1048     {
1049       ip4_main_t *im4 = &ip4_main;
1050       ip_lookup_main_t *lm4 = &im4->lookup_main;
1051       ip_interface_address_t *ia = 0;
1052       ip4_address_t *address;
1053       vlib_main_t *vm = vlib_get_main ();
1054
1055       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1056       /* *INDENT-OFF* */
1057       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1058       ({
1059         address = ip_interface_address_get_address (lm4, ia);
1060         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1061       }));
1062       /* *INDENT-ON* */
1063     }
1064
1065   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1066                                is_add, 0, 0);
1067
1068   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1069                                sw_if_index, is_add, 0, 0);
1070
1071   return /* no error */ 0;
1072 }
1073
1074 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1075
1076 /* Global IP4 main. */
1077 #ifndef CLIB_MARCH_VARIANT
1078 ip4_main_t ip4_main;
1079 #endif /* CLIB_MARCH_VARIANT */
1080
1081 static clib_error_t *
1082 ip4_lookup_init (vlib_main_t * vm)
1083 {
1084   ip4_main_t *im = &ip4_main;
1085   clib_error_t *error;
1086   uword i;
1087
1088   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1089     return error;
1090   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1091     return (error);
1092   if ((error = vlib_call_init_function (vm, fib_module_init)))
1093     return error;
1094   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1095     return error;
1096
1097   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1098     {
1099       u32 m;
1100
1101       if (i < 32)
1102         m = pow2_mask (i) << (32 - i);
1103       else
1104         m = ~0;
1105       im->fib_masks[i] = clib_host_to_net_u32 (m);
1106     }
1107
1108   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1109
1110   /* Create FIB with index 0 and table id of 0. */
1111   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1112                                      FIB_SOURCE_DEFAULT_ROUTE);
1113   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1114                                       MFIB_SOURCE_DEFAULT_ROUTE);
1115
1116   {
1117     pg_node_t *pn;
1118     pn = pg_get_node (ip4_lookup_node.index);
1119     pn->unformat_edit = unformat_pg_ip4_header;
1120   }
1121
1122   {
1123     ethernet_arp_header_t h;
1124
1125     clib_memset (&h, 0, sizeof (h));
1126
1127 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1128 #define _8(f,v) h.f = v;
1129     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1130     _16 (l3_type, ETHERNET_TYPE_IP4);
1131     _8 (n_l2_address_bytes, 6);
1132     _8 (n_l3_address_bytes, 4);
1133     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1134 #undef _16
1135 #undef _8
1136
1137     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1138                                /* data */ &h,
1139                                sizeof (h),
1140                                /* alloc chunk size */ 8,
1141                                "ip4 arp");
1142   }
1143
1144   return error;
1145 }
1146
1147 VLIB_INIT_FUNCTION (ip4_lookup_init);
1148
1149 typedef struct
1150 {
1151   /* Adjacency taken. */
1152   u32 dpo_index;
1153   u32 flow_hash;
1154   u32 fib_index;
1155
1156   /* Packet data, possibly *after* rewrite. */
1157   u8 packet_data[64 - 1 * sizeof (u32)];
1158 }
1159 ip4_forward_next_trace_t;
1160
1161 #ifndef CLIB_MARCH_VARIANT
1162 u8 *
1163 format_ip4_forward_next_trace (u8 * s, va_list * args)
1164 {
1165   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1166   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1167   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1168   u32 indent = format_get_indent (s);
1169   s = format (s, "%U%U",
1170               format_white_space, indent,
1171               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1172   return s;
1173 }
1174 #endif
1175
1176 static u8 *
1177 format_ip4_lookup_trace (u8 * s, va_list * args)
1178 {
1179   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1180   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1181   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1182   u32 indent = format_get_indent (s);
1183
1184   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1185               t->fib_index, t->dpo_index, t->flow_hash);
1186   s = format (s, "\n%U%U",
1187               format_white_space, indent,
1188               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1189   return s;
1190 }
1191
1192 static u8 *
1193 format_ip4_rewrite_trace (u8 * s, va_list * args)
1194 {
1195   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1196   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1197   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1198   u32 indent = format_get_indent (s);
1199
1200   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1201               t->fib_index, t->dpo_index, format_ip_adjacency,
1202               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1203   s = format (s, "\n%U%U",
1204               format_white_space, indent,
1205               format_ip_adjacency_packet_data,
1206               t->packet_data, sizeof (t->packet_data));
1207   return s;
1208 }
1209
1210 #ifndef CLIB_MARCH_VARIANT
1211 /* Common trace function for all ip4-forward next nodes. */
1212 void
1213 ip4_forward_next_trace (vlib_main_t * vm,
1214                         vlib_node_runtime_t * node,
1215                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1216 {
1217   u32 *from, n_left;
1218   ip4_main_t *im = &ip4_main;
1219
1220   n_left = frame->n_vectors;
1221   from = vlib_frame_vector_args (frame);
1222
1223   while (n_left >= 4)
1224     {
1225       u32 bi0, bi1;
1226       vlib_buffer_t *b0, *b1;
1227       ip4_forward_next_trace_t *t0, *t1;
1228
1229       /* Prefetch next iteration. */
1230       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1231       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1232
1233       bi0 = from[0];
1234       bi1 = from[1];
1235
1236       b0 = vlib_get_buffer (vm, bi0);
1237       b1 = vlib_get_buffer (vm, bi1);
1238
1239       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1240         {
1241           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1242           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1243           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1244           t0->fib_index =
1245             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1246              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1247             vec_elt (im->fib_index_by_sw_if_index,
1248                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1249
1250           clib_memcpy_fast (t0->packet_data,
1251                             vlib_buffer_get_current (b0),
1252                             sizeof (t0->packet_data));
1253         }
1254       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1255         {
1256           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1257           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1258           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1259           t1->fib_index =
1260             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1261              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1262             vec_elt (im->fib_index_by_sw_if_index,
1263                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1264           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1265                             sizeof (t1->packet_data));
1266         }
1267       from += 2;
1268       n_left -= 2;
1269     }
1270
1271   while (n_left >= 1)
1272     {
1273       u32 bi0;
1274       vlib_buffer_t *b0;
1275       ip4_forward_next_trace_t *t0;
1276
1277       bi0 = from[0];
1278
1279       b0 = vlib_get_buffer (vm, bi0);
1280
1281       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1282         {
1283           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1284           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1285           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1286           t0->fib_index =
1287             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1288              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1289             vec_elt (im->fib_index_by_sw_if_index,
1290                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1291           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1292                             sizeof (t0->packet_data));
1293         }
1294       from += 1;
1295       n_left -= 1;
1296     }
1297 }
1298
1299 /* Compute TCP/UDP/ICMP4 checksum in software. */
1300 u16
1301 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1302                               ip4_header_t * ip0)
1303 {
1304   ip_csum_t sum0;
1305   u32 ip_header_length, payload_length_host_byte_order;
1306
1307   /* Initialize checksum with ip header. */
1308   ip_header_length = ip4_header_bytes (ip0);
1309   payload_length_host_byte_order =
1310     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1311   sum0 =
1312     clib_host_to_net_u32 (payload_length_host_byte_order +
1313                           (ip0->protocol << 16));
1314
1315   if (BITS (uword) == 32)
1316     {
1317       sum0 =
1318         ip_csum_with_carry (sum0,
1319                             clib_mem_unaligned (&ip0->src_address, u32));
1320       sum0 =
1321         ip_csum_with_carry (sum0,
1322                             clib_mem_unaligned (&ip0->dst_address, u32));
1323     }
1324   else
1325     sum0 =
1326       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1327
1328   return ip_calculate_l4_checksum (vm, p0, sum0,
1329                                    payload_length_host_byte_order, (u8 *) ip0,
1330                                    ip_header_length, NULL);
1331 }
1332
1333 u32
1334 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1335 {
1336   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1337   udp_header_t *udp0;
1338   u16 sum16;
1339
1340   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1341           || ip0->protocol == IP_PROTOCOL_UDP);
1342
1343   udp0 = (void *) (ip0 + 1);
1344   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1345     {
1346       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1347                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1348       return p0->flags;
1349     }
1350
1351   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1352
1353   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1354                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1355
1356   return p0->flags;
1357 }
1358 #endif
1359
1360 /* *INDENT-OFF* */
1361 VNET_FEATURE_ARC_INIT (ip4_local) =
1362 {
1363   .arc_name  = "ip4-local",
1364   .start_nodes = VNET_FEATURES ("ip4-local"),
1365   .last_in_arc = "ip4-local-end-of-arc",
1366 };
1367 /* *INDENT-ON* */
1368
1369 static inline void
1370 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1371                             ip4_header_t * ip, u8 is_udp, u8 * error,
1372                             u8 * good_tcp_udp)
1373 {
1374   u32 flags0;
1375   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1376   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1377   if (is_udp)
1378     {
1379       udp_header_t *udp;
1380       u32 ip_len, udp_len;
1381       i32 len_diff;
1382       udp = ip4_next_header (ip);
1383       /* Verify UDP length. */
1384       ip_len = clib_net_to_host_u16 (ip->length);
1385       udp_len = clib_net_to_host_u16 (udp->length);
1386
1387       len_diff = ip_len - udp_len;
1388       *good_tcp_udp &= len_diff >= 0;
1389       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1390     }
1391 }
1392
/*
 * Checksum-state predicates on a buffer pointer.  Every argument and every
 * expansion is fully parenthesized so the macros stay correct when negated,
 * combined with other operators, or handed a non-trivial pointer expression.
 */

/* True when the TCP or UDP checksum-offload flag is set on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * True when the packet is TCP/UDP and its checksum is neither already
 * computed nor offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* True when the checksum is flagged correct or is offloaded. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1404
1405 static inline void
1406 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1407                          ip4_header_t * ih, u8 * error)
1408 {
1409   u8 is_udp, is_tcp_udp, good_tcp_udp;
1410
1411   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1412   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1413
1414   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1415     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1416   else
1417     good_tcp_udp = ip4_local_csum_is_valid (b);
1418
1419   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1420   *error = (is_tcp_udp && !good_tcp_udp
1421             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1422 }
1423
1424 static inline void
1425 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1426                             ip4_header_t ** ih, u8 * error)
1427 {
1428   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1429
1430   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1431   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1432
1433   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1434   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1435
1436   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1437   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1438
1439   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1440                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1441     {
1442       if (is_tcp_udp[0])
1443         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1444                                     &good_tcp_udp[0]);
1445       if (is_tcp_udp[1])
1446         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1447                                     &good_tcp_udp[1]);
1448     }
1449
1450   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1451               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1452   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1453               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1454 }
1455
1456 static inline void
1457 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1458                               vlib_buffer_t * b, u16 * next, u8 error,
1459                               u8 head_of_feature_arc)
1460 {
1461   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1462   u32 next_index;
1463
1464   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1465   b->error = error ? error_node->errors[error] : 0;
1466   if (head_of_feature_arc)
1467     {
1468       next_index = *next;
1469       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1470         {
1471           vnet_feature_arc_start (arc_index,
1472                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1473                                   &next_index, b);
1474           *next = next_index;
1475         }
1476     }
1477 }
1478
1479 typedef struct
1480 {
1481   ip4_address_t src;
1482   u32 lbi;
1483   u8 error;
1484   u8 first;
1485 } ip4_local_last_check_t;
1486
1487 static inline void
1488 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1489                      ip4_local_last_check_t * last_check, u8 * error0)
1490 {
1491   ip4_fib_mtrie_leaf_t leaf0;
1492   ip4_fib_mtrie_t *mtrie0;
1493   const dpo_id_t *dpo0;
1494   load_balance_t *lb0;
1495   u32 lbi0;
1496
1497   vnet_buffer (b)->ip.fib_index =
1498     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1499     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1500
1501   /*
1502    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1503    *  adjacency for the destination address (the local interface address).
1504    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1505    *  adjacency for the source address (the remote sender's address)
1506    */
1507   if (PREDICT_FALSE (last_check->first ||
1508                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1509     {
1510       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1511       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1512       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1513       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1514       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1515
1516       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1517         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1518       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1519
1520       lb0 = load_balance_get (lbi0);
1521       dpo0 = load_balance_get_bucket_i (lb0, 0);
1522
1523       /*
1524        * Must have a route to source otherwise we drop the packet.
1525        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1526        *
1527        * The checks are:
1528        *  - the source is a recieve => it's from us => bogus, do this
1529        *    first since it sets a different error code.
1530        *  - uRPF check for any route to source - accept if passes.
1531        *  - allow packets destined to the broadcast address from unknown sources
1532        */
1533
1534       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1535                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1536                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1537       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1538                   && !fib_urpf_check_size (lb0->lb_urpf)
1539                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1540                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1541
1542       last_check->src.as_u32 = ip0->src_address.as_u32;
1543       last_check->lbi = lbi0;
1544       last_check->error = *error0;
1545     }
1546   else
1547     {
1548       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1549         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1550       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1551       *error0 = last_check->error;
1552       last_check->first = 0;
1553     }
1554 }
1555
1556 static inline void
1557 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1558                         ip4_local_last_check_t * last_check, u8 * error)
1559 {
1560   ip4_fib_mtrie_leaf_t leaf[2];
1561   ip4_fib_mtrie_t *mtrie[2];
1562   const dpo_id_t *dpo[2];
1563   load_balance_t *lb[2];
1564   u32 not_last_hit;
1565   u32 lbi[2];
1566
1567   not_last_hit = last_check->first;
1568   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1569   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1570
1571   vnet_buffer (b[0])->ip.fib_index =
1572     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1573     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1574     vnet_buffer (b[0])->ip.fib_index;
1575
1576   vnet_buffer (b[1])->ip.fib_index =
1577     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1578     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1579     vnet_buffer (b[1])->ip.fib_index;
1580
1581   /*
1582    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1583    *  adjacency for the destination address (the local interface address).
1584    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1585    *  adjacency for the source address (the remote sender's address)
1586    */
1587   if (PREDICT_FALSE (not_last_hit))
1588     {
1589       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1590       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1591
1592       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1593       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1594
1595       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1596                                            &ip[0]->src_address, 2);
1597       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1598                                            &ip[1]->src_address, 2);
1599
1600       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1601                                            &ip[0]->src_address, 3);
1602       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1603                                            &ip[1]->src_address, 3);
1604
1605       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1606       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1607
1608       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1609         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1610       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1611
1612       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1613         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1614       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1615
1616       lb[0] = load_balance_get (lbi[0]);
1617       lb[1] = load_balance_get (lbi[1]);
1618
1619       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1620       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1621
1622       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1623                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1624                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1625       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1626                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1627                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1628                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1629
1630       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1631                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1632                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1633       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1634                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1635                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1636                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1637
1638       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1639       last_check->lbi = lbi[1];
1640       last_check->error = error[1];
1641     }
1642   else
1643     {
1644       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1645         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1646       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1647
1648       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1649         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1650       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1651
1652       error[0] = last_check->error;
1653       error[1] = last_check->error;
1654       last_check->first = 0;
1655     }
1656 }
1657
1658 enum ip_local_packet_type_e
1659 {
1660   IP_LOCAL_PACKET_TYPE_L4,
1661   IP_LOCAL_PACKET_TYPE_NAT,
1662   IP_LOCAL_PACKET_TYPE_FRAG,
1663 };
1664
1665 /**
1666  * Determine packet type and next node.
1667  *
1668  * The expectation is that all packets that are not L4 will skip
1669  * checksums and source checks.
1670  */
1671 always_inline u8
1672 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1673 {
1674   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1675
1676   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1677     {
1678       *next = IP_LOCAL_NEXT_REASSEMBLY;
1679       return IP_LOCAL_PACKET_TYPE_FRAG;
1680     }
1681   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1682     {
1683       *next = lm->local_next_by_ip_protocol[ip->protocol];
1684       return IP_LOCAL_PACKET_TYPE_NAT;
1685     }
1686
1687   *next = lm->local_next_by_ip_protocol[ip->protocol];
1688   return IP_LOCAL_PACKET_TYPE_L4;
1689 }
1690
1691 static inline uword
1692 ip4_local_inline (vlib_main_t * vm,
1693                   vlib_node_runtime_t * node,
1694                   vlib_frame_t * frame, int head_of_feature_arc)
1695 {
1696   u32 *from, n_left_from;
1697   vlib_node_runtime_t *error_node =
1698     vlib_node_get_runtime (vm, ip4_input_node.index);
1699   u16 nexts[VLIB_FRAME_SIZE], *next;
1700   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1701   ip4_header_t *ip[2];
1702   u8 error[2], pt[2];
1703
1704   ip4_local_last_check_t last_check = {
1705     /*
1706      * 0.0.0.0 can appear as the source address of an IP packet,
1707      * as can any other address, hence the need to use the 'first'
1708      * member to make sure the .lbi is initialised for the first
1709      * packet.
1710      */
1711     .src = {.as_u32 = 0},
1712     .lbi = ~0,
1713     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1714     .first = 1,
1715   };
1716
1717   from = vlib_frame_vector_args (frame);
1718   n_left_from = frame->n_vectors;
1719
1720   if (node->flags & VLIB_NODE_FLAG_TRACE)
1721     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1722
1723   vlib_get_buffers (vm, from, bufs, n_left_from);
1724   b = bufs;
1725   next = nexts;
1726
1727   while (n_left_from >= 6)
1728     {
1729       u8 not_batch = 0;
1730
1731       /* Prefetch next iteration. */
1732       {
1733         vlib_prefetch_buffer_header (b[4], LOAD);
1734         vlib_prefetch_buffer_header (b[5], LOAD);
1735
1736         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1737         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1738       }
1739
1740       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1741
1742       ip[0] = vlib_buffer_get_current (b[0]);
1743       ip[1] = vlib_buffer_get_current (b[1]);
1744
1745       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1746       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1747
1748       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1749       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1750
1751       not_batch = pt[0] ^ pt[1];
1752
1753       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1754         goto skip_checks;
1755
1756       if (PREDICT_TRUE (not_batch == 0))
1757         {
1758           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1759           ip4_local_check_src_x2 (b, ip, &last_check, error);
1760         }
1761       else
1762         {
1763           if (!pt[0])
1764             {
1765               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1766               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1767             }
1768           if (!pt[1])
1769             {
1770               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1771               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1772             }
1773         }
1774
1775     skip_checks:
1776
1777       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1778                                     head_of_feature_arc);
1779       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1780                                     head_of_feature_arc);
1781
1782       b += 2;
1783       next += 2;
1784       n_left_from -= 2;
1785     }
1786
1787   while (n_left_from > 0)
1788     {
1789       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1790
1791       ip[0] = vlib_buffer_get_current (b[0]);
1792       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1793       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1794
1795       if (head_of_feature_arc == 0 || pt[0])
1796         goto skip_check;
1797
1798       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1799       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1800
1801     skip_check:
1802
1803       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1804                                     head_of_feature_arc);
1805
1806       b += 1;
1807       next += 1;
1808       n_left_from -= 1;
1809     }
1810
1811   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1812   return frame->n_vectors;
1813 }
1814
1815 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1816                                vlib_frame_t * frame)
1817 {
1818   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1819 }
1820
/* *INDENT-OFF* */
/*
 * Registration of the "ip4-local" graph node.
 *
 * Only the statically known next nodes are listed here;
 * ip4_register_protocol() adds further next nodes at run time through
 * vlib_node_add_next().
 */
VLIB_REGISTER_NODE (ip4_local_node) =
{
  .name = "ip4-local",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_next_nodes = IP_LOCAL_N_NEXT,
  .next_nodes =
  {
    [IP_LOCAL_NEXT_DROP] = "ip4-drop",
    [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
    [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
    [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
    [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
  },
};
/* *INDENT-ON* */
1838
1839
1840 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1841                                           vlib_node_runtime_t * node,
1842                                           vlib_frame_t * frame)
1843 {
1844   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1845 }
1846
/* *INDENT-OFF* */
/*
 * Registration of the "ip4-local-end-of-arc" graph node, declared as a
 * sibling of "ip4-local".
 */
VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
  .name = "ip4-local-end-of-arc",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,
  .sibling_of = "ip4-local",
};

/* Attach "ip4-local-end-of-arc" as a feature on the "ip4-local" arc. */
VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
  .arc_name = "ip4-local",
  .node_name = "ip4-local-end-of-arc",
  .runs_before = 0, /* not before any other features */
};
/* *INDENT-ON* */
1862
1863 #ifndef CLIB_MARCH_VARIANT
1864 void
1865 ip4_register_protocol (u32 protocol, u32 node_index)
1866 {
1867   vlib_main_t *vm = vlib_get_main ();
1868   ip4_main_t *im = &ip4_main;
1869   ip_lookup_main_t *lm = &im->lookup_main;
1870
1871   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1872   lm->local_next_by_ip_protocol[protocol] =
1873     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1874 }
1875
1876 void
1877 ip4_unregister_protocol (u32 protocol)
1878 {
1879   ip4_main_t *im = &ip4_main;
1880   ip_lookup_main_t *lm = &im->lookup_main;
1881
1882   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1883   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1884 }
1885 #endif
1886
1887 static clib_error_t *
1888 show_ip_local_command_fn (vlib_main_t * vm,
1889                           unformat_input_t * input, vlib_cli_command_t * cmd)
1890 {
1891   ip4_main_t *im = &ip4_main;
1892   ip_lookup_main_t *lm = &im->lookup_main;
1893   int i;
1894
1895   vlib_cli_output (vm, "Protocols handled by ip4_local");
1896   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1897     {
1898       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1899         {
1900           u32 node_index = vlib_get_node (vm,
1901                                           ip4_local_node.index)->
1902             next_nodes[lm->local_next_by_ip_protocol[i]];
1903           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1904                            format_vlib_node_name, vm, node_index);
1905         }
1906     }
1907   return 0;
1908 }
1909
1910
1911
/*?
 * Display the set of protocols handled by the local IPv4 stack.
 *
 * Each handled protocol is printed as '<protocol>: <next node name>';
 * protocols whose next index is IP_LOCAL_NEXT_PUNT are not listed.
 *
 * NOTE(review): the sample output below shows bare protocol numbers and
 * may predate the current output format - confirm and refresh.
 *
 * @cliexpar
 * Example of how to display local protocol table:
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * 1
 * 17
 * 47
 * @cliexend
?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_local, static) =
{
  .path = "show ip local",
  .function = show_ip_local_command_fn,
  .short_help = "show ip local",
};
/* *INDENT-ON* */
1932
/**
 * Static next-node indices of the ip4-rewrite family of nodes.
 * Buffers default to IP4_REWRITE_NEXT_DROP until a rewrite succeeds.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT = 3        /* Last: number of static next nodes */
} ip4_rewrite_next_t;
1940
/**
 * The bits of an IPv4 address to mask to construct a multicast
 * MAC address.
 *
 * The mask selects the low-order 23 bits of the address; the two
 * definitions are the same mask expressed for an address held in
 * network byte order on a big-endian and a little-endian host.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif
1950
1951 always_inline void
1952 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1953                u16 adj_packet_bytes, bool df, u16 * next,
1954                u8 is_midchain, u32 * error)
1955 {
1956   if (packet_len > adj_packet_bytes)
1957     {
1958       *error = IP4_ERROR_MTU_EXCEEDED;
1959       if (df)
1960         {
1961           icmp4_error_set_vnet_buffer
1962             (b, ICMP4_destination_unreachable,
1963              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1964              adj_packet_bytes);
1965           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1966         }
1967       else
1968         {
1969           /* IP fragmentation */
1970           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1971                                    (is_midchain ?
1972                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1973                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1974           *next = IP4_REWRITE_NEXT_FRAGMENT;
1975         }
1976     }
1977 }
1978
1979 /* increment TTL & update checksum.
1980    Works either endian, so no need for byte swap. */
1981 static_always_inline void
1982 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
1983 {
1984   i32 ttl;
1985   u32 checksum;
1986   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
1987     {
1988       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1989       return;
1990     }
1991
1992   ttl = ip->ttl;
1993
1994   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
1995   checksum += checksum >= 0xffff;
1996
1997   ip->checksum = checksum;
1998   ttl += 1;
1999   ip->ttl = ttl;
2000
2001   ASSERT (ip->checksum == ip4_header_checksum (ip));
2002 }
2003
2004 /* Decrement TTL & update checksum.
2005    Works either endian, so no need for byte swap. */
2006 static_always_inline void
2007 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2008                             u32 * error)
2009 {
2010   i32 ttl;
2011   u32 checksum;
2012   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2013     {
2014       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2015       return;
2016     }
2017
2018   ttl = ip->ttl;
2019
2020   /* Input node should have reject packets with ttl 0. */
2021   ASSERT (ip->ttl > 0);
2022
2023   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2024   checksum += checksum >= 0xffff;
2025
2026   ip->checksum = checksum;
2027   ttl -= 1;
2028   ip->ttl = ttl;
2029
2030   /*
2031    * If the ttl drops below 1 when forwarding, generate
2032    * an ICMP response.
2033    */
2034   if (PREDICT_FALSE (ttl <= 0))
2035     {
2036       *error = IP4_ERROR_TIME_EXPIRED;
2037       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2038       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2039                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2040                                    0);
2041       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2042     }
2043
2044   /* Verify checksum. */
2045   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2046           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2047 }
2048
2049
2050 always_inline uword
2051 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2052                              vlib_node_runtime_t * node,
2053                              vlib_frame_t * frame,
2054                              int do_counters, int is_midchain, int is_mcast)
2055 {
2056   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2057   u32 *from = vlib_frame_vector_args (frame);
2058   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2059   u16 nexts[VLIB_FRAME_SIZE], *next;
2060   u32 n_left_from;
2061   vlib_node_runtime_t *error_node =
2062     vlib_node_get_runtime (vm, ip4_input_node.index);
2063
2064   n_left_from = frame->n_vectors;
2065   u32 thread_index = vm->thread_index;
2066
2067   vlib_get_buffers (vm, from, bufs, n_left_from);
2068   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2069
2070 #if (CLIB_N_PREFETCHES >= 8)
2071   if (n_left_from >= 6)
2072     {
2073       int i;
2074       for (i = 2; i < 6; i++)
2075         vlib_prefetch_buffer_header (bufs[i], LOAD);
2076     }
2077
2078   next = nexts;
2079   b = bufs;
2080   while (n_left_from >= 8)
2081     {
2082       const ip_adjacency_t *adj0, *adj1;
2083       ip4_header_t *ip0, *ip1;
2084       u32 rw_len0, error0, adj_index0;
2085       u32 rw_len1, error1, adj_index1;
2086       u32 tx_sw_if_index0, tx_sw_if_index1;
2087       u8 *p;
2088
2089       vlib_prefetch_buffer_header (b[6], LOAD);
2090       vlib_prefetch_buffer_header (b[7], LOAD);
2091
2092       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2093       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2094
2095       /*
2096        * pre-fetch the per-adjacency counters
2097        */
2098       if (do_counters)
2099         {
2100           vlib_prefetch_combined_counter (&adjacency_counters,
2101                                           thread_index, adj_index0);
2102           vlib_prefetch_combined_counter (&adjacency_counters,
2103                                           thread_index, adj_index1);
2104         }
2105
2106       ip0 = vlib_buffer_get_current (b[0]);
2107       ip1 = vlib_buffer_get_current (b[1]);
2108
2109       error0 = error1 = IP4_ERROR_NONE;
2110
2111       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2112       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2113
2114       /* Rewrite packet header and updates lengths. */
2115       adj0 = adj_get (adj_index0);
2116       adj1 = adj_get (adj_index1);
2117
2118       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2119       rw_len0 = adj0[0].rewrite_header.data_bytes;
2120       rw_len1 = adj1[0].rewrite_header.data_bytes;
2121       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2122       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2123
2124       p = vlib_buffer_get_current (b[2]);
2125       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2126       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2127
2128       p = vlib_buffer_get_current (b[3]);
2129       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2130       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2131
2132       /* Check MTU of outgoing interface. */
2133       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2134       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2135
2136       if (b[0]->flags & VNET_BUFFER_F_GSO)
2137         ip0_len = gso_mtu_sz (b[0]);
2138       if (b[1]->flags & VNET_BUFFER_F_GSO)
2139         ip1_len = gso_mtu_sz (b[1]);
2140
2141       ip4_mtu_check (b[0], ip0_len,
2142                      adj0[0].rewrite_header.max_l3_packet_bytes,
2143                      ip0->flags_and_fragment_offset &
2144                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2145                      next + 0, is_midchain, &error0);
2146       ip4_mtu_check (b[1], ip1_len,
2147                      adj1[0].rewrite_header.max_l3_packet_bytes,
2148                      ip1->flags_and_fragment_offset &
2149                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2150                      next + 1, is_midchain, &error1);
2151
2152       if (is_mcast)
2153         {
2154           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2155                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2156                     IP4_ERROR_SAME_INTERFACE : error0);
2157           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2158                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2159                     IP4_ERROR_SAME_INTERFACE : error1);
2160         }
2161
2162       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2163        * to see the IP header */
2164       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2165         {
2166           u32 next_index = adj0[0].rewrite_header.next_index;
2167           vlib_buffer_advance (b[0], -(word) rw_len0);
2168
2169           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2170           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2171
2172           if (PREDICT_FALSE
2173               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2174             vnet_feature_arc_start (lm->output_feature_arc_index,
2175                                     tx_sw_if_index0, &next_index, b[0]);
2176           next[0] = next_index;
2177           if (is_midchain)
2178             calc_checksums (vm, b[0]);
2179         }
2180       else
2181         {
2182           b[0]->error = error_node->errors[error0];
2183           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2184             ip4_ttl_inc (b[0], ip0);
2185         }
2186       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2187         {
2188           u32 next_index = adj1[0].rewrite_header.next_index;
2189           vlib_buffer_advance (b[1], -(word) rw_len1);
2190
2191           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2192           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2193
2194           if (PREDICT_FALSE
2195               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2196             vnet_feature_arc_start (lm->output_feature_arc_index,
2197                                     tx_sw_if_index1, &next_index, b[1]);
2198           next[1] = next_index;
2199           if (is_midchain)
2200             calc_checksums (vm, b[1]);
2201         }
2202       else
2203         {
2204           b[1]->error = error_node->errors[error1];
2205           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2206             ip4_ttl_inc (b[1], ip1);
2207         }
2208
2209       /* Guess we are only writing on simple Ethernet header. */
2210       vnet_rewrite_two_headers (adj0[0], adj1[0],
2211                                 ip0, ip1, sizeof (ethernet_header_t));
2212
2213       if (do_counters)
2214         {
2215           if (error0 == IP4_ERROR_NONE)
2216             vlib_increment_combined_counter
2217               (&adjacency_counters,
2218                thread_index,
2219                adj_index0, 1,
2220                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2221
2222           if (error1 == IP4_ERROR_NONE)
2223             vlib_increment_combined_counter
2224               (&adjacency_counters,
2225                thread_index,
2226                adj_index1, 1,
2227                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2228         }
2229
2230       if (is_midchain)
2231         {
2232           if (error0 == IP4_ERROR_NONE && adj0->sub_type.midchain.fixup_func)
2233             adj0->sub_type.midchain.fixup_func
2234               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2235           if (error1 == IP4_ERROR_NONE && adj1->sub_type.midchain.fixup_func)
2236             adj1->sub_type.midchain.fixup_func
2237               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2238         }
2239
2240       if (is_mcast)
2241         {
2242           /* copy bytes from the IP address into the MAC rewrite */
2243           if (error0 == IP4_ERROR_NONE)
2244             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2245                                         adj0->rewrite_header.dst_mcast_offset,
2246                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2247           if (error1 == IP4_ERROR_NONE)
2248             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2249                                         adj1->rewrite_header.dst_mcast_offset,
2250                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2251         }
2252
2253       next += 2;
2254       b += 2;
2255       n_left_from -= 2;
2256     }
2257 #elif (CLIB_N_PREFETCHES >= 4)
2258   next = nexts;
2259   b = bufs;
2260   while (n_left_from >= 1)
2261     {
2262       ip_adjacency_t *adj0;
2263       ip4_header_t *ip0;
2264       u32 rw_len0, error0, adj_index0;
2265       u32 tx_sw_if_index0;
2266       u8 *p;
2267
2268       /* Prefetch next iteration */
2269       if (PREDICT_TRUE (n_left_from >= 4))
2270         {
2271           ip_adjacency_t *adj2;
2272           u32 adj_index2;
2273
2274           vlib_prefetch_buffer_header (b[3], LOAD);
2275           vlib_prefetch_buffer_data (b[2], LOAD);
2276
2277           /* Prefetch adj->rewrite_header */
2278           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2279           adj2 = adj_get (adj_index2);
2280           p = (u8 *) adj2;
2281           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2282                          LOAD);
2283         }
2284
2285       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2286
2287       /*
2288        * Prefetch the per-adjacency counters
2289        */
2290       if (do_counters)
2291         {
2292           vlib_prefetch_combined_counter (&adjacency_counters,
2293                                           thread_index, adj_index0);
2294         }
2295
2296       ip0 = vlib_buffer_get_current (b[0]);
2297
2298       error0 = IP4_ERROR_NONE;
2299
2300       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2301
2302       /* Rewrite packet header and updates lengths. */
2303       adj0 = adj_get (adj_index0);
2304
2305       /* Rewrite header was prefetched. */
2306       rw_len0 = adj0[0].rewrite_header.data_bytes;
2307       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2308
2309       /* Check MTU of outgoing interface. */
2310       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2311
2312       if (b[0]->flags & VNET_BUFFER_F_GSO)
2313         ip0_len = gso_mtu_sz (b[0]);
2314
2315       ip4_mtu_check (b[0], ip0_len,
2316                      adj0[0].rewrite_header.max_l3_packet_bytes,
2317                      ip0->flags_and_fragment_offset &
2318                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2319                      next + 0, is_midchain, &error0);
2320
2321       if (is_mcast)
2322         {
2323           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2324                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2325                     IP4_ERROR_SAME_INTERFACE : error0);
2326         }
2327
2328       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2329        * to see the IP header */
2330       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2331         {
2332           u32 next_index = adj0[0].rewrite_header.next_index;
2333           vlib_buffer_advance (b[0], -(word) rw_len0);
2334           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2335           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2336
2337           if (PREDICT_FALSE
2338               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2339             vnet_feature_arc_start (lm->output_feature_arc_index,
2340                                     tx_sw_if_index0, &next_index, b[0]);
2341           next[0] = next_index;
2342
2343           if (is_midchain)
2344             calc_checksums (vm, b[0]);
2345
2346           /* Guess we are only writing on simple Ethernet header. */
2347           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2348
2349           /*
2350            * Bump the per-adjacency counters
2351            */
2352           if (do_counters)
2353             vlib_increment_combined_counter
2354               (&adjacency_counters,
2355                thread_index,
2356                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2357                                                            b[0]) + rw_len0);
2358
2359           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2360             adj0->sub_type.midchain.fixup_func
2361               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2362
2363           if (is_mcast)
2364             /* copy bytes from the IP address into the MAC rewrite */
2365             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2366                                         adj0->rewrite_header.dst_mcast_offset,
2367                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2368         }
2369       else
2370         {
2371           b[0]->error = error_node->errors[error0];
2372           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2373             ip4_ttl_inc (b[0], ip0);
2374         }
2375
2376       next += 1;
2377       b += 1;
2378       n_left_from -= 1;
2379     }
2380 #endif
2381
2382   while (n_left_from > 0)
2383     {
2384       ip_adjacency_t *adj0;
2385       ip4_header_t *ip0;
2386       u32 rw_len0, adj_index0, error0;
2387       u32 tx_sw_if_index0;
2388
2389       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2390
2391       adj0 = adj_get (adj_index0);
2392
2393       if (do_counters)
2394         vlib_prefetch_combined_counter (&adjacency_counters,
2395                                         thread_index, adj_index0);
2396
2397       ip0 = vlib_buffer_get_current (b[0]);
2398
2399       error0 = IP4_ERROR_NONE;
2400
2401       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2402
2403
2404       /* Update packet buffer attributes/set output interface. */
2405       rw_len0 = adj0[0].rewrite_header.data_bytes;
2406       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2407
2408       /* Check MTU of outgoing interface. */
2409       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2410       if (b[0]->flags & VNET_BUFFER_F_GSO)
2411         ip0_len = gso_mtu_sz (b[0]);
2412
2413       ip4_mtu_check (b[0], ip0_len,
2414                      adj0[0].rewrite_header.max_l3_packet_bytes,
2415                      ip0->flags_and_fragment_offset &
2416                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2417                      next + 0, is_midchain, &error0);
2418
2419       if (is_mcast)
2420         {
2421           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2422                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2423                     IP4_ERROR_SAME_INTERFACE : error0);
2424         }
2425
2426       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2427        * to see the IP header */
2428       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2429         {
2430           u32 next_index = adj0[0].rewrite_header.next_index;
2431           vlib_buffer_advance (b[0], -(word) rw_len0);
2432           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2433           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2434
2435           if (PREDICT_FALSE
2436               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2437             vnet_feature_arc_start (lm->output_feature_arc_index,
2438                                     tx_sw_if_index0, &next_index, b[0]);
2439           next[0] = next_index;
2440
2441           if (is_midchain)
2442             /* this acts on the packet that is about to be encapped */
2443             calc_checksums (vm, b[0]);
2444
2445           /* Guess we are only writing on simple Ethernet header. */
2446           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2447
2448           if (do_counters)
2449             vlib_increment_combined_counter
2450               (&adjacency_counters,
2451                thread_index, adj_index0, 1,
2452                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2453
2454           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2455             adj0->sub_type.midchain.fixup_func
2456               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2457
2458           if (is_mcast)
2459             /* copy bytes from the IP address into the MAC rewrite */
2460             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2461                                         adj0->rewrite_header.dst_mcast_offset,
2462                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2463         }
2464       else
2465         {
2466           b[0]->error = error_node->errors[error0];
2467           /* undo the TTL decrement - we'll be back to do it again */
2468           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2469             ip4_ttl_inc (b[0], ip0);
2470         }
2471
2472       next += 1;
2473       b += 1;
2474       n_left_from -= 1;
2475     }
2476
2477
2478   /* Need to do trace after rewrites to pick up new packet data. */
2479   if (node->flags & VLIB_NODE_FLAG_TRACE)
2480     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2481
2482   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2483   return frame->n_vectors;
2484 }
2485
2486 always_inline uword
2487 ip4_rewrite_inline (vlib_main_t * vm,
2488                     vlib_node_runtime_t * node,
2489                     vlib_frame_t * frame,
2490                     int do_counters, int is_midchain, int is_mcast)
2491 {
2492   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2493                                       is_midchain, is_mcast);
2494 }
2495
2496
2497 /** @brief IPv4 rewrite node.
2498     @node ip4-rewrite
2499
2500     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2501     header checksum, fetch the ip adjacency, check the outbound mtu,
2502     apply the adjacency rewrite, and send pkts to the adjacency
2503     rewrite header's rewrite_next_index.
2504
2505     @param vm vlib_main_t corresponding to the current thread
2506     @param node vlib_node_runtime_t
2507     @param frame vlib_frame_t whose contents should be dispatched
2508
2509     @par Graph mechanics: buffer metadata, next index usage
2510
2511     @em Uses:
2512     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2513         - the rewrite adjacency index
2514     - <code>adj->lookup_next_index</code>
2515         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2516           the packet will be dropped.
2517     - <code>adj->rewrite_header</code>
2518         - Rewrite string length, rewrite string, next_index
2519
2520     @em Sets:
2521     - <code>b->current_data, b->current_length</code>
2522         - Updated net of applying the rewrite string
2523
2524     <em>Next Indices:</em>
2525     - <code> adj->rewrite_header.next_index </code>
2526       or @c ip4-drop
2527 */
2528
2529 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2530                                  vlib_frame_t * frame)
2531 {
2532   if (adj_are_counters_enabled ())
2533     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2534   else
2535     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2536 }
2537
2538 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2539                                        vlib_node_runtime_t * node,
2540                                        vlib_frame_t * frame)
2541 {
2542   if (adj_are_counters_enabled ())
2543     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2544   else
2545     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2546 }
2547
2548 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2549                                   vlib_node_runtime_t * node,
2550                                   vlib_frame_t * frame)
2551 {
2552   if (adj_are_counters_enabled ())
2553     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2554   else
2555     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2556 }
2557
2558 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2559                                        vlib_node_runtime_t * node,
2560                                        vlib_frame_t * frame)
2561 {
2562   if (adj_are_counters_enabled ())
2563     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2564   else
2565     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2566 }
2567
2568 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2569                                         vlib_node_runtime_t * node,
2570                                         vlib_frame_t * frame)
2571 {
2572   if (adj_are_counters_enabled ())
2573     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2574   else
2575     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2576 }
2577
2578 /* *INDENT-OFF* */
2579 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2580   .name = "ip4-rewrite",
2581   .vector_size = sizeof (u32),
2582
2583   .format_trace = format_ip4_rewrite_trace,
2584
2585   .n_next_nodes = IP4_REWRITE_N_NEXT,
2586   .next_nodes = {
2587     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2588     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2589     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2590   },
2591 };
2592
2593 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2594   .name = "ip4-rewrite-bcast",
2595   .vector_size = sizeof (u32),
2596
2597   .format_trace = format_ip4_rewrite_trace,
2598   .sibling_of = "ip4-rewrite",
2599 };
2600
2601 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2602   .name = "ip4-rewrite-mcast",
2603   .vector_size = sizeof (u32),
2604
2605   .format_trace = format_ip4_rewrite_trace,
2606   .sibling_of = "ip4-rewrite",
2607 };
2608
2609 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2610   .name = "ip4-mcast-midchain",
2611   .vector_size = sizeof (u32),
2612
2613   .format_trace = format_ip4_rewrite_trace,
2614   .sibling_of = "ip4-rewrite",
2615 };
2616
2617 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2618   .name = "ip4-midchain",
2619   .vector_size = sizeof (u32),
2620   .format_trace = format_ip4_rewrite_trace,
2621   .sibling_of = "ip4-rewrite",
2622 };
/* *INDENT-ON* */
2624
2625 static int
2626 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2627 {
2628   ip4_fib_mtrie_t *mtrie0;
2629   ip4_fib_mtrie_leaf_t leaf0;
2630   u32 lbi0;
2631
2632   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2633
2634   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2635   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2636   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2637
2638   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2639
2640   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2641 }
2642
2643 static clib_error_t *
2644 test_lookup_command_fn (vlib_main_t * vm,
2645                         unformat_input_t * input, vlib_cli_command_t * cmd)
2646 {
2647   ip4_fib_t *fib;
2648   u32 table_id = 0;
2649   f64 count = 1;
2650   u32 n;
2651   int i;
2652   ip4_address_t ip4_base_address;
2653   u64 errors = 0;
2654
2655   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2656     {
2657       if (unformat (input, "table %d", &table_id))
2658         {
2659           /* Make sure the entry exists. */
2660           fib = ip4_fib_get (table_id);
2661           if ((fib) && (fib->index != table_id))
2662             return clib_error_return (0, "<fib-index> %d does not exist",
2663                                       table_id);
2664         }
2665       else if (unformat (input, "count %f", &count))
2666         ;
2667
2668       else if (unformat (input, "%U",
2669                          unformat_ip4_address, &ip4_base_address))
2670         ;
2671       else
2672         return clib_error_return (0, "unknown input `%U'",
2673                                   format_unformat_error, input);
2674     }
2675
2676   n = count;
2677
2678   for (i = 0; i < n; i++)
2679     {
2680       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2681         errors++;
2682
2683       ip4_base_address.as_u32 =
2684         clib_host_to_net_u32 (1 +
2685                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2686     }
2687
2688   if (errors)
2689     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2690   else
2691     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2692
2693   return 0;
2694 }
2695
2696 /*?
2697  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2698  * given FIB table to determine if there is a conflict with the
2699  * adjacency table. The fib-id can be determined by using the
2700  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2701  * of 0 is used.
2702  *
2703  * @todo This command uses fib-id, other commands use table-id (not
2704  * just a name, they are different indexes). Would like to change this
2705  * to table-id for consistency.
2706  *
2707  * @cliexpar
2708  * Example of how to run the test lookup command:
2709  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2710  * No errors in 2 lookups
2711  * @cliexend
2712 ?*/
2713 /* *INDENT-OFF* */
2714 VLIB_CLI_COMMAND (lookup_test_command, static) =
2715 {
2716   .path = "test lookup",
2717   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2718   .function = test_lookup_command_fn,
2719 };
2720 /* *INDENT-ON* */
2721
2722 #ifndef CLIB_MARCH_VARIANT
2723 int
2724 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2725 {
2726   u32 fib_index;
2727
2728   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2729
2730   if (~0 == fib_index)
2731     return VNET_API_ERROR_NO_SUCH_FIB;
2732
2733   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2734                                   flow_hash_config);
2735
2736   return 0;
2737 }
2738 #endif
2739
/*
 * CLI handler for "set ip flow-hash".
 *
 * Collects an optional "table <n>" (default 0) and any number of
 * flow-hash keywords, ORs the keywords' values into one configuration
 * word and applies it with vnet_set_ip4_flow_hash ().
 *
 * Returns 0 on success, or a clib error when nothing at all was
 * recognised or when the table does not exist.
 */
static clib_error_t *
set_ip_flow_hash_command_fn (vlib_main_t * vm,
                             unformat_input_t * input,
                             vlib_cli_command_t * cmd)
{
  int matched = 0;
  u32 table_id = 0;
  u32 flow_hash_config = 0;
  int rv;

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "table %d", &table_id))
        matched = 1;
      /* foreach_flow_hash_bit (defined outside this function) expands
         _() once per entry, so each entry becomes one "else if" arm that
         ORs the entry's value into flow_hash_config when its name is
         matched.  The arms chain onto the "if" above and end in the
         "else break" below. */
#define _(a,v) \
    else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
      foreach_flow_hash_bit
#undef _
        else
        /* First token that is not recognised: stop parsing here. */
        break;
    }

  /* Only a command where nothing matched is rejected; if something
     matched earlier, any input after the break is not reported here. */
  if (matched == 0)
    return clib_error_return (0, "unknown input `%U'",
                              format_unformat_error, input);

  rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
  switch (rv)
    {
    case 0:
      break;

    case VNET_API_ERROR_NO_SUCH_FIB:
      return clib_error_return (0, "no such FIB table %d", table_id);

    default:
      /* Any other return value is only logged; the command still
         returns success. */
      clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
      break;
    }

  return 0;
}
2782
2783 /*?
2784  * Configure the set of IPv4 fields used by the flow hash.
2785  *
2786  * @cliexpar
2787  * Example of how to set the flow hash on a given table:
2788  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2790  * @cliexstart{show ip fib}
2791  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2792  * 0.0.0.0/0
2793  *   unicast-ip4-chain
2794  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2795  *     [0] [@0]: dpo-drop ip6
2796  * 0.0.0.0/32
2797  *   unicast-ip4-chain
2798  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2799  *     [0] [@0]: dpo-drop ip6
2800  * 224.0.0.0/8
2801  *   unicast-ip4-chain
2802  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2803  *     [0] [@0]: dpo-drop ip6
2804  * 6.0.1.2/32
2805  *   unicast-ip4-chain
2806  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2807  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2808  * 7.0.0.1/32
2809  *   unicast-ip4-chain
2810  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2811  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2812  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2813  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2814  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2815  * 240.0.0.0/8
2816  *   unicast-ip4-chain
2817  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2818  *     [0] [@0]: dpo-drop ip6
2819  * 255.255.255.255/32
2820  *   unicast-ip4-chain
2821  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2822  *     [0] [@0]: dpo-drop ip6
2823  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2824  * 0.0.0.0/0
2825  *   unicast-ip4-chain
2826  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2827  *     [0] [@0]: dpo-drop ip6
2828  * 0.0.0.0/32
2829  *   unicast-ip4-chain
2830  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2831  *     [0] [@0]: dpo-drop ip6
2832  * 172.16.1.0/24
2833  *   unicast-ip4-chain
2834  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2835  *     [0] [@4]: ipv4-glean: af_packet0
2836  * 172.16.1.1/32
2837  *   unicast-ip4-chain
2838  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2839  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2840  * 172.16.1.2/32
2841  *   unicast-ip4-chain
2842  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2843  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2844  * 172.16.2.0/24
2845  *   unicast-ip4-chain
2846  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2847  *     [0] [@4]: ipv4-glean: af_packet1
2848  * 172.16.2.1/32
2849  *   unicast-ip4-chain
2850  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2851  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2852  * 224.0.0.0/8
2853  *   unicast-ip4-chain
2854  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2855  *     [0] [@0]: dpo-drop ip6
2856  * 240.0.0.0/8
2857  *   unicast-ip4-chain
2858  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2859  *     [0] [@0]: dpo-drop ip6
2860  * 255.255.255.255/32
2861  *   unicast-ip4-chain
2862  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2863  *     [0] [@0]: dpo-drop ip6
2864  * @cliexend
2865 ?*/
2866 /* *INDENT-OFF* */
2867 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2868 {
2869   .path = "set ip flow-hash",
2870   .short_help =
2871   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2872   .function = set_ip_flow_hash_command_fn,
2873 };
2874 /* *INDENT-ON* */
2875
2876 #ifndef CLIB_MARCH_VARIANT
2877 int
2878 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2879                              u32 table_index)
2880 {
2881   vnet_main_t *vnm = vnet_get_main ();
2882   vnet_interface_main_t *im = &vnm->interface_main;
2883   ip4_main_t *ipm = &ip4_main;
2884   ip_lookup_main_t *lm = &ipm->lookup_main;
2885   vnet_classify_main_t *cm = &vnet_classify_main;
2886   ip4_address_t *if_addr;
2887
2888   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2889     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2890
2891   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2892     return VNET_API_ERROR_NO_SUCH_ENTRY;
2893
2894   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2895   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2896
2897   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2898
2899   if (NULL != if_addr)
2900     {
2901       fib_prefix_t pfx = {
2902         .fp_len = 32,
2903         .fp_proto = FIB_PROTOCOL_IP4,
2904         .fp_addr.ip4 = *if_addr,
2905       };
2906       u32 fib_index;
2907
2908       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2909                                                        sw_if_index);
2910
2911
2912       if (table_index != (u32) ~ 0)
2913         {
2914           dpo_id_t dpo = DPO_INVALID;
2915
2916           dpo_set (&dpo,
2917                    DPO_CLASSIFY,
2918                    DPO_PROTO_IP4,
2919                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2920
2921           fib_table_entry_special_dpo_add (fib_index,
2922                                            &pfx,
2923                                            FIB_SOURCE_CLASSIFY,
2924                                            FIB_ENTRY_FLAG_NONE, &dpo);
2925           dpo_reset (&dpo);
2926         }
2927       else
2928         {
2929           fib_table_entry_special_remove (fib_index,
2930                                           &pfx, FIB_SOURCE_CLASSIFY);
2931         }
2932     }
2933
2934   return 0;
2935 }
2936 #endif
2937
2938 static clib_error_t *
2939 set_ip_classify_command_fn (vlib_main_t * vm,
2940                             unformat_input_t * input,
2941                             vlib_cli_command_t * cmd)
2942 {
2943   u32 table_index = ~0;
2944   int table_index_set = 0;
2945   u32 sw_if_index = ~0;
2946   int rv;
2947
2948   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2949     {
2950       if (unformat (input, "table-index %d", &table_index))
2951         table_index_set = 1;
2952       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2953                          vnet_get_main (), &sw_if_index))
2954         ;
2955       else
2956         break;
2957     }
2958
2959   if (table_index_set == 0)
2960     return clib_error_return (0, "classify table-index must be specified");
2961
2962   if (sw_if_index == ~0)
2963     return clib_error_return (0, "interface / subif must be specified");
2964
2965   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2966
2967   switch (rv)
2968     {
2969     case 0:
2970       break;
2971
2972     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2973       return clib_error_return (0, "No such interface");
2974
2975     case VNET_API_ERROR_NO_SUCH_ENTRY:
2976       return clib_error_return (0, "No such classifier table");
2977     }
2978   return 0;
2979 }
2980
2981 /*?
2982  * Assign a classification table to an interface. The classification
2983  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2985  * on an interface.
2986  *
2987  * @cliexpar
2988  * Example of how to assign a classification table to an interface:
2989  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2990 ?*/
2991 /* *INDENT-OFF* */
2992 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2993 {
2994     .path = "set ip classify",
2995     .short_help =
2996     "set ip classify intfc <interface> table-index <classify-idx>",
2997     .function = set_ip_classify_command_fn,
2998 };
2999 /* *INDENT-ON* */
3000
3001 static clib_error_t *
3002 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3003 {
3004   ip4_main_t *im = &ip4_main;
3005   uword heapsize = 0;
3006
3007   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3008     {
3009       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3010         ;
3011       else
3012         return clib_error_return (0,
3013                                   "invalid heap-size parameter `%U'",
3014                                   format_unformat_error, input);
3015     }
3016
3017   im->mtrie_heap_size = heapsize;
3018
3019   return 0;
3020 }
3021
/* Register ip4_config as the early-config handler for the name "ip"
   (presumably the "ip { ... }" startup-configuration section -- confirm). */
VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3023
3024 /*
3025  * fd.io coding-style-patch-verification: ON
3026  *
3027  * Local Variables:
3028  * eval: (c-set-style "gnu")
3029  * End:
3030  */