fib: Always install all routers mcast addresses
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57
58 #include <vnet/ip/ip4_forward.h>
59 #include <vnet/interface_output.h>
60 #include <vnet/classify/vnet_classify.h>
61
62 /** @brief IPv4 lookup node.
63     @node ip4-lookup
64
65     This is the main IPv4 lookup dispatch node.
66
67     @param vm vlib_main_t corresponding to the current thread
68     @param node vlib_node_runtime_t
69     @param frame vlib_frame_t whose contents should be dispatched
70
71     @par Graph mechanics: buffer metadata, next index usage
72
73     @em Uses:
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
75         - Indicates the @c sw_if_index value of the interface that the
76           packet was received on.
77     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
78         - When the value is @c ~0 then the node performs a longest prefix
79           match (LPM) for the packet destination address in the FIB attached
80           to the receive interface.
81         - Otherwise perform LPM for the packet destination address in the
82           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
83           value (0, 1, ...) and not a VRF id.
84
85     @em Sets:
86     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
87         - The lookup result adjacency index.
88
89     <em>Next Index:</em>
90     - Dispatches the packet to the node index found in
91       ip_adjacency_t @c adj->lookup_next_index
92       (where @c adj is the lookup result adjacency).
93 */
94 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
95                                 vlib_frame_t * frame)
96 {
97   return ip4_lookup_inline (vm, node, frame);
98 }
99
100 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
101
102 /* *INDENT-OFF* */
103 VLIB_REGISTER_NODE (ip4_lookup_node) =
104 {
105   .name = "ip4-lookup",
106   .vector_size = sizeof (u32),
107   .format_trace = format_ip4_lookup_trace,
108   .n_next_nodes = IP_LOOKUP_N_NEXT,
109   .next_nodes = IP4_LOOKUP_NEXT_NODES,
110 };
111 /* *INDENT-ON* */
112
113 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
114                                       vlib_node_runtime_t * node,
115                                       vlib_frame_t * frame)
116 {
117   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
118   u32 n_left, *from;
119   u32 thread_index = vm->thread_index;
120   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
121   u16 nexts[VLIB_FRAME_SIZE], *next;
122
123   from = vlib_frame_vector_args (frame);
124   n_left = frame->n_vectors;
125   next = nexts;
126
127   vlib_get_buffers (vm, from, bufs, n_left);
128
129   while (n_left >= 4)
130     {
131       const load_balance_t *lb0, *lb1;
132       const ip4_header_t *ip0, *ip1;
133       u32 lbi0, hc0, lbi1, hc1;
134       const dpo_id_t *dpo0, *dpo1;
135
136       /* Prefetch next iteration. */
137       {
138         vlib_prefetch_buffer_header (b[2], LOAD);
139         vlib_prefetch_buffer_header (b[3], LOAD);
140
141         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
142         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
143       }
144
145       ip0 = vlib_buffer_get_current (b[0]);
146       ip1 = vlib_buffer_get_current (b[1]);
147       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
148       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
149
150       lb0 = load_balance_get (lbi0);
151       lb1 = load_balance_get (lbi1);
152
153       /*
154        * this node is for via FIBs we can re-use the hash value from the
155        * to node if present.
156        * We don't want to use the same hash value at each level in the recursion
157        * graph as that would lead to polarisation
158        */
159       hc0 = hc1 = 0;
160
161       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
162         {
163           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
164             {
165               hc0 = vnet_buffer (b[0])->ip.flow_hash =
166                 vnet_buffer (b[0])->ip.flow_hash >> 1;
167             }
168           else
169             {
170               hc0 = vnet_buffer (b[0])->ip.flow_hash =
171                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
172             }
173           dpo0 = load_balance_get_fwd_bucket
174             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
175         }
176       else
177         {
178           dpo0 = load_balance_get_bucket_i (lb0, 0);
179         }
180       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
181         {
182           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
183             {
184               hc1 = vnet_buffer (b[1])->ip.flow_hash =
185                 vnet_buffer (b[1])->ip.flow_hash >> 1;
186             }
187           else
188             {
189               hc1 = vnet_buffer (b[1])->ip.flow_hash =
190                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
191             }
192           dpo1 = load_balance_get_fwd_bucket
193             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
194         }
195       else
196         {
197           dpo1 = load_balance_get_bucket_i (lb1, 0);
198         }
199
200       next[0] = dpo0->dpoi_next_node;
201       next[1] = dpo1->dpoi_next_node;
202
203       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
204       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
205
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
208       vlib_increment_combined_counter
209         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
210
211       b += 2;
212       next += 2;
213       n_left -= 2;
214     }
215
216   while (n_left > 0)
217     {
218       const load_balance_t *lb0;
219       const ip4_header_t *ip0;
220       const dpo_id_t *dpo0;
221       u32 lbi0, hc0;
222
223       ip0 = vlib_buffer_get_current (b[0]);
224       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
225
226       lb0 = load_balance_get (lbi0);
227
228       hc0 = 0;
229       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
230         {
231           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
232             {
233               hc0 = vnet_buffer (b[0])->ip.flow_hash =
234                 vnet_buffer (b[0])->ip.flow_hash >> 1;
235             }
236           else
237             {
238               hc0 = vnet_buffer (b[0])->ip.flow_hash =
239                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
240             }
241           dpo0 = load_balance_get_fwd_bucket
242             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
243         }
244       else
245         {
246           dpo0 = load_balance_get_bucket_i (lb0, 0);
247         }
248
249       next[0] = dpo0->dpoi_next_node;
250       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
251
252       vlib_increment_combined_counter
253         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
254
255       b += 1;
256       next += 1;
257       n_left -= 1;
258     }
259
260   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
261   if (node->flags & VLIB_NODE_FLAG_TRACE)
262     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
263
264   return frame->n_vectors;
265 }
266
267 /* *INDENT-OFF* */
268 VLIB_REGISTER_NODE (ip4_load_balance_node) =
269 {
270   .name = "ip4-load-balance",
271   .vector_size = sizeof (u32),
272   .sibling_of = "ip4-lookup",
273   .format_trace = format_ip4_lookup_trace,
274 };
275 /* *INDENT-ON* */
276
277 #ifndef CLIB_MARCH_VARIANT
278 /* get first interface address */
279 ip4_address_t *
280 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
281                              ip_interface_address_t ** result_ia)
282 {
283   ip_lookup_main_t *lm = &im->lookup_main;
284   ip_interface_address_t *ia = 0;
285   ip4_address_t *result = 0;
286
287   /* *INDENT-OFF* */
288   foreach_ip_interface_address
289     (lm, ia, sw_if_index,
290      1 /* honor unnumbered */ ,
291      ({
292        ip4_address_t * a =
293          ip_interface_address_get_address (lm, ia);
294        result = a;
295        break;
296      }));
297   /* *INDENT-OFF* */
298   if (result_ia)
299     *result_ia = result ? ia : 0;
300   return result;
301 }
302 #endif
303
304 static void
305 ip4_add_subnet_bcast_route (u32 fib_index,
306                             fib_prefix_t *pfx,
307                             u32 sw_if_index)
308 {
309   vnet_sw_interface_flags_t iflags;
310
311   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
312
313   fib_table_entry_special_remove(fib_index,
314                                  pfx,
315                                  FIB_SOURCE_INTERFACE);
316
317   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
318     {
319       fib_table_entry_update_one_path (fib_index, pfx,
320                                        FIB_SOURCE_INTERFACE,
321                                        FIB_ENTRY_FLAG_NONE,
322                                        DPO_PROTO_IP4,
323                                        /* No next-hop address */
324                                        &ADJ_BCAST_ADDR,
325                                        sw_if_index,
326                                        // invalid FIB index
327                                        ~0,
328                                        1,
329                                        // no out-label stack
330                                        NULL,
331                                        FIB_ROUTE_PATH_FLAG_NONE);
332     }
333   else
334     {
335         fib_table_entry_special_add(fib_index,
336                                     pfx,
337                                     FIB_SOURCE_INTERFACE,
338                                     (FIB_ENTRY_FLAG_DROP |
339                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
340     }
341 }
342
343 static void
344 ip4_add_interface_prefix_routes (ip4_main_t *im,
345                                  u32 sw_if_index,
346                                  u32 fib_index,
347                                  ip_interface_address_t * a)
348 {
349   ip_lookup_main_t *lm = &im->lookup_main;
350   ip_interface_prefix_t *if_prefix;
351   ip4_address_t *address = ip_interface_address_get_address (lm, a);
352
353   ip_interface_prefix_key_t key = {
354     .prefix = {
355       .fp_len = a->address_length,
356       .fp_proto = FIB_PROTOCOL_IP4,
357       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
358     },
359     .sw_if_index = sw_if_index,
360   };
361
362   fib_prefix_t pfx_special = {
363     .fp_proto = FIB_PROTOCOL_IP4,
364   };
365
366   /* If prefix already set on interface, just increment ref count & return */
367   if_prefix = ip_get_interface_prefix (lm, &key);
368   if (if_prefix)
369     {
370       if_prefix->ref_count += 1;
371       return;
372     }
373
374   /* New prefix - allocate a pool entry, initialize it, add to the hash */
375   pool_get (lm->if_prefix_pool, if_prefix);
376   if_prefix->ref_count = 1;
377   if_prefix->src_ia_index = a - lm->if_address_pool;
378   clib_memcpy (&if_prefix->key, &key, sizeof (key));
379   mhash_set (&lm->prefix_to_if_prefix_index, &key,
380              if_prefix - lm->if_prefix_pool, 0 /* old value */);
381
382   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
383   if (a->address_length <= 30)
384     {
385       pfx_special.fp_len = a->address_length;
386       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
387
388       /* set the glean route for the prefix */
389       fib_table_entry_update_one_path (fib_index, &pfx_special,
390                                        FIB_SOURCE_INTERFACE,
391                                        (FIB_ENTRY_FLAG_CONNECTED |
392                                         FIB_ENTRY_FLAG_ATTACHED),
393                                        DPO_PROTO_IP4,
394                                        /* No next-hop address */
395                                        NULL,
396                                        sw_if_index,
397                                        /* invalid FIB index */
398                                        ~0,
399                                        1,
400                                        /* no out-label stack */
401                                        NULL,
402                                        FIB_ROUTE_PATH_FLAG_NONE);
403
404       /* set a drop route for the base address of the prefix */
405       pfx_special.fp_len = 32;
406       pfx_special.fp_addr.ip4.as_u32 =
407         address->as_u32 & im->fib_masks[a->address_length];
408
409       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
410         fib_table_entry_special_add (fib_index, &pfx_special,
411                                      FIB_SOURCE_INTERFACE,
412                                      (FIB_ENTRY_FLAG_DROP |
413                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
414
415       /* set a route for the broadcast address of the prefix */
416       pfx_special.fp_len = 32;
417       pfx_special.fp_addr.ip4.as_u32 =
418         address->as_u32 | ~im->fib_masks[a->address_length];
419       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
420         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
421
422
423     }
424   /* length == 31 - add an attached route for the other address */
425   else if (a->address_length == 31)
426     {
427       pfx_special.fp_len = 32;
428       pfx_special.fp_addr.ip4.as_u32 =
429         address->as_u32 ^ clib_host_to_net_u32(1);
430
431       fib_table_entry_update_one_path (fib_index, &pfx_special,
432                                        FIB_SOURCE_INTERFACE,
433                                        (FIB_ENTRY_FLAG_ATTACHED),
434                                        DPO_PROTO_IP4,
435                                        &pfx_special.fp_addr,
436                                        sw_if_index,
437                                        /* invalid FIB index */
438                                        ~0,
439                                        1,
440                                        NULL,
441                                        FIB_ROUTE_PATH_FLAG_NONE);
442     }
443 }
444
445 static void
446 ip4_add_interface_routes (u32 sw_if_index,
447                           ip4_main_t * im, u32 fib_index,
448                           ip_interface_address_t * a)
449 {
450   ip_lookup_main_t *lm = &im->lookup_main;
451   ip4_address_t *address = ip_interface_address_get_address (lm, a);
452   fib_prefix_t pfx = {
453     .fp_len = 32,
454     .fp_proto = FIB_PROTOCOL_IP4,
455     .fp_addr.ip4 = *address,
456   };
457
458   /* set special routes for the prefix if needed */
459   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
460
461   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
462     {
463       u32 classify_table_index =
464         lm->classify_table_index_by_sw_if_index[sw_if_index];
465       if (classify_table_index != (u32) ~ 0)
466         {
467           dpo_id_t dpo = DPO_INVALID;
468
469           dpo_set (&dpo,
470                    DPO_CLASSIFY,
471                    DPO_PROTO_IP4,
472                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
473
474           fib_table_entry_special_dpo_add (fib_index,
475                                            &pfx,
476                                            FIB_SOURCE_CLASSIFY,
477                                            FIB_ENTRY_FLAG_NONE, &dpo);
478           dpo_reset (&dpo);
479         }
480     }
481
482   fib_table_entry_update_one_path (fib_index, &pfx,
483                                    FIB_SOURCE_INTERFACE,
484                                    (FIB_ENTRY_FLAG_CONNECTED |
485                                     FIB_ENTRY_FLAG_LOCAL),
486                                    DPO_PROTO_IP4,
487                                    &pfx.fp_addr,
488                                    sw_if_index,
489                                    // invalid FIB index
490                                    ~0,
491                                    1, NULL,
492                                    FIB_ROUTE_PATH_FLAG_NONE);
493 }
494
495 static void
496 ip4_del_interface_prefix_routes (ip4_main_t * im,
497                                  u32 sw_if_index,
498                                  u32 fib_index,
499                                  ip4_address_t * address,
500                                  u32 address_length)
501 {
502   ip_lookup_main_t *lm = &im->lookup_main;
503   ip_interface_prefix_t *if_prefix;
504
505   ip_interface_prefix_key_t key = {
506     .prefix = {
507       .fp_len = address_length,
508       .fp_proto = FIB_PROTOCOL_IP4,
509       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
510     },
511     .sw_if_index = sw_if_index,
512   };
513
514   fib_prefix_t pfx_special = {
515     .fp_len = 32,
516     .fp_proto = FIB_PROTOCOL_IP4,
517   };
518
519   if_prefix = ip_get_interface_prefix (lm, &key);
520   if (!if_prefix)
521     {
522       clib_warning ("Prefix not found while deleting %U",
523                     format_ip4_address_and_length, address, address_length);
524       return;
525     }
526
527   if_prefix->ref_count -= 1;
528
529   /*
530    * Routes need to be adjusted if:
531    * - deleting last intf addr in prefix
532    * - deleting intf addr used as default source address in glean adjacency
533    *
534    * We're done now otherwise
535    */
536   if ((if_prefix->ref_count > 0) &&
537       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
538     return;
539
540   /* length <= 30, delete glean route, first address, last address */
541   if (address_length <= 30)
542     {
543
544       /* remove glean route for prefix */
545       pfx_special.fp_addr.ip4 = *address;
546       pfx_special.fp_len = address_length;
547       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
548
549       /* if no more intf addresses in prefix, remove other special routes */
550       if (!if_prefix->ref_count)
551         {
552           /* first address in prefix */
553           pfx_special.fp_addr.ip4.as_u32 =
554             address->as_u32 & im->fib_masks[address_length];
555           pfx_special.fp_len = 32;
556
557           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
558           fib_table_entry_special_remove (fib_index,
559                                           &pfx_special,
560                                           FIB_SOURCE_INTERFACE);
561
562           /* prefix broadcast address */
563           pfx_special.fp_addr.ip4.as_u32 =
564             address->as_u32 | ~im->fib_masks[address_length];
565           pfx_special.fp_len = 32;
566
567           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
568           fib_table_entry_special_remove (fib_index,
569                                           &pfx_special,
570                                           FIB_SOURCE_INTERFACE);
571         }
572       else
573         /* default source addr just got deleted, find another */
574         {
575           ip_interface_address_t *new_src_ia = NULL;
576           ip4_address_t *new_src_addr = NULL;
577
578           new_src_addr =
579             ip4_interface_address_matching_destination
580               (im, address, sw_if_index, &new_src_ia);
581
582           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
583
584           pfx_special.fp_len = address_length;
585           pfx_special.fp_addr.ip4 = *new_src_addr;
586
587           /* set new glean route for the prefix */
588           fib_table_entry_update_one_path (fib_index, &pfx_special,
589                                            FIB_SOURCE_INTERFACE,
590                                            (FIB_ENTRY_FLAG_CONNECTED |
591                                             FIB_ENTRY_FLAG_ATTACHED),
592                                            DPO_PROTO_IP4,
593                                            /* No next-hop address */
594                                            NULL,
595                                            sw_if_index,
596                                            /* invalid FIB index */
597                                            ~0,
598                                            1,
599                                            /* no out-label stack */
600                                            NULL,
601                                            FIB_ROUTE_PATH_FLAG_NONE);
602           return;
603         }
604     }
605   /* length == 31, delete attached route for the other address */
606   else if (address_length == 31)
607     {
608       pfx_special.fp_addr.ip4.as_u32 =
609         address->as_u32 ^ clib_host_to_net_u32(1);
610
611       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
612     }
613
614   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
615   pool_put (lm->if_prefix_pool, if_prefix);
616 }
617
618 static void
619 ip4_del_interface_routes (u32 sw_if_index,
620                           ip4_main_t * im,
621                           u32 fib_index,
622                           ip4_address_t * address, u32 address_length)
623 {
624   fib_prefix_t pfx = {
625     .fp_len = address_length,
626     .fp_proto = FIB_PROTOCOL_IP4,
627     .fp_addr.ip4 = *address,
628   };
629
630   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
631                                    address, address_length);
632
633   pfx.fp_len = 32;
634   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
635 }
636
637 #ifndef CLIB_MARCH_VARIANT
638 void
639 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
640 {
641   ip4_main_t *im = &ip4_main;
642
643   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
644
645   /*
646    * enable/disable only on the 1<->0 transition
647    */
648   if (is_enable)
649     {
650       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
651         return;
652     }
653   else
654     {
655       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
656       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
657         return;
658     }
659   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
660                                !is_enable, 0, 0);
661
662
663   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
664                                sw_if_index, !is_enable, 0, 0);
665
666   {
667     ip4_enable_disable_interface_callback_t *cb;
668     vec_foreach (cb, im->enable_disable_interface_callbacks)
669       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
670   }
671 }
672
673 static clib_error_t *
674 ip4_add_del_interface_address_internal (vlib_main_t * vm,
675                                         u32 sw_if_index,
676                                         ip4_address_t * address,
677                                         u32 address_length, u32 is_del)
678 {
679   vnet_main_t *vnm = vnet_get_main ();
680   ip4_main_t *im = &ip4_main;
681   ip_lookup_main_t *lm = &im->lookup_main;
682   clib_error_t *error = 0;
683   u32 if_address_index, elts_before;
684   ip4_address_fib_t ip4_af, *addr_fib = 0;
685
686   /* local0 interface doesn't support IP addressing  */
687   if (sw_if_index == 0)
688     {
689       return
690        clib_error_create ("local0 interface doesn't support IP addressing");
691     }
692
693   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
694   ip4_addr_fib_init (&ip4_af, address,
695                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
696   vec_add1 (addr_fib, ip4_af);
697
698   /*
699    * there is no support for adj-fib handling in the presence of overlapping
700    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
701    * most routers do.
702    */
703   /* *INDENT-OFF* */
704   if (!is_del)
705     {
706       /* When adding an address check that it does not conflict
707          with an existing address on any interface in this table. */
708       ip_interface_address_t *ia;
709       vnet_sw_interface_t *sif;
710
711       pool_foreach(sif, vnm->interface_main.sw_interfaces,
712       ({
713           if (im->fib_index_by_sw_if_index[sw_if_index] ==
714               im->fib_index_by_sw_if_index[sif->sw_if_index])
715             {
716               foreach_ip_interface_address
717                 (&im->lookup_main, ia, sif->sw_if_index,
718                  0 /* honor unnumbered */ ,
719                  ({
720                    ip4_address_t * x =
721                      ip_interface_address_get_address
722                      (&im->lookup_main, ia);
723                    if (ip4_destination_matches_route
724                        (im, address, x, ia->address_length) ||
725                        ip4_destination_matches_route (im,
726                                                       x,
727                                                       address,
728                                                       address_length))
729                      {
730                        /* an intf may have >1 addr from the same prefix */
731                        if ((sw_if_index == sif->sw_if_index) &&
732                            (ia->address_length == address_length) &&
733                            (x->as_u32 != address->as_u32))
734                          continue;
735
736                        /* error if the length or intf was different */
737                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
738
739                        return
740                          clib_error_create
741                          ("failed to add %U on %U which conflicts with %U for interface %U",
742                           format_ip4_address_and_length, address,
743                           address_length,
744                           format_vnet_sw_if_index_name, vnm,
745                           sw_if_index,
746                           format_ip4_address_and_length, x,
747                           ia->address_length,
748                           format_vnet_sw_if_index_name, vnm,
749                           sif->sw_if_index);
750                      }
751                  }));
752             }
753       }));
754     }
755   /* *INDENT-ON* */
756
757   elts_before = pool_elts (lm->if_address_pool);
758
759   error = ip_interface_address_add_del
760     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
761   if (error)
762     goto done;
763
764   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
765   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
766
767   /* intf addr routes are added/deleted on admin up/down */
768   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
769     {
770       if (is_del)
771         ip4_del_interface_routes (sw_if_index,
772                                   im, ip4_af.fib_index, address,
773                                   address_length);
774       else
775         ip4_add_interface_routes (sw_if_index,
776                                   im, ip4_af.fib_index,
777                                   pool_elt_at_index
778                                   (lm->if_address_pool, if_address_index));
779     }
780
781   /* If pool did not grow/shrink: add duplicate address. */
782   if (elts_before != pool_elts (lm->if_address_pool))
783     {
784       ip4_add_del_interface_address_callback_t *cb;
785       vec_foreach (cb, im->add_del_interface_address_callbacks)
786         cb->function (im, cb->function_opaque, sw_if_index,
787                       address, address_length, if_address_index, is_del);
788     }
789
790 done:
791   vec_free (addr_fib);
792   return error;
793 }
794
795 clib_error_t *
796 ip4_add_del_interface_address (vlib_main_t * vm,
797                                u32 sw_if_index,
798                                ip4_address_t * address,
799                                u32 address_length, u32 is_del)
800 {
801   return ip4_add_del_interface_address_internal
802     (vm, sw_if_index, address, address_length, is_del);
803 }
804
805 void
806 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
807 {
808   ip_interface_address_t *ia;
809   ip4_main_t *im;
810
811   im = &ip4_main;
812
813   /*
814    * when directed broadcast is enabled, the subnet braodcast route will forward
815    * packets using an adjacency with a broadcast MAC. otherwise it drops
816    */
817   /* *INDENT-OFF* */
818   foreach_ip_interface_address(&im->lookup_main, ia,
819                                sw_if_index, 0,
820      ({
821        if (ia->address_length <= 30)
822          {
823            ip4_address_t *ipa;
824
825            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
826
827            fib_prefix_t pfx = {
828              .fp_len = 32,
829              .fp_proto = FIB_PROTOCOL_IP4,
830              .fp_addr = {
831                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
832              },
833            };
834
835            ip4_add_subnet_bcast_route
836              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
837                                                   sw_if_index),
838               &pfx, sw_if_index);
839          }
840      }));
841   /* *INDENT-ON* */
842 }
843 #endif
844
845 static clib_error_t *
846 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
847 {
848   ip4_main_t *im = &ip4_main;
849   ip_interface_address_t *ia;
850   ip4_address_t *a;
851   u32 is_admin_up, fib_index;
852
853   /* Fill in lookup tables with default table (0). */
854   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
855
856   vec_validate_init_empty (im->
857                            lookup_main.if_address_pool_index_by_sw_if_index,
858                            sw_if_index, ~0);
859
860   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
861
862   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
863
864   /* *INDENT-OFF* */
865   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
866                                 0 /* honor unnumbered */,
867   ({
868     a = ip_interface_address_get_address (&im->lookup_main, ia);
869     if (is_admin_up)
870       ip4_add_interface_routes (sw_if_index,
871                                 im, fib_index,
872                                 ia);
873     else
874       ip4_del_interface_routes (sw_if_index,
875                                 im, fib_index,
876                                 a, ia->address_length);
877   }));
878   /* *INDENT-ON* */
879
880   return 0;
881 }
882
883 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
884
885 /* Built-in ip4 unicast rx feature path definition.
 *
 * The arc is entered from "ip4-input" / "ip4-input-no-checksum" and ends
 * at "ip4-lookup".  The runs_before fields below declare this ordering:
 *
 *   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx
 *     -> ip4-source-check-via-any -> ip4-policer-classify
 *     -> ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass
 *     -> ip4-lookup
 *
 * "ip4-source-and-port-range-check-rx" is only constrained to run before
 * "ip4-policer-classify", and "ip4-not-enabled" only before "ip4-lookup".
 */
886 /* *INDENT-OFF* */
887 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
888 {
889   .arc_name = "ip4-unicast",
890   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
891   .last_in_arc = "ip4-lookup",
892   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
893 };
894
895 VNET_FEATURE_INIT (ip4_flow_classify, static) =
896 {
897   .arc_name = "ip4-unicast",
898   .node_name = "ip4-flow-classify",
899   .runs_before = VNET_FEATURES ("ip4-inacl"),
900 };
901
902 VNET_FEATURE_INIT (ip4_inacl, static) =
903 {
904   .arc_name = "ip4-unicast",
905   .node_name = "ip4-inacl",
906   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
907 };
908
909 VNET_FEATURE_INIT (ip4_source_check_1, static) =
910 {
911   .arc_name = "ip4-unicast",
912   .node_name = "ip4-source-check-via-rx",
913   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
914 };
915
916 VNET_FEATURE_INIT (ip4_source_check_2, static) =
917 {
918   .arc_name = "ip4-unicast",
919   .node_name = "ip4-source-check-via-any",
920   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
921 };
922
/* Shares its only ordering constraint with ip4-source-check-via-any. */
923 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
924 {
925   .arc_name = "ip4-unicast",
926   .node_name = "ip4-source-and-port-range-check-rx",
927   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
928 };
929
930 VNET_FEATURE_INIT (ip4_policer_classify, static) =
931 {
932   .arc_name = "ip4-unicast",
933   .node_name = "ip4-policer-classify",
934   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
935 };
936
937 VNET_FEATURE_INIT (ip4_ipsec, static) =
938 {
939   .arc_name = "ip4-unicast",
940   .node_name = "ipsec4-input-feature",
941   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
942 };
943
944 VNET_FEATURE_INIT (ip4_vpath, static) =
945 {
946   .arc_name = "ip4-unicast",
947   .node_name = "vpath-input-ip4",
948   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
949 };
950
951 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
952 {
953   .arc_name = "ip4-unicast",
954   .node_name = "ip4-vxlan-bypass",
955   .runs_before = VNET_FEATURES ("ip4-lookup"),
956 };
957
/* Toggled per interface by ip4_sw_interface_add_del() in this file. */
958 VNET_FEATURE_INIT (ip4_not_enabled, static) =
959 {
960   .arc_name = "ip4-unicast",
961   .node_name = "ip4-not-enabled",
962   .runs_before = VNET_FEATURES ("ip4-lookup"),
963 };
964
/* Matches the arc's .last_in_arc, hence no runs_before list. */
965 VNET_FEATURE_INIT (ip4_lookup, static) =
966 {
967   .arc_name = "ip4-unicast",
968   .node_name = "ip4-lookup",
969   .runs_before = 0,     /* not before any other features */
970 };
971
972 /* Built-in ip4 multicast rx feature path definition.
 *
 * Entered from the same start nodes as the unicast arc and ending at
 * "ip4-mfib-forward-lookup".  Both "vpath-input-ip4" and
 * "ip4-not-enabled" are declared to run before that last node; no order
 * is declared between the two of them.
 */
973 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
974 {
975   .arc_name = "ip4-multicast",
976   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
977   .last_in_arc = "ip4-mfib-forward-lookup",
978   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
979 };
980
981 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
982 {
983   .arc_name = "ip4-multicast",
984   .node_name = "vpath-input-ip4",
985   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
986 };
987
/* Toggled per interface by ip4_sw_interface_add_del() in this file. */
988 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
989 {
990   .arc_name = "ip4-multicast",
991   .node_name = "ip4-not-enabled",
992   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
993 };
994
995 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
996 {
997   .arc_name = "ip4-multicast",
998   .node_name = "ip4-mfib-forward-lookup",
999   .runs_before = 0,     /* last feature */
1000 };
1001
1002 /* ip4 tx ("ip4-output") feature path definition, including the source
 * and port-range check.
 *
 * Entered from "ip4-rewrite", "ip4-midchain" and "ip4-dvr-dpo"; ends at
 * "interface-output".  Declared ordering:
 *
 *   ip4-source-and-port-range-check-tx -> ip4-outacl
 *     -> ipsec4-output-feature -> interface-output
 */
1003 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1004 {
1005   .arc_name = "ip4-output",
1006   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1007   .last_in_arc = "interface-output",
1008   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1009 };
1010
1011 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1012 {
1013   .arc_name = "ip4-output",
1014   .node_name = "ip4-source-and-port-range-check-tx",
1015   .runs_before = VNET_FEATURES ("ip4-outacl"),
1016 };
1017
1018 VNET_FEATURE_INIT (ip4_outacl, static) =
1019 {
1020   .arc_name = "ip4-output",
1021   .node_name = "ip4-outacl",
1022   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1023 };
1024
1025 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1026 {
1027   .arc_name = "ip4-output",
1028   .node_name = "ipsec4-output-feature",
1029   .runs_before = VNET_FEATURES ("interface-output"),
1030 };
1031
1032 /* Built-in ip4 tx feature path definition: the arc's last node. */
1033 VNET_FEATURE_INIT (ip4_interface_output, static) =
1034 {
1035   .arc_name = "ip4-output",
1036   .node_name = "interface-output",
1037   .runs_before = 0,     /* not before any other features */
1038 };
1039 /* *INDENT-ON* */
1040
1041 static clib_error_t *
1042 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1043 {
1044   ip4_main_t *im = &ip4_main;
1045
1046   /* Fill in lookup tables with default table (0). */
1047   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1048   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1049
1050   if (!is_add)
1051     {
1052       ip4_main_t *im4 = &ip4_main;
1053       ip_lookup_main_t *lm4 = &im4->lookup_main;
1054       ip_interface_address_t *ia = 0;
1055       ip4_address_t *address;
1056       vlib_main_t *vm = vlib_get_main ();
1057
1058       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1059       /* *INDENT-OFF* */
1060       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1061       ({
1062         address = ip_interface_address_get_address (lm4, ia);
1063         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1064       }));
1065       /* *INDENT-ON* */
1066       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1067     }
1068
1069   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1070                                is_add, 0, 0);
1071
1072   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1073                                sw_if_index, is_add, 0, 0);
1074
1075   return /* no error */ 0;
1076 }
1077
1078 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1079
1080 /* Global IP4 main: the ip4_main_t instance this file reaches through
 * &ip4_main.  The definition is compiled only when CLIB_MARCH_VARIANT is
 * not defined, so builds of this file with that macro set rely on the
 * object being defined elsewhere (presumably by the non-variant build of
 * this same file - confirm against the build system). */
1081 #ifndef CLIB_MARCH_VARIANT
1082 ip4_main_t ip4_main;
1083 #endif /* CLIB_MARCH_VARIANT */
1084
1085 static clib_error_t *
1086 ip4_lookup_init (vlib_main_t * vm)
1087 {
1088   ip4_main_t *im = &ip4_main;
1089   clib_error_t *error;
1090   uword i;
1091
1092   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1093     return error;
1094   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1095     return (error);
1096   if ((error = vlib_call_init_function (vm, fib_module_init)))
1097     return error;
1098   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1099     return error;
1100
1101   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1102     {
1103       u32 m;
1104
1105       if (i < 32)
1106         m = pow2_mask (i) << (32 - i);
1107       else
1108         m = ~0;
1109       im->fib_masks[i] = clib_host_to_net_u32 (m);
1110     }
1111
1112   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1113
1114   /* Create FIB with index 0 and table id of 0. */
1115   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1116                                      FIB_SOURCE_DEFAULT_ROUTE);
1117   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1118                                       MFIB_SOURCE_DEFAULT_ROUTE);
1119
1120   {
1121     pg_node_t *pn;
1122     pn = pg_get_node (ip4_lookup_node.index);
1123     pn->unformat_edit = unformat_pg_ip4_header;
1124   }
1125
1126   {
1127     ethernet_arp_header_t h;
1128
1129     clib_memset (&h, 0, sizeof (h));
1130
1131 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1132 #define _8(f,v) h.f = v;
1133     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1134     _16 (l3_type, ETHERNET_TYPE_IP4);
1135     _8 (n_l2_address_bytes, 6);
1136     _8 (n_l3_address_bytes, 4);
1137     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1138 #undef _16
1139 #undef _8
1140
1141     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1142                                /* data */ &h,
1143                                sizeof (h),
1144                                /* alloc chunk size */ 8,
1145                                "ip4 arp");
1146   }
1147
1148   return error;
1149 }
1150
1151 VLIB_INIT_FUNCTION (ip4_lookup_init);
1152
1153 typedef struct
1154 {
1155   /* Adjacency taken. */
1156   u32 dpo_index;
1157   u32 flow_hash;
1158   u32 fib_index;
1159
1160   /* Packet data, possibly *after* rewrite. */
1161   u8 packet_data[64 - 1 * sizeof (u32)];
1162 }
1163 ip4_forward_next_trace_t;
1164
1165 #ifndef CLIB_MARCH_VARIANT
1166 u8 *
1167 format_ip4_forward_next_trace (u8 * s, va_list * args)
1168 {
1169   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1170   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1171   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1172   u32 indent = format_get_indent (s);
1173   s = format (s, "%U%U",
1174               format_white_space, indent,
1175               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1176   return s;
1177 }
1178 #endif
1179
1180 static u8 *
1181 format_ip4_lookup_trace (u8 * s, va_list * args)
1182 {
1183   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1184   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1185   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1186   u32 indent = format_get_indent (s);
1187
1188   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1189               t->fib_index, t->dpo_index, t->flow_hash);
1190   s = format (s, "\n%U%U",
1191               format_white_space, indent,
1192               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1193   return s;
1194 }
1195
1196 static u8 *
1197 format_ip4_rewrite_trace (u8 * s, va_list * args)
1198 {
1199   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1200   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1201   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1202   u32 indent = format_get_indent (s);
1203
1204   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1205               t->fib_index, t->dpo_index, format_ip_adjacency,
1206               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1207   s = format (s, "\n%U%U",
1208               format_white_space, indent,
1209               format_ip_adjacency_packet_data,
1210               t->packet_data, sizeof (t->packet_data));
1211   return s;
1212 }
1213
1214 #ifndef CLIB_MARCH_VARIANT
1215 /* Common trace function for all ip4-forward next nodes. */
1216 void
1217 ip4_forward_next_trace (vlib_main_t * vm,
1218                         vlib_node_runtime_t * node,
1219                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1220 {
1221   u32 *from, n_left;
1222   ip4_main_t *im = &ip4_main;
1223
1224   n_left = frame->n_vectors;
1225   from = vlib_frame_vector_args (frame);
1226
1227   while (n_left >= 4)
1228     {
1229       u32 bi0, bi1;
1230       vlib_buffer_t *b0, *b1;
1231       ip4_forward_next_trace_t *t0, *t1;
1232
1233       /* Prefetch next iteration. */
1234       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1235       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1236
1237       bi0 = from[0];
1238       bi1 = from[1];
1239
1240       b0 = vlib_get_buffer (vm, bi0);
1241       b1 = vlib_get_buffer (vm, bi1);
1242
1243       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1244         {
1245           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1246           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1247           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1248           t0->fib_index =
1249             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1250              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1251             vec_elt (im->fib_index_by_sw_if_index,
1252                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1253
1254           clib_memcpy_fast (t0->packet_data,
1255                             vlib_buffer_get_current (b0),
1256                             sizeof (t0->packet_data));
1257         }
1258       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1259         {
1260           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1261           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1262           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1263           t1->fib_index =
1264             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1265              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1266             vec_elt (im->fib_index_by_sw_if_index,
1267                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1268           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1269                             sizeof (t1->packet_data));
1270         }
1271       from += 2;
1272       n_left -= 2;
1273     }
1274
1275   while (n_left >= 1)
1276     {
1277       u32 bi0;
1278       vlib_buffer_t *b0;
1279       ip4_forward_next_trace_t *t0;
1280
1281       bi0 = from[0];
1282
1283       b0 = vlib_get_buffer (vm, bi0);
1284
1285       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1286         {
1287           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1288           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1289           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1290           t0->fib_index =
1291             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1292              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1293             vec_elt (im->fib_index_by_sw_if_index,
1294                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1295           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1296                             sizeof (t0->packet_data));
1297         }
1298       from += 1;
1299       n_left -= 1;
1300     }
1301 }
1302
1303 /* Compute TCP/UDP/ICMP4 checksum in software. */
1304 u16
1305 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1306                               ip4_header_t * ip0)
1307 {
1308   ip_csum_t sum0;
1309   u32 ip_header_length, payload_length_host_byte_order;
1310
1311   /* Initialize checksum with ip header. */
1312   ip_header_length = ip4_header_bytes (ip0);
1313   payload_length_host_byte_order =
1314     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1315   sum0 =
1316     clib_host_to_net_u32 (payload_length_host_byte_order +
1317                           (ip0->protocol << 16));
1318
1319   if (BITS (uword) == 32)
1320     {
1321       sum0 =
1322         ip_csum_with_carry (sum0,
1323                             clib_mem_unaligned (&ip0->src_address, u32));
1324       sum0 =
1325         ip_csum_with_carry (sum0,
1326                             clib_mem_unaligned (&ip0->dst_address, u32));
1327     }
1328   else
1329     sum0 =
1330       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1331
1332   return ip_calculate_l4_checksum (vm, p0, sum0,
1333                                    payload_length_host_byte_order, (u8 *) ip0,
1334                                    ip_header_length, NULL);
1335 }
1336
1337 u32
1338 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1339 {
1340   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1341   udp_header_t *udp0;
1342   u16 sum16;
1343
1344   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1345           || ip0->protocol == IP_PROTOCOL_UDP);
1346
1347   udp0 = (void *) (ip0 + 1);
1348   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1349     {
1350       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1351                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1352       return p0->flags;
1353     }
1354
1355   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1356
1357   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1358                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1359
1360   return p0->flags;
1361 }
1362 #endif
1363
/*
 * The "ip4-local" feature arc: entered from the "ip4-local" node and
 * ending at "ip4-local-end-of-arc".  Unlike the arcs above, no
 * .arc_index_ptr is given; ip4_local_set_next_and_error() reads the index
 * from vnet_feat_arc_ip4_local.feature_arc_index (presumably the object
 * this macro defines - confirm against the macro definition).
 */
1364 /* *INDENT-OFF* */
1365 VNET_FEATURE_ARC_INIT (ip4_local) =
1366 {
1367   .arc_name  = "ip4-local",
1368   .start_nodes = VNET_FEATURES ("ip4-local"),
1369   .last_in_arc = "ip4-local-end-of-arc",
1370 };
1371 /* *INDENT-ON* */
1372
1373 static inline void
1374 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1375                             ip4_header_t * ip, u8 is_udp, u8 * error,
1376                             u8 * good_tcp_udp)
1377 {
1378   u32 flags0;
1379   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1380   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1381   if (is_udp)
1382     {
1383       udp_header_t *udp;
1384       u32 ip_len, udp_len;
1385       i32 len_diff;
1386       udp = ip4_next_header (ip);
1387       /* Verify UDP length. */
1388       ip_len = clib_net_to_host_u16 (ip->length);
1389       udp_len = clib_net_to_host_u16 (udp->length);
1390
1391       len_diff = ip_len - udp_len;
1392       *good_tcp_udp &= len_diff >= 0;
1393       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1394     }
1395 }
1396
/*
 * Checksum-state predicates on a buffer pointer _b.
 *
 * Every argument and every expansion is fully parenthesized so the
 * macros can be negated, combined with other operators, or given an
 * argument such as '&buf' without changing their meaning.  _b is still
 * evaluated more than once, so it must be free of side effects.
 */

/* Non-zero when TCP or UDP checksum offload is flagged on _b. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* Non-zero when a TCP/UDP packet still needs its checksum verified in
   software: not already computed and not offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)         \
            || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1408
1409 static inline void
1410 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1411                          ip4_header_t * ih, u8 * error)
1412 {
1413   u8 is_udp, is_tcp_udp, good_tcp_udp;
1414
1415   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1416   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1417
1418   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1419     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1420   else
1421     good_tcp_udp = ip4_local_csum_is_valid (b);
1422
1423   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1424   *error = (is_tcp_udp && !good_tcp_udp
1425             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1426 }
1427
1428 static inline void
1429 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1430                             ip4_header_t ** ih, u8 * error)
1431 {
1432   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1433
1434   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1435   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1436
1437   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1438   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1439
1440   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1441   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1442
1443   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1444                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1445     {
1446       if (is_tcp_udp[0])
1447         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1448                                     &good_tcp_udp[0]);
1449       if (is_tcp_udp[1])
1450         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1451                                     &good_tcp_udp[1]);
1452     }
1453
1454   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1455               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1456   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1457               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1458 }
1459
1460 static inline void
1461 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1462                               vlib_buffer_t * b, u16 * next, u8 error,
1463                               u8 head_of_feature_arc)
1464 {
1465   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1466   u32 next_index;
1467
1468   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1469   b->error = error ? error_node->errors[error] : 0;
1470   if (head_of_feature_arc)
1471     {
1472       next_index = *next;
1473       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1474         {
1475           vnet_feature_arc_start (arc_index,
1476                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1477                                   &next_index, b);
1478           *next = next_index;
1479         }
1480     }
1481 }
1482
1483 typedef struct
1484 {
1485   ip4_address_t src;
1486   u32 lbi;
1487   u8 error;
1488   u8 first;
1489 } ip4_local_last_check_t;
1490
1491 static inline void
1492 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1493                      ip4_local_last_check_t * last_check, u8 * error0)
1494 {
1495   ip4_fib_mtrie_leaf_t leaf0;
1496   ip4_fib_mtrie_t *mtrie0;
1497   const dpo_id_t *dpo0;
1498   load_balance_t *lb0;
1499   u32 lbi0;
1500
1501   vnet_buffer (b)->ip.fib_index =
1502     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1503     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1504
1505   /*
1506    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1507    *  adjacency for the destination address (the local interface address).
1508    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1509    *  adjacency for the source address (the remote sender's address)
1510    */
1511   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1512       last_check->first)
1513     {
1514       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1515       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1516       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1517       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1518       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1519
1520       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1521         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1522       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1523
1524       lb0 = load_balance_get (lbi0);
1525       dpo0 = load_balance_get_bucket_i (lb0, 0);
1526
1527       /*
1528        * Must have a route to source otherwise we drop the packet.
1529        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1530        *
1531        * The checks are:
1532        *  - the source is a recieve => it's from us => bogus, do this
1533        *    first since it sets a different error code.
1534        *  - uRPF check for any route to source - accept if passes.
1535        *  - allow packets destined to the broadcast address from unknown sources
1536        */
1537
1538       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1539                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1540                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1541       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1542                   && !fib_urpf_check_size (lb0->lb_urpf)
1543                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1544                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1545
1546       last_check->src.as_u32 = ip0->src_address.as_u32;
1547       last_check->lbi = lbi0;
1548       last_check->error = *error0;
1549       last_check->first = 0;
1550     }
1551   else
1552     {
1553       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1554         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1555       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1556       *error0 = last_check->error;
1557     }
1558 }
1559
1560 static inline void
1561 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1562                         ip4_local_last_check_t * last_check, u8 * error)
1563 {
1564   ip4_fib_mtrie_leaf_t leaf[2];
1565   ip4_fib_mtrie_t *mtrie[2];
1566   const dpo_id_t *dpo[2];
1567   load_balance_t *lb[2];
1568   u32 not_last_hit;
1569   u32 lbi[2];
1570
1571   not_last_hit = last_check->first;
1572   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1573   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1574
1575   vnet_buffer (b[0])->ip.fib_index =
1576     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1577     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1578     vnet_buffer (b[0])->ip.fib_index;
1579
1580   vnet_buffer (b[1])->ip.fib_index =
1581     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1582     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1583     vnet_buffer (b[1])->ip.fib_index;
1584
1585   /*
1586    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1587    *  adjacency for the destination address (the local interface address).
1588    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1589    *  adjacency for the source address (the remote sender's address)
1590    */
1591   if (PREDICT_TRUE (not_last_hit))
1592     {
1593       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1594       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1595
1596       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1597       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1598
1599       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1600                                            &ip[0]->src_address, 2);
1601       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1602                                            &ip[1]->src_address, 2);
1603
1604       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1605                                            &ip[0]->src_address, 3);
1606       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1607                                            &ip[1]->src_address, 3);
1608
1609       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1610       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1611
1612       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1613         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1614       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1615
1616       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1617         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1618       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1619
1620       lb[0] = load_balance_get (lbi[0]);
1621       lb[1] = load_balance_get (lbi[1]);
1622
1623       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1624       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1625
1626       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1627                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1628                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1629       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1630                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1631                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1632                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1633
1634       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1635                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1636                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1637       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1638                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1639                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1640                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1641
1642       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1643       last_check->lbi = lbi[1];
1644       last_check->error = error[1];
1645       last_check->first = 0;
1646     }
1647   else
1648     {
1649       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1650         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1651       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1652
1653       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1654         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1655       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1656
1657       error[0] = last_check->error;
1658       error[1] = last_check->error;
1659     }
1660 }
1661
/*
 * Packet classes returned by ip4_local_classify().
 *
 * IP_LOCAL_PACKET_TYPE_L4 is zero: ip4_local_inline() tests the type for
 * non-zero to recognise packets that are not plain L4.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1668
1669 /**
1670  * Determine packet type and next node.
1671  *
1672  * The expectation is that all packets that are not L4 will skip
1673  * checksums and source checks.
1674  */
1675 always_inline u8
1676 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1677 {
1678   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1679
1680   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1681     {
1682       *next = IP_LOCAL_NEXT_REASSEMBLY;
1683       return IP_LOCAL_PACKET_TYPE_FRAG;
1684     }
1685   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1686     {
1687       *next = lm->local_next_by_ip_protocol[ip->protocol];
1688       return IP_LOCAL_PACKET_TYPE_NAT;
1689     }
1690
1691   *next = lm->local_next_by_ip_protocol[ip->protocol];
1692   return IP_LOCAL_PACKET_TYPE_L4;
1693 }
1694
1695 static inline uword
1696 ip4_local_inline (vlib_main_t * vm,
1697                   vlib_node_runtime_t * node,
1698                   vlib_frame_t * frame, int head_of_feature_arc)
1699 {
1700   u32 *from, n_left_from;
1701   vlib_node_runtime_t *error_node =
1702     vlib_node_get_runtime (vm, ip4_local_node.index);
1703   u16 nexts[VLIB_FRAME_SIZE], *next;
1704   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1705   ip4_header_t *ip[2];
1706   u8 error[2], pt[2];
1707
1708   ip4_local_last_check_t last_check = {
1709     /*
1710      * 0.0.0.0 can appear as the source address of an IP packet,
1711      * as can any other address, hence the need to use the 'first'
1712      * member to make sure the .lbi is initialised for the first
1713      * packet.
1714      */
1715     .src = {.as_u32 = 0},
1716     .lbi = ~0,
1717     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1718     .first = 1,
1719   };
1720
1721   from = vlib_frame_vector_args (frame);
1722   n_left_from = frame->n_vectors;
1723
1724   if (node->flags & VLIB_NODE_FLAG_TRACE)
1725     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1726
1727   vlib_get_buffers (vm, from, bufs, n_left_from);
1728   b = bufs;
1729   next = nexts;
1730
1731   while (n_left_from >= 6)
1732     {
1733       u8 not_batch = 0;
1734
1735       /* Prefetch next iteration. */
1736       {
1737         vlib_prefetch_buffer_header (b[4], LOAD);
1738         vlib_prefetch_buffer_header (b[5], LOAD);
1739
1740         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1741         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1742       }
1743
1744       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1745
1746       ip[0] = vlib_buffer_get_current (b[0]);
1747       ip[1] = vlib_buffer_get_current (b[1]);
1748
1749       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1750       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1751
1752       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1753       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1754
1755       not_batch = pt[0] ^ pt[1];
1756
1757       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1758         goto skip_checks;
1759
1760       if (PREDICT_TRUE (not_batch == 0))
1761         {
1762           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1763           ip4_local_check_src_x2 (b, ip, &last_check, error);
1764         }
1765       else
1766         {
1767           if (!pt[0])
1768             {
1769               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1770               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1771             }
1772           if (!pt[1])
1773             {
1774               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1775               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1776             }
1777         }
1778
1779     skip_checks:
1780
1781       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1782                                     head_of_feature_arc);
1783       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1784                                     head_of_feature_arc);
1785
1786       b += 2;
1787       next += 2;
1788       n_left_from -= 2;
1789     }
1790
1791   while (n_left_from > 0)
1792     {
1793       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1794
1795       ip[0] = vlib_buffer_get_current (b[0]);
1796       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1797       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1798
1799       if (head_of_feature_arc == 0 || pt[0])
1800         goto skip_check;
1801
1802       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1803       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1804
1805     skip_check:
1806
1807       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1808                                     head_of_feature_arc);
1809
1810       b += 1;
1811       next += 1;
1812       n_left_from -= 1;
1813     }
1814
1815   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1816   return frame->n_vectors;
1817 }
1818
1819 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1820                                vlib_frame_t * frame)
1821 {
1822   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1823 }
1824
1825 /* *INDENT-OFF* */
1826 VLIB_REGISTER_NODE (ip4_local_node) =
1827 {
1828   .name = "ip4-local",
1829   .vector_size = sizeof (u32),
1830   .format_trace = format_ip4_forward_next_trace,
1831   .n_errors = IP4_N_ERROR,
1832   .error_strings = ip4_error_strings,
1833   .n_next_nodes = IP_LOCAL_N_NEXT,
1834   .next_nodes =
1835   {
1836     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1837     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1838     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1839     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1840     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1841   },
1842 };
1843 /* *INDENT-ON* */
1844
1845
1846 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1847                                           vlib_node_runtime_t * node,
1848                                           vlib_frame_t * frame)
1849 {
1850   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1851 }
1852
1853 /* *INDENT-OFF* */
1854 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1855   .name = "ip4-local-end-of-arc",
1856   .vector_size = sizeof (u32),
1857
1858   .format_trace = format_ip4_forward_next_trace,
1859   .sibling_of = "ip4-local",
1860 };
1861
1862 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1863   .arc_name = "ip4-local",
1864   .node_name = "ip4-local-end-of-arc",
1865   .runs_before = 0, /* not before any other features */
1866 };
1867 /* *INDENT-ON* */
1868
1869 #ifndef CLIB_MARCH_VARIANT
1870 void
1871 ip4_register_protocol (u32 protocol, u32 node_index)
1872 {
1873   vlib_main_t *vm = vlib_get_main ();
1874   ip4_main_t *im = &ip4_main;
1875   ip_lookup_main_t *lm = &im->lookup_main;
1876
1877   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1878   lm->local_next_by_ip_protocol[protocol] =
1879     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1880 }
1881
1882 void
1883 ip4_unregister_protocol (u32 protocol)
1884 {
1885   ip4_main_t *im = &ip4_main;
1886   ip_lookup_main_t *lm = &im->lookup_main;
1887
1888   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1889   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1890 }
1891 #endif
1892
1893 static clib_error_t *
1894 show_ip_local_command_fn (vlib_main_t * vm,
1895                           unformat_input_t * input, vlib_cli_command_t * cmd)
1896 {
1897   ip4_main_t *im = &ip4_main;
1898   ip_lookup_main_t *lm = &im->lookup_main;
1899   int i;
1900
1901   vlib_cli_output (vm, "Protocols handled by ip4_local");
1902   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1903     {
1904       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1905         {
1906           u32 node_index = vlib_get_node (vm,
1907                                           ip4_local_node.index)->
1908             next_nodes[lm->local_next_by_ip_protocol[i]];
1909           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1910                            format_vlib_node_name, vm, node_index);
1911         }
1912     }
1913   return 0;
1914 }
1915
1916
1917
/*?
 * Display the set of protocols handled by the local IPv4 stack.
 * Each output line has the form '<protocol>: <graph node name>'.
 *
 * @cliexpar
 * Example of how to display local protocol table:
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * ICMP: ip4-icmp-input
 * UDP: ip4-udp-lookup
 * @cliexend
?*/
1930 /* *INDENT-OFF* */
1931 VLIB_CLI_COMMAND (show_ip_local, static) =
1932 {
1933   .path = "show ip local",
1934   .function = show_ip_local_command_fn,
1935   .short_help = "show ip local",
1936 };
1937 /* *INDENT-ON* */
1938
/**
 * Next indices of the ip4 rewrite nodes.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  /* number of next indices; must stay last */
  IP4_REWRITE_N_NEXT
} ip4_rewrite_next_t;
1946
/**
 * The bits of an IPv4 address to mask in order to construct a multicast
 * MAC address. The constant differs with the byte order of the build.
 */
#if !(CLIB_ARCH_IS_BIG_ENDIAN)
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1956
1957 always_inline void
1958 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1959                u16 adj_packet_bytes, bool df, u16 * next,
1960                u8 is_midchain, u32 * error)
1961 {
1962   if (packet_len > adj_packet_bytes)
1963     {
1964       *error = IP4_ERROR_MTU_EXCEEDED;
1965       if (df)
1966         {
1967           icmp4_error_set_vnet_buffer
1968             (b, ICMP4_destination_unreachable,
1969              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1970              adj_packet_bytes);
1971           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1972         }
1973       else
1974         {
1975           /* IP fragmentation */
1976           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1977                                    (is_midchain ?
1978                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1979                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1980           *next = IP4_REWRITE_NEXT_FRAGMENT;
1981         }
1982     }
1983 }
1984
1985 /* increment TTL & update checksum.
1986    Works either endian, so no need for byte swap. */
1987 static_always_inline void
1988 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
1989 {
1990   i32 ttl;
1991   u32 checksum;
1992   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
1993     {
1994       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1995       return;
1996     }
1997
1998   ttl = ip->ttl;
1999
2000   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2001   checksum += checksum >= 0xffff;
2002
2003   ip->checksum = checksum;
2004   ttl += 1;
2005   ip->ttl = ttl;
2006
2007   ASSERT (ip->checksum == ip4_header_checksum (ip));
2008 }
2009
2010 /* Decrement TTL & update checksum.
2011    Works either endian, so no need for byte swap. */
2012 static_always_inline void
2013 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2014                             u32 * error)
2015 {
2016   i32 ttl;
2017   u32 checksum;
2018   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2019     {
2020       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2021       return;
2022     }
2023
2024   ttl = ip->ttl;
2025
2026   /* Input node should have reject packets with ttl 0. */
2027   ASSERT (ip->ttl > 0);
2028
2029   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2030   checksum += checksum >= 0xffff;
2031
2032   ip->checksum = checksum;
2033   ttl -= 1;
2034   ip->ttl = ttl;
2035
2036   /*
2037    * If the ttl drops below 1 when forwarding, generate
2038    * an ICMP response.
2039    */
2040   if (PREDICT_FALSE (ttl <= 0))
2041     {
2042       *error = IP4_ERROR_TIME_EXPIRED;
2043       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2044       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2045                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2046                                    0);
2047       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2048     }
2049
2050   /* Verify checksum. */
2051   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2052           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2053 }
2054
2055
2056 always_inline uword
2057 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2058                              vlib_node_runtime_t * node,
2059                              vlib_frame_t * frame,
2060                              int do_counters, int is_midchain, int is_mcast)
2061 {
2062   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2063   u32 *from = vlib_frame_vector_args (frame);
2064   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2065   u16 nexts[VLIB_FRAME_SIZE], *next;
2066   u32 n_left_from;
2067   vlib_node_runtime_t *error_node =
2068     vlib_node_get_runtime (vm, ip4_input_node.index);
2069
2070   n_left_from = frame->n_vectors;
2071   u32 thread_index = vm->thread_index;
2072
2073   vlib_get_buffers (vm, from, bufs, n_left_from);
2074   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2075
2076 #if (CLIB_N_PREFETCHES >= 8)
2077   if (n_left_from >= 6)
2078     {
2079       int i;
2080       for (i = 2; i < 6; i++)
2081         vlib_prefetch_buffer_header (bufs[i], LOAD);
2082     }
2083
2084   next = nexts;
2085   b = bufs;
2086   while (n_left_from >= 8)
2087     {
2088       const ip_adjacency_t *adj0, *adj1;
2089       ip4_header_t *ip0, *ip1;
2090       u32 rw_len0, error0, adj_index0;
2091       u32 rw_len1, error1, adj_index1;
2092       u32 tx_sw_if_index0, tx_sw_if_index1;
2093       u8 *p;
2094
2095       vlib_prefetch_buffer_header (b[6], LOAD);
2096       vlib_prefetch_buffer_header (b[7], LOAD);
2097
2098       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2099       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2100
2101       /*
2102        * pre-fetch the per-adjacency counters
2103        */
2104       if (do_counters)
2105         {
2106           vlib_prefetch_combined_counter (&adjacency_counters,
2107                                           thread_index, adj_index0);
2108           vlib_prefetch_combined_counter (&adjacency_counters,
2109                                           thread_index, adj_index1);
2110         }
2111
2112       ip0 = vlib_buffer_get_current (b[0]);
2113       ip1 = vlib_buffer_get_current (b[1]);
2114
2115       error0 = error1 = IP4_ERROR_NONE;
2116
2117       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2118       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2119
2120       /* Rewrite packet header and updates lengths. */
2121       adj0 = adj_get (adj_index0);
2122       adj1 = adj_get (adj_index1);
2123
2124       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2125       rw_len0 = adj0[0].rewrite_header.data_bytes;
2126       rw_len1 = adj1[0].rewrite_header.data_bytes;
2127       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2128       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2129
2130       p = vlib_buffer_get_current (b[2]);
2131       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2132       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2133
2134       p = vlib_buffer_get_current (b[3]);
2135       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2136       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2137
2138       /* Check MTU of outgoing interface. */
2139       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2140       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2141
2142       if (b[0]->flags & VNET_BUFFER_F_GSO)
2143         ip0_len = gso_mtu_sz (b[0]);
2144       if (b[1]->flags & VNET_BUFFER_F_GSO)
2145         ip1_len = gso_mtu_sz (b[1]);
2146
2147       ip4_mtu_check (b[0], ip0_len,
2148                      adj0[0].rewrite_header.max_l3_packet_bytes,
2149                      ip0->flags_and_fragment_offset &
2150                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2151                      next + 0, is_midchain, &error0);
2152       ip4_mtu_check (b[1], ip1_len,
2153                      adj1[0].rewrite_header.max_l3_packet_bytes,
2154                      ip1->flags_and_fragment_offset &
2155                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2156                      next + 1, is_midchain, &error1);
2157
2158       if (is_mcast)
2159         {
2160           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2161                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2162                     IP4_ERROR_SAME_INTERFACE : error0);
2163           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2164                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2165                     IP4_ERROR_SAME_INTERFACE : error1);
2166         }
2167
2168       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2169        * to see the IP header */
2170       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2171         {
2172           u32 next_index = adj0[0].rewrite_header.next_index;
2173           vlib_buffer_advance (b[0], -(word) rw_len0);
2174
2175           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2176           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2177
2178           if (PREDICT_FALSE
2179               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2180             vnet_feature_arc_start (lm->output_feature_arc_index,
2181                                     tx_sw_if_index0, &next_index, b[0]);
2182           next[0] = next_index;
2183           if (is_midchain)
2184             calc_checksums (vm, b[0]);
2185         }
2186       else
2187         {
2188           b[0]->error = error_node->errors[error0];
2189           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2190             ip4_ttl_inc (b[0], ip0);
2191         }
2192       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2193         {
2194           u32 next_index = adj1[0].rewrite_header.next_index;
2195           vlib_buffer_advance (b[1], -(word) rw_len1);
2196
2197           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2198           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2199
2200           if (PREDICT_FALSE
2201               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2202             vnet_feature_arc_start (lm->output_feature_arc_index,
2203                                     tx_sw_if_index1, &next_index, b[1]);
2204           next[1] = next_index;
2205           if (is_midchain)
2206             calc_checksums (vm, b[1]);
2207         }
2208       else
2209         {
2210           b[1]->error = error_node->errors[error1];
2211           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2212             ip4_ttl_inc (b[1], ip1);
2213         }
2214
2215       /* Guess we are only writing on simple Ethernet header. */
2216       vnet_rewrite_two_headers (adj0[0], adj1[0],
2217                                 ip0, ip1, sizeof (ethernet_header_t));
2218
2219       if (do_counters)
2220         {
2221           if (error0 == IP4_ERROR_NONE)
2222             vlib_increment_combined_counter
2223               (&adjacency_counters,
2224                thread_index,
2225                adj_index0, 1,
2226                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2227
2228           if (error1 == IP4_ERROR_NONE)
2229             vlib_increment_combined_counter
2230               (&adjacency_counters,
2231                thread_index,
2232                adj_index1, 1,
2233                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2234         }
2235
2236       if (is_midchain)
2237         {
2238           if (error0 == IP4_ERROR_NONE && adj0->sub_type.midchain.fixup_func)
2239             adj0->sub_type.midchain.fixup_func
2240               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2241           if (error1 == IP4_ERROR_NONE && adj1->sub_type.midchain.fixup_func)
2242             adj1->sub_type.midchain.fixup_func
2243               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2244         }
2245
2246       if (is_mcast)
2247         {
2248           /* copy bytes from the IP address into the MAC rewrite */
2249           if (error0 == IP4_ERROR_NONE)
2250             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2251                                         adj0->rewrite_header.dst_mcast_offset,
2252                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2253           if (error1 == IP4_ERROR_NONE)
2254             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2255                                         adj1->rewrite_header.dst_mcast_offset,
2256                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2257         }
2258
2259       next += 2;
2260       b += 2;
2261       n_left_from -= 2;
2262     }
2263 #elif (CLIB_N_PREFETCHES >= 4)
2264   next = nexts;
2265   b = bufs;
2266   while (n_left_from >= 1)
2267     {
2268       ip_adjacency_t *adj0;
2269       ip4_header_t *ip0;
2270       u32 rw_len0, error0, adj_index0;
2271       u32 tx_sw_if_index0;
2272       u8 *p;
2273
2274       /* Prefetch next iteration */
2275       if (PREDICT_TRUE (n_left_from >= 4))
2276         {
2277           ip_adjacency_t *adj2;
2278           u32 adj_index2;
2279
2280           vlib_prefetch_buffer_header (b[3], LOAD);
2281           vlib_prefetch_buffer_data (b[2], LOAD);
2282
2283           /* Prefetch adj->rewrite_header */
2284           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2285           adj2 = adj_get (adj_index2);
2286           p = (u8 *) adj2;
2287           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2288                          LOAD);
2289         }
2290
2291       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2292
2293       /*
2294        * Prefetch the per-adjacency counters
2295        */
2296       if (do_counters)
2297         {
2298           vlib_prefetch_combined_counter (&adjacency_counters,
2299                                           thread_index, adj_index0);
2300         }
2301
2302       ip0 = vlib_buffer_get_current (b[0]);
2303
2304       error0 = IP4_ERROR_NONE;
2305
2306       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2307
2308       /* Rewrite packet header and updates lengths. */
2309       adj0 = adj_get (adj_index0);
2310
2311       /* Rewrite header was prefetched. */
2312       rw_len0 = adj0[0].rewrite_header.data_bytes;
2313       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2314
2315       /* Check MTU of outgoing interface. */
2316       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2317
2318       if (b[0]->flags & VNET_BUFFER_F_GSO)
2319         ip0_len = gso_mtu_sz (b[0]);
2320
2321       ip4_mtu_check (b[0], ip0_len,
2322                      adj0[0].rewrite_header.max_l3_packet_bytes,
2323                      ip0->flags_and_fragment_offset &
2324                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2325                      next + 0, is_midchain, &error0);
2326
2327       if (is_mcast)
2328         {
2329           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2330                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2331                     IP4_ERROR_SAME_INTERFACE : error0);
2332         }
2333
2334       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2335        * to see the IP header */
2336       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2337         {
2338           u32 next_index = adj0[0].rewrite_header.next_index;
2339           vlib_buffer_advance (b[0], -(word) rw_len0);
2340           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2341           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2342
2343           if (PREDICT_FALSE
2344               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2345             vnet_feature_arc_start (lm->output_feature_arc_index,
2346                                     tx_sw_if_index0, &next_index, b[0]);
2347           next[0] = next_index;
2348
2349           if (is_midchain)
2350             calc_checksums (vm, b[0]);
2351
2352           /* Guess we are only writing on simple Ethernet header. */
2353           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2354
2355           /*
2356            * Bump the per-adjacency counters
2357            */
2358           if (do_counters)
2359             vlib_increment_combined_counter
2360               (&adjacency_counters,
2361                thread_index,
2362                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2363                                                            b[0]) + rw_len0);
2364
2365           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2366             adj0->sub_type.midchain.fixup_func
2367               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2368
2369           if (is_mcast)
2370             /* copy bytes from the IP address into the MAC rewrite */
2371             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2372                                         adj0->rewrite_header.dst_mcast_offset,
2373                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2374         }
2375       else
2376         {
2377           b[0]->error = error_node->errors[error0];
2378           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2379             ip4_ttl_inc (b[0], ip0);
2380         }
2381
2382       next += 1;
2383       b += 1;
2384       n_left_from -= 1;
2385     }
2386 #endif
2387
2388   while (n_left_from > 0)
2389     {
2390       ip_adjacency_t *adj0;
2391       ip4_header_t *ip0;
2392       u32 rw_len0, adj_index0, error0;
2393       u32 tx_sw_if_index0;
2394
2395       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2396
2397       adj0 = adj_get (adj_index0);
2398
2399       if (do_counters)
2400         vlib_prefetch_combined_counter (&adjacency_counters,
2401                                         thread_index, adj_index0);
2402
2403       ip0 = vlib_buffer_get_current (b[0]);
2404
2405       error0 = IP4_ERROR_NONE;
2406
2407       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2408
2409
2410       /* Update packet buffer attributes/set output interface. */
2411       rw_len0 = adj0[0].rewrite_header.data_bytes;
2412       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2413
2414       /* Check MTU of outgoing interface. */
2415       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2416       if (b[0]->flags & VNET_BUFFER_F_GSO)
2417         ip0_len = gso_mtu_sz (b[0]);
2418
2419       ip4_mtu_check (b[0], ip0_len,
2420                      adj0[0].rewrite_header.max_l3_packet_bytes,
2421                      ip0->flags_and_fragment_offset &
2422                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2423                      next + 0, is_midchain, &error0);
2424
2425       if (is_mcast)
2426         {
2427           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2428                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2429                     IP4_ERROR_SAME_INTERFACE : error0);
2430         }
2431
2432       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2433        * to see the IP header */
2434       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2435         {
2436           u32 next_index = adj0[0].rewrite_header.next_index;
2437           vlib_buffer_advance (b[0], -(word) rw_len0);
2438           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2439           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2440
2441           if (PREDICT_FALSE
2442               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2443             vnet_feature_arc_start (lm->output_feature_arc_index,
2444                                     tx_sw_if_index0, &next_index, b[0]);
2445           next[0] = next_index;
2446
2447           if (is_midchain)
2448             /* this acts on the packet that is about to be encapped */
2449             calc_checksums (vm, b[0]);
2450
2451           /* Guess we are only writing on simple Ethernet header. */
2452           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2453
2454           if (do_counters)
2455             vlib_increment_combined_counter
2456               (&adjacency_counters,
2457                thread_index, adj_index0, 1,
2458                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2459
2460           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2461             adj0->sub_type.midchain.fixup_func
2462               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2463
2464           if (is_mcast)
2465             /* copy bytes from the IP address into the MAC rewrite */
2466             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2467                                         adj0->rewrite_header.dst_mcast_offset,
2468                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2469         }
2470       else
2471         {
2472           b[0]->error = error_node->errors[error0];
2473           /* undo the TTL decrement - we'll be back to do it again */
2474           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2475             ip4_ttl_inc (b[0], ip0);
2476         }
2477
2478       next += 1;
2479       b += 1;
2480       n_left_from -= 1;
2481     }
2482
2483
2484   /* Need to do trace after rewrites to pick up new packet data. */
2485   if (node->flags & VLIB_NODE_FLAG_TRACE)
2486     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2487
2488   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2489   return frame->n_vectors;
2490 }
2491
2492 always_inline uword
2493 ip4_rewrite_inline (vlib_main_t * vm,
2494                     vlib_node_runtime_t * node,
2495                     vlib_frame_t * frame,
2496                     int do_counters, int is_midchain, int is_mcast)
2497 {
2498   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2499                                       is_midchain, is_mcast);
2500 }
2501
2502
2503 /** @brief IPv4 rewrite node.
2504     @node ip4-rewrite
2505
2506     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2507     header checksum, fetch the ip adjacency, check the outbound mtu,
2508     apply the adjacency rewrite, and send pkts to the adjacency
2509     rewrite header's rewrite_next_index.
2510
2511     @param vm vlib_main_t corresponding to the current thread
2512     @param node vlib_node_runtime_t
2513     @param frame vlib_frame_t whose contents should be dispatched
2514
2515     @par Graph mechanics: buffer metadata, next index usage
2516
2517     @em Uses:
2518     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2519         - the rewrite adjacency index
2520     - <code>adj->lookup_next_index</code>
2521         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2522           the packet will be dropped.
2523     - <code>adj->rewrite_header</code>
2524         - Rewrite string length, rewrite string, next_index
2525
2526     @em Sets:
2527     - <code>b->current_data, b->current_length</code>
2528         - Updated net of applying the rewrite string
2529
2530     <em>Next Indices:</em>
2531     - <code> adj->rewrite_header.next_index </code>
2532       or @c ip4-drop
2533 */
2534
2535 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2536                                  vlib_frame_t * frame)
2537 {
2538   if (adj_are_counters_enabled ())
2539     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2540   else
2541     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2542 }
2543
2544 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2545                                        vlib_node_runtime_t * node,
2546                                        vlib_frame_t * frame)
2547 {
2548   if (adj_are_counters_enabled ())
2549     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2550   else
2551     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2552 }
2553
2554 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2555                                   vlib_node_runtime_t * node,
2556                                   vlib_frame_t * frame)
2557 {
2558   if (adj_are_counters_enabled ())
2559     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2560   else
2561     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2562 }
2563
2564 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2565                                        vlib_node_runtime_t * node,
2566                                        vlib_frame_t * frame)
2567 {
2568   if (adj_are_counters_enabled ())
2569     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2570   else
2571     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2572 }
2573
2574 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2575                                         vlib_node_runtime_t * node,
2576                                         vlib_frame_t * frame)
2577 {
2578   if (adj_are_counters_enabled ())
2579     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2580   else
2581     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2582 }
2583
2584 /* *INDENT-OFF* */
2585 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2586   .name = "ip4-rewrite",
2587   .vector_size = sizeof (u32),
2588
2589   .format_trace = format_ip4_rewrite_trace,
2590
2591   .n_next_nodes = IP4_REWRITE_N_NEXT,
2592   .next_nodes = {
2593     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2594     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2595     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2596   },
2597 };
2598
2599 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2600   .name = "ip4-rewrite-bcast",
2601   .vector_size = sizeof (u32),
2602
2603   .format_trace = format_ip4_rewrite_trace,
2604   .sibling_of = "ip4-rewrite",
2605 };
2606
2607 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2608   .name = "ip4-rewrite-mcast",
2609   .vector_size = sizeof (u32),
2610
2611   .format_trace = format_ip4_rewrite_trace,
2612   .sibling_of = "ip4-rewrite",
2613 };
2614
2615 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2616   .name = "ip4-mcast-midchain",
2617   .vector_size = sizeof (u32),
2618
2619   .format_trace = format_ip4_rewrite_trace,
2620   .sibling_of = "ip4-rewrite",
2621 };
2622
2623 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2624   .name = "ip4-midchain",
2625   .vector_size = sizeof (u32),
2626   .format_trace = format_ip4_rewrite_trace,
2627   .sibling_of = "ip4-rewrite",
2628 };
2629 /* *INDENT-ON* */
2630
2631 static int
2632 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2633 {
2634   ip4_fib_mtrie_t *mtrie0;
2635   ip4_fib_mtrie_leaf_t leaf0;
2636   u32 lbi0;
2637
2638   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2639
2640   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2641   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2642   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2643
2644   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2645
2646   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2647 }
2648
2649 static clib_error_t *
2650 test_lookup_command_fn (vlib_main_t * vm,
2651                         unformat_input_t * input, vlib_cli_command_t * cmd)
2652 {
2653   ip4_fib_t *fib;
2654   u32 table_id = 0;
2655   f64 count = 1;
2656   u32 n;
2657   int i;
2658   ip4_address_t ip4_base_address;
2659   u64 errors = 0;
2660
2661   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2662     {
2663       if (unformat (input, "table %d", &table_id))
2664         {
2665           /* Make sure the entry exists. */
2666           fib = ip4_fib_get (table_id);
2667           if ((fib) && (fib->index != table_id))
2668             return clib_error_return (0, "<fib-index> %d does not exist",
2669                                       table_id);
2670         }
2671       else if (unformat (input, "count %f", &count))
2672         ;
2673
2674       else if (unformat (input, "%U",
2675                          unformat_ip4_address, &ip4_base_address))
2676         ;
2677       else
2678         return clib_error_return (0, "unknown input `%U'",
2679                                   format_unformat_error, input);
2680     }
2681
2682   n = count;
2683
2684   for (i = 0; i < n; i++)
2685     {
2686       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2687         errors++;
2688
2689       ip4_base_address.as_u32 =
2690         clib_host_to_net_u32 (1 +
2691                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2692     }
2693
2694   if (errors)
2695     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2696   else
2697     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2698
2699   return 0;
2700 }
2701
2702 /*?
2703  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2704  * given FIB table to determine if there is a conflict with the
2705  * adjacency table. The fib-id can be determined by using the
2706  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2707  * of 0 is used.
2708  *
2709  * @todo This command uses fib-id, other commands use table-id (not
2710  * just a name, they are different indexes). Would like to change this
2711  * to table-id for consistency.
2712  *
2713  * @cliexpar
2714  * Example of how to run the test lookup command:
2715  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2716  * No errors in 2 lookups
2717  * @cliexend
2718 ?*/
2719 /* *INDENT-OFF* */
2720 VLIB_CLI_COMMAND (lookup_test_command, static) =
2721 {
2722   .path = "test lookup",
2723   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2724   .function = test_lookup_command_fn,
2725 };
2726 /* *INDENT-ON* */
2727
2728 #ifndef CLIB_MARCH_VARIANT
2729 int
2730 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2731 {
2732   u32 fib_index;
2733
2734   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2735
2736   if (~0 == fib_index)
2737     return VNET_API_ERROR_NO_SUCH_FIB;
2738
2739   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2740                                   flow_hash_config);
2741
2742   return 0;
2743 }
2744 #endif
2745
2746 static clib_error_t *
2747 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2748                              unformat_input_t * input,
2749                              vlib_cli_command_t * cmd)
2750 {
2751   int matched = 0;
2752   u32 table_id = 0;
2753   u32 flow_hash_config = 0;
2754   int rv;
2755
2756   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2757     {
2758       if (unformat (input, "table %d", &table_id))
2759         matched = 1;
2760 #define _(a,v) \
2761     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2762       foreach_flow_hash_bit
2763 #undef _
2764         else
2765         break;
2766     }
2767
2768   if (matched == 0)
2769     return clib_error_return (0, "unknown input `%U'",
2770                               format_unformat_error, input);
2771
2772   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2773   switch (rv)
2774     {
2775     case 0:
2776       break;
2777
2778     case VNET_API_ERROR_NO_SUCH_FIB:
2779       return clib_error_return (0, "no such FIB table %d", table_id);
2780
2781     default:
2782       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2783       break;
2784     }
2785
2786   return 0;
2787 }
2788
2789 /*?
2790  * Configure the set of IPv4 fields used by the flow hash.
2791  *
2792  * @cliexpar
2793  * Example of how to set the flow hash on a given table:
2794  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2795  * Example of how to display the configured flow hash:
2796  * @cliexstart{show ip fib}
2797  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2798  * 0.0.0.0/0
2799  *   unicast-ip4-chain
2800  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2801  *     [0] [@0]: dpo-drop ip6
2802  * 0.0.0.0/32
2803  *   unicast-ip4-chain
2804  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2805  *     [0] [@0]: dpo-drop ip6
2806  * 224.0.0.0/8
2807  *   unicast-ip4-chain
2808  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2809  *     [0] [@0]: dpo-drop ip6
2810  * 6.0.1.2/32
2811  *   unicast-ip4-chain
2812  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2813  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2814  * 7.0.0.1/32
2815  *   unicast-ip4-chain
2816  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2817  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2818  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2819  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2820  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2821  * 240.0.0.0/8
2822  *   unicast-ip4-chain
2823  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2824  *     [0] [@0]: dpo-drop ip6
2825  * 255.255.255.255/32
2826  *   unicast-ip4-chain
2827  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2828  *     [0] [@0]: dpo-drop ip6
2829  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2830  * 0.0.0.0/0
2831  *   unicast-ip4-chain
2832  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2833  *     [0] [@0]: dpo-drop ip6
2834  * 0.0.0.0/32
2835  *   unicast-ip4-chain
2836  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2837  *     [0] [@0]: dpo-drop ip6
2838  * 172.16.1.0/24
2839  *   unicast-ip4-chain
2840  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2841  *     [0] [@4]: ipv4-glean: af_packet0
2842  * 172.16.1.1/32
2843  *   unicast-ip4-chain
2844  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2845  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2846  * 172.16.1.2/32
2847  *   unicast-ip4-chain
2848  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2849  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2850  * 172.16.2.0/24
2851  *   unicast-ip4-chain
2852  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2853  *     [0] [@4]: ipv4-glean: af_packet1
2854  * 172.16.2.1/32
2855  *   unicast-ip4-chain
2856  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2857  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2858  * 224.0.0.0/8
2859  *   unicast-ip4-chain
2860  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2861  *     [0] [@0]: dpo-drop ip6
2862  * 240.0.0.0/8
2863  *   unicast-ip4-chain
2864  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2865  *     [0] [@0]: dpo-drop ip6
2866  * 255.255.255.255/32
2867  *   unicast-ip4-chain
2868  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2869  *     [0] [@0]: dpo-drop ip6
2870  * @cliexend
2871 ?*/
2872 /* *INDENT-OFF* */
2873 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2874 {
2875   .path = "set ip flow-hash",
2876   .short_help =
2877   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2878   .function = set_ip_flow_hash_command_fn,
2879 };
2880 /* *INDENT-ON* */
2881
2882 #ifndef CLIB_MARCH_VARIANT
2883 int
2884 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2885                              u32 table_index)
2886 {
2887   vnet_main_t *vnm = vnet_get_main ();
2888   vnet_interface_main_t *im = &vnm->interface_main;
2889   ip4_main_t *ipm = &ip4_main;
2890   ip_lookup_main_t *lm = &ipm->lookup_main;
2891   vnet_classify_main_t *cm = &vnet_classify_main;
2892   ip4_address_t *if_addr;
2893
2894   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2895     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2896
2897   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2898     return VNET_API_ERROR_NO_SUCH_ENTRY;
2899
2900   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2901   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2902
2903   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2904
2905   if (NULL != if_addr)
2906     {
2907       fib_prefix_t pfx = {
2908         .fp_len = 32,
2909         .fp_proto = FIB_PROTOCOL_IP4,
2910         .fp_addr.ip4 = *if_addr,
2911       };
2912       u32 fib_index;
2913
2914       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2915                                                        sw_if_index);
2916
2917
2918       if (table_index != (u32) ~ 0)
2919         {
2920           dpo_id_t dpo = DPO_INVALID;
2921
2922           dpo_set (&dpo,
2923                    DPO_CLASSIFY,
2924                    DPO_PROTO_IP4,
2925                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2926
2927           fib_table_entry_special_dpo_add (fib_index,
2928                                            &pfx,
2929                                            FIB_SOURCE_CLASSIFY,
2930                                            FIB_ENTRY_FLAG_NONE, &dpo);
2931           dpo_reset (&dpo);
2932         }
2933       else
2934         {
2935           fib_table_entry_special_remove (fib_index,
2936                                           &pfx, FIB_SOURCE_CLASSIFY);
2937         }
2938     }
2939
2940   return 0;
2941 }
2942 #endif
2943
2944 static clib_error_t *
2945 set_ip_classify_command_fn (vlib_main_t * vm,
2946                             unformat_input_t * input,
2947                             vlib_cli_command_t * cmd)
2948 {
2949   u32 table_index = ~0;
2950   int table_index_set = 0;
2951   u32 sw_if_index = ~0;
2952   int rv;
2953
2954   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2955     {
2956       if (unformat (input, "table-index %d", &table_index))
2957         table_index_set = 1;
2958       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2959                          vnet_get_main (), &sw_if_index))
2960         ;
2961       else
2962         break;
2963     }
2964
2965   if (table_index_set == 0)
2966     return clib_error_return (0, "classify table-index must be specified");
2967
2968   if (sw_if_index == ~0)
2969     return clib_error_return (0, "interface / subif must be specified");
2970
2971   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2972
2973   switch (rv)
2974     {
2975     case 0:
2976       break;
2977
2978     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2979       return clib_error_return (0, "No such interface");
2980
2981     case VNET_API_ERROR_NO_SUCH_ENTRY:
2982       return clib_error_return (0, "No such classifier table");
2983     }
2984   return 0;
2985 }
2986
2987 /*?
2988  * Assign a classification table to an interface. The classification
2989  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2990  * commands. Once the table is created, use this command to filter packets
2991  * on an interface.
2992  *
2993  * @cliexpar
2994  * Example of how to assign a classification table to an interface:
2995  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2996 ?*/
2997 /* *INDENT-OFF* */
2998 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2999 {
3000     .path = "set ip classify",
3001     .short_help =
3002     "set ip classify intfc <interface> table-index <classify-idx>",
3003     .function = set_ip_classify_command_fn,
3004 };
3005 /* *INDENT-ON* */
3006
3007 static clib_error_t *
3008 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3009 {
3010   ip4_main_t *im = &ip4_main;
3011   uword heapsize = 0;
3012
3013   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3014     {
3015       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3016         ;
3017       else
3018         return clib_error_return (0,
3019                                   "invalid heap-size parameter `%U'",
3020                                   format_unformat_error, input);
3021     }
3022
3023   im->mtrie_heap_size = heapsize;
3024
3025   return 0;
3026 }
3027
3028 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3029
3030 /*
3031  * fd.io coding-style-patch-verification: ON
3032  *
3033  * Local Variables:
3034  * eval: (c-set-style "gnu")
3035  * End:
3036  */