classify: Reduce the include dependencies on vnet_classify.h
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59 #include <vnet/classify/vnet_classify.h>
60
61 /** @brief IPv4 lookup node.
62     @node ip4-lookup
63
64     This is the main IPv4 lookup dispatch node.
65
66     @param vm vlib_main_t corresponding to the current thread
67     @param node vlib_node_runtime_t
68     @param frame vlib_frame_t whose contents should be dispatched
69
70     @par Graph mechanics: buffer metadata, next index usage
71
72     @em Uses:
73     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
74         - Indicates the @c sw_if_index value of the interface that the
75           packet was received on.
76     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
77         - When the value is @c ~0 then the node performs a longest prefix
78           match (LPM) for the packet destination address in the FIB attached
79           to the receive interface.
80         - Otherwise perform LPM for the packet destination address in the
81           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
82           value (0, 1, ...) and not a VRF id.
83
84     @em Sets:
85     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
86         - The lookup result adjacency index.
87
88     <em>Next Index:</em>
89     - Dispatches the packet to the node index found in
90       ip_adjacency_t @c adj->lookup_next_index
91       (where @c adj is the lookup result adjacency).
92 */
93 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
94                                 vlib_frame_t * frame)
95 {
96   return ip4_lookup_inline (vm, node, frame);
97 }
98
99 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
100
101 /* *INDENT-OFF* */
102 VLIB_REGISTER_NODE (ip4_lookup_node) =
103 {
104   .name = "ip4-lookup",
105   .vector_size = sizeof (u32),
106   .format_trace = format_ip4_lookup_trace,
107   .n_next_nodes = IP_LOOKUP_N_NEXT,
108   .next_nodes = IP4_LOOKUP_NEXT_NODES,
109 };
110 /* *INDENT-ON* */
111
112 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
113                                       vlib_node_runtime_t * node,
114                                       vlib_frame_t * frame)
115 {
116   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
117   u32 n_left, *from;
118   u32 thread_index = vm->thread_index;
119   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
120   u16 nexts[VLIB_FRAME_SIZE], *next;
121
122   from = vlib_frame_vector_args (frame);
123   n_left = frame->n_vectors;
124   next = nexts;
125
126   vlib_get_buffers (vm, from, bufs, n_left);
127
128   while (n_left >= 4)
129     {
130       const load_balance_t *lb0, *lb1;
131       const ip4_header_t *ip0, *ip1;
132       u32 lbi0, hc0, lbi1, hc1;
133       const dpo_id_t *dpo0, *dpo1;
134
135       /* Prefetch next iteration. */
136       {
137         vlib_prefetch_buffer_header (b[2], LOAD);
138         vlib_prefetch_buffer_header (b[3], LOAD);
139
140         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
141         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
142       }
143
144       ip0 = vlib_buffer_get_current (b[0]);
145       ip1 = vlib_buffer_get_current (b[1]);
146       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
147       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
148
149       lb0 = load_balance_get (lbi0);
150       lb1 = load_balance_get (lbi1);
151
152       /*
153        * this node is for via FIBs we can re-use the hash value from the
154        * to node if present.
155        * We don't want to use the same hash value at each level in the recursion
156        * graph as that would lead to polarisation
157        */
158       hc0 = hc1 = 0;
159
160       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
161         {
162           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
163             {
164               hc0 = vnet_buffer (b[0])->ip.flow_hash =
165                 vnet_buffer (b[0])->ip.flow_hash >> 1;
166             }
167           else
168             {
169               hc0 = vnet_buffer (b[0])->ip.flow_hash =
170                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
171             }
172           dpo0 = load_balance_get_fwd_bucket
173             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
174         }
175       else
176         {
177           dpo0 = load_balance_get_bucket_i (lb0, 0);
178         }
179       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
180         {
181           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
182             {
183               hc1 = vnet_buffer (b[1])->ip.flow_hash =
184                 vnet_buffer (b[1])->ip.flow_hash >> 1;
185             }
186           else
187             {
188               hc1 = vnet_buffer (b[1])->ip.flow_hash =
189                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
190             }
191           dpo1 = load_balance_get_fwd_bucket
192             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
193         }
194       else
195         {
196           dpo1 = load_balance_get_bucket_i (lb1, 0);
197         }
198
199       next[0] = dpo0->dpoi_next_node;
200       next[1] = dpo1->dpoi_next_node;
201
202       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
203       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
204
205       vlib_increment_combined_counter
206         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
207       vlib_increment_combined_counter
208         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
209
210       b += 2;
211       next += 2;
212       n_left -= 2;
213     }
214
215   while (n_left > 0)
216     {
217       const load_balance_t *lb0;
218       const ip4_header_t *ip0;
219       const dpo_id_t *dpo0;
220       u32 lbi0, hc0;
221
222       ip0 = vlib_buffer_get_current (b[0]);
223       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
224
225       lb0 = load_balance_get (lbi0);
226
227       hc0 = 0;
228       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
229         {
230           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
231             {
232               hc0 = vnet_buffer (b[0])->ip.flow_hash =
233                 vnet_buffer (b[0])->ip.flow_hash >> 1;
234             }
235           else
236             {
237               hc0 = vnet_buffer (b[0])->ip.flow_hash =
238                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
239             }
240           dpo0 = load_balance_get_fwd_bucket
241             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
242         }
243       else
244         {
245           dpo0 = load_balance_get_bucket_i (lb0, 0);
246         }
247
248       next[0] = dpo0->dpoi_next_node;
249       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
250
251       vlib_increment_combined_counter
252         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
253
254       b += 1;
255       next += 1;
256       n_left -= 1;
257     }
258
259   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
260   if (node->flags & VLIB_NODE_FLAG_TRACE)
261     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
262
263   return frame->n_vectors;
264 }
265
266 /* *INDENT-OFF* */
267 VLIB_REGISTER_NODE (ip4_load_balance_node) =
268 {
269   .name = "ip4-load-balance",
270   .vector_size = sizeof (u32),
271   .sibling_of = "ip4-lookup",
272   .format_trace = format_ip4_lookup_trace,
273 };
274 /* *INDENT-ON* */
275
276 #ifndef CLIB_MARCH_VARIANT
277 /* get first interface address */
278 ip4_address_t *
279 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
280                              ip_interface_address_t ** result_ia)
281 {
282   ip_lookup_main_t *lm = &im->lookup_main;
283   ip_interface_address_t *ia = 0;
284   ip4_address_t *result = 0;
285
286   /* *INDENT-OFF* */
287   foreach_ip_interface_address
288     (lm, ia, sw_if_index,
289      1 /* honor unnumbered */ ,
290      ({
291        ip4_address_t * a =
292          ip_interface_address_get_address (lm, ia);
293        result = a;
294        break;
295      }));
296   /* *INDENT-OFF* */
297   if (result_ia)
298     *result_ia = result ? ia : 0;
299   return result;
300 }
301 #endif
302
303 static void
304 ip4_add_subnet_bcast_route (u32 fib_index,
305                             fib_prefix_t *pfx,
306                             u32 sw_if_index)
307 {
308   vnet_sw_interface_flags_t iflags;
309
310   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
311
312   fib_table_entry_special_remove(fib_index,
313                                  pfx,
314                                  FIB_SOURCE_INTERFACE);
315
316   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
317     {
318       fib_table_entry_update_one_path (fib_index, pfx,
319                                        FIB_SOURCE_INTERFACE,
320                                        FIB_ENTRY_FLAG_NONE,
321                                        DPO_PROTO_IP4,
322                                        /* No next-hop address */
323                                        &ADJ_BCAST_ADDR,
324                                        sw_if_index,
325                                        // invalid FIB index
326                                        ~0,
327                                        1,
328                                        // no out-label stack
329                                        NULL,
330                                        FIB_ROUTE_PATH_FLAG_NONE);
331     }
332   else
333     {
334         fib_table_entry_special_add(fib_index,
335                                     pfx,
336                                     FIB_SOURCE_INTERFACE,
337                                     (FIB_ENTRY_FLAG_DROP |
338                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
339     }
340 }
341
342 static void
343 ip4_add_interface_prefix_routes (ip4_main_t *im,
344                                  u32 sw_if_index,
345                                  u32 fib_index,
346                                  ip_interface_address_t * a)
347 {
348   ip_lookup_main_t *lm = &im->lookup_main;
349   ip_interface_prefix_t *if_prefix;
350   ip4_address_t *address = ip_interface_address_get_address (lm, a);
351
352   ip_interface_prefix_key_t key = {
353     .prefix = {
354       .fp_len = a->address_length,
355       .fp_proto = FIB_PROTOCOL_IP4,
356       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
357     },
358     .sw_if_index = sw_if_index,
359   };
360
361   fib_prefix_t pfx_special = {
362     .fp_proto = FIB_PROTOCOL_IP4,
363   };
364
365   /* If prefix already set on interface, just increment ref count & return */
366   if_prefix = ip_get_interface_prefix (lm, &key);
367   if (if_prefix)
368     {
369       if_prefix->ref_count += 1;
370       return;
371     }
372
373   /* New prefix - allocate a pool entry, initialize it, add to the hash */
374   pool_get (lm->if_prefix_pool, if_prefix);
375   if_prefix->ref_count = 1;
376   if_prefix->src_ia_index = a - lm->if_address_pool;
377   clib_memcpy (&if_prefix->key, &key, sizeof (key));
378   mhash_set (&lm->prefix_to_if_prefix_index, &key,
379              if_prefix - lm->if_prefix_pool, 0 /* old value */);
380
381   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
382   if (a->address_length <= 30)
383     {
384       pfx_special.fp_len = a->address_length;
385       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
386
387       /* set the glean route for the prefix */
388       fib_table_entry_update_one_path (fib_index, &pfx_special,
389                                        FIB_SOURCE_INTERFACE,
390                                        (FIB_ENTRY_FLAG_CONNECTED |
391                                         FIB_ENTRY_FLAG_ATTACHED),
392                                        DPO_PROTO_IP4,
393                                        /* No next-hop address */
394                                        NULL,
395                                        sw_if_index,
396                                        /* invalid FIB index */
397                                        ~0,
398                                        1,
399                                        /* no out-label stack */
400                                        NULL,
401                                        FIB_ROUTE_PATH_FLAG_NONE);
402
403       /* set a drop route for the base address of the prefix */
404       pfx_special.fp_len = 32;
405       pfx_special.fp_addr.ip4.as_u32 =
406         address->as_u32 & im->fib_masks[a->address_length];
407
408       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
409         fib_table_entry_special_add (fib_index, &pfx_special,
410                                      FIB_SOURCE_INTERFACE,
411                                      (FIB_ENTRY_FLAG_DROP |
412                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
413
414       /* set a route for the broadcast address of the prefix */
415       pfx_special.fp_len = 32;
416       pfx_special.fp_addr.ip4.as_u32 =
417         address->as_u32 | ~im->fib_masks[a->address_length];
418       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
419         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
420
421
422     }
423   /* length == 31 - add an attached route for the other address */
424   else if (a->address_length == 31)
425     {
426       pfx_special.fp_len = 32;
427       pfx_special.fp_addr.ip4.as_u32 =
428         address->as_u32 ^ clib_host_to_net_u32(1);
429
430       fib_table_entry_update_one_path (fib_index, &pfx_special,
431                                        FIB_SOURCE_INTERFACE,
432                                        (FIB_ENTRY_FLAG_ATTACHED),
433                                        DPO_PROTO_IP4,
434                                        &pfx_special.fp_addr,
435                                        sw_if_index,
436                                        /* invalid FIB index */
437                                        ~0,
438                                        1,
439                                        NULL,
440                                        FIB_ROUTE_PATH_FLAG_NONE);
441     }
442 }
443
444 static void
445 ip4_add_interface_routes (u32 sw_if_index,
446                           ip4_main_t * im, u32 fib_index,
447                           ip_interface_address_t * a)
448 {
449   ip_lookup_main_t *lm = &im->lookup_main;
450   ip4_address_t *address = ip_interface_address_get_address (lm, a);
451   fib_prefix_t pfx = {
452     .fp_len = 32,
453     .fp_proto = FIB_PROTOCOL_IP4,
454     .fp_addr.ip4 = *address,
455   };
456
457   /* set special routes for the prefix if needed */
458   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
459
460   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
461     {
462       u32 classify_table_index =
463         lm->classify_table_index_by_sw_if_index[sw_if_index];
464       if (classify_table_index != (u32) ~ 0)
465         {
466           dpo_id_t dpo = DPO_INVALID;
467
468           dpo_set (&dpo,
469                    DPO_CLASSIFY,
470                    DPO_PROTO_IP4,
471                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
472
473           fib_table_entry_special_dpo_add (fib_index,
474                                            &pfx,
475                                            FIB_SOURCE_CLASSIFY,
476                                            FIB_ENTRY_FLAG_NONE, &dpo);
477           dpo_reset (&dpo);
478         }
479     }
480
481   fib_table_entry_update_one_path (fib_index, &pfx,
482                                    FIB_SOURCE_INTERFACE,
483                                    (FIB_ENTRY_FLAG_CONNECTED |
484                                     FIB_ENTRY_FLAG_LOCAL),
485                                    DPO_PROTO_IP4,
486                                    &pfx.fp_addr,
487                                    sw_if_index,
488                                    // invalid FIB index
489                                    ~0,
490                                    1, NULL,
491                                    FIB_ROUTE_PATH_FLAG_NONE);
492 }
493
494 static void
495 ip4_del_interface_prefix_routes (ip4_main_t * im,
496                                  u32 sw_if_index,
497                                  u32 fib_index,
498                                  ip4_address_t * address,
499                                  u32 address_length)
500 {
501   ip_lookup_main_t *lm = &im->lookup_main;
502   ip_interface_prefix_t *if_prefix;
503
504   ip_interface_prefix_key_t key = {
505     .prefix = {
506       .fp_len = address_length,
507       .fp_proto = FIB_PROTOCOL_IP4,
508       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
509     },
510     .sw_if_index = sw_if_index,
511   };
512
513   fib_prefix_t pfx_special = {
514     .fp_len = 32,
515     .fp_proto = FIB_PROTOCOL_IP4,
516   };
517
518   if_prefix = ip_get_interface_prefix (lm, &key);
519   if (!if_prefix)
520     {
521       clib_warning ("Prefix not found while deleting %U",
522                     format_ip4_address_and_length, address, address_length);
523       return;
524     }
525
526   if_prefix->ref_count -= 1;
527
528   /*
529    * Routes need to be adjusted if:
530    * - deleting last intf addr in prefix
531    * - deleting intf addr used as default source address in glean adjacency
532    *
533    * We're done now otherwise
534    */
535   if ((if_prefix->ref_count > 0) &&
536       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
537     return;
538
539   /* length <= 30, delete glean route, first address, last address */
540   if (address_length <= 30)
541     {
542
543       /* remove glean route for prefix */
544       pfx_special.fp_addr.ip4 = *address;
545       pfx_special.fp_len = address_length;
546       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
547
548       /* if no more intf addresses in prefix, remove other special routes */
549       if (!if_prefix->ref_count)
550         {
551           /* first address in prefix */
552           pfx_special.fp_addr.ip4.as_u32 =
553             address->as_u32 & im->fib_masks[address_length];
554           pfx_special.fp_len = 32;
555
556           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
557           fib_table_entry_special_remove (fib_index,
558                                           &pfx_special,
559                                           FIB_SOURCE_INTERFACE);
560
561           /* prefix broadcast address */
562           pfx_special.fp_addr.ip4.as_u32 =
563             address->as_u32 | ~im->fib_masks[address_length];
564           pfx_special.fp_len = 32;
565
566           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
567           fib_table_entry_special_remove (fib_index,
568                                           &pfx_special,
569                                           FIB_SOURCE_INTERFACE);
570         }
571       else
572         /* default source addr just got deleted, find another */
573         {
574           ip_interface_address_t *new_src_ia = NULL;
575           ip4_address_t *new_src_addr = NULL;
576
577           new_src_addr =
578             ip4_interface_address_matching_destination
579               (im, address, sw_if_index, &new_src_ia);
580
581           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
582
583           pfx_special.fp_len = address_length;
584           pfx_special.fp_addr.ip4 = *new_src_addr;
585
586           /* set new glean route for the prefix */
587           fib_table_entry_update_one_path (fib_index, &pfx_special,
588                                            FIB_SOURCE_INTERFACE,
589                                            (FIB_ENTRY_FLAG_CONNECTED |
590                                             FIB_ENTRY_FLAG_ATTACHED),
591                                            DPO_PROTO_IP4,
592                                            /* No next-hop address */
593                                            NULL,
594                                            sw_if_index,
595                                            /* invalid FIB index */
596                                            ~0,
597                                            1,
598                                            /* no out-label stack */
599                                            NULL,
600                                            FIB_ROUTE_PATH_FLAG_NONE);
601           return;
602         }
603     }
604   /* length == 31, delete attached route for the other address */
605   else if (address_length == 31)
606     {
607       pfx_special.fp_addr.ip4.as_u32 =
608         address->as_u32 ^ clib_host_to_net_u32(1);
609
610       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
611     }
612
613   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
614   pool_put (lm->if_prefix_pool, if_prefix);
615 }
616
617 static void
618 ip4_del_interface_routes (u32 sw_if_index,
619                           ip4_main_t * im,
620                           u32 fib_index,
621                           ip4_address_t * address, u32 address_length)
622 {
623   fib_prefix_t pfx = {
624     .fp_len = address_length,
625     .fp_proto = FIB_PROTOCOL_IP4,
626     .fp_addr.ip4 = *address,
627   };
628
629   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
630                                    address, address_length);
631
632   pfx.fp_len = 32;
633   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
634 }
635
636 #ifndef CLIB_MARCH_VARIANT
637 void
638 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
639 {
640   ip4_main_t *im = &ip4_main;
641
642   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
643
644   /*
645    * enable/disable only on the 1<->0 transition
646    */
647   if (is_enable)
648     {
649       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
650         return;
651     }
652   else
653     {
654       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
655       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
656         return;
657     }
658   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
659                                !is_enable, 0, 0);
660
661
662   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
663                                sw_if_index, !is_enable, 0, 0);
664
665   {
666     ip4_enable_disable_interface_callback_t *cb;
667     vec_foreach (cb, im->enable_disable_interface_callbacks)
668       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
669   }
670 }
671
672 static clib_error_t *
673 ip4_add_del_interface_address_internal (vlib_main_t * vm,
674                                         u32 sw_if_index,
675                                         ip4_address_t * address,
676                                         u32 address_length, u32 is_del)
677 {
678   vnet_main_t *vnm = vnet_get_main ();
679   ip4_main_t *im = &ip4_main;
680   ip_lookup_main_t *lm = &im->lookup_main;
681   clib_error_t *error = 0;
682   u32 if_address_index, elts_before;
683   ip4_address_fib_t ip4_af, *addr_fib = 0;
684
685   /* local0 interface doesn't support IP addressing  */
686   if (sw_if_index == 0)
687     {
688       return
689        clib_error_create ("local0 interface doesn't support IP addressing");
690     }
691
692   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
693   ip4_addr_fib_init (&ip4_af, address,
694                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
695   vec_add1 (addr_fib, ip4_af);
696
697   /*
698    * there is no support for adj-fib handling in the presence of overlapping
699    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
700    * most routers do.
701    */
702   /* *INDENT-OFF* */
703   if (!is_del)
704     {
705       /* When adding an address check that it does not conflict
706          with an existing address on any interface in this table. */
707       ip_interface_address_t *ia;
708       vnet_sw_interface_t *sif;
709
710       pool_foreach(sif, vnm->interface_main.sw_interfaces,
711       ({
712           if (im->fib_index_by_sw_if_index[sw_if_index] ==
713               im->fib_index_by_sw_if_index[sif->sw_if_index])
714             {
715               foreach_ip_interface_address
716                 (&im->lookup_main, ia, sif->sw_if_index,
717                  0 /* honor unnumbered */ ,
718                  ({
719                    ip4_address_t * x =
720                      ip_interface_address_get_address
721                      (&im->lookup_main, ia);
722                    if (ip4_destination_matches_route
723                        (im, address, x, ia->address_length) ||
724                        ip4_destination_matches_route (im,
725                                                       x,
726                                                       address,
727                                                       address_length))
728                      {
729                        /* an intf may have >1 addr from the same prefix */
730                        if ((sw_if_index == sif->sw_if_index) &&
731                            (ia->address_length == address_length) &&
732                            (x->as_u32 != address->as_u32))
733                          continue;
734
735                        /* error if the length or intf was different */
736                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
737
738                        return
739                          clib_error_create
740                          ("failed to add %U on %U which conflicts with %U for interface %U",
741                           format_ip4_address_and_length, address,
742                           address_length,
743                           format_vnet_sw_if_index_name, vnm,
744                           sw_if_index,
745                           format_ip4_address_and_length, x,
746                           ia->address_length,
747                           format_vnet_sw_if_index_name, vnm,
748                           sif->sw_if_index);
749                      }
750                  }));
751             }
752       }));
753     }
754   /* *INDENT-ON* */
755
756   elts_before = pool_elts (lm->if_address_pool);
757
758   error = ip_interface_address_add_del
759     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
760   if (error)
761     goto done;
762
763   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
764
765   /* intf addr routes are added/deleted on admin up/down */
766   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
767     {
768       if (is_del)
769         ip4_del_interface_routes (sw_if_index,
770                                   im, ip4_af.fib_index, address,
771                                   address_length);
772       else
773         ip4_add_interface_routes (sw_if_index,
774                                   im, ip4_af.fib_index,
775                                   pool_elt_at_index
776                                   (lm->if_address_pool, if_address_index));
777     }
778
779   /* If pool did not grow/shrink: add duplicate address. */
780   if (elts_before != pool_elts (lm->if_address_pool))
781     {
782       ip4_add_del_interface_address_callback_t *cb;
783       vec_foreach (cb, im->add_del_interface_address_callbacks)
784         cb->function (im, cb->function_opaque, sw_if_index,
785                       address, address_length, if_address_index, is_del);
786     }
787
788 done:
789   vec_free (addr_fib);
790   return error;
791 }
792
793 clib_error_t *
794 ip4_add_del_interface_address (vlib_main_t * vm,
795                                u32 sw_if_index,
796                                ip4_address_t * address,
797                                u32 address_length, u32 is_del)
798 {
799   return ip4_add_del_interface_address_internal
800     (vm, sw_if_index, address, address_length, is_del);
801 }
802
/*
 * (Re)install the subnet-broadcast route of every address configured on
 * sw_if_index whose prefix length is 30 or shorter.
 *
 * For each such address a /32 prefix is built from the address with all of
 * its host bits set, and passed to ip4_add_subnet_bcast_route() together
 * with the IPv4 FIB index bound to the interface.
 *
 * NOTE(review): 'enable' is not referenced in this body; presumably the
 * route-installing callee consults the interface's directed-broadcast
 * state itself - confirm against ip4_add_subnet_bcast_route().
 */
void
ip4_directed_broadcast (u32 sw_if_index, u8 enable)
{
  ip_interface_address_t *ia;
  ip4_main_t *im;

  im = &ip4_main;

  /*
   * when directed broadcast is enabled, the subnet broadcast route will forward
   * packets using an adjacency with a broadcast MAC. otherwise it drops
   */
  /* *INDENT-OFF* */
  foreach_ip_interface_address(&im->lookup_main, ia,
                               sw_if_index, 0,
     ({
       /* prefixes longer than /30 are skipped */
       if (ia->address_length <= 30)
         {
           ip4_address_t *ipa;

           ipa = ip_interface_address_get_address (&im->lookup_main, ia);

           /* host route for the address with every host bit set */
           fib_prefix_t pfx = {
             .fp_len = 32,
             .fp_proto = FIB_PROTOCOL_IP4,
             .fp_addr = {
               .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
             },
           };

           ip4_add_subnet_bcast_route
             (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
                                                  sw_if_index),
              &pfx, sw_if_index);
         }
     }));
  /* *INDENT-ON* */
}
841 #endif
842
843 static clib_error_t *
844 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
845 {
846   ip4_main_t *im = &ip4_main;
847   ip_interface_address_t *ia;
848   ip4_address_t *a;
849   u32 is_admin_up, fib_index;
850
851   /* Fill in lookup tables with default table (0). */
852   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
853
854   vec_validate_init_empty (im->
855                            lookup_main.if_address_pool_index_by_sw_if_index,
856                            sw_if_index, ~0);
857
858   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
859
860   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
861
862   /* *INDENT-OFF* */
863   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
864                                 0 /* honor unnumbered */,
865   ({
866     a = ip_interface_address_get_address (&im->lookup_main, ia);
867     if (is_admin_up)
868       ip4_add_interface_routes (sw_if_index,
869                                 im, fib_index,
870                                 ia);
871     else
872       ip4_del_interface_routes (sw_if_index,
873                                 im, fib_index,
874                                 a, ia->address_length);
875   }));
876   /* *INDENT-ON* */
877
878   return 0;
879 }
880
881 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
882
/* Built-in ip4 unicast rx feature path definition */
/*
 * Ordering constraints declared below through .runs_before:
 *
 *   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx
 *     -> ip4-source-check-via-any -> ip4-policer-classify
 *     -> ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass
 *     -> ip4-lookup
 *
 * ip4-source-and-port-range-check-rx is only constrained to run before
 * ip4-policer-classify, and ip4-not-enabled only before ip4-lookup.
 */
/* *INDENT-OFF* */
/* The arc starts at the two ip4 input nodes and ends at ip4-lookup. */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
};

VNET_FEATURE_INIT (ip4_source_check_1, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-rx",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
};

VNET_FEATURE_INIT (ip4_source_check_2, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-any",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec4-input-feature",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/*
 * This is the feature that ip4_sw_interface_add_del() enables on interface
 * creation and disables on interface deletion.
 */
VNET_FEATURE_INIT (ip4_not_enabled, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};
969
/* Built-in ip4 multicast rx feature path definition */
/*
 * The arc starts at the two ip4 input nodes and ends at
 * ip4-mfib-forward-lookup; both vpath-input-ip4 and ip4-not-enabled are
 * declared to run before that last node.
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-mfib-forward-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/*
 * This is the feature that ip4_sw_interface_add_del() enables on interface
 * creation and disables on interface deletion.
 */
VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};
999
/* Source and port-range check ip4 tx feature path definition */
/*
 * Ordering constraints declared below through .runs_before:
 *
 *   ip4-source-and-port-range-check-tx -> ip4-outacl
 *     -> ipsec4-output-feature -> interface-output
 *
 * The arc starts at ip4-rewrite, ip4-midchain and ip4-dvr-dpo and ends at
 * interface-output.
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  .last_in_arc = "interface-output",
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ip4-outacl"),
};

VNET_FEATURE_INIT (ip4_outacl, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-outacl",
  .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec4-output-feature",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
/* *INDENT-ON* */
1038
1039 static clib_error_t *
1040 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1041 {
1042   ip4_main_t *im = &ip4_main;
1043
1044   /* Fill in lookup tables with default table (0). */
1045   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1046   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1047
1048   if (!is_add)
1049     {
1050       ip4_main_t *im4 = &ip4_main;
1051       ip_lookup_main_t *lm4 = &im4->lookup_main;
1052       ip_interface_address_t *ia = 0;
1053       ip4_address_t *address;
1054       vlib_main_t *vm = vlib_get_main ();
1055
1056       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1057       /* *INDENT-OFF* */
1058       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1059       ({
1060         address = ip_interface_address_get_address (lm4, ia);
1061         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1062       }));
1063       /* *INDENT-ON* */
1064     }
1065
1066   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1067                                is_add, 0, 0);
1068
1069   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1070                                sw_if_index, is_add, 0, 0);
1071
1072   return /* no error */ 0;
1073 }
1074
1075 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1076
/*
 * Global IP4 main.
 *
 * The object is defined only when CLIB_MARCH_VARIANT is not set, so builds
 * of this file with CLIB_MARCH_VARIANT defined do not emit a second
 * definition.
 */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
1081
1082 static clib_error_t *
1083 ip4_lookup_init (vlib_main_t * vm)
1084 {
1085   ip4_main_t *im = &ip4_main;
1086   clib_error_t *error;
1087   uword i;
1088
1089   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1090     return error;
1091   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1092     return (error);
1093   if ((error = vlib_call_init_function (vm, fib_module_init)))
1094     return error;
1095   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1096     return error;
1097
1098   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1099     {
1100       u32 m;
1101
1102       if (i < 32)
1103         m = pow2_mask (i) << (32 - i);
1104       else
1105         m = ~0;
1106       im->fib_masks[i] = clib_host_to_net_u32 (m);
1107     }
1108
1109   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1110
1111   /* Create FIB with index 0 and table id of 0. */
1112   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1113                                      FIB_SOURCE_DEFAULT_ROUTE);
1114   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1115                                       MFIB_SOURCE_DEFAULT_ROUTE);
1116
1117   {
1118     pg_node_t *pn;
1119     pn = pg_get_node (ip4_lookup_node.index);
1120     pn->unformat_edit = unformat_pg_ip4_header;
1121   }
1122
1123   {
1124     ethernet_arp_header_t h;
1125
1126     clib_memset (&h, 0, sizeof (h));
1127
1128 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1129 #define _8(f,v) h.f = v;
1130     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1131     _16 (l3_type, ETHERNET_TYPE_IP4);
1132     _8 (n_l2_address_bytes, 6);
1133     _8 (n_l3_address_bytes, 4);
1134     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1135 #undef _16
1136 #undef _8
1137
1138     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1139                                /* data */ &h,
1140                                sizeof (h),
1141                                /* alloc chunk size */ 8,
1142                                "ip4 arp");
1143   }
1144
1145   return error;
1146 }
1147
1148 VLIB_INIT_FUNCTION (ip4_lookup_init);
1149
/*
 * Trace record shared by the ip4 forwarding nodes in this file; it is filled
 * by ip4_forward_next_trace() and rendered by the format_ip4_*_trace
 * functions.
 */
typedef struct
{
  /* Adjacency taken. */
  u32 dpo_index;
  /* Flow hash copied from the buffer's ip.flow_hash. */
  u32 flow_hash;
  /*
   * The buffer's sw_if_index[VLIB_TX] when that is set (not ~0), otherwise
   * the FIB index bound to the RX interface.  format_ip4_rewrite_trace()
   * prints this field under the label "tx_sw_if_index".
   */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  /* 64 - 1 * sizeof (u32): 60 bytes when u32 is 4 bytes wide. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1161
1162 #ifndef CLIB_MARCH_VARIANT
1163 u8 *
1164 format_ip4_forward_next_trace (u8 * s, va_list * args)
1165 {
1166   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1167   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1168   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1169   u32 indent = format_get_indent (s);
1170   s = format (s, "%U%U",
1171               format_white_space, indent,
1172               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1173   return s;
1174 }
1175 #endif
1176
1177 static u8 *
1178 format_ip4_lookup_trace (u8 * s, va_list * args)
1179 {
1180   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1181   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1182   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1183   u32 indent = format_get_indent (s);
1184
1185   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1186               t->fib_index, t->dpo_index, t->flow_hash);
1187   s = format (s, "\n%U%U",
1188               format_white_space, indent,
1189               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1190   return s;
1191 }
1192
1193 static u8 *
1194 format_ip4_rewrite_trace (u8 * s, va_list * args)
1195 {
1196   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1197   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1198   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1199   u32 indent = format_get_indent (s);
1200
1201   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1202               t->fib_index, t->dpo_index, format_ip_adjacency,
1203               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1204   s = format (s, "\n%U%U",
1205               format_white_space, indent,
1206               format_ip_adjacency_packet_data,
1207               t->packet_data, sizeof (t->packet_data));
1208   return s;
1209 }
1210
1211 #ifndef CLIB_MARCH_VARIANT
1212 /* Common trace function for all ip4-forward next nodes. */
1213 void
1214 ip4_forward_next_trace (vlib_main_t * vm,
1215                         vlib_node_runtime_t * node,
1216                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1217 {
1218   u32 *from, n_left;
1219   ip4_main_t *im = &ip4_main;
1220
1221   n_left = frame->n_vectors;
1222   from = vlib_frame_vector_args (frame);
1223
1224   while (n_left >= 4)
1225     {
1226       u32 bi0, bi1;
1227       vlib_buffer_t *b0, *b1;
1228       ip4_forward_next_trace_t *t0, *t1;
1229
1230       /* Prefetch next iteration. */
1231       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1232       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1233
1234       bi0 = from[0];
1235       bi1 = from[1];
1236
1237       b0 = vlib_get_buffer (vm, bi0);
1238       b1 = vlib_get_buffer (vm, bi1);
1239
1240       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1241         {
1242           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1243           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1244           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1245           t0->fib_index =
1246             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1247              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1248             vec_elt (im->fib_index_by_sw_if_index,
1249                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1250
1251           clib_memcpy_fast (t0->packet_data,
1252                             vlib_buffer_get_current (b0),
1253                             sizeof (t0->packet_data));
1254         }
1255       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1256         {
1257           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1258           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1259           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1260           t1->fib_index =
1261             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1262              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1263             vec_elt (im->fib_index_by_sw_if_index,
1264                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1265           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1266                             sizeof (t1->packet_data));
1267         }
1268       from += 2;
1269       n_left -= 2;
1270     }
1271
1272   while (n_left >= 1)
1273     {
1274       u32 bi0;
1275       vlib_buffer_t *b0;
1276       ip4_forward_next_trace_t *t0;
1277
1278       bi0 = from[0];
1279
1280       b0 = vlib_get_buffer (vm, bi0);
1281
1282       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1283         {
1284           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1285           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1286           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1287           t0->fib_index =
1288             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1289              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1290             vec_elt (im->fib_index_by_sw_if_index,
1291                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1292           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1293                             sizeof (t0->packet_data));
1294         }
1295       from += 1;
1296       n_left -= 1;
1297     }
1298 }
1299
1300 /* Compute TCP/UDP/ICMP4 checksum in software. */
1301 u16
1302 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1303                               ip4_header_t * ip0)
1304 {
1305   ip_csum_t sum0;
1306   u32 ip_header_length, payload_length_host_byte_order;
1307
1308   /* Initialize checksum with ip header. */
1309   ip_header_length = ip4_header_bytes (ip0);
1310   payload_length_host_byte_order =
1311     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1312   sum0 =
1313     clib_host_to_net_u32 (payload_length_host_byte_order +
1314                           (ip0->protocol << 16));
1315
1316   if (BITS (uword) == 32)
1317     {
1318       sum0 =
1319         ip_csum_with_carry (sum0,
1320                             clib_mem_unaligned (&ip0->src_address, u32));
1321       sum0 =
1322         ip_csum_with_carry (sum0,
1323                             clib_mem_unaligned (&ip0->dst_address, u32));
1324     }
1325   else
1326     sum0 =
1327       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1328
1329   return ip_calculate_l4_checksum (vm, p0, sum0,
1330                                    payload_length_host_byte_order, (u8 *) ip0,
1331                                    ip_header_length, NULL);
1332 }
1333
1334 u32
1335 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1336 {
1337   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1338   udp_header_t *udp0;
1339   u16 sum16;
1340
1341   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1342           || ip0->protocol == IP_PROTOCOL_UDP);
1343
1344   udp0 = (void *) (ip0 + 1);
1345   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1346     {
1347       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1348                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1349       return p0->flags;
1350     }
1351
1352   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1353
1354   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1355                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1356
1357   return p0->flags;
1358 }
1359 #endif
1360
/*
 * ip4-local feature arc: starts at the ip4-local node and ends at
 * ip4-local-end-of-arc.  ip4_local_set_next_and_error() starts this arc via
 * vnet_feat_arc_ip4_local.feature_arc_index.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
  .last_in_arc = "ip4-local-end-of-arc",
};
/* *INDENT-ON* */
1369
1370 static inline void
1371 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1372                             ip4_header_t * ip, u8 is_udp, u8 * error,
1373                             u8 * good_tcp_udp)
1374 {
1375   u32 flags0;
1376   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1377   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1378   if (is_udp)
1379     {
1380       udp_header_t *udp;
1381       u32 ip_len, udp_len;
1382       i32 len_diff;
1383       udp = ip4_next_header (ip);
1384       /* Verify UDP length. */
1385       ip_len = clib_net_to_host_u16 (ip->length);
1386       udp_len = clib_net_to_host_u16 (udp->length);
1387
1388       len_diff = ip_len - udp_len;
1389       *good_tcp_udp &= len_diff >= 0;
1390       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1391     }
1392 }
1393
/*
 * Checksum-state predicates on a buffer pointer _b.
 *
 * Every argument and every expansion is fully parenthesised so the macros
 * can be negated, compared or combined with other operators, and can take
 * arbitrary pointer expressions, without precedence surprises.  Each macro
 * evaluates to 0 or 1.  _b may be evaluated more than once, so it must not
 * have side effects.
 */

/* 1 when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * 1 when the packet is TCP/UDP and its checksum has neither been computed
 * already nor been flagged for offload, i.e. it must be checked in software.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is flagged for offload. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1405
1406 static inline void
1407 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1408                          ip4_header_t * ih, u8 * error)
1409 {
1410   u8 is_udp, is_tcp_udp, good_tcp_udp;
1411
1412   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1413   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1414
1415   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1416     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1417   else
1418     good_tcp_udp = ip4_local_csum_is_valid (b);
1419
1420   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1421   *error = (is_tcp_udp && !good_tcp_udp
1422             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1423 }
1424
1425 static inline void
1426 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1427                             ip4_header_t ** ih, u8 * error)
1428 {
1429   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1430
1431   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1432   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1433
1434   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1435   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1436
1437   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1438   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1439
1440   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1441                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1442     {
1443       if (is_tcp_udp[0])
1444         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1445                                     &good_tcp_udp[0]);
1446       if (is_tcp_udp[1])
1447         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1448                                     &good_tcp_udp[1]);
1449     }
1450
1451   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1452               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1453   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1454               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1455 }
1456
1457 static inline void
1458 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1459                               vlib_buffer_t * b, u16 * next, u8 error,
1460                               u8 head_of_feature_arc)
1461 {
1462   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1463   u32 next_index;
1464
1465   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1466   b->error = error ? error_node->errors[error] : 0;
1467   if (head_of_feature_arc)
1468     {
1469       next_index = *next;
1470       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1471         {
1472           vnet_feature_arc_start (arc_index,
1473                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1474                                   &next_index, b);
1475           *next = next_index;
1476         }
1477     }
1478 }
1479
1480 typedef struct
1481 {
1482   ip4_address_t src;
1483   u32 lbi;
1484   u8 error;
1485   u8 first;
1486 } ip4_local_last_check_t;
1487
1488 static inline void
1489 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1490                      ip4_local_last_check_t * last_check, u8 * error0)
1491 {
1492   ip4_fib_mtrie_leaf_t leaf0;
1493   ip4_fib_mtrie_t *mtrie0;
1494   const dpo_id_t *dpo0;
1495   load_balance_t *lb0;
1496   u32 lbi0;
1497
1498   vnet_buffer (b)->ip.fib_index =
1499     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1500     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1501
1502   /*
1503    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1504    *  adjacency for the destination address (the local interface address).
1505    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1506    *  adjacency for the source address (the remote sender's address)
1507    */
1508   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1509       last_check->first)
1510     {
1511       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1512       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1513       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1514       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1515       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1516
1517       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1518         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1519       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1520
1521       lb0 = load_balance_get (lbi0);
1522       dpo0 = load_balance_get_bucket_i (lb0, 0);
1523
1524       /*
1525        * Must have a route to source otherwise we drop the packet.
1526        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1527        *
1528        * The checks are:
1529        *  - the source is a recieve => it's from us => bogus, do this
1530        *    first since it sets a different error code.
1531        *  - uRPF check for any route to source - accept if passes.
1532        *  - allow packets destined to the broadcast address from unknown sources
1533        */
1534
1535       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1536                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1537                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1538       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1539                   && !fib_urpf_check_size (lb0->lb_urpf)
1540                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1541                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1542
1543       last_check->src.as_u32 = ip0->src_address.as_u32;
1544       last_check->lbi = lbi0;
1545       last_check->error = *error0;
1546       last_check->first = 0;
1547     }
1548   else
1549     {
1550       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1551         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1552       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1553       *error0 = last_check->error;
1554     }
1555 }
1556
1557 static inline void
1558 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1559                         ip4_local_last_check_t * last_check, u8 * error)
1560 {
1561   ip4_fib_mtrie_leaf_t leaf[2];
1562   ip4_fib_mtrie_t *mtrie[2];
1563   const dpo_id_t *dpo[2];
1564   load_balance_t *lb[2];
1565   u32 not_last_hit;
1566   u32 lbi[2];
1567
1568   not_last_hit = last_check->first;
1569   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1570   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1571
1572   vnet_buffer (b[0])->ip.fib_index =
1573     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1574     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1575     vnet_buffer (b[0])->ip.fib_index;
1576
1577   vnet_buffer (b[1])->ip.fib_index =
1578     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1579     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1580     vnet_buffer (b[1])->ip.fib_index;
1581
1582   /*
1583    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1584    *  adjacency for the destination address (the local interface address).
1585    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1586    *  adjacency for the source address (the remote sender's address)
1587    */
1588   if (PREDICT_TRUE (not_last_hit))
1589     {
1590       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1591       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1592
1593       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1594       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1595
1596       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1597                                            &ip[0]->src_address, 2);
1598       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1599                                            &ip[1]->src_address, 2);
1600
1601       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1602                                            &ip[0]->src_address, 3);
1603       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1604                                            &ip[1]->src_address, 3);
1605
1606       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1607       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1608
1609       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1610         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1611       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1612
1613       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1614         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1615       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1616
1617       lb[0] = load_balance_get (lbi[0]);
1618       lb[1] = load_balance_get (lbi[1]);
1619
1620       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1621       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1622
1623       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1624                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1625                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1626       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1627                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1628                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1629                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1630
1631       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1632                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1633                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1634       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1635                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1636                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1637                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1638
1639       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1640       last_check->lbi = lbi[1];
1641       last_check->error = error[1];
1642       last_check->first = 0;
1643     }
1644   else
1645     {
1646       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1647         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1648       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1649
1650       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1651         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1652       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1653
1654       error[0] = last_check->error;
1655       error[1] = last_check->error;
1656     }
1657 }
1658
/*
 * Packet classes returned by ip4_local_classify().
 *
 * IP_LOCAL_PACKET_TYPE_L4 is the first enumerator and therefore 0;
 * ip4_local_inline() tests the returned type for non-zero to decide that a
 * packet is not plain L4.
 */
enum ip_local_packet_type_e
{
  /* neither a fragment nor flagged as NAT-ed */
  IP_LOCAL_PACKET_TYPE_L4,
  /* buffer carries VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_NAT,
  /* ip4_is_fragment() is true for the header */
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1665
1666 /**
1667  * Determine packet type and next node.
1668  *
1669  * The expectation is that all packets that are not L4 will skip
1670  * checksums and source checks.
1671  */
1672 always_inline u8
1673 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1674 {
1675   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1676
1677   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1678     {
1679       *next = IP_LOCAL_NEXT_REASSEMBLY;
1680       return IP_LOCAL_PACKET_TYPE_FRAG;
1681     }
1682   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1683     {
1684       *next = lm->local_next_by_ip_protocol[ip->protocol];
1685       return IP_LOCAL_PACKET_TYPE_NAT;
1686     }
1687
1688   *next = lm->local_next_by_ip_protocol[ip->protocol];
1689   return IP_LOCAL_PACKET_TYPE_L4;
1690 }
1691
1692 static inline uword
1693 ip4_local_inline (vlib_main_t * vm,
1694                   vlib_node_runtime_t * node,
1695                   vlib_frame_t * frame, int head_of_feature_arc)
1696 {
1697   u32 *from, n_left_from;
1698   vlib_node_runtime_t *error_node =
1699     vlib_node_get_runtime (vm, ip4_input_node.index);
1700   u16 nexts[VLIB_FRAME_SIZE], *next;
1701   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1702   ip4_header_t *ip[2];
1703   u8 error[2], pt[2];
1704
1705   ip4_local_last_check_t last_check = {
1706     /*
1707      * 0.0.0.0 can appear as the source address of an IP packet,
1708      * as can any other address, hence the need to use the 'first'
1709      * member to make sure the .lbi is initialised for the first
1710      * packet.
1711      */
1712     .src = {.as_u32 = 0},
1713     .lbi = ~0,
1714     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1715     .first = 1,
1716   };
1717
1718   from = vlib_frame_vector_args (frame);
1719   n_left_from = frame->n_vectors;
1720
1721   if (node->flags & VLIB_NODE_FLAG_TRACE)
1722     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1723
1724   vlib_get_buffers (vm, from, bufs, n_left_from);
1725   b = bufs;
1726   next = nexts;
1727
1728   while (n_left_from >= 6)
1729     {
1730       u8 not_batch = 0;
1731
1732       /* Prefetch next iteration. */
1733       {
1734         vlib_prefetch_buffer_header (b[4], LOAD);
1735         vlib_prefetch_buffer_header (b[5], LOAD);
1736
1737         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1738         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1739       }
1740
1741       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1742
1743       ip[0] = vlib_buffer_get_current (b[0]);
1744       ip[1] = vlib_buffer_get_current (b[1]);
1745
1746       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1747       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1748
1749       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1750       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1751
1752       not_batch = pt[0] ^ pt[1];
1753
1754       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1755         goto skip_checks;
1756
1757       if (PREDICT_TRUE (not_batch == 0))
1758         {
1759           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1760           ip4_local_check_src_x2 (b, ip, &last_check, error);
1761         }
1762       else
1763         {
1764           if (!pt[0])
1765             {
1766               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1767               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1768             }
1769           if (!pt[1])
1770             {
1771               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1772               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1773             }
1774         }
1775
1776     skip_checks:
1777
1778       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1779                                     head_of_feature_arc);
1780       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1781                                     head_of_feature_arc);
1782
1783       b += 2;
1784       next += 2;
1785       n_left_from -= 2;
1786     }
1787
1788   while (n_left_from > 0)
1789     {
1790       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1791
1792       ip[0] = vlib_buffer_get_current (b[0]);
1793       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1794       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1795
1796       if (head_of_feature_arc == 0 || pt[0])
1797         goto skip_check;
1798
1799       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1800       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1801
1802     skip_check:
1803
1804       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1805                                     head_of_feature_arc);
1806
1807       b += 1;
1808       next += 1;
1809       n_left_from -= 1;
1810     }
1811
1812   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1813   return frame->n_vectors;
1814 }
1815
1816 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1817                                vlib_frame_t * frame)
1818 {
1819   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1820 }
1821
1822 /* *INDENT-OFF* */
1823 VLIB_REGISTER_NODE (ip4_local_node) =
1824 {
1825   .name = "ip4-local",
1826   .vector_size = sizeof (u32),
1827   .format_trace = format_ip4_forward_next_trace,
1828   .n_next_nodes = IP_LOCAL_N_NEXT,
1829   .next_nodes =
1830   {
1831     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1832     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1833     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1834     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1835     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1836   },
1837 };
1838 /* *INDENT-ON* */
1839
1840
1841 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1842                                           vlib_node_runtime_t * node,
1843                                           vlib_frame_t * frame)
1844 {
1845   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1846 }
1847
1848 /* *INDENT-OFF* */
1849 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1850   .name = "ip4-local-end-of-arc",
1851   .vector_size = sizeof (u32),
1852
1853   .format_trace = format_ip4_forward_next_trace,
1854   .sibling_of = "ip4-local",
1855 };
1856
1857 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1858   .arc_name = "ip4-local",
1859   .node_name = "ip4-local-end-of-arc",
1860   .runs_before = 0, /* not before any other features */
1861 };
1862 /* *INDENT-ON* */
1863
1864 #ifndef CLIB_MARCH_VARIANT
1865 void
1866 ip4_register_protocol (u32 protocol, u32 node_index)
1867 {
1868   vlib_main_t *vm = vlib_get_main ();
1869   ip4_main_t *im = &ip4_main;
1870   ip_lookup_main_t *lm = &im->lookup_main;
1871
1872   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1873   lm->local_next_by_ip_protocol[protocol] =
1874     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1875 }
1876
1877 void
1878 ip4_unregister_protocol (u32 protocol)
1879 {
1880   ip4_main_t *im = &ip4_main;
1881   ip_lookup_main_t *lm = &im->lookup_main;
1882
1883   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1884   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1885 }
1886 #endif
1887
1888 static clib_error_t *
1889 show_ip_local_command_fn (vlib_main_t * vm,
1890                           unformat_input_t * input, vlib_cli_command_t * cmd)
1891 {
1892   ip4_main_t *im = &ip4_main;
1893   ip_lookup_main_t *lm = &im->lookup_main;
1894   int i;
1895
1896   vlib_cli_output (vm, "Protocols handled by ip4_local");
1897   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1898     {
1899       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1900         {
1901           u32 node_index = vlib_get_node (vm,
1902                                           ip4_local_node.index)->
1903             next_nodes[lm->local_next_by_ip_protocol[i]];
1904           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1905                            format_vlib_node_name, vm, node_index);
1906         }
1907     }
1908   return 0;
1909 }
1910
1911
1912
1913 /*?
1914  * Display the set of protocols handled by the local IPv4 stack.
1915  *
1916  * @cliexpar
1917  * Example of how to display local protocol table:
1918  * @cliexstart{show ip local}
1919  * Protocols handled by ip4_local
1920  * 1
1921  * 17
1922  * 47
1923  * @cliexend
1924 ?*/
1925 /* *INDENT-OFF* */
1926 VLIB_CLI_COMMAND (show_ip_local, static) =
1927 {
1928   .path = "show ip local",
1929   .function = show_ip_local_command_fn,
1930   .short_help = "show ip local",
1931 };
1932 /* *INDENT-ON* */
1933
/*
 * Fixed next nodes of the ip4-rewrite node family.  A successfully
 * rewritten packet instead takes its next index from the adjacency's
 * rewrite header.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,       /* default for every packet */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1, /* an ICMP error is to be generated */
  IP4_REWRITE_NEXT_FRAGMENT = 2,   /* packet needs IP fragmentation */
  IP4_REWRITE_N_NEXT               /* Last */
} ip4_rewrite_next_t;
1941
/**
 * The bits of an IPv4 address (held as a u32 in network byte order)
 * that are copied into a multicast MAC address.  The constant differs
 * by host endianness so that it selects the same address bytes on both.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else /* little endian host */
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif /* CLIB_ARCH_IS_BIG_ENDIAN */
1951
1952 always_inline void
1953 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1954                u16 adj_packet_bytes, bool df, u16 * next,
1955                u8 is_midchain, u32 * error)
1956 {
1957   if (packet_len > adj_packet_bytes)
1958     {
1959       *error = IP4_ERROR_MTU_EXCEEDED;
1960       if (df)
1961         {
1962           icmp4_error_set_vnet_buffer
1963             (b, ICMP4_destination_unreachable,
1964              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1965              adj_packet_bytes);
1966           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1967         }
1968       else
1969         {
1970           /* IP fragmentation */
1971           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1972                                    (is_midchain ?
1973                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1974                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1975           *next = IP4_REWRITE_NEXT_FRAGMENT;
1976         }
1977     }
1978 }
1979
1980 /* increment TTL & update checksum.
1981    Works either endian, so no need for byte swap. */
1982 static_always_inline void
1983 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
1984 {
1985   i32 ttl;
1986   u32 checksum;
1987   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
1988     {
1989       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1990       return;
1991     }
1992
1993   ttl = ip->ttl;
1994
1995   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
1996   checksum += checksum >= 0xffff;
1997
1998   ip->checksum = checksum;
1999   ttl += 1;
2000   ip->ttl = ttl;
2001
2002   ASSERT (ip->checksum == ip4_header_checksum (ip));
2003 }
2004
2005 /* Decrement TTL & update checksum.
2006    Works either endian, so no need for byte swap. */
2007 static_always_inline void
2008 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2009                             u32 * error)
2010 {
2011   i32 ttl;
2012   u32 checksum;
2013   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2014     {
2015       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2016       return;
2017     }
2018
2019   ttl = ip->ttl;
2020
2021   /* Input node should have reject packets with ttl 0. */
2022   ASSERT (ip->ttl > 0);
2023
2024   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2025   checksum += checksum >= 0xffff;
2026
2027   ip->checksum = checksum;
2028   ttl -= 1;
2029   ip->ttl = ttl;
2030
2031   /*
2032    * If the ttl drops below 1 when forwarding, generate
2033    * an ICMP response.
2034    */
2035   if (PREDICT_FALSE (ttl <= 0))
2036     {
2037       *error = IP4_ERROR_TIME_EXPIRED;
2038       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2039       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2040                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2041                                    0);
2042       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2043     }
2044
2045   /* Verify checksum. */
2046   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2047           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2048 }
2049
2050
2051 always_inline uword
2052 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2053                              vlib_node_runtime_t * node,
2054                              vlib_frame_t * frame,
2055                              int do_counters, int is_midchain, int is_mcast)
2056 {
2057   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2058   u32 *from = vlib_frame_vector_args (frame);
2059   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2060   u16 nexts[VLIB_FRAME_SIZE], *next;
2061   u32 n_left_from;
2062   vlib_node_runtime_t *error_node =
2063     vlib_node_get_runtime (vm, ip4_input_node.index);
2064
2065   n_left_from = frame->n_vectors;
2066   u32 thread_index = vm->thread_index;
2067
2068   vlib_get_buffers (vm, from, bufs, n_left_from);
2069   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2070
2071 #if (CLIB_N_PREFETCHES >= 8)
2072   if (n_left_from >= 6)
2073     {
2074       int i;
2075       for (i = 2; i < 6; i++)
2076         vlib_prefetch_buffer_header (bufs[i], LOAD);
2077     }
2078
2079   next = nexts;
2080   b = bufs;
2081   while (n_left_from >= 8)
2082     {
2083       const ip_adjacency_t *adj0, *adj1;
2084       ip4_header_t *ip0, *ip1;
2085       u32 rw_len0, error0, adj_index0;
2086       u32 rw_len1, error1, adj_index1;
2087       u32 tx_sw_if_index0, tx_sw_if_index1;
2088       u8 *p;
2089
2090       vlib_prefetch_buffer_header (b[6], LOAD);
2091       vlib_prefetch_buffer_header (b[7], LOAD);
2092
2093       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2094       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2095
2096       /*
2097        * pre-fetch the per-adjacency counters
2098        */
2099       if (do_counters)
2100         {
2101           vlib_prefetch_combined_counter (&adjacency_counters,
2102                                           thread_index, adj_index0);
2103           vlib_prefetch_combined_counter (&adjacency_counters,
2104                                           thread_index, adj_index1);
2105         }
2106
2107       ip0 = vlib_buffer_get_current (b[0]);
2108       ip1 = vlib_buffer_get_current (b[1]);
2109
2110       error0 = error1 = IP4_ERROR_NONE;
2111
2112       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2113       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2114
2115       /* Rewrite packet header and updates lengths. */
2116       adj0 = adj_get (adj_index0);
2117       adj1 = adj_get (adj_index1);
2118
2119       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2120       rw_len0 = adj0[0].rewrite_header.data_bytes;
2121       rw_len1 = adj1[0].rewrite_header.data_bytes;
2122       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2123       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2124
2125       p = vlib_buffer_get_current (b[2]);
2126       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2127       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2128
2129       p = vlib_buffer_get_current (b[3]);
2130       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2131       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2132
2133       /* Check MTU of outgoing interface. */
2134       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2135       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2136
2137       if (b[0]->flags & VNET_BUFFER_F_GSO)
2138         ip0_len = gso_mtu_sz (b[0]);
2139       if (b[1]->flags & VNET_BUFFER_F_GSO)
2140         ip1_len = gso_mtu_sz (b[1]);
2141
2142       ip4_mtu_check (b[0], ip0_len,
2143                      adj0[0].rewrite_header.max_l3_packet_bytes,
2144                      ip0->flags_and_fragment_offset &
2145                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2146                      next + 0, is_midchain, &error0);
2147       ip4_mtu_check (b[1], ip1_len,
2148                      adj1[0].rewrite_header.max_l3_packet_bytes,
2149                      ip1->flags_and_fragment_offset &
2150                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2151                      next + 1, is_midchain, &error1);
2152
2153       if (is_mcast)
2154         {
2155           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2156                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2157                     IP4_ERROR_SAME_INTERFACE : error0);
2158           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2159                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2160                     IP4_ERROR_SAME_INTERFACE : error1);
2161         }
2162
2163       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2164        * to see the IP header */
2165       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2166         {
2167           u32 next_index = adj0[0].rewrite_header.next_index;
2168           vlib_buffer_advance (b[0], -(word) rw_len0);
2169
2170           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2171           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2172
2173           if (PREDICT_FALSE
2174               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2175             vnet_feature_arc_start (lm->output_feature_arc_index,
2176                                     tx_sw_if_index0, &next_index, b[0]);
2177           next[0] = next_index;
2178           if (is_midchain)
2179             calc_checksums (vm, b[0]);
2180         }
2181       else
2182         {
2183           b[0]->error = error_node->errors[error0];
2184           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2185             ip4_ttl_inc (b[0], ip0);
2186         }
2187       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2188         {
2189           u32 next_index = adj1[0].rewrite_header.next_index;
2190           vlib_buffer_advance (b[1], -(word) rw_len1);
2191
2192           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2193           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2194
2195           if (PREDICT_FALSE
2196               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2197             vnet_feature_arc_start (lm->output_feature_arc_index,
2198                                     tx_sw_if_index1, &next_index, b[1]);
2199           next[1] = next_index;
2200           if (is_midchain)
2201             calc_checksums (vm, b[1]);
2202         }
2203       else
2204         {
2205           b[1]->error = error_node->errors[error1];
2206           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2207             ip4_ttl_inc (b[1], ip1);
2208         }
2209
2210       /* Guess we are only writing on simple Ethernet header. */
2211       vnet_rewrite_two_headers (adj0[0], adj1[0],
2212                                 ip0, ip1, sizeof (ethernet_header_t));
2213
2214       if (do_counters)
2215         {
2216           if (error0 == IP4_ERROR_NONE)
2217             vlib_increment_combined_counter
2218               (&adjacency_counters,
2219                thread_index,
2220                adj_index0, 1,
2221                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2222
2223           if (error1 == IP4_ERROR_NONE)
2224             vlib_increment_combined_counter
2225               (&adjacency_counters,
2226                thread_index,
2227                adj_index1, 1,
2228                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2229         }
2230
2231       if (is_midchain)
2232         {
2233           if (error0 == IP4_ERROR_NONE && adj0->sub_type.midchain.fixup_func)
2234             adj0->sub_type.midchain.fixup_func
2235               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2236           if (error1 == IP4_ERROR_NONE && adj1->sub_type.midchain.fixup_func)
2237             adj1->sub_type.midchain.fixup_func
2238               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2239         }
2240
2241       if (is_mcast)
2242         {
2243           /* copy bytes from the IP address into the MAC rewrite */
2244           if (error0 == IP4_ERROR_NONE)
2245             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2246                                         adj0->rewrite_header.dst_mcast_offset,
2247                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2248           if (error1 == IP4_ERROR_NONE)
2249             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2250                                         adj1->rewrite_header.dst_mcast_offset,
2251                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2252         }
2253
2254       next += 2;
2255       b += 2;
2256       n_left_from -= 2;
2257     }
2258 #elif (CLIB_N_PREFETCHES >= 4)
2259   next = nexts;
2260   b = bufs;
2261   while (n_left_from >= 1)
2262     {
2263       ip_adjacency_t *adj0;
2264       ip4_header_t *ip0;
2265       u32 rw_len0, error0, adj_index0;
2266       u32 tx_sw_if_index0;
2267       u8 *p;
2268
2269       /* Prefetch next iteration */
2270       if (PREDICT_TRUE (n_left_from >= 4))
2271         {
2272           ip_adjacency_t *adj2;
2273           u32 adj_index2;
2274
2275           vlib_prefetch_buffer_header (b[3], LOAD);
2276           vlib_prefetch_buffer_data (b[2], LOAD);
2277
2278           /* Prefetch adj->rewrite_header */
2279           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2280           adj2 = adj_get (adj_index2);
2281           p = (u8 *) adj2;
2282           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2283                          LOAD);
2284         }
2285
2286       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2287
2288       /*
2289        * Prefetch the per-adjacency counters
2290        */
2291       if (do_counters)
2292         {
2293           vlib_prefetch_combined_counter (&adjacency_counters,
2294                                           thread_index, adj_index0);
2295         }
2296
2297       ip0 = vlib_buffer_get_current (b[0]);
2298
2299       error0 = IP4_ERROR_NONE;
2300
2301       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2302
2303       /* Rewrite packet header and updates lengths. */
2304       adj0 = adj_get (adj_index0);
2305
2306       /* Rewrite header was prefetched. */
2307       rw_len0 = adj0[0].rewrite_header.data_bytes;
2308       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2309
2310       /* Check MTU of outgoing interface. */
2311       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2312
2313       if (b[0]->flags & VNET_BUFFER_F_GSO)
2314         ip0_len = gso_mtu_sz (b[0]);
2315
2316       ip4_mtu_check (b[0], ip0_len,
2317                      adj0[0].rewrite_header.max_l3_packet_bytes,
2318                      ip0->flags_and_fragment_offset &
2319                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2320                      next + 0, is_midchain, &error0);
2321
2322       if (is_mcast)
2323         {
2324           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2325                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2326                     IP4_ERROR_SAME_INTERFACE : error0);
2327         }
2328
2329       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2330        * to see the IP header */
2331       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2332         {
2333           u32 next_index = adj0[0].rewrite_header.next_index;
2334           vlib_buffer_advance (b[0], -(word) rw_len0);
2335           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2336           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2337
2338           if (PREDICT_FALSE
2339               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2340             vnet_feature_arc_start (lm->output_feature_arc_index,
2341                                     tx_sw_if_index0, &next_index, b[0]);
2342           next[0] = next_index;
2343
2344           if (is_midchain)
2345             calc_checksums (vm, b[0]);
2346
2347           /* Guess we are only writing on simple Ethernet header. */
2348           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2349
2350           /*
2351            * Bump the per-adjacency counters
2352            */
2353           if (do_counters)
2354             vlib_increment_combined_counter
2355               (&adjacency_counters,
2356                thread_index,
2357                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2358                                                            b[0]) + rw_len0);
2359
2360           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2361             adj0->sub_type.midchain.fixup_func
2362               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2363
2364           if (is_mcast)
2365             /* copy bytes from the IP address into the MAC rewrite */
2366             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2367                                         adj0->rewrite_header.dst_mcast_offset,
2368                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2369         }
2370       else
2371         {
2372           b[0]->error = error_node->errors[error0];
2373           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2374             ip4_ttl_inc (b[0], ip0);
2375         }
2376
2377       next += 1;
2378       b += 1;
2379       n_left_from -= 1;
2380     }
2381 #endif
2382
2383   while (n_left_from > 0)
2384     {
2385       ip_adjacency_t *adj0;
2386       ip4_header_t *ip0;
2387       u32 rw_len0, adj_index0, error0;
2388       u32 tx_sw_if_index0;
2389
2390       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2391
2392       adj0 = adj_get (adj_index0);
2393
2394       if (do_counters)
2395         vlib_prefetch_combined_counter (&adjacency_counters,
2396                                         thread_index, adj_index0);
2397
2398       ip0 = vlib_buffer_get_current (b[0]);
2399
2400       error0 = IP4_ERROR_NONE;
2401
2402       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2403
2404
2405       /* Update packet buffer attributes/set output interface. */
2406       rw_len0 = adj0[0].rewrite_header.data_bytes;
2407       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2408
2409       /* Check MTU of outgoing interface. */
2410       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2411       if (b[0]->flags & VNET_BUFFER_F_GSO)
2412         ip0_len = gso_mtu_sz (b[0]);
2413
2414       ip4_mtu_check (b[0], ip0_len,
2415                      adj0[0].rewrite_header.max_l3_packet_bytes,
2416                      ip0->flags_and_fragment_offset &
2417                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2418                      next + 0, is_midchain, &error0);
2419
2420       if (is_mcast)
2421         {
2422           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2423                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2424                     IP4_ERROR_SAME_INTERFACE : error0);
2425         }
2426
2427       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2428        * to see the IP header */
2429       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2430         {
2431           u32 next_index = adj0[0].rewrite_header.next_index;
2432           vlib_buffer_advance (b[0], -(word) rw_len0);
2433           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2434           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2435
2436           if (PREDICT_FALSE
2437               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2438             vnet_feature_arc_start (lm->output_feature_arc_index,
2439                                     tx_sw_if_index0, &next_index, b[0]);
2440           next[0] = next_index;
2441
2442           if (is_midchain)
2443             /* this acts on the packet that is about to be encapped */
2444             calc_checksums (vm, b[0]);
2445
2446           /* Guess we are only writing on simple Ethernet header. */
2447           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2448
2449           if (do_counters)
2450             vlib_increment_combined_counter
2451               (&adjacency_counters,
2452                thread_index, adj_index0, 1,
2453                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2454
2455           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2456             adj0->sub_type.midchain.fixup_func
2457               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2458
2459           if (is_mcast)
2460             /* copy bytes from the IP address into the MAC rewrite */
2461             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2462                                         adj0->rewrite_header.dst_mcast_offset,
2463                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2464         }
2465       else
2466         {
2467           b[0]->error = error_node->errors[error0];
2468           /* undo the TTL decrement - we'll be back to do it again */
2469           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2470             ip4_ttl_inc (b[0], ip0);
2471         }
2472
2473       next += 1;
2474       b += 1;
2475       n_left_from -= 1;
2476     }
2477
2478
2479   /* Need to do trace after rewrites to pick up new packet data. */
2480   if (node->flags & VLIB_NODE_FLAG_TRACE)
2481     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2482
2483   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2484   return frame->n_vectors;
2485 }
2486
2487 always_inline uword
2488 ip4_rewrite_inline (vlib_main_t * vm,
2489                     vlib_node_runtime_t * node,
2490                     vlib_frame_t * frame,
2491                     int do_counters, int is_midchain, int is_mcast)
2492 {
2493   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2494                                       is_midchain, is_mcast);
2495 }
2496
2497
2498 /** @brief IPv4 rewrite node.
2499     @node ip4-rewrite
2500
2501     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2502     header checksum, fetch the ip adjacency, check the outbound mtu,
2503     apply the adjacency rewrite, and send pkts to the adjacency
2504     rewrite header's rewrite_next_index.
2505
2506     @param vm vlib_main_t corresponding to the current thread
2507     @param node vlib_node_runtime_t
2508     @param frame vlib_frame_t whose contents should be dispatched
2509
2510     @par Graph mechanics: buffer metadata, next index usage
2511
2512     @em Uses:
2513     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2514         - the rewrite adjacency index
2515     - <code>adj->lookup_next_index</code>
2516         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2517           the packet will be dropped.
2518     - <code>adj->rewrite_header</code>
2519         - Rewrite string length, rewrite string, next_index
2520
2521     @em Sets:
2522     - <code>b->current_data, b->current_length</code>
2523         - Updated net of applying the rewrite string
2524
2525     <em>Next Indices:</em>
2526     - <code> adj->rewrite_header.next_index </code>
2527       or @c ip4-drop
2528 */
2529
2530 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2531                                  vlib_frame_t * frame)
2532 {
2533   if (adj_are_counters_enabled ())
2534     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2535   else
2536     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2537 }
2538
2539 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2540                                        vlib_node_runtime_t * node,
2541                                        vlib_frame_t * frame)
2542 {
2543   if (adj_are_counters_enabled ())
2544     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2545   else
2546     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2547 }
2548
2549 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2550                                   vlib_node_runtime_t * node,
2551                                   vlib_frame_t * frame)
2552 {
2553   if (adj_are_counters_enabled ())
2554     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2555   else
2556     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2557 }
2558
2559 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2560                                        vlib_node_runtime_t * node,
2561                                        vlib_frame_t * frame)
2562 {
2563   if (adj_are_counters_enabled ())
2564     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2565   else
2566     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2567 }
2568
2569 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2570                                         vlib_node_runtime_t * node,
2571                                         vlib_frame_t * frame)
2572 {
2573   if (adj_are_counters_enabled ())
2574     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2575   else
2576     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2577 }
2578
2579 /* *INDENT-OFF* */
2580 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2581   .name = "ip4-rewrite",
2582   .vector_size = sizeof (u32),
2583
2584   .format_trace = format_ip4_rewrite_trace,
2585
2586   .n_next_nodes = IP4_REWRITE_N_NEXT,
2587   .next_nodes = {
2588     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2589     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2590     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2591   },
2592 };
2593
2594 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2595   .name = "ip4-rewrite-bcast",
2596   .vector_size = sizeof (u32),
2597
2598   .format_trace = format_ip4_rewrite_trace,
2599   .sibling_of = "ip4-rewrite",
2600 };
2601
2602 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2603   .name = "ip4-rewrite-mcast",
2604   .vector_size = sizeof (u32),
2605
2606   .format_trace = format_ip4_rewrite_trace,
2607   .sibling_of = "ip4-rewrite",
2608 };
2609
2610 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2611   .name = "ip4-mcast-midchain",
2612   .vector_size = sizeof (u32),
2613
2614   .format_trace = format_ip4_rewrite_trace,
2615   .sibling_of = "ip4-rewrite",
2616 };
2617
2618 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2619   .name = "ip4-midchain",
2620   .vector_size = sizeof (u32),
2621   .format_trace = format_ip4_rewrite_trace,
2622   .sibling_of = "ip4-rewrite",
2623 };
/* *INDENT-ON* */
2625
2626 static int
2627 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2628 {
2629   ip4_fib_mtrie_t *mtrie0;
2630   ip4_fib_mtrie_leaf_t leaf0;
2631   u32 lbi0;
2632
2633   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2634
2635   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2636   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2637   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2638
2639   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2640
2641   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2642 }
2643
2644 static clib_error_t *
2645 test_lookup_command_fn (vlib_main_t * vm,
2646                         unformat_input_t * input, vlib_cli_command_t * cmd)
2647 {
2648   ip4_fib_t *fib;
2649   u32 table_id = 0;
2650   f64 count = 1;
2651   u32 n;
2652   int i;
2653   ip4_address_t ip4_base_address;
2654   u64 errors = 0;
2655
2656   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2657     {
2658       if (unformat (input, "table %d", &table_id))
2659         {
2660           /* Make sure the entry exists. */
2661           fib = ip4_fib_get (table_id);
2662           if ((fib) && (fib->index != table_id))
2663             return clib_error_return (0, "<fib-index> %d does not exist",
2664                                       table_id);
2665         }
2666       else if (unformat (input, "count %f", &count))
2667         ;
2668
2669       else if (unformat (input, "%U",
2670                          unformat_ip4_address, &ip4_base_address))
2671         ;
2672       else
2673         return clib_error_return (0, "unknown input `%U'",
2674                                   format_unformat_error, input);
2675     }
2676
2677   n = count;
2678
2679   for (i = 0; i < n; i++)
2680     {
2681       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2682         errors++;
2683
2684       ip4_base_address.as_u32 =
2685         clib_host_to_net_u32 (1 +
2686                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2687     }
2688
2689   if (errors)
2690     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2691   else
2692     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2693
2694   return 0;
2695 }
2696
2697 /*?
2698  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2699  * given FIB table to determine if there is a conflict with the
2700  * adjacency table. The fib-id can be determined by using the
2701  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2702  * of 0 is used.
2703  *
2704  * @todo This command uses fib-id, other commands use table-id (not
2705  * just a name, they are different indexes). Would like to change this
2706  * to table-id for consistency.
2707  *
2708  * @cliexpar
2709  * Example of how to run the test lookup command:
2710  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2711  * No errors in 2 lookups
2712  * @cliexend
2713 ?*/
2714 /* *INDENT-OFF* */
2715 VLIB_CLI_COMMAND (lookup_test_command, static) =
2716 {
2717   .path = "test lookup",
2718   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2719   .function = test_lookup_command_fn,
2720 };
2721 /* *INDENT-ON* */
2722
2723 #ifndef CLIB_MARCH_VARIANT
2724 int
2725 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2726 {
2727   u32 fib_index;
2728
2729   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2730
2731   if (~0 == fib_index)
2732     return VNET_API_ERROR_NO_SUCH_FIB;
2733
2734   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2735                                   flow_hash_config);
2736
2737   return 0;
2738 }
2739 #endif
2740
2741 static clib_error_t *
2742 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2743                              unformat_input_t * input,
2744                              vlib_cli_command_t * cmd)
2745 {
2746   int matched = 0;
2747   u32 table_id = 0;
2748   u32 flow_hash_config = 0;
2749   int rv;
2750
2751   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2752     {
2753       if (unformat (input, "table %d", &table_id))
2754         matched = 1;
2755 #define _(a,v) \
2756     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2757       foreach_flow_hash_bit
2758 #undef _
2759         else
2760         break;
2761     }
2762
2763   if (matched == 0)
2764     return clib_error_return (0, "unknown input `%U'",
2765                               format_unformat_error, input);
2766
2767   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2768   switch (rv)
2769     {
2770     case 0:
2771       break;
2772
2773     case VNET_API_ERROR_NO_SUCH_FIB:
2774       return clib_error_return (0, "no such FIB table %d", table_id);
2775
2776     default:
2777       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2778       break;
2779     }
2780
2781   return 0;
2782 }
2783
2784 /*?
2785  * Configure the set of IPv4 fields used by the flow hash.
2786  *
2787  * @cliexpar
2788  * Example of how to set the flow hash on a given table:
2789  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2791  * @cliexstart{show ip fib}
2792  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2793  * 0.0.0.0/0
2794  *   unicast-ip4-chain
2795  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2796  *     [0] [@0]: dpo-drop ip6
2797  * 0.0.0.0/32
2798  *   unicast-ip4-chain
2799  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2800  *     [0] [@0]: dpo-drop ip6
2801  * 224.0.0.0/8
2802  *   unicast-ip4-chain
2803  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2804  *     [0] [@0]: dpo-drop ip6
2805  * 6.0.1.2/32
2806  *   unicast-ip4-chain
2807  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2808  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2809  * 7.0.0.1/32
2810  *   unicast-ip4-chain
2811  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2812  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2813  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2814  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2815  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2816  * 240.0.0.0/8
2817  *   unicast-ip4-chain
2818  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2819  *     [0] [@0]: dpo-drop ip6
2820  * 255.255.255.255/32
2821  *   unicast-ip4-chain
2822  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2823  *     [0] [@0]: dpo-drop ip6
2824  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2825  * 0.0.0.0/0
2826  *   unicast-ip4-chain
2827  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2828  *     [0] [@0]: dpo-drop ip6
2829  * 0.0.0.0/32
2830  *   unicast-ip4-chain
2831  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2832  *     [0] [@0]: dpo-drop ip6
2833  * 172.16.1.0/24
2834  *   unicast-ip4-chain
2835  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2836  *     [0] [@4]: ipv4-glean: af_packet0
2837  * 172.16.1.1/32
2838  *   unicast-ip4-chain
2839  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2840  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2841  * 172.16.1.2/32
2842  *   unicast-ip4-chain
2843  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2844  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2845  * 172.16.2.0/24
2846  *   unicast-ip4-chain
2847  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2848  *     [0] [@4]: ipv4-glean: af_packet1
2849  * 172.16.2.1/32
2850  *   unicast-ip4-chain
2851  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2852  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2853  * 224.0.0.0/8
2854  *   unicast-ip4-chain
2855  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2856  *     [0] [@0]: dpo-drop ip6
2857  * 240.0.0.0/8
2858  *   unicast-ip4-chain
2859  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2860  *     [0] [@0]: dpo-drop ip6
2861  * 255.255.255.255/32
2862  *   unicast-ip4-chain
2863  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2864  *     [0] [@0]: dpo-drop ip6
2865  * @cliexend
2866 ?*/
2867 /* *INDENT-OFF* */
2868 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2869 {
2870   .path = "set ip flow-hash",
2871   .short_help =
2872   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2873   .function = set_ip_flow_hash_command_fn,
2874 };
2875 /* *INDENT-ON* */
2876
2877 #ifndef CLIB_MARCH_VARIANT
2878 int
2879 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2880                              u32 table_index)
2881 {
2882   vnet_main_t *vnm = vnet_get_main ();
2883   vnet_interface_main_t *im = &vnm->interface_main;
2884   ip4_main_t *ipm = &ip4_main;
2885   ip_lookup_main_t *lm = &ipm->lookup_main;
2886   vnet_classify_main_t *cm = &vnet_classify_main;
2887   ip4_address_t *if_addr;
2888
2889   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2890     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2891
2892   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2893     return VNET_API_ERROR_NO_SUCH_ENTRY;
2894
2895   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2896   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2897
2898   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2899
2900   if (NULL != if_addr)
2901     {
2902       fib_prefix_t pfx = {
2903         .fp_len = 32,
2904         .fp_proto = FIB_PROTOCOL_IP4,
2905         .fp_addr.ip4 = *if_addr,
2906       };
2907       u32 fib_index;
2908
2909       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2910                                                        sw_if_index);
2911
2912
2913       if (table_index != (u32) ~ 0)
2914         {
2915           dpo_id_t dpo = DPO_INVALID;
2916
2917           dpo_set (&dpo,
2918                    DPO_CLASSIFY,
2919                    DPO_PROTO_IP4,
2920                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2921
2922           fib_table_entry_special_dpo_add (fib_index,
2923                                            &pfx,
2924                                            FIB_SOURCE_CLASSIFY,
2925                                            FIB_ENTRY_FLAG_NONE, &dpo);
2926           dpo_reset (&dpo);
2927         }
2928       else
2929         {
2930           fib_table_entry_special_remove (fib_index,
2931                                           &pfx, FIB_SOURCE_CLASSIFY);
2932         }
2933     }
2934
2935   return 0;
2936 }
2937 #endif
2938
2939 static clib_error_t *
2940 set_ip_classify_command_fn (vlib_main_t * vm,
2941                             unformat_input_t * input,
2942                             vlib_cli_command_t * cmd)
2943 {
2944   u32 table_index = ~0;
2945   int table_index_set = 0;
2946   u32 sw_if_index = ~0;
2947   int rv;
2948
2949   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2950     {
2951       if (unformat (input, "table-index %d", &table_index))
2952         table_index_set = 1;
2953       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2954                          vnet_get_main (), &sw_if_index))
2955         ;
2956       else
2957         break;
2958     }
2959
2960   if (table_index_set == 0)
2961     return clib_error_return (0, "classify table-index must be specified");
2962
2963   if (sw_if_index == ~0)
2964     return clib_error_return (0, "interface / subif must be specified");
2965
2966   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2967
2968   switch (rv)
2969     {
2970     case 0:
2971       break;
2972
2973     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2974       return clib_error_return (0, "No such interface");
2975
2976     case VNET_API_ERROR_NO_SUCH_ENTRY:
2977       return clib_error_return (0, "No such classifier table");
2978     }
2979   return 0;
2980 }
2981
2982 /*?
2983  * Assign a classification table to an interface. The classification
2984  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2986  * on an interface.
2987  *
2988  * @cliexpar
2989  * Example of how to assign a classification table to an interface:
2990  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2991 ?*/
2992 /* *INDENT-OFF* */
2993 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2994 {
2995     .path = "set ip classify",
2996     .short_help =
2997     "set ip classify intfc <interface> table-index <classify-idx>",
2998     .function = set_ip_classify_command_fn,
2999 };
3000 /* *INDENT-ON* */
3001
3002 static clib_error_t *
3003 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3004 {
3005   ip4_main_t *im = &ip4_main;
3006   uword heapsize = 0;
3007
3008   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3009     {
3010       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3011         ;
3012       else
3013         return clib_error_return (0,
3014                                   "invalid heap-size parameter `%U'",
3015                                   format_unformat_error, input);
3016     }
3017
3018   im->mtrie_heap_size = heapsize;
3019
3020   return 0;
3021 }
3022
3023 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3024
3025 /*
3026  * fd.io coding-style-patch-verification: ON
3027  *
3028  * Local Variables:
3029  * eval: (c-set-style "gnu")
3030  * End:
3031  */