95db9314d9b7be8669f87c595e4b5339b7b259e0
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59 #include <vnet/classify/vnet_classify.h>
60
61 /** @brief IPv4 lookup node.
62     @node ip4-lookup
63
64     This is the main IPv4 lookup dispatch node.
65
66     @param vm vlib_main_t corresponding to the current thread
67     @param node vlib_node_runtime_t
68     @param frame vlib_frame_t whose contents should be dispatched
69
70     @par Graph mechanics: buffer metadata, next index usage
71
72     @em Uses:
73     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
74         - Indicates the @c sw_if_index value of the interface that the
75           packet was received on.
76     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
77         - When the value is @c ~0 then the node performs a longest prefix
78           match (LPM) for the packet destination address in the FIB attached
79           to the receive interface.
80         - Otherwise perform LPM for the packet destination address in the
81           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
82           value (0, 1, ...) and not a VRF id.
83
84     @em Sets:
85     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
86         - The lookup result adjacency index.
87
88     <em>Next Index:</em>
89     - Dispatches the packet to the node index found in
90       ip_adjacency_t @c adj->lookup_next_index
91       (where @c adj is the lookup result adjacency).
92 */
93 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
94                                 vlib_frame_t * frame)
95 {
96   return ip4_lookup_inline (vm, node, frame);
97 }
98
99 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
100
101 /* *INDENT-OFF* */
102 VLIB_REGISTER_NODE (ip4_lookup_node) =
103 {
104   .name = "ip4-lookup",
105   .vector_size = sizeof (u32),
106   .format_trace = format_ip4_lookup_trace,
107   .n_next_nodes = IP_LOOKUP_N_NEXT,
108   .next_nodes = IP4_LOOKUP_NEXT_NODES,
109 };
110 /* *INDENT-ON* */
111
112 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
113                                       vlib_node_runtime_t * node,
114                                       vlib_frame_t * frame)
115 {
116   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
117   u32 n_left, *from;
118   u32 thread_index = vm->thread_index;
119   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
120   u16 nexts[VLIB_FRAME_SIZE], *next;
121
122   from = vlib_frame_vector_args (frame);
123   n_left = frame->n_vectors;
124   next = nexts;
125
126   vlib_get_buffers (vm, from, bufs, n_left);
127
128   while (n_left >= 4)
129     {
130       const load_balance_t *lb0, *lb1;
131       const ip4_header_t *ip0, *ip1;
132       u32 lbi0, hc0, lbi1, hc1;
133       const dpo_id_t *dpo0, *dpo1;
134
135       /* Prefetch next iteration. */
136       {
137         vlib_prefetch_buffer_header (b[2], LOAD);
138         vlib_prefetch_buffer_header (b[3], LOAD);
139
140         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
141         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
142       }
143
144       ip0 = vlib_buffer_get_current (b[0]);
145       ip1 = vlib_buffer_get_current (b[1]);
146       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
147       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
148
149       lb0 = load_balance_get (lbi0);
150       lb1 = load_balance_get (lbi1);
151
152       /*
153        * this node is for via FIBs we can re-use the hash value from the
154        * to node if present.
155        * We don't want to use the same hash value at each level in the recursion
156        * graph as that would lead to polarisation
157        */
158       hc0 = hc1 = 0;
159
160       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
161         {
162           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
163             {
164               hc0 = vnet_buffer (b[0])->ip.flow_hash =
165                 vnet_buffer (b[0])->ip.flow_hash >> 1;
166             }
167           else
168             {
169               hc0 = vnet_buffer (b[0])->ip.flow_hash =
170                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
171             }
172           dpo0 = load_balance_get_fwd_bucket
173             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
174         }
175       else
176         {
177           dpo0 = load_balance_get_bucket_i (lb0, 0);
178         }
179       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
180         {
181           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
182             {
183               hc1 = vnet_buffer (b[1])->ip.flow_hash =
184                 vnet_buffer (b[1])->ip.flow_hash >> 1;
185             }
186           else
187             {
188               hc1 = vnet_buffer (b[1])->ip.flow_hash =
189                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
190             }
191           dpo1 = load_balance_get_fwd_bucket
192             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
193         }
194       else
195         {
196           dpo1 = load_balance_get_bucket_i (lb1, 0);
197         }
198
199       next[0] = dpo0->dpoi_next_node;
200       next[1] = dpo1->dpoi_next_node;
201
202       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
203       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
204
205       vlib_increment_combined_counter
206         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
207       vlib_increment_combined_counter
208         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
209
210       b += 2;
211       next += 2;
212       n_left -= 2;
213     }
214
215   while (n_left > 0)
216     {
217       const load_balance_t *lb0;
218       const ip4_header_t *ip0;
219       const dpo_id_t *dpo0;
220       u32 lbi0, hc0;
221
222       ip0 = vlib_buffer_get_current (b[0]);
223       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
224
225       lb0 = load_balance_get (lbi0);
226
227       hc0 = 0;
228       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
229         {
230           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
231             {
232               hc0 = vnet_buffer (b[0])->ip.flow_hash =
233                 vnet_buffer (b[0])->ip.flow_hash >> 1;
234             }
235           else
236             {
237               hc0 = vnet_buffer (b[0])->ip.flow_hash =
238                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
239             }
240           dpo0 = load_balance_get_fwd_bucket
241             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
242         }
243       else
244         {
245           dpo0 = load_balance_get_bucket_i (lb0, 0);
246         }
247
248       next[0] = dpo0->dpoi_next_node;
249       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
250
251       vlib_increment_combined_counter
252         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
253
254       b += 1;
255       next += 1;
256       n_left -= 1;
257     }
258
259   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
260   if (node->flags & VLIB_NODE_FLAG_TRACE)
261     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
262
263   return frame->n_vectors;
264 }
265
266 /* *INDENT-OFF* */
267 VLIB_REGISTER_NODE (ip4_load_balance_node) =
268 {
269   .name = "ip4-load-balance",
270   .vector_size = sizeof (u32),
271   .sibling_of = "ip4-lookup",
272   .format_trace = format_ip4_lookup_trace,
273 };
274 /* *INDENT-ON* */
275
276 #ifndef CLIB_MARCH_VARIANT
277 /* get first interface address */
278 ip4_address_t *
279 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
280                              ip_interface_address_t ** result_ia)
281 {
282   ip_lookup_main_t *lm = &im->lookup_main;
283   ip_interface_address_t *ia = 0;
284   ip4_address_t *result = 0;
285
286   /* *INDENT-OFF* */
287   foreach_ip_interface_address
288     (lm, ia, sw_if_index,
289      1 /* honor unnumbered */ ,
290      ({
291        ip4_address_t * a =
292          ip_interface_address_get_address (lm, ia);
293        result = a;
294        break;
295      }));
296   /* *INDENT-OFF* */
297   if (result_ia)
298     *result_ia = result ? ia : 0;
299   return result;
300 }
301 #endif
302
303 static void
304 ip4_add_subnet_bcast_route (u32 fib_index,
305                             fib_prefix_t *pfx,
306                             u32 sw_if_index)
307 {
308   vnet_sw_interface_flags_t iflags;
309
310   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
311
312   fib_table_entry_special_remove(fib_index,
313                                  pfx,
314                                  FIB_SOURCE_INTERFACE);
315
316   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
317     {
318       fib_table_entry_update_one_path (fib_index, pfx,
319                                        FIB_SOURCE_INTERFACE,
320                                        FIB_ENTRY_FLAG_NONE,
321                                        DPO_PROTO_IP4,
322                                        /* No next-hop address */
323                                        &ADJ_BCAST_ADDR,
324                                        sw_if_index,
325                                        // invalid FIB index
326                                        ~0,
327                                        1,
328                                        // no out-label stack
329                                        NULL,
330                                        FIB_ROUTE_PATH_FLAG_NONE);
331     }
332   else
333     {
334         fib_table_entry_special_add(fib_index,
335                                     pfx,
336                                     FIB_SOURCE_INTERFACE,
337                                     (FIB_ENTRY_FLAG_DROP |
338                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
339     }
340 }
341
342 static void
343 ip4_add_interface_prefix_routes (ip4_main_t *im,
344                                  u32 sw_if_index,
345                                  u32 fib_index,
346                                  ip_interface_address_t * a)
347 {
348   ip_lookup_main_t *lm = &im->lookup_main;
349   ip_interface_prefix_t *if_prefix;
350   ip4_address_t *address = ip_interface_address_get_address (lm, a);
351
352   ip_interface_prefix_key_t key = {
353     .prefix = {
354       .fp_len = a->address_length,
355       .fp_proto = FIB_PROTOCOL_IP4,
356       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
357     },
358     .sw_if_index = sw_if_index,
359   };
360
361   fib_prefix_t pfx_special = {
362     .fp_proto = FIB_PROTOCOL_IP4,
363   };
364
365   /* If prefix already set on interface, just increment ref count & return */
366   if_prefix = ip_get_interface_prefix (lm, &key);
367   if (if_prefix)
368     {
369       if_prefix->ref_count += 1;
370       return;
371     }
372
373   /* New prefix - allocate a pool entry, initialize it, add to the hash */
374   pool_get (lm->if_prefix_pool, if_prefix);
375   if_prefix->ref_count = 1;
376   if_prefix->src_ia_index = a - lm->if_address_pool;
377   clib_memcpy (&if_prefix->key, &key, sizeof (key));
378   mhash_set (&lm->prefix_to_if_prefix_index, &key,
379              if_prefix - lm->if_prefix_pool, 0 /* old value */);
380
381   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
382   if (a->address_length <= 30)
383     {
384       pfx_special.fp_len = a->address_length;
385       pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
386
387       /* set the glean route for the prefix */
388       fib_table_entry_update_one_path (fib_index, &pfx_special,
389                                        FIB_SOURCE_INTERFACE,
390                                        (FIB_ENTRY_FLAG_CONNECTED |
391                                         FIB_ENTRY_FLAG_ATTACHED),
392                                        DPO_PROTO_IP4,
393                                        /* No next-hop address */
394                                        NULL,
395                                        sw_if_index,
396                                        /* invalid FIB index */
397                                        ~0,
398                                        1,
399                                        /* no out-label stack */
400                                        NULL,
401                                        FIB_ROUTE_PATH_FLAG_NONE);
402
403       /* set a drop route for the base address of the prefix */
404       pfx_special.fp_len = 32;
405       pfx_special.fp_addr.ip4.as_u32 =
406         address->as_u32 & im->fib_masks[a->address_length];
407
408       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
409         fib_table_entry_special_add (fib_index, &pfx_special,
410                                      FIB_SOURCE_INTERFACE,
411                                      (FIB_ENTRY_FLAG_DROP |
412                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
413
414       /* set a route for the broadcast address of the prefix */
415       pfx_special.fp_len = 32;
416       pfx_special.fp_addr.ip4.as_u32 =
417         address->as_u32 | ~im->fib_masks[a->address_length];
418       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
419         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
420
421
422     }
423   /* length == 31 - add an attached route for the other address */
424   else if (a->address_length == 31)
425     {
426       pfx_special.fp_len = 32;
427       pfx_special.fp_addr.ip4.as_u32 =
428         address->as_u32 ^ clib_host_to_net_u32(1);
429
430       fib_table_entry_update_one_path (fib_index, &pfx_special,
431                                        FIB_SOURCE_INTERFACE,
432                                        (FIB_ENTRY_FLAG_ATTACHED),
433                                        DPO_PROTO_IP4,
434                                        &pfx_special.fp_addr,
435                                        sw_if_index,
436                                        /* invalid FIB index */
437                                        ~0,
438                                        1,
439                                        NULL,
440                                        FIB_ROUTE_PATH_FLAG_NONE);
441     }
442 }
443
444 static void
445 ip4_add_interface_routes (u32 sw_if_index,
446                           ip4_main_t * im, u32 fib_index,
447                           ip_interface_address_t * a)
448 {
449   ip_lookup_main_t *lm = &im->lookup_main;
450   ip4_address_t *address = ip_interface_address_get_address (lm, a);
451   fib_prefix_t pfx = {
452     .fp_len = 32,
453     .fp_proto = FIB_PROTOCOL_IP4,
454     .fp_addr.ip4 = *address,
455   };
456
457   /* set special routes for the prefix if needed */
458   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
459
460   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
461     {
462       u32 classify_table_index =
463         lm->classify_table_index_by_sw_if_index[sw_if_index];
464       if (classify_table_index != (u32) ~ 0)
465         {
466           dpo_id_t dpo = DPO_INVALID;
467
468           dpo_set (&dpo,
469                    DPO_CLASSIFY,
470                    DPO_PROTO_IP4,
471                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
472
473           fib_table_entry_special_dpo_add (fib_index,
474                                            &pfx,
475                                            FIB_SOURCE_CLASSIFY,
476                                            FIB_ENTRY_FLAG_NONE, &dpo);
477           dpo_reset (&dpo);
478         }
479     }
480
481   fib_table_entry_update_one_path (fib_index, &pfx,
482                                    FIB_SOURCE_INTERFACE,
483                                    (FIB_ENTRY_FLAG_CONNECTED |
484                                     FIB_ENTRY_FLAG_LOCAL),
485                                    DPO_PROTO_IP4,
486                                    &pfx.fp_addr,
487                                    sw_if_index,
488                                    // invalid FIB index
489                                    ~0,
490                                    1, NULL,
491                                    FIB_ROUTE_PATH_FLAG_NONE);
492 }
493
494 static void
495 ip4_del_interface_prefix_routes (ip4_main_t * im,
496                                  u32 sw_if_index,
497                                  u32 fib_index,
498                                  ip4_address_t * address,
499                                  u32 address_length)
500 {
501   ip_lookup_main_t *lm = &im->lookup_main;
502   ip_interface_prefix_t *if_prefix;
503
504   ip_interface_prefix_key_t key = {
505     .prefix = {
506       .fp_len = address_length,
507       .fp_proto = FIB_PROTOCOL_IP4,
508       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
509     },
510     .sw_if_index = sw_if_index,
511   };
512
513   fib_prefix_t pfx_special = {
514     .fp_len = 32,
515     .fp_proto = FIB_PROTOCOL_IP4,
516   };
517
518   if_prefix = ip_get_interface_prefix (lm, &key);
519   if (!if_prefix)
520     {
521       clib_warning ("Prefix not found while deleting %U",
522                     format_ip4_address_and_length, address, address_length);
523       return;
524     }
525
526   if_prefix->ref_count -= 1;
527
528   /*
529    * Routes need to be adjusted if:
530    * - deleting last intf addr in prefix
531    * - deleting intf addr used as default source address in glean adjacency
532    *
533    * We're done now otherwise
534    */
535   if ((if_prefix->ref_count > 0) &&
536       !pool_is_free_index (lm->if_address_pool, if_prefix->src_ia_index))
537     return;
538
539   /* length <= 30, delete glean route, first address, last address */
540   if (address_length <= 30)
541     {
542
543       /* remove glean route for prefix */
544       pfx_special.fp_addr.ip4 = *address;
545       pfx_special.fp_len = address_length;
546       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
547
548       /* if no more intf addresses in prefix, remove other special routes */
549       if (!if_prefix->ref_count)
550         {
551           /* first address in prefix */
552           pfx_special.fp_addr.ip4.as_u32 =
553             address->as_u32 & im->fib_masks[address_length];
554           pfx_special.fp_len = 32;
555
556           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
557           fib_table_entry_special_remove (fib_index,
558                                           &pfx_special,
559                                           FIB_SOURCE_INTERFACE);
560
561           /* prefix broadcast address */
562           pfx_special.fp_addr.ip4.as_u32 =
563             address->as_u32 | ~im->fib_masks[address_length];
564           pfx_special.fp_len = 32;
565
566           if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
567           fib_table_entry_special_remove (fib_index,
568                                           &pfx_special,
569                                           FIB_SOURCE_INTERFACE);
570         }
571       else
572         /* default source addr just got deleted, find another */
573         {
574           ip_interface_address_t *new_src_ia = NULL;
575           ip4_address_t *new_src_addr = NULL;
576
577           new_src_addr =
578             ip4_interface_address_matching_destination
579               (im, address, sw_if_index, &new_src_ia);
580
581           if_prefix->src_ia_index = new_src_ia - lm->if_address_pool;
582
583           pfx_special.fp_len = address_length;
584           pfx_special.fp_addr.ip4 = *new_src_addr;
585
586           /* set new glean route for the prefix */
587           fib_table_entry_update_one_path (fib_index, &pfx_special,
588                                            FIB_SOURCE_INTERFACE,
589                                            (FIB_ENTRY_FLAG_CONNECTED |
590                                             FIB_ENTRY_FLAG_ATTACHED),
591                                            DPO_PROTO_IP4,
592                                            /* No next-hop address */
593                                            NULL,
594                                            sw_if_index,
595                                            /* invalid FIB index */
596                                            ~0,
597                                            1,
598                                            /* no out-label stack */
599                                            NULL,
600                                            FIB_ROUTE_PATH_FLAG_NONE);
601           return;
602         }
603     }
604   /* length == 31, delete attached route for the other address */
605   else if (address_length == 31)
606     {
607       pfx_special.fp_addr.ip4.as_u32 =
608         address->as_u32 ^ clib_host_to_net_u32(1);
609
610       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
611     }
612
613   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
614   pool_put (lm->if_prefix_pool, if_prefix);
615 }
616
617 static void
618 ip4_del_interface_routes (u32 sw_if_index,
619                           ip4_main_t * im,
620                           u32 fib_index,
621                           ip4_address_t * address, u32 address_length)
622 {
623   fib_prefix_t pfx = {
624     .fp_len = address_length,
625     .fp_proto = FIB_PROTOCOL_IP4,
626     .fp_addr.ip4 = *address,
627   };
628
629   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
630                                    address, address_length);
631
632   pfx.fp_len = 32;
633   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
634 }
635
636 #ifndef CLIB_MARCH_VARIANT
637 void
638 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
639 {
640   ip4_main_t *im = &ip4_main;
641
642   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
643
644   /*
645    * enable/disable only on the 1<->0 transition
646    */
647   if (is_enable)
648     {
649       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
650         return;
651     }
652   else
653     {
654       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
655       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
656         return;
657     }
658   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
659                                !is_enable, 0, 0);
660
661
662   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
663                                sw_if_index, !is_enable, 0, 0);
664
665   {
666     ip4_enable_disable_interface_callback_t *cb;
667     vec_foreach (cb, im->enable_disable_interface_callbacks)
668       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
669   }
670 }
671
672 static clib_error_t *
673 ip4_add_del_interface_address_internal (vlib_main_t * vm,
674                                         u32 sw_if_index,
675                                         ip4_address_t * address,
676                                         u32 address_length, u32 is_del)
677 {
678   vnet_main_t *vnm = vnet_get_main ();
679   ip4_main_t *im = &ip4_main;
680   ip_lookup_main_t *lm = &im->lookup_main;
681   clib_error_t *error = 0;
682   u32 if_address_index, elts_before;
683   ip4_address_fib_t ip4_af, *addr_fib = 0;
684
685   /* local0 interface doesn't support IP addressing  */
686   if (sw_if_index == 0)
687     {
688       return
689        clib_error_create ("local0 interface doesn't support IP addressing");
690     }
691
692   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
693   ip4_addr_fib_init (&ip4_af, address,
694                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
695   vec_add1 (addr_fib, ip4_af);
696
697   /*
698    * there is no support for adj-fib handling in the presence of overlapping
699    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
700    * most routers do.
701    */
702   /* *INDENT-OFF* */
703   if (!is_del)
704     {
705       /* When adding an address check that it does not conflict
706          with an existing address on any interface in this table. */
707       ip_interface_address_t *ia;
708       vnet_sw_interface_t *sif;
709
710       pool_foreach(sif, vnm->interface_main.sw_interfaces,
711       ({
712           if (im->fib_index_by_sw_if_index[sw_if_index] ==
713               im->fib_index_by_sw_if_index[sif->sw_if_index])
714             {
715               foreach_ip_interface_address
716                 (&im->lookup_main, ia, sif->sw_if_index,
717                  0 /* honor unnumbered */ ,
718                  ({
719                    ip4_address_t * x =
720                      ip_interface_address_get_address
721                      (&im->lookup_main, ia);
722                    if (ip4_destination_matches_route
723                        (im, address, x, ia->address_length) ||
724                        ip4_destination_matches_route (im,
725                                                       x,
726                                                       address,
727                                                       address_length))
728                      {
729                        /* an intf may have >1 addr from the same prefix */
730                        if ((sw_if_index == sif->sw_if_index) &&
731                            (ia->address_length == address_length) &&
732                            (x->as_u32 != address->as_u32))
733                          continue;
734
735                        /* error if the length or intf was different */
736                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
737
738                        return
739                          clib_error_create
740                          ("failed to add %U on %U which conflicts with %U for interface %U",
741                           format_ip4_address_and_length, address,
742                           address_length,
743                           format_vnet_sw_if_index_name, vnm,
744                           sw_if_index,
745                           format_ip4_address_and_length, x,
746                           ia->address_length,
747                           format_vnet_sw_if_index_name, vnm,
748                           sif->sw_if_index);
749                      }
750                  }));
751             }
752       }));
753     }
754   /* *INDENT-ON* */
755
756   elts_before = pool_elts (lm->if_address_pool);
757
758   error = ip_interface_address_add_del
759     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
760   if (error)
761     goto done;
762
763   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
764
765   /* intf addr routes are added/deleted on admin up/down */
766   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
767     {
768       if (is_del)
769         ip4_del_interface_routes (sw_if_index,
770                                   im, ip4_af.fib_index, address,
771                                   address_length);
772       else
773         ip4_add_interface_routes (sw_if_index,
774                                   im, ip4_af.fib_index,
775                                   pool_elt_at_index
776                                   (lm->if_address_pool, if_address_index));
777     }
778
779   /* If pool did not grow/shrink: add duplicate address. */
780   if (elts_before != pool_elts (lm->if_address_pool))
781     {
782       ip4_add_del_interface_address_callback_t *cb;
783       vec_foreach (cb, im->add_del_interface_address_callbacks)
784         cb->function (im, cb->function_opaque, sw_if_index,
785                       address, address_length, if_address_index, is_del);
786     }
787
788 done:
789   vec_free (addr_fib);
790   return error;
791 }
792
793 clib_error_t *
794 ip4_add_del_interface_address (vlib_main_t * vm,
795                                u32 sw_if_index,
796                                ip4_address_t * address,
797                                u32 address_length, u32 is_del)
798 {
799   return ip4_add_del_interface_address_internal
800     (vm, sw_if_index, address, address_length, is_del);
801 }
802
803 void
804 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
805 {
806   ip_interface_address_t *ia;
807   ip4_main_t *im;
808
809   im = &ip4_main;
810
811   /*
812    * when directed broadcast is enabled, the subnet braodcast route will forward
813    * packets using an adjacency with a broadcast MAC. otherwise it drops
814    */
815   /* *INDENT-OFF* */
816   foreach_ip_interface_address(&im->lookup_main, ia,
817                                sw_if_index, 0,
818      ({
819        if (ia->address_length <= 30)
820          {
821            ip4_address_t *ipa;
822
823            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
824
825            fib_prefix_t pfx = {
826              .fp_len = 32,
827              .fp_proto = FIB_PROTOCOL_IP4,
828              .fp_addr = {
829                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
830              },
831            };
832
833            ip4_add_subnet_bcast_route
834              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
835                                                   sw_if_index),
836               &pfx, sw_if_index);
837          }
838      }));
839   /* *INDENT-ON* */
840 }
841 #endif
842
843 static clib_error_t *
844 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
845 {
846   ip4_main_t *im = &ip4_main;
847   ip_interface_address_t *ia;
848   ip4_address_t *a;
849   u32 is_admin_up, fib_index;
850
851   /* Fill in lookup tables with default table (0). */
852   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
853
854   vec_validate_init_empty (im->
855                            lookup_main.if_address_pool_index_by_sw_if_index,
856                            sw_if_index, ~0);
857
858   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
859
860   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
861
862   /* *INDENT-OFF* */
863   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
864                                 0 /* honor unnumbered */,
865   ({
866     a = ip_interface_address_get_address (&im->lookup_main, ia);
867     if (is_admin_up)
868       ip4_add_interface_routes (sw_if_index,
869                                 im, fib_index,
870                                 ia);
871     else
872       ip4_del_interface_routes (sw_if_index,
873                                 im, fib_index,
874                                 a, ia->address_length);
875   }));
876   /* *INDENT-ON* */
877
878   return 0;
879 }
880
881 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
882
883 /* Built-in ip4 unicast rx feature path definition */
884 /* *INDENT-OFF* */
885 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
886 {
887   .arc_name = "ip4-unicast",
888   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
889   .last_in_arc = "ip4-lookup",
890   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
891 };
892
893 VNET_FEATURE_INIT (ip4_flow_classify, static) =
894 {
895   .arc_name = "ip4-unicast",
896   .node_name = "ip4-flow-classify",
897   .runs_before = VNET_FEATURES ("ip4-inacl"),
898 };
899
900 VNET_FEATURE_INIT (ip4_inacl, static) =
901 {
902   .arc_name = "ip4-unicast",
903   .node_name = "ip4-inacl",
904   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
905 };
906
907 VNET_FEATURE_INIT (ip4_source_check_1, static) =
908 {
909   .arc_name = "ip4-unicast",
910   .node_name = "ip4-source-check-via-rx",
911   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
912 };
913
914 VNET_FEATURE_INIT (ip4_source_check_2, static) =
915 {
916   .arc_name = "ip4-unicast",
917   .node_name = "ip4-source-check-via-any",
918   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
919 };
920
921 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
922 {
923   .arc_name = "ip4-unicast",
924   .node_name = "ip4-source-and-port-range-check-rx",
925   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
926 };
927
928 VNET_FEATURE_INIT (ip4_policer_classify, static) =
929 {
930   .arc_name = "ip4-unicast",
931   .node_name = "ip4-policer-classify",
932   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
933 };
934
935 VNET_FEATURE_INIT (ip4_ipsec, static) =
936 {
937   .arc_name = "ip4-unicast",
938   .node_name = "ipsec4-input-feature",
939   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
940 };
941
942 VNET_FEATURE_INIT (ip4_vpath, static) =
943 {
944   .arc_name = "ip4-unicast",
945   .node_name = "vpath-input-ip4",
946   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
947 };
948
949 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
950 {
951   .arc_name = "ip4-unicast",
952   .node_name = "ip4-vxlan-bypass",
953   .runs_before = VNET_FEATURES ("ip4-lookup"),
954 };
955
956 VNET_FEATURE_INIT (ip4_not_enabled, static) =
957 {
958   .arc_name = "ip4-unicast",
959   .node_name = "ip4-not-enabled",
960   .runs_before = VNET_FEATURES ("ip4-lookup"),
961 };
962
963 VNET_FEATURE_INIT (ip4_lookup, static) =
964 {
965   .arc_name = "ip4-unicast",
966   .node_name = "ip4-lookup",
967   .runs_before = 0,     /* not before any other features */
968 };
969
970 /* Built-in ip4 multicast rx feature path definition */
971 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
972 {
973   .arc_name = "ip4-multicast",
974   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
975   .last_in_arc = "ip4-mfib-forward-lookup",
976   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
977 };
978
979 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
980 {
981   .arc_name = "ip4-multicast",
982   .node_name = "vpath-input-ip4",
983   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
984 };
985
986 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
987 {
988   .arc_name = "ip4-multicast",
989   .node_name = "ip4-not-enabled",
990   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
991 };
992
993 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
994 {
995   .arc_name = "ip4-multicast",
996   .node_name = "ip4-mfib-forward-lookup",
997   .runs_before = 0,     /* last feature */
998 };
999
1000 /* Source and port-range check ip4 tx feature path definition */
1001 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1002 {
1003   .arc_name = "ip4-output",
1004   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1005   .last_in_arc = "interface-output",
1006   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1007 };
1008
1009 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1010 {
1011   .arc_name = "ip4-output",
1012   .node_name = "ip4-source-and-port-range-check-tx",
1013   .runs_before = VNET_FEATURES ("ip4-outacl"),
1014 };
1015
1016 VNET_FEATURE_INIT (ip4_outacl, static) =
1017 {
1018   .arc_name = "ip4-output",
1019   .node_name = "ip4-outacl",
1020   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1021 };
1022
1023 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1024 {
1025   .arc_name = "ip4-output",
1026   .node_name = "ipsec4-output-feature",
1027   .runs_before = VNET_FEATURES ("interface-output"),
1028 };
1029
1030 /* Built-in ip4 tx feature path definition */
1031 VNET_FEATURE_INIT (ip4_interface_output, static) =
1032 {
1033   .arc_name = "ip4-output",
1034   .node_name = "interface-output",
1035   .runs_before = 0,     /* not before any other features */
1036 };
1037 /* *INDENT-ON* */
1038
1039 static clib_error_t *
1040 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1041 {
1042   ip4_main_t *im = &ip4_main;
1043
1044   /* Fill in lookup tables with default table (0). */
1045   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1046   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1047
1048   if (!is_add)
1049     {
1050       ip4_main_t *im4 = &ip4_main;
1051       ip_lookup_main_t *lm4 = &im4->lookup_main;
1052       ip_interface_address_t *ia = 0;
1053       ip4_address_t *address;
1054       vlib_main_t *vm = vlib_get_main ();
1055
1056       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1057       /* *INDENT-OFF* */
1058       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1059       ({
1060         address = ip_interface_address_get_address (lm4, ia);
1061         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1062       }));
1063       /* *INDENT-ON* */
1064     }
1065
1066   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1067                                is_add, 0, 0);
1068
1069   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1070                                sw_if_index, is_add, 0, 0);
1071
1072   return /* no error */ 0;
1073 }
1074
1075 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1076
1077 /* Global IP4 main. */
1078 #ifndef CLIB_MARCH_VARIANT
1079 ip4_main_t ip4_main;
1080 #endif /* CLIB_MARCH_VARIANT */
1081
1082 static clib_error_t *
1083 ip4_lookup_init (vlib_main_t * vm)
1084 {
1085   ip4_main_t *im = &ip4_main;
1086   clib_error_t *error;
1087   uword i;
1088
1089   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1090     return error;
1091   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1092     return (error);
1093   if ((error = vlib_call_init_function (vm, fib_module_init)))
1094     return error;
1095   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1096     return error;
1097
1098   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1099     {
1100       u32 m;
1101
1102       if (i < 32)
1103         m = pow2_mask (i) << (32 - i);
1104       else
1105         m = ~0;
1106       im->fib_masks[i] = clib_host_to_net_u32 (m);
1107     }
1108
1109   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1110
1111   /* Create FIB with index 0 and table id of 0. */
1112   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1113                                      FIB_SOURCE_DEFAULT_ROUTE);
1114   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1115                                       MFIB_SOURCE_DEFAULT_ROUTE);
1116
1117   {
1118     pg_node_t *pn;
1119     pn = pg_get_node (ip4_lookup_node.index);
1120     pn->unformat_edit = unformat_pg_ip4_header;
1121   }
1122
1123   {
1124     ethernet_arp_header_t h;
1125
1126     clib_memset (&h, 0, sizeof (h));
1127
1128 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1129 #define _8(f,v) h.f = v;
1130     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1131     _16 (l3_type, ETHERNET_TYPE_IP4);
1132     _8 (n_l2_address_bytes, 6);
1133     _8 (n_l3_address_bytes, 4);
1134     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1135 #undef _16
1136 #undef _8
1137
1138     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1139                                /* data */ &h,
1140                                sizeof (h),
1141                                /* alloc chunk size */ 8,
1142                                "ip4 arp");
1143   }
1144
1145   return error;
1146 }
1147
1148 VLIB_INIT_FUNCTION (ip4_lookup_init);
1149
1150 typedef struct
1151 {
1152   /* Adjacency taken. */
1153   u32 dpo_index;
1154   u32 flow_hash;
1155   u32 fib_index;
1156
1157   /* Packet data, possibly *after* rewrite. */
1158   u8 packet_data[64 - 1 * sizeof (u32)];
1159 }
1160 ip4_forward_next_trace_t;
1161
1162 #ifndef CLIB_MARCH_VARIANT
1163 u8 *
1164 format_ip4_forward_next_trace (u8 * s, va_list * args)
1165 {
1166   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1167   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1168   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1169   u32 indent = format_get_indent (s);
1170   s = format (s, "%U%U",
1171               format_white_space, indent,
1172               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1173   return s;
1174 }
1175 #endif
1176
1177 static u8 *
1178 format_ip4_lookup_trace (u8 * s, va_list * args)
1179 {
1180   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1181   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1182   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1183   u32 indent = format_get_indent (s);
1184
1185   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1186               t->fib_index, t->dpo_index, t->flow_hash);
1187   s = format (s, "\n%U%U",
1188               format_white_space, indent,
1189               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1190   return s;
1191 }
1192
1193 static u8 *
1194 format_ip4_rewrite_trace (u8 * s, va_list * args)
1195 {
1196   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1197   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1198   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1199   u32 indent = format_get_indent (s);
1200
1201   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1202               t->fib_index, t->dpo_index, format_ip_adjacency,
1203               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1204   s = format (s, "\n%U%U",
1205               format_white_space, indent,
1206               format_ip_adjacency_packet_data,
1207               t->packet_data, sizeof (t->packet_data));
1208   return s;
1209 }
1210
1211 #ifndef CLIB_MARCH_VARIANT
1212 /* Common trace function for all ip4-forward next nodes. */
1213 void
1214 ip4_forward_next_trace (vlib_main_t * vm,
1215                         vlib_node_runtime_t * node,
1216                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1217 {
1218   u32 *from, n_left;
1219   ip4_main_t *im = &ip4_main;
1220
1221   n_left = frame->n_vectors;
1222   from = vlib_frame_vector_args (frame);
1223
1224   while (n_left >= 4)
1225     {
1226       u32 bi0, bi1;
1227       vlib_buffer_t *b0, *b1;
1228       ip4_forward_next_trace_t *t0, *t1;
1229
1230       /* Prefetch next iteration. */
1231       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1232       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1233
1234       bi0 = from[0];
1235       bi1 = from[1];
1236
1237       b0 = vlib_get_buffer (vm, bi0);
1238       b1 = vlib_get_buffer (vm, bi1);
1239
1240       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1241         {
1242           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1243           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1244           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1245           t0->fib_index =
1246             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1247              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1248             vec_elt (im->fib_index_by_sw_if_index,
1249                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1250
1251           clib_memcpy_fast (t0->packet_data,
1252                             vlib_buffer_get_current (b0),
1253                             sizeof (t0->packet_data));
1254         }
1255       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1256         {
1257           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1258           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1259           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1260           t1->fib_index =
1261             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1262              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1263             vec_elt (im->fib_index_by_sw_if_index,
1264                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1265           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1266                             sizeof (t1->packet_data));
1267         }
1268       from += 2;
1269       n_left -= 2;
1270     }
1271
1272   while (n_left >= 1)
1273     {
1274       u32 bi0;
1275       vlib_buffer_t *b0;
1276       ip4_forward_next_trace_t *t0;
1277
1278       bi0 = from[0];
1279
1280       b0 = vlib_get_buffer (vm, bi0);
1281
1282       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1283         {
1284           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1285           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1286           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1287           t0->fib_index =
1288             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1289              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1290             vec_elt (im->fib_index_by_sw_if_index,
1291                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1292           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1293                             sizeof (t0->packet_data));
1294         }
1295       from += 1;
1296       n_left -= 1;
1297     }
1298 }
1299
1300 /* Compute TCP/UDP/ICMP4 checksum in software. */
1301 u16
1302 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1303                               ip4_header_t * ip0)
1304 {
1305   ip_csum_t sum0;
1306   u32 ip_header_length, payload_length_host_byte_order;
1307
1308   /* Initialize checksum with ip header. */
1309   ip_header_length = ip4_header_bytes (ip0);
1310   payload_length_host_byte_order =
1311     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1312   sum0 =
1313     clib_host_to_net_u32 (payload_length_host_byte_order +
1314                           (ip0->protocol << 16));
1315
1316   if (BITS (uword) == 32)
1317     {
1318       sum0 =
1319         ip_csum_with_carry (sum0,
1320                             clib_mem_unaligned (&ip0->src_address, u32));
1321       sum0 =
1322         ip_csum_with_carry (sum0,
1323                             clib_mem_unaligned (&ip0->dst_address, u32));
1324     }
1325   else
1326     sum0 =
1327       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1328
1329   return ip_calculate_l4_checksum (vm, p0, sum0,
1330                                    payload_length_host_byte_order, (u8 *) ip0,
1331                                    ip_header_length, NULL);
1332 }
1333
1334 u32
1335 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1336 {
1337   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1338   udp_header_t *udp0;
1339   u16 sum16;
1340
1341   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1342           || ip0->protocol == IP_PROTOCOL_UDP);
1343
1344   udp0 = (void *) (ip0 + 1);
1345   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1346     {
1347       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1348                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1349       return p0->flags;
1350     }
1351
1352   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1353
1354   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1355                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1356
1357   return p0->flags;
1358 }
1359 #endif
1360
1361 /* *INDENT-OFF* */
1362 VNET_FEATURE_ARC_INIT (ip4_local) =
1363 {
1364   .arc_name  = "ip4-local",
1365   .start_nodes = VNET_FEATURES ("ip4-local"),
1366   .last_in_arc = "ip4-local-end-of-arc",
1367 };
1368 /* *INDENT-ON* */
1369
1370 static inline void
1371 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1372                             ip4_header_t * ip, u8 is_udp, u8 * error,
1373                             u8 * good_tcp_udp)
1374 {
1375   u32 flags0;
1376   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1377   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1378   if (is_udp)
1379     {
1380       udp_header_t *udp;
1381       u32 ip_len, udp_len;
1382       i32 len_diff;
1383       udp = ip4_next_header (ip);
1384       /* Verify UDP length. */
1385       ip_len = clib_net_to_host_u16 (ip->length);
1386       udp_len = clib_net_to_host_u16 (udp->length);
1387
1388       len_diff = ip_len - udp_len;
1389       *good_tcp_udp &= len_diff >= 0;
1390       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1391     }
1392 }
1393
/*
 * Checksum state helpers for a buffer pointer _b.
 *
 * Every argument and every expansion is fully parenthesized so the macros
 * can be negated, combined with other operators, or given an expression
 * argument without changing their meaning.
 */

/* Non-zero when the TCP or UDP checksum offload flag is set on _b. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * Non-zero when the packet is TCP/UDP and its checksum has neither been
 * computed already nor been offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1405
1406 static inline void
1407 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1408                          ip4_header_t * ih, u8 * error)
1409 {
1410   u8 is_udp, is_tcp_udp, good_tcp_udp;
1411
1412   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1413   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1414
1415   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1416     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1417   else
1418     good_tcp_udp = ip4_local_csum_is_valid (b);
1419
1420   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1421   *error = (is_tcp_udp && !good_tcp_udp
1422             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1423 }
1424
1425 static inline void
1426 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1427                             ip4_header_t ** ih, u8 * error)
1428 {
1429   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1430
1431   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1432   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1433
1434   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1435   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1436
1437   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1438   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1439
1440   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1441                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1442     {
1443       if (is_tcp_udp[0])
1444         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1445                                     &good_tcp_udp[0]);
1446       if (is_tcp_udp[1])
1447         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1448                                     &good_tcp_udp[1]);
1449     }
1450
1451   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1452               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1453   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1454               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1455 }
1456
1457 static inline void
1458 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1459                               vlib_buffer_t * b, u16 * next, u8 error,
1460                               u8 head_of_feature_arc)
1461 {
1462   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1463   u32 next_index;
1464
1465   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1466   b->error = error ? error_node->errors[error] : 0;
1467   if (head_of_feature_arc)
1468     {
1469       next_index = *next;
1470       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1471         {
1472           vnet_feature_arc_start (arc_index,
1473                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1474                                   &next_index, b);
1475           *next = next_index;
1476         }
1477     }
1478 }
1479
1480 typedef struct
1481 {
1482   ip4_address_t src;
1483   u32 lbi;
1484   u8 error;
1485   u8 first;
1486 } ip4_local_last_check_t;
1487
1488 static inline void
1489 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1490                      ip4_local_last_check_t * last_check, u8 * error0)
1491 {
1492   ip4_fib_mtrie_leaf_t leaf0;
1493   ip4_fib_mtrie_t *mtrie0;
1494   const dpo_id_t *dpo0;
1495   load_balance_t *lb0;
1496   u32 lbi0;
1497
1498   vnet_buffer (b)->ip.fib_index =
1499     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1500     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1501
1502   /*
1503    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1504    *  adjacency for the destination address (the local interface address).
1505    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1506    *  adjacency for the source address (the remote sender's address)
1507    */
1508   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1509       last_check->first)
1510     {
1511       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1512       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1513       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1514       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1515       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1516
1517       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1518         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1519       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1520
1521       lb0 = load_balance_get (lbi0);
1522       dpo0 = load_balance_get_bucket_i (lb0, 0);
1523
1524       /*
1525        * Must have a route to source otherwise we drop the packet.
1526        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1527        *
1528        * The checks are:
1529        *  - the source is a recieve => it's from us => bogus, do this
1530        *    first since it sets a different error code.
1531        *  - uRPF check for any route to source - accept if passes.
1532        *  - allow packets destined to the broadcast address from unknown sources
1533        */
1534
1535       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1536                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1537                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1538       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1539                   && !fib_urpf_check_size (lb0->lb_urpf)
1540                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1541                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1542
1543       last_check->src.as_u32 = ip0->src_address.as_u32;
1544       last_check->lbi = lbi0;
1545       last_check->error = *error0;
1546       last_check->first = 0;
1547     }
1548   else
1549     {
1550       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1551         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1552       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1553       *error0 = last_check->error;
1554     }
1555 }
1556
1557 static inline void
1558 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1559                         ip4_local_last_check_t * last_check, u8 * error)
1560 {
1561   ip4_fib_mtrie_leaf_t leaf[2];
1562   ip4_fib_mtrie_t *mtrie[2];
1563   const dpo_id_t *dpo[2];
1564   load_balance_t *lb[2];
1565   u32 not_last_hit;
1566   u32 lbi[2];
1567
1568   not_last_hit = last_check->first;
1569   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1570   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1571
1572   vnet_buffer (b[0])->ip.fib_index =
1573     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1574     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1575     vnet_buffer (b[0])->ip.fib_index;
1576
1577   vnet_buffer (b[1])->ip.fib_index =
1578     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1579     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1580     vnet_buffer (b[1])->ip.fib_index;
1581
1582   /*
1583    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1584    *  adjacency for the destination address (the local interface address).
1585    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1586    *  adjacency for the source address (the remote sender's address)
1587    */
1588   if (PREDICT_TRUE (not_last_hit))
1589     {
1590       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1591       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1592
1593       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1594       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1595
1596       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1597                                            &ip[0]->src_address, 2);
1598       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1599                                            &ip[1]->src_address, 2);
1600
1601       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1602                                            &ip[0]->src_address, 3);
1603       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1604                                            &ip[1]->src_address, 3);
1605
1606       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1607       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1608
1609       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1610         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1611       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1612
1613       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1614         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1615       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1616
1617       lb[0] = load_balance_get (lbi[0]);
1618       lb[1] = load_balance_get (lbi[1]);
1619
1620       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1621       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1622
1623       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1624                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1625                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1626       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1627                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1628                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1629                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1630
1631       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1632                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1633                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1634       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1635                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1636                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1637                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1638
1639       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1640       last_check->lbi = lbi[1];
1641       last_check->error = error[1];
1642       last_check->first = 0;
1643     }
1644   else
1645     {
1646       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1647         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1648       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1649
1650       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1651         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1652       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1653
1654       error[0] = last_check->error;
1655       error[1] = last_check->error;
1656     }
1657 }
1658
/*
 * Packet classes returned by ip4_local_classify().
 *
 * IP_LOCAL_PACKET_TYPE_L4 must remain 0: ip4_local_inline() tests the
 * returned type for truth to decide whether the checks can be skipped.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1665
1666 /**
1667  * Determine packet type and next node.
1668  *
1669  * The expectation is that all packets that are not L4 will skip
1670  * checksums and source checks.
1671  */
1672 always_inline u8
1673 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1674 {
1675   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1676
1677   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1678     {
1679       *next = IP_LOCAL_NEXT_REASSEMBLY;
1680       return IP_LOCAL_PACKET_TYPE_FRAG;
1681     }
1682   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1683     {
1684       *next = lm->local_next_by_ip_protocol[ip->protocol];
1685       return IP_LOCAL_PACKET_TYPE_NAT;
1686     }
1687
1688   *next = lm->local_next_by_ip_protocol[ip->protocol];
1689   return IP_LOCAL_PACKET_TYPE_L4;
1690 }
1691
1692 static inline uword
1693 ip4_local_inline (vlib_main_t * vm,
1694                   vlib_node_runtime_t * node,
1695                   vlib_frame_t * frame, int head_of_feature_arc)
1696 {
1697   u32 *from, n_left_from;
1698   vlib_node_runtime_t *error_node =
1699     vlib_node_get_runtime (vm, ip4_local_node.index);
1700   u16 nexts[VLIB_FRAME_SIZE], *next;
1701   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1702   ip4_header_t *ip[2];
1703   u8 error[2], pt[2];
1704
1705   ip4_local_last_check_t last_check = {
1706     /*
1707      * 0.0.0.0 can appear as the source address of an IP packet,
1708      * as can any other address, hence the need to use the 'first'
1709      * member to make sure the .lbi is initialised for the first
1710      * packet.
1711      */
1712     .src = {.as_u32 = 0},
1713     .lbi = ~0,
1714     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1715     .first = 1,
1716   };
1717
1718   from = vlib_frame_vector_args (frame);
1719   n_left_from = frame->n_vectors;
1720
1721   if (node->flags & VLIB_NODE_FLAG_TRACE)
1722     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1723
1724   vlib_get_buffers (vm, from, bufs, n_left_from);
1725   b = bufs;
1726   next = nexts;
1727
1728   while (n_left_from >= 6)
1729     {
1730       u8 not_batch = 0;
1731
1732       /* Prefetch next iteration. */
1733       {
1734         vlib_prefetch_buffer_header (b[4], LOAD);
1735         vlib_prefetch_buffer_header (b[5], LOAD);
1736
1737         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1738         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1739       }
1740
1741       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1742
1743       ip[0] = vlib_buffer_get_current (b[0]);
1744       ip[1] = vlib_buffer_get_current (b[1]);
1745
1746       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1747       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1748
1749       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1750       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1751
1752       not_batch = pt[0] ^ pt[1];
1753
1754       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1755         goto skip_checks;
1756
1757       if (PREDICT_TRUE (not_batch == 0))
1758         {
1759           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1760           ip4_local_check_src_x2 (b, ip, &last_check, error);
1761         }
1762       else
1763         {
1764           if (!pt[0])
1765             {
1766               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1767               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1768             }
1769           if (!pt[1])
1770             {
1771               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1772               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1773             }
1774         }
1775
1776     skip_checks:
1777
1778       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1779                                     head_of_feature_arc);
1780       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1781                                     head_of_feature_arc);
1782
1783       b += 2;
1784       next += 2;
1785       n_left_from -= 2;
1786     }
1787
1788   while (n_left_from > 0)
1789     {
1790       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1791
1792       ip[0] = vlib_buffer_get_current (b[0]);
1793       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1794       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1795
1796       if (head_of_feature_arc == 0 || pt[0])
1797         goto skip_check;
1798
1799       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1800       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1801
1802     skip_check:
1803
1804       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1805                                     head_of_feature_arc);
1806
1807       b += 1;
1808       next += 1;
1809       n_left_from -= 1;
1810     }
1811
1812   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1813   return frame->n_vectors;
1814 }
1815
1816 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1817                                vlib_frame_t * frame)
1818 {
1819   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1820 }
1821
1822 /* *INDENT-OFF* */
1823 VLIB_REGISTER_NODE (ip4_local_node) =
1824 {
1825   .name = "ip4-local",
1826   .vector_size = sizeof (u32),
1827   .format_trace = format_ip4_forward_next_trace,
1828   .n_errors = IP4_N_ERROR,
1829   .error_strings = ip4_error_strings,
1830   .n_next_nodes = IP_LOCAL_N_NEXT,
1831   .next_nodes =
1832   {
1833     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1834     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1835     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1836     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1837     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1838   },
1839 };
1840 /* *INDENT-ON* */
1841
1842
1843 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1844                                           vlib_node_runtime_t * node,
1845                                           vlib_frame_t * frame)
1846 {
1847   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1848 }
1849
1850 /* *INDENT-OFF* */
1851 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1852   .name = "ip4-local-end-of-arc",
1853   .vector_size = sizeof (u32),
1854
1855   .format_trace = format_ip4_forward_next_trace,
1856   .sibling_of = "ip4-local",
1857 };
1858
1859 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1860   .arc_name = "ip4-local",
1861   .node_name = "ip4-local-end-of-arc",
1862   .runs_before = 0, /* not before any other features */
1863 };
1864 /* *INDENT-ON* */
1865
1866 #ifndef CLIB_MARCH_VARIANT
1867 void
1868 ip4_register_protocol (u32 protocol, u32 node_index)
1869 {
1870   vlib_main_t *vm = vlib_get_main ();
1871   ip4_main_t *im = &ip4_main;
1872   ip_lookup_main_t *lm = &im->lookup_main;
1873
1874   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1875   lm->local_next_by_ip_protocol[protocol] =
1876     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1877 }
1878
1879 void
1880 ip4_unregister_protocol (u32 protocol)
1881 {
1882   ip4_main_t *im = &ip4_main;
1883   ip_lookup_main_t *lm = &im->lookup_main;
1884
1885   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1886   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1887 }
1888 #endif
1889
1890 static clib_error_t *
1891 show_ip_local_command_fn (vlib_main_t * vm,
1892                           unformat_input_t * input, vlib_cli_command_t * cmd)
1893 {
1894   ip4_main_t *im = &ip4_main;
1895   ip_lookup_main_t *lm = &im->lookup_main;
1896   int i;
1897
1898   vlib_cli_output (vm, "Protocols handled by ip4_local");
1899   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1900     {
1901       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1902         {
1903           u32 node_index = vlib_get_node (vm,
1904                                           ip4_local_node.index)->
1905             next_nodes[lm->local_next_by_ip_protocol[i]];
1906           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1907                            format_vlib_node_name, vm, node_index);
1908         }
1909     }
1910   return 0;
1911 }
1912
1913
1914
1915 /*?
1916  * Display the set of protocols handled by the local IPv4 stack.
1917  *
1918  * @cliexpar
1919  * Example of how to display local protocol table:
1920  * @cliexstart{show ip local}
1921  * Protocols handled by ip4_local
1922  * 1
1923  * 17
1924  * 47
1925  * @cliexend
1926 ?*/
1927 /* *INDENT-OFF* */
1928 VLIB_CLI_COMMAND (show_ip_local, static) =
1929 {
1930   .path = "show ip local",
1931   .function = show_ip_local_command_fn,
1932   .short_help = "show ip local",
1933 };
1934 /* *INDENT-ON* */
1935
/** Fixed next-node indices of the ip4-rewrite family of nodes. */
typedef enum
{
  /* Default disposition; the nexts array is pre-filled with this value. */
  IP4_REWRITE_NEXT_DROP = 0,
  /* Packet needs an ICMP error generated (TTL expired, DF set and too big). */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  /* Packet exceeds the adjacency MTU and may be fragmented. */
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  /* Number of fixed next nodes; must remain last. */
  IP4_REWRITE_N_NEXT
} ip4_rewrite_next_t;
1943
/**
 * The bits of an IPv4 address to mask to construct a multicast
 * MAC address.
 *
 * The value is passed to vnet_ip_mcast_fixup_header () together with the
 * packet's destination address, so it is spelled once per host byte order.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif
1953
1954 always_inline void
1955 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1956                u16 adj_packet_bytes, bool df, u16 * next,
1957                u8 is_midchain, u32 * error)
1958 {
1959   if (packet_len > adj_packet_bytes)
1960     {
1961       *error = IP4_ERROR_MTU_EXCEEDED;
1962       if (df)
1963         {
1964           icmp4_error_set_vnet_buffer
1965             (b, ICMP4_destination_unreachable,
1966              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1967              adj_packet_bytes);
1968           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1969         }
1970       else
1971         {
1972           /* IP fragmentation */
1973           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1974                                    (is_midchain ?
1975                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1976                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1977           *next = IP4_REWRITE_NEXT_FRAGMENT;
1978         }
1979     }
1980 }
1981
1982 /* increment TTL & update checksum.
1983    Works either endian, so no need for byte swap. */
1984 static_always_inline void
1985 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
1986 {
1987   i32 ttl;
1988   u32 checksum;
1989   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
1990     {
1991       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
1992       return;
1993     }
1994
1995   ttl = ip->ttl;
1996
1997   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
1998   checksum += checksum >= 0xffff;
1999
2000   ip->checksum = checksum;
2001   ttl += 1;
2002   ip->ttl = ttl;
2003
2004   ASSERT (ip->checksum == ip4_header_checksum (ip));
2005 }
2006
2007 /* Decrement TTL & update checksum.
2008    Works either endian, so no need for byte swap. */
2009 static_always_inline void
2010 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2011                             u32 * error)
2012 {
2013   i32 ttl;
2014   u32 checksum;
2015   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2016     {
2017       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2018       return;
2019     }
2020
2021   ttl = ip->ttl;
2022
2023   /* Input node should have reject packets with ttl 0. */
2024   ASSERT (ip->ttl > 0);
2025
2026   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2027   checksum += checksum >= 0xffff;
2028
2029   ip->checksum = checksum;
2030   ttl -= 1;
2031   ip->ttl = ttl;
2032
2033   /*
2034    * If the ttl drops below 1 when forwarding, generate
2035    * an ICMP response.
2036    */
2037   if (PREDICT_FALSE (ttl <= 0))
2038     {
2039       *error = IP4_ERROR_TIME_EXPIRED;
2040       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2041       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2042                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2043                                    0);
2044       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2045     }
2046
2047   /* Verify checksum. */
2048   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2049           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2050 }
2051
2052
2053 always_inline uword
2054 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2055                              vlib_node_runtime_t * node,
2056                              vlib_frame_t * frame,
2057                              int do_counters, int is_midchain, int is_mcast)
2058 {
2059   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2060   u32 *from = vlib_frame_vector_args (frame);
2061   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2062   u16 nexts[VLIB_FRAME_SIZE], *next;
2063   u32 n_left_from;
2064   vlib_node_runtime_t *error_node =
2065     vlib_node_get_runtime (vm, ip4_input_node.index);
2066
2067   n_left_from = frame->n_vectors;
2068   u32 thread_index = vm->thread_index;
2069
2070   vlib_get_buffers (vm, from, bufs, n_left_from);
2071   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2072
2073 #if (CLIB_N_PREFETCHES >= 8)
2074   if (n_left_from >= 6)
2075     {
2076       int i;
2077       for (i = 2; i < 6; i++)
2078         vlib_prefetch_buffer_header (bufs[i], LOAD);
2079     }
2080
2081   next = nexts;
2082   b = bufs;
2083   while (n_left_from >= 8)
2084     {
2085       const ip_adjacency_t *adj0, *adj1;
2086       ip4_header_t *ip0, *ip1;
2087       u32 rw_len0, error0, adj_index0;
2088       u32 rw_len1, error1, adj_index1;
2089       u32 tx_sw_if_index0, tx_sw_if_index1;
2090       u8 *p;
2091
2092       vlib_prefetch_buffer_header (b[6], LOAD);
2093       vlib_prefetch_buffer_header (b[7], LOAD);
2094
2095       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2096       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2097
2098       /*
2099        * pre-fetch the per-adjacency counters
2100        */
2101       if (do_counters)
2102         {
2103           vlib_prefetch_combined_counter (&adjacency_counters,
2104                                           thread_index, adj_index0);
2105           vlib_prefetch_combined_counter (&adjacency_counters,
2106                                           thread_index, adj_index1);
2107         }
2108
2109       ip0 = vlib_buffer_get_current (b[0]);
2110       ip1 = vlib_buffer_get_current (b[1]);
2111
2112       error0 = error1 = IP4_ERROR_NONE;
2113
2114       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2115       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2116
2117       /* Rewrite packet header and updates lengths. */
2118       adj0 = adj_get (adj_index0);
2119       adj1 = adj_get (adj_index1);
2120
2121       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2122       rw_len0 = adj0[0].rewrite_header.data_bytes;
2123       rw_len1 = adj1[0].rewrite_header.data_bytes;
2124       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2125       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2126
2127       p = vlib_buffer_get_current (b[2]);
2128       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2129       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2130
2131       p = vlib_buffer_get_current (b[3]);
2132       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2133       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2134
2135       /* Check MTU of outgoing interface. */
2136       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2137       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2138
2139       if (b[0]->flags & VNET_BUFFER_F_GSO)
2140         ip0_len = gso_mtu_sz (b[0]);
2141       if (b[1]->flags & VNET_BUFFER_F_GSO)
2142         ip1_len = gso_mtu_sz (b[1]);
2143
2144       ip4_mtu_check (b[0], ip0_len,
2145                      adj0[0].rewrite_header.max_l3_packet_bytes,
2146                      ip0->flags_and_fragment_offset &
2147                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2148                      next + 0, is_midchain, &error0);
2149       ip4_mtu_check (b[1], ip1_len,
2150                      adj1[0].rewrite_header.max_l3_packet_bytes,
2151                      ip1->flags_and_fragment_offset &
2152                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2153                      next + 1, is_midchain, &error1);
2154
2155       if (is_mcast)
2156         {
2157           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2158                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2159                     IP4_ERROR_SAME_INTERFACE : error0);
2160           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2161                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2162                     IP4_ERROR_SAME_INTERFACE : error1);
2163         }
2164
2165       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2166        * to see the IP header */
2167       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2168         {
2169           u32 next_index = adj0[0].rewrite_header.next_index;
2170           vlib_buffer_advance (b[0], -(word) rw_len0);
2171
2172           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2173           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2174
2175           if (PREDICT_FALSE
2176               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2177             vnet_feature_arc_start (lm->output_feature_arc_index,
2178                                     tx_sw_if_index0, &next_index, b[0]);
2179           next[0] = next_index;
2180           if (is_midchain)
2181             calc_checksums (vm, b[0]);
2182         }
2183       else
2184         {
2185           b[0]->error = error_node->errors[error0];
2186           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2187             ip4_ttl_inc (b[0], ip0);
2188         }
2189       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2190         {
2191           u32 next_index = adj1[0].rewrite_header.next_index;
2192           vlib_buffer_advance (b[1], -(word) rw_len1);
2193
2194           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2195           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2196
2197           if (PREDICT_FALSE
2198               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2199             vnet_feature_arc_start (lm->output_feature_arc_index,
2200                                     tx_sw_if_index1, &next_index, b[1]);
2201           next[1] = next_index;
2202           if (is_midchain)
2203             calc_checksums (vm, b[1]);
2204         }
2205       else
2206         {
2207           b[1]->error = error_node->errors[error1];
2208           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2209             ip4_ttl_inc (b[1], ip1);
2210         }
2211
2212       /* Guess we are only writing on simple Ethernet header. */
2213       vnet_rewrite_two_headers (adj0[0], adj1[0],
2214                                 ip0, ip1, sizeof (ethernet_header_t));
2215
2216       if (do_counters)
2217         {
2218           if (error0 == IP4_ERROR_NONE)
2219             vlib_increment_combined_counter
2220               (&adjacency_counters,
2221                thread_index,
2222                adj_index0, 1,
2223                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2224
2225           if (error1 == IP4_ERROR_NONE)
2226             vlib_increment_combined_counter
2227               (&adjacency_counters,
2228                thread_index,
2229                adj_index1, 1,
2230                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2231         }
2232
2233       if (is_midchain)
2234         {
2235           if (error0 == IP4_ERROR_NONE && adj0->sub_type.midchain.fixup_func)
2236             adj0->sub_type.midchain.fixup_func
2237               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2238           if (error1 == IP4_ERROR_NONE && adj1->sub_type.midchain.fixup_func)
2239             adj1->sub_type.midchain.fixup_func
2240               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2241         }
2242
2243       if (is_mcast)
2244         {
2245           /* copy bytes from the IP address into the MAC rewrite */
2246           if (error0 == IP4_ERROR_NONE)
2247             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2248                                         adj0->rewrite_header.dst_mcast_offset,
2249                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2250           if (error1 == IP4_ERROR_NONE)
2251             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2252                                         adj1->rewrite_header.dst_mcast_offset,
2253                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2254         }
2255
2256       next += 2;
2257       b += 2;
2258       n_left_from -= 2;
2259     }
2260 #elif (CLIB_N_PREFETCHES >= 4)
2261   next = nexts;
2262   b = bufs;
2263   while (n_left_from >= 1)
2264     {
2265       ip_adjacency_t *adj0;
2266       ip4_header_t *ip0;
2267       u32 rw_len0, error0, adj_index0;
2268       u32 tx_sw_if_index0;
2269       u8 *p;
2270
2271       /* Prefetch next iteration */
2272       if (PREDICT_TRUE (n_left_from >= 4))
2273         {
2274           ip_adjacency_t *adj2;
2275           u32 adj_index2;
2276
2277           vlib_prefetch_buffer_header (b[3], LOAD);
2278           vlib_prefetch_buffer_data (b[2], LOAD);
2279
2280           /* Prefetch adj->rewrite_header */
2281           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2282           adj2 = adj_get (adj_index2);
2283           p = (u8 *) adj2;
2284           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2285                          LOAD);
2286         }
2287
2288       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2289
2290       /*
2291        * Prefetch the per-adjacency counters
2292        */
2293       if (do_counters)
2294         {
2295           vlib_prefetch_combined_counter (&adjacency_counters,
2296                                           thread_index, adj_index0);
2297         }
2298
2299       ip0 = vlib_buffer_get_current (b[0]);
2300
2301       error0 = IP4_ERROR_NONE;
2302
2303       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2304
2305       /* Rewrite packet header and updates lengths. */
2306       adj0 = adj_get (adj_index0);
2307
2308       /* Rewrite header was prefetched. */
2309       rw_len0 = adj0[0].rewrite_header.data_bytes;
2310       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2311
2312       /* Check MTU of outgoing interface. */
2313       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2314
2315       if (b[0]->flags & VNET_BUFFER_F_GSO)
2316         ip0_len = gso_mtu_sz (b[0]);
2317
2318       ip4_mtu_check (b[0], ip0_len,
2319                      adj0[0].rewrite_header.max_l3_packet_bytes,
2320                      ip0->flags_and_fragment_offset &
2321                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2322                      next + 0, is_midchain, &error0);
2323
2324       if (is_mcast)
2325         {
2326           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2327                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2328                     IP4_ERROR_SAME_INTERFACE : error0);
2329         }
2330
2331       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2332        * to see the IP header */
2333       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2334         {
2335           u32 next_index = adj0[0].rewrite_header.next_index;
2336           vlib_buffer_advance (b[0], -(word) rw_len0);
2337           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2338           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2339
2340           if (PREDICT_FALSE
2341               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2342             vnet_feature_arc_start (lm->output_feature_arc_index,
2343                                     tx_sw_if_index0, &next_index, b[0]);
2344           next[0] = next_index;
2345
2346           if (is_midchain)
2347             calc_checksums (vm, b[0]);
2348
2349           /* Guess we are only writing on simple Ethernet header. */
2350           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2351
2352           /*
2353            * Bump the per-adjacency counters
2354            */
2355           if (do_counters)
2356             vlib_increment_combined_counter
2357               (&adjacency_counters,
2358                thread_index,
2359                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2360                                                            b[0]) + rw_len0);
2361
2362           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2363             adj0->sub_type.midchain.fixup_func
2364               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2365
2366           if (is_mcast)
2367             /* copy bytes from the IP address into the MAC rewrite */
2368             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2369                                         adj0->rewrite_header.dst_mcast_offset,
2370                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2371         }
2372       else
2373         {
2374           b[0]->error = error_node->errors[error0];
2375           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2376             ip4_ttl_inc (b[0], ip0);
2377         }
2378
2379       next += 1;
2380       b += 1;
2381       n_left_from -= 1;
2382     }
2383 #endif
2384
2385   while (n_left_from > 0)
2386     {
2387       ip_adjacency_t *adj0;
2388       ip4_header_t *ip0;
2389       u32 rw_len0, adj_index0, error0;
2390       u32 tx_sw_if_index0;
2391
2392       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2393
2394       adj0 = adj_get (adj_index0);
2395
2396       if (do_counters)
2397         vlib_prefetch_combined_counter (&adjacency_counters,
2398                                         thread_index, adj_index0);
2399
2400       ip0 = vlib_buffer_get_current (b[0]);
2401
2402       error0 = IP4_ERROR_NONE;
2403
2404       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2405
2406
2407       /* Update packet buffer attributes/set output interface. */
2408       rw_len0 = adj0[0].rewrite_header.data_bytes;
2409       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2410
2411       /* Check MTU of outgoing interface. */
2412       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2413       if (b[0]->flags & VNET_BUFFER_F_GSO)
2414         ip0_len = gso_mtu_sz (b[0]);
2415
2416       ip4_mtu_check (b[0], ip0_len,
2417                      adj0[0].rewrite_header.max_l3_packet_bytes,
2418                      ip0->flags_and_fragment_offset &
2419                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2420                      next + 0, is_midchain, &error0);
2421
2422       if (is_mcast)
2423         {
2424           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2425                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2426                     IP4_ERROR_SAME_INTERFACE : error0);
2427         }
2428
2429       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2430        * to see the IP header */
2431       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2432         {
2433           u32 next_index = adj0[0].rewrite_header.next_index;
2434           vlib_buffer_advance (b[0], -(word) rw_len0);
2435           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2436           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2437
2438           if (PREDICT_FALSE
2439               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2440             vnet_feature_arc_start (lm->output_feature_arc_index,
2441                                     tx_sw_if_index0, &next_index, b[0]);
2442           next[0] = next_index;
2443
2444           if (is_midchain)
2445             /* this acts on the packet that is about to be encapped */
2446             calc_checksums (vm, b[0]);
2447
2448           /* Guess we are only writing on simple Ethernet header. */
2449           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2450
2451           if (do_counters)
2452             vlib_increment_combined_counter
2453               (&adjacency_counters,
2454                thread_index, adj_index0, 1,
2455                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2456
2457           if (is_midchain && adj0->sub_type.midchain.fixup_func)
2458             adj0->sub_type.midchain.fixup_func
2459               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2460
2461           if (is_mcast)
2462             /* copy bytes from the IP address into the MAC rewrite */
2463             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2464                                         adj0->rewrite_header.dst_mcast_offset,
2465                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2466         }
2467       else
2468         {
2469           b[0]->error = error_node->errors[error0];
2470           /* undo the TTL decrement - we'll be back to do it again */
2471           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2472             ip4_ttl_inc (b[0], ip0);
2473         }
2474
2475       next += 1;
2476       b += 1;
2477       n_left_from -= 1;
2478     }
2479
2480
2481   /* Need to do trace after rewrites to pick up new packet data. */
2482   if (node->flags & VLIB_NODE_FLAG_TRACE)
2483     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2484
2485   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2486   return frame->n_vectors;
2487 }
2488
2489 always_inline uword
2490 ip4_rewrite_inline (vlib_main_t * vm,
2491                     vlib_node_runtime_t * node,
2492                     vlib_frame_t * frame,
2493                     int do_counters, int is_midchain, int is_mcast)
2494 {
2495   return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2496                                       is_midchain, is_mcast);
2497 }
2498
2499
2500 /** @brief IPv4 rewrite node.
2501     @node ip4-rewrite
2502
2503     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2504     header checksum, fetch the ip adjacency, check the outbound mtu,
2505     apply the adjacency rewrite, and send pkts to the adjacency
2506     rewrite header's rewrite_next_index.
2507
2508     @param vm vlib_main_t corresponding to the current thread
2509     @param node vlib_node_runtime_t
2510     @param frame vlib_frame_t whose contents should be dispatched
2511
2512     @par Graph mechanics: buffer metadata, next index usage
2513
2514     @em Uses:
2515     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2516         - the rewrite adjacency index
2517     - <code>adj->lookup_next_index</code>
2518         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2519           the packet will be dropped.
2520     - <code>adj->rewrite_header</code>
2521         - Rewrite string length, rewrite string, next_index
2522
2523     @em Sets:
2524     - <code>b->current_data, b->current_length</code>
2525         - Updated net of applying the rewrite string
2526
2527     <em>Next Indices:</em>
2528     - <code> adj->rewrite_header.next_index </code>
2529       or @c ip4-drop
2530 */
2531
2532 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2533                                  vlib_frame_t * frame)
2534 {
2535   if (adj_are_counters_enabled ())
2536     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2537   else
2538     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2539 }
2540
2541 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2542                                        vlib_node_runtime_t * node,
2543                                        vlib_frame_t * frame)
2544 {
2545   if (adj_are_counters_enabled ())
2546     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2547   else
2548     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2549 }
2550
2551 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2552                                   vlib_node_runtime_t * node,
2553                                   vlib_frame_t * frame)
2554 {
2555   if (adj_are_counters_enabled ())
2556     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2557   else
2558     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2559 }
2560
2561 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2562                                        vlib_node_runtime_t * node,
2563                                        vlib_frame_t * frame)
2564 {
2565   if (adj_are_counters_enabled ())
2566     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2567   else
2568     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2569 }
2570
2571 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2572                                         vlib_node_runtime_t * node,
2573                                         vlib_frame_t * frame)
2574 {
2575   if (adj_are_counters_enabled ())
2576     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2577   else
2578     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2579 }
2580
2581 /* *INDENT-OFF* */
2582 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2583   .name = "ip4-rewrite",
2584   .vector_size = sizeof (u32),
2585
2586   .format_trace = format_ip4_rewrite_trace,
2587
2588   .n_next_nodes = IP4_REWRITE_N_NEXT,
2589   .next_nodes = {
2590     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2591     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2592     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2593   },
2594 };
2595
2596 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2597   .name = "ip4-rewrite-bcast",
2598   .vector_size = sizeof (u32),
2599
2600   .format_trace = format_ip4_rewrite_trace,
2601   .sibling_of = "ip4-rewrite",
2602 };
2603
2604 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2605   .name = "ip4-rewrite-mcast",
2606   .vector_size = sizeof (u32),
2607
2608   .format_trace = format_ip4_rewrite_trace,
2609   .sibling_of = "ip4-rewrite",
2610 };
2611
2612 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2613   .name = "ip4-mcast-midchain",
2614   .vector_size = sizeof (u32),
2615
2616   .format_trace = format_ip4_rewrite_trace,
2617   .sibling_of = "ip4-rewrite",
2618 };
2619
2620 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2621   .name = "ip4-midchain",
2622   .vector_size = sizeof (u32),
2623   .format_trace = format_ip4_rewrite_trace,
2624   .sibling_of = "ip4-rewrite",
2625 };
/* *INDENT-ON* */
2627
2628 static int
2629 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2630 {
2631   ip4_fib_mtrie_t *mtrie0;
2632   ip4_fib_mtrie_leaf_t leaf0;
2633   u32 lbi0;
2634
2635   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2636
2637   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2638   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2639   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2640
2641   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2642
2643   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2644 }
2645
2646 static clib_error_t *
2647 test_lookup_command_fn (vlib_main_t * vm,
2648                         unformat_input_t * input, vlib_cli_command_t * cmd)
2649 {
2650   ip4_fib_t *fib;
2651   u32 table_id = 0;
2652   f64 count = 1;
2653   u32 n;
2654   int i;
2655   ip4_address_t ip4_base_address;
2656   u64 errors = 0;
2657
2658   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2659     {
2660       if (unformat (input, "table %d", &table_id))
2661         {
2662           /* Make sure the entry exists. */
2663           fib = ip4_fib_get (table_id);
2664           if ((fib) && (fib->index != table_id))
2665             return clib_error_return (0, "<fib-index> %d does not exist",
2666                                       table_id);
2667         }
2668       else if (unformat (input, "count %f", &count))
2669         ;
2670
2671       else if (unformat (input, "%U",
2672                          unformat_ip4_address, &ip4_base_address))
2673         ;
2674       else
2675         return clib_error_return (0, "unknown input `%U'",
2676                                   format_unformat_error, input);
2677     }
2678
2679   n = count;
2680
2681   for (i = 0; i < n; i++)
2682     {
2683       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2684         errors++;
2685
2686       ip4_base_address.as_u32 =
2687         clib_host_to_net_u32 (1 +
2688                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2689     }
2690
2691   if (errors)
2692     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2693   else
2694     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2695
2696   return 0;
2697 }
2698
2699 /*?
2700  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2701  * given FIB table to determine if there is a conflict with the
2702  * adjacency table. The fib-id can be determined by using the
2703  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2704  * of 0 is used.
2705  *
2706  * @todo This command uses fib-id, other commands use table-id (not
2707  * just a name, they are different indexes). Would like to change this
2708  * to table-id for consistency.
2709  *
2710  * @cliexpar
2711  * Example of how to run the test lookup command:
2712  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2713  * No errors in 2 lookups
2714  * @cliexend
2715 ?*/
2716 /* *INDENT-OFF* */
2717 VLIB_CLI_COMMAND (lookup_test_command, static) =
2718 {
2719   .path = "test lookup",
2720   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2721   .function = test_lookup_command_fn,
2722 };
2723 /* *INDENT-ON* */
2724
2725 #ifndef CLIB_MARCH_VARIANT
2726 int
2727 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2728 {
2729   u32 fib_index;
2730
2731   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2732
2733   if (~0 == fib_index)
2734     return VNET_API_ERROR_NO_SUCH_FIB;
2735
2736   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2737                                   flow_hash_config);
2738
2739   return 0;
2740 }
2741 #endif
2742
2743 static clib_error_t *
2744 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2745                              unformat_input_t * input,
2746                              vlib_cli_command_t * cmd)
2747 {
2748   int matched = 0;
2749   u32 table_id = 0;
2750   u32 flow_hash_config = 0;
2751   int rv;
2752
2753   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2754     {
2755       if (unformat (input, "table %d", &table_id))
2756         matched = 1;
2757 #define _(a,v) \
2758     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2759       foreach_flow_hash_bit
2760 #undef _
2761         else
2762         break;
2763     }
2764
2765   if (matched == 0)
2766     return clib_error_return (0, "unknown input `%U'",
2767                               format_unformat_error, input);
2768
2769   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2770   switch (rv)
2771     {
2772     case 0:
2773       break;
2774
2775     case VNET_API_ERROR_NO_SUCH_FIB:
2776       return clib_error_return (0, "no such FIB table %d", table_id);
2777
2778     default:
2779       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2780       break;
2781     }
2782
2783   return 0;
2784 }
2785
2786 /*?
2787  * Configure the set of IPv4 fields used by the flow hash.
2788  *
2789  * @cliexpar
2790  * Example of how to set the flow hash on a given table:
2791  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2793  * @cliexstart{show ip fib}
2794  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2795  * 0.0.0.0/0
2796  *   unicast-ip4-chain
2797  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2798  *     [0] [@0]: dpo-drop ip6
2799  * 0.0.0.0/32
2800  *   unicast-ip4-chain
2801  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2802  *     [0] [@0]: dpo-drop ip6
2803  * 224.0.0.0/8
2804  *   unicast-ip4-chain
2805  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2806  *     [0] [@0]: dpo-drop ip6
2807  * 6.0.1.2/32
2808  *   unicast-ip4-chain
2809  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2810  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2811  * 7.0.0.1/32
2812  *   unicast-ip4-chain
2813  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2814  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2815  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2816  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2817  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2818  * 240.0.0.0/8
2819  *   unicast-ip4-chain
2820  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2821  *     [0] [@0]: dpo-drop ip6
2822  * 255.255.255.255/32
2823  *   unicast-ip4-chain
2824  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2825  *     [0] [@0]: dpo-drop ip6
2826  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2827  * 0.0.0.0/0
2828  *   unicast-ip4-chain
2829  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2830  *     [0] [@0]: dpo-drop ip6
2831  * 0.0.0.0/32
2832  *   unicast-ip4-chain
2833  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2834  *     [0] [@0]: dpo-drop ip6
2835  * 172.16.1.0/24
2836  *   unicast-ip4-chain
2837  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2838  *     [0] [@4]: ipv4-glean: af_packet0
2839  * 172.16.1.1/32
2840  *   unicast-ip4-chain
2841  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2842  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2843  * 172.16.1.2/32
2844  *   unicast-ip4-chain
2845  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2846  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2847  * 172.16.2.0/24
2848  *   unicast-ip4-chain
2849  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2850  *     [0] [@4]: ipv4-glean: af_packet1
2851  * 172.16.2.1/32
2852  *   unicast-ip4-chain
2853  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2854  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2855  * 224.0.0.0/8
2856  *   unicast-ip4-chain
2857  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2858  *     [0] [@0]: dpo-drop ip6
2859  * 240.0.0.0/8
2860  *   unicast-ip4-chain
2861  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2862  *     [0] [@0]: dpo-drop ip6
2863  * 255.255.255.255/32
2864  *   unicast-ip4-chain
2865  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2866  *     [0] [@0]: dpo-drop ip6
2867  * @cliexend
2868 ?*/
2869 /* *INDENT-OFF* */
2870 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2871 {
2872   .path = "set ip flow-hash",
2873   .short_help =
2874   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2875   .function = set_ip_flow_hash_command_fn,
2876 };
2877 /* *INDENT-ON* */
2878
2879 #ifndef CLIB_MARCH_VARIANT
2880 int
2881 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2882                              u32 table_index)
2883 {
2884   vnet_main_t *vnm = vnet_get_main ();
2885   vnet_interface_main_t *im = &vnm->interface_main;
2886   ip4_main_t *ipm = &ip4_main;
2887   ip_lookup_main_t *lm = &ipm->lookup_main;
2888   vnet_classify_main_t *cm = &vnet_classify_main;
2889   ip4_address_t *if_addr;
2890
2891   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2892     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2893
2894   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2895     return VNET_API_ERROR_NO_SUCH_ENTRY;
2896
2897   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2898   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2899
2900   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2901
2902   if (NULL != if_addr)
2903     {
2904       fib_prefix_t pfx = {
2905         .fp_len = 32,
2906         .fp_proto = FIB_PROTOCOL_IP4,
2907         .fp_addr.ip4 = *if_addr,
2908       };
2909       u32 fib_index;
2910
2911       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2912                                                        sw_if_index);
2913
2914
2915       if (table_index != (u32) ~ 0)
2916         {
2917           dpo_id_t dpo = DPO_INVALID;
2918
2919           dpo_set (&dpo,
2920                    DPO_CLASSIFY,
2921                    DPO_PROTO_IP4,
2922                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2923
2924           fib_table_entry_special_dpo_add (fib_index,
2925                                            &pfx,
2926                                            FIB_SOURCE_CLASSIFY,
2927                                            FIB_ENTRY_FLAG_NONE, &dpo);
2928           dpo_reset (&dpo);
2929         }
2930       else
2931         {
2932           fib_table_entry_special_remove (fib_index,
2933                                           &pfx, FIB_SOURCE_CLASSIFY);
2934         }
2935     }
2936
2937   return 0;
2938 }
2939 #endif
2940
2941 static clib_error_t *
2942 set_ip_classify_command_fn (vlib_main_t * vm,
2943                             unformat_input_t * input,
2944                             vlib_cli_command_t * cmd)
2945 {
2946   u32 table_index = ~0;
2947   int table_index_set = 0;
2948   u32 sw_if_index = ~0;
2949   int rv;
2950
2951   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2952     {
2953       if (unformat (input, "table-index %d", &table_index))
2954         table_index_set = 1;
2955       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2956                          vnet_get_main (), &sw_if_index))
2957         ;
2958       else
2959         break;
2960     }
2961
2962   if (table_index_set == 0)
2963     return clib_error_return (0, "classify table-index must be specified");
2964
2965   if (sw_if_index == ~0)
2966     return clib_error_return (0, "interface / subif must be specified");
2967
2968   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2969
2970   switch (rv)
2971     {
2972     case 0:
2973       break;
2974
2975     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2976       return clib_error_return (0, "No such interface");
2977
2978     case VNET_API_ERROR_NO_SUCH_ENTRY:
2979       return clib_error_return (0, "No such classifier table");
2980     }
2981   return 0;
2982 }
2983
2984 /*?
2985  * Assign a classification table to an interface. The classification
2986  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2988  * on an interface.
2989  *
2990  * @cliexpar
2991  * Example of how to assign a classification table to an interface:
2992  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2993 ?*/
2994 /* *INDENT-OFF* */
2995 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2996 {
2997     .path = "set ip classify",
2998     .short_help =
2999     "set ip classify intfc <interface> table-index <classify-idx>",
3000     .function = set_ip_classify_command_fn,
3001 };
3002 /* *INDENT-ON* */
3003
3004 static clib_error_t *
3005 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3006 {
3007   ip4_main_t *im = &ip4_main;
3008   uword heapsize = 0;
3009
3010   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3011     {
3012       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3013         ;
3014       else
3015         return clib_error_return (0,
3016                                   "invalid heap-size parameter `%U'",
3017                                   format_unformat_error, input);
3018     }
3019
3020   im->mtrie_heap_size = heapsize;
3021
3022   return 0;
3023 }
3024
3025 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3026
3027 /*
3028  * fd.io coding-style-patch-verification: ON
3029  *
3030  * Local Variables:
3031  * eval: (c-set-style "gnu")
3032  * End:
3033  */