ip: Use the IP4 lookup functions
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57 #include <vnet/adj/adj_dp.h>
58 #include <vnet/pg/pg.h>
59
60 #include <vnet/ip/ip4_forward.h>
61 #include <vnet/interface_output.h>
62 #include <vnet/classify/vnet_classify.h>
63
64 /** @brief IPv4 lookup node.
65     @node ip4-lookup
66
67     This is the main IPv4 lookup dispatch node.
68
69     @param vm vlib_main_t corresponding to the current thread
70     @param node vlib_node_runtime_t
71     @param frame vlib_frame_t whose contents should be dispatched
72
73     @par Graph mechanics: buffer metadata, next index usage
74
75     @em Uses:
76     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
77         - Indicates the @c sw_if_index value of the interface that the
78           packet was received on.
79     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
80         - When the value is @c ~0 then the node performs a longest prefix
81           match (LPM) for the packet destination address in the FIB attached
82           to the receive interface.
83         - Otherwise perform LPM for the packet destination address in the
84           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
85           value (0, 1, ...) and not a VRF id.
86
87     @em Sets:
88     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
89         - The lookup result adjacency index.
90
91     <em>Next Index:</em>
92     - Dispatches the packet to the node index found in
93       ip_adjacency_t @c adj->lookup_next_index
94       (where @c adj is the lookup result adjacency).
95 */
96 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
97                                 vlib_frame_t * frame)
98 {
99   return ip4_lookup_inline (vm, node, frame);
100 }
101
102 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
103
104 /* *INDENT-OFF* */
105 VLIB_REGISTER_NODE (ip4_lookup_node) =
106 {
107   .name = "ip4-lookup",
108   .vector_size = sizeof (u32),
109   .format_trace = format_ip4_lookup_trace,
110   .n_next_nodes = IP_LOOKUP_N_NEXT,
111   .next_nodes = IP4_LOOKUP_NEXT_NODES,
112 };
113 /* *INDENT-ON* */
114
115 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
116                                       vlib_node_runtime_t * node,
117                                       vlib_frame_t * frame)
118 {
119   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
120   u32 n_left, *from;
121   u32 thread_index = vm->thread_index;
122   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
123   u16 nexts[VLIB_FRAME_SIZE], *next;
124
125   from = vlib_frame_vector_args (frame);
126   n_left = frame->n_vectors;
127   next = nexts;
128
129   vlib_get_buffers (vm, from, bufs, n_left);
130
131   while (n_left >= 4)
132     {
133       const load_balance_t *lb0, *lb1;
134       const ip4_header_t *ip0, *ip1;
135       u32 lbi0, hc0, lbi1, hc1;
136       const dpo_id_t *dpo0, *dpo1;
137
138       /* Prefetch next iteration. */
139       {
140         vlib_prefetch_buffer_header (b[2], LOAD);
141         vlib_prefetch_buffer_header (b[3], LOAD);
142
143         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
144         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
145       }
146
147       ip0 = vlib_buffer_get_current (b[0]);
148       ip1 = vlib_buffer_get_current (b[1]);
149       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
150       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
151
152       lb0 = load_balance_get (lbi0);
153       lb1 = load_balance_get (lbi1);
154
155       /*
156        * this node is for via FIBs we can re-use the hash value from the
157        * to node if present.
158        * We don't want to use the same hash value at each level in the recursion
159        * graph as that would lead to polarisation
160        */
161       hc0 = hc1 = 0;
162
163       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
164         {
165           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
166             {
167               hc0 = vnet_buffer (b[0])->ip.flow_hash =
168                 vnet_buffer (b[0])->ip.flow_hash >> 1;
169             }
170           else
171             {
172               hc0 = vnet_buffer (b[0])->ip.flow_hash =
173                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
174             }
175           dpo0 = load_balance_get_fwd_bucket
176             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
177         }
178       else
179         {
180           dpo0 = load_balance_get_bucket_i (lb0, 0);
181         }
182       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
183         {
184           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
185             {
186               hc1 = vnet_buffer (b[1])->ip.flow_hash =
187                 vnet_buffer (b[1])->ip.flow_hash >> 1;
188             }
189           else
190             {
191               hc1 = vnet_buffer (b[1])->ip.flow_hash =
192                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
193             }
194           dpo1 = load_balance_get_fwd_bucket
195             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
196         }
197       else
198         {
199           dpo1 = load_balance_get_bucket_i (lb1, 0);
200         }
201
202       next[0] = dpo0->dpoi_next_node;
203       next[1] = dpo1->dpoi_next_node;
204
205       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
206       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
207
208       vlib_increment_combined_counter
209         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
210       vlib_increment_combined_counter
211         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
212
213       b += 2;
214       next += 2;
215       n_left -= 2;
216     }
217
218   while (n_left > 0)
219     {
220       const load_balance_t *lb0;
221       const ip4_header_t *ip0;
222       const dpo_id_t *dpo0;
223       u32 lbi0, hc0;
224
225       ip0 = vlib_buffer_get_current (b[0]);
226       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
227
228       lb0 = load_balance_get (lbi0);
229
230       hc0 = 0;
231       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
232         {
233           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
234             {
235               hc0 = vnet_buffer (b[0])->ip.flow_hash =
236                 vnet_buffer (b[0])->ip.flow_hash >> 1;
237             }
238           else
239             {
240               hc0 = vnet_buffer (b[0])->ip.flow_hash =
241                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
242             }
243           dpo0 = load_balance_get_fwd_bucket
244             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
245         }
246       else
247         {
248           dpo0 = load_balance_get_bucket_i (lb0, 0);
249         }
250
251       next[0] = dpo0->dpoi_next_node;
252       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
253
254       vlib_increment_combined_counter
255         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
256
257       b += 1;
258       next += 1;
259       n_left -= 1;
260     }
261
262   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
263   if (node->flags & VLIB_NODE_FLAG_TRACE)
264     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
265
266   return frame->n_vectors;
267 }
268
269 /* *INDENT-OFF* */
270 VLIB_REGISTER_NODE (ip4_load_balance_node) =
271 {
272   .name = "ip4-load-balance",
273   .vector_size = sizeof (u32),
274   .sibling_of = "ip4-lookup",
275   .format_trace = format_ip4_lookup_trace,
276 };
277 /* *INDENT-ON* */
278
279 #ifndef CLIB_MARCH_VARIANT
280 /* get first interface address */
281 ip4_address_t *
282 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
283                              ip_interface_address_t ** result_ia)
284 {
285   ip_lookup_main_t *lm = &im->lookup_main;
286   ip_interface_address_t *ia = 0;
287   ip4_address_t *result = 0;
288
289   /* *INDENT-OFF* */
290   foreach_ip_interface_address
291     (lm, ia, sw_if_index,
292      1 /* honor unnumbered */ ,
293      ({
294        ip4_address_t * a =
295          ip_interface_address_get_address (lm, ia);
296        result = a;
297        break;
298      }));
299   /* *INDENT-OFF* */
300   if (result_ia)
301     *result_ia = result ? ia : 0;
302   return result;
303 }
304 #endif
305
306 static void
307 ip4_add_subnet_bcast_route (u32 fib_index,
308                             fib_prefix_t *pfx,
309                             u32 sw_if_index)
310 {
311   vnet_sw_interface_flags_t iflags;
312
313   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
314
315   fib_table_entry_special_remove(fib_index,
316                                  pfx,
317                                  FIB_SOURCE_INTERFACE);
318
319   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
320     {
321       fib_table_entry_update_one_path (fib_index, pfx,
322                                        FIB_SOURCE_INTERFACE,
323                                        FIB_ENTRY_FLAG_NONE,
324                                        DPO_PROTO_IP4,
325                                        /* No next-hop address */
326                                        &ADJ_BCAST_ADDR,
327                                        sw_if_index,
328                                        // invalid FIB index
329                                        ~0,
330                                        1,
331                                        // no out-label stack
332                                        NULL,
333                                        FIB_ROUTE_PATH_FLAG_NONE);
334     }
335   else
336     {
337         fib_table_entry_special_add(fib_index,
338                                     pfx,
339                                     FIB_SOURCE_INTERFACE,
340                                     (FIB_ENTRY_FLAG_DROP |
341                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
342     }
343 }
344
345 static void
346 ip4_add_interface_prefix_routes (ip4_main_t *im,
347                                  u32 sw_if_index,
348                                  u32 fib_index,
349                                  ip_interface_address_t * a)
350 {
351   ip_lookup_main_t *lm = &im->lookup_main;
352   ip_interface_prefix_t *if_prefix;
353   ip4_address_t *address = ip_interface_address_get_address (lm, a);
354
355   ip_interface_prefix_key_t key = {
356     .prefix = {
357       .fp_len = a->address_length,
358       .fp_proto = FIB_PROTOCOL_IP4,
359       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
360     },
361     .sw_if_index = sw_if_index,
362   };
363
364   fib_prefix_t pfx_special = {
365     .fp_proto = FIB_PROTOCOL_IP4,
366   };
367
368   /* If prefix already set on interface, just increment ref count & return */
369   if_prefix = ip_get_interface_prefix (lm, &key);
370   if (if_prefix)
371     {
372       if_prefix->ref_count += 1;
373       return;
374     }
375
376   /* New prefix - allocate a pool entry, initialize it, add to the hash */
377   pool_get (lm->if_prefix_pool, if_prefix);
378   if_prefix->ref_count = 1;
379   if_prefix->src_ia_index = a - lm->if_address_pool;
380   clib_memcpy (&if_prefix->key, &key, sizeof (key));
381   mhash_set (&lm->prefix_to_if_prefix_index, &key,
382              if_prefix - lm->if_prefix_pool, 0 /* old value */);
383
384   pfx_special.fp_len = a->address_length;
385   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
386
387   /* set the glean route for the prefix */
388   fib_table_entry_update_one_path (fib_index, &pfx_special,
389                                    FIB_SOURCE_INTERFACE,
390                                    (FIB_ENTRY_FLAG_CONNECTED |
391                                     FIB_ENTRY_FLAG_ATTACHED),
392                                    DPO_PROTO_IP4,
393                                    /* No next-hop address */
394                                    NULL,
395                                    sw_if_index,
396                                    /* invalid FIB index */
397                                    ~0,
398                                    1,
399                                    /* no out-label stack */
400                                    NULL,
401                                    FIB_ROUTE_PATH_FLAG_NONE);
402
403   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
404   if (a->address_length <= 30)
405     {
406       /* set a drop route for the base address of the prefix */
407       pfx_special.fp_len = 32;
408       pfx_special.fp_addr.ip4.as_u32 =
409         address->as_u32 & im->fib_masks[a->address_length];
410
411       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
412         fib_table_entry_special_add (fib_index, &pfx_special,
413                                      FIB_SOURCE_INTERFACE,
414                                      (FIB_ENTRY_FLAG_DROP |
415                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
416
417       /* set a route for the broadcast address of the prefix */
418       pfx_special.fp_len = 32;
419       pfx_special.fp_addr.ip4.as_u32 =
420         address->as_u32 | ~im->fib_masks[a->address_length];
421       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
422         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
423
424
425     }
426   /* length == 31 - add an attached route for the other address */
427   else if (a->address_length == 31)
428     {
429       pfx_special.fp_len = 32;
430       pfx_special.fp_addr.ip4.as_u32 =
431         address->as_u32 ^ clib_host_to_net_u32(1);
432
433       fib_table_entry_update_one_path (fib_index, &pfx_special,
434                                        FIB_SOURCE_INTERFACE,
435                                        (FIB_ENTRY_FLAG_ATTACHED),
436                                        DPO_PROTO_IP4,
437                                        &pfx_special.fp_addr,
438                                        sw_if_index,
439                                        /* invalid FIB index */
440                                        ~0,
441                                        1,
442                                        NULL,
443                                        FIB_ROUTE_PATH_FLAG_NONE);
444     }
445 }
446
447 static void
448 ip4_add_interface_routes (u32 sw_if_index,
449                           ip4_main_t * im, u32 fib_index,
450                           ip_interface_address_t * a)
451 {
452   ip_lookup_main_t *lm = &im->lookup_main;
453   ip4_address_t *address = ip_interface_address_get_address (lm, a);
454   fib_prefix_t pfx = {
455     .fp_len = 32,
456     .fp_proto = FIB_PROTOCOL_IP4,
457     .fp_addr.ip4 = *address,
458   };
459
460   /* set special routes for the prefix if needed */
461   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
462
463   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
464     {
465       u32 classify_table_index =
466         lm->classify_table_index_by_sw_if_index[sw_if_index];
467       if (classify_table_index != (u32) ~ 0)
468         {
469           dpo_id_t dpo = DPO_INVALID;
470
471           dpo_set (&dpo,
472                    DPO_CLASSIFY,
473                    DPO_PROTO_IP4,
474                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
475
476           fib_table_entry_special_dpo_add (fib_index,
477                                            &pfx,
478                                            FIB_SOURCE_CLASSIFY,
479                                            FIB_ENTRY_FLAG_NONE, &dpo);
480           dpo_reset (&dpo);
481         }
482     }
483
484   fib_table_entry_update_one_path (fib_index, &pfx,
485                                    FIB_SOURCE_INTERFACE,
486                                    (FIB_ENTRY_FLAG_CONNECTED |
487                                     FIB_ENTRY_FLAG_LOCAL),
488                                    DPO_PROTO_IP4,
489                                    &pfx.fp_addr,
490                                    sw_if_index,
491                                    // invalid FIB index
492                                    ~0,
493                                    1, NULL,
494                                    FIB_ROUTE_PATH_FLAG_NONE);
495 }
496
497 static void
498 ip4_del_interface_prefix_routes (ip4_main_t * im,
499                                  u32 sw_if_index,
500                                  u32 fib_index,
501                                  ip4_address_t * address,
502                                  u32 address_length)
503 {
504   ip_lookup_main_t *lm = &im->lookup_main;
505   ip_interface_prefix_t *if_prefix;
506
507   ip_interface_prefix_key_t key = {
508     .prefix = {
509       .fp_len = address_length,
510       .fp_proto = FIB_PROTOCOL_IP4,
511       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
512     },
513     .sw_if_index = sw_if_index,
514   };
515
516   fib_prefix_t pfx_special = {
517     .fp_len = 32,
518     .fp_proto = FIB_PROTOCOL_IP4,
519   };
520
521   if_prefix = ip_get_interface_prefix (lm, &key);
522   if (!if_prefix)
523     {
524       clib_warning ("Prefix not found while deleting %U",
525                     format_ip4_address_and_length, address, address_length);
526       return;
527     }
528
529   if_prefix->ref_count -= 1;
530
531   /*
532    * Routes need to be adjusted if deleting last intf addr in prefix
533    *
534    * We're done now otherwise
535    */
536   if (if_prefix->ref_count > 0)
537     return;
538
539   /* length <= 30, delete glean route, first address, last address */
540   if (address_length <= 30)
541     {
542       /* Less work to do in FIB if we remove the covered /32s first */
543
544       /* first address in prefix */
545       pfx_special.fp_addr.ip4.as_u32 =
546         address->as_u32 & im->fib_masks[address_length];
547       pfx_special.fp_len = 32;
548
549       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
550         fib_table_entry_special_remove (fib_index,
551                                         &pfx_special,
552                                         FIB_SOURCE_INTERFACE);
553
554       /* prefix broadcast address */
555       pfx_special.fp_addr.ip4.as_u32 =
556         address->as_u32 | ~im->fib_masks[address_length];
557       pfx_special.fp_len = 32;
558
559       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
560         fib_table_entry_special_remove (fib_index,
561                                         &pfx_special,
562                                         FIB_SOURCE_INTERFACE);
563     }
564   else if (address_length == 31)
565     {
566       /* length == 31, delete attached route for the other address */
567       pfx_special.fp_addr.ip4.as_u32 =
568         address->as_u32 ^ clib_host_to_net_u32(1);
569
570       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
571     }
572
573   /* remove glean route for prefix */
574   pfx_special.fp_addr.ip4 = *address;
575   pfx_special.fp_len = address_length;
576   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
577
578   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
579   pool_put (lm->if_prefix_pool, if_prefix);
580 }
581
582 static void
583 ip4_del_interface_routes (u32 sw_if_index,
584                           ip4_main_t * im,
585                           u32 fib_index,
586                           ip4_address_t * address, u32 address_length)
587 {
588   fib_prefix_t pfx = {
589     .fp_len = 32,
590     .fp_proto = FIB_PROTOCOL_IP4,
591     .fp_addr.ip4 = *address,
592   };
593
594   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
595
596   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
597                                    address, address_length);
598 }
599
600 #ifndef CLIB_MARCH_VARIANT
601 void
602 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
603 {
604   ip4_main_t *im = &ip4_main;
605   vnet_main_t *vnm = vnet_get_main ();
606   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
607
608   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
609
610   /*
611    * enable/disable only on the 1<->0 transition
612    */
613   if (is_enable)
614     {
615       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
616         return;
617     }
618   else
619     {
620       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
621       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
622         return;
623     }
624   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
625                                !is_enable, 0, 0);
626
627
628   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
629                                sw_if_index, !is_enable, 0, 0);
630
631   if (is_enable)
632     hi->l3_if_count++;
633   else if (hi->l3_if_count)
634     hi->l3_if_count--;
635
636   {
637     ip4_enable_disable_interface_callback_t *cb;
638     vec_foreach (cb, im->enable_disable_interface_callbacks)
639       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
640   }
641 }
642
643 static clib_error_t *
644 ip4_add_del_interface_address_internal (vlib_main_t * vm,
645                                         u32 sw_if_index,
646                                         ip4_address_t * address,
647                                         u32 address_length, u32 is_del)
648 {
649   vnet_main_t *vnm = vnet_get_main ();
650   ip4_main_t *im = &ip4_main;
651   ip_lookup_main_t *lm = &im->lookup_main;
652   clib_error_t *error = 0;
653   u32 if_address_index;
654   ip4_address_fib_t ip4_af, *addr_fib = 0;
655
656   /* local0 interface doesn't support IP addressing  */
657   if (sw_if_index == 0)
658     {
659       return
660        clib_error_create ("local0 interface doesn't support IP addressing");
661     }
662
663   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
664   ip4_addr_fib_init (&ip4_af, address,
665                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
666   vec_add1 (addr_fib, ip4_af);
667
668   /*
669    * there is no support for adj-fib handling in the presence of overlapping
670    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
671    * most routers do.
672    */
673   /* *INDENT-OFF* */
674   if (!is_del)
675     {
676       /* When adding an address check that it does not conflict
677          with an existing address on any interface in this table. */
678       ip_interface_address_t *ia;
679       vnet_sw_interface_t *sif;
680
681       pool_foreach (sif, vnm->interface_main.sw_interfaces)
682        {
683           if (im->fib_index_by_sw_if_index[sw_if_index] ==
684               im->fib_index_by_sw_if_index[sif->sw_if_index])
685             {
686               foreach_ip_interface_address
687                 (&im->lookup_main, ia, sif->sw_if_index,
688                  0 /* honor unnumbered */ ,
689                  ({
690                    ip4_address_t * x =
691                      ip_interface_address_get_address
692                      (&im->lookup_main, ia);
693
694                    if (ip4_destination_matches_route
695                        (im, address, x, ia->address_length) ||
696                        ip4_destination_matches_route (im,
697                                                       x,
698                                                       address,
699                                                       address_length))
700                      {
701                        /* an intf may have >1 addr from the same prefix */
702                        if ((sw_if_index == sif->sw_if_index) &&
703                            (ia->address_length == address_length) &&
704                            (x->as_u32 != address->as_u32))
705                          continue;
706
707                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
708                          /* if the address we're comparing against is stale
709                           * then the CP has not added this one back yet, maybe
710                           * it never will, so we have to assume it won't and
711                           * ignore it. if it does add it back, then it will fail
712                           * because this one is now present */
713                          continue;
714
715                        /* error if the length or intf was different */
716                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
717
718                        error = clib_error_create
719                          ("failed to add %U on %U which conflicts with %U for interface %U",
720                           format_ip4_address_and_length, address,
721                           address_length,
722                           format_vnet_sw_if_index_name, vnm,
723                           sw_if_index,
724                           format_ip4_address_and_length, x,
725                           ia->address_length,
726                           format_vnet_sw_if_index_name, vnm,
727                           sif->sw_if_index);
728                        goto done;
729                      }
730                  }));
731             }
732       }
733     }
734   /* *INDENT-ON* */
735
736   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
737
738   if (is_del)
739     {
740       if (~0 == if_address_index)
741         {
742           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
743           error = clib_error_create ("%U not found for interface %U",
744                                      lm->format_address_and_length,
745                                      addr_fib, address_length,
746                                      format_vnet_sw_if_index_name, vnm,
747                                      sw_if_index);
748           goto done;
749         }
750
751       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
752                                         address_length, sw_if_index);
753       if (error)
754         goto done;
755     }
756   else
757     {
758       if (~0 != if_address_index)
759         {
760           ip_interface_address_t *ia;
761
762           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
763
764           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
765             {
766               if (ia->sw_if_index == sw_if_index)
767                 {
768                   /* re-adding an address during the replace action.
769                    * consdier this the update. clear the flag and
770                    * we're done */
771                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
772                   goto done;
773                 }
774               else
775                 {
776                   /* The prefix is moving from one interface to another.
777                    * delete the stale and add the new */
778                   ip4_add_del_interface_address_internal (vm,
779                                                           ia->sw_if_index,
780                                                           address,
781                                                           address_length, 1);
782                   ia = NULL;
783                   error = ip_interface_address_add (lm, sw_if_index,
784                                                     addr_fib, address_length,
785                                                     &if_address_index);
786                 }
787             }
788           else
789             {
790               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
791               error = clib_error_create
792                 ("Prefix %U already found on interface %U",
793                  lm->format_address_and_length, addr_fib, address_length,
794                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
795             }
796         }
797       else
798         error = ip_interface_address_add (lm, sw_if_index,
799                                           addr_fib, address_length,
800                                           &if_address_index);
801     }
802
803   if (error)
804     goto done;
805
806   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
807   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
808
809   /* intf addr routes are added/deleted on admin up/down */
810   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
811     {
812       if (is_del)
813         ip4_del_interface_routes (sw_if_index,
814                                   im, ip4_af.fib_index, address,
815                                   address_length);
816       else
817         ip4_add_interface_routes (sw_if_index,
818                                   im, ip4_af.fib_index,
819                                   pool_elt_at_index
820                                   (lm->if_address_pool, if_address_index));
821     }
822
823   ip4_add_del_interface_address_callback_t *cb;
824   vec_foreach (cb, im->add_del_interface_address_callbacks)
825     cb->function (im, cb->function_opaque, sw_if_index,
826                   address, address_length, if_address_index, is_del);
827
828 done:
829   vec_free (addr_fib);
830   return error;
831 }
832
833 clib_error_t *
834 ip4_add_del_interface_address (vlib_main_t * vm,
835                                u32 sw_if_index,
836                                ip4_address_t * address,
837                                u32 address_length, u32 is_del)
838 {
839   return ip4_add_del_interface_address_internal
840     (vm, sw_if_index, address, address_length, is_del);
841 }
842
843 void
844 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
845 {
846   ip_interface_address_t *ia;
847   ip4_main_t *im;
848
849   im = &ip4_main;
850
851   /*
852    * when directed broadcast is enabled, the subnet braodcast route will forward
853    * packets using an adjacency with a broadcast MAC. otherwise it drops
854    */
855   /* *INDENT-OFF* */
856   foreach_ip_interface_address(&im->lookup_main, ia,
857                                sw_if_index, 0,
858      ({
859        if (ia->address_length <= 30)
860          {
861            ip4_address_t *ipa;
862
863            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
864
865            fib_prefix_t pfx = {
866              .fp_len = 32,
867              .fp_proto = FIB_PROTOCOL_IP4,
868              .fp_addr = {
869                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
870              },
871            };
872
873            ip4_add_subnet_bcast_route
874              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
875                                                   sw_if_index),
876               &pfx, sw_if_index);
877          }
878      }));
879   /* *INDENT-ON* */
880 }
881 #endif
882
883 static clib_error_t *
884 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
885 {
886   ip4_main_t *im = &ip4_main;
887   ip_interface_address_t *ia;
888   ip4_address_t *a;
889   u32 is_admin_up, fib_index;
890
891   /* Fill in lookup tables with default table (0). */
892   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
893
894   vec_validate_init_empty (im->
895                            lookup_main.if_address_pool_index_by_sw_if_index,
896                            sw_if_index, ~0);
897
898   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
899
900   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
901
902   /* *INDENT-OFF* */
903   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
904                                 0 /* honor unnumbered */,
905   ({
906     a = ip_interface_address_get_address (&im->lookup_main, ia);
907     if (is_admin_up)
908       ip4_add_interface_routes (sw_if_index,
909                                 im, fib_index,
910                                 ia);
911     else
912       ip4_del_interface_routes (sw_if_index,
913                                 im, fib_index,
914                                 a, ia->address_length);
915   }));
916   /* *INDENT-ON* */
917
918   return 0;
919 }
920
921 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
922
/*
 * Built-in ip4 unicast rx feature path definition.
 *
 * The arc "ip4-unicast" starts at ip4-input / ip4-input-no-checksum and
 * ends at ip4-lookup.  The runs_before constraints below give this
 * relative ordering:
 *   ip4-flow-classify -> ip4-inacl -> ip4-policer-classify
 *     -> ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass
 *     -> ip4-lookup
 * ip4-source-and-port-range-check-rx is only constrained to run before
 * ip4-policer-classify, and ip4-not-enabled only before ip4-lookup.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec4-input-feature",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/*
 * This is the feature that ip4_sw_interface_add_del() enables on the
 * arc when an interface is added and disables when it is deleted.
 */
VNET_FEATURE_INIT (ip4_not_enabled, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* Terminal feature of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};
995
/*
 * Built-in ip4 multicast rx feature path definition.
 *
 * The arc "ip4-multicast" starts at ip4-input / ip4-input-no-checksum
 * and ends at ip4-mfib-forward-lookup.  Both vpath-input-ip4 and
 * ip4-not-enabled are constrained only to run before the final lookup.
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-mfib-forward-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/*
 * This is the feature that ip4_sw_interface_add_del() enables on the
 * arc when an interface is added and disables when it is deleted.
 */
VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/* Terminal feature of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};
1025
/*
 * Built-in ip4 tx (output) feature path definition.
 *
 * The arc "ip4-output" starts at ip4-rewrite / ip4-midchain / ip4-dvr-dpo
 * and ends at interface-output.  The runs_before constraints below give
 * this ordering:
 *   ip4-source-and-port-range-check-tx -> ip4-outacl
 *     -> ipsec4-output-feature -> interface-output
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  .last_in_arc = "interface-output",
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

/* Source and port-range check on the tx path */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ip4-outacl"),
};

VNET_FEATURE_INIT (ip4_outacl, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-outacl",
  .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec4-output-feature",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Terminal feature of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
/* *INDENT-ON* */
1064
1065 static clib_error_t *
1066 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1067 {
1068   ip4_main_t *im = &ip4_main;
1069
1070   /* Fill in lookup tables with default table (0). */
1071   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1072   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1073
1074   if (!is_add)
1075     {
1076       ip4_main_t *im4 = &ip4_main;
1077       ip_lookup_main_t *lm4 = &im4->lookup_main;
1078       ip_interface_address_t *ia = 0;
1079       ip4_address_t *address;
1080       vlib_main_t *vm = vlib_get_main ();
1081
1082       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1083       /* *INDENT-OFF* */
1084       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1085       ({
1086         address = ip_interface_address_get_address (lm4, ia);
1087         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1088       }));
1089       /* *INDENT-ON* */
1090       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1091     }
1092
1093   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1094                                is_add, 0, 0);
1095
1096   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1097                                sw_if_index, is_add, 0, 0);
1098
1099   return /* no error */ 0;
1100 }
1101
1102 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1103
/*
 * Global IP4 main.
 * The object is defined only when CLIB_MARCH_VARIANT is not set, so that
 * builds of this file with CLIB_MARCH_VARIANT defined do not emit a
 * second definition.
 */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
1108
1109 static clib_error_t *
1110 ip4_lookup_init (vlib_main_t * vm)
1111 {
1112   ip4_main_t *im = &ip4_main;
1113   clib_error_t *error;
1114   uword i;
1115
1116   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1117     return error;
1118   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1119     return (error);
1120   if ((error = vlib_call_init_function (vm, fib_module_init)))
1121     return error;
1122   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1123     return error;
1124
1125   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1126     {
1127       u32 m;
1128
1129       if (i < 32)
1130         m = pow2_mask (i) << (32 - i);
1131       else
1132         m = ~0;
1133       im->fib_masks[i] = clib_host_to_net_u32 (m);
1134     }
1135
1136   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1137
1138   /* Create FIB with index 0 and table id of 0. */
1139   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1140                                      FIB_SOURCE_DEFAULT_ROUTE);
1141   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1142                                       MFIB_SOURCE_DEFAULT_ROUTE);
1143
1144   {
1145     pg_node_t *pn;
1146     pn = pg_get_node (ip4_lookup_node.index);
1147     pn->unformat_edit = unformat_pg_ip4_header;
1148   }
1149
1150   {
1151     ethernet_arp_header_t h;
1152
1153     clib_memset (&h, 0, sizeof (h));
1154
1155 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1156 #define _8(f,v) h.f = v;
1157     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1158     _16 (l3_type, ETHERNET_TYPE_IP4);
1159     _8 (n_l2_address_bytes, 6);
1160     _8 (n_l3_address_bytes, 4);
1161     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1162 #undef _16
1163 #undef _8
1164
1165     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1166                                /* data */ &h,
1167                                sizeof (h),
1168                                /* alloc chunk size */ 8,
1169                                "ip4 arp");
1170   }
1171
1172   return error;
1173 }
1174
1175 VLIB_INIT_FUNCTION (ip4_lookup_init);
1176
/*
 * Per-packet trace record written by ip4_forward_next_trace() and printed
 * by the format_ip4_*_trace() functions in this file.
 */
typedef struct
{
  /* Adjacency taken. */
  u32 dpo_index;
  /* Copied from vnet_buffer (b)->ip.flow_hash. */
  u32 flow_hash;
  /*
   * TX sw_if_index of the buffer when it is set (!= ~0), otherwise the
   * FIB index bound to the RX interface.
   */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  /*
   * NOTE(review): only one u32 is subtracted here although three u32
   * fields precede the array; confirm whether a 64-byte record was the
   * intent.
   */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1188
1189 #ifndef CLIB_MARCH_VARIANT
1190 u8 *
1191 format_ip4_forward_next_trace (u8 * s, va_list * args)
1192 {
1193   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1194   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1195   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1196   u32 indent = format_get_indent (s);
1197   s = format (s, "%U%U",
1198               format_white_space, indent,
1199               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1200   return s;
1201 }
1202 #endif
1203
1204 static u8 *
1205 format_ip4_lookup_trace (u8 * s, va_list * args)
1206 {
1207   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1208   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1209   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1210   u32 indent = format_get_indent (s);
1211
1212   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1213               t->fib_index, t->dpo_index, t->flow_hash);
1214   s = format (s, "\n%U%U",
1215               format_white_space, indent,
1216               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1217   return s;
1218 }
1219
1220 static u8 *
1221 format_ip4_rewrite_trace (u8 * s, va_list * args)
1222 {
1223   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1224   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1225   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1226   u32 indent = format_get_indent (s);
1227
1228   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1229               t->fib_index, t->dpo_index, format_ip_adjacency,
1230               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1231   s = format (s, "\n%U%U",
1232               format_white_space, indent,
1233               format_ip_adjacency_packet_data,
1234               t->packet_data, sizeof (t->packet_data));
1235   return s;
1236 }
1237
1238 #ifndef CLIB_MARCH_VARIANT
1239 /* Common trace function for all ip4-forward next nodes. */
1240 void
1241 ip4_forward_next_trace (vlib_main_t * vm,
1242                         vlib_node_runtime_t * node,
1243                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1244 {
1245   u32 *from, n_left;
1246   ip4_main_t *im = &ip4_main;
1247
1248   n_left = frame->n_vectors;
1249   from = vlib_frame_vector_args (frame);
1250
1251   while (n_left >= 4)
1252     {
1253       u32 bi0, bi1;
1254       vlib_buffer_t *b0, *b1;
1255       ip4_forward_next_trace_t *t0, *t1;
1256
1257       /* Prefetch next iteration. */
1258       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1259       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1260
1261       bi0 = from[0];
1262       bi1 = from[1];
1263
1264       b0 = vlib_get_buffer (vm, bi0);
1265       b1 = vlib_get_buffer (vm, bi1);
1266
1267       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1268         {
1269           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1270           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1271           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1272           t0->fib_index =
1273             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1274              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1275             vec_elt (im->fib_index_by_sw_if_index,
1276                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1277
1278           clib_memcpy_fast (t0->packet_data,
1279                             vlib_buffer_get_current (b0),
1280                             sizeof (t0->packet_data));
1281         }
1282       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1283         {
1284           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1285           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1286           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1287           t1->fib_index =
1288             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1289              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1290             vec_elt (im->fib_index_by_sw_if_index,
1291                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1292           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1293                             sizeof (t1->packet_data));
1294         }
1295       from += 2;
1296       n_left -= 2;
1297     }
1298
1299   while (n_left >= 1)
1300     {
1301       u32 bi0;
1302       vlib_buffer_t *b0;
1303       ip4_forward_next_trace_t *t0;
1304
1305       bi0 = from[0];
1306
1307       b0 = vlib_get_buffer (vm, bi0);
1308
1309       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1310         {
1311           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1312           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1313           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1314           t0->fib_index =
1315             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1316              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1317             vec_elt (im->fib_index_by_sw_if_index,
1318                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1319           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1320                             sizeof (t0->packet_data));
1321         }
1322       from += 1;
1323       n_left -= 1;
1324     }
1325 }
1326
1327 /* Compute TCP/UDP/ICMP4 checksum in software. */
1328 u16
1329 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1330                               ip4_header_t * ip0)
1331 {
1332   ip_csum_t sum0;
1333   u32 ip_header_length, payload_length_host_byte_order;
1334
1335   /* Initialize checksum with ip header. */
1336   ip_header_length = ip4_header_bytes (ip0);
1337   payload_length_host_byte_order =
1338     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1339   sum0 =
1340     clib_host_to_net_u32 (payload_length_host_byte_order +
1341                           (ip0->protocol << 16));
1342
1343   if (BITS (uword) == 32)
1344     {
1345       sum0 =
1346         ip_csum_with_carry (sum0,
1347                             clib_mem_unaligned (&ip0->src_address, u32));
1348       sum0 =
1349         ip_csum_with_carry (sum0,
1350                             clib_mem_unaligned (&ip0->dst_address, u32));
1351     }
1352   else
1353     sum0 =
1354       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1355
1356   return ip_calculate_l4_checksum (vm, p0, sum0,
1357                                    payload_length_host_byte_order, (u8 *) ip0,
1358                                    ip_header_length, NULL);
1359 }
1360
1361 u32
1362 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1363 {
1364   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1365   udp_header_t *udp0;
1366   u16 sum16;
1367
1368   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1369           || ip0->protocol == IP_PROTOCOL_UDP);
1370
1371   udp0 = (void *) (ip0 + 1);
1372   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1373     {
1374       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1375                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1376       return p0->flags;
1377     }
1378
1379   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1380
1381   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1382                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1383
1384   return p0->flags;
1385 }
1386 #endif
1387
/*
 * Feature arc for locally received packets: starts at the ip4-local node
 * and ends at ip4-local-end-of-arc.  ip4_local_set_next_and_error() starts
 * this arc through vnet_feat_arc_ip4_local.feature_arc_index.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
  .last_in_arc = "ip4-local-end-of-arc",
};
/* *INDENT-ON* */
1396
1397 static inline void
1398 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1399                             ip4_header_t * ip, u8 is_udp, u8 * error,
1400                             u8 * good_tcp_udp)
1401 {
1402   u32 flags0;
1403   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1404   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1405   if (is_udp)
1406     {
1407       udp_header_t *udp;
1408       u32 ip_len, udp_len;
1409       i32 len_diff;
1410       udp = ip4_next_header (ip);
1411       /* Verify UDP length. */
1412       ip_len = clib_net_to_host_u16 (ip->length);
1413       udp_len = clib_net_to_host_u16 (udp->length);
1414
1415       len_diff = ip_len - udp_len;
1416       *good_tcp_udp &= len_diff >= 0;
1417       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1418     }
1419 }
1420
/*
 * Checksum state helpers for ip4-local.  _b is a vlib_buffer_t pointer
 * expression; every macro argument and every expansion is fully
 * parenthesised so the macros are safe with arbitrary argument
 * expressions and inside larger expressions.
 */

/* True when the buffer is marked for TCP or UDP checksum offload. */
#define ip4_local_csum_is_offloaded(_b)                                       \
  (((_b)->flags & VNET_BUFFER_F_OFFLOAD) &&                                   \
   (vnet_buffer (_b)->oflags &                                                \
    (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))

/*
 * True when a TCP/UDP packet still needs a software checksum check, i.e.
 * the checksum has neither been computed already nor been offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                             \
  ((is_tcp_udp) &&                                                            \
   !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) ||                    \
     ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                           \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) ||                      \
    ip4_local_csum_is_offloaded (_b)) != 0)
1433
1434 static inline void
1435 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1436                          ip4_header_t * ih, u8 * error)
1437 {
1438   u8 is_udp, is_tcp_udp, good_tcp_udp;
1439
1440   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1441   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1442
1443   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1444     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1445   else
1446     good_tcp_udp = ip4_local_csum_is_valid (b);
1447
1448   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1449   *error = (is_tcp_udp && !good_tcp_udp
1450             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1451 }
1452
1453 static inline void
1454 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1455                             ip4_header_t ** ih, u8 * error)
1456 {
1457   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1458
1459   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1460   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1461
1462   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1463   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1464
1465   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1466   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1467
1468   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1469                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1470     {
1471       if (is_tcp_udp[0])
1472         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1473                                     &good_tcp_udp[0]);
1474       if (is_tcp_udp[1])
1475         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1476                                     &good_tcp_udp[1]);
1477     }
1478
1479   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1480               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1481   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1482               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1483 }
1484
1485 static inline void
1486 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1487                               vlib_buffer_t * b, u16 * next, u8 error,
1488                               u8 head_of_feature_arc)
1489 {
1490   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1491   u32 next_index;
1492
1493   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1494   b->error = error ? error_node->errors[error] : 0;
1495   if (head_of_feature_arc)
1496     {
1497       next_index = *next;
1498       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1499         {
1500           vnet_feature_arc_start (arc_index,
1501                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1502                                   &next_index, b);
1503           *next = next_index;
1504         }
1505     }
1506 }
1507
/*
 * One-entry cache of the most recent source-address check, used by
 * ip4_local_check_src() and ip4_local_check_src_x2() to skip the FIB
 * lookup when consecutive packets share a source address.
 */
typedef struct
{
  /* source address of the last packet that was looked up */
  ip4_address_t src;
  /* load-balance index returned by that lookup */
  u32 lbi;
  /* error code recorded for that packet */
  u8 error;
  /* non-zero until the first lookup has populated the cache */
  u8 first;
} ip4_local_last_check_t;
1515
1516 static inline void
1517 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1518                      ip4_local_last_check_t * last_check, u8 * error0)
1519 {
1520   const dpo_id_t *dpo0;
1521   load_balance_t *lb0;
1522   u32 lbi0;
1523
1524   vnet_buffer (b)->ip.fib_index =
1525     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1526     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1527
1528   /*
1529    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1530    *  adjacency for the destination address (the local interface address).
1531    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1532    *  adjacency for the source address (the remote sender's address)
1533    */
1534   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1535       last_check->first)
1536     {
1537       lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
1538                                         &ip0->src_address);
1539
1540       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1541         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1542       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1543
1544       lb0 = load_balance_get (lbi0);
1545       dpo0 = load_balance_get_bucket_i (lb0, 0);
1546
1547       /*
1548        * Must have a route to source otherwise we drop the packet.
1549        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1550        *
1551        * The checks are:
1552        *  - the source is a recieve => it's from us => bogus, do this
1553        *    first since it sets a different error code.
1554        *  - uRPF check for any route to source - accept if passes.
1555        *  - allow packets destined to the broadcast address from unknown sources
1556        */
1557
1558       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1559                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1560                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1561       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1562                   && !fib_urpf_check_size (lb0->lb_urpf)
1563                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1564                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1565
1566       last_check->src.as_u32 = ip0->src_address.as_u32;
1567       last_check->lbi = lbi0;
1568       last_check->error = *error0;
1569       last_check->first = 0;
1570     }
1571   else
1572     {
1573       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1574         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1575       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1576       *error0 = last_check->error;
1577     }
1578 }
1579
1580 static inline void
1581 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1582                         ip4_local_last_check_t * last_check, u8 * error)
1583 {
1584   const dpo_id_t *dpo[2];
1585   load_balance_t *lb[2];
1586   u32 not_last_hit;
1587   u32 lbi[2];
1588
1589   not_last_hit = last_check->first;
1590   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1591   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1592
1593   vnet_buffer (b[0])->ip.fib_index =
1594     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1595     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1596     vnet_buffer (b[0])->ip.fib_index;
1597
1598   vnet_buffer (b[1])->ip.fib_index =
1599     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1600     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1601     vnet_buffer (b[1])->ip.fib_index;
1602
1603   /*
1604    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1605    *  adjacency for the destination address (the local interface address).
1606    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1607    *  adjacency for the source address (the remote sender's address)
1608    */
1609   if (PREDICT_TRUE (not_last_hit))
1610     {
1611       ip4_fib_forwarding_lookup_x2 (
1612         vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
1613         &ip[0]->src_address, &ip[1]->src_address, &lbi[0], &lbi[1]);
1614
1615       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1616         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1617       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1618
1619       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1620         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1621       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1622
1623       lb[0] = load_balance_get (lbi[0]);
1624       lb[1] = load_balance_get (lbi[1]);
1625
1626       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1627       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1628
1629       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1630                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1631                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1632       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1633                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1634                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1635                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1636
1637       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1638                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1639                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1640       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1641                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1642                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1643                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1644
1645       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1646       last_check->lbi = lbi[1];
1647       last_check->error = error[1];
1648       last_check->first = 0;
1649     }
1650   else
1651     {
1652       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1653         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1654       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1655
1656       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1657         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1658       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1659
1660       error[0] = last_check->error;
1661       error[1] = last_check->error;
1662     }
1663 }
1664
/*
 * Packet classes returned by ip4_local_classify().
 *
 * IP_LOCAL_PACKET_TYPE_L4 is the first enumerator and therefore 0;
 * ip4_local_inline() tests the returned type for non-zero to recognise
 * packets that are not plain L4, so L4 must stay first.
 */
enum ip_local_packet_type_e
{
  /* regular packet */
  IP_LOCAL_PACKET_TYPE_L4,
  /* buffer carries VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_NAT,
  /* IPv4 fragment, sent to reassembly */
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1671
1672 /**
1673  * Determine packet type and next node.
1674  *
1675  * The expectation is that all packets that are not L4 will skip
1676  * checksums and source checks.
1677  */
1678 always_inline u8
1679 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1680 {
1681   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1682
1683   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1684     {
1685       *next = IP_LOCAL_NEXT_REASSEMBLY;
1686       return IP_LOCAL_PACKET_TYPE_FRAG;
1687     }
1688   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1689     {
1690       *next = lm->local_next_by_ip_protocol[ip->protocol];
1691       return IP_LOCAL_PACKET_TYPE_NAT;
1692     }
1693
1694   *next = lm->local_next_by_ip_protocol[ip->protocol];
1695   return IP_LOCAL_PACKET_TYPE_L4;
1696 }
1697
1698 static inline uword
1699 ip4_local_inline (vlib_main_t * vm,
1700                   vlib_node_runtime_t * node,
1701                   vlib_frame_t * frame, int head_of_feature_arc)
1702 {
1703   u32 *from, n_left_from;
1704   vlib_node_runtime_t *error_node =
1705     vlib_node_get_runtime (vm, ip4_local_node.index);
1706   u16 nexts[VLIB_FRAME_SIZE], *next;
1707   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1708   ip4_header_t *ip[2];
1709   u8 error[2], pt[2];
1710
1711   ip4_local_last_check_t last_check = {
1712     /*
1713      * 0.0.0.0 can appear as the source address of an IP packet,
1714      * as can any other address, hence the need to use the 'first'
1715      * member to make sure the .lbi is initialised for the first
1716      * packet.
1717      */
1718     .src = {.as_u32 = 0},
1719     .lbi = ~0,
1720     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1721     .first = 1,
1722   };
1723
1724   from = vlib_frame_vector_args (frame);
1725   n_left_from = frame->n_vectors;
1726
1727   if (node->flags & VLIB_NODE_FLAG_TRACE)
1728     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1729
1730   vlib_get_buffers (vm, from, bufs, n_left_from);
1731   b = bufs;
1732   next = nexts;
1733
1734   while (n_left_from >= 6)
1735     {
1736       u8 not_batch = 0;
1737
1738       /* Prefetch next iteration. */
1739       {
1740         vlib_prefetch_buffer_header (b[4], LOAD);
1741         vlib_prefetch_buffer_header (b[5], LOAD);
1742
1743         clib_prefetch_load (b[4]->data);
1744         clib_prefetch_load (b[5]->data);
1745       }
1746
1747       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1748
1749       ip[0] = vlib_buffer_get_current (b[0]);
1750       ip[1] = vlib_buffer_get_current (b[1]);
1751
1752       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1753       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1754
1755       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1756       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1757
1758       not_batch = pt[0] ^ pt[1];
1759
1760       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1761         goto skip_checks;
1762
1763       if (PREDICT_TRUE (not_batch == 0))
1764         {
1765           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1766           ip4_local_check_src_x2 (b, ip, &last_check, error);
1767         }
1768       else
1769         {
1770           if (!pt[0])
1771             {
1772               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1773               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1774             }
1775           if (!pt[1])
1776             {
1777               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1778               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1779             }
1780         }
1781
1782     skip_checks:
1783
1784       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1785                                     head_of_feature_arc);
1786       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1787                                     head_of_feature_arc);
1788
1789       b += 2;
1790       next += 2;
1791       n_left_from -= 2;
1792     }
1793
1794   while (n_left_from > 0)
1795     {
1796       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1797
1798       ip[0] = vlib_buffer_get_current (b[0]);
1799       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1800       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1801
1802       if (head_of_feature_arc == 0 || pt[0])
1803         goto skip_check;
1804
1805       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1806       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1807
1808     skip_check:
1809
1810       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1811                                     head_of_feature_arc);
1812
1813       b += 1;
1814       next += 1;
1815       n_left_from -= 1;
1816     }
1817
1818   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1819   return frame->n_vectors;
1820 }
1821
1822 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1823                                vlib_frame_t * frame)
1824 {
1825   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1826 }
1827
1828 /* *INDENT-OFF* */
1829 VLIB_REGISTER_NODE (ip4_local_node) =
1830 {
1831   .name = "ip4-local",
1832   .vector_size = sizeof (u32),
1833   .format_trace = format_ip4_forward_next_trace,
1834   .n_errors = IP4_N_ERROR,
1835   .error_strings = ip4_error_strings,
1836   .n_next_nodes = IP_LOCAL_N_NEXT,
1837   .next_nodes =
1838   {
1839     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1840     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1841     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1842     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1843     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1844   },
1845 };
1846 /* *INDENT-ON* */
1847
1848
1849 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1850                                           vlib_node_runtime_t * node,
1851                                           vlib_frame_t * frame)
1852 {
1853   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1854 }
1855
1856 /* *INDENT-OFF* */
1857 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1858   .name = "ip4-local-end-of-arc",
1859   .vector_size = sizeof (u32),
1860
1861   .format_trace = format_ip4_forward_next_trace,
1862   .sibling_of = "ip4-local",
1863 };
1864
1865 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1866   .arc_name = "ip4-local",
1867   .node_name = "ip4-local-end-of-arc",
1868   .runs_before = 0, /* not before any other features */
1869 };
1870 /* *INDENT-ON* */
1871
1872 #ifndef CLIB_MARCH_VARIANT
1873 void
1874 ip4_register_protocol (u32 protocol, u32 node_index)
1875 {
1876   vlib_main_t *vm = vlib_get_main ();
1877   ip4_main_t *im = &ip4_main;
1878   ip_lookup_main_t *lm = &im->lookup_main;
1879
1880   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1881   lm->local_next_by_ip_protocol[protocol] =
1882     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1883 }
1884
1885 void
1886 ip4_unregister_protocol (u32 protocol)
1887 {
1888   ip4_main_t *im = &ip4_main;
1889   ip_lookup_main_t *lm = &im->lookup_main;
1890
1891   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1892   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1893 }
1894 #endif
1895
1896 static clib_error_t *
1897 show_ip_local_command_fn (vlib_main_t * vm,
1898                           unformat_input_t * input, vlib_cli_command_t * cmd)
1899 {
1900   ip4_main_t *im = &ip4_main;
1901   ip_lookup_main_t *lm = &im->lookup_main;
1902   int i;
1903
1904   vlib_cli_output (vm, "Protocols handled by ip4_local");
1905   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1906     {
1907       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1908         {
1909           u32 node_index = vlib_get_node (vm,
1910                                           ip4_local_node.index)->
1911             next_nodes[lm->local_next_by_ip_protocol[i]];
1912           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1913                            format_vlib_node_name, vm, node_index);
1914         }
1915     }
1916   return 0;
1917 }
1918
1919
1920
1921 /*?
1922  * Display the set of protocols handled by the local IPv4 stack.
1923  *
1924  * @cliexpar
1925  * Example of how to display local protocol table:
1926  * @cliexstart{show ip local}
1927  * Protocols handled by ip4_local
1928  * 1
1929  * 17
1930  * 47
1931  * @cliexend
1932 ?*/
1933 /* *INDENT-OFF* */
1934 VLIB_CLI_COMMAND (show_ip_local, static) =
1935 {
1936   .path = "show ip local",
1937   .function = show_ip_local_command_fn,
1938   .short_help = "show ip local",
1939 };
1940 /* *INDENT-ON* */
1941
/** Next-node indices statically registered by the ip4-rewrite node. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT = 3,       /* Last: number of next nodes */
} ip4_rewrite_next_t;
1949
/**
 * The bits of an IPv4 address to mask to construct a multicast
 * MAC address.
 *
 * The mask is applied to the address as it sits in memory (network
 * byte order), so its value depends on host endianness: it keeps the
 * low 23 bits of the address, which on a little-endian host are the
 * top two bytes plus seven bits of the second byte.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1959
1960 always_inline void
1961 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1962                u16 adj_packet_bytes, bool df, u16 * next,
1963                u8 is_midchain, u32 * error)
1964 {
1965   if (packet_len > adj_packet_bytes)
1966     {
1967       *error = IP4_ERROR_MTU_EXCEEDED;
1968       if (df)
1969         {
1970           icmp4_error_set_vnet_buffer
1971             (b, ICMP4_destination_unreachable,
1972              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1973              adj_packet_bytes);
1974           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1975         }
1976       else
1977         {
1978           /* IP fragmentation */
1979           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1980                                    (is_midchain ?
1981                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1982                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1983           *next = IP4_REWRITE_NEXT_FRAGMENT;
1984         }
1985     }
1986 }
1987
1988 /* increment TTL & update checksum.
1989    Works either endian, so no need for byte swap. */
1990 static_always_inline void
1991 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
1992 {
1993   i32 ttl;
1994   u32 checksum;
1995   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
1996     return;
1997
1998   ttl = ip->ttl;
1999
2000   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2001   checksum += checksum >= 0xffff;
2002
2003   ip->checksum = checksum;
2004   ttl += 1;
2005   ip->ttl = ttl;
2006
2007   ASSERT (ip4_header_checksum_is_valid (ip));
2008 }
2009
2010 /* Decrement TTL & update checksum.
2011    Works either endian, so no need for byte swap. */
2012 static_always_inline void
2013 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2014                             u32 * error)
2015 {
2016   i32 ttl;
2017   u32 checksum;
2018   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2019     return;
2020
2021   ttl = ip->ttl;
2022
2023   /* Input node should have reject packets with ttl 0. */
2024   ASSERT (ip->ttl > 0);
2025
2026   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2027   checksum += checksum >= 0xffff;
2028
2029   ip->checksum = checksum;
2030   ttl -= 1;
2031   ip->ttl = ttl;
2032
2033   /*
2034    * If the ttl drops below 1 when forwarding, generate
2035    * an ICMP response.
2036    */
2037   if (PREDICT_FALSE (ttl <= 0))
2038     {
2039       *error = IP4_ERROR_TIME_EXPIRED;
2040       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2041       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2042                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2043                                    0);
2044       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2045     }
2046
2047   /* Verify checksum. */
2048   ASSERT (ip4_header_checksum_is_valid (ip) ||
2049           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM));
2050 }
2051
2052 always_inline uword
2053 ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
2054                     vlib_frame_t *frame, int do_counters, int is_midchain,
2055                     int is_mcast)
2056 {
2057   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2058   u32 *from = vlib_frame_vector_args (frame);
2059   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2060   u16 nexts[VLIB_FRAME_SIZE], *next;
2061   u32 n_left_from;
2062   vlib_node_runtime_t *error_node =
2063     vlib_node_get_runtime (vm, ip4_input_node.index);
2064
2065   n_left_from = frame->n_vectors;
2066   u32 thread_index = vm->thread_index;
2067
2068   vlib_get_buffers (vm, from, bufs, n_left_from);
2069   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2070
2071 #if (CLIB_N_PREFETCHES >= 8)
2072   if (n_left_from >= 6)
2073     {
2074       int i;
2075       for (i = 2; i < 6; i++)
2076         vlib_prefetch_buffer_header (bufs[i], LOAD);
2077     }
2078
2079   next = nexts;
2080   b = bufs;
2081   while (n_left_from >= 8)
2082     {
2083       const ip_adjacency_t *adj0, *adj1;
2084       ip4_header_t *ip0, *ip1;
2085       u32 rw_len0, error0, adj_index0;
2086       u32 rw_len1, error1, adj_index1;
2087       u32 tx_sw_if_index0, tx_sw_if_index1;
2088       u8 *p;
2089
2090       if (is_midchain)
2091         {
2092           vlib_prefetch_buffer_header (b[6], LOAD);
2093           vlib_prefetch_buffer_header (b[7], LOAD);
2094         }
2095
2096       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2097       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2098
2099       /*
2100        * pre-fetch the per-adjacency counters
2101        */
2102       if (do_counters)
2103         {
2104           vlib_prefetch_combined_counter (&adjacency_counters,
2105                                           thread_index, adj_index0);
2106           vlib_prefetch_combined_counter (&adjacency_counters,
2107                                           thread_index, adj_index1);
2108         }
2109
2110       ip0 = vlib_buffer_get_current (b[0]);
2111       ip1 = vlib_buffer_get_current (b[1]);
2112
2113       error0 = error1 = IP4_ERROR_NONE;
2114
2115       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2116       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2117
2118       /* Rewrite packet header and updates lengths. */
2119       adj0 = adj_get (adj_index0);
2120       adj1 = adj_get (adj_index1);
2121
2122       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2123       rw_len0 = adj0[0].rewrite_header.data_bytes;
2124       rw_len1 = adj1[0].rewrite_header.data_bytes;
2125       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2126       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2127
2128       p = vlib_buffer_get_current (b[2]);
2129       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2130       clib_prefetch_load (p);
2131
2132       p = vlib_buffer_get_current (b[3]);
2133       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2134       clib_prefetch_load (p);
2135
2136       /* Check MTU of outgoing interface. */
2137       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2138       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2139
2140       if (b[0]->flags & VNET_BUFFER_F_GSO)
2141         ip0_len = gso_mtu_sz (b[0]);
2142       if (b[1]->flags & VNET_BUFFER_F_GSO)
2143         ip1_len = gso_mtu_sz (b[1]);
2144
2145       ip4_mtu_check (b[0], ip0_len,
2146                      adj0[0].rewrite_header.max_l3_packet_bytes,
2147                      ip0->flags_and_fragment_offset &
2148                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2149                      next + 0, is_midchain, &error0);
2150       ip4_mtu_check (b[1], ip1_len,
2151                      adj1[0].rewrite_header.max_l3_packet_bytes,
2152                      ip1->flags_and_fragment_offset &
2153                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2154                      next + 1, is_midchain, &error1);
2155
2156       if (is_mcast)
2157         {
2158           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2159                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2160                     IP4_ERROR_SAME_INTERFACE : error0);
2161           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2162                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2163                     IP4_ERROR_SAME_INTERFACE : error1);
2164         }
2165
2166       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2167        * to see the IP header */
2168       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2169         {
2170           u32 next_index = adj0[0].rewrite_header.next_index;
2171           vlib_buffer_advance (b[0], -(word) rw_len0);
2172
2173           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2174           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2175
2176           if (PREDICT_FALSE
2177               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2178             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2179                                                 tx_sw_if_index0,
2180                                                 &next_index, b[0],
2181                                                 adj0->ia_cfg_index);
2182
2183           next[0] = next_index;
2184           if (is_midchain)
2185             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2186                                         0 /* is_ip6 */ );
2187         }
2188       else
2189         {
2190           b[0]->error = error_node->errors[error0];
2191           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2192             ip4_ttl_inc (b[0], ip0);
2193         }
2194       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2195         {
2196           u32 next_index = adj1[0].rewrite_header.next_index;
2197           vlib_buffer_advance (b[1], -(word) rw_len1);
2198
2199           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2200           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2201
2202           if (PREDICT_FALSE
2203               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2204             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2205                                                 tx_sw_if_index1,
2206                                                 &next_index, b[1],
2207                                                 adj1->ia_cfg_index);
2208           next[1] = next_index;
2209           if (is_midchain)
2210             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2211                                         0 /* is_ip6 */ );
2212         }
2213       else
2214         {
2215           b[1]->error = error_node->errors[error1];
2216           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2217             ip4_ttl_inc (b[1], ip1);
2218         }
2219
2220       if (is_midchain)
2221         /* Guess we are only writing on ipv4 header. */
2222         vnet_rewrite_two_headers (adj0[0], adj1[0],
2223                                   ip0, ip1, sizeof (ip4_header_t));
2224       else
2225         /* Guess we are only writing on simple Ethernet header. */
2226         vnet_rewrite_two_headers (adj0[0], adj1[0],
2227                                   ip0, ip1, sizeof (ethernet_header_t));
2228
2229       if (do_counters)
2230         {
2231           if (error0 == IP4_ERROR_NONE)
2232             vlib_increment_combined_counter
2233               (&adjacency_counters,
2234                thread_index,
2235                adj_index0, 1,
2236                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2237
2238           if (error1 == IP4_ERROR_NONE)
2239             vlib_increment_combined_counter
2240               (&adjacency_counters,
2241                thread_index,
2242                adj_index1, 1,
2243                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2244         }
2245
2246       if (is_midchain)
2247         {
2248           if (error0 == IP4_ERROR_NONE)
2249             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2250           if (error1 == IP4_ERROR_NONE)
2251             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2252         }
2253
2254       if (is_mcast)
2255         {
2256           /* copy bytes from the IP address into the MAC rewrite */
2257           if (error0 == IP4_ERROR_NONE)
2258             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2259                                         adj0->rewrite_header.dst_mcast_offset,
2260                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2261           if (error1 == IP4_ERROR_NONE)
2262             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2263                                         adj1->rewrite_header.dst_mcast_offset,
2264                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2265         }
2266
2267       next += 2;
2268       b += 2;
2269       n_left_from -= 2;
2270     }
2271 #elif (CLIB_N_PREFETCHES >= 4)
2272   next = nexts;
2273   b = bufs;
2274   while (n_left_from >= 1)
2275     {
2276       ip_adjacency_t *adj0;
2277       ip4_header_t *ip0;
2278       u32 rw_len0, error0, adj_index0;
2279       u32 tx_sw_if_index0;
2280       u8 *p;
2281
2282       /* Prefetch next iteration */
2283       if (PREDICT_TRUE (n_left_from >= 4))
2284         {
2285           ip_adjacency_t *adj2;
2286           u32 adj_index2;
2287
2288           vlib_prefetch_buffer_header (b[3], LOAD);
2289           vlib_prefetch_buffer_data (b[2], LOAD);
2290
2291           /* Prefetch adj->rewrite_header */
2292           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2293           adj2 = adj_get (adj_index2);
2294           p = (u8 *) adj2;
2295           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2296                          LOAD);
2297         }
2298
2299       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2300
2301       /*
2302        * Prefetch the per-adjacency counters
2303        */
2304       if (do_counters)
2305         {
2306           vlib_prefetch_combined_counter (&adjacency_counters,
2307                                           thread_index, adj_index0);
2308         }
2309
2310       ip0 = vlib_buffer_get_current (b[0]);
2311
2312       error0 = IP4_ERROR_NONE;
2313
2314       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2315
2316       /* Rewrite packet header and updates lengths. */
2317       adj0 = adj_get (adj_index0);
2318
2319       /* Rewrite header was prefetched. */
2320       rw_len0 = adj0[0].rewrite_header.data_bytes;
2321       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2322
2323       /* Check MTU of outgoing interface. */
2324       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2325
2326       if (b[0]->flags & VNET_BUFFER_F_GSO)
2327         ip0_len = gso_mtu_sz (b[0]);
2328
2329       ip4_mtu_check (b[0], ip0_len,
2330                      adj0[0].rewrite_header.max_l3_packet_bytes,
2331                      ip0->flags_and_fragment_offset &
2332                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2333                      next + 0, is_midchain, &error0);
2334
2335       if (is_mcast)
2336         {
2337           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2338                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2339                     IP4_ERROR_SAME_INTERFACE : error0);
2340         }
2341
2342       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2343        * to see the IP header */
2344       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2345         {
2346           u32 next_index = adj0[0].rewrite_header.next_index;
2347           vlib_buffer_advance (b[0], -(word) rw_len0);
2348           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2349           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2350
2351           if (PREDICT_FALSE
2352               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2353             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2354                                                 tx_sw_if_index0,
2355                                                 &next_index, b[0],
2356                                                 adj0->ia_cfg_index);
2357           next[0] = next_index;
2358
2359           if (is_midchain)
2360             {
2361               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2362                                           0 /* is_ip6 */ );
2363
2364               /* Guess we are only writing on ipv4 header. */
2365               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2366             }
2367           else
2368             /* Guess we are only writing on simple Ethernet header. */
2369             vnet_rewrite_one_header (adj0[0], ip0,
2370                                      sizeof (ethernet_header_t));
2371
2372           /*
2373            * Bump the per-adjacency counters
2374            */
2375           if (do_counters)
2376             vlib_increment_combined_counter
2377               (&adjacency_counters,
2378                thread_index,
2379                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2380                                                            b[0]) + rw_len0);
2381
2382           if (is_midchain)
2383             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2384
2385           if (is_mcast)
2386             /* copy bytes from the IP address into the MAC rewrite */
2387             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2388                                         adj0->rewrite_header.dst_mcast_offset,
2389                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2390         }
2391       else
2392         {
2393           b[0]->error = error_node->errors[error0];
2394           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2395             ip4_ttl_inc (b[0], ip0);
2396         }
2397
2398       next += 1;
2399       b += 1;
2400       n_left_from -= 1;
2401     }
2402 #endif
2403
2404   while (n_left_from > 0)
2405     {
2406       ip_adjacency_t *adj0;
2407       ip4_header_t *ip0;
2408       u32 rw_len0, adj_index0, error0;
2409       u32 tx_sw_if_index0;
2410
2411       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2412
2413       adj0 = adj_get (adj_index0);
2414
2415       if (do_counters)
2416         vlib_prefetch_combined_counter (&adjacency_counters,
2417                                         thread_index, adj_index0);
2418
2419       ip0 = vlib_buffer_get_current (b[0]);
2420
2421       error0 = IP4_ERROR_NONE;
2422
2423       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2424
2425
2426       /* Update packet buffer attributes/set output interface. */
2427       rw_len0 = adj0[0].rewrite_header.data_bytes;
2428       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2429
2430       /* Check MTU of outgoing interface. */
2431       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2432       if (b[0]->flags & VNET_BUFFER_F_GSO)
2433         ip0_len = gso_mtu_sz (b[0]);
2434
2435       ip4_mtu_check (b[0], ip0_len,
2436                      adj0[0].rewrite_header.max_l3_packet_bytes,
2437                      ip0->flags_and_fragment_offset &
2438                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2439                      next + 0, is_midchain, &error0);
2440
2441       if (is_mcast)
2442         {
2443           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2444                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2445                     IP4_ERROR_SAME_INTERFACE : error0);
2446         }
2447
2448       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2449        * to see the IP header */
2450       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2451         {
2452           u32 next_index = adj0[0].rewrite_header.next_index;
2453           vlib_buffer_advance (b[0], -(word) rw_len0);
2454           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2455           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2456
2457           if (PREDICT_FALSE
2458               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2459             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2460                                                 tx_sw_if_index0,
2461                                                 &next_index, b[0],
2462                                                 adj0->ia_cfg_index);
2463           next[0] = next_index;
2464
2465           if (is_midchain)
2466             {
2467               /* this acts on the packet that is about to be encapped */
2468               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2469                                           0 /* is_ip6 */ );
2470
2471               /* Guess we are only writing on ipv4 header. */
2472               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2473             }
2474           else
2475             /* Guess we are only writing on simple Ethernet header. */
2476             vnet_rewrite_one_header (adj0[0], ip0,
2477                                      sizeof (ethernet_header_t));
2478
2479           if (do_counters)
2480             vlib_increment_combined_counter
2481               (&adjacency_counters,
2482                thread_index, adj_index0, 1,
2483                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2484
2485           if (is_midchain)
2486             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2487
2488           if (is_mcast)
2489             /* copy bytes from the IP address into the MAC rewrite */
2490             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2491                                         adj0->rewrite_header.dst_mcast_offset,
2492                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2493         }
2494       else
2495         {
2496           b[0]->error = error_node->errors[error0];
2497           /* undo the TTL decrement - we'll be back to do it again */
2498           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2499             ip4_ttl_inc (b[0], ip0);
2500         }
2501
2502       next += 1;
2503       b += 1;
2504       n_left_from -= 1;
2505     }
2506
2507
2508   /* Need to do trace after rewrites to pick up new packet data. */
2509   if (node->flags & VLIB_NODE_FLAG_TRACE)
2510     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2511
2512   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2513   return frame->n_vectors;
2514 }
2515
2516 /** @brief IPv4 rewrite node.
2517     @node ip4-rewrite
2518
2519     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2520     header checksum, fetch the ip adjacency, check the outbound mtu,
2521     apply the adjacency rewrite, and send pkts to the adjacency
2522     rewrite header's rewrite_next_index.
2523
2524     @param vm vlib_main_t corresponding to the current thread
2525     @param node vlib_node_runtime_t
2526     @param frame vlib_frame_t whose contents should be dispatched
2527
2528     @par Graph mechanics: buffer metadata, next index usage
2529
2530     @em Uses:
2531     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2532         - the rewrite adjacency index
2533     - <code>adj->lookup_next_index</code>
2534         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2535           the packet will be dropped.
2536     - <code>adj->rewrite_header</code>
2537         - Rewrite string length, rewrite string, next_index
2538
2539     @em Sets:
2540     - <code>b->current_data, b->current_length</code>
2541         - Updated net of applying the rewrite string
2542
2543     <em>Next Indices:</em>
2544     - <code> adj->rewrite_header.next_index </code>
2545       or @c ip4-drop
2546 */
2547
2548 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2549                                  vlib_frame_t * frame)
2550 {
2551   if (adj_are_counters_enabled ())
2552     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2553   else
2554     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2555 }
2556
2557 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2558                                        vlib_node_runtime_t * node,
2559                                        vlib_frame_t * frame)
2560 {
2561   if (adj_are_counters_enabled ())
2562     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2563   else
2564     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2565 }
2566
2567 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2568                                   vlib_node_runtime_t * node,
2569                                   vlib_frame_t * frame)
2570 {
2571   if (adj_are_counters_enabled ())
2572     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2573   else
2574     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2575 }
2576
2577 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2578                                        vlib_node_runtime_t * node,
2579                                        vlib_frame_t * frame)
2580 {
2581   if (adj_are_counters_enabled ())
2582     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2583   else
2584     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2585 }
2586
2587 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2588                                         vlib_node_runtime_t * node,
2589                                         vlib_frame_t * frame)
2590 {
2591   if (adj_are_counters_enabled ())
2592     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2593   else
2594     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2595 }
2596
2597 /* *INDENT-OFF* */
2598 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2599   .name = "ip4-rewrite",
2600   .vector_size = sizeof (u32),
2601
2602   .format_trace = format_ip4_rewrite_trace,
2603
2604   .n_next_nodes = IP4_REWRITE_N_NEXT,
2605   .next_nodes = {
2606     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2607     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2608     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2609   },
2610 };
2611
2612 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2613   .name = "ip4-rewrite-bcast",
2614   .vector_size = sizeof (u32),
2615
2616   .format_trace = format_ip4_rewrite_trace,
2617   .sibling_of = "ip4-rewrite",
2618 };
2619
2620 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2621   .name = "ip4-rewrite-mcast",
2622   .vector_size = sizeof (u32),
2623
2624   .format_trace = format_ip4_rewrite_trace,
2625   .sibling_of = "ip4-rewrite",
2626 };
2627
2628 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2629   .name = "ip4-mcast-midchain",
2630   .vector_size = sizeof (u32),
2631
2632   .format_trace = format_ip4_rewrite_trace,
2633   .sibling_of = "ip4-rewrite",
2634 };
2635
2636 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2637   .name = "ip4-midchain",
2638   .vector_size = sizeof (u32),
2639   .format_trace = format_ip4_rewrite_trace,
2640   .sibling_of = "ip4-rewrite",
2641 };
/* *INDENT-ON* */
2643
2644 static clib_error_t *
2645 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2646                              unformat_input_t * input,
2647                              vlib_cli_command_t * cmd)
2648 {
2649   int matched = 0;
2650   u32 table_id = 0;
2651   u32 flow_hash_config = 0;
2652   int rv;
2653
2654   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2655     {
2656       if (unformat (input, "table %d", &table_id))
2657         matched = 1;
2658 #define _(a, b, v)                                                            \
2659   else if (unformat (input, #a))                                              \
2660   {                                                                           \
2661     flow_hash_config |= v;                                                    \
2662     matched = 1;                                                              \
2663   }
2664       foreach_flow_hash_bit
2665 #undef _
2666         else
2667         break;
2668     }
2669
2670   if (matched == 0)
2671     return clib_error_return (0, "unknown input `%U'",
2672                               format_unformat_error, input);
2673
2674   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2675   switch (rv)
2676     {
2677     case 0:
2678       break;
2679
2680     case VNET_API_ERROR_NO_SUCH_FIB:
2681       return clib_error_return (0, "no such FIB table %d", table_id);
2682
2683     default:
2684       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2685       break;
2686     }
2687
2688   return 0;
2689 }
2690
2691 /*?
2692  * Configure the set of IPv4 fields used by the flow hash.
2693  *
2694  * @cliexpar
2695  * Example of how to set the flow hash on a given table:
2696  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2698  * @cliexstart{show ip fib}
2699  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2700  * 0.0.0.0/0
2701  *   unicast-ip4-chain
2702  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2703  *     [0] [@0]: dpo-drop ip6
2704  * 0.0.0.0/32
2705  *   unicast-ip4-chain
2706  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2707  *     [0] [@0]: dpo-drop ip6
2708  * 224.0.0.0/8
2709  *   unicast-ip4-chain
2710  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2711  *     [0] [@0]: dpo-drop ip6
2712  * 6.0.1.2/32
2713  *   unicast-ip4-chain
2714  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2715  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2716  * 7.0.0.1/32
2717  *   unicast-ip4-chain
2718  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2719  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2720  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2721  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2722  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2723  * 240.0.0.0/8
2724  *   unicast-ip4-chain
2725  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2726  *     [0] [@0]: dpo-drop ip6
2727  * 255.255.255.255/32
2728  *   unicast-ip4-chain
2729  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2730  *     [0] [@0]: dpo-drop ip6
2731  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2732  * 0.0.0.0/0
2733  *   unicast-ip4-chain
2734  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2735  *     [0] [@0]: dpo-drop ip6
2736  * 0.0.0.0/32
2737  *   unicast-ip4-chain
2738  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2739  *     [0] [@0]: dpo-drop ip6
2740  * 172.16.1.0/24
2741  *   unicast-ip4-chain
2742  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2743  *     [0] [@4]: ipv4-glean: af_packet0
2744  * 172.16.1.1/32
2745  *   unicast-ip4-chain
2746  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2747  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2748  * 172.16.1.2/32
2749  *   unicast-ip4-chain
2750  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2751  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2752  * 172.16.2.0/24
2753  *   unicast-ip4-chain
2754  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2755  *     [0] [@4]: ipv4-glean: af_packet1
2756  * 172.16.2.1/32
2757  *   unicast-ip4-chain
2758  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2759  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2760  * 224.0.0.0/8
2761  *   unicast-ip4-chain
2762  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2763  *     [0] [@0]: dpo-drop ip6
2764  * 240.0.0.0/8
2765  *   unicast-ip4-chain
2766  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2767  *     [0] [@0]: dpo-drop ip6
2768  * 255.255.255.255/32
2769  *   unicast-ip4-chain
2770  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2771  *     [0] [@0]: dpo-drop ip6
2772  * @cliexend
2773 ?*/
2774 /* *INDENT-OFF* */
2775 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2776 {
2777   .path = "set ip flow-hash",
2778   .short_help =
2779   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2780   .function = set_ip_flow_hash_command_fn,
2781 };
2782 /* *INDENT-ON* */
2783
2784 #ifndef CLIB_MARCH_VARIANT
2785 int
2786 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2787                              u32 table_index)
2788 {
2789   vnet_main_t *vnm = vnet_get_main ();
2790   vnet_interface_main_t *im = &vnm->interface_main;
2791   ip4_main_t *ipm = &ip4_main;
2792   ip_lookup_main_t *lm = &ipm->lookup_main;
2793   vnet_classify_main_t *cm = &vnet_classify_main;
2794   ip4_address_t *if_addr;
2795
2796   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2797     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2798
2799   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2800     return VNET_API_ERROR_NO_SUCH_ENTRY;
2801
2802   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2803   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2804
2805   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2806
2807   if (NULL != if_addr)
2808     {
2809       fib_prefix_t pfx = {
2810         .fp_len = 32,
2811         .fp_proto = FIB_PROTOCOL_IP4,
2812         .fp_addr.ip4 = *if_addr,
2813       };
2814       u32 fib_index;
2815
2816       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2817                                                        sw_if_index);
2818
2819
2820       if (table_index != (u32) ~ 0)
2821         {
2822           dpo_id_t dpo = DPO_INVALID;
2823
2824           dpo_set (&dpo,
2825                    DPO_CLASSIFY,
2826                    DPO_PROTO_IP4,
2827                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2828
2829           fib_table_entry_special_dpo_add (fib_index,
2830                                            &pfx,
2831                                            FIB_SOURCE_CLASSIFY,
2832                                            FIB_ENTRY_FLAG_NONE, &dpo);
2833           dpo_reset (&dpo);
2834         }
2835       else
2836         {
2837           fib_table_entry_special_remove (fib_index,
2838                                           &pfx, FIB_SOURCE_CLASSIFY);
2839         }
2840     }
2841
2842   return 0;
2843 }
2844 #endif
2845
2846 static clib_error_t *
2847 set_ip_classify_command_fn (vlib_main_t * vm,
2848                             unformat_input_t * input,
2849                             vlib_cli_command_t * cmd)
2850 {
2851   u32 table_index = ~0;
2852   int table_index_set = 0;
2853   u32 sw_if_index = ~0;
2854   int rv;
2855
2856   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2857     {
2858       if (unformat (input, "table-index %d", &table_index))
2859         table_index_set = 1;
2860       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2861                          vnet_get_main (), &sw_if_index))
2862         ;
2863       else
2864         break;
2865     }
2866
2867   if (table_index_set == 0)
2868     return clib_error_return (0, "classify table-index must be specified");
2869
2870   if (sw_if_index == ~0)
2871     return clib_error_return (0, "interface / subif must be specified");
2872
2873   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2874
2875   switch (rv)
2876     {
2877     case 0:
2878       break;
2879
2880     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2881       return clib_error_return (0, "No such interface");
2882
2883     case VNET_API_ERROR_NO_SUCH_ENTRY:
2884       return clib_error_return (0, "No such classifier table");
2885     }
2886   return 0;
2887 }
2888
2889 /*?
2890  * Assign a classification table to an interface. The classification
2891  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2893  * on an interface.
2894  *
2895  * @cliexpar
2896  * Example of how to assign a classification table to an interface:
2897  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2898 ?*/
2899 /* *INDENT-OFF* */
2900 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2901 {
2902     .path = "set ip classify",
2903     .short_help =
2904     "set ip classify intfc <interface> table-index <classify-idx>",
2905     .function = set_ip_classify_command_fn,
2906 };
2907 /* *INDENT-ON* */
2908
2909 /*
2910  * fd.io coding-style-patch-verification: ON
2911  *
2912  * Local Variables:
2913  * eval: (c-set-style "gnu")
2914  * End:
2915  */