interface: fix init fib_index_by_sw_if_index
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57 #include <vnet/adj/adj_dp.h>
58 #include <vnet/pg/pg.h>
59
60 #include <vnet/ip/ip4_forward.h>
61 #include <vnet/interface_output.h>
62 #include <vnet/classify/vnet_classify.h>
63
64 /** @brief IPv4 lookup node.
65     @node ip4-lookup
66
67     This is the main IPv4 lookup dispatch node.
68
69     @param vm vlib_main_t corresponding to the current thread
70     @param node vlib_node_runtime_t
71     @param frame vlib_frame_t whose contents should be dispatched
72
73     @par Graph mechanics: buffer metadata, next index usage
74
75     @em Uses:
76     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
77         - Indicates the @c sw_if_index value of the interface that the
78           packet was received on.
79     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
80         - When the value is @c ~0 then the node performs a longest prefix
81           match (LPM) for the packet destination address in the FIB attached
82           to the receive interface.
83         - Otherwise perform LPM for the packet destination address in the
84           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
85           value (0, 1, ...) and not a VRF id.
86
87     @em Sets:
88     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
89         - The lookup result adjacency index.
90
91     <em>Next Index:</em>
92     - Dispatches the packet to the node index found in
93       ip_adjacency_t @c adj->lookup_next_index
94       (where @c adj is the lookup result adjacency).
95 */
96 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
97                                 vlib_frame_t * frame)
98 {
99   return ip4_lookup_inline (vm, node, frame);
100 }
101
102 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
103
104 /* *INDENT-OFF* */
105 VLIB_REGISTER_NODE (ip4_lookup_node) =
106 {
107   .name = "ip4-lookup",
108   .vector_size = sizeof (u32),
109   .format_trace = format_ip4_lookup_trace,
110   .n_next_nodes = IP_LOOKUP_N_NEXT,
111   .next_nodes = IP4_LOOKUP_NEXT_NODES,
112 };
113 /* *INDENT-ON* */
114
115 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
116                                       vlib_node_runtime_t * node,
117                                       vlib_frame_t * frame)
118 {
119   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
120   u32 n_left, *from;
121   u32 thread_index = vm->thread_index;
122   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
123   u16 nexts[VLIB_FRAME_SIZE], *next;
124
125   from = vlib_frame_vector_args (frame);
126   n_left = frame->n_vectors;
127   next = nexts;
128
129   vlib_get_buffers (vm, from, bufs, n_left);
130
131   while (n_left >= 4)
132     {
133       const load_balance_t *lb0, *lb1;
134       const ip4_header_t *ip0, *ip1;
135       u32 lbi0, hc0, lbi1, hc1;
136       const dpo_id_t *dpo0, *dpo1;
137
138       /* Prefetch next iteration. */
139       {
140         vlib_prefetch_buffer_header (b[2], LOAD);
141         vlib_prefetch_buffer_header (b[3], LOAD);
142
143         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
144         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
145       }
146
147       ip0 = vlib_buffer_get_current (b[0]);
148       ip1 = vlib_buffer_get_current (b[1]);
149       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
150       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
151
152       lb0 = load_balance_get (lbi0);
153       lb1 = load_balance_get (lbi1);
154
155       /*
156        * this node is for via FIBs we can re-use the hash value from the
157        * to node if present.
158        * We don't want to use the same hash value at each level in the recursion
159        * graph as that would lead to polarisation
160        */
161       hc0 = hc1 = 0;
162
163       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
164         {
165           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
166             {
167               hc0 = vnet_buffer (b[0])->ip.flow_hash =
168                 vnet_buffer (b[0])->ip.flow_hash >> 1;
169             }
170           else
171             {
172               hc0 = vnet_buffer (b[0])->ip.flow_hash =
173                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
174             }
175           dpo0 = load_balance_get_fwd_bucket
176             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
177         }
178       else
179         {
180           dpo0 = load_balance_get_bucket_i (lb0, 0);
181         }
182       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
183         {
184           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
185             {
186               hc1 = vnet_buffer (b[1])->ip.flow_hash =
187                 vnet_buffer (b[1])->ip.flow_hash >> 1;
188             }
189           else
190             {
191               hc1 = vnet_buffer (b[1])->ip.flow_hash =
192                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
193             }
194           dpo1 = load_balance_get_fwd_bucket
195             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
196         }
197       else
198         {
199           dpo1 = load_balance_get_bucket_i (lb1, 0);
200         }
201
202       next[0] = dpo0->dpoi_next_node;
203       next[1] = dpo1->dpoi_next_node;
204
205       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
206       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
207
208       vlib_increment_combined_counter
209         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
210       vlib_increment_combined_counter
211         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
212
213       b += 2;
214       next += 2;
215       n_left -= 2;
216     }
217
218   while (n_left > 0)
219     {
220       const load_balance_t *lb0;
221       const ip4_header_t *ip0;
222       const dpo_id_t *dpo0;
223       u32 lbi0, hc0;
224
225       ip0 = vlib_buffer_get_current (b[0]);
226       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
227
228       lb0 = load_balance_get (lbi0);
229
230       hc0 = 0;
231       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
232         {
233           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
234             {
235               hc0 = vnet_buffer (b[0])->ip.flow_hash =
236                 vnet_buffer (b[0])->ip.flow_hash >> 1;
237             }
238           else
239             {
240               hc0 = vnet_buffer (b[0])->ip.flow_hash =
241                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
242             }
243           dpo0 = load_balance_get_fwd_bucket
244             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
245         }
246       else
247         {
248           dpo0 = load_balance_get_bucket_i (lb0, 0);
249         }
250
251       next[0] = dpo0->dpoi_next_node;
252       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
253
254       vlib_increment_combined_counter
255         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
256
257       b += 1;
258       next += 1;
259       n_left -= 1;
260     }
261
262   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
263   if (node->flags & VLIB_NODE_FLAG_TRACE)
264     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
265
266   return frame->n_vectors;
267 }
268
269 /* *INDENT-OFF* */
270 VLIB_REGISTER_NODE (ip4_load_balance_node) =
271 {
272   .name = "ip4-load-balance",
273   .vector_size = sizeof (u32),
274   .sibling_of = "ip4-lookup",
275   .format_trace = format_ip4_lookup_trace,
276 };
277 /* *INDENT-ON* */
278
279 #ifndef CLIB_MARCH_VARIANT
280 /* get first interface address */
281 ip4_address_t *
282 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
283                              ip_interface_address_t ** result_ia)
284 {
285   ip_lookup_main_t *lm = &im->lookup_main;
286   ip_interface_address_t *ia = 0;
287   ip4_address_t *result = 0;
288
289   /* *INDENT-OFF* */
290   foreach_ip_interface_address
291     (lm, ia, sw_if_index,
292      1 /* honor unnumbered */ ,
293      ({
294        ip4_address_t * a =
295          ip_interface_address_get_address (lm, ia);
296        result = a;
297        break;
298      }));
299   /* *INDENT-OFF* */
300   if (result_ia)
301     *result_ia = result ? ia : 0;
302   return result;
303 }
304 #endif
305
306 static void
307 ip4_add_subnet_bcast_route (u32 fib_index,
308                             fib_prefix_t *pfx,
309                             u32 sw_if_index)
310 {
311   vnet_sw_interface_flags_t iflags;
312
313   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
314
315   fib_table_entry_special_remove(fib_index,
316                                  pfx,
317                                  FIB_SOURCE_INTERFACE);
318
319   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
320     {
321       fib_table_entry_update_one_path (fib_index, pfx,
322                                        FIB_SOURCE_INTERFACE,
323                                        FIB_ENTRY_FLAG_NONE,
324                                        DPO_PROTO_IP4,
325                                        /* No next-hop address */
326                                        &ADJ_BCAST_ADDR,
327                                        sw_if_index,
328                                        // invalid FIB index
329                                        ~0,
330                                        1,
331                                        // no out-label stack
332                                        NULL,
333                                        FIB_ROUTE_PATH_FLAG_NONE);
334     }
335   else
336     {
337         fib_table_entry_special_add(fib_index,
338                                     pfx,
339                                     FIB_SOURCE_INTERFACE,
340                                     (FIB_ENTRY_FLAG_DROP |
341                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
342     }
343 }
344
345 static void
346 ip4_add_interface_prefix_routes (ip4_main_t *im,
347                                  u32 sw_if_index,
348                                  u32 fib_index,
349                                  ip_interface_address_t * a)
350 {
351   ip_lookup_main_t *lm = &im->lookup_main;
352   ip_interface_prefix_t *if_prefix;
353   ip4_address_t *address = ip_interface_address_get_address (lm, a);
354
355   ip_interface_prefix_key_t key = {
356     .prefix = {
357       .fp_len = a->address_length,
358       .fp_proto = FIB_PROTOCOL_IP4,
359       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
360     },
361     .sw_if_index = sw_if_index,
362   };
363
364   fib_prefix_t pfx_special = {
365     .fp_proto = FIB_PROTOCOL_IP4,
366   };
367
368   /* If prefix already set on interface, just increment ref count & return */
369   if_prefix = ip_get_interface_prefix (lm, &key);
370   if (if_prefix)
371     {
372       if_prefix->ref_count += 1;
373       return;
374     }
375
376   /* New prefix - allocate a pool entry, initialize it, add to the hash */
377   pool_get (lm->if_prefix_pool, if_prefix);
378   if_prefix->ref_count = 1;
379   if_prefix->src_ia_index = a - lm->if_address_pool;
380   clib_memcpy (&if_prefix->key, &key, sizeof (key));
381   mhash_set (&lm->prefix_to_if_prefix_index, &key,
382              if_prefix - lm->if_prefix_pool, 0 /* old value */);
383
384   pfx_special.fp_len = a->address_length;
385   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
386
387   /* set the glean route for the prefix */
388   fib_table_entry_update_one_path (fib_index, &pfx_special,
389                                    FIB_SOURCE_INTERFACE,
390                                    (FIB_ENTRY_FLAG_CONNECTED |
391                                     FIB_ENTRY_FLAG_ATTACHED),
392                                    DPO_PROTO_IP4,
393                                    /* No next-hop address */
394                                    NULL,
395                                    sw_if_index,
396                                    /* invalid FIB index */
397                                    ~0,
398                                    1,
399                                    /* no out-label stack */
400                                    NULL,
401                                    FIB_ROUTE_PATH_FLAG_NONE);
402
403   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
404   if (a->address_length <= 30)
405     {
406       /* set a drop route for the base address of the prefix */
407       pfx_special.fp_len = 32;
408       pfx_special.fp_addr.ip4.as_u32 =
409         address->as_u32 & im->fib_masks[a->address_length];
410
411       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
412         fib_table_entry_special_add (fib_index, &pfx_special,
413                                      FIB_SOURCE_INTERFACE,
414                                      (FIB_ENTRY_FLAG_DROP |
415                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
416
417       /* set a route for the broadcast address of the prefix */
418       pfx_special.fp_len = 32;
419       pfx_special.fp_addr.ip4.as_u32 =
420         address->as_u32 | ~im->fib_masks[a->address_length];
421       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
422         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
423
424
425     }
426   /* length == 31 - add an attached route for the other address */
427   else if (a->address_length == 31)
428     {
429       pfx_special.fp_len = 32;
430       pfx_special.fp_addr.ip4.as_u32 =
431         address->as_u32 ^ clib_host_to_net_u32(1);
432
433       fib_table_entry_update_one_path (fib_index, &pfx_special,
434                                        FIB_SOURCE_INTERFACE,
435                                        (FIB_ENTRY_FLAG_ATTACHED),
436                                        DPO_PROTO_IP4,
437                                        &pfx_special.fp_addr,
438                                        sw_if_index,
439                                        /* invalid FIB index */
440                                        ~0,
441                                        1,
442                                        NULL,
443                                        FIB_ROUTE_PATH_FLAG_NONE);
444     }
445 }
446
447 static void
448 ip4_add_interface_routes (u32 sw_if_index,
449                           ip4_main_t * im, u32 fib_index,
450                           ip_interface_address_t * a)
451 {
452   ip_lookup_main_t *lm = &im->lookup_main;
453   ip4_address_t *address = ip_interface_address_get_address (lm, a);
454   fib_prefix_t pfx = {
455     .fp_len = 32,
456     .fp_proto = FIB_PROTOCOL_IP4,
457     .fp_addr.ip4 = *address,
458   };
459
460   /* set special routes for the prefix if needed */
461   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
462
463   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
464     {
465       u32 classify_table_index =
466         lm->classify_table_index_by_sw_if_index[sw_if_index];
467       if (classify_table_index != (u32) ~ 0)
468         {
469           dpo_id_t dpo = DPO_INVALID;
470
471           dpo_set (&dpo,
472                    DPO_CLASSIFY,
473                    DPO_PROTO_IP4,
474                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
475
476           fib_table_entry_special_dpo_add (fib_index,
477                                            &pfx,
478                                            FIB_SOURCE_CLASSIFY,
479                                            FIB_ENTRY_FLAG_NONE, &dpo);
480           dpo_reset (&dpo);
481         }
482     }
483
484   fib_table_entry_update_one_path (fib_index, &pfx,
485                                    FIB_SOURCE_INTERFACE,
486                                    (FIB_ENTRY_FLAG_CONNECTED |
487                                     FIB_ENTRY_FLAG_LOCAL),
488                                    DPO_PROTO_IP4,
489                                    &pfx.fp_addr,
490                                    sw_if_index,
491                                    // invalid FIB index
492                                    ~0,
493                                    1, NULL,
494                                    FIB_ROUTE_PATH_FLAG_NONE);
495 }
496
497 static void
498 ip4_del_interface_prefix_routes (ip4_main_t * im,
499                                  u32 sw_if_index,
500                                  u32 fib_index,
501                                  ip4_address_t * address,
502                                  u32 address_length)
503 {
504   ip_lookup_main_t *lm = &im->lookup_main;
505   ip_interface_prefix_t *if_prefix;
506
507   ip_interface_prefix_key_t key = {
508     .prefix = {
509       .fp_len = address_length,
510       .fp_proto = FIB_PROTOCOL_IP4,
511       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
512     },
513     .sw_if_index = sw_if_index,
514   };
515
516   fib_prefix_t pfx_special = {
517     .fp_len = 32,
518     .fp_proto = FIB_PROTOCOL_IP4,
519   };
520
521   if_prefix = ip_get_interface_prefix (lm, &key);
522   if (!if_prefix)
523     {
524       clib_warning ("Prefix not found while deleting %U",
525                     format_ip4_address_and_length, address, address_length);
526       return;
527     }
528
529   if_prefix->ref_count -= 1;
530
531   /*
532    * Routes need to be adjusted if deleting last intf addr in prefix
533    *
534    * We're done now otherwise
535    */
536   if (if_prefix->ref_count > 0)
537     return;
538
539   /* length <= 30, delete glean route, first address, last address */
540   if (address_length <= 30)
541     {
542       /* Less work to do in FIB if we remove the covered /32s first */
543
544       /* first address in prefix */
545       pfx_special.fp_addr.ip4.as_u32 =
546         address->as_u32 & im->fib_masks[address_length];
547       pfx_special.fp_len = 32;
548
549       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
550         fib_table_entry_special_remove (fib_index,
551                                         &pfx_special,
552                                         FIB_SOURCE_INTERFACE);
553
554       /* prefix broadcast address */
555       pfx_special.fp_addr.ip4.as_u32 =
556         address->as_u32 | ~im->fib_masks[address_length];
557       pfx_special.fp_len = 32;
558
559       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
560         fib_table_entry_special_remove (fib_index,
561                                         &pfx_special,
562                                         FIB_SOURCE_INTERFACE);
563     }
564   else if (address_length == 31)
565     {
566       /* length == 31, delete attached route for the other address */
567       pfx_special.fp_addr.ip4.as_u32 =
568         address->as_u32 ^ clib_host_to_net_u32(1);
569
570       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
571     }
572
573   /* remove glean route for prefix */
574   pfx_special.fp_addr.ip4 = *address;
575   pfx_special.fp_len = address_length;
576   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
577
578   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
579   pool_put (lm->if_prefix_pool, if_prefix);
580 }
581
582 static void
583 ip4_del_interface_routes (u32 sw_if_index,
584                           ip4_main_t * im,
585                           u32 fib_index,
586                           ip4_address_t * address, u32 address_length)
587 {
588   fib_prefix_t pfx = {
589     .fp_len = 32,
590     .fp_proto = FIB_PROTOCOL_IP4,
591     .fp_addr.ip4 = *address,
592   };
593
594   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
595
596   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
597                                    address, address_length);
598 }
599
600 #ifndef CLIB_MARCH_VARIANT
601 void
602 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
603 {
604   ip4_main_t *im = &ip4_main;
605   vnet_main_t *vnm = vnet_get_main ();
606   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
607
608   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
609
610   /*
611    * enable/disable only on the 1<->0 transition
612    */
613   if (is_enable)
614     {
615       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
616         return;
617     }
618   else
619     {
620       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
621       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
622         return;
623     }
624   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
625                                !is_enable, 0, 0);
626
627
628   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
629                                sw_if_index, !is_enable, 0, 0);
630
631   if (is_enable)
632     hi->l3_if_count++;
633   else if (hi->l3_if_count)
634     hi->l3_if_count--;
635
636   {
637     ip4_enable_disable_interface_callback_t *cb;
638     vec_foreach (cb, im->enable_disable_interface_callbacks)
639       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
640   }
641 }
642
643 static clib_error_t *
644 ip4_add_del_interface_address_internal (vlib_main_t * vm,
645                                         u32 sw_if_index,
646                                         ip4_address_t * address,
647                                         u32 address_length, u32 is_del)
648 {
649   vnet_main_t *vnm = vnet_get_main ();
650   ip4_main_t *im = &ip4_main;
651   ip_lookup_main_t *lm = &im->lookup_main;
652   clib_error_t *error = 0;
653   u32 if_address_index;
654   ip4_address_fib_t ip4_af, *addr_fib = 0;
655
656   error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
657   if (error)
658     return error;
659
660   ip4_addr_fib_init (&ip4_af, address,
661                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
662   vec_add1 (addr_fib, ip4_af);
663
664   /*
665    * there is no support for adj-fib handling in the presence of overlapping
666    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
667    * most routers do.
668    */
669   /* *INDENT-OFF* */
670   if (!is_del)
671     {
672       /* When adding an address check that it does not conflict
673          with an existing address on any interface in this table. */
674       ip_interface_address_t *ia;
675       vnet_sw_interface_t *sif;
676
677       pool_foreach (sif, vnm->interface_main.sw_interfaces)
678        {
679           if (im->fib_index_by_sw_if_index[sw_if_index] ==
680               im->fib_index_by_sw_if_index[sif->sw_if_index])
681             {
682               foreach_ip_interface_address
683                 (&im->lookup_main, ia, sif->sw_if_index,
684                  0 /* honor unnumbered */ ,
685                  ({
686                    ip4_address_t * x =
687                      ip_interface_address_get_address
688                      (&im->lookup_main, ia);
689
690                    if (ip4_destination_matches_route
691                        (im, address, x, ia->address_length) ||
692                        ip4_destination_matches_route (im,
693                                                       x,
694                                                       address,
695                                                       address_length))
696                      {
697                        /* an intf may have >1 addr from the same prefix */
698                        if ((sw_if_index == sif->sw_if_index) &&
699                            (ia->address_length == address_length) &&
700                            (x->as_u32 != address->as_u32))
701                          continue;
702
703                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
704                          /* if the address we're comparing against is stale
705                           * then the CP has not added this one back yet, maybe
706                           * it never will, so we have to assume it won't and
707                           * ignore it. if it does add it back, then it will fail
708                           * because this one is now present */
709                          continue;
710
711                        /* error if the length or intf was different */
712                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
713
714                        error = clib_error_create
715                          ("failed to add %U on %U which conflicts with %U for interface %U",
716                           format_ip4_address_and_length, address,
717                           address_length,
718                           format_vnet_sw_if_index_name, vnm,
719                           sw_if_index,
720                           format_ip4_address_and_length, x,
721                           ia->address_length,
722                           format_vnet_sw_if_index_name, vnm,
723                           sif->sw_if_index);
724                        goto done;
725                      }
726                  }));
727             }
728       }
729     }
730   /* *INDENT-ON* */
731
732   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
733
734   if (is_del)
735     {
736       if (~0 == if_address_index)
737         {
738           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
739           error = clib_error_create ("%U not found for interface %U",
740                                      lm->format_address_and_length,
741                                      addr_fib, address_length,
742                                      format_vnet_sw_if_index_name, vnm,
743                                      sw_if_index);
744           goto done;
745         }
746
747       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
748                                         address_length, sw_if_index);
749       if (error)
750         goto done;
751     }
752   else
753     {
754       if (~0 != if_address_index)
755         {
756           ip_interface_address_t *ia;
757
758           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
759
760           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
761             {
762               if (ia->sw_if_index == sw_if_index)
763                 {
764                   /* re-adding an address during the replace action.
765                    * consdier this the update. clear the flag and
766                    * we're done */
767                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
768                   goto done;
769                 }
770               else
771                 {
772                   /* The prefix is moving from one interface to another.
773                    * delete the stale and add the new */
774                   ip4_add_del_interface_address_internal (vm,
775                                                           ia->sw_if_index,
776                                                           address,
777                                                           address_length, 1);
778                   ia = NULL;
779                   error = ip_interface_address_add (lm, sw_if_index,
780                                                     addr_fib, address_length,
781                                                     &if_address_index);
782                 }
783             }
784           else
785             {
786               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
787               error = clib_error_create
788                 ("Prefix %U already found on interface %U",
789                  lm->format_address_and_length, addr_fib, address_length,
790                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
791             }
792         }
793       else
794         error = ip_interface_address_add (lm, sw_if_index,
795                                           addr_fib, address_length,
796                                           &if_address_index);
797     }
798
799   if (error)
800     goto done;
801
802   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
803   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
804
805   /* intf addr routes are added/deleted on admin up/down */
806   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
807     {
808       if (is_del)
809         ip4_del_interface_routes (sw_if_index,
810                                   im, ip4_af.fib_index, address,
811                                   address_length);
812       else
813         ip4_add_interface_routes (sw_if_index,
814                                   im, ip4_af.fib_index,
815                                   pool_elt_at_index
816                                   (lm->if_address_pool, if_address_index));
817     }
818
819   ip4_add_del_interface_address_callback_t *cb;
820   vec_foreach (cb, im->add_del_interface_address_callbacks)
821     cb->function (im, cb->function_opaque, sw_if_index,
822                   address, address_length, if_address_index, is_del);
823
824 done:
825   vec_free (addr_fib);
826   return error;
827 }
828
829 clib_error_t *
830 ip4_add_del_interface_address (vlib_main_t * vm,
831                                u32 sw_if_index,
832                                ip4_address_t * address,
833                                u32 address_length, u32 is_del)
834 {
835   return ip4_add_del_interface_address_internal
836     (vm, sw_if_index, address, address_length, is_del);
837 }
838
839 void
840 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
841 {
842   ip_interface_address_t *ia;
843   ip4_main_t *im;
844
845   im = &ip4_main;
846
847   /*
848    * when directed broadcast is enabled, the subnet braodcast route will forward
849    * packets using an adjacency with a broadcast MAC. otherwise it drops
850    */
851   /* *INDENT-OFF* */
852   foreach_ip_interface_address(&im->lookup_main, ia,
853                                sw_if_index, 0,
854      ({
855        if (ia->address_length <= 30)
856          {
857            ip4_address_t *ipa;
858
859            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
860
861            fib_prefix_t pfx = {
862              .fp_len = 32,
863              .fp_proto = FIB_PROTOCOL_IP4,
864              .fp_addr = {
865                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
866              },
867            };
868
869            ip4_add_subnet_bcast_route
870              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
871                                                   sw_if_index),
872               &pfx, sw_if_index);
873          }
874      }));
875   /* *INDENT-ON* */
876 }
877 #endif
878
879 static clib_error_t *
880 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
881 {
882   ip4_main_t *im = &ip4_main;
883   ip_interface_address_t *ia;
884   ip4_address_t *a;
885   u32 is_admin_up, fib_index;
886
887   vec_validate_init_empty (im->
888                            lookup_main.if_address_pool_index_by_sw_if_index,
889                            sw_if_index, ~0);
890
891   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
892
893   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
894
895   /* *INDENT-OFF* */
896   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
897                                 0 /* honor unnumbered */,
898   ({
899     a = ip_interface_address_get_address (&im->lookup_main, ia);
900     if (is_admin_up)
901       ip4_add_interface_routes (sw_if_index,
902                                 im, fib_index,
903                                 ia);
904     else
905       ip4_del_interface_routes (sw_if_index,
906                                 im, fib_index,
907                                 a, ia->address_length);
908   }));
909   /* *INDENT-ON* */
910
911   return 0;
912 }
913
914 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
915
916 /* Built-in ip4 unicast rx feature path definition */
917 /* *INDENT-OFF* */
/*
 * "ip4-unicast" feature arc: its start nodes are ip4-input and
 * ip4-input-no-checksum and its last node is ip4-lookup.  The arc index is
 * stored through ip4_main.lookup_main.ucast_feature_arc_index.
 *
 * Each feature registered below names, via .runs_before, the feature(s) it
 * must precede.  Taken together the declarations give the chain
 *   ip4-flow-classify -> ip4-inacl -> ip4-policer-classify ->
 *   ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup
 * with ip4-source-and-port-range-check-rx ahead of ip4-policer-classify and
 * ip4-not-enabled ahead of ip4-lookup.
 */
918 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
919 {
920   .arc_name = "ip4-unicast",
921   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
922   .last_in_arc = "ip4-lookup",
923   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
924 };
925
926 VNET_FEATURE_INIT (ip4_flow_classify, static) =
927 {
928   .arc_name = "ip4-unicast",
929   .node_name = "ip4-flow-classify",
930   .runs_before = VNET_FEATURES ("ip4-inacl"),
931 };
932
933 VNET_FEATURE_INIT (ip4_inacl, static) =
934 {
935   .arc_name = "ip4-unicast",
936   .node_name = "ip4-inacl",
937   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
938 };
939
940 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
941 {
942   .arc_name = "ip4-unicast",
943   .node_name = "ip4-source-and-port-range-check-rx",
944   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
945 };
946
947 VNET_FEATURE_INIT (ip4_policer_classify, static) =
948 {
949   .arc_name = "ip4-unicast",
950   .node_name = "ip4-policer-classify",
951   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
952 };
953
954 VNET_FEATURE_INIT (ip4_ipsec, static) =
955 {
956   .arc_name = "ip4-unicast",
957   .node_name = "ipsec4-input-feature",
958   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
959 };
960
961 VNET_FEATURE_INIT (ip4_vpath, static) =
962 {
963   .arc_name = "ip4-unicast",
964   .node_name = "vpath-input-ip4",
965   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
966 };
967
968 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
969 {
970   .arc_name = "ip4-unicast",
971   .node_name = "ip4-vxlan-bypass",
972   .runs_before = VNET_FEATURES ("ip4-lookup"),
973 };
974
/*
 * ip4-not-enabled is switched per interface by ip4_sw_interface_add_del:
 * enabled when the interface is added, disabled when it is deleted.
 */
975 VNET_FEATURE_INIT (ip4_not_enabled, static) =
976 {
977   .arc_name = "ip4-unicast",
978   .node_name = "ip4-not-enabled",
979   .runs_before = VNET_FEATURES ("ip4-lookup"),
980 };
981
/* Terminal feature of the arc; matches .last_in_arc above. */
982 VNET_FEATURE_INIT (ip4_lookup, static) =
983 {
984   .arc_name = "ip4-unicast",
985   .node_name = "ip4-lookup",
986   .runs_before = 0,     /* not before any other features */
987 };
988
989 /* Built-in ip4 multicast rx feature path definition */
/*
 * "ip4-multicast" feature arc: same start nodes as the unicast arc
 * (ip4-input, ip4-input-no-checksum), terminated by
 * ip4-mfib-forward-lookup.  The arc index is stored through
 * ip4_main.lookup_main.mcast_feature_arc_index.  Both vpath-input-ip4 and
 * ip4-not-enabled are declared to run before the terminal lookup feature.
 */
990 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
991 {
992   .arc_name = "ip4-multicast",
993   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
994   .last_in_arc = "ip4-mfib-forward-lookup",
995   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
996 };
997
998 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
999 {
1000   .arc_name = "ip4-multicast",
1001   .node_name = "vpath-input-ip4",
1002   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1003 };
1004
/*
 * Switched per interface by ip4_sw_interface_add_del, in step with the
 * unicast ip4-not-enabled feature.
 */
1005 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1006 {
1007   .arc_name = "ip4-multicast",
1008   .node_name = "ip4-not-enabled",
1009   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1010 };
1011
1012 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1013 {
1014   .arc_name = "ip4-multicast",
1015   .node_name = "ip4-mfib-forward-lookup",
1016   .runs_before = 0,     /* last feature */
1017 };
1018
1019 /* Source and port-range check ip4 tx feature path definition */
/*
 * "ip4-output" feature arc: its start nodes are ip4-rewrite, ip4-midchain
 * and ip4-dvr-dpo and its last node is interface-output.  The arc index is
 * stored through ip4_main.lookup_main.output_feature_arc_index.
 *
 * Declared ordering of the features on this arc:
 *   ip4-source-and-port-range-check-tx -> ip4-outacl ->
 *   ipsec4-output-feature -> interface-output
 */
1020 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1021 {
1022   .arc_name = "ip4-output",
1023   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1024   .last_in_arc = "interface-output",
1025   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1026 };
1027
1028 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1029 {
1030   .arc_name = "ip4-output",
1031   .node_name = "ip4-source-and-port-range-check-tx",
1032   .runs_before = VNET_FEATURES ("ip4-outacl"),
1033 };
1034
1035 VNET_FEATURE_INIT (ip4_outacl, static) =
1036 {
1037   .arc_name = "ip4-output",
1038   .node_name = "ip4-outacl",
1039   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1040 };
1041
1042 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1043 {
1044   .arc_name = "ip4-output",
1045   .node_name = "ipsec4-output-feature",
1046   .runs_before = VNET_FEATURES ("interface-output"),
1047 };
1048
1049 /* Built-in ip4 tx feature path definition */
/* Terminal feature of the arc; matches .last_in_arc above. */
1050 VNET_FEATURE_INIT (ip4_interface_output, static) =
1051 {
1052   .arc_name = "ip4-output",
1053   .node_name = "interface-output",
1054   .runs_before = 0,     /* not before any other features */
1055 };
1056 /* *INDENT-ON* */
1057
1058 static clib_error_t *
1059 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1060 {
1061   ip4_main_t *im = &ip4_main;
1062
1063   vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
1064   vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
1065
1066   if (is_add)
1067     {
1068       /* Fill in lookup tables with default table (0). */
1069       im->fib_index_by_sw_if_index[sw_if_index] = 0;
1070       im->mfib_index_by_sw_if_index[sw_if_index] = 0;
1071     }
1072   else
1073     {
1074       ip4_main_t *im4 = &ip4_main;
1075       ip_lookup_main_t *lm4 = &im4->lookup_main;
1076       ip_interface_address_t *ia = 0;
1077       ip4_address_t *address;
1078       vlib_main_t *vm = vlib_get_main ();
1079
1080       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1081       /* *INDENT-OFF* */
1082       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1083       ({
1084         address = ip_interface_address_get_address (lm4, ia);
1085         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1086       }));
1087       /* *INDENT-ON* */
1088       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1089     }
1090
1091   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1092                                is_add, 0, 0);
1093
1094   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1095                                sw_if_index, is_add, 0, 0);
1096
1097   return /* no error */ 0;
1098 }
1099
1100 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1101
1102 /* Global IP4 main: the ip4_main_t instance the functions in this file
 * reach through &ip4_main.  It is defined only when CLIB_MARCH_VARIANT is
 * not set. */
1103 #ifndef CLIB_MARCH_VARIANT
1104 ip4_main_t ip4_main;
1105 #endif /* CLIB_MARCH_VARIANT */
1106
1107 static clib_error_t *
1108 ip4_lookup_init (vlib_main_t * vm)
1109 {
1110   ip4_main_t *im = &ip4_main;
1111   clib_error_t *error;
1112   uword i;
1113
1114   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1115     return error;
1116   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1117     return (error);
1118   if ((error = vlib_call_init_function (vm, fib_module_init)))
1119     return error;
1120   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1121     return error;
1122
1123   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1124     {
1125       u32 m;
1126
1127       if (i < 32)
1128         m = pow2_mask (i) << (32 - i);
1129       else
1130         m = ~0;
1131       im->fib_masks[i] = clib_host_to_net_u32 (m);
1132     }
1133
1134   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1135
1136   /* Create FIB with index 0 and table id of 0. */
1137   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1138                                      FIB_SOURCE_DEFAULT_ROUTE);
1139   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1140                                       MFIB_SOURCE_DEFAULT_ROUTE);
1141
1142   {
1143     pg_node_t *pn;
1144     pn = pg_get_node (ip4_lookup_node.index);
1145     pn->unformat_edit = unformat_pg_ip4_header;
1146   }
1147
1148   {
1149     ethernet_arp_header_t h;
1150
1151     clib_memset (&h, 0, sizeof (h));
1152
1153 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1154 #define _8(f,v) h.f = v;
1155     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1156     _16 (l3_type, ETHERNET_TYPE_IP4);
1157     _8 (n_l2_address_bytes, 6);
1158     _8 (n_l3_address_bytes, 4);
1159     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1160 #undef _16
1161 #undef _8
1162
1163     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1164                                /* data */ &h,
1165                                sizeof (h),
1166                                /* alloc chunk size */ 8,
1167                                "ip4 arp");
1168   }
1169
1170   return error;
1171 }
1172
1173 VLIB_INIT_FUNCTION (ip4_lookup_init);
1174
/*
 * Per-packet trace record written by ip4_forward_next_trace and printed by
 * the format_ip4_*_trace functions in this file.
 */
1175 typedef struct
1176 {
1177   /* Adjacency taken: copied from vnet_buffer (b)->ip.adj_index[which_adj_index]. */
1178   u32 dpo_index;
  /* Copied from vnet_buffer (b)->ip.flow_hash. */
1179   u32 flow_hash;
  /*
   * Holds sw_if_index[VLIB_TX] of the buffer when that is not ~0, otherwise
   * the FIB index bound to the RX interface.  format_ip4_rewrite_trace
   * prints this same field under the label "tx_sw_if_index".
   */
1180   u32 fib_index;
1181
1182   /* Packet data, possibly *after* rewrite. */
1183   u8 packet_data[64 - 1 * sizeof (u32)];
1184 }
1185 ip4_forward_next_trace_t;
1186
1187 #ifndef CLIB_MARCH_VARIANT
1188 u8 *
1189 format_ip4_forward_next_trace (u8 * s, va_list * args)
1190 {
1191   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1192   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1193   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1194   u32 indent = format_get_indent (s);
1195   s = format (s, "%U%U",
1196               format_white_space, indent,
1197               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1198   return s;
1199 }
1200 #endif
1201
1202 static u8 *
1203 format_ip4_lookup_trace (u8 * s, va_list * args)
1204 {
1205   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1206   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1207   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1208   u32 indent = format_get_indent (s);
1209
1210   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1211               t->fib_index, t->dpo_index, t->flow_hash);
1212   s = format (s, "\n%U%U",
1213               format_white_space, indent,
1214               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1215   return s;
1216 }
1217
1218 static u8 *
1219 format_ip4_rewrite_trace (u8 * s, va_list * args)
1220 {
1221   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1222   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1223   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1224   u32 indent = format_get_indent (s);
1225
1226   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1227               t->fib_index, t->dpo_index, format_ip_adjacency,
1228               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1229   s = format (s, "\n%U%U",
1230               format_white_space, indent,
1231               format_ip_adjacency_packet_data,
1232               t->packet_data, sizeof (t->packet_data));
1233   return s;
1234 }
1235
1236 #ifndef CLIB_MARCH_VARIANT
1237 /* Common trace function for all ip4-forward next nodes. */
1238 void
1239 ip4_forward_next_trace (vlib_main_t * vm,
1240                         vlib_node_runtime_t * node,
1241                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1242 {
1243   u32 *from, n_left;
1244   ip4_main_t *im = &ip4_main;
1245
1246   n_left = frame->n_vectors;
1247   from = vlib_frame_vector_args (frame);
1248
1249   while (n_left >= 4)
1250     {
1251       u32 bi0, bi1;
1252       vlib_buffer_t *b0, *b1;
1253       ip4_forward_next_trace_t *t0, *t1;
1254
1255       /* Prefetch next iteration. */
1256       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1257       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1258
1259       bi0 = from[0];
1260       bi1 = from[1];
1261
1262       b0 = vlib_get_buffer (vm, bi0);
1263       b1 = vlib_get_buffer (vm, bi1);
1264
1265       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1266         {
1267           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1268           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1269           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1270           t0->fib_index =
1271             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1272              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1273             vec_elt (im->fib_index_by_sw_if_index,
1274                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1275
1276           clib_memcpy_fast (t0->packet_data,
1277                             vlib_buffer_get_current (b0),
1278                             sizeof (t0->packet_data));
1279         }
1280       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1281         {
1282           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1283           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1284           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1285           t1->fib_index =
1286             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1287              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1288             vec_elt (im->fib_index_by_sw_if_index,
1289                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1290           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1291                             sizeof (t1->packet_data));
1292         }
1293       from += 2;
1294       n_left -= 2;
1295     }
1296
1297   while (n_left >= 1)
1298     {
1299       u32 bi0;
1300       vlib_buffer_t *b0;
1301       ip4_forward_next_trace_t *t0;
1302
1303       bi0 = from[0];
1304
1305       b0 = vlib_get_buffer (vm, bi0);
1306
1307       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1308         {
1309           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1310           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1311           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1312           t0->fib_index =
1313             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1314              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1315             vec_elt (im->fib_index_by_sw_if_index,
1316                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1317           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1318                             sizeof (t0->packet_data));
1319         }
1320       from += 1;
1321       n_left -= 1;
1322     }
1323 }
1324
1325 /* Compute TCP/UDP/ICMP4 checksum in software. */
1326 u16
1327 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1328                               ip4_header_t * ip0)
1329 {
1330   ip_csum_t sum0;
1331   u32 ip_header_length, payload_length_host_byte_order;
1332
1333   /* Initialize checksum with ip header. */
1334   ip_header_length = ip4_header_bytes (ip0);
1335   payload_length_host_byte_order =
1336     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1337   sum0 =
1338     clib_host_to_net_u32 (payload_length_host_byte_order +
1339                           (ip0->protocol << 16));
1340
1341   if (BITS (uword) == 32)
1342     {
1343       sum0 =
1344         ip_csum_with_carry (sum0,
1345                             clib_mem_unaligned (&ip0->src_address, u32));
1346       sum0 =
1347         ip_csum_with_carry (sum0,
1348                             clib_mem_unaligned (&ip0->dst_address, u32));
1349     }
1350   else
1351     sum0 =
1352       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1353
1354   return ip_calculate_l4_checksum (vm, p0, sum0,
1355                                    payload_length_host_byte_order, (u8 *) ip0,
1356                                    ip_header_length, NULL);
1357 }
1358
1359 u32
1360 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1361 {
1362   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1363   udp_header_t *udp0;
1364   u16 sum16;
1365
1366   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1367           || ip0->protocol == IP_PROTOCOL_UDP);
1368
1369   udp0 = (void *) (ip0 + 1);
1370   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1371     {
1372       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1373                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1374       return p0->flags;
1375     }
1376
1377   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1378
1379   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1380                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1381
1382   return p0->flags;
1383 }
1384 #endif
1385
1386 /* *INDENT-OFF* */
/*
 * "ip4-local" feature arc: starts at the ip4-local node and ends at
 * ip4-local-end-of-arc.  No .arc_index_ptr is given here;
 * ip4_local_set_next_and_error reads the arc index from
 * vnet_feat_arc_ip4_local.feature_arc_index (presumably the object this
 * macro defines - confirm against the macro definition).
 */
1387 VNET_FEATURE_ARC_INIT (ip4_local) =
1388 {
1389   .arc_name  = "ip4-local",
1390   .start_nodes = VNET_FEATURES ("ip4-local"),
1391   .last_in_arc = "ip4-local-end-of-arc",
1392 };
1393 /* *INDENT-ON* */
1394
1395 static inline void
1396 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1397                             ip4_header_t * ip, u8 is_udp, u8 * error,
1398                             u8 * good_tcp_udp)
1399 {
1400   u32 flags0;
1401   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1402   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1403   if (is_udp)
1404     {
1405       udp_header_t *udp;
1406       u32 ip_len, udp_len;
1407       i32 len_diff;
1408       udp = ip4_next_header (ip);
1409       /* Verify UDP length. */
1410       ip_len = clib_net_to_host_u16 (ip->length);
1411       udp_len = clib_net_to_host_u16 (udp->length);
1412
1413       len_diff = ip_len - udp_len;
1414       *good_tcp_udp &= len_diff >= 0;
1415       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1416     }
1417 }
1418
/*
 * Checksum-state predicates for a buffer pointer _b.  Every argument and
 * every expansion is fully parenthesised, so the macros can be handed
 * arbitrary expressions and can be used inside larger expressions.
 */

/* 1 when the buffer is flagged for offload and the offload flags request a
 * TCP or UDP checksum, else 0. */
#define ip4_local_csum_is_offloaded(_b)                                       \
  ((((_b)->flags) & VNET_BUFFER_F_OFFLOAD) &&                                 \
   (vnet_buffer (_b)->oflags &                                                \
    (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))

/* 1 when a TCP/UDP packet still needs a software checksum check: it has
 * neither been computed already nor been left to offload. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                             \
  ((is_tcp_udp) &&                                                            \
   !((((_b)->flags) & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) ||                  \
     ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is already known to be correct or is offloaded. */
#define ip4_local_csum_is_valid(_b)                                           \
  (((((_b)->flags) & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) ||                    \
    ip4_local_csum_is_offloaded (_b)) != 0)
1431
1432 static inline void
1433 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1434                          ip4_header_t * ih, u8 * error)
1435 {
1436   u8 is_udp, is_tcp_udp, good_tcp_udp;
1437
1438   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1439   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1440
1441   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1442     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1443   else
1444     good_tcp_udp = ip4_local_csum_is_valid (b);
1445
1446   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1447   *error = (is_tcp_udp && !good_tcp_udp
1448             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1449 }
1450
1451 static inline void
1452 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1453                             ip4_header_t ** ih, u8 * error)
1454 {
1455   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1456
1457   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1458   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1459
1460   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1461   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1462
1463   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1464   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1465
1466   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1467                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1468     {
1469       if (is_tcp_udp[0])
1470         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1471                                     &good_tcp_udp[0]);
1472       if (is_tcp_udp[1])
1473         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1474                                     &good_tcp_udp[1]);
1475     }
1476
1477   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1478               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1479   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1480               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1481 }
1482
1483 static inline void
1484 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1485                               vlib_buffer_t * b, u16 * next, u8 error,
1486                               u8 head_of_feature_arc)
1487 {
1488   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1489   u32 next_index;
1490
1491   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1492   b->error = error ? error_node->errors[error] : 0;
1493   if (head_of_feature_arc)
1494     {
1495       next_index = *next;
1496       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1497         {
1498           vnet_feature_arc_start (arc_index,
1499                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1500                                   &next_index, b);
1501           *next = next_index;
1502         }
1503     }
1504 }
1505
/*
 * One-entry cache of the most recent source-address lookup, shared across
 * the packets handled by ip4_local_check_src / ip4_local_check_src_x2.
 */
1506 typedef struct
1507 {
  /* Source address of the last packet that went through a lookup. */
1508   ip4_address_t src;
  /* Load-balance index that lookup returned. */
1509   u32 lbi;
  /* Error value that packet ended up with after the source checks. */
1510   u8 error;
  /* Non-zero until the first lookup has been done; forces a lookup even
   * when the packet's source equals the initial 'src' value. */
1511   u8 first;
1512 } ip4_local_last_check_t;
1513
1514 static inline void
1515 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1516                      ip4_local_last_check_t * last_check, u8 * error0)
1517 {
1518   const dpo_id_t *dpo0;
1519   load_balance_t *lb0;
1520   u32 lbi0;
1521
1522   vnet_buffer (b)->ip.fib_index =
1523     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1524     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1525
1526   /*
1527    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1528    *  adjacency for the destination address (the local interface address).
1529    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1530    *  adjacency for the source address (the remote sender's address)
1531    */
1532   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1533       last_check->first)
1534     {
1535       lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
1536                                         &ip0->src_address);
1537
1538       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1539         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1540       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1541
1542       lb0 = load_balance_get (lbi0);
1543       dpo0 = load_balance_get_bucket_i (lb0, 0);
1544
1545       /*
1546        * Must have a route to source otherwise we drop the packet.
1547        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1548        *
1549        * The checks are:
1550        *  - the source is a recieve => it's from us => bogus, do this
1551        *    first since it sets a different error code.
1552        *  - uRPF check for any route to source - accept if passes.
1553        *  - allow packets destined to the broadcast address from unknown sources
1554        */
1555
1556       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1557                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1558                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1559       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1560                   && !fib_urpf_check_size (lb0->lb_urpf)
1561                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1562                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1563
1564       last_check->src.as_u32 = ip0->src_address.as_u32;
1565       last_check->lbi = lbi0;
1566       last_check->error = *error0;
1567       last_check->first = 0;
1568     }
1569   else
1570     {
1571       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1572         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1573       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1574       *error0 = last_check->error;
1575     }
1576 }
1577
1578 static inline void
1579 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1580                         ip4_local_last_check_t * last_check, u8 * error)
1581 {
1582   const dpo_id_t *dpo[2];
1583   load_balance_t *lb[2];
1584   u32 not_last_hit;
1585   u32 lbi[2];
1586
1587   not_last_hit = last_check->first;
1588   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1589   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1590
1591   vnet_buffer (b[0])->ip.fib_index =
1592     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1593     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1594     vnet_buffer (b[0])->ip.fib_index;
1595
1596   vnet_buffer (b[1])->ip.fib_index =
1597     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1598     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1599     vnet_buffer (b[1])->ip.fib_index;
1600
1601   /*
1602    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1603    *  adjacency for the destination address (the local interface address).
1604    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1605    *  adjacency for the source address (the remote sender's address)
1606    */
1607   if (PREDICT_TRUE (not_last_hit))
1608     {
1609       ip4_fib_forwarding_lookup_x2 (
1610         vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
1611         &ip[0]->src_address, &ip[1]->src_address, &lbi[0], &lbi[1]);
1612
1613       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1614         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1615       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1616
1617       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1618         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1619       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1620
1621       lb[0] = load_balance_get (lbi[0]);
1622       lb[1] = load_balance_get (lbi[1]);
1623
1624       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1625       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1626
1627       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1628                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1629                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1630       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1631                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1632                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1633                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1634
1635       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1636                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1637                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1638       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1639                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1640                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1641                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1642
1643       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1644       last_check->lbi = lbi[1];
1645       last_check->error = error[1];
1646       last_check->first = 0;
1647     }
1648   else
1649     {
1650       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1651         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1652       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1653
1654       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1655         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1656       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1657
1658       error[0] = last_check->error;
1659       error[1] = last_check->error;
1660     }
1661 }
1662
/*
 * Packet classification returned by ip4_local_classify.
 *
 * IP_LOCAL_PACKET_TYPE_L4 is the first enumerator and therefore 0;
 * ip4_local_inline tests the type for non-zero to decide that a batch can
 * skip the checksum and source checks, so L4 must stay first.
 */
1663 enum ip_local_packet_type_e
1664 {
1665   IP_LOCAL_PACKET_TYPE_L4,
  /* Buffer carries VNET_BUFFER_F_IS_NATED. */
1666   IP_LOCAL_PACKET_TYPE_NAT,
  /* Packet is an IP fragment; next node is IP_LOCAL_NEXT_REASSEMBLY. */
1667   IP_LOCAL_PACKET_TYPE_FRAG,
1668 };
1669
1670 /**
1671  * Determine packet type and next node.
1672  *
1673  * The expectation is that all packets that are not L4 will skip
1674  * checksums and source checks.
1675  */
1676 always_inline u8
1677 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1678 {
1679   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1680
1681   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1682     {
1683       *next = IP_LOCAL_NEXT_REASSEMBLY;
1684       return IP_LOCAL_PACKET_TYPE_FRAG;
1685     }
1686   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1687     {
1688       *next = lm->local_next_by_ip_protocol[ip->protocol];
1689       return IP_LOCAL_PACKET_TYPE_NAT;
1690     }
1691
1692   *next = lm->local_next_by_ip_protocol[ip->protocol];
1693   return IP_LOCAL_PACKET_TYPE_L4;
1694 }
1695
1696 static inline uword
1697 ip4_local_inline (vlib_main_t * vm,
1698                   vlib_node_runtime_t * node,
1699                   vlib_frame_t * frame, int head_of_feature_arc)
1700 {
1701   u32 *from, n_left_from;
1702   vlib_node_runtime_t *error_node =
1703     vlib_node_get_runtime (vm, ip4_local_node.index);
1704   u16 nexts[VLIB_FRAME_SIZE], *next;
1705   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1706   ip4_header_t *ip[2];
1707   u8 error[2], pt[2];
1708
1709   ip4_local_last_check_t last_check = {
1710     /*
1711      * 0.0.0.0 can appear as the source address of an IP packet,
1712      * as can any other address, hence the need to use the 'first'
1713      * member to make sure the .lbi is initialised for the first
1714      * packet.
1715      */
1716     .src = {.as_u32 = 0},
1717     .lbi = ~0,
1718     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1719     .first = 1,
1720   };
1721
1722   from = vlib_frame_vector_args (frame);
1723   n_left_from = frame->n_vectors;
1724
1725   if (node->flags & VLIB_NODE_FLAG_TRACE)
1726     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1727
1728   vlib_get_buffers (vm, from, bufs, n_left_from);
1729   b = bufs;
1730   next = nexts;
1731
1732   while (n_left_from >= 6)
1733     {
1734       u8 not_batch = 0;
1735
1736       /* Prefetch next iteration. */
1737       {
1738         vlib_prefetch_buffer_header (b[4], LOAD);
1739         vlib_prefetch_buffer_header (b[5], LOAD);
1740
1741         clib_prefetch_load (b[4]->data);
1742         clib_prefetch_load (b[5]->data);
1743       }
1744
1745       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1746
1747       ip[0] = vlib_buffer_get_current (b[0]);
1748       ip[1] = vlib_buffer_get_current (b[1]);
1749
1750       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1751       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1752
1753       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1754       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1755
1756       not_batch = pt[0] ^ pt[1];
1757
1758       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1759         goto skip_checks;
1760
1761       if (PREDICT_TRUE (not_batch == 0))
1762         {
1763           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1764           ip4_local_check_src_x2 (b, ip, &last_check, error);
1765         }
1766       else
1767         {
1768           if (!pt[0])
1769             {
1770               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1771               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1772             }
1773           if (!pt[1])
1774             {
1775               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1776               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1777             }
1778         }
1779
1780     skip_checks:
1781
1782       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1783                                     head_of_feature_arc);
1784       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1785                                     head_of_feature_arc);
1786
1787       b += 2;
1788       next += 2;
1789       n_left_from -= 2;
1790     }
1791
1792   while (n_left_from > 0)
1793     {
1794       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1795
1796       ip[0] = vlib_buffer_get_current (b[0]);
1797       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1798       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1799
1800       if (head_of_feature_arc == 0 || pt[0])
1801         goto skip_check;
1802
1803       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1804       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1805
1806     skip_check:
1807
1808       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1809                                     head_of_feature_arc);
1810
1811       b += 1;
1812       next += 1;
1813       n_left_from -= 1;
1814     }
1815
1816   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1817   return frame->n_vectors;
1818 }
1819
1820 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1821                                vlib_frame_t * frame)
1822 {
1823   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1824 }
1825
1826 /* *INDENT-OFF* */
1827 VLIB_REGISTER_NODE (ip4_local_node) =
1828 {
1829   .name = "ip4-local",
1830   .vector_size = sizeof (u32),
1831   .format_trace = format_ip4_forward_next_trace,
1832   .n_errors = IP4_N_ERROR,
1833   .error_strings = ip4_error_strings,
1834   .n_next_nodes = IP_LOCAL_N_NEXT,
1835   .next_nodes =
1836   {
1837     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1838     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1839     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1840     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1841     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1842   },
1843 };
1844 /* *INDENT-ON* */
1845
1846
1847 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1848                                           vlib_node_runtime_t * node,
1849                                           vlib_frame_t * frame)
1850 {
1851   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1852 }
1853
1854 /* *INDENT-OFF* */
1855 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1856   .name = "ip4-local-end-of-arc",
1857   .vector_size = sizeof (u32),
1858
1859   .format_trace = format_ip4_forward_next_trace,
1860   .sibling_of = "ip4-local",
1861 };
1862
1863 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1864   .arc_name = "ip4-local",
1865   .node_name = "ip4-local-end-of-arc",
1866   .runs_before = 0, /* not before any other features */
1867 };
1868 /* *INDENT-ON* */
1869
1870 #ifndef CLIB_MARCH_VARIANT
1871 void
1872 ip4_register_protocol (u32 protocol, u32 node_index)
1873 {
1874   vlib_main_t *vm = vlib_get_main ();
1875   ip4_main_t *im = &ip4_main;
1876   ip_lookup_main_t *lm = &im->lookup_main;
1877
1878   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1879   lm->local_next_by_ip_protocol[protocol] =
1880     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1881 }
1882
1883 void
1884 ip4_unregister_protocol (u32 protocol)
1885 {
1886   ip4_main_t *im = &ip4_main;
1887   ip_lookup_main_t *lm = &im->lookup_main;
1888
1889   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1890   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1891 }
1892 #endif
1893
1894 static clib_error_t *
1895 show_ip_local_command_fn (vlib_main_t * vm,
1896                           unformat_input_t * input, vlib_cli_command_t * cmd)
1897 {
1898   ip4_main_t *im = &ip4_main;
1899   ip_lookup_main_t *lm = &im->lookup_main;
1900   int i;
1901
1902   vlib_cli_output (vm, "Protocols handled by ip4_local");
1903   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1904     {
1905       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1906         {
1907           u32 node_index = vlib_get_node (vm,
1908                                           ip4_local_node.index)->
1909             next_nodes[lm->local_next_by_ip_protocol[i]];
1910           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1911                            format_vlib_node_name, vm, node_index);
1912         }
1913     }
1914   return 0;
1915 }
1916
1917
1918
1919 /*?
1920  * Display the set of protocols handled by the local IPv4 stack.
1921  *
1922  * @cliexpar
1923  * Example of how to display local protocol table:
1924  * @cliexstart{show ip local}
1925  * Protocols handled by ip4_local
1926  * 1
1927  * 17
1928  * 47
1929  * @cliexend
1930 ?*/
1931 /* *INDENT-OFF* */
1932 VLIB_CLI_COMMAND (show_ip_local, static) =
1933 {
1934   .path = "show ip local",
1935   .function = show_ip_local_command_fn,
1936   .short_help = "show ip local",
1937 };
1938 /* *INDENT-ON* */
1939
/**
 * Statically wired next nodes of the ip4-rewrite node family.
 * Any other next index comes from the adjacency's rewrite header.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT            /* Last: number of static nexts */
} ip4_rewrite_next_t;
1947
/**
 * The bits of an IPv4 address that are kept (masked in) when
 * constructing a multicast MAC address.  The address is held in
 * network byte order, so the mask depends on host endianness.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1957
1958 always_inline void
1959 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1960                u16 adj_packet_bytes, bool df, u16 * next,
1961                u8 is_midchain, u32 * error)
1962 {
1963   if (packet_len > adj_packet_bytes)
1964     {
1965       *error = IP4_ERROR_MTU_EXCEEDED;
1966       if (df)
1967         {
1968           icmp4_error_set_vnet_buffer
1969             (b, ICMP4_destination_unreachable,
1970              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1971              adj_packet_bytes);
1972           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1973         }
1974       else
1975         {
1976           /* IP fragmentation */
1977           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1978                                    (is_midchain ?
1979                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1980                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1981           *next = IP4_REWRITE_NEXT_FRAGMENT;
1982         }
1983     }
1984 }
1985
1986 /* increment TTL & update checksum.
1987    Works either endian, so no need for byte swap. */
1988 static_always_inline void
1989 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
1990 {
1991   i32 ttl;
1992   u32 checksum;
1993   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
1994     return;
1995
1996   ttl = ip->ttl;
1997
1998   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
1999   checksum += checksum >= 0xffff;
2000
2001   ip->checksum = checksum;
2002   ttl += 1;
2003   ip->ttl = ttl;
2004
2005   ASSERT (ip4_header_checksum_is_valid (ip));
2006 }
2007
2008 /* Decrement TTL & update checksum.
2009    Works either endian, so no need for byte swap. */
2010 static_always_inline void
2011 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2012                             u32 * error)
2013 {
2014   i32 ttl;
2015   u32 checksum;
2016   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2017     return;
2018
2019   ttl = ip->ttl;
2020
2021   /* Input node should have reject packets with ttl 0. */
2022   ASSERT (ip->ttl > 0);
2023
2024   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2025   checksum += checksum >= 0xffff;
2026
2027   ip->checksum = checksum;
2028   ttl -= 1;
2029   ip->ttl = ttl;
2030
2031   /*
2032    * If the ttl drops below 1 when forwarding, generate
2033    * an ICMP response.
2034    */
2035   if (PREDICT_FALSE (ttl <= 0))
2036     {
2037       *error = IP4_ERROR_TIME_EXPIRED;
2038       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2039       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2040                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2041                                    0);
2042       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2043     }
2044
2045   /* Verify checksum. */
2046   ASSERT (ip4_header_checksum_is_valid (ip) ||
2047           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM));
2048 }
2049
2050 always_inline uword
2051 ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
2052                     vlib_frame_t *frame, int do_counters, int is_midchain,
2053                     int is_mcast)
2054 {
2055   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2056   u32 *from = vlib_frame_vector_args (frame);
2057   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2058   u16 nexts[VLIB_FRAME_SIZE], *next;
2059   u32 n_left_from;
2060   vlib_node_runtime_t *error_node =
2061     vlib_node_get_runtime (vm, ip4_input_node.index);
2062
2063   n_left_from = frame->n_vectors;
2064   u32 thread_index = vm->thread_index;
2065
2066   vlib_get_buffers (vm, from, bufs, n_left_from);
2067   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2068
2069 #if (CLIB_N_PREFETCHES >= 8)
2070   if (n_left_from >= 6)
2071     {
2072       int i;
2073       for (i = 2; i < 6; i++)
2074         vlib_prefetch_buffer_header (bufs[i], LOAD);
2075     }
2076
2077   next = nexts;
2078   b = bufs;
2079   while (n_left_from >= 8)
2080     {
2081       const ip_adjacency_t *adj0, *adj1;
2082       ip4_header_t *ip0, *ip1;
2083       u32 rw_len0, error0, adj_index0;
2084       u32 rw_len1, error1, adj_index1;
2085       u32 tx_sw_if_index0, tx_sw_if_index1;
2086       u8 *p;
2087
2088       if (is_midchain)
2089         {
2090           vlib_prefetch_buffer_header (b[6], LOAD);
2091           vlib_prefetch_buffer_header (b[7], LOAD);
2092         }
2093
2094       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2095       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2096
2097       /*
2098        * pre-fetch the per-adjacency counters
2099        */
2100       if (do_counters)
2101         {
2102           vlib_prefetch_combined_counter (&adjacency_counters,
2103                                           thread_index, adj_index0);
2104           vlib_prefetch_combined_counter (&adjacency_counters,
2105                                           thread_index, adj_index1);
2106         }
2107
2108       ip0 = vlib_buffer_get_current (b[0]);
2109       ip1 = vlib_buffer_get_current (b[1]);
2110
2111       error0 = error1 = IP4_ERROR_NONE;
2112
2113       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2114       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2115
2116       /* Rewrite packet header and updates lengths. */
2117       adj0 = adj_get (adj_index0);
2118       adj1 = adj_get (adj_index1);
2119
2120       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2121       rw_len0 = adj0[0].rewrite_header.data_bytes;
2122       rw_len1 = adj1[0].rewrite_header.data_bytes;
2123       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2124       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2125
2126       p = vlib_buffer_get_current (b[2]);
2127       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2128       clib_prefetch_load (p);
2129
2130       p = vlib_buffer_get_current (b[3]);
2131       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2132       clib_prefetch_load (p);
2133
2134       /* Check MTU of outgoing interface. */
2135       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2136       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2137
2138       if (b[0]->flags & VNET_BUFFER_F_GSO)
2139         ip0_len = gso_mtu_sz (b[0]);
2140       if (b[1]->flags & VNET_BUFFER_F_GSO)
2141         ip1_len = gso_mtu_sz (b[1]);
2142
2143       ip4_mtu_check (b[0], ip0_len,
2144                      adj0[0].rewrite_header.max_l3_packet_bytes,
2145                      ip0->flags_and_fragment_offset &
2146                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2147                      next + 0, is_midchain, &error0);
2148       ip4_mtu_check (b[1], ip1_len,
2149                      adj1[0].rewrite_header.max_l3_packet_bytes,
2150                      ip1->flags_and_fragment_offset &
2151                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2152                      next + 1, is_midchain, &error1);
2153
2154       if (is_mcast)
2155         {
2156           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2157                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2158                     IP4_ERROR_SAME_INTERFACE : error0);
2159           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2160                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2161                     IP4_ERROR_SAME_INTERFACE : error1);
2162         }
2163
2164       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2165        * to see the IP header */
2166       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2167         {
2168           u32 next_index = adj0[0].rewrite_header.next_index;
2169           vlib_buffer_advance (b[0], -(word) rw_len0);
2170
2171           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2172           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2173
2174           if (PREDICT_FALSE
2175               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2176             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2177                                                 tx_sw_if_index0,
2178                                                 &next_index, b[0],
2179                                                 adj0->ia_cfg_index);
2180
2181           next[0] = next_index;
2182           if (is_midchain)
2183             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2184                                         0 /* is_ip6 */ );
2185         }
2186       else
2187         {
2188           b[0]->error = error_node->errors[error0];
2189           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2190             ip4_ttl_inc (b[0], ip0);
2191         }
2192       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2193         {
2194           u32 next_index = adj1[0].rewrite_header.next_index;
2195           vlib_buffer_advance (b[1], -(word) rw_len1);
2196
2197           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2198           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2199
2200           if (PREDICT_FALSE
2201               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2202             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2203                                                 tx_sw_if_index1,
2204                                                 &next_index, b[1],
2205                                                 adj1->ia_cfg_index);
2206           next[1] = next_index;
2207           if (is_midchain)
2208             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2209                                         0 /* is_ip6 */ );
2210         }
2211       else
2212         {
2213           b[1]->error = error_node->errors[error1];
2214           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2215             ip4_ttl_inc (b[1], ip1);
2216         }
2217
2218       if (is_midchain)
2219         /* Guess we are only writing on ipv4 header. */
2220         vnet_rewrite_two_headers (adj0[0], adj1[0],
2221                                   ip0, ip1, sizeof (ip4_header_t));
2222       else
2223         /* Guess we are only writing on simple Ethernet header. */
2224         vnet_rewrite_two_headers (adj0[0], adj1[0],
2225                                   ip0, ip1, sizeof (ethernet_header_t));
2226
2227       if (do_counters)
2228         {
2229           if (error0 == IP4_ERROR_NONE)
2230             vlib_increment_combined_counter
2231               (&adjacency_counters,
2232                thread_index,
2233                adj_index0, 1,
2234                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2235
2236           if (error1 == IP4_ERROR_NONE)
2237             vlib_increment_combined_counter
2238               (&adjacency_counters,
2239                thread_index,
2240                adj_index1, 1,
2241                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2242         }
2243
2244       if (is_midchain)
2245         {
2246           if (error0 == IP4_ERROR_NONE)
2247             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2248           if (error1 == IP4_ERROR_NONE)
2249             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2250         }
2251
2252       if (is_mcast)
2253         {
2254           /* copy bytes from the IP address into the MAC rewrite */
2255           if (error0 == IP4_ERROR_NONE)
2256             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2257                                         adj0->rewrite_header.dst_mcast_offset,
2258                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2259           if (error1 == IP4_ERROR_NONE)
2260             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2261                                         adj1->rewrite_header.dst_mcast_offset,
2262                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2263         }
2264
2265       next += 2;
2266       b += 2;
2267       n_left_from -= 2;
2268     }
2269 #elif (CLIB_N_PREFETCHES >= 4)
2270   next = nexts;
2271   b = bufs;
2272   while (n_left_from >= 1)
2273     {
2274       ip_adjacency_t *adj0;
2275       ip4_header_t *ip0;
2276       u32 rw_len0, error0, adj_index0;
2277       u32 tx_sw_if_index0;
2278       u8 *p;
2279
2280       /* Prefetch next iteration */
2281       if (PREDICT_TRUE (n_left_from >= 4))
2282         {
2283           ip_adjacency_t *adj2;
2284           u32 adj_index2;
2285
2286           vlib_prefetch_buffer_header (b[3], LOAD);
2287           vlib_prefetch_buffer_data (b[2], LOAD);
2288
2289           /* Prefetch adj->rewrite_header */
2290           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2291           adj2 = adj_get (adj_index2);
2292           p = (u8 *) adj2;
2293           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2294                          LOAD);
2295         }
2296
2297       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2298
2299       /*
2300        * Prefetch the per-adjacency counters
2301        */
2302       if (do_counters)
2303         {
2304           vlib_prefetch_combined_counter (&adjacency_counters,
2305                                           thread_index, adj_index0);
2306         }
2307
2308       ip0 = vlib_buffer_get_current (b[0]);
2309
2310       error0 = IP4_ERROR_NONE;
2311
2312       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2313
2314       /* Rewrite packet header and updates lengths. */
2315       adj0 = adj_get (adj_index0);
2316
2317       /* Rewrite header was prefetched. */
2318       rw_len0 = adj0[0].rewrite_header.data_bytes;
2319       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2320
2321       /* Check MTU of outgoing interface. */
2322       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2323
2324       if (b[0]->flags & VNET_BUFFER_F_GSO)
2325         ip0_len = gso_mtu_sz (b[0]);
2326
2327       ip4_mtu_check (b[0], ip0_len,
2328                      adj0[0].rewrite_header.max_l3_packet_bytes,
2329                      ip0->flags_and_fragment_offset &
2330                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2331                      next + 0, is_midchain, &error0);
2332
2333       if (is_mcast)
2334         {
2335           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2336                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2337                     IP4_ERROR_SAME_INTERFACE : error0);
2338         }
2339
2340       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2341        * to see the IP header */
2342       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2343         {
2344           u32 next_index = adj0[0].rewrite_header.next_index;
2345           vlib_buffer_advance (b[0], -(word) rw_len0);
2346           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2347           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2348
2349           if (PREDICT_FALSE
2350               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2351             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2352                                                 tx_sw_if_index0,
2353                                                 &next_index, b[0],
2354                                                 adj0->ia_cfg_index);
2355           next[0] = next_index;
2356
2357           if (is_midchain)
2358             {
2359               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2360                                           0 /* is_ip6 */ );
2361
2362               /* Guess we are only writing on ipv4 header. */
2363               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2364             }
2365           else
2366             /* Guess we are only writing on simple Ethernet header. */
2367             vnet_rewrite_one_header (adj0[0], ip0,
2368                                      sizeof (ethernet_header_t));
2369
2370           /*
2371            * Bump the per-adjacency counters
2372            */
2373           if (do_counters)
2374             vlib_increment_combined_counter
2375               (&adjacency_counters,
2376                thread_index,
2377                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2378                                                            b[0]) + rw_len0);
2379
2380           if (is_midchain)
2381             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2382
2383           if (is_mcast)
2384             /* copy bytes from the IP address into the MAC rewrite */
2385             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2386                                         adj0->rewrite_header.dst_mcast_offset,
2387                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2388         }
2389       else
2390         {
2391           b[0]->error = error_node->errors[error0];
2392           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2393             ip4_ttl_inc (b[0], ip0);
2394         }
2395
2396       next += 1;
2397       b += 1;
2398       n_left_from -= 1;
2399     }
2400 #endif
2401
2402   while (n_left_from > 0)
2403     {
2404       ip_adjacency_t *adj0;
2405       ip4_header_t *ip0;
2406       u32 rw_len0, adj_index0, error0;
2407       u32 tx_sw_if_index0;
2408
2409       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2410
2411       adj0 = adj_get (adj_index0);
2412
2413       if (do_counters)
2414         vlib_prefetch_combined_counter (&adjacency_counters,
2415                                         thread_index, adj_index0);
2416
2417       ip0 = vlib_buffer_get_current (b[0]);
2418
2419       error0 = IP4_ERROR_NONE;
2420
2421       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2422
2423
2424       /* Update packet buffer attributes/set output interface. */
2425       rw_len0 = adj0[0].rewrite_header.data_bytes;
2426       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2427
2428       /* Check MTU of outgoing interface. */
2429       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2430       if (b[0]->flags & VNET_BUFFER_F_GSO)
2431         ip0_len = gso_mtu_sz (b[0]);
2432
2433       ip4_mtu_check (b[0], ip0_len,
2434                      adj0[0].rewrite_header.max_l3_packet_bytes,
2435                      ip0->flags_and_fragment_offset &
2436                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2437                      next + 0, is_midchain, &error0);
2438
2439       if (is_mcast)
2440         {
2441           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2442                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2443                     IP4_ERROR_SAME_INTERFACE : error0);
2444         }
2445
2446       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2447        * to see the IP header */
2448       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2449         {
2450           u32 next_index = adj0[0].rewrite_header.next_index;
2451           vlib_buffer_advance (b[0], -(word) rw_len0);
2452           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2453           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2454
2455           if (PREDICT_FALSE
2456               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2457             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2458                                                 tx_sw_if_index0,
2459                                                 &next_index, b[0],
2460                                                 adj0->ia_cfg_index);
2461           next[0] = next_index;
2462
2463           if (is_midchain)
2464             {
2465               /* this acts on the packet that is about to be encapped */
2466               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2467                                           0 /* is_ip6 */ );
2468
2469               /* Guess we are only writing on ipv4 header. */
2470               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2471             }
2472           else
2473             /* Guess we are only writing on simple Ethernet header. */
2474             vnet_rewrite_one_header (adj0[0], ip0,
2475                                      sizeof (ethernet_header_t));
2476
2477           if (do_counters)
2478             vlib_increment_combined_counter
2479               (&adjacency_counters,
2480                thread_index, adj_index0, 1,
2481                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2482
2483           if (is_midchain)
2484             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2485
2486           if (is_mcast)
2487             /* copy bytes from the IP address into the MAC rewrite */
2488             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2489                                         adj0->rewrite_header.dst_mcast_offset,
2490                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2491         }
2492       else
2493         {
2494           b[0]->error = error_node->errors[error0];
2495           /* undo the TTL decrement - we'll be back to do it again */
2496           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2497             ip4_ttl_inc (b[0], ip0);
2498         }
2499
2500       next += 1;
2501       b += 1;
2502       n_left_from -= 1;
2503     }
2504
2505
2506   /* Need to do trace after rewrites to pick up new packet data. */
2507   if (node->flags & VLIB_NODE_FLAG_TRACE)
2508     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2509
2510   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2511   return frame->n_vectors;
2512 }
2513
2514 /** @brief IPv4 rewrite node.
2515     @node ip4-rewrite
2516
2517     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2518     header checksum, fetch the ip adjacency, check the outbound mtu,
2519     apply the adjacency rewrite, and send pkts to the adjacency
2520     rewrite header's rewrite_next_index.
2521
2522     @param vm vlib_main_t corresponding to the current thread
2523     @param node vlib_node_runtime_t
2524     @param frame vlib_frame_t whose contents should be dispatched
2525
2526     @par Graph mechanics: buffer metadata, next index usage
2527
2528     @em Uses:
2529     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2530         - the rewrite adjacency index
2531     - <code>adj->lookup_next_index</code>
2532         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2533           the packet will be dropped.
2534     - <code>adj->rewrite_header</code>
2535         - Rewrite string length, rewrite string, next_index
2536
2537     @em Sets:
2538     - <code>b->current_data, b->current_length</code>
2539         - Updated net of applying the rewrite string
2540
2541     <em>Next Indices:</em>
2542     - <code> adj->rewrite_header.next_index </code>
2543       or @c ip4-drop
2544 */
2545
2546 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2547                                  vlib_frame_t * frame)
2548 {
2549   if (adj_are_counters_enabled ())
2550     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2551   else
2552     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2553 }
2554
2555 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2556                                        vlib_node_runtime_t * node,
2557                                        vlib_frame_t * frame)
2558 {
2559   if (adj_are_counters_enabled ())
2560     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2561   else
2562     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2563 }
2564
2565 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2566                                   vlib_node_runtime_t * node,
2567                                   vlib_frame_t * frame)
2568 {
2569   if (adj_are_counters_enabled ())
2570     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2571   else
2572     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2573 }
2574
2575 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2576                                        vlib_node_runtime_t * node,
2577                                        vlib_frame_t * frame)
2578 {
2579   if (adj_are_counters_enabled ())
2580     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2581   else
2582     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2583 }
2584
2585 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2586                                         vlib_node_runtime_t * node,
2587                                         vlib_frame_t * frame)
2588 {
2589   if (adj_are_counters_enabled ())
2590     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2591   else
2592     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2593 }
2594
2595 /* *INDENT-OFF* */
2596 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2597   .name = "ip4-rewrite",
2598   .vector_size = sizeof (u32),
2599
2600   .format_trace = format_ip4_rewrite_trace,
2601
2602   .n_next_nodes = IP4_REWRITE_N_NEXT,
2603   .next_nodes = {
2604     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2605     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2606     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2607   },
2608 };
2609
2610 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2611   .name = "ip4-rewrite-bcast",
2612   .vector_size = sizeof (u32),
2613
2614   .format_trace = format_ip4_rewrite_trace,
2615   .sibling_of = "ip4-rewrite",
2616 };
2617
2618 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2619   .name = "ip4-rewrite-mcast",
2620   .vector_size = sizeof (u32),
2621
2622   .format_trace = format_ip4_rewrite_trace,
2623   .sibling_of = "ip4-rewrite",
2624 };
2625
2626 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2627   .name = "ip4-mcast-midchain",
2628   .vector_size = sizeof (u32),
2629
2630   .format_trace = format_ip4_rewrite_trace,
2631   .sibling_of = "ip4-rewrite",
2632 };
2633
2634 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2635   .name = "ip4-midchain",
2636   .vector_size = sizeof (u32),
2637   .format_trace = format_ip4_rewrite_trace,
2638   .sibling_of = "ip4-rewrite",
2639 };
2640 /* *INDENT-ON* */
2641
2642 static clib_error_t *
2643 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2644                              unformat_input_t * input,
2645                              vlib_cli_command_t * cmd)
2646 {
2647   int matched = 0;
2648   u32 table_id = 0;
2649   u32 flow_hash_config = 0;
2650   int rv;
2651
2652   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2653     {
2654       if (unformat (input, "table %d", &table_id))
2655         matched = 1;
2656 #define _(a, b, v)                                                            \
2657   else if (unformat (input, #a))                                              \
2658   {                                                                           \
2659     flow_hash_config |= v;                                                    \
2660     matched = 1;                                                              \
2661   }
2662       foreach_flow_hash_bit
2663 #undef _
2664         else
2665         break;
2666     }
2667
2668   if (matched == 0)
2669     return clib_error_return (0, "unknown input `%U'",
2670                               format_unformat_error, input);
2671
2672   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2673   switch (rv)
2674     {
2675     case 0:
2676       break;
2677
2678     case VNET_API_ERROR_NO_SUCH_FIB:
2679       return clib_error_return (0, "no such FIB table %d", table_id);
2680
2681     default:
2682       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2683       break;
2684     }
2685
2686   return 0;
2687 }
2688
2689 /*?
2690  * Configure the set of IPv4 fields used by the flow hash.
2691  *
2692  * @cliexpar
2693  * Example of how to set the flow hash on a given table:
2694  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2695  * Example of how to display the configured flow hash:
2696  * @cliexstart{show ip fib}
2697  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2698  * 0.0.0.0/0
2699  *   unicast-ip4-chain
2700  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2701  *     [0] [@0]: dpo-drop ip6
2702  * 0.0.0.0/32
2703  *   unicast-ip4-chain
2704  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2705  *     [0] [@0]: dpo-drop ip6
2706  * 224.0.0.0/8
2707  *   unicast-ip4-chain
2708  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2709  *     [0] [@0]: dpo-drop ip6
2710  * 6.0.1.2/32
2711  *   unicast-ip4-chain
2712  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2713  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2714  * 7.0.0.1/32
2715  *   unicast-ip4-chain
2716  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2717  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2718  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2719  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2720  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2721  * 240.0.0.0/8
2722  *   unicast-ip4-chain
2723  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2724  *     [0] [@0]: dpo-drop ip6
2725  * 255.255.255.255/32
2726  *   unicast-ip4-chain
2727  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2728  *     [0] [@0]: dpo-drop ip6
2729  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2730  * 0.0.0.0/0
2731  *   unicast-ip4-chain
2732  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2733  *     [0] [@0]: dpo-drop ip6
2734  * 0.0.0.0/32
2735  *   unicast-ip4-chain
2736  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2737  *     [0] [@0]: dpo-drop ip6
2738  * 172.16.1.0/24
2739  *   unicast-ip4-chain
2740  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2741  *     [0] [@4]: ipv4-glean: af_packet0
2742  * 172.16.1.1/32
2743  *   unicast-ip4-chain
2744  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2745  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2746  * 172.16.1.2/32
2747  *   unicast-ip4-chain
2748  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2749  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2750  * 172.16.2.0/24
2751  *   unicast-ip4-chain
2752  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2753  *     [0] [@4]: ipv4-glean: af_packet1
2754  * 172.16.2.1/32
2755  *   unicast-ip4-chain
2756  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2757  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2758  * 224.0.0.0/8
2759  *   unicast-ip4-chain
2760  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2761  *     [0] [@0]: dpo-drop ip6
2762  * 240.0.0.0/8
2763  *   unicast-ip4-chain
2764  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2765  *     [0] [@0]: dpo-drop ip6
2766  * 255.255.255.255/32
2767  *   unicast-ip4-chain
2768  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2769  *     [0] [@0]: dpo-drop ip6
2770  * @cliexend
2771 ?*/
2772 /* *INDENT-OFF* */
2773 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2774 {
2775   .path = "set ip flow-hash",
2776   .short_help =
2777   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2778   .function = set_ip_flow_hash_command_fn,
2779 };
2780 /* *INDENT-ON* */
2781
2782 #ifndef CLIB_MARCH_VARIANT
2783 int
2784 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2785                              u32 table_index)
2786 {
2787   vnet_main_t *vnm = vnet_get_main ();
2788   vnet_interface_main_t *im = &vnm->interface_main;
2789   ip4_main_t *ipm = &ip4_main;
2790   ip_lookup_main_t *lm = &ipm->lookup_main;
2791   vnet_classify_main_t *cm = &vnet_classify_main;
2792   ip4_address_t *if_addr;
2793
2794   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2795     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2796
2797   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2798     return VNET_API_ERROR_NO_SUCH_ENTRY;
2799
2800   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2801   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2802
2803   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2804
2805   if (NULL != if_addr)
2806     {
2807       fib_prefix_t pfx = {
2808         .fp_len = 32,
2809         .fp_proto = FIB_PROTOCOL_IP4,
2810         .fp_addr.ip4 = *if_addr,
2811       };
2812       u32 fib_index;
2813
2814       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2815                                                        sw_if_index);
2816
2817
2818       if (table_index != (u32) ~ 0)
2819         {
2820           dpo_id_t dpo = DPO_INVALID;
2821
2822           dpo_set (&dpo,
2823                    DPO_CLASSIFY,
2824                    DPO_PROTO_IP4,
2825                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2826
2827           fib_table_entry_special_dpo_add (fib_index,
2828                                            &pfx,
2829                                            FIB_SOURCE_CLASSIFY,
2830                                            FIB_ENTRY_FLAG_NONE, &dpo);
2831           dpo_reset (&dpo);
2832         }
2833       else
2834         {
2835           fib_table_entry_special_remove (fib_index,
2836                                           &pfx, FIB_SOURCE_CLASSIFY);
2837         }
2838     }
2839
2840   return 0;
2841 }
2842 #endif
2843
2844 static clib_error_t *
2845 set_ip_classify_command_fn (vlib_main_t * vm,
2846                             unformat_input_t * input,
2847                             vlib_cli_command_t * cmd)
2848 {
2849   u32 table_index = ~0;
2850   int table_index_set = 0;
2851   u32 sw_if_index = ~0;
2852   int rv;
2853
2854   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2855     {
2856       if (unformat (input, "table-index %d", &table_index))
2857         table_index_set = 1;
2858       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2859                          vnet_get_main (), &sw_if_index))
2860         ;
2861       else
2862         break;
2863     }
2864
2865   if (table_index_set == 0)
2866     return clib_error_return (0, "classify table-index must be specified");
2867
2868   if (sw_if_index == ~0)
2869     return clib_error_return (0, "interface / subif must be specified");
2870
2871   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2872
2873   switch (rv)
2874     {
2875     case 0:
2876       break;
2877
2878     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2879       return clib_error_return (0, "No such interface");
2880
2881     case VNET_API_ERROR_NO_SUCH_ENTRY:
2882       return clib_error_return (0, "No such classifier table");
2883     }
2884   return 0;
2885 }
2886
2887 /*?
2888  * Assign a classification table to an interface. The classification
2889  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2890  * commands. Once the table is created, use this command to filter packets
2891  * on an interface.
2892  *
2893  * @cliexpar
2894  * Example of how to assign a classification table to an interface:
2895  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2896 ?*/
2897 /* *INDENT-OFF* */
2898 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2899 {
2900     .path = "set ip classify",
2901     .short_help =
2902     "set ip classify intfc <interface> table-index <classify-idx>",
2903     .function = set_ip_classify_command_fn,
2904 };
2905 /* *INDENT-ON* */
2906
2907 /*
2908  * fd.io coding-style-patch-verification: ON
2909  *
2910  * Local Variables:
2911  * eval: (c-set-style "gnu")
2912  * End:
2913  */