ip: set error number on failed intf addr
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/classify_dpo.h>
56 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
57 #include <vnet/adj/adj_dp.h>
58 #include <vnet/pg/pg.h>
59
60 #include <vnet/ip/ip4_forward.h>
61 #include <vnet/interface_output.h>
62 #include <vnet/classify/vnet_classify.h>
63
64 /** @brief IPv4 lookup node.
65     @node ip4-lookup
66
67     This is the main IPv4 lookup dispatch node.
68
69     @param vm vlib_main_t corresponding to the current thread
70     @param node vlib_node_runtime_t
71     @param frame vlib_frame_t whose contents should be dispatched
72
73     @par Graph mechanics: buffer metadata, next index usage
74
75     @em Uses:
76     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
77         - Indicates the @c sw_if_index value of the interface that the
78           packet was received on.
79     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
80         - When the value is @c ~0 then the node performs a longest prefix
81           match (LPM) for the packet destination address in the FIB attached
82           to the receive interface.
83         - Otherwise perform LPM for the packet destination address in the
84           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
85           value (0, 1, ...) and not a VRF id.
86
87     @em Sets:
88     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
89         - The lookup result adjacency index.
90
91     <em>Next Index:</em>
92     - Dispatches the packet to the node index found in
93       ip_adjacency_t @c adj->lookup_next_index
94       (where @c adj is the lookup result adjacency).
95 */
96 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
97                                 vlib_frame_t * frame)
98 {
99   return ip4_lookup_inline (vm, node, frame);
100 }
101
102 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
103
104 /* *INDENT-OFF* */
105 VLIB_REGISTER_NODE (ip4_lookup_node) =
106 {
107   .name = "ip4-lookup",
108   .vector_size = sizeof (u32),
109   .format_trace = format_ip4_lookup_trace,
110   .n_next_nodes = IP_LOOKUP_N_NEXT,
111   .next_nodes = IP4_LOOKUP_NEXT_NODES,
112 };
113 /* *INDENT-ON* */
114
115 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
116                                       vlib_node_runtime_t * node,
117                                       vlib_frame_t * frame)
118 {
119   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
120   u32 n_left, *from;
121   u32 thread_index = vm->thread_index;
122   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
123   u16 nexts[VLIB_FRAME_SIZE], *next;
124
125   from = vlib_frame_vector_args (frame);
126   n_left = frame->n_vectors;
127   next = nexts;
128
129   vlib_get_buffers (vm, from, bufs, n_left);
130
131   while (n_left >= 4)
132     {
133       const load_balance_t *lb0, *lb1;
134       const ip4_header_t *ip0, *ip1;
135       u32 lbi0, hc0, lbi1, hc1;
136       const dpo_id_t *dpo0, *dpo1;
137
138       /* Prefetch next iteration. */
139       {
140         vlib_prefetch_buffer_header (b[2], LOAD);
141         vlib_prefetch_buffer_header (b[3], LOAD);
142
143         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
144         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
145       }
146
147       ip0 = vlib_buffer_get_current (b[0]);
148       ip1 = vlib_buffer_get_current (b[1]);
149       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
150       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
151
152       lb0 = load_balance_get (lbi0);
153       lb1 = load_balance_get (lbi1);
154
155       /*
156        * this node is for via FIBs we can re-use the hash value from the
157        * to node if present.
158        * We don't want to use the same hash value at each level in the recursion
159        * graph as that would lead to polarisation
160        */
161       hc0 = hc1 = 0;
162
163       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
164         {
165           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
166             {
167               hc0 = vnet_buffer (b[0])->ip.flow_hash =
168                 vnet_buffer (b[0])->ip.flow_hash >> 1;
169             }
170           else
171             {
172               hc0 = vnet_buffer (b[0])->ip.flow_hash =
173                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
174             }
175           dpo0 = load_balance_get_fwd_bucket
176             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
177         }
178       else
179         {
180           dpo0 = load_balance_get_bucket_i (lb0, 0);
181         }
182       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
183         {
184           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
185             {
186               hc1 = vnet_buffer (b[1])->ip.flow_hash =
187                 vnet_buffer (b[1])->ip.flow_hash >> 1;
188             }
189           else
190             {
191               hc1 = vnet_buffer (b[1])->ip.flow_hash =
192                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
193             }
194           dpo1 = load_balance_get_fwd_bucket
195             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
196         }
197       else
198         {
199           dpo1 = load_balance_get_bucket_i (lb1, 0);
200         }
201
202       next[0] = dpo0->dpoi_next_node;
203       next[1] = dpo1->dpoi_next_node;
204
205       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
206       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
207
208       vlib_increment_combined_counter
209         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
210       vlib_increment_combined_counter
211         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
212
213       b += 2;
214       next += 2;
215       n_left -= 2;
216     }
217
218   while (n_left > 0)
219     {
220       const load_balance_t *lb0;
221       const ip4_header_t *ip0;
222       const dpo_id_t *dpo0;
223       u32 lbi0, hc0;
224
225       ip0 = vlib_buffer_get_current (b[0]);
226       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
227
228       lb0 = load_balance_get (lbi0);
229
230       hc0 = 0;
231       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
232         {
233           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
234             {
235               hc0 = vnet_buffer (b[0])->ip.flow_hash =
236                 vnet_buffer (b[0])->ip.flow_hash >> 1;
237             }
238           else
239             {
240               hc0 = vnet_buffer (b[0])->ip.flow_hash =
241                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
242             }
243           dpo0 = load_balance_get_fwd_bucket
244             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
245         }
246       else
247         {
248           dpo0 = load_balance_get_bucket_i (lb0, 0);
249         }
250
251       next[0] = dpo0->dpoi_next_node;
252       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
253
254       vlib_increment_combined_counter
255         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
256
257       b += 1;
258       next += 1;
259       n_left -= 1;
260     }
261
262   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
263   if (node->flags & VLIB_NODE_FLAG_TRACE)
264     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
265
266   return frame->n_vectors;
267 }
268
269 /* *INDENT-OFF* */
270 VLIB_REGISTER_NODE (ip4_load_balance_node) =
271 {
272   .name = "ip4-load-balance",
273   .vector_size = sizeof (u32),
274   .sibling_of = "ip4-lookup",
275   .format_trace = format_ip4_lookup_trace,
276 };
277 /* *INDENT-ON* */
278
279 #ifndef CLIB_MARCH_VARIANT
280 /* get first interface address */
281 ip4_address_t *
282 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
283                              ip_interface_address_t ** result_ia)
284 {
285   ip_lookup_main_t *lm = &im->lookup_main;
286   ip_interface_address_t *ia = 0;
287   ip4_address_t *result = 0;
288
289   /* *INDENT-OFF* */
290   foreach_ip_interface_address
291     (lm, ia, sw_if_index,
292      1 /* honor unnumbered */ ,
293      ({
294        ip4_address_t * a =
295          ip_interface_address_get_address (lm, ia);
296        result = a;
297        break;
298      }));
299   /* *INDENT-OFF* */
300   if (result_ia)
301     *result_ia = result ? ia : 0;
302   return result;
303 }
304 #endif
305
306 static void
307 ip4_add_subnet_bcast_route (u32 fib_index,
308                             fib_prefix_t *pfx,
309                             u32 sw_if_index)
310 {
311   vnet_sw_interface_flags_t iflags;
312
313   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
314
315   fib_table_entry_special_remove(fib_index,
316                                  pfx,
317                                  FIB_SOURCE_INTERFACE);
318
319   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
320     {
321       fib_table_entry_update_one_path (fib_index, pfx,
322                                        FIB_SOURCE_INTERFACE,
323                                        FIB_ENTRY_FLAG_NONE,
324                                        DPO_PROTO_IP4,
325                                        /* No next-hop address */
326                                        &ADJ_BCAST_ADDR,
327                                        sw_if_index,
328                                        // invalid FIB index
329                                        ~0,
330                                        1,
331                                        // no out-label stack
332                                        NULL,
333                                        FIB_ROUTE_PATH_FLAG_NONE);
334     }
335   else
336     {
337         fib_table_entry_special_add(fib_index,
338                                     pfx,
339                                     FIB_SOURCE_INTERFACE,
340                                     (FIB_ENTRY_FLAG_DROP |
341                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
342     }
343 }
344
345 static void
346 ip4_add_interface_prefix_routes (ip4_main_t *im,
347                                  u32 sw_if_index,
348                                  u32 fib_index,
349                                  ip_interface_address_t * a)
350 {
351   ip_lookup_main_t *lm = &im->lookup_main;
352   ip_interface_prefix_t *if_prefix;
353   ip4_address_t *address = ip_interface_address_get_address (lm, a);
354
355   ip_interface_prefix_key_t key = {
356     .prefix = {
357       .fp_len = a->address_length,
358       .fp_proto = FIB_PROTOCOL_IP4,
359       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
360     },
361     .sw_if_index = sw_if_index,
362   };
363
364   fib_prefix_t pfx_special = {
365     .fp_proto = FIB_PROTOCOL_IP4,
366   };
367
368   /* If prefix already set on interface, just increment ref count & return */
369   if_prefix = ip_get_interface_prefix (lm, &key);
370   if (if_prefix)
371     {
372       if_prefix->ref_count += 1;
373       return;
374     }
375
376   /* New prefix - allocate a pool entry, initialize it, add to the hash */
377   pool_get (lm->if_prefix_pool, if_prefix);
378   if_prefix->ref_count = 1;
379   if_prefix->src_ia_index = a - lm->if_address_pool;
380   clib_memcpy (&if_prefix->key, &key, sizeof (key));
381   mhash_set (&lm->prefix_to_if_prefix_index, &key,
382              if_prefix - lm->if_prefix_pool, 0 /* old value */);
383
384   pfx_special.fp_len = a->address_length;
385   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
386
387   /* set the glean route for the prefix */
388   fib_table_entry_update_one_path (fib_index, &pfx_special,
389                                    FIB_SOURCE_INTERFACE,
390                                    (FIB_ENTRY_FLAG_CONNECTED |
391                                     FIB_ENTRY_FLAG_ATTACHED),
392                                    DPO_PROTO_IP4,
393                                    /* No next-hop address */
394                                    NULL,
395                                    sw_if_index,
396                                    /* invalid FIB index */
397                                    ~0,
398                                    1,
399                                    /* no out-label stack */
400                                    NULL,
401                                    FIB_ROUTE_PATH_FLAG_NONE);
402
403   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
404   if (a->address_length <= 30)
405     {
406       /* set a drop route for the base address of the prefix */
407       pfx_special.fp_len = 32;
408       pfx_special.fp_addr.ip4.as_u32 =
409         address->as_u32 & im->fib_masks[a->address_length];
410
411       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
412         fib_table_entry_special_add (fib_index, &pfx_special,
413                                      FIB_SOURCE_INTERFACE,
414                                      (FIB_ENTRY_FLAG_DROP |
415                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
416
417       /* set a route for the broadcast address of the prefix */
418       pfx_special.fp_len = 32;
419       pfx_special.fp_addr.ip4.as_u32 =
420         address->as_u32 | ~im->fib_masks[a->address_length];
421       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
422         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
423
424
425     }
426   /* length == 31 - add an attached route for the other address */
427   else if (a->address_length == 31)
428     {
429       pfx_special.fp_len = 32;
430       pfx_special.fp_addr.ip4.as_u32 =
431         address->as_u32 ^ clib_host_to_net_u32(1);
432
433       fib_table_entry_update_one_path (fib_index, &pfx_special,
434                                        FIB_SOURCE_INTERFACE,
435                                        (FIB_ENTRY_FLAG_ATTACHED),
436                                        DPO_PROTO_IP4,
437                                        &pfx_special.fp_addr,
438                                        sw_if_index,
439                                        /* invalid FIB index */
440                                        ~0,
441                                        1,
442                                        NULL,
443                                        FIB_ROUTE_PATH_FLAG_NONE);
444     }
445 }
446
447 static void
448 ip4_add_interface_routes (u32 sw_if_index,
449                           ip4_main_t * im, u32 fib_index,
450                           ip_interface_address_t * a)
451 {
452   ip_lookup_main_t *lm = &im->lookup_main;
453   ip4_address_t *address = ip_interface_address_get_address (lm, a);
454   fib_prefix_t pfx = {
455     .fp_len = 32,
456     .fp_proto = FIB_PROTOCOL_IP4,
457     .fp_addr.ip4 = *address,
458   };
459
460   /* set special routes for the prefix if needed */
461   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
462
463   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
464     {
465       u32 classify_table_index =
466         lm->classify_table_index_by_sw_if_index[sw_if_index];
467       if (classify_table_index != (u32) ~ 0)
468         {
469           dpo_id_t dpo = DPO_INVALID;
470
471           dpo_set (&dpo,
472                    DPO_CLASSIFY,
473                    DPO_PROTO_IP4,
474                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
475
476           fib_table_entry_special_dpo_add (fib_index,
477                                            &pfx,
478                                            FIB_SOURCE_CLASSIFY,
479                                            FIB_ENTRY_FLAG_NONE, &dpo);
480           dpo_reset (&dpo);
481         }
482     }
483
484   fib_table_entry_update_one_path (fib_index, &pfx,
485                                    FIB_SOURCE_INTERFACE,
486                                    (FIB_ENTRY_FLAG_CONNECTED |
487                                     FIB_ENTRY_FLAG_LOCAL),
488                                    DPO_PROTO_IP4,
489                                    &pfx.fp_addr,
490                                    sw_if_index,
491                                    // invalid FIB index
492                                    ~0,
493                                    1, NULL,
494                                    FIB_ROUTE_PATH_FLAG_NONE);
495 }
496
497 static void
498 ip4_del_interface_prefix_routes (ip4_main_t * im,
499                                  u32 sw_if_index,
500                                  u32 fib_index,
501                                  ip4_address_t * address,
502                                  u32 address_length)
503 {
504   ip_lookup_main_t *lm = &im->lookup_main;
505   ip_interface_prefix_t *if_prefix;
506
507   ip_interface_prefix_key_t key = {
508     .prefix = {
509       .fp_len = address_length,
510       .fp_proto = FIB_PROTOCOL_IP4,
511       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
512     },
513     .sw_if_index = sw_if_index,
514   };
515
516   fib_prefix_t pfx_special = {
517     .fp_len = 32,
518     .fp_proto = FIB_PROTOCOL_IP4,
519   };
520
521   if_prefix = ip_get_interface_prefix (lm, &key);
522   if (!if_prefix)
523     {
524       clib_warning ("Prefix not found while deleting %U",
525                     format_ip4_address_and_length, address, address_length);
526       return;
527     }
528
529   if_prefix->ref_count -= 1;
530
531   /*
532    * Routes need to be adjusted if deleting last intf addr in prefix
533    *
534    * We're done now otherwise
535    */
536   if (if_prefix->ref_count > 0)
537     return;
538
539   /* length <= 30, delete glean route, first address, last address */
540   if (address_length <= 30)
541     {
542       /* Less work to do in FIB if we remove the covered /32s first */
543
544       /* first address in prefix */
545       pfx_special.fp_addr.ip4.as_u32 =
546         address->as_u32 & im->fib_masks[address_length];
547       pfx_special.fp_len = 32;
548
549       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
550         fib_table_entry_special_remove (fib_index,
551                                         &pfx_special,
552                                         FIB_SOURCE_INTERFACE);
553
554       /* prefix broadcast address */
555       pfx_special.fp_addr.ip4.as_u32 =
556         address->as_u32 | ~im->fib_masks[address_length];
557       pfx_special.fp_len = 32;
558
559       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
560         fib_table_entry_special_remove (fib_index,
561                                         &pfx_special,
562                                         FIB_SOURCE_INTERFACE);
563     }
564   else if (address_length == 31)
565     {
566       /* length == 31, delete attached route for the other address */
567       pfx_special.fp_addr.ip4.as_u32 =
568         address->as_u32 ^ clib_host_to_net_u32(1);
569
570       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
571     }
572
573   /* remove glean route for prefix */
574   pfx_special.fp_addr.ip4 = *address;
575   pfx_special.fp_len = address_length;
576   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
577
578   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
579   pool_put (lm->if_prefix_pool, if_prefix);
580 }
581
582 static void
583 ip4_del_interface_routes (u32 sw_if_index,
584                           ip4_main_t * im,
585                           u32 fib_index,
586                           ip4_address_t * address, u32 address_length)
587 {
588   fib_prefix_t pfx = {
589     .fp_len = 32,
590     .fp_proto = FIB_PROTOCOL_IP4,
591     .fp_addr.ip4 = *address,
592   };
593
594   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
595
596   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
597                                    address, address_length);
598 }
599
600 #ifndef CLIB_MARCH_VARIANT
601 void
602 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
603 {
604   ip4_main_t *im = &ip4_main;
605   vnet_main_t *vnm = vnet_get_main ();
606   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
607
608   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
609
610   /*
611    * enable/disable only on the 1<->0 transition
612    */
613   if (is_enable)
614     {
615       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
616         return;
617     }
618   else
619     {
620       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
621       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
622         return;
623     }
624   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
625                                !is_enable, 0, 0);
626
627
628   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
629                                sw_if_index, !is_enable, 0, 0);
630
631   if (is_enable)
632     hi->l3_if_count++;
633   else if (hi->l3_if_count)
634     hi->l3_if_count--;
635
636   {
637     ip4_enable_disable_interface_callback_t *cb;
638     vec_foreach (cb, im->enable_disable_interface_callbacks)
639       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
640   }
641 }
642
643 static clib_error_t *
644 ip4_add_del_interface_address_internal (vlib_main_t * vm,
645                                         u32 sw_if_index,
646                                         ip4_address_t * address,
647                                         u32 address_length, u32 is_del)
648 {
649   vnet_main_t *vnm = vnet_get_main ();
650   ip4_main_t *im = &ip4_main;
651   ip_lookup_main_t *lm = &im->lookup_main;
652   clib_error_t *error = 0;
653   u32 if_address_index;
654   ip4_address_fib_t ip4_af, *addr_fib = 0;
655
656   error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
657   if (error)
658     {
659       vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
660       return error;
661     }
662
663   ip4_addr_fib_init (&ip4_af, address,
664                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
665   vec_add1 (addr_fib, ip4_af);
666
667   /*
668    * there is no support for adj-fib handling in the presence of overlapping
669    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
670    * most routers do.
671    */
672   /* *INDENT-OFF* */
673   if (!is_del)
674     {
675       /* When adding an address check that it does not conflict
676          with an existing address on any interface in this table. */
677       ip_interface_address_t *ia;
678       vnet_sw_interface_t *sif;
679
680       pool_foreach (sif, vnm->interface_main.sw_interfaces)
681        {
682           if (im->fib_index_by_sw_if_index[sw_if_index] ==
683               im->fib_index_by_sw_if_index[sif->sw_if_index])
684             {
685               foreach_ip_interface_address
686                 (&im->lookup_main, ia, sif->sw_if_index,
687                  0 /* honor unnumbered */ ,
688                  ({
689                    ip4_address_t * x =
690                      ip_interface_address_get_address
691                      (&im->lookup_main, ia);
692
693                    if (ip4_destination_matches_route
694                        (im, address, x, ia->address_length) ||
695                        ip4_destination_matches_route (im,
696                                                       x,
697                                                       address,
698                                                       address_length))
699                      {
700                        /* an intf may have >1 addr from the same prefix */
701                        if ((sw_if_index == sif->sw_if_index) &&
702                            (ia->address_length == address_length) &&
703                            (x->as_u32 != address->as_u32))
704                          continue;
705
706                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
707                          /* if the address we're comparing against is stale
708                           * then the CP has not added this one back yet, maybe
709                           * it never will, so we have to assume it won't and
710                           * ignore it. if it does add it back, then it will fail
711                           * because this one is now present */
712                          continue;
713
714                        /* error if the length or intf was different */
715                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
716
717                        error = clib_error_create
718                          ("failed to add %U on %U which conflicts with %U for interface %U",
719                           format_ip4_address_and_length, address,
720                           address_length,
721                           format_vnet_sw_if_index_name, vnm,
722                           sw_if_index,
723                           format_ip4_address_and_length, x,
724                           ia->address_length,
725                           format_vnet_sw_if_index_name, vnm,
726                           sif->sw_if_index);
727                        goto done;
728                      }
729                  }));
730             }
731       }
732     }
733   /* *INDENT-ON* */
734
735   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
736
737   if (is_del)
738     {
739       if (~0 == if_address_index)
740         {
741           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
742           error = clib_error_create ("%U not found for interface %U",
743                                      lm->format_address_and_length,
744                                      addr_fib, address_length,
745                                      format_vnet_sw_if_index_name, vnm,
746                                      sw_if_index);
747           goto done;
748         }
749
750       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
751                                         address_length, sw_if_index);
752       if (error)
753         goto done;
754     }
755   else
756     {
757       if (~0 != if_address_index)
758         {
759           ip_interface_address_t *ia;
760
761           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
762
763           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
764             {
765               if (ia->sw_if_index == sw_if_index)
766                 {
767                   /* re-adding an address during the replace action.
768                    * consdier this the update. clear the flag and
769                    * we're done */
770                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
771                   goto done;
772                 }
773               else
774                 {
775                   /* The prefix is moving from one interface to another.
776                    * delete the stale and add the new */
777                   ip4_add_del_interface_address_internal (vm,
778                                                           ia->sw_if_index,
779                                                           address,
780                                                           address_length, 1);
781                   ia = NULL;
782                   error = ip_interface_address_add (lm, sw_if_index,
783                                                     addr_fib, address_length,
784                                                     &if_address_index);
785                 }
786             }
787           else
788             {
789               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
790               error = clib_error_create
791                 ("Prefix %U already found on interface %U",
792                  lm->format_address_and_length, addr_fib, address_length,
793                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
794             }
795         }
796       else
797         error = ip_interface_address_add (lm, sw_if_index,
798                                           addr_fib, address_length,
799                                           &if_address_index);
800     }
801
802   if (error)
803     goto done;
804
805   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
806   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
807
808   /* intf addr routes are added/deleted on admin up/down */
809   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
810     {
811       if (is_del)
812         ip4_del_interface_routes (sw_if_index,
813                                   im, ip4_af.fib_index, address,
814                                   address_length);
815       else
816         ip4_add_interface_routes (sw_if_index,
817                                   im, ip4_af.fib_index,
818                                   pool_elt_at_index
819                                   (lm->if_address_pool, if_address_index));
820     }
821
822   ip4_add_del_interface_address_callback_t *cb;
823   vec_foreach (cb, im->add_del_interface_address_callbacks)
824     cb->function (im, cb->function_opaque, sw_if_index,
825                   address, address_length, if_address_index, is_del);
826
827 done:
828   vec_free (addr_fib);
829   return error;
830 }
831
832 clib_error_t *
833 ip4_add_del_interface_address (vlib_main_t * vm,
834                                u32 sw_if_index,
835                                ip4_address_t * address,
836                                u32 address_length, u32 is_del)
837 {
838   return ip4_add_del_interface_address_internal
839     (vm, sw_if_index, address, address_length, is_del);
840 }
841
842 void
843 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
844 {
845   ip_interface_address_t *ia;
846   ip4_main_t *im;
847
848   im = &ip4_main;
849
850   /*
851    * when directed broadcast is enabled, the subnet braodcast route will forward
852    * packets using an adjacency with a broadcast MAC. otherwise it drops
853    */
854   /* *INDENT-OFF* */
855   foreach_ip_interface_address(&im->lookup_main, ia,
856                                sw_if_index, 0,
857      ({
858        if (ia->address_length <= 30)
859          {
860            ip4_address_t *ipa;
861
862            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
863
864            fib_prefix_t pfx = {
865              .fp_len = 32,
866              .fp_proto = FIB_PROTOCOL_IP4,
867              .fp_addr = {
868                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
869              },
870            };
871
872            ip4_add_subnet_bcast_route
873              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
874                                                   sw_if_index),
875               &pfx, sw_if_index);
876          }
877      }));
878   /* *INDENT-ON* */
879 }
880 #endif
881
882 static clib_error_t *
883 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
884 {
885   ip4_main_t *im = &ip4_main;
886   ip_interface_address_t *ia;
887   ip4_address_t *a;
888   u32 is_admin_up, fib_index;
889
890   vec_validate_init_empty (im->
891                            lookup_main.if_address_pool_index_by_sw_if_index,
892                            sw_if_index, ~0);
893
894   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
895
896   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
897
898   /* *INDENT-OFF* */
899   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
900                                 0 /* honor unnumbered */,
901   ({
902     a = ip_interface_address_get_address (&im->lookup_main, ia);
903     if (is_admin_up)
904       ip4_add_interface_routes (sw_if_index,
905                                 im, fib_index,
906                                 ia);
907     else
908       ip4_del_interface_routes (sw_if_index,
909                                 im, fib_index,
910                                 a, ia->address_length);
911   }));
912   /* *INDENT-ON* */
913
914   return 0;
915 }
916
917 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
918
919 /* Built-in ip4 unicast rx feature path definition */
920 /* *INDENT-OFF* */
/*
 * Arc "ip4-unicast": started from "ip4-input" / "ip4-input-no-checksum" and
 * terminated by "ip4-lookup".  .arc_index_ptr points at
 * ip4_main.lookup_main.ucast_feature_arc_index.
 *
 * Ordering constraints declared below via .runs_before:
 *   ip4-flow-classify -> ip4-inacl -> ip4-policer-classify ->
 *   ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup
 * plus ip4-source-and-port-range-check-rx before ip4-policer-classify and
 * ip4-not-enabled before ip4-lookup.
 */
921 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
922 {
923   .arc_name = "ip4-unicast",
924   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
925   .last_in_arc = "ip4-lookup",
926   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
927 };
928
/* Flow classification; ordered ahead of the input ACL. */
929 VNET_FEATURE_INIT (ip4_flow_classify, static) =
930 {
931   .arc_name = "ip4-unicast",
932   .node_name = "ip4-flow-classify",
933   .runs_before = VNET_FEATURES ("ip4-inacl"),
934 };
935
/* Input ACL; ordered ahead of policer classification. */
936 VNET_FEATURE_INIT (ip4_inacl, static) =
937 {
938   .arc_name = "ip4-unicast",
939   .node_name = "ip4-inacl",
940   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
941 };
942
/* RX source/port-range check; ordered ahead of policer classification. */
943 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
944 {
945   .arc_name = "ip4-unicast",
946   .node_name = "ip4-source-and-port-range-check-rx",
947   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
948 };
949
/* Policer classification; ordered ahead of IPsec input. */
950 VNET_FEATURE_INIT (ip4_policer_classify, static) =
951 {
952   .arc_name = "ip4-unicast",
953   .node_name = "ip4-policer-classify",
954   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
955 };
956
/* IPsec input; ordered ahead of vpath. */
957 VNET_FEATURE_INIT (ip4_ipsec, static) =
958 {
959   .arc_name = "ip4-unicast",
960   .node_name = "ipsec4-input-feature",
961   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
962 };
963
/* vpath input; ordered ahead of the VXLAN bypass. */
964 VNET_FEATURE_INIT (ip4_vpath, static) =
965 {
966   .arc_name = "ip4-unicast",
967   .node_name = "vpath-input-ip4",
968   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
969 };
970
/* VXLAN bypass; ordered ahead of the lookup node. */
971 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
972 {
973   .arc_name = "ip4-unicast",
974   .node_name = "ip4-vxlan-bypass",
975   .runs_before = VNET_FEATURES ("ip4-lookup"),
976 };
977
/*
 * "ip4-not-enabled" feature; ip4_sw_interface_add_del () in this file
 * enables it on interface add and disables it on interface delete.
 */
978 VNET_FEATURE_INIT (ip4_not_enabled, static) =
979 {
980   .arc_name = "ip4-unicast",
981   .node_name = "ip4-not-enabled",
982   .runs_before = VNET_FEATURES ("ip4-lookup"),
983 };
984
/* The lookup node itself: last feature of the arc. */
985 VNET_FEATURE_INIT (ip4_lookup, static) =
986 {
987   .arc_name = "ip4-unicast",
988   .node_name = "ip4-lookup",
989   .runs_before = 0,     /* not before any other features */
990 };
991
992 /* Built-in ip4 multicast rx feature path definition */
/*
 * Arc "ip4-multicast": started from "ip4-input" / "ip4-input-no-checksum"
 * and terminated by "ip4-mfib-forward-lookup".  .arc_index_ptr points at
 * ip4_main.lookup_main.mcast_feature_arc_index.
 */
993 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
994 {
995   .arc_name = "ip4-multicast",
996   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
997   .last_in_arc = "ip4-mfib-forward-lookup",
998   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
999 };
1000
/* vpath input; ordered ahead of the multicast FIB lookup. */
1001 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1002 {
1003   .arc_name = "ip4-multicast",
1004   .node_name = "vpath-input-ip4",
1005   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1006 };
1007
/*
 * "ip4-not-enabled" on the multicast arc; toggled together with its unicast
 * counterpart by ip4_sw_interface_add_del () in this file.
 */
1008 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1009 {
1010   .arc_name = "ip4-multicast",
1011   .node_name = "ip4-not-enabled",
1012   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1013 };
1014
/* The multicast FIB lookup node itself: last feature of the arc. */
1015 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1016 {
1017   .arc_name = "ip4-multicast",
1018   .node_name = "ip4-mfib-forward-lookup",
1019   .runs_before = 0,     /* last feature */
1020 };
1021
1022 /* Source and port-range check ip4 tx feature path definition */
/*
 * Arc "ip4-output": started from "ip4-rewrite", "ip4-midchain" and
 * "ip4-dvr-dpo", terminated by "interface-output".  .arc_index_ptr points
 * at ip4_main.lookup_main.output_feature_arc_index.
 *
 * Ordering constraints declared below via .runs_before:
 *   ip4-source-and-port-range-check-tx -> ip4-outacl ->
 *   ipsec4-output-feature -> interface-output
 */
1023 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1024 {
1025   .arc_name = "ip4-output",
1026   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1027   .last_in_arc = "interface-output",
1028   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1029 };
1030
/* TX source/port-range check; ordered ahead of the output ACL. */
1031 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1032 {
1033   .arc_name = "ip4-output",
1034   .node_name = "ip4-source-and-port-range-check-tx",
1035   .runs_before = VNET_FEATURES ("ip4-outacl"),
1036 };
1037
/* Output ACL; ordered ahead of IPsec output. */
1038 VNET_FEATURE_INIT (ip4_outacl, static) =
1039 {
1040   .arc_name = "ip4-output",
1041   .node_name = "ip4-outacl",
1042   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1043 };
1044
/* IPsec output; ordered ahead of interface-output. */
1045 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1046 {
1047   .arc_name = "ip4-output",
1048   .node_name = "ipsec4-output-feature",
1049   .runs_before = VNET_FEATURES ("interface-output"),
1050 };
1051
1052 /* Built-in ip4 tx feature path definition */
/* interface-output itself: last feature of the arc. */
1053 VNET_FEATURE_INIT (ip4_interface_output, static) =
1054 {
1055   .arc_name = "ip4-output",
1056   .node_name = "interface-output",
1057   .runs_before = 0,     /* not before any other features */
1058 };
1059 /* *INDENT-ON* */
1060
1061 static clib_error_t *
1062 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1063 {
1064   ip4_main_t *im = &ip4_main;
1065
1066   vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
1067   vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
1068
1069   if (is_add)
1070     {
1071       /* Fill in lookup tables with default table (0). */
1072       im->fib_index_by_sw_if_index[sw_if_index] = 0;
1073       im->mfib_index_by_sw_if_index[sw_if_index] = 0;
1074     }
1075   else
1076     {
1077       ip4_main_t *im4 = &ip4_main;
1078       ip_lookup_main_t *lm4 = &im4->lookup_main;
1079       ip_interface_address_t *ia = 0;
1080       ip4_address_t *address;
1081       vlib_main_t *vm = vlib_get_main ();
1082
1083       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1084       /* *INDENT-OFF* */
1085       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1086       ({
1087         address = ip_interface_address_get_address (lm4, ia);
1088         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1089       }));
1090       /* *INDENT-ON* */
1091       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1092     }
1093
1094   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1095                                is_add, 0, 0);
1096
1097   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1098                                sw_if_index, is_add, 0, 0);
1099
1100   return /* no error */ 0;
1101 }
1102
1103 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1104
1105 /* Global IP4 main. */
/*
 * The single ip4_main_t instance referenced throughout this file as
 * &ip4_main.  Defined only when CLIB_MARCH_VARIANT is not set - presumably
 * so that march-variant compilations of this file do not each emit their
 * own definition (TODO confirm against the build system).
 */
1106 #ifndef CLIB_MARCH_VARIANT
1107 ip4_main_t ip4_main;
1108 #endif /* CLIB_MARCH_VARIANT */
1109
1110 static clib_error_t *
1111 ip4_lookup_init (vlib_main_t * vm)
1112 {
1113   ip4_main_t *im = &ip4_main;
1114   clib_error_t *error;
1115   uword i;
1116
1117   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1118     return error;
1119   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1120     return (error);
1121   if ((error = vlib_call_init_function (vm, fib_module_init)))
1122     return error;
1123   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1124     return error;
1125
1126   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1127     {
1128       u32 m;
1129
1130       if (i < 32)
1131         m = pow2_mask (i) << (32 - i);
1132       else
1133         m = ~0;
1134       im->fib_masks[i] = clib_host_to_net_u32 (m);
1135     }
1136
1137   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1138
1139   /* Create FIB with index 0 and table id of 0. */
1140   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1141                                      FIB_SOURCE_DEFAULT_ROUTE);
1142   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1143                                       MFIB_SOURCE_DEFAULT_ROUTE);
1144
1145   {
1146     pg_node_t *pn;
1147     pn = pg_get_node (ip4_lookup_node.index);
1148     pn->unformat_edit = unformat_pg_ip4_header;
1149   }
1150
1151   {
1152     ethernet_arp_header_t h;
1153
1154     clib_memset (&h, 0, sizeof (h));
1155
1156 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1157 #define _8(f,v) h.f = v;
1158     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1159     _16 (l3_type, ETHERNET_TYPE_IP4);
1160     _8 (n_l2_address_bytes, 6);
1161     _8 (n_l3_address_bytes, 4);
1162     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1163 #undef _16
1164 #undef _8
1165
1166     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1167                                /* data */ &h,
1168                                sizeof (h),
1169                                /* alloc chunk size */ 8,
1170                                "ip4 arp");
1171   }
1172
1173   return error;
1174 }
1175
1176 VLIB_INIT_FUNCTION (ip4_lookup_init);
1177
/*
 * Per-packet trace record written by ip4_forward_next_trace () and printed
 * by the format_ip4_*_trace () functions in this file.
 */
1178 typedef struct
1179 {
1180   /* Adjacency taken. */
1181   u32 dpo_index;
/* Copy of vnet_buffer (b)->ip.flow_hash at trace time. */
1182   u32 flow_hash;
/*
 * vnet_buffer (b)->sw_if_index[VLIB_TX] when that is not ~0, otherwise the
 * FIB index bound to the RX interface.
 */
1183   u32 fib_index;
1184
1185   /* Packet data, possibly *after* rewrite. */
1186   u8 packet_data[64 - 1 * sizeof (u32)];
1187 }
1188 ip4_forward_next_trace_t;
1189
1190 #ifndef CLIB_MARCH_VARIANT
1191 u8 *
1192 format_ip4_forward_next_trace (u8 * s, va_list * args)
1193 {
1194   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1195   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1196   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1197   u32 indent = format_get_indent (s);
1198   s = format (s, "%U%U",
1199               format_white_space, indent,
1200               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1201   return s;
1202 }
1203 #endif
1204
1205 static u8 *
1206 format_ip4_lookup_trace (u8 * s, va_list * args)
1207 {
1208   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1209   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1210   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1211   u32 indent = format_get_indent (s);
1212
1213   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1214               t->fib_index, t->dpo_index, t->flow_hash);
1215   s = format (s, "\n%U%U",
1216               format_white_space, indent,
1217               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1218   return s;
1219 }
1220
1221 static u8 *
1222 format_ip4_rewrite_trace (u8 * s, va_list * args)
1223 {
1224   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1225   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1226   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1227   u32 indent = format_get_indent (s);
1228
1229   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1230               t->fib_index, t->dpo_index, format_ip_adjacency,
1231               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1232   s = format (s, "\n%U%U",
1233               format_white_space, indent,
1234               format_ip_adjacency_packet_data,
1235               t->packet_data, sizeof (t->packet_data));
1236   return s;
1237 }
1238
1239 #ifndef CLIB_MARCH_VARIANT
1240 /* Common trace function for all ip4-forward next nodes. */
1241 void
1242 ip4_forward_next_trace (vlib_main_t * vm,
1243                         vlib_node_runtime_t * node,
1244                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1245 {
1246   u32 *from, n_left;
1247   ip4_main_t *im = &ip4_main;
1248
1249   n_left = frame->n_vectors;
1250   from = vlib_frame_vector_args (frame);
1251
1252   while (n_left >= 4)
1253     {
1254       u32 bi0, bi1;
1255       vlib_buffer_t *b0, *b1;
1256       ip4_forward_next_trace_t *t0, *t1;
1257
1258       /* Prefetch next iteration. */
1259       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1260       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1261
1262       bi0 = from[0];
1263       bi1 = from[1];
1264
1265       b0 = vlib_get_buffer (vm, bi0);
1266       b1 = vlib_get_buffer (vm, bi1);
1267
1268       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1269         {
1270           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1271           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1272           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1273           t0->fib_index =
1274             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1275              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1276             vec_elt (im->fib_index_by_sw_if_index,
1277                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1278
1279           clib_memcpy_fast (t0->packet_data,
1280                             vlib_buffer_get_current (b0),
1281                             sizeof (t0->packet_data));
1282         }
1283       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1284         {
1285           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1286           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1287           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1288           t1->fib_index =
1289             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1290              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1291             vec_elt (im->fib_index_by_sw_if_index,
1292                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1293           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1294                             sizeof (t1->packet_data));
1295         }
1296       from += 2;
1297       n_left -= 2;
1298     }
1299
1300   while (n_left >= 1)
1301     {
1302       u32 bi0;
1303       vlib_buffer_t *b0;
1304       ip4_forward_next_trace_t *t0;
1305
1306       bi0 = from[0];
1307
1308       b0 = vlib_get_buffer (vm, bi0);
1309
1310       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1311         {
1312           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1313           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1314           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1315           t0->fib_index =
1316             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1317              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1318             vec_elt (im->fib_index_by_sw_if_index,
1319                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1320           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1321                             sizeof (t0->packet_data));
1322         }
1323       from += 1;
1324       n_left -= 1;
1325     }
1326 }
1327
1328 /* Compute TCP/UDP/ICMP4 checksum in software. */
1329 u16
1330 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1331                               ip4_header_t * ip0)
1332 {
1333   ip_csum_t sum0;
1334   u32 ip_header_length, payload_length_host_byte_order;
1335
1336   /* Initialize checksum with ip header. */
1337   ip_header_length = ip4_header_bytes (ip0);
1338   payload_length_host_byte_order =
1339     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1340   sum0 =
1341     clib_host_to_net_u32 (payload_length_host_byte_order +
1342                           (ip0->protocol << 16));
1343
1344   if (BITS (uword) == 32)
1345     {
1346       sum0 =
1347         ip_csum_with_carry (sum0,
1348                             clib_mem_unaligned (&ip0->src_address, u32));
1349       sum0 =
1350         ip_csum_with_carry (sum0,
1351                             clib_mem_unaligned (&ip0->dst_address, u32));
1352     }
1353   else
1354     sum0 =
1355       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1356
1357   return ip_calculate_l4_checksum (vm, p0, sum0,
1358                                    payload_length_host_byte_order, (u8 *) ip0,
1359                                    ip_header_length, NULL);
1360 }
1361
1362 u32
1363 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1364 {
1365   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1366   udp_header_t *udp0;
1367   u16 sum16;
1368
1369   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1370           || ip0->protocol == IP_PROTOCOL_UDP);
1371
1372   udp0 = (void *) (ip0 + 1);
1373   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1374     {
1375       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1376                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1377       return p0->flags;
1378     }
1379
1380   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1381
1382   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1383                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1384
1385   return p0->flags;
1386 }
1387 #endif
1388
1389 /* *INDENT-OFF* */
/*
 * Arc "ip4-local": started from the "ip4-local" node and terminated by
 * "ip4-local-end-of-arc".  ip4_local_set_next_and_error () in this file
 * starts this arc through vnet_feat_arc_ip4_local.feature_arc_index.
 */
1390 VNET_FEATURE_ARC_INIT (ip4_local) =
1391 {
1392   .arc_name  = "ip4-local",
1393   .start_nodes = VNET_FEATURES ("ip4-local"),
1394   .last_in_arc = "ip4-local-end-of-arc",
1395 };
1396 /* *INDENT-ON* */
1397
1398 static inline void
1399 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1400                             ip4_header_t * ip, u8 is_udp, u8 * error,
1401                             u8 * good_tcp_udp)
1402 {
1403   u32 flags0;
1404   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1405   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1406   if (is_udp)
1407     {
1408       udp_header_t *udp;
1409       u32 ip_len, udp_len;
1410       i32 len_diff;
1411       udp = ip4_next_header (ip);
1412       /* Verify UDP length. */
1413       ip_len = clib_net_to_host_u16 (ip->length);
1414       udp_len = clib_net_to_host_u16 (udp->length);
1415
1416       len_diff = ip_len - udp_len;
1417       *good_tcp_udp &= len_diff >= 0;
1418       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1419     }
1420 }
1421
/*
 * Checksum-state predicates for a buffer pointer _b.  Each expands to a
 * fully parenthesized expression and parenthesizes its arguments, so any
 * pointer expression may be passed and the result may be used inside a
 * larger expression.
 *
 * ip4_local_csum_is_offloaded: the buffer has VNET_BUFFER_F_OFFLOAD set and
 *   one of the TCP/UDP checksum offload flags in its oflags.
 * ip4_local_need_csum_check: the packet is TCP/UDP and its checksum is
 *   neither already computed nor offloaded.
 * ip4_local_csum_is_valid: 1 when the checksum is flagged correct or is
 *   offloaded, 0 otherwise.
 */
#define ip4_local_csum_is_offloaded(_b)                                       \
  (((_b)->flags & VNET_BUFFER_F_OFFLOAD) &&                                   \
   (vnet_buffer (_b)->oflags &                                                \
    (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))

#define ip4_local_need_csum_check(is_tcp_udp, _b)                             \
  ((is_tcp_udp) &&                                                            \
   !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) ||                    \
     ip4_local_csum_is_offloaded (_b)))

#define ip4_local_csum_is_valid(_b)                                           \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) ||                      \
    ip4_local_csum_is_offloaded (_b)) != 0)
1434
1435 static inline void
1436 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1437                          ip4_header_t * ih, u8 * error)
1438 {
1439   u8 is_udp, is_tcp_udp, good_tcp_udp;
1440
1441   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1442   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1443
1444   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1445     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1446   else
1447     good_tcp_udp = ip4_local_csum_is_valid (b);
1448
1449   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1450   *error = (is_tcp_udp && !good_tcp_udp
1451             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1452 }
1453
1454 static inline void
1455 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1456                             ip4_header_t ** ih, u8 * error)
1457 {
1458   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1459
1460   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1461   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1462
1463   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1464   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1465
1466   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1467   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1468
1469   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1470                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1471     {
1472       if (is_tcp_udp[0])
1473         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1474                                     &good_tcp_udp[0]);
1475       if (is_tcp_udp[1])
1476         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1477                                     &good_tcp_udp[1]);
1478     }
1479
1480   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1481               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1482   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1483               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1484 }
1485
1486 static inline void
1487 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1488                               vlib_buffer_t * b, u16 * next, u8 error,
1489                               u8 head_of_feature_arc)
1490 {
1491   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1492   u32 next_index;
1493
1494   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1495   b->error = error ? error_node->errors[error] : 0;
1496   if (head_of_feature_arc)
1497     {
1498       next_index = *next;
1499       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1500         {
1501           vnet_feature_arc_start (arc_index,
1502                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1503                                   &next_index, b);
1504           *next = next_index;
1505         }
1506     }
1507 }
1508
/*
 * One-entry cache of the most recent source-address check, shared by
 * ip4_local_check_src () and ip4_local_check_src_x2 () so that consecutive
 * packets from the same source skip the FIB lookup.
 */
1509 typedef struct
1510 {
/* Source address of the last packet that went through the lookup. */
1511   ip4_address_t src;
/* Load-balance index that lookup returned. */
1512   u32 lbi;
/* Error code that resulted from the checks on that packet. */
1513   u8 error;
/* Non-zero until the first lookup has been done; forces a lookup. */
1514   u8 first;
1515 } ip4_local_last_check_t;
1516
1517 static inline void
1518 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1519                      ip4_local_last_check_t * last_check, u8 * error0)
1520 {
1521   const dpo_id_t *dpo0;
1522   load_balance_t *lb0;
1523   u32 lbi0;
1524
1525   vnet_buffer (b)->ip.fib_index =
1526     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1527     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1528
1529   /*
1530    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1531    *  adjacency for the destination address (the local interface address).
1532    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1533    *  adjacency for the source address (the remote sender's address)
1534    */
1535   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1536       last_check->first)
1537     {
1538       lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
1539                                         &ip0->src_address);
1540
1541       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1542         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1543       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1544
1545       lb0 = load_balance_get (lbi0);
1546       dpo0 = load_balance_get_bucket_i (lb0, 0);
1547
1548       /*
1549        * Must have a route to source otherwise we drop the packet.
1550        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1551        *
1552        * The checks are:
1553        *  - the source is a recieve => it's from us => bogus, do this
1554        *    first since it sets a different error code.
1555        *  - uRPF check for any route to source - accept if passes.
1556        *  - allow packets destined to the broadcast address from unknown sources
1557        */
1558
1559       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1560                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1561                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1562       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1563                   && !fib_urpf_check_size (lb0->lb_urpf)
1564                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1565                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1566
1567       last_check->src.as_u32 = ip0->src_address.as_u32;
1568       last_check->lbi = lbi0;
1569       last_check->error = *error0;
1570       last_check->first = 0;
1571     }
1572   else
1573     {
1574       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1575         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1576       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1577       *error0 = last_check->error;
1578     }
1579 }
1580
1581 static inline void
1582 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1583                         ip4_local_last_check_t * last_check, u8 * error)
1584 {
1585   const dpo_id_t *dpo[2];
1586   load_balance_t *lb[2];
1587   u32 not_last_hit;
1588   u32 lbi[2];
1589
1590   not_last_hit = last_check->first;
1591   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1592   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1593
1594   vnet_buffer (b[0])->ip.fib_index =
1595     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1596     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1597     vnet_buffer (b[0])->ip.fib_index;
1598
1599   vnet_buffer (b[1])->ip.fib_index =
1600     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1601     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1602     vnet_buffer (b[1])->ip.fib_index;
1603
1604   /*
1605    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1606    *  adjacency for the destination address (the local interface address).
1607    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1608    *  adjacency for the source address (the remote sender's address)
1609    */
1610   if (PREDICT_TRUE (not_last_hit))
1611     {
1612       ip4_fib_forwarding_lookup_x2 (
1613         vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
1614         &ip[0]->src_address, &ip[1]->src_address, &lbi[0], &lbi[1]);
1615
1616       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1617         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1618       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1619
1620       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1621         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1622       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1623
1624       lb[0] = load_balance_get (lbi[0]);
1625       lb[1] = load_balance_get (lbi[1]);
1626
1627       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1628       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1629
1630       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1631                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1632                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1633       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1634                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1635                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1636                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1637
1638       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1639                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1640                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1641       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1642                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1643                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1644                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1645
1646       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1647       last_check->lbi = lbi[1];
1648       last_check->error = error[1];
1649       last_check->first = 0;
1650     }
1651   else
1652     {
1653       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1654         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1655       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1656
1657       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1658         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1659       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1660
1661       error[0] = last_check->error;
1662       error[1] = last_check->error;
1663     }
1664 }
1665
/*
 * Packet classes returned by ip4_local_classify ().  Values are spelled
 * out because the enumerators are stored in u8 variables by the callers.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1672
1673 /**
1674  * Determine packet type and next node.
1675  *
1676  * The expectation is that all packets that are not L4 will skip
1677  * checksums and source checks.
1678  */
1679 always_inline u8
1680 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1681 {
1682   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1683
1684   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1685     {
1686       *next = IP_LOCAL_NEXT_REASSEMBLY;
1687       return IP_LOCAL_PACKET_TYPE_FRAG;
1688     }
1689   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1690     {
1691       *next = lm->local_next_by_ip_protocol[ip->protocol];
1692       return IP_LOCAL_PACKET_TYPE_NAT;
1693     }
1694
1695   *next = lm->local_next_by_ip_protocol[ip->protocol];
1696   return IP_LOCAL_PACKET_TYPE_L4;
1697 }
1698
1699 static inline uword
1700 ip4_local_inline (vlib_main_t * vm,
1701                   vlib_node_runtime_t * node,
1702                   vlib_frame_t * frame, int head_of_feature_arc)
1703 {
1704   u32 *from, n_left_from;
1705   vlib_node_runtime_t *error_node =
1706     vlib_node_get_runtime (vm, ip4_local_node.index);
1707   u16 nexts[VLIB_FRAME_SIZE], *next;
1708   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1709   ip4_header_t *ip[2];
1710   u8 error[2], pt[2];
1711
1712   ip4_local_last_check_t last_check = {
1713     /*
1714      * 0.0.0.0 can appear as the source address of an IP packet,
1715      * as can any other address, hence the need to use the 'first'
1716      * member to make sure the .lbi is initialised for the first
1717      * packet.
1718      */
1719     .src = {.as_u32 = 0},
1720     .lbi = ~0,
1721     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1722     .first = 1,
1723   };
1724
1725   from = vlib_frame_vector_args (frame);
1726   n_left_from = frame->n_vectors;
1727
1728   if (node->flags & VLIB_NODE_FLAG_TRACE)
1729     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1730
1731   vlib_get_buffers (vm, from, bufs, n_left_from);
1732   b = bufs;
1733   next = nexts;
1734
1735   while (n_left_from >= 6)
1736     {
1737       u8 not_batch = 0;
1738
1739       /* Prefetch next iteration. */
1740       {
1741         vlib_prefetch_buffer_header (b[4], LOAD);
1742         vlib_prefetch_buffer_header (b[5], LOAD);
1743
1744         clib_prefetch_load (b[4]->data);
1745         clib_prefetch_load (b[5]->data);
1746       }
1747
1748       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1749
1750       ip[0] = vlib_buffer_get_current (b[0]);
1751       ip[1] = vlib_buffer_get_current (b[1]);
1752
1753       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1754       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1755
1756       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1757       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1758
1759       not_batch = pt[0] ^ pt[1];
1760
1761       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1762         goto skip_checks;
1763
1764       if (PREDICT_TRUE (not_batch == 0))
1765         {
1766           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1767           ip4_local_check_src_x2 (b, ip, &last_check, error);
1768         }
1769       else
1770         {
1771           if (!pt[0])
1772             {
1773               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1774               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1775             }
1776           if (!pt[1])
1777             {
1778               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1779               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1780             }
1781         }
1782
1783     skip_checks:
1784
1785       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1786                                     head_of_feature_arc);
1787       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1788                                     head_of_feature_arc);
1789
1790       b += 2;
1791       next += 2;
1792       n_left_from -= 2;
1793     }
1794
1795   while (n_left_from > 0)
1796     {
1797       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1798
1799       ip[0] = vlib_buffer_get_current (b[0]);
1800       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1801       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1802
1803       if (head_of_feature_arc == 0 || pt[0])
1804         goto skip_check;
1805
1806       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1807       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1808
1809     skip_check:
1810
1811       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1812                                     head_of_feature_arc);
1813
1814       b += 1;
1815       next += 1;
1816       n_left_from -= 1;
1817     }
1818
1819   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1820   return frame->n_vectors;
1821 }
1822
1823 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1824                                vlib_frame_t * frame)
1825 {
1826   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1827 }
1828
1829 /* *INDENT-OFF* */
1830 VLIB_REGISTER_NODE (ip4_local_node) =
1831 {
1832   .name = "ip4-local",
1833   .vector_size = sizeof (u32),
1834   .format_trace = format_ip4_forward_next_trace,
1835   .n_errors = IP4_N_ERROR,
1836   .error_strings = ip4_error_strings,
1837   .n_next_nodes = IP_LOCAL_N_NEXT,
1838   .next_nodes =
1839   {
1840     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1841     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1842     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1843     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1844     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1845   },
1846 };
1847 /* *INDENT-ON* */
1848
1849
1850 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1851                                           vlib_node_runtime_t * node,
1852                                           vlib_frame_t * frame)
1853 {
1854   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1855 }
1856
1857 /* *INDENT-OFF* */
1858 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1859   .name = "ip4-local-end-of-arc",
1860   .vector_size = sizeof (u32),
1861
1862   .format_trace = format_ip4_forward_next_trace,
1863   .sibling_of = "ip4-local",
1864 };
1865
1866 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1867   .arc_name = "ip4-local",
1868   .node_name = "ip4-local-end-of-arc",
1869   .runs_before = 0, /* not before any other features */
1870 };
1871 /* *INDENT-ON* */
1872
1873 #ifndef CLIB_MARCH_VARIANT
1874 void
1875 ip4_register_protocol (u32 protocol, u32 node_index)
1876 {
1877   vlib_main_t *vm = vlib_get_main ();
1878   ip4_main_t *im = &ip4_main;
1879   ip_lookup_main_t *lm = &im->lookup_main;
1880
1881   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1882   lm->local_next_by_ip_protocol[protocol] =
1883     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1884 }
1885
1886 void
1887 ip4_unregister_protocol (u32 protocol)
1888 {
1889   ip4_main_t *im = &ip4_main;
1890   ip_lookup_main_t *lm = &im->lookup_main;
1891
1892   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1893   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1894 }
1895 #endif
1896
1897 static clib_error_t *
1898 show_ip_local_command_fn (vlib_main_t * vm,
1899                           unformat_input_t * input, vlib_cli_command_t * cmd)
1900 {
1901   ip4_main_t *im = &ip4_main;
1902   ip_lookup_main_t *lm = &im->lookup_main;
1903   int i;
1904
1905   vlib_cli_output (vm, "Protocols handled by ip4_local");
1906   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1907     {
1908       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1909         {
1910           u32 node_index = vlib_get_node (vm,
1911                                           ip4_local_node.index)->
1912             next_nodes[lm->local_next_by_ip_protocol[i]];
1913           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1914                            format_vlib_node_name, vm, node_index);
1915         }
1916     }
1917   return 0;
1918 }
1919
1920
1921
1922 /*?
1923  * Display the set of protocols handled by the local IPv4 stack.
1924  *
1925  * @cliexpar
1926  * Example of how to display local protocol table:
1927  * @cliexstart{show ip local}
1928  * Protocols handled by ip4_local
1929  * 1
1930  * 17
1931  * 47
1932  * @cliexend
1933 ?*/
1934 /* *INDENT-OFF* */
1935 VLIB_CLI_COMMAND (show_ip_local, static) =
1936 {
1937   .path = "show ip local",
1938   .function = show_ip_local_command_fn,
1939   .short_help = "show ip local",
1940 };
1941 /* *INDENT-ON* */
1942
/*
 * Static next nodes of the ip4-rewrite family.  The values index the
 * next_nodes table of the ip4-rewrite node registration.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT            /* Last: number of static nexts */
} ip4_rewrite_next_t;
1950
/**
 * The bits of an IPv4 address to mask when constructing a multicast
 * MAC address.  The constant is applied to the address as stored in
 * the packet, hence the two spellings depending on host endianness.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007FFFFF
#else
#define IP4_MCAST_ADDR_MASK 0xFFFF7F00
#endif
1960
1961 always_inline void
1962 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1963                u16 adj_packet_bytes, bool df, u16 * next,
1964                u8 is_midchain, u32 * error)
1965 {
1966   if (packet_len > adj_packet_bytes)
1967     {
1968       *error = IP4_ERROR_MTU_EXCEEDED;
1969       if (df)
1970         {
1971           icmp4_error_set_vnet_buffer
1972             (b, ICMP4_destination_unreachable,
1973              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1974              adj_packet_bytes);
1975           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1976         }
1977       else
1978         {
1979           /* IP fragmentation */
1980           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
1981                                    (is_midchain ?
1982                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
1983                                     IP_FRAG_NEXT_IP_REWRITE), 0);
1984           *next = IP4_REWRITE_NEXT_FRAGMENT;
1985         }
1986     }
1987 }
1988
1989 /* increment TTL & update checksum.
1990    Works either endian, so no need for byte swap. */
1991 static_always_inline void
1992 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
1993 {
1994   i32 ttl;
1995   u32 checksum;
1996   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
1997     return;
1998
1999   ttl = ip->ttl;
2000
2001   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2002   checksum += checksum >= 0xffff;
2003
2004   ip->checksum = checksum;
2005   ttl += 1;
2006   ip->ttl = ttl;
2007
2008   ASSERT (ip4_header_checksum_is_valid (ip));
2009 }
2010
2011 /* Decrement TTL & update checksum.
2012    Works either endian, so no need for byte swap. */
2013 static_always_inline void
2014 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2015                             u32 * error)
2016 {
2017   i32 ttl;
2018   u32 checksum;
2019   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2020     return;
2021
2022   ttl = ip->ttl;
2023
2024   /* Input node should have reject packets with ttl 0. */
2025   ASSERT (ip->ttl > 0);
2026
2027   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2028   checksum += checksum >= 0xffff;
2029
2030   ip->checksum = checksum;
2031   ttl -= 1;
2032   ip->ttl = ttl;
2033
2034   /*
2035    * If the ttl drops below 1 when forwarding, generate
2036    * an ICMP response.
2037    */
2038   if (PREDICT_FALSE (ttl <= 0))
2039     {
2040       *error = IP4_ERROR_TIME_EXPIRED;
2041       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2042       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2043                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2044                                    0);
2045       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2046     }
2047
2048   /* Verify checksum. */
2049   ASSERT (ip4_header_checksum_is_valid (ip) ||
2050           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
2051           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
2052 }
2053
2054 always_inline uword
2055 ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
2056                     vlib_frame_t *frame, int do_counters, int is_midchain,
2057                     int is_mcast)
2058 {
2059   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2060   u32 *from = vlib_frame_vector_args (frame);
2061   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2062   u16 nexts[VLIB_FRAME_SIZE], *next;
2063   u32 n_left_from;
2064   vlib_node_runtime_t *error_node =
2065     vlib_node_get_runtime (vm, ip4_input_node.index);
2066
2067   n_left_from = frame->n_vectors;
2068   u32 thread_index = vm->thread_index;
2069
2070   vlib_get_buffers (vm, from, bufs, n_left_from);
2071   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2072
2073 #if (CLIB_N_PREFETCHES >= 8)
2074   if (n_left_from >= 6)
2075     {
2076       int i;
2077       for (i = 2; i < 6; i++)
2078         vlib_prefetch_buffer_header (bufs[i], LOAD);
2079     }
2080
2081   next = nexts;
2082   b = bufs;
2083   while (n_left_from >= 8)
2084     {
2085       const ip_adjacency_t *adj0, *adj1;
2086       ip4_header_t *ip0, *ip1;
2087       u32 rw_len0, error0, adj_index0;
2088       u32 rw_len1, error1, adj_index1;
2089       u32 tx_sw_if_index0, tx_sw_if_index1;
2090       u8 *p;
2091
2092       if (is_midchain)
2093         {
2094           vlib_prefetch_buffer_header (b[6], LOAD);
2095           vlib_prefetch_buffer_header (b[7], LOAD);
2096         }
2097
2098       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2099       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2100
2101       /*
2102        * pre-fetch the per-adjacency counters
2103        */
2104       if (do_counters)
2105         {
2106           vlib_prefetch_combined_counter (&adjacency_counters,
2107                                           thread_index, adj_index0);
2108           vlib_prefetch_combined_counter (&adjacency_counters,
2109                                           thread_index, adj_index1);
2110         }
2111
2112       ip0 = vlib_buffer_get_current (b[0]);
2113       ip1 = vlib_buffer_get_current (b[1]);
2114
2115       error0 = error1 = IP4_ERROR_NONE;
2116
2117       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2118       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2119
2120       /* Rewrite packet header and updates lengths. */
2121       adj0 = adj_get (adj_index0);
2122       adj1 = adj_get (adj_index1);
2123
2124       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2125       rw_len0 = adj0[0].rewrite_header.data_bytes;
2126       rw_len1 = adj1[0].rewrite_header.data_bytes;
2127       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2128       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2129
2130       p = vlib_buffer_get_current (b[2]);
2131       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2132       clib_prefetch_load (p);
2133
2134       p = vlib_buffer_get_current (b[3]);
2135       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2136       clib_prefetch_load (p);
2137
2138       /* Check MTU of outgoing interface. */
2139       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2140       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2141
2142       if (b[0]->flags & VNET_BUFFER_F_GSO)
2143         ip0_len = gso_mtu_sz (b[0]);
2144       if (b[1]->flags & VNET_BUFFER_F_GSO)
2145         ip1_len = gso_mtu_sz (b[1]);
2146
2147       ip4_mtu_check (b[0], ip0_len,
2148                      adj0[0].rewrite_header.max_l3_packet_bytes,
2149                      ip0->flags_and_fragment_offset &
2150                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2151                      next + 0, is_midchain, &error0);
2152       ip4_mtu_check (b[1], ip1_len,
2153                      adj1[0].rewrite_header.max_l3_packet_bytes,
2154                      ip1->flags_and_fragment_offset &
2155                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2156                      next + 1, is_midchain, &error1);
2157
2158       if (is_mcast)
2159         {
2160           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2161                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2162                     IP4_ERROR_SAME_INTERFACE : error0);
2163           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2164                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2165                     IP4_ERROR_SAME_INTERFACE : error1);
2166         }
2167
2168       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2169        * to see the IP header */
2170       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2171         {
2172           u32 next_index = adj0[0].rewrite_header.next_index;
2173           vlib_buffer_advance (b[0], -(word) rw_len0);
2174
2175           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2176           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2177
2178           if (PREDICT_FALSE
2179               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2180             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2181                                                 tx_sw_if_index0,
2182                                                 &next_index, b[0],
2183                                                 adj0->ia_cfg_index);
2184
2185           next[0] = next_index;
2186           if (is_midchain)
2187             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2188                                         0 /* is_ip6 */ );
2189         }
2190       else
2191         {
2192           b[0]->error = error_node->errors[error0];
2193           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2194             ip4_ttl_inc (b[0], ip0);
2195         }
2196       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2197         {
2198           u32 next_index = adj1[0].rewrite_header.next_index;
2199           vlib_buffer_advance (b[1], -(word) rw_len1);
2200
2201           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2202           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2203
2204           if (PREDICT_FALSE
2205               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2206             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2207                                                 tx_sw_if_index1,
2208                                                 &next_index, b[1],
2209                                                 adj1->ia_cfg_index);
2210           next[1] = next_index;
2211           if (is_midchain)
2212             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2213                                         0 /* is_ip6 */ );
2214         }
2215       else
2216         {
2217           b[1]->error = error_node->errors[error1];
2218           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2219             ip4_ttl_inc (b[1], ip1);
2220         }
2221
2222       if (is_midchain)
2223         /* Guess we are only writing on ipv4 header. */
2224         vnet_rewrite_two_headers (adj0[0], adj1[0],
2225                                   ip0, ip1, sizeof (ip4_header_t));
2226       else
2227         /* Guess we are only writing on simple Ethernet header. */
2228         vnet_rewrite_two_headers (adj0[0], adj1[0],
2229                                   ip0, ip1, sizeof (ethernet_header_t));
2230
2231       if (do_counters)
2232         {
2233           if (error0 == IP4_ERROR_NONE)
2234             vlib_increment_combined_counter
2235               (&adjacency_counters,
2236                thread_index,
2237                adj_index0, 1,
2238                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2239
2240           if (error1 == IP4_ERROR_NONE)
2241             vlib_increment_combined_counter
2242               (&adjacency_counters,
2243                thread_index,
2244                adj_index1, 1,
2245                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2246         }
2247
2248       if (is_midchain)
2249         {
2250           if (error0 == IP4_ERROR_NONE)
2251             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2252           if (error1 == IP4_ERROR_NONE)
2253             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2254         }
2255
2256       if (is_mcast)
2257         {
2258           /* copy bytes from the IP address into the MAC rewrite */
2259           if (error0 == IP4_ERROR_NONE)
2260             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2261                                         adj0->rewrite_header.dst_mcast_offset,
2262                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2263           if (error1 == IP4_ERROR_NONE)
2264             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2265                                         adj1->rewrite_header.dst_mcast_offset,
2266                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2267         }
2268
2269       next += 2;
2270       b += 2;
2271       n_left_from -= 2;
2272     }
2273 #elif (CLIB_N_PREFETCHES >= 4)
2274   next = nexts;
2275   b = bufs;
2276   while (n_left_from >= 1)
2277     {
2278       ip_adjacency_t *adj0;
2279       ip4_header_t *ip0;
2280       u32 rw_len0, error0, adj_index0;
2281       u32 tx_sw_if_index0;
2282       u8 *p;
2283
2284       /* Prefetch next iteration */
2285       if (PREDICT_TRUE (n_left_from >= 4))
2286         {
2287           ip_adjacency_t *adj2;
2288           u32 adj_index2;
2289
2290           vlib_prefetch_buffer_header (b[3], LOAD);
2291           vlib_prefetch_buffer_data (b[2], LOAD);
2292
2293           /* Prefetch adj->rewrite_header */
2294           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2295           adj2 = adj_get (adj_index2);
2296           p = (u8 *) adj2;
2297           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2298                          LOAD);
2299         }
2300
2301       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2302
2303       /*
2304        * Prefetch the per-adjacency counters
2305        */
2306       if (do_counters)
2307         {
2308           vlib_prefetch_combined_counter (&adjacency_counters,
2309                                           thread_index, adj_index0);
2310         }
2311
2312       ip0 = vlib_buffer_get_current (b[0]);
2313
2314       error0 = IP4_ERROR_NONE;
2315
2316       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2317
2318       /* Rewrite packet header and updates lengths. */
2319       adj0 = adj_get (adj_index0);
2320
2321       /* Rewrite header was prefetched. */
2322       rw_len0 = adj0[0].rewrite_header.data_bytes;
2323       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2324
2325       /* Check MTU of outgoing interface. */
2326       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2327
2328       if (b[0]->flags & VNET_BUFFER_F_GSO)
2329         ip0_len = gso_mtu_sz (b[0]);
2330
2331       ip4_mtu_check (b[0], ip0_len,
2332                      adj0[0].rewrite_header.max_l3_packet_bytes,
2333                      ip0->flags_and_fragment_offset &
2334                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2335                      next + 0, is_midchain, &error0);
2336
2337       if (is_mcast)
2338         {
2339           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2340                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2341                     IP4_ERROR_SAME_INTERFACE : error0);
2342         }
2343
2344       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2345        * to see the IP header */
2346       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2347         {
2348           u32 next_index = adj0[0].rewrite_header.next_index;
2349           vlib_buffer_advance (b[0], -(word) rw_len0);
2350           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2351           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2352
2353           if (PREDICT_FALSE
2354               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2355             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2356                                                 tx_sw_if_index0,
2357                                                 &next_index, b[0],
2358                                                 adj0->ia_cfg_index);
2359           next[0] = next_index;
2360
2361           if (is_midchain)
2362             {
2363               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2364                                           0 /* is_ip6 */ );
2365
2366               /* Guess we are only writing on ipv4 header. */
2367               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2368             }
2369           else
2370             /* Guess we are only writing on simple Ethernet header. */
2371             vnet_rewrite_one_header (adj0[0], ip0,
2372                                      sizeof (ethernet_header_t));
2373
2374           /*
2375            * Bump the per-adjacency counters
2376            */
2377           if (do_counters)
2378             vlib_increment_combined_counter
2379               (&adjacency_counters,
2380                thread_index,
2381                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2382                                                            b[0]) + rw_len0);
2383
2384           if (is_midchain)
2385             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2386
2387           if (is_mcast)
2388             /* copy bytes from the IP address into the MAC rewrite */
2389             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2390                                         adj0->rewrite_header.dst_mcast_offset,
2391                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2392         }
2393       else
2394         {
2395           b[0]->error = error_node->errors[error0];
2396           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2397             ip4_ttl_inc (b[0], ip0);
2398         }
2399
2400       next += 1;
2401       b += 1;
2402       n_left_from -= 1;
2403     }
2404 #endif
2405
2406   while (n_left_from > 0)
2407     {
2408       ip_adjacency_t *adj0;
2409       ip4_header_t *ip0;
2410       u32 rw_len0, adj_index0, error0;
2411       u32 tx_sw_if_index0;
2412
2413       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2414
2415       adj0 = adj_get (adj_index0);
2416
2417       if (do_counters)
2418         vlib_prefetch_combined_counter (&adjacency_counters,
2419                                         thread_index, adj_index0);
2420
2421       ip0 = vlib_buffer_get_current (b[0]);
2422
2423       error0 = IP4_ERROR_NONE;
2424
2425       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2426
2427
2428       /* Update packet buffer attributes/set output interface. */
2429       rw_len0 = adj0[0].rewrite_header.data_bytes;
2430       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2431
2432       /* Check MTU of outgoing interface. */
2433       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2434       if (b[0]->flags & VNET_BUFFER_F_GSO)
2435         ip0_len = gso_mtu_sz (b[0]);
2436
2437       ip4_mtu_check (b[0], ip0_len,
2438                      adj0[0].rewrite_header.max_l3_packet_bytes,
2439                      ip0->flags_and_fragment_offset &
2440                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2441                      next + 0, is_midchain, &error0);
2442
2443       if (is_mcast)
2444         {
2445           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2446                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2447                     IP4_ERROR_SAME_INTERFACE : error0);
2448         }
2449
2450       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2451        * to see the IP header */
2452       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2453         {
2454           u32 next_index = adj0[0].rewrite_header.next_index;
2455           vlib_buffer_advance (b[0], -(word) rw_len0);
2456           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2457           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2458
2459           if (PREDICT_FALSE
2460               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2461             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2462                                                 tx_sw_if_index0,
2463                                                 &next_index, b[0],
2464                                                 adj0->ia_cfg_index);
2465           next[0] = next_index;
2466
2467           if (is_midchain)
2468             {
2469               /* this acts on the packet that is about to be encapped */
2470               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2471                                           0 /* is_ip6 */ );
2472
2473               /* Guess we are only writing on ipv4 header. */
2474               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2475             }
2476           else
2477             /* Guess we are only writing on simple Ethernet header. */
2478             vnet_rewrite_one_header (adj0[0], ip0,
2479                                      sizeof (ethernet_header_t));
2480
2481           if (do_counters)
2482             vlib_increment_combined_counter
2483               (&adjacency_counters,
2484                thread_index, adj_index0, 1,
2485                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2486
2487           if (is_midchain)
2488             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2489
2490           if (is_mcast)
2491             /* copy bytes from the IP address into the MAC rewrite */
2492             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2493                                         adj0->rewrite_header.dst_mcast_offset,
2494                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2495         }
2496       else
2497         {
2498           b[0]->error = error_node->errors[error0];
2499           /* undo the TTL decrement - we'll be back to do it again */
2500           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2501             ip4_ttl_inc (b[0], ip0);
2502         }
2503
2504       next += 1;
2505       b += 1;
2506       n_left_from -= 1;
2507     }
2508
2509
2510   /* Need to do trace after rewrites to pick up new packet data. */
2511   if (node->flags & VLIB_NODE_FLAG_TRACE)
2512     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2513
2514   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2515   return frame->n_vectors;
2516 }
2517
2518 /** @brief IPv4 rewrite node.
2519     @node ip4-rewrite
2520
2521     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2522     header checksum, fetch the ip adjacency, check the outbound mtu,
2523     apply the adjacency rewrite, and send pkts to the adjacency
2524     rewrite header's rewrite_next_index.
2525
2526     @param vm vlib_main_t corresponding to the current thread
2527     @param node vlib_node_runtime_t
2528     @param frame vlib_frame_t whose contents should be dispatched
2529
2530     @par Graph mechanics: buffer metadata, next index usage
2531
2532     @em Uses:
2533     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2534         - the rewrite adjacency index
2535     - <code>adj->lookup_next_index</code>
2536         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2537           the packet will be dropped.
2538     - <code>adj->rewrite_header</code>
2539         - Rewrite string length, rewrite string, next_index
2540
2541     @em Sets:
2542     - <code>b->current_data, b->current_length</code>
2543         - Updated net of applying the rewrite string
2544
2545     <em>Next Indices:</em>
2546     - <code> adj->rewrite_header.next_index </code>
2547       or @c ip4-drop
2548 */
2549
2550 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2551                                  vlib_frame_t * frame)
2552 {
2553   if (adj_are_counters_enabled ())
2554     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2555   else
2556     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2557 }
2558
2559 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2560                                        vlib_node_runtime_t * node,
2561                                        vlib_frame_t * frame)
2562 {
2563   if (adj_are_counters_enabled ())
2564     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2565   else
2566     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2567 }
2568
2569 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2570                                   vlib_node_runtime_t * node,
2571                                   vlib_frame_t * frame)
2572 {
2573   if (adj_are_counters_enabled ())
2574     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2575   else
2576     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2577 }
2578
2579 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2580                                        vlib_node_runtime_t * node,
2581                                        vlib_frame_t * frame)
2582 {
2583   if (adj_are_counters_enabled ())
2584     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2585   else
2586     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2587 }
2588
2589 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2590                                         vlib_node_runtime_t * node,
2591                                         vlib_frame_t * frame)
2592 {
2593   if (adj_are_counters_enabled ())
2594     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2595   else
2596     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2597 }
2598
2599 /* *INDENT-OFF* */
2600 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2601   .name = "ip4-rewrite",
2602   .vector_size = sizeof (u32),
2603
2604   .format_trace = format_ip4_rewrite_trace,
2605
2606   .n_next_nodes = IP4_REWRITE_N_NEXT,
2607   .next_nodes = {
2608     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2609     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2610     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2611   },
2612 };
2613
2614 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2615   .name = "ip4-rewrite-bcast",
2616   .vector_size = sizeof (u32),
2617
2618   .format_trace = format_ip4_rewrite_trace,
2619   .sibling_of = "ip4-rewrite",
2620 };
2621
2622 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2623   .name = "ip4-rewrite-mcast",
2624   .vector_size = sizeof (u32),
2625
2626   .format_trace = format_ip4_rewrite_trace,
2627   .sibling_of = "ip4-rewrite",
2628 };
2629
2630 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2631   .name = "ip4-mcast-midchain",
2632   .vector_size = sizeof (u32),
2633
2634   .format_trace = format_ip4_rewrite_trace,
2635   .sibling_of = "ip4-rewrite",
2636 };
2637
2638 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2639   .name = "ip4-midchain",
2640   .vector_size = sizeof (u32),
2641   .format_trace = format_ip4_rewrite_trace,
2642   .sibling_of = "ip4-rewrite",
2643 };
/* *INDENT-ON* */
2645
2646 static clib_error_t *
2647 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2648                              unformat_input_t * input,
2649                              vlib_cli_command_t * cmd)
2650 {
2651   int matched = 0;
2652   u32 table_id = 0;
2653   u32 flow_hash_config = 0;
2654   int rv;
2655
2656   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2657     {
2658       if (unformat (input, "table %d", &table_id))
2659         matched = 1;
2660 #define _(a, b, v)                                                            \
2661   else if (unformat (input, #a))                                              \
2662   {                                                                           \
2663     flow_hash_config |= v;                                                    \
2664     matched = 1;                                                              \
2665   }
2666       foreach_flow_hash_bit
2667 #undef _
2668         else
2669         break;
2670     }
2671
2672   if (matched == 0)
2673     return clib_error_return (0, "unknown input `%U'",
2674                               format_unformat_error, input);
2675
2676   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2677   switch (rv)
2678     {
2679     case 0:
2680       break;
2681
2682     case VNET_API_ERROR_NO_SUCH_FIB:
2683       return clib_error_return (0, "no such FIB table %d", table_id);
2684
2685     default:
2686       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2687       break;
2688     }
2689
2690   return 0;
2691 }
2692
2693 /*?
2694  * Configure the set of IPv4 fields used by the flow hash.
2695  *
2696  * @cliexpar
2697  * Example of how to set the flow hash on a given table:
2698  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2700  * @cliexstart{show ip fib}
2701  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2702  * 0.0.0.0/0
2703  *   unicast-ip4-chain
2704  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2705  *     [0] [@0]: dpo-drop ip6
2706  * 0.0.0.0/32
2707  *   unicast-ip4-chain
2708  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2709  *     [0] [@0]: dpo-drop ip6
2710  * 224.0.0.0/8
2711  *   unicast-ip4-chain
2712  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2713  *     [0] [@0]: dpo-drop ip6
2714  * 6.0.1.2/32
2715  *   unicast-ip4-chain
2716  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2717  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2718  * 7.0.0.1/32
2719  *   unicast-ip4-chain
2720  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2721  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2722  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2723  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2724  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2725  * 240.0.0.0/8
2726  *   unicast-ip4-chain
2727  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2728  *     [0] [@0]: dpo-drop ip6
2729  * 255.255.255.255/32
2730  *   unicast-ip4-chain
2731  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2732  *     [0] [@0]: dpo-drop ip6
2733  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2734  * 0.0.0.0/0
2735  *   unicast-ip4-chain
2736  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2737  *     [0] [@0]: dpo-drop ip6
2738  * 0.0.0.0/32
2739  *   unicast-ip4-chain
2740  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2741  *     [0] [@0]: dpo-drop ip6
2742  * 172.16.1.0/24
2743  *   unicast-ip4-chain
2744  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2745  *     [0] [@4]: ipv4-glean: af_packet0
2746  * 172.16.1.1/32
2747  *   unicast-ip4-chain
2748  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2749  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2750  * 172.16.1.2/32
2751  *   unicast-ip4-chain
2752  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2753  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2754  * 172.16.2.0/24
2755  *   unicast-ip4-chain
2756  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2757  *     [0] [@4]: ipv4-glean: af_packet1
2758  * 172.16.2.1/32
2759  *   unicast-ip4-chain
2760  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2761  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2762  * 224.0.0.0/8
2763  *   unicast-ip4-chain
2764  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2765  *     [0] [@0]: dpo-drop ip6
2766  * 240.0.0.0/8
2767  *   unicast-ip4-chain
2768  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2769  *     [0] [@0]: dpo-drop ip6
2770  * 255.255.255.255/32
2771  *   unicast-ip4-chain
2772  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2773  *     [0] [@0]: dpo-drop ip6
2774  * @cliexend
2775 ?*/
2776 /* *INDENT-OFF* */
2777 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2778 {
2779   .path = "set ip flow-hash",
2780   .short_help =
2781   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2782   .function = set_ip_flow_hash_command_fn,
2783 };
2784 /* *INDENT-ON* */
2785
2786 #ifndef CLIB_MARCH_VARIANT
2787 int
2788 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2789                              u32 table_index)
2790 {
2791   vnet_main_t *vnm = vnet_get_main ();
2792   vnet_interface_main_t *im = &vnm->interface_main;
2793   ip4_main_t *ipm = &ip4_main;
2794   ip_lookup_main_t *lm = &ipm->lookup_main;
2795   vnet_classify_main_t *cm = &vnet_classify_main;
2796   ip4_address_t *if_addr;
2797
2798   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2799     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2800
2801   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2802     return VNET_API_ERROR_NO_SUCH_ENTRY;
2803
2804   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2805   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2806
2807   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2808
2809   if (NULL != if_addr)
2810     {
2811       fib_prefix_t pfx = {
2812         .fp_len = 32,
2813         .fp_proto = FIB_PROTOCOL_IP4,
2814         .fp_addr.ip4 = *if_addr,
2815       };
2816       u32 fib_index;
2817
2818       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2819                                                        sw_if_index);
2820
2821
2822       if (table_index != (u32) ~ 0)
2823         {
2824           dpo_id_t dpo = DPO_INVALID;
2825
2826           dpo_set (&dpo,
2827                    DPO_CLASSIFY,
2828                    DPO_PROTO_IP4,
2829                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2830
2831           fib_table_entry_special_dpo_add (fib_index,
2832                                            &pfx,
2833                                            FIB_SOURCE_CLASSIFY,
2834                                            FIB_ENTRY_FLAG_NONE, &dpo);
2835           dpo_reset (&dpo);
2836         }
2837       else
2838         {
2839           fib_table_entry_special_remove (fib_index,
2840                                           &pfx, FIB_SOURCE_CLASSIFY);
2841         }
2842     }
2843
2844   return 0;
2845 }
2846 #endif
2847
2848 static clib_error_t *
2849 set_ip_classify_command_fn (vlib_main_t * vm,
2850                             unformat_input_t * input,
2851                             vlib_cli_command_t * cmd)
2852 {
2853   u32 table_index = ~0;
2854   int table_index_set = 0;
2855   u32 sw_if_index = ~0;
2856   int rv;
2857
2858   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2859     {
2860       if (unformat (input, "table-index %d", &table_index))
2861         table_index_set = 1;
2862       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2863                          vnet_get_main (), &sw_if_index))
2864         ;
2865       else
2866         break;
2867     }
2868
2869   if (table_index_set == 0)
2870     return clib_error_return (0, "classify table-index must be specified");
2871
2872   if (sw_if_index == ~0)
2873     return clib_error_return (0, "interface / subif must be specified");
2874
2875   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2876
2877   switch (rv)
2878     {
2879     case 0:
2880       break;
2881
2882     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2883       return clib_error_return (0, "No such interface");
2884
2885     case VNET_API_ERROR_NO_SUCH_ENTRY:
2886       return clib_error_return (0, "No such classifier table");
2887     }
2888   return 0;
2889 }
2890
2891 /*?
2892  * Assign a classification table to an interface. The classification
2893  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2894  * commands. Once the table is create, use this command to filter packets
2895  * on an interface.
2896  *
2897  * @cliexpar
2898  * Example of how to assign a classification table to an interface:
2899  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2900 ?*/
2901 /* *INDENT-OFF* */
2902 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2903 {
2904     .path = "set ip classify",
2905     .short_help =
2906     "set ip classify intfc <interface> table-index <classify-idx>",
2907     .function = set_ip_classify_command_fn,
2908 };
2909 /* *INDENT-ON* */
2910
2911 /*
2912  * fd.io coding-style-patch-verification: ON
2913  *
2914  * Local Variables:
2915  * eval: (c-set-style "gnu")
2916  * End:
2917  */