ip: admin change affects intf IPv4 addr routes
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame);
96 }
97
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
99
100 /* *INDENT-OFF* */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
102 {
103   .name = "ip4-lookup",
104   .vector_size = sizeof (u32),
105   .format_trace = format_ip4_lookup_trace,
106   .n_next_nodes = IP_LOOKUP_N_NEXT,
107   .next_nodes = IP4_LOOKUP_NEXT_NODES,
108 };
109 /* *INDENT-ON* */
110
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112                                       vlib_node_runtime_t * node,
113                                       vlib_frame_t * frame)
114 {
115   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
116   u32 n_left, *from;
117   u32 thread_index = vm->thread_index;
118   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119   u16 nexts[VLIB_FRAME_SIZE], *next;
120
121   from = vlib_frame_vector_args (frame);
122   n_left = frame->n_vectors;
123   next = nexts;
124
125   vlib_get_buffers (vm, from, bufs, n_left);
126
127   while (n_left >= 4)
128     {
129       const load_balance_t *lb0, *lb1;
130       const ip4_header_t *ip0, *ip1;
131       u32 lbi0, hc0, lbi1, hc1;
132       const dpo_id_t *dpo0, *dpo1;
133
134       /* Prefetch next iteration. */
135       {
136         vlib_prefetch_buffer_header (b[2], LOAD);
137         vlib_prefetch_buffer_header (b[3], LOAD);
138
139         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
141       }
142
143       ip0 = vlib_buffer_get_current (b[0]);
144       ip1 = vlib_buffer_get_current (b[1]);
145       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
147
148       lb0 = load_balance_get (lbi0);
149       lb1 = load_balance_get (lbi1);
150
151       /*
152        * this node is for via FIBs we can re-use the hash value from the
153        * to node if present.
154        * We don't want to use the same hash value at each level in the recursion
155        * graph as that would lead to polarisation
156        */
157       hc0 = hc1 = 0;
158
159       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
160         {
161           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
162             {
163               hc0 = vnet_buffer (b[0])->ip.flow_hash =
164                 vnet_buffer (b[0])->ip.flow_hash >> 1;
165             }
166           else
167             {
168               hc0 = vnet_buffer (b[0])->ip.flow_hash =
169                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
170             }
171           dpo0 = load_balance_get_fwd_bucket
172             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
173         }
174       else
175         {
176           dpo0 = load_balance_get_bucket_i (lb0, 0);
177         }
178       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
179         {
180           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
181             {
182               hc1 = vnet_buffer (b[1])->ip.flow_hash =
183                 vnet_buffer (b[1])->ip.flow_hash >> 1;
184             }
185           else
186             {
187               hc1 = vnet_buffer (b[1])->ip.flow_hash =
188                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
189             }
190           dpo1 = load_balance_get_fwd_bucket
191             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
192         }
193       else
194         {
195           dpo1 = load_balance_get_bucket_i (lb1, 0);
196         }
197
198       next[0] = dpo0->dpoi_next_node;
199       next[1] = dpo1->dpoi_next_node;
200
201       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
203
204       vlib_increment_combined_counter
205         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
208
209       b += 2;
210       next += 2;
211       n_left -= 2;
212     }
213
214   while (n_left > 0)
215     {
216       const load_balance_t *lb0;
217       const ip4_header_t *ip0;
218       const dpo_id_t *dpo0;
219       u32 lbi0, hc0;
220
221       ip0 = vlib_buffer_get_current (b[0]);
222       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
223
224       lb0 = load_balance_get (lbi0);
225
226       hc0 = 0;
227       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
228         {
229           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
230             {
231               hc0 = vnet_buffer (b[0])->ip.flow_hash =
232                 vnet_buffer (b[0])->ip.flow_hash >> 1;
233             }
234           else
235             {
236               hc0 = vnet_buffer (b[0])->ip.flow_hash =
237                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
238             }
239           dpo0 = load_balance_get_fwd_bucket
240             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
241         }
242       else
243         {
244           dpo0 = load_balance_get_bucket_i (lb0, 0);
245         }
246
247       next[0] = dpo0->dpoi_next_node;
248       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
249
250       vlib_increment_combined_counter
251         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
252
253       b += 1;
254       next += 1;
255       n_left -= 1;
256     }
257
258   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259   if (node->flags & VLIB_NODE_FLAG_TRACE)
260     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
261
262   return frame->n_vectors;
263 }
264
265 /* *INDENT-OFF* */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
267 {
268   .name = "ip4-load-balance",
269   .vector_size = sizeof (u32),
270   .sibling_of = "ip4-lookup",
271   .format_trace = format_ip4_lookup_trace,
272 };
273 /* *INDENT-ON* */
274
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
277 ip4_address_t *
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279                              ip_interface_address_t ** result_ia)
280 {
281   ip_lookup_main_t *lm = &im->lookup_main;
282   ip_interface_address_t *ia = 0;
283   ip4_address_t *result = 0;
284
285   /* *INDENT-OFF* */
286   foreach_ip_interface_address
287     (lm, ia, sw_if_index,
288      1 /* honor unnumbered */ ,
289      ({
290        ip4_address_t * a =
291          ip_interface_address_get_address (lm, ia);
292        result = a;
293        break;
294      }));
295   /* *INDENT-OFF* */
296   if (result_ia)
297     *result_ia = result ? ia : 0;
298   return result;
299 }
300 #endif
301
302 static void
303 ip4_add_subnet_bcast_route (u32 fib_index,
304                             fib_prefix_t *pfx,
305                             u32 sw_if_index)
306 {
307   vnet_sw_interface_flags_t iflags;
308
309   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
310
311   fib_table_entry_special_remove(fib_index,
312                                  pfx,
313                                  FIB_SOURCE_INTERFACE);
314
315   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
316     {
317       fib_table_entry_update_one_path (fib_index, pfx,
318                                        FIB_SOURCE_INTERFACE,
319                                        FIB_ENTRY_FLAG_NONE,
320                                        DPO_PROTO_IP4,
321                                        /* No next-hop address */
322                                        &ADJ_BCAST_ADDR,
323                                        sw_if_index,
324                                        // invalid FIB index
325                                        ~0,
326                                        1,
327                                        // no out-label stack
328                                        NULL,
329                                        FIB_ROUTE_PATH_FLAG_NONE);
330     }
331   else
332     {
333         fib_table_entry_special_add(fib_index,
334                                     pfx,
335                                     FIB_SOURCE_INTERFACE,
336                                     (FIB_ENTRY_FLAG_DROP |
337                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
338     }
339 }
340
341 static void
342 ip4_add_interface_routes (u32 sw_if_index,
343                           ip4_main_t * im, u32 fib_index,
344                           ip_interface_address_t * a)
345 {
346   ip_lookup_main_t *lm = &im->lookup_main;
347   ip4_address_t *address = ip_interface_address_get_address (lm, a);
348   fib_prefix_t pfx = {
349     .fp_len = a->address_length,
350     .fp_proto = FIB_PROTOCOL_IP4,
351     .fp_addr.ip4 = *address,
352   };
353
354   if (pfx.fp_len <= 30)
355     {
356       /* a /30 or shorter - add a glean for the network address */
357       fib_table_entry_update_one_path (fib_index, &pfx,
358                                        FIB_SOURCE_INTERFACE,
359                                        (FIB_ENTRY_FLAG_CONNECTED |
360                                         FIB_ENTRY_FLAG_ATTACHED),
361                                        DPO_PROTO_IP4,
362                                        /* No next-hop address */
363                                        NULL,
364                                        sw_if_index,
365                                        // invalid FIB index
366                                        ~0,
367                                        1,
368                                        // no out-label stack
369                                        NULL,
370                                        FIB_ROUTE_PATH_FLAG_NONE);
371
372       /* Add the two broadcast addresses as drop */
373       fib_prefix_t net_pfx = {
374         .fp_len = 32,
375         .fp_proto = FIB_PROTOCOL_IP4,
376         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
377       };
378       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
379         fib_table_entry_special_add(fib_index,
380                                     &net_pfx,
381                                     FIB_SOURCE_INTERFACE,
382                                     (FIB_ENTRY_FLAG_DROP |
383                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
384       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
385       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
386         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
387     }
388   else if (pfx.fp_len == 31)
389     {
390       u32 mask = clib_host_to_net_u32(1);
391       fib_prefix_t net_pfx = pfx;
392
393       net_pfx.fp_len = 32;
394       net_pfx.fp_addr.ip4.as_u32 ^= mask;
395
396       /* a /31 - add the other end as an attached host */
397       fib_table_entry_update_one_path (fib_index, &net_pfx,
398                                        FIB_SOURCE_INTERFACE,
399                                        (FIB_ENTRY_FLAG_ATTACHED),
400                                        DPO_PROTO_IP4,
401                                        &net_pfx.fp_addr,
402                                        sw_if_index,
403                                        // invalid FIB index
404                                        ~0,
405                                        1,
406                                        NULL,
407                                        FIB_ROUTE_PATH_FLAG_NONE);
408     }
409   pfx.fp_len = 32;
410
411   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
412     {
413       u32 classify_table_index =
414         lm->classify_table_index_by_sw_if_index[sw_if_index];
415       if (classify_table_index != (u32) ~ 0)
416         {
417           dpo_id_t dpo = DPO_INVALID;
418
419           dpo_set (&dpo,
420                    DPO_CLASSIFY,
421                    DPO_PROTO_IP4,
422                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
423
424           fib_table_entry_special_dpo_add (fib_index,
425                                            &pfx,
426                                            FIB_SOURCE_CLASSIFY,
427                                            FIB_ENTRY_FLAG_NONE, &dpo);
428           dpo_reset (&dpo);
429         }
430     }
431
432   fib_table_entry_update_one_path (fib_index, &pfx,
433                                    FIB_SOURCE_INTERFACE,
434                                    (FIB_ENTRY_FLAG_CONNECTED |
435                                     FIB_ENTRY_FLAG_LOCAL),
436                                    DPO_PROTO_IP4,
437                                    &pfx.fp_addr,
438                                    sw_if_index,
439                                    // invalid FIB index
440                                    ~0,
441                                    1, NULL,
442                                    FIB_ROUTE_PATH_FLAG_NONE);
443 }
444
445 static void
446 ip4_del_interface_routes (ip4_main_t * im,
447                           u32 fib_index,
448                           ip4_address_t * address, u32 address_length)
449 {
450   fib_prefix_t pfx = {
451     .fp_len = address_length,
452     .fp_proto = FIB_PROTOCOL_IP4,
453     .fp_addr.ip4 = *address,
454   };
455
456   if (pfx.fp_len <= 30)
457     {
458       fib_prefix_t net_pfx = {
459         .fp_len = 32,
460         .fp_proto = FIB_PROTOCOL_IP4,
461         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
462       };
463       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
464         fib_table_entry_special_remove(fib_index,
465                                        &net_pfx,
466                                        FIB_SOURCE_INTERFACE);
467       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
468       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
469         fib_table_entry_special_remove(fib_index,
470                                        &net_pfx,
471                                        FIB_SOURCE_INTERFACE);
472       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
473     }
474     else if (pfx.fp_len == 31)
475     {
476       u32 mask = clib_host_to_net_u32(1);
477       fib_prefix_t net_pfx = pfx;
478
479       net_pfx.fp_len = 32;
480       net_pfx.fp_addr.ip4.as_u32 ^= mask;
481
482       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
483     }
484
485   pfx.fp_len = 32;
486   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
487 }
488
489 #ifndef CLIB_MARCH_VARIANT
490 void
491 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
492 {
493   ip4_main_t *im = &ip4_main;
494
495   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
496
497   /*
498    * enable/disable only on the 1<->0 transition
499    */
500   if (is_enable)
501     {
502       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
503         return;
504     }
505   else
506     {
507       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
508       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
509         return;
510     }
511   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
512                                !is_enable, 0, 0);
513
514
515   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
516                                sw_if_index, !is_enable, 0, 0);
517
518   {
519     ip4_enable_disable_interface_callback_t *cb;
520     vec_foreach (cb, im->enable_disable_interface_callbacks)
521       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
522   }
523 }
524
525 static clib_error_t *
526 ip4_add_del_interface_address_internal (vlib_main_t * vm,
527                                         u32 sw_if_index,
528                                         ip4_address_t * address,
529                                         u32 address_length, u32 is_del)
530 {
531   vnet_main_t *vnm = vnet_get_main ();
532   ip4_main_t *im = &ip4_main;
533   ip_lookup_main_t *lm = &im->lookup_main;
534   clib_error_t *error = 0;
535   u32 if_address_index, elts_before;
536   ip4_address_fib_t ip4_af, *addr_fib = 0;
537
538   /* local0 interface doesn't support IP addressing  */
539   if (sw_if_index == 0)
540     {
541       return
542        clib_error_create ("local0 interface doesn't support IP addressing");
543     }
544
545   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
546   ip4_addr_fib_init (&ip4_af, address,
547                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
548   vec_add1 (addr_fib, ip4_af);
549
550   /*
551    * there is no support for adj-fib handling in the presence of overlapping
552    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
553    * most routers do.
554    */
555   /* *INDENT-OFF* */
556   if (!is_del)
557     {
558       /* When adding an address check that it does not conflict
559          with an existing address on any interface in this table. */
560       ip_interface_address_t *ia;
561       vnet_sw_interface_t *sif;
562
563       pool_foreach(sif, vnm->interface_main.sw_interfaces,
564       ({
565           if (im->fib_index_by_sw_if_index[sw_if_index] ==
566               im->fib_index_by_sw_if_index[sif->sw_if_index])
567             {
568               foreach_ip_interface_address
569                 (&im->lookup_main, ia, sif->sw_if_index,
570                  0 /* honor unnumbered */ ,
571                  ({
572                    ip4_address_t * x =
573                      ip_interface_address_get_address
574                      (&im->lookup_main, ia);
575                    if (ip4_destination_matches_route
576                        (im, address, x, ia->address_length) ||
577                        ip4_destination_matches_route (im,
578                                                       x,
579                                                       address,
580                                                       address_length))
581                      {
582                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
583
584                        return
585                          clib_error_create
586                          ("failed to add %U which conflicts with %U for interface %U",
587                           format_ip4_address_and_length, address,
588                           address_length,
589                           format_ip4_address_and_length, x,
590                           ia->address_length,
591                           format_vnet_sw_if_index_name, vnm,
592                           sif->sw_if_index);
593                      }
594                  }));
595             }
596       }));
597     }
598   /* *INDENT-ON* */
599
600   elts_before = pool_elts (lm->if_address_pool);
601
602   error = ip_interface_address_add_del
603     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
604   if (error)
605     goto done;
606
607   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
608
609   /* intf addr routes are added/deleted on admin up/down */
610   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
611     {
612       if (is_del)
613         ip4_del_interface_routes (im, ip4_af.fib_index, address,
614                                   address_length);
615       else
616         ip4_add_interface_routes (sw_if_index,
617                                   im, ip4_af.fib_index,
618                                   pool_elt_at_index
619                                   (lm->if_address_pool, if_address_index));
620     }
621
622   /* If pool did not grow/shrink: add duplicate address. */
623   if (elts_before != pool_elts (lm->if_address_pool))
624     {
625       ip4_add_del_interface_address_callback_t *cb;
626       vec_foreach (cb, im->add_del_interface_address_callbacks)
627         cb->function (im, cb->function_opaque, sw_if_index,
628                       address, address_length, if_address_index, is_del);
629     }
630
631 done:
632   vec_free (addr_fib);
633   return error;
634 }
635
636 clib_error_t *
637 ip4_add_del_interface_address (vlib_main_t * vm,
638                                u32 sw_if_index,
639                                ip4_address_t * address,
640                                u32 address_length, u32 is_del)
641 {
642   return ip4_add_del_interface_address_internal
643     (vm, sw_if_index, address, address_length, is_del);
644 }
645
646 void
647 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
648 {
649   ip_interface_address_t *ia;
650   ip4_main_t *im;
651
652   im = &ip4_main;
653
654   /*
655    * when directed broadcast is enabled, the subnet braodcast route will forward
656    * packets using an adjacency with a broadcast MAC. otherwise it drops
657    */
658   /* *INDENT-OFF* */
659   foreach_ip_interface_address(&im->lookup_main, ia,
660                                sw_if_index, 0,
661      ({
662        if (ia->address_length <= 30)
663          {
664            ip4_address_t *ipa;
665
666            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
667
668            fib_prefix_t pfx = {
669              .fp_len = 32,
670              .fp_proto = FIB_PROTOCOL_IP4,
671              .fp_addr = {
672                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
673              },
674            };
675
676            ip4_add_subnet_bcast_route
677              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
678                                                   sw_if_index),
679               &pfx, sw_if_index);
680          }
681      }));
682   /* *INDENT-ON* */
683 }
684 #endif
685
686 static clib_error_t *
687 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
688 {
689   ip4_main_t *im = &ip4_main;
690   ip_interface_address_t *ia;
691   ip4_address_t *a;
692   u32 is_admin_up, fib_index;
693
694   /* Fill in lookup tables with default table (0). */
695   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
696
697   vec_validate_init_empty (im->
698                            lookup_main.if_address_pool_index_by_sw_if_index,
699                            sw_if_index, ~0);
700
701   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
702
703   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
704
705   /* *INDENT-OFF* */
706   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
707                                 0 /* honor unnumbered */,
708   ({
709     a = ip_interface_address_get_address (&im->lookup_main, ia);
710     if (is_admin_up)
711       ip4_add_interface_routes (sw_if_index,
712                                 im, fib_index,
713                                 ia);
714     else
715       ip4_del_interface_routes (im, fib_index,
716                                 a, ia->address_length);
717   }));
718   /* *INDENT-ON* */
719
720   return 0;
721 }
722
723 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
724
725 /* Built-in ip4 unicast rx feature path definition */
726 /* *INDENT-OFF* */
727 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
728 {
729   .arc_name = "ip4-unicast",
730   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
731   .last_in_arc = "ip4-lookup",
732   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
733 };
734
735 VNET_FEATURE_INIT (ip4_flow_classify, static) =
736 {
737   .arc_name = "ip4-unicast",
738   .node_name = "ip4-flow-classify",
739   .runs_before = VNET_FEATURES ("ip4-inacl"),
740 };
741
742 VNET_FEATURE_INIT (ip4_inacl, static) =
743 {
744   .arc_name = "ip4-unicast",
745   .node_name = "ip4-inacl",
746   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
747 };
748
749 VNET_FEATURE_INIT (ip4_source_check_1, static) =
750 {
751   .arc_name = "ip4-unicast",
752   .node_name = "ip4-source-check-via-rx",
753   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
754 };
755
756 VNET_FEATURE_INIT (ip4_source_check_2, static) =
757 {
758   .arc_name = "ip4-unicast",
759   .node_name = "ip4-source-check-via-any",
760   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
761 };
762
763 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
764 {
765   .arc_name = "ip4-unicast",
766   .node_name = "ip4-source-and-port-range-check-rx",
767   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
768 };
769
770 VNET_FEATURE_INIT (ip4_policer_classify, static) =
771 {
772   .arc_name = "ip4-unicast",
773   .node_name = "ip4-policer-classify",
774   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
775 };
776
777 VNET_FEATURE_INIT (ip4_ipsec, static) =
778 {
779   .arc_name = "ip4-unicast",
780   .node_name = "ipsec4-input-feature",
781   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
782 };
783
784 VNET_FEATURE_INIT (ip4_vpath, static) =
785 {
786   .arc_name = "ip4-unicast",
787   .node_name = "vpath-input-ip4",
788   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
789 };
790
791 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
792 {
793   .arc_name = "ip4-unicast",
794   .node_name = "ip4-vxlan-bypass",
795   .runs_before = VNET_FEATURES ("ip4-lookup"),
796 };
797
798 VNET_FEATURE_INIT (ip4_not_enabled, static) =
799 {
800   .arc_name = "ip4-unicast",
801   .node_name = "ip4-not-enabled",
802   .runs_before = VNET_FEATURES ("ip4-lookup"),
803 };
804
805 VNET_FEATURE_INIT (ip4_lookup, static) =
806 {
807   .arc_name = "ip4-unicast",
808   .node_name = "ip4-lookup",
809   .runs_before = 0,     /* not before any other features */
810 };
811
812 /* Built-in ip4 multicast rx feature path definition */
813 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
814 {
815   .arc_name = "ip4-multicast",
816   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
817   .last_in_arc = "ip4-mfib-forward-lookup",
818   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
819 };
820
821 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
822 {
823   .arc_name = "ip4-multicast",
824   .node_name = "vpath-input-ip4",
825   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
826 };
827
828 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
829 {
830   .arc_name = "ip4-multicast",
831   .node_name = "ip4-not-enabled",
832   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
833 };
834
835 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
836 {
837   .arc_name = "ip4-multicast",
838   .node_name = "ip4-mfib-forward-lookup",
839   .runs_before = 0,     /* last feature */
840 };
841
/*
 * ip4 tx (output) feature path definition.
 * The arc starts at "ip4-rewrite", "ip4-midchain" and "ip4-dvr-dpo" and
 * always ends in "interface-output". The features declared below are
 * ordered: source-and-port-range check, then outacl, then ipsec output,
 * then interface-output.
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  .last_in_arc = "interface-output",
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

/* Source and port-range check on tx; ordered before the output ACL. */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ip4-outacl"),
};

/* Output ACL; ordered before the IPsec output feature. */
VNET_FEATURE_INIT (ip4_outacl, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-outacl",
  .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
};

/* IPsec output; ordered before the final interface-output node. */
VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec4-output-feature",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition: terminal feature of the arc. */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
879 /* *INDENT-ON* */
880
881 static clib_error_t *
882 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
883 {
884   ip4_main_t *im = &ip4_main;
885
886   /* Fill in lookup tables with default table (0). */
887   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
888   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
889
890   if (!is_add)
891     {
892       ip4_main_t *im4 = &ip4_main;
893       ip_lookup_main_t *lm4 = &im4->lookup_main;
894       ip_interface_address_t *ia = 0;
895       ip4_address_t *address;
896       vlib_main_t *vm = vlib_get_main ();
897
898       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
899       /* *INDENT-OFF* */
900       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
901       ({
902         address = ip_interface_address_get_address (lm4, ia);
903         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
904       }));
905       /* *INDENT-ON* */
906     }
907
908   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
909                                is_add, 0, 0);
910
911   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
912                                sw_if_index, is_add, 0, 0);
913
914   return /* no error */ 0;
915 }
916
917 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
918
/*
 * Global IP4 main.
 * Defined only when CLIB_MARCH_VARIANT is not set.
 * NOTE(review): presumably march-variant compilations of this file refer
 * to this single definition through a declaration in a header - confirm.
 */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
923
924 static clib_error_t *
925 ip4_lookup_init (vlib_main_t * vm)
926 {
927   ip4_main_t *im = &ip4_main;
928   clib_error_t *error;
929   uword i;
930
931   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
932     return error;
933   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
934     return (error);
935   if ((error = vlib_call_init_function (vm, fib_module_init)))
936     return error;
937   if ((error = vlib_call_init_function (vm, mfib_module_init)))
938     return error;
939
940   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
941     {
942       u32 m;
943
944       if (i < 32)
945         m = pow2_mask (i) << (32 - i);
946       else
947         m = ~0;
948       im->fib_masks[i] = clib_host_to_net_u32 (m);
949     }
950
951   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
952
953   /* Create FIB with index 0 and table id of 0. */
954   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
955                                      FIB_SOURCE_DEFAULT_ROUTE);
956   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
957                                       MFIB_SOURCE_DEFAULT_ROUTE);
958
959   {
960     pg_node_t *pn;
961     pn = pg_get_node (ip4_lookup_node.index);
962     pn->unformat_edit = unformat_pg_ip4_header;
963   }
964
965   {
966     ethernet_arp_header_t h;
967
968     clib_memset (&h, 0, sizeof (h));
969
970 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
971 #define _8(f,v) h.f = v;
972     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
973     _16 (l3_type, ETHERNET_TYPE_IP4);
974     _8 (n_l2_address_bytes, 6);
975     _8 (n_l3_address_bytes, 4);
976     _16 (opcode, ETHERNET_ARP_OPCODE_request);
977 #undef _16
978 #undef _8
979
980     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
981                                /* data */ &h,
982                                sizeof (h),
983                                /* alloc chunk size */ 8,
984                                "ip4 arp");
985   }
986
987   return error;
988 }
989
990 VLIB_INIT_FUNCTION (ip4_lookup_init);
991
/* Per-packet trace record filled in by ip4_forward_next_trace(). */
typedef struct
{
  /* Adjacency taken: ip.adj_index[] entry selected by the caller. */
  u32 dpo_index;
  /* Copy of the buffer's ip.flow_hash. */
  u32 flow_hash;
  /*
   * sw_if_index[VLIB_TX] when that is not ~0, otherwise the FIB index
   * bound to the RX interface. format_ip4_rewrite_trace() prints this
   * field as "tx_sw_if_index".
   */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1003
1004 #ifndef CLIB_MARCH_VARIANT
1005 u8 *
1006 format_ip4_forward_next_trace (u8 * s, va_list * args)
1007 {
1008   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1009   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1010   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1011   u32 indent = format_get_indent (s);
1012   s = format (s, "%U%U",
1013               format_white_space, indent,
1014               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1015   return s;
1016 }
1017 #endif
1018
1019 static u8 *
1020 format_ip4_lookup_trace (u8 * s, va_list * args)
1021 {
1022   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1023   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1024   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1025   u32 indent = format_get_indent (s);
1026
1027   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1028               t->fib_index, t->dpo_index, t->flow_hash);
1029   s = format (s, "\n%U%U",
1030               format_white_space, indent,
1031               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1032   return s;
1033 }
1034
1035 static u8 *
1036 format_ip4_rewrite_trace (u8 * s, va_list * args)
1037 {
1038   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1039   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1040   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1041   u32 indent = format_get_indent (s);
1042
1043   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1044               t->fib_index, t->dpo_index, format_ip_adjacency,
1045               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1046   s = format (s, "\n%U%U",
1047               format_white_space, indent,
1048               format_ip_adjacency_packet_data,
1049               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1050   return s;
1051 }
1052
1053 #ifndef CLIB_MARCH_VARIANT
1054 /* Common trace function for all ip4-forward next nodes. */
1055 void
1056 ip4_forward_next_trace (vlib_main_t * vm,
1057                         vlib_node_runtime_t * node,
1058                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1059 {
1060   u32 *from, n_left;
1061   ip4_main_t *im = &ip4_main;
1062
1063   n_left = frame->n_vectors;
1064   from = vlib_frame_vector_args (frame);
1065
1066   while (n_left >= 4)
1067     {
1068       u32 bi0, bi1;
1069       vlib_buffer_t *b0, *b1;
1070       ip4_forward_next_trace_t *t0, *t1;
1071
1072       /* Prefetch next iteration. */
1073       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1074       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1075
1076       bi0 = from[0];
1077       bi1 = from[1];
1078
1079       b0 = vlib_get_buffer (vm, bi0);
1080       b1 = vlib_get_buffer (vm, bi1);
1081
1082       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1083         {
1084           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1085           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1086           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1087           t0->fib_index =
1088             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1089              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1090             vec_elt (im->fib_index_by_sw_if_index,
1091                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1092
1093           clib_memcpy_fast (t0->packet_data,
1094                             vlib_buffer_get_current (b0),
1095                             sizeof (t0->packet_data));
1096         }
1097       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1098         {
1099           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1100           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1101           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1102           t1->fib_index =
1103             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1104              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1105             vec_elt (im->fib_index_by_sw_if_index,
1106                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1107           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1108                             sizeof (t1->packet_data));
1109         }
1110       from += 2;
1111       n_left -= 2;
1112     }
1113
1114   while (n_left >= 1)
1115     {
1116       u32 bi0;
1117       vlib_buffer_t *b0;
1118       ip4_forward_next_trace_t *t0;
1119
1120       bi0 = from[0];
1121
1122       b0 = vlib_get_buffer (vm, bi0);
1123
1124       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1125         {
1126           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1127           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1128           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1129           t0->fib_index =
1130             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1131              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1132             vec_elt (im->fib_index_by_sw_if_index,
1133                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1134           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1135                             sizeof (t0->packet_data));
1136         }
1137       from += 1;
1138       n_left -= 1;
1139     }
1140 }
1141
1142 /* Compute TCP/UDP/ICMP4 checksum in software. */
1143 u16
1144 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1145                               ip4_header_t * ip0)
1146 {
1147   ip_csum_t sum0;
1148   u32 ip_header_length, payload_length_host_byte_order;
1149   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1150   u16 sum16;
1151   void *data_this_buffer;
1152
1153   /* Initialize checksum with ip header. */
1154   ip_header_length = ip4_header_bytes (ip0);
1155   payload_length_host_byte_order =
1156     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1157   sum0 =
1158     clib_host_to_net_u32 (payload_length_host_byte_order +
1159                           (ip0->protocol << 16));
1160
1161   if (BITS (uword) == 32)
1162     {
1163       sum0 =
1164         ip_csum_with_carry (sum0,
1165                             clib_mem_unaligned (&ip0->src_address, u32));
1166       sum0 =
1167         ip_csum_with_carry (sum0,
1168                             clib_mem_unaligned (&ip0->dst_address, u32));
1169     }
1170   else
1171     sum0 =
1172       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1173
1174   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1175   data_this_buffer = (void *) ip0 + ip_header_length;
1176   n_ip_bytes_this_buffer =
1177     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1178   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1179     {
1180       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1181         n_ip_bytes_this_buffer - ip_header_length : 0;
1182     }
1183   while (1)
1184     {
1185       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1186       n_bytes_left -= n_this_buffer;
1187       if (n_bytes_left == 0)
1188         break;
1189
1190       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1191       p0 = vlib_get_buffer (vm, p0->next_buffer);
1192       data_this_buffer = vlib_buffer_get_current (p0);
1193       n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1194     }
1195
1196   sum16 = ~ip_csum_fold (sum0);
1197
1198   return sum16;
1199 }
1200
1201 u32
1202 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1203 {
1204   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1205   udp_header_t *udp0;
1206   u16 sum16;
1207
1208   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1209           || ip0->protocol == IP_PROTOCOL_UDP);
1210
1211   udp0 = (void *) (ip0 + 1);
1212   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1213     {
1214       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1215                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1216       return p0->flags;
1217     }
1218
1219   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1220
1221   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1222                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1223
1224   return p0->flags;
1225 }
1226 #endif
1227
1228 /* *INDENT-OFF* */
/*
 * Feature arc for locally-destined IPv4 packets: starts at "ip4-local"
 * and always ends in "ip4-local-end-of-arc". The arc index is read back
 * through vnet_feat_arc_ip4_local.feature_arc_index in
 * ip4_local_set_next_and_error().
 */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
  .last_in_arc = "ip4-local-end-of-arc",
};
1235 /* *INDENT-ON* */
1236
1237 static inline void
1238 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1239                             ip4_header_t * ip, u8 is_udp, u8 * error,
1240                             u8 * good_tcp_udp)
1241 {
1242   u32 flags0;
1243   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1244   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1245   if (is_udp)
1246     {
1247       udp_header_t *udp;
1248       u32 ip_len, udp_len;
1249       i32 len_diff;
1250       udp = ip4_next_header (ip);
1251       /* Verify UDP length. */
1252       ip_len = clib_net_to_host_u16 (ip->length);
1253       udp_len = clib_net_to_host_u16 (udp->length);
1254
1255       len_diff = ip_len - udp_len;
1256       *good_tcp_udp &= len_diff >= 0;
1257       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1258     }
1259 }
1260
/*
 * Checksum-state predicates on a vlib_buffer_t pointer.
 * Every argument and every expansion is fully parenthesised so the macros
 * compose safely with surrounding operators (e.g. a leading '!').
 * Note that _b is evaluated more than once; pass an expression without
 * side effects.
 */

/* Non-zero when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * Non-zero when a TCP/UDP packet still needs a software checksum check:
 * the checksum has neither been computed already nor been offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1272
1273 static inline void
1274 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1275                          ip4_header_t * ih, u8 * error)
1276 {
1277   u8 is_udp, is_tcp_udp, good_tcp_udp;
1278
1279   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1280   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1281
1282   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1283     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1284   else
1285     good_tcp_udp = ip4_local_csum_is_valid (b);
1286
1287   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1288   *error = (is_tcp_udp && !good_tcp_udp
1289             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1290 }
1291
1292 static inline void
1293 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1294                             ip4_header_t ** ih, u8 * error)
1295 {
1296   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1297
1298   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1299   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1300
1301   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1302   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1303
1304   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1305   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1306
1307   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1308                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1309     {
1310       if (is_tcp_udp[0])
1311         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1312                                     &good_tcp_udp[0]);
1313       if (is_tcp_udp[1])
1314         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1315                                     &good_tcp_udp[1]);
1316     }
1317
1318   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1319               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1320   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1321               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1322 }
1323
1324 static inline void
1325 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1326                               vlib_buffer_t * b, u16 * next, u8 error,
1327                               u8 head_of_feature_arc)
1328 {
1329   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1330   u32 next_index;
1331
1332   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1333   b->error = error ? error_node->errors[error] : 0;
1334   if (head_of_feature_arc)
1335     {
1336       next_index = *next;
1337       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1338         {
1339           vnet_feature_arc_start (arc_index,
1340                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1341                                   &next_index, b);
1342           *next = next_index;
1343         }
1344     }
1345 }
1346
1347 typedef struct
1348 {
1349   ip4_address_t src;
1350   u32 lbi;
1351   u8 error;
1352   u8 first;
1353 } ip4_local_last_check_t;
1354
1355 static inline void
1356 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1357                      ip4_local_last_check_t * last_check, u8 * error0)
1358 {
1359   ip4_fib_mtrie_leaf_t leaf0;
1360   ip4_fib_mtrie_t *mtrie0;
1361   const dpo_id_t *dpo0;
1362   load_balance_t *lb0;
1363   u32 lbi0;
1364
1365   vnet_buffer (b)->ip.fib_index =
1366     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1367     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1368
1369   /*
1370    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1371    *  adjacency for the destination address (the local interface address).
1372    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1373    *  adjacency for the source address (the remote sender's address)
1374    */
1375   if (PREDICT_FALSE (last_check->first ||
1376                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1377     {
1378       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1379       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1380       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1381       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1382       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1383
1384       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1385         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1386       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1387
1388       lb0 = load_balance_get (lbi0);
1389       dpo0 = load_balance_get_bucket_i (lb0, 0);
1390
1391       /*
1392        * Must have a route to source otherwise we drop the packet.
1393        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1394        *
1395        * The checks are:
1396        *  - the source is a recieve => it's from us => bogus, do this
1397        *    first since it sets a different error code.
1398        *  - uRPF check for any route to source - accept if passes.
1399        *  - allow packets destined to the broadcast address from unknown sources
1400        */
1401
1402       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1403                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1404                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1405       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1406                   && !fib_urpf_check_size (lb0->lb_urpf)
1407                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1408                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1409
1410       last_check->src.as_u32 = ip0->src_address.as_u32;
1411       last_check->lbi = lbi0;
1412       last_check->error = *error0;
1413     }
1414   else
1415     {
1416       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1417         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1418       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1419       *error0 = last_check->error;
1420       last_check->first = 0;
1421     }
1422 }
1423
1424 static inline void
1425 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1426                         ip4_local_last_check_t * last_check, u8 * error)
1427 {
1428   ip4_fib_mtrie_leaf_t leaf[2];
1429   ip4_fib_mtrie_t *mtrie[2];
1430   const dpo_id_t *dpo[2];
1431   load_balance_t *lb[2];
1432   u32 not_last_hit;
1433   u32 lbi[2];
1434
1435   not_last_hit = last_check->first;
1436   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1437   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1438
1439   vnet_buffer (b[0])->ip.fib_index =
1440     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1441     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1442     vnet_buffer (b[0])->ip.fib_index;
1443
1444   vnet_buffer (b[1])->ip.fib_index =
1445     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1446     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1447     vnet_buffer (b[1])->ip.fib_index;
1448
1449   /*
1450    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1451    *  adjacency for the destination address (the local interface address).
1452    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1453    *  adjacency for the source address (the remote sender's address)
1454    */
1455   if (PREDICT_FALSE (not_last_hit))
1456     {
1457       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1458       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1459
1460       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1461       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1462
1463       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1464                                            &ip[0]->src_address, 2);
1465       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1466                                            &ip[1]->src_address, 2);
1467
1468       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1469                                            &ip[0]->src_address, 3);
1470       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1471                                            &ip[1]->src_address, 3);
1472
1473       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1474       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1475
1476       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1477         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1478       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1479
1480       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1481         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1482       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1483
1484       lb[0] = load_balance_get (lbi[0]);
1485       lb[1] = load_balance_get (lbi[1]);
1486
1487       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1488       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1489
1490       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1491                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1492                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1493       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1494                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1495                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1496                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1497
1498       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1499                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1500                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1501       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1502                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1503                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1504                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1505
1506       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1507       last_check->lbi = lbi[1];
1508       last_check->error = error[1];
1509     }
1510   else
1511     {
1512       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1513         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1514       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1515
1516       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1517         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1518       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1519
1520       error[0] = last_check->error;
1521       error[1] = last_check->error;
1522       last_check->first = 0;
1523     }
1524 }
1525
/*
 * Packet classes distinguished by ip4_local_classify().
 * IP_LOCAL_PACKET_TYPE_L4 is the first enumerator and therefore 0:
 * ip4_local_inline() relies on that when it tests a packet type for
 * truth to decide whether the L4 checks are skipped.
 */
enum ip_local_packet_type_e
{
  /* Regular packet: checksum and source checks apply. */
  IP_LOCAL_PACKET_TYPE_L4,
  /* Buffer carries VNET_BUFFER_F_IS_NATED. */
  IP_LOCAL_PACKET_TYPE_NAT,
  /* IP fragment: handed to reassembly. */
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1532
1533 /**
1534  * Determine packet type and next node.
1535  *
1536  * The expectation is that all packets that are not L4 will skip
1537  * checksums and source checks.
1538  */
1539 always_inline u8
1540 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1541 {
1542   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1543
1544   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1545     {
1546       *next = IP_LOCAL_NEXT_REASSEMBLY;
1547       return IP_LOCAL_PACKET_TYPE_FRAG;
1548     }
1549   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1550     {
1551       *next = lm->local_next_by_ip_protocol[ip->protocol];
1552       return IP_LOCAL_PACKET_TYPE_NAT;
1553     }
1554
1555   *next = lm->local_next_by_ip_protocol[ip->protocol];
1556   return IP_LOCAL_PACKET_TYPE_L4;
1557 }
1558
/*
 * Shared body of the "ip4-local" and "ip4-local-end-of-arc" nodes.
 *
 * Every packet is classified to pick its next node. When
 * head_of_feature_arc is non-zero, packets classified as L4 additionally
 * go through the L4 checksum check and the source-address check, and
 * packets without error are started on the ip4-local feature arc (see
 * ip4_local_set_next_and_error). When head_of_feature_arc is zero all
 * checks are skipped.
 *
 * Returns the number of packets in the frame.
 */
static inline uword
ip4_local_inline (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
                  vlib_frame_t * frame, int head_of_feature_arc)
{
  u32 *from, n_left_from;
  /* Errors are taken from the ip4-input node's runtime error table. */
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_input_node.index);
  u16 nexts[VLIB_FRAME_SIZE], *next;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  ip4_header_t *ip[2];
  u8 error[2], pt[2];

  ip4_local_last_check_t last_check = {
    /*
     * 0.0.0.0 can appear as the source address of an IP packet,
     * as can any other address, hence the need to use the 'first'
     * member to make sure the .lbi is initialised for the first
     * packet.
     */
    .src = {.as_u32 = 0},
    .lbi = ~0,
    .error = IP4_ERROR_UNKNOWN_PROTOCOL,
    .first = 1,
  };

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  vlib_get_buffers (vm, from, bufs, n_left_from);
  b = bufs;
  next = nexts;

  /*
   * Dual loop: handles b[0] and b[1] per pass. It needs six buffers left
   * because b[4] and b[5] are prefetched.
   */
  while (n_left_from >= 6)
    {
      u8 not_batch = 0;

      /* Prefetch next iteration. */
      {
        vlib_prefetch_buffer_header (b[4], LOAD);
        vlib_prefetch_buffer_header (b[5], LOAD);

        CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
        CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
      }

      /* UNKNOWN_PROTOCOL doubles as the "no error so far" value. */
      error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;

      ip[0] = vlib_buffer_get_current (b[0]);
      ip[1] = vlib_buffer_get_current (b[1]);

      vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
      vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;

      pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
      pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);

      /* Non-zero when the two packets are of different types. */
      not_batch = pt[0] ^ pt[1];

      /* Nothing to check: not head of arc, or both packets are non-L4. */
      if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
        goto skip_checks;

      if (PREDICT_TRUE (not_batch == 0))
        {
          /* Both packets are L4: use the paired checks. */
          ip4_local_check_l4_csum_x2 (vm, b, ip, error);
          ip4_local_check_src_x2 (b, ip, &last_check, error);
        }
      else
        {
          /* Mixed pair: check only the packet(s) classified as L4. */
          if (!pt[0])
            {
              ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
              ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
            }
          if (!pt[1])
            {
              ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
              ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
            }
        }

    skip_checks:

      ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
                                    head_of_feature_arc);
      ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
                                    head_of_feature_arc);

      b += 2;
      next += 2;
      n_left_from -= 2;
    }

  /* Single loop: remaining packets, one at a time. */
  while (n_left_from > 0)
    {
      error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;

      ip[0] = vlib_buffer_get_current (b[0]);
      vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
      pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);

      /* Checks apply only at the head of the arc and only to L4 packets. */
      if (head_of_feature_arc == 0 || pt[0])
        goto skip_check;

      ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
      ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);

    skip_check:

      ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
                                    head_of_feature_arc);

      b += 1;
      next += 1;
      n_left_from -= 1;
    }

  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  return frame->n_vectors;
}
1682
/*
 * "ip4-local" node function: runs the shared body as the head of the
 * ip4-local feature arc, i.e. with the L4 and source checks enabled.
 */
VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                               vlib_frame_t * frame)
{
  return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
}

/*
 * Registration of "ip4-local" with its built-in next nodes. Further next
 * nodes are added at run time by ip4_register_protocol().
 */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_local_node) =
{
  .name = "ip4-local",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_next_nodes = IP_LOCAL_N_NEXT,
  .next_nodes =
  {
    [IP_LOCAL_NEXT_DROP] = "ip4-drop",
    [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
    [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
    [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
    [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
  },
};
1705 /* *INDENT-ON* */
1706
1707
/*
 * "ip4-local-end-of-arc" node function: runs the shared body with
 * head_of_feature_arc == 0, so the L4 and source checks are skipped and
 * the feature arc is not started again.
 */
VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
                                          vlib_node_runtime_t * node,
                                          vlib_frame_t * frame)
{
  return ip4_local_inline (vm, node, frame, 0 /* not head of feature arc */ );
}

/* Registered as a sibling of "ip4-local" (see .sibling_of). */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
  .name = "ip4-local-end-of-arc",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,
  .sibling_of = "ip4-local",
};

/* Terminal feature of the "ip4-local" arc. */
VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
  .arc_name = "ip4-local",
  .node_name = "ip4-local-end-of-arc",
  .runs_before = 0, /* not before any other features */
};
1729 /* *INDENT-ON* */
1730
1731 #ifndef CLIB_MARCH_VARIANT
1732 void
1733 ip4_register_protocol (u32 protocol, u32 node_index)
1734 {
1735   vlib_main_t *vm = vlib_get_main ();
1736   ip4_main_t *im = &ip4_main;
1737   ip_lookup_main_t *lm = &im->lookup_main;
1738
1739   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1740   lm->local_next_by_ip_protocol[protocol] =
1741     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1742 }
1743
1744 void
1745 ip4_unregister_protocol (u32 protocol)
1746 {
1747   ip4_main_t *im = &ip4_main;
1748   ip_lookup_main_t *lm = &im->lookup_main;
1749
1750   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1751   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1752 }
1753 #endif
1754
1755 static clib_error_t *
1756 show_ip_local_command_fn (vlib_main_t * vm,
1757                           unformat_input_t * input, vlib_cli_command_t * cmd)
1758 {
1759   ip4_main_t *im = &ip4_main;
1760   ip_lookup_main_t *lm = &im->lookup_main;
1761   int i;
1762
1763   vlib_cli_output (vm, "Protocols handled by ip4_local");
1764   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1765     {
1766       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1767         {
1768           u32 node_index = vlib_get_node (vm,
1769                                           ip4_local_node.index)->
1770             next_nodes[lm->local_next_by_ip_protocol[i]];
1771           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1772                            format_vlib_node_name, vm, node_index);
1773         }
1774     }
1775   return 0;
1776 }
1777
1778
1779
/*?
 * Display the set of protocols handled by the local IPv4 stack.
 * Every protocol whose next index is not IP_LOCAL_NEXT_PUNT is listed,
 * one per line, as "<protocol>: <next node name>".
 *
 * @cliexpar
 * Example of how to display local protocol table (illustrative output):
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * ICMP: ip4-icmp-input
 * UDP: ip4-udp-lookup
 * @cliexend
?*/
1792 /* *INDENT-OFF* */
1793 VLIB_CLI_COMMAND (show_ip_local, static) =
1794 {
1795   .path = "show ip local",
1796   .function = show_ip_local_command_fn,
1797   .short_help = "show ip local",
1798 };
1799 /* *INDENT-ON* */
1800
1801 always_inline uword
1802 ip4_arp_inline (vlib_main_t * vm,
1803                 vlib_node_runtime_t * node,
1804                 vlib_frame_t * frame, int is_glean)
1805 {
1806   vnet_main_t *vnm = vnet_get_main ();
1807   ip4_main_t *im = &ip4_main;
1808   ip_lookup_main_t *lm = &im->lookup_main;
1809   u32 *from, *to_next_drop;
1810   uword n_left_from, n_left_to_next_drop, next_index;
1811   u32 thread_index = vm->thread_index;
1812   u64 seed;
1813
1814   if (node->flags & VLIB_NODE_FLAG_TRACE)
1815     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1816
1817   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1818
1819   from = vlib_frame_vector_args (frame);
1820   n_left_from = frame->n_vectors;
1821   next_index = node->cached_next_index;
1822   if (next_index == IP4_ARP_NEXT_DROP)
1823     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1824
1825   while (n_left_from > 0)
1826     {
1827       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1828                            to_next_drop, n_left_to_next_drop);
1829
1830       while (n_left_from > 0 && n_left_to_next_drop > 0)
1831         {
1832           u32 pi0, bi0, adj_index0, sw_if_index0;
1833           ip_adjacency_t *adj0;
1834           vlib_buffer_t *p0, *b0;
1835           ip4_address_t resolve0;
1836           ethernet_arp_header_t *h0;
1837           vnet_hw_interface_t *hw_if0;
1838           u64 r0;
1839
1840           pi0 = from[0];
1841           p0 = vlib_get_buffer (vm, pi0);
1842
1843           from += 1;
1844           n_left_from -= 1;
1845           to_next_drop[0] = pi0;
1846           to_next_drop += 1;
1847           n_left_to_next_drop -= 1;
1848
1849           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1850           adj0 = adj_get (adj_index0);
1851
1852           if (is_glean)
1853             {
1854               /* resolve the packet's destination */
1855               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1856               resolve0 = ip0->dst_address;
1857             }
1858           else
1859             {
1860               /* resolve the incomplete adj */
1861               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1862             }
1863
1864           /* combine the address and interface for the hash key */
1865           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1866           r0 = (u64) resolve0.data_u32 << 32;
1867           r0 |= sw_if_index0;
1868
1869           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1870             {
1871               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1872               continue;
1873             }
1874
1875           /*
1876            * the adj has been updated to a rewrite but the node the DPO that got
1877            * us here hasn't - yet. no big deal. we'll drop while we wait.
1878            */
1879           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1880             {
1881               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1882               continue;
1883             }
1884
1885           /*
1886            * Can happen if the control-plane is programming tables
1887            * with traffic flowing; at least that's today's lame excuse.
1888            */
1889           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1890               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1891             {
1892               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1893               continue;
1894             }
1895           /* Send ARP request. */
1896           h0 =
1897             vlib_packet_template_get_packet (vm,
1898                                              &im->ip4_arp_request_packet_template,
1899                                              &bi0);
1900           /* Seems we're out of buffers */
1901           if (PREDICT_FALSE (!h0))
1902             {
1903               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1904               continue;
1905             }
1906
1907           b0 = vlib_get_buffer (vm, bi0);
1908
1909           /* copy the persistent fields from the original */
1910           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1911
1912           /* Add rewrite/encap string for ARP packet. */
1913           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1914
1915           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1916
1917           /* Src ethernet address in ARP header. */
1918           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
1919                                   hw_if0->hw_address);
1920           if (is_glean)
1921             {
1922               /* The interface's source address is stashed in the Glean Adj */
1923               h0->ip4_over_ethernet[0].ip4 =
1924                 adj0->sub_type.glean.receive_addr.ip4;
1925             }
1926           else
1927             {
1928               /* Src IP address in ARP header. */
1929               if (ip4_src_address_for_packet (lm, sw_if_index0,
1930                                               &h0->ip4_over_ethernet[0].ip4))
1931                 {
1932                   /* No source address available */
1933                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1934                   vlib_buffer_free (vm, &bi0, 1);
1935                   continue;
1936                 }
1937             }
1938           h0->ip4_over_ethernet[1].ip4 = resolve0;
1939
1940           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1941
1942           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1943           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1944           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1945
1946           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1947
1948           vlib_set_next_frame_buffer (vm, node,
1949                                       adj0->rewrite_header.next_index, bi0);
1950         }
1951
1952       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1953     }
1954
1955   return frame->n_vectors;
1956 }
1957
1958 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1959                              vlib_frame_t * frame)
1960 {
1961   return (ip4_arp_inline (vm, node, frame, 0));
1962 }
1963
1964 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1965                                vlib_frame_t * frame)
1966 {
1967   return (ip4_arp_inline (vm, node, frame, 1));
1968 }
1969
1970 static char *ip4_arp_error_strings[] = {
1971   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
1972   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
1973   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
1974   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1975   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1976   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1977 };
1978
1979 /* *INDENT-OFF* */
1980 VLIB_REGISTER_NODE (ip4_arp_node) =
1981 {
1982   .name = "ip4-arp",
1983   .vector_size = sizeof (u32),
1984   .format_trace = format_ip4_forward_next_trace,
1985   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1986   .error_strings = ip4_arp_error_strings,
1987   .n_next_nodes = IP4_ARP_N_NEXT,
1988   .next_nodes =
1989   {
1990     [IP4_ARP_NEXT_DROP] = "error-drop",
1991   },
1992 };
1993
1994 VLIB_REGISTER_NODE (ip4_glean_node) =
1995 {
1996   .name = "ip4-glean",
1997   .vector_size = sizeof (u32),
1998   .format_trace = format_ip4_forward_next_trace,
1999   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2000   .error_strings = ip4_arp_error_strings,
2001   .n_next_nodes = IP4_ARP_N_NEXT,
2002   .next_nodes = {
2003   [IP4_ARP_NEXT_DROP] = "error-drop",
2004   },
2005 };
2006 /* *INDENT-ON* */
2007
/* X-macro list of the IP4_ARP_ERROR_* suffixes that arp_notrace_init()
 * adds to the pcap drop-trace filter; the user supplies _(). */
#define foreach_notrace_ip4_arp_error \
  _(THROTTLED)                        \
  _(RESOLVED)                         \
  _(NO_BUFFERS)                       \
  _(REQUEST_SENT)                     \
  _(NON_ARP_ADJ)                      \
  _(NO_SOURCE_ADDRESS)
2015
2016 static clib_error_t *
2017 arp_notrace_init (vlib_main_t * vm)
2018 {
2019   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2020
2021   /* don't trace ARP request packets */
2022 #define _(a)                                    \
2023     vnet_pcap_drop_trace_filter_add_del         \
2024         (rt->errors[IP4_ARP_ERROR_##a],         \
2025          1 /* is_add */);
2026   foreach_notrace_ip4_arp_error;
2027 #undef _
2028   return 0;
2029 }
2030
2031 VLIB_INIT_FUNCTION (arp_notrace_init);
2032
2033
2034 #ifndef CLIB_MARCH_VARIANT
2035 /* Send an ARP request to see if given destination is reachable on given interface. */
2036 clib_error_t *
2037 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2038                     u8 refresh)
2039 {
2040   vnet_main_t *vnm = vnet_get_main ();
2041   ip4_main_t *im = &ip4_main;
2042   ethernet_arp_header_t *h;
2043   ip4_address_t *src;
2044   ip_interface_address_t *ia;
2045   ip_adjacency_t *adj;
2046   vnet_hw_interface_t *hi;
2047   vnet_sw_interface_t *si;
2048   vlib_buffer_t *b;
2049   adj_index_t ai;
2050   u32 bi = 0;
2051   u8 unicast_rewrite = 0;
2052
2053   si = vnet_get_sw_interface (vnm, sw_if_index);
2054
2055   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2056     {
2057       return clib_error_return (0, "%U: interface %U down",
2058                                 format_ip4_address, dst,
2059                                 format_vnet_sw_if_index_name, vnm,
2060                                 sw_if_index);
2061     }
2062
2063   src =
2064     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2065   if (!src)
2066     {
2067       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2068       return clib_error_return
2069         (0,
2070          "no matching interface address for destination %U (interface %U)",
2071          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2072          sw_if_index);
2073     }
2074
2075   h = vlib_packet_template_get_packet (vm,
2076                                        &im->ip4_arp_request_packet_template,
2077                                        &bi);
2078
2079   if (!h)
2080     return clib_error_return (0, "ARP request packet allocation failed");
2081
2082   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2083   if (PREDICT_FALSE (!hi->hw_address))
2084     {
2085       return clib_error_return (0, "%U: interface %U do not support ip probe",
2086                                 format_ip4_address, dst,
2087                                 format_vnet_sw_if_index_name, vnm,
2088                                 sw_if_index);
2089     }
2090
2091   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2092
2093   h->ip4_over_ethernet[0].ip4 = src[0];
2094   h->ip4_over_ethernet[1].ip4 = dst[0];
2095
2096   b = vlib_get_buffer (vm, bi);
2097   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2098     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2099
2100   ip46_address_t nh = {
2101     .ip4 = *dst,
2102   };
2103
2104   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2105                             VNET_LINK_IP4, &nh, sw_if_index);
2106   adj = adj_get (ai);
2107
2108   /* Peer has been previously resolved, retrieve glean adj instead */
2109   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2110     {
2111       if (refresh)
2112         unicast_rewrite = 1;
2113       else
2114         {
2115           adj_unlock (ai);
2116           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2117                                       VNET_LINK_IP4, sw_if_index, &nh);
2118           adj = adj_get (ai);
2119         }
2120     }
2121
2122   /* Add encapsulation string for software interface (e.g. ethernet header). */
2123   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2124   if (unicast_rewrite)
2125     {
2126       u16 *etype = vlib_buffer_get_current (b) - 2;
2127       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2128     }
2129   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2130
2131   {
2132     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2133     u32 *to_next = vlib_frame_vector_args (f);
2134     to_next[0] = bi;
2135     f->n_vectors = 1;
2136     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2137   }
2138
2139   adj_unlock (ai);
2140   return /* no error */ 0;
2141 }
2142 #endif
2143
/* Static next indices of the ip4 rewrite path. */
typedef enum
{
  /* default disposition before a packet is successfully rewritten */
  IP4_REWRITE_NEXT_DROP,
  /* TTL expired, or MTU exceeded with don't-fragment set */
  IP4_REWRITE_NEXT_ICMP_ERROR,
  /* MTU exceeded and fragmentation allowed */
  IP4_REWRITE_NEXT_FRAGMENT,
  /* Last: number of static next indices */
  IP4_REWRITE_N_NEXT
} ip4_rewrite_next_t;
2151
/**
 * The bits of an IPv4 address that are kept (masked in) when constructing
 * a multicast MAC address: the 23 low-order bits of the address.  The
 * address is used as stored in the packet, so on little-endian hosts the
 * mask is the byte-swapped form of 0x007fffff.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif
2161
2162 always_inline void
2163 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2164                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2165 {
2166   if (packet_len > adj_packet_bytes)
2167     {
2168       *error = IP4_ERROR_MTU_EXCEEDED;
2169       if (df)
2170         {
2171           icmp4_error_set_vnet_buffer
2172             (b, ICMP4_destination_unreachable,
2173              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2174              adj_packet_bytes);
2175           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2176         }
2177       else
2178         {
2179           /* IP fragmentation */
2180           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2181                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2182           *next = IP4_REWRITE_NEXT_FRAGMENT;
2183         }
2184     }
2185 }
2186
2187 /* Decrement TTL & update checksum.
2188    Works either endian, so no need for byte swap. */
2189 static_always_inline void
2190 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2191                             u32 * error)
2192 {
2193   i32 ttl;
2194   u32 checksum;
2195   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2196     {
2197       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2198       return;
2199     }
2200
2201   ttl = ip->ttl;
2202
2203   /* Input node should have reject packets with ttl 0. */
2204   ASSERT (ip->ttl > 0);
2205
2206   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2207   checksum += checksum >= 0xffff;
2208
2209   ip->checksum = checksum;
2210   ttl -= 1;
2211   ip->ttl = ttl;
2212
2213   /*
2214    * If the ttl drops below 1 when forwarding, generate
2215    * an ICMP response.
2216    */
2217   if (PREDICT_FALSE (ttl <= 0))
2218     {
2219       *error = IP4_ERROR_TIME_EXPIRED;
2220       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2221       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2222                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2223                                    0);
2224       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2225     }
2226
2227   /* Verify checksum. */
2228   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2229           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2230 }
2231
2232
2233 always_inline uword
2234 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2235                              vlib_node_runtime_t * node,
2236                              vlib_frame_t * frame,
2237                              int do_counters, int is_midchain, int is_mcast,
2238                              int do_gso)
2239 {
2240   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2241   u32 *from = vlib_frame_vector_args (frame);
2242   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2243   u16 nexts[VLIB_FRAME_SIZE], *next;
2244   u32 n_left_from;
2245   vlib_node_runtime_t *error_node =
2246     vlib_node_get_runtime (vm, ip4_input_node.index);
2247
2248   n_left_from = frame->n_vectors;
2249   u32 thread_index = vm->thread_index;
2250
2251   vlib_get_buffers (vm, from, bufs, n_left_from);
2252   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2253
2254   if (n_left_from >= 6)
2255     {
2256       int i;
2257       for (i = 2; i < 6; i++)
2258         vlib_prefetch_buffer_header (bufs[i], LOAD);
2259     }
2260
2261   next = nexts;
2262   b = bufs;
2263   while (n_left_from >= 8)
2264     {
2265       ip_adjacency_t *adj0, *adj1;
2266       ip4_header_t *ip0, *ip1;
2267       u32 rw_len0, error0, adj_index0;
2268       u32 rw_len1, error1, adj_index1;
2269       u32 tx_sw_if_index0, tx_sw_if_index1;
2270       u8 *p;
2271
2272       vlib_prefetch_buffer_header (b[6], LOAD);
2273       vlib_prefetch_buffer_header (b[7], LOAD);
2274
2275       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2276       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2277
2278       /*
2279        * pre-fetch the per-adjacency counters
2280        */
2281       if (do_counters)
2282         {
2283           vlib_prefetch_combined_counter (&adjacency_counters,
2284                                           thread_index, adj_index0);
2285           vlib_prefetch_combined_counter (&adjacency_counters,
2286                                           thread_index, adj_index1);
2287         }
2288
2289       ip0 = vlib_buffer_get_current (b[0]);
2290       ip1 = vlib_buffer_get_current (b[1]);
2291
2292       error0 = error1 = IP4_ERROR_NONE;
2293
2294       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2295       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2296
2297       /* Rewrite packet header and updates lengths. */
2298       adj0 = adj_get (adj_index0);
2299       adj1 = adj_get (adj_index1);
2300
2301       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2302       rw_len0 = adj0[0].rewrite_header.data_bytes;
2303       rw_len1 = adj1[0].rewrite_header.data_bytes;
2304       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2305       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2306
2307       p = vlib_buffer_get_current (b[2]);
2308       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2309       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2310
2311       p = vlib_buffer_get_current (b[3]);
2312       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2313       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2314
2315       /* Check MTU of outgoing interface. */
2316       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2317       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2318
2319       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2320         ip0_len = gso_mtu_sz (b[0]);
2321       if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2322         ip1_len = gso_mtu_sz (b[1]);
2323
2324       ip4_mtu_check (b[0], ip0_len,
2325                      adj0[0].rewrite_header.max_l3_packet_bytes,
2326                      ip0->flags_and_fragment_offset &
2327                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2328                      next + 0, &error0);
2329       ip4_mtu_check (b[1], ip1_len,
2330                      adj1[0].rewrite_header.max_l3_packet_bytes,
2331                      ip1->flags_and_fragment_offset &
2332                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2333                      next + 1, &error1);
2334
2335       if (is_mcast)
2336         {
2337           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2338                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2339                     IP4_ERROR_SAME_INTERFACE : error0);
2340           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2341                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2342                     IP4_ERROR_SAME_INTERFACE : error1);
2343         }
2344
2345       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2346        * to see the IP header */
2347       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2348         {
2349           u32 next_index = adj0[0].rewrite_header.next_index;
2350           b[0]->current_data -= rw_len0;
2351           b[0]->current_length += rw_len0;
2352           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2353           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2354
2355           if (PREDICT_FALSE
2356               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2357             vnet_feature_arc_start (lm->output_feature_arc_index,
2358                                     tx_sw_if_index0, &next_index, b[0]);
2359           next[0] = next_index;
2360         }
2361       else
2362         {
2363           b[0]->error = error_node->errors[error0];
2364         }
2365       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2366         {
2367           u32 next_index = adj1[0].rewrite_header.next_index;
2368           b[1]->current_data -= rw_len1;
2369           b[1]->current_length += rw_len1;
2370
2371           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2372           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2373
2374           if (PREDICT_FALSE
2375               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2376             vnet_feature_arc_start (lm->output_feature_arc_index,
2377                                     tx_sw_if_index1, &next_index, b[1]);
2378           next[1] = next_index;
2379         }
2380       else
2381         {
2382           b[1]->error = error_node->errors[error1];
2383         }
2384       if (is_midchain)
2385         {
2386           calc_checksums (vm, b[0]);
2387           calc_checksums (vm, b[1]);
2388         }
2389       /* Guess we are only writing on simple Ethernet header. */
2390       vnet_rewrite_two_headers (adj0[0], adj1[0],
2391                                 ip0, ip1, sizeof (ethernet_header_t));
2392
2393       /*
2394        * Bump the per-adjacency counters
2395        */
2396       if (do_counters)
2397         {
2398           vlib_increment_combined_counter
2399             (&adjacency_counters,
2400              thread_index,
2401              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2402
2403           vlib_increment_combined_counter
2404             (&adjacency_counters,
2405              thread_index,
2406              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2407         }
2408
2409       if (is_midchain)
2410         {
2411           if (adj0->sub_type.midchain.fixup_func)
2412             adj0->sub_type.midchain.fixup_func
2413               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2414           if (adj1->sub_type.midchain.fixup_func)
2415             adj1->sub_type.midchain.fixup_func
2416               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2417         }
2418
2419       if (is_mcast)
2420         {
2421           /*
2422            * copy bytes from the IP address into the MAC rewrite
2423            */
2424           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2425                                       adj0->rewrite_header.dst_mcast_offset,
2426                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2427           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2428                                       adj1->rewrite_header.dst_mcast_offset,
2429                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2430         }
2431
2432       next += 2;
2433       b += 2;
2434       n_left_from -= 2;
2435     }
2436
2437   while (n_left_from > 0)
2438     {
2439       ip_adjacency_t *adj0;
2440       ip4_header_t *ip0;
2441       u32 rw_len0, adj_index0, error0;
2442       u32 tx_sw_if_index0;
2443
2444       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2445
2446       adj0 = adj_get (adj_index0);
2447
2448       if (do_counters)
2449         vlib_prefetch_combined_counter (&adjacency_counters,
2450                                         thread_index, adj_index0);
2451
2452       ip0 = vlib_buffer_get_current (b[0]);
2453
2454       error0 = IP4_ERROR_NONE;
2455
2456       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2457
2458
2459       /* Update packet buffer attributes/set output interface. */
2460       rw_len0 = adj0[0].rewrite_header.data_bytes;
2461       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2462
2463       /* Check MTU of outgoing interface. */
2464       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2465       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2466         ip0_len = gso_mtu_sz (b[0]);
2467
2468       ip4_mtu_check (b[0], ip0_len,
2469                      adj0[0].rewrite_header.max_l3_packet_bytes,
2470                      ip0->flags_and_fragment_offset &
2471                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2472                      next + 0, &error0);
2473
2474       if (is_mcast)
2475         {
2476           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2477                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2478                     IP4_ERROR_SAME_INTERFACE : error0);
2479         }
2480
2481       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2482        * to see the IP header */
2483       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2484         {
2485           u32 next_index = adj0[0].rewrite_header.next_index;
2486           b[0]->current_data -= rw_len0;
2487           b[0]->current_length += rw_len0;
2488           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2489           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2490
2491           if (PREDICT_FALSE
2492               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2493             vnet_feature_arc_start (lm->output_feature_arc_index,
2494                                     tx_sw_if_index0, &next_index, b[0]);
2495           next[0] = next_index;
2496         }
2497       else
2498         {
2499           b[0]->error = error_node->errors[error0];
2500         }
2501       if (is_midchain)
2502         {
2503           calc_checksums (vm, b[0]);
2504         }
2505       /* Guess we are only writing on simple Ethernet header. */
2506       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2507
2508       if (do_counters)
2509         vlib_increment_combined_counter
2510           (&adjacency_counters,
2511            thread_index, adj_index0, 1,
2512            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2513
2514       if (is_midchain)
2515         {
2516           if (adj0->sub_type.midchain.fixup_func)
2517             adj0->sub_type.midchain.fixup_func
2518               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2519         }
2520
2521       if (is_mcast)
2522         {
2523           /*
2524            * copy bytes from the IP address into the MAC rewrite
2525            */
2526           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2527                                       adj0->rewrite_header.dst_mcast_offset,
2528                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2529         }
2530
2531       next += 1;
2532       b += 1;
2533       n_left_from -= 1;
2534     }
2535
2536
2537   /* Need to do trace after rewrites to pick up new packet data. */
2538   if (node->flags & VLIB_NODE_FLAG_TRACE)
2539     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2540
2541   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2542   return frame->n_vectors;
2543 }
2544
2545 always_inline uword
2546 ip4_rewrite_inline (vlib_main_t * vm,
2547                     vlib_node_runtime_t * node,
2548                     vlib_frame_t * frame,
2549                     int do_counters, int is_midchain, int is_mcast)
2550 {
2551   vnet_main_t *vnm = vnet_get_main ();
2552   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2553     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2554                                         is_midchain, is_mcast,
2555                                         1 /* do_gso */ );
2556   else
2557     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2558                                         is_midchain, is_mcast,
2559                                         0 /* no do_gso */ );
2560 }
2561
2562
2563 /** @brief IPv4 rewrite node.
2564     @node ip4-rewrite
2565
2566     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2567     header checksum, fetch the ip adjacency, check the outbound mtu,
2568     apply the adjacency rewrite, and send pkts to the adjacency
2569     rewrite header's rewrite_next_index.
2570
2571     @param vm vlib_main_t corresponding to the current thread
2572     @param node vlib_node_runtime_t
2573     @param frame vlib_frame_t whose contents should be dispatched
2574
2575     @par Graph mechanics: buffer metadata, next index usage
2576
2577     @em Uses:
2578     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2579         - the rewrite adjacency index
2580     - <code>adj->lookup_next_index</code>
2581         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2582           the packet will be dropped.
2583     - <code>adj->rewrite_header</code>
2584         - Rewrite string length, rewrite string, next_index
2585
2586     @em Sets:
2587     - <code>b->current_data, b->current_length</code>
2588         - Updated net of applying the rewrite string
2589
2590     <em>Next Indices:</em>
2591     - <code> adj->rewrite_header.next_index </code>
2592       or @c ip4-drop
2593 */
2594
2595 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2596                                  vlib_frame_t * frame)
2597 {
2598   if (adj_are_counters_enabled ())
2599     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2600   else
2601     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2602 }
2603
2604 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2605                                        vlib_node_runtime_t * node,
2606                                        vlib_frame_t * frame)
2607 {
2608   if (adj_are_counters_enabled ())
2609     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2610   else
2611     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2612 }
2613
2614 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2615                                   vlib_node_runtime_t * node,
2616                                   vlib_frame_t * frame)
2617 {
2618   if (adj_are_counters_enabled ())
2619     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2620   else
2621     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2622 }
2623
2624 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2625                                        vlib_node_runtime_t * node,
2626                                        vlib_frame_t * frame)
2627 {
2628   if (adj_are_counters_enabled ())
2629     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2630   else
2631     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2632 }
2633
2634 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2635                                         vlib_node_runtime_t * node,
2636                                         vlib_frame_t * frame)
2637 {
2638   if (adj_are_counters_enabled ())
2639     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2640   else
2641     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2642 }
2643
2644 /* *INDENT-OFF* */
2645 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2646   .name = "ip4-rewrite",
2647   .vector_size = sizeof (u32),
2648
2649   .format_trace = format_ip4_rewrite_trace,
2650
2651   .n_next_nodes = IP4_REWRITE_N_NEXT,
2652   .next_nodes = {
2653     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2654     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2655     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2656   },
2657 };
2658
2659 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2660   .name = "ip4-rewrite-bcast",
2661   .vector_size = sizeof (u32),
2662
2663   .format_trace = format_ip4_rewrite_trace,
2664   .sibling_of = "ip4-rewrite",
2665 };
2666
2667 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2668   .name = "ip4-rewrite-mcast",
2669   .vector_size = sizeof (u32),
2670
2671   .format_trace = format_ip4_rewrite_trace,
2672   .sibling_of = "ip4-rewrite",
2673 };
2674
2675 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2676   .name = "ip4-mcast-midchain",
2677   .vector_size = sizeof (u32),
2678
2679   .format_trace = format_ip4_rewrite_trace,
2680   .sibling_of = "ip4-rewrite",
2681 };
2682
2683 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2684   .name = "ip4-midchain",
2685   .vector_size = sizeof (u32),
2686   .format_trace = format_ip4_forward_next_trace,
2687   .sibling_of =  "ip4-rewrite",
2688 };
/* *INDENT-ON* */
2690
2691 static int
2692 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2693 {
2694   ip4_fib_mtrie_t *mtrie0;
2695   ip4_fib_mtrie_leaf_t leaf0;
2696   u32 lbi0;
2697
2698   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2699
2700   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2701   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2702   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2703
2704   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2705
2706   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2707 }
2708
2709 static clib_error_t *
2710 test_lookup_command_fn (vlib_main_t * vm,
2711                         unformat_input_t * input, vlib_cli_command_t * cmd)
2712 {
2713   ip4_fib_t *fib;
2714   u32 table_id = 0;
2715   f64 count = 1;
2716   u32 n;
2717   int i;
2718   ip4_address_t ip4_base_address;
2719   u64 errors = 0;
2720
2721   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2722     {
2723       if (unformat (input, "table %d", &table_id))
2724         {
2725           /* Make sure the entry exists. */
2726           fib = ip4_fib_get (table_id);
2727           if ((fib) && (fib->index != table_id))
2728             return clib_error_return (0, "<fib-index> %d does not exist",
2729                                       table_id);
2730         }
2731       else if (unformat (input, "count %f", &count))
2732         ;
2733
2734       else if (unformat (input, "%U",
2735                          unformat_ip4_address, &ip4_base_address))
2736         ;
2737       else
2738         return clib_error_return (0, "unknown input `%U'",
2739                                   format_unformat_error, input);
2740     }
2741
2742   n = count;
2743
2744   for (i = 0; i < n; i++)
2745     {
2746       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2747         errors++;
2748
2749       ip4_base_address.as_u32 =
2750         clib_host_to_net_u32 (1 +
2751                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2752     }
2753
2754   if (errors)
2755     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2756   else
2757     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2758
2759   return 0;
2760 }
2761
2762 /*?
2763  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2764  * given FIB table to determine if there is a conflict with the
2765  * adjacency table. The fib-id can be determined by using the
2766  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2767  * of 0 is used.
2768  *
2769  * @todo This command uses fib-id, other commands use table-id (not
2770  * just a name, they are different indexes). Would like to change this
2771  * to table-id for consistency.
2772  *
2773  * @cliexpar
2774  * Example of how to run the test lookup command:
2775  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2776  * No errors in 2 lookups
2777  * @cliexend
2778 ?*/
2779 /* *INDENT-OFF* */
2780 VLIB_CLI_COMMAND (lookup_test_command, static) =
2781 {
2782   .path = "test lookup",
2783   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2784   .function = test_lookup_command_fn,
2785 };
2786 /* *INDENT-ON* */
2787
2788 #ifndef CLIB_MARCH_VARIANT
2789 int
2790 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2791 {
2792   u32 fib_index;
2793
2794   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2795
2796   if (~0 == fib_index)
2797     return VNET_API_ERROR_NO_SUCH_FIB;
2798
2799   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2800                                   flow_hash_config);
2801
2802   return 0;
2803 }
2804 #endif
2805
2806 static clib_error_t *
2807 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2808                              unformat_input_t * input,
2809                              vlib_cli_command_t * cmd)
2810 {
2811   int matched = 0;
2812   u32 table_id = 0;
2813   u32 flow_hash_config = 0;
2814   int rv;
2815
2816   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2817     {
2818       if (unformat (input, "table %d", &table_id))
2819         matched = 1;
2820 #define _(a,v) \
2821     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2822       foreach_flow_hash_bit
2823 #undef _
2824         else
2825         break;
2826     }
2827
2828   if (matched == 0)
2829     return clib_error_return (0, "unknown input `%U'",
2830                               format_unformat_error, input);
2831
2832   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2833   switch (rv)
2834     {
2835     case 0:
2836       break;
2837
2838     case VNET_API_ERROR_NO_SUCH_FIB:
2839       return clib_error_return (0, "no such FIB table %d", table_id);
2840
2841     default:
2842       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2843       break;
2844     }
2845
2846   return 0;
2847 }
2848
2849 /*?
2850  * Configure the set of IPv4 fields used by the flow hash.
2851  *
2852  * @cliexpar
2853  * Example of how to set the flow hash on a given table:
2854  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2856  * @cliexstart{show ip fib}
2857  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2858  * 0.0.0.0/0
2859  *   unicast-ip4-chain
2860  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2861  *     [0] [@0]: dpo-drop ip6
2862  * 0.0.0.0/32
2863  *   unicast-ip4-chain
2864  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2865  *     [0] [@0]: dpo-drop ip6
2866  * 224.0.0.0/8
2867  *   unicast-ip4-chain
2868  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2869  *     [0] [@0]: dpo-drop ip6
2870  * 6.0.1.2/32
2871  *   unicast-ip4-chain
2872  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2873  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2874  * 7.0.0.1/32
2875  *   unicast-ip4-chain
2876  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2877  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2878  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2879  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2880  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2881  * 240.0.0.0/8
2882  *   unicast-ip4-chain
2883  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2884  *     [0] [@0]: dpo-drop ip6
2885  * 255.255.255.255/32
2886  *   unicast-ip4-chain
2887  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2888  *     [0] [@0]: dpo-drop ip6
2889  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2890  * 0.0.0.0/0
2891  *   unicast-ip4-chain
2892  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2893  *     [0] [@0]: dpo-drop ip6
2894  * 0.0.0.0/32
2895  *   unicast-ip4-chain
2896  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2897  *     [0] [@0]: dpo-drop ip6
2898  * 172.16.1.0/24
2899  *   unicast-ip4-chain
2900  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2901  *     [0] [@4]: ipv4-glean: af_packet0
2902  * 172.16.1.1/32
2903  *   unicast-ip4-chain
2904  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2905  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2906  * 172.16.1.2/32
2907  *   unicast-ip4-chain
2908  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2909  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2910  * 172.16.2.0/24
2911  *   unicast-ip4-chain
2912  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2913  *     [0] [@4]: ipv4-glean: af_packet1
2914  * 172.16.2.1/32
2915  *   unicast-ip4-chain
2916  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2917  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2918  * 224.0.0.0/8
2919  *   unicast-ip4-chain
2920  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2921  *     [0] [@0]: dpo-drop ip6
2922  * 240.0.0.0/8
2923  *   unicast-ip4-chain
2924  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2925  *     [0] [@0]: dpo-drop ip6
2926  * 255.255.255.255/32
2927  *   unicast-ip4-chain
2928  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2929  *     [0] [@0]: dpo-drop ip6
2930  * @cliexend
2931 ?*/
2932 /* *INDENT-OFF* */
2933 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2934 {
2935   .path = "set ip flow-hash",
2936   .short_help =
2937   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2938   .function = set_ip_flow_hash_command_fn,
2939 };
2940 /* *INDENT-ON* */
2941
2942 #ifndef CLIB_MARCH_VARIANT
2943 int
2944 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2945                              u32 table_index)
2946 {
2947   vnet_main_t *vnm = vnet_get_main ();
2948   vnet_interface_main_t *im = &vnm->interface_main;
2949   ip4_main_t *ipm = &ip4_main;
2950   ip_lookup_main_t *lm = &ipm->lookup_main;
2951   vnet_classify_main_t *cm = &vnet_classify_main;
2952   ip4_address_t *if_addr;
2953
2954   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2955     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2956
2957   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2958     return VNET_API_ERROR_NO_SUCH_ENTRY;
2959
2960   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2961   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2962
2963   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2964
2965   if (NULL != if_addr)
2966     {
2967       fib_prefix_t pfx = {
2968         .fp_len = 32,
2969         .fp_proto = FIB_PROTOCOL_IP4,
2970         .fp_addr.ip4 = *if_addr,
2971       };
2972       u32 fib_index;
2973
2974       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2975                                                        sw_if_index);
2976
2977
2978       if (table_index != (u32) ~ 0)
2979         {
2980           dpo_id_t dpo = DPO_INVALID;
2981
2982           dpo_set (&dpo,
2983                    DPO_CLASSIFY,
2984                    DPO_PROTO_IP4,
2985                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2986
2987           fib_table_entry_special_dpo_add (fib_index,
2988                                            &pfx,
2989                                            FIB_SOURCE_CLASSIFY,
2990                                            FIB_ENTRY_FLAG_NONE, &dpo);
2991           dpo_reset (&dpo);
2992         }
2993       else
2994         {
2995           fib_table_entry_special_remove (fib_index,
2996                                           &pfx, FIB_SOURCE_CLASSIFY);
2997         }
2998     }
2999
3000   return 0;
3001 }
3002 #endif
3003
3004 static clib_error_t *
3005 set_ip_classify_command_fn (vlib_main_t * vm,
3006                             unformat_input_t * input,
3007                             vlib_cli_command_t * cmd)
3008 {
3009   u32 table_index = ~0;
3010   int table_index_set = 0;
3011   u32 sw_if_index = ~0;
3012   int rv;
3013
3014   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3015     {
3016       if (unformat (input, "table-index %d", &table_index))
3017         table_index_set = 1;
3018       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3019                          vnet_get_main (), &sw_if_index))
3020         ;
3021       else
3022         break;
3023     }
3024
3025   if (table_index_set == 0)
3026     return clib_error_return (0, "classify table-index must be specified");
3027
3028   if (sw_if_index == ~0)
3029     return clib_error_return (0, "interface / subif must be specified");
3030
3031   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3032
3033   switch (rv)
3034     {
3035     case 0:
3036       break;
3037
3038     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3039       return clib_error_return (0, "No such interface");
3040
3041     case VNET_API_ERROR_NO_SUCH_ENTRY:
3042       return clib_error_return (0, "No such classifier table");
3043     }
3044   return 0;
3045 }
3046
3047 /*?
3048  * Assign a classification table to an interface. The classification
3049  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3051  * on an interface.
3052  *
3053  * @cliexpar
3054  * Example of how to assign a classification table to an interface:
3055  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3056 ?*/
3057 /* *INDENT-OFF* */
3058 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3059 {
3060     .path = "set ip classify",
3061     .short_help =
3062     "set ip classify intfc <interface> table-index <classify-idx>",
3063     .function = set_ip_classify_command_fn,
3064 };
3065 /* *INDENT-ON* */
3066
3067 static clib_error_t *
3068 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3069 {
3070   ip4_main_t *im = &ip4_main;
3071   uword heapsize = 0;
3072
3073   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3074     {
3075       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3076         ;
3077       else
3078         return clib_error_return (0,
3079                                   "invalid heap-size parameter `%U'",
3080                                   format_unformat_error, input);
3081     }
3082
3083   im->mtrie_heap_size = heapsize;
3084
3085   return 0;
3086 }
3087
3088 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3089
3090 /*
3091  * fd.io coding-style-patch-verification: ON
3092  *
3093  * Local Variables:
3094  * eval: (c-set-style "gnu")
3095  * End:
3096  */