754bb21fe67eaf0f2e6d573c088380b3f05b7251
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
  /*
   * Thin node entry point: the whole frame is handed to ip4_lookup_inline
   * (not defined in this part of the file) and its return value is passed
   * back unchanged.
   */
95   return ip4_lookup_inline (vm, node, frame);
96 }
97
/*
 * Forward declaration: the node registrations below reference the trace
 * formatter before its definition (the definition is not in this part of
 * the file).
 */
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
99
100 /* *INDENT-OFF* */
/*
 * Graph-node registration for "ip4-lookup".
 * - vector_size: frame elements are u32-sized (the load-balance node in this
 *   file reads its frame as u32 buffer indices; presumably the same here).
 * - n_next_nodes / next_nodes: taken from IP_LOOKUP_N_NEXT and
 *   IP4_LOOKUP_NEXT_NODES, both defined outside this file section.
 */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
102 {
103   .name = "ip4-lookup",
104   .vector_size = sizeof (u32),
105   .format_trace = format_ip4_lookup_trace,
106   .n_next_nodes = IP_LOOKUP_N_NEXT,
107   .next_nodes = IP4_LOOKUP_NEXT_NODES,
108 };
109 /* *INDENT-ON* */
110
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112                                       vlib_node_runtime_t * node,
113                                       vlib_frame_t * frame)
114 {
115   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
116   u32 n_left, *from;
117   u32 thread_index = vm->thread_index;
118   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119   u16 nexts[VLIB_FRAME_SIZE], *next;
120
121   from = vlib_frame_vector_args (frame);
122   n_left = frame->n_vectors;
123   next = nexts;
124
125   vlib_get_buffers (vm, from, bufs, n_left);
126
127   while (n_left >= 4)
128     {
129       const load_balance_t *lb0, *lb1;
130       const ip4_header_t *ip0, *ip1;
131       u32 lbi0, hc0, lbi1, hc1;
132       const dpo_id_t *dpo0, *dpo1;
133
134       /* Prefetch next iteration. */
135       {
136         vlib_prefetch_buffer_header (b[2], LOAD);
137         vlib_prefetch_buffer_header (b[3], LOAD);
138
139         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
141       }
142
143       ip0 = vlib_buffer_get_current (b[0]);
144       ip1 = vlib_buffer_get_current (b[1]);
145       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
147
148       lb0 = load_balance_get (lbi0);
149       lb1 = load_balance_get (lbi1);
150
151       /*
152        * this node is for via FIBs we can re-use the hash value from the
153        * to node if present.
154        * We don't want to use the same hash value at each level in the recursion
155        * graph as that would lead to polarisation
156        */
157       hc0 = hc1 = 0;
158
159       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
160         {
161           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
162             {
163               hc0 = vnet_buffer (b[0])->ip.flow_hash =
164                 vnet_buffer (b[0])->ip.flow_hash >> 1;
165             }
166           else
167             {
168               hc0 = vnet_buffer (b[0])->ip.flow_hash =
169                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
170             }
171           dpo0 = load_balance_get_fwd_bucket
172             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
173         }
174       else
175         {
176           dpo0 = load_balance_get_bucket_i (lb0, 0);
177         }
178       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
179         {
180           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
181             {
182               hc1 = vnet_buffer (b[1])->ip.flow_hash =
183                 vnet_buffer (b[1])->ip.flow_hash >> 1;
184             }
185           else
186             {
187               hc1 = vnet_buffer (b[1])->ip.flow_hash =
188                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
189             }
190           dpo1 = load_balance_get_fwd_bucket
191             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
192         }
193       else
194         {
195           dpo1 = load_balance_get_bucket_i (lb1, 0);
196         }
197
198       next[0] = dpo0->dpoi_next_node;
199       next[1] = dpo1->dpoi_next_node;
200
201       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
203
204       vlib_increment_combined_counter
205         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
208
209       b += 2;
210       next += 2;
211       n_left -= 2;
212     }
213
214   while (n_left > 0)
215     {
216       const load_balance_t *lb0;
217       const ip4_header_t *ip0;
218       const dpo_id_t *dpo0;
219       u32 lbi0, hc0;
220
221       ip0 = vlib_buffer_get_current (b[0]);
222       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
223
224       lb0 = load_balance_get (lbi0);
225
226       hc0 = 0;
227       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
228         {
229           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
230             {
231               hc0 = vnet_buffer (b[0])->ip.flow_hash =
232                 vnet_buffer (b[0])->ip.flow_hash >> 1;
233             }
234           else
235             {
236               hc0 = vnet_buffer (b[0])->ip.flow_hash =
237                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
238             }
239           dpo0 = load_balance_get_fwd_bucket
240             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
241         }
242       else
243         {
244           dpo0 = load_balance_get_bucket_i (lb0, 0);
245         }
246
247       next[0] = dpo0->dpoi_next_node;
248       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
249
250       vlib_increment_combined_counter
251         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
252
253       b += 1;
254       next += 1;
255       n_left -= 1;
256     }
257
258   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259   if (node->flags & VLIB_NODE_FLAG_TRACE)
260     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
261
262   return frame->n_vectors;
263 }
264
265 /* *INDENT-OFF* */
/*
 * Graph-node registration for "ip4-load-balance".
 * No next nodes are listed here; the node is declared a sibling of
 * "ip4-lookup" instead (presumably so it shares that node's next-node
 * set -- confirm against the vlib node documentation).  Traces use the same
 * formatter as ip4-lookup.
 */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
267 {
268   .name = "ip4-load-balance",
269   .vector_size = sizeof (u32),
270   .sibling_of = "ip4-lookup",
271   .format_trace = format_ip4_lookup_trace,
272 };
273 /* *INDENT-ON* */
274
275 #ifndef CLIB_MARCH_VARIANT
276 /*
 * Get the first IPv4 address configured on an interface.
 *
 * The interface's addresses are walked with the "honor unnumbered" argument
 * set to 1 and the walk stops at the first entry.
 *
 * im           IPv4 main structure; only its lookup_main is used.
 * sw_if_index  interface to query.
 * result_ia    optional out-parameter.  When non-NULL it receives the
 *              interface-address object the returned address belongs to,
 *              or 0 when the interface has no address.
 *
 * Returns a pointer to the first address, or 0 if there is none.
 */
277 ip4_address_t *
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279                              ip_interface_address_t ** result_ia)
280 {
281   ip_lookup_main_t *lm = &im->lookup_main;
282   ip_interface_address_t *ia = 0;
283   ip4_address_t *result = 0;
284
285   /* *INDENT-OFF* */
286   foreach_ip_interface_address
287     (lm, ia, sw_if_index,
288      1 /* honor unnumbered */ ,
289      ({
290        ip4_address_t * a =
291          ip_interface_address_get_address (lm, ia);
292        result = a;
       /* Only the first address is wanted: leave the walk immediately. */
293        break;
294      }));
295   /* *INDENT-ON* */
  /* ia is only meaningful when an address was found. */
296   if (result_ia)
297     *result_ia = result ? ia : 0;
298   return result;
299 }
300 #endif
301
302 static void
303 ip4_add_subnet_bcast_route (u32 fib_index,
304                             fib_prefix_t *pfx,
305                             u32 sw_if_index)
306 {
307   vnet_sw_interface_flags_t iflags;
308
309   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
310
311   fib_table_entry_special_remove(fib_index,
312                                  pfx,
313                                  FIB_SOURCE_INTERFACE);
314
315   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
316     {
317       fib_table_entry_update_one_path (fib_index, pfx,
318                                        FIB_SOURCE_INTERFACE,
319                                        FIB_ENTRY_FLAG_NONE,
320                                        DPO_PROTO_IP4,
321                                        /* No next-hop address */
322                                        &ADJ_BCAST_ADDR,
323                                        sw_if_index,
324                                        // invalid FIB index
325                                        ~0,
326                                        1,
327                                        // no out-label stack
328                                        NULL,
329                                        FIB_ROUTE_PATH_FLAG_NONE);
330     }
331   else
332     {
333         fib_table_entry_special_add(fib_index,
334                                     pfx,
335                                     FIB_SOURCE_INTERFACE,
336                                     (FIB_ENTRY_FLAG_DROP |
337                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
338     }
339 }
340
341 static void
342 ip4_add_interface_routes (u32 sw_if_index,
343                           ip4_main_t * im, u32 fib_index,
344                           ip_interface_address_t * a)
345 {
346   ip_lookup_main_t *lm = &im->lookup_main;
347   ip4_address_t *address = ip_interface_address_get_address (lm, a);
348   fib_prefix_t pfx = {
349     .fp_len = a->address_length,
350     .fp_proto = FIB_PROTOCOL_IP4,
351     .fp_addr.ip4 = *address,
352   };
353
354   if (pfx.fp_len <= 30)
355     {
356       /* a /30 or shorter - add a glean for the network address */
357       fib_table_entry_update_one_path (fib_index, &pfx,
358                                        FIB_SOURCE_INTERFACE,
359                                        (FIB_ENTRY_FLAG_CONNECTED |
360                                         FIB_ENTRY_FLAG_ATTACHED),
361                                        DPO_PROTO_IP4,
362                                        /* No next-hop address */
363                                        NULL,
364                                        sw_if_index,
365                                        // invalid FIB index
366                                        ~0,
367                                        1,
368                                        // no out-label stack
369                                        NULL,
370                                        FIB_ROUTE_PATH_FLAG_NONE);
371
372       /* Add the two broadcast addresses as drop */
373       fib_prefix_t net_pfx = {
374         .fp_len = 32,
375         .fp_proto = FIB_PROTOCOL_IP4,
376         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
377       };
378       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
379         fib_table_entry_special_add(fib_index,
380                                     &net_pfx,
381                                     FIB_SOURCE_INTERFACE,
382                                     (FIB_ENTRY_FLAG_DROP |
383                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
384       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
385       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
386         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
387     }
388   else if (pfx.fp_len == 31)
389     {
390       u32 mask = clib_host_to_net_u32(1);
391       fib_prefix_t net_pfx = pfx;
392
393       net_pfx.fp_len = 32;
394       net_pfx.fp_addr.ip4.as_u32 ^= mask;
395
396       /* a /31 - add the other end as an attached host */
397       fib_table_entry_update_one_path (fib_index, &net_pfx,
398                                        FIB_SOURCE_INTERFACE,
399                                        (FIB_ENTRY_FLAG_ATTACHED),
400                                        DPO_PROTO_IP4,
401                                        &net_pfx.fp_addr,
402                                        sw_if_index,
403                                        // invalid FIB index
404                                        ~0,
405                                        1,
406                                        NULL,
407                                        FIB_ROUTE_PATH_FLAG_NONE);
408     }
409   pfx.fp_len = 32;
410
411   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
412     {
413       u32 classify_table_index =
414         lm->classify_table_index_by_sw_if_index[sw_if_index];
415       if (classify_table_index != (u32) ~ 0)
416         {
417           dpo_id_t dpo = DPO_INVALID;
418
419           dpo_set (&dpo,
420                    DPO_CLASSIFY,
421                    DPO_PROTO_IP4,
422                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
423
424           fib_table_entry_special_dpo_add (fib_index,
425                                            &pfx,
426                                            FIB_SOURCE_CLASSIFY,
427                                            FIB_ENTRY_FLAG_NONE, &dpo);
428           dpo_reset (&dpo);
429         }
430     }
431
432   fib_table_entry_update_one_path (fib_index, &pfx,
433                                    FIB_SOURCE_INTERFACE,
434                                    (FIB_ENTRY_FLAG_CONNECTED |
435                                     FIB_ENTRY_FLAG_LOCAL),
436                                    DPO_PROTO_IP4,
437                                    &pfx.fp_addr,
438                                    sw_if_index,
439                                    // invalid FIB index
440                                    ~0,
441                                    1, NULL,
442                                    FIB_ROUTE_PATH_FLAG_NONE);
443 }
444
445 static void
446 ip4_del_interface_routes (ip4_main_t * im,
447                           u32 fib_index,
448                           ip4_address_t * address, u32 address_length)
449 {
450   fib_prefix_t pfx = {
451     .fp_len = address_length,
452     .fp_proto = FIB_PROTOCOL_IP4,
453     .fp_addr.ip4 = *address,
454   };
455
456   if (pfx.fp_len <= 30)
457     {
458       fib_prefix_t net_pfx = {
459         .fp_len = 32,
460         .fp_proto = FIB_PROTOCOL_IP4,
461         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
462       };
463       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
464         fib_table_entry_special_remove(fib_index,
465                                        &net_pfx,
466                                        FIB_SOURCE_INTERFACE);
467       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
468       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
469         fib_table_entry_special_remove(fib_index,
470                                        &net_pfx,
471                                        FIB_SOURCE_INTERFACE);
472       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
473     }
474     else if (pfx.fp_len == 31)
475     {
476       u32 mask = clib_host_to_net_u32(1);
477       fib_prefix_t net_pfx = pfx;
478
479       net_pfx.fp_len = 32;
480       net_pfx.fp_addr.ip4.as_u32 ^= mask;
481
482       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
483     }
484
485   pfx.fp_len = 32;
486   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
487 }
488
489 #ifndef CLIB_MARCH_VARIANT
490 void
491 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
492 {
493   ip4_main_t *im = &ip4_main;
494
495   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
496
497   /*
498    * enable/disable only on the 1<->0 transition
499    */
500   if (is_enable)
501     {
502       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
503         return;
504     }
505   else
506     {
507       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
508       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
509         return;
510     }
511   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
512                                !is_enable, 0, 0);
513
514
515   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
516                                sw_if_index, !is_enable, 0, 0);
517
518   {
519     ip4_enable_disable_interface_callback_t *cb;
520     vec_foreach (cb, im->enable_disable_interface_callbacks)
521       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
522   }
523 }
524
525 static clib_error_t *
526 ip4_add_del_interface_address_internal (vlib_main_t * vm,
527                                         u32 sw_if_index,
528                                         ip4_address_t * address,
529                                         u32 address_length, u32 is_del)
530 {
531   vnet_main_t *vnm = vnet_get_main ();
532   ip4_main_t *im = &ip4_main;
533   ip_lookup_main_t *lm = &im->lookup_main;
534   clib_error_t *error = 0;
535   u32 if_address_index, elts_before;
536   ip4_address_fib_t ip4_af, *addr_fib = 0;
537
538   /* local0 interface doesn't support IP addressing  */
539   if (sw_if_index == 0)
540     {
541       return
542        clib_error_create ("local0 interface doesn't support IP addressing");
543     }
544
545   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
546   ip4_addr_fib_init (&ip4_af, address,
547                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
548   vec_add1 (addr_fib, ip4_af);
549
550   /*
551    * there is no support for adj-fib handling in the presence of overlapping
552    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
553    * most routers do.
554    */
555   /* *INDENT-OFF* */
556   if (!is_del)
557     {
558       /* When adding an address check that it does not conflict
559          with an existing address on any interface in this table. */
560       ip_interface_address_t *ia;
561       vnet_sw_interface_t *sif;
562
563       pool_foreach(sif, vnm->interface_main.sw_interfaces,
564       ({
565           if (im->fib_index_by_sw_if_index[sw_if_index] ==
566               im->fib_index_by_sw_if_index[sif->sw_if_index])
567             {
568               foreach_ip_interface_address
569                 (&im->lookup_main, ia, sif->sw_if_index,
570                  0 /* honor unnumbered */ ,
571                  ({
572                    ip4_address_t * x =
573                      ip_interface_address_get_address
574                      (&im->lookup_main, ia);
575                    if (ip4_destination_matches_route
576                        (im, address, x, ia->address_length) ||
577                        ip4_destination_matches_route (im,
578                                                       x,
579                                                       address,
580                                                       address_length))
581                      {
582                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
583
584                        return
585                          clib_error_create
586                          ("failed to add %U which conflicts with %U for interface %U",
587                           format_ip4_address_and_length, address,
588                           address_length,
589                           format_ip4_address_and_length, x,
590                           ia->address_length,
591                           format_vnet_sw_if_index_name, vnm,
592                           sif->sw_if_index);
593                      }
594                  }));
595             }
596       }));
597     }
598   /* *INDENT-ON* */
599
600   elts_before = pool_elts (lm->if_address_pool);
601
602   error = ip_interface_address_add_del
603     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
604   if (error)
605     goto done;
606
607   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
608
609   /* intf addr routes are added/deleted on admin up/down */
610   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
611     {
612       if (is_del)
613         ip4_del_interface_routes (im, ip4_af.fib_index, address,
614                                   address_length);
615       else
616         ip4_add_interface_routes (sw_if_index,
617                                   im, ip4_af.fib_index,
618                                   pool_elt_at_index
619                                   (lm->if_address_pool, if_address_index));
620     }
621
622   /* If pool did not grow/shrink: add duplicate address. */
623   if (elts_before != pool_elts (lm->if_address_pool))
624     {
625       ip4_add_del_interface_address_callback_t *cb;
626       vec_foreach (cb, im->add_del_interface_address_callbacks)
627         cb->function (im, cb->function_opaque, sw_if_index,
628                       address, address_length, if_address_index, is_del);
629     }
630
631 done:
632   vec_free (addr_fib);
633   return error;
634 }
635
636 clib_error_t *
637 ip4_add_del_interface_address (vlib_main_t * vm,
638                                u32 sw_if_index,
639                                ip4_address_t * address,
640                                u32 address_length, u32 is_del)
641 {
642   return ip4_add_del_interface_address_internal
643     (vm, sw_if_index, address, address_length, is_del);
644 }
645
646 void
647 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
648 {
649   ip_interface_address_t *ia;
650   ip4_main_t *im;
651
652   im = &ip4_main;
653
654   /*
655    * when directed broadcast is enabled, the subnet braodcast route will forward
656    * packets using an adjacency with a broadcast MAC. otherwise it drops
657    */
658   /* *INDENT-OFF* */
659   foreach_ip_interface_address(&im->lookup_main, ia,
660                                sw_if_index, 0,
661      ({
662        if (ia->address_length <= 30)
663          {
664            ip4_address_t *ipa;
665
666            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
667
668            fib_prefix_t pfx = {
669              .fp_len = 32,
670              .fp_proto = FIB_PROTOCOL_IP4,
671              .fp_addr = {
672                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
673              },
674            };
675
676            ip4_add_subnet_bcast_route
677              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
678                                                   sw_if_index),
679               &pfx, sw_if_index);
680          }
681      }));
682   /* *INDENT-ON* */
683 }
684 #endif
685
686 static clib_error_t *
687 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
688 {
689   ip4_main_t *im = &ip4_main;
690   ip_interface_address_t *ia;
691   ip4_address_t *a;
692   u32 is_admin_up, fib_index;
693
694   /* Fill in lookup tables with default table (0). */
695   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
696
697   vec_validate_init_empty (im->
698                            lookup_main.if_address_pool_index_by_sw_if_index,
699                            sw_if_index, ~0);
700
701   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
702
703   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
704
705   /* *INDENT-OFF* */
706   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
707                                 0 /* honor unnumbered */,
708   ({
709     a = ip_interface_address_get_address (&im->lookup_main, ia);
710     if (is_admin_up)
711       ip4_add_interface_routes (sw_if_index,
712                                 im, fib_index,
713                                 ia);
714     else
715       ip4_del_interface_routes (im, fib_index,
716                                 a, ia->address_length);
717   }));
718   /* *INDENT-ON* */
719
720   return 0;
721 }
722
723 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
724
725 /* Built-in ip4 unicast rx feature path definition */
726 /* *INDENT-OFF* */
/*
 * The "ip4-unicast" arc: entered from ip4-input / ip4-input-no-checksum,
 * ends at ip4-lookup.  The arc's index is stored through arc_index_ptr in
 * ip4_main.lookup_main.ucast_feature_arc_index.
 *
 * Each VNET_FEATURE_INIT below names one node on the arc; .runs_before
 * lists the features it must precede.  Taken together the declarations
 * give this chain:
 *   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
 *   ip4-source-check-via-any -> ip4-policer-classify ->
 *   ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass ->
 *   ip4-lookup
 * with ip4-source-and-port-range-check-rx constrained only to precede
 * ip4-policer-classify and ip4-not-enabled only to precede ip4-lookup.
 */
727 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
728 {
729   .arc_name = "ip4-unicast",
730   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
731   .last_in_arc = "ip4-lookup",
732   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
733 };
734
735 VNET_FEATURE_INIT (ip4_flow_classify, static) =
736 {
737   .arc_name = "ip4-unicast",
738   .node_name = "ip4-flow-classify",
739   .runs_before = VNET_FEATURES ("ip4-inacl"),
740 };
741
742 VNET_FEATURE_INIT (ip4_inacl, static) =
743 {
744   .arc_name = "ip4-unicast",
745   .node_name = "ip4-inacl",
746   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
747 };
748
749 VNET_FEATURE_INIT (ip4_source_check_1, static) =
750 {
751   .arc_name = "ip4-unicast",
752   .node_name = "ip4-source-check-via-rx",
753   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
754 };
755
756 VNET_FEATURE_INIT (ip4_source_check_2, static) =
757 {
758   .arc_name = "ip4-unicast",
759   .node_name = "ip4-source-check-via-any",
760   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
761 };
762
763 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
764 {
765   .arc_name = "ip4-unicast",
766   .node_name = "ip4-source-and-port-range-check-rx",
767   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
768 };
769
770 VNET_FEATURE_INIT (ip4_policer_classify, static) =
771 {
772   .arc_name = "ip4-unicast",
773   .node_name = "ip4-policer-classify",
774   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
775 };
776
777 VNET_FEATURE_INIT (ip4_ipsec, static) =
778 {
779   .arc_name = "ip4-unicast",
780   .node_name = "ipsec4-input-feature",
781   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
782 };
783
784 VNET_FEATURE_INIT (ip4_vpath, static) =
785 {
786   .arc_name = "ip4-unicast",
787   .node_name = "vpath-input-ip4",
788   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
789 };
790
791 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
792 {
793   .arc_name = "ip4-unicast",
794   .node_name = "ip4-vxlan-bypass",
795   .runs_before = VNET_FEATURES ("ip4-lookup"),
796 };
797
/*
 * "ip4-not-enabled" is the feature that ip4_sw_interface_enable_disable and
 * ip4_sw_interface_add_del in this file switch on and off per interface.
 */
798 VNET_FEATURE_INIT (ip4_not_enabled, static) =
799 {
800   .arc_name = "ip4-unicast",
801   .node_name = "ip4-not-enabled",
802   .runs_before = VNET_FEATURES ("ip4-lookup"),
803 };
804
805 VNET_FEATURE_INIT (ip4_lookup, static) =
806 {
807   .arc_name = "ip4-unicast",
808   .node_name = "ip4-lookup",
809   .runs_before = 0,     /* not before any other features */
810 };
811
812 /* Built-in ip4 multicast rx feature path definition */
/*
 * The "ip4-multicast" arc: entered from the same two input nodes as the
 * unicast arc, ends at ip4-mfib-forward-lookup.  Its index is stored in
 * ip4_main.lookup_main.mcast_feature_arc_index.  Both vpath-input-ip4 and
 * ip4-not-enabled are declared to run before the final lookup; no order is
 * declared between the two.
 */
813 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
814 {
815   .arc_name = "ip4-multicast",
816   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
817   .last_in_arc = "ip4-mfib-forward-lookup",
818   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
819 };
820
821 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
822 {
823   .arc_name = "ip4-multicast",
824   .node_name = "vpath-input-ip4",
825   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
826 };
827
828 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
829 {
830   .arc_name = "ip4-multicast",
831   .node_name = "ip4-not-enabled",
832   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
833 };
834
835 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
836 {
837   .arc_name = "ip4-multicast",
838   .node_name = "ip4-mfib-forward-lookup",
839   .runs_before = 0,     /* last feature */
840 };
841
842 /*
 * Built-in ip4 tx (output) feature path definition.
 *
 * The "ip4-output" arc is entered from ip4-rewrite, ip4-midchain and
 * ip4-dvr-dpo and ends at interface-output.  Its index is stored in
 * ip4_main.lookup_main.output_feature_arc_index.  Declared ordering:
 *   ip4-source-and-port-range-check-tx -> ip4-outacl ->
 *   ipsec4-output-feature -> interface-output
 */
843 VNET_FEATURE_ARC_INIT (ip4_output, static) =
844 {
845   .arc_name = "ip4-output",
846   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
847   .last_in_arc = "interface-output",
848   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
849 };
850
/* Source and port-range check on transmit. */
851 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
852 {
853   .arc_name = "ip4-output",
854   .node_name = "ip4-source-and-port-range-check-tx",
855   .runs_before = VNET_FEATURES ("ip4-outacl"),
856 };
857
858 VNET_FEATURE_INIT (ip4_outacl, static) =
859 {
860   .arc_name = "ip4-output",
861   .node_name = "ip4-outacl",
862   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
863 };
864
865 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
866 {
867   .arc_name = "ip4-output",
868   .node_name = "ipsec4-output-feature",
869   .runs_before = VNET_FEATURES ("interface-output"),
870 };
871
872 /* Final feature of the ip4-output arc */
873 VNET_FEATURE_INIT (ip4_interface_output, static) =
874 {
875   .arc_name = "ip4-output",
876   .node_name = "interface-output",
877   .runs_before = 0,     /* not before any other features */
878 };
879 /* *INDENT-ON* */
880
881 static clib_error_t *
882 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
883 {
884   ip4_main_t *im = &ip4_main;
885
886   /* Fill in lookup tables with default table (0). */
887   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
888   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
889
890   if (!is_add)
891     {
892       ip4_main_t *im4 = &ip4_main;
893       ip_lookup_main_t *lm4 = &im4->lookup_main;
894       ip_interface_address_t *ia = 0;
895       ip4_address_t *address;
896       vlib_main_t *vm = vlib_get_main ();
897
898       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
899       /* *INDENT-OFF* */
900       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
901       ({
902         address = ip_interface_address_get_address (lm4, ia);
903         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
904       }));
905       /* *INDENT-ON* */
906     }
907
908   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
909                                is_add, 0, 0);
910
911   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
912                                sw_if_index, is_add, 0, 0);
913
914   return /* no error */ 0;
915 }
916
917 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
918
/*
 * The definition below is compiled only when CLIB_MARCH_VARIANT is not
 * defined, so builds that define it emit no second copy of the object.
 * NOTE(review): presumably such builds see an extern declaration from a
 * header -- confirm.
 */
919 /* Global IP4 main: the single ip4_main_t instance used throughout this file. */
920 #ifndef CLIB_MARCH_VARIANT
921 ip4_main_t ip4_main;
922 #endif /* CLIB_MARCH_VARIANT */
923
924 static clib_error_t *
925 ip4_lookup_init (vlib_main_t * vm)
926 {
927   ip4_main_t *im = &ip4_main;
928   clib_error_t *error;
929   uword i;
930
931   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
932     return error;
933   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
934     return (error);
935   if ((error = vlib_call_init_function (vm, fib_module_init)))
936     return error;
937   if ((error = vlib_call_init_function (vm, mfib_module_init)))
938     return error;
939
940   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
941     {
942       u32 m;
943
944       if (i < 32)
945         m = pow2_mask (i) << (32 - i);
946       else
947         m = ~0;
948       im->fib_masks[i] = clib_host_to_net_u32 (m);
949     }
950
951   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
952
953   /* Create FIB with index 0 and table id of 0. */
954   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
955                                      FIB_SOURCE_DEFAULT_ROUTE);
956   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
957                                       MFIB_SOURCE_DEFAULT_ROUTE);
958
959   {
960     pg_node_t *pn;
961     pn = pg_get_node (ip4_lookup_node.index);
962     pn->unformat_edit = unformat_pg_ip4_header;
963   }
964
965   {
966     ethernet_arp_header_t h;
967
968     clib_memset (&h, 0, sizeof (h));
969
970 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
971 #define _8(f,v) h.f = v;
972     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
973     _16 (l3_type, ETHERNET_TYPE_IP4);
974     _8 (n_l2_address_bytes, 6);
975     _8 (n_l3_address_bytes, 4);
976     _16 (opcode, ETHERNET_ARP_OPCODE_request);
977 #undef _16
978 #undef _8
979
980     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
981                                /* data */ &h,
982                                sizeof (h),
983                                /* alloc chunk size */ 8,
984                                "ip4 arp");
985   }
986
987   return error;
988 }
989
990 VLIB_INIT_FUNCTION (ip4_lookup_init);
991
992 typedef struct
993 {
994   /* Adjacency taken. */
995   u32 dpo_index;
996   u32 flow_hash;
997   u32 fib_index;
998
999   /* Packet data, possibly *after* rewrite. */
1000   u8 packet_data[64 - 1 * sizeof (u32)];
1001 }
1002 ip4_forward_next_trace_t;
1003
1004 #ifndef CLIB_MARCH_VARIANT
1005 u8 *
1006 format_ip4_forward_next_trace (u8 * s, va_list * args)
1007 {
1008   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1009   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1010   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1011   u32 indent = format_get_indent (s);
1012   s = format (s, "%U%U",
1013               format_white_space, indent,
1014               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1015   return s;
1016 }
1017 #endif
1018
1019 static u8 *
1020 format_ip4_lookup_trace (u8 * s, va_list * args)
1021 {
1022   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1023   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1024   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1025   u32 indent = format_get_indent (s);
1026
1027   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1028               t->fib_index, t->dpo_index, t->flow_hash);
1029   s = format (s, "\n%U%U",
1030               format_white_space, indent,
1031               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1032   return s;
1033 }
1034
1035 static u8 *
1036 format_ip4_rewrite_trace (u8 * s, va_list * args)
1037 {
1038   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1039   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1040   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1041   u32 indent = format_get_indent (s);
1042
1043   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1044               t->fib_index, t->dpo_index, format_ip_adjacency,
1045               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1046   s = format (s, "\n%U%U",
1047               format_white_space, indent,
1048               format_ip_adjacency_packet_data,
1049               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1050   return s;
1051 }
1052
1053 #ifndef CLIB_MARCH_VARIANT
1054 /* Common trace function for all ip4-forward next nodes. */
1055 void
1056 ip4_forward_next_trace (vlib_main_t * vm,
1057                         vlib_node_runtime_t * node,
1058                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1059 {
1060   u32 *from, n_left;
1061   ip4_main_t *im = &ip4_main;
1062
1063   n_left = frame->n_vectors;
1064   from = vlib_frame_vector_args (frame);
1065
1066   while (n_left >= 4)
1067     {
1068       u32 bi0, bi1;
1069       vlib_buffer_t *b0, *b1;
1070       ip4_forward_next_trace_t *t0, *t1;
1071
1072       /* Prefetch next iteration. */
1073       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1074       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1075
1076       bi0 = from[0];
1077       bi1 = from[1];
1078
1079       b0 = vlib_get_buffer (vm, bi0);
1080       b1 = vlib_get_buffer (vm, bi1);
1081
1082       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1083         {
1084           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1085           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1086           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1087           t0->fib_index =
1088             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1089              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1090             vec_elt (im->fib_index_by_sw_if_index,
1091                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1092
1093           clib_memcpy_fast (t0->packet_data,
1094                             vlib_buffer_get_current (b0),
1095                             sizeof (t0->packet_data));
1096         }
1097       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1098         {
1099           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1100           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1101           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1102           t1->fib_index =
1103             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1104              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1105             vec_elt (im->fib_index_by_sw_if_index,
1106                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1107           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1108                             sizeof (t1->packet_data));
1109         }
1110       from += 2;
1111       n_left -= 2;
1112     }
1113
1114   while (n_left >= 1)
1115     {
1116       u32 bi0;
1117       vlib_buffer_t *b0;
1118       ip4_forward_next_trace_t *t0;
1119
1120       bi0 = from[0];
1121
1122       b0 = vlib_get_buffer (vm, bi0);
1123
1124       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1125         {
1126           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1127           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1128           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1129           t0->fib_index =
1130             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1131              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1132             vec_elt (im->fib_index_by_sw_if_index,
1133                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1134           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1135                             sizeof (t0->packet_data));
1136         }
1137       from += 1;
1138       n_left -= 1;
1139     }
1140 }
1141
1142 /* Compute TCP/UDP/ICMP4 checksum in software. */
1143 u16
1144 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1145                               ip4_header_t * ip0)
1146 {
1147   ip_csum_t sum0;
1148   u32 ip_header_length, payload_length_host_byte_order;
1149   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1150   u16 sum16;
1151   void *data_this_buffer;
1152
1153   /* Initialize checksum with ip header. */
1154   ip_header_length = ip4_header_bytes (ip0);
1155   payload_length_host_byte_order =
1156     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1157   sum0 =
1158     clib_host_to_net_u32 (payload_length_host_byte_order +
1159                           (ip0->protocol << 16));
1160
1161   if (BITS (uword) == 32)
1162     {
1163       sum0 =
1164         ip_csum_with_carry (sum0,
1165                             clib_mem_unaligned (&ip0->src_address, u32));
1166       sum0 =
1167         ip_csum_with_carry (sum0,
1168                             clib_mem_unaligned (&ip0->dst_address, u32));
1169     }
1170   else
1171     sum0 =
1172       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1173
1174   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1175   data_this_buffer = (void *) ip0 + ip_header_length;
1176   n_ip_bytes_this_buffer =
1177     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1178   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1179     {
1180       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1181         n_ip_bytes_this_buffer - ip_header_length : 0;
1182     }
1183   while (1)
1184     {
1185       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1186       n_bytes_left -= n_this_buffer;
1187       if (n_bytes_left == 0)
1188         break;
1189
1190       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1191       if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
1192         return 0xfefe;
1193
1194       p0 = vlib_get_buffer (vm, p0->next_buffer);
1195       data_this_buffer = vlib_buffer_get_current (p0);
1196       n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1197     }
1198
1199   sum16 = ~ip_csum_fold (sum0);
1200
1201   return sum16;
1202 }
1203
1204 u32
1205 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1206 {
1207   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1208   udp_header_t *udp0;
1209   u16 sum16;
1210
1211   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1212           || ip0->protocol == IP_PROTOCOL_UDP);
1213
1214   udp0 = (void *) (ip0 + 1);
1215   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1216     {
1217       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1218                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1219       return p0->flags;
1220     }
1221
1222   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1223
1224   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1225                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1226
1227   return p0->flags;
1228 }
1229 #endif
1230
1231 /* *INDENT-OFF* */
1232 VNET_FEATURE_ARC_INIT (ip4_local) =
1233 {
1234   .arc_name  = "ip4-local",
1235   .start_nodes = VNET_FEATURES ("ip4-local"),
1236   .last_in_arc = "ip4-local-end-of-arc",
1237 };
1238 /* *INDENT-ON* */
1239
1240 static inline void
1241 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1242                             ip4_header_t * ip, u8 is_udp, u8 * error,
1243                             u8 * good_tcp_udp)
1244 {
1245   u32 flags0;
1246   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1247   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1248   if (is_udp)
1249     {
1250       udp_header_t *udp;
1251       u32 ip_len, udp_len;
1252       i32 len_diff;
1253       udp = ip4_next_header (ip);
1254       /* Verify UDP length. */
1255       ip_len = clib_net_to_host_u16 (ip->length);
1256       udp_len = clib_net_to_host_u16 (udp->length);
1257
1258       len_diff = ip_len - udp_len;
1259       *good_tcp_udp &= len_diff >= 0;
1260       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1261     }
1262 }
1263
/*
 * Checksum-state predicates on a buffer's flags.
 *
 * Every argument and every expansion is fully parenthesized, so the macros
 * can be negated, combined with other operators, or handed an arbitrary
 * buffer expression without precedence surprises.  _b may be evaluated
 * more than once; do not pass an expression with side effects.
 */

/* Non-zero when a TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* 1 when the packet is TCP/UDP and its checksum is neither already
 * computed nor offloaded, i.e. it must be validated in software. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)         \
            || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1275
1276 static inline void
1277 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1278                          ip4_header_t * ih, u8 * error)
1279 {
1280   u8 is_udp, is_tcp_udp, good_tcp_udp;
1281
1282   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1283   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1284
1285   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1286     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1287   else
1288     good_tcp_udp = ip4_local_csum_is_valid (b);
1289
1290   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1291   *error = (is_tcp_udp && !good_tcp_udp
1292             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1293 }
1294
1295 static inline void
1296 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1297                             ip4_header_t ** ih, u8 * error)
1298 {
1299   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1300
1301   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1302   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1303
1304   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1305   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1306
1307   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1308   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1309
1310   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1311                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1312     {
1313       if (is_tcp_udp[0])
1314         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1315                                     &good_tcp_udp[0]);
1316       if (is_tcp_udp[1])
1317         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1318                                     &good_tcp_udp[1]);
1319     }
1320
1321   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1322               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1323   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1324               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1325 }
1326
1327 static inline void
1328 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1329                               vlib_buffer_t * b, u16 * next, u8 error,
1330                               u8 head_of_feature_arc)
1331 {
1332   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1333   u32 next_index;
1334
1335   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1336   b->error = error ? error_node->errors[error] : 0;
1337   if (head_of_feature_arc)
1338     {
1339       next_index = *next;
1340       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1341         {
1342           vnet_feature_arc_start (arc_index,
1343                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1344                                   &next_index, b);
1345           *next = next_index;
1346         }
1347     }
1348 }
1349
1350 typedef struct
1351 {
1352   ip4_address_t src;
1353   u32 lbi;
1354   u8 error;
1355   u8 first;
1356 } ip4_local_last_check_t;
1357
1358 static inline void
1359 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1360                      ip4_local_last_check_t * last_check, u8 * error0)
1361 {
1362   ip4_fib_mtrie_leaf_t leaf0;
1363   ip4_fib_mtrie_t *mtrie0;
1364   const dpo_id_t *dpo0;
1365   load_balance_t *lb0;
1366   u32 lbi0;
1367
1368   vnet_buffer (b)->ip.fib_index =
1369     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1370     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1371
1372   /*
1373    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1374    *  adjacency for the destination address (the local interface address).
1375    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1376    *  adjacency for the source address (the remote sender's address)
1377    */
1378   if (PREDICT_FALSE (last_check->first ||
1379                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1380     {
1381       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1382       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1383       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1384       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1385       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1386
1387       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1388         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1389       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1390
1391       lb0 = load_balance_get (lbi0);
1392       dpo0 = load_balance_get_bucket_i (lb0, 0);
1393
1394       /*
1395        * Must have a route to source otherwise we drop the packet.
1396        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1397        *
1398        * The checks are:
1399        *  - the source is a recieve => it's from us => bogus, do this
1400        *    first since it sets a different error code.
1401        *  - uRPF check for any route to source - accept if passes.
1402        *  - allow packets destined to the broadcast address from unknown sources
1403        */
1404
1405       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1406                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1407                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1408       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1409                   && !fib_urpf_check_size (lb0->lb_urpf)
1410                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1411                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1412
1413       last_check->src.as_u32 = ip0->src_address.as_u32;
1414       last_check->lbi = lbi0;
1415       last_check->error = *error0;
1416     }
1417   else
1418     {
1419       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1420         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1421       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1422       *error0 = last_check->error;
1423       last_check->first = 0;
1424     }
1425 }
1426
1427 static inline void
1428 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1429                         ip4_local_last_check_t * last_check, u8 * error)
1430 {
1431   ip4_fib_mtrie_leaf_t leaf[2];
1432   ip4_fib_mtrie_t *mtrie[2];
1433   const dpo_id_t *dpo[2];
1434   load_balance_t *lb[2];
1435   u32 not_last_hit;
1436   u32 lbi[2];
1437
1438   not_last_hit = last_check->first;
1439   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1440   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1441
1442   vnet_buffer (b[0])->ip.fib_index =
1443     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1444     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1445     vnet_buffer (b[0])->ip.fib_index;
1446
1447   vnet_buffer (b[1])->ip.fib_index =
1448     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1449     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1450     vnet_buffer (b[1])->ip.fib_index;
1451
1452   /*
1453    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1454    *  adjacency for the destination address (the local interface address).
1455    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1456    *  adjacency for the source address (the remote sender's address)
1457    */
1458   if (PREDICT_FALSE (not_last_hit))
1459     {
1460       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1461       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1462
1463       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1464       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1465
1466       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1467                                            &ip[0]->src_address, 2);
1468       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1469                                            &ip[1]->src_address, 2);
1470
1471       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1472                                            &ip[0]->src_address, 3);
1473       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1474                                            &ip[1]->src_address, 3);
1475
1476       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1477       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1478
1479       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1480         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1481       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1482
1483       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1484         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1485       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1486
1487       lb[0] = load_balance_get (lbi[0]);
1488       lb[1] = load_balance_get (lbi[1]);
1489
1490       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1491       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1492
1493       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1494                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1495                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1496       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1497                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1498                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1499                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1500
1501       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1502                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1503                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1504       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1505                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1506                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1507                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1508
1509       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1510       last_check->lbi = lbi[1];
1511       last_check->error = error[1];
1512     }
1513   else
1514     {
1515       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1516         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1517       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1518
1519       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1520         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1521       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1522
1523       error[0] = last_check->error;
1524       error[1] = last_check->error;
1525       last_check->first = 0;
1526     }
1527 }
1528
/*
 * Classification of packets arriving at ip4-local.  L4 is deliberately 0:
 * ip4_local_inline() tests the type for zero to decide whether a packet
 * gets the checksum and source checks.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1535
1536 /**
1537  * Determine packet type and next node.
1538  *
1539  * The expectation is that all packets that are not L4 will skip
1540  * checksums and source checks.
1541  */
1542 always_inline u8
1543 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1544 {
1545   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1546
1547   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1548     {
1549       *next = IP_LOCAL_NEXT_REASSEMBLY;
1550       return IP_LOCAL_PACKET_TYPE_FRAG;
1551     }
1552   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1553     {
1554       *next = lm->local_next_by_ip_protocol[ip->protocol];
1555       return IP_LOCAL_PACKET_TYPE_NAT;
1556     }
1557
1558   *next = lm->local_next_by_ip_protocol[ip->protocol];
1559   return IP_LOCAL_PACKET_TYPE_L4;
1560 }
1561
1562 static inline uword
1563 ip4_local_inline (vlib_main_t * vm,
1564                   vlib_node_runtime_t * node,
1565                   vlib_frame_t * frame, int head_of_feature_arc)
1566 {
1567   u32 *from, n_left_from;
1568   vlib_node_runtime_t *error_node =
1569     vlib_node_get_runtime (vm, ip4_input_node.index);
1570   u16 nexts[VLIB_FRAME_SIZE], *next;
1571   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1572   ip4_header_t *ip[2];
1573   u8 error[2], pt[2];
1574
1575   ip4_local_last_check_t last_check = {
1576     /*
1577      * 0.0.0.0 can appear as the source address of an IP packet,
1578      * as can any other address, hence the need to use the 'first'
1579      * member to make sure the .lbi is initialised for the first
1580      * packet.
1581      */
1582     .src = {.as_u32 = 0},
1583     .lbi = ~0,
1584     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1585     .first = 1,
1586   };
1587
1588   from = vlib_frame_vector_args (frame);
1589   n_left_from = frame->n_vectors;
1590
1591   if (node->flags & VLIB_NODE_FLAG_TRACE)
1592     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1593
1594   vlib_get_buffers (vm, from, bufs, n_left_from);
1595   b = bufs;
1596   next = nexts;
1597
1598   while (n_left_from >= 6)
1599     {
1600       u8 not_batch = 0;
1601
1602       /* Prefetch next iteration. */
1603       {
1604         vlib_prefetch_buffer_header (b[4], LOAD);
1605         vlib_prefetch_buffer_header (b[5], LOAD);
1606
1607         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1608         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1609       }
1610
1611       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1612
1613       ip[0] = vlib_buffer_get_current (b[0]);
1614       ip[1] = vlib_buffer_get_current (b[1]);
1615
1616       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1617       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1618
1619       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1620       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1621
1622       not_batch = pt[0] ^ pt[1];
1623
1624       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1625         goto skip_checks;
1626
1627       if (PREDICT_TRUE (not_batch == 0))
1628         {
1629           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1630           ip4_local_check_src_x2 (b, ip, &last_check, error);
1631         }
1632       else
1633         {
1634           if (!pt[0])
1635             {
1636               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1637               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1638             }
1639           if (!pt[1])
1640             {
1641               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1642               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1643             }
1644         }
1645
1646     skip_checks:
1647
1648       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1649                                     head_of_feature_arc);
1650       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1651                                     head_of_feature_arc);
1652
1653       b += 2;
1654       next += 2;
1655       n_left_from -= 2;
1656     }
1657
1658   while (n_left_from > 0)
1659     {
1660       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1661
1662       ip[0] = vlib_buffer_get_current (b[0]);
1663       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1664       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1665
1666       if (head_of_feature_arc == 0 || pt[0])
1667         goto skip_check;
1668
1669       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1670       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1671
1672     skip_check:
1673
1674       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1675                                     head_of_feature_arc);
1676
1677       b += 1;
1678       next += 1;
1679       n_left_from -= 1;
1680     }
1681
1682   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1683   return frame->n_vectors;
1684 }
1685
1686 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1687                                vlib_frame_t * frame)
1688 {
1689   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1690 }
1691
1692 /* *INDENT-OFF* */
1693 VLIB_REGISTER_NODE (ip4_local_node) =
1694 {
1695   .name = "ip4-local",
1696   .vector_size = sizeof (u32),
1697   .format_trace = format_ip4_forward_next_trace,
1698   .n_next_nodes = IP_LOCAL_N_NEXT,
1699   .next_nodes =
1700   {
1701     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1702     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1703     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1704     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1705     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1706   },
1707 };
1708 /* *INDENT-ON* */
1709
1710
1711 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1712                                           vlib_node_runtime_t * node,
1713                                           vlib_frame_t * frame)
1714 {
1715   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1716 }
1717
1718 /* *INDENT-OFF* */
1719 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1720   .name = "ip4-local-end-of-arc",
1721   .vector_size = sizeof (u32),
1722
1723   .format_trace = format_ip4_forward_next_trace,
1724   .sibling_of = "ip4-local",
1725 };
1726
1727 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1728   .arc_name = "ip4-local",
1729   .node_name = "ip4-local-end-of-arc",
1730   .runs_before = 0, /* not before any other features */
1731 };
1732 /* *INDENT-ON* */
1733
1734 #ifndef CLIB_MARCH_VARIANT
1735 void
1736 ip4_register_protocol (u32 protocol, u32 node_index)
1737 {
1738   vlib_main_t *vm = vlib_get_main ();
1739   ip4_main_t *im = &ip4_main;
1740   ip_lookup_main_t *lm = &im->lookup_main;
1741
1742   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1743   lm->local_next_by_ip_protocol[protocol] =
1744     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1745 }
1746
1747 void
1748 ip4_unregister_protocol (u32 protocol)
1749 {
1750   ip4_main_t *im = &ip4_main;
1751   ip_lookup_main_t *lm = &im->lookup_main;
1752
1753   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1754   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1755 }
1756 #endif
1757
1758 static clib_error_t *
1759 show_ip_local_command_fn (vlib_main_t * vm,
1760                           unformat_input_t * input, vlib_cli_command_t * cmd)
1761 {
1762   ip4_main_t *im = &ip4_main;
1763   ip_lookup_main_t *lm = &im->lookup_main;
1764   int i;
1765
1766   vlib_cli_output (vm, "Protocols handled by ip4_local");
1767   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1768     {
1769       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1770         {
1771           u32 node_index = vlib_get_node (vm,
1772                                           ip4_local_node.index)->
1773             next_nodes[lm->local_next_by_ip_protocol[i]];
1774           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1775                            format_vlib_node_name, vm, node_index);
1776         }
1777     }
1778   return 0;
1779 }
1780
1781
1782
1783 /*?
1784  * Display the set of protocols handled by the local IPv4 stack.
1785  *
1786  * @cliexpar
1787  * Example of how to display local protocol table:
1788  * @cliexstart{show ip local}
1789  * Protocols handled by ip4_local
1790  * 1
1791  * 17
1792  * 47
1793  * @cliexend
1794 ?*/
1795 /* *INDENT-OFF* */
1796 VLIB_CLI_COMMAND (show_ip_local, static) =
1797 {
1798   .path = "show ip local",
1799   .function = show_ip_local_command_fn,
1800   .short_help = "show ip local",
1801 };
1802 /* *INDENT-ON* */
1803
1804 always_inline uword
1805 ip4_arp_inline (vlib_main_t * vm,
1806                 vlib_node_runtime_t * node,
1807                 vlib_frame_t * frame, int is_glean)
1808 {
1809   vnet_main_t *vnm = vnet_get_main ();
1810   ip4_main_t *im = &ip4_main;
1811   ip_lookup_main_t *lm = &im->lookup_main;
1812   u32 *from, *to_next_drop;
1813   uword n_left_from, n_left_to_next_drop, next_index;
1814   u32 thread_index = vm->thread_index;
1815   u64 seed;
1816
1817   if (node->flags & VLIB_NODE_FLAG_TRACE)
1818     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1819
1820   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1821
1822   from = vlib_frame_vector_args (frame);
1823   n_left_from = frame->n_vectors;
1824   next_index = node->cached_next_index;
1825   if (next_index == IP4_ARP_NEXT_DROP)
1826     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1827
1828   while (n_left_from > 0)
1829     {
1830       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1831                            to_next_drop, n_left_to_next_drop);
1832
1833       while (n_left_from > 0 && n_left_to_next_drop > 0)
1834         {
1835           u32 pi0, bi0, adj_index0, sw_if_index0;
1836           ip_adjacency_t *adj0;
1837           vlib_buffer_t *p0, *b0;
1838           ip4_address_t resolve0;
1839           ethernet_arp_header_t *h0;
1840           vnet_hw_interface_t *hw_if0;
1841           u64 r0;
1842
1843           pi0 = from[0];
1844           p0 = vlib_get_buffer (vm, pi0);
1845
1846           from += 1;
1847           n_left_from -= 1;
1848           to_next_drop[0] = pi0;
1849           to_next_drop += 1;
1850           n_left_to_next_drop -= 1;
1851
1852           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1853           adj0 = adj_get (adj_index0);
1854
1855           if (is_glean)
1856             {
1857               /* resolve the packet's destination */
1858               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1859               resolve0 = ip0->dst_address;
1860             }
1861           else
1862             {
1863               /* resolve the incomplete adj */
1864               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1865             }
1866
1867           /* combine the address and interface for the hash key */
1868           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1869           r0 = (u64) resolve0.data_u32 << 32;
1870           r0 |= sw_if_index0;
1871
1872           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1873             {
1874               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1875               continue;
1876             }
1877
1878           /*
1879            * the adj has been updated to a rewrite but the node the DPO that got
1880            * us here hasn't - yet. no big deal. we'll drop while we wait.
1881            */
1882           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1883             {
1884               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1885               continue;
1886             }
1887
1888           /*
1889            * Can happen if the control-plane is programming tables
1890            * with traffic flowing; at least that's today's lame excuse.
1891            */
1892           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1893               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1894             {
1895               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1896               continue;
1897             }
1898           /* Send ARP request. */
1899           h0 =
1900             vlib_packet_template_get_packet (vm,
1901                                              &im->ip4_arp_request_packet_template,
1902                                              &bi0);
1903           /* Seems we're out of buffers */
1904           if (PREDICT_FALSE (!h0))
1905             {
1906               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1907               continue;
1908             }
1909
1910           b0 = vlib_get_buffer (vm, bi0);
1911
1912           /* copy the persistent fields from the original */
1913           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1914
1915           /* Add rewrite/encap string for ARP packet. */
1916           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1917
1918           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1919
1920           /* Src ethernet address in ARP header. */
1921           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
1922                                   hw_if0->hw_address);
1923           if (is_glean)
1924             {
1925               /* The interface's source address is stashed in the Glean Adj */
1926               h0->ip4_over_ethernet[0].ip4 =
1927                 adj0->sub_type.glean.receive_addr.ip4;
1928             }
1929           else
1930             {
1931               /* Src IP address in ARP header. */
1932               if (ip4_src_address_for_packet (lm, sw_if_index0,
1933                                               &h0->ip4_over_ethernet[0].ip4))
1934                 {
1935                   /* No source address available */
1936                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1937                   vlib_buffer_free (vm, &bi0, 1);
1938                   continue;
1939                 }
1940             }
1941           h0->ip4_over_ethernet[1].ip4 = resolve0;
1942
1943           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1944
1945           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1946           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1947           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1948
1949           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1950
1951           vlib_set_next_frame_buffer (vm, node,
1952                                       adj0->rewrite_header.next_index, bi0);
1953         }
1954
1955       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1956     }
1957
1958   return frame->n_vectors;
1959 }
1960
1961 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1962                              vlib_frame_t * frame)
1963 {
1964   return (ip4_arp_inline (vm, node, frame, 0));
1965 }
1966
1967 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1968                                vlib_frame_t * frame)
1969 {
1970   return (ip4_arp_inline (vm, node, frame, 1));
1971 }
1972
1973 static char *ip4_arp_error_strings[] = {
1974   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
1975   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
1976   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
1977   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1978   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1979   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1980 };
1981
1982 /* *INDENT-OFF* */
1983 VLIB_REGISTER_NODE (ip4_arp_node) =
1984 {
1985   .name = "ip4-arp",
1986   .vector_size = sizeof (u32),
1987   .format_trace = format_ip4_forward_next_trace,
1988   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1989   .error_strings = ip4_arp_error_strings,
1990   .n_next_nodes = IP4_ARP_N_NEXT,
1991   .next_nodes =
1992   {
1993     [IP4_ARP_NEXT_DROP] = "error-drop",
1994   },
1995 };
1996
1997 VLIB_REGISTER_NODE (ip4_glean_node) =
1998 {
1999   .name = "ip4-glean",
2000   .vector_size = sizeof (u32),
2001   .format_trace = format_ip4_forward_next_trace,
2002   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2003   .error_strings = ip4_arp_error_strings,
2004   .n_next_nodes = IP4_ARP_N_NEXT,
2005   .next_nodes = {
2006   [IP4_ARP_NEXT_DROP] = "error-drop",
2007   },
2008 };
2009 /* *INDENT-ON* */
2010
/*
 * X-macro listing the IP4_ARP_ERROR_* suffixes whose drops are added to the
 * pcap drop-trace filter by arp_notrace_init().  The caller defines _().
 */
#define foreach_notrace_ip4_arp_error   \
  _(THROTTLED)                          \
  _(RESOLVED)                           \
  _(NO_BUFFERS)                         \
  _(REQUEST_SENT)                       \
  _(NON_ARP_ADJ)                        \
  _(NO_SOURCE_ADDRESS)
2018
2019 static clib_error_t *
2020 arp_notrace_init (vlib_main_t * vm)
2021 {
2022   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2023
2024   /* don't trace ARP request packets */
2025 #define _(a)                                    \
2026     vnet_pcap_drop_trace_filter_add_del         \
2027         (rt->errors[IP4_ARP_ERROR_##a],         \
2028          1 /* is_add */);
2029   foreach_notrace_ip4_arp_error;
2030 #undef _
2031   return 0;
2032 }
2033
2034 VLIB_INIT_FUNCTION (arp_notrace_init);
2035
2036
2037 #ifndef CLIB_MARCH_VARIANT
2038 /* Send an ARP request to see if given destination is reachable on given interface. */
2039 clib_error_t *
2040 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
2041                     u8 refresh)
2042 {
2043   vnet_main_t *vnm = vnet_get_main ();
2044   ip4_main_t *im = &ip4_main;
2045   ethernet_arp_header_t *h;
2046   ip4_address_t *src;
2047   ip_interface_address_t *ia;
2048   ip_adjacency_t *adj;
2049   vnet_hw_interface_t *hi;
2050   vnet_sw_interface_t *si;
2051   vlib_buffer_t *b;
2052   adj_index_t ai;
2053   u32 bi = 0;
2054   u8 unicast_rewrite = 0;
2055
2056   si = vnet_get_sw_interface (vnm, sw_if_index);
2057
2058   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2059     {
2060       return clib_error_return (0, "%U: interface %U down",
2061                                 format_ip4_address, dst,
2062                                 format_vnet_sw_if_index_name, vnm,
2063                                 sw_if_index);
2064     }
2065
2066   src =
2067     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2068   if (!src)
2069     {
2070       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2071       return clib_error_return
2072         (0,
2073          "no matching interface address for destination %U (interface %U)",
2074          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2075          sw_if_index);
2076     }
2077
2078   h = vlib_packet_template_get_packet (vm,
2079                                        &im->ip4_arp_request_packet_template,
2080                                        &bi);
2081
2082   if (!h)
2083     return clib_error_return (0, "ARP request packet allocation failed");
2084
2085   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2086   if (PREDICT_FALSE (!hi->hw_address))
2087     {
2088       return clib_error_return (0, "%U: interface %U do not support ip probe",
2089                                 format_ip4_address, dst,
2090                                 format_vnet_sw_if_index_name, vnm,
2091                                 sw_if_index);
2092     }
2093
2094   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2095
2096   h->ip4_over_ethernet[0].ip4 = src[0];
2097   h->ip4_over_ethernet[1].ip4 = dst[0];
2098
2099   b = vlib_get_buffer (vm, bi);
2100   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2101     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2102
2103   ip46_address_t nh = {
2104     .ip4 = *dst,
2105   };
2106
2107   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2108                             VNET_LINK_IP4, &nh, sw_if_index);
2109   adj = adj_get (ai);
2110
2111   /* Peer has been previously resolved, retrieve glean adj instead */
2112   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2113     {
2114       if (refresh)
2115         unicast_rewrite = 1;
2116       else
2117         {
2118           adj_unlock (ai);
2119           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2120                                       VNET_LINK_IP4, sw_if_index, &nh);
2121           adj = adj_get (ai);
2122         }
2123     }
2124
2125   /* Add encapsulation string for software interface (e.g. ethernet header). */
2126   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2127   if (unicast_rewrite)
2128     {
2129       u16 *etype = vlib_buffer_get_current (b) - 2;
2130       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2131     }
2132   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2133
2134   {
2135     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2136     u32 *to_next = vlib_frame_vector_args (f);
2137     to_next[0] = bi;
2138     f->n_vectors = 1;
2139     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2140   }
2141
2142   adj_unlock (ai);
2143   return /* no error */ 0;
2144 }
2145 #endif
2146
/* Next-node slots used by the IPv4 rewrite worker and its helpers. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP,        /* initial disposition of every buffer */
  IP4_REWRITE_NEXT_ICMP_ERROR,  /* TTL expired, or MTU exceeded with DF set */
  IP4_REWRITE_NEXT_FRAGMENT,    /* MTU exceeded and DF not set */
  IP4_REWRITE_N_NEXT            /* Last */
} ip4_rewrite_next_t;
2154
/**
 * The bits of an IPv4 address to mask when constructing a multicast
 * MAC address.  Both variants select the same bytes in memory: the low
 * 23 bits of the address as laid out in network byte order.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif
2164
2165 always_inline void
2166 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2167                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2168 {
2169   if (packet_len > adj_packet_bytes)
2170     {
2171       *error = IP4_ERROR_MTU_EXCEEDED;
2172       if (df)
2173         {
2174           icmp4_error_set_vnet_buffer
2175             (b, ICMP4_destination_unreachable,
2176              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2177              adj_packet_bytes);
2178           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2179         }
2180       else
2181         {
2182           /* IP fragmentation */
2183           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2184                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2185           *next = IP4_REWRITE_NEXT_FRAGMENT;
2186         }
2187     }
2188 }
2189
2190 /* Decrement TTL & update checksum.
2191    Works either endian, so no need for byte swap. */
2192 static_always_inline void
2193 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2194                             u32 * error)
2195 {
2196   i32 ttl;
2197   u32 checksum;
2198   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2199     {
2200       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2201       return;
2202     }
2203
2204   ttl = ip->ttl;
2205
2206   /* Input node should have reject packets with ttl 0. */
2207   ASSERT (ip->ttl > 0);
2208
2209   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2210   checksum += checksum >= 0xffff;
2211
2212   ip->checksum = checksum;
2213   ttl -= 1;
2214   ip->ttl = ttl;
2215
2216   /*
2217    * If the ttl drops below 1 when forwarding, generate
2218    * an ICMP response.
2219    */
2220   if (PREDICT_FALSE (ttl <= 0))
2221     {
2222       *error = IP4_ERROR_TIME_EXPIRED;
2223       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2224       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2225                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2226                                    0);
2227       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2228     }
2229
2230   /* Verify checksum. */
2231   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2232           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2233 }
2234
2235
2236 always_inline uword
2237 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2238                              vlib_node_runtime_t * node,
2239                              vlib_frame_t * frame,
2240                              int do_counters, int is_midchain, int is_mcast,
2241                              int do_gso)
2242 {
2243   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2244   u32 *from = vlib_frame_vector_args (frame);
2245   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2246   u16 nexts[VLIB_FRAME_SIZE], *next;
2247   u32 n_left_from;
2248   vlib_node_runtime_t *error_node =
2249     vlib_node_get_runtime (vm, ip4_input_node.index);
2250
2251   n_left_from = frame->n_vectors;
2252   u32 thread_index = vm->thread_index;
2253
2254   vlib_get_buffers (vm, from, bufs, n_left_from);
2255   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2256
2257   if (n_left_from >= 6)
2258     {
2259       int i;
2260       for (i = 2; i < 6; i++)
2261         vlib_prefetch_buffer_header (bufs[i], LOAD);
2262     }
2263
2264   next = nexts;
2265   b = bufs;
2266   while (n_left_from >= 8)
2267     {
2268       ip_adjacency_t *adj0, *adj1;
2269       ip4_header_t *ip0, *ip1;
2270       u32 rw_len0, error0, adj_index0;
2271       u32 rw_len1, error1, adj_index1;
2272       u32 tx_sw_if_index0, tx_sw_if_index1;
2273       u8 *p;
2274
2275       vlib_prefetch_buffer_header (b[6], LOAD);
2276       vlib_prefetch_buffer_header (b[7], LOAD);
2277
2278       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2279       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2280
2281       /*
2282        * pre-fetch the per-adjacency counters
2283        */
2284       if (do_counters)
2285         {
2286           vlib_prefetch_combined_counter (&adjacency_counters,
2287                                           thread_index, adj_index0);
2288           vlib_prefetch_combined_counter (&adjacency_counters,
2289                                           thread_index, adj_index1);
2290         }
2291
2292       ip0 = vlib_buffer_get_current (b[0]);
2293       ip1 = vlib_buffer_get_current (b[1]);
2294
2295       error0 = error1 = IP4_ERROR_NONE;
2296
2297       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2298       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2299
2300       /* Rewrite packet header and updates lengths. */
2301       adj0 = adj_get (adj_index0);
2302       adj1 = adj_get (adj_index1);
2303
2304       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2305       rw_len0 = adj0[0].rewrite_header.data_bytes;
2306       rw_len1 = adj1[0].rewrite_header.data_bytes;
2307       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2308       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2309
2310       p = vlib_buffer_get_current (b[2]);
2311       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2312       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2313
2314       p = vlib_buffer_get_current (b[3]);
2315       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2316       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2317
2318       /* Check MTU of outgoing interface. */
2319       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2320       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2321
2322       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2323         ip0_len = gso_mtu_sz (b[0]);
2324       if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2325         ip1_len = gso_mtu_sz (b[1]);
2326
2327       ip4_mtu_check (b[0], ip0_len,
2328                      adj0[0].rewrite_header.max_l3_packet_bytes,
2329                      ip0->flags_and_fragment_offset &
2330                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2331                      next + 0, &error0);
2332       ip4_mtu_check (b[1], ip1_len,
2333                      adj1[0].rewrite_header.max_l3_packet_bytes,
2334                      ip1->flags_and_fragment_offset &
2335                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2336                      next + 1, &error1);
2337
2338       if (is_mcast)
2339         {
2340           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2341                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2342                     IP4_ERROR_SAME_INTERFACE : error0);
2343           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2344                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2345                     IP4_ERROR_SAME_INTERFACE : error1);
2346         }
2347
2348       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2349        * to see the IP header */
2350       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2351         {
2352           u32 next_index = adj0[0].rewrite_header.next_index;
2353           b[0]->current_data -= rw_len0;
2354           b[0]->current_length += rw_len0;
2355           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2356           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2357
2358           if (PREDICT_FALSE
2359               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2360             vnet_feature_arc_start (lm->output_feature_arc_index,
2361                                     tx_sw_if_index0, &next_index, b[0]);
2362           next[0] = next_index;
2363         }
2364       else
2365         {
2366           b[0]->error = error_node->errors[error0];
2367         }
2368       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2369         {
2370           u32 next_index = adj1[0].rewrite_header.next_index;
2371           b[1]->current_data -= rw_len1;
2372           b[1]->current_length += rw_len1;
2373
2374           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2375           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2376
2377           if (PREDICT_FALSE
2378               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2379             vnet_feature_arc_start (lm->output_feature_arc_index,
2380                                     tx_sw_if_index1, &next_index, b[1]);
2381           next[1] = next_index;
2382         }
2383       else
2384         {
2385           b[1]->error = error_node->errors[error1];
2386         }
2387       if (is_midchain)
2388         {
2389           calc_checksums (vm, b[0]);
2390           calc_checksums (vm, b[1]);
2391         }
2392       /* Guess we are only writing on simple Ethernet header. */
2393       vnet_rewrite_two_headers (adj0[0], adj1[0],
2394                                 ip0, ip1, sizeof (ethernet_header_t));
2395
2396       /*
2397        * Bump the per-adjacency counters
2398        */
2399       if (do_counters)
2400         {
2401           vlib_increment_combined_counter
2402             (&adjacency_counters,
2403              thread_index,
2404              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2405
2406           vlib_increment_combined_counter
2407             (&adjacency_counters,
2408              thread_index,
2409              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2410         }
2411
2412       if (is_midchain)
2413         {
2414           if (adj0->sub_type.midchain.fixup_func)
2415             adj0->sub_type.midchain.fixup_func
2416               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2417           if (adj1->sub_type.midchain.fixup_func)
2418             adj1->sub_type.midchain.fixup_func
2419               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2420         }
2421
2422       if (is_mcast)
2423         {
2424           /*
2425            * copy bytes from the IP address into the MAC rewrite
2426            */
2427           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2428                                       adj0->rewrite_header.dst_mcast_offset,
2429                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2430           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2431                                       adj1->rewrite_header.dst_mcast_offset,
2432                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2433         }
2434
2435       next += 2;
2436       b += 2;
2437       n_left_from -= 2;
2438     }
2439
2440   while (n_left_from > 0)
2441     {
2442       ip_adjacency_t *adj0;
2443       ip4_header_t *ip0;
2444       u32 rw_len0, adj_index0, error0;
2445       u32 tx_sw_if_index0;
2446
2447       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2448
2449       adj0 = adj_get (adj_index0);
2450
2451       if (do_counters)
2452         vlib_prefetch_combined_counter (&adjacency_counters,
2453                                         thread_index, adj_index0);
2454
2455       ip0 = vlib_buffer_get_current (b[0]);
2456
2457       error0 = IP4_ERROR_NONE;
2458
2459       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2460
2461
2462       /* Update packet buffer attributes/set output interface. */
2463       rw_len0 = adj0[0].rewrite_header.data_bytes;
2464       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2465
2466       /* Check MTU of outgoing interface. */
2467       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2468       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2469         ip0_len = gso_mtu_sz (b[0]);
2470
2471       ip4_mtu_check (b[0], ip0_len,
2472                      adj0[0].rewrite_header.max_l3_packet_bytes,
2473                      ip0->flags_and_fragment_offset &
2474                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2475                      next + 0, &error0);
2476
2477       if (is_mcast)
2478         {
2479           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2480                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2481                     IP4_ERROR_SAME_INTERFACE : error0);
2482         }
2483
2484       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2485        * to see the IP header */
2486       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2487         {
2488           u32 next_index = adj0[0].rewrite_header.next_index;
2489           b[0]->current_data -= rw_len0;
2490           b[0]->current_length += rw_len0;
2491           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2492           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2493
2494           if (PREDICT_FALSE
2495               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2496             vnet_feature_arc_start (lm->output_feature_arc_index,
2497                                     tx_sw_if_index0, &next_index, b[0]);
2498           next[0] = next_index;
2499         }
2500       else
2501         {
2502           b[0]->error = error_node->errors[error0];
2503         }
2504       if (is_midchain)
2505         {
2506           calc_checksums (vm, b[0]);
2507         }
2508       /* Guess we are only writing on simple Ethernet header. */
2509       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2510
2511       if (do_counters)
2512         vlib_increment_combined_counter
2513           (&adjacency_counters,
2514            thread_index, adj_index0, 1,
2515            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2516
2517       if (is_midchain)
2518         {
2519           if (adj0->sub_type.midchain.fixup_func)
2520             adj0->sub_type.midchain.fixup_func
2521               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2522         }
2523
2524       if (is_mcast)
2525         {
2526           /*
2527            * copy bytes from the IP address into the MAC rewrite
2528            */
2529           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2530                                       adj0->rewrite_header.dst_mcast_offset,
2531                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2532         }
2533
2534       next += 1;
2535       b += 1;
2536       n_left_from -= 1;
2537     }
2538
2539
2540   /* Need to do trace after rewrites to pick up new packet data. */
2541   if (node->flags & VLIB_NODE_FLAG_TRACE)
2542     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2543
2544   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2545   return frame->n_vectors;
2546 }
2547
2548 always_inline uword
2549 ip4_rewrite_inline (vlib_main_t * vm,
2550                     vlib_node_runtime_t * node,
2551                     vlib_frame_t * frame,
2552                     int do_counters, int is_midchain, int is_mcast)
2553 {
2554   vnet_main_t *vnm = vnet_get_main ();
2555   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2556     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2557                                         is_midchain, is_mcast,
2558                                         1 /* do_gso */ );
2559   else
2560     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2561                                         is_midchain, is_mcast,
2562                                         0 /* no do_gso */ );
2563 }
2564
2565
2566 /** @brief IPv4 rewrite node.
2567     @node ip4-rewrite
2568
2569     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2570     header checksum, fetch the ip adjacency, check the outbound mtu,
2571     apply the adjacency rewrite, and send pkts to the adjacency
2572     rewrite header's rewrite_next_index.
2573
2574     @param vm vlib_main_t corresponding to the current thread
2575     @param node vlib_node_runtime_t
2576     @param frame vlib_frame_t whose contents should be dispatched
2577
2578     @par Graph mechanics: buffer metadata, next index usage
2579
2580     @em Uses:
2581     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2582         - the rewrite adjacency index
2583     - <code>adj->lookup_next_index</code>
2584         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2585           the packet will be dropped.
2586     - <code>adj->rewrite_header</code>
2587         - Rewrite string length, rewrite string, next_index
2588
2589     @em Sets:
2590     - <code>b->current_data, b->current_length</code>
2591         - Updated net of applying the rewrite string
2592
2593     <em>Next Indices:</em>
2594     - <code> adj->rewrite_header.next_index </code>
2595       or @c ip4-drop
2596 */
2597
2598 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2599                                  vlib_frame_t * frame)
2600 {
2601   if (adj_are_counters_enabled ())
2602     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2603   else
2604     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2605 }
2606
2607 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2608                                        vlib_node_runtime_t * node,
2609                                        vlib_frame_t * frame)
2610 {
2611   if (adj_are_counters_enabled ())
2612     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2613   else
2614     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2615 }
2616
2617 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2618                                   vlib_node_runtime_t * node,
2619                                   vlib_frame_t * frame)
2620 {
2621   if (adj_are_counters_enabled ())
2622     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2623   else
2624     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2625 }
2626
2627 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2628                                        vlib_node_runtime_t * node,
2629                                        vlib_frame_t * frame)
2630 {
2631   if (adj_are_counters_enabled ())
2632     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2633   else
2634     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2635 }
2636
2637 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2638                                         vlib_node_runtime_t * node,
2639                                         vlib_frame_t * frame)
2640 {
2641   if (adj_are_counters_enabled ())
2642     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2643   else
2644     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2645 }
2646
2647 /* *INDENT-OFF* */
2648 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2649   .name = "ip4-rewrite",
2650   .vector_size = sizeof (u32),
2651
2652   .format_trace = format_ip4_rewrite_trace,
2653
2654   .n_next_nodes = IP4_REWRITE_N_NEXT,
2655   .next_nodes = {
2656     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2657     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2658     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2659   },
2660 };
2661
2662 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2663   .name = "ip4-rewrite-bcast",
2664   .vector_size = sizeof (u32),
2665
2666   .format_trace = format_ip4_rewrite_trace,
2667   .sibling_of = "ip4-rewrite",
2668 };
2669
2670 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2671   .name = "ip4-rewrite-mcast",
2672   .vector_size = sizeof (u32),
2673
2674   .format_trace = format_ip4_rewrite_trace,
2675   .sibling_of = "ip4-rewrite",
2676 };
2677
2678 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2679   .name = "ip4-mcast-midchain",
2680   .vector_size = sizeof (u32),
2681
2682   .format_trace = format_ip4_rewrite_trace,
2683   .sibling_of = "ip4-rewrite",
2684 };
2685
2686 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2687   .name = "ip4-midchain",
2688   .vector_size = sizeof (u32),
2689   .format_trace = format_ip4_forward_next_trace,
2690   .sibling_of =  "ip4-rewrite",
2691 };
2692 /* *INDENT-ON* */
2693
2694 static int
2695 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2696 {
2697   ip4_fib_mtrie_t *mtrie0;
2698   ip4_fib_mtrie_leaf_t leaf0;
2699   u32 lbi0;
2700
2701   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2702
2703   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2704   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2705   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2706
2707   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2708
2709   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2710 }
2711
2712 static clib_error_t *
2713 test_lookup_command_fn (vlib_main_t * vm,
2714                         unformat_input_t * input, vlib_cli_command_t * cmd)
2715 {
2716   ip4_fib_t *fib;
2717   u32 table_id = 0;
2718   f64 count = 1;
2719   u32 n;
2720   int i;
2721   ip4_address_t ip4_base_address;
2722   u64 errors = 0;
2723
2724   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2725     {
2726       if (unformat (input, "table %d", &table_id))
2727         {
2728           /* Make sure the entry exists. */
2729           fib = ip4_fib_get (table_id);
2730           if ((fib) && (fib->index != table_id))
2731             return clib_error_return (0, "<fib-index> %d does not exist",
2732                                       table_id);
2733         }
2734       else if (unformat (input, "count %f", &count))
2735         ;
2736
2737       else if (unformat (input, "%U",
2738                          unformat_ip4_address, &ip4_base_address))
2739         ;
2740       else
2741         return clib_error_return (0, "unknown input `%U'",
2742                                   format_unformat_error, input);
2743     }
2744
2745   n = count;
2746
2747   for (i = 0; i < n; i++)
2748     {
2749       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2750         errors++;
2751
2752       ip4_base_address.as_u32 =
2753         clib_host_to_net_u32 (1 +
2754                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2755     }
2756
2757   if (errors)
2758     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2759   else
2760     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2761
2762   return 0;
2763 }
2764
2765 /*?
2766  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2767  * given FIB table to determine if there is a conflict with the
2768  * adjacency table. The fib-id can be determined by using the
2769  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2770  * of 0 is used.
2771  *
2772  * @todo This command uses fib-id, other commands use table-id (not
2773  * just a name, they are different indexes). Would like to change this
2774  * to table-id for consistency.
2775  *
2776  * @cliexpar
2777  * Example of how to run the test lookup command:
2778  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2779  * No errors in 2 lookups
2780  * @cliexend
2781 ?*/
2782 /* *INDENT-OFF* */
2783 VLIB_CLI_COMMAND (lookup_test_command, static) =
2784 {
2785   .path = "test lookup",
2786   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2787   .function = test_lookup_command_fn,
2788 };
2789 /* *INDENT-ON* */
2790
2791 #ifndef CLIB_MARCH_VARIANT
2792 int
2793 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2794 {
2795   u32 fib_index;
2796
2797   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2798
2799   if (~0 == fib_index)
2800     return VNET_API_ERROR_NO_SUCH_FIB;
2801
2802   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2803                                   flow_hash_config);
2804
2805   return 0;
2806 }
2807 #endif
2808
2809 static clib_error_t *
2810 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2811                              unformat_input_t * input,
2812                              vlib_cli_command_t * cmd)
2813 {
2814   int matched = 0;
2815   u32 table_id = 0;
2816   u32 flow_hash_config = 0;
2817   int rv;
2818
2819   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2820     {
2821       if (unformat (input, "table %d", &table_id))
2822         matched = 1;
2823 #define _(a,v) \
2824     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2825       foreach_flow_hash_bit
2826 #undef _
2827         else
2828         break;
2829     }
2830
2831   if (matched == 0)
2832     return clib_error_return (0, "unknown input `%U'",
2833                               format_unformat_error, input);
2834
2835   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2836   switch (rv)
2837     {
2838     case 0:
2839       break;
2840
2841     case VNET_API_ERROR_NO_SUCH_FIB:
2842       return clib_error_return (0, "no such FIB table %d", table_id);
2843
2844     default:
2845       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2846       break;
2847     }
2848
2849   return 0;
2850 }
2851
2852 /*?
2853  * Configure the set of IPv4 fields used by the flow hash.
2854  *
2855  * @cliexpar
2856  * Example of how to set the flow hash on a given table:
2857  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2858  * Example of how to display the configured flow hash:
2859  * @cliexstart{show ip fib}
2860  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2861  * 0.0.0.0/0
2862  *   unicast-ip4-chain
2863  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2864  *     [0] [@0]: dpo-drop ip6
2865  * 0.0.0.0/32
2866  *   unicast-ip4-chain
2867  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2868  *     [0] [@0]: dpo-drop ip6
2869  * 224.0.0.0/8
2870  *   unicast-ip4-chain
2871  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2872  *     [0] [@0]: dpo-drop ip6
2873  * 6.0.1.2/32
2874  *   unicast-ip4-chain
2875  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2876  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2877  * 7.0.0.1/32
2878  *   unicast-ip4-chain
2879  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2880  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2881  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2882  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2883  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2884  * 240.0.0.0/8
2885  *   unicast-ip4-chain
2886  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2887  *     [0] [@0]: dpo-drop ip6
2888  * 255.255.255.255/32
2889  *   unicast-ip4-chain
2890  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2891  *     [0] [@0]: dpo-drop ip6
2892  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2893  * 0.0.0.0/0
2894  *   unicast-ip4-chain
2895  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2896  *     [0] [@0]: dpo-drop ip6
2897  * 0.0.0.0/32
2898  *   unicast-ip4-chain
2899  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2900  *     [0] [@0]: dpo-drop ip6
2901  * 172.16.1.0/24
2902  *   unicast-ip4-chain
2903  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2904  *     [0] [@4]: ipv4-glean: af_packet0
2905  * 172.16.1.1/32
2906  *   unicast-ip4-chain
2907  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2908  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2909  * 172.16.1.2/32
2910  *   unicast-ip4-chain
2911  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2912  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2913  * 172.16.2.0/24
2914  *   unicast-ip4-chain
2915  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2916  *     [0] [@4]: ipv4-glean: af_packet1
2917  * 172.16.2.1/32
2918  *   unicast-ip4-chain
2919  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2920  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2921  * 224.0.0.0/8
2922  *   unicast-ip4-chain
2923  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2924  *     [0] [@0]: dpo-drop ip6
2925  * 240.0.0.0/8
2926  *   unicast-ip4-chain
2927  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2928  *     [0] [@0]: dpo-drop ip6
2929  * 255.255.255.255/32
2930  *   unicast-ip4-chain
2931  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2932  *     [0] [@0]: dpo-drop ip6
2933  * @cliexend
2934 ?*/
2935 /* *INDENT-OFF* */
2936 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2937 {
2938   .path = "set ip flow-hash",
2939   .short_help =
2940   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2941   .function = set_ip_flow_hash_command_fn,
2942 };
2943 /* *INDENT-ON* */
2944
2945 #ifndef CLIB_MARCH_VARIANT
2946 int
2947 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2948                              u32 table_index)
2949 {
2950   vnet_main_t *vnm = vnet_get_main ();
2951   vnet_interface_main_t *im = &vnm->interface_main;
2952   ip4_main_t *ipm = &ip4_main;
2953   ip_lookup_main_t *lm = &ipm->lookup_main;
2954   vnet_classify_main_t *cm = &vnet_classify_main;
2955   ip4_address_t *if_addr;
2956
2957   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2958     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2959
2960   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2961     return VNET_API_ERROR_NO_SUCH_ENTRY;
2962
2963   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2964   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2965
2966   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2967
2968   if (NULL != if_addr)
2969     {
2970       fib_prefix_t pfx = {
2971         .fp_len = 32,
2972         .fp_proto = FIB_PROTOCOL_IP4,
2973         .fp_addr.ip4 = *if_addr,
2974       };
2975       u32 fib_index;
2976
2977       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2978                                                        sw_if_index);
2979
2980
2981       if (table_index != (u32) ~ 0)
2982         {
2983           dpo_id_t dpo = DPO_INVALID;
2984
2985           dpo_set (&dpo,
2986                    DPO_CLASSIFY,
2987                    DPO_PROTO_IP4,
2988                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2989
2990           fib_table_entry_special_dpo_add (fib_index,
2991                                            &pfx,
2992                                            FIB_SOURCE_CLASSIFY,
2993                                            FIB_ENTRY_FLAG_NONE, &dpo);
2994           dpo_reset (&dpo);
2995         }
2996       else
2997         {
2998           fib_table_entry_special_remove (fib_index,
2999                                           &pfx, FIB_SOURCE_CLASSIFY);
3000         }
3001     }
3002
3003   return 0;
3004 }
3005 #endif
3006
3007 static clib_error_t *
3008 set_ip_classify_command_fn (vlib_main_t * vm,
3009                             unformat_input_t * input,
3010                             vlib_cli_command_t * cmd)
3011 {
3012   u32 table_index = ~0;
3013   int table_index_set = 0;
3014   u32 sw_if_index = ~0;
3015   int rv;
3016
3017   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3018     {
3019       if (unformat (input, "table-index %d", &table_index))
3020         table_index_set = 1;
3021       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3022                          vnet_get_main (), &sw_if_index))
3023         ;
3024       else
3025         break;
3026     }
3027
3028   if (table_index_set == 0)
3029     return clib_error_return (0, "classify table-index must be specified");
3030
3031   if (sw_if_index == ~0)
3032     return clib_error_return (0, "interface / subif must be specified");
3033
3034   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3035
3036   switch (rv)
3037     {
3038     case 0:
3039       break;
3040
3041     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3042       return clib_error_return (0, "No such interface");
3043
3044     case VNET_API_ERROR_NO_SUCH_ENTRY:
3045       return clib_error_return (0, "No such classifier table");
3046     }
3047   return 0;
3048 }
3049
3050 /*?
3051  * Assign a classification table to an interface. The classification
3052  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3053  * commands. Once the table is created, use this command to filter packets
3054  * on an interface.
3055  *
3056  * @cliexpar
3057  * Example of how to assign a classification table to an interface:
3058  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3059 ?*/
3060 /* *INDENT-OFF* */
3061 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3062 {
3063     .path = "set ip classify",
3064     .short_help =
3065     "set ip classify intfc <interface> table-index <classify-idx>",
3066     .function = set_ip_classify_command_fn,
3067 };
3068 /* *INDENT-ON* */
3069
3070 static clib_error_t *
3071 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3072 {
3073   ip4_main_t *im = &ip4_main;
3074   uword heapsize = 0;
3075
3076   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3077     {
3078       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3079         ;
3080       else
3081         return clib_error_return (0,
3082                                   "invalid heap-size parameter `%U'",
3083                                   format_unformat_error, input);
3084     }
3085
3086   im->mtrie_heap_size = heapsize;
3087
3088   return 0;
3089 }
3090
3091 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3092
3093 /*
3094  * fd.io coding-style-patch-verification: ON
3095  *
3096  * Local Variables:
3097  * eval: (c-set-style "gnu")
3098  * End:
3099  */