a05428c047d7bd3b15e795dc73c781c469f44e7a
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame);
96 }
97
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
99
100 /* *INDENT-OFF* */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
102 {
103   .name = "ip4-lookup",
104   .vector_size = sizeof (u32),
105   .format_trace = format_ip4_lookup_trace,
106   .n_next_nodes = IP_LOOKUP_N_NEXT,
107   .next_nodes = IP4_LOOKUP_NEXT_NODES,
108 };
109 /* *INDENT-ON* */
110
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112                                       vlib_node_runtime_t * node,
113                                       vlib_frame_t * frame)
114 {
115   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
116   u32 n_left, *from;
117   u32 thread_index = vm->thread_index;
118   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119   u16 nexts[VLIB_FRAME_SIZE], *next;
120
121   from = vlib_frame_vector_args (frame);
122   n_left = frame->n_vectors;
123   next = nexts;
124
125   vlib_get_buffers (vm, from, bufs, n_left);
126
127   while (n_left >= 4)
128     {
129       const load_balance_t *lb0, *lb1;
130       const ip4_header_t *ip0, *ip1;
131       u32 lbi0, hc0, lbi1, hc1;
132       const dpo_id_t *dpo0, *dpo1;
133
134       /* Prefetch next iteration. */
135       {
136         vlib_prefetch_buffer_header (b[2], LOAD);
137         vlib_prefetch_buffer_header (b[3], LOAD);
138
139         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
141       }
142
143       ip0 = vlib_buffer_get_current (b[0]);
144       ip1 = vlib_buffer_get_current (b[1]);
145       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
147
148       lb0 = load_balance_get (lbi0);
149       lb1 = load_balance_get (lbi1);
150
151       /*
152        * this node is for via FIBs we can re-use the hash value from the
153        * to node if present.
154        * We don't want to use the same hash value at each level in the recursion
155        * graph as that would lead to polarisation
156        */
157       hc0 = hc1 = 0;
158
159       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
160         {
161           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
162             {
163               hc0 = vnet_buffer (b[0])->ip.flow_hash =
164                 vnet_buffer (b[0])->ip.flow_hash >> 1;
165             }
166           else
167             {
168               hc0 = vnet_buffer (b[0])->ip.flow_hash =
169                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
170             }
171           dpo0 = load_balance_get_fwd_bucket
172             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
173         }
174       else
175         {
176           dpo0 = load_balance_get_bucket_i (lb0, 0);
177         }
178       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
179         {
180           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
181             {
182               hc1 = vnet_buffer (b[1])->ip.flow_hash =
183                 vnet_buffer (b[1])->ip.flow_hash >> 1;
184             }
185           else
186             {
187               hc1 = vnet_buffer (b[1])->ip.flow_hash =
188                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
189             }
190           dpo1 = load_balance_get_fwd_bucket
191             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
192         }
193       else
194         {
195           dpo1 = load_balance_get_bucket_i (lb1, 0);
196         }
197
198       next[0] = dpo0->dpoi_next_node;
199       next[1] = dpo1->dpoi_next_node;
200
201       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
203
204       vlib_increment_combined_counter
205         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
208
209       b += 2;
210       next += 2;
211       n_left -= 2;
212     }
213
214   while (n_left > 0)
215     {
216       const load_balance_t *lb0;
217       const ip4_header_t *ip0;
218       const dpo_id_t *dpo0;
219       u32 lbi0, hc0;
220
221       ip0 = vlib_buffer_get_current (b[0]);
222       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
223
224       lb0 = load_balance_get (lbi0);
225
226       hc0 = 0;
227       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
228         {
229           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
230             {
231               hc0 = vnet_buffer (b[0])->ip.flow_hash =
232                 vnet_buffer (b[0])->ip.flow_hash >> 1;
233             }
234           else
235             {
236               hc0 = vnet_buffer (b[0])->ip.flow_hash =
237                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
238             }
239           dpo0 = load_balance_get_fwd_bucket
240             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
241         }
242       else
243         {
244           dpo0 = load_balance_get_bucket_i (lb0, 0);
245         }
246
247       next[0] = dpo0->dpoi_next_node;
248       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
249
250       vlib_increment_combined_counter
251         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
252
253       b += 1;
254       next += 1;
255       n_left -= 1;
256     }
257
258   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259   if (node->flags & VLIB_NODE_FLAG_TRACE)
260     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
261
262   return frame->n_vectors;
263 }
264
265 /* *INDENT-OFF* */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
267 {
268   .name = "ip4-load-balance",
269   .vector_size = sizeof (u32),
270   .sibling_of = "ip4-lookup",
271   .format_trace = format_ip4_lookup_trace,
272 };
273 /* *INDENT-ON* */
274
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
277 ip4_address_t *
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279                              ip_interface_address_t ** result_ia)
280 {
281   ip_lookup_main_t *lm = &im->lookup_main;
282   ip_interface_address_t *ia = 0;
283   ip4_address_t *result = 0;
284
285   /* *INDENT-OFF* */
286   foreach_ip_interface_address
287     (lm, ia, sw_if_index,
288      1 /* honor unnumbered */ ,
289      ({
290        ip4_address_t * a =
291          ip_interface_address_get_address (lm, ia);
292        result = a;
293        break;
294      }));
295   /* *INDENT-OFF* */
296   if (result_ia)
297     *result_ia = result ? ia : 0;
298   return result;
299 }
300
301 static void
302 ip4_add_subnet_bcast_route (u32 fib_index,
303                             fib_prefix_t *pfx,
304                             u32 sw_if_index)
305 {
306   vnet_sw_interface_flags_t iflags;
307
308   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
309
310   fib_table_entry_special_remove(fib_index,
311                                  pfx,
312                                  FIB_SOURCE_INTERFACE);
313
314   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
315     {
316       fib_table_entry_update_one_path (fib_index, pfx,
317                                        FIB_SOURCE_INTERFACE,
318                                        FIB_ENTRY_FLAG_NONE,
319                                        DPO_PROTO_IP4,
320                                        /* No next-hop address */
321                                        &ADJ_BCAST_ADDR,
322                                        sw_if_index,
323                                        // invalid FIB index
324                                        ~0,
325                                        1,
326                                        // no out-label stack
327                                        NULL,
328                                        FIB_ROUTE_PATH_FLAG_NONE);
329     }
330   else
331     {
332         fib_table_entry_special_add(fib_index,
333                                     pfx,
334                                     FIB_SOURCE_INTERFACE,
335                                     (FIB_ENTRY_FLAG_DROP |
336                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
337     }
338 }
339
340 static void
341 ip4_add_interface_routes (u32 sw_if_index,
342                           ip4_main_t * im, u32 fib_index,
343                           ip_interface_address_t * a)
344 {
345   ip_lookup_main_t *lm = &im->lookup_main;
346   ip4_address_t *address = ip_interface_address_get_address (lm, a);
347   fib_prefix_t pfx = {
348     .fp_len = a->address_length,
349     .fp_proto = FIB_PROTOCOL_IP4,
350     .fp_addr.ip4 = *address,
351   };
352
353   if (pfx.fp_len <= 30)
354     {
355       /* a /30 or shorter - add a glean for the network address */
356       fib_table_entry_update_one_path (fib_index, &pfx,
357                                        FIB_SOURCE_INTERFACE,
358                                        (FIB_ENTRY_FLAG_CONNECTED |
359                                         FIB_ENTRY_FLAG_ATTACHED),
360                                        DPO_PROTO_IP4,
361                                        /* No next-hop address */
362                                        NULL,
363                                        sw_if_index,
364                                        // invalid FIB index
365                                        ~0,
366                                        1,
367                                        // no out-label stack
368                                        NULL,
369                                        FIB_ROUTE_PATH_FLAG_NONE);
370
371       /* Add the two broadcast addresses as drop */
372       fib_prefix_t net_pfx = {
373         .fp_len = 32,
374         .fp_proto = FIB_PROTOCOL_IP4,
375         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
376       };
377       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
378         fib_table_entry_special_add(fib_index,
379                                     &net_pfx,
380                                     FIB_SOURCE_INTERFACE,
381                                     (FIB_ENTRY_FLAG_DROP |
382                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
383       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
384       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
385         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
386     }
387   else if (pfx.fp_len == 31)
388     {
389       u32 mask = clib_host_to_net_u32(1);
390       fib_prefix_t net_pfx = pfx;
391
392       net_pfx.fp_len = 32;
393       net_pfx.fp_addr.ip4.as_u32 ^= mask;
394
395       /* a /31 - add the other end as an attached host */
396       fib_table_entry_update_one_path (fib_index, &net_pfx,
397                                        FIB_SOURCE_INTERFACE,
398                                        (FIB_ENTRY_FLAG_ATTACHED),
399                                        DPO_PROTO_IP4,
400                                        &net_pfx.fp_addr,
401                                        sw_if_index,
402                                        // invalid FIB index
403                                        ~0,
404                                        1,
405                                        NULL,
406                                        FIB_ROUTE_PATH_FLAG_NONE);
407     }
408   pfx.fp_len = 32;
409
410   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
411     {
412       u32 classify_table_index =
413         lm->classify_table_index_by_sw_if_index[sw_if_index];
414       if (classify_table_index != (u32) ~ 0)
415         {
416           dpo_id_t dpo = DPO_INVALID;
417
418           dpo_set (&dpo,
419                    DPO_CLASSIFY,
420                    DPO_PROTO_IP4,
421                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
422
423           fib_table_entry_special_dpo_add (fib_index,
424                                            &pfx,
425                                            FIB_SOURCE_CLASSIFY,
426                                            FIB_ENTRY_FLAG_NONE, &dpo);
427           dpo_reset (&dpo);
428         }
429     }
430
431   fib_table_entry_update_one_path (fib_index, &pfx,
432                                    FIB_SOURCE_INTERFACE,
433                                    (FIB_ENTRY_FLAG_CONNECTED |
434                                     FIB_ENTRY_FLAG_LOCAL),
435                                    DPO_PROTO_IP4,
436                                    &pfx.fp_addr,
437                                    sw_if_index,
438                                    // invalid FIB index
439                                    ~0,
440                                    1, NULL,
441                                    FIB_ROUTE_PATH_FLAG_NONE);
442 }
443
444 static void
445 ip4_del_interface_routes (ip4_main_t * im,
446                           u32 fib_index,
447                           ip4_address_t * address, u32 address_length)
448 {
449   fib_prefix_t pfx = {
450     .fp_len = address_length,
451     .fp_proto = FIB_PROTOCOL_IP4,
452     .fp_addr.ip4 = *address,
453   };
454
455   if (pfx.fp_len <= 30)
456     {
457       fib_prefix_t net_pfx = {
458         .fp_len = 32,
459         .fp_proto = FIB_PROTOCOL_IP4,
460         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
461       };
462       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
463         fib_table_entry_special_remove(fib_index,
464                                        &net_pfx,
465                                        FIB_SOURCE_INTERFACE);
466       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
467       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
468         fib_table_entry_special_remove(fib_index,
469                                        &net_pfx,
470                                        FIB_SOURCE_INTERFACE);
471       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
472     }
473     else if (pfx.fp_len == 31)
474     {
475       u32 mask = clib_host_to_net_u32(1);
476       fib_prefix_t net_pfx = pfx;
477
478       net_pfx.fp_len = 32;
479       net_pfx.fp_addr.ip4.as_u32 ^= mask;
480
481       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
482     }
483
484   pfx.fp_len = 32;
485   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
486 }
487
488 void
489 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
490 {
491   ip4_main_t *im = &ip4_main;
492
493   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
494
495   /*
496    * enable/disable only on the 1<->0 transition
497    */
498   if (is_enable)
499     {
500       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
501         return;
502     }
503   else
504     {
505       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
506       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
507         return;
508     }
509   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
510                                !is_enable, 0, 0);
511
512
513   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
514                                sw_if_index, !is_enable, 0, 0);
515
516   {
517     ip4_enable_disable_interface_callback_t *cb;
518     vec_foreach (cb, im->enable_disable_interface_callbacks)
519       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
520   }
521 }
522
523 static clib_error_t *
524 ip4_add_del_interface_address_internal (vlib_main_t * vm,
525                                         u32 sw_if_index,
526                                         ip4_address_t * address,
527                                         u32 address_length, u32 is_del)
528 {
529   vnet_main_t *vnm = vnet_get_main ();
530   ip4_main_t *im = &ip4_main;
531   ip_lookup_main_t *lm = &im->lookup_main;
532   clib_error_t *error = 0;
533   u32 if_address_index, elts_before;
534   ip4_address_fib_t ip4_af, *addr_fib = 0;
535
536   /* local0 interface doesn't support IP addressing  */
537   if (sw_if_index == 0)
538     {
539       return
540        clib_error_create ("local0 interface doesn't support IP addressing");
541     }
542
543   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
544   ip4_addr_fib_init (&ip4_af, address,
545                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
546   vec_add1 (addr_fib, ip4_af);
547
548   /*
549    * there is no support for adj-fib handling in the presence of overlapping
550    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
551    * most routers do.
552    */
553   /* *INDENT-OFF* */
554   if (!is_del)
555     {
556       /* When adding an address check that it does not conflict
557          with an existing address on any interface in this table. */
558       ip_interface_address_t *ia;
559       vnet_sw_interface_t *sif;
560
561       pool_foreach(sif, vnm->interface_main.sw_interfaces,
562       ({
563           if (im->fib_index_by_sw_if_index[sw_if_index] ==
564               im->fib_index_by_sw_if_index[sif->sw_if_index])
565             {
566               foreach_ip_interface_address
567                 (&im->lookup_main, ia, sif->sw_if_index,
568                  0 /* honor unnumbered */ ,
569                  ({
570                    ip4_address_t * x =
571                      ip_interface_address_get_address
572                      (&im->lookup_main, ia);
573                    if (ip4_destination_matches_route
574                        (im, address, x, ia->address_length) ||
575                        ip4_destination_matches_route (im,
576                                                       x,
577                                                       address,
578                                                       address_length))
579                      {
580                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
581
582                        return
583                          clib_error_create
584                          ("failed to add %U which conflicts with %U for interface %U",
585                           format_ip4_address_and_length, address,
586                           address_length,
587                           format_ip4_address_and_length, x,
588                           ia->address_length,
589                           format_vnet_sw_if_index_name, vnm,
590                           sif->sw_if_index);
591                      }
592                  }));
593             }
594       }));
595     }
596   /* *INDENT-ON* */
597
598   elts_before = pool_elts (lm->if_address_pool);
599
600   error = ip_interface_address_add_del
601     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
602   if (error)
603     goto done;
604
605   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
606
607   if (is_del)
608     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
609   else
610     ip4_add_interface_routes (sw_if_index,
611                               im, ip4_af.fib_index,
612                               pool_elt_at_index
613                               (lm->if_address_pool, if_address_index));
614
615   /* If pool did not grow/shrink: add duplicate address. */
616   if (elts_before != pool_elts (lm->if_address_pool))
617     {
618       ip4_add_del_interface_address_callback_t *cb;
619       vec_foreach (cb, im->add_del_interface_address_callbacks)
620         cb->function (im, cb->function_opaque, sw_if_index,
621                       address, address_length, if_address_index, is_del);
622     }
623
624 done:
625   vec_free (addr_fib);
626   return error;
627 }
628
629 clib_error_t *
630 ip4_add_del_interface_address (vlib_main_t * vm,
631                                u32 sw_if_index,
632                                ip4_address_t * address,
633                                u32 address_length, u32 is_del)
634 {
635   return ip4_add_del_interface_address_internal
636     (vm, sw_if_index, address, address_length, is_del);
637 }
638
639 void
640 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
641 {
642   ip_interface_address_t *ia;
643   ip4_main_t *im;
644
645   im = &ip4_main;
646
647   /*
648    * when directed broadcast is enabled, the subnet braodcast route will forward
649    * packets using an adjacency with a broadcast MAC. otherwise it drops
650    */
651   /* *INDENT-OFF* */
652   foreach_ip_interface_address(&im->lookup_main, ia,
653                                sw_if_index, 0,
654      ({
655        if (ia->address_length <= 30)
656          {
657            ip4_address_t *ipa;
658
659            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
660
661            fib_prefix_t pfx = {
662              .fp_len = 32,
663              .fp_proto = FIB_PROTOCOL_IP4,
664              .fp_addr = {
665                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
666              },
667            };
668
669            ip4_add_subnet_bcast_route
670              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
671                                                   sw_if_index),
672               &pfx, sw_if_index);
673          }
674      }));
675   /* *INDENT-ON* */
676 }
677 #endif
678
679 /* Built-in ip4 unicast rx feature path definition */
680 /* *INDENT-OFF* */
681 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
682 {
683   .arc_name = "ip4-unicast",
684   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
685   .last_in_arc = "ip4-lookup",
686   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
687 };
688
689 VNET_FEATURE_INIT (ip4_flow_classify, static) =
690 {
691   .arc_name = "ip4-unicast",
692   .node_name = "ip4-flow-classify",
693   .runs_before = VNET_FEATURES ("ip4-inacl"),
694 };
695
696 VNET_FEATURE_INIT (ip4_inacl, static) =
697 {
698   .arc_name = "ip4-unicast",
699   .node_name = "ip4-inacl",
700   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
701 };
702
703 VNET_FEATURE_INIT (ip4_source_check_1, static) =
704 {
705   .arc_name = "ip4-unicast",
706   .node_name = "ip4-source-check-via-rx",
707   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
708 };
709
710 VNET_FEATURE_INIT (ip4_source_check_2, static) =
711 {
712   .arc_name = "ip4-unicast",
713   .node_name = "ip4-source-check-via-any",
714   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
715 };
716
717 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
718 {
719   .arc_name = "ip4-unicast",
720   .node_name = "ip4-source-and-port-range-check-rx",
721   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
722 };
723
724 VNET_FEATURE_INIT (ip4_policer_classify, static) =
725 {
726   .arc_name = "ip4-unicast",
727   .node_name = "ip4-policer-classify",
728   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
729 };
730
731 VNET_FEATURE_INIT (ip4_ipsec, static) =
732 {
733   .arc_name = "ip4-unicast",
734   .node_name = "ipsec4-input-feature",
735   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
736 };
737
738 VNET_FEATURE_INIT (ip4_vpath, static) =
739 {
740   .arc_name = "ip4-unicast",
741   .node_name = "vpath-input-ip4",
742   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
743 };
744
745 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
746 {
747   .arc_name = "ip4-unicast",
748   .node_name = "ip4-vxlan-bypass",
749   .runs_before = VNET_FEATURES ("ip4-lookup"),
750 };
751
752 VNET_FEATURE_INIT (ip4_not_enabled, static) =
753 {
754   .arc_name = "ip4-unicast",
755   .node_name = "ip4-not-enabled",
756   .runs_before = VNET_FEATURES ("ip4-lookup"),
757 };
758
759 VNET_FEATURE_INIT (ip4_lookup, static) =
760 {
761   .arc_name = "ip4-unicast",
762   .node_name = "ip4-lookup",
763   .runs_before = 0,     /* not before any other features */
764 };
765
766 /* Built-in ip4 multicast rx feature path definition */
767 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
768 {
769   .arc_name = "ip4-multicast",
770   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
771   .last_in_arc = "ip4-mfib-forward-lookup",
772   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
773 };
774
775 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
776 {
777   .arc_name = "ip4-multicast",
778   .node_name = "vpath-input-ip4",
779   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
780 };
781
782 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
783 {
784   .arc_name = "ip4-multicast",
785   .node_name = "ip4-not-enabled",
786   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
787 };
788
789 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
790 {
791   .arc_name = "ip4-multicast",
792   .node_name = "ip4-mfib-forward-lookup",
793   .runs_before = 0,     /* last feature */
794 };
795
796 /* Source and port-range check ip4 tx feature path definition */
797 VNET_FEATURE_ARC_INIT (ip4_output, static) =
798 {
799   .arc_name = "ip4-output",
800   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
801   .last_in_arc = "interface-output",
802   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
803 };
804
805 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
806 {
807   .arc_name = "ip4-output",
808   .node_name = "ip4-source-and-port-range-check-tx",
809   .runs_before = VNET_FEATURES ("ip4-outacl"),
810 };
811
812 VNET_FEATURE_INIT (ip4_outacl, static) =
813 {
814   .arc_name = "ip4-output",
815   .node_name = "ip4-outacl",
816   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
817 };
818
819 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
820 {
821   .arc_name = "ip4-output",
822   .node_name = "ipsec4-output-feature",
823   .runs_before = VNET_FEATURES ("interface-output"),
824 };
825
826 /* Built-in ip4 tx feature path definition */
827 VNET_FEATURE_INIT (ip4_interface_output, static) =
828 {
829   .arc_name = "ip4-output",
830   .node_name = "interface-output",
831   .runs_before = 0,     /* not before any other features */
832 };
833 /* *INDENT-ON* */
834
835 static clib_error_t *
836 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
837 {
838   ip4_main_t *im = &ip4_main;
839
840   /* Fill in lookup tables with default table (0). */
841   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
842   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
843
844   if (!is_add)
845     {
846       ip4_main_t *im4 = &ip4_main;
847       ip_lookup_main_t *lm4 = &im4->lookup_main;
848       ip_interface_address_t *ia = 0;
849       ip4_address_t *address;
850       vlib_main_t *vm = vlib_get_main ();
851
852       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
853       /* *INDENT-OFF* */
854       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
855       ({
856         address = ip_interface_address_get_address (lm4, ia);
857         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
858       }));
859       /* *INDENT-ON* */
860     }
861
862   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
863                                is_add, 0, 0);
864
865   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
866                                sw_if_index, is_add, 0, 0);
867
868   return /* no error */ 0;
869 }
870
871 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
872
/* Global IP4 main: the ip4_main_t instance the functions in this file reach
 * through &ip4_main.  The definition is emitted only when
 * CLIB_MARCH_VARIANT is not defined. */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
877
878 static clib_error_t *
879 ip4_lookup_init (vlib_main_t * vm)
880 {
881   ip4_main_t *im = &ip4_main;
882   clib_error_t *error;
883   uword i;
884
885   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
886     return error;
887   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
888     return (error);
889   if ((error = vlib_call_init_function (vm, fib_module_init)))
890     return error;
891   if ((error = vlib_call_init_function (vm, mfib_module_init)))
892     return error;
893
894   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
895     {
896       u32 m;
897
898       if (i < 32)
899         m = pow2_mask (i) << (32 - i);
900       else
901         m = ~0;
902       im->fib_masks[i] = clib_host_to_net_u32 (m);
903     }
904
905   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
906
907   /* Create FIB with index 0 and table id of 0. */
908   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
909                                      FIB_SOURCE_DEFAULT_ROUTE);
910   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
911                                       MFIB_SOURCE_DEFAULT_ROUTE);
912
913   {
914     pg_node_t *pn;
915     pn = pg_get_node (ip4_lookup_node.index);
916     pn->unformat_edit = unformat_pg_ip4_header;
917   }
918
919   {
920     ethernet_arp_header_t h;
921
922     clib_memset (&h, 0, sizeof (h));
923
924 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
925 #define _8(f,v) h.f = v;
926     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
927     _16 (l3_type, ETHERNET_TYPE_IP4);
928     _8 (n_l2_address_bytes, 6);
929     _8 (n_l3_address_bytes, 4);
930     _16 (opcode, ETHERNET_ARP_OPCODE_request);
931 #undef _16
932 #undef _8
933
934     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
935                                /* data */ &h,
936                                sizeof (h),
937                                /* alloc chunk size */ 8,
938                                "ip4 arp");
939   }
940
941   return error;
942 }
943
944 VLIB_INIT_FUNCTION (ip4_lookup_init);
945
/*
 * Per-packet trace record written by ip4_forward_next_trace() and decoded
 * by the format_ip4_*_trace() functions in this file.
 */
typedef struct
{
  /* Adjacency taken. */
  /* Filled from vnet_buffer()->ip.adj_index[] using the RX/TX slot the
   * tracing node asked for. */
  u32 dpo_index;
  /* Copy of vnet_buffer()->ip.flow_hash. */
  u32 flow_hash;
  /* sw_if_index[VLIB_TX] when that is set (!= ~0), otherwise the FIB index
   * bound to the RX interface. */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  /* 60 bytes copied from the buffer's current data pointer. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
957
958 #ifndef CLIB_MARCH_VARIANT
959 u8 *
960 format_ip4_forward_next_trace (u8 * s, va_list * args)
961 {
962   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
963   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
964   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
965   u32 indent = format_get_indent (s);
966   s = format (s, "%U%U",
967               format_white_space, indent,
968               format_ip4_header, t->packet_data, sizeof (t->packet_data));
969   return s;
970 }
971 #endif
972
973 static u8 *
974 format_ip4_lookup_trace (u8 * s, va_list * args)
975 {
976   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
977   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
978   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
979   u32 indent = format_get_indent (s);
980
981   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
982               t->fib_index, t->dpo_index, t->flow_hash);
983   s = format (s, "\n%U%U",
984               format_white_space, indent,
985               format_ip4_header, t->packet_data, sizeof (t->packet_data));
986   return s;
987 }
988
989 static u8 *
990 format_ip4_rewrite_trace (u8 * s, va_list * args)
991 {
992   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
993   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
994   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
995   u32 indent = format_get_indent (s);
996
997   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
998               t->fib_index, t->dpo_index, format_ip_adjacency,
999               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1000   s = format (s, "\n%U%U",
1001               format_white_space, indent,
1002               format_ip_adjacency_packet_data,
1003               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1004   return s;
1005 }
1006
1007 #ifndef CLIB_MARCH_VARIANT
1008 /* Common trace function for all ip4-forward next nodes. */
1009 void
1010 ip4_forward_next_trace (vlib_main_t * vm,
1011                         vlib_node_runtime_t * node,
1012                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1013 {
1014   u32 *from, n_left;
1015   ip4_main_t *im = &ip4_main;
1016
1017   n_left = frame->n_vectors;
1018   from = vlib_frame_vector_args (frame);
1019
1020   while (n_left >= 4)
1021     {
1022       u32 bi0, bi1;
1023       vlib_buffer_t *b0, *b1;
1024       ip4_forward_next_trace_t *t0, *t1;
1025
1026       /* Prefetch next iteration. */
1027       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1028       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1029
1030       bi0 = from[0];
1031       bi1 = from[1];
1032
1033       b0 = vlib_get_buffer (vm, bi0);
1034       b1 = vlib_get_buffer (vm, bi1);
1035
1036       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1037         {
1038           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1039           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1040           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1041           t0->fib_index =
1042             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1043              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1044             vec_elt (im->fib_index_by_sw_if_index,
1045                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1046
1047           clib_memcpy_fast (t0->packet_data,
1048                             vlib_buffer_get_current (b0),
1049                             sizeof (t0->packet_data));
1050         }
1051       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1052         {
1053           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1054           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1055           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1056           t1->fib_index =
1057             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1058              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1059             vec_elt (im->fib_index_by_sw_if_index,
1060                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1061           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1062                             sizeof (t1->packet_data));
1063         }
1064       from += 2;
1065       n_left -= 2;
1066     }
1067
1068   while (n_left >= 1)
1069     {
1070       u32 bi0;
1071       vlib_buffer_t *b0;
1072       ip4_forward_next_trace_t *t0;
1073
1074       bi0 = from[0];
1075
1076       b0 = vlib_get_buffer (vm, bi0);
1077
1078       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1079         {
1080           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1081           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1082           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1083           t0->fib_index =
1084             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1085              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1086             vec_elt (im->fib_index_by_sw_if_index,
1087                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1088           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1089                             sizeof (t0->packet_data));
1090         }
1091       from += 1;
1092       n_left -= 1;
1093     }
1094 }
1095
1096 /* Compute TCP/UDP/ICMP4 checksum in software. */
1097 u16
1098 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1099                               ip4_header_t * ip0)
1100 {
1101   ip_csum_t sum0;
1102   u32 ip_header_length, payload_length_host_byte_order;
1103   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1104   u16 sum16;
1105   void *data_this_buffer;
1106
1107   /* Initialize checksum with ip header. */
1108   ip_header_length = ip4_header_bytes (ip0);
1109   payload_length_host_byte_order =
1110     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1111   sum0 =
1112     clib_host_to_net_u32 (payload_length_host_byte_order +
1113                           (ip0->protocol << 16));
1114
1115   if (BITS (uword) == 32)
1116     {
1117       sum0 =
1118         ip_csum_with_carry (sum0,
1119                             clib_mem_unaligned (&ip0->src_address, u32));
1120       sum0 =
1121         ip_csum_with_carry (sum0,
1122                             clib_mem_unaligned (&ip0->dst_address, u32));
1123     }
1124   else
1125     sum0 =
1126       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1127
1128   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1129   data_this_buffer = (void *) ip0 + ip_header_length;
1130   n_ip_bytes_this_buffer =
1131     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1132   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1133     {
1134       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1135         n_ip_bytes_this_buffer - ip_header_length : 0;
1136     }
1137   while (1)
1138     {
1139       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1140       n_bytes_left -= n_this_buffer;
1141       if (n_bytes_left == 0)
1142         break;
1143
1144       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1145       p0 = vlib_get_buffer (vm, p0->next_buffer);
1146       data_this_buffer = vlib_buffer_get_current (p0);
1147       n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1148     }
1149
1150   sum16 = ~ip_csum_fold (sum0);
1151
1152   return sum16;
1153 }
1154
1155 u32
1156 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1157 {
1158   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1159   udp_header_t *udp0;
1160   u16 sum16;
1161
1162   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1163           || ip0->protocol == IP_PROTOCOL_UDP);
1164
1165   udp0 = (void *) (ip0 + 1);
1166   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1167     {
1168       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1169                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1170       return p0->flags;
1171     }
1172
1173   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1174
1175   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1176                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1177
1178   return p0->flags;
1179 }
1180 #endif
1181
1182 /* *INDENT-OFF* */
1183 VNET_FEATURE_ARC_INIT (ip4_local) =
1184 {
1185   .arc_name  = "ip4-local",
1186   .start_nodes = VNET_FEATURES ("ip4-local"),
1187   .last_in_arc = "ip4-local-end-of-arc",
1188 };
1189 /* *INDENT-ON* */
1190
1191 static inline void
1192 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1193                             ip4_header_t * ip, u8 is_udp, u8 * error,
1194                             u8 * good_tcp_udp)
1195 {
1196   u32 flags0;
1197   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1198   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1199   if (is_udp)
1200     {
1201       udp_header_t *udp;
1202       u32 ip_len, udp_len;
1203       i32 len_diff;
1204       udp = ip4_next_header (ip);
1205       /* Verify UDP length. */
1206       ip_len = clib_net_to_host_u16 (ip->length);
1207       udp_len = clib_net_to_host_u16 (udp->length);
1208
1209       len_diff = ip_len - udp_len;
1210       *good_tcp_udp &= len_diff >= 0;
1211       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1212     }
1213 }
1214
/*
 * Checksum-state predicates on a buffer pointer _b.
 *
 * Every expansion is fully parenthesised, and so is every use of a macro
 * argument, so the predicates can be negated or combined with other
 * operators at the call site without changing their meaning.
 */

/* Non-zero when the buffer is flagged for TCP or UDP checksum offload. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* Non-zero when a TCP/UDP packet still needs a software checksum check:
 * the checksum was neither computed already nor marked for offload. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)         \
            || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is known correct or is marked for offload, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1226
1227 static inline void
1228 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1229                          ip4_header_t * ih, u8 * error)
1230 {
1231   u8 is_udp, is_tcp_udp, good_tcp_udp;
1232
1233   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1234   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1235
1236   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1237     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1238   else
1239     good_tcp_udp = ip4_local_csum_is_valid (b);
1240
1241   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1242   *error = (is_tcp_udp && !good_tcp_udp
1243             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1244 }
1245
1246 static inline void
1247 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1248                             ip4_header_t ** ih, u8 * error)
1249 {
1250   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1251
1252   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1253   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1254
1255   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1256   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1257
1258   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1259   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1260
1261   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1262                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1263     {
1264       if (is_tcp_udp[0])
1265         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1266                                     &good_tcp_udp[0]);
1267       if (is_tcp_udp[1])
1268         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1269                                     &good_tcp_udp[1]);
1270     }
1271
1272   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1273               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1274   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1275               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1276 }
1277
1278 static inline void
1279 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1280                               vlib_buffer_t * b, u16 * next, u8 error,
1281                               u8 head_of_feature_arc)
1282 {
1283   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1284   u32 next_index;
1285
1286   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1287   b->error = error ? error_node->errors[error] : 0;
1288   if (head_of_feature_arc)
1289     {
1290       next_index = *next;
1291       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1292         {
1293           vnet_feature_arc_start (arc_index,
1294                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1295                                   &next_index, b);
1296           *next = next_index;
1297         }
1298     }
1299 }
1300
1301 typedef struct
1302 {
1303   ip4_address_t src;
1304   u32 lbi;
1305   u8 error;
1306   u8 first;
1307 } ip4_local_last_check_t;
1308
1309 static inline void
1310 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1311                      ip4_local_last_check_t * last_check, u8 * error0)
1312 {
1313   ip4_fib_mtrie_leaf_t leaf0;
1314   ip4_fib_mtrie_t *mtrie0;
1315   const dpo_id_t *dpo0;
1316   load_balance_t *lb0;
1317   u32 lbi0;
1318
1319   vnet_buffer (b)->ip.fib_index =
1320     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1321     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1322
1323   /*
1324    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1325    *  adjacency for the destination address (the local interface address).
1326    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1327    *  adjacency for the source address (the remote sender's address)
1328    */
1329   if (PREDICT_FALSE (last_check->first ||
1330                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1331     {
1332       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1333       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1334       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1335       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1336       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1337
1338       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1339         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1340       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1341
1342       lb0 = load_balance_get (lbi0);
1343       dpo0 = load_balance_get_bucket_i (lb0, 0);
1344
1345       /*
1346        * Must have a route to source otherwise we drop the packet.
1347        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1348        *
1349        * The checks are:
1350        *  - the source is a recieve => it's from us => bogus, do this
1351        *    first since it sets a different error code.
1352        *  - uRPF check for any route to source - accept if passes.
1353        *  - allow packets destined to the broadcast address from unknown sources
1354        */
1355
1356       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1357                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1358                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1359       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1360                   && !fib_urpf_check_size (lb0->lb_urpf)
1361                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1362                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1363
1364       last_check->src.as_u32 = ip0->src_address.as_u32;
1365       last_check->lbi = lbi0;
1366       last_check->error = *error0;
1367     }
1368   else
1369     {
1370       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1371         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1372       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1373       *error0 = last_check->error;
1374       last_check->first = 0;
1375     }
1376 }
1377
1378 static inline void
1379 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1380                         ip4_local_last_check_t * last_check, u8 * error)
1381 {
1382   ip4_fib_mtrie_leaf_t leaf[2];
1383   ip4_fib_mtrie_t *mtrie[2];
1384   const dpo_id_t *dpo[2];
1385   load_balance_t *lb[2];
1386   u32 not_last_hit;
1387   u32 lbi[2];
1388
1389   not_last_hit = last_check->first;
1390   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1391   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1392
1393   vnet_buffer (b[0])->ip.fib_index =
1394     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1395     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1396     vnet_buffer (b[0])->ip.fib_index;
1397
1398   vnet_buffer (b[1])->ip.fib_index =
1399     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1400     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1401     vnet_buffer (b[1])->ip.fib_index;
1402
1403   /*
1404    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1405    *  adjacency for the destination address (the local interface address).
1406    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1407    *  adjacency for the source address (the remote sender's address)
1408    */
1409   if (PREDICT_FALSE (not_last_hit))
1410     {
1411       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1412       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1413
1414       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1415       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1416
1417       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1418                                            &ip[0]->src_address, 2);
1419       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1420                                            &ip[1]->src_address, 2);
1421
1422       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1423                                            &ip[0]->src_address, 3);
1424       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1425                                            &ip[1]->src_address, 3);
1426
1427       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1428       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1429
1430       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1431         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1432       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1433
1434       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1435         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1436       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1437
1438       lb[0] = load_balance_get (lbi[0]);
1439       lb[1] = load_balance_get (lbi[1]);
1440
1441       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1442       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1443
1444       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1445                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1446                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1447       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1448                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1449                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1450                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1451
1452       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1453                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1454                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1455       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1456                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1457                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1458                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1459
1460       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1461       last_check->lbi = lbi[1];
1462       last_check->error = error[1];
1463     }
1464   else
1465     {
1466       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1467         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1468       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1469
1470       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1471         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1472       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1473
1474       error[0] = last_check->error;
1475       error[1] = last_check->error;
1476       last_check->first = 0;
1477     }
1478 }
1479
/*
 * Packet classes distinguished by ip4-local.  The values are spelled out
 * because the callers in this file test a class for being non-zero and
 * XOR two classes to detect a mixed pair: L4 has to be 0.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1486
1487 /**
1488  * Determine packet type and next node.
1489  *
1490  * The expectation is that all packets that are not L4 will skip
1491  * checksums and source checks.
1492  */
1493 always_inline u8
1494 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1495 {
1496   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1497
1498   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1499     {
1500       *next = IP_LOCAL_NEXT_REASSEMBLY;
1501       return IP_LOCAL_PACKET_TYPE_FRAG;
1502     }
1503   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1504     {
1505       *next = lm->local_next_by_ip_protocol[ip->protocol];
1506       return IP_LOCAL_PACKET_TYPE_NAT;
1507     }
1508
1509   *next = lm->local_next_by_ip_protocol[ip->protocol];
1510   return IP_LOCAL_PACKET_TYPE_L4;
1511 }
1512
1513 static inline uword
1514 ip4_local_inline (vlib_main_t * vm,
1515                   vlib_node_runtime_t * node,
1516                   vlib_frame_t * frame, int head_of_feature_arc)
1517 {
1518   u32 *from, n_left_from;
1519   vlib_node_runtime_t *error_node =
1520     vlib_node_get_runtime (vm, ip4_input_node.index);
1521   u16 nexts[VLIB_FRAME_SIZE], *next;
1522   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1523   ip4_header_t *ip[2];
1524   u8 error[2], pt[2];
1525
1526   ip4_local_last_check_t last_check = {
1527     /*
1528      * 0.0.0.0 can appear as the source address of an IP packet,
1529      * as can any other address, hence the need to use the 'first'
1530      * member to make sure the .lbi is initialised for the first
1531      * packet.
1532      */
1533     .src = {.as_u32 = 0},
1534     .lbi = ~0,
1535     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1536     .first = 1,
1537   };
1538
1539   from = vlib_frame_vector_args (frame);
1540   n_left_from = frame->n_vectors;
1541
1542   if (node->flags & VLIB_NODE_FLAG_TRACE)
1543     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1544
1545   vlib_get_buffers (vm, from, bufs, n_left_from);
1546   b = bufs;
1547   next = nexts;
1548
1549   while (n_left_from >= 6)
1550     {
1551       u8 not_batch = 0;
1552
1553       /* Prefetch next iteration. */
1554       {
1555         vlib_prefetch_buffer_header (b[4], LOAD);
1556         vlib_prefetch_buffer_header (b[5], LOAD);
1557
1558         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1559         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1560       }
1561
1562       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1563
1564       ip[0] = vlib_buffer_get_current (b[0]);
1565       ip[1] = vlib_buffer_get_current (b[1]);
1566
1567       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1568       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1569
1570       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1571       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1572
1573       not_batch = pt[0] ^ pt[1];
1574
1575       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1576         goto skip_checks;
1577
1578       if (PREDICT_TRUE (not_batch == 0))
1579         {
1580           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1581           ip4_local_check_src_x2 (b, ip, &last_check, error);
1582         }
1583       else
1584         {
1585           if (!pt[0])
1586             {
1587               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1588               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1589             }
1590           if (!pt[1])
1591             {
1592               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1593               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1594             }
1595         }
1596
1597     skip_checks:
1598
1599       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1600                                     head_of_feature_arc);
1601       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1602                                     head_of_feature_arc);
1603
1604       b += 2;
1605       next += 2;
1606       n_left_from -= 2;
1607     }
1608
1609   while (n_left_from > 0)
1610     {
1611       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1612
1613       ip[0] = vlib_buffer_get_current (b[0]);
1614       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1615       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1616
1617       if (head_of_feature_arc == 0 || pt[0])
1618         goto skip_check;
1619
1620       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1621       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1622
1623     skip_check:
1624
1625       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1626                                     head_of_feature_arc);
1627
1628       b += 1;
1629       next += 1;
1630       n_left_from -= 1;
1631     }
1632
1633   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1634   return frame->n_vectors;
1635 }
1636
1637 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1638                                vlib_frame_t * frame)
1639 {
1640   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1641 }
1642
1643 /* *INDENT-OFF* */
1644 VLIB_REGISTER_NODE (ip4_local_node) =
1645 {
1646   .name = "ip4-local",
1647   .vector_size = sizeof (u32),
1648   .format_trace = format_ip4_forward_next_trace,
1649   .n_next_nodes = IP_LOCAL_N_NEXT,
1650   .next_nodes =
1651   {
1652     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1653     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1654     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1655     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1656     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1657   },
1658 };
1659 /* *INDENT-ON* */
1660
1661
1662 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1663                                           vlib_node_runtime_t * node,
1664                                           vlib_frame_t * frame)
1665 {
1666   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1667 }
1668
1669 /* *INDENT-OFF* */
1670 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1671   .name = "ip4-local-end-of-arc",
1672   .vector_size = sizeof (u32),
1673
1674   .format_trace = format_ip4_forward_next_trace,
1675   .sibling_of = "ip4-local",
1676 };
1677
1678 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1679   .arc_name = "ip4-local",
1680   .node_name = "ip4-local-end-of-arc",
1681   .runs_before = 0, /* not before any other features */
1682 };
1683 /* *INDENT-ON* */
1684
1685 #ifndef CLIB_MARCH_VARIANT
1686 void
1687 ip4_register_protocol (u32 protocol, u32 node_index)
1688 {
1689   vlib_main_t *vm = vlib_get_main ();
1690   ip4_main_t *im = &ip4_main;
1691   ip_lookup_main_t *lm = &im->lookup_main;
1692
1693   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1694   lm->local_next_by_ip_protocol[protocol] =
1695     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1696 }
1697
1698 void
1699 ip4_unregister_protocol (u32 protocol)
1700 {
1701   ip4_main_t *im = &ip4_main;
1702   ip_lookup_main_t *lm = &im->lookup_main;
1703
1704   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1705   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1706 }
1707 #endif
1708
1709 static clib_error_t *
1710 show_ip_local_command_fn (vlib_main_t * vm,
1711                           unformat_input_t * input, vlib_cli_command_t * cmd)
1712 {
1713   ip4_main_t *im = &ip4_main;
1714   ip_lookup_main_t *lm = &im->lookup_main;
1715   int i;
1716
1717   vlib_cli_output (vm, "Protocols handled by ip4_local");
1718   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1719     {
1720       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1721         {
1722           u32 node_index = vlib_get_node (vm,
1723                                           ip4_local_node.index)->
1724             next_nodes[lm->local_next_by_ip_protocol[i]];
1725           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1726                            format_vlib_node_name, vm, node_index);
1727         }
1728     }
1729   return 0;
1730 }
1731
1732
1733
1734 /*?
1735  * Display the set of protocols handled by the local IPv4 stack.
1736  *
1737  * @cliexpar
1738  * Example of how to display local protocol table:
1739  * @cliexstart{show ip local}
1740  * Protocols handled by ip4_local
1741  * 1
1742  * 17
1743  * 47
1744  * @cliexend
1745 ?*/
1746 /* *INDENT-OFF* */
1747 VLIB_CLI_COMMAND (show_ip_local, static) =
1748 {
1749   .path = "show ip local",
1750   .function = show_ip_local_command_fn,
1751   .short_help = "show ip local",
1752 };
1753 /* *INDENT-ON* */
1754
/*
 * Common worker for the ip4-arp (is_glean == 0) and ip4-glean
 * (is_glean == 1) node functions.
 *
 * Every buffer of the frame is enqueued to IP4_ARP_NEXT_DROP with p0->error
 * set to one of the IP4_ARP_ERROR_* codes. Unless the request is throttled,
 * the adjacency is already a rewrite or of an unexpected type, no buffer can
 * be taken from the template, or no source address is found, a new ARP
 * request is built from im->ip4_arp_request_packet_template and handed to
 * adj0->rewrite_header.next_index.
 *
 * is_glean selects the address being resolved (the packet's destination vs.
 * the adjacency's next hop) and where the sender IP comes from (the glean
 * adjacency's receive_addr vs. ip4_src_address_for_packet()).
 *
 * Returns frame->n_vectors.
 *
 * NOTE(review): next_index is assigned below but never read afterwards in
 * this function.
 */
1755 always_inline uword
1756 ip4_arp_inline (vlib_main_t * vm,
1757                 vlib_node_runtime_t * node,
1758                 vlib_frame_t * frame, int is_glean)
1759 {
1760   vnet_main_t *vnm = vnet_get_main ();
1761   ip4_main_t *im = &ip4_main;
1762   ip_lookup_main_t *lm = &im->lookup_main;
1763   u32 *from, *to_next_drop;
1764   uword n_left_from, n_left_to_next_drop, next_index;
1765   u32 thread_index = vm->thread_index;
1766   u64 seed;
1767
1768   if (node->flags & VLIB_NODE_FLAG_TRACE)
1769     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1770
       /* Seed used by every throttle_check() call made for this frame. */
1771   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1772
1773   from = vlib_frame_vector_args (frame);
1774   n_left_from = frame->n_vectors;
1775   next_index = node->cached_next_index;
1776   if (next_index == IP4_ARP_NEXT_DROP)
1777     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1778
1779   while (n_left_from > 0)
1780     {
1781       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1782                            to_next_drop, n_left_to_next_drop);
1783
1784       while (n_left_from > 0 && n_left_to_next_drop > 0)
1785         {
1786           u32 pi0, bi0, adj_index0, sw_if_index0;
1787           ip_adjacency_t *adj0;
1788           vlib_buffer_t *p0, *b0;
1789           ip4_address_t resolve0;
1790           ethernet_arp_header_t *h0;
1791           vnet_hw_interface_t *hw_if0;
1792           u64 r0;
1793
1794           pi0 = from[0];
1795           p0 = vlib_get_buffer (vm, pi0);
1796
               /* The original packet is queued to the drop next up front;
                * the p0->error assignments below record the reason. */
1797           from += 1;
1798           n_left_from -= 1;
1799           to_next_drop[0] = pi0;
1800           to_next_drop += 1;
1801           n_left_to_next_drop -= 1;
1802
1803           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1804           adj0 = adj_get (adj_index0);
1805
1806           if (is_glean)
1807             {
1808               /* resolve the packet's destination */
1809               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1810               resolve0 = ip0->dst_address;
1811             }
1812           else
1813             {
1814               /* resolve the incomplete adj */
1815               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1816             }
1817
1818           /* combine the address and interface for the hash key */
1819           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1820           r0 = (u64) resolve0.data_u32 << 32;
1821           r0 |= sw_if_index0;
1822
1823           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1824             {
1825               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1826               continue;
1827             }
1828
1829           /*
1830            * the adj has been updated to a rewrite but the node the DPO that got
1831            * us here hasn't - yet. no big deal. we'll drop while we wait.
1832            */
1833           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1834             {
1835               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1836               continue;
1837             }
1838
1839           /*
1840            * Can happen if the control-plane is programming tables
1841            * with traffic flowing; at least that's today's lame excuse.
1842            */
1843           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1844               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1845             {
1846               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1847               continue;
1848             }
1849           /* Send ARP request. */
1850           h0 =
1851             vlib_packet_template_get_packet (vm,
1852                                              &im->ip4_arp_request_packet_template,
1853                                              &bi0);
1854           /* Seems we're out of buffers */
1855           if (PREDICT_FALSE (!h0))
1856             {
1857               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1858               continue;
1859             }
1860
1861           b0 = vlib_get_buffer (vm, bi0);
1862
1863           /* copy the persistent fields from the original */
1864           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1865
1866           /* Add rewrite/encap string for ARP packet. */
1867           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1868
1869           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1870
1871           /* Src ethernet address in ARP header. */
1872           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
1873                                   hw_if0->hw_address);
1874           if (is_glean)
1875             {
1876               /* The interface's source address is stashed in the Glean Adj */
1877               h0->ip4_over_ethernet[0].ip4 =
1878                 adj0->sub_type.glean.receive_addr.ip4;
1879             }
1880           else
1881             {
1882               /* Src IP address in ARP header. */
1883               if (ip4_src_address_for_packet (lm, sw_if_index0,
1884                                               &h0->ip4_over_ethernet[0].ip4))
1885                 {
1886                   /* No source address available */
1887                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
                       /* The request buffer will not be sent; give it back. */
1888                   vlib_buffer_free (vm, &bi0, 1);
1889                   continue;
1890                 }
1891             }
               /* Target IP of the ARP request is the address being resolved. */
1892           h0->ip4_over_ethernet[1].ip4 = resolve0;
1893
1894           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1895
1896           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1897           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1898           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1899
               /* Move the buffer start back over the rewrite bytes written
                * by vnet_rewrite_one_header() above. */
1900           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1901
1902           vlib_set_next_frame_buffer (vm, node,
1903                                       adj0->rewrite_header.next_index, bi0);
1904         }
1905
1906       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1907     }
1908
1909   return frame->n_vectors;
1910 }
1911
1912 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1913                              vlib_frame_t * frame)
1914 {
1915   return (ip4_arp_inline (vm, node, frame, 0));
1916 }
1917
1918 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1919                                vlib_frame_t * frame)
1920 {
1921   return (ip4_arp_inline (vm, node, frame, 1));
1922 }
1923
1924 static char *ip4_arp_error_strings[] = {
1925   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
1926   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
1927   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
1928   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1929   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1930   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1931 };
1932
1933 /* *INDENT-OFF* */
1934 VLIB_REGISTER_NODE (ip4_arp_node) =
1935 {
1936   .name = "ip4-arp",
1937   .vector_size = sizeof (u32),
1938   .format_trace = format_ip4_forward_next_trace,
1939   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1940   .error_strings = ip4_arp_error_strings,
1941   .n_next_nodes = IP4_ARP_N_NEXT,
1942   .next_nodes =
1943   {
1944     [IP4_ARP_NEXT_DROP] = "error-drop",
1945   },
1946 };
1947
1948 VLIB_REGISTER_NODE (ip4_glean_node) =
1949 {
1950   .name = "ip4-glean",
1951   .vector_size = sizeof (u32),
1952   .format_trace = format_ip4_forward_next_trace,
1953   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1954   .error_strings = ip4_arp_error_strings,
1955   .n_next_nodes = IP4_ARP_N_NEXT,
1956   .next_nodes = {
1957   [IP4_ARP_NEXT_DROP] = "error-drop",
1958   },
1959 };
1960 /* *INDENT-ON* */
1961
1962 #define foreach_notrace_ip4_arp_error           \
1963 _(THROTTLED)                                    \
1964 _(RESOLVED)                                     \
1965 _(NO_BUFFERS)                                   \
1966 _(REQUEST_SENT)                                 \
1967 _(NON_ARP_ADJ)                                  \
1968 _(NO_SOURCE_ADDRESS)
1969
1970 static clib_error_t *
1971 arp_notrace_init (vlib_main_t * vm)
1972 {
1973   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1974
1975   /* don't trace ARP request packets */
1976 #define _(a)                                    \
1977     vnet_pcap_drop_trace_filter_add_del         \
1978         (rt->errors[IP4_ARP_ERROR_##a],         \
1979          1 /* is_add */);
1980   foreach_notrace_ip4_arp_error;
1981 #undef _
1982   return 0;
1983 }
1984
1985 VLIB_INIT_FUNCTION (arp_notrace_init);
1986
1987
1988 #ifndef CLIB_MARCH_VARIANT
1989 /* Send an ARP request to see if given destination is reachable on given interface. */
1990 clib_error_t *
1991 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1992                     u8 refresh)
1993 {
1994   vnet_main_t *vnm = vnet_get_main ();
1995   ip4_main_t *im = &ip4_main;
1996   ethernet_arp_header_t *h;
1997   ip4_address_t *src;
1998   ip_interface_address_t *ia;
1999   ip_adjacency_t *adj;
2000   vnet_hw_interface_t *hi;
2001   vnet_sw_interface_t *si;
2002   vlib_buffer_t *b;
2003   adj_index_t ai;
2004   u32 bi = 0;
2005   u8 unicast_rewrite = 0;
2006
2007   si = vnet_get_sw_interface (vnm, sw_if_index);
2008
2009   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2010     {
2011       return clib_error_return (0, "%U: interface %U down",
2012                                 format_ip4_address, dst,
2013                                 format_vnet_sw_if_index_name, vnm,
2014                                 sw_if_index);
2015     }
2016
2017   src =
2018     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2019   if (!src)
2020     {
2021       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2022       return clib_error_return
2023         (0,
2024          "no matching interface address for destination %U (interface %U)",
2025          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2026          sw_if_index);
2027     }
2028
2029   h = vlib_packet_template_get_packet (vm,
2030                                        &im->ip4_arp_request_packet_template,
2031                                        &bi);
2032
2033   if (!h)
2034     return clib_error_return (0, "ARP request packet allocation failed");
2035
2036   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2037   if (PREDICT_FALSE (!hi->hw_address))
2038     {
2039       return clib_error_return (0, "%U: interface %U do not support ip probe",
2040                                 format_ip4_address, dst,
2041                                 format_vnet_sw_if_index_name, vnm,
2042                                 sw_if_index);
2043     }
2044
2045   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2046
2047   h->ip4_over_ethernet[0].ip4 = src[0];
2048   h->ip4_over_ethernet[1].ip4 = dst[0];
2049
2050   b = vlib_get_buffer (vm, bi);
2051   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2052     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2053
2054   ip46_address_t nh = {
2055     .ip4 = *dst,
2056   };
2057
2058   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2059                             VNET_LINK_IP4, &nh, sw_if_index);
2060   adj = adj_get (ai);
2061
2062   /* Peer has been previously resolved, retrieve glean adj instead */
2063   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2064     {
2065       if (refresh)
2066         unicast_rewrite = 1;
2067       else
2068         {
2069           adj_unlock (ai);
2070           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2071                                       VNET_LINK_IP4, sw_if_index, &nh);
2072           adj = adj_get (ai);
2073         }
2074     }
2075
2076   /* Add encapsulation string for software interface (e.g. ethernet header). */
2077   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2078   if (unicast_rewrite)
2079     {
2080       u16 *etype = vlib_buffer_get_current (b) - 2;
2081       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2082     }
2083   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2084
2085   {
2086     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2087     u32 *to_next = vlib_frame_vector_args (f);
2088     to_next[0] = bi;
2089     f->n_vectors = 1;
2090     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2091   }
2092
2093   adj_unlock (ai);
2094   return /* no error */ 0;
2095 }
2096 #endif
2097
/* Next-node slots registered by the "ip4-rewrite" node. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT,           /* number of slots; keep last */
} ip4_rewrite_next_t;
2105
/**
 * The bits of an IPv4 address to keep (mask) when constructing a multicast
 * MAC address. The constant depends on the host byte order.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2115
2116 always_inline void
2117 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2118                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2119 {
2120   if (packet_len > adj_packet_bytes)
2121     {
2122       *error = IP4_ERROR_MTU_EXCEEDED;
2123       if (df)
2124         {
2125           icmp4_error_set_vnet_buffer
2126             (b, ICMP4_destination_unreachable,
2127              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2128              adj_packet_bytes);
2129           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2130         }
2131       else
2132         {
2133           /* IP fragmentation */
2134           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2135                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2136           *next = IP4_REWRITE_NEXT_FRAGMENT;
2137         }
2138     }
2139 }
2140
2141 /* Decrement TTL & update checksum.
2142    Works either endian, so no need for byte swap. */
2143 static_always_inline void
2144 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2145                             u32 * error)
2146 {
2147   i32 ttl;
2148   u32 checksum;
2149   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2150     {
2151       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2152       return;
2153     }
2154
2155   ttl = ip->ttl;
2156
2157   /* Input node should have reject packets with ttl 0. */
2158   ASSERT (ip->ttl > 0);
2159
2160   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2161   checksum += checksum >= 0xffff;
2162
2163   ip->checksum = checksum;
2164   ttl -= 1;
2165   ip->ttl = ttl;
2166
2167   /*
2168    * If the ttl drops below 1 when forwarding, generate
2169    * an ICMP response.
2170    */
2171   if (PREDICT_FALSE (ttl <= 0))
2172     {
2173       *error = IP4_ERROR_TIME_EXPIRED;
2174       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2175       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2176                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2177                                    0);
2178       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2179     }
2180
2181   /* Verify checksum. */
2182   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2183           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2184 }
2185
2186
/*
 * Worker behind all ip4 rewrite/midchain node functions.
 *
 * For every buffer of the frame: update TTL and checksum
 * (ip4_ttl_and_checksum_check), check the length against the adjacency's
 * max_l3_packet_bytes (ip4_mtu_check; gso_mtu_sz() supplies the length when
 * do_gso is set and the buffer carries VNET_BUFFER_F_GSO), and, if no error
 * was raised, move the buffer start back by the rewrite length, set the TX
 * sw_if_index and pick the adjacency's next node (optionally starting the
 * output feature arc). On error, b->error is taken from ip4_input_node's
 * error table and the next slot stays at whatever the checks left it.
 *
 * do_counters  bump the per-adjacency combined counters
 * is_midchain  run calc_checksums() and the adjacency's fixup_func, if any
 * is_mcast     flag packets leaving on their RX interface with
 *              IP4_ERROR_SAME_INTERFACE and patch the multicast MAC
 * do_gso       honour VNET_BUFFER_F_GSO when computing the MTU-check length
 *
 * Returns frame->n_vectors.
 *
 * NOTE(review): the rewrite copy, the counter bump and the midchain/mcast
 * fixups sit outside the error check, so they also run for packets that
 * have been flagged with an error.
 */
2187 always_inline uword
2188 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2189                              vlib_node_runtime_t * node,
2190                              vlib_frame_t * frame,
2191                              int do_counters, int is_midchain, int is_mcast,
2192                              int do_gso)
2193 {
2194   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2195   u32 *from = vlib_frame_vector_args (frame);
2196   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2197   u16 nexts[VLIB_FRAME_SIZE], *next;
2198   u32 n_left_from;
2199   vlib_node_runtime_t *error_node =
2200     vlib_node_get_runtime (vm, ip4_input_node.index);
2201
2202   n_left_from = frame->n_vectors;
2203   u32 thread_index = vm->thread_index;
2204
2205   vlib_get_buffers (vm, from, bufs, n_left_from);
       /* Every packet starts out headed for drop; the checks and the
        * success path below overwrite its slot. */
2206   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2207
2208   if (n_left_from >= 6)
2209     {
2210       int i;
2211       for (i = 2; i < 6; i++)
2212         vlib_prefetch_buffer_header (bufs[i], LOAD);
2213     }
2214
2215   next = nexts;
2216   b = bufs;
       /* Dual loop: two packets per iteration. Requiring 8 remaining
        * buffers keeps the b[2], b[3], b[6] and b[7] prefetches in range. */
2217   while (n_left_from >= 8)
2218     {
2219       ip_adjacency_t *adj0, *adj1;
2220       ip4_header_t *ip0, *ip1;
2221       u32 rw_len0, error0, adj_index0;
2222       u32 rw_len1, error1, adj_index1;
2223       u32 tx_sw_if_index0, tx_sw_if_index1;
2224       u8 *p;
2225
2226       vlib_prefetch_buffer_header (b[6], LOAD);
2227       vlib_prefetch_buffer_header (b[7], LOAD);
2228
2229       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2230       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2231
2232       /*
2233        * pre-fetch the per-adjacency counters
2234        */
2235       if (do_counters)
2236         {
2237           vlib_prefetch_combined_counter (&adjacency_counters,
2238                                           thread_index, adj_index0);
2239           vlib_prefetch_combined_counter (&adjacency_counters,
2240                                           thread_index, adj_index1);
2241         }
2242
2243       ip0 = vlib_buffer_get_current (b[0]);
2244       ip1 = vlib_buffer_get_current (b[1]);
2245
2246       error0 = error1 = IP4_ERROR_NONE;
2247
2248       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2249       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2250
2251       /* Rewrite packet header and updates lengths. */
2252       adj0 = adj_get (adj_index0);
2253       adj1 = adj_get (adj_index1);
2254
2255       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2256       rw_len0 = adj0[0].rewrite_header.data_bytes;
2257       rw_len1 = adj1[0].rewrite_header.data_bytes;
2258       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2259       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2260
           /* Prefetch the packet data of the next two buffers: the cache
            * line before the current data for store, the first one for load. */
2261       p = vlib_buffer_get_current (b[2]);
2262       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2263       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2264
2265       p = vlib_buffer_get_current (b[3]);
2266       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2267       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2268
2269       /* Check MTU of outgoing interface. */
2270       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2271       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2272
2273       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2274         ip0_len = gso_mtu_sz (b[0]);
2275       if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2276         ip1_len = gso_mtu_sz (b[1]);
2277
2278       ip4_mtu_check (b[0], ip0_len,
2279                      adj0[0].rewrite_header.max_l3_packet_bytes,
2280                      ip0->flags_and_fragment_offset &
2281                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2282                      next + 0, &error0);
2283       ip4_mtu_check (b[1], ip1_len,
2284                      adj1[0].rewrite_header.max_l3_packet_bytes,
2285                      ip1->flags_and_fragment_offset &
2286                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2287                      next + 1, &error1);
2288
2289       if (is_mcast)
2290         {
               /* A multicast packet whose TX interface equals its RX
                * interface is flagged with IP4_ERROR_SAME_INTERFACE. */
2291           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2292                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2293                     IP4_ERROR_SAME_INTERFACE : error0);
2294           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2295                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2296                     IP4_ERROR_SAME_INTERFACE : error1);
2297         }
2298
2299       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2300        * to see the IP header */
2301       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2302         {
2303           u32 next_index = adj0[0].rewrite_header.next_index;
2304           b[0]->current_data -= rw_len0;
2305           b[0]->current_length += rw_len0;
2306           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2307           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2308
2309           if (PREDICT_FALSE
2310               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2311             vnet_feature_arc_start (lm->output_feature_arc_index,
2312                                     tx_sw_if_index0, &next_index, b[0]);
2313           next[0] = next_index;
2314         }
2315       else
2316         {
2317           b[0]->error = error_node->errors[error0];
2318         }
2319       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2320         {
2321           u32 next_index = adj1[0].rewrite_header.next_index;
2322           b[1]->current_data -= rw_len1;
2323           b[1]->current_length += rw_len1;
2324
2325           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2326           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2327
2328           if (PREDICT_FALSE
2329               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2330             vnet_feature_arc_start (lm->output_feature_arc_index,
2331                                     tx_sw_if_index1, &next_index, b[1]);
2332           next[1] = next_index;
2333         }
2334       else
2335         {
2336           b[1]->error = error_node->errors[error1];
2337         }
2338       if (is_midchain)
2339         {
2340           calc_checksums (vm, b[0]);
2341           calc_checksums (vm, b[1]);
2342         }
2343       /* Guess we are only writing on simple Ethernet header. */
2344       vnet_rewrite_two_headers (adj0[0], adj1[0],
2345                                 ip0, ip1, sizeof (ethernet_header_t));
2346
2347       /*
2348        * Bump the per-adjacency counters
2349        */
2350       if (do_counters)
2351         {
2352           vlib_increment_combined_counter
2353             (&adjacency_counters,
2354              thread_index,
2355              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2356
2357           vlib_increment_combined_counter
2358             (&adjacency_counters,
2359              thread_index,
2360              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2361         }
2362
2363       if (is_midchain)
2364         {
2365           if (adj0->sub_type.midchain.fixup_func)
2366             adj0->sub_type.midchain.fixup_func
2367               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2368           if (adj1->sub_type.midchain.fixup_func)
2369             adj1->sub_type.midchain.fixup_func
2370               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2371         }
2372
2373       if (is_mcast)
2374         {
2375           /*
2376            * copy bytes from the IP address into the MAC rewrite
2377            */
2378           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2379                                       adj0->rewrite_header.dst_mcast_offset,
2380                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2381           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2382                                       adj1->rewrite_header.dst_mcast_offset,
2383                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2384         }
2385
2386       next += 2;
2387       b += 2;
2388       n_left_from -= 2;
2389     }
2390
       /* Single loop: same steps as above, one packet at a time, for
        * whatever the dual loop left over. */
2391   while (n_left_from > 0)
2392     {
2393       ip_adjacency_t *adj0;
2394       ip4_header_t *ip0;
2395       u32 rw_len0, adj_index0, error0;
2396       u32 tx_sw_if_index0;
2397
2398       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2399
2400       adj0 = adj_get (adj_index0);
2401
2402       if (do_counters)
2403         vlib_prefetch_combined_counter (&adjacency_counters,
2404                                         thread_index, adj_index0);
2405
2406       ip0 = vlib_buffer_get_current (b[0]);
2407
2408       error0 = IP4_ERROR_NONE;
2409
2410       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2411
2412
2413       /* Update packet buffer attributes/set output interface. */
2414       rw_len0 = adj0[0].rewrite_header.data_bytes;
2415       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2416
2417       /* Check MTU of outgoing interface. */
2418       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2419       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2420         ip0_len = gso_mtu_sz (b[0]);
2421
2422       ip4_mtu_check (b[0], ip0_len,
2423                      adj0[0].rewrite_header.max_l3_packet_bytes,
2424                      ip0->flags_and_fragment_offset &
2425                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2426                      next + 0, &error0);
2427
2428       if (is_mcast)
2429         {
2430           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2431                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2432                     IP4_ERROR_SAME_INTERFACE : error0);
2433         }
2434
2435       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2436        * to see the IP header */
2437       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2438         {
2439           u32 next_index = adj0[0].rewrite_header.next_index;
2440           b[0]->current_data -= rw_len0;
2441           b[0]->current_length += rw_len0;
2442           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2443           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2444
2445           if (PREDICT_FALSE
2446               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2447             vnet_feature_arc_start (lm->output_feature_arc_index,
2448                                     tx_sw_if_index0, &next_index, b[0]);
2449           next[0] = next_index;
2450         }
2451       else
2452         {
2453           b[0]->error = error_node->errors[error0];
2454         }
2455       if (is_midchain)
2456         {
2457           calc_checksums (vm, b[0]);
2458         }
2459       /* Guess we are only writing on simple Ethernet header. */
2460       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2461
2462       if (do_counters)
2463         vlib_increment_combined_counter
2464           (&adjacency_counters,
2465            thread_index, adj_index0, 1,
2466            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2467
2468       if (is_midchain)
2469         {
2470           if (adj0->sub_type.midchain.fixup_func)
2471             adj0->sub_type.midchain.fixup_func
2472               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2473         }
2474
2475       if (is_mcast)
2476         {
2477           /*
2478            * copy bytes from the IP address into the MAC rewrite
2479            */
2480           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2481                                       adj0->rewrite_header.dst_mcast_offset,
2482                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2483         }
2484
2485       next += 1;
2486       b += 1;
2487       n_left_from -= 1;
2488     }
2489
2490
2491   /* Need to do trace after rewrites to pick up new packet data. */
2492   if (node->flags & VLIB_NODE_FLAG_TRACE)
2493     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2494
       /* Dispatch every buffer of the frame to the slot recorded in nexts[]. */
2495   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2496   return frame->n_vectors;
2497 }
2498
2499 always_inline uword
2500 ip4_rewrite_inline (vlib_main_t * vm,
2501                     vlib_node_runtime_t * node,
2502                     vlib_frame_t * frame,
2503                     int do_counters, int is_midchain, int is_mcast)
2504 {
2505   vnet_main_t *vnm = vnet_get_main ();
2506   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2507     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2508                                         is_midchain, is_mcast,
2509                                         1 /* do_gso */ );
2510   else
2511     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2512                                         is_midchain, is_mcast,
2513                                         0 /* no do_gso */ );
2514 }
2515
2516
2517 /** @brief IPv4 rewrite node.
2518     @node ip4-rewrite
2519
2520     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2521     header checksum, fetch the ip adjacency, check the outbound mtu,
2522     apply the adjacency rewrite, and send pkts to the adjacency
2523     rewrite header's rewrite_next_index.
2524
2525     @param vm vlib_main_t corresponding to the current thread
2526     @param node vlib_node_runtime_t
2527     @param frame vlib_frame_t whose contents should be dispatched
2528
2529     @par Graph mechanics: buffer metadata, next index usage
2530
2531     @em Uses:
2532     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2533         - the rewrite adjacency index
2534     - <code>adj->lookup_next_index</code>
2535         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2536           the packet will be dropped.
2537     - <code>adj->rewrite_header</code>
2538         - Rewrite string length, rewrite string, next_index
2539
2540     @em Sets:
2541     - <code>b->current_data, b->current_length</code>
2542         - Updated net of applying the rewrite string
2543
2544     <em>Next Indices:</em>
2545     - <code> adj->rewrite_header.next_index </code>
2546       or @c ip4-drop
2547 */
2548
2549 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2550                                  vlib_frame_t * frame)
2551 {
2552   if (adj_are_counters_enabled ())
2553     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2554   else
2555     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2556 }
2557
2558 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2559                                        vlib_node_runtime_t * node,
2560                                        vlib_frame_t * frame)
2561 {
2562   if (adj_are_counters_enabled ())
2563     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2564   else
2565     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2566 }
2567
2568 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2569                                   vlib_node_runtime_t * node,
2570                                   vlib_frame_t * frame)
2571 {
2572   if (adj_are_counters_enabled ())
2573     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2574   else
2575     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2576 }
2577
2578 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2579                                        vlib_node_runtime_t * node,
2580                                        vlib_frame_t * frame)
2581 {
2582   if (adj_are_counters_enabled ())
2583     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2584   else
2585     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2586 }
2587
2588 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2589                                         vlib_node_runtime_t * node,
2590                                         vlib_frame_t * frame)
2591 {
2592   if (adj_are_counters_enabled ())
2593     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2594   else
2595     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2596 }
2597
2598 /* *INDENT-OFF* */
2599 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2600   .name = "ip4-rewrite",
2601   .vector_size = sizeof (u32),
2602
2603   .format_trace = format_ip4_rewrite_trace,
2604
2605   .n_next_nodes = IP4_REWRITE_N_NEXT,
2606   .next_nodes = {
2607     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2608     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2609     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2610   },
2611 };
2612
2613 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2614   .name = "ip4-rewrite-bcast",
2615   .vector_size = sizeof (u32),
2616
2617   .format_trace = format_ip4_rewrite_trace,
2618   .sibling_of = "ip4-rewrite",
2619 };
2620
2621 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2622   .name = "ip4-rewrite-mcast",
2623   .vector_size = sizeof (u32),
2624
2625   .format_trace = format_ip4_rewrite_trace,
2626   .sibling_of = "ip4-rewrite",
2627 };
2628
2629 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2630   .name = "ip4-mcast-midchain",
2631   .vector_size = sizeof (u32),
2632
2633   .format_trace = format_ip4_rewrite_trace,
2634   .sibling_of = "ip4-rewrite",
2635 };
2636
2637 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2638   .name = "ip4-midchain",
2639   .vector_size = sizeof (u32),
2640   .format_trace = format_ip4_forward_next_trace,
2641   .sibling_of =  "ip4-rewrite",
2642 };
/* *INDENT-ON* */
2644
2645 static int
2646 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2647 {
2648   ip4_fib_mtrie_t *mtrie0;
2649   ip4_fib_mtrie_leaf_t leaf0;
2650   u32 lbi0;
2651
2652   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2653
2654   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2655   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2656   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2657
2658   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2659
2660   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2661 }
2662
2663 static clib_error_t *
2664 test_lookup_command_fn (vlib_main_t * vm,
2665                         unformat_input_t * input, vlib_cli_command_t * cmd)
2666 {
2667   ip4_fib_t *fib;
2668   u32 table_id = 0;
2669   f64 count = 1;
2670   u32 n;
2671   int i;
2672   ip4_address_t ip4_base_address;
2673   u64 errors = 0;
2674
2675   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2676     {
2677       if (unformat (input, "table %d", &table_id))
2678         {
2679           /* Make sure the entry exists. */
2680           fib = ip4_fib_get (table_id);
2681           if ((fib) && (fib->index != table_id))
2682             return clib_error_return (0, "<fib-index> %d does not exist",
2683                                       table_id);
2684         }
2685       else if (unformat (input, "count %f", &count))
2686         ;
2687
2688       else if (unformat (input, "%U",
2689                          unformat_ip4_address, &ip4_base_address))
2690         ;
2691       else
2692         return clib_error_return (0, "unknown input `%U'",
2693                                   format_unformat_error, input);
2694     }
2695
2696   n = count;
2697
2698   for (i = 0; i < n; i++)
2699     {
2700       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2701         errors++;
2702
2703       ip4_base_address.as_u32 =
2704         clib_host_to_net_u32 (1 +
2705                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2706     }
2707
2708   if (errors)
2709     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2710   else
2711     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2712
2713   return 0;
2714 }
2715
2716 /*?
2717  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2718  * given FIB table to determine if there is a conflict with the
2719  * adjacency table. The fib-id can be determined by using the
2720  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2721  * of 0 is used.
2722  *
2723  * @todo This command uses fib-id, other commands use table-id (not
2724  * just a name, they are different indexes). Would like to change this
2725  * to table-id for consistency.
2726  *
2727  * @cliexpar
2728  * Example of how to run the test lookup command:
2729  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2730  * No errors in 2 lookups
2731  * @cliexend
2732 ?*/
2733 /* *INDENT-OFF* */
2734 VLIB_CLI_COMMAND (lookup_test_command, static) =
2735 {
2736   .path = "test lookup",
2737   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2738   .function = test_lookup_command_fn,
2739 };
2740 /* *INDENT-ON* */
2741
2742 #ifndef CLIB_MARCH_VARIANT
2743 int
2744 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2745 {
2746   u32 fib_index;
2747
2748   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2749
2750   if (~0 == fib_index)
2751     return VNET_API_ERROR_NO_SUCH_FIB;
2752
2753   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2754                                   flow_hash_config);
2755
2756   return 0;
2757 }
2758 #endif
2759
2760 static clib_error_t *
2761 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2762                              unformat_input_t * input,
2763                              vlib_cli_command_t * cmd)
2764 {
2765   int matched = 0;
2766   u32 table_id = 0;
2767   u32 flow_hash_config = 0;
2768   int rv;
2769
2770   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2771     {
2772       if (unformat (input, "table %d", &table_id))
2773         matched = 1;
2774 #define _(a,v) \
2775     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2776       foreach_flow_hash_bit
2777 #undef _
2778         else
2779         break;
2780     }
2781
2782   if (matched == 0)
2783     return clib_error_return (0, "unknown input `%U'",
2784                               format_unformat_error, input);
2785
2786   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2787   switch (rv)
2788     {
2789     case 0:
2790       break;
2791
2792     case VNET_API_ERROR_NO_SUCH_FIB:
2793       return clib_error_return (0, "no such FIB table %d", table_id);
2794
2795     default:
2796       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2797       break;
2798     }
2799
2800   return 0;
2801 }
2802
2803 /*?
2804  * Configure the set of IPv4 fields used by the flow hash.
2805  *
2806  * @cliexpar
2807  * Example of how to set the flow hash on a given table:
2808  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2810  * @cliexstart{show ip fib}
2811  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2812  * 0.0.0.0/0
2813  *   unicast-ip4-chain
2814  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2815  *     [0] [@0]: dpo-drop ip6
2816  * 0.0.0.0/32
2817  *   unicast-ip4-chain
2818  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2819  *     [0] [@0]: dpo-drop ip6
2820  * 224.0.0.0/8
2821  *   unicast-ip4-chain
2822  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2823  *     [0] [@0]: dpo-drop ip6
2824  * 6.0.1.2/32
2825  *   unicast-ip4-chain
2826  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2827  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2828  * 7.0.0.1/32
2829  *   unicast-ip4-chain
2830  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2831  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2832  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2833  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2834  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2835  * 240.0.0.0/8
2836  *   unicast-ip4-chain
2837  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2838  *     [0] [@0]: dpo-drop ip6
2839  * 255.255.255.255/32
2840  *   unicast-ip4-chain
2841  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2842  *     [0] [@0]: dpo-drop ip6
2843  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2844  * 0.0.0.0/0
2845  *   unicast-ip4-chain
2846  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2847  *     [0] [@0]: dpo-drop ip6
2848  * 0.0.0.0/32
2849  *   unicast-ip4-chain
2850  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2851  *     [0] [@0]: dpo-drop ip6
2852  * 172.16.1.0/24
2853  *   unicast-ip4-chain
2854  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2855  *     [0] [@4]: ipv4-glean: af_packet0
2856  * 172.16.1.1/32
2857  *   unicast-ip4-chain
2858  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2859  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2860  * 172.16.1.2/32
2861  *   unicast-ip4-chain
2862  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2863  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2864  * 172.16.2.0/24
2865  *   unicast-ip4-chain
2866  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2867  *     [0] [@4]: ipv4-glean: af_packet1
2868  * 172.16.2.1/32
2869  *   unicast-ip4-chain
2870  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2871  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2872  * 224.0.0.0/8
2873  *   unicast-ip4-chain
2874  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2875  *     [0] [@0]: dpo-drop ip6
2876  * 240.0.0.0/8
2877  *   unicast-ip4-chain
2878  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2879  *     [0] [@0]: dpo-drop ip6
2880  * 255.255.255.255/32
2881  *   unicast-ip4-chain
2882  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2883  *     [0] [@0]: dpo-drop ip6
2884  * @cliexend
2885 ?*/
2886 /* *INDENT-OFF* */
2887 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2888 {
2889   .path = "set ip flow-hash",
2890   .short_help =
2891   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2892   .function = set_ip_flow_hash_command_fn,
2893 };
2894 /* *INDENT-ON* */
2895
2896 #ifndef CLIB_MARCH_VARIANT
2897 int
2898 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2899                              u32 table_index)
2900 {
2901   vnet_main_t *vnm = vnet_get_main ();
2902   vnet_interface_main_t *im = &vnm->interface_main;
2903   ip4_main_t *ipm = &ip4_main;
2904   ip_lookup_main_t *lm = &ipm->lookup_main;
2905   vnet_classify_main_t *cm = &vnet_classify_main;
2906   ip4_address_t *if_addr;
2907
2908   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2909     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2910
2911   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2912     return VNET_API_ERROR_NO_SUCH_ENTRY;
2913
2914   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2915   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2916
2917   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2918
2919   if (NULL != if_addr)
2920     {
2921       fib_prefix_t pfx = {
2922         .fp_len = 32,
2923         .fp_proto = FIB_PROTOCOL_IP4,
2924         .fp_addr.ip4 = *if_addr,
2925       };
2926       u32 fib_index;
2927
2928       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2929                                                        sw_if_index);
2930
2931
2932       if (table_index != (u32) ~ 0)
2933         {
2934           dpo_id_t dpo = DPO_INVALID;
2935
2936           dpo_set (&dpo,
2937                    DPO_CLASSIFY,
2938                    DPO_PROTO_IP4,
2939                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2940
2941           fib_table_entry_special_dpo_add (fib_index,
2942                                            &pfx,
2943                                            FIB_SOURCE_CLASSIFY,
2944                                            FIB_ENTRY_FLAG_NONE, &dpo);
2945           dpo_reset (&dpo);
2946         }
2947       else
2948         {
2949           fib_table_entry_special_remove (fib_index,
2950                                           &pfx, FIB_SOURCE_CLASSIFY);
2951         }
2952     }
2953
2954   return 0;
2955 }
2956 #endif
2957
2958 static clib_error_t *
2959 set_ip_classify_command_fn (vlib_main_t * vm,
2960                             unformat_input_t * input,
2961                             vlib_cli_command_t * cmd)
2962 {
2963   u32 table_index = ~0;
2964   int table_index_set = 0;
2965   u32 sw_if_index = ~0;
2966   int rv;
2967
2968   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2969     {
2970       if (unformat (input, "table-index %d", &table_index))
2971         table_index_set = 1;
2972       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2973                          vnet_get_main (), &sw_if_index))
2974         ;
2975       else
2976         break;
2977     }
2978
2979   if (table_index_set == 0)
2980     return clib_error_return (0, "classify table-index must be specified");
2981
2982   if (sw_if_index == ~0)
2983     return clib_error_return (0, "interface / subif must be specified");
2984
2985   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2986
2987   switch (rv)
2988     {
2989     case 0:
2990       break;
2991
2992     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2993       return clib_error_return (0, "No such interface");
2994
2995     case VNET_API_ERROR_NO_SUCH_ENTRY:
2996       return clib_error_return (0, "No such classifier table");
2997     }
2998   return 0;
2999 }
3000
3001 /*?
3002  * Assign a classification table to an interface. The classification
3003  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3005  * on an interface.
3006  *
3007  * @cliexpar
3008  * Example of how to assign a classification table to an interface:
3009  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3010 ?*/
3011 /* *INDENT-OFF* */
3012 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3013 {
3014     .path = "set ip classify",
3015     .short_help =
3016     "set ip classify intfc <interface> table-index <classify-idx>",
3017     .function = set_ip_classify_command_fn,
3018 };
3019 /* *INDENT-ON* */
3020
3021 static clib_error_t *
3022 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3023 {
3024   ip4_main_t *im = &ip4_main;
3025   uword heapsize = 0;
3026
3027   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3028     {
3029       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3030         ;
3031       else
3032         return clib_error_return (0,
3033                                   "invalid heap-size parameter `%U'",
3034                                   format_unformat_error, input);
3035     }
3036
3037   im->mtrie_heap_size = heapsize;
3038
3039   return 0;
3040 }
3041
3042 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3043
3044 /*
3045  * fd.io coding-style-patch-verification: ON
3046  *
3047  * Local Variables:
3048  * eval: (c-set-style "gnu")
3049  * End:
3050  */