ip: ip-lookup - remove the dead code
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame);
96 }
97
98 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
99
100 /* *INDENT-OFF* */
101 VLIB_REGISTER_NODE (ip4_lookup_node) =
102 {
103   .name = "ip4-lookup",
104   .vector_size = sizeof (u32),
105   .format_trace = format_ip4_lookup_trace,
106   .n_next_nodes = IP_LOOKUP_N_NEXT,
107   .next_nodes = IP4_LOOKUP_NEXT_NODES,
108 };
109 /* *INDENT-ON* */
110
111 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
112                                       vlib_node_runtime_t * node,
113                                       vlib_frame_t * frame)
114 {
115   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
116   u32 n_left, *from;
117   u32 thread_index = vm->thread_index;
118   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
119   u16 nexts[VLIB_FRAME_SIZE], *next;
120
121   from = vlib_frame_vector_args (frame);
122   n_left = frame->n_vectors;
123   next = nexts;
124
125   vlib_get_buffers (vm, from, bufs, n_left);
126
127   while (n_left >= 4)
128     {
129       const load_balance_t *lb0, *lb1;
130       const ip4_header_t *ip0, *ip1;
131       u32 lbi0, hc0, lbi1, hc1;
132       const dpo_id_t *dpo0, *dpo1;
133
134       /* Prefetch next iteration. */
135       {
136         vlib_prefetch_buffer_header (b[2], LOAD);
137         vlib_prefetch_buffer_header (b[3], LOAD);
138
139         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
140         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
141       }
142
143       ip0 = vlib_buffer_get_current (b[0]);
144       ip1 = vlib_buffer_get_current (b[1]);
145       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
146       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
147
148       lb0 = load_balance_get (lbi0);
149       lb1 = load_balance_get (lbi1);
150
151       /*
152        * this node is for via FIBs we can re-use the hash value from the
153        * to node if present.
154        * We don't want to use the same hash value at each level in the recursion
155        * graph as that would lead to polarisation
156        */
157       hc0 = hc1 = 0;
158
159       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
160         {
161           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
162             {
163               hc0 = vnet_buffer (b[0])->ip.flow_hash =
164                 vnet_buffer (b[0])->ip.flow_hash >> 1;
165             }
166           else
167             {
168               hc0 = vnet_buffer (b[0])->ip.flow_hash =
169                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
170             }
171           dpo0 = load_balance_get_fwd_bucket
172             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
173         }
174       else
175         {
176           dpo0 = load_balance_get_bucket_i (lb0, 0);
177         }
178       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
179         {
180           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
181             {
182               hc1 = vnet_buffer (b[1])->ip.flow_hash =
183                 vnet_buffer (b[1])->ip.flow_hash >> 1;
184             }
185           else
186             {
187               hc1 = vnet_buffer (b[1])->ip.flow_hash =
188                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
189             }
190           dpo1 = load_balance_get_fwd_bucket
191             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
192         }
193       else
194         {
195           dpo1 = load_balance_get_bucket_i (lb1, 0);
196         }
197
198       next[0] = dpo0->dpoi_next_node;
199       next[1] = dpo1->dpoi_next_node;
200
201       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
202       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
203
204       vlib_increment_combined_counter
205         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
206       vlib_increment_combined_counter
207         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
208
209       b += 2;
210       next += 2;
211       n_left -= 2;
212     }
213
214   while (n_left > 0)
215     {
216       const load_balance_t *lb0;
217       const ip4_header_t *ip0;
218       const dpo_id_t *dpo0;
219       u32 lbi0, hc0;
220
221       ip0 = vlib_buffer_get_current (b[0]);
222       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
223
224       lb0 = load_balance_get (lbi0);
225
226       hc0 = 0;
227       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
228         {
229           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
230             {
231               hc0 = vnet_buffer (b[0])->ip.flow_hash =
232                 vnet_buffer (b[0])->ip.flow_hash >> 1;
233             }
234           else
235             {
236               hc0 = vnet_buffer (b[0])->ip.flow_hash =
237                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
238             }
239           dpo0 = load_balance_get_fwd_bucket
240             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
241         }
242       else
243         {
244           dpo0 = load_balance_get_bucket_i (lb0, 0);
245         }
246
247       next[0] = dpo0->dpoi_next_node;
248       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
249
250       vlib_increment_combined_counter
251         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
252
253       b += 1;
254       next += 1;
255       n_left -= 1;
256     }
257
258   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
259   if (node->flags & VLIB_NODE_FLAG_TRACE)
260     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
261
262   return frame->n_vectors;
263 }
264
265 /* *INDENT-OFF* */
266 VLIB_REGISTER_NODE (ip4_load_balance_node) =
267 {
268   .name = "ip4-load-balance",
269   .vector_size = sizeof (u32),
270   .sibling_of = "ip4-lookup",
271   .format_trace = format_ip4_lookup_trace,
272 };
273 /* *INDENT-ON* */
274
275 #ifndef CLIB_MARCH_VARIANT
276 /* get first interface address */
277 ip4_address_t *
278 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
279                              ip_interface_address_t ** result_ia)
280 {
281   ip_lookup_main_t *lm = &im->lookup_main;
282   ip_interface_address_t *ia = 0;
283   ip4_address_t *result = 0;
284
285   /* *INDENT-OFF* */
286   foreach_ip_interface_address
287     (lm, ia, sw_if_index,
288      1 /* honor unnumbered */ ,
289      ({
290        ip4_address_t * a =
291          ip_interface_address_get_address (lm, ia);
292        result = a;
293        break;
294      }));
295   /* *INDENT-OFF* */
296   if (result_ia)
297     *result_ia = result ? ia : 0;
298   return result;
299 }
300
301 static void
302 ip4_add_subnet_bcast_route (u32 fib_index,
303                             fib_prefix_t *pfx,
304                             u32 sw_if_index)
305 {
306   vnet_sw_interface_flags_t iflags;
307
308   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
309
310   fib_table_entry_special_remove(fib_index,
311                                  pfx,
312                                  FIB_SOURCE_INTERFACE);
313
314   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
315     {
316       fib_table_entry_update_one_path (fib_index, pfx,
317                                        FIB_SOURCE_INTERFACE,
318                                        FIB_ENTRY_FLAG_NONE,
319                                        DPO_PROTO_IP4,
320                                        /* No next-hop address */
321                                        &ADJ_BCAST_ADDR,
322                                        sw_if_index,
323                                        // invalid FIB index
324                                        ~0,
325                                        1,
326                                        // no out-label stack
327                                        NULL,
328                                        FIB_ROUTE_PATH_FLAG_NONE);
329     }
330   else
331     {
332         fib_table_entry_special_add(fib_index,
333                                     pfx,
334                                     FIB_SOURCE_INTERFACE,
335                                     (FIB_ENTRY_FLAG_DROP |
336                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
337     }
338 }
339
340 static void
341 ip4_add_interface_routes (u32 sw_if_index,
342                           ip4_main_t * im, u32 fib_index,
343                           ip_interface_address_t * a)
344 {
345   ip_lookup_main_t *lm = &im->lookup_main;
346   ip4_address_t *address = ip_interface_address_get_address (lm, a);
347   fib_prefix_t pfx = {
348     .fp_len = a->address_length,
349     .fp_proto = FIB_PROTOCOL_IP4,
350     .fp_addr.ip4 = *address,
351   };
352
353   if (pfx.fp_len <= 30)
354     {
355       /* a /30 or shorter - add a glean for the network address */
356       fib_table_entry_update_one_path (fib_index, &pfx,
357                                        FIB_SOURCE_INTERFACE,
358                                        (FIB_ENTRY_FLAG_CONNECTED |
359                                         FIB_ENTRY_FLAG_ATTACHED),
360                                        DPO_PROTO_IP4,
361                                        /* No next-hop address */
362                                        NULL,
363                                        sw_if_index,
364                                        // invalid FIB index
365                                        ~0,
366                                        1,
367                                        // no out-label stack
368                                        NULL,
369                                        FIB_ROUTE_PATH_FLAG_NONE);
370
371       /* Add the two broadcast addresses as drop */
372       fib_prefix_t net_pfx = {
373         .fp_len = 32,
374         .fp_proto = FIB_PROTOCOL_IP4,
375         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
376       };
377       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
378         fib_table_entry_special_add(fib_index,
379                                     &net_pfx,
380                                     FIB_SOURCE_INTERFACE,
381                                     (FIB_ENTRY_FLAG_DROP |
382                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
383       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
384       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
385         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
386     }
387   else if (pfx.fp_len == 31)
388     {
389       u32 mask = clib_host_to_net_u32(1);
390       fib_prefix_t net_pfx = pfx;
391
392       net_pfx.fp_len = 32;
393       net_pfx.fp_addr.ip4.as_u32 ^= mask;
394
395       /* a /31 - add the other end as an attached host */
396       fib_table_entry_update_one_path (fib_index, &net_pfx,
397                                        FIB_SOURCE_INTERFACE,
398                                        (FIB_ENTRY_FLAG_ATTACHED),
399                                        DPO_PROTO_IP4,
400                                        &net_pfx.fp_addr,
401                                        sw_if_index,
402                                        // invalid FIB index
403                                        ~0,
404                                        1,
405                                        NULL,
406                                        FIB_ROUTE_PATH_FLAG_NONE);
407     }
408   pfx.fp_len = 32;
409
410   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
411     {
412       u32 classify_table_index =
413         lm->classify_table_index_by_sw_if_index[sw_if_index];
414       if (classify_table_index != (u32) ~ 0)
415         {
416           dpo_id_t dpo = DPO_INVALID;
417
418           dpo_set (&dpo,
419                    DPO_CLASSIFY,
420                    DPO_PROTO_IP4,
421                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
422
423           fib_table_entry_special_dpo_add (fib_index,
424                                            &pfx,
425                                            FIB_SOURCE_CLASSIFY,
426                                            FIB_ENTRY_FLAG_NONE, &dpo);
427           dpo_reset (&dpo);
428         }
429     }
430
431   fib_table_entry_update_one_path (fib_index, &pfx,
432                                    FIB_SOURCE_INTERFACE,
433                                    (FIB_ENTRY_FLAG_CONNECTED |
434                                     FIB_ENTRY_FLAG_LOCAL),
435                                    DPO_PROTO_IP4,
436                                    &pfx.fp_addr,
437                                    sw_if_index,
438                                    // invalid FIB index
439                                    ~0,
440                                    1, NULL,
441                                    FIB_ROUTE_PATH_FLAG_NONE);
442 }
443
444 static void
445 ip4_del_interface_routes (ip4_main_t * im,
446                           u32 fib_index,
447                           ip4_address_t * address, u32 address_length)
448 {
449   fib_prefix_t pfx = {
450     .fp_len = address_length,
451     .fp_proto = FIB_PROTOCOL_IP4,
452     .fp_addr.ip4 = *address,
453   };
454
455   if (pfx.fp_len <= 30)
456     {
457       fib_prefix_t net_pfx = {
458         .fp_len = 32,
459         .fp_proto = FIB_PROTOCOL_IP4,
460         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
461       };
462       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
463         fib_table_entry_special_remove(fib_index,
464                                        &net_pfx,
465                                        FIB_SOURCE_INTERFACE);
466       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
467       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
468         fib_table_entry_special_remove(fib_index,
469                                        &net_pfx,
470                                        FIB_SOURCE_INTERFACE);
471       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
472     }
473     else if (pfx.fp_len == 31)
474     {
475       u32 mask = clib_host_to_net_u32(1);
476       fib_prefix_t net_pfx = pfx;
477
478       net_pfx.fp_len = 32;
479       net_pfx.fp_addr.ip4.as_u32 ^= mask;
480
481       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
482     }
483
484   pfx.fp_len = 32;
485   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
486 }
487
488 void
489 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
490 {
491   ip4_main_t *im = &ip4_main;
492
493   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
494
495   /*
496    * enable/disable only on the 1<->0 transition
497    */
498   if (is_enable)
499     {
500       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
501         return;
502     }
503   else
504     {
505       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
506       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
507         return;
508     }
509   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
510                                !is_enable, 0, 0);
511
512
513   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
514                                sw_if_index, !is_enable, 0, 0);
515
516   {
517     ip4_enable_disable_interface_callback_t *cb;
518     vec_foreach (cb, im->enable_disable_interface_callbacks)
519       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
520   }
521 }
522
523 static clib_error_t *
524 ip4_add_del_interface_address_internal (vlib_main_t * vm,
525                                         u32 sw_if_index,
526                                         ip4_address_t * address,
527                                         u32 address_length, u32 is_del)
528 {
529   vnet_main_t *vnm = vnet_get_main ();
530   ip4_main_t *im = &ip4_main;
531   ip_lookup_main_t *lm = &im->lookup_main;
532   clib_error_t *error = 0;
533   u32 if_address_index, elts_before;
534   ip4_address_fib_t ip4_af, *addr_fib = 0;
535
536   /* local0 interface doesn't support IP addressing  */
537   if (sw_if_index == 0)
538     {
539       return
540        clib_error_create ("local0 interface doesn't support IP addressing");
541     }
542
543   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
544   ip4_addr_fib_init (&ip4_af, address,
545                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
546   vec_add1 (addr_fib, ip4_af);
547
548   /*
549    * there is no support for adj-fib handling in the presence of overlapping
550    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
551    * most routers do.
552    */
553   /* *INDENT-OFF* */
554   if (!is_del)
555     {
556       /* When adding an address check that it does not conflict
557          with an existing address on any interface in this table. */
558       ip_interface_address_t *ia;
559       vnet_sw_interface_t *sif;
560
561       pool_foreach(sif, vnm->interface_main.sw_interfaces,
562       ({
563           if (im->fib_index_by_sw_if_index[sw_if_index] ==
564               im->fib_index_by_sw_if_index[sif->sw_if_index])
565             {
566               foreach_ip_interface_address
567                 (&im->lookup_main, ia, sif->sw_if_index,
568                  0 /* honor unnumbered */ ,
569                  ({
570                    ip4_address_t * x =
571                      ip_interface_address_get_address
572                      (&im->lookup_main, ia);
573                    if (ip4_destination_matches_route
574                        (im, address, x, ia->address_length) ||
575                        ip4_destination_matches_route (im,
576                                                       x,
577                                                       address,
578                                                       address_length))
579                      {
580                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
581
582                        return
583                          clib_error_create
584                          ("failed to add %U which conflicts with %U for interface %U",
585                           format_ip4_address_and_length, address,
586                           address_length,
587                           format_ip4_address_and_length, x,
588                           ia->address_length,
589                           format_vnet_sw_if_index_name, vnm,
590                           sif->sw_if_index);
591                      }
592                  }));
593             }
594       }));
595     }
596   /* *INDENT-ON* */
597
598   elts_before = pool_elts (lm->if_address_pool);
599
600   error = ip_interface_address_add_del
601     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
602   if (error)
603     goto done;
604
605   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
606
607   if (is_del)
608     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
609   else
610     ip4_add_interface_routes (sw_if_index,
611                               im, ip4_af.fib_index,
612                               pool_elt_at_index
613                               (lm->if_address_pool, if_address_index));
614
615   /* If pool did not grow/shrink: add duplicate address. */
616   if (elts_before != pool_elts (lm->if_address_pool))
617     {
618       ip4_add_del_interface_address_callback_t *cb;
619       vec_foreach (cb, im->add_del_interface_address_callbacks)
620         cb->function (im, cb->function_opaque, sw_if_index,
621                       address, address_length, if_address_index, is_del);
622     }
623
624 done:
625   vec_free (addr_fib);
626   return error;
627 }
628
629 clib_error_t *
630 ip4_add_del_interface_address (vlib_main_t * vm,
631                                u32 sw_if_index,
632                                ip4_address_t * address,
633                                u32 address_length, u32 is_del)
634 {
635   return ip4_add_del_interface_address_internal
636     (vm, sw_if_index, address, address_length, is_del);
637 }
638
639 void
640 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
641 {
642   ip_interface_address_t *ia;
643   ip4_main_t *im;
644
645   im = &ip4_main;
646
647   /*
648    * when directed broadcast is enabled, the subnet braodcast route will forward
649    * packets using an adjacency with a broadcast MAC. otherwise it drops
650    */
651   /* *INDENT-OFF* */
652   foreach_ip_interface_address(&im->lookup_main, ia,
653                                sw_if_index, 0,
654      ({
655        if (ia->address_length <= 30)
656          {
657            ip4_address_t *ipa;
658
659            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
660
661            fib_prefix_t pfx = {
662              .fp_len = 32,
663              .fp_proto = FIB_PROTOCOL_IP4,
664              .fp_addr = {
665                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
666              },
667            };
668
669            ip4_add_subnet_bcast_route
670              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
671                                                   sw_if_index),
672               &pfx, sw_if_index);
673          }
674      }));
675   /* *INDENT-ON* */
676 }
677 #endif
678
679 /* Built-in ip4 unicast rx feature path definition */
680 /* *INDENT-OFF* */
681 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
682 {
683   .arc_name = "ip4-unicast",
684   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
685   .last_in_arc = "ip4-lookup",
686   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
687 };
688
689 VNET_FEATURE_INIT (ip4_flow_classify, static) =
690 {
691   .arc_name = "ip4-unicast",
692   .node_name = "ip4-flow-classify",
693   .runs_before = VNET_FEATURES ("ip4-inacl"),
694 };
695
696 VNET_FEATURE_INIT (ip4_inacl, static) =
697 {
698   .arc_name = "ip4-unicast",
699   .node_name = "ip4-inacl",
700   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
701 };
702
703 VNET_FEATURE_INIT (ip4_source_check_1, static) =
704 {
705   .arc_name = "ip4-unicast",
706   .node_name = "ip4-source-check-via-rx",
707   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
708 };
709
710 VNET_FEATURE_INIT (ip4_source_check_2, static) =
711 {
712   .arc_name = "ip4-unicast",
713   .node_name = "ip4-source-check-via-any",
714   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
715 };
716
717 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
718 {
719   .arc_name = "ip4-unicast",
720   .node_name = "ip4-source-and-port-range-check-rx",
721   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
722 };
723
724 VNET_FEATURE_INIT (ip4_policer_classify, static) =
725 {
726   .arc_name = "ip4-unicast",
727   .node_name = "ip4-policer-classify",
728   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
729 };
730
731 VNET_FEATURE_INIT (ip4_ipsec, static) =
732 {
733   .arc_name = "ip4-unicast",
734   .node_name = "ipsec4-input-feature",
735   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
736 };
737
738 VNET_FEATURE_INIT (ip4_vpath, static) =
739 {
740   .arc_name = "ip4-unicast",
741   .node_name = "vpath-input-ip4",
742   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
743 };
744
745 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
746 {
747   .arc_name = "ip4-unicast",
748   .node_name = "ip4-vxlan-bypass",
749   .runs_before = VNET_FEATURES ("ip4-lookup"),
750 };
751
752 VNET_FEATURE_INIT (ip4_not_enabled, static) =
753 {
754   .arc_name = "ip4-unicast",
755   .node_name = "ip4-not-enabled",
756   .runs_before = VNET_FEATURES ("ip4-lookup"),
757 };
758
759 VNET_FEATURE_INIT (ip4_lookup, static) =
760 {
761   .arc_name = "ip4-unicast",
762   .node_name = "ip4-lookup",
763   .runs_before = 0,     /* not before any other features */
764 };
765
766 /* Built-in ip4 multicast rx feature path definition */
767 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
768 {
769   .arc_name = "ip4-multicast",
770   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
771   .last_in_arc = "ip4-mfib-forward-lookup",
772   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
773 };
774
775 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
776 {
777   .arc_name = "ip4-multicast",
778   .node_name = "vpath-input-ip4",
779   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
780 };
781
782 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
783 {
784   .arc_name = "ip4-multicast",
785   .node_name = "ip4-not-enabled",
786   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
787 };
788
789 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
790 {
791   .arc_name = "ip4-multicast",
792   .node_name = "ip4-mfib-forward-lookup",
793   .runs_before = 0,     /* last feature */
794 };
795
796 /* Source and port-range check ip4 tx feature path definition */
797 VNET_FEATURE_ARC_INIT (ip4_output, static) =
798 {
799   .arc_name = "ip4-output",
800   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
801   .last_in_arc = "interface-output",
802   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
803 };
804
805 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
806 {
807   .arc_name = "ip4-output",
808   .node_name = "ip4-source-and-port-range-check-tx",
809   .runs_before = VNET_FEATURES ("ip4-outacl"),
810 };
811
812 VNET_FEATURE_INIT (ip4_outacl, static) =
813 {
814   .arc_name = "ip4-output",
815   .node_name = "ip4-outacl",
816   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
817 };
818
819 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
820 {
821   .arc_name = "ip4-output",
822   .node_name = "ipsec4-output-feature",
823   .runs_before = VNET_FEATURES ("interface-output"),
824 };
825
826 /* Built-in ip4 tx feature path definition */
827 VNET_FEATURE_INIT (ip4_interface_output, static) =
828 {
829   .arc_name = "ip4-output",
830   .node_name = "interface-output",
831   .runs_before = 0,     /* not before any other features */
832 };
833 /* *INDENT-ON* */
834
835 static clib_error_t *
836 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
837 {
838   ip4_main_t *im = &ip4_main;
839
840   /* Fill in lookup tables with default table (0). */
841   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
842   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
843
844   if (!is_add)
845     {
846       ip4_main_t *im4 = &ip4_main;
847       ip_lookup_main_t *lm4 = &im4->lookup_main;
848       ip_interface_address_t *ia = 0;
849       ip4_address_t *address;
850       vlib_main_t *vm = vlib_get_main ();
851
852       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
853       /* *INDENT-OFF* */
854       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
855       ({
856         address = ip_interface_address_get_address (lm4, ia);
857         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
858       }));
859       /* *INDENT-ON* */
860     }
861
862   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
863                                is_add, 0, 0);
864
865   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
866                                sw_if_index, is_add, 0, 0);
867
868   return /* no error */ 0;
869 }
870
871 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
872
873 /* Global IP4 main. */
874 #ifndef CLIB_MARCH_VARIANT
875 ip4_main_t ip4_main;
876 #endif /* CLIB_MARCH_VARIANT */
877
878 static clib_error_t *
879 ip4_lookup_init (vlib_main_t * vm)
880 {
881   ip4_main_t *im = &ip4_main;
882   clib_error_t *error;
883   uword i;
884
885   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
886     return error;
887   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
888     return (error);
889   if ((error = vlib_call_init_function (vm, fib_module_init)))
890     return error;
891   if ((error = vlib_call_init_function (vm, mfib_module_init)))
892     return error;
893
894   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
895     {
896       u32 m;
897
898       if (i < 32)
899         m = pow2_mask (i) << (32 - i);
900       else
901         m = ~0;
902       im->fib_masks[i] = clib_host_to_net_u32 (m);
903     }
904
905   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
906
907   /* Create FIB with index 0 and table id of 0. */
908   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
909                                      FIB_SOURCE_DEFAULT_ROUTE);
910   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
911                                       MFIB_SOURCE_DEFAULT_ROUTE);
912
913   {
914     pg_node_t *pn;
915     pn = pg_get_node (ip4_lookup_node.index);
916     pn->unformat_edit = unformat_pg_ip4_header;
917   }
918
919   {
920     ethernet_arp_header_t h;
921
922     clib_memset (&h, 0, sizeof (h));
923
924 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
925 #define _8(f,v) h.f = v;
926     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
927     _16 (l3_type, ETHERNET_TYPE_IP4);
928     _8 (n_l2_address_bytes, 6);
929     _8 (n_l3_address_bytes, 4);
930     _16 (opcode, ETHERNET_ARP_OPCODE_request);
931 #undef _16
932 #undef _8
933
934     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
935                                /* data */ &h,
936                                sizeof (h),
937                                /* alloc chunk size */ 8,
938                                "ip4 arp");
939   }
940
941   return error;
942 }
943
944 VLIB_INIT_FUNCTION (ip4_lookup_init);
945
946 typedef struct
947 {
948   /* Adjacency taken. */
949   u32 dpo_index;
950   u32 flow_hash;
951   u32 fib_index;
952
953   /* Packet data, possibly *after* rewrite. */
954   u8 packet_data[64 - 1 * sizeof (u32)];
955 }
956 ip4_forward_next_trace_t;
957
958 #ifndef CLIB_MARCH_VARIANT
959 u8 *
960 format_ip4_forward_next_trace (u8 * s, va_list * args)
961 {
962   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
963   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
964   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
965   u32 indent = format_get_indent (s);
966   s = format (s, "%U%U",
967               format_white_space, indent,
968               format_ip4_header, t->packet_data, sizeof (t->packet_data));
969   return s;
970 }
971 #endif
972
973 static u8 *
974 format_ip4_lookup_trace (u8 * s, va_list * args)
975 {
976   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
977   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
978   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
979   u32 indent = format_get_indent (s);
980
981   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
982               t->fib_index, t->dpo_index, t->flow_hash);
983   s = format (s, "\n%U%U",
984               format_white_space, indent,
985               format_ip4_header, t->packet_data, sizeof (t->packet_data));
986   return s;
987 }
988
989 static u8 *
990 format_ip4_rewrite_trace (u8 * s, va_list * args)
991 {
992   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
993   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
994   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
995   u32 indent = format_get_indent (s);
996
997   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
998               t->fib_index, t->dpo_index, format_ip_adjacency,
999               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1000   s = format (s, "\n%U%U",
1001               format_white_space, indent,
1002               format_ip_adjacency_packet_data,
1003               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1004   return s;
1005 }
1006
1007 #ifndef CLIB_MARCH_VARIANT
1008 /* Common trace function for all ip4-forward next nodes. */
1009 void
1010 ip4_forward_next_trace (vlib_main_t * vm,
1011                         vlib_node_runtime_t * node,
1012                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1013 {
1014   u32 *from, n_left;
1015   ip4_main_t *im = &ip4_main;
1016
1017   n_left = frame->n_vectors;
1018   from = vlib_frame_vector_args (frame);
1019
1020   while (n_left >= 4)
1021     {
1022       u32 bi0, bi1;
1023       vlib_buffer_t *b0, *b1;
1024       ip4_forward_next_trace_t *t0, *t1;
1025
1026       /* Prefetch next iteration. */
1027       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1028       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1029
1030       bi0 = from[0];
1031       bi1 = from[1];
1032
1033       b0 = vlib_get_buffer (vm, bi0);
1034       b1 = vlib_get_buffer (vm, bi1);
1035
1036       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1037         {
1038           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1039           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1040           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1041           t0->fib_index =
1042             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1043              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1044             vec_elt (im->fib_index_by_sw_if_index,
1045                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1046
1047           clib_memcpy_fast (t0->packet_data,
1048                             vlib_buffer_get_current (b0),
1049                             sizeof (t0->packet_data));
1050         }
1051       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1052         {
1053           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1054           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1055           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1056           t1->fib_index =
1057             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1058              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1059             vec_elt (im->fib_index_by_sw_if_index,
1060                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1061           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1062                             sizeof (t1->packet_data));
1063         }
1064       from += 2;
1065       n_left -= 2;
1066     }
1067
1068   while (n_left >= 1)
1069     {
1070       u32 bi0;
1071       vlib_buffer_t *b0;
1072       ip4_forward_next_trace_t *t0;
1073
1074       bi0 = from[0];
1075
1076       b0 = vlib_get_buffer (vm, bi0);
1077
1078       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1079         {
1080           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1081           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1082           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1083           t0->fib_index =
1084             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1085              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1086             vec_elt (im->fib_index_by_sw_if_index,
1087                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1088           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1089                             sizeof (t0->packet_data));
1090         }
1091       from += 1;
1092       n_left -= 1;
1093     }
1094 }
1095
1096 /* Compute TCP/UDP/ICMP4 checksum in software. */
1097 u16
1098 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1099                               ip4_header_t * ip0)
1100 {
1101   ip_csum_t sum0;
1102   u32 ip_header_length, payload_length_host_byte_order;
1103   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1104   u16 sum16;
1105   void *data_this_buffer;
1106
1107   /* Initialize checksum with ip header. */
1108   ip_header_length = ip4_header_bytes (ip0);
1109   payload_length_host_byte_order =
1110     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1111   sum0 =
1112     clib_host_to_net_u32 (payload_length_host_byte_order +
1113                           (ip0->protocol << 16));
1114
1115   if (BITS (uword) == 32)
1116     {
1117       sum0 =
1118         ip_csum_with_carry (sum0,
1119                             clib_mem_unaligned (&ip0->src_address, u32));
1120       sum0 =
1121         ip_csum_with_carry (sum0,
1122                             clib_mem_unaligned (&ip0->dst_address, u32));
1123     }
1124   else
1125     sum0 =
1126       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1127
1128   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1129   data_this_buffer = (void *) ip0 + ip_header_length;
1130   n_ip_bytes_this_buffer =
1131     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1132   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1133     {
1134       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1135         n_ip_bytes_this_buffer - ip_header_length : 0;
1136     }
1137   while (1)
1138     {
1139       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1140       n_bytes_left -= n_this_buffer;
1141       if (n_bytes_left == 0)
1142         break;
1143
1144       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1145       p0 = vlib_get_buffer (vm, p0->next_buffer);
1146       data_this_buffer = vlib_buffer_get_current (p0);
1147       n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1148     }
1149
1150   sum16 = ~ip_csum_fold (sum0);
1151
1152   return sum16;
1153 }
1154
1155 u32
1156 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1157 {
1158   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1159   udp_header_t *udp0;
1160   u16 sum16;
1161
1162   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1163           || ip0->protocol == IP_PROTOCOL_UDP);
1164
1165   udp0 = (void *) (ip0 + 1);
1166   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1167     {
1168       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1169                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1170       return p0->flags;
1171     }
1172
1173   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1174
1175   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1176                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1177
1178   return p0->flags;
1179 }
1180 #endif
1181
1182 /* *INDENT-OFF* */
1183 VNET_FEATURE_ARC_INIT (ip4_local) =
1184 {
1185   .arc_name  = "ip4-local",
1186   .start_nodes = VNET_FEATURES ("ip4-local"),
1187   .last_in_arc = "ip4-local-end-of-arc",
1188 };
1189 /* *INDENT-ON* */
1190
1191 static inline void
1192 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1193                             ip4_header_t * ip, u8 is_udp, u8 * error,
1194                             u8 * good_tcp_udp)
1195 {
1196   u32 flags0;
1197   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1198   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1199   if (is_udp)
1200     {
1201       udp_header_t *udp;
1202       u32 ip_len, udp_len;
1203       i32 len_diff;
1204       udp = ip4_next_header (ip);
1205       /* Verify UDP length. */
1206       ip_len = clib_net_to_host_u16 (ip->length);
1207       udp_len = clib_net_to_host_u16 (udp->length);
1208
1209       len_diff = ip_len - udp_len;
1210       *good_tcp_udp &= len_diff >= 0;
1211       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1212     }
1213 }
1214
/*
 * L4 checksum state helpers.  _b is a buffer pointer expression.
 *
 * Each expansion is fully parenthesised (and so is every argument) so the
 * macros compose safely with surrounding operators such as '!' or '&&'.
 */

/* True when the buffer is flagged for TCP or UDP checksum offload. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* True when a TCP/UDP packet still needs its checksum verified in software:
 * it has neither been computed already nor been marked as offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1226
1227 static inline void
1228 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1229                          ip4_header_t * ih, u8 * error)
1230 {
1231   u8 is_udp, is_tcp_udp, good_tcp_udp;
1232
1233   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1234   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1235
1236   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1237     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1238   else
1239     good_tcp_udp = ip4_local_csum_is_valid (b);
1240
1241   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1242   *error = (is_tcp_udp && !good_tcp_udp
1243             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1244 }
1245
1246 static inline void
1247 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1248                             ip4_header_t ** ih, u8 * error)
1249 {
1250   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1251
1252   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1253   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1254
1255   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1256   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1257
1258   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1259   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1260
1261   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1262                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1263     {
1264       if (is_tcp_udp[0])
1265         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1266                                     &good_tcp_udp[0]);
1267       if (is_tcp_udp[1])
1268         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1269                                     &good_tcp_udp[1]);
1270     }
1271
1272   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1273               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1274   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1275               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1276 }
1277
1278 static inline void
1279 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1280                               vlib_buffer_t * b, u16 * next, u8 error,
1281                               u8 head_of_feature_arc)
1282 {
1283   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1284   u32 next_index;
1285
1286   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1287   b->error = error ? error_node->errors[error] : 0;
1288   if (head_of_feature_arc)
1289     {
1290       next_index = *next;
1291       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1292         {
1293           vnet_feature_arc_start (arc_index,
1294                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1295                                   &next_index, b);
1296           *next = next_index;
1297         }
1298     }
1299 }
1300
1301 typedef struct
1302 {
1303   ip4_address_t src;
1304   u32 lbi;
1305   u8 error;
1306   u8 first;
1307 } ip4_local_last_check_t;
1308
1309 static inline void
1310 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1311                      ip4_local_last_check_t * last_check, u8 * error0)
1312 {
1313   ip4_fib_mtrie_leaf_t leaf0;
1314   ip4_fib_mtrie_t *mtrie0;
1315   const dpo_id_t *dpo0;
1316   load_balance_t *lb0;
1317   u32 lbi0;
1318
1319   vnet_buffer (b)->ip.fib_index =
1320     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1321     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1322
1323   if (PREDICT_FALSE (last_check->first ||
1324                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1325     {
1326       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1327       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1328       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1329       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1330       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1331
1332       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1333       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1334
1335       lb0 = load_balance_get (lbi0);
1336       dpo0 = load_balance_get_bucket_i (lb0, 0);
1337
1338       /*
1339        * Must have a route to source otherwise we drop the packet.
1340        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1341        *
1342        * The checks are:
1343        *  - the source is a recieve => it's from us => bogus, do this
1344        *    first since it sets a different error code.
1345        *  - uRPF check for any route to source - accept if passes.
1346        *  - allow packets destined to the broadcast address from unknown sources
1347        */
1348
1349       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1350                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1351                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1352       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1353                   && !fib_urpf_check_size (lb0->lb_urpf)
1354                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1355                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1356
1357       last_check->src.as_u32 = ip0->src_address.as_u32;
1358       last_check->lbi = lbi0;
1359       last_check->error = *error0;
1360     }
1361   else
1362     {
1363       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1364       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1365       *error0 = last_check->error;
1366       last_check->first = 0;
1367     }
1368 }
1369
1370 static inline void
1371 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1372                         ip4_local_last_check_t * last_check, u8 * error)
1373 {
1374   ip4_fib_mtrie_leaf_t leaf[2];
1375   ip4_fib_mtrie_t *mtrie[2];
1376   const dpo_id_t *dpo[2];
1377   load_balance_t *lb[2];
1378   u32 not_last_hit;
1379   u32 lbi[2];
1380
1381   not_last_hit = last_check->first;
1382   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1383   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1384
1385   vnet_buffer (b[0])->ip.fib_index =
1386     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1387     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1388     vnet_buffer (b[0])->ip.fib_index;
1389
1390   vnet_buffer (b[1])->ip.fib_index =
1391     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1392     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1393     vnet_buffer (b[1])->ip.fib_index;
1394
1395   if (PREDICT_FALSE (not_last_hit))
1396     {
1397       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1398       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1399
1400       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1401       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1402
1403       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1404                                            &ip[0]->src_address, 2);
1405       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1406                                            &ip[1]->src_address, 2);
1407
1408       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1409                                            &ip[0]->src_address, 3);
1410       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1411                                            &ip[1]->src_address, 3);
1412
1413       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1414       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1415
1416       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1417       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1418
1419       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1420       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1421
1422       lb[0] = load_balance_get (lbi[0]);
1423       lb[1] = load_balance_get (lbi[1]);
1424
1425       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1426       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1427
1428       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1429                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1430                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1431       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1432                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1433                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1434                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1435
1436       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1437                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1438                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1439       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1440                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1441                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1442                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1443
1444       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1445       last_check->lbi = lbi[1];
1446       last_check->error = error[1];
1447     }
1448   else
1449     {
1450       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1451       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1452
1453       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1454       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1455
1456       error[0] = last_check->error;
1457       error[1] = last_check->error;
1458       last_check->first = 0;
1459     }
1460 }
1461
/* Packet classes distinguished by ip4-local.  L4 is zero, so a class value
 * can be tested for "not L4" as a plain boolean. */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1468
1469 /**
1470  * Determine packet type and next node.
1471  *
1472  * The expectation is that all packets that are not L4 will skip
1473  * checksums and source checks.
1474  */
1475 always_inline u8
1476 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1477 {
1478   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1479
1480   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1481     {
1482       *next = IP_LOCAL_NEXT_REASSEMBLY;
1483       return IP_LOCAL_PACKET_TYPE_FRAG;
1484     }
1485   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1486     {
1487       *next = lm->local_next_by_ip_protocol[ip->protocol];
1488       return IP_LOCAL_PACKET_TYPE_NAT;
1489     }
1490
1491   *next = lm->local_next_by_ip_protocol[ip->protocol];
1492   return IP_LOCAL_PACKET_TYPE_L4;
1493 }
1494
1495 static inline uword
1496 ip4_local_inline (vlib_main_t * vm,
1497                   vlib_node_runtime_t * node,
1498                   vlib_frame_t * frame, int head_of_feature_arc)
1499 {
1500   u32 *from, n_left_from;
1501   vlib_node_runtime_t *error_node =
1502     vlib_node_get_runtime (vm, ip4_input_node.index);
1503   u16 nexts[VLIB_FRAME_SIZE], *next;
1504   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1505   ip4_header_t *ip[2];
1506   u8 error[2], pt[2];
1507
1508   ip4_local_last_check_t last_check = {
1509     /*
1510      * 0.0.0.0 can appear as the source address of an IP packet,
1511      * as can any other address, hence the need to use the 'first'
1512      * member to make sure the .lbi is initialised for the first
1513      * packet.
1514      */
1515     .src = {.as_u32 = 0},
1516     .lbi = ~0,
1517     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1518     .first = 1,
1519   };
1520
1521   from = vlib_frame_vector_args (frame);
1522   n_left_from = frame->n_vectors;
1523
1524   if (node->flags & VLIB_NODE_FLAG_TRACE)
1525     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1526
1527   vlib_get_buffers (vm, from, bufs, n_left_from);
1528   b = bufs;
1529   next = nexts;
1530
1531   while (n_left_from >= 6)
1532     {
1533       u8 not_batch = 0;
1534
1535       /* Prefetch next iteration. */
1536       {
1537         vlib_prefetch_buffer_header (b[4], LOAD);
1538         vlib_prefetch_buffer_header (b[5], LOAD);
1539
1540         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1541         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1542       }
1543
1544       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1545
1546       ip[0] = vlib_buffer_get_current (b[0]);
1547       ip[1] = vlib_buffer_get_current (b[1]);
1548
1549       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1550       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1551
1552       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1553       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1554
1555       not_batch = pt[0] ^ pt[1];
1556
1557       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1558         goto skip_checks;
1559
1560       if (PREDICT_TRUE (not_batch == 0))
1561         {
1562           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1563           ip4_local_check_src_x2 (b, ip, &last_check, error);
1564         }
1565       else
1566         {
1567           if (!pt[0])
1568             {
1569               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1570               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1571             }
1572           if (!pt[1])
1573             {
1574               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1575               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1576             }
1577         }
1578
1579     skip_checks:
1580
1581       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1582                                     head_of_feature_arc);
1583       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1584                                     head_of_feature_arc);
1585
1586       b += 2;
1587       next += 2;
1588       n_left_from -= 2;
1589     }
1590
1591   while (n_left_from > 0)
1592     {
1593       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1594
1595       ip[0] = vlib_buffer_get_current (b[0]);
1596       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1597       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1598
1599       if (head_of_feature_arc == 0 || pt[0])
1600         goto skip_check;
1601
1602       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1603       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1604
1605     skip_check:
1606
1607       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1608                                     head_of_feature_arc);
1609
1610       b += 1;
1611       next += 1;
1612       n_left_from -= 1;
1613     }
1614
1615   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1616   return frame->n_vectors;
1617 }
1618
1619 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1620                                vlib_frame_t * frame)
1621 {
1622   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1623 }
1624
1625 /* *INDENT-OFF* */
1626 VLIB_REGISTER_NODE (ip4_local_node) =
1627 {
1628   .name = "ip4-local",
1629   .vector_size = sizeof (u32),
1630   .format_trace = format_ip4_forward_next_trace,
1631   .n_next_nodes = IP_LOCAL_N_NEXT,
1632   .next_nodes =
1633   {
1634     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1635     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1636     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1637     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1638     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1639   },
1640 };
1641 /* *INDENT-ON* */
1642
1643
1644 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1645                                           vlib_node_runtime_t * node,
1646                                           vlib_frame_t * frame)
1647 {
1648   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1649 }
1650
1651 /* *INDENT-OFF* */
1652 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1653   .name = "ip4-local-end-of-arc",
1654   .vector_size = sizeof (u32),
1655
1656   .format_trace = format_ip4_forward_next_trace,
1657   .sibling_of = "ip4-local",
1658 };
1659
1660 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1661   .arc_name = "ip4-local",
1662   .node_name = "ip4-local-end-of-arc",
1663   .runs_before = 0, /* not before any other features */
1664 };
1665 /* *INDENT-ON* */
1666
1667 #ifndef CLIB_MARCH_VARIANT
1668 void
1669 ip4_register_protocol (u32 protocol, u32 node_index)
1670 {
1671   vlib_main_t *vm = vlib_get_main ();
1672   ip4_main_t *im = &ip4_main;
1673   ip_lookup_main_t *lm = &im->lookup_main;
1674
1675   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1676   lm->local_next_by_ip_protocol[protocol] =
1677     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1678 }
1679
1680 void
1681 ip4_unregister_protocol (u32 protocol)
1682 {
1683   ip4_main_t *im = &ip4_main;
1684   ip_lookup_main_t *lm = &im->lookup_main;
1685
1686   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1687   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1688 }
1689 #endif
1690
1691 static clib_error_t *
1692 show_ip_local_command_fn (vlib_main_t * vm,
1693                           unformat_input_t * input, vlib_cli_command_t * cmd)
1694 {
1695   ip4_main_t *im = &ip4_main;
1696   ip_lookup_main_t *lm = &im->lookup_main;
1697   int i;
1698
1699   vlib_cli_output (vm, "Protocols handled by ip4_local");
1700   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1701     {
1702       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1703         {
1704           u32 node_index = vlib_get_node (vm,
1705                                           ip4_local_node.index)->
1706             next_nodes[lm->local_next_by_ip_protocol[i]];
1707           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1708                            format_vlib_node_name, vm, node_index);
1709         }
1710     }
1711   return 0;
1712 }
1713
1714
1715
1716 /*?
1717  * Display the set of protocols handled by the local IPv4 stack.
1718  *
1719  * @cliexpar
1720  * Example of how to display local protocol table:
1721  * @cliexstart{show ip local}
1722  * Protocols handled by ip4_local
1723  * 1
1724  * 17
1725  * 47
1726  * @cliexend
1727 ?*/
1728 /* *INDENT-OFF* */
1729 VLIB_CLI_COMMAND (show_ip_local, static) =
1730 {
1731   .path = "show ip local",
1732   .function = show_ip_local_command_fn,
1733   .short_help = "show ip local",
1734 };
1735 /* *INDENT-ON* */
1736
1737 always_inline uword
1738 ip4_arp_inline (vlib_main_t * vm,
1739                 vlib_node_runtime_t * node,
1740                 vlib_frame_t * frame, int is_glean)
1741 {
1742   vnet_main_t *vnm = vnet_get_main ();
1743   ip4_main_t *im = &ip4_main;
1744   ip_lookup_main_t *lm = &im->lookup_main;
1745   u32 *from, *to_next_drop;
1746   uword n_left_from, n_left_to_next_drop, next_index;
1747   u32 thread_index = vm->thread_index;
1748   u64 seed;
1749
1750   if (node->flags & VLIB_NODE_FLAG_TRACE)
1751     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1752
1753   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1754
1755   from = vlib_frame_vector_args (frame);
1756   n_left_from = frame->n_vectors;
1757   next_index = node->cached_next_index;
1758   if (next_index == IP4_ARP_NEXT_DROP)
1759     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1760
1761   while (n_left_from > 0)
1762     {
1763       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1764                            to_next_drop, n_left_to_next_drop);
1765
1766       while (n_left_from > 0 && n_left_to_next_drop > 0)
1767         {
1768           u32 pi0, bi0, adj_index0, sw_if_index0;
1769           ip_adjacency_t *adj0;
1770           vlib_buffer_t *p0, *b0;
1771           ip4_address_t resolve0;
1772           ethernet_arp_header_t *h0;
1773           vnet_hw_interface_t *hw_if0;
1774           u64 r0;
1775
1776           pi0 = from[0];
1777           p0 = vlib_get_buffer (vm, pi0);
1778
1779           from += 1;
1780           n_left_from -= 1;
1781           to_next_drop[0] = pi0;
1782           to_next_drop += 1;
1783           n_left_to_next_drop -= 1;
1784
1785           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1786           adj0 = adj_get (adj_index0);
1787
1788           if (is_glean)
1789             {
1790               /* resolve the packet's destination */
1791               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1792               resolve0 = ip0->dst_address;
1793             }
1794           else
1795             {
1796               /* resolve the incomplete adj */
1797               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1798             }
1799
1800           /* combine the address and interface for the hash key */
1801           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1802           r0 = (u64) resolve0.data_u32 << 32;
1803           r0 |= sw_if_index0;
1804
1805           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1806             {
1807               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1808               continue;
1809             }
1810
1811           /*
1812            * the adj has been updated to a rewrite but the node the DPO that got
1813            * us here hasn't - yet. no big deal. we'll drop while we wait.
1814            */
1815           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1816             {
1817               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1818               continue;
1819             }
1820
1821           /*
1822            * Can happen if the control-plane is programming tables
1823            * with traffic flowing; at least that's today's lame excuse.
1824            */
1825           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1826               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1827             {
1828               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1829               continue;
1830             }
1831           /* Send ARP request. */
1832           h0 =
1833             vlib_packet_template_get_packet (vm,
1834                                              &im->ip4_arp_request_packet_template,
1835                                              &bi0);
1836           /* Seems we're out of buffers */
1837           if (PREDICT_FALSE (!h0))
1838             {
1839               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1840               continue;
1841             }
1842
1843           b0 = vlib_get_buffer (vm, bi0);
1844
1845           /* copy the persistent fields from the original */
1846           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1847
1848           /* Add rewrite/encap string for ARP packet. */
1849           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1850
1851           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1852
1853           /* Src ethernet address in ARP header. */
1854           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
1855                                   hw_if0->hw_address);
1856           if (is_glean)
1857             {
1858               /* The interface's source address is stashed in the Glean Adj */
1859               h0->ip4_over_ethernet[0].ip4 =
1860                 adj0->sub_type.glean.receive_addr.ip4;
1861             }
1862           else
1863             {
1864               /* Src IP address in ARP header. */
1865               if (ip4_src_address_for_packet (lm, sw_if_index0,
1866                                               &h0->ip4_over_ethernet[0].ip4))
1867                 {
1868                   /* No source address available */
1869                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1870                   vlib_buffer_free (vm, &bi0, 1);
1871                   continue;
1872                 }
1873             }
1874           h0->ip4_over_ethernet[1].ip4 = resolve0;
1875
1876           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1877
1878           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1879           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1880           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1881
1882           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1883
1884           vlib_set_next_frame_buffer (vm, node,
1885                                       adj0->rewrite_header.next_index, bi0);
1886         }
1887
1888       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1889     }
1890
1891   return frame->n_vectors;
1892 }
1893
1894 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1895                              vlib_frame_t * frame)
1896 {
1897   return (ip4_arp_inline (vm, node, frame, 0));
1898 }
1899
1900 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1901                                vlib_frame_t * frame)
1902 {
1903   return (ip4_arp_inline (vm, node, frame, 1));
1904 }
1905
1906 static char *ip4_arp_error_strings[] = {
1907   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
1908   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
1909   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
1910   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1911   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1912   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1913 };
1914
1915 /* *INDENT-OFF* */
1916 VLIB_REGISTER_NODE (ip4_arp_node) =
1917 {
1918   .name = "ip4-arp",
1919   .vector_size = sizeof (u32),
1920   .format_trace = format_ip4_forward_next_trace,
1921   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1922   .error_strings = ip4_arp_error_strings,
1923   .n_next_nodes = IP4_ARP_N_NEXT,
1924   .next_nodes =
1925   {
1926     [IP4_ARP_NEXT_DROP] = "error-drop",
1927   },
1928 };
1929
1930 VLIB_REGISTER_NODE (ip4_glean_node) =
1931 {
1932   .name = "ip4-glean",
1933   .vector_size = sizeof (u32),
1934   .format_trace = format_ip4_forward_next_trace,
1935   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1936   .error_strings = ip4_arp_error_strings,
1937   .n_next_nodes = IP4_ARP_N_NEXT,
1938   .next_nodes = {
1939   [IP4_ARP_NEXT_DROP] = "error-drop",
1940   },
1941 };
1942 /* *INDENT-ON* */
1943
/*
 * X-macro listing the IP4_ARP_ERROR_* suffixes that arp_notrace_init()
 * hands to the pcap drop-trace filter.  The user must define _(a).
 */
#define foreach_notrace_ip4_arp_error   \
  _(THROTTLED)                          \
  _(RESOLVED)                           \
  _(NO_BUFFERS)                         \
  _(REQUEST_SENT)                       \
  _(NON_ARP_ADJ)                        \
  _(NO_SOURCE_ADDRESS)
1951
1952 static clib_error_t *
1953 arp_notrace_init (vlib_main_t * vm)
1954 {
1955   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1956
1957   /* don't trace ARP request packets */
1958 #define _(a)                                    \
1959     vnet_pcap_drop_trace_filter_add_del         \
1960         (rt->errors[IP4_ARP_ERROR_##a],         \
1961          1 /* is_add */);
1962   foreach_notrace_ip4_arp_error;
1963 #undef _
1964   return 0;
1965 }
1966
1967 VLIB_INIT_FUNCTION (arp_notrace_init);
1968
1969
1970 #ifndef CLIB_MARCH_VARIANT
1971 /* Send an ARP request to see if given destination is reachable on given interface. */
1972 clib_error_t *
1973 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1974                     u8 refresh)
1975 {
1976   vnet_main_t *vnm = vnet_get_main ();
1977   ip4_main_t *im = &ip4_main;
1978   ethernet_arp_header_t *h;
1979   ip4_address_t *src;
1980   ip_interface_address_t *ia;
1981   ip_adjacency_t *adj;
1982   vnet_hw_interface_t *hi;
1983   vnet_sw_interface_t *si;
1984   vlib_buffer_t *b;
1985   adj_index_t ai;
1986   u32 bi = 0;
1987   u8 unicast_rewrite = 0;
1988
1989   si = vnet_get_sw_interface (vnm, sw_if_index);
1990
1991   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1992     {
1993       return clib_error_return (0, "%U: interface %U down",
1994                                 format_ip4_address, dst,
1995                                 format_vnet_sw_if_index_name, vnm,
1996                                 sw_if_index);
1997     }
1998
1999   src =
2000     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2001   if (!src)
2002     {
2003       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2004       return clib_error_return
2005         (0,
2006          "no matching interface address for destination %U (interface %U)",
2007          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2008          sw_if_index);
2009     }
2010
2011   h = vlib_packet_template_get_packet (vm,
2012                                        &im->ip4_arp_request_packet_template,
2013                                        &bi);
2014
2015   if (!h)
2016     return clib_error_return (0, "ARP request packet allocation failed");
2017
2018   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2019   if (PREDICT_FALSE (!hi->hw_address))
2020     {
2021       return clib_error_return (0, "%U: interface %U do not support ip probe",
2022                                 format_ip4_address, dst,
2023                                 format_vnet_sw_if_index_name, vnm,
2024                                 sw_if_index);
2025     }
2026
2027   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2028
2029   h->ip4_over_ethernet[0].ip4 = src[0];
2030   h->ip4_over_ethernet[1].ip4 = dst[0];
2031
2032   b = vlib_get_buffer (vm, bi);
2033   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2034     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2035
2036   ip46_address_t nh = {
2037     .ip4 = *dst,
2038   };
2039
2040   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2041                             VNET_LINK_IP4, &nh, sw_if_index);
2042   adj = adj_get (ai);
2043
2044   /* Peer has been previously resolved, retrieve glean adj instead */
2045   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2046     {
2047       if (refresh)
2048         unicast_rewrite = 1;
2049       else
2050         {
2051           adj_unlock (ai);
2052           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2053                                       VNET_LINK_IP4, sw_if_index, &nh);
2054           adj = adj_get (ai);
2055         }
2056     }
2057
2058   /* Add encapsulation string for software interface (e.g. ethernet header). */
2059   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2060   if (unicast_rewrite)
2061     {
2062       u16 *etype = vlib_buffer_get_current (b) - 2;
2063       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2064     }
2065   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2066
2067   {
2068     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2069     u32 *to_next = vlib_frame_vector_args (f);
2070     to_next[0] = bi;
2071     f->n_vectors = 1;
2072     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2073   }
2074
2075   adj_unlock (ai);
2076   return /* no error */ 0;
2077 }
2078 #endif
2079
/* Static next-node slots of ip4-rewrite and its sibling nodes */
typedef enum
{
  IP4_REWRITE_NEXT_DROP,        /* registered as "ip4-drop" */
  IP4_REWRITE_NEXT_ICMP_ERROR,  /* registered as "ip4-icmp-error" */
  IP4_REWRITE_NEXT_FRAGMENT,    /* registered as "ip4-frag" */
  IP4_REWRITE_N_NEXT,           /* number of static nexts; keep last */
} ip4_rewrite_next_t;
2087
/**
 * The bits of an IPv4 address to keep when constructing a multicast
 * MAC address.  The mask is applied to the address as stored in the
 * packet (network byte order) and, on either endianness, selects the
 * low-order 23 bits of the address.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
/* host order == network order */
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
/* same bits, as seen through a byte-swapped load */
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif
2097
2098 always_inline void
2099 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2100                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2101 {
2102   if (packet_len > adj_packet_bytes)
2103     {
2104       *error = IP4_ERROR_MTU_EXCEEDED;
2105       if (df)
2106         {
2107           icmp4_error_set_vnet_buffer
2108             (b, ICMP4_destination_unreachable,
2109              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2110              adj_packet_bytes);
2111           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2112         }
2113       else
2114         {
2115           /* IP fragmentation */
2116           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2117                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2118           *next = IP4_REWRITE_NEXT_FRAGMENT;
2119         }
2120     }
2121 }
2122
2123 /* Decrement TTL & update checksum.
2124    Works either endian, so no need for byte swap. */
2125 static_always_inline void
2126 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2127                             u32 * error)
2128 {
2129   i32 ttl;
2130   u32 checksum;
2131   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2132     {
2133       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2134       return;
2135     }
2136
2137   ttl = ip->ttl;
2138
2139   /* Input node should have reject packets with ttl 0. */
2140   ASSERT (ip->ttl > 0);
2141
2142   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2143   checksum += checksum >= 0xffff;
2144
2145   ip->checksum = checksum;
2146   ttl -= 1;
2147   ip->ttl = ttl;
2148
2149   /*
2150    * If the ttl drops below 1 when forwarding, generate
2151    * an ICMP response.
2152    */
2153   if (PREDICT_FALSE (ttl <= 0))
2154     {
2155       *error = IP4_ERROR_TIME_EXPIRED;
2156       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2157       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2158                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2159                                    0);
2160       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2161     }
2162
2163   /* Verify checksum. */
2164   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2165           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2166 }
2167
2168
2169 always_inline uword
2170 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2171                              vlib_node_runtime_t * node,
2172                              vlib_frame_t * frame,
2173                              int do_counters, int is_midchain, int is_mcast,
2174                              int do_gso)
2175 {
2176   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2177   u32 *from = vlib_frame_vector_args (frame);
2178   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2179   u16 nexts[VLIB_FRAME_SIZE], *next;
2180   u32 n_left_from;
2181   vlib_node_runtime_t *error_node =
2182     vlib_node_get_runtime (vm, ip4_input_node.index);
2183
2184   n_left_from = frame->n_vectors;
2185   u32 thread_index = vm->thread_index;
2186
2187   vlib_get_buffers (vm, from, bufs, n_left_from);
2188   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2189
2190   if (n_left_from >= 6)
2191     {
2192       int i;
2193       for (i = 2; i < 6; i++)
2194         vlib_prefetch_buffer_header (bufs[i], LOAD);
2195     }
2196
2197   next = nexts;
2198   b = bufs;
2199   while (n_left_from >= 8)
2200     {
2201       ip_adjacency_t *adj0, *adj1;
2202       ip4_header_t *ip0, *ip1;
2203       u32 rw_len0, error0, adj_index0;
2204       u32 rw_len1, error1, adj_index1;
2205       u32 tx_sw_if_index0, tx_sw_if_index1;
2206       u8 *p;
2207
2208       vlib_prefetch_buffer_header (b[6], LOAD);
2209       vlib_prefetch_buffer_header (b[7], LOAD);
2210
2211       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2212       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2213
2214       /*
2215        * pre-fetch the per-adjacency counters
2216        */
2217       if (do_counters)
2218         {
2219           vlib_prefetch_combined_counter (&adjacency_counters,
2220                                           thread_index, adj_index0);
2221           vlib_prefetch_combined_counter (&adjacency_counters,
2222                                           thread_index, adj_index1);
2223         }
2224
2225       ip0 = vlib_buffer_get_current (b[0]);
2226       ip1 = vlib_buffer_get_current (b[1]);
2227
2228       error0 = error1 = IP4_ERROR_NONE;
2229
2230       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2231       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2232
2233       /* Rewrite packet header and updates lengths. */
2234       adj0 = adj_get (adj_index0);
2235       adj1 = adj_get (adj_index1);
2236
2237       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2238       rw_len0 = adj0[0].rewrite_header.data_bytes;
2239       rw_len1 = adj1[0].rewrite_header.data_bytes;
2240       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2241       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2242
2243       p = vlib_buffer_get_current (b[2]);
2244       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2245       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2246
2247       p = vlib_buffer_get_current (b[3]);
2248       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2249       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2250
2251       /* Check MTU of outgoing interface. */
2252       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2253       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2254
2255       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2256         ip0_len = gso_mtu_sz (b[0]);
2257       if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2258         ip1_len = gso_mtu_sz (b[1]);
2259
2260       ip4_mtu_check (b[0], ip0_len,
2261                      adj0[0].rewrite_header.max_l3_packet_bytes,
2262                      ip0->flags_and_fragment_offset &
2263                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2264                      next + 0, &error0);
2265       ip4_mtu_check (b[1], ip1_len,
2266                      adj1[0].rewrite_header.max_l3_packet_bytes,
2267                      ip1->flags_and_fragment_offset &
2268                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2269                      next + 1, &error1);
2270
2271       if (is_mcast)
2272         {
2273           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2274                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2275                     IP4_ERROR_SAME_INTERFACE : error0);
2276           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2277                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2278                     IP4_ERROR_SAME_INTERFACE : error1);
2279         }
2280
2281       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2282        * to see the IP header */
2283       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2284         {
2285           u32 next_index = adj0[0].rewrite_header.next_index;
2286           b[0]->current_data -= rw_len0;
2287           b[0]->current_length += rw_len0;
2288           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2289           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2290
2291           if (PREDICT_FALSE
2292               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2293             vnet_feature_arc_start (lm->output_feature_arc_index,
2294                                     tx_sw_if_index0, &next_index, b[0]);
2295           next[0] = next_index;
2296         }
2297       else
2298         {
2299           b[0]->error = error_node->errors[error0];
2300         }
2301       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2302         {
2303           u32 next_index = adj1[0].rewrite_header.next_index;
2304           b[1]->current_data -= rw_len1;
2305           b[1]->current_length += rw_len1;
2306
2307           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2308           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2309
2310           if (PREDICT_FALSE
2311               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2312             vnet_feature_arc_start (lm->output_feature_arc_index,
2313                                     tx_sw_if_index1, &next_index, b[1]);
2314           next[1] = next_index;
2315         }
2316       else
2317         {
2318           b[1]->error = error_node->errors[error1];
2319         }
2320       if (is_midchain)
2321         {
2322           calc_checksums (vm, b[0]);
2323           calc_checksums (vm, b[1]);
2324         }
2325       /* Guess we are only writing on simple Ethernet header. */
2326       vnet_rewrite_two_headers (adj0[0], adj1[0],
2327                                 ip0, ip1, sizeof (ethernet_header_t));
2328
2329       /*
2330        * Bump the per-adjacency counters
2331        */
2332       if (do_counters)
2333         {
2334           vlib_increment_combined_counter
2335             (&adjacency_counters,
2336              thread_index,
2337              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2338
2339           vlib_increment_combined_counter
2340             (&adjacency_counters,
2341              thread_index,
2342              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2343         }
2344
2345       if (is_midchain)
2346         {
2347           if (adj0->sub_type.midchain.fixup_func)
2348             adj0->sub_type.midchain.fixup_func
2349               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2350           if (adj1->sub_type.midchain.fixup_func)
2351             adj1->sub_type.midchain.fixup_func
2352               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2353         }
2354
2355       if (is_mcast)
2356         {
2357           /*
2358            * copy bytes from the IP address into the MAC rewrite
2359            */
2360           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2361                                       adj0->rewrite_header.dst_mcast_offset,
2362                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2363           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2364                                       adj1->rewrite_header.dst_mcast_offset,
2365                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2366         }
2367
2368       next += 2;
2369       b += 2;
2370       n_left_from -= 2;
2371     }
2372
2373   while (n_left_from > 0)
2374     {
2375       ip_adjacency_t *adj0;
2376       ip4_header_t *ip0;
2377       u32 rw_len0, adj_index0, error0;
2378       u32 tx_sw_if_index0;
2379
2380       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2381
2382       adj0 = adj_get (adj_index0);
2383
2384       if (do_counters)
2385         vlib_prefetch_combined_counter (&adjacency_counters,
2386                                         thread_index, adj_index0);
2387
2388       ip0 = vlib_buffer_get_current (b[0]);
2389
2390       error0 = IP4_ERROR_NONE;
2391
2392       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2393
2394
2395       /* Update packet buffer attributes/set output interface. */
2396       rw_len0 = adj0[0].rewrite_header.data_bytes;
2397       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2398
2399       /* Check MTU of outgoing interface. */
2400       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2401       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2402         ip0_len = gso_mtu_sz (b[0]);
2403
2404       ip4_mtu_check (b[0], ip0_len,
2405                      adj0[0].rewrite_header.max_l3_packet_bytes,
2406                      ip0->flags_and_fragment_offset &
2407                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2408                      next + 0, &error0);
2409
2410       if (is_mcast)
2411         {
2412           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2413                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2414                     IP4_ERROR_SAME_INTERFACE : error0);
2415         }
2416
2417       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2418        * to see the IP header */
2419       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2420         {
2421           u32 next_index = adj0[0].rewrite_header.next_index;
2422           b[0]->current_data -= rw_len0;
2423           b[0]->current_length += rw_len0;
2424           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2425           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2426
2427           if (PREDICT_FALSE
2428               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2429             vnet_feature_arc_start (lm->output_feature_arc_index,
2430                                     tx_sw_if_index0, &next_index, b[0]);
2431           next[0] = next_index;
2432         }
2433       else
2434         {
2435           b[0]->error = error_node->errors[error0];
2436         }
2437       if (is_midchain)
2438         {
2439           calc_checksums (vm, b[0]);
2440         }
2441       /* Guess we are only writing on simple Ethernet header. */
2442       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2443
2444       if (do_counters)
2445         vlib_increment_combined_counter
2446           (&adjacency_counters,
2447            thread_index, adj_index0, 1,
2448            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2449
2450       if (is_midchain)
2451         {
2452           if (adj0->sub_type.midchain.fixup_func)
2453             adj0->sub_type.midchain.fixup_func
2454               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2455         }
2456
2457       if (is_mcast)
2458         {
2459           /*
2460            * copy bytes from the IP address into the MAC rewrite
2461            */
2462           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2463                                       adj0->rewrite_header.dst_mcast_offset,
2464                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2465         }
2466
2467       next += 1;
2468       b += 1;
2469       n_left_from -= 1;
2470     }
2471
2472
2473   /* Need to do trace after rewrites to pick up new packet data. */
2474   if (node->flags & VLIB_NODE_FLAG_TRACE)
2475     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2476
2477   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2478   return frame->n_vectors;
2479 }
2480
2481 always_inline uword
2482 ip4_rewrite_inline (vlib_main_t * vm,
2483                     vlib_node_runtime_t * node,
2484                     vlib_frame_t * frame,
2485                     int do_counters, int is_midchain, int is_mcast)
2486 {
2487   vnet_main_t *vnm = vnet_get_main ();
2488   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2489     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2490                                         is_midchain, is_mcast,
2491                                         1 /* do_gso */ );
2492   else
2493     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2494                                         is_midchain, is_mcast,
2495                                         0 /* no do_gso */ );
2496 }
2497
2498
2499 /** @brief IPv4 rewrite node.
2500     @node ip4-rewrite
2501
2502     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2503     header checksum, fetch the ip adjacency, check the outbound mtu,
2504     apply the adjacency rewrite, and send pkts to the adjacency
2505     rewrite header's rewrite_next_index.
2506
2507     @param vm vlib_main_t corresponding to the current thread
2508     @param node vlib_node_runtime_t
2509     @param frame vlib_frame_t whose contents should be dispatched
2510
2511     @par Graph mechanics: buffer metadata, next index usage
2512
2513     @em Uses:
2514     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2515         - the rewrite adjacency index
2516     - <code>adj->lookup_next_index</code>
2517         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2518           the packet will be dropped.
2519     - <code>adj->rewrite_header</code>
2520         - Rewrite string length, rewrite string, next_index
2521
2522     @em Sets:
2523     - <code>b->current_data, b->current_length</code>
2524         - Updated net of applying the rewrite string
2525
2526     <em>Next Indices:</em>
2527     - <code> adj->rewrite_header.next_index </code>
2528       or @c ip4-drop
2529 */
2530
2531 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2532                                  vlib_frame_t * frame)
2533 {
2534   if (adj_are_counters_enabled ())
2535     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2536   else
2537     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2538 }
2539
2540 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2541                                        vlib_node_runtime_t * node,
2542                                        vlib_frame_t * frame)
2543 {
2544   if (adj_are_counters_enabled ())
2545     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2546   else
2547     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2548 }
2549
2550 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2551                                   vlib_node_runtime_t * node,
2552                                   vlib_frame_t * frame)
2553 {
2554   if (adj_are_counters_enabled ())
2555     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2556   else
2557     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2558 }
2559
2560 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2561                                        vlib_node_runtime_t * node,
2562                                        vlib_frame_t * frame)
2563 {
2564   if (adj_are_counters_enabled ())
2565     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2566   else
2567     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2568 }
2569
2570 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2571                                         vlib_node_runtime_t * node,
2572                                         vlib_frame_t * frame)
2573 {
2574   if (adj_are_counters_enabled ())
2575     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2576   else
2577     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2578 }
2579
2580 /* *INDENT-OFF* */
2581 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2582   .name = "ip4-rewrite",
2583   .vector_size = sizeof (u32),
2584
2585   .format_trace = format_ip4_rewrite_trace,
2586
2587   .n_next_nodes = IP4_REWRITE_N_NEXT,
2588   .next_nodes = {
2589     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2590     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2591     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2592   },
2593 };
2594
2595 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2596   .name = "ip4-rewrite-bcast",
2597   .vector_size = sizeof (u32),
2598
2599   .format_trace = format_ip4_rewrite_trace,
2600   .sibling_of = "ip4-rewrite",
2601 };
2602
2603 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2604   .name = "ip4-rewrite-mcast",
2605   .vector_size = sizeof (u32),
2606
2607   .format_trace = format_ip4_rewrite_trace,
2608   .sibling_of = "ip4-rewrite",
2609 };
2610
2611 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2612   .name = "ip4-mcast-midchain",
2613   .vector_size = sizeof (u32),
2614
2615   .format_trace = format_ip4_rewrite_trace,
2616   .sibling_of = "ip4-rewrite",
2617 };
2618
2619 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2620   .name = "ip4-midchain",
2621   .vector_size = sizeof (u32),
2622   .format_trace = format_ip4_forward_next_trace,
2623   .sibling_of =  "ip4-rewrite",
2624 };
/* *INDENT-ON* */
2626
2627 static int
2628 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2629 {
2630   ip4_fib_mtrie_t *mtrie0;
2631   ip4_fib_mtrie_leaf_t leaf0;
2632   u32 lbi0;
2633
2634   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2635
2636   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2637   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2638   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2639
2640   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2641
2642   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2643 }
2644
2645 static clib_error_t *
2646 test_lookup_command_fn (vlib_main_t * vm,
2647                         unformat_input_t * input, vlib_cli_command_t * cmd)
2648 {
2649   ip4_fib_t *fib;
2650   u32 table_id = 0;
2651   f64 count = 1;
2652   u32 n;
2653   int i;
2654   ip4_address_t ip4_base_address;
2655   u64 errors = 0;
2656
2657   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2658     {
2659       if (unformat (input, "table %d", &table_id))
2660         {
2661           /* Make sure the entry exists. */
2662           fib = ip4_fib_get (table_id);
2663           if ((fib) && (fib->index != table_id))
2664             return clib_error_return (0, "<fib-index> %d does not exist",
2665                                       table_id);
2666         }
2667       else if (unformat (input, "count %f", &count))
2668         ;
2669
2670       else if (unformat (input, "%U",
2671                          unformat_ip4_address, &ip4_base_address))
2672         ;
2673       else
2674         return clib_error_return (0, "unknown input `%U'",
2675                                   format_unformat_error, input);
2676     }
2677
2678   n = count;
2679
2680   for (i = 0; i < n; i++)
2681     {
2682       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2683         errors++;
2684
2685       ip4_base_address.as_u32 =
2686         clib_host_to_net_u32 (1 +
2687                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2688     }
2689
2690   if (errors)
2691     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2692   else
2693     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2694
2695   return 0;
2696 }
2697
2698 /*?
2699  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2700  * given FIB table to determine if there is a conflict with the
2701  * adjacency table. The fib-id can be determined by using the
2702  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2703  * of 0 is used.
2704  *
2705  * @todo This command uses fib-id, other commands use table-id (not
2706  * just a name, they are different indexes). Would like to change this
2707  * to table-id for consistency.
2708  *
2709  * @cliexpar
2710  * Example of how to run the test lookup command:
2711  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2712  * No errors in 2 lookups
2713  * @cliexend
2714 ?*/
2715 /* *INDENT-OFF* */
2716 VLIB_CLI_COMMAND (lookup_test_command, static) =
2717 {
2718   .path = "test lookup",
2719   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2720   .function = test_lookup_command_fn,
2721 };
2722 /* *INDENT-ON* */
2723
2724 #ifndef CLIB_MARCH_VARIANT
2725 int
2726 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2727 {
2728   u32 fib_index;
2729
2730   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2731
2732   if (~0 == fib_index)
2733     return VNET_API_ERROR_NO_SUCH_FIB;
2734
2735   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2736                                   flow_hash_config);
2737
2738   return 0;
2739 }
2740 #endif
2741
2742 static clib_error_t *
2743 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2744                              unformat_input_t * input,
2745                              vlib_cli_command_t * cmd)
2746 {
2747   int matched = 0;
2748   u32 table_id = 0;
2749   u32 flow_hash_config = 0;
2750   int rv;
2751
2752   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2753     {
2754       if (unformat (input, "table %d", &table_id))
2755         matched = 1;
2756 #define _(a,v) \
2757     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2758       foreach_flow_hash_bit
2759 #undef _
2760         else
2761         break;
2762     }
2763
2764   if (matched == 0)
2765     return clib_error_return (0, "unknown input `%U'",
2766                               format_unformat_error, input);
2767
2768   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2769   switch (rv)
2770     {
2771     case 0:
2772       break;
2773
2774     case VNET_API_ERROR_NO_SUCH_FIB:
2775       return clib_error_return (0, "no such FIB table %d", table_id);
2776
2777     default:
2778       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2779       break;
2780     }
2781
2782   return 0;
2783 }
2784
2785 /*?
2786  * Configure the set of IPv4 fields used by the flow hash.
2787  *
2788  * @cliexpar
2789  * Example of how to set the flow hash on a given table:
2790  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2792  * @cliexstart{show ip fib}
2793  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2794  * 0.0.0.0/0
2795  *   unicast-ip4-chain
2796  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2797  *     [0] [@0]: dpo-drop ip6
2798  * 0.0.0.0/32
2799  *   unicast-ip4-chain
2800  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2801  *     [0] [@0]: dpo-drop ip6
2802  * 224.0.0.0/8
2803  *   unicast-ip4-chain
2804  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2805  *     [0] [@0]: dpo-drop ip6
2806  * 6.0.1.2/32
2807  *   unicast-ip4-chain
2808  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2809  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2810  * 7.0.0.1/32
2811  *   unicast-ip4-chain
2812  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2813  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2814  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2815  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2816  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2817  * 240.0.0.0/8
2818  *   unicast-ip4-chain
2819  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2820  *     [0] [@0]: dpo-drop ip6
2821  * 255.255.255.255/32
2822  *   unicast-ip4-chain
2823  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2824  *     [0] [@0]: dpo-drop ip6
2825  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2826  * 0.0.0.0/0
2827  *   unicast-ip4-chain
2828  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2829  *     [0] [@0]: dpo-drop ip6
2830  * 0.0.0.0/32
2831  *   unicast-ip4-chain
2832  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2833  *     [0] [@0]: dpo-drop ip6
2834  * 172.16.1.0/24
2835  *   unicast-ip4-chain
2836  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2837  *     [0] [@4]: ipv4-glean: af_packet0
2838  * 172.16.1.1/32
2839  *   unicast-ip4-chain
2840  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2841  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2842  * 172.16.1.2/32
2843  *   unicast-ip4-chain
2844  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2845  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2846  * 172.16.2.0/24
2847  *   unicast-ip4-chain
2848  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2849  *     [0] [@4]: ipv4-glean: af_packet1
2850  * 172.16.2.1/32
2851  *   unicast-ip4-chain
2852  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2853  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2854  * 224.0.0.0/8
2855  *   unicast-ip4-chain
2856  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2857  *     [0] [@0]: dpo-drop ip6
2858  * 240.0.0.0/8
2859  *   unicast-ip4-chain
2860  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2861  *     [0] [@0]: dpo-drop ip6
2862  * 255.255.255.255/32
2863  *   unicast-ip4-chain
2864  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2865  *     [0] [@0]: dpo-drop ip6
2866  * @cliexend
2867 ?*/
2868 /* *INDENT-OFF* */
2869 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2870 {
2871   .path = "set ip flow-hash",
2872   .short_help =
2873   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2874   .function = set_ip_flow_hash_command_fn,
2875 };
2876 /* *INDENT-ON* */
2877
2878 #ifndef CLIB_MARCH_VARIANT
2879 int
2880 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2881                              u32 table_index)
2882 {
2883   vnet_main_t *vnm = vnet_get_main ();
2884   vnet_interface_main_t *im = &vnm->interface_main;
2885   ip4_main_t *ipm = &ip4_main;
2886   ip_lookup_main_t *lm = &ipm->lookup_main;
2887   vnet_classify_main_t *cm = &vnet_classify_main;
2888   ip4_address_t *if_addr;
2889
2890   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2891     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2892
2893   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2894     return VNET_API_ERROR_NO_SUCH_ENTRY;
2895
2896   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2897   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2898
2899   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2900
2901   if (NULL != if_addr)
2902     {
2903       fib_prefix_t pfx = {
2904         .fp_len = 32,
2905         .fp_proto = FIB_PROTOCOL_IP4,
2906         .fp_addr.ip4 = *if_addr,
2907       };
2908       u32 fib_index;
2909
2910       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2911                                                        sw_if_index);
2912
2913
2914       if (table_index != (u32) ~ 0)
2915         {
2916           dpo_id_t dpo = DPO_INVALID;
2917
2918           dpo_set (&dpo,
2919                    DPO_CLASSIFY,
2920                    DPO_PROTO_IP4,
2921                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2922
2923           fib_table_entry_special_dpo_add (fib_index,
2924                                            &pfx,
2925                                            FIB_SOURCE_CLASSIFY,
2926                                            FIB_ENTRY_FLAG_NONE, &dpo);
2927           dpo_reset (&dpo);
2928         }
2929       else
2930         {
2931           fib_table_entry_special_remove (fib_index,
2932                                           &pfx, FIB_SOURCE_CLASSIFY);
2933         }
2934     }
2935
2936   return 0;
2937 }
2938 #endif
2939
2940 static clib_error_t *
2941 set_ip_classify_command_fn (vlib_main_t * vm,
2942                             unformat_input_t * input,
2943                             vlib_cli_command_t * cmd)
2944 {
2945   u32 table_index = ~0;
2946   int table_index_set = 0;
2947   u32 sw_if_index = ~0;
2948   int rv;
2949
2950   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2951     {
2952       if (unformat (input, "table-index %d", &table_index))
2953         table_index_set = 1;
2954       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2955                          vnet_get_main (), &sw_if_index))
2956         ;
2957       else
2958         break;
2959     }
2960
2961   if (table_index_set == 0)
2962     return clib_error_return (0, "classify table-index must be specified");
2963
2964   if (sw_if_index == ~0)
2965     return clib_error_return (0, "interface / subif must be specified");
2966
2967   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2968
2969   switch (rv)
2970     {
2971     case 0:
2972       break;
2973
2974     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2975       return clib_error_return (0, "No such interface");
2976
2977     case VNET_API_ERROR_NO_SUCH_ENTRY:
2978       return clib_error_return (0, "No such classifier table");
2979     }
2980   return 0;
2981 }
2982
2983 /*?
2984  * Assign a classification table to an interface. The classification
2985  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2987  * on an interface.
2988  *
2989  * @cliexpar
2990  * Example of how to assign a classification table to an interface:
2991  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2992 ?*/
2993 /* *INDENT-OFF* */
2994 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2995 {
2996     .path = "set ip classify",
2997     .short_help =
2998     "set ip classify intfc <interface> table-index <classify-idx>",
2999     .function = set_ip_classify_command_fn,
3000 };
3001 /* *INDENT-ON* */
3002
3003 static clib_error_t *
3004 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3005 {
3006   ip4_main_t *im = &ip4_main;
3007   uword heapsize = 0;
3008
3009   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3010     {
3011       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3012         ;
3013       else
3014         return clib_error_return (0,
3015                                   "invalid heap-size parameter `%U'",
3016                                   format_unformat_error, input);
3017     }
3018
3019   im->mtrie_heap_size = heapsize;
3020
3021   return 0;
3022 }
3023
3024 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3025
3026 /*
3027  * fd.io coding-style-patch-verification: ON
3028  *
3029  * Local Variables:
3030  * eval: (c-set-style "gnu")
3031  * End:
3032  */