Use IP and MAC API types for neighbors
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58
59 /** @brief IPv4 lookup node.
60     @node ip4-lookup
61
62     This is the main IPv4 lookup dispatch node.
63
64     @param vm vlib_main_t corresponding to the current thread
65     @param node vlib_node_runtime_t
66     @param frame vlib_frame_t whose contents should be dispatched
67
68     @par Graph mechanics: buffer metadata, next index usage
69
70     @em Uses:
71     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
72         - Indicates the @c sw_if_index value of the interface that the
73           packet was received on.
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
75         - When the value is @c ~0 then the node performs a longest prefix
76           match (LPM) for the packet destination address in the FIB attached
77           to the receive interface.
78         - Otherwise perform LPM for the packet destination address in the
79           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
80           value (0, 1, ...) and not a VRF id.
81
82     @em Sets:
83     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
84         - The lookup result adjacency index.
85
86     <em>Next Index:</em>
87     - Dispatches the packet to the node index found in
88       ip_adjacency_t @c adj->lookup_next_index
89       (where @c adj is the lookup result adjacency).
90 */
91 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
92                                 vlib_frame_t * frame)
93 {
94   return ip4_lookup_inline (vm, node, frame,
95                             /* lookup_for_responses_to_locally_received_packets */
96                             0);
97
98 }
99
100 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
101
102 /* *INDENT-OFF* */
103 VLIB_REGISTER_NODE (ip4_lookup_node) =
104 {
105   .name = "ip4-lookup",
106   .vector_size = sizeof (u32),
107   .format_trace = format_ip4_lookup_trace,
108   .n_next_nodes = IP_LOOKUP_N_NEXT,
109   .next_nodes = IP4_LOOKUP_NEXT_NODES,
110 };
111 /* *INDENT-ON* */
112
113 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
114                                       vlib_node_runtime_t * node,
115                                       vlib_frame_t * frame)
116 {
117   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
118   u32 n_left_from, n_left_to_next, *from, *to_next;
119   ip_lookup_next_t next;
120   u32 thread_index = vm->thread_index;
121
122   from = vlib_frame_vector_args (frame);
123   n_left_from = frame->n_vectors;
124   next = node->cached_next_index;
125
126   while (n_left_from > 0)
127     {
128       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
129
130
131       while (n_left_from >= 4 && n_left_to_next >= 2)
132         {
133           ip_lookup_next_t next0, next1;
134           const load_balance_t *lb0, *lb1;
135           vlib_buffer_t *p0, *p1;
136           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
137           const ip4_header_t *ip0, *ip1;
138           const dpo_id_t *dpo0, *dpo1;
139
140           /* Prefetch next iteration. */
141           {
142             vlib_buffer_t *p2, *p3;
143
144             p2 = vlib_get_buffer (vm, from[2]);
145             p3 = vlib_get_buffer (vm, from[3]);
146
147             vlib_prefetch_buffer_header (p2, STORE);
148             vlib_prefetch_buffer_header (p3, STORE);
149
150             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
151             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
152           }
153
154           pi0 = to_next[0] = from[0];
155           pi1 = to_next[1] = from[1];
156
157           from += 2;
158           n_left_from -= 2;
159           to_next += 2;
160           n_left_to_next -= 2;
161
162           p0 = vlib_get_buffer (vm, pi0);
163           p1 = vlib_get_buffer (vm, pi1);
164
165           ip0 = vlib_buffer_get_current (p0);
166           ip1 = vlib_buffer_get_current (p1);
167           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
168           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
169
170           lb0 = load_balance_get (lbi0);
171           lb1 = load_balance_get (lbi1);
172
173           /*
174            * this node is for via FIBs we can re-use the hash value from the
175            * to node if present.
176            * We don't want to use the same hash value at each level in the recursion
177            * graph as that would lead to polarisation
178            */
179           hc0 = hc1 = 0;
180
181           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
182             {
183               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
184                 {
185                   hc0 = vnet_buffer (p0)->ip.flow_hash =
186                     vnet_buffer (p0)->ip.flow_hash >> 1;
187                 }
188               else
189                 {
190                   hc0 = vnet_buffer (p0)->ip.flow_hash =
191                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
192                 }
193               dpo0 = load_balance_get_fwd_bucket
194                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
195             }
196           else
197             {
198               dpo0 = load_balance_get_bucket_i (lb0, 0);
199             }
200           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
201             {
202               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
203                 {
204                   hc1 = vnet_buffer (p1)->ip.flow_hash =
205                     vnet_buffer (p1)->ip.flow_hash >> 1;
206                 }
207               else
208                 {
209                   hc1 = vnet_buffer (p1)->ip.flow_hash =
210                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
211                 }
212               dpo1 = load_balance_get_fwd_bucket
213                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
214             }
215           else
216             {
217               dpo1 = load_balance_get_bucket_i (lb1, 0);
218             }
219
220           next0 = dpo0->dpoi_next_node;
221           next1 = dpo1->dpoi_next_node;
222
223           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
224           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
225
226           vlib_increment_combined_counter
227             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
228           vlib_increment_combined_counter
229             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
230
231           vlib_validate_buffer_enqueue_x2 (vm, node, next,
232                                            to_next, n_left_to_next,
233                                            pi0, pi1, next0, next1);
234         }
235
236       while (n_left_from > 0 && n_left_to_next > 0)
237         {
238           ip_lookup_next_t next0;
239           const load_balance_t *lb0;
240           vlib_buffer_t *p0;
241           u32 pi0, lbi0, hc0;
242           const ip4_header_t *ip0;
243           const dpo_id_t *dpo0;
244
245           pi0 = from[0];
246           to_next[0] = pi0;
247           from += 1;
248           to_next += 1;
249           n_left_to_next -= 1;
250           n_left_from -= 1;
251
252           p0 = vlib_get_buffer (vm, pi0);
253
254           ip0 = vlib_buffer_get_current (p0);
255           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
256
257           lb0 = load_balance_get (lbi0);
258
259           hc0 = 0;
260           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
261             {
262               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
263                 {
264                   hc0 = vnet_buffer (p0)->ip.flow_hash =
265                     vnet_buffer (p0)->ip.flow_hash >> 1;
266                 }
267               else
268                 {
269                   hc0 = vnet_buffer (p0)->ip.flow_hash =
270                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
271                 }
272               dpo0 = load_balance_get_fwd_bucket
273                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
274             }
275           else
276             {
277               dpo0 = load_balance_get_bucket_i (lb0, 0);
278             }
279
280           next0 = dpo0->dpoi_next_node;
281           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
282
283           vlib_increment_combined_counter
284             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
285
286           vlib_validate_buffer_enqueue_x1 (vm, node, next,
287                                            to_next, n_left_to_next,
288                                            pi0, next0);
289         }
290
291       vlib_put_next_frame (vm, node, next, n_left_to_next);
292     }
293
294   if (node->flags & VLIB_NODE_FLAG_TRACE)
295     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
296
297   return frame->n_vectors;
298 }
299
300 /* *INDENT-OFF* */
301 VLIB_REGISTER_NODE (ip4_load_balance_node) =
302 {
303   .name = "ip4-load-balance",
304   .vector_size = sizeof (u32),
305   .sibling_of = "ip4-lookup",
306   .format_trace = format_ip4_lookup_trace,
307 };
308 /* *INDENT-ON* */
309
310 #ifndef CLIB_MARCH_VARIANT
311 /* get first interface address */
312 ip4_address_t *
313 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
314                              ip_interface_address_t ** result_ia)
315 {
316   ip_lookup_main_t *lm = &im->lookup_main;
317   ip_interface_address_t *ia = 0;
318   ip4_address_t *result = 0;
319
320   /* *INDENT-OFF* */
321   foreach_ip_interface_address
322     (lm, ia, sw_if_index,
323      1 /* honor unnumbered */ ,
324      ({
325        ip4_address_t * a =
326          ip_interface_address_get_address (lm, ia);
327        result = a;
328        break;
329      }));
330   /* *INDENT-OFF* */
331   if (result_ia)
332     *result_ia = result ? ia : 0;
333   return result;
334 }
335
336 static void
337 ip4_add_subnet_bcast_route (u32 fib_index,
338                             fib_prefix_t *pfx,
339                             u32 sw_if_index)
340 {
341   vnet_sw_interface_flags_t iflags;
342
343   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
344
345   fib_table_entry_special_remove(fib_index,
346                                  pfx,
347                                  FIB_SOURCE_INTERFACE);
348
349   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
350     {
351       fib_table_entry_update_one_path (fib_index, pfx,
352                                        FIB_SOURCE_INTERFACE,
353                                        FIB_ENTRY_FLAG_NONE,
354                                        DPO_PROTO_IP4,
355                                        /* No next-hop address */
356                                        &ADJ_BCAST_ADDR,
357                                        sw_if_index,
358                                        // invalid FIB index
359                                        ~0,
360                                        1,
361                                        // no out-label stack
362                                        NULL,
363                                        FIB_ROUTE_PATH_FLAG_NONE);
364     }
365   else
366     {
367         fib_table_entry_special_add(fib_index,
368                                     pfx,
369                                     FIB_SOURCE_INTERFACE,
370                                     (FIB_ENTRY_FLAG_DROP |
371                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
372     }
373 }
374
375 static void
376 ip4_add_interface_routes (u32 sw_if_index,
377                           ip4_main_t * im, u32 fib_index,
378                           ip_interface_address_t * a)
379 {
380   ip_lookup_main_t *lm = &im->lookup_main;
381   ip4_address_t *address = ip_interface_address_get_address (lm, a);
382   fib_prefix_t pfx = {
383     .fp_len = a->address_length,
384     .fp_proto = FIB_PROTOCOL_IP4,
385     .fp_addr.ip4 = *address,
386   };
387
388   if (pfx.fp_len <= 30)
389     {
390       /* a /30 or shorter - add a glean for the network address */
391       fib_table_entry_update_one_path (fib_index, &pfx,
392                                        FIB_SOURCE_INTERFACE,
393                                        (FIB_ENTRY_FLAG_CONNECTED |
394                                         FIB_ENTRY_FLAG_ATTACHED),
395                                        DPO_PROTO_IP4,
396                                        /* No next-hop address */
397                                        NULL,
398                                        sw_if_index,
399                                        // invalid FIB index
400                                        ~0,
401                                        1,
402                                        // no out-label stack
403                                        NULL,
404                                        FIB_ROUTE_PATH_FLAG_NONE);
405
406       /* Add the two broadcast addresses as drop */
407       fib_prefix_t net_pfx = {
408         .fp_len = 32,
409         .fp_proto = FIB_PROTOCOL_IP4,
410         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
411       };
412       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
413         fib_table_entry_special_add(fib_index,
414                                     &net_pfx,
415                                     FIB_SOURCE_INTERFACE,
416                                     (FIB_ENTRY_FLAG_DROP |
417                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
418       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
419       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
420         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
421     }
422   else if (pfx.fp_len == 31)
423     {
424       u32 mask = clib_host_to_net_u32(1);
425       fib_prefix_t net_pfx = pfx;
426
427       net_pfx.fp_len = 32;
428       net_pfx.fp_addr.ip4.as_u32 ^= mask;
429
430       /* a /31 - add the other end as an attached host */
431       fib_table_entry_update_one_path (fib_index, &net_pfx,
432                                        FIB_SOURCE_INTERFACE,
433                                        (FIB_ENTRY_FLAG_ATTACHED),
434                                        DPO_PROTO_IP4,
435                                        &net_pfx.fp_addr,
436                                        sw_if_index,
437                                        // invalid FIB index
438                                        ~0,
439                                        1,
440                                        NULL,
441                                        FIB_ROUTE_PATH_FLAG_NONE);
442     }
443   pfx.fp_len = 32;
444
445   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
446     {
447       u32 classify_table_index =
448         lm->classify_table_index_by_sw_if_index[sw_if_index];
449       if (classify_table_index != (u32) ~ 0)
450         {
451           dpo_id_t dpo = DPO_INVALID;
452
453           dpo_set (&dpo,
454                    DPO_CLASSIFY,
455                    DPO_PROTO_IP4,
456                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
457
458           fib_table_entry_special_dpo_add (fib_index,
459                                            &pfx,
460                                            FIB_SOURCE_CLASSIFY,
461                                            FIB_ENTRY_FLAG_NONE, &dpo);
462           dpo_reset (&dpo);
463         }
464     }
465
466   fib_table_entry_update_one_path (fib_index, &pfx,
467                                    FIB_SOURCE_INTERFACE,
468                                    (FIB_ENTRY_FLAG_CONNECTED |
469                                     FIB_ENTRY_FLAG_LOCAL),
470                                    DPO_PROTO_IP4,
471                                    &pfx.fp_addr,
472                                    sw_if_index,
473                                    // invalid FIB index
474                                    ~0,
475                                    1, NULL,
476                                    FIB_ROUTE_PATH_FLAG_NONE);
477 }
478
479 static void
480 ip4_del_interface_routes (ip4_main_t * im,
481                           u32 fib_index,
482                           ip4_address_t * address, u32 address_length)
483 {
484   fib_prefix_t pfx = {
485     .fp_len = address_length,
486     .fp_proto = FIB_PROTOCOL_IP4,
487     .fp_addr.ip4 = *address,
488   };
489
490   if (pfx.fp_len <= 30)
491     {
492       fib_prefix_t net_pfx = {
493         .fp_len = 32,
494         .fp_proto = FIB_PROTOCOL_IP4,
495         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
496       };
497       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
498         fib_table_entry_special_remove(fib_index,
499                                        &net_pfx,
500                                        FIB_SOURCE_INTERFACE);
501       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
502       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
503         fib_table_entry_special_remove(fib_index,
504                                        &net_pfx,
505                                        FIB_SOURCE_INTERFACE);
506       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
507     }
508     else if (pfx.fp_len == 31)
509     {
510       u32 mask = clib_host_to_net_u32(1);
511       fib_prefix_t net_pfx = pfx;
512
513       net_pfx.fp_len = 32;
514       net_pfx.fp_addr.ip4.as_u32 ^= mask;
515
516       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
517     }
518
519   pfx.fp_len = 32;
520   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
521 }
522
523 void
524 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
525 {
526   ip4_main_t *im = &ip4_main;
527
528   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
529
530   /*
531    * enable/disable only on the 1<->0 transition
532    */
533   if (is_enable)
534     {
535       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
536         return;
537     }
538   else
539     {
540       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
541       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
542         return;
543     }
544   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
545                                !is_enable, 0, 0);
546
547
548   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
549                                sw_if_index, !is_enable, 0, 0);
550 }
551
552 static clib_error_t *
553 ip4_add_del_interface_address_internal (vlib_main_t * vm,
554                                         u32 sw_if_index,
555                                         ip4_address_t * address,
556                                         u32 address_length, u32 is_del)
557 {
558   vnet_main_t *vnm = vnet_get_main ();
559   ip4_main_t *im = &ip4_main;
560   ip_lookup_main_t *lm = &im->lookup_main;
561   clib_error_t *error = 0;
562   u32 if_address_index, elts_before;
563   ip4_address_fib_t ip4_af, *addr_fib = 0;
564
565   /* local0 interface doesn't support IP addressing  */
566   if (sw_if_index == 0)
567     {
568       return
569        clib_error_create ("local0 interface doesn't support IP addressing");
570     }
571
572   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
573   ip4_addr_fib_init (&ip4_af, address,
574                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
575   vec_add1 (addr_fib, ip4_af);
576
577   /*
578    * there is no support for adj-fib handling in the presence of overlapping
579    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
580    * most routers do.
581    */
582   /* *INDENT-OFF* */
583   if (!is_del)
584     {
585       /* When adding an address check that it does not conflict
586          with an existing address on any interface in this table. */
587       ip_interface_address_t *ia;
588       vnet_sw_interface_t *sif;
589
590       pool_foreach(sif, vnm->interface_main.sw_interfaces,
591       ({
592           if (im->fib_index_by_sw_if_index[sw_if_index] ==
593               im->fib_index_by_sw_if_index[sif->sw_if_index])
594             {
595               foreach_ip_interface_address
596                 (&im->lookup_main, ia, sif->sw_if_index,
597                  0 /* honor unnumbered */ ,
598                  ({
599                    ip4_address_t * x =
600                      ip_interface_address_get_address
601                      (&im->lookup_main, ia);
602                    if (ip4_destination_matches_route
603                        (im, address, x, ia->address_length) ||
604                        ip4_destination_matches_route (im,
605                                                       x,
606                                                       address,
607                                                       address_length))
608                      {
609                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
610
611                        return
612                          clib_error_create
613                          ("failed to add %U which conflicts with %U for interface %U",
614                           format_ip4_address_and_length, address,
615                           address_length,
616                           format_ip4_address_and_length, x,
617                           ia->address_length,
618                           format_vnet_sw_if_index_name, vnm,
619                           sif->sw_if_index);
620                      }
621                  }));
622             }
623       }));
624     }
625   /* *INDENT-ON* */
626
627   elts_before = pool_elts (lm->if_address_pool);
628
629   error = ip_interface_address_add_del
630     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
631   if (error)
632     goto done;
633
634   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
635
636   if (is_del)
637     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
638   else
639     ip4_add_interface_routes (sw_if_index,
640                               im, ip4_af.fib_index,
641                               pool_elt_at_index
642                               (lm->if_address_pool, if_address_index));
643
644   /* If pool did not grow/shrink: add duplicate address. */
645   if (elts_before != pool_elts (lm->if_address_pool))
646     {
647       ip4_add_del_interface_address_callback_t *cb;
648       vec_foreach (cb, im->add_del_interface_address_callbacks)
649         cb->function (im, cb->function_opaque, sw_if_index,
650                       address, address_length, if_address_index, is_del);
651     }
652
653 done:
654   vec_free (addr_fib);
655   return error;
656 }
657
658 clib_error_t *
659 ip4_add_del_interface_address (vlib_main_t * vm,
660                                u32 sw_if_index,
661                                ip4_address_t * address,
662                                u32 address_length, u32 is_del)
663 {
664   return ip4_add_del_interface_address_internal
665     (vm, sw_if_index, address, address_length, is_del);
666 }
667
668 void
669 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
670 {
671   ip_interface_address_t *ia;
672   ip4_main_t *im;
673
674   im = &ip4_main;
675
676   /*
677    * when directed broadcast is enabled, the subnet braodcast route will forward
678    * packets using an adjacency with a broadcast MAC. otherwise it drops
679    */
680   /* *INDENT-OFF* */
681   foreach_ip_interface_address(&im->lookup_main, ia,
682                                sw_if_index, 0,
683      ({
684        if (ia->address_length <= 30)
685          {
686            ip4_address_t *ipa;
687
688            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
689
690            fib_prefix_t pfx = {
691              .fp_len = 32,
692              .fp_proto = FIB_PROTOCOL_IP4,
693              .fp_addr = {
694                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
695              },
696            };
697
698            ip4_add_subnet_bcast_route
699              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
700                                                   sw_if_index),
701               &pfx, sw_if_index);
702          }
703      }));
704   /* *INDENT-ON* */
705 }
706 #endif
707
708 /* Built-in ip4 unicast rx feature arc ("ip4-unicast"): entered from
       ip4-input / ip4-input-no-checksum and terminated by ip4-lookup. Each
       feature below names, via .runs_before, what it must precede on the arc. */
709 /* *INDENT-OFF* */
710 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
711 {
712   .arc_name = "ip4-unicast",
713   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
714   .last_in_arc = "ip4-lookup",
715   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index, /* arc index is stored in ip4_main */
716 };
717
718 VNET_FEATURE_INIT (ip4_flow_classify, static) =
719 {
720   .arc_name = "ip4-unicast",
721   .node_name = "ip4-flow-classify",
722   .runs_before = VNET_FEATURES ("ip4-inacl"),
723 };
724
725 VNET_FEATURE_INIT (ip4_inacl, static) =
726 {
727   .arc_name = "ip4-unicast",
728   .node_name = "ip4-inacl",
729   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
730 };
731
732 VNET_FEATURE_INIT (ip4_source_check_1, static) =
733 {
734   .arc_name = "ip4-unicast",
735   .node_name = "ip4-source-check-via-rx",
736   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
737 };
738
739 VNET_FEATURE_INIT (ip4_source_check_2, static) =
740 {
741   .arc_name = "ip4-unicast",
742   .node_name = "ip4-source-check-via-any",
743   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
744 };
745
746 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
747 {
748   .arc_name = "ip4-unicast",
749   .node_name = "ip4-source-and-port-range-check-rx",
750   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
751 };
752
753 VNET_FEATURE_INIT (ip4_policer_classify, static) =
754 {
755   .arc_name = "ip4-unicast",
756   .node_name = "ip4-policer-classify",
757   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
758 };
759
760 VNET_FEATURE_INIT (ip4_ipsec, static) =
761 {
762   .arc_name = "ip4-unicast",
763   .node_name = "ipsec4-input-feature",
764   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
765 };
766
767 VNET_FEATURE_INIT (ip4_vpath, static) =
768 {
769   .arc_name = "ip4-unicast",
770   .node_name = "vpath-input-ip4",
771   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
772 };
773
774 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
775 {
776   .arc_name = "ip4-unicast",
777   .node_name = "ip4-vxlan-bypass",
778   .runs_before = VNET_FEATURES ("ip4-lookup"),
779 };
780
781 VNET_FEATURE_INIT (ip4_not_enabled, static) =
782 {
783   .arc_name = "ip4-unicast",
784   .node_name = "ip4-not-enabled", /* switched per interface via vnet_feature_enable_disable() elsewhere in this file */
785   .runs_before = VNET_FEATURES ("ip4-lookup"),
786 };
787
788 VNET_FEATURE_INIT (ip4_lookup, static) =
789 {
790   .arc_name = "ip4-unicast",
791   .node_name = "ip4-lookup",
792   .runs_before = 0,     /* not before any other features; also the arc's last_in_arc */
793 };
794
795 /* Built-in ip4 multicast rx feature arc ("ip4-multicast"): entered from
       ip4-input / ip4-input-no-checksum and terminated by
       ip4-mfib-forward-lookup. */
796 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
797 {
798   .arc_name = "ip4-multicast",
799   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
800   .last_in_arc = "ip4-mfib-forward-lookup",
801   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index, /* arc index is stored in ip4_main */
802 };
803
804 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
805 {
806   .arc_name = "ip4-multicast",
807   .node_name = "vpath-input-ip4",
808   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
809 };
810
811 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
812 {
813   .arc_name = "ip4-multicast",
814   .node_name = "ip4-not-enabled", /* switched per interface via vnet_feature_enable_disable() elsewhere in this file */
815   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
816 };
817
818 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
819 {
820   .arc_name = "ip4-multicast",
821   .node_name = "ip4-mfib-forward-lookup",
822   .runs_before = 0,     /* last feature; matches the arc's last_in_arc */
823 };
824
825 /* Built-in ip4 tx feature arc ("ip4-output"): entered from ip4-rewrite,
       ip4-midchain and ip4-dvr-dpo, terminated by interface-output. */
826 VNET_FEATURE_ARC_INIT (ip4_output, static) =
827 {
828   .arc_name = "ip4-output",
829   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
830   .last_in_arc = "interface-output",
831   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index, /* arc index is stored in ip4_main */
832 };
833
834 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = /* source and port-range check, tx side */
835 {
836   .arc_name = "ip4-output",
837   .node_name = "ip4-source-and-port-range-check-tx",
838   .runs_before = VNET_FEATURES ("ip4-outacl"),
839 };
840
841 VNET_FEATURE_INIT (ip4_outacl, static) =
842 {
843   .arc_name = "ip4-output",
844   .node_name = "ip4-outacl",
845   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
846 };
847
848 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
849 {
850   .arc_name = "ip4-output",
851   .node_name = "ipsec4-output-feature",
852   .runs_before = VNET_FEATURES ("interface-output"),
853 };
854
855 /* interface-output: the feature that ends the ip4-output arc */
856 VNET_FEATURE_INIT (ip4_interface_output, static) =
857 {
858   .arc_name = "ip4-output",
859   .node_name = "interface-output",
860   .runs_before = 0,     /* not before any other features */
861 };
862 /* *INDENT-ON* */
863
864 static clib_error_t *
865 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
866 {
867   ip4_main_t *im = &ip4_main;
868
869   /* Fill in lookup tables with default table (0). */
870   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
871   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
872
873   if (!is_add)
874     {
875       ip4_main_t *im4 = &ip4_main;
876       ip_lookup_main_t *lm4 = &im4->lookup_main;
877       ip_interface_address_t *ia = 0;
878       ip4_address_t *address;
879       vlib_main_t *vm = vlib_get_main ();
880
881       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
882       /* *INDENT-OFF* */
883       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
884       ({
885         address = ip_interface_address_get_address (lm4, ia);
886         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
887       }));
888       /* *INDENT-ON* */
889     }
890
891   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
892                                is_add, 0, 0);
893
894   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
895                                sw_if_index, is_add, 0, 0);
896
897   return /* no error */ 0;
898 }
899
900 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
901
/*
 * Global IP4 main: the single ip4_main_t instance referenced throughout
 * this file (lookup tables, feature-arc indices, ARP request template).
 */
ip4_main_t ip4_main;
904
905 static clib_error_t *
906 ip4_lookup_init (vlib_main_t * vm)
907 {
908   ip4_main_t *im = &ip4_main;
909   clib_error_t *error;
910   uword i;
911
912   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
913     return error;
914   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
915     return (error);
916   if ((error = vlib_call_init_function (vm, fib_module_init)))
917     return error;
918   if ((error = vlib_call_init_function (vm, mfib_module_init)))
919     return error;
920
921   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
922     {
923       u32 m;
924
925       if (i < 32)
926         m = pow2_mask (i) << (32 - i);
927       else
928         m = ~0;
929       im->fib_masks[i] = clib_host_to_net_u32 (m);
930     }
931
932   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
933
934   /* Create FIB with index 0 and table id of 0. */
935   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
936                                      FIB_SOURCE_DEFAULT_ROUTE);
937   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
938                                       MFIB_SOURCE_DEFAULT_ROUTE);
939
940   {
941     pg_node_t *pn;
942     pn = pg_get_node (ip4_lookup_node.index);
943     pn->unformat_edit = unformat_pg_ip4_header;
944   }
945
946   {
947     ethernet_arp_header_t h;
948
949     clib_memset (&h, 0, sizeof (h));
950
951 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
952 #define _8(f,v) h.f = v;
953     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
954     _16 (l3_type, ETHERNET_TYPE_IP4);
955     _8 (n_l2_address_bytes, 6);
956     _8 (n_l3_address_bytes, 4);
957     _16 (opcode, ETHERNET_ARP_OPCODE_request);
958 #undef _16
959 #undef _8
960
961     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
962                                /* data */ &h,
963                                sizeof (h),
964                                /* alloc chunk size */ 8,
965                                "ip4 arp");
966   }
967
968   return error;
969 }
970
971 VLIB_INIT_FUNCTION (ip4_lookup_init);
972
/*
 * Per-packet trace record written by ip4_forward_next_trace() and rendered
 * by the format_ip4_*_trace() functions in this file.
 */
typedef struct
{
  /* Adjacency taken. */
  u32 dpo_index;
  /* Copy of the buffer's ip.flow_hash metadata. */
  u32 flow_hash;
  /*
   * The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
   * FIB index bound to the buffer's RX interface.
   */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
984
985 #ifndef CLIB_MARCH_VARIANT
986 u8 *
987 format_ip4_forward_next_trace (u8 * s, va_list * args)
988 {
989   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
990   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
991   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
992   u32 indent = format_get_indent (s);
993   s = format (s, "%U%U",
994               format_white_space, indent,
995               format_ip4_header, t->packet_data, sizeof (t->packet_data));
996   return s;
997 }
998 #endif
999
1000 static u8 *
1001 format_ip4_lookup_trace (u8 * s, va_list * args)
1002 {
1003   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1004   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1005   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1006   u32 indent = format_get_indent (s);
1007
1008   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1009               t->fib_index, t->dpo_index, t->flow_hash);
1010   s = format (s, "\n%U%U",
1011               format_white_space, indent,
1012               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1013   return s;
1014 }
1015
1016 static u8 *
1017 format_ip4_rewrite_trace (u8 * s, va_list * args)
1018 {
1019   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1020   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1021   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1022   u32 indent = format_get_indent (s);
1023
1024   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1025               t->fib_index, t->dpo_index, format_ip_adjacency,
1026               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1027   s = format (s, "\n%U%U",
1028               format_white_space, indent,
1029               format_ip_adjacency_packet_data,
1030               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1031   return s;
1032 }
1033
1034 #ifndef CLIB_MARCH_VARIANT
1035 /* Common trace function for all ip4-forward next nodes. */
1036 void
1037 ip4_forward_next_trace (vlib_main_t * vm,
1038                         vlib_node_runtime_t * node,
1039                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1040 {
1041   u32 *from, n_left;
1042   ip4_main_t *im = &ip4_main;
1043
1044   n_left = frame->n_vectors;
1045   from = vlib_frame_vector_args (frame);
1046
1047   while (n_left >= 4)
1048     {
1049       u32 bi0, bi1;
1050       vlib_buffer_t *b0, *b1;
1051       ip4_forward_next_trace_t *t0, *t1;
1052
1053       /* Prefetch next iteration. */
1054       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1055       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1056
1057       bi0 = from[0];
1058       bi1 = from[1];
1059
1060       b0 = vlib_get_buffer (vm, bi0);
1061       b1 = vlib_get_buffer (vm, bi1);
1062
1063       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1064         {
1065           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1066           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1067           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1068           t0->fib_index =
1069             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1070              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1071             vec_elt (im->fib_index_by_sw_if_index,
1072                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1073
1074           clib_memcpy_fast (t0->packet_data,
1075                             vlib_buffer_get_current (b0),
1076                             sizeof (t0->packet_data));
1077         }
1078       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1079         {
1080           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1081           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1082           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1083           t1->fib_index =
1084             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1085              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1086             vec_elt (im->fib_index_by_sw_if_index,
1087                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1088           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1089                             sizeof (t1->packet_data));
1090         }
1091       from += 2;
1092       n_left -= 2;
1093     }
1094
1095   while (n_left >= 1)
1096     {
1097       u32 bi0;
1098       vlib_buffer_t *b0;
1099       ip4_forward_next_trace_t *t0;
1100
1101       bi0 = from[0];
1102
1103       b0 = vlib_get_buffer (vm, bi0);
1104
1105       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1106         {
1107           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1108           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1109           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1110           t0->fib_index =
1111             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1112              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1113             vec_elt (im->fib_index_by_sw_if_index,
1114                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1115           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1116                             sizeof (t0->packet_data));
1117         }
1118       from += 1;
1119       n_left -= 1;
1120     }
1121 }
1122
1123 /* Compute TCP/UDP/ICMP4 checksum in software. */
1124 u16
1125 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1126                               ip4_header_t * ip0)
1127 {
1128   ip_csum_t sum0;
1129   u32 ip_header_length, payload_length_host_byte_order;
1130   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1131   u16 sum16;
1132   void *data_this_buffer;
1133
1134   /* Initialize checksum with ip header. */
1135   ip_header_length = ip4_header_bytes (ip0);
1136   payload_length_host_byte_order =
1137     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1138   sum0 =
1139     clib_host_to_net_u32 (payload_length_host_byte_order +
1140                           (ip0->protocol << 16));
1141
1142   if (BITS (uword) == 32)
1143     {
1144       sum0 =
1145         ip_csum_with_carry (sum0,
1146                             clib_mem_unaligned (&ip0->src_address, u32));
1147       sum0 =
1148         ip_csum_with_carry (sum0,
1149                             clib_mem_unaligned (&ip0->dst_address, u32));
1150     }
1151   else
1152     sum0 =
1153       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1154
1155   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1156   data_this_buffer = (void *) ip0 + ip_header_length;
1157   n_ip_bytes_this_buffer =
1158     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1159   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1160     {
1161       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1162         n_ip_bytes_this_buffer - ip_header_length : 0;
1163     }
1164   while (1)
1165     {
1166       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1167       n_bytes_left -= n_this_buffer;
1168       if (n_bytes_left == 0)
1169         break;
1170
1171       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1172       p0 = vlib_get_buffer (vm, p0->next_buffer);
1173       data_this_buffer = vlib_buffer_get_current (p0);
1174       n_this_buffer = p0->current_length;
1175     }
1176
1177   sum16 = ~ip_csum_fold (sum0);
1178
1179   return sum16;
1180 }
1181
1182 u32
1183 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1184 {
1185   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1186   udp_header_t *udp0;
1187   u16 sum16;
1188
1189   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1190           || ip0->protocol == IP_PROTOCOL_UDP);
1191
1192   udp0 = (void *) (ip0 + 1);
1193   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1194     {
1195       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1196                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1197       return p0->flags;
1198     }
1199
1200   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1201
1202   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1203                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1204
1205   return p0->flags;
1206 }
1207 #endif
1208
1209 /* *INDENT-OFF* */
/*
 * The "ip4-local" feature arc: entered from the "ip4-local" node and
 * terminated by "ip4-local-end-of-arc".  Not declared static;
 * ip4_local_set_next_and_error() reads its feature_arc_index through
 * vnet_feat_arc_ip4_local.
 */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
  .last_in_arc = "ip4-local-end-of-arc",
};
1216 /* *INDENT-ON* */
1217
1218 static inline void
1219 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1220                             ip4_header_t * ip, u8 is_udp, u8 * error,
1221                             u8 * good_tcp_udp)
1222 {
1223   u32 flags0;
1224   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1225   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1226   if (is_udp)
1227     {
1228       udp_header_t *udp;
1229       u32 ip_len, udp_len;
1230       i32 len_diff;
1231       udp = ip4_next_header (ip);
1232       /* Verify UDP length. */
1233       ip_len = clib_net_to_host_u16 (ip->length);
1234       udp_len = clib_net_to_host_u16 (udp->length);
1235
1236       len_diff = ip_len - udp_len;
1237       *good_tcp_udp &= len_diff >= 0;
1238       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1239     }
1240 }
1241
/*
 * Checksum-state predicates on a buffer pointer _b.  Every argument and
 * every expansion is fully parenthesised so the macros can be negated,
 * combined with other operators, or given a non-trivial pointer
 * expression.
 */

/* Non-zero when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * Non-zero when a TCP/UDP packet still needs a software checksum check:
 * nothing has computed it yet and it is not offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1253
1254 static inline void
1255 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1256                          ip4_header_t * ih, u8 * error)
1257 {
1258   u8 is_udp, is_tcp_udp, good_tcp_udp;
1259
1260   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1261   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1262
1263   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1264     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1265   else
1266     good_tcp_udp = ip4_local_csum_is_valid (b);
1267
1268   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1269   *error = (is_tcp_udp && !good_tcp_udp
1270             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1271 }
1272
1273 static inline void
1274 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1275                             ip4_header_t ** ih, u8 * error)
1276 {
1277   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1278
1279   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1280   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1281
1282   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1283   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1284
1285   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1286   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1287
1288   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1289                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1290     {
1291       if (is_tcp_udp[0])
1292         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1293                                     &good_tcp_udp[0]);
1294       if (is_tcp_udp[1])
1295         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1296                                     &good_tcp_udp[1]);
1297     }
1298
1299   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1300               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1301   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1302               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1303 }
1304
1305 static inline void
1306 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1307                               vlib_buffer_t * b, u16 * next, u8 error,
1308                               u8 head_of_feature_arc)
1309 {
1310   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1311   u32 next_index;
1312
1313   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1314   b->error = error ? error_node->errors[error] : 0;
1315   if (head_of_feature_arc)
1316     {
1317       next_index = *next;
1318       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1319         {
1320           vnet_feature_arc_start (arc_index,
1321                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1322                                   &next_index, b);
1323           *next = next_index;
1324         }
1325     }
1326 }
1327
1328 typedef struct
1329 {
1330   ip4_address_t src;
1331   u32 lbi;
1332   u8 error;
1333   u8 first;
1334 } ip4_local_last_check_t;
1335
1336 static inline void
1337 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1338                      ip4_local_last_check_t * last_check, u8 * error0)
1339 {
1340   ip4_fib_mtrie_leaf_t leaf0;
1341   ip4_fib_mtrie_t *mtrie0;
1342   const dpo_id_t *dpo0;
1343   load_balance_t *lb0;
1344   u32 lbi0;
1345
1346   vnet_buffer (b)->ip.fib_index =
1347     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1348     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1349
1350   if (PREDICT_FALSE (last_check->first ||
1351                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1352     {
1353       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1354       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1355       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1356       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1357       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1358
1359       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1360       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1361
1362       lb0 = load_balance_get (lbi0);
1363       dpo0 = load_balance_get_bucket_i (lb0, 0);
1364
1365       /*
1366        * Must have a route to source otherwise we drop the packet.
1367        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1368        *
1369        * The checks are:
1370        *  - the source is a recieve => it's from us => bogus, do this
1371        *    first since it sets a different error code.
1372        *  - uRPF check for any route to source - accept if passes.
1373        *  - allow packets destined to the broadcast address from unknown sources
1374        */
1375
1376       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1377                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1378                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1379       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1380                   && !fib_urpf_check_size (lb0->lb_urpf)
1381                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1382                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1383
1384       last_check->src.as_u32 = ip0->src_address.as_u32;
1385       last_check->lbi = lbi0;
1386       last_check->error = *error0;
1387     }
1388   else
1389     {
1390       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1391       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1392       *error0 = last_check->error;
1393       last_check->first = 0;
1394     }
1395 }
1396
1397 static inline void
1398 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1399                         ip4_local_last_check_t * last_check, u8 * error)
1400 {
1401   ip4_fib_mtrie_leaf_t leaf[2];
1402   ip4_fib_mtrie_t *mtrie[2];
1403   const dpo_id_t *dpo[2];
1404   load_balance_t *lb[2];
1405   u32 not_last_hit;
1406   u32 lbi[2];
1407
1408   not_last_hit = last_check->first;
1409   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1410   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1411
1412   vnet_buffer (b[0])->ip.fib_index =
1413     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1414     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1415     vnet_buffer (b[0])->ip.fib_index;
1416
1417   vnet_buffer (b[1])->ip.fib_index =
1418     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1419     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1420     vnet_buffer (b[1])->ip.fib_index;
1421
1422   if (PREDICT_FALSE (not_last_hit))
1423     {
1424       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1425       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1426
1427       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1428       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1429
1430       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1431                                            &ip[0]->src_address, 2);
1432       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1433                                            &ip[1]->src_address, 2);
1434
1435       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1436                                            &ip[0]->src_address, 3);
1437       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1438                                            &ip[1]->src_address, 3);
1439
1440       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1441       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1442
1443       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1444       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1445
1446       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1447       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1448
1449       lb[0] = load_balance_get (lbi[0]);
1450       lb[1] = load_balance_get (lbi[1]);
1451
1452       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1453       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1454
1455       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1456                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1457                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1458       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1459                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1460                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1461                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1462
1463       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1464                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1465                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1466       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1467                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1468                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1469                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1470
1471       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1472       last_check->lbi = lbi[1];
1473       last_check->error = error[1];
1474     }
1475   else
1476     {
1477       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1478       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1479
1480       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1481       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1482
1483       error[0] = last_check->error;
1484       error[1] = last_check->error;
1485       last_check->first = 0;
1486     }
1487 }
1488
/*
 * Packet classes returned by ip4_local_classify().
 * IP_LOCAL_PACKET_TYPE_L4 is the zero value: ip4_local_inline() tests the
 * class for non-zero to decide whether the checksum and source checks are
 * skipped.
 */
enum ip_local_packet_type_e
{
  /* Neither a fragment nor flagged as NATed. */
  IP_LOCAL_PACKET_TYPE_L4,
  /* Buffer has VNET_BUFFER_F_IS_NATED set. */
  IP_LOCAL_PACKET_TYPE_NAT,
  /* ip4_is_fragment() is true for the header. */
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1495
1496 /**
1497  * Determine packet type and next node.
1498  *
1499  * The expectation is that all packets that are not L4 will skip
1500  * checksums and source checks.
1501  */
1502 always_inline u8
1503 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1504 {
1505   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1506
1507   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1508     {
1509       *next = IP_LOCAL_NEXT_REASSEMBLY;
1510       return IP_LOCAL_PACKET_TYPE_FRAG;
1511     }
1512   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1513     {
1514       *next = lm->local_next_by_ip_protocol[ip->protocol];
1515       return IP_LOCAL_PACKET_TYPE_NAT;
1516     }
1517
1518   *next = lm->local_next_by_ip_protocol[ip->protocol];
1519   return IP_LOCAL_PACKET_TYPE_L4;
1520 }
1521
1522 static inline uword
1523 ip4_local_inline (vlib_main_t * vm,
1524                   vlib_node_runtime_t * node,
1525                   vlib_frame_t * frame, int head_of_feature_arc)
1526 {
1527   u32 *from, n_left_from;
1528   vlib_node_runtime_t *error_node =
1529     vlib_node_get_runtime (vm, ip4_input_node.index);
1530   u16 nexts[VLIB_FRAME_SIZE], *next;
1531   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1532   ip4_header_t *ip[2];
1533   u8 error[2], pt[2];
1534
1535   ip4_local_last_check_t last_check = {
1536     /*
1537      * 0.0.0.0 can appear as the source address of an IP packet,
1538      * as can any other address, hence the need to use the 'first'
1539      * member to make sure the .lbi is initialised for the first
1540      * packet.
1541      */
1542     .src = {.as_u32 = 0},
1543     .lbi = ~0,
1544     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1545     .first = 1,
1546   };
1547
1548   from = vlib_frame_vector_args (frame);
1549   n_left_from = frame->n_vectors;
1550
1551   if (node->flags & VLIB_NODE_FLAG_TRACE)
1552     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1553
1554   vlib_get_buffers (vm, from, bufs, n_left_from);
1555   b = bufs;
1556   next = nexts;
1557
1558   while (n_left_from >= 6)
1559     {
1560       u8 not_batch = 0;
1561
1562       /* Prefetch next iteration. */
1563       {
1564         vlib_prefetch_buffer_header (b[4], LOAD);
1565         vlib_prefetch_buffer_header (b[5], LOAD);
1566
1567         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1568         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1569       }
1570
1571       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1572
1573       ip[0] = vlib_buffer_get_current (b[0]);
1574       ip[1] = vlib_buffer_get_current (b[1]);
1575
1576       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1577       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1578
1579       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1580       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1581
1582       not_batch = pt[0] ^ pt[1];
1583
1584       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1585         goto skip_checks;
1586
1587       if (PREDICT_TRUE (not_batch == 0))
1588         {
1589           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1590           ip4_local_check_src_x2 (b, ip, &last_check, error);
1591         }
1592       else
1593         {
1594           if (!pt[0])
1595             {
1596               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1597               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1598             }
1599           if (!pt[1])
1600             {
1601               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1602               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1603             }
1604         }
1605
1606     skip_checks:
1607
1608       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1609                                     head_of_feature_arc);
1610       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1611                                     head_of_feature_arc);
1612
1613       b += 2;
1614       next += 2;
1615       n_left_from -= 2;
1616     }
1617
1618   while (n_left_from > 0)
1619     {
1620       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1621
1622       ip[0] = vlib_buffer_get_current (b[0]);
1623       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1624       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1625
1626       if (head_of_feature_arc == 0 || pt[0])
1627         goto skip_check;
1628
1629       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1630       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1631
1632     skip_check:
1633
1634       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1635                                     head_of_feature_arc);
1636
1637       b += 1;
1638       next += 1;
1639       n_left_from -= 1;
1640     }
1641
1642   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1643   return frame->n_vectors;
1644 }
1645
/*
 * Node function of "ip4-local": runs ip4_local_inline() as the head of the
 * ip4-local feature arc, i.e. with checksum/source checks enabled and the
 * feature arc started for accepted packets.
 */
VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                               vlib_frame_t * frame)
{
  return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
}

/* *INDENT-OFF* */
/*
 * Registration of the "ip4-local" node and of the next nodes addressed by
 * the IP_LOCAL_NEXT_* indices.  Further next nodes are appended at run
 * time by ip4_register_protocol().
 */
VLIB_REGISTER_NODE (ip4_local_node) =
{
  .name = "ip4-local",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_next_nodes = IP_LOCAL_N_NEXT,
  .next_nodes =
  {
    [IP_LOCAL_NEXT_DROP] = "ip4-drop",
    [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
    [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
    [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
    [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
  },
};
1668 /* *INDENT-ON* */
1669
1670
/*
 * Node function of "ip4-local-end-of-arc": runs ip4_local_inline() with
 * head_of_feature_arc == 0, so packets are only classified and dispatched;
 * the checksum/source checks and the feature-arc start are skipped.
 */
VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
                                          vlib_node_runtime_t * node,
                                          vlib_frame_t * frame)
{
  return ip4_local_inline (vm, node, frame, 0 /* not head of feature arc */ );
}

/* *INDENT-OFF* */
/* Declared as a sibling of "ip4-local" rather than listing its own next nodes. */
VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
  .name = "ip4-local-end-of-arc",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,
  .sibling_of = "ip4-local",
};

/* Registers "ip4-local-end-of-arc" as a feature on the "ip4-local" arc. */
VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
  .arc_name = "ip4-local",
  .node_name = "ip4-local-end-of-arc",
  .runs_before = 0, /* not before any other features */
};
1692 /* *INDENT-ON* */
1693
1694 #ifndef CLIB_MARCH_VARIANT
1695 void
1696 ip4_register_protocol (u32 protocol, u32 node_index)
1697 {
1698   vlib_main_t *vm = vlib_get_main ();
1699   ip4_main_t *im = &ip4_main;
1700   ip_lookup_main_t *lm = &im->lookup_main;
1701
1702   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1703   lm->local_next_by_ip_protocol[protocol] =
1704     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1705 }
1706 #endif
1707
1708 static clib_error_t *
1709 show_ip_local_command_fn (vlib_main_t * vm,
1710                           unformat_input_t * input, vlib_cli_command_t * cmd)
1711 {
1712   ip4_main_t *im = &ip4_main;
1713   ip_lookup_main_t *lm = &im->lookup_main;
1714   int i;
1715
1716   vlib_cli_output (vm, "Protocols handled by ip4_local");
1717   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1718     {
1719       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1720         {
1721           u32 node_index = vlib_get_node (vm,
1722                                           ip4_local_node.index)->
1723             next_nodes[lm->local_next_by_ip_protocol[i]];
1724           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1725                            node_index);
1726         }
1727     }
1728   return 0;
1729 }
1730
1731
1732
1733 /*?
1734  * Display the set of protocols handled by the local IPv4 stack.
1735  *
1736  * @cliexpar
1737  * Example of how to display local protocol table:
1738  * @cliexstart{show ip local}
1739  * Protocols handled by ip4_local
1740  * 1
1741  * 17
1742  * 47
1743  * @cliexend
1744 ?*/
1745 /* *INDENT-OFF* */
1746 VLIB_CLI_COMMAND (show_ip_local, static) =
1747 {
1748   .path = "show ip local",
1749   .function = show_ip_local_command_fn,
1750   .short_help = "show ip local",
1751 };
1752 /* *INDENT-ON* */
1753
1754 always_inline uword
1755 ip4_arp_inline (vlib_main_t * vm,
1756                 vlib_node_runtime_t * node,
1757                 vlib_frame_t * frame, int is_glean)
1758 {
1759   vnet_main_t *vnm = vnet_get_main ();
1760   ip4_main_t *im = &ip4_main;
1761   ip_lookup_main_t *lm = &im->lookup_main;
1762   u32 *from, *to_next_drop;
1763   uword n_left_from, n_left_to_next_drop, next_index;
1764   u32 thread_index = vm->thread_index;
1765   u64 seed;
1766
1767   if (node->flags & VLIB_NODE_FLAG_TRACE)
1768     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1769
1770   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1771
1772   from = vlib_frame_vector_args (frame);
1773   n_left_from = frame->n_vectors;
1774   next_index = node->cached_next_index;
1775   if (next_index == IP4_ARP_NEXT_DROP)
1776     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1777
1778   while (n_left_from > 0)
1779     {
1780       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1781                            to_next_drop, n_left_to_next_drop);
1782
1783       while (n_left_from > 0 && n_left_to_next_drop > 0)
1784         {
1785           u32 pi0, bi0, adj_index0, sw_if_index0;
1786           ip_adjacency_t *adj0;
1787           vlib_buffer_t *p0, *b0;
1788           ip4_address_t resolve0;
1789           ethernet_arp_header_t *h0;
1790           vnet_hw_interface_t *hw_if0;
1791           u64 r0;
1792
1793           pi0 = from[0];
1794           p0 = vlib_get_buffer (vm, pi0);
1795
1796           from += 1;
1797           n_left_from -= 1;
1798           to_next_drop[0] = pi0;
1799           to_next_drop += 1;
1800           n_left_to_next_drop -= 1;
1801
1802           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1803           adj0 = adj_get (adj_index0);
1804
1805           if (is_glean)
1806             {
1807               /* resolve the packet's destination */
1808               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1809               resolve0 = ip0->dst_address;
1810             }
1811           else
1812             {
1813               /* resolve the incomplete adj */
1814               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1815             }
1816
1817           /* combine the address and interface for the hash key */
1818           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1819           r0 = (u64) resolve0.data_u32 << 32;
1820           r0 |= sw_if_index0;
1821
1822           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1823             {
1824               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1825               continue;
1826             }
1827
1828           /*
1829            * the adj has been updated to a rewrite but the node the DPO that got
1830            * us here hasn't - yet. no big deal. we'll drop while we wait.
1831            */
1832           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1833             {
1834               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1835               continue;
1836             }
1837
1838           /*
1839            * Can happen if the control-plane is programming tables
1840            * with traffic flowing; at least that's today's lame excuse.
1841            */
1842           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1843               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1844             {
1845               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1846               continue;
1847             }
1848           /* Send ARP request. */
1849           h0 =
1850             vlib_packet_template_get_packet (vm,
1851                                              &im->ip4_arp_request_packet_template,
1852                                              &bi0);
1853           b0 = vlib_get_buffer (vm, bi0);
1854
1855           /* copy the persistent fields from the original */
1856           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1857
1858           /* Seems we're out of buffers */
1859           if (PREDICT_FALSE (!h0))
1860             {
1861               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1862               continue;
1863             }
1864
1865           /* Add rewrite/encap string for ARP packet. */
1866           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1867
1868           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1869
1870           /* Src ethernet address in ARP header. */
1871           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
1872                                   hw_if0->hw_address);
1873           if (is_glean)
1874             {
1875               /* The interface's source address is stashed in the Glean Adj */
1876               h0->ip4_over_ethernet[0].ip4 =
1877                 adj0->sub_type.glean.receive_addr.ip4;
1878             }
1879           else
1880             {
1881               /* Src IP address in ARP header. */
1882               if (ip4_src_address_for_packet (lm, sw_if_index0,
1883                                               &h0->ip4_over_ethernet[0].ip4))
1884                 {
1885                   /* No source address available */
1886                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1887                   vlib_buffer_free (vm, &bi0, 1);
1888                   continue;
1889                 }
1890             }
1891           h0->ip4_over_ethernet[1].ip4 = resolve0;
1892
1893           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1894
1895           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1896           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1897           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1898
1899           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1900
1901           vlib_set_next_frame_buffer (vm, node,
1902                                       adj0->rewrite_header.next_index, bi0);
1903         }
1904
1905       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1906     }
1907
1908   return frame->n_vectors;
1909 }
1910
1911 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1912                              vlib_frame_t * frame)
1913 {
1914   return (ip4_arp_inline (vm, node, frame, 0));
1915 }
1916
1917 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1918                                vlib_frame_t * frame)
1919 {
1920   return (ip4_arp_inline (vm, node, frame, 1));
1921 }
1922
1923 static char *ip4_arp_error_strings[] = {
1924   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
1925   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
1926   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
1927   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1928   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1929   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1930 };
1931
1932 /* *INDENT-OFF* */
1933 VLIB_REGISTER_NODE (ip4_arp_node) =
1934 {
1935   .name = "ip4-arp",
1936   .vector_size = sizeof (u32),
1937   .format_trace = format_ip4_forward_next_trace,
1938   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1939   .error_strings = ip4_arp_error_strings,
1940   .n_next_nodes = IP4_ARP_N_NEXT,
1941   .next_nodes =
1942   {
1943     [IP4_ARP_NEXT_DROP] = "error-drop",
1944   },
1945 };
1946
1947 VLIB_REGISTER_NODE (ip4_glean_node) =
1948 {
1949   .name = "ip4-glean",
1950   .vector_size = sizeof (u32),
1951   .format_trace = format_ip4_forward_next_trace,
1952   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1953   .error_strings = ip4_arp_error_strings,
1954   .n_next_nodes = IP4_ARP_N_NEXT,
1955   .next_nodes = {
1956   [IP4_ARP_NEXT_DROP] = "error-drop",
1957   },
1958 };
1959 /* *INDENT-ON* */
1960
1961 #define foreach_notrace_ip4_arp_error           \
1962 _(THROTTLED)                                    \
1963 _(RESOLVED)                                     \
1964 _(NO_BUFFERS)                                   \
1965 _(REQUEST_SENT)                                 \
1966 _(NON_ARP_ADJ)                                  \
1967 _(NO_SOURCE_ADDRESS)
1968
1969 static clib_error_t *
1970 arp_notrace_init (vlib_main_t * vm)
1971 {
1972   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1973
1974   /* don't trace ARP request packets */
1975 #define _(a)                                    \
1976     vnet_pcap_drop_trace_filter_add_del         \
1977         (rt->errors[IP4_ARP_ERROR_##a],         \
1978          1 /* is_add */);
1979   foreach_notrace_ip4_arp_error;
1980 #undef _
1981   return 0;
1982 }
1983
1984 VLIB_INIT_FUNCTION (arp_notrace_init);
1985
1986
1987 #ifndef CLIB_MARCH_VARIANT
1988 /* Send an ARP request to see if given destination is reachable on given interface. */
1989 clib_error_t *
1990 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1991                     u8 refresh)
1992 {
1993   vnet_main_t *vnm = vnet_get_main ();
1994   ip4_main_t *im = &ip4_main;
1995   ethernet_arp_header_t *h;
1996   ip4_address_t *src;
1997   ip_interface_address_t *ia;
1998   ip_adjacency_t *adj;
1999   vnet_hw_interface_t *hi;
2000   vnet_sw_interface_t *si;
2001   vlib_buffer_t *b;
2002   adj_index_t ai;
2003   u32 bi = 0;
2004   u8 unicast_rewrite = 0;
2005
2006   si = vnet_get_sw_interface (vnm, sw_if_index);
2007
2008   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2009     {
2010       return clib_error_return (0, "%U: interface %U down",
2011                                 format_ip4_address, dst,
2012                                 format_vnet_sw_if_index_name, vnm,
2013                                 sw_if_index);
2014     }
2015
2016   src =
2017     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2018   if (!src)
2019     {
2020       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2021       return clib_error_return
2022         (0,
2023          "no matching interface address for destination %U (interface %U)",
2024          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2025          sw_if_index);
2026     }
2027
2028   h = vlib_packet_template_get_packet (vm,
2029                                        &im->ip4_arp_request_packet_template,
2030                                        &bi);
2031
2032   if (!h)
2033     return clib_error_return (0, "ARP request packet allocation failed");
2034
2035   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2036   if (PREDICT_FALSE (!hi->hw_address))
2037     {
2038       return clib_error_return (0, "%U: interface %U do not support ip probe",
2039                                 format_ip4_address, dst,
2040                                 format_vnet_sw_if_index_name, vnm,
2041                                 sw_if_index);
2042     }
2043
2044   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2045
2046   h->ip4_over_ethernet[0].ip4 = src[0];
2047   h->ip4_over_ethernet[1].ip4 = dst[0];
2048
2049   b = vlib_get_buffer (vm, bi);
2050   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2051     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2052
2053   ip46_address_t nh = {
2054     .ip4 = *dst,
2055   };
2056
2057   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2058                             VNET_LINK_IP4, &nh, sw_if_index);
2059   adj = adj_get (ai);
2060
2061   /* Peer has been previously resolved, retrieve glean adj instead */
2062   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2063     {
2064       if (refresh)
2065         unicast_rewrite = 1;
2066       else
2067         {
2068           adj_unlock (ai);
2069           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2070                                       VNET_LINK_IP4, sw_if_index, &nh);
2071           adj = adj_get (ai);
2072         }
2073     }
2074
2075   /* Add encapsulation string for software interface (e.g. ethernet header). */
2076   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2077   if (unicast_rewrite)
2078     {
2079       u16 *etype = vlib_buffer_get_current (b) - 2;
2080       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2081     }
2082   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2083
2084   {
2085     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2086     u32 *to_next = vlib_frame_vector_args (f);
2087     to_next[0] = bi;
2088     f->n_vectors = 1;
2089     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2090   }
2091
2092   adj_unlock (ai);
2093   return /* no error */ 0;
2094 }
2095 #endif
2096
/* Static next-node slots of the ip4-rewrite node family. */
typedef enum
{
  /* default disposition set for every buffer before processing */
  IP4_REWRITE_NEXT_DROP = 0,
  /* TTL expired, or MTU exceeded with don't-fragment set */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  /* MTU exceeded and fragmentation allowed */
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  /* Last */
  IP4_REWRITE_N_NEXT = 3
} ip4_rewrite_next_t;
2104
/**
 * The bits of an IPv4 address that are kept (by masking) when constructing
 * a multicast MAC address
 */
2109 #if CLIB_ARCH_IS_BIG_ENDIAN
2110 #define IP4_MCAST_ADDR_MASK 0x007fffff
2111 #else
2112 #define IP4_MCAST_ADDR_MASK 0xffff7f00
2113 #endif
2114
2115 always_inline void
2116 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2117                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2118 {
2119   if (packet_len > adj_packet_bytes)
2120     {
2121       *error = IP4_ERROR_MTU_EXCEEDED;
2122       if (df)
2123         {
2124           icmp4_error_set_vnet_buffer
2125             (b, ICMP4_destination_unreachable,
2126              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2127              adj_packet_bytes);
2128           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2129         }
2130       else
2131         {
2132           /* IP fragmentation */
2133           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2134                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2135           *next = IP4_REWRITE_NEXT_FRAGMENT;
2136         }
2137     }
2138 }
2139
2140 /* Decrement TTL & update checksum.
2141    Works either endian, so no need for byte swap. */
2142 static_always_inline void
2143 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2144                             u32 * error)
2145 {
2146   i32 ttl;
2147   u32 checksum;
2148   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2149     {
2150       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2151       return;
2152     }
2153
2154   ttl = ip->ttl;
2155
2156   /* Input node should have reject packets with ttl 0. */
2157   ASSERT (ip->ttl > 0);
2158
2159   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2160   checksum += checksum >= 0xffff;
2161
2162   ip->checksum = checksum;
2163   ttl -= 1;
2164   ip->ttl = ttl;
2165
2166   /*
2167    * If the ttl drops below 1 when forwarding, generate
2168    * an ICMP response.
2169    */
2170   if (PREDICT_FALSE (ttl <= 0))
2171     {
2172       *error = IP4_ERROR_TIME_EXPIRED;
2173       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2174       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2175                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2176                                    0);
2177       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2178     }
2179
2180   /* Verify checksum. */
2181   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2182           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2183 }
2184
2185
2186 always_inline uword
2187 ip4_rewrite_inline (vlib_main_t * vm,
2188                     vlib_node_runtime_t * node,
2189                     vlib_frame_t * frame,
2190                     int do_counters, int is_midchain, int is_mcast)
2191 {
2192   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2193   u32 *from = vlib_frame_vector_args (frame);
2194   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2195   u16 nexts[VLIB_FRAME_SIZE], *next;
2196   u32 n_left_from;
2197   vlib_node_runtime_t *error_node =
2198     vlib_node_get_runtime (vm, ip4_input_node.index);
2199
2200   n_left_from = frame->n_vectors;
2201   u32 thread_index = vm->thread_index;
2202
2203   vlib_get_buffers (vm, from, bufs, n_left_from);
2204   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2205
2206   if (n_left_from >= 6)
2207     {
2208       int i;
2209       for (i = 2; i < 6; i++)
2210         vlib_prefetch_buffer_header (bufs[i], LOAD);
2211     }
2212
2213   next = nexts;
2214   b = bufs;
2215   while (n_left_from >= 8)
2216     {
2217       ip_adjacency_t *adj0, *adj1;
2218       ip4_header_t *ip0, *ip1;
2219       u32 rw_len0, error0, adj_index0;
2220       u32 rw_len1, error1, adj_index1;
2221       u32 tx_sw_if_index0, tx_sw_if_index1;
2222       u8 *p;
2223
2224       vlib_prefetch_buffer_header (b[6], LOAD);
2225       vlib_prefetch_buffer_header (b[7], LOAD);
2226
2227       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2228       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2229
2230       /*
2231        * pre-fetch the per-adjacency counters
2232        */
2233       if (do_counters)
2234         {
2235           vlib_prefetch_combined_counter (&adjacency_counters,
2236                                           thread_index, adj_index0);
2237           vlib_prefetch_combined_counter (&adjacency_counters,
2238                                           thread_index, adj_index1);
2239         }
2240
2241       ip0 = vlib_buffer_get_current (b[0]);
2242       ip1 = vlib_buffer_get_current (b[1]);
2243
2244       error0 = error1 = IP4_ERROR_NONE;
2245
2246       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2247       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2248
2249       /* Rewrite packet header and updates lengths. */
2250       adj0 = adj_get (adj_index0);
2251       adj1 = adj_get (adj_index1);
2252
2253       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2254       rw_len0 = adj0[0].rewrite_header.data_bytes;
2255       rw_len1 = adj1[0].rewrite_header.data_bytes;
2256       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2257       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2258
2259       p = vlib_buffer_get_current (b[2]);
2260       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2261       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2262
2263       p = vlib_buffer_get_current (b[3]);
2264       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2265       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2266
2267       /* Check MTU of outgoing interface. */
2268       ip4_mtu_check (b[0], clib_net_to_host_u16 (ip0->length),
2269                      adj0[0].rewrite_header.max_l3_packet_bytes,
2270                      ip0->flags_and_fragment_offset &
2271                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2272                      next + 0, &error0);
2273       ip4_mtu_check (b[1], clib_net_to_host_u16 (ip1->length),
2274                      adj1[0].rewrite_header.max_l3_packet_bytes,
2275                      ip1->flags_and_fragment_offset &
2276                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2277                      next + 1, &error1);
2278
2279       if (is_mcast)
2280         {
2281           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2282                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2283                     IP4_ERROR_SAME_INTERFACE : error0);
2284           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2285                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2286                     IP4_ERROR_SAME_INTERFACE : error1);
2287         }
2288
2289       b[0]->error = error_node->errors[error0];
2290       b[1]->error = error_node->errors[error1];
2291       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2292        * to see the IP headerr */
2293       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2294         {
2295           u32 next_index = adj0[0].rewrite_header.next_index;
2296           b[0]->current_data -= rw_len0;
2297           b[0]->current_length += rw_len0;
2298           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2299           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2300
2301           if (PREDICT_FALSE
2302               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2303             vnet_feature_arc_start (lm->output_feature_arc_index,
2304                                     tx_sw_if_index0, &next_index, b[0]);
2305           next[0] = next_index;
2306         }
2307       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2308         {
2309           u32 next_index = adj1[0].rewrite_header.next_index;
2310           b[1]->current_data -= rw_len1;
2311           b[1]->current_length += rw_len1;
2312
2313           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2314           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2315
2316           if (PREDICT_FALSE
2317               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2318             vnet_feature_arc_start (lm->output_feature_arc_index,
2319                                     tx_sw_if_index1, &next_index, b[1]);
2320           next[1] = next_index;
2321         }
2322
2323       /* Guess we are only writing on simple Ethernet header. */
2324       vnet_rewrite_two_headers (adj0[0], adj1[0],
2325                                 ip0, ip1, sizeof (ethernet_header_t));
2326
2327       /*
2328        * Bump the per-adjacency counters
2329        */
2330       if (do_counters)
2331         {
2332           vlib_increment_combined_counter
2333             (&adjacency_counters,
2334              thread_index,
2335              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2336
2337           vlib_increment_combined_counter
2338             (&adjacency_counters,
2339              thread_index,
2340              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2341         }
2342
2343       if (is_midchain)
2344         {
2345           adj0->sub_type.midchain.fixup_func
2346             (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2347           adj1->sub_type.midchain.fixup_func
2348             (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2349         }
2350
2351       if (is_mcast)
2352         {
2353           /*
2354            * copy bytes from the IP address into the MAC rewrite
2355            */
2356           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2357                                       adj0->rewrite_header.dst_mcast_offset,
2358                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2359           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2360                                       adj1->rewrite_header.dst_mcast_offset,
2361                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2362         }
2363
2364       next += 2;
2365       b += 2;
2366       n_left_from -= 2;
2367     }
2368
2369   while (n_left_from > 0)
2370     {
2371       ip_adjacency_t *adj0;
2372       ip4_header_t *ip0;
2373       u32 rw_len0, adj_index0, error0;
2374       u32 tx_sw_if_index0;
2375
2376       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2377
2378       adj0 = adj_get (adj_index0);
2379
2380       if (do_counters)
2381         vlib_prefetch_combined_counter (&adjacency_counters,
2382                                         thread_index, adj_index0);
2383
2384       ip0 = vlib_buffer_get_current (b[0]);
2385
2386       error0 = IP4_ERROR_NONE;
2387
2388       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2389
2390
2391       /* Update packet buffer attributes/set output interface. */
2392       rw_len0 = adj0[0].rewrite_header.data_bytes;
2393       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2394
2395       /* Check MTU of outgoing interface. */
2396       ip4_mtu_check (b[0], clib_net_to_host_u16 (ip0->length),
2397                      adj0[0].rewrite_header.max_l3_packet_bytes,
2398                      ip0->flags_and_fragment_offset &
2399                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2400                      next + 0, &error0);
2401
2402       if (is_mcast)
2403         {
2404           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2405                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2406                     IP4_ERROR_SAME_INTERFACE : error0);
2407         }
2408       b[0]->error = error_node->errors[error0];
2409
2410       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2411        * to see the IP headerr */
2412       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2413         {
2414           u32 next_index = adj0[0].rewrite_header.next_index;
2415           b[0]->current_data -= rw_len0;
2416           b[0]->current_length += rw_len0;
2417           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2418           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2419
2420           if (PREDICT_FALSE
2421               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2422             vnet_feature_arc_start (lm->output_feature_arc_index,
2423                                     tx_sw_if_index0, &next_index, b[0]);
2424           next[0] = next_index;
2425         }
2426
2427       /* Guess we are only writing on simple Ethernet header. */
2428       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2429
2430       if (do_counters)
2431         vlib_increment_combined_counter
2432           (&adjacency_counters,
2433            thread_index, adj_index0, 1,
2434            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2435
2436       if (is_midchain)
2437         {
2438           adj0->sub_type.midchain.fixup_func
2439             (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2440         }
2441
2442       if (is_mcast)
2443         {
2444           /*
2445            * copy bytes from the IP address into the MAC rewrite
2446            */
2447           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2448                                       adj0->rewrite_header.dst_mcast_offset,
2449                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2450         }
2451
2452       next += 1;
2453       b += 1;
2454       n_left_from -= 1;
2455     }
2456
2457
2458   /* Need to do trace after rewrites to pick up new packet data. */
2459   if (node->flags & VLIB_NODE_FLAG_TRACE)
2460     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2461
2462   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2463   return frame->n_vectors;
2464 }
2465
2466
2467 /** @brief IPv4 rewrite node.
2468     @node ip4-rewrite
2469
    This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
    header checksum, fetch the ip adjacency, check the outbound mtu,
    apply the adjacency rewrite, and send pkts to the adjacency
    rewrite header's next_index.
2474
2475     @param vm vlib_main_t corresponding to the current thread
2476     @param node vlib_node_runtime_t
2477     @param frame vlib_frame_t whose contents should be dispatched
2478
2479     @par Graph mechanics: buffer metadata, next index usage
2480
2481     @em Uses:
2482     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2483         - the rewrite adjacency index
2484     - <code>adj->lookup_next_index</code>
2485         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2486           the packet will be dropped.
2487     - <code>adj->rewrite_header</code>
2488         - Rewrite string length, rewrite string, next_index
2489
2490     @em Sets:
2491     - <code>b->current_data, b->current_length</code>
2492         - Updated net of applying the rewrite string
2493
2494     <em>Next Indices:</em>
2495     - <code> adj->rewrite_header.next_index </code>
2496       or @c ip4-drop
2497 */
2498
2499 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2500                                  vlib_frame_t * frame)
2501 {
2502   if (adj_are_counters_enabled ())
2503     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2504   else
2505     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2506 }
2507
2508 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2509                                        vlib_node_runtime_t * node,
2510                                        vlib_frame_t * frame)
2511 {
2512   if (adj_are_counters_enabled ())
2513     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2514   else
2515     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2516 }
2517
2518 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2519                                   vlib_node_runtime_t * node,
2520                                   vlib_frame_t * frame)
2521 {
2522   if (adj_are_counters_enabled ())
2523     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2524   else
2525     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2526 }
2527
2528 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2529                                        vlib_node_runtime_t * node,
2530                                        vlib_frame_t * frame)
2531 {
2532   if (adj_are_counters_enabled ())
2533     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2534   else
2535     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2536 }
2537
2538 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2539                                         vlib_node_runtime_t * node,
2540                                         vlib_frame_t * frame)
2541 {
2542   if (adj_are_counters_enabled ())
2543     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2544   else
2545     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2546 }
2547
2548 /* *INDENT-OFF* */
2549 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2550   .name = "ip4-rewrite",
2551   .vector_size = sizeof (u32),
2552
2553   .format_trace = format_ip4_rewrite_trace,
2554
2555   .n_next_nodes = IP4_REWRITE_N_NEXT,
2556   .next_nodes = {
2557     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2558     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2559     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2560   },
2561 };
2562
2563 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2564   .name = "ip4-rewrite-bcast",
2565   .vector_size = sizeof (u32),
2566
2567   .format_trace = format_ip4_rewrite_trace,
2568   .sibling_of = "ip4-rewrite",
2569 };
2570
2571 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2572   .name = "ip4-rewrite-mcast",
2573   .vector_size = sizeof (u32),
2574
2575   .format_trace = format_ip4_rewrite_trace,
2576   .sibling_of = "ip4-rewrite",
2577 };
2578
2579 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2580   .name = "ip4-mcast-midchain",
2581   .vector_size = sizeof (u32),
2582
2583   .format_trace = format_ip4_rewrite_trace,
2584   .sibling_of = "ip4-rewrite",
2585 };
2586
2587 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2588   .name = "ip4-midchain",
2589   .vector_size = sizeof (u32),
2590   .format_trace = format_ip4_forward_next_trace,
2591   .sibling_of =  "ip4-rewrite",
2592 };
/* *INDENT-ON* */
2594
2595 static int
2596 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2597 {
2598   ip4_fib_mtrie_t *mtrie0;
2599   ip4_fib_mtrie_leaf_t leaf0;
2600   u32 lbi0;
2601
2602   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2603
2604   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2605   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2606   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2607
2608   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2609
2610   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2611 }
2612
2613 static clib_error_t *
2614 test_lookup_command_fn (vlib_main_t * vm,
2615                         unformat_input_t * input, vlib_cli_command_t * cmd)
2616 {
2617   ip4_fib_t *fib;
2618   u32 table_id = 0;
2619   f64 count = 1;
2620   u32 n;
2621   int i;
2622   ip4_address_t ip4_base_address;
2623   u64 errors = 0;
2624
2625   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2626     {
2627       if (unformat (input, "table %d", &table_id))
2628         {
2629           /* Make sure the entry exists. */
2630           fib = ip4_fib_get (table_id);
2631           if ((fib) && (fib->index != table_id))
2632             return clib_error_return (0, "<fib-index> %d does not exist",
2633                                       table_id);
2634         }
2635       else if (unformat (input, "count %f", &count))
2636         ;
2637
2638       else if (unformat (input, "%U",
2639                          unformat_ip4_address, &ip4_base_address))
2640         ;
2641       else
2642         return clib_error_return (0, "unknown input `%U'",
2643                                   format_unformat_error, input);
2644     }
2645
2646   n = count;
2647
2648   for (i = 0; i < n; i++)
2649     {
2650       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2651         errors++;
2652
2653       ip4_base_address.as_u32 =
2654         clib_host_to_net_u32 (1 +
2655                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2656     }
2657
2658   if (errors)
2659     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2660   else
2661     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2662
2663   return 0;
2664 }
2665
2666 /*?
2667  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2668  * given FIB table to determine if there is a conflict with the
2669  * adjacency table. The fib-id can be determined by using the
2670  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2671  * of 0 is used.
2672  *
2673  * @todo This command uses fib-id, other commands use table-id (not
2674  * just a name, they are different indexes). Would like to change this
2675  * to table-id for consistency.
2676  *
2677  * @cliexpar
2678  * Example of how to run the test lookup command:
2679  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2680  * No errors in 2 lookups
2681  * @cliexend
2682 ?*/
2683 /* *INDENT-OFF* */
2684 VLIB_CLI_COMMAND (lookup_test_command, static) =
2685 {
2686   .path = "test lookup",
2687   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2688   .function = test_lookup_command_fn,
2689 };
2690 /* *INDENT-ON* */
2691
2692 #ifndef CLIB_MARCH_VARIANT
2693 int
2694 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2695 {
2696   u32 fib_index;
2697
2698   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2699
2700   if (~0 == fib_index)
2701     return VNET_API_ERROR_NO_SUCH_FIB;
2702
2703   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2704                                   flow_hash_config);
2705
2706   return 0;
2707 }
2708 #endif
2709
2710 static clib_error_t *
2711 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2712                              unformat_input_t * input,
2713                              vlib_cli_command_t * cmd)
2714 {
2715   int matched = 0;
2716   u32 table_id = 0;
2717   u32 flow_hash_config = 0;
2718   int rv;
2719
2720   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2721     {
2722       if (unformat (input, "table %d", &table_id))
2723         matched = 1;
2724 #define _(a,v) \
2725     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2726       foreach_flow_hash_bit
2727 #undef _
2728         else
2729         break;
2730     }
2731
2732   if (matched == 0)
2733     return clib_error_return (0, "unknown input `%U'",
2734                               format_unformat_error, input);
2735
2736   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2737   switch (rv)
2738     {
2739     case 0:
2740       break;
2741
2742     case VNET_API_ERROR_NO_SUCH_FIB:
2743       return clib_error_return (0, "no such FIB table %d", table_id);
2744
2745     default:
2746       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2747       break;
2748     }
2749
2750   return 0;
2751 }
2752
2753 /*?
2754  * Configure the set of IPv4 fields used by the flow hash.
2755  *
2756  * @cliexpar
2757  * Example of how to set the flow hash on a given table:
2758  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2760  * @cliexstart{show ip fib}
2761  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2762  * 0.0.0.0/0
2763  *   unicast-ip4-chain
2764  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2765  *     [0] [@0]: dpo-drop ip6
2766  * 0.0.0.0/32
2767  *   unicast-ip4-chain
2768  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2769  *     [0] [@0]: dpo-drop ip6
2770  * 224.0.0.0/8
2771  *   unicast-ip4-chain
2772  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2773  *     [0] [@0]: dpo-drop ip6
2774  * 6.0.1.2/32
2775  *   unicast-ip4-chain
2776  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2777  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2778  * 7.0.0.1/32
2779  *   unicast-ip4-chain
2780  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2781  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2782  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2783  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2784  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2785  * 240.0.0.0/8
2786  *   unicast-ip4-chain
2787  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2788  *     [0] [@0]: dpo-drop ip6
2789  * 255.255.255.255/32
2790  *   unicast-ip4-chain
2791  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2792  *     [0] [@0]: dpo-drop ip6
2793  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2794  * 0.0.0.0/0
2795  *   unicast-ip4-chain
2796  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2797  *     [0] [@0]: dpo-drop ip6
2798  * 0.0.0.0/32
2799  *   unicast-ip4-chain
2800  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2801  *     [0] [@0]: dpo-drop ip6
2802  * 172.16.1.0/24
2803  *   unicast-ip4-chain
2804  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2805  *     [0] [@4]: ipv4-glean: af_packet0
2806  * 172.16.1.1/32
2807  *   unicast-ip4-chain
2808  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2809  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2810  * 172.16.1.2/32
2811  *   unicast-ip4-chain
2812  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2813  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2814  * 172.16.2.0/24
2815  *   unicast-ip4-chain
2816  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2817  *     [0] [@4]: ipv4-glean: af_packet1
2818  * 172.16.2.1/32
2819  *   unicast-ip4-chain
2820  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2821  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2822  * 224.0.0.0/8
2823  *   unicast-ip4-chain
2824  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2825  *     [0] [@0]: dpo-drop ip6
2826  * 240.0.0.0/8
2827  *   unicast-ip4-chain
2828  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2829  *     [0] [@0]: dpo-drop ip6
2830  * 255.255.255.255/32
2831  *   unicast-ip4-chain
2832  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2833  *     [0] [@0]: dpo-drop ip6
2834  * @cliexend
2835 ?*/
2836 /* *INDENT-OFF* */
2837 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2838 {
2839   .path = "set ip flow-hash",
2840   .short_help =
2841   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2842   .function = set_ip_flow_hash_command_fn,
2843 };
2844 /* *INDENT-ON* */
2845
2846 #ifndef CLIB_MARCH_VARIANT
2847 int
2848 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2849                              u32 table_index)
2850 {
2851   vnet_main_t *vnm = vnet_get_main ();
2852   vnet_interface_main_t *im = &vnm->interface_main;
2853   ip4_main_t *ipm = &ip4_main;
2854   ip_lookup_main_t *lm = &ipm->lookup_main;
2855   vnet_classify_main_t *cm = &vnet_classify_main;
2856   ip4_address_t *if_addr;
2857
2858   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2859     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2860
2861   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2862     return VNET_API_ERROR_NO_SUCH_ENTRY;
2863
2864   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2865   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2866
2867   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2868
2869   if (NULL != if_addr)
2870     {
2871       fib_prefix_t pfx = {
2872         .fp_len = 32,
2873         .fp_proto = FIB_PROTOCOL_IP4,
2874         .fp_addr.ip4 = *if_addr,
2875       };
2876       u32 fib_index;
2877
2878       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2879                                                        sw_if_index);
2880
2881
2882       if (table_index != (u32) ~ 0)
2883         {
2884           dpo_id_t dpo = DPO_INVALID;
2885
2886           dpo_set (&dpo,
2887                    DPO_CLASSIFY,
2888                    DPO_PROTO_IP4,
2889                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2890
2891           fib_table_entry_special_dpo_add (fib_index,
2892                                            &pfx,
2893                                            FIB_SOURCE_CLASSIFY,
2894                                            FIB_ENTRY_FLAG_NONE, &dpo);
2895           dpo_reset (&dpo);
2896         }
2897       else
2898         {
2899           fib_table_entry_special_remove (fib_index,
2900                                           &pfx, FIB_SOURCE_CLASSIFY);
2901         }
2902     }
2903
2904   return 0;
2905 }
2906 #endif
2907
2908 static clib_error_t *
2909 set_ip_classify_command_fn (vlib_main_t * vm,
2910                             unformat_input_t * input,
2911                             vlib_cli_command_t * cmd)
2912 {
2913   u32 table_index = ~0;
2914   int table_index_set = 0;
2915   u32 sw_if_index = ~0;
2916   int rv;
2917
2918   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2919     {
2920       if (unformat (input, "table-index %d", &table_index))
2921         table_index_set = 1;
2922       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2923                          vnet_get_main (), &sw_if_index))
2924         ;
2925       else
2926         break;
2927     }
2928
2929   if (table_index_set == 0)
2930     return clib_error_return (0, "classify table-index must be specified");
2931
2932   if (sw_if_index == ~0)
2933     return clib_error_return (0, "interface / subif must be specified");
2934
2935   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2936
2937   switch (rv)
2938     {
2939     case 0:
2940       break;
2941
2942     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2943       return clib_error_return (0, "No such interface");
2944
2945     case VNET_API_ERROR_NO_SUCH_ENTRY:
2946       return clib_error_return (0, "No such classifier table");
2947     }
2948   return 0;
2949 }
2950
2951 /*?
2952  * Assign a classification table to an interface. The classification
2953  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2955  * on an interface.
2956  *
2957  * @cliexpar
2958  * Example of how to assign a classification table to an interface:
2959  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2960 ?*/
2961 /* *INDENT-OFF* */
2962 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2963 {
2964     .path = "set ip classify",
2965     .short_help =
2966     "set ip classify intfc <interface> table-index <classify-idx>",
2967     .function = set_ip_classify_command_fn,
2968 };
2969 /* *INDENT-ON* */
2970
2971 static clib_error_t *
2972 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2973 {
2974   ip4_main_t *im = &ip4_main;
2975   uword heapsize = 0;
2976
2977   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2978     {
2979       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
2980         ;
2981       else
2982         return clib_error_return (0,
2983                                   "invalid heap-size parameter `%U'",
2984                                   format_unformat_error, input);
2985     }
2986
2987   im->mtrie_heap_size = heapsize;
2988
2989   return 0;
2990 }
2991
2992 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
2993
2994 /*
2995  * fd.io coding-style-patch-verification: ON
2996  *
2997  * Local Variables:
2998  * eval: (c-set-style "gnu")
2999  * End:
3000  */