Add -fno-common compile option
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58
59 /** @brief IPv4 lookup node.
60     @node ip4-lookup
61
62     This is the main IPv4 lookup dispatch node.
63
64     @param vm vlib_main_t corresponding to the current thread
65     @param node vlib_node_runtime_t
66     @param frame vlib_frame_t whose contents should be dispatched
67
68     @par Graph mechanics: buffer metadata, next index usage
69
70     @em Uses:
71     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
72         - Indicates the @c sw_if_index value of the interface that the
73           packet was received on.
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
75         - When the value is @c ~0 then the node performs a longest prefix
76           match (LPM) for the packet destination address in the FIB attached
77           to the receive interface.
78         - Otherwise perform LPM for the packet destination address in the
79           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
80           value (0, 1, ...) and not a VRF id.
81
82     @em Sets:
83     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
84         - The lookup result adjacency index.
85
86     <em>Next Index:</em>
87     - Dispatches the packet to the node index found in
88       ip_adjacency_t @c adj->lookup_next_index
89       (where @c adj is the lookup result adjacency).
90 */
91 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
92                                 vlib_frame_t * frame)
93 {
94   return ip4_lookup_inline (vm, node, frame,
95                             /* lookup_for_responses_to_locally_received_packets */
96                             0);
97
98 }
99
100 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
101
102 /* *INDENT-OFF* */
103 VLIB_REGISTER_NODE (ip4_lookup_node) =
104 {
105   .name = "ip4-lookup",
106   .vector_size = sizeof (u32),
107   .format_trace = format_ip4_lookup_trace,
108   .n_next_nodes = IP_LOOKUP_N_NEXT,
109   .next_nodes = IP4_LOOKUP_NEXT_NODES,
110 };
111 /* *INDENT-ON* */
112
113 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
114                                       vlib_node_runtime_t * node,
115                                       vlib_frame_t * frame)
116 {
117   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
118   u32 n_left_from, n_left_to_next, *from, *to_next;
119   ip_lookup_next_t next;
120   u32 thread_index = vm->thread_index;
121
122   from = vlib_frame_vector_args (frame);
123   n_left_from = frame->n_vectors;
124   next = node->cached_next_index;
125
126   while (n_left_from > 0)
127     {
128       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
129
130
131       while (n_left_from >= 4 && n_left_to_next >= 2)
132         {
133           ip_lookup_next_t next0, next1;
134           const load_balance_t *lb0, *lb1;
135           vlib_buffer_t *p0, *p1;
136           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
137           const ip4_header_t *ip0, *ip1;
138           const dpo_id_t *dpo0, *dpo1;
139
140           /* Prefetch next iteration. */
141           {
142             vlib_buffer_t *p2, *p3;
143
144             p2 = vlib_get_buffer (vm, from[2]);
145             p3 = vlib_get_buffer (vm, from[3]);
146
147             vlib_prefetch_buffer_header (p2, STORE);
148             vlib_prefetch_buffer_header (p3, STORE);
149
150             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
151             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
152           }
153
154           pi0 = to_next[0] = from[0];
155           pi1 = to_next[1] = from[1];
156
157           from += 2;
158           n_left_from -= 2;
159           to_next += 2;
160           n_left_to_next -= 2;
161
162           p0 = vlib_get_buffer (vm, pi0);
163           p1 = vlib_get_buffer (vm, pi1);
164
165           ip0 = vlib_buffer_get_current (p0);
166           ip1 = vlib_buffer_get_current (p1);
167           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
168           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
169
170           lb0 = load_balance_get (lbi0);
171           lb1 = load_balance_get (lbi1);
172
173           /*
174            * this node is for via FIBs we can re-use the hash value from the
175            * to node if present.
176            * We don't want to use the same hash value at each level in the recursion
177            * graph as that would lead to polarisation
178            */
179           hc0 = hc1 = 0;
180
181           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
182             {
183               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
184                 {
185                   hc0 = vnet_buffer (p0)->ip.flow_hash =
186                     vnet_buffer (p0)->ip.flow_hash >> 1;
187                 }
188               else
189                 {
190                   hc0 = vnet_buffer (p0)->ip.flow_hash =
191                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
192                 }
193               dpo0 = load_balance_get_fwd_bucket
194                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
195             }
196           else
197             {
198               dpo0 = load_balance_get_bucket_i (lb0, 0);
199             }
200           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
201             {
202               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
203                 {
204                   hc1 = vnet_buffer (p1)->ip.flow_hash =
205                     vnet_buffer (p1)->ip.flow_hash >> 1;
206                 }
207               else
208                 {
209                   hc1 = vnet_buffer (p1)->ip.flow_hash =
210                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
211                 }
212               dpo1 = load_balance_get_fwd_bucket
213                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
214             }
215           else
216             {
217               dpo1 = load_balance_get_bucket_i (lb1, 0);
218             }
219
220           next0 = dpo0->dpoi_next_node;
221           next1 = dpo1->dpoi_next_node;
222
223           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
224           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
225
226           vlib_increment_combined_counter
227             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
228           vlib_increment_combined_counter
229             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
230
231           vlib_validate_buffer_enqueue_x2 (vm, node, next,
232                                            to_next, n_left_to_next,
233                                            pi0, pi1, next0, next1);
234         }
235
236       while (n_left_from > 0 && n_left_to_next > 0)
237         {
238           ip_lookup_next_t next0;
239           const load_balance_t *lb0;
240           vlib_buffer_t *p0;
241           u32 pi0, lbi0, hc0;
242           const ip4_header_t *ip0;
243           const dpo_id_t *dpo0;
244
245           pi0 = from[0];
246           to_next[0] = pi0;
247           from += 1;
248           to_next += 1;
249           n_left_to_next -= 1;
250           n_left_from -= 1;
251
252           p0 = vlib_get_buffer (vm, pi0);
253
254           ip0 = vlib_buffer_get_current (p0);
255           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
256
257           lb0 = load_balance_get (lbi0);
258
259           hc0 = 0;
260           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
261             {
262               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
263                 {
264                   hc0 = vnet_buffer (p0)->ip.flow_hash =
265                     vnet_buffer (p0)->ip.flow_hash >> 1;
266                 }
267               else
268                 {
269                   hc0 = vnet_buffer (p0)->ip.flow_hash =
270                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
271                 }
272               dpo0 = load_balance_get_fwd_bucket
273                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
274             }
275           else
276             {
277               dpo0 = load_balance_get_bucket_i (lb0, 0);
278             }
279
280           next0 = dpo0->dpoi_next_node;
281           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
282
283           vlib_increment_combined_counter
284             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
285
286           vlib_validate_buffer_enqueue_x1 (vm, node, next,
287                                            to_next, n_left_to_next,
288                                            pi0, next0);
289         }
290
291       vlib_put_next_frame (vm, node, next, n_left_to_next);
292     }
293
294   if (node->flags & VLIB_NODE_FLAG_TRACE)
295     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
296
297   return frame->n_vectors;
298 }
299
300 /* *INDENT-OFF* */
301 VLIB_REGISTER_NODE (ip4_load_balance_node) =
302 {
303   .name = "ip4-load-balance",
304   .vector_size = sizeof (u32),
305   .sibling_of = "ip4-lookup",
306   .format_trace = format_ip4_lookup_trace,
307 };
308 /* *INDENT-ON* */
309
310 #ifndef CLIB_MARCH_VARIANT
311 /* get first interface address */
312 ip4_address_t *
313 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
314                              ip_interface_address_t ** result_ia)
315 {
316   ip_lookup_main_t *lm = &im->lookup_main;
317   ip_interface_address_t *ia = 0;
318   ip4_address_t *result = 0;
319
320   /* *INDENT-OFF* */
321   foreach_ip_interface_address
322     (lm, ia, sw_if_index,
323      1 /* honor unnumbered */ ,
324      ({
325        ip4_address_t * a =
326          ip_interface_address_get_address (lm, ia);
327        result = a;
328        break;
329      }));
330   /* *INDENT-OFF* */
331   if (result_ia)
332     *result_ia = result ? ia : 0;
333   return result;
334 }
335
336 static void
337 ip4_add_subnet_bcast_route (u32 fib_index,
338                             fib_prefix_t *pfx,
339                             u32 sw_if_index)
340 {
341   vnet_sw_interface_flags_t iflags;
342
343   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
344
345   fib_table_entry_special_remove(fib_index,
346                                  pfx,
347                                  FIB_SOURCE_INTERFACE);
348
349   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
350     {
351       fib_table_entry_update_one_path (fib_index, pfx,
352                                        FIB_SOURCE_INTERFACE,
353                                        FIB_ENTRY_FLAG_NONE,
354                                        DPO_PROTO_IP4,
355                                        /* No next-hop address */
356                                        &ADJ_BCAST_ADDR,
357                                        sw_if_index,
358                                        // invalid FIB index
359                                        ~0,
360                                        1,
361                                        // no out-label stack
362                                        NULL,
363                                        FIB_ROUTE_PATH_FLAG_NONE);
364     }
365   else
366     {
367         fib_table_entry_special_add(fib_index,
368                                     pfx,
369                                     FIB_SOURCE_INTERFACE,
370                                     (FIB_ENTRY_FLAG_DROP |
371                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
372     }
373 }
374
375 static void
376 ip4_add_interface_routes (u32 sw_if_index,
377                           ip4_main_t * im, u32 fib_index,
378                           ip_interface_address_t * a)
379 {
380   ip_lookup_main_t *lm = &im->lookup_main;
381   ip4_address_t *address = ip_interface_address_get_address (lm, a);
382   fib_prefix_t pfx = {
383     .fp_len = a->address_length,
384     .fp_proto = FIB_PROTOCOL_IP4,
385     .fp_addr.ip4 = *address,
386   };
387
388   if (pfx.fp_len <= 30)
389     {
390       /* a /30 or shorter - add a glean for the network address */
391       fib_table_entry_update_one_path (fib_index, &pfx,
392                                        FIB_SOURCE_INTERFACE,
393                                        (FIB_ENTRY_FLAG_CONNECTED |
394                                         FIB_ENTRY_FLAG_ATTACHED),
395                                        DPO_PROTO_IP4,
396                                        /* No next-hop address */
397                                        NULL,
398                                        sw_if_index,
399                                        // invalid FIB index
400                                        ~0,
401                                        1,
402                                        // no out-label stack
403                                        NULL,
404                                        FIB_ROUTE_PATH_FLAG_NONE);
405
406       /* Add the two broadcast addresses as drop */
407       fib_prefix_t net_pfx = {
408         .fp_len = 32,
409         .fp_proto = FIB_PROTOCOL_IP4,
410         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
411       };
412       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
413         fib_table_entry_special_add(fib_index,
414                                     &net_pfx,
415                                     FIB_SOURCE_INTERFACE,
416                                     (FIB_ENTRY_FLAG_DROP |
417                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
418       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
419       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
420         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
421     }
422   else if (pfx.fp_len == 31)
423     {
424       u32 mask = clib_host_to_net_u32(1);
425       fib_prefix_t net_pfx = pfx;
426
427       net_pfx.fp_len = 32;
428       net_pfx.fp_addr.ip4.as_u32 ^= mask;
429
430       /* a /31 - add the other end as an attached host */
431       fib_table_entry_update_one_path (fib_index, &net_pfx,
432                                        FIB_SOURCE_INTERFACE,
433                                        (FIB_ENTRY_FLAG_ATTACHED),
434                                        DPO_PROTO_IP4,
435                                        &net_pfx.fp_addr,
436                                        sw_if_index,
437                                        // invalid FIB index
438                                        ~0,
439                                        1,
440                                        NULL,
441                                        FIB_ROUTE_PATH_FLAG_NONE);
442     }
443   pfx.fp_len = 32;
444
445   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
446     {
447       u32 classify_table_index =
448         lm->classify_table_index_by_sw_if_index[sw_if_index];
449       if (classify_table_index != (u32) ~ 0)
450         {
451           dpo_id_t dpo = DPO_INVALID;
452
453           dpo_set (&dpo,
454                    DPO_CLASSIFY,
455                    DPO_PROTO_IP4,
456                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
457
458           fib_table_entry_special_dpo_add (fib_index,
459                                            &pfx,
460                                            FIB_SOURCE_CLASSIFY,
461                                            FIB_ENTRY_FLAG_NONE, &dpo);
462           dpo_reset (&dpo);
463         }
464     }
465
466   fib_table_entry_update_one_path (fib_index, &pfx,
467                                    FIB_SOURCE_INTERFACE,
468                                    (FIB_ENTRY_FLAG_CONNECTED |
469                                     FIB_ENTRY_FLAG_LOCAL),
470                                    DPO_PROTO_IP4,
471                                    &pfx.fp_addr,
472                                    sw_if_index,
473                                    // invalid FIB index
474                                    ~0,
475                                    1, NULL,
476                                    FIB_ROUTE_PATH_FLAG_NONE);
477 }
478
479 static void
480 ip4_del_interface_routes (ip4_main_t * im,
481                           u32 fib_index,
482                           ip4_address_t * address, u32 address_length)
483 {
484   fib_prefix_t pfx = {
485     .fp_len = address_length,
486     .fp_proto = FIB_PROTOCOL_IP4,
487     .fp_addr.ip4 = *address,
488   };
489
490   if (pfx.fp_len <= 30)
491     {
492       fib_prefix_t net_pfx = {
493         .fp_len = 32,
494         .fp_proto = FIB_PROTOCOL_IP4,
495         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
496       };
497       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
498         fib_table_entry_special_remove(fib_index,
499                                        &net_pfx,
500                                        FIB_SOURCE_INTERFACE);
501       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
502       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
503         fib_table_entry_special_remove(fib_index,
504                                        &net_pfx,
505                                        FIB_SOURCE_INTERFACE);
506       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
507     }
508     else if (pfx.fp_len == 31)
509     {
510       u32 mask = clib_host_to_net_u32(1);
511       fib_prefix_t net_pfx = pfx;
512
513       net_pfx.fp_len = 32;
514       net_pfx.fp_addr.ip4.as_u32 ^= mask;
515
516       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
517     }
518
519   pfx.fp_len = 32;
520   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
521 }
522
523 void
524 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
525 {
526   ip4_main_t *im = &ip4_main;
527
528   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
529
530   /*
531    * enable/disable only on the 1<->0 transition
532    */
533   if (is_enable)
534     {
535       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
536         return;
537     }
538   else
539     {
540       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
541       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
542         return;
543     }
544   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
545                                !is_enable, 0, 0);
546
547
548   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
549                                sw_if_index, !is_enable, 0, 0);
550 }
551
552 static clib_error_t *
553 ip4_add_del_interface_address_internal (vlib_main_t * vm,
554                                         u32 sw_if_index,
555                                         ip4_address_t * address,
556                                         u32 address_length, u32 is_del)
557 {
558   vnet_main_t *vnm = vnet_get_main ();
559   ip4_main_t *im = &ip4_main;
560   ip_lookup_main_t *lm = &im->lookup_main;
561   clib_error_t *error = 0;
562   u32 if_address_index, elts_before;
563   ip4_address_fib_t ip4_af, *addr_fib = 0;
564
565   /* local0 interface doesn't support IP addressing  */
566   if (sw_if_index == 0)
567     {
568       return
569        clib_error_create ("local0 interface doesn't support IP addressing");
570     }
571
572   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
573   ip4_addr_fib_init (&ip4_af, address,
574                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
575   vec_add1 (addr_fib, ip4_af);
576
577   /*
578    * there is no support for adj-fib handling in the presence of overlapping
579    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
580    * most routers do.
581    */
582   /* *INDENT-OFF* */
583   if (!is_del)
584     {
585       /* When adding an address check that it does not conflict
586          with an existing address on any interface in this table. */
587       ip_interface_address_t *ia;
588       vnet_sw_interface_t *sif;
589
590       pool_foreach(sif, vnm->interface_main.sw_interfaces,
591       ({
592           if (im->fib_index_by_sw_if_index[sw_if_index] ==
593               im->fib_index_by_sw_if_index[sif->sw_if_index])
594             {
595               foreach_ip_interface_address
596                 (&im->lookup_main, ia, sif->sw_if_index,
597                  0 /* honor unnumbered */ ,
598                  ({
599                    ip4_address_t * x =
600                      ip_interface_address_get_address
601                      (&im->lookup_main, ia);
602                    if (ip4_destination_matches_route
603                        (im, address, x, ia->address_length) ||
604                        ip4_destination_matches_route (im,
605                                                       x,
606                                                       address,
607                                                       address_length))
608                      {
609                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
610
611                        return
612                          clib_error_create
613                          ("failed to add %U which conflicts with %U for interface %U",
614                           format_ip4_address_and_length, address,
615                           address_length,
616                           format_ip4_address_and_length, x,
617                           ia->address_length,
618                           format_vnet_sw_if_index_name, vnm,
619                           sif->sw_if_index);
620                      }
621                  }));
622             }
623       }));
624     }
625   /* *INDENT-ON* */
626
627   elts_before = pool_elts (lm->if_address_pool);
628
629   error = ip_interface_address_add_del
630     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
631   if (error)
632     goto done;
633
634   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
635
636   if (is_del)
637     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
638   else
639     ip4_add_interface_routes (sw_if_index,
640                               im, ip4_af.fib_index,
641                               pool_elt_at_index
642                               (lm->if_address_pool, if_address_index));
643
644   /* If pool did not grow/shrink: add duplicate address. */
645   if (elts_before != pool_elts (lm->if_address_pool))
646     {
647       ip4_add_del_interface_address_callback_t *cb;
648       vec_foreach (cb, im->add_del_interface_address_callbacks)
649         cb->function (im, cb->function_opaque, sw_if_index,
650                       address, address_length, if_address_index, is_del);
651     }
652
653 done:
654   vec_free (addr_fib);
655   return error;
656 }
657
658 clib_error_t *
659 ip4_add_del_interface_address (vlib_main_t * vm,
660                                u32 sw_if_index,
661                                ip4_address_t * address,
662                                u32 address_length, u32 is_del)
663 {
664   return ip4_add_del_interface_address_internal
665     (vm, sw_if_index, address, address_length, is_del);
666 }
667
668 void
669 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
670 {
671   ip_interface_address_t *ia;
672   ip4_main_t *im;
673
674   im = &ip4_main;
675
676   /*
677    * when directed broadcast is enabled, the subnet braodcast route will forward
678    * packets using an adjacency with a broadcast MAC. otherwise it drops
679    */
680   /* *INDENT-OFF* */
681   foreach_ip_interface_address(&im->lookup_main, ia,
682                                sw_if_index, 0,
683      ({
684        if (ia->address_length <= 30)
685          {
686            ip4_address_t *ipa;
687
688            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
689
690            fib_prefix_t pfx = {
691              .fp_len = 32,
692              .fp_proto = FIB_PROTOCOL_IP4,
693              .fp_addr = {
694                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
695              },
696            };
697
698            ip4_add_subnet_bcast_route
699              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
700                                                   sw_if_index),
701               &pfx, sw_if_index);
702          }
703      }));
704   /* *INDENT-ON* */
705 }
706 #endif
707
708 /* Built-in ip4 unicast rx feature path definition */
709 /* *INDENT-OFF* */
710 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
711 {
712   .arc_name = "ip4-unicast",
713   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
714   .last_in_arc = "ip4-lookup",
715   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
716 };
717
718 VNET_FEATURE_INIT (ip4_flow_classify, static) =
719 {
720   .arc_name = "ip4-unicast",
721   .node_name = "ip4-flow-classify",
722   .runs_before = VNET_FEATURES ("ip4-inacl"),
723 };
724
725 VNET_FEATURE_INIT (ip4_inacl, static) =
726 {
727   .arc_name = "ip4-unicast",
728   .node_name = "ip4-inacl",
729   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
730 };
731
732 VNET_FEATURE_INIT (ip4_source_check_1, static) =
733 {
734   .arc_name = "ip4-unicast",
735   .node_name = "ip4-source-check-via-rx",
736   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
737 };
738
739 VNET_FEATURE_INIT (ip4_source_check_2, static) =
740 {
741   .arc_name = "ip4-unicast",
742   .node_name = "ip4-source-check-via-any",
743   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
744 };
745
746 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
747 {
748   .arc_name = "ip4-unicast",
749   .node_name = "ip4-source-and-port-range-check-rx",
750   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
751 };
752
753 VNET_FEATURE_INIT (ip4_policer_classify, static) =
754 {
755   .arc_name = "ip4-unicast",
756   .node_name = "ip4-policer-classify",
757   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
758 };
759
760 VNET_FEATURE_INIT (ip4_ipsec, static) =
761 {
762   .arc_name = "ip4-unicast",
763   .node_name = "ipsec4-input-feature",
764   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
765 };
766
767 VNET_FEATURE_INIT (ip4_vpath, static) =
768 {
769   .arc_name = "ip4-unicast",
770   .node_name = "vpath-input-ip4",
771   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
772 };
773
774 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
775 {
776   .arc_name = "ip4-unicast",
777   .node_name = "ip4-vxlan-bypass",
778   .runs_before = VNET_FEATURES ("ip4-lookup"),
779 };
780
781 VNET_FEATURE_INIT (ip4_not_enabled, static) =
782 {
783   .arc_name = "ip4-unicast",
784   .node_name = "ip4-not-enabled",
785   .runs_before = VNET_FEATURES ("ip4-lookup"),
786 };
787
788 VNET_FEATURE_INIT (ip4_lookup, static) =
789 {
790   .arc_name = "ip4-unicast",
791   .node_name = "ip4-lookup",
792   .runs_before = 0,     /* not before any other features */
793 };
794
795 /* Built-in ip4 multicast rx feature path definition */
796 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
797 {
798   .arc_name = "ip4-multicast",
799   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
800   .last_in_arc = "ip4-mfib-forward-lookup",
801   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
802 };
803
804 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
805 {
806   .arc_name = "ip4-multicast",
807   .node_name = "vpath-input-ip4",
808   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
809 };
810
811 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
812 {
813   .arc_name = "ip4-multicast",
814   .node_name = "ip4-not-enabled",
815   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
816 };
817
818 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
819 {
820   .arc_name = "ip4-multicast",
821   .node_name = "ip4-mfib-forward-lookup",
822   .runs_before = 0,     /* last feature */
823 };
824
825 /* Source and port-range check ip4 tx feature path definition */
826 VNET_FEATURE_ARC_INIT (ip4_output, static) =
827 {
828   .arc_name = "ip4-output",
829   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
830   .last_in_arc = "interface-output",
831   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
832 };
833
834 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
835 {
836   .arc_name = "ip4-output",
837   .node_name = "ip4-source-and-port-range-check-tx",
838   .runs_before = VNET_FEATURES ("ip4-outacl"),
839 };
840
841 VNET_FEATURE_INIT (ip4_outacl, static) =
842 {
843   .arc_name = "ip4-output",
844   .node_name = "ip4-outacl",
845   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
846 };
847
848 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
849 {
850   .arc_name = "ip4-output",
851   .node_name = "ipsec4-output-feature",
852   .runs_before = VNET_FEATURES ("interface-output"),
853 };
854
855 /* Built-in ip4 tx feature path definition */
856 VNET_FEATURE_INIT (ip4_interface_output, static) =
857 {
858   .arc_name = "ip4-output",
859   .node_name = "interface-output",
860   .runs_before = 0,     /* not before any other features */
861 };
862 /* *INDENT-ON* */
863
864 static clib_error_t *
865 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
866 {
867   ip4_main_t *im = &ip4_main;
868
869   /* Fill in lookup tables with default table (0). */
870   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
871   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
872
873   if (!is_add)
874     {
875       ip4_main_t *im4 = &ip4_main;
876       ip_lookup_main_t *lm4 = &im4->lookup_main;
877       ip_interface_address_t *ia = 0;
878       ip4_address_t *address;
879       vlib_main_t *vm = vlib_get_main ();
880
881       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
882       /* *INDENT-OFF* */
883       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
884       ({
885         address = ip_interface_address_get_address (lm4, ia);
886         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
887       }));
888       /* *INDENT-ON* */
889     }
890
891   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
892                                is_add, 0, 0);
893
894   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
895                                sw_if_index, is_add, 0, 0);
896
897   return /* no error */ 0;
898 }
899
900 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
901
902 /* Global IP4 main. */
903 #ifndef CLIB_MARCH_VARIANT
904 ip4_main_t ip4_main;
905 #endif /* CLIB_MARCH_VARIANT */
906
907 static clib_error_t *
908 ip4_lookup_init (vlib_main_t * vm)
909 {
910   ip4_main_t *im = &ip4_main;
911   clib_error_t *error;
912   uword i;
913
914   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
915     return error;
916   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
917     return (error);
918   if ((error = vlib_call_init_function (vm, fib_module_init)))
919     return error;
920   if ((error = vlib_call_init_function (vm, mfib_module_init)))
921     return error;
922
923   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
924     {
925       u32 m;
926
927       if (i < 32)
928         m = pow2_mask (i) << (32 - i);
929       else
930         m = ~0;
931       im->fib_masks[i] = clib_host_to_net_u32 (m);
932     }
933
934   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
935
936   /* Create FIB with index 0 and table id of 0. */
937   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
938                                      FIB_SOURCE_DEFAULT_ROUTE);
939   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
940                                       MFIB_SOURCE_DEFAULT_ROUTE);
941
942   {
943     pg_node_t *pn;
944     pn = pg_get_node (ip4_lookup_node.index);
945     pn->unformat_edit = unformat_pg_ip4_header;
946   }
947
948   {
949     ethernet_arp_header_t h;
950
951     clib_memset (&h, 0, sizeof (h));
952
953 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
954 #define _8(f,v) h.f = v;
955     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
956     _16 (l3_type, ETHERNET_TYPE_IP4);
957     _8 (n_l2_address_bytes, 6);
958     _8 (n_l3_address_bytes, 4);
959     _16 (opcode, ETHERNET_ARP_OPCODE_request);
960 #undef _16
961 #undef _8
962
963     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
964                                /* data */ &h,
965                                sizeof (h),
966                                /* alloc chunk size */ 8,
967                                "ip4 arp");
968   }
969
970   return error;
971 }
972
973 VLIB_INIT_FUNCTION (ip4_lookup_init);
974
975 typedef struct
976 {
977   /* Adjacency taken. */
978   u32 dpo_index;
979   u32 flow_hash;
980   u32 fib_index;
981
982   /* Packet data, possibly *after* rewrite. */
983   u8 packet_data[64 - 1 * sizeof (u32)];
984 }
985 ip4_forward_next_trace_t;
986
987 #ifndef CLIB_MARCH_VARIANT
988 u8 *
989 format_ip4_forward_next_trace (u8 * s, va_list * args)
990 {
991   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
992   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
993   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
994   u32 indent = format_get_indent (s);
995   s = format (s, "%U%U",
996               format_white_space, indent,
997               format_ip4_header, t->packet_data, sizeof (t->packet_data));
998   return s;
999 }
1000 #endif
1001
1002 static u8 *
1003 format_ip4_lookup_trace (u8 * s, va_list * args)
1004 {
1005   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1006   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1007   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1008   u32 indent = format_get_indent (s);
1009
1010   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1011               t->fib_index, t->dpo_index, t->flow_hash);
1012   s = format (s, "\n%U%U",
1013               format_white_space, indent,
1014               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1015   return s;
1016 }
1017
1018 static u8 *
1019 format_ip4_rewrite_trace (u8 * s, va_list * args)
1020 {
1021   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1022   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1023   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1024   u32 indent = format_get_indent (s);
1025
1026   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1027               t->fib_index, t->dpo_index, format_ip_adjacency,
1028               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1029   s = format (s, "\n%U%U",
1030               format_white_space, indent,
1031               format_ip_adjacency_packet_data,
1032               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1033   return s;
1034 }
1035
1036 #ifndef CLIB_MARCH_VARIANT
1037 /* Common trace function for all ip4-forward next nodes. */
1038 void
1039 ip4_forward_next_trace (vlib_main_t * vm,
1040                         vlib_node_runtime_t * node,
1041                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1042 {
1043   u32 *from, n_left;
1044   ip4_main_t *im = &ip4_main;
1045
1046   n_left = frame->n_vectors;
1047   from = vlib_frame_vector_args (frame);
1048
1049   while (n_left >= 4)
1050     {
1051       u32 bi0, bi1;
1052       vlib_buffer_t *b0, *b1;
1053       ip4_forward_next_trace_t *t0, *t1;
1054
1055       /* Prefetch next iteration. */
1056       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1057       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1058
1059       bi0 = from[0];
1060       bi1 = from[1];
1061
1062       b0 = vlib_get_buffer (vm, bi0);
1063       b1 = vlib_get_buffer (vm, bi1);
1064
1065       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1066         {
1067           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1068           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1069           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1070           t0->fib_index =
1071             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1072              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1073             vec_elt (im->fib_index_by_sw_if_index,
1074                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1075
1076           clib_memcpy_fast (t0->packet_data,
1077                             vlib_buffer_get_current (b0),
1078                             sizeof (t0->packet_data));
1079         }
1080       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1081         {
1082           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1083           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1084           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1085           t1->fib_index =
1086             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1087              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1088             vec_elt (im->fib_index_by_sw_if_index,
1089                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1090           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1091                             sizeof (t1->packet_data));
1092         }
1093       from += 2;
1094       n_left -= 2;
1095     }
1096
1097   while (n_left >= 1)
1098     {
1099       u32 bi0;
1100       vlib_buffer_t *b0;
1101       ip4_forward_next_trace_t *t0;
1102
1103       bi0 = from[0];
1104
1105       b0 = vlib_get_buffer (vm, bi0);
1106
1107       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1108         {
1109           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1110           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1111           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1112           t0->fib_index =
1113             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1114              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1115             vec_elt (im->fib_index_by_sw_if_index,
1116                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1117           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1118                             sizeof (t0->packet_data));
1119         }
1120       from += 1;
1121       n_left -= 1;
1122     }
1123 }
1124
1125 /* Compute TCP/UDP/ICMP4 checksum in software. */
1126 u16
1127 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1128                               ip4_header_t * ip0)
1129 {
1130   ip_csum_t sum0;
1131   u32 ip_header_length, payload_length_host_byte_order;
1132   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1133   u16 sum16;
1134   void *data_this_buffer;
1135
1136   /* Initialize checksum with ip header. */
1137   ip_header_length = ip4_header_bytes (ip0);
1138   payload_length_host_byte_order =
1139     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1140   sum0 =
1141     clib_host_to_net_u32 (payload_length_host_byte_order +
1142                           (ip0->protocol << 16));
1143
1144   if (BITS (uword) == 32)
1145     {
1146       sum0 =
1147         ip_csum_with_carry (sum0,
1148                             clib_mem_unaligned (&ip0->src_address, u32));
1149       sum0 =
1150         ip_csum_with_carry (sum0,
1151                             clib_mem_unaligned (&ip0->dst_address, u32));
1152     }
1153   else
1154     sum0 =
1155       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1156
1157   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1158   data_this_buffer = (void *) ip0 + ip_header_length;
1159   n_ip_bytes_this_buffer =
1160     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1161   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1162     {
1163       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1164         n_ip_bytes_this_buffer - ip_header_length : 0;
1165     }
1166   while (1)
1167     {
1168       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1169       n_bytes_left -= n_this_buffer;
1170       if (n_bytes_left == 0)
1171         break;
1172
1173       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1174       p0 = vlib_get_buffer (vm, p0->next_buffer);
1175       data_this_buffer = vlib_buffer_get_current (p0);
1176       n_this_buffer = p0->current_length;
1177     }
1178
1179   sum16 = ~ip_csum_fold (sum0);
1180
1181   return sum16;
1182 }
1183
1184 u32
1185 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1186 {
1187   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1188   udp_header_t *udp0;
1189   u16 sum16;
1190
1191   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1192           || ip0->protocol == IP_PROTOCOL_UDP);
1193
1194   udp0 = (void *) (ip0 + 1);
1195   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1196     {
1197       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1198                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1199       return p0->flags;
1200     }
1201
1202   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1203
1204   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1205                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1206
1207   return p0->flags;
1208 }
1209 #endif
1210
1211 /* *INDENT-OFF* */
1212 VNET_FEATURE_ARC_INIT (ip4_local) =
1213 {
1214   .arc_name  = "ip4-local",
1215   .start_nodes = VNET_FEATURES ("ip4-local"),
1216   .last_in_arc = "ip4-local-end-of-arc",
1217 };
1218 /* *INDENT-ON* */
1219
1220 static inline void
1221 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1222                             ip4_header_t * ip, u8 is_udp, u8 * error,
1223                             u8 * good_tcp_udp)
1224 {
1225   u32 flags0;
1226   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1227   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1228   if (is_udp)
1229     {
1230       udp_header_t *udp;
1231       u32 ip_len, udp_len;
1232       i32 len_diff;
1233       udp = ip4_next_header (ip);
1234       /* Verify UDP length. */
1235       ip_len = clib_net_to_host_u16 (ip->length);
1236       udp_len = clib_net_to_host_u16 (udp->length);
1237
1238       len_diff = ip_len - udp_len;
1239       *good_tcp_udp &= len_diff >= 0;
1240       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1241     }
1242 }
1243
/*
 * L4 checksum state predicates on a buffer pointer _b.
 *
 * Every argument and every expansion is fully parenthesised, so the
 * macros can be negated, compared or combined with other operators and
 * can take any pointer expression as argument.  Each evaluates to 0 or 1.
 */

/* 1 when either the TCP or the UDP checksum-offload flag is set on _b. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * 1 when the packet is TCP/UDP and its checksum has neither been
 * computed already nor been offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)         \
             || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1255
1256 static inline void
1257 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1258                          ip4_header_t * ih, u8 * error)
1259 {
1260   u8 is_udp, is_tcp_udp, good_tcp_udp;
1261
1262   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1263   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1264
1265   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1266     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1267   else
1268     good_tcp_udp = ip4_local_csum_is_valid (b);
1269
1270   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1271   *error = (is_tcp_udp && !good_tcp_udp
1272             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1273 }
1274
1275 static inline void
1276 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1277                             ip4_header_t ** ih, u8 * error)
1278 {
1279   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1280
1281   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1282   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1283
1284   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1285   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1286
1287   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1288   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1289
1290   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1291                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1292     {
1293       if (is_tcp_udp[0])
1294         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1295                                     &good_tcp_udp[0]);
1296       if (is_tcp_udp[1])
1297         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1298                                     &good_tcp_udp[1]);
1299     }
1300
1301   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1302               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1303   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1304               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1305 }
1306
1307 static inline void
1308 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1309                               vlib_buffer_t * b, u16 * next, u8 error,
1310                               u8 head_of_feature_arc)
1311 {
1312   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1313   u32 next_index;
1314
1315   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1316   b->error = error ? error_node->errors[error] : 0;
1317   if (head_of_feature_arc)
1318     {
1319       next_index = *next;
1320       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1321         {
1322           vnet_feature_arc_start (arc_index,
1323                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1324                                   &next_index, b);
1325           *next = next_index;
1326         }
1327     }
1328 }
1329
1330 typedef struct
1331 {
1332   ip4_address_t src;
1333   u32 lbi;
1334   u8 error;
1335   u8 first;
1336 } ip4_local_last_check_t;
1337
1338 static inline void
1339 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1340                      ip4_local_last_check_t * last_check, u8 * error0)
1341 {
1342   ip4_fib_mtrie_leaf_t leaf0;
1343   ip4_fib_mtrie_t *mtrie0;
1344   const dpo_id_t *dpo0;
1345   load_balance_t *lb0;
1346   u32 lbi0;
1347
1348   vnet_buffer (b)->ip.fib_index =
1349     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1350     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1351
1352   if (PREDICT_FALSE (last_check->first ||
1353                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1354     {
1355       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1356       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1357       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1358       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1359       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1360
1361       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1362       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1363
1364       lb0 = load_balance_get (lbi0);
1365       dpo0 = load_balance_get_bucket_i (lb0, 0);
1366
1367       /*
1368        * Must have a route to source otherwise we drop the packet.
1369        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1370        *
1371        * The checks are:
1372        *  - the source is a recieve => it's from us => bogus, do this
1373        *    first since it sets a different error code.
1374        *  - uRPF check for any route to source - accept if passes.
1375        *  - allow packets destined to the broadcast address from unknown sources
1376        */
1377
1378       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1379                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1380                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1381       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1382                   && !fib_urpf_check_size (lb0->lb_urpf)
1383                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1384                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1385
1386       last_check->src.as_u32 = ip0->src_address.as_u32;
1387       last_check->lbi = lbi0;
1388       last_check->error = *error0;
1389     }
1390   else
1391     {
1392       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1393       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1394       *error0 = last_check->error;
1395       last_check->first = 0;
1396     }
1397 }
1398
1399 static inline void
1400 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1401                         ip4_local_last_check_t * last_check, u8 * error)
1402 {
1403   ip4_fib_mtrie_leaf_t leaf[2];
1404   ip4_fib_mtrie_t *mtrie[2];
1405   const dpo_id_t *dpo[2];
1406   load_balance_t *lb[2];
1407   u32 not_last_hit;
1408   u32 lbi[2];
1409
1410   not_last_hit = last_check->first;
1411   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1412   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1413
1414   vnet_buffer (b[0])->ip.fib_index =
1415     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1416     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1417     vnet_buffer (b[0])->ip.fib_index;
1418
1419   vnet_buffer (b[1])->ip.fib_index =
1420     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1421     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1422     vnet_buffer (b[1])->ip.fib_index;
1423
1424   if (PREDICT_FALSE (not_last_hit))
1425     {
1426       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1427       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1428
1429       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1430       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1431
1432       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1433                                            &ip[0]->src_address, 2);
1434       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1435                                            &ip[1]->src_address, 2);
1436
1437       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1438                                            &ip[0]->src_address, 3);
1439       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1440                                            &ip[1]->src_address, 3);
1441
1442       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1443       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1444
1445       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1446       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1447
1448       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1449       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1450
1451       lb[0] = load_balance_get (lbi[0]);
1452       lb[1] = load_balance_get (lbi[1]);
1453
1454       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1455       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1456
1457       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1458                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1459                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1460       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1461                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1462                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1463                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1464
1465       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1466                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1467                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1468       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1469                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1470                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1471                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1472
1473       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1474       last_check->lbi = lbi[1];
1475       last_check->error = error[1];
1476     }
1477   else
1478     {
1479       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1480       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1481
1482       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1483       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1484
1485       error[0] = last_check->error;
1486       error[1] = last_check->error;
1487       last_check->first = 0;
1488     }
1489 }
1490
/*
 * Packet classes returned by ip4_local_classify().
 *
 * IP_LOCAL_PACKET_TYPE_L4 is the first enumerator and therefore 0;
 * ip4_local_inline() tests the type as a boolean to decide whether the
 * checksum and source checks are skipped.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4,      /* neither fragment nor NATed */
  IP_LOCAL_PACKET_TYPE_NAT,     /* buffer carries VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_FRAG,    /* ip4 fragment */
};
1497
1498 /**
1499  * Determine packet type and next node.
1500  *
1501  * The expectation is that all packets that are not L4 will skip
1502  * checksums and source checks.
1503  */
1504 always_inline u8
1505 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1506 {
1507   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1508
1509   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1510     {
1511       *next = IP_LOCAL_NEXT_REASSEMBLY;
1512       return IP_LOCAL_PACKET_TYPE_FRAG;
1513     }
1514   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1515     {
1516       *next = lm->local_next_by_ip_protocol[ip->protocol];
1517       return IP_LOCAL_PACKET_TYPE_NAT;
1518     }
1519
1520   *next = lm->local_next_by_ip_protocol[ip->protocol];
1521   return IP_LOCAL_PACKET_TYPE_L4;
1522 }
1523
1524 static inline uword
1525 ip4_local_inline (vlib_main_t * vm,
1526                   vlib_node_runtime_t * node,
1527                   vlib_frame_t * frame, int head_of_feature_arc)
1528 {
1529   u32 *from, n_left_from;
1530   vlib_node_runtime_t *error_node =
1531     vlib_node_get_runtime (vm, ip4_input_node.index);
1532   u16 nexts[VLIB_FRAME_SIZE], *next;
1533   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1534   ip4_header_t *ip[2];
1535   u8 error[2], pt[2];
1536
1537   ip4_local_last_check_t last_check = {
1538     /*
1539      * 0.0.0.0 can appear as the source address of an IP packet,
1540      * as can any other address, hence the need to use the 'first'
1541      * member to make sure the .lbi is initialised for the first
1542      * packet.
1543      */
1544     .src = {.as_u32 = 0},
1545     .lbi = ~0,
1546     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1547     .first = 1,
1548   };
1549
1550   from = vlib_frame_vector_args (frame);
1551   n_left_from = frame->n_vectors;
1552
1553   if (node->flags & VLIB_NODE_FLAG_TRACE)
1554     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1555
1556   vlib_get_buffers (vm, from, bufs, n_left_from);
1557   b = bufs;
1558   next = nexts;
1559
1560   while (n_left_from >= 6)
1561     {
1562       u8 not_batch = 0;
1563
1564       /* Prefetch next iteration. */
1565       {
1566         vlib_prefetch_buffer_header (b[4], LOAD);
1567         vlib_prefetch_buffer_header (b[5], LOAD);
1568
1569         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1570         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1571       }
1572
1573       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1574
1575       ip[0] = vlib_buffer_get_current (b[0]);
1576       ip[1] = vlib_buffer_get_current (b[1]);
1577
1578       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1579       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1580
1581       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1582       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1583
1584       not_batch = pt[0] ^ pt[1];
1585
1586       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1587         goto skip_checks;
1588
1589       if (PREDICT_TRUE (not_batch == 0))
1590         {
1591           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1592           ip4_local_check_src_x2 (b, ip, &last_check, error);
1593         }
1594       else
1595         {
1596           if (!pt[0])
1597             {
1598               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1599               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1600             }
1601           if (!pt[1])
1602             {
1603               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1604               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1605             }
1606         }
1607
1608     skip_checks:
1609
1610       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1611                                     head_of_feature_arc);
1612       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1613                                     head_of_feature_arc);
1614
1615       b += 2;
1616       next += 2;
1617       n_left_from -= 2;
1618     }
1619
1620   while (n_left_from > 0)
1621     {
1622       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1623
1624       ip[0] = vlib_buffer_get_current (b[0]);
1625       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1626       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1627
1628       if (head_of_feature_arc == 0 || pt[0])
1629         goto skip_check;
1630
1631       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1632       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1633
1634     skip_check:
1635
1636       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1637                                     head_of_feature_arc);
1638
1639       b += 1;
1640       next += 1;
1641       n_left_from -= 1;
1642     }
1643
1644   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1645   return frame->n_vectors;
1646 }
1647
1648 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1649                                vlib_frame_t * frame)
1650 {
1651   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1652 }
1653
1654 /* *INDENT-OFF* */
1655 VLIB_REGISTER_NODE (ip4_local_node) =
1656 {
1657   .name = "ip4-local",
1658   .vector_size = sizeof (u32),
1659   .format_trace = format_ip4_forward_next_trace,
1660   .n_next_nodes = IP_LOCAL_N_NEXT,
1661   .next_nodes =
1662   {
1663     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1664     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1665     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1666     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1667     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1668   },
1669 };
1670 /* *INDENT-ON* */
1671
1672
1673 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1674                                           vlib_node_runtime_t * node,
1675                                           vlib_frame_t * frame)
1676 {
1677   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1678 }
1679
1680 /* *INDENT-OFF* */
1681 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1682   .name = "ip4-local-end-of-arc",
1683   .vector_size = sizeof (u32),
1684
1685   .format_trace = format_ip4_forward_next_trace,
1686   .sibling_of = "ip4-local",
1687 };
1688
1689 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1690   .arc_name = "ip4-local",
1691   .node_name = "ip4-local-end-of-arc",
1692   .runs_before = 0, /* not before any other features */
1693 };
1694 /* *INDENT-ON* */
1695
1696 #ifndef CLIB_MARCH_VARIANT
1697 void
1698 ip4_register_protocol (u32 protocol, u32 node_index)
1699 {
1700   vlib_main_t *vm = vlib_get_main ();
1701   ip4_main_t *im = &ip4_main;
1702   ip_lookup_main_t *lm = &im->lookup_main;
1703
1704   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1705   lm->local_next_by_ip_protocol[protocol] =
1706     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1707 }
1708 #endif
1709
1710 static clib_error_t *
1711 show_ip_local_command_fn (vlib_main_t * vm,
1712                           unformat_input_t * input, vlib_cli_command_t * cmd)
1713 {
1714   ip4_main_t *im = &ip4_main;
1715   ip_lookup_main_t *lm = &im->lookup_main;
1716   int i;
1717
1718   vlib_cli_output (vm, "Protocols handled by ip4_local");
1719   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1720     {
1721       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1722         {
1723           u32 node_index = vlib_get_node (vm,
1724                                           ip4_local_node.index)->
1725             next_nodes[lm->local_next_by_ip_protocol[i]];
1726           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1727                            node_index);
1728         }
1729     }
1730   return 0;
1731 }
1732
1733
1734
1735 /*?
1736  * Display the set of protocols handled by the local IPv4 stack.
1737  *
1738  * @cliexpar
1739  * Example of how to display local protocol table:
1740  * @cliexstart{show ip local}
1741  * Protocols handled by ip4_local
1742  * 1: ip4-icmp-input
1743  * 17: ip4-udp-lookup
1744  * 47: gre4-input
1745  * @cliexend
1746 ?*/
1747 /* *INDENT-OFF* */
1748 VLIB_CLI_COMMAND (show_ip_local, static) =
1749 {
1750   .path = "show ip local",
1751   .function = show_ip_local_command_fn,
1752   .short_help = "show ip local",
1753 };
1754 /* *INDENT-ON* */
1755
1756 always_inline uword
1757 ip4_arp_inline (vlib_main_t * vm,
1758                 vlib_node_runtime_t * node,
1759                 vlib_frame_t * frame, int is_glean)
1760 {
1761   vnet_main_t *vnm = vnet_get_main ();
1762   ip4_main_t *im = &ip4_main;
1763   ip_lookup_main_t *lm = &im->lookup_main;
1764   u32 *from, *to_next_drop;
1765   uword n_left_from, n_left_to_next_drop, next_index;
1766   u32 thread_index = vm->thread_index;
1767   u64 seed;
1768
1769   if (node->flags & VLIB_NODE_FLAG_TRACE)
1770     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1771
1772   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1773
1774   from = vlib_frame_vector_args (frame);
1775   n_left_from = frame->n_vectors;
1776   next_index = node->cached_next_index;
1777   if (next_index == IP4_ARP_NEXT_DROP)
1778     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1779
1780   while (n_left_from > 0)
1781     {
1782       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1783                            to_next_drop, n_left_to_next_drop);
1784
1785       while (n_left_from > 0 && n_left_to_next_drop > 0)
1786         {
1787           u32 pi0, bi0, adj_index0, sw_if_index0;
1788           ip_adjacency_t *adj0;
1789           vlib_buffer_t *p0, *b0;
1790           ip4_address_t resolve0;
1791           ethernet_arp_header_t *h0;
1792           vnet_hw_interface_t *hw_if0;
1793           u64 r0;
1794
1795           pi0 = from[0];
1796           p0 = vlib_get_buffer (vm, pi0);
1797
1798           from += 1;
1799           n_left_from -= 1;
1800           to_next_drop[0] = pi0;
1801           to_next_drop += 1;
1802           n_left_to_next_drop -= 1;
1803
1804           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1805           adj0 = adj_get (adj_index0);
1806
1807           if (is_glean)
1808             {
1809               /* resolve the packet's destination */
1810               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1811               resolve0 = ip0->dst_address;
1812             }
1813           else
1814             {
1815               /* resolve the incomplete adj */
1816               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1817             }
1818
1819           /* combine the address and interface for the hash key */
1820           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1821           r0 = (u64) resolve0.data_u32 << 32;
1822           r0 |= sw_if_index0;
1823
1824           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1825             {
1826               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1827               continue;
1828             }
1829
1830           /*
1831            * the adj has been updated to a rewrite but the node the DPO that got
1832            * us here hasn't - yet. no big deal. we'll drop while we wait.
1833            */
1834           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1835             {
1836               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1837               continue;
1838             }
1839
1840           /*
1841            * Can happen if the control-plane is programming tables
1842            * with traffic flowing; at least that's today's lame excuse.
1843            */
1844           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1845               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1846             {
1847               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1848               continue;
1849             }
1850           /* Send ARP request. */
1851           h0 =
1852             vlib_packet_template_get_packet (vm,
1853                                              &im->ip4_arp_request_packet_template,
1854                                              &bi0);
1855           b0 = vlib_get_buffer (vm, bi0);
1856
1857           /* copy the persistent fields from the original */
1858           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1859
1860           /* Seems we're out of buffers */
1861           if (PREDICT_FALSE (!h0))
1862             {
1863               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1864               continue;
1865             }
1866
1867           /* Add rewrite/encap string for ARP packet. */
1868           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1869
1870           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1871
1872           /* Src ethernet address in ARP header. */
1873           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
1874                                   hw_if0->hw_address);
1875           if (is_glean)
1876             {
1877               /* The interface's source address is stashed in the Glean Adj */
1878               h0->ip4_over_ethernet[0].ip4 =
1879                 adj0->sub_type.glean.receive_addr.ip4;
1880             }
1881           else
1882             {
1883               /* Src IP address in ARP header. */
1884               if (ip4_src_address_for_packet (lm, sw_if_index0,
1885                                               &h0->ip4_over_ethernet[0].ip4))
1886                 {
1887                   /* No source address available */
1888                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1889                   vlib_buffer_free (vm, &bi0, 1);
1890                   continue;
1891                 }
1892             }
1893           h0->ip4_over_ethernet[1].ip4 = resolve0;
1894
1895           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1896
1897           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1898           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1899           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1900
1901           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1902
1903           vlib_set_next_frame_buffer (vm, node,
1904                                       adj0->rewrite_header.next_index, bi0);
1905         }
1906
1907       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1908     }
1909
1910   return frame->n_vectors;
1911 }
1912
1913 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1914                              vlib_frame_t * frame)
1915 {
1916   return (ip4_arp_inline (vm, node, frame, 0));
1917 }
1918
1919 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1920                                vlib_frame_t * frame)
1921 {
1922   return (ip4_arp_inline (vm, node, frame, 1));
1923 }
1924
1925 static char *ip4_arp_error_strings[] = {
1926   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
1927   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
1928   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
1929   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1930   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1931   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1932 };
1933
1934 /* *INDENT-OFF* */
1935 VLIB_REGISTER_NODE (ip4_arp_node) =
1936 {
1937   .name = "ip4-arp",
1938   .vector_size = sizeof (u32),
1939   .format_trace = format_ip4_forward_next_trace,
1940   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1941   .error_strings = ip4_arp_error_strings,
1942   .n_next_nodes = IP4_ARP_N_NEXT,
1943   .next_nodes =
1944   {
1945     [IP4_ARP_NEXT_DROP] = "error-drop",
1946   },
1947 };
1948
1949 VLIB_REGISTER_NODE (ip4_glean_node) =
1950 {
1951   .name = "ip4-glean",
1952   .vector_size = sizeof (u32),
1953   .format_trace = format_ip4_forward_next_trace,
1954   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1955   .error_strings = ip4_arp_error_strings,
1956   .n_next_nodes = IP4_ARP_N_NEXT,
1957   .next_nodes = {
1958   [IP4_ARP_NEXT_DROP] = "error-drop",
1959   },
1960 };
1961 /* *INDENT-ON* */
1962
1963 #define foreach_notrace_ip4_arp_error           \
1964 _(THROTTLED)                                    \
1965 _(RESOLVED)                                     \
1966 _(NO_BUFFERS)                                   \
1967 _(REQUEST_SENT)                                 \
1968 _(NON_ARP_ADJ)                                  \
1969 _(NO_SOURCE_ADDRESS)
1970
1971 static clib_error_t *
1972 arp_notrace_init (vlib_main_t * vm)
1973 {
1974   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1975
1976   /* don't trace ARP request packets */
1977 #define _(a)                                    \
1978     vnet_pcap_drop_trace_filter_add_del         \
1979         (rt->errors[IP4_ARP_ERROR_##a],         \
1980          1 /* is_add */);
1981   foreach_notrace_ip4_arp_error;
1982 #undef _
1983   return 0;
1984 }
1985
1986 VLIB_INIT_FUNCTION (arp_notrace_init);
1987
1988
1989 #ifndef CLIB_MARCH_VARIANT
1990 /* Send an ARP request to see if given destination is reachable on given interface. */
1991 clib_error_t *
1992 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1993                     u8 refresh)
1994 {
1995   vnet_main_t *vnm = vnet_get_main ();
1996   ip4_main_t *im = &ip4_main;
1997   ethernet_arp_header_t *h;
1998   ip4_address_t *src;
1999   ip_interface_address_t *ia;
2000   ip_adjacency_t *adj;
2001   vnet_hw_interface_t *hi;
2002   vnet_sw_interface_t *si;
2003   vlib_buffer_t *b;
2004   adj_index_t ai;
2005   u32 bi = 0;
2006   u8 unicast_rewrite = 0;
2007
2008   si = vnet_get_sw_interface (vnm, sw_if_index);
2009
2010   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2011     {
2012       return clib_error_return (0, "%U: interface %U down",
2013                                 format_ip4_address, dst,
2014                                 format_vnet_sw_if_index_name, vnm,
2015                                 sw_if_index);
2016     }
2017
2018   src =
2019     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2020   if (!src)
2021     {
2022       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2023       return clib_error_return
2024         (0,
2025          "no matching interface address for destination %U (interface %U)",
2026          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2027          sw_if_index);
2028     }
2029
2030   h = vlib_packet_template_get_packet (vm,
2031                                        &im->ip4_arp_request_packet_template,
2032                                        &bi);
2033
2034   if (!h)
2035     return clib_error_return (0, "ARP request packet allocation failed");
2036
2037   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2038   if (PREDICT_FALSE (!hi->hw_address))
2039     {
2040       return clib_error_return (0, "%U: interface %U do not support ip probe",
2041                                 format_ip4_address, dst,
2042                                 format_vnet_sw_if_index_name, vnm,
2043                                 sw_if_index);
2044     }
2045
2046   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2047
2048   h->ip4_over_ethernet[0].ip4 = src[0];
2049   h->ip4_over_ethernet[1].ip4 = dst[0];
2050
2051   b = vlib_get_buffer (vm, bi);
2052   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2053     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2054
2055   ip46_address_t nh = {
2056     .ip4 = *dst,
2057   };
2058
2059   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2060                             VNET_LINK_IP4, &nh, sw_if_index);
2061   adj = adj_get (ai);
2062
2063   /* Peer has been previously resolved, retrieve glean adj instead */
2064   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2065     {
2066       if (refresh)
2067         unicast_rewrite = 1;
2068       else
2069         {
2070           adj_unlock (ai);
2071           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2072                                       VNET_LINK_IP4, sw_if_index, &nh);
2073           adj = adj_get (ai);
2074         }
2075     }
2076
2077   /* Add encapsulation string for software interface (e.g. ethernet header). */
2078   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2079   if (unicast_rewrite)
2080     {
2081       u16 *etype = vlib_buffer_get_current (b) - 2;
2082       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2083     }
2084   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2085
2086   {
2087     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2088     u32 *to_next = vlib_frame_vector_args (f);
2089     to_next[0] = bi;
2090     f->n_vectors = 1;
2091     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2092   }
2093
2094   adj_unlock (ai);
2095   return /* no error */ 0;
2096 }
2097 #endif
2098
/* Next-node slots used by the ip4-rewrite family of nodes. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT = 3,       /* number of slots; must stay last */
} ip4_rewrite_next_t;
2106
/**
 * The bits of an IPv4 address to mask in order to construct a multicast
 * MAC address.  The value depends on host byte order.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2116
2117 always_inline void
2118 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2119                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2120 {
2121   if (packet_len > adj_packet_bytes)
2122     {
2123       *error = IP4_ERROR_MTU_EXCEEDED;
2124       if (df)
2125         {
2126           icmp4_error_set_vnet_buffer
2127             (b, ICMP4_destination_unreachable,
2128              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2129              adj_packet_bytes);
2130           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2131         }
2132       else
2133         {
2134           /* IP fragmentation */
2135           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2136                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2137           *next = IP4_REWRITE_NEXT_FRAGMENT;
2138         }
2139     }
2140 }
2141
2142 /* Decrement TTL & update checksum.
2143    Works either endian, so no need for byte swap. */
2144 static_always_inline void
2145 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2146                             u32 * error)
2147 {
2148   i32 ttl;
2149   u32 checksum;
2150   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2151     {
2152       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2153       return;
2154     }
2155
2156   ttl = ip->ttl;
2157
2158   /* Input node should have reject packets with ttl 0. */
2159   ASSERT (ip->ttl > 0);
2160
2161   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2162   checksum += checksum >= 0xffff;
2163
2164   ip->checksum = checksum;
2165   ttl -= 1;
2166   ip->ttl = ttl;
2167
2168   /*
2169    * If the ttl drops below 1 when forwarding, generate
2170    * an ICMP response.
2171    */
2172   if (PREDICT_FALSE (ttl <= 0))
2173     {
2174       *error = IP4_ERROR_TIME_EXPIRED;
2175       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2176       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2177                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2178                                    0);
2179       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2180     }
2181
2182   /* Verify checksum. */
2183   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2184           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2185 }
2186
2187
/*
 * Shared worker for the ip4 rewrite family of nodes.
 *
 * For every buffer in the frame: decrement the TTL and update the checksum,
 * check the packet length against the adjacency's L3 MTU, apply the
 * adjacency's rewrite string and pick the next node.  Every buffer starts
 * with next = IP4_REWRITE_NEXT_DROP; only buffers that finish with
 * IP4_ERROR_NONE are advanced over the rewrite and sent to the adjacency's
 * next index (or to the start of the output feature arc when the rewrite
 * has features).
 *
 * do_counters: also bump the per-adjacency combined counters.
 * is_midchain: also call the adjacency's midchain fixup function.
 * is_mcast:    flag packets whose TX interface equals their RX interface
 *              with IP4_ERROR_SAME_INTERFACE, and patch the rewrite with
 *              bits of the destination IP address.
 *
 * Returns the number of buffers in the frame.
 */
2188 always_inline uword
2189 ip4_rewrite_inline (vlib_main_t * vm,
2190                     vlib_node_runtime_t * node,
2191                     vlib_frame_t * frame,
2192                     int do_counters, int is_midchain, int is_mcast)
2193 {
2194   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2195   u32 *from = vlib_frame_vector_args (frame);
2196   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2197   u16 nexts[VLIB_FRAME_SIZE], *next;
2198   u32 n_left_from;
  /* error counters are taken from the ip4-input node's runtime */
2199   vlib_node_runtime_t *error_node =
2200     vlib_node_get_runtime (vm, ip4_input_node.index);
2201
2202   n_left_from = frame->n_vectors;
2203   u32 thread_index = vm->thread_index;
2204
2205   vlib_get_buffers (vm, from, bufs, n_left_from);
  /* default disposition: drop, unless a buffer is rewritten successfully */
2206   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2207
  /* warm up the headers of buffers 2..5; the dual loop prefetches from 6 on */
2208   if (n_left_from >= 6)
2209     {
2210       int i;
2211       for (i = 2; i < 6; i++)
2212         vlib_prefetch_buffer_header (bufs[i], LOAD);
2213     }
2214
2215   next = nexts;
2216   b = bufs;
  /*
   * Dual loop: two buffers per iteration.  At least 8 buffers must remain
   * because the body prefetches the headers of b[6] and b[7].
   */
2217   while (n_left_from >= 8)
2218     {
2219       ip_adjacency_t *adj0, *adj1;
2220       ip4_header_t *ip0, *ip1;
2221       u32 rw_len0, error0, adj_index0;
2222       u32 rw_len1, error1, adj_index1;
2223       u32 tx_sw_if_index0, tx_sw_if_index1;
2224       u8 *p;
2225
2226       vlib_prefetch_buffer_header (b[6], LOAD);
2227       vlib_prefetch_buffer_header (b[7], LOAD);
2228
2229       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2230       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2231
2232       /*
2233        * pre-fetch the per-adjacency counters
2234        */
2235       if (do_counters)
2236         {
2237           vlib_prefetch_combined_counter (&adjacency_counters,
2238                                           thread_index, adj_index0);
2239           vlib_prefetch_combined_counter (&adjacency_counters,
2240                                           thread_index, adj_index1);
2241         }
2242
2243       ip0 = vlib_buffer_get_current (b[0]);
2244       ip1 = vlib_buffer_get_current (b[1]);
2245
2246       error0 = error1 = IP4_ERROR_NONE;
2247
2248       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2249       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2250
2251       /* Rewrite packet header and updates lengths. */
2252       adj0 = adj_get (adj_index0);
2253       adj1 = adj_get (adj_index1);
2254
2255       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2256       rw_len0 = adj0[0].rewrite_header.data_bytes;
2257       rw_len1 = adj1[0].rewrite_header.data_bytes;
2258       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2259       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2260
      /* prefetch packet data of the next pair: the cache line in front of the
       * current data for storing, the line at the current data for loading */
2261       p = vlib_buffer_get_current (b[2]);
2262       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2263       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2264
2265       p = vlib_buffer_get_current (b[3]);
2266       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2267       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2268
2269       /* Check MTU of outgoing interface. */
2270       ip4_mtu_check (b[0], clib_net_to_host_u16 (ip0->length),
2271                      adj0[0].rewrite_header.max_l3_packet_bytes,
2272                      ip0->flags_and_fragment_offset &
2273                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2274                      next + 0, &error0);
2275       ip4_mtu_check (b[1], clib_net_to_host_u16 (ip1->length),
2276                      adj1[0].rewrite_header.max_l3_packet_bytes,
2277                      ip1->flags_and_fragment_offset &
2278                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2279                      next + 1, &error1);
2280
      /* multicast: a TX interface equal to the RX interface is an error */
2281       if (is_mcast)
2282         {
2283           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2284                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2285                     IP4_ERROR_SAME_INTERFACE : error0);
2286           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2287                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2288                     IP4_ERROR_SAME_INTERFACE : error1);
2289         }
2290
2291       b[0]->error = error_node->errors[error0];
2292       b[1]->error = error_node->errors[error1];
2293       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2294        * to see the IP header */
2295       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2296         {
2297           u32 next_index = adj0[0].rewrite_header.next_index;
2298           b[0]->current_data -= rw_len0;
2299           b[0]->current_length += rw_len0;
2300           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2301           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2302
2303           if (PREDICT_FALSE
2304               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2305             vnet_feature_arc_start (lm->output_feature_arc_index,
2306                                     tx_sw_if_index0, &next_index, b[0]);
2307           next[0] = next_index;
2308         }
2309       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2310         {
2311           u32 next_index = adj1[0].rewrite_header.next_index;
2312           b[1]->current_data -= rw_len1;
2313           b[1]->current_length += rw_len1;
2314
2315           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2316           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2317
2318           if (PREDICT_FALSE
2319               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2320             vnet_feature_arc_start (lm->output_feature_arc_index,
2321                                     tx_sw_if_index1, &next_index, b[1]);
2322           next[1] = next_index;
2323         }
2324
2325       /* Guess we are only writing on simple Ethernet header. */
2326       vnet_rewrite_two_headers (adj0[0], adj1[0],
2327                                 ip0, ip1, sizeof (ethernet_header_t));
2328
2329       /*
2330        * Bump the per-adjacency counters
2331        */
2332       if (do_counters)
2333         {
2334           vlib_increment_combined_counter
2335             (&adjacency_counters,
2336              thread_index,
2337              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2338
2339           vlib_increment_combined_counter
2340             (&adjacency_counters,
2341              thread_index,
2342              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2343         }
2344
2345       if (is_midchain)
2346         {
2347           adj0->sub_type.midchain.fixup_func
2348             (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2349           adj1->sub_type.midchain.fixup_func
2350             (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2351         }
2352
2353       if (is_mcast)
2354         {
2355           /*
2356            * copy bytes from the IP address into the MAC rewrite
2357            */
2358           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2359                                       adj0->rewrite_header.dst_mcast_offset,
2360                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2361           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2362                                       adj1->rewrite_header.dst_mcast_offset,
2363                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2364         }
2365
2366       next += 2;
2367       b += 2;
2368       n_left_from -= 2;
2369     }
2370
  /* Single loop: same per-buffer steps as above, without data prefetching. */
2371   while (n_left_from > 0)
2372     {
2373       ip_adjacency_t *adj0;
2374       ip4_header_t *ip0;
2375       u32 rw_len0, adj_index0, error0;
2376       u32 tx_sw_if_index0;
2377
2378       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2379
2380       adj0 = adj_get (adj_index0);
2381
2382       if (do_counters)
2383         vlib_prefetch_combined_counter (&adjacency_counters,
2384                                         thread_index, adj_index0);
2385
2386       ip0 = vlib_buffer_get_current (b[0]);
2387
2388       error0 = IP4_ERROR_NONE;
2389
2390       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2391
2392
2393       /* Update packet buffer attributes/set output interface. */
2394       rw_len0 = adj0[0].rewrite_header.data_bytes;
2395       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2396
2397       /* Check MTU of outgoing interface. */
2398       ip4_mtu_check (b[0], clib_net_to_host_u16 (ip0->length),
2399                      adj0[0].rewrite_header.max_l3_packet_bytes,
2400                      ip0->flags_and_fragment_offset &
2401                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2402                      next + 0, &error0);
2403
      /* multicast: a TX interface equal to the RX interface is an error */
2404       if (is_mcast)
2405         {
2406           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2407                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2408                     IP4_ERROR_SAME_INTERFACE : error0);
2409         }
2410       b[0]->error = error_node->errors[error0];
2411
2412       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2413        * to see the IP header */
2414       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2415         {
2416           u32 next_index = adj0[0].rewrite_header.next_index;
2417           b[0]->current_data -= rw_len0;
2418           b[0]->current_length += rw_len0;
2419           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2420           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2421
2422           if (PREDICT_FALSE
2423               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2424             vnet_feature_arc_start (lm->output_feature_arc_index,
2425                                     tx_sw_if_index0, &next_index, b[0]);
2426           next[0] = next_index;
2427         }
2428
2429       /* Guess we are only writing on simple Ethernet header. */
2430       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2431
2432       if (do_counters)
2433         vlib_increment_combined_counter
2434           (&adjacency_counters,
2435            thread_index, adj_index0, 1,
2436            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2437
2438       if (is_midchain)
2439         {
2440           adj0->sub_type.midchain.fixup_func
2441             (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2442         }
2443
2444       if (is_mcast)
2445         {
2446           /*
2447            * copy bytes from the IP address into the MAC rewrite
2448            */
2449           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2450                                       adj0->rewrite_header.dst_mcast_offset,
2451                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2452         }
2453
2454       next += 1;
2455       b += 1;
2456       n_left_from -= 1;
2457     }
2458
2459
2460   /* Need to do trace after rewrites to pick up new packet data. */
2461   if (node->flags & VLIB_NODE_FLAG_TRACE)
2462     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2463
2464   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2465   return frame->n_vectors;
2466 }
2467
2468
2469 /** @brief IPv4 rewrite node.
2470     @node ip4-rewrite
2471
2472     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2473     header checksum, fetch the ip adjacency, check the outbound mtu,
2474     apply the adjacency rewrite, and send pkts to the adjacency
2475     rewrite header's rewrite_next_index.
2476
2477     @param vm vlib_main_t corresponding to the current thread
2478     @param node vlib_node_runtime_t
2479     @param frame vlib_frame_t whose contents should be dispatched
2480
2481     @par Graph mechanics: buffer metadata, next index usage
2482
2483     @em Uses:
2484     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2485         - the rewrite adjacency index
2486     - <code>adj->lookup_next_index</code>
2487         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2488           the packet will be dropped.
2489     - <code>adj->rewrite_header</code>
2490         - Rewrite string length, rewrite string, next_index
2491
2492     @em Sets:
2493     - <code>b->current_data, b->current_length</code>
2494         - Updated net of applying the rewrite string
2495
2496     <em>Next Indices:</em>
2497     - <code> adj->rewrite_header.next_index </code>
2498       or @c ip4-drop
2499 */
2500
2501 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2502                                  vlib_frame_t * frame)
2503 {
2504   if (adj_are_counters_enabled ())
2505     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2506   else
2507     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2508 }
2509
2510 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2511                                        vlib_node_runtime_t * node,
2512                                        vlib_frame_t * frame)
2513 {
2514   if (adj_are_counters_enabled ())
2515     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2516   else
2517     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2518 }
2519
2520 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2521                                   vlib_node_runtime_t * node,
2522                                   vlib_frame_t * frame)
2523 {
2524   if (adj_are_counters_enabled ())
2525     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2526   else
2527     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2528 }
2529
2530 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2531                                        vlib_node_runtime_t * node,
2532                                        vlib_frame_t * frame)
2533 {
2534   if (adj_are_counters_enabled ())
2535     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2536   else
2537     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2538 }
2539
2540 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2541                                         vlib_node_runtime_t * node,
2542                                         vlib_frame_t * frame)
2543 {
2544   if (adj_are_counters_enabled ())
2545     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2546   else
2547     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2548 }
2549
2550 /* *INDENT-OFF* */
2551 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2552   .name = "ip4-rewrite",
2553   .vector_size = sizeof (u32),
2554
2555   .format_trace = format_ip4_rewrite_trace,
2556
2557   .n_next_nodes = IP4_REWRITE_N_NEXT,
2558   .next_nodes = {
2559     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2560     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2561     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2562   },
2563 };
2564
2565 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2566   .name = "ip4-rewrite-bcast",
2567   .vector_size = sizeof (u32),
2568
2569   .format_trace = format_ip4_rewrite_trace,
2570   .sibling_of = "ip4-rewrite",
2571 };
2572
2573 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2574   .name = "ip4-rewrite-mcast",
2575   .vector_size = sizeof (u32),
2576
2577   .format_trace = format_ip4_rewrite_trace,
2578   .sibling_of = "ip4-rewrite",
2579 };
2580
2581 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2582   .name = "ip4-mcast-midchain",
2583   .vector_size = sizeof (u32),
2584
2585   .format_trace = format_ip4_rewrite_trace,
2586   .sibling_of = "ip4-rewrite",
2587 };
2588
2589 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2590   .name = "ip4-midchain",
2591   .vector_size = sizeof (u32),
2592   .format_trace = format_ip4_forward_next_trace,
2593   .sibling_of =  "ip4-rewrite",
2594 };
/* *INDENT-ON* */
2596
2597 static int
2598 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2599 {
2600   ip4_fib_mtrie_t *mtrie0;
2601   ip4_fib_mtrie_leaf_t leaf0;
2602   u32 lbi0;
2603
2604   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2605
2606   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2607   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2608   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2609
2610   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2611
2612   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2613 }
2614
2615 static clib_error_t *
2616 test_lookup_command_fn (vlib_main_t * vm,
2617                         unformat_input_t * input, vlib_cli_command_t * cmd)
2618 {
2619   ip4_fib_t *fib;
2620   u32 table_id = 0;
2621   f64 count = 1;
2622   u32 n;
2623   int i;
2624   ip4_address_t ip4_base_address;
2625   u64 errors = 0;
2626
2627   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2628     {
2629       if (unformat (input, "table %d", &table_id))
2630         {
2631           /* Make sure the entry exists. */
2632           fib = ip4_fib_get (table_id);
2633           if ((fib) && (fib->index != table_id))
2634             return clib_error_return (0, "<fib-index> %d does not exist",
2635                                       table_id);
2636         }
2637       else if (unformat (input, "count %f", &count))
2638         ;
2639
2640       else if (unformat (input, "%U",
2641                          unformat_ip4_address, &ip4_base_address))
2642         ;
2643       else
2644         return clib_error_return (0, "unknown input `%U'",
2645                                   format_unformat_error, input);
2646     }
2647
2648   n = count;
2649
2650   for (i = 0; i < n; i++)
2651     {
2652       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2653         errors++;
2654
2655       ip4_base_address.as_u32 =
2656         clib_host_to_net_u32 (1 +
2657                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2658     }
2659
2660   if (errors)
2661     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2662   else
2663     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2664
2665   return 0;
2666 }
2667
2668 /*?
2669  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2670  * given FIB table to determine if there is a conflict with the
2671  * adjacency table. The fib-id can be determined by using the
2672  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2673  * of 0 is used.
2674  *
2675  * @todo This command uses fib-id, other commands use table-id (not
2676  * just a name, they are different indexes). Would like to change this
2677  * to table-id for consistency.
2678  *
2679  * @cliexpar
2680  * Example of how to run the test lookup command:
2681  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2682  * No errors in 2 lookups
2683  * @cliexend
2684 ?*/
2685 /* *INDENT-OFF* */
2686 VLIB_CLI_COMMAND (lookup_test_command, static) =
2687 {
2688   .path = "test lookup",
2689   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2690   .function = test_lookup_command_fn,
2691 };
2692 /* *INDENT-ON* */
2693
2694 #ifndef CLIB_MARCH_VARIANT
2695 int
2696 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2697 {
2698   u32 fib_index;
2699
2700   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2701
2702   if (~0 == fib_index)
2703     return VNET_API_ERROR_NO_SUCH_FIB;
2704
2705   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2706                                   flow_hash_config);
2707
2708   return 0;
2709 }
2710 #endif
2711
2712 static clib_error_t *
2713 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2714                              unformat_input_t * input,
2715                              vlib_cli_command_t * cmd)
2716 {
2717   int matched = 0;
2718   u32 table_id = 0;
2719   u32 flow_hash_config = 0;
2720   int rv;
2721
2722   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2723     {
2724       if (unformat (input, "table %d", &table_id))
2725         matched = 1;
2726 #define _(a,v) \
2727     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2728       foreach_flow_hash_bit
2729 #undef _
2730         else
2731         break;
2732     }
2733
2734   if (matched == 0)
2735     return clib_error_return (0, "unknown input `%U'",
2736                               format_unformat_error, input);
2737
2738   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2739   switch (rv)
2740     {
2741     case 0:
2742       break;
2743
2744     case VNET_API_ERROR_NO_SUCH_FIB:
2745       return clib_error_return (0, "no such FIB table %d", table_id);
2746
2747     default:
2748       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2749       break;
2750     }
2751
2752   return 0;
2753 }
2754
2755 /*?
2756  * Configure the set of IPv4 fields used by the flow hash.
2757  *
2758  * @cliexpar
2759  * Example of how to set the flow hash on a given table:
2760  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2762  * @cliexstart{show ip fib}
2763  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2764  * 0.0.0.0/0
2765  *   unicast-ip4-chain
2766  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2767  *     [0] [@0]: dpo-drop ip6
2768  * 0.0.0.0/32
2769  *   unicast-ip4-chain
2770  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2771  *     [0] [@0]: dpo-drop ip6
2772  * 224.0.0.0/8
2773  *   unicast-ip4-chain
2774  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2775  *     [0] [@0]: dpo-drop ip6
2776  * 6.0.1.2/32
2777  *   unicast-ip4-chain
2778  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2779  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2780  * 7.0.0.1/32
2781  *   unicast-ip4-chain
2782  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2783  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2784  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2785  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2786  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2787  * 240.0.0.0/8
2788  *   unicast-ip4-chain
2789  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2790  *     [0] [@0]: dpo-drop ip6
2791  * 255.255.255.255/32
2792  *   unicast-ip4-chain
2793  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2794  *     [0] [@0]: dpo-drop ip6
2795  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2796  * 0.0.0.0/0
2797  *   unicast-ip4-chain
2798  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2799  *     [0] [@0]: dpo-drop ip6
2800  * 0.0.0.0/32
2801  *   unicast-ip4-chain
2802  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2803  *     [0] [@0]: dpo-drop ip6
2804  * 172.16.1.0/24
2805  *   unicast-ip4-chain
2806  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2807  *     [0] [@4]: ipv4-glean: af_packet0
2808  * 172.16.1.1/32
2809  *   unicast-ip4-chain
2810  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2811  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2812  * 172.16.1.2/32
2813  *   unicast-ip4-chain
2814  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2815  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2816  * 172.16.2.0/24
2817  *   unicast-ip4-chain
2818  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2819  *     [0] [@4]: ipv4-glean: af_packet1
2820  * 172.16.2.1/32
2821  *   unicast-ip4-chain
2822  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2823  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2824  * 224.0.0.0/8
2825  *   unicast-ip4-chain
2826  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2827  *     [0] [@0]: dpo-drop ip6
2828  * 240.0.0.0/8
2829  *   unicast-ip4-chain
2830  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2831  *     [0] [@0]: dpo-drop ip6
2832  * 255.255.255.255/32
2833  *   unicast-ip4-chain
2834  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2835  *     [0] [@0]: dpo-drop ip6
2836  * @cliexend
2837 ?*/
2838 /* *INDENT-OFF* */
2839 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2840 {
2841   .path = "set ip flow-hash",
2842   .short_help =
2843   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2844   .function = set_ip_flow_hash_command_fn,
2845 };
2846 /* *INDENT-ON* */
2847
2848 #ifndef CLIB_MARCH_VARIANT
2849 int
2850 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2851                              u32 table_index)
2852 {
2853   vnet_main_t *vnm = vnet_get_main ();
2854   vnet_interface_main_t *im = &vnm->interface_main;
2855   ip4_main_t *ipm = &ip4_main;
2856   ip_lookup_main_t *lm = &ipm->lookup_main;
2857   vnet_classify_main_t *cm = &vnet_classify_main;
2858   ip4_address_t *if_addr;
2859
2860   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2861     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2862
2863   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2864     return VNET_API_ERROR_NO_SUCH_ENTRY;
2865
2866   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2867   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2868
2869   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2870
2871   if (NULL != if_addr)
2872     {
2873       fib_prefix_t pfx = {
2874         .fp_len = 32,
2875         .fp_proto = FIB_PROTOCOL_IP4,
2876         .fp_addr.ip4 = *if_addr,
2877       };
2878       u32 fib_index;
2879
2880       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2881                                                        sw_if_index);
2882
2883
2884       if (table_index != (u32) ~ 0)
2885         {
2886           dpo_id_t dpo = DPO_INVALID;
2887
2888           dpo_set (&dpo,
2889                    DPO_CLASSIFY,
2890                    DPO_PROTO_IP4,
2891                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2892
2893           fib_table_entry_special_dpo_add (fib_index,
2894                                            &pfx,
2895                                            FIB_SOURCE_CLASSIFY,
2896                                            FIB_ENTRY_FLAG_NONE, &dpo);
2897           dpo_reset (&dpo);
2898         }
2899       else
2900         {
2901           fib_table_entry_special_remove (fib_index,
2902                                           &pfx, FIB_SOURCE_CLASSIFY);
2903         }
2904     }
2905
2906   return 0;
2907 }
2908 #endif
2909
2910 static clib_error_t *
2911 set_ip_classify_command_fn (vlib_main_t * vm,
2912                             unformat_input_t * input,
2913                             vlib_cli_command_t * cmd)
2914 {
2915   u32 table_index = ~0;
2916   int table_index_set = 0;
2917   u32 sw_if_index = ~0;
2918   int rv;
2919
2920   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2921     {
2922       if (unformat (input, "table-index %d", &table_index))
2923         table_index_set = 1;
2924       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2925                          vnet_get_main (), &sw_if_index))
2926         ;
2927       else
2928         break;
2929     }
2930
2931   if (table_index_set == 0)
2932     return clib_error_return (0, "classify table-index must be specified");
2933
2934   if (sw_if_index == ~0)
2935     return clib_error_return (0, "interface / subif must be specified");
2936
2937   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2938
2939   switch (rv)
2940     {
2941     case 0:
2942       break;
2943
2944     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2945       return clib_error_return (0, "No such interface");
2946
2947     case VNET_API_ERROR_NO_SUCH_ENTRY:
2948       return clib_error_return (0, "No such classifier table");
2949     }
2950   return 0;
2951 }
2952
2953 /*?
2954  * Assign a classification table to an interface. The classification
2955  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2957  * on an interface.
2958  *
2959  * @cliexpar
2960  * Example of how to assign a classification table to an interface:
2961  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2962 ?*/
2963 /* *INDENT-OFF* */
2964 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2965 {
2966     .path = "set ip classify",
2967     .short_help =
2968     "set ip classify intfc <interface> table-index <classify-idx>",
2969     .function = set_ip_classify_command_fn,
2970 };
2971 /* *INDENT-ON* */
2972
2973 static clib_error_t *
2974 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2975 {
2976   ip4_main_t *im = &ip4_main;
2977   uword heapsize = 0;
2978
2979   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2980     {
2981       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
2982         ;
2983       else
2984         return clib_error_return (0,
2985                                   "invalid heap-size parameter `%U'",
2986                                   format_unformat_error, input);
2987     }
2988
2989   im->mtrie_heap_size = heapsize;
2990
2991   return 0;
2992 }
2993
2994 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
2995
2996 /*
2997  * fd.io coding-style-patch-verification: ON
2998  *
2999  * Local Variables:
3000  * eval: (c-set-style "gnu")
3001  * End:
3002  */