ARP/ND: copy opaque2 persistent fields to new packet
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58
59 /** @brief IPv4 lookup node.
60     @node ip4-lookup
61
62     This is the main IPv4 lookup dispatch node.
63
64     @param vm vlib_main_t corresponding to the current thread
65     @param node vlib_node_runtime_t
66     @param frame vlib_frame_t whose contents should be dispatched
67
68     @par Graph mechanics: buffer metadata, next index usage
69
70     @em Uses:
71     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
72         - Indicates the @c sw_if_index value of the interface that the
73           packet was received on.
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
75         - When the value is @c ~0 then the node performs a longest prefix
76           match (LPM) for the packet destination address in the FIB attached
77           to the receive interface.
78         - Otherwise perform LPM for the packet destination address in the
79           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
80           value (0, 1, ...) and not a VRF id.
81
82     @em Sets:
83     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
84         - The lookup result adjacency index.
85
86     <em>Next Index:</em>
87     - Dispatches the packet to the node index found in
88       ip_adjacency_t @c adj->lookup_next_index
89       (where @c adj is the lookup result adjacency).
90 */
91 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
92                                 vlib_frame_t * frame)
93 {
94   return ip4_lookup_inline (vm, node, frame,
95                             /* lookup_for_responses_to_locally_received_packets */
96                             0);
97
98 }
99
100 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
101
102 /* *INDENT-OFF* */
103 VLIB_REGISTER_NODE (ip4_lookup_node) =
104 {
105   .name = "ip4-lookup",
106   .vector_size = sizeof (u32),
107   .format_trace = format_ip4_lookup_trace,
108   .n_next_nodes = IP_LOOKUP_N_NEXT,
109   .next_nodes = IP4_LOOKUP_NEXT_NODES,
110 };
111 /* *INDENT-ON* */
112
113 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
114                                       vlib_node_runtime_t * node,
115                                       vlib_frame_t * frame)
116 {
117   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
118   u32 n_left_from, n_left_to_next, *from, *to_next;
119   ip_lookup_next_t next;
120   u32 thread_index = vm->thread_index;
121
122   from = vlib_frame_vector_args (frame);
123   n_left_from = frame->n_vectors;
124   next = node->cached_next_index;
125
126   while (n_left_from > 0)
127     {
128       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
129
130
131       while (n_left_from >= 4 && n_left_to_next >= 2)
132         {
133           ip_lookup_next_t next0, next1;
134           const load_balance_t *lb0, *lb1;
135           vlib_buffer_t *p0, *p1;
136           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
137           const ip4_header_t *ip0, *ip1;
138           const dpo_id_t *dpo0, *dpo1;
139
140           /* Prefetch next iteration. */
141           {
142             vlib_buffer_t *p2, *p3;
143
144             p2 = vlib_get_buffer (vm, from[2]);
145             p3 = vlib_get_buffer (vm, from[3]);
146
147             vlib_prefetch_buffer_header (p2, STORE);
148             vlib_prefetch_buffer_header (p3, STORE);
149
150             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
151             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
152           }
153
154           pi0 = to_next[0] = from[0];
155           pi1 = to_next[1] = from[1];
156
157           from += 2;
158           n_left_from -= 2;
159           to_next += 2;
160           n_left_to_next -= 2;
161
162           p0 = vlib_get_buffer (vm, pi0);
163           p1 = vlib_get_buffer (vm, pi1);
164
165           ip0 = vlib_buffer_get_current (p0);
166           ip1 = vlib_buffer_get_current (p1);
167           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
168           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
169
170           lb0 = load_balance_get (lbi0);
171           lb1 = load_balance_get (lbi1);
172
173           /*
174            * this node is for via FIBs we can re-use the hash value from the
175            * to node if present.
176            * We don't want to use the same hash value at each level in the recursion
177            * graph as that would lead to polarisation
178            */
179           hc0 = hc1 = 0;
180
181           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
182             {
183               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
184                 {
185                   hc0 = vnet_buffer (p0)->ip.flow_hash =
186                     vnet_buffer (p0)->ip.flow_hash >> 1;
187                 }
188               else
189                 {
190                   hc0 = vnet_buffer (p0)->ip.flow_hash =
191                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
192                 }
193               dpo0 = load_balance_get_fwd_bucket
194                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
195             }
196           else
197             {
198               dpo0 = load_balance_get_bucket_i (lb0, 0);
199             }
200           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
201             {
202               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
203                 {
204                   hc1 = vnet_buffer (p1)->ip.flow_hash =
205                     vnet_buffer (p1)->ip.flow_hash >> 1;
206                 }
207               else
208                 {
209                   hc1 = vnet_buffer (p1)->ip.flow_hash =
210                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
211                 }
212               dpo1 = load_balance_get_fwd_bucket
213                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
214             }
215           else
216             {
217               dpo1 = load_balance_get_bucket_i (lb1, 0);
218             }
219
220           next0 = dpo0->dpoi_next_node;
221           next1 = dpo1->dpoi_next_node;
222
223           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
224           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
225
226           vlib_increment_combined_counter
227             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
228           vlib_increment_combined_counter
229             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
230
231           vlib_validate_buffer_enqueue_x2 (vm, node, next,
232                                            to_next, n_left_to_next,
233                                            pi0, pi1, next0, next1);
234         }
235
236       while (n_left_from > 0 && n_left_to_next > 0)
237         {
238           ip_lookup_next_t next0;
239           const load_balance_t *lb0;
240           vlib_buffer_t *p0;
241           u32 pi0, lbi0, hc0;
242           const ip4_header_t *ip0;
243           const dpo_id_t *dpo0;
244
245           pi0 = from[0];
246           to_next[0] = pi0;
247           from += 1;
248           to_next += 1;
249           n_left_to_next -= 1;
250           n_left_from -= 1;
251
252           p0 = vlib_get_buffer (vm, pi0);
253
254           ip0 = vlib_buffer_get_current (p0);
255           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
256
257           lb0 = load_balance_get (lbi0);
258
259           hc0 = 0;
260           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
261             {
262               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
263                 {
264                   hc0 = vnet_buffer (p0)->ip.flow_hash =
265                     vnet_buffer (p0)->ip.flow_hash >> 1;
266                 }
267               else
268                 {
269                   hc0 = vnet_buffer (p0)->ip.flow_hash =
270                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
271                 }
272               dpo0 = load_balance_get_fwd_bucket
273                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
274             }
275           else
276             {
277               dpo0 = load_balance_get_bucket_i (lb0, 0);
278             }
279
280           next0 = dpo0->dpoi_next_node;
281           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
282
283           vlib_increment_combined_counter
284             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
285
286           vlib_validate_buffer_enqueue_x1 (vm, node, next,
287                                            to_next, n_left_to_next,
288                                            pi0, next0);
289         }
290
291       vlib_put_next_frame (vm, node, next, n_left_to_next);
292     }
293
294   if (node->flags & VLIB_NODE_FLAG_TRACE)
295     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
296
297   return frame->n_vectors;
298 }
299
300 /* *INDENT-OFF* */
301 VLIB_REGISTER_NODE (ip4_load_balance_node) =
302 {
303   .name = "ip4-load-balance",
304   .vector_size = sizeof (u32),
305   .sibling_of = "ip4-lookup",
306   .format_trace = format_ip4_lookup_trace,
307 };
308 /* *INDENT-ON* */
309
310 #ifndef CLIB_MARCH_VARIANT
311 /* get first interface address */
312 ip4_address_t *
313 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
314                              ip_interface_address_t ** result_ia)
315 {
316   ip_lookup_main_t *lm = &im->lookup_main;
317   ip_interface_address_t *ia = 0;
318   ip4_address_t *result = 0;
319
320   /* *INDENT-OFF* */
321   foreach_ip_interface_address
322     (lm, ia, sw_if_index,
323      1 /* honor unnumbered */ ,
324      ({
325        ip4_address_t * a =
326          ip_interface_address_get_address (lm, ia);
327        result = a;
328        break;
329      }));
330   /* *INDENT-OFF* */
331   if (result_ia)
332     *result_ia = result ? ia : 0;
333   return result;
334 }
335
336 static void
337 ip4_add_subnet_bcast_route (u32 fib_index,
338                             fib_prefix_t *pfx,
339                             u32 sw_if_index)
340 {
341   vnet_sw_interface_flags_t iflags;
342
343   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
344
345   fib_table_entry_special_remove(fib_index,
346                                  pfx,
347                                  FIB_SOURCE_INTERFACE);
348
349   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
350     {
351       fib_table_entry_update_one_path (fib_index, pfx,
352                                        FIB_SOURCE_INTERFACE,
353                                        FIB_ENTRY_FLAG_NONE,
354                                        DPO_PROTO_IP4,
355                                        /* No next-hop address */
356                                        &ADJ_BCAST_ADDR,
357                                        sw_if_index,
358                                        // invalid FIB index
359                                        ~0,
360                                        1,
361                                        // no out-label stack
362                                        NULL,
363                                        FIB_ROUTE_PATH_FLAG_NONE);
364     }
365   else
366     {
367         fib_table_entry_special_add(fib_index,
368                                     pfx,
369                                     FIB_SOURCE_INTERFACE,
370                                     (FIB_ENTRY_FLAG_DROP |
371                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
372     }
373 }
374
375 static void
376 ip4_add_interface_routes (u32 sw_if_index,
377                           ip4_main_t * im, u32 fib_index,
378                           ip_interface_address_t * a)
379 {
380   ip_lookup_main_t *lm = &im->lookup_main;
381   ip4_address_t *address = ip_interface_address_get_address (lm, a);
382   fib_prefix_t pfx = {
383     .fp_len = a->address_length,
384     .fp_proto = FIB_PROTOCOL_IP4,
385     .fp_addr.ip4 = *address,
386   };
387
388   if (pfx.fp_len <= 30)
389     {
390       /* a /30 or shorter - add a glean for the network address */
391       fib_table_entry_update_one_path (fib_index, &pfx,
392                                        FIB_SOURCE_INTERFACE,
393                                        (FIB_ENTRY_FLAG_CONNECTED |
394                                         FIB_ENTRY_FLAG_ATTACHED),
395                                        DPO_PROTO_IP4,
396                                        /* No next-hop address */
397                                        NULL,
398                                        sw_if_index,
399                                        // invalid FIB index
400                                        ~0,
401                                        1,
402                                        // no out-label stack
403                                        NULL,
404                                        FIB_ROUTE_PATH_FLAG_NONE);
405
406       /* Add the two broadcast addresses as drop */
407       fib_prefix_t net_pfx = {
408         .fp_len = 32,
409         .fp_proto = FIB_PROTOCOL_IP4,
410         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
411       };
412       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
413         fib_table_entry_special_add(fib_index,
414                                     &net_pfx,
415                                     FIB_SOURCE_INTERFACE,
416                                     (FIB_ENTRY_FLAG_DROP |
417                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
418       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
419       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
420         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
421     }
422   else if (pfx.fp_len == 31)
423     {
424       u32 mask = clib_host_to_net_u32(1);
425       fib_prefix_t net_pfx = pfx;
426
427       net_pfx.fp_len = 32;
428       net_pfx.fp_addr.ip4.as_u32 ^= mask;
429
430       /* a /31 - add the other end as an attached host */
431       fib_table_entry_update_one_path (fib_index, &net_pfx,
432                                        FIB_SOURCE_INTERFACE,
433                                        (FIB_ENTRY_FLAG_ATTACHED),
434                                        DPO_PROTO_IP4,
435                                        &net_pfx.fp_addr,
436                                        sw_if_index,
437                                        // invalid FIB index
438                                        ~0,
439                                        1,
440                                        NULL,
441                                        FIB_ROUTE_PATH_FLAG_NONE);
442     }
443   pfx.fp_len = 32;
444
445   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
446     {
447       u32 classify_table_index =
448         lm->classify_table_index_by_sw_if_index[sw_if_index];
449       if (classify_table_index != (u32) ~ 0)
450         {
451           dpo_id_t dpo = DPO_INVALID;
452
453           dpo_set (&dpo,
454                    DPO_CLASSIFY,
455                    DPO_PROTO_IP4,
456                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
457
458           fib_table_entry_special_dpo_add (fib_index,
459                                            &pfx,
460                                            FIB_SOURCE_CLASSIFY,
461                                            FIB_ENTRY_FLAG_NONE, &dpo);
462           dpo_reset (&dpo);
463         }
464     }
465
466   fib_table_entry_update_one_path (fib_index, &pfx,
467                                    FIB_SOURCE_INTERFACE,
468                                    (FIB_ENTRY_FLAG_CONNECTED |
469                                     FIB_ENTRY_FLAG_LOCAL),
470                                    DPO_PROTO_IP4,
471                                    &pfx.fp_addr,
472                                    sw_if_index,
473                                    // invalid FIB index
474                                    ~0,
475                                    1, NULL,
476                                    FIB_ROUTE_PATH_FLAG_NONE);
477 }
478
479 static void
480 ip4_del_interface_routes (ip4_main_t * im,
481                           u32 fib_index,
482                           ip4_address_t * address, u32 address_length)
483 {
484   fib_prefix_t pfx = {
485     .fp_len = address_length,
486     .fp_proto = FIB_PROTOCOL_IP4,
487     .fp_addr.ip4 = *address,
488   };
489
490   if (pfx.fp_len <= 30)
491     {
492       fib_prefix_t net_pfx = {
493         .fp_len = 32,
494         .fp_proto = FIB_PROTOCOL_IP4,
495         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
496       };
497       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
498         fib_table_entry_special_remove(fib_index,
499                                        &net_pfx,
500                                        FIB_SOURCE_INTERFACE);
501       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
502       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
503         fib_table_entry_special_remove(fib_index,
504                                        &net_pfx,
505                                        FIB_SOURCE_INTERFACE);
506       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
507     }
508     else if (pfx.fp_len == 31)
509     {
510       u32 mask = clib_host_to_net_u32(1);
511       fib_prefix_t net_pfx = pfx;
512
513       net_pfx.fp_len = 32;
514       net_pfx.fp_addr.ip4.as_u32 ^= mask;
515
516       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
517     }
518
519   pfx.fp_len = 32;
520   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
521 }
522
523 void
524 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
525 {
526   ip4_main_t *im = &ip4_main;
527
528   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
529
530   /*
531    * enable/disable only on the 1<->0 transition
532    */
533   if (is_enable)
534     {
535       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
536         return;
537     }
538   else
539     {
540       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
541       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
542         return;
543     }
544   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
545                                !is_enable, 0, 0);
546
547
548   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
549                                sw_if_index, !is_enable, 0, 0);
550 }
551
552 static clib_error_t *
553 ip4_add_del_interface_address_internal (vlib_main_t * vm,
554                                         u32 sw_if_index,
555                                         ip4_address_t * address,
556                                         u32 address_length, u32 is_del)
557 {
558   vnet_main_t *vnm = vnet_get_main ();
559   ip4_main_t *im = &ip4_main;
560   ip_lookup_main_t *lm = &im->lookup_main;
561   clib_error_t *error = 0;
562   u32 if_address_index, elts_before;
563   ip4_address_fib_t ip4_af, *addr_fib = 0;
564
565   /* local0 interface doesn't support IP addressing  */
566   if (sw_if_index == 0)
567     {
568       return
569        clib_error_create ("local0 interface doesn't support IP addressing");
570     }
571
572   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
573   ip4_addr_fib_init (&ip4_af, address,
574                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
575   vec_add1 (addr_fib, ip4_af);
576
577   /*
578    * there is no support for adj-fib handling in the presence of overlapping
579    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
580    * most routers do.
581    */
582   /* *INDENT-OFF* */
583   if (!is_del)
584     {
585       /* When adding an address check that it does not conflict
586          with an existing address on any interface in this table. */
587       ip_interface_address_t *ia;
588       vnet_sw_interface_t *sif;
589
590       pool_foreach(sif, vnm->interface_main.sw_interfaces,
591       ({
592           if (im->fib_index_by_sw_if_index[sw_if_index] ==
593               im->fib_index_by_sw_if_index[sif->sw_if_index])
594             {
595               foreach_ip_interface_address
596                 (&im->lookup_main, ia, sif->sw_if_index,
597                  0 /* honor unnumbered */ ,
598                  ({
599                    ip4_address_t * x =
600                      ip_interface_address_get_address
601                      (&im->lookup_main, ia);
602                    if (ip4_destination_matches_route
603                        (im, address, x, ia->address_length) ||
604                        ip4_destination_matches_route (im,
605                                                       x,
606                                                       address,
607                                                       address_length))
608                      {
609                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
610
611                        return
612                          clib_error_create
613                          ("failed to add %U which conflicts with %U for interface %U",
614                           format_ip4_address_and_length, address,
615                           address_length,
616                           format_ip4_address_and_length, x,
617                           ia->address_length,
618                           format_vnet_sw_if_index_name, vnm,
619                           sif->sw_if_index);
620                      }
621                  }));
622             }
623       }));
624     }
625   /* *INDENT-ON* */
626
627   elts_before = pool_elts (lm->if_address_pool);
628
629   error = ip_interface_address_add_del
630     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
631   if (error)
632     goto done;
633
634   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
635
636   if (is_del)
637     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
638   else
639     ip4_add_interface_routes (sw_if_index,
640                               im, ip4_af.fib_index,
641                               pool_elt_at_index
642                               (lm->if_address_pool, if_address_index));
643
644   /* If pool did not grow/shrink: add duplicate address. */
645   if (elts_before != pool_elts (lm->if_address_pool))
646     {
647       ip4_add_del_interface_address_callback_t *cb;
648       vec_foreach (cb, im->add_del_interface_address_callbacks)
649         cb->function (im, cb->function_opaque, sw_if_index,
650                       address, address_length, if_address_index, is_del);
651     }
652
653 done:
654   vec_free (addr_fib);
655   return error;
656 }
657
658 clib_error_t *
659 ip4_add_del_interface_address (vlib_main_t * vm,
660                                u32 sw_if_index,
661                                ip4_address_t * address,
662                                u32 address_length, u32 is_del)
663 {
664   return ip4_add_del_interface_address_internal
665     (vm, sw_if_index, address, address_length, is_del);
666 }
667
668 void
669 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
670 {
671   ip_interface_address_t *ia;
672   ip4_main_t *im;
673
674   im = &ip4_main;
675
676   /*
677    * when directed broadcast is enabled, the subnet braodcast route will forward
678    * packets using an adjacency with a broadcast MAC. otherwise it drops
679    */
680   /* *INDENT-OFF* */
681   foreach_ip_interface_address(&im->lookup_main, ia,
682                                sw_if_index, 0,
683      ({
684        if (ia->address_length <= 30)
685          {
686            ip4_address_t *ipa;
687
688            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
689
690            fib_prefix_t pfx = {
691              .fp_len = 32,
692              .fp_proto = FIB_PROTOCOL_IP4,
693              .fp_addr = {
694                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
695              },
696            };
697
698            ip4_add_subnet_bcast_route
699              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
700                                                   sw_if_index),
701               &pfx, sw_if_index);
702          }
703      }));
704   /* *INDENT-ON* */
705 }
706 #endif
707
708 /* Built-in ip4 unicast rx feature path definition */
709 /* *INDENT-OFF* */
710 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
711 {
712   .arc_name = "ip4-unicast",
713   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
714   .last_in_arc = "ip4-lookup",
715   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
716 };
717
718 VNET_FEATURE_INIT (ip4_flow_classify, static) =
719 {
720   .arc_name = "ip4-unicast",
721   .node_name = "ip4-flow-classify",
722   .runs_before = VNET_FEATURES ("ip4-inacl"),
723 };
724
725 VNET_FEATURE_INIT (ip4_inacl, static) =
726 {
727   .arc_name = "ip4-unicast",
728   .node_name = "ip4-inacl",
729   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
730 };
731
732 VNET_FEATURE_INIT (ip4_source_check_1, static) =
733 {
734   .arc_name = "ip4-unicast",
735   .node_name = "ip4-source-check-via-rx",
736   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
737 };
738
739 VNET_FEATURE_INIT (ip4_source_check_2, static) =
740 {
741   .arc_name = "ip4-unicast",
742   .node_name = "ip4-source-check-via-any",
743   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
744 };
745
746 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
747 {
748   .arc_name = "ip4-unicast",
749   .node_name = "ip4-source-and-port-range-check-rx",
750   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
751 };
752
753 VNET_FEATURE_INIT (ip4_policer_classify, static) =
754 {
755   .arc_name = "ip4-unicast",
756   .node_name = "ip4-policer-classify",
757   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
758 };
759
760 VNET_FEATURE_INIT (ip4_ipsec, static) =
761 {
762   .arc_name = "ip4-unicast",
763   .node_name = "ipsec4-input-feature",
764   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
765 };
766
767 VNET_FEATURE_INIT (ip4_vpath, static) =
768 {
769   .arc_name = "ip4-unicast",
770   .node_name = "vpath-input-ip4",
771   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
772 };
773
774 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
775 {
776   .arc_name = "ip4-unicast",
777   .node_name = "ip4-vxlan-bypass",
778   .runs_before = VNET_FEATURES ("ip4-lookup"),
779 };
780
781 VNET_FEATURE_INIT (ip4_not_enabled, static) =
782 {
783   .arc_name = "ip4-unicast",
784   .node_name = "ip4-not-enabled",
785   .runs_before = VNET_FEATURES ("ip4-lookup"),
786 };
787
788 VNET_FEATURE_INIT (ip4_lookup, static) =
789 {
790   .arc_name = "ip4-unicast",
791   .node_name = "ip4-lookup",
792   .runs_before = 0,     /* not before any other features */
793 };
794
795 /* Built-in ip4 multicast rx feature path definition */
796 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
797 {
798   .arc_name = "ip4-multicast",
799   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
800   .last_in_arc = "ip4-mfib-forward-lookup",
801   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
802 };
803
804 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
805 {
806   .arc_name = "ip4-multicast",
807   .node_name = "vpath-input-ip4",
808   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
809 };
810
811 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
812 {
813   .arc_name = "ip4-multicast",
814   .node_name = "ip4-not-enabled",
815   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
816 };
817
818 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
819 {
820   .arc_name = "ip4-multicast",
821   .node_name = "ip4-mfib-forward-lookup",
822   .runs_before = 0,     /* last feature */
823 };
824
825 /* Source and port-range check ip4 tx feature path definition */
826 VNET_FEATURE_ARC_INIT (ip4_output, static) =
827 {
828   .arc_name = "ip4-output",
829   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
830   .last_in_arc = "interface-output",
831   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
832 };
833
834 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
835 {
836   .arc_name = "ip4-output",
837   .node_name = "ip4-source-and-port-range-check-tx",
838   .runs_before = VNET_FEATURES ("ip4-outacl"),
839 };
840
841 VNET_FEATURE_INIT (ip4_outacl, static) =
842 {
843   .arc_name = "ip4-output",
844   .node_name = "ip4-outacl",
845   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
846 };
847
848 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
849 {
850   .arc_name = "ip4-output",
851   .node_name = "ipsec4-output-feature",
852   .runs_before = VNET_FEATURES ("interface-output"),
853 };
854
855 /* Built-in ip4 tx feature path definition */
856 VNET_FEATURE_INIT (ip4_interface_output, static) =
857 {
858   .arc_name = "ip4-output",
859   .node_name = "interface-output",
860   .runs_before = 0,     /* not before any other features */
861 };
862 /* *INDENT-ON* */
863
864 static clib_error_t *
865 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
866 {
867   ip4_main_t *im = &ip4_main;
868
869   /* Fill in lookup tables with default table (0). */
870   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
871   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
872
873   if (!is_add)
874     {
875       ip4_main_t *im4 = &ip4_main;
876       ip_lookup_main_t *lm4 = &im4->lookup_main;
877       ip_interface_address_t *ia = 0;
878       ip4_address_t *address;
879       vlib_main_t *vm = vlib_get_main ();
880
881       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
882       /* *INDENT-OFF* */
883       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
884       ({
885         address = ip_interface_address_get_address (lm4, ia);
886         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
887       }));
888       /* *INDENT-ON* */
889     }
890
891   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
892                                is_add, 0, 0);
893
894   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
895                                sw_if_index, is_add, 0, 0);
896
897   return /* no error */ 0;
898 }
899
900 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
901
902 /* Global IP4 main. */
903 ip4_main_t ip4_main;
904
905 static clib_error_t *
906 ip4_lookup_init (vlib_main_t * vm)
907 {
908   ip4_main_t *im = &ip4_main;
909   clib_error_t *error;
910   uword i;
911
912   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
913     return error;
914   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
915     return (error);
916   if ((error = vlib_call_init_function (vm, fib_module_init)))
917     return error;
918   if ((error = vlib_call_init_function (vm, mfib_module_init)))
919     return error;
920
921   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
922     {
923       u32 m;
924
925       if (i < 32)
926         m = pow2_mask (i) << (32 - i);
927       else
928         m = ~0;
929       im->fib_masks[i] = clib_host_to_net_u32 (m);
930     }
931
932   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
933
934   /* Create FIB with index 0 and table id of 0. */
935   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
936                                      FIB_SOURCE_DEFAULT_ROUTE);
937   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
938                                       MFIB_SOURCE_DEFAULT_ROUTE);
939
940   {
941     pg_node_t *pn;
942     pn = pg_get_node (ip4_lookup_node.index);
943     pn->unformat_edit = unformat_pg_ip4_header;
944   }
945
946   {
947     ethernet_arp_header_t h;
948
949     clib_memset (&h, 0, sizeof (h));
950
951     /* Set target ethernet address to all zeros. */
952     clib_memset (h.ip4_over_ethernet[1].ethernet, 0,
953                  sizeof (h.ip4_over_ethernet[1].ethernet));
954
955 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
956 #define _8(f,v) h.f = v;
957     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
958     _16 (l3_type, ETHERNET_TYPE_IP4);
959     _8 (n_l2_address_bytes, 6);
960     _8 (n_l3_address_bytes, 4);
961     _16 (opcode, ETHERNET_ARP_OPCODE_request);
962 #undef _16
963 #undef _8
964
965     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
966                                /* data */ &h,
967                                sizeof (h),
968                                /* alloc chunk size */ 8,
969                                "ip4 arp");
970   }
971
972   return error;
973 }
974
975 VLIB_INIT_FUNCTION (ip4_lookup_init);
976
977 typedef struct
978 {
979   /* Adjacency taken. */
980   u32 dpo_index;
981   u32 flow_hash;
982   u32 fib_index;
983
984   /* Packet data, possibly *after* rewrite. */
985   u8 packet_data[64 - 1 * sizeof (u32)];
986 }
987 ip4_forward_next_trace_t;
988
989 #ifndef CLIB_MARCH_VARIANT
990 u8 *
991 format_ip4_forward_next_trace (u8 * s, va_list * args)
992 {
993   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
994   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
995   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
996   u32 indent = format_get_indent (s);
997   s = format (s, "%U%U",
998               format_white_space, indent,
999               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1000   return s;
1001 }
1002 #endif
1003
1004 static u8 *
1005 format_ip4_lookup_trace (u8 * s, va_list * args)
1006 {
1007   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1008   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1009   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1010   u32 indent = format_get_indent (s);
1011
1012   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1013               t->fib_index, t->dpo_index, t->flow_hash);
1014   s = format (s, "\n%U%U",
1015               format_white_space, indent,
1016               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1017   return s;
1018 }
1019
1020 static u8 *
1021 format_ip4_rewrite_trace (u8 * s, va_list * args)
1022 {
1023   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1024   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1025   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1026   u32 indent = format_get_indent (s);
1027
1028   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1029               t->fib_index, t->dpo_index, format_ip_adjacency,
1030               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1031   s = format (s, "\n%U%U",
1032               format_white_space, indent,
1033               format_ip_adjacency_packet_data,
1034               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1035   return s;
1036 }
1037
1038 #ifndef CLIB_MARCH_VARIANT
1039 /* Common trace function for all ip4-forward next nodes. */
1040 void
1041 ip4_forward_next_trace (vlib_main_t * vm,
1042                         vlib_node_runtime_t * node,
1043                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1044 {
1045   u32 *from, n_left;
1046   ip4_main_t *im = &ip4_main;
1047
1048   n_left = frame->n_vectors;
1049   from = vlib_frame_vector_args (frame);
1050
1051   while (n_left >= 4)
1052     {
1053       u32 bi0, bi1;
1054       vlib_buffer_t *b0, *b1;
1055       ip4_forward_next_trace_t *t0, *t1;
1056
1057       /* Prefetch next iteration. */
1058       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1059       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1060
1061       bi0 = from[0];
1062       bi1 = from[1];
1063
1064       b0 = vlib_get_buffer (vm, bi0);
1065       b1 = vlib_get_buffer (vm, bi1);
1066
1067       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1068         {
1069           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1070           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1071           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1072           t0->fib_index =
1073             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1074              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1075             vec_elt (im->fib_index_by_sw_if_index,
1076                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1077
1078           clib_memcpy_fast (t0->packet_data,
1079                             vlib_buffer_get_current (b0),
1080                             sizeof (t0->packet_data));
1081         }
1082       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1083         {
1084           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1085           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1086           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1087           t1->fib_index =
1088             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1089              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1090             vec_elt (im->fib_index_by_sw_if_index,
1091                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1092           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1093                             sizeof (t1->packet_data));
1094         }
1095       from += 2;
1096       n_left -= 2;
1097     }
1098
1099   while (n_left >= 1)
1100     {
1101       u32 bi0;
1102       vlib_buffer_t *b0;
1103       ip4_forward_next_trace_t *t0;
1104
1105       bi0 = from[0];
1106
1107       b0 = vlib_get_buffer (vm, bi0);
1108
1109       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1110         {
1111           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1112           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1113           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1114           t0->fib_index =
1115             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1116              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1117             vec_elt (im->fib_index_by_sw_if_index,
1118                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1119           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1120                             sizeof (t0->packet_data));
1121         }
1122       from += 1;
1123       n_left -= 1;
1124     }
1125 }
1126
1127 /* Compute TCP/UDP/ICMP4 checksum in software. */
1128 u16
1129 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1130                               ip4_header_t * ip0)
1131 {
1132   ip_csum_t sum0;
1133   u32 ip_header_length, payload_length_host_byte_order;
1134   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1135   u16 sum16;
1136   void *data_this_buffer;
1137
1138   /* Initialize checksum with ip header. */
1139   ip_header_length = ip4_header_bytes (ip0);
1140   payload_length_host_byte_order =
1141     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1142   sum0 =
1143     clib_host_to_net_u32 (payload_length_host_byte_order +
1144                           (ip0->protocol << 16));
1145
1146   if (BITS (uword) == 32)
1147     {
1148       sum0 =
1149         ip_csum_with_carry (sum0,
1150                             clib_mem_unaligned (&ip0->src_address, u32));
1151       sum0 =
1152         ip_csum_with_carry (sum0,
1153                             clib_mem_unaligned (&ip0->dst_address, u32));
1154     }
1155   else
1156     sum0 =
1157       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1158
1159   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1160   data_this_buffer = (void *) ip0 + ip_header_length;
1161   n_ip_bytes_this_buffer =
1162     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1163   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1164     {
1165       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1166         n_ip_bytes_this_buffer - ip_header_length : 0;
1167     }
1168   while (1)
1169     {
1170       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1171       n_bytes_left -= n_this_buffer;
1172       if (n_bytes_left == 0)
1173         break;
1174
1175       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1176       p0 = vlib_get_buffer (vm, p0->next_buffer);
1177       data_this_buffer = vlib_buffer_get_current (p0);
1178       n_this_buffer = p0->current_length;
1179     }
1180
1181   sum16 = ~ip_csum_fold (sum0);
1182
1183   return sum16;
1184 }
1185
1186 u32
1187 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1188 {
1189   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1190   udp_header_t *udp0;
1191   u16 sum16;
1192
1193   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1194           || ip0->protocol == IP_PROTOCOL_UDP);
1195
1196   udp0 = (void *) (ip0 + 1);
1197   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1198     {
1199       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1200                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1201       return p0->flags;
1202     }
1203
1204   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1205
1206   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1207                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1208
1209   return p0->flags;
1210 }
1211 #endif
1212
1213 /* *INDENT-OFF* */
1214 VNET_FEATURE_ARC_INIT (ip4_local) =
1215 {
1216   .arc_name  = "ip4-local",
1217   .start_nodes = VNET_FEATURES ("ip4-local"),
1218   .last_in_arc = "ip4-local-end-of-arc",
1219 };
1220 /* *INDENT-ON* */
1221
1222 static inline void
1223 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1224                             ip4_header_t * ip, u8 is_udp, u8 * error,
1225                             u8 * good_tcp_udp)
1226 {
1227   u32 flags0;
1228   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1229   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1230   if (is_udp)
1231     {
1232       udp_header_t *udp;
1233       u32 ip_len, udp_len;
1234       i32 len_diff;
1235       udp = ip4_next_header (ip);
1236       /* Verify UDP length. */
1237       ip_len = clib_net_to_host_u16 (ip->length);
1238       udp_len = clib_net_to_host_u16 (udp->length);
1239
1240       len_diff = ip_len - udp_len;
1241       *good_tcp_udp &= len_diff >= 0;
1242       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1243     }
1244 }
1245
/*
 * Checksum state helpers for ip4-local.  '_b' is a pointer to a buffer
 * with a 'flags' member.  Every argument and every expansion is fully
 * parenthesised so that the macros can be negated, compared or passed
 * arbitrary expressions without precedence surprises.  Note that '_b' is
 * evaluated more than once.
 */

/* True when the buffer is flagged for TCP or UDP checksum offload. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    ((((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM) != 0)             \
     || (((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM) != 0))

/*
 * True when a TCP/UDP packet still needs a software checksum check, i.e.
 * the checksum has neither been computed already nor been offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) != 0)     \
          || ip4_local_csum_is_offloaded (_b)))

/* True when the checksum is flagged correct or has been offloaded. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0)           \
     || ip4_local_csum_is_offloaded (_b))
1257
1258 static inline void
1259 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1260                          ip4_header_t * ih, u8 * error)
1261 {
1262   u8 is_udp, is_tcp_udp, good_tcp_udp;
1263
1264   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1265   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1266
1267   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1268     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1269   else
1270     good_tcp_udp = ip4_local_csum_is_valid (b);
1271
1272   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1273   *error = (is_tcp_udp && !good_tcp_udp
1274             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1275 }
1276
1277 static inline void
1278 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1279                             ip4_header_t ** ih, u8 * error)
1280 {
1281   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1282
1283   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1284   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1285
1286   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1287   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1288
1289   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1290   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1291
1292   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1293                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1294     {
1295       if (is_tcp_udp[0])
1296         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1297                                     &good_tcp_udp[0]);
1298       if (is_tcp_udp[1])
1299         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1300                                     &good_tcp_udp[1]);
1301     }
1302
1303   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1304               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1305   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1306               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1307 }
1308
1309 static inline void
1310 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1311                               vlib_buffer_t * b, u16 * next, u8 error,
1312                               u8 head_of_feature_arc)
1313 {
1314   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1315   u32 next_index;
1316
1317   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1318   b->error = error ? error_node->errors[error] : 0;
1319   if (head_of_feature_arc)
1320     {
1321       next_index = *next;
1322       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1323         {
1324           vnet_feature_arc_start (arc_index,
1325                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1326                                   &next_index, b);
1327           *next = next_index;
1328         }
1329     }
1330 }
1331
1332 typedef struct
1333 {
1334   ip4_address_t src;
1335   u32 lbi;
1336   u8 error;
1337   u8 first;
1338 } ip4_local_last_check_t;
1339
1340 static inline void
1341 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1342                      ip4_local_last_check_t * last_check, u8 * error0)
1343 {
1344   ip4_fib_mtrie_leaf_t leaf0;
1345   ip4_fib_mtrie_t *mtrie0;
1346   const dpo_id_t *dpo0;
1347   load_balance_t *lb0;
1348   u32 lbi0;
1349
1350   vnet_buffer (b)->ip.fib_index =
1351     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1352     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1353
1354   if (PREDICT_FALSE (last_check->first ||
1355                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1356     {
1357       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1358       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1359       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1360       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1361       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1362
1363       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1364       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1365
1366       lb0 = load_balance_get (lbi0);
1367       dpo0 = load_balance_get_bucket_i (lb0, 0);
1368
1369       /*
1370        * Must have a route to source otherwise we drop the packet.
1371        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1372        *
1373        * The checks are:
1374        *  - the source is a recieve => it's from us => bogus, do this
1375        *    first since it sets a different error code.
1376        *  - uRPF check for any route to source - accept if passes.
1377        *  - allow packets destined to the broadcast address from unknown sources
1378        */
1379
1380       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1381                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1382                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1383       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1384                   && !fib_urpf_check_size (lb0->lb_urpf)
1385                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1386                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1387
1388       last_check->src.as_u32 = ip0->src_address.as_u32;
1389       last_check->lbi = lbi0;
1390       last_check->error = *error0;
1391     }
1392   else
1393     {
1394       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1395       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1396       *error0 = last_check->error;
1397       last_check->first = 0;
1398     }
1399 }
1400
1401 static inline void
1402 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1403                         ip4_local_last_check_t * last_check, u8 * error)
1404 {
1405   ip4_fib_mtrie_leaf_t leaf[2];
1406   ip4_fib_mtrie_t *mtrie[2];
1407   const dpo_id_t *dpo[2];
1408   load_balance_t *lb[2];
1409   u32 not_last_hit;
1410   u32 lbi[2];
1411
1412   not_last_hit = last_check->first;
1413   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1414   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1415
1416   vnet_buffer (b[0])->ip.fib_index =
1417     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1418     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1419     vnet_buffer (b[0])->ip.fib_index;
1420
1421   vnet_buffer (b[1])->ip.fib_index =
1422     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1423     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1424     vnet_buffer (b[1])->ip.fib_index;
1425
1426   if (PREDICT_FALSE (not_last_hit))
1427     {
1428       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1429       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1430
1431       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1432       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1433
1434       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1435                                            &ip[0]->src_address, 2);
1436       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1437                                            &ip[1]->src_address, 2);
1438
1439       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1440                                            &ip[0]->src_address, 3);
1441       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1442                                            &ip[1]->src_address, 3);
1443
1444       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1445       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1446
1447       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1448       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1449
1450       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1451       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1452
1453       lb[0] = load_balance_get (lbi[0]);
1454       lb[1] = load_balance_get (lbi[1]);
1455
1456       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1457       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1458
1459       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1460                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1461                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1462       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1463                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1464                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1465                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1466
1467       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1468                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1469                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1470       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1471                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1472                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1473                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1474
1475       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1476       last_check->lbi = lbi[1];
1477       last_check->error = error[1];
1478     }
1479   else
1480     {
1481       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1482       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1483
1484       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1485       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1486
1487       error[0] = last_check->error;
1488       error[1] = last_check->error;
1489       last_check->first = 0;
1490     }
1491 }
1492
/*
 * Packet classes distinguished by ip4-local.  The L4 class must stay 0:
 * callers test the class for non-zero to decide whether to skip checks.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4,      /* neither a fragment nor NATed */
  IP_LOCAL_PACKET_TYPE_NAT,     /* buffer flagged VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_FRAG,    /* IPv4 fragment */
};
1499
1500 /**
1501  * Determine packet type and next node.
1502  *
1503  * The expectation is that all packets that are not L4 will skip
1504  * checksums and source checks.
1505  */
1506 always_inline u8
1507 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1508 {
1509   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1510
1511   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1512     {
1513       *next = IP_LOCAL_NEXT_REASSEMBLY;
1514       return IP_LOCAL_PACKET_TYPE_FRAG;
1515     }
1516   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1517     {
1518       *next = lm->local_next_by_ip_protocol[ip->protocol];
1519       return IP_LOCAL_PACKET_TYPE_NAT;
1520     }
1521
1522   *next = lm->local_next_by_ip_protocol[ip->protocol];
1523   return IP_LOCAL_PACKET_TYPE_L4;
1524 }
1525
1526 static inline uword
1527 ip4_local_inline (vlib_main_t * vm,
1528                   vlib_node_runtime_t * node,
1529                   vlib_frame_t * frame, int head_of_feature_arc)
1530 {
1531   u32 *from, n_left_from;
1532   vlib_node_runtime_t *error_node =
1533     vlib_node_get_runtime (vm, ip4_input_node.index);
1534   u16 nexts[VLIB_FRAME_SIZE], *next;
1535   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1536   ip4_header_t *ip[2];
1537   u8 error[2], pt[2];
1538
1539   ip4_local_last_check_t last_check = {
1540     /*
1541      * 0.0.0.0 can appear as the source address of an IP packet,
1542      * as can any other address, hence the need to use the 'first'
1543      * member to make sure the .lbi is initialised for the first
1544      * packet.
1545      */
1546     .src = {.as_u32 = 0},
1547     .lbi = ~0,
1548     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1549     .first = 1,
1550   };
1551
1552   from = vlib_frame_vector_args (frame);
1553   n_left_from = frame->n_vectors;
1554
1555   if (node->flags & VLIB_NODE_FLAG_TRACE)
1556     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1557
1558   vlib_get_buffers (vm, from, bufs, n_left_from);
1559   b = bufs;
1560   next = nexts;
1561
1562   while (n_left_from >= 6)
1563     {
1564       u8 not_batch = 0;
1565
1566       /* Prefetch next iteration. */
1567       {
1568         vlib_prefetch_buffer_header (b[4], LOAD);
1569         vlib_prefetch_buffer_header (b[5], LOAD);
1570
1571         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1572         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1573       }
1574
1575       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1576
1577       ip[0] = vlib_buffer_get_current (b[0]);
1578       ip[1] = vlib_buffer_get_current (b[1]);
1579
1580       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1581       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1582
1583       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1584       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1585
1586       not_batch = pt[0] ^ pt[1];
1587
1588       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1589         goto skip_checks;
1590
1591       if (PREDICT_TRUE (not_batch == 0))
1592         {
1593           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1594           ip4_local_check_src_x2 (b, ip, &last_check, error);
1595         }
1596       else
1597         {
1598           if (!pt[0])
1599             {
1600               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1601               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1602             }
1603           if (!pt[1])
1604             {
1605               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1606               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1607             }
1608         }
1609
1610     skip_checks:
1611
1612       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1613                                     head_of_feature_arc);
1614       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1615                                     head_of_feature_arc);
1616
1617       b += 2;
1618       next += 2;
1619       n_left_from -= 2;
1620     }
1621
1622   while (n_left_from > 0)
1623     {
1624       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1625
1626       ip[0] = vlib_buffer_get_current (b[0]);
1627       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1628       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1629
1630       if (head_of_feature_arc == 0 || pt[0])
1631         goto skip_check;
1632
1633       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1634       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1635
1636     skip_check:
1637
1638       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1639                                     head_of_feature_arc);
1640
1641       b += 1;
1642       next += 1;
1643       n_left_from -= 1;
1644     }
1645
1646   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1647   return frame->n_vectors;
1648 }
1649
1650 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1651                                vlib_frame_t * frame)
1652 {
1653   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1654 }
1655
1656 /* *INDENT-OFF* */
1657 VLIB_REGISTER_NODE (ip4_local_node) =
1658 {
1659   .name = "ip4-local",
1660   .vector_size = sizeof (u32),
1661   .format_trace = format_ip4_forward_next_trace,
1662   .n_next_nodes = IP_LOCAL_N_NEXT,
1663   .next_nodes =
1664   {
1665     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1666     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1667     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1668     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1669     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1670   },
1671 };
1672 /* *INDENT-ON* */
1673
1674
1675 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1676                                           vlib_node_runtime_t * node,
1677                                           vlib_frame_t * frame)
1678 {
1679   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1680 }
1681
1682 /* *INDENT-OFF* */
1683 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1684   .name = "ip4-local-end-of-arc",
1685   .vector_size = sizeof (u32),
1686
1687   .format_trace = format_ip4_forward_next_trace,
1688   .sibling_of = "ip4-local",
1689 };
1690
1691 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1692   .arc_name = "ip4-local",
1693   .node_name = "ip4-local-end-of-arc",
1694   .runs_before = 0, /* not before any other features */
1695 };
1696 /* *INDENT-ON* */
1697
1698 #ifndef CLIB_MARCH_VARIANT
1699 void
1700 ip4_register_protocol (u32 protocol, u32 node_index)
1701 {
1702   vlib_main_t *vm = vlib_get_main ();
1703   ip4_main_t *im = &ip4_main;
1704   ip_lookup_main_t *lm = &im->lookup_main;
1705
1706   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1707   lm->local_next_by_ip_protocol[protocol] =
1708     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1709 }
1710 #endif
1711
1712 static clib_error_t *
1713 show_ip_local_command_fn (vlib_main_t * vm,
1714                           unformat_input_t * input, vlib_cli_command_t * cmd)
1715 {
1716   ip4_main_t *im = &ip4_main;
1717   ip_lookup_main_t *lm = &im->lookup_main;
1718   int i;
1719
1720   vlib_cli_output (vm, "Protocols handled by ip4_local");
1721   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1722     {
1723       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1724         {
1725           u32 node_index = vlib_get_node (vm,
1726                                           ip4_local_node.index)->
1727             next_nodes[lm->local_next_by_ip_protocol[i]];
1728           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1729                            node_index);
1730         }
1731     }
1732   return 0;
1733 }
1734
1735
1736
1737 /*?
1738  * Display the set of protocols handled by the local IPv4 stack.
1739  *
1740  * @cliexpar
1741  * Example of how to display local protocol table:
1742  * @cliexstart{show ip local}
1743  * Protocols handled by ip4_local
1744  * 1
1745  * 17
1746  * 47
1747  * @cliexend
1748 ?*/
1749 /* *INDENT-OFF* */
1750 VLIB_CLI_COMMAND (show_ip_local, static) =
1751 {
1752   .path = "show ip local",
1753   .function = show_ip_local_command_fn,
1754   .short_help = "show ip local",
1755 };
1756 /* *INDENT-ON* */
1757
1758 always_inline uword
1759 ip4_arp_inline (vlib_main_t * vm,
1760                 vlib_node_runtime_t * node,
1761                 vlib_frame_t * frame, int is_glean)
1762 {
1763   vnet_main_t *vnm = vnet_get_main ();
1764   ip4_main_t *im = &ip4_main;
1765   ip_lookup_main_t *lm = &im->lookup_main;
1766   u32 *from, *to_next_drop;
1767   uword n_left_from, n_left_to_next_drop, next_index;
1768   u32 thread_index = vm->thread_index;
1769   u64 seed;
1770
1771   if (node->flags & VLIB_NODE_FLAG_TRACE)
1772     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1773
1774   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1775
1776   from = vlib_frame_vector_args (frame);
1777   n_left_from = frame->n_vectors;
1778   next_index = node->cached_next_index;
1779   if (next_index == IP4_ARP_NEXT_DROP)
1780     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1781
1782   while (n_left_from > 0)
1783     {
1784       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1785                            to_next_drop, n_left_to_next_drop);
1786
1787       while (n_left_from > 0 && n_left_to_next_drop > 0)
1788         {
1789           u32 pi0, bi0, adj_index0, sw_if_index0;
1790           ip_adjacency_t *adj0;
1791           vlib_buffer_t *p0, *b0;
1792           ip4_address_t resolve0;
1793           ethernet_arp_header_t *h0;
1794           vnet_hw_interface_t *hw_if0;
1795           u64 r0;
1796
1797           pi0 = from[0];
1798           p0 = vlib_get_buffer (vm, pi0);
1799
1800           from += 1;
1801           n_left_from -= 1;
1802           to_next_drop[0] = pi0;
1803           to_next_drop += 1;
1804           n_left_to_next_drop -= 1;
1805
1806           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1807           adj0 = adj_get (adj_index0);
1808
1809           if (is_glean)
1810             {
1811               /* resolve the packet's destination */
1812               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1813               resolve0 = ip0->dst_address;
1814             }
1815           else
1816             {
1817               /* resolve the incomplete adj */
1818               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1819             }
1820
1821           /* combine the address and interface for the hash key */
1822           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1823           r0 = (u64) resolve0.data_u32 << 32;
1824           r0 |= sw_if_index0;
1825
1826           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1827             {
1828               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1829               continue;
1830             }
1831
1832           /*
1833            * the adj has been updated to a rewrite but the node the DPO that got
1834            * us here hasn't - yet. no big deal. we'll drop while we wait.
1835            */
1836           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1837             {
1838               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1839               continue;
1840             }
1841
1842           /*
1843            * Can happen if the control-plane is programming tables
1844            * with traffic flowing; at least that's today's lame excuse.
1845            */
1846           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1847               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1848             {
1849               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1850               continue;
1851             }
1852           /* Send ARP request. */
1853           h0 =
1854             vlib_packet_template_get_packet (vm,
1855                                              &im->ip4_arp_request_packet_template,
1856                                              &bi0);
1857           b0 = vlib_get_buffer (vm, bi0);
1858
1859           /* copy the persistent fields from the original */
1860           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1861
1862           /* Seems we're out of buffers */
1863           if (PREDICT_FALSE (!h0))
1864             {
1865               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1866               continue;
1867             }
1868
1869           /* Add rewrite/encap string for ARP packet. */
1870           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1871
1872           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1873
1874           /* Src ethernet address in ARP header. */
1875           clib_memcpy_fast (h0->ip4_over_ethernet[0].ethernet,
1876                             hw_if0->hw_address,
1877                             sizeof (h0->ip4_over_ethernet[0].ethernet));
1878           if (is_glean)
1879             {
1880               /* The interface's source address is stashed in the Glean Adj */
1881               h0->ip4_over_ethernet[0].ip4 =
1882                 adj0->sub_type.glean.receive_addr.ip4;
1883             }
1884           else
1885             {
1886               /* Src IP address in ARP header. */
1887               if (ip4_src_address_for_packet (lm, sw_if_index0,
1888                                               &h0->ip4_over_ethernet[0].ip4))
1889                 {
1890                   /* No source address available */
1891                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1892                   vlib_buffer_free (vm, &bi0, 1);
1893                   continue;
1894                 }
1895             }
1896           h0->ip4_over_ethernet[1].ip4 = resolve0;
1897
1898           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1899
1900           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1901           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1902           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1903
1904           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1905
1906           vlib_set_next_frame_buffer (vm, node,
1907                                       adj0->rewrite_header.next_index, bi0);
1908         }
1909
1910       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1911     }
1912
1913   return frame->n_vectors;
1914 }
1915
1916 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1917                              vlib_frame_t * frame)
1918 {
1919   return (ip4_arp_inline (vm, node, frame, 0));
1920 }
1921
1922 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1923                                vlib_frame_t * frame)
1924 {
1925   return (ip4_arp_inline (vm, node, frame, 1));
1926 }
1927
1928 static char *ip4_arp_error_strings[] = {
1929   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
1930   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
1931   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
1932   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1933   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1934   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1935 };
1936
1937 /* *INDENT-OFF* */
1938 VLIB_REGISTER_NODE (ip4_arp_node) =
1939 {
1940   .name = "ip4-arp",
1941   .vector_size = sizeof (u32),
1942   .format_trace = format_ip4_forward_next_trace,
1943   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1944   .error_strings = ip4_arp_error_strings,
1945   .n_next_nodes = IP4_ARP_N_NEXT,
1946   .next_nodes =
1947   {
1948     [IP4_ARP_NEXT_DROP] = "error-drop",
1949   },
1950 };
1951
1952 VLIB_REGISTER_NODE (ip4_glean_node) =
1953 {
1954   .name = "ip4-glean",
1955   .vector_size = sizeof (u32),
1956   .format_trace = format_ip4_forward_next_trace,
1957   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1958   .error_strings = ip4_arp_error_strings,
1959   .n_next_nodes = IP4_ARP_N_NEXT,
1960   .next_nodes = {
1961   [IP4_ARP_NEXT_DROP] = "error-drop",
1962   },
1963 };
1964 /* *INDENT-ON* */
1965
1966 #define foreach_notrace_ip4_arp_error           \
1967 _(THROTTLED)                                    \
1968 _(RESOLVED)                                     \
1969 _(NO_BUFFERS)                                   \
1970 _(REQUEST_SENT)                                 \
1971 _(NON_ARP_ADJ)                                  \
1972 _(NO_SOURCE_ADDRESS)
1973
1974 static clib_error_t *
1975 arp_notrace_init (vlib_main_t * vm)
1976 {
1977   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1978
1979   /* don't trace ARP request packets */
1980 #define _(a)                                    \
1981     vnet_pcap_drop_trace_filter_add_del         \
1982         (rt->errors[IP4_ARP_ERROR_##a],         \
1983          1 /* is_add */);
1984   foreach_notrace_ip4_arp_error;
1985 #undef _
1986   return 0;
1987 }
1988
1989 VLIB_INIT_FUNCTION (arp_notrace_init);
1990
1991
1992 #ifndef CLIB_MARCH_VARIANT
1993 /* Send an ARP request to see if given destination is reachable on given interface. */
1994 clib_error_t *
1995 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1996                     u8 refresh)
1997 {
1998   vnet_main_t *vnm = vnet_get_main ();
1999   ip4_main_t *im = &ip4_main;
2000   ethernet_arp_header_t *h;
2001   ip4_address_t *src;
2002   ip_interface_address_t *ia;
2003   ip_adjacency_t *adj;
2004   vnet_hw_interface_t *hi;
2005   vnet_sw_interface_t *si;
2006   vlib_buffer_t *b;
2007   adj_index_t ai;
2008   u32 bi = 0;
2009   u8 unicast_rewrite = 0;
2010
2011   si = vnet_get_sw_interface (vnm, sw_if_index);
2012
2013   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2014     {
2015       return clib_error_return (0, "%U: interface %U down",
2016                                 format_ip4_address, dst,
2017                                 format_vnet_sw_if_index_name, vnm,
2018                                 sw_if_index);
2019     }
2020
2021   src =
2022     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2023   if (!src)
2024     {
2025       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2026       return clib_error_return
2027         (0,
2028          "no matching interface address for destination %U (interface %U)",
2029          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2030          sw_if_index);
2031     }
2032
2033   h = vlib_packet_template_get_packet (vm,
2034                                        &im->ip4_arp_request_packet_template,
2035                                        &bi);
2036
2037   if (!h)
2038     return clib_error_return (0, "ARP request packet allocation failed");
2039
2040   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2041   if (PREDICT_FALSE (!hi->hw_address))
2042     {
2043       return clib_error_return (0, "%U: interface %U do not support ip probe",
2044                                 format_ip4_address, dst,
2045                                 format_vnet_sw_if_index_name, vnm,
2046                                 sw_if_index);
2047     }
2048
2049   clib_memcpy_fast (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2050                     sizeof (h->ip4_over_ethernet[0].ethernet));
2051
2052   h->ip4_over_ethernet[0].ip4 = src[0];
2053   h->ip4_over_ethernet[1].ip4 = dst[0];
2054
2055   b = vlib_get_buffer (vm, bi);
2056   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2057     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2058
2059   ip46_address_t nh = {
2060     .ip4 = *dst,
2061   };
2062
2063   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2064                             VNET_LINK_IP4, &nh, sw_if_index);
2065   adj = adj_get (ai);
2066
2067   /* Peer has been previously resolved, retrieve glean adj instead */
2068   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2069     {
2070       if (refresh)
2071         unicast_rewrite = 1;
2072       else
2073         {
2074           adj_unlock (ai);
2075           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2076                                       VNET_LINK_IP4, sw_if_index, &nh);
2077           adj = adj_get (ai);
2078         }
2079     }
2080
2081   /* Add encapsulation string for software interface (e.g. ethernet header). */
2082   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2083   if (unicast_rewrite)
2084     {
2085       u16 *etype = vlib_buffer_get_current (b) - 2;
2086       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2087     }
2088   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2089
2090   {
2091     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2092     u32 *to_next = vlib_frame_vector_args (f);
2093     to_next[0] = bi;
2094     f->n_vectors = 1;
2095     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2096   }
2097
2098   adj_unlock (ai);
2099   return /* no error */ 0;
2100 }
2101 #endif
2102
/* Statically declared next nodes of the ip4-rewrite node family. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,    /* registered as "ip4-drop" */
  IP4_REWRITE_NEXT_ICMP_ERROR,  /* registered as "ip4-icmp-error" */
  IP4_REWRITE_NEXT_FRAGMENT,    /* registered as "ip4-frag" */
  IP4_REWRITE_N_NEXT,           /* Last: number of static nexts */
} ip4_rewrite_next_t;
2110
/**
 * The bits of an IPv4 address that are masked in when constructing a
 * multicast MAC address. The constant depends on the byte order of the
 * build target.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2120
2121 always_inline void
2122 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2123                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2124 {
2125   if (packet_len > adj_packet_bytes)
2126     {
2127       *error = IP4_ERROR_MTU_EXCEEDED;
2128       if (df)
2129         {
2130           icmp4_error_set_vnet_buffer
2131             (b, ICMP4_destination_unreachable,
2132              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2133              adj_packet_bytes);
2134           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2135         }
2136       else
2137         {
2138           /* IP fragmentation */
2139           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2140                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2141           *next = IP4_REWRITE_NEXT_FRAGMENT;
2142         }
2143     }
2144 }
2145
2146 /* Decrement TTL & update checksum.
2147    Works either endian, so no need for byte swap. */
2148 static_always_inline void
2149 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2150                             u32 * error)
2151 {
2152   i32 ttl;
2153   u32 checksum;
2154   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2155     {
2156       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2157       return;
2158     }
2159
2160   ttl = ip->ttl;
2161
2162   /* Input node should have reject packets with ttl 0. */
2163   ASSERT (ip->ttl > 0);
2164
2165   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2166   checksum += checksum >= 0xffff;
2167
2168   ip->checksum = checksum;
2169   ttl -= 1;
2170   ip->ttl = ttl;
2171
2172   /*
2173    * If the ttl drops below 1 when forwarding, generate
2174    * an ICMP response.
2175    */
2176   if (PREDICT_FALSE (ttl <= 0))
2177     {
2178       *error = IP4_ERROR_TIME_EXPIRED;
2179       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2180       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2181                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2182                                    0);
2183       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2184     }
2185
2186   /* Verify checksum. */
2187   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2188           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2189 }
2190
2191
2192 always_inline uword
2193 ip4_rewrite_inline (vlib_main_t * vm,
2194                     vlib_node_runtime_t * node,
2195                     vlib_frame_t * frame,
2196                     int do_counters, int is_midchain, int is_mcast)
2197 {
2198   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2199   u32 *from = vlib_frame_vector_args (frame);
2200   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2201   u16 nexts[VLIB_FRAME_SIZE], *next;
2202   u32 n_left_from;
2203   vlib_node_runtime_t *error_node =
2204     vlib_node_get_runtime (vm, ip4_input_node.index);
2205
2206   n_left_from = frame->n_vectors;
2207   u32 thread_index = vm->thread_index;
2208
2209   vlib_get_buffers (vm, from, bufs, n_left_from);
2210   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2211
2212   if (n_left_from >= 6)
2213     {
2214       int i;
2215       for (i = 2; i < 6; i++)
2216         vlib_prefetch_buffer_header (bufs[i], LOAD);
2217     }
2218
2219   next = nexts;
2220   b = bufs;
2221   while (n_left_from >= 8)
2222     {
2223       ip_adjacency_t *adj0, *adj1;
2224       ip4_header_t *ip0, *ip1;
2225       u32 rw_len0, error0, adj_index0;
2226       u32 rw_len1, error1, adj_index1;
2227       u32 tx_sw_if_index0, tx_sw_if_index1;
2228       u8 *p;
2229
2230       vlib_prefetch_buffer_header (b[6], LOAD);
2231       vlib_prefetch_buffer_header (b[7], LOAD);
2232
2233       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2234       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2235
2236       /*
2237        * pre-fetch the per-adjacency counters
2238        */
2239       if (do_counters)
2240         {
2241           vlib_prefetch_combined_counter (&adjacency_counters,
2242                                           thread_index, adj_index0);
2243           vlib_prefetch_combined_counter (&adjacency_counters,
2244                                           thread_index, adj_index1);
2245         }
2246
2247       ip0 = vlib_buffer_get_current (b[0]);
2248       ip1 = vlib_buffer_get_current (b[1]);
2249
2250       error0 = error1 = IP4_ERROR_NONE;
2251
2252       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2253       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2254
2255       /* Rewrite packet header and updates lengths. */
2256       adj0 = adj_get (adj_index0);
2257       adj1 = adj_get (adj_index1);
2258
2259       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2260       rw_len0 = adj0[0].rewrite_header.data_bytes;
2261       rw_len1 = adj1[0].rewrite_header.data_bytes;
2262       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2263       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2264
2265       p = vlib_buffer_get_current (b[2]);
2266       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2267       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2268
2269       p = vlib_buffer_get_current (b[3]);
2270       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2271       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2272
2273       /* Check MTU of outgoing interface. */
2274       ip4_mtu_check (b[0], clib_net_to_host_u16 (ip0->length),
2275                      adj0[0].rewrite_header.max_l3_packet_bytes,
2276                      ip0->flags_and_fragment_offset &
2277                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2278                      next + 0, &error0);
2279       ip4_mtu_check (b[1], clib_net_to_host_u16 (ip1->length),
2280                      adj1[0].rewrite_header.max_l3_packet_bytes,
2281                      ip1->flags_and_fragment_offset &
2282                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2283                      next + 1, &error1);
2284
2285       if (is_mcast)
2286         {
2287           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2288                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2289                     IP4_ERROR_SAME_INTERFACE : error0);
2290           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2291                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2292                     IP4_ERROR_SAME_INTERFACE : error1);
2293         }
2294
2295       b[0]->error = error_node->errors[error0];
2296       b[1]->error = error_node->errors[error1];
2297       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2298        * to see the IP headerr */
2299       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2300         {
2301           u32 next_index = adj0[0].rewrite_header.next_index;
2302           b[0]->current_data -= rw_len0;
2303           b[0]->current_length += rw_len0;
2304           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2305           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2306
2307           if (PREDICT_FALSE
2308               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2309             vnet_feature_arc_start (lm->output_feature_arc_index,
2310                                     tx_sw_if_index0, &next_index, b[0]);
2311           next[0] = next_index;
2312         }
2313       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2314         {
2315           u32 next_index = adj1[0].rewrite_header.next_index;
2316           b[1]->current_data -= rw_len1;
2317           b[1]->current_length += rw_len1;
2318
2319           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2320           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2321
2322           if (PREDICT_FALSE
2323               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2324             vnet_feature_arc_start (lm->output_feature_arc_index,
2325                                     tx_sw_if_index1, &next_index, b[1]);
2326           next[1] = next_index;
2327         }
2328
2329       /* Guess we are only writing on simple Ethernet header. */
2330       vnet_rewrite_two_headers (adj0[0], adj1[0],
2331                                 ip0, ip1, sizeof (ethernet_header_t));
2332
2333       /*
2334        * Bump the per-adjacency counters
2335        */
2336       if (do_counters)
2337         {
2338           vlib_increment_combined_counter
2339             (&adjacency_counters,
2340              thread_index,
2341              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2342
2343           vlib_increment_combined_counter
2344             (&adjacency_counters,
2345              thread_index,
2346              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2347         }
2348
2349       if (is_midchain)
2350         {
2351           adj0->sub_type.midchain.fixup_func
2352             (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2353           adj1->sub_type.midchain.fixup_func
2354             (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2355         }
2356
2357       if (is_mcast)
2358         {
2359           /*
2360            * copy bytes from the IP address into the MAC rewrite
2361            */
2362           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2363                                       adj0->rewrite_header.dst_mcast_offset,
2364                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2365           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2366                                       adj1->rewrite_header.dst_mcast_offset,
2367                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2368         }
2369
2370       next += 2;
2371       b += 2;
2372       n_left_from -= 2;
2373     }
2374
2375   while (n_left_from > 0)
2376     {
2377       ip_adjacency_t *adj0;
2378       ip4_header_t *ip0;
2379       u32 rw_len0, adj_index0, error0;
2380       u32 tx_sw_if_index0;
2381
2382       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2383
2384       adj0 = adj_get (adj_index0);
2385
2386       if (do_counters)
2387         vlib_prefetch_combined_counter (&adjacency_counters,
2388                                         thread_index, adj_index0);
2389
2390       ip0 = vlib_buffer_get_current (b[0]);
2391
2392       error0 = IP4_ERROR_NONE;
2393
2394       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2395
2396
2397       /* Update packet buffer attributes/set output interface. */
2398       rw_len0 = adj0[0].rewrite_header.data_bytes;
2399       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2400
2401       /* Check MTU of outgoing interface. */
2402       ip4_mtu_check (b[0], clib_net_to_host_u16 (ip0->length),
2403                      adj0[0].rewrite_header.max_l3_packet_bytes,
2404                      ip0->flags_and_fragment_offset &
2405                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2406                      next + 0, &error0);
2407
2408       if (is_mcast)
2409         {
2410           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2411                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2412                     IP4_ERROR_SAME_INTERFACE : error0);
2413         }
2414       b[0]->error = error_node->errors[error0];
2415
2416       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2417        * to see the IP headerr */
2418       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2419         {
2420           u32 next_index = adj0[0].rewrite_header.next_index;
2421           b[0]->current_data -= rw_len0;
2422           b[0]->current_length += rw_len0;
2423           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2424           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2425
2426           if (PREDICT_FALSE
2427               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2428             vnet_feature_arc_start (lm->output_feature_arc_index,
2429                                     tx_sw_if_index0, &next_index, b[0]);
2430           next[0] = next_index;
2431         }
2432
2433       /* Guess we are only writing on simple Ethernet header. */
2434       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2435
2436       if (do_counters)
2437         vlib_increment_combined_counter
2438           (&adjacency_counters,
2439            thread_index, adj_index0, 1,
2440            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2441
2442       if (is_midchain)
2443         {
2444           adj0->sub_type.midchain.fixup_func
2445             (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2446         }
2447
2448       if (is_mcast)
2449         {
2450           /*
2451            * copy bytes from the IP address into the MAC rewrite
2452            */
2453           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2454                                       adj0->rewrite_header.dst_mcast_offset,
2455                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2456         }
2457
2458       next += 1;
2459       b += 1;
2460       n_left_from -= 1;
2461     }
2462
2463
2464   /* Need to do trace after rewrites to pick up new packet data. */
2465   if (node->flags & VLIB_NODE_FLAG_TRACE)
2466     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2467
2468   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2469   return frame->n_vectors;
2470 }
2471
2472
2473 /** @brief IPv4 rewrite node.
2474     @node ip4-rewrite
2475
2476     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2477     header checksum, fetch the ip adjacency, check the outbound mtu,
2478     apply the adjacency rewrite, and send pkts to the adjacency
2479     rewrite header's rewrite_next_index.
2480
2481     @param vm vlib_main_t corresponding to the current thread
2482     @param node vlib_node_runtime_t
2483     @param frame vlib_frame_t whose contents should be dispatched
2484
2485     @par Graph mechanics: buffer metadata, next index usage
2486
2487     @em Uses:
2488     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2489         - the rewrite adjacency index
2490     - <code>adj->lookup_next_index</code>
2491         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2492           the packet will be dropped.
2493     - <code>adj->rewrite_header</code>
2494         - Rewrite string length, rewrite string, next_index
2495
2496     @em Sets:
2497     - <code>b->current_data, b->current_length</code>
2498         - Updated net of applying the rewrite string
2499
2500     <em>Next Indices:</em>
2501     - <code> adj->rewrite_header.next_index </code>
2502       or @c ip4-drop
2503 */
2504
2505 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2506                                  vlib_frame_t * frame)
2507 {
2508   if (adj_are_counters_enabled ())
2509     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2510   else
2511     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2512 }
2513
2514 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2515                                        vlib_node_runtime_t * node,
2516                                        vlib_frame_t * frame)
2517 {
2518   if (adj_are_counters_enabled ())
2519     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2520   else
2521     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2522 }
2523
2524 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2525                                   vlib_node_runtime_t * node,
2526                                   vlib_frame_t * frame)
2527 {
2528   if (adj_are_counters_enabled ())
2529     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2530   else
2531     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2532 }
2533
2534 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2535                                        vlib_node_runtime_t * node,
2536                                        vlib_frame_t * frame)
2537 {
2538   if (adj_are_counters_enabled ())
2539     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2540   else
2541     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2542 }
2543
2544 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2545                                         vlib_node_runtime_t * node,
2546                                         vlib_frame_t * frame)
2547 {
2548   if (adj_are_counters_enabled ())
2549     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2550   else
2551     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2552 }
2553
2554 /* *INDENT-OFF* */
2555 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2556   .name = "ip4-rewrite",
2557   .vector_size = sizeof (u32),
2558
2559   .format_trace = format_ip4_rewrite_trace,
2560
2561   .n_next_nodes = IP4_REWRITE_N_NEXT,
2562   .next_nodes = {
2563     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2564     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2565     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2566   },
2567 };
2568
2569 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2570   .name = "ip4-rewrite-bcast",
2571   .vector_size = sizeof (u32),
2572
2573   .format_trace = format_ip4_rewrite_trace,
2574   .sibling_of = "ip4-rewrite",
2575 };
2576
2577 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2578   .name = "ip4-rewrite-mcast",
2579   .vector_size = sizeof (u32),
2580
2581   .format_trace = format_ip4_rewrite_trace,
2582   .sibling_of = "ip4-rewrite",
2583 };
2584
2585 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2586   .name = "ip4-mcast-midchain",
2587   .vector_size = sizeof (u32),
2588
2589   .format_trace = format_ip4_rewrite_trace,
2590   .sibling_of = "ip4-rewrite",
2591 };
2592
2593 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2594   .name = "ip4-midchain",
2595   .vector_size = sizeof (u32),
2596   .format_trace = format_ip4_forward_next_trace,
2597   .sibling_of =  "ip4-rewrite",
2598 };
2599 /* *INDENT-ON* */
2600
2601 static int
2602 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2603 {
2604   ip4_fib_mtrie_t *mtrie0;
2605   ip4_fib_mtrie_leaf_t leaf0;
2606   u32 lbi0;
2607
2608   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2609
2610   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2611   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2612   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2613
2614   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2615
2616   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2617 }
2618
2619 static clib_error_t *
2620 test_lookup_command_fn (vlib_main_t * vm,
2621                         unformat_input_t * input, vlib_cli_command_t * cmd)
2622 {
2623   ip4_fib_t *fib;
2624   u32 table_id = 0;
2625   f64 count = 1;
2626   u32 n;
2627   int i;
2628   ip4_address_t ip4_base_address;
2629   u64 errors = 0;
2630
2631   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2632     {
2633       if (unformat (input, "table %d", &table_id))
2634         {
2635           /* Make sure the entry exists. */
2636           fib = ip4_fib_get (table_id);
2637           if ((fib) && (fib->index != table_id))
2638             return clib_error_return (0, "<fib-index> %d does not exist",
2639                                       table_id);
2640         }
2641       else if (unformat (input, "count %f", &count))
2642         ;
2643
2644       else if (unformat (input, "%U",
2645                          unformat_ip4_address, &ip4_base_address))
2646         ;
2647       else
2648         return clib_error_return (0, "unknown input `%U'",
2649                                   format_unformat_error, input);
2650     }
2651
2652   n = count;
2653
2654   for (i = 0; i < n; i++)
2655     {
2656       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2657         errors++;
2658
2659       ip4_base_address.as_u32 =
2660         clib_host_to_net_u32 (1 +
2661                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2662     }
2663
2664   if (errors)
2665     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2666   else
2667     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2668
2669   return 0;
2670 }
2671
2672 /*?
2673  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2674  * given FIB table to determine if there is a conflict with the
2675  * adjacency table. The fib-id can be determined by using the
2676  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2677  * of 0 is used.
2678  *
2679  * @todo This command uses fib-id, other commands use table-id (not
2680  * just a name, they are different indexes). Would like to change this
2681  * to table-id for consistency.
2682  *
2683  * @cliexpar
2684  * Example of how to run the test lookup command:
2685  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2686  * No errors in 2 lookups
2687  * @cliexend
2688 ?*/
2689 /* *INDENT-OFF* */
2690 VLIB_CLI_COMMAND (lookup_test_command, static) =
2691 {
2692   .path = "test lookup",
2693   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2694   .function = test_lookup_command_fn,
2695 };
2696 /* *INDENT-ON* */
2697
2698 #ifndef CLIB_MARCH_VARIANT
2699 int
2700 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2701 {
2702   u32 fib_index;
2703
2704   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2705
2706   if (~0 == fib_index)
2707     return VNET_API_ERROR_NO_SUCH_FIB;
2708
2709   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2710                                   flow_hash_config);
2711
2712   return 0;
2713 }
2714 #endif
2715
2716 static clib_error_t *
2717 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2718                              unformat_input_t * input,
2719                              vlib_cli_command_t * cmd)
2720 {
2721   int matched = 0;
2722   u32 table_id = 0;
2723   u32 flow_hash_config = 0;
2724   int rv;
2725
2726   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2727     {
2728       if (unformat (input, "table %d", &table_id))
2729         matched = 1;
2730 #define _(a,v) \
2731     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2732       foreach_flow_hash_bit
2733 #undef _
2734         else
2735         break;
2736     }
2737
2738   if (matched == 0)
2739     return clib_error_return (0, "unknown input `%U'",
2740                               format_unformat_error, input);
2741
2742   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2743   switch (rv)
2744     {
2745     case 0:
2746       break;
2747
2748     case VNET_API_ERROR_NO_SUCH_FIB:
2749       return clib_error_return (0, "no such FIB table %d", table_id);
2750
2751     default:
2752       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2753       break;
2754     }
2755
2756   return 0;
2757 }
2758
2759 /*?
2760  * Configure the set of IPv4 fields used by the flow hash.
2761  *
2762  * @cliexpar
2763  * Example of how to set the flow hash on a given table:
2764  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2765  * Example of how to display the configured flow hash:
2766  * @cliexstart{show ip fib}
2767  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2768  * 0.0.0.0/0
2769  *   unicast-ip4-chain
2770  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2771  *     [0] [@0]: dpo-drop ip6
2772  * 0.0.0.0/32
2773  *   unicast-ip4-chain
2774  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2775  *     [0] [@0]: dpo-drop ip6
2776  * 224.0.0.0/8
2777  *   unicast-ip4-chain
2778  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2779  *     [0] [@0]: dpo-drop ip6
2780  * 6.0.1.2/32
2781  *   unicast-ip4-chain
2782  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2783  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2784  * 7.0.0.1/32
2785  *   unicast-ip4-chain
2786  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2787  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2788  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2789  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2790  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2791  * 240.0.0.0/8
2792  *   unicast-ip4-chain
2793  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2794  *     [0] [@0]: dpo-drop ip6
2795  * 255.255.255.255/32
2796  *   unicast-ip4-chain
2797  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2798  *     [0] [@0]: dpo-drop ip6
2799  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2800  * 0.0.0.0/0
2801  *   unicast-ip4-chain
2802  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2803  *     [0] [@0]: dpo-drop ip6
2804  * 0.0.0.0/32
2805  *   unicast-ip4-chain
2806  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2807  *     [0] [@0]: dpo-drop ip6
2808  * 172.16.1.0/24
2809  *   unicast-ip4-chain
2810  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2811  *     [0] [@4]: ipv4-glean: af_packet0
2812  * 172.16.1.1/32
2813  *   unicast-ip4-chain
2814  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2815  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2816  * 172.16.1.2/32
2817  *   unicast-ip4-chain
2818  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2819  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2820  * 172.16.2.0/24
2821  *   unicast-ip4-chain
2822  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2823  *     [0] [@4]: ipv4-glean: af_packet1
2824  * 172.16.2.1/32
2825  *   unicast-ip4-chain
2826  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2827  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2828  * 224.0.0.0/8
2829  *   unicast-ip4-chain
2830  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2831  *     [0] [@0]: dpo-drop ip6
2832  * 240.0.0.0/8
2833  *   unicast-ip4-chain
2834  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2835  *     [0] [@0]: dpo-drop ip6
2836  * 255.255.255.255/32
2837  *   unicast-ip4-chain
2838  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2839  *     [0] [@0]: dpo-drop ip6
2840  * @cliexend
2841 ?*/
2842 /* *INDENT-OFF* */
2843 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2844 {
2845   .path = "set ip flow-hash",
2846   .short_help =
2847   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2848   .function = set_ip_flow_hash_command_fn,
2849 };
2850 /* *INDENT-ON* */
2851
2852 #ifndef CLIB_MARCH_VARIANT
2853 int
2854 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2855                              u32 table_index)
2856 {
2857   vnet_main_t *vnm = vnet_get_main ();
2858   vnet_interface_main_t *im = &vnm->interface_main;
2859   ip4_main_t *ipm = &ip4_main;
2860   ip_lookup_main_t *lm = &ipm->lookup_main;
2861   vnet_classify_main_t *cm = &vnet_classify_main;
2862   ip4_address_t *if_addr;
2863
2864   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2865     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2866
2867   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2868     return VNET_API_ERROR_NO_SUCH_ENTRY;
2869
2870   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2871   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2872
2873   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2874
2875   if (NULL != if_addr)
2876     {
2877       fib_prefix_t pfx = {
2878         .fp_len = 32,
2879         .fp_proto = FIB_PROTOCOL_IP4,
2880         .fp_addr.ip4 = *if_addr,
2881       };
2882       u32 fib_index;
2883
2884       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2885                                                        sw_if_index);
2886
2887
2888       if (table_index != (u32) ~ 0)
2889         {
2890           dpo_id_t dpo = DPO_INVALID;
2891
2892           dpo_set (&dpo,
2893                    DPO_CLASSIFY,
2894                    DPO_PROTO_IP4,
2895                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2896
2897           fib_table_entry_special_dpo_add (fib_index,
2898                                            &pfx,
2899                                            FIB_SOURCE_CLASSIFY,
2900                                            FIB_ENTRY_FLAG_NONE, &dpo);
2901           dpo_reset (&dpo);
2902         }
2903       else
2904         {
2905           fib_table_entry_special_remove (fib_index,
2906                                           &pfx, FIB_SOURCE_CLASSIFY);
2907         }
2908     }
2909
2910   return 0;
2911 }
2912 #endif
2913
2914 static clib_error_t *
2915 set_ip_classify_command_fn (vlib_main_t * vm,
2916                             unformat_input_t * input,
2917                             vlib_cli_command_t * cmd)
2918 {
2919   u32 table_index = ~0;
2920   int table_index_set = 0;
2921   u32 sw_if_index = ~0;
2922   int rv;
2923
2924   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2925     {
2926       if (unformat (input, "table-index %d", &table_index))
2927         table_index_set = 1;
2928       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2929                          vnet_get_main (), &sw_if_index))
2930         ;
2931       else
2932         break;
2933     }
2934
2935   if (table_index_set == 0)
2936     return clib_error_return (0, "classify table-index must be specified");
2937
2938   if (sw_if_index == ~0)
2939     return clib_error_return (0, "interface / subif must be specified");
2940
2941   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2942
2943   switch (rv)
2944     {
2945     case 0:
2946       break;
2947
2948     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2949       return clib_error_return (0, "No such interface");
2950
2951     case VNET_API_ERROR_NO_SUCH_ENTRY:
2952       return clib_error_return (0, "No such classifier table");
2953     }
2954   return 0;
2955 }
2956
2957 /*?
2958  * Assign a classification table to an interface. The classification
2959  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2960  * commands. Once the table is created, use this command to filter packets
2961  * on an interface.
2962  *
2963  * @cliexpar
2964  * Example of how to assign a classification table to an interface:
2965  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2966 ?*/
2967 /* *INDENT-OFF* */
2968 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2969 {
2970     .path = "set ip classify",
2971     .short_help =
2972     "set ip classify intfc <interface> table-index <classify-idx>",
2973     .function = set_ip_classify_command_fn,
2974 };
2975 /* *INDENT-ON* */
2976
2977 static clib_error_t *
2978 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2979 {
2980   ip4_main_t *im = &ip4_main;
2981   uword heapsize = 0;
2982
2983   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2984     {
2985       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
2986         ;
2987       else
2988         return clib_error_return (0,
2989                                   "invalid heap-size parameter `%U'",
2990                                   format_unformat_error, input);
2991     }
2992
2993   im->mtrie_heap_size = heapsize;
2994
2995   return 0;
2996 }
2997
2998 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
2999
3000 /*
3001  * fd.io coding-style-patch-verification: ON
3002  *
3003  * Local Variables:
3004  * eval: (c-set-style "gnu")
3005  * End:
3006  */