VPP-1692: move NULL pointer check
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame,
96                             /* lookup_for_responses_to_locally_received_packets */
97                             0);
98
99 }
100
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =    /* registration of the "ip4-lookup" graph node */
105 {
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),               /* each frame element is u32-sized */
108   .format_trace = format_ip4_lookup_trace,   /* trace formatter, forward-declared just above */
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,       /* next-node table supplied by this macro */
111 };
112 /* *INDENT-ON* */
113
114 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
115                                       vlib_node_runtime_t * node,
116                                       vlib_frame_t * frame)
117 {
118   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
119   u32 n_left, *from;
120   u32 thread_index = vm->thread_index;
121   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
122   u16 nexts[VLIB_FRAME_SIZE], *next;
123
124   from = vlib_frame_vector_args (frame);
125   n_left = frame->n_vectors;
126   next = nexts;
127
128   vlib_get_buffers (vm, from, bufs, n_left);
129
130   while (n_left >= 4)
131     {
132       const load_balance_t *lb0, *lb1;
133       const ip4_header_t *ip0, *ip1;
134       u32 lbi0, hc0, lbi1, hc1;
135       const dpo_id_t *dpo0, *dpo1;
136
137       /* Prefetch next iteration. */
138       {
139         vlib_prefetch_buffer_header (b[2], LOAD);
140         vlib_prefetch_buffer_header (b[3], LOAD);
141
142         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
143         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
144       }
145
146       ip0 = vlib_buffer_get_current (b[0]);
147       ip1 = vlib_buffer_get_current (b[1]);
148       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
149       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
150
151       lb0 = load_balance_get (lbi0);
152       lb1 = load_balance_get (lbi1);
153
154       /*
155        * this node is for via FIBs we can re-use the hash value from the
156        * to node if present.
157        * We don't want to use the same hash value at each level in the recursion
158        * graph as that would lead to polarisation
159        */
160       hc0 = hc1 = 0;
161
162       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
163         {
164           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
165             {
166               hc0 = vnet_buffer (b[0])->ip.flow_hash =
167                 vnet_buffer (b[0])->ip.flow_hash >> 1;
168             }
169           else
170             {
171               hc0 = vnet_buffer (b[0])->ip.flow_hash =
172                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
173             }
174           dpo0 = load_balance_get_fwd_bucket
175             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
176         }
177       else
178         {
179           dpo0 = load_balance_get_bucket_i (lb0, 0);
180         }
181       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
182         {
183           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
184             {
185               hc1 = vnet_buffer (b[1])->ip.flow_hash =
186                 vnet_buffer (b[1])->ip.flow_hash >> 1;
187             }
188           else
189             {
190               hc1 = vnet_buffer (b[1])->ip.flow_hash =
191                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
192             }
193           dpo1 = load_balance_get_fwd_bucket
194             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
195         }
196       else
197         {
198           dpo1 = load_balance_get_bucket_i (lb1, 0);
199         }
200
201       next[0] = dpo0->dpoi_next_node;
202       next[1] = dpo1->dpoi_next_node;
203
204       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
205       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
206
207       vlib_increment_combined_counter
208         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
209       vlib_increment_combined_counter
210         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
211
212       b += 2;
213       next += 2;
214       n_left -= 2;
215     }
216
217   while (n_left > 0)
218     {
219       const load_balance_t *lb0;
220       const ip4_header_t *ip0;
221       const dpo_id_t *dpo0;
222       u32 lbi0, hc0;
223
224       ip0 = vlib_buffer_get_current (b[0]);
225       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
226
227       lb0 = load_balance_get (lbi0);
228
229       hc0 = 0;
230       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
231         {
232           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
233             {
234               hc0 = vnet_buffer (b[0])->ip.flow_hash =
235                 vnet_buffer (b[0])->ip.flow_hash >> 1;
236             }
237           else
238             {
239               hc0 = vnet_buffer (b[0])->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
241             }
242           dpo0 = load_balance_get_fwd_bucket
243             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
244         }
245       else
246         {
247           dpo0 = load_balance_get_bucket_i (lb0, 0);
248         }
249
250       next[0] = dpo0->dpoi_next_node;
251       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
252
253       vlib_increment_combined_counter
254         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
255
256       b += 1;
257       next += 1;
258       n_left -= 1;
259     }
260
261   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
262   if (node->flags & VLIB_NODE_FLAG_TRACE)
263     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
264
265   return frame->n_vectors;
266 }
267
268 /* *INDENT-OFF* */
269 VLIB_REGISTER_NODE (ip4_load_balance_node) =   /* registration of the "ip4-load-balance" graph node */
270 {
271   .name = "ip4-load-balance",
272   .vector_size = sizeof (u32),                    /* each frame element is u32-sized */
273   .sibling_of = "ip4-lookup",                     /* no next-node table of its own is listed here */
274   .format_trace = format_ip4_lookup_trace,        /* same trace formatter as ip4-lookup */
275 };
276 /* *INDENT-ON* */
277
278 #ifndef CLIB_MARCH_VARIANT
279 /* Return the first IPv4 address found on sw_if_index (unnumbered
     interfaces are honored), or 0 when the interface has none.
     When result_ia is non-NULL it receives the matching interface-address
     entry, or 0 when no address was found. */
280 ip4_address_t *
281 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
282                              ip_interface_address_t ** result_ia)
283 {
284   ip_lookup_main_t *lm = &im->lookup_main;
285   ip_interface_address_t *ia = 0;
286   ip4_address_t *result = 0;
287
288   /* *INDENT-OFF* */
289   foreach_ip_interface_address
290     (lm, ia, sw_if_index,
291      1 /* honor unnumbered */ ,
292      ({
293        ip4_address_t * a =
294          ip_interface_address_get_address (lm, ia);
295        result = a;
296        break;   /* only the first address is wanted */
297      }));
298   /* *INDENT-ON* */
299   if (result_ia)
300     *result_ia = result ? ia : 0;
301   return result;
302 }
303
304 static void
305 ip4_add_subnet_bcast_route (u32 fib_index,
306                             fib_prefix_t *pfx,
307                             u32 sw_if_index)
308 {
309   vnet_sw_interface_flags_t iflags;
310
311   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
312
313   fib_table_entry_special_remove(fib_index,
314                                  pfx,
315                                  FIB_SOURCE_INTERFACE);
316
317   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
318     {
319       fib_table_entry_update_one_path (fib_index, pfx,
320                                        FIB_SOURCE_INTERFACE,
321                                        FIB_ENTRY_FLAG_NONE,
322                                        DPO_PROTO_IP4,
323                                        /* No next-hop address */
324                                        &ADJ_BCAST_ADDR,
325                                        sw_if_index,
326                                        // invalid FIB index
327                                        ~0,
328                                        1,
329                                        // no out-label stack
330                                        NULL,
331                                        FIB_ROUTE_PATH_FLAG_NONE);
332     }
333   else
334     {
335         fib_table_entry_special_add(fib_index,
336                                     pfx,
337                                     FIB_SOURCE_INTERFACE,
338                                     (FIB_ENTRY_FLAG_DROP |
339                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
340     }
341 }
342
343 static void
344 ip4_add_interface_routes (u32 sw_if_index,
345                           ip4_main_t * im, u32 fib_index,
346                           ip_interface_address_t * a)
347 {
348   ip_lookup_main_t *lm = &im->lookup_main;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350   fib_prefix_t pfx = {
351     .fp_len = a->address_length,
352     .fp_proto = FIB_PROTOCOL_IP4,
353     .fp_addr.ip4 = *address,
354   };
355
356   if (pfx.fp_len <= 30)
357     {
358       /* a /30 or shorter - add a glean for the network address */
359       fib_table_entry_update_one_path (fib_index, &pfx,
360                                        FIB_SOURCE_INTERFACE,
361                                        (FIB_ENTRY_FLAG_CONNECTED |
362                                         FIB_ENTRY_FLAG_ATTACHED),
363                                        DPO_PROTO_IP4,
364                                        /* No next-hop address */
365                                        NULL,
366                                        sw_if_index,
367                                        // invalid FIB index
368                                        ~0,
369                                        1,
370                                        // no out-label stack
371                                        NULL,
372                                        FIB_ROUTE_PATH_FLAG_NONE);
373
374       /* Add the two broadcast addresses as drop */
375       fib_prefix_t net_pfx = {
376         .fp_len = 32,
377         .fp_proto = FIB_PROTOCOL_IP4,
378         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
379       };
380       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
381         fib_table_entry_special_add(fib_index,
382                                     &net_pfx,
383                                     FIB_SOURCE_INTERFACE,
384                                     (FIB_ENTRY_FLAG_DROP |
385                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
386       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
387       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
388         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
389     }
390   else if (pfx.fp_len == 31)
391     {
392       u32 mask = clib_host_to_net_u32(1);
393       fib_prefix_t net_pfx = pfx;
394
395       net_pfx.fp_len = 32;
396       net_pfx.fp_addr.ip4.as_u32 ^= mask;
397
398       /* a /31 - add the other end as an attached host */
399       fib_table_entry_update_one_path (fib_index, &net_pfx,
400                                        FIB_SOURCE_INTERFACE,
401                                        (FIB_ENTRY_FLAG_ATTACHED),
402                                        DPO_PROTO_IP4,
403                                        &net_pfx.fp_addr,
404                                        sw_if_index,
405                                        // invalid FIB index
406                                        ~0,
407                                        1,
408                                        NULL,
409                                        FIB_ROUTE_PATH_FLAG_NONE);
410     }
411   pfx.fp_len = 32;
412
413   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
414     {
415       u32 classify_table_index =
416         lm->classify_table_index_by_sw_if_index[sw_if_index];
417       if (classify_table_index != (u32) ~ 0)
418         {
419           dpo_id_t dpo = DPO_INVALID;
420
421           dpo_set (&dpo,
422                    DPO_CLASSIFY,
423                    DPO_PROTO_IP4,
424                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
425
426           fib_table_entry_special_dpo_add (fib_index,
427                                            &pfx,
428                                            FIB_SOURCE_CLASSIFY,
429                                            FIB_ENTRY_FLAG_NONE, &dpo);
430           dpo_reset (&dpo);
431         }
432     }
433
434   fib_table_entry_update_one_path (fib_index, &pfx,
435                                    FIB_SOURCE_INTERFACE,
436                                    (FIB_ENTRY_FLAG_CONNECTED |
437                                     FIB_ENTRY_FLAG_LOCAL),
438                                    DPO_PROTO_IP4,
439                                    &pfx.fp_addr,
440                                    sw_if_index,
441                                    // invalid FIB index
442                                    ~0,
443                                    1, NULL,
444                                    FIB_ROUTE_PATH_FLAG_NONE);
445 }
446
447 static void
448 ip4_del_interface_routes (ip4_main_t * im,
449                           u32 fib_index,
450                           ip4_address_t * address, u32 address_length)
451 {
452   fib_prefix_t pfx = {
453     .fp_len = address_length,
454     .fp_proto = FIB_PROTOCOL_IP4,
455     .fp_addr.ip4 = *address,
456   };
457
458   if (pfx.fp_len <= 30)
459     {
460       fib_prefix_t net_pfx = {
461         .fp_len = 32,
462         .fp_proto = FIB_PROTOCOL_IP4,
463         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
464       };
465       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
466         fib_table_entry_special_remove(fib_index,
467                                        &net_pfx,
468                                        FIB_SOURCE_INTERFACE);
469       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
470       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
471         fib_table_entry_special_remove(fib_index,
472                                        &net_pfx,
473                                        FIB_SOURCE_INTERFACE);
474       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
475     }
476     else if (pfx.fp_len == 31)
477     {
478       u32 mask = clib_host_to_net_u32(1);
479       fib_prefix_t net_pfx = pfx;
480
481       net_pfx.fp_len = 32;
482       net_pfx.fp_addr.ip4.as_u32 ^= mask;
483
484       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
485     }
486
487   pfx.fp_len = 32;
488   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
489 }
490
491 void
492 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
493 {
494   ip4_main_t *im = &ip4_main;
495
496   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
497
498   /*
499    * enable/disable only on the 1<->0 transition
500    */
501   if (is_enable)
502     {
503       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
504         return;
505     }
506   else
507     {
508       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
509       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
510         return;
511     }
512   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
513                                !is_enable, 0, 0);
514
515
516   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
517                                sw_if_index, !is_enable, 0, 0);
518 }
519
520 static clib_error_t *
521 ip4_add_del_interface_address_internal (vlib_main_t * vm,
522                                         u32 sw_if_index,
523                                         ip4_address_t * address,
524                                         u32 address_length, u32 is_del)
525 {
526   vnet_main_t *vnm = vnet_get_main ();
527   ip4_main_t *im = &ip4_main;
528   ip_lookup_main_t *lm = &im->lookup_main;
529   clib_error_t *error = 0;
530   u32 if_address_index, elts_before;
531   ip4_address_fib_t ip4_af, *addr_fib = 0;
532
533   /* local0 interface doesn't support IP addressing  */
534   if (sw_if_index == 0)
535     {
536       return
537        clib_error_create ("local0 interface doesn't support IP addressing");
538     }
539
540   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
541   ip4_addr_fib_init (&ip4_af, address,
542                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
543   vec_add1 (addr_fib, ip4_af);
544
545   /*
546    * there is no support for adj-fib handling in the presence of overlapping
547    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
548    * most routers do.
549    */
550   /* *INDENT-OFF* */
551   if (!is_del)
552     {
553       /* When adding an address check that it does not conflict
554          with an existing address on any interface in this table. */
555       ip_interface_address_t *ia;
556       vnet_sw_interface_t *sif;
557
558       pool_foreach(sif, vnm->interface_main.sw_interfaces,
559       ({
560           if (im->fib_index_by_sw_if_index[sw_if_index] ==
561               im->fib_index_by_sw_if_index[sif->sw_if_index])
562             {
563               foreach_ip_interface_address
564                 (&im->lookup_main, ia, sif->sw_if_index,
565                  0 /* honor unnumbered */ ,
566                  ({
567                    ip4_address_t * x =
568                      ip_interface_address_get_address
569                      (&im->lookup_main, ia);
570                    if (ip4_destination_matches_route
571                        (im, address, x, ia->address_length) ||
572                        ip4_destination_matches_route (im,
573                                                       x,
574                                                       address,
575                                                       address_length))
576                      {
577                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
578
579                        return
580                          clib_error_create
581                          ("failed to add %U which conflicts with %U for interface %U",
582                           format_ip4_address_and_length, address,
583                           address_length,
584                           format_ip4_address_and_length, x,
585                           ia->address_length,
586                           format_vnet_sw_if_index_name, vnm,
587                           sif->sw_if_index);
588                      }
589                  }));
590             }
591       }));
592     }
593   /* *INDENT-ON* */
594
595   elts_before = pool_elts (lm->if_address_pool);
596
597   error = ip_interface_address_add_del
598     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
599   if (error)
600     goto done;
601
602   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
603
604   if (is_del)
605     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
606   else
607     ip4_add_interface_routes (sw_if_index,
608                               im, ip4_af.fib_index,
609                               pool_elt_at_index
610                               (lm->if_address_pool, if_address_index));
611
612   /* If pool did not grow/shrink: add duplicate address. */
613   if (elts_before != pool_elts (lm->if_address_pool))
614     {
615       ip4_add_del_interface_address_callback_t *cb;
616       vec_foreach (cb, im->add_del_interface_address_callbacks)
617         cb->function (im, cb->function_opaque, sw_if_index,
618                       address, address_length, if_address_index, is_del);
619     }
620
621 done:
622   vec_free (addr_fib);
623   return error;
624 }
625
626 clib_error_t *
627 ip4_add_del_interface_address (vlib_main_t * vm,
628                                u32 sw_if_index,
629                                ip4_address_t * address,
630                                u32 address_length, u32 is_del)
631 {
632   return ip4_add_del_interface_address_internal
633     (vm, sw_if_index, address, address_length, is_del);
634 }
635
636 void
637 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
638 {
639   ip_interface_address_t *ia;
640   ip4_main_t *im;
641
642   im = &ip4_main;
643
644   /*
645    * when directed broadcast is enabled, the subnet braodcast route will forward
646    * packets using an adjacency with a broadcast MAC. otherwise it drops
647    */
648   /* *INDENT-OFF* */
649   foreach_ip_interface_address(&im->lookup_main, ia,
650                                sw_if_index, 0,
651      ({
652        if (ia->address_length <= 30)
653          {
654            ip4_address_t *ipa;
655
656            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
657
658            fib_prefix_t pfx = {
659              .fp_len = 32,
660              .fp_proto = FIB_PROTOCOL_IP4,
661              .fp_addr = {
662                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
663              },
664            };
665
666            ip4_add_subnet_bcast_route
667              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
668                                                   sw_if_index),
669               &pfx, sw_if_index);
670          }
671      }));
672   /* *INDENT-ON* */
673 }
674 #endif
675
676 /* Built-in ip4 unicast rx feature path definition */
677 /* *INDENT-OFF* */
678 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
679 {
680   .arc_name = "ip4-unicast",
681   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),   /* nodes at which the arc starts */
682   .last_in_arc = "ip4-lookup",
683   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,        /* where the arc's index is stored */
684 };
685
686 VNET_FEATURE_INIT (ip4_flow_classify, static) =
687 {
688   .arc_name = "ip4-unicast",
689   .node_name = "ip4-flow-classify",
690   .runs_before = VNET_FEATURES ("ip4-inacl"),
691 };
692
693 VNET_FEATURE_INIT (ip4_inacl, static) =
694 {
695   .arc_name = "ip4-unicast",
696   .node_name = "ip4-inacl",
697   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
698 };
699
700 VNET_FEATURE_INIT (ip4_source_check_1, static) =
701 {
702   .arc_name = "ip4-unicast",
703   .node_name = "ip4-source-check-via-rx",
704   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
705 };
706
707 VNET_FEATURE_INIT (ip4_source_check_2, static) =
708 {
709   .arc_name = "ip4-unicast",
710   .node_name = "ip4-source-check-via-any",
711   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
712 };
713
714 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
715 {
716   .arc_name = "ip4-unicast",
717   .node_name = "ip4-source-and-port-range-check-rx",
718   .runs_before = VNET_FEATURES ("ip4-policer-classify"),   /* the only constraint declared here: nothing orders it against the ip4-source-check-* features */
719 };
720
721 VNET_FEATURE_INIT (ip4_policer_classify, static) =
722 {
723   .arc_name = "ip4-unicast",
724   .node_name = "ip4-policer-classify",
725   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
726 };
727
728 VNET_FEATURE_INIT (ip4_ipsec, static) =
729 {
730   .arc_name = "ip4-unicast",
731   .node_name = "ipsec4-input-feature",
732   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
733 };
734
735 VNET_FEATURE_INIT (ip4_vpath, static) =
736 {
737   .arc_name = "ip4-unicast",
738   .node_name = "vpath-input-ip4",
739   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
740 };
741
742 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
743 {
744   .arc_name = "ip4-unicast",
745   .node_name = "ip4-vxlan-bypass",
746   .runs_before = VNET_FEATURES ("ip4-lookup"),
747 };
748
749 VNET_FEATURE_INIT (ip4_not_enabled, static) =
750 {
751   .arc_name = "ip4-unicast",
752   .node_name = "ip4-not-enabled",   /* toggled per interface by ip4_sw_interface_enable_disable() and ip4_sw_interface_add_del() */
753   .runs_before = VNET_FEATURES ("ip4-lookup"),
754 };
755
756 VNET_FEATURE_INIT (ip4_lookup, static) =
757 {
758   .arc_name = "ip4-unicast",
759   .node_name = "ip4-lookup",
760   .runs_before = 0,     /* not before any other features */
761 };
762
763 /* Built-in ip4 multicast rx feature arc and the features declared on it */
764 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
765 {
766   .arc_name = "ip4-multicast",
767   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),   /* same start nodes as the ip4-unicast arc */
768   .last_in_arc = "ip4-mfib-forward-lookup",
769   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,        /* where the arc's index is stored */
770 };
771
772 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
773 {
774   .arc_name = "ip4-multicast",
775   .node_name = "vpath-input-ip4",
776   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
777 };
778
779 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
780 {
781   .arc_name = "ip4-multicast",
782   .node_name = "ip4-not-enabled",   /* toggled per interface together with its ip4-unicast counterpart */
783   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
784 };
785
786 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
787 {
788   .arc_name = "ip4-multicast",
789   .node_name = "ip4-mfib-forward-lookup",
790   .runs_before = 0,     /* last feature */
791 };
792
793 /* ip4 tx (output) feature arc; source and port-range check is the first feature declared on it */
794 VNET_FEATURE_ARC_INIT (ip4_output, static) =
795 {
796   .arc_name = "ip4-output",
797   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),   /* nodes at which the arc starts */
798   .last_in_arc = "interface-output",
799   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,               /* where the arc's index is stored */
800 };
801
802 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
803 {
804   .arc_name = "ip4-output",
805   .node_name = "ip4-source-and-port-range-check-tx",
806   .runs_before = VNET_FEATURES ("ip4-outacl"),
807 };
808
809 VNET_FEATURE_INIT (ip4_outacl, static) =
810 {
811   .arc_name = "ip4-output",
812   .node_name = "ip4-outacl",
813   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
814 };
815
816 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
817 {
818   .arc_name = "ip4-output",
819   .node_name = "ipsec4-output-feature",
820   .runs_before = VNET_FEATURES ("interface-output"),
821 };
822
823 /* Built-in ip4 tx feature path definition: the arc's final node */
824 VNET_FEATURE_INIT (ip4_interface_output, static) =
825 {
826   .arc_name = "ip4-output",
827   .node_name = "interface-output",
828   .runs_before = 0,     /* not before any other features */
829 };
830 /* *INDENT-ON* */
831
832 static clib_error_t *
833 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
834 {
835   ip4_main_t *im = &ip4_main;
836
837   /* Fill in lookup tables with default table (0). */
838   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
839   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
840
841   if (!is_add)
842     {
843       ip4_main_t *im4 = &ip4_main;
844       ip_lookup_main_t *lm4 = &im4->lookup_main;
845       ip_interface_address_t *ia = 0;
846       ip4_address_t *address;
847       vlib_main_t *vm = vlib_get_main ();
848
849       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
850       /* *INDENT-OFF* */
851       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
852       ({
853         address = ip_interface_address_get_address (lm4, ia);
854         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
855       }));
856       /* *INDENT-ON* */
857     }
858
859   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
860                                is_add, 0, 0);
861
862   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
863                                sw_if_index, is_add, 0, 0);
864
865   return /* no error */ 0;
866 }
867
868 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
869
870 /* Global IPv4 main structure. Defined only when CLIB_MARCH_VARIANT is not set
     - presumably so that variant compilations of this file do not each
     define it again (TODO confirm). */
871 #ifndef CLIB_MARCH_VARIANT
872 ip4_main_t ip4_main;
873 #endif /* CLIB_MARCH_VARIANT */
874
875 static clib_error_t *
876 ip4_lookup_init (vlib_main_t * vm)
877 {
878   ip4_main_t *im = &ip4_main;
879   clib_error_t *error;
880   uword i;
881
882   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
883     return error;
884   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
885     return (error);
886   if ((error = vlib_call_init_function (vm, fib_module_init)))
887     return error;
888   if ((error = vlib_call_init_function (vm, mfib_module_init)))
889     return error;
890
891   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
892     {
893       u32 m;
894
895       if (i < 32)
896         m = pow2_mask (i) << (32 - i);
897       else
898         m = ~0;
899       im->fib_masks[i] = clib_host_to_net_u32 (m);
900     }
901
902   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
903
904   /* Create FIB with index 0 and table id of 0. */
905   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
906                                      FIB_SOURCE_DEFAULT_ROUTE);
907   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
908                                       MFIB_SOURCE_DEFAULT_ROUTE);
909
910   {
911     pg_node_t *pn;
912     pn = pg_get_node (ip4_lookup_node.index);
913     pn->unformat_edit = unformat_pg_ip4_header;
914   }
915
916   {
917     ethernet_arp_header_t h;
918
919     clib_memset (&h, 0, sizeof (h));
920
921 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
922 #define _8(f,v) h.f = v;
923     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
924     _16 (l3_type, ETHERNET_TYPE_IP4);
925     _8 (n_l2_address_bytes, 6);
926     _8 (n_l3_address_bytes, 4);
927     _16 (opcode, ETHERNET_ARP_OPCODE_request);
928 #undef _16
929 #undef _8
930
931     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
932                                /* data */ &h,
933                                sizeof (h),
934                                /* alloc chunk size */ 8,
935                                "ip4 arp");
936   }
937
938   return error;
939 }
940
941 VLIB_INIT_FUNCTION (ip4_lookup_init);
942
/* Trace record filled in by ip4_forward_next_trace() and consumed by the
 * format_ip4_*_trace functions in this file. */
typedef struct
{
  /* Adjacency taken. */
  u32 dpo_index;
  /* Copy of the buffer's ip.flow_hash */
  u32 flow_hash;
  /* The buffer's TX sw_if_index when that is set (!= ~0), otherwise the
   * FIB index bound to the buffer's RX interface */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
954
955 #ifndef CLIB_MARCH_VARIANT
956 u8 *
957 format_ip4_forward_next_trace (u8 * s, va_list * args)
958 {
959   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
960   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
961   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
962   u32 indent = format_get_indent (s);
963   s = format (s, "%U%U",
964               format_white_space, indent,
965               format_ip4_header, t->packet_data, sizeof (t->packet_data));
966   return s;
967 }
968 #endif
969
970 static u8 *
971 format_ip4_lookup_trace (u8 * s, va_list * args)
972 {
973   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
974   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
975   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
976   u32 indent = format_get_indent (s);
977
978   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
979               t->fib_index, t->dpo_index, t->flow_hash);
980   s = format (s, "\n%U%U",
981               format_white_space, indent,
982               format_ip4_header, t->packet_data, sizeof (t->packet_data));
983   return s;
984 }
985
986 static u8 *
987 format_ip4_rewrite_trace (u8 * s, va_list * args)
988 {
989   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
990   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
991   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
992   u32 indent = format_get_indent (s);
993
994   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
995               t->fib_index, t->dpo_index, format_ip_adjacency,
996               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
997   s = format (s, "\n%U%U",
998               format_white_space, indent,
999               format_ip_adjacency_packet_data,
1000               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1001   return s;
1002 }
1003
1004 #ifndef CLIB_MARCH_VARIANT
1005 /* Common trace function for all ip4-forward next nodes. */
1006 void
1007 ip4_forward_next_trace (vlib_main_t * vm,
1008                         vlib_node_runtime_t * node,
1009                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1010 {
1011   u32 *from, n_left;
1012   ip4_main_t *im = &ip4_main;
1013
1014   n_left = frame->n_vectors;
1015   from = vlib_frame_vector_args (frame);
1016
1017   while (n_left >= 4)
1018     {
1019       u32 bi0, bi1;
1020       vlib_buffer_t *b0, *b1;
1021       ip4_forward_next_trace_t *t0, *t1;
1022
1023       /* Prefetch next iteration. */
1024       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1025       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1026
1027       bi0 = from[0];
1028       bi1 = from[1];
1029
1030       b0 = vlib_get_buffer (vm, bi0);
1031       b1 = vlib_get_buffer (vm, bi1);
1032
1033       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1034         {
1035           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1036           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1037           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1038           t0->fib_index =
1039             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1040              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1041             vec_elt (im->fib_index_by_sw_if_index,
1042                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1043
1044           clib_memcpy_fast (t0->packet_data,
1045                             vlib_buffer_get_current (b0),
1046                             sizeof (t0->packet_data));
1047         }
1048       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1049         {
1050           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1051           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1052           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1053           t1->fib_index =
1054             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1055              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1056             vec_elt (im->fib_index_by_sw_if_index,
1057                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1058           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1059                             sizeof (t1->packet_data));
1060         }
1061       from += 2;
1062       n_left -= 2;
1063     }
1064
1065   while (n_left >= 1)
1066     {
1067       u32 bi0;
1068       vlib_buffer_t *b0;
1069       ip4_forward_next_trace_t *t0;
1070
1071       bi0 = from[0];
1072
1073       b0 = vlib_get_buffer (vm, bi0);
1074
1075       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1076         {
1077           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1078           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1079           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1080           t0->fib_index =
1081             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1082              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1083             vec_elt (im->fib_index_by_sw_if_index,
1084                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1085           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1086                             sizeof (t0->packet_data));
1087         }
1088       from += 1;
1089       n_left -= 1;
1090     }
1091 }
1092
1093 /* Compute TCP/UDP/ICMP4 checksum in software. */
1094 u16
1095 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1096                               ip4_header_t * ip0)
1097 {
1098   ip_csum_t sum0;
1099   u32 ip_header_length, payload_length_host_byte_order;
1100   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1101   u16 sum16;
1102   void *data_this_buffer;
1103
1104   /* Initialize checksum with ip header. */
1105   ip_header_length = ip4_header_bytes (ip0);
1106   payload_length_host_byte_order =
1107     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1108   sum0 =
1109     clib_host_to_net_u32 (payload_length_host_byte_order +
1110                           (ip0->protocol << 16));
1111
1112   if (BITS (uword) == 32)
1113     {
1114       sum0 =
1115         ip_csum_with_carry (sum0,
1116                             clib_mem_unaligned (&ip0->src_address, u32));
1117       sum0 =
1118         ip_csum_with_carry (sum0,
1119                             clib_mem_unaligned (&ip0->dst_address, u32));
1120     }
1121   else
1122     sum0 =
1123       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1124
1125   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1126   data_this_buffer = (void *) ip0 + ip_header_length;
1127   n_ip_bytes_this_buffer =
1128     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1129   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1130     {
1131       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1132         n_ip_bytes_this_buffer - ip_header_length : 0;
1133     }
1134   while (1)
1135     {
1136       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1137       n_bytes_left -= n_this_buffer;
1138       if (n_bytes_left == 0)
1139         break;
1140
1141       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1142       p0 = vlib_get_buffer (vm, p0->next_buffer);
1143       data_this_buffer = vlib_buffer_get_current (p0);
1144       n_this_buffer = clib_min (p0->current_length, n_bytes_left);
1145     }
1146
1147   sum16 = ~ip_csum_fold (sum0);
1148
1149   return sum16;
1150 }
1151
1152 u32
1153 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1154 {
1155   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1156   udp_header_t *udp0;
1157   u16 sum16;
1158
1159   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1160           || ip0->protocol == IP_PROTOCOL_UDP);
1161
1162   udp0 = (void *) (ip0 + 1);
1163   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1164     {
1165       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1166                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1167       return p0->flags;
1168     }
1169
1170   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1171
1172   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1173                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1174
1175   return p0->flags;
1176 }
1177 #endif
1178
1179 /* *INDENT-OFF* */
/* Feature arc "ip4-local": started from the "ip4-local" node and
 * terminated by "ip4-local-end-of-arc". */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
  .last_in_arc = "ip4-local-end-of-arc",
};
1186 /* *INDENT-ON* */
1187
1188 static inline void
1189 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1190                             ip4_header_t * ip, u8 is_udp, u8 * error,
1191                             u8 * good_tcp_udp)
1192 {
1193   u32 flags0;
1194   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1195   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1196   if (is_udp)
1197     {
1198       udp_header_t *udp;
1199       u32 ip_len, udp_len;
1200       i32 len_diff;
1201       udp = ip4_next_header (ip);
1202       /* Verify UDP length. */
1203       ip_len = clib_net_to_host_u16 (ip->length);
1204       udp_len = clib_net_to_host_u16 (udp->length);
1205
1206       len_diff = ip_len - udp_len;
1207       *good_tcp_udp &= len_diff >= 0;
1208       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1209     }
1210 }
1211
/*
 * Checksum-state predicates on a buffer pointer _b.
 *
 * Every argument and every expansion is fully parenthesized so the macros
 * behave as single expressions wherever they are used (for example under
 * '!' or with a pointer expression as argument).
 */

/* True when the buffer is flagged for TCP or UDP checksum offload */
#define ip4_local_csum_is_offloaded(_b)                                 \
    ((((_b)->flags) & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                  \
        || (((_b)->flags) & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/* True when a TCP/UDP packet has neither a computed nor an offloaded
 * checksum, i.e. it still has to be validated in software */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !((((_b)->flags) & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)       \
            || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0 */
#define ip4_local_csum_is_valid(_b)                                     \
    (((((_b)->flags) & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)               \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1223
1224 static inline void
1225 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1226                          ip4_header_t * ih, u8 * error)
1227 {
1228   u8 is_udp, is_tcp_udp, good_tcp_udp;
1229
1230   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1231   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1232
1233   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1234     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1235   else
1236     good_tcp_udp = ip4_local_csum_is_valid (b);
1237
1238   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1239   *error = (is_tcp_udp && !good_tcp_udp
1240             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1241 }
1242
1243 static inline void
1244 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1245                             ip4_header_t ** ih, u8 * error)
1246 {
1247   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1248
1249   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1250   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1251
1252   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1253   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1254
1255   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1256   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1257
1258   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1259                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1260     {
1261       if (is_tcp_udp[0])
1262         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1263                                     &good_tcp_udp[0]);
1264       if (is_tcp_udp[1])
1265         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1266                                     &good_tcp_udp[1]);
1267     }
1268
1269   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1270               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1271   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1272               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1273 }
1274
1275 static inline void
1276 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1277                               vlib_buffer_t * b, u16 * next, u8 error,
1278                               u8 head_of_feature_arc)
1279 {
1280   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1281   u32 next_index;
1282
1283   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1284   b->error = error ? error_node->errors[error] : 0;
1285   if (head_of_feature_arc)
1286     {
1287       next_index = *next;
1288       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1289         {
1290           vnet_feature_arc_start (arc_index,
1291                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1292                                   &next_index, b);
1293           *next = next_index;
1294         }
1295     }
1296 }
1297
/* Result of the most recent source-address check, carried between packets
 * by ip4_local_check_src() and ip4_local_check_src_x2(). */
typedef struct
{
  /* source address the stored result belongs to */
  ip4_address_t src;
  /* load-balance index found for that source */
  u32 lbi;
  /* error value the check produced for that source */
  u8 error;
  /* set by the owner at initialisation; while non-zero the stored result
   * is not used and a lookup is always performed */
  u8 first;
} ip4_local_last_check_t;
1305
1306 static inline void
1307 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1308                      ip4_local_last_check_t * last_check, u8 * error0)
1309 {
1310   ip4_fib_mtrie_leaf_t leaf0;
1311   ip4_fib_mtrie_t *mtrie0;
1312   const dpo_id_t *dpo0;
1313   load_balance_t *lb0;
1314   u32 lbi0;
1315
1316   vnet_buffer (b)->ip.fib_index =
1317     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1318     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1319
1320   if (PREDICT_FALSE (last_check->first ||
1321                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1322     {
1323       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1324       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1325       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1326       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1327       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1328
1329       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1330       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1331
1332       lb0 = load_balance_get (lbi0);
1333       dpo0 = load_balance_get_bucket_i (lb0, 0);
1334
1335       /*
1336        * Must have a route to source otherwise we drop the packet.
1337        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1338        *
1339        * The checks are:
1340        *  - the source is a recieve => it's from us => bogus, do this
1341        *    first since it sets a different error code.
1342        *  - uRPF check for any route to source - accept if passes.
1343        *  - allow packets destined to the broadcast address from unknown sources
1344        */
1345
1346       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1347                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1348                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1349       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1350                   && !fib_urpf_check_size (lb0->lb_urpf)
1351                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1352                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1353
1354       last_check->src.as_u32 = ip0->src_address.as_u32;
1355       last_check->lbi = lbi0;
1356       last_check->error = *error0;
1357     }
1358   else
1359     {
1360       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1361       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1362       *error0 = last_check->error;
1363       last_check->first = 0;
1364     }
1365 }
1366
1367 static inline void
1368 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1369                         ip4_local_last_check_t * last_check, u8 * error)
1370 {
1371   ip4_fib_mtrie_leaf_t leaf[2];
1372   ip4_fib_mtrie_t *mtrie[2];
1373   const dpo_id_t *dpo[2];
1374   load_balance_t *lb[2];
1375   u32 not_last_hit;
1376   u32 lbi[2];
1377
1378   not_last_hit = last_check->first;
1379   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1380   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1381
1382   vnet_buffer (b[0])->ip.fib_index =
1383     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1384     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1385     vnet_buffer (b[0])->ip.fib_index;
1386
1387   vnet_buffer (b[1])->ip.fib_index =
1388     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1389     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1390     vnet_buffer (b[1])->ip.fib_index;
1391
1392   if (PREDICT_FALSE (not_last_hit))
1393     {
1394       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1395       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1396
1397       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1398       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1399
1400       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1401                                            &ip[0]->src_address, 2);
1402       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1403                                            &ip[1]->src_address, 2);
1404
1405       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1406                                            &ip[0]->src_address, 3);
1407       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1408                                            &ip[1]->src_address, 3);
1409
1410       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1411       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1412
1413       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1414       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1415
1416       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1417       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1418
1419       lb[0] = load_balance_get (lbi[0]);
1420       lb[1] = load_balance_get (lbi[1]);
1421
1422       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1423       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1424
1425       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1426                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1427                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1428       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1429                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1430                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1431                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1432
1433       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1434                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1435                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1436       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1437                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1438                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1439                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1440
1441       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1442       last_check->lbi = lbi[1];
1443       last_check->error = error[1];
1444     }
1445   else
1446     {
1447       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1448       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1449
1450       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1451       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1452
1453       error[0] = last_check->error;
1454       error[1] = last_check->error;
1455       last_check->first = 0;
1456     }
1457 }
1458
/* Packet classes returned by ip4_local_classify().  L4 is the zero value,
 * which callers rely on when testing "is not L4" with a plain truth test. */
enum ip_local_packet_type_e
{
  /* neither a fragment nor flagged as NAT-ed */
  IP_LOCAL_PACKET_TYPE_L4,
  /* buffer carries VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_NAT,
  /* IPv4 fragment */
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1465
1466 /**
1467  * Determine packet type and next node.
1468  *
1469  * The expectation is that all packets that are not L4 will skip
1470  * checksums and source checks.
1471  */
1472 always_inline u8
1473 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1474 {
1475   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1476
1477   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1478     {
1479       *next = IP_LOCAL_NEXT_REASSEMBLY;
1480       return IP_LOCAL_PACKET_TYPE_FRAG;
1481     }
1482   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1483     {
1484       *next = lm->local_next_by_ip_protocol[ip->protocol];
1485       return IP_LOCAL_PACKET_TYPE_NAT;
1486     }
1487
1488   *next = lm->local_next_by_ip_protocol[ip->protocol];
1489   return IP_LOCAL_PACKET_TYPE_L4;
1490 }
1491
/*
 * Shared worker for the ip4-local and ip4-local-end-of-arc nodes.
 *
 * Every packet of the frame is classified to obtain its packet type and
 * next node.  When head_of_feature_arc is non-zero, packets classified as
 * L4 additionally go through the L4 checksum check and the source-address
 * check; other packet types, and all packets when head_of_feature_arc is
 * zero, skip both checks.  The per-packet next index and error are then
 * finalised and the whole frame is enqueued in one call.
 *
 * Returns the number of packets in the frame.
 */
static inline uword
ip4_local_inline (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
                  vlib_frame_t * frame, int head_of_feature_arc)
{
  u32 *from, n_left_from;
  /* errors are accounted against the ip4-input node */
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_input_node.index);
  u16 nexts[VLIB_FRAME_SIZE], *next;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  ip4_header_t *ip[2];
  u8 error[2], pt[2];

  ip4_local_last_check_t last_check = {
    /*
     * 0.0.0.0 can appear as the source address of an IP packet,
     * as can any other address, hence the need to use the 'first'
     * member to make sure the .lbi is initialised for the first
     * packet.
     */
    .src = {.as_u32 = 0},
    .lbi = ~0,
    .error = IP4_ERROR_UNKNOWN_PROTOCOL,
    .first = 1,
  };

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  vlib_get_buffers (vm, from, bufs, n_left_from);
  b = bufs;
  next = nexts;

  /* Two packets per iteration; the bound of 6 guarantees that b[4] and
   * b[5], which are prefetched below, exist. */
  while (n_left_from >= 6)
    {
      u8 not_batch = 0;

      /* Prefetch next iteration. */
      {
        vlib_prefetch_buffer_header (b[4], LOAD);
        vlib_prefetch_buffer_header (b[5], LOAD);

        CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
        CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
      }

      /* IP4_ERROR_UNKNOWN_PROTOCOL doubles as "no error found so far" */
      error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;

      ip[0] = vlib_buffer_get_current (b[0]);
      ip[1] = vlib_buffer_get_current (b[1]);

      vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
      vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;

      pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
      pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);

      /* non-zero when the two packets are of different types */
      not_batch = pt[0] ^ pt[1];

      /* nothing to check: not at the head of the arc, or both packets are
       * of the same non-L4 type */
      if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
        goto skip_checks;

      if (PREDICT_TRUE (not_batch == 0))
        {
          /* both packets are L4: use the two-packet variants */
          ip4_local_check_l4_csum_x2 (vm, b, ip, error);
          ip4_local_check_src_x2 (b, ip, &last_check, error);
        }
      else
        {
          /* mixed pair: check only the packet(s) classified as L4 */
          if (!pt[0])
            {
              ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
              ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
            }
          if (!pt[1])
            {
              ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
              ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
            }
        }

    skip_checks:

      ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
                                    head_of_feature_arc);
      ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
                                    head_of_feature_arc);

      b += 2;
      next += 2;
      n_left_from -= 2;
    }

  /* Remaining packets, one at a time */
  while (n_left_from > 0)
    {
      error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;

      ip[0] = vlib_buffer_get_current (b[0]);
      vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
      pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);

      /* only L4 packets at the head of the arc are checked */
      if (head_of_feature_arc == 0 || pt[0])
        goto skip_check;

      ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
      ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);

    skip_check:

      ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
                                    head_of_feature_arc);

      b += 1;
      next += 1;
      n_left_from -= 1;
    }

  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  return frame->n_vectors;
}
1615
/* Node function for "ip4-local": runs the shared worker as the head of the
 * feature arc, i.e. with checks enabled. */
VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
                               vlib_frame_t * frame)
{
  return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
}
1621
1622 /* *INDENT-OFF* */
/* Graph node registration for "ip4-local" and its fixed next nodes. */
VLIB_REGISTER_NODE (ip4_local_node) =
{
  .name = "ip4-local",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_next_nodes = IP_LOCAL_N_NEXT,
  .next_nodes =
  {
    [IP_LOCAL_NEXT_DROP] = "ip4-drop",
    [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
    [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
    [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
    [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
  },
};
1638 /* *INDENT-ON* */
1639
1640
/* Node function for "ip4-local-end-of-arc": runs the shared worker with
 * head_of_feature_arc = 0, so the checksum and source checks are skipped
 * and the feature arc is not started again. */
VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
                                          vlib_node_runtime_t * node,
                                          vlib_frame_t * frame)
{
  return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
}
1647
1648 /* *INDENT-OFF* */
/* Graph node registration for "ip4-local-end-of-arc", declared as a
 * sibling of "ip4-local". */
VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
  .name = "ip4-local-end-of-arc",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,
  .sibling_of = "ip4-local",
};
1656
/* Register "ip4-local-end-of-arc" on the "ip4-local" arc with no
 * runs_before constraint. */
VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
  .arc_name = "ip4-local",
  .node_name = "ip4-local-end-of-arc",
  .runs_before = 0, /* not before any other features */
};
1662 /* *INDENT-ON* */
1663
1664 #ifndef CLIB_MARCH_VARIANT
1665 void
1666 ip4_register_protocol (u32 protocol, u32 node_index)
1667 {
1668   vlib_main_t *vm = vlib_get_main ();
1669   ip4_main_t *im = &ip4_main;
1670   ip_lookup_main_t *lm = &im->lookup_main;
1671
1672   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1673   lm->local_next_by_ip_protocol[protocol] =
1674     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1675 }
1676 #endif
1677
1678 static clib_error_t *
1679 show_ip_local_command_fn (vlib_main_t * vm,
1680                           unformat_input_t * input, vlib_cli_command_t * cmd)
1681 {
1682   ip4_main_t *im = &ip4_main;
1683   ip_lookup_main_t *lm = &im->lookup_main;
1684   int i;
1685
1686   vlib_cli_output (vm, "Protocols handled by ip4_local");
1687   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1688     {
1689       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1690         {
1691           u32 node_index = vlib_get_node (vm,
1692                                           ip4_local_node.index)->
1693             next_nodes[lm->local_next_by_ip_protocol[i]];
1694           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1695                            node_index);
1696         }
1697     }
1698   return 0;
1699 }
1700
1701
1702
1703 /*?
1704  * Display the set of protocols handled by the local IPv4 stack.
1705  *
1706  * @cliexpar
1707  * Example of how to display local protocol table:
1708  * @cliexstart{show ip local}
1709  * Protocols handled by ip4_local
1710  * 1
1711  * 17
1712  * 47
1713  * @cliexend
1714 ?*/
1715 /* *INDENT-OFF* */
/* CLI registration binding "show ip local" to show_ip_local_command_fn. */
VLIB_CLI_COMMAND (show_ip_local, static) =
{
  .path = "show ip local",
  .function = show_ip_local_command_fn,
  .short_help = "show ip local",
};
1722 /* *INDENT-ON* */
1723
1724 always_inline uword
1725 ip4_arp_inline (vlib_main_t * vm,
1726                 vlib_node_runtime_t * node,
1727                 vlib_frame_t * frame, int is_glean)
1728 {
1729   vnet_main_t *vnm = vnet_get_main ();
1730   ip4_main_t *im = &ip4_main;
1731   ip_lookup_main_t *lm = &im->lookup_main;
1732   u32 *from, *to_next_drop;
1733   uword n_left_from, n_left_to_next_drop, next_index;
1734   u32 thread_index = vm->thread_index;
1735   u64 seed;
1736
1737   if (node->flags & VLIB_NODE_FLAG_TRACE)
1738     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1739
1740   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1741
1742   from = vlib_frame_vector_args (frame);
1743   n_left_from = frame->n_vectors;
1744   next_index = node->cached_next_index;
1745   if (next_index == IP4_ARP_NEXT_DROP)
1746     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1747
1748   while (n_left_from > 0)
1749     {
1750       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1751                            to_next_drop, n_left_to_next_drop);
1752
1753       while (n_left_from > 0 && n_left_to_next_drop > 0)
1754         {
1755           u32 pi0, bi0, adj_index0, sw_if_index0;
1756           ip_adjacency_t *adj0;
1757           vlib_buffer_t *p0, *b0;
1758           ip4_address_t resolve0;
1759           ethernet_arp_header_t *h0;
1760           vnet_hw_interface_t *hw_if0;
1761           u64 r0;
1762
1763           pi0 = from[0];
1764           p0 = vlib_get_buffer (vm, pi0);
1765
1766           from += 1;
1767           n_left_from -= 1;
1768           to_next_drop[0] = pi0;
1769           to_next_drop += 1;
1770           n_left_to_next_drop -= 1;
1771
1772           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1773           adj0 = adj_get (adj_index0);
1774
1775           if (is_glean)
1776             {
1777               /* resolve the packet's destination */
1778               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1779               resolve0 = ip0->dst_address;
1780             }
1781           else
1782             {
1783               /* resolve the incomplete adj */
1784               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1785             }
1786
1787           /* combine the address and interface for the hash key */
1788           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1789           r0 = (u64) resolve0.data_u32 << 32;
1790           r0 |= sw_if_index0;
1791
1792           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1793             {
1794               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1795               continue;
1796             }
1797
1798           /*
1799            * the adj has been updated to a rewrite but the node the DPO that got
1800            * us here hasn't - yet. no big deal. we'll drop while we wait.
1801            */
1802           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1803             {
1804               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1805               continue;
1806             }
1807
1808           /*
1809            * Can happen if the control-plane is programming tables
1810            * with traffic flowing; at least that's today's lame excuse.
1811            */
1812           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1813               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1814             {
1815               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1816               continue;
1817             }
1818           /* Send ARP request. */
1819           h0 =
1820             vlib_packet_template_get_packet (vm,
1821                                              &im->ip4_arp_request_packet_template,
1822                                              &bi0);
1823           /* Seems we're out of buffers */
1824           if (PREDICT_FALSE (!h0))
1825             {
1826               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1827               continue;
1828             }
1829
1830           b0 = vlib_get_buffer (vm, bi0);
1831
1832           /* copy the persistent fields from the original */
1833           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1834
1835           /* Add rewrite/encap string for ARP packet. */
1836           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1837
1838           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1839
1840           /* Src ethernet address in ARP header. */
1841           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
1842                                   hw_if0->hw_address);
1843           if (is_glean)
1844             {
1845               /* The interface's source address is stashed in the Glean Adj */
1846               h0->ip4_over_ethernet[0].ip4 =
1847                 adj0->sub_type.glean.receive_addr.ip4;
1848             }
1849           else
1850             {
1851               /* Src IP address in ARP header. */
1852               if (ip4_src_address_for_packet (lm, sw_if_index0,
1853                                               &h0->ip4_over_ethernet[0].ip4))
1854                 {
1855                   /* No source address available */
1856                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1857                   vlib_buffer_free (vm, &bi0, 1);
1858                   continue;
1859                 }
1860             }
1861           h0->ip4_over_ethernet[1].ip4 = resolve0;
1862
1863           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1864
1865           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1866           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1867           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1868
1869           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1870
1871           vlib_set_next_frame_buffer (vm, node,
1872                                       adj0->rewrite_header.next_index, bi0);
1873         }
1874
1875       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1876     }
1877
1878   return frame->n_vectors;
1879 }
1880
1881 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1882                              vlib_frame_t * frame)
1883 {
1884   return (ip4_arp_inline (vm, node, frame, 0));
1885 }
1886
1887 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1888                                vlib_frame_t * frame)
1889 {
1890   return (ip4_arp_inline (vm, node, frame, 1));
1891 }
1892
1893 static char *ip4_arp_error_strings[] = {
1894   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
1895   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
1896   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
1897   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1898   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1899   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1900 };
1901
1902 /* *INDENT-OFF* */
1903 VLIB_REGISTER_NODE (ip4_arp_node) =
1904 {
1905   .name = "ip4-arp",
1906   .vector_size = sizeof (u32),
1907   .format_trace = format_ip4_forward_next_trace,
1908   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1909   .error_strings = ip4_arp_error_strings,
1910   .n_next_nodes = IP4_ARP_N_NEXT,
1911   .next_nodes =
1912   {
1913     [IP4_ARP_NEXT_DROP] = "error-drop",
1914   },
1915 };
1916
1917 VLIB_REGISTER_NODE (ip4_glean_node) =
1918 {
1919   .name = "ip4-glean",
1920   .vector_size = sizeof (u32),
1921   .format_trace = format_ip4_forward_next_trace,
1922   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1923   .error_strings = ip4_arp_error_strings,
1924   .n_next_nodes = IP4_ARP_N_NEXT,
1925   .next_nodes = {
1926   [IP4_ARP_NEXT_DROP] = "error-drop",
1927   },
1928 };
1929 /* *INDENT-ON* */
1930
1931 #define foreach_notrace_ip4_arp_error           \
1932 _(THROTTLED)                                    \
1933 _(RESOLVED)                                     \
1934 _(NO_BUFFERS)                                   \
1935 _(REQUEST_SENT)                                 \
1936 _(NON_ARP_ADJ)                                  \
1937 _(NO_SOURCE_ADDRESS)
1938
1939 static clib_error_t *
1940 arp_notrace_init (vlib_main_t * vm)
1941 {
1942   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1943
1944   /* don't trace ARP request packets */
1945 #define _(a)                                    \
1946     vnet_pcap_drop_trace_filter_add_del         \
1947         (rt->errors[IP4_ARP_ERROR_##a],         \
1948          1 /* is_add */);
1949   foreach_notrace_ip4_arp_error;
1950 #undef _
1951   return 0;
1952 }
1953
1954 VLIB_INIT_FUNCTION (arp_notrace_init);
1955
1956
1957 #ifndef CLIB_MARCH_VARIANT
1958 /* Send an ARP request to see if given destination is reachable on given interface. */
1959 clib_error_t *
1960 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1961                     u8 refresh)
1962 {
1963   vnet_main_t *vnm = vnet_get_main ();
1964   ip4_main_t *im = &ip4_main;
1965   ethernet_arp_header_t *h;
1966   ip4_address_t *src;
1967   ip_interface_address_t *ia;
1968   ip_adjacency_t *adj;
1969   vnet_hw_interface_t *hi;
1970   vnet_sw_interface_t *si;
1971   vlib_buffer_t *b;
1972   adj_index_t ai;
1973   u32 bi = 0;
1974   u8 unicast_rewrite = 0;
1975
1976   si = vnet_get_sw_interface (vnm, sw_if_index);
1977
1978   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1979     {
1980       return clib_error_return (0, "%U: interface %U down",
1981                                 format_ip4_address, dst,
1982                                 format_vnet_sw_if_index_name, vnm,
1983                                 sw_if_index);
1984     }
1985
1986   src =
1987     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1988   if (!src)
1989     {
1990       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1991       return clib_error_return
1992         (0,
1993          "no matching interface address for destination %U (interface %U)",
1994          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
1995          sw_if_index);
1996     }
1997
1998   h = vlib_packet_template_get_packet (vm,
1999                                        &im->ip4_arp_request_packet_template,
2000                                        &bi);
2001
2002   if (!h)
2003     return clib_error_return (0, "ARP request packet allocation failed");
2004
2005   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2006   if (PREDICT_FALSE (!hi->hw_address))
2007     {
2008       return clib_error_return (0, "%U: interface %U do not support ip probe",
2009                                 format_ip4_address, dst,
2010                                 format_vnet_sw_if_index_name, vnm,
2011                                 sw_if_index);
2012     }
2013
2014   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2015
2016   h->ip4_over_ethernet[0].ip4 = src[0];
2017   h->ip4_over_ethernet[1].ip4 = dst[0];
2018
2019   b = vlib_get_buffer (vm, bi);
2020   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2021     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2022
2023   ip46_address_t nh = {
2024     .ip4 = *dst,
2025   };
2026
2027   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2028                             VNET_LINK_IP4, &nh, sw_if_index);
2029   adj = adj_get (ai);
2030
2031   /* Peer has been previously resolved, retrieve glean adj instead */
2032   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2033     {
2034       if (refresh)
2035         unicast_rewrite = 1;
2036       else
2037         {
2038           adj_unlock (ai);
2039           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2040                                       VNET_LINK_IP4, sw_if_index, &nh);
2041           adj = adj_get (ai);
2042         }
2043     }
2044
2045   /* Add encapsulation string for software interface (e.g. ethernet header). */
2046   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2047   if (unicast_rewrite)
2048     {
2049       u16 *etype = vlib_buffer_get_current (b) - 2;
2050       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2051     }
2052   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2053
2054   {
2055     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2056     u32 *to_next = vlib_frame_vector_args (f);
2057     to_next[0] = bi;
2058     f->n_vectors = 1;
2059     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2060   }
2061
2062   adj_unlock (ai);
2063   return /* no error */ 0;
2064 }
2065 #endif
2066
/* Next-node slots of ip4-rewrite (and of the nodes registered as its
 * siblings). */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT = 3,       /* Last: number of slots above */
} ip4_rewrite_next_t;
2074
/**
 * The bits of an IPv4 address that are masked in to construct a multicast
 * MAC address
 */
2079 #if CLIB_ARCH_IS_BIG_ENDIAN
2080 #define IP4_MCAST_ADDR_MASK 0x007fffff
2081 #else
2082 #define IP4_MCAST_ADDR_MASK 0xffff7f00
2083 #endif
2084
2085 always_inline void
2086 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2087                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2088 {
2089   if (packet_len > adj_packet_bytes)
2090     {
2091       *error = IP4_ERROR_MTU_EXCEEDED;
2092       if (df)
2093         {
2094           icmp4_error_set_vnet_buffer
2095             (b, ICMP4_destination_unreachable,
2096              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2097              adj_packet_bytes);
2098           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2099         }
2100       else
2101         {
2102           /* IP fragmentation */
2103           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2104                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2105           *next = IP4_REWRITE_NEXT_FRAGMENT;
2106         }
2107     }
2108 }
2109
2110 /* Decrement TTL & update checksum.
2111    Works either endian, so no need for byte swap. */
2112 static_always_inline void
2113 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2114                             u32 * error)
2115 {
2116   i32 ttl;
2117   u32 checksum;
2118   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2119     {
2120       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2121       return;
2122     }
2123
2124   ttl = ip->ttl;
2125
2126   /* Input node should have reject packets with ttl 0. */
2127   ASSERT (ip->ttl > 0);
2128
2129   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2130   checksum += checksum >= 0xffff;
2131
2132   ip->checksum = checksum;
2133   ttl -= 1;
2134   ip->ttl = ttl;
2135
2136   /*
2137    * If the ttl drops below 1 when forwarding, generate
2138    * an ICMP response.
2139    */
2140   if (PREDICT_FALSE (ttl <= 0))
2141     {
2142       *error = IP4_ERROR_TIME_EXPIRED;
2143       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2144       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2145                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2146                                    0);
2147       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2148     }
2149
2150   /* Verify checksum. */
2151   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2152           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2153 }
2154
2155
2156 always_inline uword
2157 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2158                              vlib_node_runtime_t * node,
2159                              vlib_frame_t * frame,
2160                              int do_counters, int is_midchain, int is_mcast,
2161                              int do_gso)
2162 {
2163   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2164   u32 *from = vlib_frame_vector_args (frame);
2165   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2166   u16 nexts[VLIB_FRAME_SIZE], *next;
2167   u32 n_left_from;
2168   vlib_node_runtime_t *error_node =
2169     vlib_node_get_runtime (vm, ip4_input_node.index);
2170
2171   n_left_from = frame->n_vectors;
2172   u32 thread_index = vm->thread_index;
2173
2174   vlib_get_buffers (vm, from, bufs, n_left_from);
2175   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2176
2177   if (n_left_from >= 6)
2178     {
2179       int i;
2180       for (i = 2; i < 6; i++)
2181         vlib_prefetch_buffer_header (bufs[i], LOAD);
2182     }
2183
2184   next = nexts;
2185   b = bufs;
2186   while (n_left_from >= 8)
2187     {
2188       ip_adjacency_t *adj0, *adj1;
2189       ip4_header_t *ip0, *ip1;
2190       u32 rw_len0, error0, adj_index0;
2191       u32 rw_len1, error1, adj_index1;
2192       u32 tx_sw_if_index0, tx_sw_if_index1;
2193       u8 *p;
2194
2195       vlib_prefetch_buffer_header (b[6], LOAD);
2196       vlib_prefetch_buffer_header (b[7], LOAD);
2197
2198       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2199       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2200
2201       /*
2202        * pre-fetch the per-adjacency counters
2203        */
2204       if (do_counters)
2205         {
2206           vlib_prefetch_combined_counter (&adjacency_counters,
2207                                           thread_index, adj_index0);
2208           vlib_prefetch_combined_counter (&adjacency_counters,
2209                                           thread_index, adj_index1);
2210         }
2211
2212       ip0 = vlib_buffer_get_current (b[0]);
2213       ip1 = vlib_buffer_get_current (b[1]);
2214
2215       error0 = error1 = IP4_ERROR_NONE;
2216
2217       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2218       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2219
2220       /* Rewrite packet header and updates lengths. */
2221       adj0 = adj_get (adj_index0);
2222       adj1 = adj_get (adj_index1);
2223
2224       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2225       rw_len0 = adj0[0].rewrite_header.data_bytes;
2226       rw_len1 = adj1[0].rewrite_header.data_bytes;
2227       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2228       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2229
2230       p = vlib_buffer_get_current (b[2]);
2231       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2232       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2233
2234       p = vlib_buffer_get_current (b[3]);
2235       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2236       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2237
2238       /* Check MTU of outgoing interface. */
2239       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2240       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2241
2242       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2243         ip0_len = gso_mtu_sz (b[0]);
2244       if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2245         ip1_len = gso_mtu_sz (b[1]);
2246
2247       ip4_mtu_check (b[0], ip0_len,
2248                      adj0[0].rewrite_header.max_l3_packet_bytes,
2249                      ip0->flags_and_fragment_offset &
2250                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2251                      next + 0, &error0);
2252       ip4_mtu_check (b[1], ip1_len,
2253                      adj1[0].rewrite_header.max_l3_packet_bytes,
2254                      ip1->flags_and_fragment_offset &
2255                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2256                      next + 1, &error1);
2257
2258       if (is_mcast)
2259         {
2260           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2261                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2262                     IP4_ERROR_SAME_INTERFACE : error0);
2263           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2264                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2265                     IP4_ERROR_SAME_INTERFACE : error1);
2266         }
2267
2268       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2269        * to see the IP header */
2270       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2271         {
2272           u32 next_index = adj0[0].rewrite_header.next_index;
2273           b[0]->current_data -= rw_len0;
2274           b[0]->current_length += rw_len0;
2275           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2276           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2277
2278           if (PREDICT_FALSE
2279               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2280             vnet_feature_arc_start (lm->output_feature_arc_index,
2281                                     tx_sw_if_index0, &next_index, b[0]);
2282           next[0] = next_index;
2283         }
2284       else
2285         {
2286           b[0]->error = error_node->errors[error0];
2287         }
2288       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2289         {
2290           u32 next_index = adj1[0].rewrite_header.next_index;
2291           b[1]->current_data -= rw_len1;
2292           b[1]->current_length += rw_len1;
2293
2294           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2295           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2296
2297           if (PREDICT_FALSE
2298               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2299             vnet_feature_arc_start (lm->output_feature_arc_index,
2300                                     tx_sw_if_index1, &next_index, b[1]);
2301           next[1] = next_index;
2302         }
2303       else
2304         {
2305           b[1]->error = error_node->errors[error1];
2306         }
2307       if (is_midchain)
2308         {
2309           calc_checksums (vm, b[0]);
2310           calc_checksums (vm, b[1]);
2311         }
2312       /* Guess we are only writing on simple Ethernet header. */
2313       vnet_rewrite_two_headers (adj0[0], adj1[0],
2314                                 ip0, ip1, sizeof (ethernet_header_t));
2315
2316       /*
2317        * Bump the per-adjacency counters
2318        */
2319       if (do_counters)
2320         {
2321           vlib_increment_combined_counter
2322             (&adjacency_counters,
2323              thread_index,
2324              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2325
2326           vlib_increment_combined_counter
2327             (&adjacency_counters,
2328              thread_index,
2329              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2330         }
2331
2332       if (is_midchain)
2333         {
2334           if (adj0->sub_type.midchain.fixup_func)
2335             adj0->sub_type.midchain.fixup_func
2336               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2337           if (adj1->sub_type.midchain.fixup_func)
2338             adj1->sub_type.midchain.fixup_func
2339               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2340         }
2341
2342       if (is_mcast)
2343         {
2344           /*
2345            * copy bytes from the IP address into the MAC rewrite
2346            */
2347           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2348                                       adj0->rewrite_header.dst_mcast_offset,
2349                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2350           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2351                                       adj1->rewrite_header.dst_mcast_offset,
2352                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2353         }
2354
2355       next += 2;
2356       b += 2;
2357       n_left_from -= 2;
2358     }
2359
2360   while (n_left_from > 0)
2361     {
2362       ip_adjacency_t *adj0;
2363       ip4_header_t *ip0;
2364       u32 rw_len0, adj_index0, error0;
2365       u32 tx_sw_if_index0;
2366
2367       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2368
2369       adj0 = adj_get (adj_index0);
2370
2371       if (do_counters)
2372         vlib_prefetch_combined_counter (&adjacency_counters,
2373                                         thread_index, adj_index0);
2374
2375       ip0 = vlib_buffer_get_current (b[0]);
2376
2377       error0 = IP4_ERROR_NONE;
2378
2379       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2380
2381
2382       /* Update packet buffer attributes/set output interface. */
2383       rw_len0 = adj0[0].rewrite_header.data_bytes;
2384       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2385
2386       /* Check MTU of outgoing interface. */
2387       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2388       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2389         ip0_len = gso_mtu_sz (b[0]);
2390
2391       ip4_mtu_check (b[0], ip0_len,
2392                      adj0[0].rewrite_header.max_l3_packet_bytes,
2393                      ip0->flags_and_fragment_offset &
2394                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2395                      next + 0, &error0);
2396
2397       if (is_mcast)
2398         {
2399           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2400                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2401                     IP4_ERROR_SAME_INTERFACE : error0);
2402         }
2403
2404       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2405        * to see the IP header */
2406       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2407         {
2408           u32 next_index = adj0[0].rewrite_header.next_index;
2409           b[0]->current_data -= rw_len0;
2410           b[0]->current_length += rw_len0;
2411           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2412           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2413
2414           if (PREDICT_FALSE
2415               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2416             vnet_feature_arc_start (lm->output_feature_arc_index,
2417                                     tx_sw_if_index0, &next_index, b[0]);
2418           next[0] = next_index;
2419         }
2420       else
2421         {
2422           b[0]->error = error_node->errors[error0];
2423         }
2424       if (is_midchain)
2425         {
2426           calc_checksums (vm, b[0]);
2427         }
2428       /* Guess we are only writing on simple Ethernet header. */
2429       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2430
2431       if (do_counters)
2432         vlib_increment_combined_counter
2433           (&adjacency_counters,
2434            thread_index, adj_index0, 1,
2435            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2436
2437       if (is_midchain)
2438         {
2439           if (adj0->sub_type.midchain.fixup_func)
2440             adj0->sub_type.midchain.fixup_func
2441               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2442         }
2443
2444       if (is_mcast)
2445         {
2446           /*
2447            * copy bytes from the IP address into the MAC rewrite
2448            */
2449           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2450                                       adj0->rewrite_header.dst_mcast_offset,
2451                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2452         }
2453
2454       next += 1;
2455       b += 1;
2456       n_left_from -= 1;
2457     }
2458
2459
2460   /* Need to do trace after rewrites to pick up new packet data. */
2461   if (node->flags & VLIB_NODE_FLAG_TRACE)
2462     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2463
2464   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2465   return frame->n_vectors;
2466 }
2467
2468 always_inline uword
2469 ip4_rewrite_inline (vlib_main_t * vm,
2470                     vlib_node_runtime_t * node,
2471                     vlib_frame_t * frame,
2472                     int do_counters, int is_midchain, int is_mcast)
2473 {
2474   vnet_main_t *vnm = vnet_get_main ();
2475   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2476     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2477                                         is_midchain, is_mcast,
2478                                         1 /* do_gso */ );
2479   else
2480     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2481                                         is_midchain, is_mcast,
2482                                         0 /* no do_gso */ );
2483 }
2484
2485
2486 /** @brief IPv4 rewrite node.
2487     @node ip4-rewrite
2488
2489     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2490     header checksum, fetch the ip adjacency, check the outbound mtu,
2491     apply the adjacency rewrite, and send pkts to the adjacency
2492     rewrite header's rewrite_next_index.
2493
2494     @param vm vlib_main_t corresponding to the current thread
2495     @param node vlib_node_runtime_t
2496     @param frame vlib_frame_t whose contents should be dispatched
2497
2498     @par Graph mechanics: buffer metadata, next index usage
2499
2500     @em Uses:
2501     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2502         - the rewrite adjacency index
2503     - <code>adj->lookup_next_index</code>
2504         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2505           the packet will be dropped.
2506     - <code>adj->rewrite_header</code>
2507         - Rewrite string length, rewrite string, next_index
2508
2509     @em Sets:
2510     - <code>b->current_data, b->current_length</code>
2511         - Updated net of applying the rewrite string
2512
2513     <em>Next Indices:</em>
2514     - <code> adj->rewrite_header.next_index </code>
2515       or @c ip4-drop
2516 */
2517
2518 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2519                                  vlib_frame_t * frame)
2520 {
2521   if (adj_are_counters_enabled ())
2522     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2523   else
2524     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2525 }
2526
2527 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2528                                        vlib_node_runtime_t * node,
2529                                        vlib_frame_t * frame)
2530 {
2531   if (adj_are_counters_enabled ())
2532     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2533   else
2534     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2535 }
2536
2537 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2538                                   vlib_node_runtime_t * node,
2539                                   vlib_frame_t * frame)
2540 {
2541   if (adj_are_counters_enabled ())
2542     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2543   else
2544     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2545 }
2546
2547 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2548                                        vlib_node_runtime_t * node,
2549                                        vlib_frame_t * frame)
2550 {
2551   if (adj_are_counters_enabled ())
2552     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2553   else
2554     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2555 }
2556
2557 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2558                                         vlib_node_runtime_t * node,
2559                                         vlib_frame_t * frame)
2560 {
2561   if (adj_are_counters_enabled ())
2562     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2563   else
2564     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2565 }
2566
2567 /* *INDENT-OFF* */
2568 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2569   .name = "ip4-rewrite",
2570   .vector_size = sizeof (u32),
2571
2572   .format_trace = format_ip4_rewrite_trace,
2573
2574   .n_next_nodes = IP4_REWRITE_N_NEXT,
2575   .next_nodes = {
2576     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2577     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2578     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2579   },
2580 };
2581
2582 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2583   .name = "ip4-rewrite-bcast",
2584   .vector_size = sizeof (u32),
2585
2586   .format_trace = format_ip4_rewrite_trace,
2587   .sibling_of = "ip4-rewrite",
2588 };
2589
2590 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2591   .name = "ip4-rewrite-mcast",
2592   .vector_size = sizeof (u32),
2593
2594   .format_trace = format_ip4_rewrite_trace,
2595   .sibling_of = "ip4-rewrite",
2596 };
2597
2598 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2599   .name = "ip4-mcast-midchain",
2600   .vector_size = sizeof (u32),
2601
2602   .format_trace = format_ip4_rewrite_trace,
2603   .sibling_of = "ip4-rewrite",
2604 };
2605
2606 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2607   .name = "ip4-midchain",
2608   .vector_size = sizeof (u32),
2609   .format_trace = format_ip4_forward_next_trace,
2610   .sibling_of =  "ip4-rewrite",
2611 };
2612 /* *INDENT-ON* */
2613
2614 static int
2615 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2616 {
2617   ip4_fib_mtrie_t *mtrie0;
2618   ip4_fib_mtrie_leaf_t leaf0;
2619   u32 lbi0;
2620
2621   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2622
2623   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2624   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2625   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2626
2627   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2628
2629   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2630 }
2631
2632 static clib_error_t *
2633 test_lookup_command_fn (vlib_main_t * vm,
2634                         unformat_input_t * input, vlib_cli_command_t * cmd)
2635 {
2636   ip4_fib_t *fib;
2637   u32 table_id = 0;
2638   f64 count = 1;
2639   u32 n;
2640   int i;
2641   ip4_address_t ip4_base_address;
2642   u64 errors = 0;
2643
2644   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2645     {
2646       if (unformat (input, "table %d", &table_id))
2647         {
2648           /* Make sure the entry exists. */
2649           fib = ip4_fib_get (table_id);
2650           if ((fib) && (fib->index != table_id))
2651             return clib_error_return (0, "<fib-index> %d does not exist",
2652                                       table_id);
2653         }
2654       else if (unformat (input, "count %f", &count))
2655         ;
2656
2657       else if (unformat (input, "%U",
2658                          unformat_ip4_address, &ip4_base_address))
2659         ;
2660       else
2661         return clib_error_return (0, "unknown input `%U'",
2662                                   format_unformat_error, input);
2663     }
2664
2665   n = count;
2666
2667   for (i = 0; i < n; i++)
2668     {
2669       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2670         errors++;
2671
2672       ip4_base_address.as_u32 =
2673         clib_host_to_net_u32 (1 +
2674                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2675     }
2676
2677   if (errors)
2678     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2679   else
2680     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2681
2682   return 0;
2683 }
2684
2685 /*?
2686  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2687  * given FIB table to determine if there is a conflict with the
2688  * adjacency table. The fib-id can be determined by using the
2689  * '<em>show ip fib</em>' command. If fib-id is not entered, the default
2690  * value of 0 is used.
2691  *
2692  * @todo This command uses fib-id, other commands use table-id (not
2693  * just a name, they are different indexes). Would like to change this
2694  * to table-id for consistency.
2695  *
2696  * @cliexpar
2697  * Example of how to run the test lookup command:
2698  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2699  * No errors in 2 lookups
2700  * @cliexend
2701 ?*/
2702 /* *INDENT-OFF* */
2703 VLIB_CLI_COMMAND (lookup_test_command, static) =
2704 {
2705   .path = "test lookup",
2706   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2707   .function = test_lookup_command_fn,
2708 };
2709 /* *INDENT-ON* */
2710
2711 #ifndef CLIB_MARCH_VARIANT
2712 int
2713 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2714 {
2715   u32 fib_index;
2716
2717   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2718
2719   if (~0 == fib_index)
2720     return VNET_API_ERROR_NO_SUCH_FIB;
2721
2722   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2723                                   flow_hash_config);
2724
2725   return 0;
2726 }
2727 #endif
2728
2729 static clib_error_t *
2730 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2731                              unformat_input_t * input,
2732                              vlib_cli_command_t * cmd)
2733 {
2734   int matched = 0;
2735   u32 table_id = 0;
2736   u32 flow_hash_config = 0;
2737   int rv;
2738
2739   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2740     {
2741       if (unformat (input, "table %d", &table_id))
2742         matched = 1;
2743 #define _(a,v) \
2744     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2745       foreach_flow_hash_bit
2746 #undef _
2747         else
2748         break;
2749     }
2750
2751   if (matched == 0)
2752     return clib_error_return (0, "unknown input `%U'",
2753                               format_unformat_error, input);
2754
2755   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2756   switch (rv)
2757     {
2758     case 0:
2759       break;
2760
2761     case VNET_API_ERROR_NO_SUCH_FIB:
2762       return clib_error_return (0, "no such FIB table %d", table_id);
2763
2764     default:
2765       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2766       break;
2767     }
2768
2769   return 0;
2770 }
2771
2772 /*?
2773  * Configure the set of IPv4 fields used by the flow hash.
2774  *
2775  * @cliexpar
2776  * Example of how to set the flow hash on a given table:
2777  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2778  * Example of how to display the configured flow hash:
2779  * @cliexstart{show ip fib}
2780  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2781  * 0.0.0.0/0
2782  *   unicast-ip4-chain
2783  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2784  *     [0] [@0]: dpo-drop ip6
2785  * 0.0.0.0/32
2786  *   unicast-ip4-chain
2787  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2788  *     [0] [@0]: dpo-drop ip6
2789  * 224.0.0.0/8
2790  *   unicast-ip4-chain
2791  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2792  *     [0] [@0]: dpo-drop ip6
2793  * 6.0.1.2/32
2794  *   unicast-ip4-chain
2795  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2796  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2797  * 7.0.0.1/32
2798  *   unicast-ip4-chain
2799  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2800  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2801  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2802  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2803  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2804  * 240.0.0.0/8
2805  *   unicast-ip4-chain
2806  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2807  *     [0] [@0]: dpo-drop ip6
2808  * 255.255.255.255/32
2809  *   unicast-ip4-chain
2810  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2811  *     [0] [@0]: dpo-drop ip6
2812  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2813  * 0.0.0.0/0
2814  *   unicast-ip4-chain
2815  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2816  *     [0] [@0]: dpo-drop ip6
2817  * 0.0.0.0/32
2818  *   unicast-ip4-chain
2819  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2820  *     [0] [@0]: dpo-drop ip6
2821  * 172.16.1.0/24
2822  *   unicast-ip4-chain
2823  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2824  *     [0] [@4]: ipv4-glean: af_packet0
2825  * 172.16.1.1/32
2826  *   unicast-ip4-chain
2827  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2828  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2829  * 172.16.1.2/32
2830  *   unicast-ip4-chain
2831  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2832  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2833  * 172.16.2.0/24
2834  *   unicast-ip4-chain
2835  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2836  *     [0] [@4]: ipv4-glean: af_packet1
2837  * 172.16.2.1/32
2838  *   unicast-ip4-chain
2839  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2840  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2841  * 224.0.0.0/8
2842  *   unicast-ip4-chain
2843  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2844  *     [0] [@0]: dpo-drop ip6
2845  * 240.0.0.0/8
2846  *   unicast-ip4-chain
2847  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2848  *     [0] [@0]: dpo-drop ip6
2849  * 255.255.255.255/32
2850  *   unicast-ip4-chain
2851  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2852  *     [0] [@0]: dpo-drop ip6
2853  * @cliexend
2854 ?*/
2855 /* *INDENT-OFF* */
2856 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2857 {
2858   .path = "set ip flow-hash",
2859   .short_help =
2860   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2861   .function = set_ip_flow_hash_command_fn,
2862 };
2863 /* *INDENT-ON* */
2864
2865 #ifndef CLIB_MARCH_VARIANT
2866 int
2867 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2868                              u32 table_index)
2869 {
2870   vnet_main_t *vnm = vnet_get_main ();
2871   vnet_interface_main_t *im = &vnm->interface_main;
2872   ip4_main_t *ipm = &ip4_main;
2873   ip_lookup_main_t *lm = &ipm->lookup_main;
2874   vnet_classify_main_t *cm = &vnet_classify_main;
2875   ip4_address_t *if_addr;
2876
2877   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2878     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2879
2880   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2881     return VNET_API_ERROR_NO_SUCH_ENTRY;
2882
2883   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2884   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2885
2886   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2887
2888   if (NULL != if_addr)
2889     {
2890       fib_prefix_t pfx = {
2891         .fp_len = 32,
2892         .fp_proto = FIB_PROTOCOL_IP4,
2893         .fp_addr.ip4 = *if_addr,
2894       };
2895       u32 fib_index;
2896
2897       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2898                                                        sw_if_index);
2899
2900
2901       if (table_index != (u32) ~ 0)
2902         {
2903           dpo_id_t dpo = DPO_INVALID;
2904
2905           dpo_set (&dpo,
2906                    DPO_CLASSIFY,
2907                    DPO_PROTO_IP4,
2908                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2909
2910           fib_table_entry_special_dpo_add (fib_index,
2911                                            &pfx,
2912                                            FIB_SOURCE_CLASSIFY,
2913                                            FIB_ENTRY_FLAG_NONE, &dpo);
2914           dpo_reset (&dpo);
2915         }
2916       else
2917         {
2918           fib_table_entry_special_remove (fib_index,
2919                                           &pfx, FIB_SOURCE_CLASSIFY);
2920         }
2921     }
2922
2923   return 0;
2924 }
2925 #endif
2926
2927 static clib_error_t *
2928 set_ip_classify_command_fn (vlib_main_t * vm,
2929                             unformat_input_t * input,
2930                             vlib_cli_command_t * cmd)
2931 {
2932   u32 table_index = ~0;
2933   int table_index_set = 0;
2934   u32 sw_if_index = ~0;
2935   int rv;
2936
2937   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2938     {
2939       if (unformat (input, "table-index %d", &table_index))
2940         table_index_set = 1;
2941       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2942                          vnet_get_main (), &sw_if_index))
2943         ;
2944       else
2945         break;
2946     }
2947
2948   if (table_index_set == 0)
2949     return clib_error_return (0, "classify table-index must be specified");
2950
2951   if (sw_if_index == ~0)
2952     return clib_error_return (0, "interface / subif must be specified");
2953
2954   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2955
2956   switch (rv)
2957     {
2958     case 0:
2959       break;
2960
2961     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2962       return clib_error_return (0, "No such interface");
2963
2964     case VNET_API_ERROR_NO_SUCH_ENTRY:
2965       return clib_error_return (0, "No such classifier table");
2966     }
2967   return 0;
2968 }
2969
2970 /*?
2971  * Assign a classification table to an interface. The classification
2972  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2973  * commands. Once the table is created, use this command to filter packets
2974  * on an interface.
2975  *
2976  * @cliexpar
2977  * Example of how to assign a classification table to an interface:
2978  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2979 ?*/
2980 /* *INDENT-OFF* */
2981 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2982 {
2983     .path = "set ip classify",
2984     .short_help =
2985     "set ip classify intfc <interface> table-index <classify-idx>",
2986     .function = set_ip_classify_command_fn,
2987 };
2988 /* *INDENT-ON* */
2989
2990 static clib_error_t *
2991 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2992 {
2993   ip4_main_t *im = &ip4_main;
2994   uword heapsize = 0;
2995
2996   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2997     {
2998       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
2999         ;
3000       else
3001         return clib_error_return (0,
3002                                   "invalid heap-size parameter `%U'",
3003                                   format_unformat_error, input);
3004     }
3005
3006   im->mtrie_heap_size = heapsize;
3007
3008   return 0;
3009 }
3010
3011 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3012
3013 /*
3014  * fd.io coding-style-patch-verification: ON
3015  *
3016  * Local Variables:
3017  * eval: (c-set-style "gnu")
3018  * End:
3019  */