eb2b7ee04bc4ad61d93a8767949c5e73727a09d6
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/receive_dpo.h>
56 #include <vnet/dpo/classify_dpo.h>
57 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
58 #include <vnet/adj/adj_dp.h>
59 #include <vnet/pg/pg.h>
60
61 #include <vnet/ip/ip4_forward.h>
62 #include <vnet/interface_output.h>
63 #include <vnet/classify/vnet_classify.h>
64 #include <vnet/ip/reass/ip4_full_reass.h>
65
66 /** @brief IPv4 lookup node.
67     @node ip4-lookup
68
69     This is the main IPv4 lookup dispatch node.
70
71     @param vm vlib_main_t corresponding to the current thread
72     @param node vlib_node_runtime_t
73     @param frame vlib_frame_t whose contents should be dispatched
74
75     @par Graph mechanics: buffer metadata, next index usage
76
77     @em Uses:
78     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
79         - Indicates the @c sw_if_index value of the interface that the
80           packet was received on.
81     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
82         - When the value is @c ~0 then the node performs a longest prefix
83           match (LPM) for the packet destination address in the FIB attached
84           to the receive interface.
85         - Otherwise perform LPM for the packet destination address in the
86           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
87           value (0, 1, ...) and not a VRF id.
88
89     @em Sets:
90     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
91         - The lookup result adjacency index.
92
93     <em>Next Index:</em>
94     - Dispatches the packet to the node index found in
95       ip_adjacency_t @c adj->lookup_next_index
96       (where @c adj is the lookup result adjacency).
97 */
98 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
99                                 vlib_frame_t * frame)
100 {
101   return ip4_lookup_inline (vm, node, frame);
102 }
103
104 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
105
106 VLIB_REGISTER_NODE (ip4_lookup_node) =
107 {
108   .name = "ip4-lookup",
109   .vector_size = sizeof (u32),
110   .format_trace = format_ip4_lookup_trace,
111   .n_next_nodes = IP_LOOKUP_N_NEXT,
112   .next_nodes = IP4_LOOKUP_NEXT_NODES,
113 };
114
115 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
116                                       vlib_node_runtime_t * node,
117                                       vlib_frame_t * frame)
118 {
119   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
120   u32 n_left, *from;
121   u32 thread_index = vm->thread_index;
122   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
123   u16 nexts[VLIB_FRAME_SIZE], *next;
124
125   from = vlib_frame_vector_args (frame);
126   n_left = frame->n_vectors;
127   next = nexts;
128
129   vlib_get_buffers (vm, from, bufs, n_left);
130
131   while (n_left >= 4)
132     {
133       const load_balance_t *lb0, *lb1;
134       const ip4_header_t *ip0, *ip1;
135       u32 lbi0, hc0, lbi1, hc1;
136       const dpo_id_t *dpo0, *dpo1;
137
138       /* Prefetch next iteration. */
139       {
140         vlib_prefetch_buffer_header (b[2], LOAD);
141         vlib_prefetch_buffer_header (b[3], LOAD);
142
143         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
144         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
145       }
146
147       ip0 = vlib_buffer_get_current (b[0]);
148       ip1 = vlib_buffer_get_current (b[1]);
149       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
150       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
151
152       lb0 = load_balance_get (lbi0);
153       lb1 = load_balance_get (lbi1);
154
155       /*
156        * this node is for via FIBs we can re-use the hash value from the
157        * to node if present.
158        * We don't want to use the same hash value at each level in the recursion
159        * graph as that would lead to polarisation
160        */
161       hc0 = hc1 = 0;
162
163       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
164         {
165           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
166             {
167               hc0 = vnet_buffer (b[0])->ip.flow_hash =
168                 vnet_buffer (b[0])->ip.flow_hash >> 1;
169             }
170           else
171             {
172               hc0 = vnet_buffer (b[0])->ip.flow_hash =
173                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
174             }
175           dpo0 = load_balance_get_fwd_bucket
176             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
177         }
178       else
179         {
180           dpo0 = load_balance_get_bucket_i (lb0, 0);
181         }
182       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
183         {
184           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
185             {
186               hc1 = vnet_buffer (b[1])->ip.flow_hash =
187                 vnet_buffer (b[1])->ip.flow_hash >> 1;
188             }
189           else
190             {
191               hc1 = vnet_buffer (b[1])->ip.flow_hash =
192                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
193             }
194           dpo1 = load_balance_get_fwd_bucket
195             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
196         }
197       else
198         {
199           dpo1 = load_balance_get_bucket_i (lb1, 0);
200         }
201
202       next[0] = dpo0->dpoi_next_node;
203       next[1] = dpo1->dpoi_next_node;
204
205       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
206       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
207
208       vlib_increment_combined_counter
209         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
210       vlib_increment_combined_counter
211         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
212
213       b += 2;
214       next += 2;
215       n_left -= 2;
216     }
217
218   while (n_left > 0)
219     {
220       const load_balance_t *lb0;
221       const ip4_header_t *ip0;
222       const dpo_id_t *dpo0;
223       u32 lbi0, hc0;
224
225       ip0 = vlib_buffer_get_current (b[0]);
226       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
227
228       lb0 = load_balance_get (lbi0);
229
230       hc0 = 0;
231       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
232         {
233           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
234             {
235               hc0 = vnet_buffer (b[0])->ip.flow_hash =
236                 vnet_buffer (b[0])->ip.flow_hash >> 1;
237             }
238           else
239             {
240               hc0 = vnet_buffer (b[0])->ip.flow_hash =
241                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
242             }
243           dpo0 = load_balance_get_fwd_bucket
244             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
245         }
246       else
247         {
248           dpo0 = load_balance_get_bucket_i (lb0, 0);
249         }
250
251       next[0] = dpo0->dpoi_next_node;
252       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
253
254       vlib_increment_combined_counter
255         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
256
257       b += 1;
258       next += 1;
259       n_left -= 1;
260     }
261
262   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
263   if (node->flags & VLIB_NODE_FLAG_TRACE)
264     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
265
266   return frame->n_vectors;
267 }
268
269 VLIB_REGISTER_NODE (ip4_load_balance_node) =
270 {
271   .name = "ip4-load-balance",
272   .vector_size = sizeof (u32),
273   .sibling_of = "ip4-lookup",
274   .format_trace = format_ip4_lookup_trace,
275 };
276
277 #ifndef CLIB_MARCH_VARIANT
278 /* get first interface address */
279 ip4_address_t *
280 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
281                              ip_interface_address_t ** result_ia)
282 {
283   ip_lookup_main_t *lm = &im->lookup_main;
284   ip_interface_address_t *ia = 0;
285   ip4_address_t *result = 0;
286
287   foreach_ip_interface_address
288     (lm, ia, sw_if_index,
289      1 /* honor unnumbered */ ,
290      ({
291        ip4_address_t * a =
292          ip_interface_address_get_address (lm, ia);
293        result = a;
294        break;
295      }));
296   if (result_ia)
297     *result_ia = result ? ia : 0;
298   return result;
299 }
300 #endif
301
302 static void
303 ip4_add_subnet_bcast_route (u32 fib_index,
304                             fib_prefix_t *pfx,
305                             u32 sw_if_index)
306 {
307   vnet_sw_interface_flags_t iflags;
308
309   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
310
311   fib_table_entry_special_remove(fib_index,
312                                  pfx,
313                                  FIB_SOURCE_INTERFACE);
314
315   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
316     {
317       fib_table_entry_update_one_path (fib_index, pfx,
318                                        FIB_SOURCE_INTERFACE,
319                                        FIB_ENTRY_FLAG_NONE,
320                                        DPO_PROTO_IP4,
321                                        /* No next-hop address */
322                                        &ADJ_BCAST_ADDR,
323                                        sw_if_index,
324                                        // invalid FIB index
325                                        ~0,
326                                        1,
327                                        // no out-label stack
328                                        NULL,
329                                        FIB_ROUTE_PATH_FLAG_NONE);
330     }
331   else
332     {
333         fib_table_entry_special_add(fib_index,
334                                     pfx,
335                                     FIB_SOURCE_INTERFACE,
336                                     (FIB_ENTRY_FLAG_DROP |
337                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
338     }
339 }
340
341 static void
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
343                                  u32 sw_if_index,
344                                  u32 fib_index,
345                                  ip_interface_address_t * a)
346 {
347   ip_lookup_main_t *lm = &im->lookup_main;
348   ip_interface_prefix_t *if_prefix;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350
351   ip_interface_prefix_key_t key = {
352     .prefix = {
353       .fp_len = a->address_length,
354       .fp_proto = FIB_PROTOCOL_IP4,
355       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
356     },
357     .sw_if_index = sw_if_index,
358   };
359
360   fib_prefix_t pfx_special = {
361     .fp_proto = FIB_PROTOCOL_IP4,
362   };
363
364   /* If prefix already set on interface, just increment ref count & return */
365   if_prefix = ip_get_interface_prefix (lm, &key);
366   if (if_prefix)
367     {
368       if_prefix->ref_count += 1;
369       return;
370     }
371
372   /* New prefix - allocate a pool entry, initialize it, add to the hash */
373   pool_get (lm->if_prefix_pool, if_prefix);
374   if_prefix->ref_count = 1;
375   if_prefix->src_ia_index = a - lm->if_address_pool;
376   clib_memcpy (&if_prefix->key, &key, sizeof (key));
377   mhash_set (&lm->prefix_to_if_prefix_index, &key,
378              if_prefix - lm->if_prefix_pool, 0 /* old value */);
379
380   pfx_special.fp_len = a->address_length;
381   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
382
383   /* set the glean route for the prefix */
384   fib_table_entry_update_one_path (fib_index, &pfx_special,
385                                    FIB_SOURCE_INTERFACE,
386                                    (FIB_ENTRY_FLAG_CONNECTED |
387                                     FIB_ENTRY_FLAG_ATTACHED),
388                                    DPO_PROTO_IP4,
389                                    /* No next-hop address */
390                                    NULL,
391                                    sw_if_index,
392                                    /* invalid FIB index */
393                                    ~0,
394                                    1,
395                                    /* no out-label stack */
396                                    NULL,
397                                    FIB_ROUTE_PATH_FLAG_NONE);
398
399   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
400   if (a->address_length <= 30)
401     {
402       /* set a drop route for the base address of the prefix */
403       pfx_special.fp_len = 32;
404       pfx_special.fp_addr.ip4.as_u32 =
405         address->as_u32 & im->fib_masks[a->address_length];
406
407       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408         fib_table_entry_special_add (fib_index, &pfx_special,
409                                      FIB_SOURCE_INTERFACE,
410                                      (FIB_ENTRY_FLAG_DROP |
411                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
412
413       /* set a route for the broadcast address of the prefix */
414       pfx_special.fp_len = 32;
415       pfx_special.fp_addr.ip4.as_u32 =
416         address->as_u32 | ~im->fib_masks[a->address_length];
417       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
419
420
421     }
422   /* length == 31 - add an attached route for the other address */
423   else if (a->address_length == 31)
424     {
425       pfx_special.fp_len = 32;
426       pfx_special.fp_addr.ip4.as_u32 =
427         address->as_u32 ^ clib_host_to_net_u32(1);
428
429       fib_table_entry_update_one_path (fib_index, &pfx_special,
430                                        FIB_SOURCE_INTERFACE,
431                                        (FIB_ENTRY_FLAG_ATTACHED),
432                                        DPO_PROTO_IP4,
433                                        &pfx_special.fp_addr,
434                                        sw_if_index,
435                                        /* invalid FIB index */
436                                        ~0,
437                                        1,
438                                        NULL,
439                                        FIB_ROUTE_PATH_FLAG_NONE);
440     }
441 }
442
443 static void
444 ip4_add_interface_routes (u32 sw_if_index,
445                           ip4_main_t * im, u32 fib_index,
446                           ip_interface_address_t * a)
447 {
448   ip_lookup_main_t *lm = &im->lookup_main;
449   ip4_address_t *address = ip_interface_address_get_address (lm, a);
450   fib_prefix_t pfx = {
451     .fp_len = 32,
452     .fp_proto = FIB_PROTOCOL_IP4,
453     .fp_addr.ip4 = *address,
454   };
455
456   /* set special routes for the prefix if needed */
457   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
458
459   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
460     {
461       u32 classify_table_index =
462         lm->classify_table_index_by_sw_if_index[sw_if_index];
463       if (classify_table_index != (u32) ~ 0)
464         {
465           dpo_id_t dpo = DPO_INVALID;
466
467           dpo_set (&dpo,
468                    DPO_CLASSIFY,
469                    DPO_PROTO_IP4,
470                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
471
472           fib_table_entry_special_dpo_add (fib_index,
473                                            &pfx,
474                                            FIB_SOURCE_CLASSIFY,
475                                            FIB_ENTRY_FLAG_NONE, &dpo);
476           dpo_reset (&dpo);
477         }
478     }
479
480   fib_table_entry_update_one_path (fib_index, &pfx,
481                                    FIB_SOURCE_INTERFACE,
482                                    (FIB_ENTRY_FLAG_CONNECTED |
483                                     FIB_ENTRY_FLAG_LOCAL),
484                                    DPO_PROTO_IP4,
485                                    &pfx.fp_addr,
486                                    sw_if_index,
487                                    // invalid FIB index
488                                    ~0,
489                                    1, NULL,
490                                    FIB_ROUTE_PATH_FLAG_NONE);
491 }
492
493 static void
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
495                                  u32 sw_if_index,
496                                  u32 fib_index,
497                                  ip4_address_t * address,
498                                  u32 address_length)
499 {
500   ip_lookup_main_t *lm = &im->lookup_main;
501   ip_interface_prefix_t *if_prefix;
502
503   ip_interface_prefix_key_t key = {
504     .prefix = {
505       .fp_len = address_length,
506       .fp_proto = FIB_PROTOCOL_IP4,
507       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
508     },
509     .sw_if_index = sw_if_index,
510   };
511
512   fib_prefix_t pfx_special = {
513     .fp_len = 32,
514     .fp_proto = FIB_PROTOCOL_IP4,
515   };
516
517   if_prefix = ip_get_interface_prefix (lm, &key);
518   if (!if_prefix)
519     {
520       clib_warning ("Prefix not found while deleting %U",
521                     format_ip4_address_and_length, address, address_length);
522       return;
523     }
524
525   if_prefix->ref_count -= 1;
526
527   /*
528    * Routes need to be adjusted if deleting last intf addr in prefix
529    *
530    * We're done now otherwise
531    */
532   if (if_prefix->ref_count > 0)
533     return;
534
535   /* length <= 30, delete glean route, first address, last address */
536   if (address_length <= 30)
537     {
538       /* Less work to do in FIB if we remove the covered /32s first */
539
540       /* first address in prefix */
541       pfx_special.fp_addr.ip4.as_u32 =
542         address->as_u32 & im->fib_masks[address_length];
543       pfx_special.fp_len = 32;
544
545       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
546         fib_table_entry_special_remove (fib_index,
547                                         &pfx_special,
548                                         FIB_SOURCE_INTERFACE);
549
550       /* prefix broadcast address */
551       pfx_special.fp_addr.ip4.as_u32 =
552         address->as_u32 | ~im->fib_masks[address_length];
553       pfx_special.fp_len = 32;
554
555       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556         fib_table_entry_special_remove (fib_index,
557                                         &pfx_special,
558                                         FIB_SOURCE_INTERFACE);
559     }
560   else if (address_length == 31)
561     {
562       /* length == 31, delete attached route for the other address */
563       pfx_special.fp_addr.ip4.as_u32 =
564         address->as_u32 ^ clib_host_to_net_u32(1);
565
566       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
567     }
568
569   /* remove glean route for prefix */
570   pfx_special.fp_addr.ip4 = *address;
571   pfx_special.fp_len = address_length;
572   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
573
574   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
575   pool_put (lm->if_prefix_pool, if_prefix);
576 }
577
578 static void
579 ip4_del_interface_routes (u32 sw_if_index,
580                           ip4_main_t * im,
581                           u32 fib_index,
582                           ip4_address_t * address, u32 address_length)
583 {
584   fib_prefix_t pfx = {
585     .fp_len = 32,
586     .fp_proto = FIB_PROTOCOL_IP4,
587     .fp_addr.ip4 = *address,
588   };
589
590   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
591
592   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
593                                    address, address_length);
594 }
595
596 #ifndef CLIB_MARCH_VARIANT
597 void
598 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
599 {
600   ip4_main_t *im = &ip4_main;
601   vnet_main_t *vnm = vnet_get_main ();
602   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
603
604   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
605
606   /*
607    * enable/disable only on the 1<->0 transition
608    */
609   if (is_enable)
610     {
611       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
612         return;
613     }
614   else
615     {
616       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
617       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
618         return;
619     }
620   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
621                                !is_enable, 0, 0);
622
623
624   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
625                                sw_if_index, !is_enable, 0, 0);
626
627   if (is_enable)
628     hi->l3_if_count++;
629   else if (hi->l3_if_count)
630     hi->l3_if_count--;
631
632   {
633     ip4_enable_disable_interface_callback_t *cb;
634     vec_foreach (cb, im->enable_disable_interface_callbacks)
635       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
636   }
637 }
638
639 static clib_error_t *
640 ip4_add_del_interface_address_internal (vlib_main_t * vm,
641                                         u32 sw_if_index,
642                                         ip4_address_t * address,
643                                         u32 address_length, u32 is_del)
644 {
645   vnet_main_t *vnm = vnet_get_main ();
646   ip4_main_t *im = &ip4_main;
647   ip_lookup_main_t *lm = &im->lookup_main;
648   clib_error_t *error = 0;
649   u32 if_address_index;
650   ip4_address_fib_t ip4_af, *addr_fib = 0;
651
652   error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
653   if (error)
654     {
655       vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
656       return error;
657     }
658
659   ip4_addr_fib_init (&ip4_af, address,
660                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
661   vec_add1 (addr_fib, ip4_af);
662
663   /*
664    * there is no support for adj-fib handling in the presence of overlapping
665    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
666    * most routers do.
667    */
668   if (!is_del)
669     {
670       /* When adding an address check that it does not conflict
671          with an existing address on any interface in this table. */
672       ip_interface_address_t *ia;
673       vnet_sw_interface_t *sif;
674
675       pool_foreach (sif, vnm->interface_main.sw_interfaces)
676        {
677           if (im->fib_index_by_sw_if_index[sw_if_index] ==
678               im->fib_index_by_sw_if_index[sif->sw_if_index])
679             {
680               foreach_ip_interface_address
681                 (&im->lookup_main, ia, sif->sw_if_index,
682                  0 /* honor unnumbered */ ,
683                  ({
684                    ip4_address_t * x =
685                      ip_interface_address_get_address
686                      (&im->lookup_main, ia);
687
688                    if (ip4_destination_matches_route
689                        (im, address, x, ia->address_length) ||
690                        ip4_destination_matches_route (im,
691                                                       x,
692                                                       address,
693                                                       address_length))
694                      {
695                        /* an intf may have >1 addr from the same prefix */
696                        if ((sw_if_index == sif->sw_if_index) &&
697                            (ia->address_length == address_length) &&
698                            (x->as_u32 != address->as_u32))
699                          continue;
700
701                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
702                          /* if the address we're comparing against is stale
703                           * then the CP has not added this one back yet, maybe
704                           * it never will, so we have to assume it won't and
705                           * ignore it. if it does add it back, then it will fail
706                           * because this one is now present */
707                          continue;
708
709                        /* error if the length or intf was different */
710                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
711
712                        error = clib_error_create
713                          ("failed to add %U on %U which conflicts with %U for interface %U",
714                           format_ip4_address_and_length, address,
715                           address_length,
716                           format_vnet_sw_if_index_name, vnm,
717                           sw_if_index,
718                           format_ip4_address_and_length, x,
719                           ia->address_length,
720                           format_vnet_sw_if_index_name, vnm,
721                           sif->sw_if_index);
722                        goto done;
723                      }
724                  }));
725             }
726       }
727     }
728
729   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
730
731   if (is_del)
732     {
733       if (~0 == if_address_index)
734         {
735           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
736           error = clib_error_create ("%U not found for interface %U",
737                                      lm->format_address_and_length,
738                                      addr_fib, address_length,
739                                      format_vnet_sw_if_index_name, vnm,
740                                      sw_if_index);
741           goto done;
742         }
743
744       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
745                                         address_length, sw_if_index);
746       if (error)
747         goto done;
748     }
749   else
750     {
751       if (~0 != if_address_index)
752         {
753           ip_interface_address_t *ia;
754
755           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
756
757           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
758             {
759               if (ia->sw_if_index == sw_if_index)
760                 {
761                   /* re-adding an address during the replace action.
762                    * consdier this the update. clear the flag and
763                    * we're done */
764                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
765                   goto done;
766                 }
767               else
768                 {
769                   /* The prefix is moving from one interface to another.
770                    * delete the stale and add the new */
771                   ip4_add_del_interface_address_internal (vm,
772                                                           ia->sw_if_index,
773                                                           address,
774                                                           address_length, 1);
775                   ia = NULL;
776                   error = ip_interface_address_add (lm, sw_if_index,
777                                                     addr_fib, address_length,
778                                                     &if_address_index);
779                 }
780             }
781           else
782             {
783               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
784               error = clib_error_create
785                 ("Prefix %U already found on interface %U",
786                  lm->format_address_and_length, addr_fib, address_length,
787                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
788             }
789         }
790       else
791         error = ip_interface_address_add (lm, sw_if_index,
792                                           addr_fib, address_length,
793                                           &if_address_index);
794     }
795
796   if (error)
797     goto done;
798
799   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
800   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
801
802   /* intf addr routes are added/deleted on admin up/down */
803   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
804     {
805       if (is_del)
806         ip4_del_interface_routes (sw_if_index,
807                                   im, ip4_af.fib_index, address,
808                                   address_length);
809       else
810         ip4_add_interface_routes (sw_if_index,
811                                   im, ip4_af.fib_index,
812                                   pool_elt_at_index
813                                   (lm->if_address_pool, if_address_index));
814     }
815
816   ip4_add_del_interface_address_callback_t *cb;
817   vec_foreach (cb, im->add_del_interface_address_callbacks)
818     cb->function (im, cb->function_opaque, sw_if_index,
819                   address, address_length, if_address_index, is_del);
820
821 done:
822   vec_free (addr_fib);
823   return error;
824 }
825
826 clib_error_t *
827 ip4_add_del_interface_address (vlib_main_t * vm,
828                                u32 sw_if_index,
829                                ip4_address_t * address,
830                                u32 address_length, u32 is_del)
831 {
832   return ip4_add_del_interface_address_internal
833     (vm, sw_if_index, address, address_length, is_del);
834 }
835
836 void
837 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
838 {
839   ip_interface_address_t *ia;
840   ip4_main_t *im;
841
842   im = &ip4_main;
843
844   /*
845    * when directed broadcast is enabled, the subnet braodcast route will forward
846    * packets using an adjacency with a broadcast MAC. otherwise it drops
847    */
848   foreach_ip_interface_address(&im->lookup_main, ia,
849                                sw_if_index, 0,
850      ({
851        if (ia->address_length <= 30)
852          {
853            ip4_address_t *ipa;
854
855            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
856
857            fib_prefix_t pfx = {
858              .fp_len = 32,
859              .fp_proto = FIB_PROTOCOL_IP4,
860              .fp_addr = {
861                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
862              },
863            };
864
865            ip4_add_subnet_bcast_route
866              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
867                                                   sw_if_index),
868               &pfx, sw_if_index);
869          }
870      }));
871 }
872 #endif
873
874 static clib_error_t *
875 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
876 {
877   ip4_main_t *im = &ip4_main;
878   ip_interface_address_t *ia;
879   ip4_address_t *a;
880   u32 is_admin_up, fib_index;
881
882   vec_validate_init_empty (im->
883                            lookup_main.if_address_pool_index_by_sw_if_index,
884                            sw_if_index, ~0);
885
886   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
887
888   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
889
890   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
891                                 0 /* honor unnumbered */,
892   ({
893     a = ip_interface_address_get_address (&im->lookup_main, ia);
894     if (is_admin_up)
895       ip4_add_interface_routes (sw_if_index,
896                                 im, fib_index,
897                                 ia);
898     else
899       ip4_del_interface_routes (sw_if_index,
900                                 im, fib_index,
901                                 a, ia->address_length);
902   }));
903
904   return 0;
905 }
906
907 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
908
/*
 * "ip4-unicast" feature arc and the features registered on it.
 *
 * The arc is entered from ip4-input or ip4-input-no-checksum and its last
 * node is ip4-lookup.  Each VNET_FEATURE_INIT below places one node on the
 * arc; .runs_before lists the node(s) it is ordered ahead of, and a value
 * of 0 means the node is not ordered before any other feature.
 */
909 /* Built-in ip4 unicast rx feature path definition */
910 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
911 {
912   .arc_name = "ip4-unicast",
913   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
914   .last_in_arc = "ip4-lookup",
915   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index, /* where the arc's index is stored */
916 };
917
918 VNET_FEATURE_INIT (ip4_flow_classify, static) =
919 {
920   .arc_name = "ip4-unicast",
921   .node_name = "ip4-flow-classify",
922   .runs_before = VNET_FEATURES ("ip4-inacl"),
923 };
924
925 VNET_FEATURE_INIT (ip4_inacl, static) =
926 {
927   .arc_name = "ip4-unicast",
928   .node_name = "ip4-inacl",
929   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
930 };
931
932 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
933 {
934   .arc_name = "ip4-unicast",
935   .node_name = "ip4-source-and-port-range-check-rx",
936   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
937 };
938
939 VNET_FEATURE_INIT (ip4_policer_classify, static) =
940 {
941   .arc_name = "ip4-unicast",
942   .node_name = "ip4-policer-classify",
943   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
944 };
945
946 VNET_FEATURE_INIT (ip4_ipsec, static) =
947 {
948   .arc_name = "ip4-unicast",
949   .node_name = "ipsec4-input-feature",
950   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
951 };
952
953 VNET_FEATURE_INIT (ip4_vpath, static) =
954 {
955   .arc_name = "ip4-unicast",
956   .node_name = "vpath-input-ip4",
957   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
958 };
959
960 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
961 {
962   .arc_name = "ip4-unicast",
963   .node_name = "ip4-vxlan-bypass",
964   .runs_before = VNET_FEATURES ("ip4-lookup"),
965 };
966
     /* Toggled per interface (on both the unicast and multicast arcs) by
      * ip4_sw_interface_add_del. */
967 VNET_FEATURE_INIT (ip4_not_enabled, static) =
968 {
969   .arc_name = "ip4-unicast",
970   .node_name = "ip4-not-enabled",
971   .runs_before = VNET_FEATURES ("ip4-lookup"),
972 };
973
     /* The arc's last_in_arc node. */
974 VNET_FEATURE_INIT (ip4_lookup, static) =
975 {
976   .arc_name = "ip4-unicast",
977   .node_name = "ip4-lookup",
978   .runs_before = 0,     /* not before any other features */
979 };
980
/*
 * "ip4-multicast" feature arc and the features registered on it.
 *
 * Entered from ip4-input or ip4-input-no-checksum; the last node is
 * ip4-mfib-forward-lookup.  .runs_before lists the node(s) a feature is
 * ordered ahead of.
 */
981 /* Built-in ip4 multicast rx feature path definition */
982 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
983 {
984   .arc_name = "ip4-multicast",
985   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
986   .last_in_arc = "ip4-mfib-forward-lookup",
987   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index, /* where the arc's index is stored */
988 };
989
990 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
991 {
992   .arc_name = "ip4-multicast",
993   .node_name = "vpath-input-ip4",
994   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
995 };
996
     /* Same node name as the unicast "ip4-not-enabled" feature, registered
      * here on the multicast arc. */
997 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
998 {
999   .arc_name = "ip4-multicast",
1000   .node_name = "ip4-not-enabled",
1001   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1002 };
1003
1004 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1005 {
1006   .arc_name = "ip4-multicast",
1007   .node_name = "ip4-mfib-forward-lookup",
1008   .runs_before = 0,     /* last feature */
1009 };
1010
/*
 * "ip4-output" feature arc and the features registered on it.
 *
 * Entered from ip4-rewrite, ip4-midchain or ip4-dvr-dpo; the last node is
 * interface-output.  The registrations below order the features as
 * source-and-port-range-check-tx, outacl, ipsec4-output-feature and
 * finally interface-output.
 */
1011 /* Source and port-range check ip4 tx feature path definition */
1012 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1013 {
1014   .arc_name = "ip4-output",
1015   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1016   .last_in_arc = "interface-output",
1017   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index, /* where the arc's index is stored */
1018 };
1019
1020 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1021 {
1022   .arc_name = "ip4-output",
1023   .node_name = "ip4-source-and-port-range-check-tx",
1024   .runs_before = VNET_FEATURES ("ip4-outacl"),
1025 };
1026
1027 VNET_FEATURE_INIT (ip4_outacl, static) =
1028 {
1029   .arc_name = "ip4-output",
1030   .node_name = "ip4-outacl",
1031   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1032 };
1033
1034 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1035 {
1036   .arc_name = "ip4-output",
1037   .node_name = "ipsec4-output-feature",
1038   .runs_before = VNET_FEATURES ("interface-output"),
1039 };
1040
1041 /* Built-in ip4 tx feature path definition */
1042 VNET_FEATURE_INIT (ip4_interface_output, static) =
1043 {
1044   .arc_name = "ip4-output",
1045   .node_name = "interface-output",
1046   .runs_before = 0,     /* not before any other features */
1047 };
1048
1049 static clib_error_t *
1050 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1051 {
1052   ip4_main_t *im = &ip4_main;
1053
1054   vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
1055   vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
1056
1057   if (is_add)
1058     {
1059       /* Fill in lookup tables with default table (0). */
1060       im->fib_index_by_sw_if_index[sw_if_index] = 0;
1061       im->mfib_index_by_sw_if_index[sw_if_index] = 0;
1062     }
1063   else
1064     {
1065       ip4_main_t *im4 = &ip4_main;
1066       ip_lookup_main_t *lm4 = &im4->lookup_main;
1067       ip_interface_address_t *ia = 0;
1068       ip4_address_t *address;
1069       vlib_main_t *vm = vlib_get_main ();
1070
1071       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1072       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1073       ({
1074         address = ip_interface_address_get_address (lm4, ia);
1075         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1076       }));
1077       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1078
1079       if (0 != im4->fib_index_by_sw_if_index[sw_if_index])
1080         fib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
1081       if (0 != im4->mfib_index_by_sw_if_index[sw_if_index])
1082         mfib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
1083
1084       /* Erase the lookup tables just in case */
1085       im4->fib_index_by_sw_if_index[sw_if_index] = ~0;
1086       im4->mfib_index_by_sw_if_index[sw_if_index] = ~0;
1087     }
1088
1089   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1090                                is_add, 0, 0);
1091
1092   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1093                                sw_if_index, is_add, 0, 0);
1094
1095   return /* no error */ 0;
1096 }
1097
1098 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1099
1100 /* Global IP4 main: the ip4_main_t instance used throughout this file.
      * Its definition is compiled only when CLIB_MARCH_VARIANT is not
      * defined. */
1101 #ifndef CLIB_MARCH_VARIANT
1102 ip4_main_t ip4_main;
1103 #endif /* CLIB_MARCH_VARIANT */
1104
1105 static clib_error_t *
1106 ip4_lookup_init (vlib_main_t * vm)
1107 {
1108   ip4_main_t *im = &ip4_main;
1109   clib_error_t *error;
1110   uword i;
1111
1112   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1113     return error;
1114   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1115     return (error);
1116   if ((error = vlib_call_init_function (vm, fib_module_init)))
1117     return error;
1118   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1119     return error;
1120
1121   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1122     {
1123       u32 m;
1124
1125       if (i < 32)
1126         m = pow2_mask (i) << (32 - i);
1127       else
1128         m = ~0;
1129       im->fib_masks[i] = clib_host_to_net_u32 (m);
1130     }
1131
1132   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1133
1134   /* Create FIB with index 0 and table id of 0. */
1135   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1136                                      FIB_SOURCE_DEFAULT_ROUTE);
1137   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1138                                       MFIB_SOURCE_DEFAULT_ROUTE);
1139
1140   {
1141     pg_node_t *pn;
1142     pn = pg_get_node (ip4_lookup_node.index);
1143     pn->unformat_edit = unformat_pg_ip4_header;
1144   }
1145
1146   {
1147     ethernet_arp_header_t h;
1148
1149     clib_memset (&h, 0, sizeof (h));
1150
1151 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1152 #define _8(f,v) h.f = v;
1153     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1154     _16 (l3_type, ETHERNET_TYPE_IP4);
1155     _8 (n_l2_address_bytes, 6);
1156     _8 (n_l3_address_bytes, 4);
1157     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1158 #undef _16
1159 #undef _8
1160
1161     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1162                                /* data */ &h,
1163                                sizeof (h),
1164                                /* alloc chunk size */ 8,
1165                                "ip4 arp");
1166   }
1167
1168   return error;
1169 }
1170
1171 VLIB_INIT_FUNCTION (ip4_lookup_init);
1172
/*
 * Per-packet trace record filled in by ip4_forward_next_trace and printed
 * by the format_ip4_*_trace functions.
 */
1173 typedef struct
1174 {
1175   /* Adjacency taken. */
1176   u32 dpo_index;
1177   u32 flow_hash; /* copied from the buffer's ip.flow_hash */
       /* The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
        * FIB index of the RX interface (see ip4_forward_next_trace). */
1178   u32 fib_index;
1179
1180   /* Packet data, possibly *after* rewrite. */
1181   u8 packet_data[64 - 1 * sizeof (u32)];
1182 }
1183 ip4_forward_next_trace_t;
1184
1185 #ifndef CLIB_MARCH_VARIANT
1186 u8 *
1187 format_ip4_forward_next_trace (u8 * s, va_list * args)
1188 {
1189   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1190   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1191   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1192   u32 indent = format_get_indent (s);
1193   s = format (s, "%U%U",
1194               format_white_space, indent,
1195               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1196   return s;
1197 }
1198 #endif
1199
1200 static u8 *
1201 format_ip4_lookup_trace (u8 * s, va_list * args)
1202 {
1203   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1204   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1205   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1206   u32 indent = format_get_indent (s);
1207
1208   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1209               t->fib_index, t->dpo_index, t->flow_hash);
1210   s = format (s, "\n%U%U",
1211               format_white_space, indent,
1212               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1213   return s;
1214 }
1215
1216 static u8 *
1217 format_ip4_rewrite_trace (u8 * s, va_list * args)
1218 {
1219   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1220   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1221   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1222   u32 indent = format_get_indent (s);
1223
1224   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1225               t->fib_index, t->dpo_index, format_ip_adjacency,
1226               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1227   s = format (s, "\n%U%U",
1228               format_white_space, indent,
1229               format_ip_adjacency_packet_data,
1230               t->packet_data, sizeof (t->packet_data));
1231   return s;
1232 }
1233
1234 #ifndef CLIB_MARCH_VARIANT
1235 /* Common trace function for all ip4-forward next nodes. */
1236 void
1237 ip4_forward_next_trace (vlib_main_t * vm,
1238                         vlib_node_runtime_t * node,
1239                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1240 {
1241   u32 *from, n_left;
1242   ip4_main_t *im = &ip4_main;
1243
1244   n_left = frame->n_vectors;
1245   from = vlib_frame_vector_args (frame);
1246
1247   while (n_left >= 4)
1248     {
1249       u32 bi0, bi1;
1250       vlib_buffer_t *b0, *b1;
1251       ip4_forward_next_trace_t *t0, *t1;
1252
1253       /* Prefetch next iteration. */
1254       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1255       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1256
1257       bi0 = from[0];
1258       bi1 = from[1];
1259
1260       b0 = vlib_get_buffer (vm, bi0);
1261       b1 = vlib_get_buffer (vm, bi1);
1262
1263       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1264         {
1265           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1266           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1267           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1268           t0->fib_index =
1269             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1270              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1271             vec_elt (im->fib_index_by_sw_if_index,
1272                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1273
1274           clib_memcpy_fast (t0->packet_data,
1275                             vlib_buffer_get_current (b0),
1276                             sizeof (t0->packet_data));
1277         }
1278       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1279         {
1280           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1281           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1282           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1283           t1->fib_index =
1284             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1285              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1286             vec_elt (im->fib_index_by_sw_if_index,
1287                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1288           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1289                             sizeof (t1->packet_data));
1290         }
1291       from += 2;
1292       n_left -= 2;
1293     }
1294
1295   while (n_left >= 1)
1296     {
1297       u32 bi0;
1298       vlib_buffer_t *b0;
1299       ip4_forward_next_trace_t *t0;
1300
1301       bi0 = from[0];
1302
1303       b0 = vlib_get_buffer (vm, bi0);
1304
1305       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1306         {
1307           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1308           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1309           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1310           t0->fib_index =
1311             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1312              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1313             vec_elt (im->fib_index_by_sw_if_index,
1314                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1315           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1316                             sizeof (t0->packet_data));
1317         }
1318       from += 1;
1319       n_left -= 1;
1320     }
1321 }
1322
1323 /* Compute TCP/UDP/ICMP4 checksum in software. */
1324 u16
1325 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1326                               ip4_header_t * ip0)
1327 {
1328   ip_csum_t sum0;
1329   u32 ip_header_length, payload_length_host_byte_order;
1330
1331   /* Initialize checksum with ip header. */
1332   ip_header_length = ip4_header_bytes (ip0);
1333   payload_length_host_byte_order =
1334     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1335   sum0 =
1336     clib_host_to_net_u32 (payload_length_host_byte_order +
1337                           (ip0->protocol << 16));
1338
1339   if (BITS (uword) == 32)
1340     {
1341       sum0 =
1342         ip_csum_with_carry (sum0,
1343                             clib_mem_unaligned (&ip0->src_address, u32));
1344       sum0 =
1345         ip_csum_with_carry (sum0,
1346                             clib_mem_unaligned (&ip0->dst_address, u32));
1347     }
1348   else
1349     sum0 =
1350       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1351
1352   return ip_calculate_l4_checksum (vm, p0, sum0,
1353                                    payload_length_host_byte_order, (u8 *) ip0,
1354                                    ip_header_length, NULL);
1355 }
1356
1357 u32
1358 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1359 {
1360   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1361   udp_header_t *udp0;
1362   u16 sum16;
1363
1364   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1365           || ip0->protocol == IP_PROTOCOL_UDP);
1366
1367   udp0 = (void *) (ip0 + 1);
1368   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1369     {
1370       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1371                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1372       return p0->flags;
1373     }
1374
1375   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1376
1377   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1378                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1379
1380   return p0->flags;
1381 }
1382 #endif
1383
/*
 * "ip4-local" feature arc: entered from ip4-local or ip4-receive, with
 * ip4-local-end-of-arc as its last node.  No arc_index_ptr is given here;
 * ip4_local_set_next_and_error reads the index from
 * vnet_feat_arc_ip4_local.feature_arc_index.
 */
1384 VNET_FEATURE_ARC_INIT (ip4_local) = {
1385   .arc_name = "ip4-local",
1386   .start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"),
1387   .last_in_arc = "ip4-local-end-of-arc",
1388 };
1389
1390 static inline void
1391 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1392                             ip4_header_t * ip, u8 is_udp, u8 * error,
1393                             u8 * good_tcp_udp)
1394 {
1395   u32 flags0;
1396   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1397   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1398   if (is_udp)
1399     {
1400       udp_header_t *udp;
1401       u32 ip_len, udp_len;
1402       i32 len_diff;
1403       udp = ip4_next_header (ip);
1404       /* Verify UDP length. */
1405       ip_len = clib_net_to_host_u16 (ip->length);
1406       udp_len = clib_net_to_host_u16 (udp->length);
1407
1408       len_diff = ip_len - udp_len;
1409       *good_tcp_udp &= len_diff >= 0;
1410       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1411     }
1412 }
1413
/*
 * Checksum-state predicates on a vlib_buffer_t pointer.
 *
 * Every macro argument and every expansion is fully parenthesised, so the
 * macros can be given arbitrary pointer expressions (e.g. bufs + 1) and
 * used inside larger expressions.  Note that _b is evaluated more than
 * once; do not pass an expression with side effects.
 */

/* True when the buffer has offload enabled with a TCP or UDP checksum
 * offload flag set. */
#define ip4_local_csum_is_offloaded(_b)                                       \
  (((_b)->flags & VNET_BUFFER_F_OFFLOAD) &&                                   \
   (vnet_buffer (_b)->oflags &                                                \
    (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))

/* True when the packet is TCP/UDP and its checksum has neither been
 * computed already nor been offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                             \
  ((is_tcp_udp) &&                                                            \
   !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) ||                    \
     ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                           \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) ||                      \
    ip4_local_csum_is_offloaded (_b)) != 0)
1426
1427 static inline void
1428 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1429                          ip4_header_t * ih, u8 * error)
1430 {
1431   u8 is_udp, is_tcp_udp, good_tcp_udp;
1432
1433   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1434   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1435
1436   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1437     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1438   else
1439     good_tcp_udp = ip4_local_csum_is_valid (b);
1440
1441   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1442   *error = (is_tcp_udp && !good_tcp_udp
1443             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1444 }
1445
1446 static inline void
1447 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1448                             ip4_header_t ** ih, u8 * error)
1449 {
1450   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1451
1452   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1453   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1454
1455   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1456   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1457
1458   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1459   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1460
1461   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1462                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1463     {
1464       if (is_tcp_udp[0] && !ip4_local_csum_is_offloaded (b[0]))
1465         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1466                                     &good_tcp_udp[0]);
1467       if (is_tcp_udp[1] && !ip4_local_csum_is_offloaded (b[1]))
1468         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1469                                     &good_tcp_udp[1]);
1470     }
1471
1472   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1473               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1474   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1475               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1476 }
1477
1478 static inline void
1479 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1480                               vlib_buffer_t * b, u16 * next, u8 error,
1481                               u8 head_of_feature_arc)
1482 {
1483   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1484   u32 next_index;
1485
1486   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1487   b->error = error ? error_node->errors[error] : 0;
1488   if (head_of_feature_arc)
1489     {
1490       next_index = *next;
1491       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1492         {
1493           vnet_feature_arc_start (
1494             arc_index, vnet_buffer (b)->ip.rx_sw_if_index, &next_index, b);
1495           *next = next_index;
1496         }
1497     }
1498 }
1499
/*
 * One-entry cache of the most recent source-address check, used by
 * ip4_local_check_src and ip4_local_check_src_x2 to skip the FIB lookup
 * when consecutive packets share source address and FIB index.
 */
1500 typedef struct
1501 {
1502   /* The src and fib-index together determine if packet n is the same as n-1 */
1503   ip4_address_t src;
1504   u32 fib_index;
1505   u32 lbi; /* load-balance index returned by the last source lookup */
1506   u8 error; /* error value left by the last source lookup */
1507   u8 first; /* non-zero forces a lookup; cleared once a lookup has run */
1508 } ip4_local_last_check_t;
1509
1510 static inline void
1511 ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0,
1512                      ip4_local_last_check_t *last_check, u8 *error0,
1513                      int is_receive_dpo)
1514 {
1515   const dpo_id_t *dpo0;
1516   load_balance_t *lb0;
1517   u32 lbi0;
1518
1519   vnet_buffer (b)->ip.fib_index =
1520     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1521     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1522
1523   vnet_buffer (b)->ip.rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
1524   if (is_receive_dpo)
1525     {
1526       receive_dpo_t *rd;
1527       rd = receive_dpo_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
1528       if (rd->rd_sw_if_index != ~0)
1529         vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index;
1530     }
1531
1532   /*
1533    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1534    *  adjacency for the destination address (the local interface address).
1535    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1536    *  adjacency for the source address (the remote sender's address)
1537    */
1538   if (PREDICT_TRUE ((last_check->src.as_u32 != ip0->src_address.as_u32)) ||
1539       (last_check->fib_index != vnet_buffer (b)->ip.fib_index) ||
1540       last_check->first)
1541     {
1542       lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
1543                                         &ip0->src_address);
1544
1545       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1546         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1547       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1548
1549       lb0 = load_balance_get (lbi0);
1550       dpo0 = load_balance_get_bucket_i (lb0, 0);
1551
1552       /*
1553        * Must have a route to source otherwise we drop the packet.
1554        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1555        *
1556        * The checks are:
1557        *  - the source is a recieve => it's from us => bogus, do this
1558        *    first since it sets a different error code.
1559        *  - uRPF check for any route to source - accept if passes.
1560        *  - allow packets destined to the broadcast address from unknown sources
1561        */
1562
1563       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1564                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1565                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1566       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1567                   && !fib_urpf_check_size (lb0->lb_urpf)
1568                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1569                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1570
1571       last_check->src.as_u32 = ip0->src_address.as_u32;
1572       last_check->lbi = lbi0;
1573       last_check->error = *error0;
1574       last_check->first = 0;
1575       last_check->fib_index = vnet_buffer (b)->ip.fib_index;
1576     }
1577   else
1578     {
1579       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1580         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1581       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1582       *error0 = last_check->error;
1583     }
1584 }
1585
1586 static inline void
1587 ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip,
1588                         ip4_local_last_check_t *last_check, u8 *error,
1589                         int is_receive_dpo)
1590 {
1591   const dpo_id_t *dpo[2];
1592   load_balance_t *lb[2];
1593   u32 not_last_hit;
1594   u32 lbi[2];
1595
1596   not_last_hit = last_check->first;
1597   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1598   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1599
1600   vnet_buffer (b[0])->ip.fib_index =
1601     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1602     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1603     vnet_buffer (b[0])->ip.fib_index;
1604
1605   vnet_buffer (b[1])->ip.fib_index =
1606     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1607     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1608     vnet_buffer (b[1])->ip.fib_index;
1609
1610   not_last_hit |= vnet_buffer (b[0])->ip.fib_index ^ last_check->fib_index;
1611   not_last_hit |= vnet_buffer (b[1])->ip.fib_index ^ last_check->fib_index;
1612
1613   vnet_buffer (b[0])->ip.rx_sw_if_index =
1614     vnet_buffer (b[0])->sw_if_index[VLIB_RX];
1615   vnet_buffer (b[1])->ip.rx_sw_if_index =
1616     vnet_buffer (b[1])->sw_if_index[VLIB_RX];
1617   if (is_receive_dpo)
1618     {
1619       const receive_dpo_t *rd0, *rd1;
1620       rd0 = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
1621       rd1 = receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
1622       if (rd0->rd_sw_if_index != ~0)
1623         vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
1624       if (rd1->rd_sw_if_index != ~0)
1625         vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
1626     }
1627
1628   /*
1629    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1630    *  adjacency for the destination address (the local interface address).
1631    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1632    *  adjacency for the source address (the remote sender's address)
1633    */
1634   if (PREDICT_TRUE (not_last_hit))
1635     {
1636       ip4_fib_forwarding_lookup_x2 (
1637         vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
1638         &ip[0]->src_address, &ip[1]->src_address, &lbi[0], &lbi[1]);
1639
1640       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1641         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1642       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1643
1644       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1645         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1646       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1647
1648       lb[0] = load_balance_get (lbi[0]);
1649       lb[1] = load_balance_get (lbi[1]);
1650
1651       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1652       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1653
1654       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1655                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1656                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1657       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1658                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1659                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1660                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1661
1662       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1663                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1664                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1665       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1666                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1667                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1668                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1669
1670       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1671       last_check->lbi = lbi[1];
1672       last_check->error = error[1];
1673       last_check->first = 0;
1674       last_check->fib_index = vnet_buffer (b[1])->ip.fib_index;
1675     }
1676   else
1677     {
1678       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1679         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1680       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1681
1682       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1683         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1684       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1685
1686       error[0] = last_check->error;
1687       error[1] = last_check->error;
1688     }
1689 }
1690
/**
 * Packet classification produced by ip4_local_classify().
 *
 * IP_LOCAL_PACKET_TYPE_L4 must remain 0: ip4_local_inline() tests the
 * classification for truthiness to decide whether the checksum and
 * source checks are skipped.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,   /* neither a fragment nor NAT'ed */
  IP_LOCAL_PACKET_TYPE_NAT = 1,  /* VNET_BUFFER_F_IS_NATED is set */
  IP_LOCAL_PACKET_TYPE_FRAG = 2, /* IPv4 fragment */
};
1697
1698 /**
1699  * Determine packet type and next node.
1700  *
1701  * The expectation is that all packets that are not L4 will skip
1702  * checksums and source checks.
1703  */
1704 always_inline u8
1705 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1706 {
1707   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1708
1709   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1710     {
1711       *next = IP_LOCAL_NEXT_REASSEMBLY;
1712       return IP_LOCAL_PACKET_TYPE_FRAG;
1713     }
1714   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1715     {
1716       *next = lm->local_next_by_ip_protocol[ip->protocol];
1717       return IP_LOCAL_PACKET_TYPE_NAT;
1718     }
1719
1720   *next = lm->local_next_by_ip_protocol[ip->protocol];
1721   return IP_LOCAL_PACKET_TYPE_L4;
1722 }
1723
1724 static inline uword
1725 ip4_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1726                   vlib_frame_t *frame, int head_of_feature_arc,
1727                   int is_receive_dpo)
1728 {
1729   u32 *from, n_left_from;
1730   vlib_node_runtime_t *error_node =
1731     vlib_node_get_runtime (vm, ip4_local_node.index);
1732   u16 nexts[VLIB_FRAME_SIZE], *next;
1733   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1734   ip4_header_t *ip[2];
1735   u8 error[2], pt[2];
1736
1737   ip4_local_last_check_t last_check = {
1738     /*
1739      * 0.0.0.0 can appear as the source address of an IP packet,
1740      * as can any other address, hence the need to use the 'first'
1741      * member to make sure the .lbi is initialised for the first
1742      * packet.
1743      */
1744     .src = { .as_u32 = 0 },
1745     .lbi = ~0,
1746     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1747     .first = 1,
1748     .fib_index = 0,
1749   };
1750
1751   from = vlib_frame_vector_args (frame);
1752   n_left_from = frame->n_vectors;
1753
1754   if (node->flags & VLIB_NODE_FLAG_TRACE)
1755     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1756
1757   vlib_get_buffers (vm, from, bufs, n_left_from);
1758   b = bufs;
1759   next = nexts;
1760
1761   while (n_left_from >= 6)
1762     {
1763       u8 not_batch = 0;
1764
1765       /* Prefetch next iteration. */
1766       {
1767         vlib_prefetch_buffer_header (b[4], LOAD);
1768         vlib_prefetch_buffer_header (b[5], LOAD);
1769
1770         clib_prefetch_load (b[4]->data);
1771         clib_prefetch_load (b[5]->data);
1772       }
1773
1774       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1775
1776       ip[0] = vlib_buffer_get_current (b[0]);
1777       ip[1] = vlib_buffer_get_current (b[1]);
1778
1779       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1780       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1781
1782       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1783       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1784
1785       not_batch = pt[0] ^ pt[1];
1786
1787       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1788         goto skip_checks;
1789
1790       if (PREDICT_TRUE (not_batch == 0))
1791         {
1792           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1793           ip4_local_check_src_x2 (b, ip, &last_check, error, is_receive_dpo);
1794         }
1795       else
1796         {
1797           if (!pt[0])
1798             {
1799               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1800               ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1801                                    is_receive_dpo);
1802             }
1803           if (!pt[1])
1804             {
1805               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1806               ip4_local_check_src (b[1], ip[1], &last_check, &error[1],
1807                                    is_receive_dpo);
1808             }
1809         }
1810
1811     skip_checks:
1812
1813       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1814                                     head_of_feature_arc);
1815       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1816                                     head_of_feature_arc);
1817
1818       b += 2;
1819       next += 2;
1820       n_left_from -= 2;
1821     }
1822
1823   while (n_left_from > 0)
1824     {
1825       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1826
1827       ip[0] = vlib_buffer_get_current (b[0]);
1828       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1829       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1830
1831       if (head_of_feature_arc == 0 || pt[0])
1832         goto skip_check;
1833
1834       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1835       ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1836                            is_receive_dpo);
1837
1838     skip_check:
1839
1840       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1841                                     head_of_feature_arc);
1842
1843       b += 1;
1844       next += 1;
1845       n_left_from -= 1;
1846     }
1847
1848   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1849   return frame->n_vectors;
1850 }
1851
1852 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1853                                vlib_frame_t * frame)
1854 {
1855   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1856                            0 /* is_receive_dpo */);
1857 }
1858
1859 VLIB_REGISTER_NODE (ip4_local_node) =
1860 {
1861   .name = "ip4-local",
1862   .vector_size = sizeof (u32),
1863   .format_trace = format_ip4_forward_next_trace,
1864   .n_errors = IP4_N_ERROR,
1865   .error_counters = ip4_error_counters,
1866   .n_next_nodes = IP_LOCAL_N_NEXT,
1867   .next_nodes =
1868   {
1869     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1870     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1871     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1872     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1873     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-local-full-reassembly",
1874   },
1875 };
1876
1877 VLIB_NODE_FN (ip4_receive_local_node)
1878 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1879 {
1880   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1881                            1 /* is_receive_dpo */);
1882 }
1883
1884 VLIB_REGISTER_NODE (ip4_receive_local_node) = {
1885   .name = "ip4-receive",
1886   .vector_size = sizeof (u32),
1887   .format_trace = format_ip4_forward_next_trace,
1888   .sibling_of = "ip4-local"
1889 };
1890
1891 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1892                                           vlib_node_runtime_t * node,
1893                                           vlib_frame_t * frame)
1894 {
1895   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */,
1896                            0 /* is_receive_dpo */);
1897 }
1898
1899 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1900   .name = "ip4-local-end-of-arc",
1901   .vector_size = sizeof (u32),
1902
1903   .format_trace = format_ip4_forward_next_trace,
1904   .sibling_of = "ip4-local",
1905 };
1906
1907 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1908   .arc_name = "ip4-local",
1909   .node_name = "ip4-local-end-of-arc",
1910   .runs_before = 0, /* not before any other features */
1911 };
1912
1913 #ifndef CLIB_MARCH_VARIANT
1914 void
1915 ip4_register_protocol (u32 protocol, u32 node_index)
1916 {
1917   vlib_main_t *vm = vlib_get_main ();
1918   ip4_main_t *im = &ip4_main;
1919   ip_lookup_main_t *lm = &im->lookup_main;
1920
1921   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1922   lm->local_next_by_ip_protocol[protocol] =
1923     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1924 }
1925
1926 void
1927 ip4_unregister_protocol (u32 protocol)
1928 {
1929   ip4_main_t *im = &ip4_main;
1930   ip_lookup_main_t *lm = &im->lookup_main;
1931
1932   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1933   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1934 }
1935 #endif
1936
1937 static clib_error_t *
1938 show_ip_local_command_fn (vlib_main_t * vm,
1939                           unformat_input_t * input, vlib_cli_command_t * cmd)
1940 {
1941   ip4_main_t *im = &ip4_main;
1942   ip_lookup_main_t *lm = &im->lookup_main;
1943   int i;
1944
1945   vlib_cli_output (vm, "Protocols handled by ip4_local");
1946   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1947     {
1948       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1949         {
1950           u32 node_index = vlib_get_node (vm,
1951                                           ip4_local_node.index)->
1952             next_nodes[lm->local_next_by_ip_protocol[i]];
1953           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1954                            format_vlib_node_name, vm, node_index);
1955         }
1956     }
1957   return 0;
1958 }
1959
1960
1961
1962 /*?
1963  * Display the set of protocols handled by the local IPv4 stack.
1964  *
1965  * @cliexpar
1966  * Example of how to display local protocol table:
1967  * @cliexstart{show ip local}
1968  * Protocols handled by ip4_local
1969  * 1
1970  * 17
1971  * 47
1972  * @cliexend
1973 ?*/
1974 VLIB_CLI_COMMAND (show_ip_local, static) =
1975 {
1976   .path = "show ip local",
1977   .function = show_ip_local_command_fn,
1978   .short_help = "show ip local",
1979 };
1980
/** Next-node slots used by the IPv4 rewrite worker. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,       /* default for every buffer */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1, /* TTL expired, or MTU exceeded with DF */
  IP4_REWRITE_NEXT_FRAGMENT = 2,   /* MTU exceeded without DF */
  IP4_REWRITE_N_NEXT               /* Last */
} ip4_rewrite_next_t;
1988
/**
 * The bits of an IPv4 address to mask in order to construct a multicast
 * MAC address. The address is held in network byte order, so the
 * constant depends on the host endianness.
 */
#if !(CLIB_ARCH_IS_BIG_ENDIAN)
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1998
1999 always_inline void
2000 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2001                u16 adj_packet_bytes, bool df, u16 * next,
2002                u8 is_midchain, u32 * error)
2003 {
2004   if (packet_len > adj_packet_bytes)
2005     {
2006       *error = IP4_ERROR_MTU_EXCEEDED;
2007       if (df)
2008         {
2009           icmp4_error_set_vnet_buffer
2010             (b, ICMP4_destination_unreachable,
2011              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2012              adj_packet_bytes);
2013           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2014         }
2015       else
2016         {
2017           /* IP fragmentation */
2018           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2019                                    (is_midchain ?
2020                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2021                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2022           *next = IP4_REWRITE_NEXT_FRAGMENT;
2023         }
2024     }
2025 }
2026
2027 /* increment TTL & update checksum.
2028    Works either endian, so no need for byte swap. */
2029 static_always_inline void
2030 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2031 {
2032   i32 ttl;
2033   u32 checksum;
2034   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2035     return;
2036
2037   ttl = ip->ttl;
2038
2039   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2040   checksum += checksum >= 0xffff;
2041
2042   ip->checksum = checksum;
2043   ttl += 1;
2044   ip->ttl = ttl;
2045
2046   ASSERT (ip4_header_checksum_is_valid (ip) ||
2047           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
2048           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
2049 }
2050
2051 /* Decrement TTL & update checksum.
2052    Works either endian, so no need for byte swap. */
2053 static_always_inline void
2054 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2055                             u32 * error)
2056 {
2057   i32 ttl;
2058   u32 checksum;
2059   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2060     return;
2061
2062   ttl = ip->ttl;
2063
2064   /* Input node should have reject packets with ttl 0. */
2065   ASSERT (ip->ttl > 0);
2066
2067   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2068   checksum += checksum >= 0xffff;
2069
2070   ip->checksum = checksum;
2071   ttl -= 1;
2072   ip->ttl = ttl;
2073
2074   /*
2075    * If the ttl drops below 1 when forwarding, generate
2076    * an ICMP response.
2077    */
2078   if (PREDICT_FALSE (ttl <= 0))
2079     {
2080       *error = IP4_ERROR_TIME_EXPIRED;
2081       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2082       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2083                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2084                                    0);
2085       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2086     }
2087
2088   /* Verify checksum. */
2089   ASSERT (ip4_header_checksum_is_valid (ip) ||
2090           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
2091           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
2092 }
2093
2094 always_inline uword
2095 ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
2096                     vlib_frame_t *frame, int do_counters, int is_midchain,
2097                     int is_mcast)
2098 {
2099   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2100   u32 *from = vlib_frame_vector_args (frame);
2101   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2102   u16 nexts[VLIB_FRAME_SIZE], *next;
2103   u32 n_left_from;
2104   vlib_node_runtime_t *error_node =
2105     vlib_node_get_runtime (vm, ip4_input_node.index);
2106
2107   n_left_from = frame->n_vectors;
2108   u32 thread_index = vm->thread_index;
2109
2110   vlib_get_buffers (vm, from, bufs, n_left_from);
2111   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2112
2113 #if (CLIB_N_PREFETCHES >= 8)
2114   if (n_left_from >= 6)
2115     {
2116       int i;
2117       for (i = 2; i < 6; i++)
2118         vlib_prefetch_buffer_header (bufs[i], LOAD);
2119     }
2120
2121   next = nexts;
2122   b = bufs;
2123   while (n_left_from >= 8)
2124     {
2125       const ip_adjacency_t *adj0, *adj1;
2126       ip4_header_t *ip0, *ip1;
2127       u32 rw_len0, error0, adj_index0;
2128       u32 rw_len1, error1, adj_index1;
2129       u32 tx_sw_if_index0, tx_sw_if_index1;
2130       u8 *p;
2131
2132       if (is_midchain)
2133         {
2134           vlib_prefetch_buffer_header (b[6], LOAD);
2135           vlib_prefetch_buffer_header (b[7], LOAD);
2136         }
2137
2138       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2139       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2140
2141       /*
2142        * pre-fetch the per-adjacency counters
2143        */
2144       if (do_counters)
2145         {
2146           vlib_prefetch_combined_counter (&adjacency_counters,
2147                                           thread_index, adj_index0);
2148           vlib_prefetch_combined_counter (&adjacency_counters,
2149                                           thread_index, adj_index1);
2150         }
2151
2152       ip0 = vlib_buffer_get_current (b[0]);
2153       ip1 = vlib_buffer_get_current (b[1]);
2154
2155       error0 = error1 = IP4_ERROR_NONE;
2156
2157       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2158       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2159
2160       /* Rewrite packet header and updates lengths. */
2161       adj0 = adj_get (adj_index0);
2162       adj1 = adj_get (adj_index1);
2163
2164       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2165       rw_len0 = adj0[0].rewrite_header.data_bytes;
2166       rw_len1 = adj1[0].rewrite_header.data_bytes;
2167       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2168       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2169
2170       p = vlib_buffer_get_current (b[2]);
2171       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2172       clib_prefetch_load (p);
2173
2174       p = vlib_buffer_get_current (b[3]);
2175       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2176       clib_prefetch_load (p);
2177
2178       /* Check MTU of outgoing interface. */
2179       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2180       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2181
2182       if (b[0]->flags & VNET_BUFFER_F_GSO)
2183         ip0_len = gso_mtu_sz (b[0]);
2184       if (b[1]->flags & VNET_BUFFER_F_GSO)
2185         ip1_len = gso_mtu_sz (b[1]);
2186
2187       ip4_mtu_check (b[0], ip0_len,
2188                      adj0[0].rewrite_header.max_l3_packet_bytes,
2189                      ip0->flags_and_fragment_offset &
2190                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2191                      next + 0, is_midchain, &error0);
2192       ip4_mtu_check (b[1], ip1_len,
2193                      adj1[0].rewrite_header.max_l3_packet_bytes,
2194                      ip1->flags_and_fragment_offset &
2195                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2196                      next + 1, is_midchain, &error1);
2197
2198       if (is_mcast)
2199         {
2200           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2201                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2202                     IP4_ERROR_SAME_INTERFACE : error0);
2203           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2204                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2205                     IP4_ERROR_SAME_INTERFACE : error1);
2206         }
2207
2208       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2209        * to see the IP header */
2210       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2211         {
2212           u32 next_index = adj0[0].rewrite_header.next_index;
2213           vlib_buffer_advance (b[0], -(word) rw_len0);
2214
2215           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2216           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2217
2218           if (PREDICT_FALSE
2219               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2220             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2221                                                 tx_sw_if_index0,
2222                                                 &next_index, b[0],
2223                                                 adj0->ia_cfg_index);
2224
2225           next[0] = next_index;
2226           if (is_midchain)
2227             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2228                                         0 /* is_ip6 */ );
2229         }
2230       else
2231         {
2232           b[0]->error = error_node->errors[error0];
2233           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2234             ip4_ttl_inc (b[0], ip0);
2235         }
2236       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2237         {
2238           u32 next_index = adj1[0].rewrite_header.next_index;
2239           vlib_buffer_advance (b[1], -(word) rw_len1);
2240
2241           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2242           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2243
2244           if (PREDICT_FALSE
2245               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2246             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2247                                                 tx_sw_if_index1,
2248                                                 &next_index, b[1],
2249                                                 adj1->ia_cfg_index);
2250           next[1] = next_index;
2251           if (is_midchain)
2252             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2253                                         0 /* is_ip6 */ );
2254         }
2255       else
2256         {
2257           b[1]->error = error_node->errors[error1];
2258           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2259             ip4_ttl_inc (b[1], ip1);
2260         }
2261
2262       if (is_midchain)
2263         /* Guess we are only writing on ipv4 header. */
2264         vnet_rewrite_two_headers (adj0[0], adj1[0],
2265                                   ip0, ip1, sizeof (ip4_header_t));
2266       else
2267         /* Guess we are only writing on simple Ethernet header. */
2268         vnet_rewrite_two_headers (adj0[0], adj1[0],
2269                                   ip0, ip1, sizeof (ethernet_header_t));
2270
2271       if (do_counters)
2272         {
2273           if (error0 == IP4_ERROR_NONE)
2274             vlib_increment_combined_counter
2275               (&adjacency_counters,
2276                thread_index,
2277                adj_index0, 1,
2278                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2279
2280           if (error1 == IP4_ERROR_NONE)
2281             vlib_increment_combined_counter
2282               (&adjacency_counters,
2283                thread_index,
2284                adj_index1, 1,
2285                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2286         }
2287
2288       if (is_midchain)
2289         {
2290           if (error0 == IP4_ERROR_NONE)
2291             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2292           if (error1 == IP4_ERROR_NONE)
2293             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2294         }
2295
2296       if (is_mcast)
2297         {
2298           /* copy bytes from the IP address into the MAC rewrite */
2299           if (error0 == IP4_ERROR_NONE)
2300             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2301                                         adj0->rewrite_header.dst_mcast_offset,
2302                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2303           if (error1 == IP4_ERROR_NONE)
2304             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2305                                         adj1->rewrite_header.dst_mcast_offset,
2306                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2307         }
2308
2309       next += 2;
2310       b += 2;
2311       n_left_from -= 2;
2312     }
2313 #elif (CLIB_N_PREFETCHES >= 4)
2314   next = nexts;
2315   b = bufs;
2316   while (n_left_from >= 1)
2317     {
2318       ip_adjacency_t *adj0;
2319       ip4_header_t *ip0;
2320       u32 rw_len0, error0, adj_index0;
2321       u32 tx_sw_if_index0;
2322       u8 *p;
2323
2324       /* Prefetch next iteration */
2325       if (PREDICT_TRUE (n_left_from >= 4))
2326         {
2327           ip_adjacency_t *adj2;
2328           u32 adj_index2;
2329
2330           vlib_prefetch_buffer_header (b[3], LOAD);
2331           vlib_prefetch_buffer_data (b[2], LOAD);
2332
2333           /* Prefetch adj->rewrite_header */
2334           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2335           adj2 = adj_get (adj_index2);
2336           p = (u8 *) adj2;
2337           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2338                          LOAD);
2339         }
2340
2341       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2342
2343       /*
2344        * Prefetch the per-adjacency counters
2345        */
2346       if (do_counters)
2347         {
2348           vlib_prefetch_combined_counter (&adjacency_counters,
2349                                           thread_index, adj_index0);
2350         }
2351
2352       ip0 = vlib_buffer_get_current (b[0]);
2353
2354       error0 = IP4_ERROR_NONE;
2355
2356       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2357
2358       /* Rewrite packet header and updates lengths. */
2359       adj0 = adj_get (adj_index0);
2360
2361       /* Rewrite header was prefetched. */
2362       rw_len0 = adj0[0].rewrite_header.data_bytes;
2363       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2364
2365       /* Check MTU of outgoing interface. */
2366       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2367
2368       if (b[0]->flags & VNET_BUFFER_F_GSO)
2369         ip0_len = gso_mtu_sz (b[0]);
2370
2371       ip4_mtu_check (b[0], ip0_len,
2372                      adj0[0].rewrite_header.max_l3_packet_bytes,
2373                      ip0->flags_and_fragment_offset &
2374                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2375                      next + 0, is_midchain, &error0);
2376
2377       if (is_mcast)
2378         {
2379           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2380                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2381                     IP4_ERROR_SAME_INTERFACE : error0);
2382         }
2383
2384       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2385        * to see the IP header */
2386       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2387         {
2388           u32 next_index = adj0[0].rewrite_header.next_index;
2389           vlib_buffer_advance (b[0], -(word) rw_len0);
2390           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2391           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2392
2393           if (PREDICT_FALSE
2394               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2395             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2396                                                 tx_sw_if_index0,
2397                                                 &next_index, b[0],
2398                                                 adj0->ia_cfg_index);
2399           next[0] = next_index;
2400
2401           if (is_midchain)
2402             {
2403               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2404                                           0 /* is_ip6 */ );
2405
2406               /* Guess we are only writing on ipv4 header. */
2407               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2408             }
2409           else
2410             /* Guess we are only writing on simple Ethernet header. */
2411             vnet_rewrite_one_header (adj0[0], ip0,
2412                                      sizeof (ethernet_header_t));
2413
2414           /*
2415            * Bump the per-adjacency counters
2416            */
2417           if (do_counters)
2418             vlib_increment_combined_counter
2419               (&adjacency_counters,
2420                thread_index,
2421                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2422                                                            b[0]) + rw_len0);
2423
2424           if (is_midchain)
2425             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2426
2427           if (is_mcast)
2428             /* copy bytes from the IP address into the MAC rewrite */
2429             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2430                                         adj0->rewrite_header.dst_mcast_offset,
2431                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2432         }
2433       else
2434         {
2435           b[0]->error = error_node->errors[error0];
2436           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2437             ip4_ttl_inc (b[0], ip0);
2438         }
2439
2440       next += 1;
2441       b += 1;
2442       n_left_from -= 1;
2443     }
2444 #endif
2445
2446   while (n_left_from > 0)
2447     {
2448       ip_adjacency_t *adj0;
2449       ip4_header_t *ip0;
2450       u32 rw_len0, adj_index0, error0;
2451       u32 tx_sw_if_index0;
2452
2453       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2454
2455       adj0 = adj_get (adj_index0);
2456
2457       if (do_counters)
2458         vlib_prefetch_combined_counter (&adjacency_counters,
2459                                         thread_index, adj_index0);
2460
2461       ip0 = vlib_buffer_get_current (b[0]);
2462
2463       error0 = IP4_ERROR_NONE;
2464
2465       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2466
2467
2468       /* Update packet buffer attributes/set output interface. */
2469       rw_len0 = adj0[0].rewrite_header.data_bytes;
2470       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2471
2472       /* Check MTU of outgoing interface. */
2473       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2474       if (b[0]->flags & VNET_BUFFER_F_GSO)
2475         ip0_len = gso_mtu_sz (b[0]);
2476
2477       ip4_mtu_check (b[0], ip0_len,
2478                      adj0[0].rewrite_header.max_l3_packet_bytes,
2479                      ip0->flags_and_fragment_offset &
2480                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2481                      next + 0, is_midchain, &error0);
2482
2483       if (is_mcast)
2484         {
2485           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2486                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2487                     IP4_ERROR_SAME_INTERFACE : error0);
2488         }
2489
2490       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2491        * to see the IP header */
2492       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2493         {
2494           u32 next_index = adj0[0].rewrite_header.next_index;
2495           vlib_buffer_advance (b[0], -(word) rw_len0);
2496           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2497           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2498
2499           if (PREDICT_FALSE
2500               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2501             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2502                                                 tx_sw_if_index0,
2503                                                 &next_index, b[0],
2504                                                 adj0->ia_cfg_index);
2505           next[0] = next_index;
2506
2507           if (is_midchain)
2508             {
2509               /* this acts on the packet that is about to be encapped */
2510               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2511                                           0 /* is_ip6 */ );
2512
2513               /* Guess we are only writing on ipv4 header. */
2514               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2515             }
2516           else
2517             /* Guess we are only writing on simple Ethernet header. */
2518             vnet_rewrite_one_header (adj0[0], ip0,
2519                                      sizeof (ethernet_header_t));
2520
2521           if (do_counters)
2522             vlib_increment_combined_counter
2523               (&adjacency_counters,
2524                thread_index, adj_index0, 1,
2525                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2526
2527           if (is_midchain)
2528             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2529
2530           if (is_mcast)
2531             /* copy bytes from the IP address into the MAC rewrite */
2532             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2533                                         adj0->rewrite_header.dst_mcast_offset,
2534                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2535         }
2536       else
2537         {
2538           b[0]->error = error_node->errors[error0];
2539           /* undo the TTL decrement - we'll be back to do it again */
2540           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2541             ip4_ttl_inc (b[0], ip0);
2542         }
2543
2544       next += 1;
2545       b += 1;
2546       n_left_from -= 1;
2547     }
2548
2549
2550   /* Need to do trace after rewrites to pick up new packet data. */
2551   if (node->flags & VLIB_NODE_FLAG_TRACE)
2552     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2553
2554   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2555   return frame->n_vectors;
2556 }
2557
2558 /** @brief IPv4 rewrite node.
2559     @node ip4-rewrite
2560
2561     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2562     header checksum, fetch the ip adjacency, check the outbound mtu,
2563     apply the adjacency rewrite, and send pkts to the adjacency
2564     rewrite header's rewrite_next_index.
2565
2566     @param vm vlib_main_t corresponding to the current thread
2567     @param node vlib_node_runtime_t
2568     @param frame vlib_frame_t whose contents should be dispatched
2569
2570     @par Graph mechanics: buffer metadata, next index usage
2571
2572     @em Uses:
2573     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2574         - the rewrite adjacency index
2575     - <code>adj->lookup_next_index</code>
2576         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2577           the packet will be dropped.
2578     - <code>adj->rewrite_header</code>
2579         - Rewrite string length, rewrite string, next_index
2580
2581     @em Sets:
2582     - <code>b->current_data, b->current_length</code>
2583         - Updated net of applying the rewrite string
2584
2585     <em>Next Indices:</em>
2586     - <code> adj->rewrite_header.next_index </code>
2587       or @c ip4-drop
2588 */
2589
2590 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2591                                  vlib_frame_t * frame)
2592 {
2593   if (adj_are_counters_enabled ())
2594     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2595   else
2596     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2597 }
2598
2599 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2600                                        vlib_node_runtime_t * node,
2601                                        vlib_frame_t * frame)
2602 {
2603   if (adj_are_counters_enabled ())
2604     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2605   else
2606     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2607 }
2608
2609 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2610                                   vlib_node_runtime_t * node,
2611                                   vlib_frame_t * frame)
2612 {
2613   if (adj_are_counters_enabled ())
2614     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2615   else
2616     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2617 }
2618
2619 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2620                                        vlib_node_runtime_t * node,
2621                                        vlib_frame_t * frame)
2622 {
2623   if (adj_are_counters_enabled ())
2624     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2625   else
2626     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2627 }
2628
2629 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2630                                         vlib_node_runtime_t * node,
2631                                         vlib_frame_t * frame)
2632 {
2633   if (adj_are_counters_enabled ())
2634     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2635   else
2636     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2637 }
2638
2639 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2640   .name = "ip4-rewrite",
2641   .vector_size = sizeof (u32),
2642
2643   .format_trace = format_ip4_rewrite_trace,
2644
2645   .n_next_nodes = IP4_REWRITE_N_NEXT,
2646   .next_nodes = {
2647     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2648     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2649     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2650   },
2651 };
2652
2653 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2654   .name = "ip4-rewrite-bcast",
2655   .vector_size = sizeof (u32),
2656
2657   .format_trace = format_ip4_rewrite_trace,
2658   .sibling_of = "ip4-rewrite",
2659 };
2660
2661 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2662   .name = "ip4-rewrite-mcast",
2663   .vector_size = sizeof (u32),
2664
2665   .format_trace = format_ip4_rewrite_trace,
2666   .sibling_of = "ip4-rewrite",
2667 };
2668
2669 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2670   .name = "ip4-mcast-midchain",
2671   .vector_size = sizeof (u32),
2672
2673   .format_trace = format_ip4_rewrite_trace,
2674   .sibling_of = "ip4-rewrite",
2675 };
2676
2677 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2678   .name = "ip4-midchain",
2679   .vector_size = sizeof (u32),
2680   .format_trace = format_ip4_rewrite_trace,
2681   .sibling_of = "ip4-rewrite",
2682 };
2683
2684 static clib_error_t *
2685 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2686                              unformat_input_t * input,
2687                              vlib_cli_command_t * cmd)
2688 {
2689   int matched = 0;
2690   u32 table_id = 0;
2691   u32 flow_hash_config = 0;
2692   int rv;
2693
2694   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2695     {
2696       if (unformat (input, "table %d", &table_id))
2697         matched = 1;
2698 #define _(a, b, v)                                                            \
2699   else if (unformat (input, #a))                                              \
2700   {                                                                           \
2701     flow_hash_config |= v;                                                    \
2702     matched = 1;                                                              \
2703   }
2704       foreach_flow_hash_bit
2705 #undef _
2706         else
2707         break;
2708     }
2709
2710   if (matched == 0)
2711     return clib_error_return (0, "unknown input `%U'",
2712                               format_unformat_error, input);
2713
2714   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2715   switch (rv)
2716     {
2717     case 0:
2718       break;
2719
2720     case VNET_API_ERROR_NO_SUCH_FIB:
2721       return clib_error_return (0, "no such FIB table %d", table_id);
2722
2723     default:
2724       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2725       break;
2726     }
2727
2728   return 0;
2729 }
2730
2731 /*?
2732  * Configure the set of IPv4 fields used by the flow hash.
2733  *
2734  * @cliexpar
2735  * Example of how to set the flow hash on a given table:
2736  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2737  * Example of how to display the configured flow hash:
2738  * @cliexstart{show ip fib}
2739  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2740  * 0.0.0.0/0
2741  *   unicast-ip4-chain
2742  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2743  *     [0] [@0]: dpo-drop ip6
2744  * 0.0.0.0/32
2745  *   unicast-ip4-chain
2746  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2747  *     [0] [@0]: dpo-drop ip6
2748  * 224.0.0.0/8
2749  *   unicast-ip4-chain
2750  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2751  *     [0] [@0]: dpo-drop ip6
2752  * 6.0.1.2/32
2753  *   unicast-ip4-chain
2754  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2755  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2756  * 7.0.0.1/32
2757  *   unicast-ip4-chain
2758  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2759  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2760  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2761  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2762  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2763  * 240.0.0.0/8
2764  *   unicast-ip4-chain
2765  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2766  *     [0] [@0]: dpo-drop ip6
2767  * 255.255.255.255/32
2768  *   unicast-ip4-chain
2769  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2770  *     [0] [@0]: dpo-drop ip6
2771  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2772  * 0.0.0.0/0
2773  *   unicast-ip4-chain
2774  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2775  *     [0] [@0]: dpo-drop ip6
2776  * 0.0.0.0/32
2777  *   unicast-ip4-chain
2778  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2779  *     [0] [@0]: dpo-drop ip6
2780  * 172.16.1.0/24
2781  *   unicast-ip4-chain
2782  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2783  *     [0] [@4]: ipv4-glean: af_packet0
2784  * 172.16.1.1/32
2785  *   unicast-ip4-chain
2786  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2787  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2788  * 172.16.1.2/32
2789  *   unicast-ip4-chain
2790  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2791  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2792  * 172.16.2.0/24
2793  *   unicast-ip4-chain
2794  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2795  *     [0] [@4]: ipv4-glean: af_packet1
2796  * 172.16.2.1/32
2797  *   unicast-ip4-chain
2798  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2799  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2800  * 224.0.0.0/8
2801  *   unicast-ip4-chain
2802  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2803  *     [0] [@0]: dpo-drop ip6
2804  * 240.0.0.0/8
2805  *   unicast-ip4-chain
2806  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2807  *     [0] [@0]: dpo-drop ip6
2808  * 255.255.255.255/32
2809  *   unicast-ip4-chain
2810  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2811  *     [0] [@0]: dpo-drop ip6
2812  * @cliexend
2813 ?*/
2814 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
2815   .path = "set ip flow-hash",
2816   .short_help = "set ip flow-hash table <table-id> [src] [dst] [sport] "
2817                 "[dport] [proto] [reverse] [gtpv1teid]",
2818   .function = set_ip_flow_hash_command_fn,
2819 };
2820
2821 #ifndef CLIB_MARCH_VARIANT
2822 int
2823 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2824                              u32 table_index)
2825 {
2826   vnet_main_t *vnm = vnet_get_main ();
2827   vnet_interface_main_t *im = &vnm->interface_main;
2828   ip4_main_t *ipm = &ip4_main;
2829   ip_lookup_main_t *lm = &ipm->lookup_main;
2830   vnet_classify_main_t *cm = &vnet_classify_main;
2831   ip4_address_t *if_addr;
2832
2833   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2834     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2835
2836   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2837     return VNET_API_ERROR_NO_SUCH_ENTRY;
2838
2839   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2840   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2841
2842   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2843
2844   if (NULL != if_addr)
2845     {
2846       fib_prefix_t pfx = {
2847         .fp_len = 32,
2848         .fp_proto = FIB_PROTOCOL_IP4,
2849         .fp_addr.ip4 = *if_addr,
2850       };
2851       u32 fib_index;
2852
2853       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2854                                                        sw_if_index);
2855
2856
2857       if (table_index != (u32) ~ 0)
2858         {
2859           dpo_id_t dpo = DPO_INVALID;
2860
2861           dpo_set (&dpo,
2862                    DPO_CLASSIFY,
2863                    DPO_PROTO_IP4,
2864                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2865
2866           fib_table_entry_special_dpo_add (fib_index,
2867                                            &pfx,
2868                                            FIB_SOURCE_CLASSIFY,
2869                                            FIB_ENTRY_FLAG_NONE, &dpo);
2870           dpo_reset (&dpo);
2871         }
2872       else
2873         {
2874           fib_table_entry_special_remove (fib_index,
2875                                           &pfx, FIB_SOURCE_CLASSIFY);
2876         }
2877     }
2878
2879   return 0;
2880 }
2881 #endif
2882
2883 static clib_error_t *
2884 set_ip_classify_command_fn (vlib_main_t * vm,
2885                             unformat_input_t * input,
2886                             vlib_cli_command_t * cmd)
2887 {
2888   u32 table_index = ~0;
2889   int table_index_set = 0;
2890   u32 sw_if_index = ~0;
2891   int rv;
2892
2893   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2894     {
2895       if (unformat (input, "table-index %d", &table_index))
2896         table_index_set = 1;
2897       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2898                          vnet_get_main (), &sw_if_index))
2899         ;
2900       else
2901         break;
2902     }
2903
2904   if (table_index_set == 0)
2905     return clib_error_return (0, "classify table-index must be specified");
2906
2907   if (sw_if_index == ~0)
2908     return clib_error_return (0, "interface / subif must be specified");
2909
2910   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2911
2912   switch (rv)
2913     {
2914     case 0:
2915       break;
2916
2917     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2918       return clib_error_return (0, "No such interface");
2919
2920     case VNET_API_ERROR_NO_SUCH_ENTRY:
2921       return clib_error_return (0, "No such classifier table");
2922     }
2923   return 0;
2924 }
2925
2926 /*?
2927  * Assign a classification table to an interface. The classification
2928  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2929  * commands. Once the table is created, use this command to filter packets
2930  * on an interface.
2931  *
2932  * @cliexpar
2933  * Example of how to assign a classification table to an interface:
2934  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2935 ?*/
2936 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2937 {
2938     .path = "set ip classify",
2939     .short_help =
2940     "set ip classify intfc <interface> table-index <classify-idx>",
2941     .function = set_ip_classify_command_fn,
2942 };
2943
2944 /*
2945  * fd.io coding-style-patch-verification: ON
2946  *
2947  * Local Variables:
2948  * eval: (c-set-style "gnu")
2949  * End:
2950  */