ip: unlock_fib on if delete
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/receive_dpo.h>
56 #include <vnet/dpo/classify_dpo.h>
57 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
58 #include <vnet/adj/adj_dp.h>
59 #include <vnet/pg/pg.h>
60
61 #include <vnet/ip/ip4_forward.h>
62 #include <vnet/interface_output.h>
63 #include <vnet/classify/vnet_classify.h>
64
65 /** @brief IPv4 lookup node.
66     @node ip4-lookup
67
68     This is the main IPv4 lookup dispatch node.
69
70     @param vm vlib_main_t corresponding to the current thread
71     @param node vlib_node_runtime_t
72     @param frame vlib_frame_t whose contents should be dispatched
73
74     @par Graph mechanics: buffer metadata, next index usage
75
76     @em Uses:
77     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
78         - Indicates the @c sw_if_index value of the interface that the
79           packet was received on.
80     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
81         - When the value is @c ~0 then the node performs a longest prefix
82           match (LPM) for the packet destination address in the FIB attached
83           to the receive interface.
84         - Otherwise perform LPM for the packet destination address in the
85           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
86           value (0, 1, ...) and not a VRF id.
87
88     @em Sets:
89     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
90         - The lookup result adjacency index.
91
92     <em>Next Index:</em>
93     - Dispatches the packet to the node index found in
94       ip_adjacency_t @c adj->lookup_next_index
95       (where @c adj is the lookup result adjacency).
96 */
97 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
98                                 vlib_frame_t * frame)
99 {
100   return ip4_lookup_inline (vm, node, frame);
101 }
102
103 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
104
105 /* *INDENT-OFF* */
106 VLIB_REGISTER_NODE (ip4_lookup_node) =
107 {
108   .name = "ip4-lookup",
109   .vector_size = sizeof (u32),
110   .format_trace = format_ip4_lookup_trace,
111   .n_next_nodes = IP_LOOKUP_N_NEXT,
112   .next_nodes = IP4_LOOKUP_NEXT_NODES,
113 };
114 /* *INDENT-ON* */
115
116 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
117                                       vlib_node_runtime_t * node,
118                                       vlib_frame_t * frame)
119 {
120   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
121   u32 n_left, *from;
122   u32 thread_index = vm->thread_index;
123   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
124   u16 nexts[VLIB_FRAME_SIZE], *next;
125
126   from = vlib_frame_vector_args (frame);
127   n_left = frame->n_vectors;
128   next = nexts;
129
130   vlib_get_buffers (vm, from, bufs, n_left);
131
132   while (n_left >= 4)
133     {
134       const load_balance_t *lb0, *lb1;
135       const ip4_header_t *ip0, *ip1;
136       u32 lbi0, hc0, lbi1, hc1;
137       const dpo_id_t *dpo0, *dpo1;
138
139       /* Prefetch next iteration. */
140       {
141         vlib_prefetch_buffer_header (b[2], LOAD);
142         vlib_prefetch_buffer_header (b[3], LOAD);
143
144         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
145         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
146       }
147
148       ip0 = vlib_buffer_get_current (b[0]);
149       ip1 = vlib_buffer_get_current (b[1]);
150       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
151       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
152
153       lb0 = load_balance_get (lbi0);
154       lb1 = load_balance_get (lbi1);
155
156       /*
157        * this node is for via FIBs we can re-use the hash value from the
158        * to node if present.
159        * We don't want to use the same hash value at each level in the recursion
160        * graph as that would lead to polarisation
161        */
162       hc0 = hc1 = 0;
163
164       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
165         {
166           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
167             {
168               hc0 = vnet_buffer (b[0])->ip.flow_hash =
169                 vnet_buffer (b[0])->ip.flow_hash >> 1;
170             }
171           else
172             {
173               hc0 = vnet_buffer (b[0])->ip.flow_hash =
174                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
175             }
176           dpo0 = load_balance_get_fwd_bucket
177             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
178         }
179       else
180         {
181           dpo0 = load_balance_get_bucket_i (lb0, 0);
182         }
183       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
184         {
185           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
186             {
187               hc1 = vnet_buffer (b[1])->ip.flow_hash =
188                 vnet_buffer (b[1])->ip.flow_hash >> 1;
189             }
190           else
191             {
192               hc1 = vnet_buffer (b[1])->ip.flow_hash =
193                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
194             }
195           dpo1 = load_balance_get_fwd_bucket
196             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
197         }
198       else
199         {
200           dpo1 = load_balance_get_bucket_i (lb1, 0);
201         }
202
203       next[0] = dpo0->dpoi_next_node;
204       next[1] = dpo1->dpoi_next_node;
205
206       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
207       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
208
209       vlib_increment_combined_counter
210         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
211       vlib_increment_combined_counter
212         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
213
214       b += 2;
215       next += 2;
216       n_left -= 2;
217     }
218
219   while (n_left > 0)
220     {
221       const load_balance_t *lb0;
222       const ip4_header_t *ip0;
223       const dpo_id_t *dpo0;
224       u32 lbi0, hc0;
225
226       ip0 = vlib_buffer_get_current (b[0]);
227       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
228
229       lb0 = load_balance_get (lbi0);
230
231       hc0 = 0;
232       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
233         {
234           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
235             {
236               hc0 = vnet_buffer (b[0])->ip.flow_hash =
237                 vnet_buffer (b[0])->ip.flow_hash >> 1;
238             }
239           else
240             {
241               hc0 = vnet_buffer (b[0])->ip.flow_hash =
242                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
243             }
244           dpo0 = load_balance_get_fwd_bucket
245             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
246         }
247       else
248         {
249           dpo0 = load_balance_get_bucket_i (lb0, 0);
250         }
251
252       next[0] = dpo0->dpoi_next_node;
253       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
254
255       vlib_increment_combined_counter
256         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
257
258       b += 1;
259       next += 1;
260       n_left -= 1;
261     }
262
263   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
264   if (node->flags & VLIB_NODE_FLAG_TRACE)
265     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
266
267   return frame->n_vectors;
268 }
269
270 /* *INDENT-OFF* */
271 VLIB_REGISTER_NODE (ip4_load_balance_node) =
272 {
273   .name = "ip4-load-balance",
274   .vector_size = sizeof (u32),
275   .sibling_of = "ip4-lookup",
276   .format_trace = format_ip4_lookup_trace,
277 };
278 /* *INDENT-ON* */
279
280 #ifndef CLIB_MARCH_VARIANT
281 /* get first interface address */
282 ip4_address_t *
283 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
284                              ip_interface_address_t ** result_ia)
285 {
286   ip_lookup_main_t *lm = &im->lookup_main;
287   ip_interface_address_t *ia = 0;
288   ip4_address_t *result = 0;
289
290   /* *INDENT-OFF* */
291   foreach_ip_interface_address
292     (lm, ia, sw_if_index,
293      1 /* honor unnumbered */ ,
294      ({
295        ip4_address_t * a =
296          ip_interface_address_get_address (lm, ia);
297        result = a;
298        break;
299      }));
300   /* *INDENT-OFF* */
301   if (result_ia)
302     *result_ia = result ? ia : 0;
303   return result;
304 }
305 #endif
306
307 static void
308 ip4_add_subnet_bcast_route (u32 fib_index,
309                             fib_prefix_t *pfx,
310                             u32 sw_if_index)
311 {
312   vnet_sw_interface_flags_t iflags;
313
314   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
315
316   fib_table_entry_special_remove(fib_index,
317                                  pfx,
318                                  FIB_SOURCE_INTERFACE);
319
320   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
321     {
322       fib_table_entry_update_one_path (fib_index, pfx,
323                                        FIB_SOURCE_INTERFACE,
324                                        FIB_ENTRY_FLAG_NONE,
325                                        DPO_PROTO_IP4,
326                                        /* No next-hop address */
327                                        &ADJ_BCAST_ADDR,
328                                        sw_if_index,
329                                        // invalid FIB index
330                                        ~0,
331                                        1,
332                                        // no out-label stack
333                                        NULL,
334                                        FIB_ROUTE_PATH_FLAG_NONE);
335     }
336   else
337     {
338         fib_table_entry_special_add(fib_index,
339                                     pfx,
340                                     FIB_SOURCE_INTERFACE,
341                                     (FIB_ENTRY_FLAG_DROP |
342                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
343     }
344 }
345
346 static void
347 ip4_add_interface_prefix_routes (ip4_main_t *im,
348                                  u32 sw_if_index,
349                                  u32 fib_index,
350                                  ip_interface_address_t * a)
351 {
352   ip_lookup_main_t *lm = &im->lookup_main;
353   ip_interface_prefix_t *if_prefix;
354   ip4_address_t *address = ip_interface_address_get_address (lm, a);
355
356   ip_interface_prefix_key_t key = {
357     .prefix = {
358       .fp_len = a->address_length,
359       .fp_proto = FIB_PROTOCOL_IP4,
360       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
361     },
362     .sw_if_index = sw_if_index,
363   };
364
365   fib_prefix_t pfx_special = {
366     .fp_proto = FIB_PROTOCOL_IP4,
367   };
368
369   /* If prefix already set on interface, just increment ref count & return */
370   if_prefix = ip_get_interface_prefix (lm, &key);
371   if (if_prefix)
372     {
373       if_prefix->ref_count += 1;
374       return;
375     }
376
377   /* New prefix - allocate a pool entry, initialize it, add to the hash */
378   pool_get (lm->if_prefix_pool, if_prefix);
379   if_prefix->ref_count = 1;
380   if_prefix->src_ia_index = a - lm->if_address_pool;
381   clib_memcpy (&if_prefix->key, &key, sizeof (key));
382   mhash_set (&lm->prefix_to_if_prefix_index, &key,
383              if_prefix - lm->if_prefix_pool, 0 /* old value */);
384
385   pfx_special.fp_len = a->address_length;
386   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
387
388   /* set the glean route for the prefix */
389   fib_table_entry_update_one_path (fib_index, &pfx_special,
390                                    FIB_SOURCE_INTERFACE,
391                                    (FIB_ENTRY_FLAG_CONNECTED |
392                                     FIB_ENTRY_FLAG_ATTACHED),
393                                    DPO_PROTO_IP4,
394                                    /* No next-hop address */
395                                    NULL,
396                                    sw_if_index,
397                                    /* invalid FIB index */
398                                    ~0,
399                                    1,
400                                    /* no out-label stack */
401                                    NULL,
402                                    FIB_ROUTE_PATH_FLAG_NONE);
403
404   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
405   if (a->address_length <= 30)
406     {
407       /* set a drop route for the base address of the prefix */
408       pfx_special.fp_len = 32;
409       pfx_special.fp_addr.ip4.as_u32 =
410         address->as_u32 & im->fib_masks[a->address_length];
411
412       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
413         fib_table_entry_special_add (fib_index, &pfx_special,
414                                      FIB_SOURCE_INTERFACE,
415                                      (FIB_ENTRY_FLAG_DROP |
416                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
417
418       /* set a route for the broadcast address of the prefix */
419       pfx_special.fp_len = 32;
420       pfx_special.fp_addr.ip4.as_u32 =
421         address->as_u32 | ~im->fib_masks[a->address_length];
422       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
423         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
424
425
426     }
427   /* length == 31 - add an attached route for the other address */
428   else if (a->address_length == 31)
429     {
430       pfx_special.fp_len = 32;
431       pfx_special.fp_addr.ip4.as_u32 =
432         address->as_u32 ^ clib_host_to_net_u32(1);
433
434       fib_table_entry_update_one_path (fib_index, &pfx_special,
435                                        FIB_SOURCE_INTERFACE,
436                                        (FIB_ENTRY_FLAG_ATTACHED),
437                                        DPO_PROTO_IP4,
438                                        &pfx_special.fp_addr,
439                                        sw_if_index,
440                                        /* invalid FIB index */
441                                        ~0,
442                                        1,
443                                        NULL,
444                                        FIB_ROUTE_PATH_FLAG_NONE);
445     }
446 }
447
448 static void
449 ip4_add_interface_routes (u32 sw_if_index,
450                           ip4_main_t * im, u32 fib_index,
451                           ip_interface_address_t * a)
452 {
453   ip_lookup_main_t *lm = &im->lookup_main;
454   ip4_address_t *address = ip_interface_address_get_address (lm, a);
455   fib_prefix_t pfx = {
456     .fp_len = 32,
457     .fp_proto = FIB_PROTOCOL_IP4,
458     .fp_addr.ip4 = *address,
459   };
460
461   /* set special routes for the prefix if needed */
462   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
463
464   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
465     {
466       u32 classify_table_index =
467         lm->classify_table_index_by_sw_if_index[sw_if_index];
468       if (classify_table_index != (u32) ~ 0)
469         {
470           dpo_id_t dpo = DPO_INVALID;
471
472           dpo_set (&dpo,
473                    DPO_CLASSIFY,
474                    DPO_PROTO_IP4,
475                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
476
477           fib_table_entry_special_dpo_add (fib_index,
478                                            &pfx,
479                                            FIB_SOURCE_CLASSIFY,
480                                            FIB_ENTRY_FLAG_NONE, &dpo);
481           dpo_reset (&dpo);
482         }
483     }
484
485   fib_table_entry_update_one_path (fib_index, &pfx,
486                                    FIB_SOURCE_INTERFACE,
487                                    (FIB_ENTRY_FLAG_CONNECTED |
488                                     FIB_ENTRY_FLAG_LOCAL),
489                                    DPO_PROTO_IP4,
490                                    &pfx.fp_addr,
491                                    sw_if_index,
492                                    // invalid FIB index
493                                    ~0,
494                                    1, NULL,
495                                    FIB_ROUTE_PATH_FLAG_NONE);
496 }
497
498 static void
499 ip4_del_interface_prefix_routes (ip4_main_t * im,
500                                  u32 sw_if_index,
501                                  u32 fib_index,
502                                  ip4_address_t * address,
503                                  u32 address_length)
504 {
505   ip_lookup_main_t *lm = &im->lookup_main;
506   ip_interface_prefix_t *if_prefix;
507
508   ip_interface_prefix_key_t key = {
509     .prefix = {
510       .fp_len = address_length,
511       .fp_proto = FIB_PROTOCOL_IP4,
512       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
513     },
514     .sw_if_index = sw_if_index,
515   };
516
517   fib_prefix_t pfx_special = {
518     .fp_len = 32,
519     .fp_proto = FIB_PROTOCOL_IP4,
520   };
521
522   if_prefix = ip_get_interface_prefix (lm, &key);
523   if (!if_prefix)
524     {
525       clib_warning ("Prefix not found while deleting %U",
526                     format_ip4_address_and_length, address, address_length);
527       return;
528     }
529
530   if_prefix->ref_count -= 1;
531
532   /*
533    * Routes need to be adjusted if deleting last intf addr in prefix
534    *
535    * We're done now otherwise
536    */
537   if (if_prefix->ref_count > 0)
538     return;
539
540   /* length <= 30, delete glean route, first address, last address */
541   if (address_length <= 30)
542     {
543       /* Less work to do in FIB if we remove the covered /32s first */
544
545       /* first address in prefix */
546       pfx_special.fp_addr.ip4.as_u32 =
547         address->as_u32 & im->fib_masks[address_length];
548       pfx_special.fp_len = 32;
549
550       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
551         fib_table_entry_special_remove (fib_index,
552                                         &pfx_special,
553                                         FIB_SOURCE_INTERFACE);
554
555       /* prefix broadcast address */
556       pfx_special.fp_addr.ip4.as_u32 =
557         address->as_u32 | ~im->fib_masks[address_length];
558       pfx_special.fp_len = 32;
559
560       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
561         fib_table_entry_special_remove (fib_index,
562                                         &pfx_special,
563                                         FIB_SOURCE_INTERFACE);
564     }
565   else if (address_length == 31)
566     {
567       /* length == 31, delete attached route for the other address */
568       pfx_special.fp_addr.ip4.as_u32 =
569         address->as_u32 ^ clib_host_to_net_u32(1);
570
571       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
572     }
573
574   /* remove glean route for prefix */
575   pfx_special.fp_addr.ip4 = *address;
576   pfx_special.fp_len = address_length;
577   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
578
579   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
580   pool_put (lm->if_prefix_pool, if_prefix);
581 }
582
583 static void
584 ip4_del_interface_routes (u32 sw_if_index,
585                           ip4_main_t * im,
586                           u32 fib_index,
587                           ip4_address_t * address, u32 address_length)
588 {
589   fib_prefix_t pfx = {
590     .fp_len = 32,
591     .fp_proto = FIB_PROTOCOL_IP4,
592     .fp_addr.ip4 = *address,
593   };
594
595   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
596
597   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
598                                    address, address_length);
599 }
600
601 #ifndef CLIB_MARCH_VARIANT
602 void
603 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
604 {
605   ip4_main_t *im = &ip4_main;
606   vnet_main_t *vnm = vnet_get_main ();
607   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
608
609   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
610
611   /*
612    * enable/disable only on the 1<->0 transition
613    */
614   if (is_enable)
615     {
616       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
617         return;
618     }
619   else
620     {
621       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
622       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
623         return;
624     }
625   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
626                                !is_enable, 0, 0);
627
628
629   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
630                                sw_if_index, !is_enable, 0, 0);
631
632   if (is_enable)
633     hi->l3_if_count++;
634   else if (hi->l3_if_count)
635     hi->l3_if_count--;
636
637   {
638     ip4_enable_disable_interface_callback_t *cb;
639     vec_foreach (cb, im->enable_disable_interface_callbacks)
640       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
641   }
642 }
643
644 static clib_error_t *
645 ip4_add_del_interface_address_internal (vlib_main_t * vm,
646                                         u32 sw_if_index,
647                                         ip4_address_t * address,
648                                         u32 address_length, u32 is_del)
649 {
650   vnet_main_t *vnm = vnet_get_main ();
651   ip4_main_t *im = &ip4_main;
652   ip_lookup_main_t *lm = &im->lookup_main;
653   clib_error_t *error = 0;
654   u32 if_address_index;
655   ip4_address_fib_t ip4_af, *addr_fib = 0;
656
657   error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
658   if (error)
659     {
660       vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
661       return error;
662     }
663
664   ip4_addr_fib_init (&ip4_af, address,
665                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
666   vec_add1 (addr_fib, ip4_af);
667
668   /*
669    * there is no support for adj-fib handling in the presence of overlapping
670    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
671    * most routers do.
672    */
673   /* *INDENT-OFF* */
674   if (!is_del)
675     {
676       /* When adding an address check that it does not conflict
677          with an existing address on any interface in this table. */
678       ip_interface_address_t *ia;
679       vnet_sw_interface_t *sif;
680
681       pool_foreach (sif, vnm->interface_main.sw_interfaces)
682        {
683           if (im->fib_index_by_sw_if_index[sw_if_index] ==
684               im->fib_index_by_sw_if_index[sif->sw_if_index])
685             {
686               foreach_ip_interface_address
687                 (&im->lookup_main, ia, sif->sw_if_index,
688                  0 /* honor unnumbered */ ,
689                  ({
690                    ip4_address_t * x =
691                      ip_interface_address_get_address
692                      (&im->lookup_main, ia);
693
694                    if (ip4_destination_matches_route
695                        (im, address, x, ia->address_length) ||
696                        ip4_destination_matches_route (im,
697                                                       x,
698                                                       address,
699                                                       address_length))
700                      {
701                        /* an intf may have >1 addr from the same prefix */
702                        if ((sw_if_index == sif->sw_if_index) &&
703                            (ia->address_length == address_length) &&
704                            (x->as_u32 != address->as_u32))
705                          continue;
706
707                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
708                          /* if the address we're comparing against is stale
709                           * then the CP has not added this one back yet, maybe
710                           * it never will, so we have to assume it won't and
711                           * ignore it. if it does add it back, then it will fail
712                           * because this one is now present */
713                          continue;
714
715                        /* error if the length or intf was different */
716                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
717
718                        error = clib_error_create
719                          ("failed to add %U on %U which conflicts with %U for interface %U",
720                           format_ip4_address_and_length, address,
721                           address_length,
722                           format_vnet_sw_if_index_name, vnm,
723                           sw_if_index,
724                           format_ip4_address_and_length, x,
725                           ia->address_length,
726                           format_vnet_sw_if_index_name, vnm,
727                           sif->sw_if_index);
728                        goto done;
729                      }
730                  }));
731             }
732       }
733     }
734   /* *INDENT-ON* */
735
736   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
737
738   if (is_del)
739     {
740       if (~0 == if_address_index)
741         {
742           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
743           error = clib_error_create ("%U not found for interface %U",
744                                      lm->format_address_and_length,
745                                      addr_fib, address_length,
746                                      format_vnet_sw_if_index_name, vnm,
747                                      sw_if_index);
748           goto done;
749         }
750
751       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
752                                         address_length, sw_if_index);
753       if (error)
754         goto done;
755     }
756   else
757     {
758       if (~0 != if_address_index)
759         {
760           ip_interface_address_t *ia;
761
762           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
763
764           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
765             {
766               if (ia->sw_if_index == sw_if_index)
767                 {
768                   /* re-adding an address during the replace action.
769                    * consdier this the update. clear the flag and
770                    * we're done */
771                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
772                   goto done;
773                 }
774               else
775                 {
776                   /* The prefix is moving from one interface to another.
777                    * delete the stale and add the new */
778                   ip4_add_del_interface_address_internal (vm,
779                                                           ia->sw_if_index,
780                                                           address,
781                                                           address_length, 1);
782                   ia = NULL;
783                   error = ip_interface_address_add (lm, sw_if_index,
784                                                     addr_fib, address_length,
785                                                     &if_address_index);
786                 }
787             }
788           else
789             {
790               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
791               error = clib_error_create
792                 ("Prefix %U already found on interface %U",
793                  lm->format_address_and_length, addr_fib, address_length,
794                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
795             }
796         }
797       else
798         error = ip_interface_address_add (lm, sw_if_index,
799                                           addr_fib, address_length,
800                                           &if_address_index);
801     }
802
803   if (error)
804     goto done;
805
806   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
807   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
808
809   /* intf addr routes are added/deleted on admin up/down */
810   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
811     {
812       if (is_del)
813         ip4_del_interface_routes (sw_if_index,
814                                   im, ip4_af.fib_index, address,
815                                   address_length);
816       else
817         ip4_add_interface_routes (sw_if_index,
818                                   im, ip4_af.fib_index,
819                                   pool_elt_at_index
820                                   (lm->if_address_pool, if_address_index));
821     }
822
823   ip4_add_del_interface_address_callback_t *cb;
824   vec_foreach (cb, im->add_del_interface_address_callbacks)
825     cb->function (im, cb->function_opaque, sw_if_index,
826                   address, address_length, if_address_index, is_del);
827
828 done:
829   vec_free (addr_fib);
830   return error;
831 }
832
833 clib_error_t *
834 ip4_add_del_interface_address (vlib_main_t * vm,
835                                u32 sw_if_index,
836                                ip4_address_t * address,
837                                u32 address_length, u32 is_del)
838 {
839   return ip4_add_del_interface_address_internal
840     (vm, sw_if_index, address, address_length, is_del);
841 }
842
843 void
844 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
845 {
846   ip_interface_address_t *ia;
847   ip4_main_t *im;
848
849   im = &ip4_main;
850
851   /*
852    * when directed broadcast is enabled, the subnet braodcast route will forward
853    * packets using an adjacency with a broadcast MAC. otherwise it drops
854    */
855   /* *INDENT-OFF* */
856   foreach_ip_interface_address(&im->lookup_main, ia,
857                                sw_if_index, 0,
858      ({
859        if (ia->address_length <= 30)
860          {
861            ip4_address_t *ipa;
862
863            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
864
865            fib_prefix_t pfx = {
866              .fp_len = 32,
867              .fp_proto = FIB_PROTOCOL_IP4,
868              .fp_addr = {
869                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
870              },
871            };
872
873            ip4_add_subnet_bcast_route
874              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
875                                                   sw_if_index),
876               &pfx, sw_if_index);
877          }
878      }));
879   /* *INDENT-ON* */
880 }
881 #endif
882
883 static clib_error_t *
884 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
885 {
886   ip4_main_t *im = &ip4_main;
887   ip_interface_address_t *ia;
888   ip4_address_t *a;
889   u32 is_admin_up, fib_index;
890
891   vec_validate_init_empty (im->
892                            lookup_main.if_address_pool_index_by_sw_if_index,
893                            sw_if_index, ~0);
894
895   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
896
897   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
898
899   /* *INDENT-OFF* */
900   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
901                                 0 /* honor unnumbered */,
902   ({
903     a = ip_interface_address_get_address (&im->lookup_main, ia);
904     if (is_admin_up)
905       ip4_add_interface_routes (sw_if_index,
906                                 im, fib_index,
907                                 ia);
908     else
909       ip4_del_interface_routes (sw_if_index,
910                                 im, fib_index,
911                                 a, ia->address_length);
912   }));
913   /* *INDENT-ON* */
914
915   return 0;
916 }
917
918 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
919
920 /* Built-in ip4 unicast rx feature path definition */
921 /* *INDENT-OFF* */
922 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
923 {
924   .arc_name = "ip4-unicast",
925   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
926   .last_in_arc = "ip4-lookup",
927   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
928 };
929
930 VNET_FEATURE_INIT (ip4_flow_classify, static) =
931 {
932   .arc_name = "ip4-unicast",
933   .node_name = "ip4-flow-classify",
934   .runs_before = VNET_FEATURES ("ip4-inacl"),
935 };
936
937 VNET_FEATURE_INIT (ip4_inacl, static) =
938 {
939   .arc_name = "ip4-unicast",
940   .node_name = "ip4-inacl",
941   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
942 };
943
944 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
945 {
946   .arc_name = "ip4-unicast",
947   .node_name = "ip4-source-and-port-range-check-rx",
948   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
949 };
950
951 VNET_FEATURE_INIT (ip4_policer_classify, static) =
952 {
953   .arc_name = "ip4-unicast",
954   .node_name = "ip4-policer-classify",
955   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
956 };
957
958 VNET_FEATURE_INIT (ip4_ipsec, static) =
959 {
960   .arc_name = "ip4-unicast",
961   .node_name = "ipsec4-input-feature",
962   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
963 };
964
965 VNET_FEATURE_INIT (ip4_vpath, static) =
966 {
967   .arc_name = "ip4-unicast",
968   .node_name = "vpath-input-ip4",
969   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
970 };
971
972 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
973 {
974   .arc_name = "ip4-unicast",
975   .node_name = "ip4-vxlan-bypass",
976   .runs_before = VNET_FEATURES ("ip4-lookup"),
977 };
978
979 VNET_FEATURE_INIT (ip4_not_enabled, static) =
980 {
981   .arc_name = "ip4-unicast",
982   .node_name = "ip4-not-enabled",
983   .runs_before = VNET_FEATURES ("ip4-lookup"),
984 };
985
986 VNET_FEATURE_INIT (ip4_lookup, static) =
987 {
988   .arc_name = "ip4-unicast",
989   .node_name = "ip4-lookup",
990   .runs_before = 0,     /* not before any other features */
991 };
992
993 /* Built-in ip4 multicast rx feature path definition */
994 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
995 {
996   .arc_name = "ip4-multicast",
997   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
998   .last_in_arc = "ip4-mfib-forward-lookup",
999   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1000 };
1001
1002 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1003 {
1004   .arc_name = "ip4-multicast",
1005   .node_name = "vpath-input-ip4",
1006   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1007 };
1008
1009 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1010 {
1011   .arc_name = "ip4-multicast",
1012   .node_name = "ip4-not-enabled",
1013   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1014 };
1015
1016 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1017 {
1018   .arc_name = "ip4-multicast",
1019   .node_name = "ip4-mfib-forward-lookup",
1020   .runs_before = 0,     /* last feature */
1021 };
1022
1023 /* Source and port-range check ip4 tx feature path definition */
1024 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1025 {
1026   .arc_name = "ip4-output",
1027   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1028   .last_in_arc = "interface-output",
1029   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1030 };
1031
1032 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1033 {
1034   .arc_name = "ip4-output",
1035   .node_name = "ip4-source-and-port-range-check-tx",
1036   .runs_before = VNET_FEATURES ("ip4-outacl"),
1037 };
1038
1039 VNET_FEATURE_INIT (ip4_outacl, static) =
1040 {
1041   .arc_name = "ip4-output",
1042   .node_name = "ip4-outacl",
1043   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1044 };
1045
1046 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1047 {
1048   .arc_name = "ip4-output",
1049   .node_name = "ipsec4-output-feature",
1050   .runs_before = VNET_FEATURES ("interface-output"),
1051 };
1052
1053 /* Built-in ip4 tx feature path definition */
1054 VNET_FEATURE_INIT (ip4_interface_output, static) =
1055 {
1056   .arc_name = "ip4-output",
1057   .node_name = "interface-output",
1058   .runs_before = 0,     /* not before any other features */
1059 };
1060 /* *INDENT-ON* */
1061
1062 static clib_error_t *
1063 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1064 {
1065   ip4_main_t *im = &ip4_main;
1066
1067   vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
1068   vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
1069
1070   if (is_add)
1071     {
1072       /* Fill in lookup tables with default table (0). */
1073       im->fib_index_by_sw_if_index[sw_if_index] = 0;
1074       im->mfib_index_by_sw_if_index[sw_if_index] = 0;
1075     }
1076   else
1077     {
1078       ip4_main_t *im4 = &ip4_main;
1079       ip_lookup_main_t *lm4 = &im4->lookup_main;
1080       ip_interface_address_t *ia = 0;
1081       ip4_address_t *address;
1082       vlib_main_t *vm = vlib_get_main ();
1083
1084       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1085       /* *INDENT-OFF* */
1086       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1087       ({
1088         address = ip_interface_address_get_address (lm4, ia);
1089         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1090       }));
1091       /* *INDENT-ON* */
1092       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1093
1094       if (0 != im4->fib_index_by_sw_if_index[sw_if_index])
1095         fib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
1096       if (0 != im4->mfib_index_by_sw_if_index[sw_if_index])
1097         mfib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
1098
1099       /* Erase the lookup tables just in case */
1100       im4->fib_index_by_sw_if_index[sw_if_index] = ~0;
1101       im4->mfib_index_by_sw_if_index[sw_if_index] = ~0;
1102     }
1103
1104   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1105                                is_add, 0, 0);
1106
1107   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1108                                sw_if_index, is_add, 0, 0);
1109
1110   return /* no error */ 0;
1111 }
1112
1113 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1114
1115 /* Global IP4 main. */
1116 #ifndef CLIB_MARCH_VARIANT
1117 ip4_main_t ip4_main;
1118 #endif /* CLIB_MARCH_VARIANT */
1119
1120 static clib_error_t *
1121 ip4_lookup_init (vlib_main_t * vm)
1122 {
1123   ip4_main_t *im = &ip4_main;
1124   clib_error_t *error;
1125   uword i;
1126
1127   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1128     return error;
1129   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1130     return (error);
1131   if ((error = vlib_call_init_function (vm, fib_module_init)))
1132     return error;
1133   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1134     return error;
1135
1136   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1137     {
1138       u32 m;
1139
1140       if (i < 32)
1141         m = pow2_mask (i) << (32 - i);
1142       else
1143         m = ~0;
1144       im->fib_masks[i] = clib_host_to_net_u32 (m);
1145     }
1146
1147   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1148
1149   /* Create FIB with index 0 and table id of 0. */
1150   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1151                                      FIB_SOURCE_DEFAULT_ROUTE);
1152   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1153                                       MFIB_SOURCE_DEFAULT_ROUTE);
1154
1155   {
1156     pg_node_t *pn;
1157     pn = pg_get_node (ip4_lookup_node.index);
1158     pn->unformat_edit = unformat_pg_ip4_header;
1159   }
1160
1161   {
1162     ethernet_arp_header_t h;
1163
1164     clib_memset (&h, 0, sizeof (h));
1165
1166 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1167 #define _8(f,v) h.f = v;
1168     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1169     _16 (l3_type, ETHERNET_TYPE_IP4);
1170     _8 (n_l2_address_bytes, 6);
1171     _8 (n_l3_address_bytes, 4);
1172     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1173 #undef _16
1174 #undef _8
1175
1176     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1177                                /* data */ &h,
1178                                sizeof (h),
1179                                /* alloc chunk size */ 8,
1180                                "ip4 arp");
1181   }
1182
1183   return error;
1184 }
1185
1186 VLIB_INIT_FUNCTION (ip4_lookup_init);
1187
1188 typedef struct
1189 {
1190   /* Adjacency taken. */
1191   u32 dpo_index;
1192   u32 flow_hash;
1193   u32 fib_index;
1194
1195   /* Packet data, possibly *after* rewrite. */
1196   u8 packet_data[64 - 1 * sizeof (u32)];
1197 }
1198 ip4_forward_next_trace_t;
1199
1200 #ifndef CLIB_MARCH_VARIANT
1201 u8 *
1202 format_ip4_forward_next_trace (u8 * s, va_list * args)
1203 {
1204   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1205   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1206   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1207   u32 indent = format_get_indent (s);
1208   s = format (s, "%U%U",
1209               format_white_space, indent,
1210               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1211   return s;
1212 }
1213 #endif
1214
1215 static u8 *
1216 format_ip4_lookup_trace (u8 * s, va_list * args)
1217 {
1218   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1219   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1220   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1221   u32 indent = format_get_indent (s);
1222
1223   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1224               t->fib_index, t->dpo_index, t->flow_hash);
1225   s = format (s, "\n%U%U",
1226               format_white_space, indent,
1227               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1228   return s;
1229 }
1230
1231 static u8 *
1232 format_ip4_rewrite_trace (u8 * s, va_list * args)
1233 {
1234   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1235   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1236   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1237   u32 indent = format_get_indent (s);
1238
1239   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1240               t->fib_index, t->dpo_index, format_ip_adjacency,
1241               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1242   s = format (s, "\n%U%U",
1243               format_white_space, indent,
1244               format_ip_adjacency_packet_data,
1245               t->packet_data, sizeof (t->packet_data));
1246   return s;
1247 }
1248
1249 #ifndef CLIB_MARCH_VARIANT
1250 /* Common trace function for all ip4-forward next nodes. */
1251 void
1252 ip4_forward_next_trace (vlib_main_t * vm,
1253                         vlib_node_runtime_t * node,
1254                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1255 {
1256   u32 *from, n_left;
1257   ip4_main_t *im = &ip4_main;
1258
1259   n_left = frame->n_vectors;
1260   from = vlib_frame_vector_args (frame);
1261
1262   while (n_left >= 4)
1263     {
1264       u32 bi0, bi1;
1265       vlib_buffer_t *b0, *b1;
1266       ip4_forward_next_trace_t *t0, *t1;
1267
1268       /* Prefetch next iteration. */
1269       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1270       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1271
1272       bi0 = from[0];
1273       bi1 = from[1];
1274
1275       b0 = vlib_get_buffer (vm, bi0);
1276       b1 = vlib_get_buffer (vm, bi1);
1277
1278       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1279         {
1280           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1281           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1282           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1283           t0->fib_index =
1284             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1285              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1286             vec_elt (im->fib_index_by_sw_if_index,
1287                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1288
1289           clib_memcpy_fast (t0->packet_data,
1290                             vlib_buffer_get_current (b0),
1291                             sizeof (t0->packet_data));
1292         }
1293       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1294         {
1295           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1296           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1297           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1298           t1->fib_index =
1299             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1300              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1301             vec_elt (im->fib_index_by_sw_if_index,
1302                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1303           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1304                             sizeof (t1->packet_data));
1305         }
1306       from += 2;
1307       n_left -= 2;
1308     }
1309
1310   while (n_left >= 1)
1311     {
1312       u32 bi0;
1313       vlib_buffer_t *b0;
1314       ip4_forward_next_trace_t *t0;
1315
1316       bi0 = from[0];
1317
1318       b0 = vlib_get_buffer (vm, bi0);
1319
1320       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1321         {
1322           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1323           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1324           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1325           t0->fib_index =
1326             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1327              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1328             vec_elt (im->fib_index_by_sw_if_index,
1329                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1330           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1331                             sizeof (t0->packet_data));
1332         }
1333       from += 1;
1334       n_left -= 1;
1335     }
1336 }
1337
1338 /* Compute TCP/UDP/ICMP4 checksum in software. */
1339 u16
1340 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1341                               ip4_header_t * ip0)
1342 {
1343   ip_csum_t sum0;
1344   u32 ip_header_length, payload_length_host_byte_order;
1345
1346   /* Initialize checksum with ip header. */
1347   ip_header_length = ip4_header_bytes (ip0);
1348   payload_length_host_byte_order =
1349     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1350   sum0 =
1351     clib_host_to_net_u32 (payload_length_host_byte_order +
1352                           (ip0->protocol << 16));
1353
1354   if (BITS (uword) == 32)
1355     {
1356       sum0 =
1357         ip_csum_with_carry (sum0,
1358                             clib_mem_unaligned (&ip0->src_address, u32));
1359       sum0 =
1360         ip_csum_with_carry (sum0,
1361                             clib_mem_unaligned (&ip0->dst_address, u32));
1362     }
1363   else
1364     sum0 =
1365       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1366
1367   return ip_calculate_l4_checksum (vm, p0, sum0,
1368                                    payload_length_host_byte_order, (u8 *) ip0,
1369                                    ip_header_length, NULL);
1370 }
1371
1372 u32
1373 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1374 {
1375   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1376   udp_header_t *udp0;
1377   u16 sum16;
1378
1379   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1380           || ip0->protocol == IP_PROTOCOL_UDP);
1381
1382   udp0 = (void *) (ip0 + 1);
1383   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1384     {
1385       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1386                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1387       return p0->flags;
1388     }
1389
1390   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1391
1392   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1393                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1394
1395   return p0->flags;
1396 }
1397 #endif
1398
1399 /* *INDENT-OFF* */
1400 VNET_FEATURE_ARC_INIT (ip4_local) = {
1401   .arc_name = "ip4-local",
1402   .start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"),
1403   .last_in_arc = "ip4-local-end-of-arc",
1404 };
1405 /* *INDENT-ON* */
1406
1407 static inline void
1408 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1409                             ip4_header_t * ip, u8 is_udp, u8 * error,
1410                             u8 * good_tcp_udp)
1411 {
1412   u32 flags0;
1413   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1414   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1415   if (is_udp)
1416     {
1417       udp_header_t *udp;
1418       u32 ip_len, udp_len;
1419       i32 len_diff;
1420       udp = ip4_next_header (ip);
1421       /* Verify UDP length. */
1422       ip_len = clib_net_to_host_u16 (ip->length);
1423       udp_len = clib_net_to_host_u16 (udp->length);
1424
1425       len_diff = ip_len - udp_len;
1426       *good_tcp_udp &= len_diff >= 0;
1427       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1428     }
1429 }
1430
/*
 * Checksum-state predicates on a vlib buffer pointer.
 *
 * Every macro argument and every replacement list is fully parenthesized
 * so the macros can be passed arbitrary pointer expressions and used
 * inside larger expressions without operator-precedence surprises.
 * Note that _b is still evaluated more than once: do not pass an
 * expression with side effects.
 */

/* True when the buffer has offload set and asks for TCP or UDP checksum
 * offload. */
#define ip4_local_csum_is_offloaded(_b)                                       \
  ((((_b)->flags) & VNET_BUFFER_F_OFFLOAD) &&                                 \
   (vnet_buffer (_b)->oflags &                                                \
    (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))

/* True when the packet is TCP/UDP and its checksum has neither been
 * computed already nor been left to offload. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                             \
  ((is_tcp_udp) &&                                                            \
   !((((_b)->flags) & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) ||                  \
     ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                           \
  (((((_b)->flags) & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) ||                    \
    ip4_local_csum_is_offloaded (_b)) != 0)
1443
1444 static inline void
1445 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1446                          ip4_header_t * ih, u8 * error)
1447 {
1448   u8 is_udp, is_tcp_udp, good_tcp_udp;
1449
1450   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1451   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1452
1453   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1454     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1455   else
1456     good_tcp_udp = ip4_local_csum_is_valid (b);
1457
1458   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1459   *error = (is_tcp_udp && !good_tcp_udp
1460             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1461 }
1462
1463 static inline void
1464 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1465                             ip4_header_t ** ih, u8 * error)
1466 {
1467   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1468
1469   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1470   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1471
1472   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1473   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1474
1475   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1476   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1477
1478   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1479                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1480     {
1481       if (is_tcp_udp[0])
1482         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1483                                     &good_tcp_udp[0]);
1484       if (is_tcp_udp[1])
1485         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1486                                     &good_tcp_udp[1]);
1487     }
1488
1489   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1490               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1491   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1492               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1493 }
1494
1495 static inline void
1496 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1497                               vlib_buffer_t * b, u16 * next, u8 error,
1498                               u8 head_of_feature_arc)
1499 {
1500   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1501   u32 next_index;
1502
1503   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1504   b->error = error ? error_node->errors[error] : 0;
1505   if (head_of_feature_arc)
1506     {
1507       next_index = *next;
1508       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1509         {
1510           vnet_feature_arc_start (arc_index,
1511                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1512                                   &next_index, b);
1513           *next = next_index;
1514         }
1515     }
1516 }
1517
1518 typedef struct
1519 {
1520   ip4_address_t src;
1521   u32 lbi;
1522   u8 error;
1523   u8 first;
1524 } ip4_local_last_check_t;
1525
1526 static inline void
1527 ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0,
1528                      ip4_local_last_check_t *last_check, u8 *error0,
1529                      int is_receive_dpo)
1530 {
1531   const dpo_id_t *dpo0;
1532   load_balance_t *lb0;
1533   u32 lbi0;
1534
1535   vnet_buffer (b)->ip.fib_index =
1536     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1537     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1538
1539   if (is_receive_dpo)
1540     {
1541       receive_dpo_t *rd;
1542       rd = receive_dpo_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
1543       vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index;
1544     }
1545   else
1546     vnet_buffer (b)->ip.rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
1547
1548   /*
1549    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1550    *  adjacency for the destination address (the local interface address).
1551    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1552    *  adjacency for the source address (the remote sender's address)
1553    */
1554   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1555       last_check->first)
1556     {
1557       lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
1558                                         &ip0->src_address);
1559
1560       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1561         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1562       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1563
1564       lb0 = load_balance_get (lbi0);
1565       dpo0 = load_balance_get_bucket_i (lb0, 0);
1566
1567       /*
1568        * Must have a route to source otherwise we drop the packet.
1569        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1570        *
1571        * The checks are:
1572        *  - the source is a recieve => it's from us => bogus, do this
1573        *    first since it sets a different error code.
1574        *  - uRPF check for any route to source - accept if passes.
1575        *  - allow packets destined to the broadcast address from unknown sources
1576        */
1577
1578       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1579                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1580                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1581       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1582                   && !fib_urpf_check_size (lb0->lb_urpf)
1583                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1584                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1585
1586       last_check->src.as_u32 = ip0->src_address.as_u32;
1587       last_check->lbi = lbi0;
1588       last_check->error = *error0;
1589       last_check->first = 0;
1590     }
1591   else
1592     {
1593       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1594         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1595       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1596       *error0 = last_check->error;
1597     }
1598 }
1599
1600 static inline void
1601 ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip,
1602                         ip4_local_last_check_t *last_check, u8 *error,
1603                         int is_receive_dpo)
1604 {
1605   const dpo_id_t *dpo[2];
1606   load_balance_t *lb[2];
1607   u32 not_last_hit;
1608   u32 lbi[2];
1609
1610   not_last_hit = last_check->first;
1611   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1612   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1613
1614   vnet_buffer (b[0])->ip.fib_index =
1615     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1616     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1617     vnet_buffer (b[0])->ip.fib_index;
1618
1619   vnet_buffer (b[1])->ip.fib_index =
1620     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1621     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1622     vnet_buffer (b[1])->ip.fib_index;
1623
1624   if (is_receive_dpo)
1625     {
1626       const receive_dpo_t *rd0, *rd1;
1627       rd0 = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
1628       rd1 = receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
1629       vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
1630       vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
1631     }
1632   else
1633     {
1634       vnet_buffer (b[0])->ip.rx_sw_if_index =
1635         vnet_buffer (b[0])->sw_if_index[VLIB_RX];
1636       vnet_buffer (b[1])->ip.rx_sw_if_index =
1637         vnet_buffer (b[1])->sw_if_index[VLIB_RX];
1638     }
1639
1640   /*
1641    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1642    *  adjacency for the destination address (the local interface address).
1643    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1644    *  adjacency for the source address (the remote sender's address)
1645    */
1646   if (PREDICT_TRUE (not_last_hit))
1647     {
1648       ip4_fib_forwarding_lookup_x2 (
1649         vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
1650         &ip[0]->src_address, &ip[1]->src_address, &lbi[0], &lbi[1]);
1651
1652       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1653         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1654       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1655
1656       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1657         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1658       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1659
1660       lb[0] = load_balance_get (lbi[0]);
1661       lb[1] = load_balance_get (lbi[1]);
1662
1663       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1664       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1665
1666       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1667                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1668                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1669       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1670                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1671                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1672                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1673
1674       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1675                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1676                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1677       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1678                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1679                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1680                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1681
1682       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1683       last_check->lbi = lbi[1];
1684       last_check->error = error[1];
1685       last_check->first = 0;
1686     }
1687   else
1688     {
1689       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1690         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1691       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1692
1693       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1694         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1695       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1696
1697       error[0] = last_check->error;
1698       error[1] = last_check->error;
1699     }
1700 }
1701
/*
 * How the ip4-local node treats a packet.
 *
 * IP_LOCAL_PACKET_TYPE_L4 has to remain 0: the local node tests "!pt" to
 * decide whether the checksum and source checks apply to a packet.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1708
1709 /**
1710  * Determine packet type and next node.
1711  *
1712  * The expectation is that all packets that are not L4 will skip
1713  * checksums and source checks.
1714  */
1715 always_inline u8
1716 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1717 {
1718   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1719
1720   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1721     {
1722       *next = IP_LOCAL_NEXT_REASSEMBLY;
1723       return IP_LOCAL_PACKET_TYPE_FRAG;
1724     }
1725   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1726     {
1727       *next = lm->local_next_by_ip_protocol[ip->protocol];
1728       return IP_LOCAL_PACKET_TYPE_NAT;
1729     }
1730
1731   *next = lm->local_next_by_ip_protocol[ip->protocol];
1732   return IP_LOCAL_PACKET_TYPE_L4;
1733 }
1734
1735 static inline uword
1736 ip4_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1737                   vlib_frame_t *frame, int head_of_feature_arc,
1738                   int is_receive_dpo)
1739 {
1740   u32 *from, n_left_from;
1741   vlib_node_runtime_t *error_node =
1742     vlib_node_get_runtime (vm, ip4_local_node.index);
1743   u16 nexts[VLIB_FRAME_SIZE], *next;
1744   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1745   ip4_header_t *ip[2];
1746   u8 error[2], pt[2];
1747
1748   ip4_local_last_check_t last_check = {
1749     /*
1750      * 0.0.0.0 can appear as the source address of an IP packet,
1751      * as can any other address, hence the need to use the 'first'
1752      * member to make sure the .lbi is initialised for the first
1753      * packet.
1754      */
1755     .src = {.as_u32 = 0},
1756     .lbi = ~0,
1757     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1758     .first = 1,
1759   };
1760
1761   from = vlib_frame_vector_args (frame);
1762   n_left_from = frame->n_vectors;
1763
1764   if (node->flags & VLIB_NODE_FLAG_TRACE)
1765     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1766
1767   vlib_get_buffers (vm, from, bufs, n_left_from);
1768   b = bufs;
1769   next = nexts;
1770
1771   while (n_left_from >= 6)
1772     {
1773       u8 not_batch = 0;
1774
1775       /* Prefetch next iteration. */
1776       {
1777         vlib_prefetch_buffer_header (b[4], LOAD);
1778         vlib_prefetch_buffer_header (b[5], LOAD);
1779
1780         clib_prefetch_load (b[4]->data);
1781         clib_prefetch_load (b[5]->data);
1782       }
1783
1784       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1785
1786       ip[0] = vlib_buffer_get_current (b[0]);
1787       ip[1] = vlib_buffer_get_current (b[1]);
1788
1789       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1790       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1791
1792       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1793       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1794
1795       not_batch = pt[0] ^ pt[1];
1796
1797       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1798         goto skip_checks;
1799
1800       if (PREDICT_TRUE (not_batch == 0))
1801         {
1802           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1803           ip4_local_check_src_x2 (b, ip, &last_check, error, is_receive_dpo);
1804         }
1805       else
1806         {
1807           if (!pt[0])
1808             {
1809               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1810               ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1811                                    is_receive_dpo);
1812             }
1813           if (!pt[1])
1814             {
1815               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1816               ip4_local_check_src (b[1], ip[1], &last_check, &error[1],
1817                                    is_receive_dpo);
1818             }
1819         }
1820
1821     skip_checks:
1822
1823       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1824                                     head_of_feature_arc);
1825       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1826                                     head_of_feature_arc);
1827
1828       b += 2;
1829       next += 2;
1830       n_left_from -= 2;
1831     }
1832
1833   while (n_left_from > 0)
1834     {
1835       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1836
1837       ip[0] = vlib_buffer_get_current (b[0]);
1838       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1839       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1840
1841       if (head_of_feature_arc == 0 || pt[0])
1842         goto skip_check;
1843
1844       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1845       ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1846                            is_receive_dpo);
1847
1848     skip_check:
1849
1850       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1851                                     head_of_feature_arc);
1852
1853       b += 1;
1854       next += 1;
1855       n_left_from -= 1;
1856     }
1857
1858   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1859   return frame->n_vectors;
1860 }
1861
1862 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1863                                vlib_frame_t * frame)
1864 {
1865   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1866                            0 /* is_receive_dpo */);
1867 }
1868
1869 VLIB_REGISTER_NODE (ip4_local_node) =
1870 {
1871   .name = "ip4-local",
1872   .vector_size = sizeof (u32),
1873   .format_trace = format_ip4_forward_next_trace,
1874   .n_errors = IP4_N_ERROR,
1875   .error_strings = ip4_error_strings,
1876   .n_next_nodes = IP_LOCAL_N_NEXT,
1877   .next_nodes =
1878   {
1879     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1880     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1881     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1882     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1883     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1884   },
1885 };
1886
1887 VLIB_NODE_FN (ip4_receive_local_node)
1888 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1889 {
1890   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1891                            1 /* is_receive_dpo */);
1892 }
1893
1894 VLIB_REGISTER_NODE (ip4_receive_local_node) = {
1895   .name = "ip4-receive",
1896   .vector_size = sizeof (u32),
1897   .format_trace = format_ip4_forward_next_trace,
1898   .sibling_of = "ip4-local"
1899 };
1900
1901 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1902                                           vlib_node_runtime_t * node,
1903                                           vlib_frame_t * frame)
1904 {
1905   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */,
1906                            0 /* is_receive_dpo */);
1907 }
1908
1909 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1910   .name = "ip4-local-end-of-arc",
1911   .vector_size = sizeof (u32),
1912
1913   .format_trace = format_ip4_forward_next_trace,
1914   .sibling_of = "ip4-local",
1915 };
1916
1917 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1918   .arc_name = "ip4-local",
1919   .node_name = "ip4-local-end-of-arc",
1920   .runs_before = 0, /* not before any other features */
1921 };
1922
1923 #ifndef CLIB_MARCH_VARIANT
1924 void
1925 ip4_register_protocol (u32 protocol, u32 node_index)
1926 {
1927   vlib_main_t *vm = vlib_get_main ();
1928   ip4_main_t *im = &ip4_main;
1929   ip_lookup_main_t *lm = &im->lookup_main;
1930
1931   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1932   lm->local_next_by_ip_protocol[protocol] =
1933     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1934 }
1935
1936 void
1937 ip4_unregister_protocol (u32 protocol)
1938 {
1939   ip4_main_t *im = &ip4_main;
1940   ip_lookup_main_t *lm = &im->lookup_main;
1941
1942   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1943   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1944 }
1945 #endif
1946
1947 static clib_error_t *
1948 show_ip_local_command_fn (vlib_main_t * vm,
1949                           unformat_input_t * input, vlib_cli_command_t * cmd)
1950 {
1951   ip4_main_t *im = &ip4_main;
1952   ip_lookup_main_t *lm = &im->lookup_main;
1953   int i;
1954
1955   vlib_cli_output (vm, "Protocols handled by ip4_local");
1956   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1957     {
1958       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1959         {
1960           u32 node_index = vlib_get_node (vm,
1961                                           ip4_local_node.index)->
1962             next_nodes[lm->local_next_by_ip_protocol[i]];
1963           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1964                            format_vlib_node_name, vm, node_index);
1965         }
1966     }
1967   return 0;
1968 }
1969
1970
1971
1972 /*?
1973  * Display the set of protocols handled by the local IPv4 stack.
1974  *
1975  * @cliexpar
1976  * Example of how to display local protocol table:
1977  * @cliexstart{show ip local}
1978  * Protocols handled by ip4_local
1979  * 1
1980  * 17
1981  * 47
1982  * @cliexend
1983 ?*/
1984 /* *INDENT-OFF* */
1985 VLIB_CLI_COMMAND (show_ip_local, static) =
1986 {
1987   .path = "show ip local",
1988   .function = show_ip_local_command_fn,
1989   .short_help = "show ip local",
1990 };
1991 /* *INDENT-ON* */
1992
/* Next indices that the ip4 rewrite code assigns itself. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  IP4_REWRITE_N_NEXT = 3,       /* Last: number of entries above */
} ip4_rewrite_next_t;
2000
2001 /**
2002  * The bits of an IPv4 address to mask to construct a multicast
2003  * MAC address
      *
      * The constant is spelled differently per host byte order so that
      * both variants describe the same bytes in memory (00 7f ff ff),
      * i.e. the low-order 23 bits of an address stored in network order.
2004  */
2005 #if CLIB_ARCH_IS_BIG_ENDIAN
2006 #define IP4_MCAST_ADDR_MASK 0x007fffff
2007 #else
2008 #define IP4_MCAST_ADDR_MASK 0xffff7f00
2009 #endif
2010
2011 always_inline void
2012 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2013                u16 adj_packet_bytes, bool df, u16 * next,
2014                u8 is_midchain, u32 * error)
2015 {
2016   if (packet_len > adj_packet_bytes)
2017     {
2018       *error = IP4_ERROR_MTU_EXCEEDED;
2019       if (df)
2020         {
2021           icmp4_error_set_vnet_buffer
2022             (b, ICMP4_destination_unreachable,
2023              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2024              adj_packet_bytes);
2025           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2026         }
2027       else
2028         {
2029           /* IP fragmentation */
2030           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2031                                    (is_midchain ?
2032                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2033                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2034           *next = IP4_REWRITE_NEXT_FRAGMENT;
2035         }
2036     }
2037 }
2038
2039 /* increment TTL & update checksum.
2040    Works either endian, so no need for byte swap. */
2041 static_always_inline void
2042 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2043 {
2044   i32 ttl;
2045   u32 checksum;
2046   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2047     return;
2048
2049   ttl = ip->ttl;
2050
2051   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2052   checksum += checksum >= 0xffff;
2053
2054   ip->checksum = checksum;
2055   ttl += 1;
2056   ip->ttl = ttl;
2057
2058   ASSERT (ip4_header_checksum_is_valid (ip));
2059 }
2060
2061 /* Decrement TTL & update checksum.
2062    Works either endian, so no need for byte swap. */
2063 static_always_inline void
2064 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2065                             u32 * error)
2066 {
2067   i32 ttl;
2068   u32 checksum;
2069   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2070     return;
2071
2072   ttl = ip->ttl;
2073
2074   /* Input node should have reject packets with ttl 0. */
2075   ASSERT (ip->ttl > 0);
2076
2077   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2078   checksum += checksum >= 0xffff;
2079
2080   ip->checksum = checksum;
2081   ttl -= 1;
2082   ip->ttl = ttl;
2083
2084   /*
2085    * If the ttl drops below 1 when forwarding, generate
2086    * an ICMP response.
2087    */
2088   if (PREDICT_FALSE (ttl <= 0))
2089     {
2090       *error = IP4_ERROR_TIME_EXPIRED;
2091       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2092       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2093                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2094                                    0);
2095       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2096     }
2097
2098   /* Verify checksum. */
2099   ASSERT (ip4_header_checksum_is_valid (ip) ||
2100           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
2101           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
2102 }
2103
2104 always_inline uword
2105 ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
2106                     vlib_frame_t *frame, int do_counters, int is_midchain,
2107                     int is_mcast)
2108 {
2109   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2110   u32 *from = vlib_frame_vector_args (frame);
2111   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2112   u16 nexts[VLIB_FRAME_SIZE], *next;
2113   u32 n_left_from;
2114   vlib_node_runtime_t *error_node =
2115     vlib_node_get_runtime (vm, ip4_input_node.index);
2116
2117   n_left_from = frame->n_vectors;
2118   u32 thread_index = vm->thread_index;
2119
2120   vlib_get_buffers (vm, from, bufs, n_left_from);
2121   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2122
2123 #if (CLIB_N_PREFETCHES >= 8)
2124   if (n_left_from >= 6)
2125     {
2126       int i;
2127       for (i = 2; i < 6; i++)
2128         vlib_prefetch_buffer_header (bufs[i], LOAD);
2129     }
2130
2131   next = nexts;
2132   b = bufs;
2133   while (n_left_from >= 8)
2134     {
2135       const ip_adjacency_t *adj0, *adj1;
2136       ip4_header_t *ip0, *ip1;
2137       u32 rw_len0, error0, adj_index0;
2138       u32 rw_len1, error1, adj_index1;
2139       u32 tx_sw_if_index0, tx_sw_if_index1;
2140       u8 *p;
2141
2142       if (is_midchain)
2143         {
2144           vlib_prefetch_buffer_header (b[6], LOAD);
2145           vlib_prefetch_buffer_header (b[7], LOAD);
2146         }
2147
2148       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2149       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2150
2151       /*
2152        * pre-fetch the per-adjacency counters
2153        */
2154       if (do_counters)
2155         {
2156           vlib_prefetch_combined_counter (&adjacency_counters,
2157                                           thread_index, adj_index0);
2158           vlib_prefetch_combined_counter (&adjacency_counters,
2159                                           thread_index, adj_index1);
2160         }
2161
2162       ip0 = vlib_buffer_get_current (b[0]);
2163       ip1 = vlib_buffer_get_current (b[1]);
2164
2165       error0 = error1 = IP4_ERROR_NONE;
2166
2167       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2168       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2169
2170       /* Rewrite packet header and updates lengths. */
2171       adj0 = adj_get (adj_index0);
2172       adj1 = adj_get (adj_index1);
2173
2174       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2175       rw_len0 = adj0[0].rewrite_header.data_bytes;
2176       rw_len1 = adj1[0].rewrite_header.data_bytes;
2177       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2178       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2179
2180       p = vlib_buffer_get_current (b[2]);
2181       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2182       clib_prefetch_load (p);
2183
2184       p = vlib_buffer_get_current (b[3]);
2185       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2186       clib_prefetch_load (p);
2187
2188       /* Check MTU of outgoing interface. */
2189       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2190       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2191
2192       if (b[0]->flags & VNET_BUFFER_F_GSO)
2193         ip0_len = gso_mtu_sz (b[0]);
2194       if (b[1]->flags & VNET_BUFFER_F_GSO)
2195         ip1_len = gso_mtu_sz (b[1]);
2196
2197       ip4_mtu_check (b[0], ip0_len,
2198                      adj0[0].rewrite_header.max_l3_packet_bytes,
2199                      ip0->flags_and_fragment_offset &
2200                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2201                      next + 0, is_midchain, &error0);
2202       ip4_mtu_check (b[1], ip1_len,
2203                      adj1[0].rewrite_header.max_l3_packet_bytes,
2204                      ip1->flags_and_fragment_offset &
2205                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2206                      next + 1, is_midchain, &error1);
2207
2208       if (is_mcast)
2209         {
2210           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2211                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2212                     IP4_ERROR_SAME_INTERFACE : error0);
2213           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2214                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2215                     IP4_ERROR_SAME_INTERFACE : error1);
2216         }
2217
2218       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2219        * to see the IP header */
2220       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2221         {
2222           u32 next_index = adj0[0].rewrite_header.next_index;
2223           vlib_buffer_advance (b[0], -(word) rw_len0);
2224
2225           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2226           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2227
2228           if (PREDICT_FALSE
2229               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2230             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2231                                                 tx_sw_if_index0,
2232                                                 &next_index, b[0],
2233                                                 adj0->ia_cfg_index);
2234
2235           next[0] = next_index;
2236           if (is_midchain)
2237             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2238                                         0 /* is_ip6 */ );
2239         }
2240       else
2241         {
2242           b[0]->error = error_node->errors[error0];
2243           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2244             ip4_ttl_inc (b[0], ip0);
2245         }
2246       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2247         {
2248           u32 next_index = adj1[0].rewrite_header.next_index;
2249           vlib_buffer_advance (b[1], -(word) rw_len1);
2250
2251           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2252           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2253
2254           if (PREDICT_FALSE
2255               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2256             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2257                                                 tx_sw_if_index1,
2258                                                 &next_index, b[1],
2259                                                 adj1->ia_cfg_index);
2260           next[1] = next_index;
2261           if (is_midchain)
2262             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2263                                         0 /* is_ip6 */ );
2264         }
2265       else
2266         {
2267           b[1]->error = error_node->errors[error1];
2268           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2269             ip4_ttl_inc (b[1], ip1);
2270         }
2271
2272       if (is_midchain)
2273         /* Guess we are only writing on ipv4 header. */
2274         vnet_rewrite_two_headers (adj0[0], adj1[0],
2275                                   ip0, ip1, sizeof (ip4_header_t));
2276       else
2277         /* Guess we are only writing on simple Ethernet header. */
2278         vnet_rewrite_two_headers (adj0[0], adj1[0],
2279                                   ip0, ip1, sizeof (ethernet_header_t));
2280
2281       if (do_counters)
2282         {
2283           if (error0 == IP4_ERROR_NONE)
2284             vlib_increment_combined_counter
2285               (&adjacency_counters,
2286                thread_index,
2287                adj_index0, 1,
2288                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2289
2290           if (error1 == IP4_ERROR_NONE)
2291             vlib_increment_combined_counter
2292               (&adjacency_counters,
2293                thread_index,
2294                adj_index1, 1,
2295                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2296         }
2297
2298       if (is_midchain)
2299         {
2300           if (error0 == IP4_ERROR_NONE)
2301             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2302           if (error1 == IP4_ERROR_NONE)
2303             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2304         }
2305
2306       if (is_mcast)
2307         {
2308           /* copy bytes from the IP address into the MAC rewrite */
2309           if (error0 == IP4_ERROR_NONE)
2310             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2311                                         adj0->rewrite_header.dst_mcast_offset,
2312                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2313           if (error1 == IP4_ERROR_NONE)
2314             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2315                                         adj1->rewrite_header.dst_mcast_offset,
2316                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2317         }
2318
2319       next += 2;
2320       b += 2;
2321       n_left_from -= 2;
2322     }
2323 #elif (CLIB_N_PREFETCHES >= 4)
2324   next = nexts;
2325   b = bufs;
2326   while (n_left_from >= 1)
2327     {
2328       ip_adjacency_t *adj0;
2329       ip4_header_t *ip0;
2330       u32 rw_len0, error0, adj_index0;
2331       u32 tx_sw_if_index0;
2332       u8 *p;
2333
2334       /* Prefetch next iteration */
2335       if (PREDICT_TRUE (n_left_from >= 4))
2336         {
2337           ip_adjacency_t *adj2;
2338           u32 adj_index2;
2339
2340           vlib_prefetch_buffer_header (b[3], LOAD);
2341           vlib_prefetch_buffer_data (b[2], LOAD);
2342
2343           /* Prefetch adj->rewrite_header */
2344           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2345           adj2 = adj_get (adj_index2);
2346           p = (u8 *) adj2;
2347           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2348                          LOAD);
2349         }
2350
2351       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2352
2353       /*
2354        * Prefetch the per-adjacency counters
2355        */
2356       if (do_counters)
2357         {
2358           vlib_prefetch_combined_counter (&adjacency_counters,
2359                                           thread_index, adj_index0);
2360         }
2361
2362       ip0 = vlib_buffer_get_current (b[0]);
2363
2364       error0 = IP4_ERROR_NONE;
2365
2366       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2367
2368       /* Rewrite packet header and updates lengths. */
2369       adj0 = adj_get (adj_index0);
2370
2371       /* Rewrite header was prefetched. */
2372       rw_len0 = adj0[0].rewrite_header.data_bytes;
2373       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2374
2375       /* Check MTU of outgoing interface. */
2376       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2377
2378       if (b[0]->flags & VNET_BUFFER_F_GSO)
2379         ip0_len = gso_mtu_sz (b[0]);
2380
2381       ip4_mtu_check (b[0], ip0_len,
2382                      adj0[0].rewrite_header.max_l3_packet_bytes,
2383                      ip0->flags_and_fragment_offset &
2384                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2385                      next + 0, is_midchain, &error0);
2386
2387       if (is_mcast)
2388         {
2389           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2390                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2391                     IP4_ERROR_SAME_INTERFACE : error0);
2392         }
2393
2394       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2395        * to see the IP header */
2396       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2397         {
2398           u32 next_index = adj0[0].rewrite_header.next_index;
2399           vlib_buffer_advance (b[0], -(word) rw_len0);
2400           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2401           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2402
2403           if (PREDICT_FALSE
2404               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2405             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2406                                                 tx_sw_if_index0,
2407                                                 &next_index, b[0],
2408                                                 adj0->ia_cfg_index);
2409           next[0] = next_index;
2410
2411           if (is_midchain)
2412             {
2413               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2414                                           0 /* is_ip6 */ );
2415
2416               /* Guess we are only writing on ipv4 header. */
2417               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2418             }
2419           else
2420             /* Guess we are only writing on simple Ethernet header. */
2421             vnet_rewrite_one_header (adj0[0], ip0,
2422                                      sizeof (ethernet_header_t));
2423
2424           /*
2425            * Bump the per-adjacency counters
2426            */
2427           if (do_counters)
2428             vlib_increment_combined_counter
2429               (&adjacency_counters,
2430                thread_index,
2431                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2432                                                            b[0]) + rw_len0);
2433
2434           if (is_midchain)
2435             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2436
2437           if (is_mcast)
2438             /* copy bytes from the IP address into the MAC rewrite */
2439             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2440                                         adj0->rewrite_header.dst_mcast_offset,
2441                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2442         }
2443       else
2444         {
2445           b[0]->error = error_node->errors[error0];
2446           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2447             ip4_ttl_inc (b[0], ip0);
2448         }
2449
2450       next += 1;
2451       b += 1;
2452       n_left_from -= 1;
2453     }
2454 #endif
2455
2456   while (n_left_from > 0)
2457     {
2458       ip_adjacency_t *adj0;
2459       ip4_header_t *ip0;
2460       u32 rw_len0, adj_index0, error0;
2461       u32 tx_sw_if_index0;
2462
2463       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2464
2465       adj0 = adj_get (adj_index0);
2466
2467       if (do_counters)
2468         vlib_prefetch_combined_counter (&adjacency_counters,
2469                                         thread_index, adj_index0);
2470
2471       ip0 = vlib_buffer_get_current (b[0]);
2472
2473       error0 = IP4_ERROR_NONE;
2474
2475       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2476
2477
2478       /* Update packet buffer attributes/set output interface. */
2479       rw_len0 = adj0[0].rewrite_header.data_bytes;
2480       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2481
2482       /* Check MTU of outgoing interface. */
2483       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2484       if (b[0]->flags & VNET_BUFFER_F_GSO)
2485         ip0_len = gso_mtu_sz (b[0]);
2486
2487       ip4_mtu_check (b[0], ip0_len,
2488                      adj0[0].rewrite_header.max_l3_packet_bytes,
2489                      ip0->flags_and_fragment_offset &
2490                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2491                      next + 0, is_midchain, &error0);
2492
2493       if (is_mcast)
2494         {
2495           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2496                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2497                     IP4_ERROR_SAME_INTERFACE : error0);
2498         }
2499
2500       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2501        * to see the IP header */
2502       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2503         {
2504           u32 next_index = adj0[0].rewrite_header.next_index;
2505           vlib_buffer_advance (b[0], -(word) rw_len0);
2506           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2507           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2508
2509           if (PREDICT_FALSE
2510               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2511             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2512                                                 tx_sw_if_index0,
2513                                                 &next_index, b[0],
2514                                                 adj0->ia_cfg_index);
2515           next[0] = next_index;
2516
2517           if (is_midchain)
2518             {
2519               /* this acts on the packet that is about to be encapped */
2520               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2521                                           0 /* is_ip6 */ );
2522
2523               /* Guess we are only writing on ipv4 header. */
2524               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2525             }
2526           else
2527             /* Guess we are only writing on simple Ethernet header. */
2528             vnet_rewrite_one_header (adj0[0], ip0,
2529                                      sizeof (ethernet_header_t));
2530
2531           if (do_counters)
2532             vlib_increment_combined_counter
2533               (&adjacency_counters,
2534                thread_index, adj_index0, 1,
2535                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2536
2537           if (is_midchain)
2538             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2539
2540           if (is_mcast)
2541             /* copy bytes from the IP address into the MAC rewrite */
2542             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2543                                         adj0->rewrite_header.dst_mcast_offset,
2544                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2545         }
2546       else
2547         {
2548           b[0]->error = error_node->errors[error0];
2549           /* undo the TTL decrement - we'll be back to do it again */
2550           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2551             ip4_ttl_inc (b[0], ip0);
2552         }
2553
2554       next += 1;
2555       b += 1;
2556       n_left_from -= 1;
2557     }
2558
2559
2560   /* Need to do trace after rewrites to pick up new packet data. */
2561   if (node->flags & VLIB_NODE_FLAG_TRACE)
2562     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2563
2564   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2565   return frame->n_vectors;
2566 }
2567
2568 /** @brief IPv4 rewrite node.
2569     @node ip4-rewrite
2570
2571     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2572     header checksum, fetch the ip adjacency, check the outbound mtu,
2573     apply the adjacency rewrite, and send pkts to the adjacency
2574     rewrite header's rewrite_next_index.
2575
2576     @param vm vlib_main_t corresponding to the current thread
2577     @param node vlib_node_runtime_t
2578     @param frame vlib_frame_t whose contents should be dispatched
2579
2580     @par Graph mechanics: buffer metadata, next index usage
2581
2582     @em Uses:
2583     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2584         - the rewrite adjacency index
2585     - <code>adj->lookup_next_index</code>
2586         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2587           the packet will be dropped.
2588     - <code>adj->rewrite_header</code>
2589         - Rewrite string length, rewrite string, next_index
2590
2591     @em Sets:
2592     - <code>b->current_data, b->current_length</code>
2593         - Updated net of applying the rewrite string
2594
2595     <em>Next Indices:</em>
2596     - <code> adj->rewrite_header.next_index </code>
2597       or @c ip4-drop
2598 */
2599
2600 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2601                                  vlib_frame_t * frame)
2602 {
2603   if (adj_are_counters_enabled ())
2604     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2605   else
2606     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2607 }
2608
2609 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2610                                        vlib_node_runtime_t * node,
2611                                        vlib_frame_t * frame)
2612 {
2613   if (adj_are_counters_enabled ())
2614     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2615   else
2616     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2617 }
2618
2619 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2620                                   vlib_node_runtime_t * node,
2621                                   vlib_frame_t * frame)
2622 {
2623   if (adj_are_counters_enabled ())
2624     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2625   else
2626     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2627 }
2628
2629 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2630                                        vlib_node_runtime_t * node,
2631                                        vlib_frame_t * frame)
2632 {
2633   if (adj_are_counters_enabled ())
2634     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2635   else
2636     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2637 }
2638
2639 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2640                                         vlib_node_runtime_t * node,
2641                                         vlib_frame_t * frame)
2642 {
2643   if (adj_are_counters_enabled ())
2644     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2645   else
2646     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2647 }
2648
2649 /* *INDENT-OFF* */
2650 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2651   .name = "ip4-rewrite",
2652   .vector_size = sizeof (u32),
2653
2654   .format_trace = format_ip4_rewrite_trace,
2655
2656   .n_next_nodes = IP4_REWRITE_N_NEXT,
2657   .next_nodes = {
2658     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2659     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2660     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2661   },
2662 };
2663
2664 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2665   .name = "ip4-rewrite-bcast",
2666   .vector_size = sizeof (u32),
2667
2668   .format_trace = format_ip4_rewrite_trace,
2669   .sibling_of = "ip4-rewrite",
2670 };
2671
2672 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2673   .name = "ip4-rewrite-mcast",
2674   .vector_size = sizeof (u32),
2675
2676   .format_trace = format_ip4_rewrite_trace,
2677   .sibling_of = "ip4-rewrite",
2678 };
2679
2680 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2681   .name = "ip4-mcast-midchain",
2682   .vector_size = sizeof (u32),
2683
2684   .format_trace = format_ip4_rewrite_trace,
2685   .sibling_of = "ip4-rewrite",
2686 };
2687
2688 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2689   .name = "ip4-midchain",
2690   .vector_size = sizeof (u32),
2691   .format_trace = format_ip4_rewrite_trace,
2692   .sibling_of = "ip4-rewrite",
2693 };
2694 /* *INDENT-ON* */
2695
2696 static clib_error_t *
2697 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2698                              unformat_input_t * input,
2699                              vlib_cli_command_t * cmd)
2700 {
2701   int matched = 0;
2702   u32 table_id = 0;
2703   u32 flow_hash_config = 0;
2704   int rv;
2705
2706   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2707     {
2708       if (unformat (input, "table %d", &table_id))
2709         matched = 1;
2710 #define _(a, b, v)                                                            \
2711   else if (unformat (input, #a))                                              \
2712   {                                                                           \
2713     flow_hash_config |= v;                                                    \
2714     matched = 1;                                                              \
2715   }
2716       foreach_flow_hash_bit
2717 #undef _
2718         else
2719         break;
2720     }
2721
2722   if (matched == 0)
2723     return clib_error_return (0, "unknown input `%U'",
2724                               format_unformat_error, input);
2725
2726   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2727   switch (rv)
2728     {
2729     case 0:
2730       break;
2731
2732     case VNET_API_ERROR_NO_SUCH_FIB:
2733       return clib_error_return (0, "no such FIB table %d", table_id);
2734
2735     default:
2736       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2737       break;
2738     }
2739
2740   return 0;
2741 }
2742
2743 /*?
2744  * Configure the set of IPv4 fields used by the flow hash.
2745  *
2746  * @cliexpar
2747  * Example of how to set the flow hash on a given table:
2748  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2749  * Example of how to display the configured flow hash:
2750  * @cliexstart{show ip fib}
2751  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2752  * 0.0.0.0/0
2753  *   unicast-ip4-chain
2754  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2755  *     [0] [@0]: dpo-drop ip6
2756  * 0.0.0.0/32
2757  *   unicast-ip4-chain
2758  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2759  *     [0] [@0]: dpo-drop ip6
2760  * 224.0.0.0/8
2761  *   unicast-ip4-chain
2762  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2763  *     [0] [@0]: dpo-drop ip6
2764  * 6.0.1.2/32
2765  *   unicast-ip4-chain
2766  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2767  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2768  * 7.0.0.1/32
2769  *   unicast-ip4-chain
2770  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2771  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2772  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2773  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2774  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2775  * 240.0.0.0/8
2776  *   unicast-ip4-chain
2777  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2778  *     [0] [@0]: dpo-drop ip6
2779  * 255.255.255.255/32
2780  *   unicast-ip4-chain
2781  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2782  *     [0] [@0]: dpo-drop ip6
2783  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2784  * 0.0.0.0/0
2785  *   unicast-ip4-chain
2786  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2787  *     [0] [@0]: dpo-drop ip6
2788  * 0.0.0.0/32
2789  *   unicast-ip4-chain
2790  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2791  *     [0] [@0]: dpo-drop ip6
2792  * 172.16.1.0/24
2793  *   unicast-ip4-chain
2794  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2795  *     [0] [@4]: ipv4-glean: af_packet0
2796  * 172.16.1.1/32
2797  *   unicast-ip4-chain
2798  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2799  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2800  * 172.16.1.2/32
2801  *   unicast-ip4-chain
2802  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2803  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2804  * 172.16.2.0/24
2805  *   unicast-ip4-chain
2806  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2807  *     [0] [@4]: ipv4-glean: af_packet1
2808  * 172.16.2.1/32
2809  *   unicast-ip4-chain
2810  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2811  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2812  * 224.0.0.0/8
2813  *   unicast-ip4-chain
2814  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2815  *     [0] [@0]: dpo-drop ip6
2816  * 240.0.0.0/8
2817  *   unicast-ip4-chain
2818  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2819  *     [0] [@0]: dpo-drop ip6
2820  * 255.255.255.255/32
2821  *   unicast-ip4-chain
2822  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2823  *     [0] [@0]: dpo-drop ip6
2824  * @cliexend
2825 ?*/
2826 /* *INDENT-OFF* */
2827 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2828 {
2829   .path = "set ip flow-hash",
2830   .short_help =
2831   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2832   .function = set_ip_flow_hash_command_fn,
2833 };
2834 /* *INDENT-ON* */
2835
2836 #ifndef CLIB_MARCH_VARIANT
2837 int
2838 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2839                              u32 table_index)
2840 {
2841   vnet_main_t *vnm = vnet_get_main ();
2842   vnet_interface_main_t *im = &vnm->interface_main;
2843   ip4_main_t *ipm = &ip4_main;
2844   ip_lookup_main_t *lm = &ipm->lookup_main;
2845   vnet_classify_main_t *cm = &vnet_classify_main;
2846   ip4_address_t *if_addr;
2847
2848   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2849     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2850
2851   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2852     return VNET_API_ERROR_NO_SUCH_ENTRY;
2853
2854   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2855   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2856
2857   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2858
2859   if (NULL != if_addr)
2860     {
2861       fib_prefix_t pfx = {
2862         .fp_len = 32,
2863         .fp_proto = FIB_PROTOCOL_IP4,
2864         .fp_addr.ip4 = *if_addr,
2865       };
2866       u32 fib_index;
2867
2868       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2869                                                        sw_if_index);
2870
2871
2872       if (table_index != (u32) ~ 0)
2873         {
2874           dpo_id_t dpo = DPO_INVALID;
2875
2876           dpo_set (&dpo,
2877                    DPO_CLASSIFY,
2878                    DPO_PROTO_IP4,
2879                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2880
2881           fib_table_entry_special_dpo_add (fib_index,
2882                                            &pfx,
2883                                            FIB_SOURCE_CLASSIFY,
2884                                            FIB_ENTRY_FLAG_NONE, &dpo);
2885           dpo_reset (&dpo);
2886         }
2887       else
2888         {
2889           fib_table_entry_special_remove (fib_index,
2890                                           &pfx, FIB_SOURCE_CLASSIFY);
2891         }
2892     }
2893
2894   return 0;
2895 }
2896 #endif
2897
2898 static clib_error_t *
2899 set_ip_classify_command_fn (vlib_main_t * vm,
2900                             unformat_input_t * input,
2901                             vlib_cli_command_t * cmd)
2902 {
2903   u32 table_index = ~0;
2904   int table_index_set = 0;
2905   u32 sw_if_index = ~0;
2906   int rv;
2907
2908   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2909     {
2910       if (unformat (input, "table-index %d", &table_index))
2911         table_index_set = 1;
2912       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2913                          vnet_get_main (), &sw_if_index))
2914         ;
2915       else
2916         break;
2917     }
2918
2919   if (table_index_set == 0)
2920     return clib_error_return (0, "classify table-index must be specified");
2921
2922   if (sw_if_index == ~0)
2923     return clib_error_return (0, "interface / subif must be specified");
2924
2925   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2926
2927   switch (rv)
2928     {
2929     case 0:
2930       break;
2931
2932     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2933       return clib_error_return (0, "No such interface");
2934
2935     case VNET_API_ERROR_NO_SUCH_ENTRY:
2936       return clib_error_return (0, "No such classifier table");
2937     }
2938   return 0;
2939 }
2940
2941 /*?
2942  * Assign a classification table to an interface. The classification
2943  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2944  * commands. Once the table is created, use this command to filter packets
2945  * on an interface.
2946  *
2947  * @cliexpar
2948  * Example of how to assign a classification table to an interface:
2949  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2950 ?*/
2951 /* *INDENT-OFF* */
2952 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2953 {
2954     .path = "set ip classify",
2955     .short_help =
2956     "set ip classify intfc <interface> table-index <classify-idx>",
2957     .function = set_ip_classify_command_fn,
2958 };
2959 /* *INDENT-ON* */
2960
2961 /*
2962  * fd.io coding-style-patch-verification: ON
2963  *
2964  * Local Variables:
2965  * eval: (c-set-style "gnu")
2966  * End:
2967  */