ip: always set ip rx_sw_if_index
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/receive_dpo.h>
56 #include <vnet/dpo/classify_dpo.h>
57 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
58 #include <vnet/adj/adj_dp.h>
59 #include <vnet/pg/pg.h>
60
61 #include <vnet/ip/ip4_forward.h>
62 #include <vnet/interface_output.h>
63 #include <vnet/classify/vnet_classify.h>
64
65 /** @brief IPv4 lookup node.
66     @node ip4-lookup
67
68     This is the main IPv4 lookup dispatch node.
69
70     @param vm vlib_main_t corresponding to the current thread
71     @param node vlib_node_runtime_t
72     @param frame vlib_frame_t whose contents should be dispatched
73
74     @par Graph mechanics: buffer metadata, next index usage
75
76     @em Uses:
77     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
78         - Indicates the @c sw_if_index value of the interface that the
79           packet was received on.
80     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
81         - When the value is @c ~0 then the node performs a longest prefix
82           match (LPM) for the packet destination address in the FIB attached
83           to the receive interface.
84         - Otherwise perform LPM for the packet destination address in the
85           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
86           value (0, 1, ...) and not a VRF id.
87
88     @em Sets:
89     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
90         - The lookup result adjacency index.
91
92     <em>Next Index:</em>
93     - Dispatches the packet to the node index found in
94       ip_adjacency_t @c adj->lookup_next_index
95       (where @c adj is the lookup result adjacency).
96 */
97 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
98                                 vlib_frame_t * frame)
99 {
100   return ip4_lookup_inline (vm, node, frame);
101 }
102
103 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
104
105 /* *INDENT-OFF* */
106 VLIB_REGISTER_NODE (ip4_lookup_node) =
107 {
108   .name = "ip4-lookup",
109   .vector_size = sizeof (u32),
110   .format_trace = format_ip4_lookup_trace,
111   .n_next_nodes = IP_LOOKUP_N_NEXT,
112   .next_nodes = IP4_LOOKUP_NEXT_NODES,
113 };
114 /* *INDENT-ON* */
115
116 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
117                                       vlib_node_runtime_t * node,
118                                       vlib_frame_t * frame)
119 {
120   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
121   u32 n_left, *from;
122   u32 thread_index = vm->thread_index;
123   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
124   u16 nexts[VLIB_FRAME_SIZE], *next;
125
126   from = vlib_frame_vector_args (frame);
127   n_left = frame->n_vectors;
128   next = nexts;
129
130   vlib_get_buffers (vm, from, bufs, n_left);
131
132   while (n_left >= 4)
133     {
134       const load_balance_t *lb0, *lb1;
135       const ip4_header_t *ip0, *ip1;
136       u32 lbi0, hc0, lbi1, hc1;
137       const dpo_id_t *dpo0, *dpo1;
138
139       /* Prefetch next iteration. */
140       {
141         vlib_prefetch_buffer_header (b[2], LOAD);
142         vlib_prefetch_buffer_header (b[3], LOAD);
143
144         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
145         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
146       }
147
148       ip0 = vlib_buffer_get_current (b[0]);
149       ip1 = vlib_buffer_get_current (b[1]);
150       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
151       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
152
153       lb0 = load_balance_get (lbi0);
154       lb1 = load_balance_get (lbi1);
155
156       /*
157        * this node is for via FIBs we can re-use the hash value from the
158        * to node if present.
159        * We don't want to use the same hash value at each level in the recursion
160        * graph as that would lead to polarisation
161        */
162       hc0 = hc1 = 0;
163
164       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
165         {
166           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
167             {
168               hc0 = vnet_buffer (b[0])->ip.flow_hash =
169                 vnet_buffer (b[0])->ip.flow_hash >> 1;
170             }
171           else
172             {
173               hc0 = vnet_buffer (b[0])->ip.flow_hash =
174                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
175             }
176           dpo0 = load_balance_get_fwd_bucket
177             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
178         }
179       else
180         {
181           dpo0 = load_balance_get_bucket_i (lb0, 0);
182         }
183       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
184         {
185           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
186             {
187               hc1 = vnet_buffer (b[1])->ip.flow_hash =
188                 vnet_buffer (b[1])->ip.flow_hash >> 1;
189             }
190           else
191             {
192               hc1 = vnet_buffer (b[1])->ip.flow_hash =
193                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
194             }
195           dpo1 = load_balance_get_fwd_bucket
196             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
197         }
198       else
199         {
200           dpo1 = load_balance_get_bucket_i (lb1, 0);
201         }
202
203       next[0] = dpo0->dpoi_next_node;
204       next[1] = dpo1->dpoi_next_node;
205
206       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
207       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
208
209       vlib_increment_combined_counter
210         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
211       vlib_increment_combined_counter
212         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
213
214       b += 2;
215       next += 2;
216       n_left -= 2;
217     }
218
219   while (n_left > 0)
220     {
221       const load_balance_t *lb0;
222       const ip4_header_t *ip0;
223       const dpo_id_t *dpo0;
224       u32 lbi0, hc0;
225
226       ip0 = vlib_buffer_get_current (b[0]);
227       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
228
229       lb0 = load_balance_get (lbi0);
230
231       hc0 = 0;
232       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
233         {
234           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
235             {
236               hc0 = vnet_buffer (b[0])->ip.flow_hash =
237                 vnet_buffer (b[0])->ip.flow_hash >> 1;
238             }
239           else
240             {
241               hc0 = vnet_buffer (b[0])->ip.flow_hash =
242                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
243             }
244           dpo0 = load_balance_get_fwd_bucket
245             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
246         }
247       else
248         {
249           dpo0 = load_balance_get_bucket_i (lb0, 0);
250         }
251
252       next[0] = dpo0->dpoi_next_node;
253       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
254
255       vlib_increment_combined_counter
256         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
257
258       b += 1;
259       next += 1;
260       n_left -= 1;
261     }
262
263   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
264   if (node->flags & VLIB_NODE_FLAG_TRACE)
265     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
266
267   return frame->n_vectors;
268 }
269
270 /* *INDENT-OFF* */
271 VLIB_REGISTER_NODE (ip4_load_balance_node) =
272 {
273   .name = "ip4-load-balance",
274   .vector_size = sizeof (u32),
275   .sibling_of = "ip4-lookup",
276   .format_trace = format_ip4_lookup_trace,
277 };
278 /* *INDENT-ON* */
279
280 #ifndef CLIB_MARCH_VARIANT
281 /* get first interface address */
282 ip4_address_t *
283 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
284                              ip_interface_address_t ** result_ia)
285 {
286   ip_lookup_main_t *lm = &im->lookup_main;
287   ip_interface_address_t *ia = 0;
288   ip4_address_t *result = 0;
289
290   /* *INDENT-OFF* */
291   foreach_ip_interface_address
292     (lm, ia, sw_if_index,
293      1 /* honor unnumbered */ ,
294      ({
295        ip4_address_t * a =
296          ip_interface_address_get_address (lm, ia);
297        result = a;
298        break;
299      }));
300   /* *INDENT-OFF* */
301   if (result_ia)
302     *result_ia = result ? ia : 0;
303   return result;
304 }
305 #endif
306
307 static void
308 ip4_add_subnet_bcast_route (u32 fib_index,
309                             fib_prefix_t *pfx,
310                             u32 sw_if_index)
311 {
312   vnet_sw_interface_flags_t iflags;
313
314   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
315
316   fib_table_entry_special_remove(fib_index,
317                                  pfx,
318                                  FIB_SOURCE_INTERFACE);
319
320   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
321     {
322       fib_table_entry_update_one_path (fib_index, pfx,
323                                        FIB_SOURCE_INTERFACE,
324                                        FIB_ENTRY_FLAG_NONE,
325                                        DPO_PROTO_IP4,
326                                        /* No next-hop address */
327                                        &ADJ_BCAST_ADDR,
328                                        sw_if_index,
329                                        // invalid FIB index
330                                        ~0,
331                                        1,
332                                        // no out-label stack
333                                        NULL,
334                                        FIB_ROUTE_PATH_FLAG_NONE);
335     }
336   else
337     {
338         fib_table_entry_special_add(fib_index,
339                                     pfx,
340                                     FIB_SOURCE_INTERFACE,
341                                     (FIB_ENTRY_FLAG_DROP |
342                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
343     }
344 }
345
346 static void
347 ip4_add_interface_prefix_routes (ip4_main_t *im,
348                                  u32 sw_if_index,
349                                  u32 fib_index,
350                                  ip_interface_address_t * a)
351 {
352   ip_lookup_main_t *lm = &im->lookup_main;
353   ip_interface_prefix_t *if_prefix;
354   ip4_address_t *address = ip_interface_address_get_address (lm, a);
355
356   ip_interface_prefix_key_t key = {
357     .prefix = {
358       .fp_len = a->address_length,
359       .fp_proto = FIB_PROTOCOL_IP4,
360       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
361     },
362     .sw_if_index = sw_if_index,
363   };
364
365   fib_prefix_t pfx_special = {
366     .fp_proto = FIB_PROTOCOL_IP4,
367   };
368
369   /* If prefix already set on interface, just increment ref count & return */
370   if_prefix = ip_get_interface_prefix (lm, &key);
371   if (if_prefix)
372     {
373       if_prefix->ref_count += 1;
374       return;
375     }
376
377   /* New prefix - allocate a pool entry, initialize it, add to the hash */
378   pool_get (lm->if_prefix_pool, if_prefix);
379   if_prefix->ref_count = 1;
380   if_prefix->src_ia_index = a - lm->if_address_pool;
381   clib_memcpy (&if_prefix->key, &key, sizeof (key));
382   mhash_set (&lm->prefix_to_if_prefix_index, &key,
383              if_prefix - lm->if_prefix_pool, 0 /* old value */);
384
385   pfx_special.fp_len = a->address_length;
386   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
387
388   /* set the glean route for the prefix */
389   fib_table_entry_update_one_path (fib_index, &pfx_special,
390                                    FIB_SOURCE_INTERFACE,
391                                    (FIB_ENTRY_FLAG_CONNECTED |
392                                     FIB_ENTRY_FLAG_ATTACHED),
393                                    DPO_PROTO_IP4,
394                                    /* No next-hop address */
395                                    NULL,
396                                    sw_if_index,
397                                    /* invalid FIB index */
398                                    ~0,
399                                    1,
400                                    /* no out-label stack */
401                                    NULL,
402                                    FIB_ROUTE_PATH_FLAG_NONE);
403
404   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
405   if (a->address_length <= 30)
406     {
407       /* set a drop route for the base address of the prefix */
408       pfx_special.fp_len = 32;
409       pfx_special.fp_addr.ip4.as_u32 =
410         address->as_u32 & im->fib_masks[a->address_length];
411
412       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
413         fib_table_entry_special_add (fib_index, &pfx_special,
414                                      FIB_SOURCE_INTERFACE,
415                                      (FIB_ENTRY_FLAG_DROP |
416                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
417
418       /* set a route for the broadcast address of the prefix */
419       pfx_special.fp_len = 32;
420       pfx_special.fp_addr.ip4.as_u32 =
421         address->as_u32 | ~im->fib_masks[a->address_length];
422       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
423         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
424
425
426     }
427   /* length == 31 - add an attached route for the other address */
428   else if (a->address_length == 31)
429     {
430       pfx_special.fp_len = 32;
431       pfx_special.fp_addr.ip4.as_u32 =
432         address->as_u32 ^ clib_host_to_net_u32(1);
433
434       fib_table_entry_update_one_path (fib_index, &pfx_special,
435                                        FIB_SOURCE_INTERFACE,
436                                        (FIB_ENTRY_FLAG_ATTACHED),
437                                        DPO_PROTO_IP4,
438                                        &pfx_special.fp_addr,
439                                        sw_if_index,
440                                        /* invalid FIB index */
441                                        ~0,
442                                        1,
443                                        NULL,
444                                        FIB_ROUTE_PATH_FLAG_NONE);
445     }
446 }
447
448 static void
449 ip4_add_interface_routes (u32 sw_if_index,
450                           ip4_main_t * im, u32 fib_index,
451                           ip_interface_address_t * a)
452 {
453   ip_lookup_main_t *lm = &im->lookup_main;
454   ip4_address_t *address = ip_interface_address_get_address (lm, a);
455   fib_prefix_t pfx = {
456     .fp_len = 32,
457     .fp_proto = FIB_PROTOCOL_IP4,
458     .fp_addr.ip4 = *address,
459   };
460
461   /* set special routes for the prefix if needed */
462   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
463
464   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
465     {
466       u32 classify_table_index =
467         lm->classify_table_index_by_sw_if_index[sw_if_index];
468       if (classify_table_index != (u32) ~ 0)
469         {
470           dpo_id_t dpo = DPO_INVALID;
471
472           dpo_set (&dpo,
473                    DPO_CLASSIFY,
474                    DPO_PROTO_IP4,
475                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
476
477           fib_table_entry_special_dpo_add (fib_index,
478                                            &pfx,
479                                            FIB_SOURCE_CLASSIFY,
480                                            FIB_ENTRY_FLAG_NONE, &dpo);
481           dpo_reset (&dpo);
482         }
483     }
484
485   fib_table_entry_update_one_path (fib_index, &pfx,
486                                    FIB_SOURCE_INTERFACE,
487                                    (FIB_ENTRY_FLAG_CONNECTED |
488                                     FIB_ENTRY_FLAG_LOCAL),
489                                    DPO_PROTO_IP4,
490                                    &pfx.fp_addr,
491                                    sw_if_index,
492                                    // invalid FIB index
493                                    ~0,
494                                    1, NULL,
495                                    FIB_ROUTE_PATH_FLAG_NONE);
496 }
497
498 static void
499 ip4_del_interface_prefix_routes (ip4_main_t * im,
500                                  u32 sw_if_index,
501                                  u32 fib_index,
502                                  ip4_address_t * address,
503                                  u32 address_length)
504 {
505   ip_lookup_main_t *lm = &im->lookup_main;
506   ip_interface_prefix_t *if_prefix;
507
508   ip_interface_prefix_key_t key = {
509     .prefix = {
510       .fp_len = address_length,
511       .fp_proto = FIB_PROTOCOL_IP4,
512       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
513     },
514     .sw_if_index = sw_if_index,
515   };
516
517   fib_prefix_t pfx_special = {
518     .fp_len = 32,
519     .fp_proto = FIB_PROTOCOL_IP4,
520   };
521
522   if_prefix = ip_get_interface_prefix (lm, &key);
523   if (!if_prefix)
524     {
525       clib_warning ("Prefix not found while deleting %U",
526                     format_ip4_address_and_length, address, address_length);
527       return;
528     }
529
530   if_prefix->ref_count -= 1;
531
532   /*
533    * Routes need to be adjusted if deleting last intf addr in prefix
534    *
535    * We're done now otherwise
536    */
537   if (if_prefix->ref_count > 0)
538     return;
539
540   /* length <= 30, delete glean route, first address, last address */
541   if (address_length <= 30)
542     {
543       /* Less work to do in FIB if we remove the covered /32s first */
544
545       /* first address in prefix */
546       pfx_special.fp_addr.ip4.as_u32 =
547         address->as_u32 & im->fib_masks[address_length];
548       pfx_special.fp_len = 32;
549
550       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
551         fib_table_entry_special_remove (fib_index,
552                                         &pfx_special,
553                                         FIB_SOURCE_INTERFACE);
554
555       /* prefix broadcast address */
556       pfx_special.fp_addr.ip4.as_u32 =
557         address->as_u32 | ~im->fib_masks[address_length];
558       pfx_special.fp_len = 32;
559
560       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
561         fib_table_entry_special_remove (fib_index,
562                                         &pfx_special,
563                                         FIB_SOURCE_INTERFACE);
564     }
565   else if (address_length == 31)
566     {
567       /* length == 31, delete attached route for the other address */
568       pfx_special.fp_addr.ip4.as_u32 =
569         address->as_u32 ^ clib_host_to_net_u32(1);
570
571       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
572     }
573
574   /* remove glean route for prefix */
575   pfx_special.fp_addr.ip4 = *address;
576   pfx_special.fp_len = address_length;
577   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
578
579   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
580   pool_put (lm->if_prefix_pool, if_prefix);
581 }
582
583 static void
584 ip4_del_interface_routes (u32 sw_if_index,
585                           ip4_main_t * im,
586                           u32 fib_index,
587                           ip4_address_t * address, u32 address_length)
588 {
589   fib_prefix_t pfx = {
590     .fp_len = 32,
591     .fp_proto = FIB_PROTOCOL_IP4,
592     .fp_addr.ip4 = *address,
593   };
594
595   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
596
597   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
598                                    address, address_length);
599 }
600
601 #ifndef CLIB_MARCH_VARIANT
602 void
603 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
604 {
605   ip4_main_t *im = &ip4_main;
606   vnet_main_t *vnm = vnet_get_main ();
607   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
608
609   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
610
611   /*
612    * enable/disable only on the 1<->0 transition
613    */
614   if (is_enable)
615     {
616       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
617         return;
618     }
619   else
620     {
621       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
622       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
623         return;
624     }
625   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
626                                !is_enable, 0, 0);
627
628
629   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
630                                sw_if_index, !is_enable, 0, 0);
631
632   if (is_enable)
633     hi->l3_if_count++;
634   else if (hi->l3_if_count)
635     hi->l3_if_count--;
636
637   {
638     ip4_enable_disable_interface_callback_t *cb;
639     vec_foreach (cb, im->enable_disable_interface_callbacks)
640       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
641   }
642 }
643
644 static clib_error_t *
645 ip4_add_del_interface_address_internal (vlib_main_t * vm,
646                                         u32 sw_if_index,
647                                         ip4_address_t * address,
648                                         u32 address_length, u32 is_del)
649 {
650   vnet_main_t *vnm = vnet_get_main ();
651   ip4_main_t *im = &ip4_main;
652   ip_lookup_main_t *lm = &im->lookup_main;
653   clib_error_t *error = 0;
654   u32 if_address_index;
655   ip4_address_fib_t ip4_af, *addr_fib = 0;
656
657   error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
658   if (error)
659     {
660       vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
661       return error;
662     }
663
664   ip4_addr_fib_init (&ip4_af, address,
665                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
666   vec_add1 (addr_fib, ip4_af);
667
668   /*
669    * there is no support for adj-fib handling in the presence of overlapping
670    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
671    * most routers do.
672    */
673   /* *INDENT-OFF* */
674   if (!is_del)
675     {
676       /* When adding an address check that it does not conflict
677          with an existing address on any interface in this table. */
678       ip_interface_address_t *ia;
679       vnet_sw_interface_t *sif;
680
681       pool_foreach (sif, vnm->interface_main.sw_interfaces)
682        {
683           if (im->fib_index_by_sw_if_index[sw_if_index] ==
684               im->fib_index_by_sw_if_index[sif->sw_if_index])
685             {
686               foreach_ip_interface_address
687                 (&im->lookup_main, ia, sif->sw_if_index,
688                  0 /* honor unnumbered */ ,
689                  ({
690                    ip4_address_t * x =
691                      ip_interface_address_get_address
692                      (&im->lookup_main, ia);
693
694                    if (ip4_destination_matches_route
695                        (im, address, x, ia->address_length) ||
696                        ip4_destination_matches_route (im,
697                                                       x,
698                                                       address,
699                                                       address_length))
700                      {
701                        /* an intf may have >1 addr from the same prefix */
702                        if ((sw_if_index == sif->sw_if_index) &&
703                            (ia->address_length == address_length) &&
704                            (x->as_u32 != address->as_u32))
705                          continue;
706
707                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
708                          /* if the address we're comparing against is stale
709                           * then the CP has not added this one back yet, maybe
710                           * it never will, so we have to assume it won't and
711                           * ignore it. if it does add it back, then it will fail
712                           * because this one is now present */
713                          continue;
714
715                        /* error if the length or intf was different */
716                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
717
718                        error = clib_error_create
719                          ("failed to add %U on %U which conflicts with %U for interface %U",
720                           format_ip4_address_and_length, address,
721                           address_length,
722                           format_vnet_sw_if_index_name, vnm,
723                           sw_if_index,
724                           format_ip4_address_and_length, x,
725                           ia->address_length,
726                           format_vnet_sw_if_index_name, vnm,
727                           sif->sw_if_index);
728                        goto done;
729                      }
730                  }));
731             }
732       }
733     }
734   /* *INDENT-ON* */
735
736   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
737
738   if (is_del)
739     {
740       if (~0 == if_address_index)
741         {
742           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
743           error = clib_error_create ("%U not found for interface %U",
744                                      lm->format_address_and_length,
745                                      addr_fib, address_length,
746                                      format_vnet_sw_if_index_name, vnm,
747                                      sw_if_index);
748           goto done;
749         }
750
751       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
752                                         address_length, sw_if_index);
753       if (error)
754         goto done;
755     }
756   else
757     {
758       if (~0 != if_address_index)
759         {
760           ip_interface_address_t *ia;
761
762           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
763
764           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
765             {
766               if (ia->sw_if_index == sw_if_index)
767                 {
768                   /* re-adding an address during the replace action.
769                    * consdier this the update. clear the flag and
770                    * we're done */
771                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
772                   goto done;
773                 }
774               else
775                 {
776                   /* The prefix is moving from one interface to another.
777                    * delete the stale and add the new */
778                   ip4_add_del_interface_address_internal (vm,
779                                                           ia->sw_if_index,
780                                                           address,
781                                                           address_length, 1);
782                   ia = NULL;
783                   error = ip_interface_address_add (lm, sw_if_index,
784                                                     addr_fib, address_length,
785                                                     &if_address_index);
786                 }
787             }
788           else
789             {
790               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
791               error = clib_error_create
792                 ("Prefix %U already found on interface %U",
793                  lm->format_address_and_length, addr_fib, address_length,
794                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
795             }
796         }
797       else
798         error = ip_interface_address_add (lm, sw_if_index,
799                                           addr_fib, address_length,
800                                           &if_address_index);
801     }
802
803   if (error)
804     goto done;
805
806   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
807   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
808
809   /* intf addr routes are added/deleted on admin up/down */
810   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
811     {
812       if (is_del)
813         ip4_del_interface_routes (sw_if_index,
814                                   im, ip4_af.fib_index, address,
815                                   address_length);
816       else
817         ip4_add_interface_routes (sw_if_index,
818                                   im, ip4_af.fib_index,
819                                   pool_elt_at_index
820                                   (lm->if_address_pool, if_address_index));
821     }
822
823   ip4_add_del_interface_address_callback_t *cb;
824   vec_foreach (cb, im->add_del_interface_address_callbacks)
825     cb->function (im, cb->function_opaque, sw_if_index,
826                   address, address_length, if_address_index, is_del);
827
828 done:
829   vec_free (addr_fib);
830   return error;
831 }
832
833 clib_error_t *
834 ip4_add_del_interface_address (vlib_main_t * vm,
835                                u32 sw_if_index,
836                                ip4_address_t * address,
837                                u32 address_length, u32 is_del)
838 {
839   return ip4_add_del_interface_address_internal
840     (vm, sw_if_index, address, address_length, is_del);
841 }
842
843 void
844 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
845 {
846   ip_interface_address_t *ia;
847   ip4_main_t *im;
848
849   im = &ip4_main;
850
851   /*
852    * when directed broadcast is enabled, the subnet braodcast route will forward
853    * packets using an adjacency with a broadcast MAC. otherwise it drops
854    */
855   /* *INDENT-OFF* */
856   foreach_ip_interface_address(&im->lookup_main, ia,
857                                sw_if_index, 0,
858      ({
859        if (ia->address_length <= 30)
860          {
861            ip4_address_t *ipa;
862
863            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
864
865            fib_prefix_t pfx = {
866              .fp_len = 32,
867              .fp_proto = FIB_PROTOCOL_IP4,
868              .fp_addr = {
869                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
870              },
871            };
872
873            ip4_add_subnet_bcast_route
874              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
875                                                   sw_if_index),
876               &pfx, sw_if_index);
877          }
878      }));
879   /* *INDENT-ON* */
880 }
881 #endif
882
883 static clib_error_t *
884 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
885 {
886   ip4_main_t *im = &ip4_main;
887   ip_interface_address_t *ia;
888   ip4_address_t *a;
889   u32 is_admin_up, fib_index;
890
891   vec_validate_init_empty (im->
892                            lookup_main.if_address_pool_index_by_sw_if_index,
893                            sw_if_index, ~0);
894
895   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
896
897   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
898
899   /* *INDENT-OFF* */
900   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
901                                 0 /* honor unnumbered */,
902   ({
903     a = ip_interface_address_get_address (&im->lookup_main, ia);
904     if (is_admin_up)
905       ip4_add_interface_routes (sw_if_index,
906                                 im, fib_index,
907                                 ia);
908     else
909       ip4_del_interface_routes (sw_if_index,
910                                 im, fib_index,
911                                 a, ia->address_length);
912   }));
913   /* *INDENT-ON* */
914
915   return 0;
916 }
917
918 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
919
920 /* Built-in ip4 unicast rx feature path definition */
921 /* *INDENT-OFF* */
/*
 * The "ip4-unicast" arc.  Its start nodes are ip4-input and
 * ip4-input-no-checksum, its last node is ip4-lookup, and the arc index is
 * written to ip4_main.lookup_main.ucast_feature_arc_index.
 *
 * Ordering declared by the runs_before constraints below:
 *   ip4-flow-classify -> ip4-inacl -> ip4-policer-classify ->
 *   ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup
 * ip4-source-and-port-range-check-rx also precedes ip4-policer-classify and
 * ip4-not-enabled precedes ip4-lookup.
 */
922 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
923 {
924   .arc_name = "ip4-unicast",
925   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
926   .last_in_arc = "ip4-lookup",
927   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
928 };
929
/* ip4-flow-classify: ordered before ip4-inacl. */
930 VNET_FEATURE_INIT (ip4_flow_classify, static) =
931 {
932   .arc_name = "ip4-unicast",
933   .node_name = "ip4-flow-classify",
934   .runs_before = VNET_FEATURES ("ip4-inacl"),
935 };
936
/* ip4-inacl: ordered before ip4-policer-classify. */
937 VNET_FEATURE_INIT (ip4_inacl, static) =
938 {
939   .arc_name = "ip4-unicast",
940   .node_name = "ip4-inacl",
941   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
942 };
943
/* ip4-source-and-port-range-check-rx: ordered before ip4-policer-classify. */
944 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
945 {
946   .arc_name = "ip4-unicast",
947   .node_name = "ip4-source-and-port-range-check-rx",
948   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
949 };
950
/* ip4-policer-classify: ordered before ipsec4-input-feature. */
951 VNET_FEATURE_INIT (ip4_policer_classify, static) =
952 {
953   .arc_name = "ip4-unicast",
954   .node_name = "ip4-policer-classify",
955   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
956 };
957
/* ipsec4-input-feature: ordered before vpath-input-ip4. */
958 VNET_FEATURE_INIT (ip4_ipsec, static) =
959 {
960   .arc_name = "ip4-unicast",
961   .node_name = "ipsec4-input-feature",
962   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
963 };
964
/* vpath-input-ip4: ordered before ip4-vxlan-bypass. */
965 VNET_FEATURE_INIT (ip4_vpath, static) =
966 {
967   .arc_name = "ip4-unicast",
968   .node_name = "vpath-input-ip4",
969   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
970 };
971
/* ip4-vxlan-bypass: ordered before ip4-lookup. */
972 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
973 {
974   .arc_name = "ip4-unicast",
975   .node_name = "ip4-vxlan-bypass",
976   .runs_before = VNET_FEATURES ("ip4-lookup"),
977 };
978
/*
 * ip4-not-enabled: ordered before ip4-lookup.  It is switched per interface
 * by ip4_sw_interface_add_del() via vnet_feature_enable_disable().
 */
979 VNET_FEATURE_INIT (ip4_not_enabled, static) =
980 {
981   .arc_name = "ip4-unicast",
982   .node_name = "ip4-not-enabled",
983   .runs_before = VNET_FEATURES ("ip4-lookup"),
984 };
985
/* ip4-lookup: the arc's last node, so it has no runs_before constraint. */
986 VNET_FEATURE_INIT (ip4_lookup, static) =
987 {
988   .arc_name = "ip4-unicast",
989   .node_name = "ip4-lookup",
990   .runs_before = 0,     /* not before any other features */
991 };
992
993 /* Built-in ip4 multicast rx feature path definition */
/*
 * The "ip4-multicast" arc.  It shares its start nodes (ip4-input,
 * ip4-input-no-checksum) with the unicast arc, ends at
 * ip4-mfib-forward-lookup, and its arc index is written to
 * ip4_main.lookup_main.mcast_feature_arc_index.
 */
994 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
995 {
996   .arc_name = "ip4-multicast",
997   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
998   .last_in_arc = "ip4-mfib-forward-lookup",
999   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1000 };
1001
/* vpath-input-ip4 on the multicast arc: ordered before the mfib lookup. */
1002 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1003 {
1004   .arc_name = "ip4-multicast",
1005   .node_name = "vpath-input-ip4",
1006   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1007 };
1008
/*
 * ip4-not-enabled on the multicast arc: ordered before the mfib lookup.
 * It is switched per interface by ip4_sw_interface_add_del().
 */
1009 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1010 {
1011   .arc_name = "ip4-multicast",
1012   .node_name = "ip4-not-enabled",
1013   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1014 };
1015
/* ip4-mfib-forward-lookup: the arc's last node. */
1016 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1017 {
1018   .arc_name = "ip4-multicast",
1019   .node_name = "ip4-mfib-forward-lookup",
1020   .runs_before = 0,     /* last feature */
1021 };
1022
1023 /* Source and port-range check ip4 tx feature path definition */
/*
 * The "ip4-output" arc.  Its start nodes are ip4-rewrite, ip4-midchain and
 * ip4-dvr-dpo, its last node is interface-output, and the arc index is
 * written to ip4_main.lookup_main.output_feature_arc_index.
 *
 * Ordering declared by the runs_before constraints below:
 *   ip4-source-and-port-range-check-tx -> ip4-outacl ->
 *   ipsec4-output-feature -> interface-output
 */
1024 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1025 {
1026   .arc_name = "ip4-output",
1027   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1028   .last_in_arc = "interface-output",
1029   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1030 };
1031
/* ip4-source-and-port-range-check-tx: ordered before ip4-outacl. */
1032 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1033 {
1034   .arc_name = "ip4-output",
1035   .node_name = "ip4-source-and-port-range-check-tx",
1036   .runs_before = VNET_FEATURES ("ip4-outacl"),
1037 };
1038
/* ip4-outacl: ordered before ipsec4-output-feature. */
1039 VNET_FEATURE_INIT (ip4_outacl, static) =
1040 {
1041   .arc_name = "ip4-output",
1042   .node_name = "ip4-outacl",
1043   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1044 };
1045
/* ipsec4-output-feature: ordered before interface-output. */
1046 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1047 {
1048   .arc_name = "ip4-output",
1049   .node_name = "ipsec4-output-feature",
1050   .runs_before = VNET_FEATURES ("interface-output"),
1051 };
1052
1053 /* Built-in ip4 tx feature path definition */
/* interface-output: the arc's last node, so it has no runs_before. */
1054 VNET_FEATURE_INIT (ip4_interface_output, static) =
1055 {
1056   .arc_name = "ip4-output",
1057   .node_name = "interface-output",
1058   .runs_before = 0,     /* not before any other features */
1059 };
1060 /* *INDENT-ON* */
1061
1062 static clib_error_t *
1063 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1064 {
1065   ip4_main_t *im = &ip4_main;
1066
1067   vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
1068   vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
1069
1070   if (is_add)
1071     {
1072       /* Fill in lookup tables with default table (0). */
1073       im->fib_index_by_sw_if_index[sw_if_index] = 0;
1074       im->mfib_index_by_sw_if_index[sw_if_index] = 0;
1075     }
1076   else
1077     {
1078       ip4_main_t *im4 = &ip4_main;
1079       ip_lookup_main_t *lm4 = &im4->lookup_main;
1080       ip_interface_address_t *ia = 0;
1081       ip4_address_t *address;
1082       vlib_main_t *vm = vlib_get_main ();
1083
1084       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1085       /* *INDENT-OFF* */
1086       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1087       ({
1088         address = ip_interface_address_get_address (lm4, ia);
1089         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1090       }));
1091       /* *INDENT-ON* */
1092       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1093     }
1094
1095   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1096                                is_add, 0, 0);
1097
1098   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1099                                sw_if_index, is_add, 0, 0);
1100
1101   return /* no error */ 0;
1102 }
1103
1104 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1105
1106 /* Global IP4 main. */
/*
 * The object is only defined when CLIB_MARCH_VARIANT is not set.
 * NOTE(review): presumably this keeps a single definition when the file is
 * also compiled as per-architecture variants - confirm against the build.
 */
1107 #ifndef CLIB_MARCH_VARIANT
1108 ip4_main_t ip4_main;
1109 #endif /* CLIB_MARCH_VARIANT */
1110
1111 static clib_error_t *
1112 ip4_lookup_init (vlib_main_t * vm)
1113 {
1114   ip4_main_t *im = &ip4_main;
1115   clib_error_t *error;
1116   uword i;
1117
1118   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1119     return error;
1120   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1121     return (error);
1122   if ((error = vlib_call_init_function (vm, fib_module_init)))
1123     return error;
1124   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1125     return error;
1126
1127   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1128     {
1129       u32 m;
1130
1131       if (i < 32)
1132         m = pow2_mask (i) << (32 - i);
1133       else
1134         m = ~0;
1135       im->fib_masks[i] = clib_host_to_net_u32 (m);
1136     }
1137
1138   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1139
1140   /* Create FIB with index 0 and table id of 0. */
1141   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1142                                      FIB_SOURCE_DEFAULT_ROUTE);
1143   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1144                                       MFIB_SOURCE_DEFAULT_ROUTE);
1145
1146   {
1147     pg_node_t *pn;
1148     pn = pg_get_node (ip4_lookup_node.index);
1149     pn->unformat_edit = unformat_pg_ip4_header;
1150   }
1151
1152   {
1153     ethernet_arp_header_t h;
1154
1155     clib_memset (&h, 0, sizeof (h));
1156
1157 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1158 #define _8(f,v) h.f = v;
1159     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1160     _16 (l3_type, ETHERNET_TYPE_IP4);
1161     _8 (n_l2_address_bytes, 6);
1162     _8 (n_l3_address_bytes, 4);
1163     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1164 #undef _16
1165 #undef _8
1166
1167     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1168                                /* data */ &h,
1169                                sizeof (h),
1170                                /* alloc chunk size */ 8,
1171                                "ip4 arp");
1172   }
1173
1174   return error;
1175 }
1176
1177 VLIB_INIT_FUNCTION (ip4_lookup_init);
1178
/*
 * Trace record written by ip4_forward_next_trace() and decoded by the
 * format_ip4_*_trace() functions in this file.
 */
1179 typedef struct
1180 {
1181   /* Adjacency taken. */
  /* Copied from the buffer's ip.adj_index[] slot chosen by the caller. */
1182   u32 dpo_index;
  /* Copied from the buffer's ip.flow_hash. */
1183   u32 flow_hash;
  /*
   * The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
   * FIB index bound to the buffer's RX interface.
   */
1184   u32 fib_index;
1185
1186   /* Packet data, possibly *after* rewrite. */
  /* Filled from the buffer's current data pointer. */
1187   u8 packet_data[64 - 1 * sizeof (u32)];
1188 }
1189 ip4_forward_next_trace_t;
1190
1191 #ifndef CLIB_MARCH_VARIANT
1192 u8 *
1193 format_ip4_forward_next_trace (u8 * s, va_list * args)
1194 {
1195   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1196   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1197   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1198   u32 indent = format_get_indent (s);
1199   s = format (s, "%U%U",
1200               format_white_space, indent,
1201               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1202   return s;
1203 }
1204 #endif
1205
1206 static u8 *
1207 format_ip4_lookup_trace (u8 * s, va_list * args)
1208 {
1209   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1210   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1211   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1212   u32 indent = format_get_indent (s);
1213
1214   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1215               t->fib_index, t->dpo_index, t->flow_hash);
1216   s = format (s, "\n%U%U",
1217               format_white_space, indent,
1218               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1219   return s;
1220 }
1221
1222 static u8 *
1223 format_ip4_rewrite_trace (u8 * s, va_list * args)
1224 {
1225   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1226   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1227   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1228   u32 indent = format_get_indent (s);
1229
1230   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1231               t->fib_index, t->dpo_index, format_ip_adjacency,
1232               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1233   s = format (s, "\n%U%U",
1234               format_white_space, indent,
1235               format_ip_adjacency_packet_data,
1236               t->packet_data, sizeof (t->packet_data));
1237   return s;
1238 }
1239
1240 #ifndef CLIB_MARCH_VARIANT
1241 /* Common trace function for all ip4-forward next nodes. */
1242 void
1243 ip4_forward_next_trace (vlib_main_t * vm,
1244                         vlib_node_runtime_t * node,
1245                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1246 {
1247   u32 *from, n_left;
1248   ip4_main_t *im = &ip4_main;
1249
1250   n_left = frame->n_vectors;
1251   from = vlib_frame_vector_args (frame);
1252
1253   while (n_left >= 4)
1254     {
1255       u32 bi0, bi1;
1256       vlib_buffer_t *b0, *b1;
1257       ip4_forward_next_trace_t *t0, *t1;
1258
1259       /* Prefetch next iteration. */
1260       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1261       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1262
1263       bi0 = from[0];
1264       bi1 = from[1];
1265
1266       b0 = vlib_get_buffer (vm, bi0);
1267       b1 = vlib_get_buffer (vm, bi1);
1268
1269       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1270         {
1271           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1272           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1273           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1274           t0->fib_index =
1275             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1276              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1277             vec_elt (im->fib_index_by_sw_if_index,
1278                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1279
1280           clib_memcpy_fast (t0->packet_data,
1281                             vlib_buffer_get_current (b0),
1282                             sizeof (t0->packet_data));
1283         }
1284       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1285         {
1286           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1287           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1288           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1289           t1->fib_index =
1290             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1291              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1292             vec_elt (im->fib_index_by_sw_if_index,
1293                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1294           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1295                             sizeof (t1->packet_data));
1296         }
1297       from += 2;
1298       n_left -= 2;
1299     }
1300
1301   while (n_left >= 1)
1302     {
1303       u32 bi0;
1304       vlib_buffer_t *b0;
1305       ip4_forward_next_trace_t *t0;
1306
1307       bi0 = from[0];
1308
1309       b0 = vlib_get_buffer (vm, bi0);
1310
1311       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1312         {
1313           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1314           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1315           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1316           t0->fib_index =
1317             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1318              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1319             vec_elt (im->fib_index_by_sw_if_index,
1320                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1321           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1322                             sizeof (t0->packet_data));
1323         }
1324       from += 1;
1325       n_left -= 1;
1326     }
1327 }
1328
1329 /* Compute TCP/UDP/ICMP4 checksum in software. */
1330 u16
1331 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1332                               ip4_header_t * ip0)
1333 {
1334   ip_csum_t sum0;
1335   u32 ip_header_length, payload_length_host_byte_order;
1336
1337   /* Initialize checksum with ip header. */
1338   ip_header_length = ip4_header_bytes (ip0);
1339   payload_length_host_byte_order =
1340     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1341   sum0 =
1342     clib_host_to_net_u32 (payload_length_host_byte_order +
1343                           (ip0->protocol << 16));
1344
1345   if (BITS (uword) == 32)
1346     {
1347       sum0 =
1348         ip_csum_with_carry (sum0,
1349                             clib_mem_unaligned (&ip0->src_address, u32));
1350       sum0 =
1351         ip_csum_with_carry (sum0,
1352                             clib_mem_unaligned (&ip0->dst_address, u32));
1353     }
1354   else
1355     sum0 =
1356       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1357
1358   return ip_calculate_l4_checksum (vm, p0, sum0,
1359                                    payload_length_host_byte_order, (u8 *) ip0,
1360                                    ip_header_length, NULL);
1361 }
1362
1363 u32
1364 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1365 {
1366   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1367   udp_header_t *udp0;
1368   u16 sum16;
1369
1370   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1371           || ip0->protocol == IP_PROTOCOL_UDP);
1372
1373   udp0 = (void *) (ip0 + 1);
1374   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1375     {
1376       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1377                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1378       return p0->flags;
1379     }
1380
1381   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1382
1383   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1384                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1385
1386   return p0->flags;
1387 }
1388 #endif
1389
1390 /* *INDENT-OFF* */
/*
 * The "ip4-local" arc.  Its start nodes are ip4-local and ip4-receive and
 * its last node is ip4-local-end-of-arc.  No arc_index_ptr is given; the
 * arc index is read elsewhere in this file through
 * vnet_feat_arc_ip4_local.feature_arc_index.
 */
1391 VNET_FEATURE_ARC_INIT (ip4_local) = {
1392   .arc_name = "ip4-local",
1393   .start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"),
1394   .last_in_arc = "ip4-local-end-of-arc",
1395 };
1396 /* *INDENT-ON* */
1397
1398 static inline void
1399 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1400                             ip4_header_t * ip, u8 is_udp, u8 * error,
1401                             u8 * good_tcp_udp)
1402 {
1403   u32 flags0;
1404   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1405   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1406   if (is_udp)
1407     {
1408       udp_header_t *udp;
1409       u32 ip_len, udp_len;
1410       i32 len_diff;
1411       udp = ip4_next_header (ip);
1412       /* Verify UDP length. */
1413       ip_len = clib_net_to_host_u16 (ip->length);
1414       udp_len = clib_net_to_host_u16 (udp->length);
1415
1416       len_diff = ip_len - udp_len;
1417       *good_tcp_udp &= len_diff >= 0;
1418       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1419     }
1420 }
1421
/*
 * Checksum-state predicates for locally destined packets.
 *
 * Every macro argument and every expansion is fully parenthesised so the
 * macros can be used with arbitrary expressions and inside larger
 * expressions.  Note that _b is evaluated more than once.
 */

/*
 * Non-zero when the buffer has the OFFLOAD flag and a TCP or UDP checksum
 * offload bit set in its offload flags.
 */
#define ip4_local_csum_is_offloaded(_b)                                       \
  (((_b)->flags & VNET_BUFFER_F_OFFLOAD) &&                                   \
   (vnet_buffer (_b)->oflags &                                                \
    (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))

/*
 * Non-zero when a TCP/UDP packet still needs a software checksum check:
 * the checksum has neither been computed already nor been offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                             \
  ((is_tcp_udp) &&                                                            \
   !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) ||                    \
     ip4_local_csum_is_offloaded (_b)))

/*
 * 1 when the buffer's checksum is flagged correct or is offloaded, else 0.
 */
#define ip4_local_csum_is_valid(_b)                                           \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) ||                      \
    ip4_local_csum_is_offloaded (_b)) != 0)
1434
1435 static inline void
1436 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1437                          ip4_header_t * ih, u8 * error)
1438 {
1439   u8 is_udp, is_tcp_udp, good_tcp_udp;
1440
1441   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1442   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1443
1444   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1445     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1446   else
1447     good_tcp_udp = ip4_local_csum_is_valid (b);
1448
1449   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1450   *error = (is_tcp_udp && !good_tcp_udp
1451             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1452 }
1453
/*
 * Two-packet variant of ip4_local_check_l4_csum().
 *
 * b, ih and error are arrays of two entries, one per packet.  The verdict
 * for both packets is first taken from the buffer flags.  If either packet
 * needs a software check, every TCP/UDP packet of the pair is validated in
 * software, not only the one that needed it.  A TCP/UDP packet with a bad
 * checksum gets IP4_ERROR_TCP_CHECKSUM (+1 for UDP) written to its error
 * slot.
 */
1454 static inline void
1455 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1456                             ip4_header_t ** ih, u8 * error)
1457 {
1458   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1459
1460   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1461   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1462
1463   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1464   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1465
  /* default verdict from the flags already present on the buffers */
1466   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1467   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1468
1469   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1470                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1471     {
1472       if (is_tcp_udp[0])
1473         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1474                                     &good_tcp_udp[0]);
1475       if (is_tcp_udp[1])
1476         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1477                                     &good_tcp_udp[1]);
1478     }
1479
  /* TCP_CHECKSUM + is_udp selects the TCP or the UDP checksum error */
1480   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1481               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1482   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1483               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1484 }
1485
1486 static inline void
1487 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1488                               vlib_buffer_t * b, u16 * next, u8 error,
1489                               u8 head_of_feature_arc)
1490 {
1491   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1492   u32 next_index;
1493
1494   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1495   b->error = error ? error_node->errors[error] : 0;
1496   if (head_of_feature_arc)
1497     {
1498       next_index = *next;
1499       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1500         {
1501           vnet_feature_arc_start (arc_index,
1502                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1503                                   &next_index, b);
1504           *next = next_index;
1505         }
1506     }
1507 }
1508
/*
 * One-entry cache of the most recent source-address check, used by
 * ip4_local_check_src() and ip4_local_check_src_x2() so that consecutive
 * packets from the same source reuse the previous lookup result.
 */
1509 typedef struct
1510 {
  /* source address of the last packet that went through the lookup */
1511   ip4_address_t src;
  /* load-balance index returned by the lookup for that source */
1512   u32 lbi;
  /* error code the check produced for that source */
1513   u8 error;
  /* non-zero until the first lookup has filled the entry */
1514   u8 first;
1515 } ip4_local_last_check_t;
1516
/*
 * Source-address check for one locally destined packet.
 *
 * b              - the packet buffer
 * ip0            - its IPv4 header
 * last_check     - one-entry cache of the previous source lookup
 * error0         - in/out error code for the packet
 * is_receive_dpo - when set, ip.rx_sw_if_index is taken from the receive
 *                  DPO named by ip.adj_index[VLIB_TX]; otherwise from
 *                  sw_if_index[VLIB_RX]
 *
 * Besides the check itself the function updates ip.fib_index,
 * ip.rx_sw_if_index and both ip.adj_index[] slots of the buffer.
 */
1517 static inline void
1518 ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0,
1519                      ip4_local_last_check_t *last_check, u8 *error0,
1520                      int is_receive_dpo)
1521 {
1522   const dpo_id_t *dpo0;
1523   load_balance_t *lb0;
1524   u32 lbi0;
1525
  /* a sw_if_index[VLIB_TX] other than ~0 overrides the FIB index */
1526   vnet_buffer (b)->ip.fib_index =
1527     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1528     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1529
1530   if (is_receive_dpo)
1531     {
1532       receive_dpo_t *rd;
1533       rd = receive_dpo_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
1534       vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index;
1535     }
1536   else
1537     vnet_buffer (b)->ip.rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
1538
1539   /*
1540    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1541    *  adjacency for the destination address (the local interface address).
1542    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1543    *  adjacency for the source address (the remote sender's address)
1544    */
  /* cache miss: different source than the cached one, or nothing cached */
1545   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1546       last_check->first)
1547     {
1548       lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
1549                                         &ip0->src_address);
1550
1551       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1552         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1553       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1554
1555       lb0 = load_balance_get (lbi0);
1556       dpo0 = load_balance_get_bucket_i (lb0, 0);
1557
1558       /*
1559        * Must have a route to source otherwise we drop the packet.
1560        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1561        *
1562        * The checks are:
1563        *  - the source is a receive => it's from us => bogus, do this
1564        *    first since it sets a different error code.
1565        *  - uRPF check for any route to source - accept if passes.
1566        *  - allow packets destined to the broadcast address from unknown sources
1567        */
1568
      /* both checks only fire while no other error has been recorded */
1569       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1570                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1571                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1572       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1573                   && !fib_urpf_check_size (lb0->lb_urpf)
1574                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1575                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1576
      /* remember the result for the next packet */
1577       last_check->src.as_u32 = ip0->src_address.as_u32;
1578       last_check->lbi = lbi0;
1579       last_check->error = *error0;
1580       last_check->first = 0;
1581     }
1582   else
1583     {
      /* cache hit: reuse the cached load-balance index and error code */
1584       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1585         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1586       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1587       *error0 = last_check->error;
1588     }
1589 }
1590
/*
 * Two-packet variant of ip4_local_check_src().
 *
 * b, ip and error are arrays of two entries, one per packet.  The cached
 * result is only reused when both packets carry the cached source address
 * and the cache has been filled; otherwise both sources are looked up and
 * the cache is refreshed from the second packet.
 */
1591 static inline void
1592 ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip,
1593                         ip4_local_last_check_t *last_check, u8 *error,
1594                         int is_receive_dpo)
1595 {
1596   const dpo_id_t *dpo[2];
1597   load_balance_t *lb[2];
1598   u32 not_last_hit;
1599   u32 lbi[2];
1600
  /* non-zero if the cache is empty or either source differs from it */
1601   not_last_hit = last_check->first;
1602   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1603   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1604
  /* a sw_if_index[VLIB_TX] other than ~0 overrides the FIB index */
1605   vnet_buffer (b[0])->ip.fib_index =
1606     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1607     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1608     vnet_buffer (b[0])->ip.fib_index;
1609
1610   vnet_buffer (b[1])->ip.fib_index =
1611     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1612     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1613     vnet_buffer (b[1])->ip.fib_index;
1614
1615   if (is_receive_dpo)
1616     {
1617       const receive_dpo_t *rd0, *rd1;
1618       rd0 = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
1619       rd1 = receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
1620       vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
1621       vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
1622     }
1623   else
1624     {
1625       vnet_buffer (b[0])->ip.rx_sw_if_index =
1626         vnet_buffer (b[0])->sw_if_index[VLIB_RX];
1627       vnet_buffer (b[1])->ip.rx_sw_if_index =
1628         vnet_buffer (b[1])->sw_if_index[VLIB_RX];
1629     }
1630
1631   /*
1632    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1633    *  adjacency for the destination address (the local interface address).
1634    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1635    *  adjacency for the source address (the remote sender's address)
1636    */
1637   if (PREDICT_TRUE (not_last_hit))
1638     {
1639       ip4_fib_forwarding_lookup_x2 (
1640         vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
1641         &ip[0]->src_address, &ip[1]->src_address, &lbi[0], &lbi[1]);
1642
1643       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1644         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1645       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1646
1647       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1648         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1649       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1650
1651       lb[0] = load_balance_get (lbi[0]);
1652       lb[1] = load_balance_get (lbi[1]);
1653
1654       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1655       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1656
      /*
       * Same two checks as the single-packet version, per packet: a source
       * that resolves to a receive DPO is spoofed; a source without any
       * uRPF entry is a lookup miss unless the destination is
       * 255.255.255.255.  Both only fire while no other error is recorded.
       */
1657       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1658                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1659                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1660       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1661                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1662                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1663                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1664
1665       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1666                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1667                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1668       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1669                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1670                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1671                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1672
      /* the cache remembers the second packet of the pair */
1673       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1674       last_check->lbi = lbi[1];
1675       last_check->error = error[1];
1676       last_check->first = 0;
1677     }
1678   else
1679     {
      /* cache hit for both packets: reuse the cached index and error */
1680       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1681         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1682       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1683
1684       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1685         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1686       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1687
1688       error[0] = last_check->error;
1689       error[1] = last_check->error;
1690     }
1691 }
1692
/* Packet classes returned by ip4_local_classify(). */
1693 enum ip_local_packet_type_e
1694 {
  /* neither a fragment nor flagged as NATed */
1695   IP_LOCAL_PACKET_TYPE_L4,
  /* buffer carries VNET_BUFFER_F_IS_NATED */
1696   IP_LOCAL_PACKET_TYPE_NAT,
  /* IPv4 fragment; classified towards IP_LOCAL_NEXT_REASSEMBLY */
1697   IP_LOCAL_PACKET_TYPE_FRAG,
1698 };
1699
1700 /**
1701  * Determine packet type and next node.
1702  *
1703  * The expectation is that all packets that are not L4 will skip
1704  * checksums and source checks.
1705  */
1706 always_inline u8
1707 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1708 {
1709   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1710
1711   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1712     {
1713       *next = IP_LOCAL_NEXT_REASSEMBLY;
1714       return IP_LOCAL_PACKET_TYPE_FRAG;
1715     }
1716   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1717     {
1718       *next = lm->local_next_by_ip_protocol[ip->protocol];
1719       return IP_LOCAL_PACKET_TYPE_NAT;
1720     }
1721
1722   *next = lm->local_next_by_ip_protocol[ip->protocol];
1723   return IP_LOCAL_PACKET_TYPE_L4;
1724 }
1725
1726 static inline uword
1727 ip4_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1728                   vlib_frame_t *frame, int head_of_feature_arc,
1729                   int is_receive_dpo)
1730 {
1731   u32 *from, n_left_from;
1732   vlib_node_runtime_t *error_node =
1733     vlib_node_get_runtime (vm, ip4_local_node.index);
1734   u16 nexts[VLIB_FRAME_SIZE], *next;
1735   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1736   ip4_header_t *ip[2];
1737   u8 error[2], pt[2];
1738
1739   ip4_local_last_check_t last_check = {
1740     /*
1741      * 0.0.0.0 can appear as the source address of an IP packet,
1742      * as can any other address, hence the need to use the 'first'
1743      * member to make sure the .lbi is initialised for the first
1744      * packet.
1745      */
1746     .src = {.as_u32 = 0},
1747     .lbi = ~0,
1748     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1749     .first = 1,
1750   };
1751
1752   from = vlib_frame_vector_args (frame);
1753   n_left_from = frame->n_vectors;
1754
1755   if (node->flags & VLIB_NODE_FLAG_TRACE)
1756     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1757
1758   vlib_get_buffers (vm, from, bufs, n_left_from);
1759   b = bufs;
1760   next = nexts;
1761
1762   while (n_left_from >= 6)
1763     {
1764       u8 not_batch = 0;
1765
1766       /* Prefetch next iteration. */
1767       {
1768         vlib_prefetch_buffer_header (b[4], LOAD);
1769         vlib_prefetch_buffer_header (b[5], LOAD);
1770
1771         clib_prefetch_load (b[4]->data);
1772         clib_prefetch_load (b[5]->data);
1773       }
1774
1775       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1776
1777       ip[0] = vlib_buffer_get_current (b[0]);
1778       ip[1] = vlib_buffer_get_current (b[1]);
1779
1780       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1781       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1782
1783       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1784       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1785
1786       not_batch = pt[0] ^ pt[1];
1787
1788       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1789         goto skip_checks;
1790
1791       if (PREDICT_TRUE (not_batch == 0))
1792         {
1793           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1794           ip4_local_check_src_x2 (b, ip, &last_check, error, is_receive_dpo);
1795         }
1796       else
1797         {
1798           if (!pt[0])
1799             {
1800               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1801               ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1802                                    is_receive_dpo);
1803             }
1804           if (!pt[1])
1805             {
1806               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1807               ip4_local_check_src (b[1], ip[1], &last_check, &error[1],
1808                                    is_receive_dpo);
1809             }
1810         }
1811
1812     skip_checks:
1813
1814       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1815                                     head_of_feature_arc);
1816       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1817                                     head_of_feature_arc);
1818
1819       b += 2;
1820       next += 2;
1821       n_left_from -= 2;
1822     }
1823
1824   while (n_left_from > 0)
1825     {
1826       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1827
1828       ip[0] = vlib_buffer_get_current (b[0]);
1829       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1830       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1831
1832       if (head_of_feature_arc == 0 || pt[0])
1833         goto skip_check;
1834
1835       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1836       ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1837                            is_receive_dpo);
1838
1839     skip_check:
1840
1841       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1842                                     head_of_feature_arc);
1843
1844       b += 1;
1845       next += 1;
1846       n_left_from -= 1;
1847     }
1848
1849   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1850   return frame->n_vectors;
1851 }
1852
1853 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1854                                vlib_frame_t * frame)
1855 {
1856   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1857                            0 /* is_receive_dpo */);
1858 }
1859
1860 VLIB_REGISTER_NODE (ip4_local_node) =
1861 {
1862   .name = "ip4-local",
1863   .vector_size = sizeof (u32),
1864   .format_trace = format_ip4_forward_next_trace,
1865   .n_errors = IP4_N_ERROR,
1866   .error_strings = ip4_error_strings,
1867   .n_next_nodes = IP_LOCAL_N_NEXT,
1868   .next_nodes =
1869   {
1870     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1871     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1872     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1873     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1874     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1875   },
1876 };
1877
1878 VLIB_NODE_FN (ip4_receive_local_node)
1879 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1880 {
1881   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1882                            1 /* is_receive_dpo */);
1883 }
1884
1885 VLIB_REGISTER_NODE (ip4_receive_local_node) = {
1886   .name = "ip4-receive",
1887   .vector_size = sizeof (u32),
1888   .format_trace = format_ip4_forward_next_trace,
1889   .sibling_of = "ip4-local"
1890 };
1891
1892 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1893                                           vlib_node_runtime_t * node,
1894                                           vlib_frame_t * frame)
1895 {
1896   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */,
1897                            0 /* is_receive_dpo */);
1898 }
1899
1900 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1901   .name = "ip4-local-end-of-arc",
1902   .vector_size = sizeof (u32),
1903
1904   .format_trace = format_ip4_forward_next_trace,
1905   .sibling_of = "ip4-local",
1906 };
1907
1908 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1909   .arc_name = "ip4-local",
1910   .node_name = "ip4-local-end-of-arc",
1911   .runs_before = 0, /* not before any other features */
1912 };
1913
1914 #ifndef CLIB_MARCH_VARIANT
1915 void
1916 ip4_register_protocol (u32 protocol, u32 node_index)
1917 {
1918   vlib_main_t *vm = vlib_get_main ();
1919   ip4_main_t *im = &ip4_main;
1920   ip_lookup_main_t *lm = &im->lookup_main;
1921
1922   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1923   lm->local_next_by_ip_protocol[protocol] =
1924     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1925 }
1926
1927 void
1928 ip4_unregister_protocol (u32 protocol)
1929 {
1930   ip4_main_t *im = &ip4_main;
1931   ip_lookup_main_t *lm = &im->lookup_main;
1932
1933   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1934   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1935 }
1936 #endif
1937
1938 static clib_error_t *
1939 show_ip_local_command_fn (vlib_main_t * vm,
1940                           unformat_input_t * input, vlib_cli_command_t * cmd)
1941 {
1942   ip4_main_t *im = &ip4_main;
1943   ip_lookup_main_t *lm = &im->lookup_main;
1944   int i;
1945
1946   vlib_cli_output (vm, "Protocols handled by ip4_local");
1947   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1948     {
1949       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1950         {
1951           u32 node_index = vlib_get_node (vm,
1952                                           ip4_local_node.index)->
1953             next_nodes[lm->local_next_by_ip_protocol[i]];
1954           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1955                            format_vlib_node_name, vm, node_index);
1956         }
1957     }
1958   return 0;
1959 }
1960
1961
1962
1963 /*?
1964  * Display the set of protocols handled by the local IPv4 stack.
1965  *
1966  * @cliexpar
1967  * Example of how to display local protocol table:
1968  * @cliexstart{show ip local}
1969  * Protocols handled by ip4_local
1970  * 1
1971  * 17
1972  * 47
1973  * @cliexend
1974 ?*/
1975 /* *INDENT-OFF* */
1976 VLIB_CLI_COMMAND (show_ip_local, static) =
1977 {
1978   .path = "show ip local",
1979   .function = show_ip_local_command_fn,
1980   .short_help = "show ip local",
1981 };
1982 /* *INDENT-ON* */
1983
/**
 * Next-node slots the rewrite code can select on its own; the other
 * next indices it writes come from the adjacency's rewrite header.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,       /* value the nexts[] array is preset to */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1, /* TTL expired, or DF set and too big */
  IP4_REWRITE_NEXT_FRAGMENT = 2,   /* too big and DF clear */
  IP4_REWRITE_N_NEXT = 3,          /* Last */
} ip4_rewrite_next_t;
1991
1992 /**
 * The bits of an IPv4 address to mask in order to construct a multicast
 * MAC address
1995  */
1996 #if CLIB_ARCH_IS_BIG_ENDIAN
1997 #define IP4_MCAST_ADDR_MASK 0x007fffff
1998 #else
1999 #define IP4_MCAST_ADDR_MASK 0xffff7f00
2000 #endif
2001
2002 always_inline void
2003 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2004                u16 adj_packet_bytes, bool df, u16 * next,
2005                u8 is_midchain, u32 * error)
2006 {
2007   if (packet_len > adj_packet_bytes)
2008     {
2009       *error = IP4_ERROR_MTU_EXCEEDED;
2010       if (df)
2011         {
2012           icmp4_error_set_vnet_buffer
2013             (b, ICMP4_destination_unreachable,
2014              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2015              adj_packet_bytes);
2016           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2017         }
2018       else
2019         {
2020           /* IP fragmentation */
2021           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2022                                    (is_midchain ?
2023                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2024                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2025           *next = IP4_REWRITE_NEXT_FRAGMENT;
2026         }
2027     }
2028 }
2029
2030 /* increment TTL & update checksum.
2031    Works either endian, so no need for byte swap. */
2032 static_always_inline void
2033 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2034 {
2035   i32 ttl;
2036   u32 checksum;
2037   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2038     return;
2039
2040   ttl = ip->ttl;
2041
2042   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2043   checksum += checksum >= 0xffff;
2044
2045   ip->checksum = checksum;
2046   ttl += 1;
2047   ip->ttl = ttl;
2048
2049   ASSERT (ip4_header_checksum_is_valid (ip));
2050 }
2051
2052 /* Decrement TTL & update checksum.
2053    Works either endian, so no need for byte swap. */
2054 static_always_inline void
2055 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2056                             u32 * error)
2057 {
2058   i32 ttl;
2059   u32 checksum;
2060   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2061     return;
2062
2063   ttl = ip->ttl;
2064
2065   /* Input node should have reject packets with ttl 0. */
2066   ASSERT (ip->ttl > 0);
2067
2068   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2069   checksum += checksum >= 0xffff;
2070
2071   ip->checksum = checksum;
2072   ttl -= 1;
2073   ip->ttl = ttl;
2074
2075   /*
2076    * If the ttl drops below 1 when forwarding, generate
2077    * an ICMP response.
2078    */
2079   if (PREDICT_FALSE (ttl <= 0))
2080     {
2081       *error = IP4_ERROR_TIME_EXPIRED;
2082       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2083       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2084                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2085                                    0);
2086       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2087     }
2088
2089   /* Verify checksum. */
2090   ASSERT (ip4_header_checksum_is_valid (ip) ||
2091           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
2092           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
2093 }
2094
2095 always_inline uword
2096 ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
2097                     vlib_frame_t *frame, int do_counters, int is_midchain,
2098                     int is_mcast)
2099 {
2100   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2101   u32 *from = vlib_frame_vector_args (frame);
2102   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2103   u16 nexts[VLIB_FRAME_SIZE], *next;
2104   u32 n_left_from;
2105   vlib_node_runtime_t *error_node =
2106     vlib_node_get_runtime (vm, ip4_input_node.index);
2107
2108   n_left_from = frame->n_vectors;
2109   u32 thread_index = vm->thread_index;
2110
2111   vlib_get_buffers (vm, from, bufs, n_left_from);
2112   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2113
2114 #if (CLIB_N_PREFETCHES >= 8)
2115   if (n_left_from >= 6)
2116     {
2117       int i;
2118       for (i = 2; i < 6; i++)
2119         vlib_prefetch_buffer_header (bufs[i], LOAD);
2120     }
2121
2122   next = nexts;
2123   b = bufs;
2124   while (n_left_from >= 8)
2125     {
2126       const ip_adjacency_t *adj0, *adj1;
2127       ip4_header_t *ip0, *ip1;
2128       u32 rw_len0, error0, adj_index0;
2129       u32 rw_len1, error1, adj_index1;
2130       u32 tx_sw_if_index0, tx_sw_if_index1;
2131       u8 *p;
2132
2133       if (is_midchain)
2134         {
2135           vlib_prefetch_buffer_header (b[6], LOAD);
2136           vlib_prefetch_buffer_header (b[7], LOAD);
2137         }
2138
2139       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2140       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2141
2142       /*
2143        * pre-fetch the per-adjacency counters
2144        */
2145       if (do_counters)
2146         {
2147           vlib_prefetch_combined_counter (&adjacency_counters,
2148                                           thread_index, adj_index0);
2149           vlib_prefetch_combined_counter (&adjacency_counters,
2150                                           thread_index, adj_index1);
2151         }
2152
2153       ip0 = vlib_buffer_get_current (b[0]);
2154       ip1 = vlib_buffer_get_current (b[1]);
2155
2156       error0 = error1 = IP4_ERROR_NONE;
2157
2158       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2159       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2160
2161       /* Rewrite packet header and updates lengths. */
2162       adj0 = adj_get (adj_index0);
2163       adj1 = adj_get (adj_index1);
2164
2165       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2166       rw_len0 = adj0[0].rewrite_header.data_bytes;
2167       rw_len1 = adj1[0].rewrite_header.data_bytes;
2168       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2169       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2170
2171       p = vlib_buffer_get_current (b[2]);
2172       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2173       clib_prefetch_load (p);
2174
2175       p = vlib_buffer_get_current (b[3]);
2176       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2177       clib_prefetch_load (p);
2178
2179       /* Check MTU of outgoing interface. */
2180       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2181       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2182
2183       if (b[0]->flags & VNET_BUFFER_F_GSO)
2184         ip0_len = gso_mtu_sz (b[0]);
2185       if (b[1]->flags & VNET_BUFFER_F_GSO)
2186         ip1_len = gso_mtu_sz (b[1]);
2187
2188       ip4_mtu_check (b[0], ip0_len,
2189                      adj0[0].rewrite_header.max_l3_packet_bytes,
2190                      ip0->flags_and_fragment_offset &
2191                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2192                      next + 0, is_midchain, &error0);
2193       ip4_mtu_check (b[1], ip1_len,
2194                      adj1[0].rewrite_header.max_l3_packet_bytes,
2195                      ip1->flags_and_fragment_offset &
2196                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2197                      next + 1, is_midchain, &error1);
2198
2199       if (is_mcast)
2200         {
2201           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2202                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2203                     IP4_ERROR_SAME_INTERFACE : error0);
2204           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2205                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2206                     IP4_ERROR_SAME_INTERFACE : error1);
2207         }
2208
2209       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2210        * to see the IP header */
2211       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2212         {
2213           u32 next_index = adj0[0].rewrite_header.next_index;
2214           vlib_buffer_advance (b[0], -(word) rw_len0);
2215
2216           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2217           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2218
2219           if (PREDICT_FALSE
2220               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2221             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2222                                                 tx_sw_if_index0,
2223                                                 &next_index, b[0],
2224                                                 adj0->ia_cfg_index);
2225
2226           next[0] = next_index;
2227           if (is_midchain)
2228             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2229                                         0 /* is_ip6 */ );
2230         }
2231       else
2232         {
2233           b[0]->error = error_node->errors[error0];
2234           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2235             ip4_ttl_inc (b[0], ip0);
2236         }
2237       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2238         {
2239           u32 next_index = adj1[0].rewrite_header.next_index;
2240           vlib_buffer_advance (b[1], -(word) rw_len1);
2241
2242           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2243           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2244
2245           if (PREDICT_FALSE
2246               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2247             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2248                                                 tx_sw_if_index1,
2249                                                 &next_index, b[1],
2250                                                 adj1->ia_cfg_index);
2251           next[1] = next_index;
2252           if (is_midchain)
2253             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2254                                         0 /* is_ip6 */ );
2255         }
2256       else
2257         {
2258           b[1]->error = error_node->errors[error1];
2259           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2260             ip4_ttl_inc (b[1], ip1);
2261         }
2262
2263       if (is_midchain)
2264         /* Guess we are only writing on ipv4 header. */
2265         vnet_rewrite_two_headers (adj0[0], adj1[0],
2266                                   ip0, ip1, sizeof (ip4_header_t));
2267       else
2268         /* Guess we are only writing on simple Ethernet header. */
2269         vnet_rewrite_two_headers (adj0[0], adj1[0],
2270                                   ip0, ip1, sizeof (ethernet_header_t));
2271
2272       if (do_counters)
2273         {
2274           if (error0 == IP4_ERROR_NONE)
2275             vlib_increment_combined_counter
2276               (&adjacency_counters,
2277                thread_index,
2278                adj_index0, 1,
2279                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2280
2281           if (error1 == IP4_ERROR_NONE)
2282             vlib_increment_combined_counter
2283               (&adjacency_counters,
2284                thread_index,
2285                adj_index1, 1,
2286                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2287         }
2288
2289       if (is_midchain)
2290         {
2291           if (error0 == IP4_ERROR_NONE)
2292             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2293           if (error1 == IP4_ERROR_NONE)
2294             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2295         }
2296
2297       if (is_mcast)
2298         {
2299           /* copy bytes from the IP address into the MAC rewrite */
2300           if (error0 == IP4_ERROR_NONE)
2301             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2302                                         adj0->rewrite_header.dst_mcast_offset,
2303                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2304           if (error1 == IP4_ERROR_NONE)
2305             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2306                                         adj1->rewrite_header.dst_mcast_offset,
2307                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2308         }
2309
2310       next += 2;
2311       b += 2;
2312       n_left_from -= 2;
2313     }
2314 #elif (CLIB_N_PREFETCHES >= 4)
2315   next = nexts;
2316   b = bufs;
2317   while (n_left_from >= 1)
2318     {
2319       ip_adjacency_t *adj0;
2320       ip4_header_t *ip0;
2321       u32 rw_len0, error0, adj_index0;
2322       u32 tx_sw_if_index0;
2323       u8 *p;
2324
2325       /* Prefetch next iteration */
2326       if (PREDICT_TRUE (n_left_from >= 4))
2327         {
2328           ip_adjacency_t *adj2;
2329           u32 adj_index2;
2330
2331           vlib_prefetch_buffer_header (b[3], LOAD);
2332           vlib_prefetch_buffer_data (b[2], LOAD);
2333
2334           /* Prefetch adj->rewrite_header */
2335           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2336           adj2 = adj_get (adj_index2);
2337           p = (u8 *) adj2;
2338           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2339                          LOAD);
2340         }
2341
2342       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2343
2344       /*
2345        * Prefetch the per-adjacency counters
2346        */
2347       if (do_counters)
2348         {
2349           vlib_prefetch_combined_counter (&adjacency_counters,
2350                                           thread_index, adj_index0);
2351         }
2352
2353       ip0 = vlib_buffer_get_current (b[0]);
2354
2355       error0 = IP4_ERROR_NONE;
2356
2357       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2358
2359       /* Rewrite packet header and updates lengths. */
2360       adj0 = adj_get (adj_index0);
2361
2362       /* Rewrite header was prefetched. */
2363       rw_len0 = adj0[0].rewrite_header.data_bytes;
2364       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2365
2366       /* Check MTU of outgoing interface. */
2367       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2368
2369       if (b[0]->flags & VNET_BUFFER_F_GSO)
2370         ip0_len = gso_mtu_sz (b[0]);
2371
2372       ip4_mtu_check (b[0], ip0_len,
2373                      adj0[0].rewrite_header.max_l3_packet_bytes,
2374                      ip0->flags_and_fragment_offset &
2375                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2376                      next + 0, is_midchain, &error0);
2377
2378       if (is_mcast)
2379         {
2380           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2381                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2382                     IP4_ERROR_SAME_INTERFACE : error0);
2383         }
2384
2385       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2386        * to see the IP header */
2387       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2388         {
2389           u32 next_index = adj0[0].rewrite_header.next_index;
2390           vlib_buffer_advance (b[0], -(word) rw_len0);
2391           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2392           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2393
2394           if (PREDICT_FALSE
2395               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2396             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2397                                                 tx_sw_if_index0,
2398                                                 &next_index, b[0],
2399                                                 adj0->ia_cfg_index);
2400           next[0] = next_index;
2401
2402           if (is_midchain)
2403             {
2404               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2405                                           0 /* is_ip6 */ );
2406
2407               /* Guess we are only writing on ipv4 header. */
2408               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2409             }
2410           else
2411             /* Guess we are only writing on simple Ethernet header. */
2412             vnet_rewrite_one_header (adj0[0], ip0,
2413                                      sizeof (ethernet_header_t));
2414
2415           /*
2416            * Bump the per-adjacency counters
2417            */
2418           if (do_counters)
2419             vlib_increment_combined_counter
2420               (&adjacency_counters,
2421                thread_index,
2422                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2423                                                            b[0]) + rw_len0);
2424
2425           if (is_midchain)
2426             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2427
2428           if (is_mcast)
2429             /* copy bytes from the IP address into the MAC rewrite */
2430             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2431                                         adj0->rewrite_header.dst_mcast_offset,
2432                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2433         }
2434       else
2435         {
2436           b[0]->error = error_node->errors[error0];
2437           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2438             ip4_ttl_inc (b[0], ip0);
2439         }
2440
2441       next += 1;
2442       b += 1;
2443       n_left_from -= 1;
2444     }
2445 #endif
2446
2447   while (n_left_from > 0)
2448     {
2449       ip_adjacency_t *adj0;
2450       ip4_header_t *ip0;
2451       u32 rw_len0, adj_index0, error0;
2452       u32 tx_sw_if_index0;
2453
2454       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2455
2456       adj0 = adj_get (adj_index0);
2457
2458       if (do_counters)
2459         vlib_prefetch_combined_counter (&adjacency_counters,
2460                                         thread_index, adj_index0);
2461
2462       ip0 = vlib_buffer_get_current (b[0]);
2463
2464       error0 = IP4_ERROR_NONE;
2465
2466       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2467
2468
2469       /* Update packet buffer attributes/set output interface. */
2470       rw_len0 = adj0[0].rewrite_header.data_bytes;
2471       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2472
2473       /* Check MTU of outgoing interface. */
2474       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2475       if (b[0]->flags & VNET_BUFFER_F_GSO)
2476         ip0_len = gso_mtu_sz (b[0]);
2477
2478       ip4_mtu_check (b[0], ip0_len,
2479                      adj0[0].rewrite_header.max_l3_packet_bytes,
2480                      ip0->flags_and_fragment_offset &
2481                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2482                      next + 0, is_midchain, &error0);
2483
2484       if (is_mcast)
2485         {
2486           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2487                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2488                     IP4_ERROR_SAME_INTERFACE : error0);
2489         }
2490
2491       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2492        * to see the IP header */
2493       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2494         {
2495           u32 next_index = adj0[0].rewrite_header.next_index;
2496           vlib_buffer_advance (b[0], -(word) rw_len0);
2497           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2498           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2499
2500           if (PREDICT_FALSE
2501               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2502             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2503                                                 tx_sw_if_index0,
2504                                                 &next_index, b[0],
2505                                                 adj0->ia_cfg_index);
2506           next[0] = next_index;
2507
2508           if (is_midchain)
2509             {
2510               /* this acts on the packet that is about to be encapped */
2511               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2512                                           0 /* is_ip6 */ );
2513
2514               /* Guess we are only writing on ipv4 header. */
2515               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2516             }
2517           else
2518             /* Guess we are only writing on simple Ethernet header. */
2519             vnet_rewrite_one_header (adj0[0], ip0,
2520                                      sizeof (ethernet_header_t));
2521
2522           if (do_counters)
2523             vlib_increment_combined_counter
2524               (&adjacency_counters,
2525                thread_index, adj_index0, 1,
2526                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2527
2528           if (is_midchain)
2529             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2530
2531           if (is_mcast)
2532             /* copy bytes from the IP address into the MAC rewrite */
2533             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2534                                         adj0->rewrite_header.dst_mcast_offset,
2535                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2536         }
2537       else
2538         {
2539           b[0]->error = error_node->errors[error0];
2540           /* undo the TTL decrement - we'll be back to do it again */
2541           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2542             ip4_ttl_inc (b[0], ip0);
2543         }
2544
2545       next += 1;
2546       b += 1;
2547       n_left_from -= 1;
2548     }
2549
2550
2551   /* Need to do trace after rewrites to pick up new packet data. */
2552   if (node->flags & VLIB_NODE_FLAG_TRACE)
2553     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2554
2555   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2556   return frame->n_vectors;
2557 }
2558
2559 /** @brief IPv4 rewrite node.
2560     @node ip4-rewrite
2561
2562     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2563     header checksum, fetch the ip adjacency, check the outbound mtu,
2564     apply the adjacency rewrite, and send pkts to the adjacency
2565     rewrite header's rewrite_next_index.
2566
2567     @param vm vlib_main_t corresponding to the current thread
2568     @param node vlib_node_runtime_t
2569     @param frame vlib_frame_t whose contents should be dispatched
2570
2571     @par Graph mechanics: buffer metadata, next index usage
2572
2573     @em Uses:
2574     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2575         - the rewrite adjacency index
2576     - <code>adj->lookup_next_index</code>
2577         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2578           the packet will be dropped.
2579     - <code>adj->rewrite_header</code>
2580         - Rewrite string length, rewrite string, next_index
2581
2582     @em Sets:
2583     - <code>b->current_data, b->current_length</code>
2584         - Updated net of applying the rewrite string
2585
2586     <em>Next Indices:</em>
2587     - <code> adj->rewrite_header.next_index </code>
2588       or @c ip4-drop
2589 */
2590
2591 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2592                                  vlib_frame_t * frame)
2593 {
2594   if (adj_are_counters_enabled ())
2595     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2596   else
2597     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2598 }
2599
2600 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2601                                        vlib_node_runtime_t * node,
2602                                        vlib_frame_t * frame)
2603 {
2604   if (adj_are_counters_enabled ())
2605     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2606   else
2607     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2608 }
2609
2610 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2611                                   vlib_node_runtime_t * node,
2612                                   vlib_frame_t * frame)
2613 {
2614   if (adj_are_counters_enabled ())
2615     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2616   else
2617     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2618 }
2619
2620 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2621                                        vlib_node_runtime_t * node,
2622                                        vlib_frame_t * frame)
2623 {
2624   if (adj_are_counters_enabled ())
2625     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2626   else
2627     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2628 }
2629
2630 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2631                                         vlib_node_runtime_t * node,
2632                                         vlib_frame_t * frame)
2633 {
2634   if (adj_are_counters_enabled ())
2635     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2636   else
2637     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2638 }
2639
2640 /* *INDENT-OFF* */
2641 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2642   .name = "ip4-rewrite",
2643   .vector_size = sizeof (u32),
2644
2645   .format_trace = format_ip4_rewrite_trace,
2646
2647   .n_next_nodes = IP4_REWRITE_N_NEXT,
2648   .next_nodes = {
2649     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2650     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2651     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2652   },
2653 };
2654
2655 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2656   .name = "ip4-rewrite-bcast",
2657   .vector_size = sizeof (u32),
2658
2659   .format_trace = format_ip4_rewrite_trace,
2660   .sibling_of = "ip4-rewrite",
2661 };
2662
2663 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2664   .name = "ip4-rewrite-mcast",
2665   .vector_size = sizeof (u32),
2666
2667   .format_trace = format_ip4_rewrite_trace,
2668   .sibling_of = "ip4-rewrite",
2669 };
2670
2671 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2672   .name = "ip4-mcast-midchain",
2673   .vector_size = sizeof (u32),
2674
2675   .format_trace = format_ip4_rewrite_trace,
2676   .sibling_of = "ip4-rewrite",
2677 };
2678
2679 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2680   .name = "ip4-midchain",
2681   .vector_size = sizeof (u32),
2682   .format_trace = format_ip4_rewrite_trace,
2683   .sibling_of = "ip4-rewrite",
2684 };
2685 /* *INDENT-ON* */
2686
2687 static clib_error_t *
2688 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2689                              unformat_input_t * input,
2690                              vlib_cli_command_t * cmd)
2691 {
2692   int matched = 0;
2693   u32 table_id = 0;
2694   u32 flow_hash_config = 0;
2695   int rv;
2696
2697   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2698     {
2699       if (unformat (input, "table %d", &table_id))
2700         matched = 1;
2701 #define _(a, b, v)                                                            \
2702   else if (unformat (input, #a))                                              \
2703   {                                                                           \
2704     flow_hash_config |= v;                                                    \
2705     matched = 1;                                                              \
2706   }
2707       foreach_flow_hash_bit
2708 #undef _
2709         else
2710         break;
2711     }
2712
2713   if (matched == 0)
2714     return clib_error_return (0, "unknown input `%U'",
2715                               format_unformat_error, input);
2716
2717   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2718   switch (rv)
2719     {
2720     case 0:
2721       break;
2722
2723     case VNET_API_ERROR_NO_SUCH_FIB:
2724       return clib_error_return (0, "no such FIB table %d", table_id);
2725
2726     default:
2727       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2728       break;
2729     }
2730
2731   return 0;
2732 }
2733
2734 /*?
2735  * Configure the set of IPv4 fields used by the flow hash.
2736  *
2737  * @cliexpar
2738  * Example of how to set the flow hash on a given table:
2739  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2740  * Example of how to display the configured flow hash:
2741  * @cliexstart{show ip fib}
2742  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2743  * 0.0.0.0/0
2744  *   unicast-ip4-chain
2745  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2746  *     [0] [@0]: dpo-drop ip6
2747  * 0.0.0.0/32
2748  *   unicast-ip4-chain
2749  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2750  *     [0] [@0]: dpo-drop ip6
2751  * 224.0.0.0/8
2752  *   unicast-ip4-chain
2753  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2754  *     [0] [@0]: dpo-drop ip6
2755  * 6.0.1.2/32
2756  *   unicast-ip4-chain
2757  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2758  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2759  * 7.0.0.1/32
2760  *   unicast-ip4-chain
2761  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2762  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2763  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2764  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2765  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2766  * 240.0.0.0/8
2767  *   unicast-ip4-chain
2768  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2769  *     [0] [@0]: dpo-drop ip6
2770  * 255.255.255.255/32
2771  *   unicast-ip4-chain
2772  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2773  *     [0] [@0]: dpo-drop ip6
2774  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2775  * 0.0.0.0/0
2776  *   unicast-ip4-chain
2777  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2778  *     [0] [@0]: dpo-drop ip6
2779  * 0.0.0.0/32
2780  *   unicast-ip4-chain
2781  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2782  *     [0] [@0]: dpo-drop ip6
2783  * 172.16.1.0/24
2784  *   unicast-ip4-chain
2785  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2786  *     [0] [@4]: ipv4-glean: af_packet0
2787  * 172.16.1.1/32
2788  *   unicast-ip4-chain
2789  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2790  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2791  * 172.16.1.2/32
2792  *   unicast-ip4-chain
2793  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2794  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2795  * 172.16.2.0/24
2796  *   unicast-ip4-chain
2797  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2798  *     [0] [@4]: ipv4-glean: af_packet1
2799  * 172.16.2.1/32
2800  *   unicast-ip4-chain
2801  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2802  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2803  * 224.0.0.0/8
2804  *   unicast-ip4-chain
2805  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2806  *     [0] [@0]: dpo-drop ip6
2807  * 240.0.0.0/8
2808  *   unicast-ip4-chain
2809  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2810  *     [0] [@0]: dpo-drop ip6
2811  * 255.255.255.255/32
2812  *   unicast-ip4-chain
2813  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2814  *     [0] [@0]: dpo-drop ip6
2815  * @cliexend
2816 ?*/
2817 /* *INDENT-OFF* */
2818 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2819 {
2820   .path = "set ip flow-hash",
2821   .short_help =
2822   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2823   .function = set_ip_flow_hash_command_fn,
2824 };
2825 /* *INDENT-ON* */
2826
2827 #ifndef CLIB_MARCH_VARIANT
2828 int
2829 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2830                              u32 table_index)
2831 {
2832   vnet_main_t *vnm = vnet_get_main ();
2833   vnet_interface_main_t *im = &vnm->interface_main;
2834   ip4_main_t *ipm = &ip4_main;
2835   ip_lookup_main_t *lm = &ipm->lookup_main;
2836   vnet_classify_main_t *cm = &vnet_classify_main;
2837   ip4_address_t *if_addr;
2838
2839   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2840     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2841
2842   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2843     return VNET_API_ERROR_NO_SUCH_ENTRY;
2844
2845   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2846   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2847
2848   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2849
2850   if (NULL != if_addr)
2851     {
2852       fib_prefix_t pfx = {
2853         .fp_len = 32,
2854         .fp_proto = FIB_PROTOCOL_IP4,
2855         .fp_addr.ip4 = *if_addr,
2856       };
2857       u32 fib_index;
2858
2859       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2860                                                        sw_if_index);
2861
2862
2863       if (table_index != (u32) ~ 0)
2864         {
2865           dpo_id_t dpo = DPO_INVALID;
2866
2867           dpo_set (&dpo,
2868                    DPO_CLASSIFY,
2869                    DPO_PROTO_IP4,
2870                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2871
2872           fib_table_entry_special_dpo_add (fib_index,
2873                                            &pfx,
2874                                            FIB_SOURCE_CLASSIFY,
2875                                            FIB_ENTRY_FLAG_NONE, &dpo);
2876           dpo_reset (&dpo);
2877         }
2878       else
2879         {
2880           fib_table_entry_special_remove (fib_index,
2881                                           &pfx, FIB_SOURCE_CLASSIFY);
2882         }
2883     }
2884
2885   return 0;
2886 }
2887 #endif
2888
2889 static clib_error_t *
2890 set_ip_classify_command_fn (vlib_main_t * vm,
2891                             unformat_input_t * input,
2892                             vlib_cli_command_t * cmd)
2893 {
2894   u32 table_index = ~0;
2895   int table_index_set = 0;
2896   u32 sw_if_index = ~0;
2897   int rv;
2898
2899   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2900     {
2901       if (unformat (input, "table-index %d", &table_index))
2902         table_index_set = 1;
2903       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2904                          vnet_get_main (), &sw_if_index))
2905         ;
2906       else
2907         break;
2908     }
2909
2910   if (table_index_set == 0)
2911     return clib_error_return (0, "classify table-index must be specified");
2912
2913   if (sw_if_index == ~0)
2914     return clib_error_return (0, "interface / subif must be specified");
2915
2916   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2917
2918   switch (rv)
2919     {
2920     case 0:
2921       break;
2922
2923     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2924       return clib_error_return (0, "No such interface");
2925
2926     case VNET_API_ERROR_NO_SUCH_ENTRY:
2927       return clib_error_return (0, "No such classifier table");
2928     }
2929   return 0;
2930 }
2931
2932 /*?
2933  * Assign a classification table to an interface. The classification
2934  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2935  * commands. Once the table is created, use this command to filter packets
2936  * on an interface.
2937  *
2938  * @cliexpar
2939  * Example of how to assign a classification table to an interface:
2940  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2941 ?*/
2942 /* *INDENT-OFF* */
2943 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2944 {
2945     .path = "set ip classify",
2946     .short_help =
2947     "set ip classify intfc <interface> table-index <classify-idx>",
2948     .function = set_ip_classify_command_fn,
2949 };
2950 /* *INDENT-ON* */
2951
2952 /*
2953  * fd.io coding-style-patch-verification: ON
2954  *
2955  * Local Variables:
2956  * eval: (c-set-style "gnu")
2957  * End:
2958  */