ip: Add ip46-local node for local swif[rx]
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/receive_dpo.h>
56 #include <vnet/dpo/classify_dpo.h>
57 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
58 #include <vnet/adj/adj_dp.h>
59 #include <vnet/pg/pg.h>
60
61 #include <vnet/ip/ip4_forward.h>
62 #include <vnet/interface_output.h>
63 #include <vnet/classify/vnet_classify.h>
64
65 /** @brief IPv4 lookup node.
66     @node ip4-lookup
67
68     This is the main IPv4 lookup dispatch node.
69
70     @param vm vlib_main_t corresponding to the current thread
71     @param node vlib_node_runtime_t
72     @param frame vlib_frame_t whose contents should be dispatched
73
74     @par Graph mechanics: buffer metadata, next index usage
75
76     @em Uses:
77     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
78         - Indicates the @c sw_if_index value of the interface that the
79           packet was received on.
80     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
81         - When the value is @c ~0 then the node performs a longest prefix
82           match (LPM) for the packet destination address in the FIB attached
83           to the receive interface.
84         - Otherwise perform LPM for the packet destination address in the
85           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
86           value (0, 1, ...) and not a VRF id.
87
88     @em Sets:
89     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
90         - The lookup result adjacency index.
91
92     <em>Next Index:</em>
93     - Dispatches the packet to the node index found in
94       ip_adjacency_t @c adj->lookup_next_index
95       (where @c adj is the lookup result adjacency).
96 */
97 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
98                                 vlib_frame_t * frame)
99 {
100   return ip4_lookup_inline (vm, node, frame);
101 }
102
103 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
104
105 /* *INDENT-OFF* */
106 VLIB_REGISTER_NODE (ip4_lookup_node) =
107 {
108   .name = "ip4-lookup",
109   .vector_size = sizeof (u32),
110   .format_trace = format_ip4_lookup_trace,
111   .n_next_nodes = IP_LOOKUP_N_NEXT,
112   .next_nodes = IP4_LOOKUP_NEXT_NODES,
113 };
114 /* *INDENT-ON* */
115
116 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
117                                       vlib_node_runtime_t * node,
118                                       vlib_frame_t * frame)
119 {
120   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
121   u32 n_left, *from;
122   u32 thread_index = vm->thread_index;
123   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
124   u16 nexts[VLIB_FRAME_SIZE], *next;
125
126   from = vlib_frame_vector_args (frame);
127   n_left = frame->n_vectors;
128   next = nexts;
129
130   vlib_get_buffers (vm, from, bufs, n_left);
131
132   while (n_left >= 4)
133     {
134       const load_balance_t *lb0, *lb1;
135       const ip4_header_t *ip0, *ip1;
136       u32 lbi0, hc0, lbi1, hc1;
137       const dpo_id_t *dpo0, *dpo1;
138
139       /* Prefetch next iteration. */
140       {
141         vlib_prefetch_buffer_header (b[2], LOAD);
142         vlib_prefetch_buffer_header (b[3], LOAD);
143
144         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
145         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
146       }
147
148       ip0 = vlib_buffer_get_current (b[0]);
149       ip1 = vlib_buffer_get_current (b[1]);
150       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
151       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
152
153       lb0 = load_balance_get (lbi0);
154       lb1 = load_balance_get (lbi1);
155
156       /*
157        * this node is for via FIBs we can re-use the hash value from the
158        * to node if present.
159        * We don't want to use the same hash value at each level in the recursion
160        * graph as that would lead to polarisation
161        */
162       hc0 = hc1 = 0;
163
164       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
165         {
166           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
167             {
168               hc0 = vnet_buffer (b[0])->ip.flow_hash =
169                 vnet_buffer (b[0])->ip.flow_hash >> 1;
170             }
171           else
172             {
173               hc0 = vnet_buffer (b[0])->ip.flow_hash =
174                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
175             }
176           dpo0 = load_balance_get_fwd_bucket
177             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
178         }
179       else
180         {
181           dpo0 = load_balance_get_bucket_i (lb0, 0);
182         }
183       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
184         {
185           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
186             {
187               hc1 = vnet_buffer (b[1])->ip.flow_hash =
188                 vnet_buffer (b[1])->ip.flow_hash >> 1;
189             }
190           else
191             {
192               hc1 = vnet_buffer (b[1])->ip.flow_hash =
193                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
194             }
195           dpo1 = load_balance_get_fwd_bucket
196             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
197         }
198       else
199         {
200           dpo1 = load_balance_get_bucket_i (lb1, 0);
201         }
202
203       next[0] = dpo0->dpoi_next_node;
204       next[1] = dpo1->dpoi_next_node;
205
206       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
207       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
208
209       vlib_increment_combined_counter
210         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
211       vlib_increment_combined_counter
212         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
213
214       b += 2;
215       next += 2;
216       n_left -= 2;
217     }
218
219   while (n_left > 0)
220     {
221       const load_balance_t *lb0;
222       const ip4_header_t *ip0;
223       const dpo_id_t *dpo0;
224       u32 lbi0, hc0;
225
226       ip0 = vlib_buffer_get_current (b[0]);
227       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
228
229       lb0 = load_balance_get (lbi0);
230
231       hc0 = 0;
232       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
233         {
234           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
235             {
236               hc0 = vnet_buffer (b[0])->ip.flow_hash =
237                 vnet_buffer (b[0])->ip.flow_hash >> 1;
238             }
239           else
240             {
241               hc0 = vnet_buffer (b[0])->ip.flow_hash =
242                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
243             }
244           dpo0 = load_balance_get_fwd_bucket
245             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
246         }
247       else
248         {
249           dpo0 = load_balance_get_bucket_i (lb0, 0);
250         }
251
252       next[0] = dpo0->dpoi_next_node;
253       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
254
255       vlib_increment_combined_counter
256         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
257
258       b += 1;
259       next += 1;
260       n_left -= 1;
261     }
262
263   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
264   if (node->flags & VLIB_NODE_FLAG_TRACE)
265     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
266
267   return frame->n_vectors;
268 }
269
270 /* *INDENT-OFF* */
271 VLIB_REGISTER_NODE (ip4_load_balance_node) =
272 {
273   .name = "ip4-load-balance",
274   .vector_size = sizeof (u32),
275   .sibling_of = "ip4-lookup",
276   .format_trace = format_ip4_lookup_trace,
277 };
278 /* *INDENT-ON* */
279
280 #ifndef CLIB_MARCH_VARIANT
281 /* get first interface address */
282 ip4_address_t *
283 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
284                              ip_interface_address_t ** result_ia)
285 {
286   ip_lookup_main_t *lm = &im->lookup_main;
287   ip_interface_address_t *ia = 0;
288   ip4_address_t *result = 0;
289
290   /* *INDENT-OFF* */
291   foreach_ip_interface_address
292     (lm, ia, sw_if_index,
293      1 /* honor unnumbered */ ,
294      ({
295        ip4_address_t * a =
296          ip_interface_address_get_address (lm, ia);
297        result = a;
298        break;
299      }));
300   /* *INDENT-OFF* */
301   if (result_ia)
302     *result_ia = result ? ia : 0;
303   return result;
304 }
305 #endif
306
307 static void
308 ip4_add_subnet_bcast_route (u32 fib_index,
309                             fib_prefix_t *pfx,
310                             u32 sw_if_index)
311 {
312   vnet_sw_interface_flags_t iflags;
313
314   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
315
316   fib_table_entry_special_remove(fib_index,
317                                  pfx,
318                                  FIB_SOURCE_INTERFACE);
319
320   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
321     {
322       fib_table_entry_update_one_path (fib_index, pfx,
323                                        FIB_SOURCE_INTERFACE,
324                                        FIB_ENTRY_FLAG_NONE,
325                                        DPO_PROTO_IP4,
326                                        /* No next-hop address */
327                                        &ADJ_BCAST_ADDR,
328                                        sw_if_index,
329                                        // invalid FIB index
330                                        ~0,
331                                        1,
332                                        // no out-label stack
333                                        NULL,
334                                        FIB_ROUTE_PATH_FLAG_NONE);
335     }
336   else
337     {
338         fib_table_entry_special_add(fib_index,
339                                     pfx,
340                                     FIB_SOURCE_INTERFACE,
341                                     (FIB_ENTRY_FLAG_DROP |
342                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
343     }
344 }
345
346 static void
347 ip4_add_interface_prefix_routes (ip4_main_t *im,
348                                  u32 sw_if_index,
349                                  u32 fib_index,
350                                  ip_interface_address_t * a)
351 {
352   ip_lookup_main_t *lm = &im->lookup_main;
353   ip_interface_prefix_t *if_prefix;
354   ip4_address_t *address = ip_interface_address_get_address (lm, a);
355
356   ip_interface_prefix_key_t key = {
357     .prefix = {
358       .fp_len = a->address_length,
359       .fp_proto = FIB_PROTOCOL_IP4,
360       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
361     },
362     .sw_if_index = sw_if_index,
363   };
364
365   fib_prefix_t pfx_special = {
366     .fp_proto = FIB_PROTOCOL_IP4,
367   };
368
369   /* If prefix already set on interface, just increment ref count & return */
370   if_prefix = ip_get_interface_prefix (lm, &key);
371   if (if_prefix)
372     {
373       if_prefix->ref_count += 1;
374       return;
375     }
376
377   /* New prefix - allocate a pool entry, initialize it, add to the hash */
378   pool_get (lm->if_prefix_pool, if_prefix);
379   if_prefix->ref_count = 1;
380   if_prefix->src_ia_index = a - lm->if_address_pool;
381   clib_memcpy (&if_prefix->key, &key, sizeof (key));
382   mhash_set (&lm->prefix_to_if_prefix_index, &key,
383              if_prefix - lm->if_prefix_pool, 0 /* old value */);
384
385   pfx_special.fp_len = a->address_length;
386   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
387
388   /* set the glean route for the prefix */
389   fib_table_entry_update_one_path (fib_index, &pfx_special,
390                                    FIB_SOURCE_INTERFACE,
391                                    (FIB_ENTRY_FLAG_CONNECTED |
392                                     FIB_ENTRY_FLAG_ATTACHED),
393                                    DPO_PROTO_IP4,
394                                    /* No next-hop address */
395                                    NULL,
396                                    sw_if_index,
397                                    /* invalid FIB index */
398                                    ~0,
399                                    1,
400                                    /* no out-label stack */
401                                    NULL,
402                                    FIB_ROUTE_PATH_FLAG_NONE);
403
404   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
405   if (a->address_length <= 30)
406     {
407       /* set a drop route for the base address of the prefix */
408       pfx_special.fp_len = 32;
409       pfx_special.fp_addr.ip4.as_u32 =
410         address->as_u32 & im->fib_masks[a->address_length];
411
412       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
413         fib_table_entry_special_add (fib_index, &pfx_special,
414                                      FIB_SOURCE_INTERFACE,
415                                      (FIB_ENTRY_FLAG_DROP |
416                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
417
418       /* set a route for the broadcast address of the prefix */
419       pfx_special.fp_len = 32;
420       pfx_special.fp_addr.ip4.as_u32 =
421         address->as_u32 | ~im->fib_masks[a->address_length];
422       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
423         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
424
425
426     }
427   /* length == 31 - add an attached route for the other address */
428   else if (a->address_length == 31)
429     {
430       pfx_special.fp_len = 32;
431       pfx_special.fp_addr.ip4.as_u32 =
432         address->as_u32 ^ clib_host_to_net_u32(1);
433
434       fib_table_entry_update_one_path (fib_index, &pfx_special,
435                                        FIB_SOURCE_INTERFACE,
436                                        (FIB_ENTRY_FLAG_ATTACHED),
437                                        DPO_PROTO_IP4,
438                                        &pfx_special.fp_addr,
439                                        sw_if_index,
440                                        /* invalid FIB index */
441                                        ~0,
442                                        1,
443                                        NULL,
444                                        FIB_ROUTE_PATH_FLAG_NONE);
445     }
446 }
447
448 static void
449 ip4_add_interface_routes (u32 sw_if_index,
450                           ip4_main_t * im, u32 fib_index,
451                           ip_interface_address_t * a)
452 {
453   ip_lookup_main_t *lm = &im->lookup_main;
454   ip4_address_t *address = ip_interface_address_get_address (lm, a);
455   fib_prefix_t pfx = {
456     .fp_len = 32,
457     .fp_proto = FIB_PROTOCOL_IP4,
458     .fp_addr.ip4 = *address,
459   };
460
461   /* set special routes for the prefix if needed */
462   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
463
464   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
465     {
466       u32 classify_table_index =
467         lm->classify_table_index_by_sw_if_index[sw_if_index];
468       if (classify_table_index != (u32) ~ 0)
469         {
470           dpo_id_t dpo = DPO_INVALID;
471
472           dpo_set (&dpo,
473                    DPO_CLASSIFY,
474                    DPO_PROTO_IP4,
475                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
476
477           fib_table_entry_special_dpo_add (fib_index,
478                                            &pfx,
479                                            FIB_SOURCE_CLASSIFY,
480                                            FIB_ENTRY_FLAG_NONE, &dpo);
481           dpo_reset (&dpo);
482         }
483     }
484
485   fib_table_entry_update_one_path (fib_index, &pfx,
486                                    FIB_SOURCE_INTERFACE,
487                                    (FIB_ENTRY_FLAG_CONNECTED |
488                                     FIB_ENTRY_FLAG_LOCAL),
489                                    DPO_PROTO_IP4,
490                                    &pfx.fp_addr,
491                                    sw_if_index,
492                                    // invalid FIB index
493                                    ~0,
494                                    1, NULL,
495                                    FIB_ROUTE_PATH_FLAG_NONE);
496 }
497
498 static void
499 ip4_del_interface_prefix_routes (ip4_main_t * im,
500                                  u32 sw_if_index,
501                                  u32 fib_index,
502                                  ip4_address_t * address,
503                                  u32 address_length)
504 {
505   ip_lookup_main_t *lm = &im->lookup_main;
506   ip_interface_prefix_t *if_prefix;
507
508   ip_interface_prefix_key_t key = {
509     .prefix = {
510       .fp_len = address_length,
511       .fp_proto = FIB_PROTOCOL_IP4,
512       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
513     },
514     .sw_if_index = sw_if_index,
515   };
516
517   fib_prefix_t pfx_special = {
518     .fp_len = 32,
519     .fp_proto = FIB_PROTOCOL_IP4,
520   };
521
522   if_prefix = ip_get_interface_prefix (lm, &key);
523   if (!if_prefix)
524     {
525       clib_warning ("Prefix not found while deleting %U",
526                     format_ip4_address_and_length, address, address_length);
527       return;
528     }
529
530   if_prefix->ref_count -= 1;
531
532   /*
533    * Routes need to be adjusted if deleting last intf addr in prefix
534    *
535    * We're done now otherwise
536    */
537   if (if_prefix->ref_count > 0)
538     return;
539
540   /* length <= 30, delete glean route, first address, last address */
541   if (address_length <= 30)
542     {
543       /* Less work to do in FIB if we remove the covered /32s first */
544
545       /* first address in prefix */
546       pfx_special.fp_addr.ip4.as_u32 =
547         address->as_u32 & im->fib_masks[address_length];
548       pfx_special.fp_len = 32;
549
550       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
551         fib_table_entry_special_remove (fib_index,
552                                         &pfx_special,
553                                         FIB_SOURCE_INTERFACE);
554
555       /* prefix broadcast address */
556       pfx_special.fp_addr.ip4.as_u32 =
557         address->as_u32 | ~im->fib_masks[address_length];
558       pfx_special.fp_len = 32;
559
560       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
561         fib_table_entry_special_remove (fib_index,
562                                         &pfx_special,
563                                         FIB_SOURCE_INTERFACE);
564     }
565   else if (address_length == 31)
566     {
567       /* length == 31, delete attached route for the other address */
568       pfx_special.fp_addr.ip4.as_u32 =
569         address->as_u32 ^ clib_host_to_net_u32(1);
570
571       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
572     }
573
574   /* remove glean route for prefix */
575   pfx_special.fp_addr.ip4 = *address;
576   pfx_special.fp_len = address_length;
577   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
578
579   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
580   pool_put (lm->if_prefix_pool, if_prefix);
581 }
582
583 static void
584 ip4_del_interface_routes (u32 sw_if_index,
585                           ip4_main_t * im,
586                           u32 fib_index,
587                           ip4_address_t * address, u32 address_length)
588 {
589   fib_prefix_t pfx = {
590     .fp_len = 32,
591     .fp_proto = FIB_PROTOCOL_IP4,
592     .fp_addr.ip4 = *address,
593   };
594
595   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
596
597   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
598                                    address, address_length);
599 }
600
601 #ifndef CLIB_MARCH_VARIANT
602 void
603 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
604 {
605   ip4_main_t *im = &ip4_main;
606   vnet_main_t *vnm = vnet_get_main ();
607   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
608
609   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
610
611   /*
612    * enable/disable only on the 1<->0 transition
613    */
614   if (is_enable)
615     {
616       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
617         return;
618     }
619   else
620     {
621       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
622       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
623         return;
624     }
625   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
626                                !is_enable, 0, 0);
627
628
629   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
630                                sw_if_index, !is_enable, 0, 0);
631
632   if (is_enable)
633     hi->l3_if_count++;
634   else if (hi->l3_if_count)
635     hi->l3_if_count--;
636
637   {
638     ip4_enable_disable_interface_callback_t *cb;
639     vec_foreach (cb, im->enable_disable_interface_callbacks)
640       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
641   }
642 }
643
644 static clib_error_t *
645 ip4_add_del_interface_address_internal (vlib_main_t * vm,
646                                         u32 sw_if_index,
647                                         ip4_address_t * address,
648                                         u32 address_length, u32 is_del)
649 {
650   vnet_main_t *vnm = vnet_get_main ();
651   ip4_main_t *im = &ip4_main;
652   ip_lookup_main_t *lm = &im->lookup_main;
653   clib_error_t *error = 0;
654   u32 if_address_index;
655   ip4_address_fib_t ip4_af, *addr_fib = 0;
656
657   error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
658   if (error)
659     {
660       vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
661       return error;
662     }
663
664   ip4_addr_fib_init (&ip4_af, address,
665                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
666   vec_add1 (addr_fib, ip4_af);
667
668   /*
669    * there is no support for adj-fib handling in the presence of overlapping
670    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
671    * most routers do.
672    */
673   /* *INDENT-OFF* */
674   if (!is_del)
675     {
676       /* When adding an address check that it does not conflict
677          with an existing address on any interface in this table. */
678       ip_interface_address_t *ia;
679       vnet_sw_interface_t *sif;
680
681       pool_foreach (sif, vnm->interface_main.sw_interfaces)
682        {
683           if (im->fib_index_by_sw_if_index[sw_if_index] ==
684               im->fib_index_by_sw_if_index[sif->sw_if_index])
685             {
686               foreach_ip_interface_address
687                 (&im->lookup_main, ia, sif->sw_if_index,
688                  0 /* honor unnumbered */ ,
689                  ({
690                    ip4_address_t * x =
691                      ip_interface_address_get_address
692                      (&im->lookup_main, ia);
693
694                    if (ip4_destination_matches_route
695                        (im, address, x, ia->address_length) ||
696                        ip4_destination_matches_route (im,
697                                                       x,
698                                                       address,
699                                                       address_length))
700                      {
701                        /* an intf may have >1 addr from the same prefix */
702                        if ((sw_if_index == sif->sw_if_index) &&
703                            (ia->address_length == address_length) &&
704                            (x->as_u32 != address->as_u32))
705                          continue;
706
707                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
708                          /* if the address we're comparing against is stale
709                           * then the CP has not added this one back yet, maybe
710                           * it never will, so we have to assume it won't and
711                           * ignore it. if it does add it back, then it will fail
712                           * because this one is now present */
713                          continue;
714
715                        /* error if the length or intf was different */
716                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
717
718                        error = clib_error_create
719                          ("failed to add %U on %U which conflicts with %U for interface %U",
720                           format_ip4_address_and_length, address,
721                           address_length,
722                           format_vnet_sw_if_index_name, vnm,
723                           sw_if_index,
724                           format_ip4_address_and_length, x,
725                           ia->address_length,
726                           format_vnet_sw_if_index_name, vnm,
727                           sif->sw_if_index);
728                        goto done;
729                      }
730                  }));
731             }
732       }
733     }
734   /* *INDENT-ON* */
735
736   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
737
738   if (is_del)
739     {
740       if (~0 == if_address_index)
741         {
742           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
743           error = clib_error_create ("%U not found for interface %U",
744                                      lm->format_address_and_length,
745                                      addr_fib, address_length,
746                                      format_vnet_sw_if_index_name, vnm,
747                                      sw_if_index);
748           goto done;
749         }
750
751       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
752                                         address_length, sw_if_index);
753       if (error)
754         goto done;
755     }
756   else
757     {
758       if (~0 != if_address_index)
759         {
760           ip_interface_address_t *ia;
761
762           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
763
764           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
765             {
766               if (ia->sw_if_index == sw_if_index)
767                 {
768                   /* re-adding an address during the replace action.
769                    * consdier this the update. clear the flag and
770                    * we're done */
771                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
772                   goto done;
773                 }
774               else
775                 {
776                   /* The prefix is moving from one interface to another.
777                    * delete the stale and add the new */
778                   ip4_add_del_interface_address_internal (vm,
779                                                           ia->sw_if_index,
780                                                           address,
781                                                           address_length, 1);
782                   ia = NULL;
783                   error = ip_interface_address_add (lm, sw_if_index,
784                                                     addr_fib, address_length,
785                                                     &if_address_index);
786                 }
787             }
788           else
789             {
790               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
791               error = clib_error_create
792                 ("Prefix %U already found on interface %U",
793                  lm->format_address_and_length, addr_fib, address_length,
794                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
795             }
796         }
797       else
798         error = ip_interface_address_add (lm, sw_if_index,
799                                           addr_fib, address_length,
800                                           &if_address_index);
801     }
802
803   if (error)
804     goto done;
805
806   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
807   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
808
809   /* intf addr routes are added/deleted on admin up/down */
810   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
811     {
812       if (is_del)
813         ip4_del_interface_routes (sw_if_index,
814                                   im, ip4_af.fib_index, address,
815                                   address_length);
816       else
817         ip4_add_interface_routes (sw_if_index,
818                                   im, ip4_af.fib_index,
819                                   pool_elt_at_index
820                                   (lm->if_address_pool, if_address_index));
821     }
822
823   ip4_add_del_interface_address_callback_t *cb;
824   vec_foreach (cb, im->add_del_interface_address_callbacks)
825     cb->function (im, cb->function_opaque, sw_if_index,
826                   address, address_length, if_address_index, is_del);
827
828 done:
829   vec_free (addr_fib);
830   return error;
831 }
832
/*
 * Add or delete an IPv4 address on an interface.
 *
 * Thin public wrapper: every argument is passed through unchanged to
 * ip4_add_del_interface_address_internal() and its result is returned.
 *
 * vm             - vlib main, forwarded as-is
 * sw_if_index    - software interface the address belongs to
 * address        - the IPv4 address
 * address_length - prefix length of the address
 * is_del         - non-zero to delete the address, zero to add it
 *
 * Returns the clib_error_t pointer produced by the internal routine.
 */
833 clib_error_t *
834 ip4_add_del_interface_address (vlib_main_t * vm,
835                                u32 sw_if_index,
836                                ip4_address_t * address,
837                                u32 address_length, u32 is_del)
838 {
839   return ip4_add_del_interface_address_internal
840     (vm, sw_if_index, address, address_length, is_del);
841 }
842
/*
 * Re-install the subnet broadcast route of every address on an interface.
 *
 * Walks the addresses configured on sw_if_index and, for each one whose
 * prefix length is 30 or shorter, builds the /32 subnet broadcast prefix
 * (address OR-ed with the inverted network mask) and hands it to
 * ip4_add_subnet_bcast_route() together with the interface's FIB index.
 *
 * sw_if_index - interface whose addresses are walked
 * enable      - NOTE(review): never read in this function body; presumably
 *               ip4_add_subnet_bcast_route() derives the forward/drop
 *               choice from interface state - confirm against that helper.
 */
843 void
844 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
845 {
846   ip_interface_address_t *ia;
847   ip4_main_t *im;
848
849   im = &ip4_main;
850
851   /*
852    * when directed broadcast is enabled, the subnet broadcast route will forward
853    * packets using an adjacency with a broadcast MAC. otherwise it drops
854    */
855   /* *INDENT-OFF* */
856   foreach_ip_interface_address(&im->lookup_main, ia,
857                                sw_if_index, 0,
858      ({
859        if (ia->address_length <= 30)
860          {
861            ip4_address_t *ipa;
862
863            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
864
865            fib_prefix_t pfx = {
866              .fp_len = 32,
867              .fp_proto = FIB_PROTOCOL_IP4,
868              .fp_addr = {
869                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
870              },
871            };
872
873            ip4_add_subnet_bcast_route
874              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
875                                                   sw_if_index),
876               &pfx, sw_if_index);
877          }
878      }));
879   /* *INDENT-ON* */
880 }
881 #endif
882
/*
 * Admin up/down handler: add or remove the routes of every address
 * configured on the interface.
 *
 * vnm         - vnet main (not referenced in the body)
 * sw_if_index - interface whose admin state changed
 * flags       - new interface flags; only VNET_SW_INTERFACE_FLAG_ADMIN_UP
 *               is examined
 *
 * The per-interface address-pool index vector is first grown to cover
 * sw_if_index (new slots initialised to ~0).  The interface's FIB index is
 * read from fib_index_by_sw_if_index, then each address gets
 * ip4_add_interface_routes() when the interface is coming up, or
 * ip4_del_interface_routes() when it is going down.
 *
 * Always returns 0.
 */
883 static clib_error_t *
884 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
885 {
886   ip4_main_t *im = &ip4_main;
887   ip_interface_address_t *ia;
888   ip4_address_t *a;
889   u32 is_admin_up, fib_index;
890
891   vec_validate_init_empty (im->
892                            lookup_main.if_address_pool_index_by_sw_if_index,
893                            sw_if_index, ~0);
894
895   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
896
897   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
898
899   /* *INDENT-OFF* */
900   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
901                                 0 /* honor unnumbered */,
902   ({
903     a = ip_interface_address_get_address (&im->lookup_main, ia);
904     if (is_admin_up)
905       ip4_add_interface_routes (sw_if_index,
906                                 im, fib_index,
907                                 ia);
908     else
909       ip4_del_interface_routes (sw_if_index,
910                                 im, fib_index,
911                                 a, ia->address_length);
912   }));
913   /* *INDENT-ON* */
914
915   return 0;
916 }
917
/* Register the handler above; presumably run on every admin state change. */
918 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
919
920 /* Built-in ip4 unicast rx feature path definition
 *
 * The "ip4-unicast" arc starts at the ip4-input / ip4-input-no-checksum
 * nodes and ends at ip4-lookup.  Each VNET_FEATURE_INIT below registers one
 * feature node on the arc; .runs_before lists the features it must precede.
 * Read together, the constraints give:
 *
 *   ip4-flow-classify -> ip4-inacl -> ip4-policer-classify ->
 *   ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup
 *
 * with ip4-source-and-port-range-check-rx ordered before
 * ip4-policer-classify and ip4-not-enabled ordered before ip4-lookup.
 */
921 /* *INDENT-OFF* */
922 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
923 {
924   .arc_name = "ip4-unicast",
925   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
926   .last_in_arc = "ip4-lookup",
927   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
928 };
929
930 VNET_FEATURE_INIT (ip4_flow_classify, static) =
931 {
932   .arc_name = "ip4-unicast",
933   .node_name = "ip4-flow-classify",
934   .runs_before = VNET_FEATURES ("ip4-inacl"),
935 };
936
937 VNET_FEATURE_INIT (ip4_inacl, static) =
938 {
939   .arc_name = "ip4-unicast",
940   .node_name = "ip4-inacl",
941   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
942 };
943
944 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
945 {
946   .arc_name = "ip4-unicast",
947   .node_name = "ip4-source-and-port-range-check-rx",
948   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
949 };
950
951 VNET_FEATURE_INIT (ip4_policer_classify, static) =
952 {
953   .arc_name = "ip4-unicast",
954   .node_name = "ip4-policer-classify",
955   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
956 };
957
958 VNET_FEATURE_INIT (ip4_ipsec, static) =
959 {
960   .arc_name = "ip4-unicast",
961   .node_name = "ipsec4-input-feature",
962   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
963 };
964
965 VNET_FEATURE_INIT (ip4_vpath, static) =
966 {
967   .arc_name = "ip4-unicast",
968   .node_name = "vpath-input-ip4",
969   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
970 };
971
972 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
973 {
974   .arc_name = "ip4-unicast",
975   .node_name = "ip4-vxlan-bypass",
976   .runs_before = VNET_FEATURES ("ip4-lookup"),
977 };
978
979 VNET_FEATURE_INIT (ip4_not_enabled, static) =
980 {
981   .arc_name = "ip4-unicast",
982   .node_name = "ip4-not-enabled",
983   .runs_before = VNET_FEATURES ("ip4-lookup"),
984 };
985
986 VNET_FEATURE_INIT (ip4_lookup, static) =
987 {
988   .arc_name = "ip4-unicast",
989   .node_name = "ip4-lookup",
990   .runs_before = 0,     /* not before any other features */
991 };
992
993 /* Built-in ip4 multicast rx feature path definition
 *
 * The "ip4-multicast" arc shares its start nodes (ip4-input,
 * ip4-input-no-checksum) with the unicast arc and ends at
 * ip4-mfib-forward-lookup.  vpath-input-ip4 and ip4-not-enabled are both
 * ordered before the final lookup feature.
 */
994 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
995 {
996   .arc_name = "ip4-multicast",
997   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
998   .last_in_arc = "ip4-mfib-forward-lookup",
999   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1000 };
1001
1002 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1003 {
1004   .arc_name = "ip4-multicast",
1005   .node_name = "vpath-input-ip4",
1006   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1007 };
1008
1009 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1010 {
1011   .arc_name = "ip4-multicast",
1012   .node_name = "ip4-not-enabled",
1013   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1014 };
1015
1016 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1017 {
1018   .arc_name = "ip4-multicast",
1019   .node_name = "ip4-mfib-forward-lookup",
1020   .runs_before = 0,     /* last feature */
1021 };
1022
1023 /* Source and port-range check ip4 tx feature path definition
 *
 * The "ip4-output" arc starts at ip4-rewrite, ip4-midchain and ip4-dvr-dpo
 * and ends at interface-output.  The runs_before constraints order the
 * features as:
 *
 *   ip4-source-and-port-range-check-tx -> ip4-outacl ->
 *   ipsec4-output-feature -> interface-output
 */
1024 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1025 {
1026   .arc_name = "ip4-output",
1027   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1028   .last_in_arc = "interface-output",
1029   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1030 };
1031
1032 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1033 {
1034   .arc_name = "ip4-output",
1035   .node_name = "ip4-source-and-port-range-check-tx",
1036   .runs_before = VNET_FEATURES ("ip4-outacl"),
1037 };
1038
1039 VNET_FEATURE_INIT (ip4_outacl, static) =
1040 {
1041   .arc_name = "ip4-output",
1042   .node_name = "ip4-outacl",
1043   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1044 };
1045
1046 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1047 {
1048   .arc_name = "ip4-output",
1049   .node_name = "ipsec4-output-feature",
1050   .runs_before = VNET_FEATURES ("interface-output"),
1051 };
1052
1053 /* Built-in ip4 tx feature path definition */
1054 VNET_FEATURE_INIT (ip4_interface_output, static) =
1055 {
1056   .arc_name = "ip4-output",
1057   .node_name = "interface-output",
1058   .runs_before = 0,     /* not before any other features */
1059 };
1060 /* *INDENT-ON* */
1061
/*
 * Interface create/delete handler for IPv4.
 *
 * vnm         - vnet main (not referenced in the body)
 * sw_if_index - interface being created or deleted
 * is_add      - non-zero on creation, zero on deletion
 *
 * Both per-interface table-index vectors (unicast FIB and multicast FIB) are
 * grown to cover sw_if_index.  On creation the interface is bound to table
 * index 0 in both.  On deletion the interface's unnumbered setting is
 * updated with ~0 / 0, every address still configured on it is deleted
 * through ip4_add_del_interface_address(), and mfib is disabled on it.
 *
 * Finally the "ip4-not-enabled" feature is switched on (creation) or off
 * (deletion) on both the ip4-unicast and ip4-multicast arcs.
 *
 * Always returns 0.
 */
1062 static clib_error_t *
1063 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1064 {
1065   ip4_main_t *im = &ip4_main;
1066
1067   vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
1068   vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
1069
1070   if (is_add)
1071     {
1072       /* Fill in lookup tables with default table (0). */
1073       im->fib_index_by_sw_if_index[sw_if_index] = 0;
1074       im->mfib_index_by_sw_if_index[sw_if_index] = 0;
1075     }
1076   else
1077     {
           /* im4 points at the same ip4_main object as im above. */
1078       ip4_main_t *im4 = &ip4_main;
1079       ip_lookup_main_t *lm4 = &im4->lookup_main;
1080       ip_interface_address_t *ia = 0;
1081       ip4_address_t *address;
1082       vlib_main_t *vm = vlib_get_main ();
1083
1084       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1085       /* *INDENT-OFF* */
1086       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1087       ({
1088         address = ip_interface_address_get_address (lm4, ia);
1089         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1090       }));
1091       /* *INDENT-ON* */
1092       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1093     }
1094
1095   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1096                                is_add, 0, 0);
1097
1098   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1099                                sw_if_index, is_add, 0, 0);
1100
1101   return /* no error */ 0;
1102 }
1103
/* Register the handler above; presumably run on interface create/delete. */
1104 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1105
1106 /* Global IP4 main: the single ip4_main_t object that this file reaches
 * through &ip4_main.  It is defined only when CLIB_MARCH_VARIANT is not
 * set - presumably so that march-variant compilations of this file do not
 * each emit their own definition (TODO confirm against the build). */
1107 #ifndef CLIB_MARCH_VARIANT
1108 ip4_main_t ip4_main;
1109 #endif /* CLIB_MARCH_VARIANT */
1110
/*
 * One-time initialisation of the IPv4 lookup machinery.
 *
 * Steps, in order:
 *  1. Run the init functions this code depends on (vnet_feature_init,
 *     ip4_mtrie_module_init, fib_module_init, mfib_module_init); the first
 *     error returned aborts initialisation and is passed to the caller.
 *  2. Fill im->fib_masks[]: entry i holds the netmask for prefix length i
 *     in network byte order (all ones for i >= 32).
 *  3. Initialise the generic lookup main for IPv4.
 *  4. Create (and lock) unicast and multicast FIB table 0.
 *  5. Install the ip4 header parser as the packet-generator edit function
 *     of the ip4-lookup node.
 *  6. Build the ARP request packet template.
 *
 * Returns the error of a failed dependency, otherwise the (null) value left
 * in `error` by the last successful init call.
 */
1111 static clib_error_t *
1112 ip4_lookup_init (vlib_main_t * vm)
1113 {
1114   ip4_main_t *im = &ip4_main;
1115   clib_error_t *error;
1116   uword i;
1117
1118   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1119     return error;
1120   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1121     return (error);
1122   if ((error = vlib_call_init_function (vm, fib_module_init)))
1123     return error;
1124   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1125     return error;
1126
1127   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1128     {
1129       u32 m;
1130
           /* presumably pow2_mask (i) is i low-order one bits, so the shift
            * moves them to the top of the 32-bit word - TODO confirm */
1131       if (i < 32)
1132         m = pow2_mask (i) << (32 - i);
1133       else
1134         m = ~0;
1135       im->fib_masks[i] = clib_host_to_net_u32 (m);
1136     }
1137
1138   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1139
1140   /* Create FIB with index 0 and table id of 0. */
1141   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1142                                      FIB_SOURCE_DEFAULT_ROUTE);
1143   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1144                                       MFIB_SOURCE_DEFAULT_ROUTE);
1145
1146   {
1147     pg_node_t *pn;
1148     pn = pg_get_node (ip4_lookup_node.index);
1149     pn->unformat_edit = unformat_pg_ip4_header;
1150   }
1151
       /* ARP request template: Ethernet hardware type, IPv4 protocol type,
        * 6-byte L2 / 4-byte L3 addresses, opcode "request"; every other
        * header byte is left zero by the memset. */
1152   {
1153     ethernet_arp_header_t h;
1154
1155     clib_memset (&h, 0, sizeof (h));
1156
1157 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1158 #define _8(f,v) h.f = v;
1159     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1160     _16 (l3_type, ETHERNET_TYPE_IP4);
1161     _8 (n_l2_address_bytes, 6);
1162     _8 (n_l3_address_bytes, 4);
1163     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1164 #undef _16
1165 #undef _8
1166
1167     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1168                                /* data */ &h,
1169                                sizeof (h),
1170                                /* alloc chunk size */ 8,
1171                                "ip4 arp");
1172   }
1173
1174   return error;
1175 }
1176
1177 VLIB_INIT_FUNCTION (ip4_lookup_init);
1178
/*
 * Per-packet trace record shared by the ip4 lookup, rewrite and local
 * trace formatters in this file.  It is filled in by
 * ip4_forward_next_trace().
 */
1179 typedef struct
1180 {
1181   /* Adjacency taken. */
1182   u32 dpo_index;
       /* Flow hash copied from the buffer's ip metadata. */
1183   u32 flow_hash;
       /* TX sw_if_index from the buffer when that is set, otherwise the FIB
        * index bound to the RX interface. */
1184   u32 fib_index;
1185
1186   /* Packet data, possibly *after* rewrite. */
1187   u8 packet_data[64 - 1 * sizeof (u32)];
1188 }
1189 ip4_forward_next_trace_t;
1190
1191 #ifndef CLIB_MARCH_VARIANT
/*
 * Format callback for an ip4_forward_next_trace_t record.
 *
 * Consumes three va_args (vlib_main_t *, vlib_node_t *, the trace record;
 * the first two are unused) and appends the captured packet bytes,
 * formatted as an IPv4 header and indented to the current indent of s.
 *
 * Returns the extended format vector.
 */
1192 u8 *
1193 format_ip4_forward_next_trace (u8 * s, va_list * args)
1194 {
1195   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1196   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1197   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1198   u32 indent = format_get_indent (s);
1199   s = format (s, "%U%U",
1200               format_white_space, indent,
1201               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1202   return s;
1203 }
1204 #endif
1205
/*
 * Format callback for lookup traces.
 *
 * Prints the FIB index, DPO index and flow hash of the record on one line,
 * followed on the next line by the captured bytes formatted as an IPv4
 * header.  The va_args are the same as for format_ip4_forward_next_trace().
 *
 * Returns the extended format vector.
 */
1206 static u8 *
1207 format_ip4_lookup_trace (u8 * s, va_list * args)
1208 {
1209   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1210   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1211   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1212   u32 indent = format_get_indent (s);
1213
1214   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1215               t->fib_index, t->dpo_index, t->flow_hash);
1216   s = format (s, "\n%U%U",
1217               format_white_space, indent,
1218               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1219   return s;
1220 }
1221
/*
 * Format callback for rewrite traces.
 *
 * Prints the record's fib_index field under the label "tx_sw_if_index",
 * the DPO index, the adjacency that index names, and the flow hash; the
 * captured bytes follow on the next line, formatted as adjacency packet
 * data.
 *
 * NOTE(review): the label and the field name disagree.  The producer,
 * ip4_forward_next_trace(), stores the buffer's TX sw_if_index in fib_index
 * when one is set and the RX interface's FIB index otherwise, so the label
 * is accurate only in the first case.
 *
 * Returns the extended format vector.
 */
1222 static u8 *
1223 format_ip4_rewrite_trace (u8 * s, va_list * args)
1224 {
1225   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1226   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1227   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1228   u32 indent = format_get_indent (s);
1229
1230   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1231               t->fib_index, t->dpo_index, format_ip_adjacency,
1232               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1233   s = format (s, "\n%U%U",
1234               format_white_space, indent,
1235               format_ip_adjacency_packet_data,
1236               t->packet_data, sizeof (t->packet_data));
1237   return s;
1238 }
1239
1240 #ifndef CLIB_MARCH_VARIANT
1241 /* Common trace function for all ip4-forward next nodes.
 *
 * For every buffer in the frame that has VLIB_BUFFER_IS_TRACED set, adds an
 * ip4_forward_next_trace_t record holding:
 *   - dpo_index: ip.adj_index[which_adj_index] from the buffer metadata
 *   - flow_hash: ip.flow_hash from the buffer metadata
 *   - fib_index: the buffer's TX sw_if_index when it is not ~0, otherwise
 *     the FIB index bound to the buffer's RX interface
 *   - packet_data: sizeof (packet_data) bytes copied from the buffer's
 *     current data pointer
 *
 * vm              - vlib main
 * node            - node runtime the trace records are attributed to
 * frame           - frame whose buffers are examined
 * which_adj_index - VLIB_RX or VLIB_TX; selects the adj_index slot recorded
 *
 * The first loop handles two buffers per pass and needs at least four
 * remaining because it prefetches the following two; the second loop
 * handles the remainder one at a time.
 */
1242 void
1243 ip4_forward_next_trace (vlib_main_t * vm,
1244                         vlib_node_runtime_t * node,
1245                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1246 {
1247   u32 *from, n_left;
1248   ip4_main_t *im = &ip4_main;
1249
1250   n_left = frame->n_vectors;
1251   from = vlib_frame_vector_args (frame);
1252
1253   while (n_left >= 4)
1254     {
1255       u32 bi0, bi1;
1256       vlib_buffer_t *b0, *b1;
1257       ip4_forward_next_trace_t *t0, *t1;
1258
1259       /* Prefetch next iteration. */
1260       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1261       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1262
1263       bi0 = from[0];
1264       bi1 = from[1];
1265
1266       b0 = vlib_get_buffer (vm, bi0);
1267       b1 = vlib_get_buffer (vm, bi1);
1268
1269       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1270         {
1271           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1272           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1273           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1274           t0->fib_index =
1275             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1276              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1277             vec_elt (im->fib_index_by_sw_if_index,
1278                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1279
1280           clib_memcpy_fast (t0->packet_data,
1281                             vlib_buffer_get_current (b0),
1282                             sizeof (t0->packet_data));
1283         }
1284       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1285         {
1286           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1287           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1288           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1289           t1->fib_index =
1290             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1291              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1292             vec_elt (im->fib_index_by_sw_if_index,
1293                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1294           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1295                             sizeof (t1->packet_data));
1296         }
1297       from += 2;
1298       n_left -= 2;
1299     }
1300
1301   while (n_left >= 1)
1302     {
1303       u32 bi0;
1304       vlib_buffer_t *b0;
1305       ip4_forward_next_trace_t *t0;
1306
1307       bi0 = from[0];
1308
1309       b0 = vlib_get_buffer (vm, bi0);
1310
1311       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1312         {
1313           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1314           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1315           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1316           t0->fib_index =
1317             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1318              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1319             vec_elt (im->fib_index_by_sw_if_index,
1320                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1321           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1322                             sizeof (t0->packet_data));
1323         }
1324       from += 1;
1325       n_left -= 1;
1326     }
1327 }
1328
1329 /* Compute TCP/UDP/ICMP4 checksum in software.
 *
 * vm  - vlib main, passed on to ip_calculate_l4_checksum()
 * p0  - buffer holding the packet
 * ip0 - the packet's IPv4 header
 *
 * The running sum is seeded with the pseudo-header: the L4 payload length
 * (IP total length minus IP header length) plus the protocol number, and
 * then the source and destination addresses.  The payload itself is summed
 * by ip_calculate_l4_checksum(), whose result is returned.
 */
1330 u16
1331 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1332                               ip4_header_t * ip0)
1333 {
1334   ip_csum_t sum0;
1335   u32 ip_header_length, payload_length_host_byte_order;
1336
1337   /* Initialize checksum with ip header. */
1338   ip_header_length = ip4_header_bytes (ip0);
1339   payload_length_host_byte_order =
1340     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1341   sum0 =
1342     clib_host_to_net_u32 (payload_length_host_byte_order +
1343                           (ip0->protocol << 16));
1344
       /* 32-bit build: add the two addresses separately.  64-bit build: one
        * unaligned 8-byte load starting at src_address - this assumes
        * dst_address immediately follows src_address in ip4_header_t
        * (TODO confirm against the header definition). */
1345   if (BITS (uword) == 32)
1346     {
1347       sum0 =
1348         ip_csum_with_carry (sum0,
1349                             clib_mem_unaligned (&ip0->src_address, u32));
1350       sum0 =
1351         ip_csum_with_carry (sum0,
1352                             clib_mem_unaligned (&ip0->dst_address, u32));
1353     }
1354   else
1355     sum0 =
1356       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1357
1358   return ip_calculate_l4_checksum (vm, p0, sum0,
1359                                    payload_length_host_byte_order, (u8 *) ip0,
1360                                    ip_header_length, NULL);
1361 }
1362
/*
 * Validate the TCP or UDP checksum of the packet at the buffer's current
 * data pointer and record the outcome in the buffer flags.
 *
 * vm - vlib main
 * p0 - buffer whose current data starts at the IPv4 header; the protocol
 *      must be TCP or UDP (asserted)
 *
 * VNET_BUFFER_F_L4_CHECKSUM_COMPUTED is always set.
 * VNET_BUFFER_F_L4_CHECKSUM_CORRECT is set when the packet is UDP with a
 * zero checksum field (no software computation is done), or when the
 * software-computed sum is 0.
 *
 * Returns the updated p0->flags.
 */
1363 u32
1364 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1365 {
1366   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1367   udp_header_t *udp0;
1368   u16 sum16;
1369
1370   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1371           || ip0->protocol == IP_PROTOCOL_UDP);
1372
       /* NOTE(review): the L4 header is taken to start right after the
        * fixed-size ip4_header_t, whereas ip4_local_l4_csum_validate() in
        * this file locates it with ip4_next_header().  Confirm whether
        * packets carrying IP options can reach this point. */
1373   udp0 = (void *) (ip0 + 1);
1374   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1375     {
1376       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1377                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1378       return p0->flags;
1379     }
1380
1381   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1382
1383   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1384                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1385
1386   return p0->flags;
1387 }
1388 #endif
1389
1390 /* *INDENT-OFF* */
/*
 * The "ip4-local" feature arc: starts at the ip4-local and ip4-receive
 * nodes and ends at ip4-local-end-of-arc.  Its arc index is read through
 * vnet_feat_arc_ip4_local in ip4_local_set_next_and_error().
 */
1391 VNET_FEATURE_ARC_INIT (ip4_local) = {
1392   .arc_name = "ip4-local",
1393   .start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"),
1394   .last_in_arc = "ip4-local-end-of-arc",
1395 };
1396 /* *INDENT-ON* */
1397
/*
 * Software L4 checksum validation, plus a length sanity check for UDP.
 *
 * vm           - vlib main
 * p            - buffer to validate
 * ip           - the packet's IPv4 header
 * is_udp       - non-zero when the packet is UDP
 * error        - in/out: set to IP4_ERROR_UDP_LENGTH when the UDP length
 *                field exceeds the IP total length, otherwise unchanged
 * good_tcp_udp - out: 1 when the checksum is correct (and, for UDP, the
 *                length check also passed), else 0
 */
1398 static inline void
1399 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1400                             ip4_header_t * ip, u8 is_udp, u8 * error,
1401                             u8 * good_tcp_udp)
1402 {
1403   u32 flags0;
1404   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1405   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1406   if (is_udp)
1407     {
1408       udp_header_t *udp;
1409       u32 ip_len, udp_len;
1410       i32 len_diff;
1411       udp = ip4_next_header (ip);
1412       /* Verify UDP length. */
1413       ip_len = clib_net_to_host_u16 (ip->length);
1414       udp_len = clib_net_to_host_u16 (udp->length);
1415
           /* Negative when the UDP header claims more bytes than the IP
            * packet carries. */
1416       len_diff = ip_len - udp_len;
1417       *good_tcp_udp &= len_diff >= 0;
1418       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1419     }
1420 }
1421
/*
 * Checksum-state predicates on a vlib_buffer_t pointer.
 *
 * NOTE(review): the macro arguments are used without parentheses, and
 * ip4_local_csum_is_valid() expands to an unparenthesised `(...) != 0`.
 * The call sites in this file pass a plain buffer expression and use the
 * result as a whole condition or right-hand side, so they are unaffected;
 * writing `!ip4_local_csum_is_valid (b)` would not negate the whole test.
 */
/* True when the buffer has VNET_BUFFER_F_OFFLOAD set and its offload flags
 * include TCP or UDP checksum. */
1422 #define ip4_local_csum_is_offloaded(_b)                                       \
1423   ((_b->flags & VNET_BUFFER_F_OFFLOAD) &&                                     \
1424    (vnet_buffer (_b)->oflags &                                                \
1425     (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))
1426
/* True for a TCP/UDP packet whose checksum has been neither computed
 * already nor marked as offloaded. */
1427 #define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
1428     (is_tcp_udp && !(_b->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED     \
1429         || ip4_local_csum_is_offloaded (_b)))
1430
/* True when the buffer is flagged checksum-correct or the checksum is
 * marked as offloaded. */
1431 #define ip4_local_csum_is_valid(_b)                                     \
1432     (_b->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT                      \
1433         || (ip4_local_csum_is_offloaded (_b))) != 0
1434
/*
 * L4 checksum check for a single locally-destined packet.
 *
 * vm    - vlib main
 * b     - the buffer
 * ih    - the packet's IPv4 header
 * error - in/out: set to IP4_ERROR_TCP_CHECKSUM or IP4_ERROR_UDP_CHECKSUM
 *         when a TCP/UDP packet fails the check; other values may be set
 *         by ip4_local_l4_csum_validate(); otherwise left unchanged
 *
 * The checksum is validated in software only when
 * ip4_local_need_csum_check() says so; otherwise the verdict is taken from
 * the buffer flags.  Non-TCP/UDP packets never get a checksum error here.
 */
1435 static inline void
1436 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1437                          ip4_header_t * ih, u8 * error)
1438 {
1439   u8 is_udp, is_tcp_udp, good_tcp_udp;
1440
1441   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1442   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1443
1444   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1445     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1446   else
1447     good_tcp_udp = ip4_local_csum_is_valid (b);
1448
       /* The error code below is formed by adding is_udp (0 or 1) to the TCP
        * code, which relies on the two codes being consecutive. */
1449   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1450   *error = (is_tcp_udp && !good_tcp_udp
1451             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1452 }
1453
/*
 * Two-packet variant of ip4_local_check_l4_csum().
 *
 * vm    - vlib main
 * b     - array of two buffers
 * ih    - array of the two matching IPv4 headers
 * error - array of two in/out error codes, updated per packet as in the
 *         single-packet variant
 *
 * The verdicts are first read from the buffer flags.  If either packet
 * needs a software check, every TCP/UDP packet of the pair is passed to
 * ip4_local_l4_csum_validate(), not only the one that triggered it.
 */
1454 static inline void
1455 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1456                             ip4_header_t ** ih, u8 * error)
1457 {
1458   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1459
1460   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1461   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1462
1463   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1464   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1465
1466   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1467   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1468
1469   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1470                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1471     {
1472       if (is_tcp_udp[0])
1473         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1474                                     &good_tcp_udp[0]);
1475       if (is_tcp_udp[1])
1476         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1477                                     &good_tcp_udp[1]);
1478     }
1479
       /* TCP code plus is_udp yields the UDP code; see the ASSERT in
        * ip4_local_check_l4_csum(). */
1480   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1481               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1482   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1483               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1484 }
1485
/*
 * Commit the per-packet verdict: next node and buffer error.
 *
 * error_node          - node runtime whose errors[] table supplies b->error
 * b                   - the buffer
 * next                - in/out next index; overwritten with
 *                       IP_LOCAL_NEXT_DROP for any error code other than
 *                       IP4_ERROR_UNKNOWN_PROTOCOL
 * error               - the packet's IP4_ERROR_* code
 * head_of_feature_arc - non-zero when the caller is the start of the
 *                       ip4-local feature arc
 *
 * IP4_ERROR_UNKNOWN_PROTOCOL is the one code that does not force a drop;
 * only packets carrying it are handed to vnet_feature_arc_start(), which
 * may replace the next index.
 */
1486 static inline void
1487 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1488                               vlib_buffer_t * b, u16 * next, u8 error,
1489                               u8 head_of_feature_arc)
1490 {
1491   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1492   u32 next_index;
1493
1494   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1495   b->error = error ? error_node->errors[error] : 0;
1496   if (head_of_feature_arc)
1497     {
           /* vnet_feature_arc_start() takes a u32 *, so the u16 next is
            * staged through next_index. */
1498       next_index = *next;
1499       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1500         {
1501           vnet_feature_arc_start (arc_index,
1502                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1503                                   &next_index, b);
1504           *next = next_index;
1505         }
1506     }
1507 }
1508
/*
 * One-entry cache of the most recent source-address check, used by
 * ip4_local_check_src() and ip4_local_check_src_x2() to skip the FIB lookup
 * when the source address repeats.
 */
1509 typedef struct
1510 {
       /* Source address of the last packet that was looked up. */
1511   ip4_address_t src;
       /* Load-balance index that lookup returned. */
1512   u32 lbi;
       /* Error code that resulted for that packet. */
1513   u8 error;
       /* Non-zero until the first lookup has populated the entry. */
1514   u8 first;
1515 } ip4_local_last_check_t;
1516
/*
 * Source-address check for one locally-destined packet.
 *
 * b              - the buffer
 * ip0            - the packet's IPv4 header
 * last_check     - one-entry cache of the previous lookup; updated on a miss
 * error0         - in/out error code
 * is_receive_dpo - non-zero when ip.adj_index[VLIB_TX] names a receive DPO
 *
 * Side effects on the buffer metadata:
 *  - ip.fib_index is replaced by sw_if_index[VLIB_TX] when that is not ~0
 *  - ip.rx_sw_if_index is taken from the receive DPO, or set to ~0
 *  - ip.adj_index[VLIB_RX] receives the old adj_index[VLIB_TX], and
 *    adj_index[VLIB_TX] receives the source's load-balance index
 *
 * On a cache hit *error0 is overwritten with the cached error regardless of
 * its incoming value.
 */
1517 static inline void
1518 ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0,
1519                      ip4_local_last_check_t *last_check, u8 *error0,
1520                      int is_receive_dpo)
1521 {
1522   const dpo_id_t *dpo0;
1523   load_balance_t *lb0;
1524   u32 lbi0;
1525
1526   vnet_buffer (b)->ip.fib_index =
1527     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1528     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1529
1530   if (is_receive_dpo)
1531     {
1532       receive_dpo_t *rd;
1533       rd = receive_dpo_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
1534       vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index;
1535     }
1536   else
1537     vnet_buffer (b)->ip.rx_sw_if_index = ~0;
1538
1539   /*
1540    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1541    *  adjacency for the destination address (the local interface address).
1542    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1543    *  adjacency for the source address (the remote sender's address)
1544    */
1545   if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
1546       last_check->first)
1547     {
1548       lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
1549                                         &ip0->src_address);
1550
1551       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1552         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1553       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1554
1555       lb0 = load_balance_get (lbi0);
1556       dpo0 = load_balance_get_bucket_i (lb0, 0);
1557
1558       /*
1559        * Must have a route to source otherwise we drop the packet.
1560        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1561        *
1562        * The checks are:
1563        *  - the source is a receive => it's from us => bogus, do this
1564        *    first since it sets a different error code.
1565        *  - uRPF check for any route to source - accept if passes.
1566        *  - allow packets destined to the broadcast address from unknown sources
1567        */
1568
1569       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1570                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1571                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1572       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1573                   && !fib_urpf_check_size (lb0->lb_urpf)
1574                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1575                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1576
1577       last_check->src.as_u32 = ip0->src_address.as_u32;
1578       last_check->lbi = lbi0;
1579       last_check->error = *error0;
1580       last_check->first = 0;
1581     }
1582   else
1583     {
1584       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1585         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1586       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1587       *error0 = last_check->error;
1588     }
1589 }
1590
/*
 * Two-packet variant of ip4_local_check_src().
 *
 * b              - array of two buffers
 * ip             - array of the two matching IPv4 headers
 * last_check     - one-entry cache of the previous lookup
 * error          - array of two in/out error codes
 * is_receive_dpo - non-zero when ip.adj_index[VLIB_TX] names a receive DPO
 *
 * The cache counts as hit only when it has been populated and BOTH source
 * addresses equal the cached one; otherwise both packets are looked up and
 * the cache is refreshed from the second packet (index 1).  Buffer metadata
 * is updated per packet exactly as in the single-packet variant.
 */
1591 static inline void
1592 ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip,
1593                         ip4_local_last_check_t *last_check, u8 *error,
1594                         int is_receive_dpo)
1595 {
1596   const dpo_id_t *dpo[2];
1597   load_balance_t *lb[2];
1598   u32 not_last_hit;
1599   u32 lbi[2];
1600
       /* Non-zero if the cache is unpopulated or either source differs. */
1601   not_last_hit = last_check->first;
1602   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1603   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1604
1605   vnet_buffer (b[0])->ip.fib_index =
1606     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1607     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1608     vnet_buffer (b[0])->ip.fib_index;
1609
1610   vnet_buffer (b[1])->ip.fib_index =
1611     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1612     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1613     vnet_buffer (b[1])->ip.fib_index;
1614
1615   if (is_receive_dpo)
1616     {
1617       const receive_dpo_t *rd0, *rd1;
1618       rd0 = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
1619       rd1 = receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
1620       vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
1621       vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
1622     }
1623   else
1624     {
1625       vnet_buffer (b[0])->ip.rx_sw_if_index = ~0;
1626       vnet_buffer (b[1])->ip.rx_sw_if_index = ~0;
1627     }
1628
1629   /*
1630    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1631    *  adjacency for the destination address (the local interface address).
1632    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1633    *  adjacency for the source address (the remote sender's address)
1634    */
1635   if (PREDICT_TRUE (not_last_hit))
1636     {
1637       ip4_fib_forwarding_lookup_x2 (
1638         vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
1639         &ip[0]->src_address, &ip[1]->src_address, &lbi[0], &lbi[1]);
1640
1641       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1642         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1643       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1644
1645       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1646         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1647       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1648
1649       lb[0] = load_balance_get (lbi[0]);
1650       lb[1] = load_balance_get (lbi[1]);
1651
1652       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1653       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1654
           /* Same two checks as the single-packet variant: a source that
            * resolves to a receive DPO is flagged as spoofed; otherwise a
            * failed uRPF size check is a source lookup miss unless the
            * destination is 255.255.255.255. */
1655       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1656                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1657                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1658       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1659                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1660                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1661                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1662
1663       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1664                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1665                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1666       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1667                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1668                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1669                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1670
1671       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1672       last_check->lbi = lbi[1];
1673       last_check->error = error[1];
1674       last_check->first = 0;
1675     }
1676   else
1677     {
1678       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1679         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1680       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1681
1682       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1683         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1684       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1685
1686       error[0] = last_check->error;
1687       error[1] = last_check->error;
1688     }
1689 }
1690
/*
 * Packet classes returned by ip4_local_classify().
 */
1691 enum ip_local_packet_type_e
1692 {
       /* Neither a fragment nor flagged as NATed. */
1693   IP_LOCAL_PACKET_TYPE_L4,
       /* Buffer has VNET_BUFFER_F_IS_NATED set. */
1694   IP_LOCAL_PACKET_TYPE_NAT,
       /* IPv4 fragment; next node is reassembly. */
1695   IP_LOCAL_PACKET_TYPE_FRAG,
1696 };
1697
1698 /**
1699  * Determine packet type and next node.
1700  *
1701  * The expectation is that all packets that are not L4 will skip
1702  * checksums and source checks.
 *
 * Fragments are tested first and get IP_LOCAL_NEXT_REASSEMBLY as next node.
 * Every other packet gets the next node registered for its IP protocol in
 * lm->local_next_by_ip_protocol[]; those whose buffer carries
 * VNET_BUFFER_F_IS_NATED are reported as NAT, the rest as L4.
 *
 * @param b    the buffer (only its flags are read)
 * @param ip   the packet's IPv4 header
 * @param next out: next node index for the packet
 * @return one of the IP_LOCAL_PACKET_TYPE_* values
1703  */
1704 always_inline u8
1705 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1706 {
1707   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1708
1709   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1710     {
1711       *next = IP_LOCAL_NEXT_REASSEMBLY;
1712       return IP_LOCAL_PACKET_TYPE_FRAG;
1713     }
1714   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1715     {
1716       *next = lm->local_next_by_ip_protocol[ip->protocol];
1717       return IP_LOCAL_PACKET_TYPE_NAT;
1718     }
1719
1720   *next = lm->local_next_by_ip_protocol[ip->protocol];
1721   return IP_LOCAL_PACKET_TYPE_L4;
1722 }
1723
1724 static inline uword
1725 ip4_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1726                   vlib_frame_t *frame, int head_of_feature_arc,
1727                   int is_receive_dpo)
1728 {
1729   u32 *from, n_left_from;
1730   vlib_node_runtime_t *error_node =
1731     vlib_node_get_runtime (vm, ip4_local_node.index);
1732   u16 nexts[VLIB_FRAME_SIZE], *next;
1733   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1734   ip4_header_t *ip[2];
1735   u8 error[2], pt[2];
1736
1737   ip4_local_last_check_t last_check = {
1738     /*
1739      * 0.0.0.0 can appear as the source address of an IP packet,
1740      * as can any other address, hence the need to use the 'first'
1741      * member to make sure the .lbi is initialised for the first
1742      * packet.
1743      */
1744     .src = {.as_u32 = 0},
1745     .lbi = ~0,
1746     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1747     .first = 1,
1748   };
1749
1750   from = vlib_frame_vector_args (frame);
1751   n_left_from = frame->n_vectors;
1752
1753   if (node->flags & VLIB_NODE_FLAG_TRACE)
1754     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1755
1756   vlib_get_buffers (vm, from, bufs, n_left_from);
1757   b = bufs;
1758   next = nexts;
1759
1760   while (n_left_from >= 6)
1761     {
1762       u8 not_batch = 0;
1763
1764       /* Prefetch next iteration. */
1765       {
1766         vlib_prefetch_buffer_header (b[4], LOAD);
1767         vlib_prefetch_buffer_header (b[5], LOAD);
1768
1769         clib_prefetch_load (b[4]->data);
1770         clib_prefetch_load (b[5]->data);
1771       }
1772
1773       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1774
1775       ip[0] = vlib_buffer_get_current (b[0]);
1776       ip[1] = vlib_buffer_get_current (b[1]);
1777
1778       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1779       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1780
1781       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1782       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1783
1784       not_batch = pt[0] ^ pt[1];
1785
1786       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1787         goto skip_checks;
1788
1789       if (PREDICT_TRUE (not_batch == 0))
1790         {
1791           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1792           ip4_local_check_src_x2 (b, ip, &last_check, error, is_receive_dpo);
1793         }
1794       else
1795         {
1796           if (!pt[0])
1797             {
1798               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1799               ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1800                                    is_receive_dpo);
1801             }
1802           if (!pt[1])
1803             {
1804               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1805               ip4_local_check_src (b[1], ip[1], &last_check, &error[1],
1806                                    is_receive_dpo);
1807             }
1808         }
1809
1810     skip_checks:
1811
1812       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1813                                     head_of_feature_arc);
1814       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1815                                     head_of_feature_arc);
1816
1817       b += 2;
1818       next += 2;
1819       n_left_from -= 2;
1820     }
1821
1822   while (n_left_from > 0)
1823     {
1824       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1825
1826       ip[0] = vlib_buffer_get_current (b[0]);
1827       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1828       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1829
1830       if (head_of_feature_arc == 0 || pt[0])
1831         goto skip_check;
1832
1833       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1834       ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1835                            is_receive_dpo);
1836
1837     skip_check:
1838
1839       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1840                                     head_of_feature_arc);
1841
1842       b += 1;
1843       next += 1;
1844       n_left_from -= 1;
1845     }
1846
1847   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1848   return frame->n_vectors;
1849 }
1850
1851 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1852                                vlib_frame_t * frame)
1853 {
1854   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1855                            0 /* is_receive_dpo */);
1856 }
1857
1858 VLIB_REGISTER_NODE (ip4_local_node) =
1859 {
1860   .name = "ip4-local",
1861   .vector_size = sizeof (u32),
1862   .format_trace = format_ip4_forward_next_trace,
1863   .n_errors = IP4_N_ERROR,
1864   .error_strings = ip4_error_strings,
1865   .n_next_nodes = IP_LOCAL_N_NEXT,
1866   .next_nodes =
1867   {
1868     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1869     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1870     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1871     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1872     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
1873   },
1874 };
1875
1876 VLIB_NODE_FN (ip4_receive_local_node)
1877 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1878 {
1879   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1880                            1 /* is_receive_dpo */);
1881 }
1882
1883 VLIB_REGISTER_NODE (ip4_receive_local_node) = {
1884   .name = "ip4-receive",
1885   .vector_size = sizeof (u32),
1886   .format_trace = format_ip4_forward_next_trace,
1887   .sibling_of = "ip4-local"
1888 };
1889
1890 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1891                                           vlib_node_runtime_t * node,
1892                                           vlib_frame_t * frame)
1893 {
1894   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */,
1895                            0 /* is_receive_dpo */);
1896 }
1897
1898 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1899   .name = "ip4-local-end-of-arc",
1900   .vector_size = sizeof (u32),
1901
1902   .format_trace = format_ip4_forward_next_trace,
1903   .sibling_of = "ip4-local",
1904 };
1905
1906 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1907   .arc_name = "ip4-local",
1908   .node_name = "ip4-local-end-of-arc",
1909   .runs_before = 0, /* not before any other features */
1910 };
1911
1912 #ifndef CLIB_MARCH_VARIANT
1913 void
1914 ip4_register_protocol (u32 protocol, u32 node_index)
1915 {
1916   vlib_main_t *vm = vlib_get_main ();
1917   ip4_main_t *im = &ip4_main;
1918   ip_lookup_main_t *lm = &im->lookup_main;
1919
1920   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1921   lm->local_next_by_ip_protocol[protocol] =
1922     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1923 }
1924
1925 void
1926 ip4_unregister_protocol (u32 protocol)
1927 {
1928   ip4_main_t *im = &ip4_main;
1929   ip_lookup_main_t *lm = &im->lookup_main;
1930
1931   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1932   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1933 }
1934 #endif
1935
1936 static clib_error_t *
1937 show_ip_local_command_fn (vlib_main_t * vm,
1938                           unformat_input_t * input, vlib_cli_command_t * cmd)
1939 {
1940   ip4_main_t *im = &ip4_main;
1941   ip_lookup_main_t *lm = &im->lookup_main;
1942   int i;
1943
1944   vlib_cli_output (vm, "Protocols handled by ip4_local");
1945   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1946     {
1947       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1948         {
1949           u32 node_index = vlib_get_node (vm,
1950                                           ip4_local_node.index)->
1951             next_nodes[lm->local_next_by_ip_protocol[i]];
1952           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1953                            format_vlib_node_name, vm, node_index);
1954         }
1955     }
1956   return 0;
1957 }
1958
1959
1960
1961 /*?
1962  * Display the set of protocols handled by the local IPv4 stack.
1963  *
1964  * @cliexpar
1965  * Example of how to display local protocol table:
1966  * @cliexstart{show ip local}
1967  * Protocols handled by ip4_local
1968  * 1
1969  * 17
1970  * 47
1971  * @cliexend
1972 ?*/
1973 /* *INDENT-OFF* */
1974 VLIB_CLI_COMMAND (show_ip_local, static) =
1975 {
1976   .path = "show ip local",
1977   .function = show_ip_local_command_fn,
1978   .short_help = "show ip local",
1979 };
1980 /* *INDENT-ON* */
1981
/* Next-node slots used directly by the IPv4 rewrite worker. */
typedef enum
{
  /* default disposition for every buffer */
  IP4_REWRITE_NEXT_DROP = 0,
  /* TTL expired, or MTU exceeded with don't-fragment set */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
  /* MTU exceeded and fragmentation allowed */
  IP4_REWRITE_NEXT_FRAGMENT = 2,
  /* number of slots above; must stay last */
  IP4_REWRITE_N_NEXT = 3
} ip4_rewrite_next_t;
1989
/**
 * The bits of an IPv4 address to keep (mask) when constructing a
 * multicast MAC address. The value depends on host byte order because
 * it is applied to the address as it sits in memory.
 */
#if !(CLIB_ARCH_IS_BIG_ENDIAN)
/* little-endian host */
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
/* big-endian host */
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1999
2000 always_inline void
2001 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2002                u16 adj_packet_bytes, bool df, u16 * next,
2003                u8 is_midchain, u32 * error)
2004 {
2005   if (packet_len > adj_packet_bytes)
2006     {
2007       *error = IP4_ERROR_MTU_EXCEEDED;
2008       if (df)
2009         {
2010           icmp4_error_set_vnet_buffer
2011             (b, ICMP4_destination_unreachable,
2012              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2013              adj_packet_bytes);
2014           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2015         }
2016       else
2017         {
2018           /* IP fragmentation */
2019           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2020                                    (is_midchain ?
2021                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2022                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2023           *next = IP4_REWRITE_NEXT_FRAGMENT;
2024         }
2025     }
2026 }
2027
2028 /* increment TTL & update checksum.
2029    Works either endian, so no need for byte swap. */
2030 static_always_inline void
2031 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2032 {
2033   i32 ttl;
2034   u32 checksum;
2035   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2036     return;
2037
2038   ttl = ip->ttl;
2039
2040   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2041   checksum += checksum >= 0xffff;
2042
2043   ip->checksum = checksum;
2044   ttl += 1;
2045   ip->ttl = ttl;
2046
2047   ASSERT (ip4_header_checksum_is_valid (ip));
2048 }
2049
2050 /* Decrement TTL & update checksum.
2051    Works either endian, so no need for byte swap. */
2052 static_always_inline void
2053 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2054                             u32 * error)
2055 {
2056   i32 ttl;
2057   u32 checksum;
2058   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2059     return;
2060
2061   ttl = ip->ttl;
2062
2063   /* Input node should have reject packets with ttl 0. */
2064   ASSERT (ip->ttl > 0);
2065
2066   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2067   checksum += checksum >= 0xffff;
2068
2069   ip->checksum = checksum;
2070   ttl -= 1;
2071   ip->ttl = ttl;
2072
2073   /*
2074    * If the ttl drops below 1 when forwarding, generate
2075    * an ICMP response.
2076    */
2077   if (PREDICT_FALSE (ttl <= 0))
2078     {
2079       *error = IP4_ERROR_TIME_EXPIRED;
2080       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2081       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2082                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2083                                    0);
2084       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2085     }
2086
2087   /* Verify checksum. */
2088   ASSERT (ip4_header_checksum_is_valid (ip) ||
2089           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
2090           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
2091 }
2092
2093 always_inline uword
2094 ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
2095                     vlib_frame_t *frame, int do_counters, int is_midchain,
2096                     int is_mcast)
2097 {
2098   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2099   u32 *from = vlib_frame_vector_args (frame);
2100   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2101   u16 nexts[VLIB_FRAME_SIZE], *next;
2102   u32 n_left_from;
2103   vlib_node_runtime_t *error_node =
2104     vlib_node_get_runtime (vm, ip4_input_node.index);
2105
2106   n_left_from = frame->n_vectors;
2107   u32 thread_index = vm->thread_index;
2108
2109   vlib_get_buffers (vm, from, bufs, n_left_from);
2110   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2111
2112 #if (CLIB_N_PREFETCHES >= 8)
2113   if (n_left_from >= 6)
2114     {
2115       int i;
2116       for (i = 2; i < 6; i++)
2117         vlib_prefetch_buffer_header (bufs[i], LOAD);
2118     }
2119
2120   next = nexts;
2121   b = bufs;
2122   while (n_left_from >= 8)
2123     {
2124       const ip_adjacency_t *adj0, *adj1;
2125       ip4_header_t *ip0, *ip1;
2126       u32 rw_len0, error0, adj_index0;
2127       u32 rw_len1, error1, adj_index1;
2128       u32 tx_sw_if_index0, tx_sw_if_index1;
2129       u8 *p;
2130
2131       if (is_midchain)
2132         {
2133           vlib_prefetch_buffer_header (b[6], LOAD);
2134           vlib_prefetch_buffer_header (b[7], LOAD);
2135         }
2136
2137       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2138       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2139
2140       /*
2141        * pre-fetch the per-adjacency counters
2142        */
2143       if (do_counters)
2144         {
2145           vlib_prefetch_combined_counter (&adjacency_counters,
2146                                           thread_index, adj_index0);
2147           vlib_prefetch_combined_counter (&adjacency_counters,
2148                                           thread_index, adj_index1);
2149         }
2150
2151       ip0 = vlib_buffer_get_current (b[0]);
2152       ip1 = vlib_buffer_get_current (b[1]);
2153
2154       error0 = error1 = IP4_ERROR_NONE;
2155
2156       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2157       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2158
2159       /* Rewrite packet header and updates lengths. */
2160       adj0 = adj_get (adj_index0);
2161       adj1 = adj_get (adj_index1);
2162
2163       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2164       rw_len0 = adj0[0].rewrite_header.data_bytes;
2165       rw_len1 = adj1[0].rewrite_header.data_bytes;
2166       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2167       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2168
2169       p = vlib_buffer_get_current (b[2]);
2170       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2171       clib_prefetch_load (p);
2172
2173       p = vlib_buffer_get_current (b[3]);
2174       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2175       clib_prefetch_load (p);
2176
2177       /* Check MTU of outgoing interface. */
2178       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2179       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2180
2181       if (b[0]->flags & VNET_BUFFER_F_GSO)
2182         ip0_len = gso_mtu_sz (b[0]);
2183       if (b[1]->flags & VNET_BUFFER_F_GSO)
2184         ip1_len = gso_mtu_sz (b[1]);
2185
2186       ip4_mtu_check (b[0], ip0_len,
2187                      adj0[0].rewrite_header.max_l3_packet_bytes,
2188                      ip0->flags_and_fragment_offset &
2189                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2190                      next + 0, is_midchain, &error0);
2191       ip4_mtu_check (b[1], ip1_len,
2192                      adj1[0].rewrite_header.max_l3_packet_bytes,
2193                      ip1->flags_and_fragment_offset &
2194                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2195                      next + 1, is_midchain, &error1);
2196
2197       if (is_mcast)
2198         {
2199           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2200                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2201                     IP4_ERROR_SAME_INTERFACE : error0);
2202           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2203                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2204                     IP4_ERROR_SAME_INTERFACE : error1);
2205         }
2206
2207       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2208        * to see the IP header */
2209       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2210         {
2211           u32 next_index = adj0[0].rewrite_header.next_index;
2212           vlib_buffer_advance (b[0], -(word) rw_len0);
2213
2214           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2215           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2216
2217           if (PREDICT_FALSE
2218               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2219             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2220                                                 tx_sw_if_index0,
2221                                                 &next_index, b[0],
2222                                                 adj0->ia_cfg_index);
2223
2224           next[0] = next_index;
2225           if (is_midchain)
2226             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2227                                         0 /* is_ip6 */ );
2228         }
2229       else
2230         {
2231           b[0]->error = error_node->errors[error0];
2232           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2233             ip4_ttl_inc (b[0], ip0);
2234         }
2235       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2236         {
2237           u32 next_index = adj1[0].rewrite_header.next_index;
2238           vlib_buffer_advance (b[1], -(word) rw_len1);
2239
2240           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2241           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2242
2243           if (PREDICT_FALSE
2244               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2245             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2246                                                 tx_sw_if_index1,
2247                                                 &next_index, b[1],
2248                                                 adj1->ia_cfg_index);
2249           next[1] = next_index;
2250           if (is_midchain)
2251             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2252                                         0 /* is_ip6 */ );
2253         }
2254       else
2255         {
2256           b[1]->error = error_node->errors[error1];
2257           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2258             ip4_ttl_inc (b[1], ip1);
2259         }
2260
2261       if (is_midchain)
2262         /* Guess we are only writing on ipv4 header. */
2263         vnet_rewrite_two_headers (adj0[0], adj1[0],
2264                                   ip0, ip1, sizeof (ip4_header_t));
2265       else
2266         /* Guess we are only writing on simple Ethernet header. */
2267         vnet_rewrite_two_headers (adj0[0], adj1[0],
2268                                   ip0, ip1, sizeof (ethernet_header_t));
2269
2270       if (do_counters)
2271         {
2272           if (error0 == IP4_ERROR_NONE)
2273             vlib_increment_combined_counter
2274               (&adjacency_counters,
2275                thread_index,
2276                adj_index0, 1,
2277                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2278
2279           if (error1 == IP4_ERROR_NONE)
2280             vlib_increment_combined_counter
2281               (&adjacency_counters,
2282                thread_index,
2283                adj_index1, 1,
2284                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2285         }
2286
2287       if (is_midchain)
2288         {
2289           if (error0 == IP4_ERROR_NONE)
2290             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2291           if (error1 == IP4_ERROR_NONE)
2292             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2293         }
2294
2295       if (is_mcast)
2296         {
2297           /* copy bytes from the IP address into the MAC rewrite */
2298           if (error0 == IP4_ERROR_NONE)
2299             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2300                                         adj0->rewrite_header.dst_mcast_offset,
2301                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2302           if (error1 == IP4_ERROR_NONE)
2303             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2304                                         adj1->rewrite_header.dst_mcast_offset,
2305                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2306         }
2307
2308       next += 2;
2309       b += 2;
2310       n_left_from -= 2;
2311     }
2312 #elif (CLIB_N_PREFETCHES >= 4)
2313   next = nexts;
2314   b = bufs;
2315   while (n_left_from >= 1)
2316     {
2317       ip_adjacency_t *adj0;
2318       ip4_header_t *ip0;
2319       u32 rw_len0, error0, adj_index0;
2320       u32 tx_sw_if_index0;
2321       u8 *p;
2322
2323       /* Prefetch next iteration */
2324       if (PREDICT_TRUE (n_left_from >= 4))
2325         {
2326           ip_adjacency_t *adj2;
2327           u32 adj_index2;
2328
2329           vlib_prefetch_buffer_header (b[3], LOAD);
2330           vlib_prefetch_buffer_data (b[2], LOAD);
2331
2332           /* Prefetch adj->rewrite_header */
2333           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2334           adj2 = adj_get (adj_index2);
2335           p = (u8 *) adj2;
2336           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2337                          LOAD);
2338         }
2339
2340       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2341
2342       /*
2343        * Prefetch the per-adjacency counters
2344        */
2345       if (do_counters)
2346         {
2347           vlib_prefetch_combined_counter (&adjacency_counters,
2348                                           thread_index, adj_index0);
2349         }
2350
2351       ip0 = vlib_buffer_get_current (b[0]);
2352
2353       error0 = IP4_ERROR_NONE;
2354
2355       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2356
2357       /* Rewrite packet header and updates lengths. */
2358       adj0 = adj_get (adj_index0);
2359
2360       /* Rewrite header was prefetched. */
2361       rw_len0 = adj0[0].rewrite_header.data_bytes;
2362       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2363
2364       /* Check MTU of outgoing interface. */
2365       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2366
2367       if (b[0]->flags & VNET_BUFFER_F_GSO)
2368         ip0_len = gso_mtu_sz (b[0]);
2369
2370       ip4_mtu_check (b[0], ip0_len,
2371                      adj0[0].rewrite_header.max_l3_packet_bytes,
2372                      ip0->flags_and_fragment_offset &
2373                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2374                      next + 0, is_midchain, &error0);
2375
2376       if (is_mcast)
2377         {
2378           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2379                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2380                     IP4_ERROR_SAME_INTERFACE : error0);
2381         }
2382
2383       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2384        * to see the IP header */
2385       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2386         {
2387           u32 next_index = adj0[0].rewrite_header.next_index;
2388           vlib_buffer_advance (b[0], -(word) rw_len0);
2389           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2390           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2391
2392           if (PREDICT_FALSE
2393               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2394             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2395                                                 tx_sw_if_index0,
2396                                                 &next_index, b[0],
2397                                                 adj0->ia_cfg_index);
2398           next[0] = next_index;
2399
2400           if (is_midchain)
2401             {
2402               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2403                                           0 /* is_ip6 */ );
2404
2405               /* Guess we are only writing on ipv4 header. */
2406               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2407             }
2408           else
2409             /* Guess we are only writing on simple Ethernet header. */
2410             vnet_rewrite_one_header (adj0[0], ip0,
2411                                      sizeof (ethernet_header_t));
2412
2413           /*
2414            * Bump the per-adjacency counters
2415            */
2416           if (do_counters)
2417             vlib_increment_combined_counter
2418               (&adjacency_counters,
2419                thread_index,
2420                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2421                                                            b[0]) + rw_len0);
2422
2423           if (is_midchain)
2424             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2425
2426           if (is_mcast)
2427             /* copy bytes from the IP address into the MAC rewrite */
2428             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2429                                         adj0->rewrite_header.dst_mcast_offset,
2430                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2431         }
2432       else
2433         {
2434           b[0]->error = error_node->errors[error0];
2435           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2436             ip4_ttl_inc (b[0], ip0);
2437         }
2438
2439       next += 1;
2440       b += 1;
2441       n_left_from -= 1;
2442     }
2443 #endif
2444
2445   while (n_left_from > 0)
2446     {
2447       ip_adjacency_t *adj0;
2448       ip4_header_t *ip0;
2449       u32 rw_len0, adj_index0, error0;
2450       u32 tx_sw_if_index0;
2451
2452       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2453
2454       adj0 = adj_get (adj_index0);
2455
2456       if (do_counters)
2457         vlib_prefetch_combined_counter (&adjacency_counters,
2458                                         thread_index, adj_index0);
2459
2460       ip0 = vlib_buffer_get_current (b[0]);
2461
2462       error0 = IP4_ERROR_NONE;
2463
2464       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2465
2466
2467       /* Update packet buffer attributes/set output interface. */
2468       rw_len0 = adj0[0].rewrite_header.data_bytes;
2469       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2470
2471       /* Check MTU of outgoing interface. */
2472       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2473       if (b[0]->flags & VNET_BUFFER_F_GSO)
2474         ip0_len = gso_mtu_sz (b[0]);
2475
2476       ip4_mtu_check (b[0], ip0_len,
2477                      adj0[0].rewrite_header.max_l3_packet_bytes,
2478                      ip0->flags_and_fragment_offset &
2479                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2480                      next + 0, is_midchain, &error0);
2481
2482       if (is_mcast)
2483         {
2484           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2485                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2486                     IP4_ERROR_SAME_INTERFACE : error0);
2487         }
2488
2489       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2490        * to see the IP header */
2491       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2492         {
2493           u32 next_index = adj0[0].rewrite_header.next_index;
2494           vlib_buffer_advance (b[0], -(word) rw_len0);
2495           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2496           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2497
2498           if (PREDICT_FALSE
2499               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2500             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2501                                                 tx_sw_if_index0,
2502                                                 &next_index, b[0],
2503                                                 adj0->ia_cfg_index);
2504           next[0] = next_index;
2505
2506           if (is_midchain)
2507             {
2508               /* this acts on the packet that is about to be encapped */
2509               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2510                                           0 /* is_ip6 */ );
2511
2512               /* Guess we are only writing on ipv4 header. */
2513               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2514             }
2515           else
2516             /* Guess we are only writing on simple Ethernet header. */
2517             vnet_rewrite_one_header (adj0[0], ip0,
2518                                      sizeof (ethernet_header_t));
2519
2520           if (do_counters)
2521             vlib_increment_combined_counter
2522               (&adjacency_counters,
2523                thread_index, adj_index0, 1,
2524                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2525
2526           if (is_midchain)
2527             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2528
2529           if (is_mcast)
2530             /* copy bytes from the IP address into the MAC rewrite */
2531             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2532                                         adj0->rewrite_header.dst_mcast_offset,
2533                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2534         }
2535       else
2536         {
2537           b[0]->error = error_node->errors[error0];
2538           /* undo the TTL decrement - we'll be back to do it again */
2539           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2540             ip4_ttl_inc (b[0], ip0);
2541         }
2542
2543       next += 1;
2544       b += 1;
2545       n_left_from -= 1;
2546     }
2547
2548
2549   /* Need to do trace after rewrites to pick up new packet data. */
2550   if (node->flags & VLIB_NODE_FLAG_TRACE)
2551     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2552
2553   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2554   return frame->n_vectors;
2555 }
2556
2557 /** @brief IPv4 rewrite node.
2558     @node ip4-rewrite
2559
2560     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2561     header checksum, fetch the ip adjacency, check the outbound mtu,
2562     apply the adjacency rewrite, and send pkts to the adjacency
2563     rewrite header's rewrite_next_index.
2564
2565     @param vm vlib_main_t corresponding to the current thread
2566     @param node vlib_node_runtime_t
2567     @param frame vlib_frame_t whose contents should be dispatched
2568
2569     @par Graph mechanics: buffer metadata, next index usage
2570
2571     @em Uses:
2572     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2573         - the rewrite adjacency index
2574     - <code>adj->lookup_next_index</code>
2575         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2576           the packet will be dropped.
2577     - <code>adj->rewrite_header</code>
2578         - Rewrite string length, rewrite string, next_index
2579
2580     @em Sets:
2581     - <code>b->current_data, b->current_length</code>
2582         - Updated net of applying the rewrite string
2583
2584     <em>Next Indices:</em>
2585     - <code> adj->rewrite_header.next_index </code>
2586       or @c ip4-drop
2587 */
2588
2589 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2590                                  vlib_frame_t * frame)
2591 {
2592   if (adj_are_counters_enabled ())
2593     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2594   else
2595     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2596 }
2597
2598 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2599                                        vlib_node_runtime_t * node,
2600                                        vlib_frame_t * frame)
2601 {
2602   if (adj_are_counters_enabled ())
2603     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2604   else
2605     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2606 }
2607
2608 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2609                                   vlib_node_runtime_t * node,
2610                                   vlib_frame_t * frame)
2611 {
2612   if (adj_are_counters_enabled ())
2613     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2614   else
2615     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2616 }
2617
2618 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2619                                        vlib_node_runtime_t * node,
2620                                        vlib_frame_t * frame)
2621 {
2622   if (adj_are_counters_enabled ())
2623     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2624   else
2625     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2626 }
2627
2628 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2629                                         vlib_node_runtime_t * node,
2630                                         vlib_frame_t * frame)
2631 {
2632   if (adj_are_counters_enabled ())
2633     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2634   else
2635     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2636 }
2637
2638 /* *INDENT-OFF* */
2639 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2640   .name = "ip4-rewrite",
2641   .vector_size = sizeof (u32),
2642
2643   .format_trace = format_ip4_rewrite_trace,
2644
2645   .n_next_nodes = IP4_REWRITE_N_NEXT,
2646   .next_nodes = {
2647     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2648     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2649     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2650   },
2651 };
2652
2653 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2654   .name = "ip4-rewrite-bcast",
2655   .vector_size = sizeof (u32),
2656
2657   .format_trace = format_ip4_rewrite_trace,
2658   .sibling_of = "ip4-rewrite",
2659 };
2660
2661 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2662   .name = "ip4-rewrite-mcast",
2663   .vector_size = sizeof (u32),
2664
2665   .format_trace = format_ip4_rewrite_trace,
2666   .sibling_of = "ip4-rewrite",
2667 };
2668
2669 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2670   .name = "ip4-mcast-midchain",
2671   .vector_size = sizeof (u32),
2672
2673   .format_trace = format_ip4_rewrite_trace,
2674   .sibling_of = "ip4-rewrite",
2675 };
2676
2677 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2678   .name = "ip4-midchain",
2679   .vector_size = sizeof (u32),
2680   .format_trace = format_ip4_rewrite_trace,
2681   .sibling_of = "ip4-rewrite",
2682 };
/* *INDENT-ON* */
2684
2685 static clib_error_t *
2686 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2687                              unformat_input_t * input,
2688                              vlib_cli_command_t * cmd)
2689 {
2690   int matched = 0;
2691   u32 table_id = 0;
2692   u32 flow_hash_config = 0;
2693   int rv;
2694
2695   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2696     {
2697       if (unformat (input, "table %d", &table_id))
2698         matched = 1;
2699 #define _(a, b, v)                                                            \
2700   else if (unformat (input, #a))                                              \
2701   {                                                                           \
2702     flow_hash_config |= v;                                                    \
2703     matched = 1;                                                              \
2704   }
2705       foreach_flow_hash_bit
2706 #undef _
2707         else
2708         break;
2709     }
2710
2711   if (matched == 0)
2712     return clib_error_return (0, "unknown input `%U'",
2713                               format_unformat_error, input);
2714
2715   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2716   switch (rv)
2717     {
2718     case 0:
2719       break;
2720
2721     case VNET_API_ERROR_NO_SUCH_FIB:
2722       return clib_error_return (0, "no such FIB table %d", table_id);
2723
2724     default:
2725       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2726       break;
2727     }
2728
2729   return 0;
2730 }
2731
2732 /*?
2733  * Configure the set of IPv4 fields used by the flow hash.
2734  *
2735  * @cliexpar
2736  * Example of how to set the flow hash on a given table:
2737  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2739  * @cliexstart{show ip fib}
2740  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2741  * 0.0.0.0/0
2742  *   unicast-ip4-chain
2743  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2744  *     [0] [@0]: dpo-drop ip6
2745  * 0.0.0.0/32
2746  *   unicast-ip4-chain
2747  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2748  *     [0] [@0]: dpo-drop ip6
2749  * 224.0.0.0/8
2750  *   unicast-ip4-chain
2751  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2752  *     [0] [@0]: dpo-drop ip6
2753  * 6.0.1.2/32
2754  *   unicast-ip4-chain
2755  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2756  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2757  * 7.0.0.1/32
2758  *   unicast-ip4-chain
2759  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2760  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2761  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2762  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2763  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2764  * 240.0.0.0/8
2765  *   unicast-ip4-chain
2766  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2767  *     [0] [@0]: dpo-drop ip6
2768  * 255.255.255.255/32
2769  *   unicast-ip4-chain
2770  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2771  *     [0] [@0]: dpo-drop ip6
2772  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2773  * 0.0.0.0/0
2774  *   unicast-ip4-chain
2775  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2776  *     [0] [@0]: dpo-drop ip6
2777  * 0.0.0.0/32
2778  *   unicast-ip4-chain
2779  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2780  *     [0] [@0]: dpo-drop ip6
2781  * 172.16.1.0/24
2782  *   unicast-ip4-chain
2783  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2784  *     [0] [@4]: ipv4-glean: af_packet0
2785  * 172.16.1.1/32
2786  *   unicast-ip4-chain
2787  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2788  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2789  * 172.16.1.2/32
2790  *   unicast-ip4-chain
2791  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2792  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2793  * 172.16.2.0/24
2794  *   unicast-ip4-chain
2795  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2796  *     [0] [@4]: ipv4-glean: af_packet1
2797  * 172.16.2.1/32
2798  *   unicast-ip4-chain
2799  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2800  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2801  * 224.0.0.0/8
2802  *   unicast-ip4-chain
2803  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2804  *     [0] [@0]: dpo-drop ip6
2805  * 240.0.0.0/8
2806  *   unicast-ip4-chain
2807  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2808  *     [0] [@0]: dpo-drop ip6
2809  * 255.255.255.255/32
2810  *   unicast-ip4-chain
2811  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2812  *     [0] [@0]: dpo-drop ip6
2813  * @cliexend
2814 ?*/
2815 /* *INDENT-OFF* */
2816 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2817 {
2818   .path = "set ip flow-hash",
2819   .short_help =
2820   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2821   .function = set_ip_flow_hash_command_fn,
2822 };
2823 /* *INDENT-ON* */
2824
2825 #ifndef CLIB_MARCH_VARIANT
2826 int
2827 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2828                              u32 table_index)
2829 {
2830   vnet_main_t *vnm = vnet_get_main ();
2831   vnet_interface_main_t *im = &vnm->interface_main;
2832   ip4_main_t *ipm = &ip4_main;
2833   ip_lookup_main_t *lm = &ipm->lookup_main;
2834   vnet_classify_main_t *cm = &vnet_classify_main;
2835   ip4_address_t *if_addr;
2836
2837   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2838     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2839
2840   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2841     return VNET_API_ERROR_NO_SUCH_ENTRY;
2842
2843   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2844   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2845
2846   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2847
2848   if (NULL != if_addr)
2849     {
2850       fib_prefix_t pfx = {
2851         .fp_len = 32,
2852         .fp_proto = FIB_PROTOCOL_IP4,
2853         .fp_addr.ip4 = *if_addr,
2854       };
2855       u32 fib_index;
2856
2857       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2858                                                        sw_if_index);
2859
2860
2861       if (table_index != (u32) ~ 0)
2862         {
2863           dpo_id_t dpo = DPO_INVALID;
2864
2865           dpo_set (&dpo,
2866                    DPO_CLASSIFY,
2867                    DPO_PROTO_IP4,
2868                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2869
2870           fib_table_entry_special_dpo_add (fib_index,
2871                                            &pfx,
2872                                            FIB_SOURCE_CLASSIFY,
2873                                            FIB_ENTRY_FLAG_NONE, &dpo);
2874           dpo_reset (&dpo);
2875         }
2876       else
2877         {
2878           fib_table_entry_special_remove (fib_index,
2879                                           &pfx, FIB_SOURCE_CLASSIFY);
2880         }
2881     }
2882
2883   return 0;
2884 }
2885 #endif
2886
2887 static clib_error_t *
2888 set_ip_classify_command_fn (vlib_main_t * vm,
2889                             unformat_input_t * input,
2890                             vlib_cli_command_t * cmd)
2891 {
2892   u32 table_index = ~0;
2893   int table_index_set = 0;
2894   u32 sw_if_index = ~0;
2895   int rv;
2896
2897   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2898     {
2899       if (unformat (input, "table-index %d", &table_index))
2900         table_index_set = 1;
2901       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2902                          vnet_get_main (), &sw_if_index))
2903         ;
2904       else
2905         break;
2906     }
2907
2908   if (table_index_set == 0)
2909     return clib_error_return (0, "classify table-index must be specified");
2910
2911   if (sw_if_index == ~0)
2912     return clib_error_return (0, "interface / subif must be specified");
2913
2914   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2915
2916   switch (rv)
2917     {
2918     case 0:
2919       break;
2920
2921     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2922       return clib_error_return (0, "No such interface");
2923
2924     case VNET_API_ERROR_NO_SUCH_ENTRY:
2925       return clib_error_return (0, "No such classifier table");
2926     }
2927   return 0;
2928 }
2929
2930 /*?
2931  * Assign a classification table to an interface. The classification
2932  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2934  * on an interface.
2935  *
2936  * @cliexpar
2937  * Example of how to assign a classification table to an interface:
2938  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2939 ?*/
2940 /* *INDENT-OFF* */
2941 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2942 {
2943     .path = "set ip classify",
2944     .short_help =
2945     "set ip classify intfc <interface> table-index <classify-idx>",
2946     .function = set_ip_classify_command_fn,
2947 };
2948 /* *INDENT-ON* */
2949
2950 /*
2951  * fd.io coding-style-patch-verification: ON
2952  *
2953  * Local Variables:
2954  * eval: (c-set-style "gnu")
2955  * End:
2956  */