gso: use the header offsets from buffer metadata
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/receive_dpo.h>
56 #include <vnet/dpo/classify_dpo.h>
57 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
58 #include <vnet/adj/adj_dp.h>
59 #include <vnet/pg/pg.h>
60
61 #include <vnet/ip/ip4_forward.h>
62 #include <vnet/interface_output.h>
63 #include <vnet/classify/vnet_classify.h>
64 #include <vnet/ip/reass/ip4_full_reass.h>
65
66 /** @brief IPv4 lookup node.
67     @node ip4-lookup
68
69     This is the main IPv4 lookup dispatch node.
70
71     @param vm vlib_main_t corresponding to the current thread
72     @param node vlib_node_runtime_t
73     @param frame vlib_frame_t whose contents should be dispatched
74
75     @par Graph mechanics: buffer metadata, next index usage
76
77     @em Uses:
78     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
79         - Indicates the @c sw_if_index value of the interface that the
80           packet was received on.
81     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
82         - When the value is @c ~0 then the node performs a longest prefix
83           match (LPM) for the packet destination address in the FIB attached
84           to the receive interface.
85         - Otherwise perform LPM for the packet destination address in the
86           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
87           value (0, 1, ...) and not a VRF id.
88
89     @em Sets:
90     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
91         - The lookup result adjacency index.
92
93     <em>Next Index:</em>
94     - Dispatches the packet to the node index found in
95       ip_adjacency_t @c adj->lookup_next_index
96       (where @c adj is the lookup result adjacency).
97 */
98 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
99                                 vlib_frame_t * frame)
100 {
101   return ip4_lookup_inline (vm, node, frame);
102 }
103
104 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
105
106 VLIB_REGISTER_NODE (ip4_lookup_node) =
107 {
108   .name = "ip4-lookup",
109   .vector_size = sizeof (u32),
110   .format_trace = format_ip4_lookup_trace,
111   .n_next_nodes = IP_LOOKUP_N_NEXT,
112   .next_nodes = IP4_LOOKUP_NEXT_NODES,
113 };
114
/*
 * ip4-load-balance node function.
 *
 * For every buffer in the frame:
 *  - read the load-balance index from ip.adj_index[VLIB_TX],
 *  - pick one of the load-balance's buckets (bucket 0 when there is only
 *    one, otherwise the bucket selected by the flow hash),
 *  - write the bucket's next node into nexts[] and its DPO index back
 *    into ip.adj_index[VLIB_TX],
 *  - bump the per-load-balance "via" combined counter with one packet and
 *    the buffer chain length.
 *
 * All buffers are then enqueued to their next nodes in one call and the
 * frame is traced if tracing is enabled on the node.
 *
 * Returns the number of vectors in the frame.
 */
VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
                                      vlib_node_runtime_t * node,
                                      vlib_frame_t * frame)
{
  vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
  u32 n_left, *from;
  u32 thread_index = vm->thread_index;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
  u16 nexts[VLIB_FRAME_SIZE], *next;

  from = vlib_frame_vector_args (frame);
  n_left = frame->n_vectors;
  next = nexts;

  /* translate the frame's buffer indices into buffer pointers */
  vlib_get_buffers (vm, from, bufs, n_left);

  /*
   * Dual loop: two buffers are handled per iteration. At least four must
   * remain because b[2] and b[3] are prefetched for the next iteration.
   */
  while (n_left >= 4)
    {
      const load_balance_t *lb0, *lb1;
      const ip4_header_t *ip0, *ip1;
      u32 lbi0, hc0, lbi1, hc1;
      const dpo_id_t *dpo0, *dpo1;

      /* Prefetch next iteration. */
      {
        vlib_prefetch_buffer_header (b[2], LOAD);
        vlib_prefetch_buffer_header (b[3], LOAD);

        CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
        CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
      }

      ip0 = vlib_buffer_get_current (b[0]);
      ip1 = vlib_buffer_get_current (b[1]);
      /* on entry adj_index[VLIB_TX] holds the load-balance index */
      lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];

      lb0 = load_balance_get (lbi0);
      lb1 = load_balance_get (lbi1);

      /*
       * This node is for via FIBs: the flow hash already stored in the
       * buffer is re-used if present (non-zero), after shifting it right
       * by one bit.
       * We don't want to use the same hash value at each level in the
       * recursion graph as that would lead to polarisation.
       */
      hc0 = hc1 = 0;

      if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
            {
              /* existing hash: shift it and store the new value back */
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                vnet_buffer (b[0])->ip.flow_hash >> 1;
            }
          else
            {
              /* no hash yet: compute one from the IPv4 header and store it */
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
            }
          dpo0 = load_balance_get_fwd_bucket
            (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
        }
      else
        {
          /* single bucket: no hashing required */
          dpo0 = load_balance_get_bucket_i (lb0, 0);
        }
      if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
            {
              hc1 = vnet_buffer (b[1])->ip.flow_hash =
                vnet_buffer (b[1])->ip.flow_hash >> 1;
            }
          else
            {
              hc1 = vnet_buffer (b[1])->ip.flow_hash =
                ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
            }
          dpo1 = load_balance_get_fwd_bucket
            (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
        }
      else
        {
          dpo1 = load_balance_get_bucket_i (lb1, 0);
        }

      next[0] = dpo0->dpoi_next_node;
      next[1] = dpo1->dpoi_next_node;

      /* on exit adj_index[VLIB_TX] holds the chosen bucket's DPO index */
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;

      /* counters are indexed by the load-balance index read on entry */
      vlib_increment_combined_counter
        (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
      vlib_increment_combined_counter
        (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));

      b += 2;
      next += 2;
      n_left -= 2;
    }

  /* Single loop: same processing for the remaining buffers, one at a time */
  while (n_left > 0)
    {
      const load_balance_t *lb0;
      const ip4_header_t *ip0;
      const dpo_id_t *dpo0;
      u32 lbi0, hc0;

      ip0 = vlib_buffer_get_current (b[0]);
      lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];

      lb0 = load_balance_get (lbi0);

      hc0 = 0;
      if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
        {
          if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                vnet_buffer (b[0])->ip.flow_hash >> 1;
            }
          else
            {
              hc0 = vnet_buffer (b[0])->ip.flow_hash =
                ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
            }
          dpo0 = load_balance_get_fwd_bucket
            (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
        }
      else
        {
          dpo0 = load_balance_get_bucket_i (lb0, 0);
        }

      next[0] = dpo0->dpoi_next_node;
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

      vlib_increment_combined_counter
        (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));

      b += 1;
      next += 1;
      n_left -= 1;
    }

  /* hand every buffer to the next node recorded for it in nexts[] */
  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  return frame->n_vectors;
}
268
269 VLIB_REGISTER_NODE (ip4_load_balance_node) =
270 {
271   .name = "ip4-load-balance",
272   .vector_size = sizeof (u32),
273   .sibling_of = "ip4-lookup",
274   .format_trace = format_ip4_lookup_trace,
275 };
276
277 #ifndef CLIB_MARCH_VARIANT
278 /* get first interface address */
279 ip4_address_t *
280 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
281                              ip_interface_address_t ** result_ia)
282 {
283   ip_lookup_main_t *lm = &im->lookup_main;
284   ip_interface_address_t *ia = 0;
285   ip4_address_t *result = 0;
286
287   foreach_ip_interface_address
288     (lm, ia, sw_if_index,
289      1 /* honor unnumbered */ ,
290      ({
291        ip4_address_t * a =
292          ip_interface_address_get_address (lm, ia);
293        result = a;
294        break;
295      }));
296   if (result_ia)
297     *result_ia = result ? ia : 0;
298   return result;
299 }
300 #endif
301
302 static void
303 ip4_add_subnet_bcast_route (u32 fib_index,
304                             fib_prefix_t *pfx,
305                             u32 sw_if_index)
306 {
307   vnet_sw_interface_flags_t iflags;
308
309   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
310
311   fib_table_entry_special_remove(fib_index,
312                                  pfx,
313                                  FIB_SOURCE_INTERFACE);
314
315   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
316     {
317       fib_table_entry_update_one_path (fib_index, pfx,
318                                        FIB_SOURCE_INTERFACE,
319                                        FIB_ENTRY_FLAG_NONE,
320                                        DPO_PROTO_IP4,
321                                        /* No next-hop address */
322                                        &ADJ_BCAST_ADDR,
323                                        sw_if_index,
324                                        // invalid FIB index
325                                        ~0,
326                                        1,
327                                        // no out-label stack
328                                        NULL,
329                                        FIB_ROUTE_PATH_FLAG_NONE);
330     }
331   else
332     {
333         fib_table_entry_special_add(fib_index,
334                                     pfx,
335                                     FIB_SOURCE_INTERFACE,
336                                     (FIB_ENTRY_FLAG_DROP |
337                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
338     }
339 }
340
341 static void
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
343                                  u32 sw_if_index,
344                                  u32 fib_index,
345                                  ip_interface_address_t * a)
346 {
347   ip_lookup_main_t *lm = &im->lookup_main;
348   ip_interface_prefix_t *if_prefix;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350
351   ip_interface_prefix_key_t key = {
352     .prefix = {
353       .fp_len = a->address_length,
354       .fp_proto = FIB_PROTOCOL_IP4,
355       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
356     },
357     .sw_if_index = sw_if_index,
358   };
359
360   fib_prefix_t pfx_special = {
361     .fp_proto = FIB_PROTOCOL_IP4,
362   };
363
364   /* If prefix already set on interface, just increment ref count & return */
365   if_prefix = ip_get_interface_prefix (lm, &key);
366   if (if_prefix)
367     {
368       if_prefix->ref_count += 1;
369       return;
370     }
371
372   /* New prefix - allocate a pool entry, initialize it, add to the hash */
373   pool_get (lm->if_prefix_pool, if_prefix);
374   if_prefix->ref_count = 1;
375   if_prefix->src_ia_index = a - lm->if_address_pool;
376   clib_memcpy (&if_prefix->key, &key, sizeof (key));
377   mhash_set (&lm->prefix_to_if_prefix_index, &key,
378              if_prefix - lm->if_prefix_pool, 0 /* old value */);
379
380   pfx_special.fp_len = a->address_length;
381   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
382
383   /* set the glean route for the prefix */
384   fib_table_entry_update_one_path (fib_index, &pfx_special,
385                                    FIB_SOURCE_INTERFACE,
386                                    (FIB_ENTRY_FLAG_CONNECTED |
387                                     FIB_ENTRY_FLAG_ATTACHED),
388                                    DPO_PROTO_IP4,
389                                    /* No next-hop address */
390                                    NULL,
391                                    sw_if_index,
392                                    /* invalid FIB index */
393                                    ~0,
394                                    1,
395                                    /* no out-label stack */
396                                    NULL,
397                                    FIB_ROUTE_PATH_FLAG_NONE);
398
399   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
400   if (a->address_length <= 30)
401     {
402       /* set a drop route for the base address of the prefix */
403       pfx_special.fp_len = 32;
404       pfx_special.fp_addr.ip4.as_u32 =
405         address->as_u32 & im->fib_masks[a->address_length];
406
407       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408         fib_table_entry_special_add (fib_index, &pfx_special,
409                                      FIB_SOURCE_INTERFACE,
410                                      (FIB_ENTRY_FLAG_DROP |
411                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
412
413       /* set a route for the broadcast address of the prefix */
414       pfx_special.fp_len = 32;
415       pfx_special.fp_addr.ip4.as_u32 =
416         address->as_u32 | ~im->fib_masks[a->address_length];
417       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
419
420
421     }
422   /* length == 31 - add an attached route for the other address */
423   else if (a->address_length == 31)
424     {
425       pfx_special.fp_len = 32;
426       pfx_special.fp_addr.ip4.as_u32 =
427         address->as_u32 ^ clib_host_to_net_u32(1);
428
429       fib_table_entry_update_one_path (fib_index, &pfx_special,
430                                        FIB_SOURCE_INTERFACE,
431                                        (FIB_ENTRY_FLAG_ATTACHED),
432                                        DPO_PROTO_IP4,
433                                        &pfx_special.fp_addr,
434                                        sw_if_index,
435                                        /* invalid FIB index */
436                                        ~0,
437                                        1,
438                                        NULL,
439                                        FIB_ROUTE_PATH_FLAG_NONE);
440     }
441 }
442
443 static void
444 ip4_add_interface_routes (u32 sw_if_index,
445                           ip4_main_t * im, u32 fib_index,
446                           ip_interface_address_t * a)
447 {
448   ip_lookup_main_t *lm = &im->lookup_main;
449   ip4_address_t *address = ip_interface_address_get_address (lm, a);
450   fib_prefix_t pfx = {
451     .fp_len = 32,
452     .fp_proto = FIB_PROTOCOL_IP4,
453     .fp_addr.ip4 = *address,
454   };
455
456   /* set special routes for the prefix if needed */
457   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
458
459   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
460     {
461       u32 classify_table_index =
462         lm->classify_table_index_by_sw_if_index[sw_if_index];
463       if (classify_table_index != (u32) ~ 0)
464         {
465           dpo_id_t dpo = DPO_INVALID;
466
467           dpo_set (&dpo,
468                    DPO_CLASSIFY,
469                    DPO_PROTO_IP4,
470                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
471
472           fib_table_entry_special_dpo_add (fib_index,
473                                            &pfx,
474                                            FIB_SOURCE_CLASSIFY,
475                                            FIB_ENTRY_FLAG_NONE, &dpo);
476           dpo_reset (&dpo);
477         }
478     }
479
480   fib_table_entry_update_one_path (fib_index, &pfx,
481                                    FIB_SOURCE_INTERFACE,
482                                    (FIB_ENTRY_FLAG_CONNECTED |
483                                     FIB_ENTRY_FLAG_LOCAL),
484                                    DPO_PROTO_IP4,
485                                    &pfx.fp_addr,
486                                    sw_if_index,
487                                    // invalid FIB index
488                                    ~0,
489                                    1, NULL,
490                                    FIB_ROUTE_PATH_FLAG_NONE);
491 }
492
493 static void
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
495                                  u32 sw_if_index,
496                                  u32 fib_index,
497                                  ip4_address_t * address,
498                                  u32 address_length)
499 {
500   ip_lookup_main_t *lm = &im->lookup_main;
501   ip_interface_prefix_t *if_prefix;
502
503   ip_interface_prefix_key_t key = {
504     .prefix = {
505       .fp_len = address_length,
506       .fp_proto = FIB_PROTOCOL_IP4,
507       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
508     },
509     .sw_if_index = sw_if_index,
510   };
511
512   fib_prefix_t pfx_special = {
513     .fp_len = 32,
514     .fp_proto = FIB_PROTOCOL_IP4,
515   };
516
517   if_prefix = ip_get_interface_prefix (lm, &key);
518   if (!if_prefix)
519     {
520       clib_warning ("Prefix not found while deleting %U",
521                     format_ip4_address_and_length, address, address_length);
522       return;
523     }
524
525   if_prefix->ref_count -= 1;
526
527   /*
528    * Routes need to be adjusted if deleting last intf addr in prefix
529    *
530    * We're done now otherwise
531    */
532   if (if_prefix->ref_count > 0)
533     return;
534
535   /* length <= 30, delete glean route, first address, last address */
536   if (address_length <= 30)
537     {
538       /* Less work to do in FIB if we remove the covered /32s first */
539
540       /* first address in prefix */
541       pfx_special.fp_addr.ip4.as_u32 =
542         address->as_u32 & im->fib_masks[address_length];
543       pfx_special.fp_len = 32;
544
545       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
546         fib_table_entry_special_remove (fib_index,
547                                         &pfx_special,
548                                         FIB_SOURCE_INTERFACE);
549
550       /* prefix broadcast address */
551       pfx_special.fp_addr.ip4.as_u32 =
552         address->as_u32 | ~im->fib_masks[address_length];
553       pfx_special.fp_len = 32;
554
555       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556         fib_table_entry_special_remove (fib_index,
557                                         &pfx_special,
558                                         FIB_SOURCE_INTERFACE);
559     }
560   else if (address_length == 31)
561     {
562       /* length == 31, delete attached route for the other address */
563       pfx_special.fp_addr.ip4.as_u32 =
564         address->as_u32 ^ clib_host_to_net_u32(1);
565
566       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
567     }
568
569   /* remove glean route for prefix */
570   pfx_special.fp_addr.ip4 = *address;
571   pfx_special.fp_len = address_length;
572   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
573
574   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
575   pool_put (lm->if_prefix_pool, if_prefix);
576 }
577
578 static void
579 ip4_del_interface_routes (u32 sw_if_index,
580                           ip4_main_t * im,
581                           u32 fib_index,
582                           ip4_address_t * address, u32 address_length)
583 {
584   fib_prefix_t pfx = {
585     .fp_len = 32,
586     .fp_proto = FIB_PROTOCOL_IP4,
587     .fp_addr.ip4 = *address,
588   };
589
590   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
591
592   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
593                                    address, address_length);
594 }
595
596 #ifndef CLIB_MARCH_VARIANT
597 void
598 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
599 {
600   ip4_main_t *im = &ip4_main;
601   vnet_main_t *vnm = vnet_get_main ();
602   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
603
604   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
605
606   /*
607    * enable/disable only on the 1<->0 transition
608    */
609   if (is_enable)
610     {
611       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
612         return;
613     }
614   else
615     {
616       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
617       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
618         return;
619     }
620   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
621                                !is_enable, 0, 0);
622
623
624   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
625                                sw_if_index, !is_enable, 0, 0);
626
627   if (is_enable)
628     hi->l3_if_count++;
629   else if (hi->l3_if_count)
630     hi->l3_if_count--;
631
632   {
633     ip4_enable_disable_interface_callback_t *cb;
634     vec_foreach (cb, im->enable_disable_interface_callbacks)
635       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
636   }
637 }
638
/*
 * Add or delete an IPv4 address on a software interface.
 *
 * Steps, in order:
 *  1. fail with VNET_API_ERROR_UNSUPPORTED when the interface does not
 *     support addressing;
 *  2. when adding, scan the addresses of every interface in the same FIB
 *     and fail with VNET_API_ERROR_ADDRESS_IN_USE on an overlapping prefix
 *     (same-interface/same-length siblings and stale entries are skipped);
 *  3. look the address up, then delete it, add it, or revive/move a stale
 *     entry;
 *  4. enable/disable IPv4 unicast and multicast on the interface;
 *  5. add/delete the interface routes when the interface is admin up;
 *  6. invoke the registered add/del address callbacks.
 *
 * @param vm             vlib main; only forwarded to the recursive call
 * @param sw_if_index    interface to act on
 * @param address        the IPv4 address
 * @param address_length prefix length of the address
 * @param is_del         non-zero to delete, zero to add
 * @return 0 on success, otherwise an error; vnm->api_errno is also set on
 *         the failures detected in this function
 */
static clib_error_t *
ip4_add_del_interface_address_internal (vlib_main_t * vm,
                                        u32 sw_if_index,
                                        ip4_address_t * address,
                                        u32 address_length, u32 is_del)
{
  vnet_main_t *vnm = vnet_get_main ();
  ip4_main_t *im = &ip4_main;
  ip_lookup_main_t *lm = &im->lookup_main;
  clib_error_t *error = 0;
  u32 if_address_index;
  ip4_address_fib_t ip4_af, *addr_fib = 0;

  /* nothing has been allocated yet, so a plain return is fine here */
  error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
  if (error)
    {
      vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
      return error;
    }

  /* addr_fib is a one-element vector; it is freed at the "done" label */
  ip4_addr_fib_init (&ip4_af, address,
                     vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
  vec_add1 (addr_fib, ip4_af);

  /*
   * there is no support for adj-fib handling in the presence of overlapping
   * subnets on interfaces. Easy fix - disallow overlapping subnets, like
   * most routers do.
   */
  if (!is_del)
    {
      /* When adding an address check that it does not conflict
         with an existing address on any interface in this table. */
      ip_interface_address_t *ia;
      vnet_sw_interface_t *sif;

      pool_foreach (sif, vnm->interface_main.sw_interfaces)
       {
          /* only interfaces bound to the same FIB can conflict */
          if (im->fib_index_by_sw_if_index[sw_if_index] ==
              im->fib_index_by_sw_if_index[sif->sw_if_index])
            {
              foreach_ip_interface_address
                (&im->lookup_main, ia, sif->sw_if_index,
                 0 /* honor unnumbered */ ,
                 ({
                   ip4_address_t * x =
                     ip_interface_address_get_address
                     (&im->lookup_main, ia);

                   /* overlap: either prefix covers the other's address */
                   if (ip4_destination_matches_route
                       (im, address, x, ia->address_length) ||
                       ip4_destination_matches_route (im,
                                                      x,
                                                      address,
                                                      address_length))
                     {
                       /* an intf may have >1 addr from the same prefix */
                       if ((sw_if_index == sif->sw_if_index) &&
                           (ia->address_length == address_length) &&
                           (x->as_u32 != address->as_u32))
                         continue;

                       if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
                         /* if the address we're comparing against is stale
                          * then the CP has not added this one back yet, maybe
                          * it never will, so we have to assume it won't and
                          * ignore it. if it does add it back, then it will fail
                          * because this one is now present */
                         continue;

                       /* error if the length or intf was different */
                       vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;

                       error = clib_error_create
                         ("failed to add %U on %U which conflicts with %U for interface %U",
                          format_ip4_address_and_length, address,
                          address_length,
                          format_vnet_sw_if_index_name, vnm,
                          sw_if_index,
                          format_ip4_address_and_length, x,
                          ia->address_length,
                          format_vnet_sw_if_index_name, vnm,
                          sif->sw_if_index);
                       goto done;
                     }
                 }));
            }
      }
    }

  /* ~0 means the address is not currently known */
  if_address_index = ip_interface_address_find (lm, addr_fib, address_length);

  if (is_del)
    {
      if (~0 == if_address_index)
        {
          vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
          error = clib_error_create ("%U not found for interface %U",
                                     lm->format_address_and_length,
                                     addr_fib, address_length,
                                     format_vnet_sw_if_index_name, vnm,
                                     sw_if_index);
          goto done;
        }

      error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
                                        address_length, sw_if_index);
      if (error)
        goto done;
    }
  else
    {
      if (~0 != if_address_index)
        {
          /* the address already exists somewhere */
          ip_interface_address_t *ia;

          ia = pool_elt_at_index (lm->if_address_pool, if_address_index);

          if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
            {
              if (ia->sw_if_index == sw_if_index)
                {
                  /* re-adding an address during the replace action.
                   * consider this the update. clear the flag and
                   * we're done */
                  ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
                  goto done;
                }
              else
                {
                  /* The prefix is moving from one interface to another.
                   * delete the stale and add the new.
                   * NOTE(review): the result of the recursive delete is
                   * not checked. */
                  ip4_add_del_interface_address_internal (vm,
                                                          ia->sw_if_index,
                                                          address,
                                                          address_length, 1);
                  /* ia is not used again after the delete */
                  ia = NULL;
                  error = ip_interface_address_add (lm, sw_if_index,
                                                    addr_fib, address_length,
                                                    &if_address_index);
                }
            }
          else
            {
              /* a live duplicate is an error */
              vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
              error = clib_error_create
                ("Prefix %U already found on interface %U",
                 lm->format_address_and_length, addr_fib, address_length,
                 format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
            }
        }
      else
        error = ip_interface_address_add (lm, sw_if_index,
                                          addr_fib, address_length,
                                          &if_address_index);
    }

  if (error)
    goto done;

  /* both calls take an "enable" argument: enable on add, disable on delete */
  ip4_sw_interface_enable_disable (sw_if_index, !is_del);
  ip4_mfib_interface_enable_disable (sw_if_index, !is_del);

  /* intf addr routes are added/deleted on admin up/down */
  if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
    {
      if (is_del)
        ip4_del_interface_routes (sw_if_index,
                                  im, ip4_af.fib_index, address,
                                  address_length);
      else
        ip4_add_interface_routes (sw_if_index,
                                  im, ip4_af.fib_index,
                                  pool_elt_at_index
                                  (lm->if_address_pool, if_address_index));
    }

  /* tell everyone who registered for address add/del events */
  ip4_add_del_interface_address_callback_t *cb;
  vec_foreach (cb, im->add_del_interface_address_callbacks)
    cb->function (im, cb->function_opaque, sw_if_index,
                  address, address_length, if_address_index, is_del);

done:
  /* common exit: release the temporary vector on success and on error */
  vec_free (addr_fib);
  return error;
}
825
826 clib_error_t *
827 ip4_add_del_interface_address (vlib_main_t * vm,
828                                u32 sw_if_index,
829                                ip4_address_t * address,
830                                u32 address_length, u32 is_del)
831 {
832   return ip4_add_del_interface_address_internal
833     (vm, sw_if_index, address, address_length, is_del);
834 }
835
836 void
837 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
838 {
839   ip_interface_address_t *ia;
840   ip4_main_t *im;
841
842   im = &ip4_main;
843
844   /*
845    * when directed broadcast is enabled, the subnet braodcast route will forward
846    * packets using an adjacency with a broadcast MAC. otherwise it drops
847    */
848   foreach_ip_interface_address(&im->lookup_main, ia,
849                                sw_if_index, 0,
850      ({
851        if (ia->address_length <= 30)
852          {
853            ip4_address_t *ipa;
854
855            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
856
857            fib_prefix_t pfx = {
858              .fp_len = 32,
859              .fp_proto = FIB_PROTOCOL_IP4,
860              .fp_addr = {
861                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
862              },
863            };
864
865            ip4_add_subnet_bcast_route
866              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
867                                                   sw_if_index),
868               &pfx, sw_if_index);
869          }
870      }));
871 }
872 #endif
873
874 static clib_error_t *
875 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
876 {
877   ip4_main_t *im = &ip4_main;
878   ip_interface_address_t *ia;
879   ip4_address_t *a;
880   u32 is_admin_up, fib_index;
881
882   vec_validate_init_empty (im->
883                            lookup_main.if_address_pool_index_by_sw_if_index,
884                            sw_if_index, ~0);
885
886   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
887
888   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
889
890   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
891                                 0 /* honor unnumbered */,
892   ({
893     a = ip_interface_address_get_address (&im->lookup_main, ia);
894     if (is_admin_up)
895       ip4_add_interface_routes (sw_if_index,
896                                 im, fib_index,
897                                 ia);
898     else
899       ip4_del_interface_routes (sw_if_index,
900                                 im, fib_index,
901                                 a, ia->address_length);
902   }));
903
904   return 0;
905 }
906
907 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
908
/*
 * Built-in ip4 unicast rx feature path definition.
 * The arc is entered from "ip4-input" / "ip4-input-no-checksum", ends at
 * "ip4-lookup", and its index is stored in
 * ip4_main.lookup_main.ucast_feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

/*
 * Features on the ip4-unicast arc. Each entry names, via .runs_before,
 * the feature(s) it must be ordered ahead of.
 */
VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec4-input-feature",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* Enabled/disabled per interface by ip4_sw_interface_add_del. */
VNET_FEATURE_INIT (ip4_not_enabled, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* Last feature of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};
980
/*
 * Built-in ip4 multicast rx feature path definition.
 * The arc is entered from "ip4-input" / "ip4-input-no-checksum", ends at
 * "ip4-mfib-forward-lookup", and its index is stored in
 * ip4_main.lookup_main.mcast_feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-mfib-forward-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

/* Features on the ip4-multicast arc; .runs_before gives the ordering. */
VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/* Enabled/disabled per interface by ip4_sw_interface_add_del. */
VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};
1010
/*
 * ip4 tx feature arc ("ip4-output"), which hosts the source and
 * port-range check among others.
 * The arc is entered from "ip4-rewrite", "ip4-midchain" and "ip4-dvr-dpo",
 * ends at "interface-output", and its index is stored in
 * ip4_main.lookup_main.output_feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  .last_in_arc = "interface-output",
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

/* Features on the ip4-output arc; .runs_before gives the ordering. */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ip4-outacl"),
};

VNET_FEATURE_INIT (ip4_outacl, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-outacl",
  .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec4-output-feature",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition: the final feature of the arc */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
1048
1049 static clib_error_t *
1050 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1051 {
1052   ip4_main_t *im = &ip4_main;
1053
1054   vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
1055   vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
1056
1057   if (is_add)
1058     {
1059       /* Fill in lookup tables with default table (0). */
1060       im->fib_index_by_sw_if_index[sw_if_index] = 0;
1061       im->mfib_index_by_sw_if_index[sw_if_index] = 0;
1062     }
1063   else
1064     {
1065       ip4_main_t *im4 = &ip4_main;
1066       ip_lookup_main_t *lm4 = &im4->lookup_main;
1067       ip_interface_address_t *ia = 0;
1068       ip4_address_t *address;
1069       vlib_main_t *vm = vlib_get_main ();
1070
1071       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1072       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1073       ({
1074         address = ip_interface_address_get_address (lm4, ia);
1075         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1076       }));
1077       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1078
1079       if (0 != im4->fib_index_by_sw_if_index[sw_if_index])
1080         fib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
1081       if (0 != im4->mfib_index_by_sw_if_index[sw_if_index])
1082         mfib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
1083
1084       /* Erase the lookup tables just in case */
1085       im4->fib_index_by_sw_if_index[sw_if_index] = ~0;
1086       im4->mfib_index_by_sw_if_index[sw_if_index] = ~0;
1087     }
1088
1089   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1090                                is_add, 0, 0);
1091
1092   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1093                                sw_if_index, is_add, 0, 0);
1094
1095   return /* no error */ 0;
1096 }
1097
1098 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1099
/*
 * Global IP4 main.
 * The object is defined only when CLIB_MARCH_VARIANT is not defined.
 */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
1104
1105 static clib_error_t *
1106 ip4_lookup_init (vlib_main_t * vm)
1107 {
1108   ip4_main_t *im = &ip4_main;
1109   clib_error_t *error;
1110   uword i;
1111
1112   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1113     return error;
1114   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1115     return (error);
1116   if ((error = vlib_call_init_function (vm, fib_module_init)))
1117     return error;
1118   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1119     return error;
1120
1121   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1122     {
1123       u32 m;
1124
1125       if (i < 32)
1126         m = pow2_mask (i) << (32 - i);
1127       else
1128         m = ~0;
1129       im->fib_masks[i] = clib_host_to_net_u32 (m);
1130     }
1131
1132   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1133
1134   /* Create FIB with index 0 and table id of 0. */
1135   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1136                                      FIB_SOURCE_DEFAULT_ROUTE);
1137   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1138                                       MFIB_SOURCE_DEFAULT_ROUTE);
1139
1140   {
1141     pg_node_t *pn;
1142     pn = pg_get_node (ip4_lookup_node.index);
1143     pn->unformat_edit = unformat_pg_ip4_header;
1144   }
1145
1146   {
1147     ethernet_arp_header_t h;
1148
1149     clib_memset (&h, 0, sizeof (h));
1150
1151 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1152 #define _8(f,v) h.f = v;
1153     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1154     _16 (l3_type, ETHERNET_TYPE_IP4);
1155     _8 (n_l2_address_bytes, 6);
1156     _8 (n_l3_address_bytes, 4);
1157     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1158 #undef _16
1159 #undef _8
1160
1161     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1162                                /* data */ &h,
1163                                sizeof (h),
1164                                /* alloc chunk size */ 8,
1165                                "ip4 arp");
1166   }
1167
1168   return error;
1169 }
1170
1171 VLIB_INIT_FUNCTION (ip4_lookup_init);
1172
/*
 * Per-packet trace record filled in by ip4_forward_next_trace and printed
 * by the format_ip4_*_trace functions.
 */
typedef struct
{
  /* Adjacency taken. */
  u32 dpo_index;
  /* Copy of the buffer's ip.flow_hash. */
  u32 flow_hash;
  /*
   * The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
   * fib index bound to the RX interface.
   */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1184
1185 #ifndef CLIB_MARCH_VARIANT
1186 u8 *
1187 format_ip4_forward_next_trace (u8 * s, va_list * args)
1188 {
1189   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1190   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1191   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1192   u32 indent = format_get_indent (s);
1193
1194   s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent,
1195               t->fib_index, t->dpo_index, t->flow_hash);
1196   s = format (s, "\n%U%U", format_white_space, indent, format_ip4_header,
1197               t->packet_data, sizeof (t->packet_data));
1198   return s;
1199 }
1200 #endif
1201
1202 static u8 *
1203 format_ip4_lookup_trace (u8 * s, va_list * args)
1204 {
1205   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1206   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1207   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1208   u32 indent = format_get_indent (s);
1209
1210   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1211               t->fib_index, t->dpo_index, t->flow_hash);
1212   s = format (s, "\n%U%U",
1213               format_white_space, indent,
1214               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1215   return s;
1216 }
1217
1218 static u8 *
1219 format_ip4_rewrite_trace (u8 * s, va_list * args)
1220 {
1221   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1222   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1223   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1224   u32 indent = format_get_indent (s);
1225
1226   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1227               t->fib_index, t->dpo_index, format_ip_adjacency,
1228               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1229   s = format (s, "\n%U%U",
1230               format_white_space, indent,
1231               format_ip_adjacency_packet_data,
1232               t->packet_data, sizeof (t->packet_data));
1233   return s;
1234 }
1235
1236 #ifndef CLIB_MARCH_VARIANT
1237 /* Common trace function for all ip4-forward next nodes. */
1238 void
1239 ip4_forward_next_trace (vlib_main_t * vm,
1240                         vlib_node_runtime_t * node,
1241                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1242 {
1243   u32 *from, n_left;
1244   ip4_main_t *im = &ip4_main;
1245
1246   n_left = frame->n_vectors;
1247   from = vlib_frame_vector_args (frame);
1248
1249   while (n_left >= 4)
1250     {
1251       u32 bi0, bi1;
1252       vlib_buffer_t *b0, *b1;
1253       ip4_forward_next_trace_t *t0, *t1;
1254
1255       /* Prefetch next iteration. */
1256       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1257       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1258
1259       bi0 = from[0];
1260       bi1 = from[1];
1261
1262       b0 = vlib_get_buffer (vm, bi0);
1263       b1 = vlib_get_buffer (vm, bi1);
1264
1265       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1266         {
1267           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1268           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1269           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1270           t0->fib_index =
1271             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1272              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1273             vec_elt (im->fib_index_by_sw_if_index,
1274                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1275
1276           clib_memcpy_fast (t0->packet_data,
1277                             vlib_buffer_get_current (b0),
1278                             sizeof (t0->packet_data));
1279         }
1280       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1281         {
1282           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1283           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1284           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1285           t1->fib_index =
1286             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1287              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1288             vec_elt (im->fib_index_by_sw_if_index,
1289                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1290           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1291                             sizeof (t1->packet_data));
1292         }
1293       from += 2;
1294       n_left -= 2;
1295     }
1296
1297   while (n_left >= 1)
1298     {
1299       u32 bi0;
1300       vlib_buffer_t *b0;
1301       ip4_forward_next_trace_t *t0;
1302
1303       bi0 = from[0];
1304
1305       b0 = vlib_get_buffer (vm, bi0);
1306
1307       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1308         {
1309           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1310           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1311           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1312           t0->fib_index =
1313             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1314              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1315             vec_elt (im->fib_index_by_sw_if_index,
1316                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1317           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1318                             sizeof (t0->packet_data));
1319         }
1320       from += 1;
1321       n_left -= 1;
1322     }
1323 }
1324
1325 /* Compute TCP/UDP/ICMP4 checksum in software. */
1326 u16
1327 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1328                               ip4_header_t * ip0)
1329 {
1330   ip_csum_t sum0;
1331   u32 ip_header_length, payload_length_host_byte_order;
1332
1333   /* Initialize checksum with ip header. */
1334   ip_header_length = ip4_header_bytes (ip0);
1335   payload_length_host_byte_order =
1336     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1337   sum0 =
1338     clib_host_to_net_u32 (payload_length_host_byte_order +
1339                           (ip0->protocol << 16));
1340
1341   if (BITS (uword) == 32)
1342     {
1343       sum0 =
1344         ip_csum_with_carry (sum0,
1345                             clib_mem_unaligned (&ip0->src_address, u32));
1346       sum0 =
1347         ip_csum_with_carry (sum0,
1348                             clib_mem_unaligned (&ip0->dst_address, u32));
1349     }
1350   else
1351     sum0 =
1352       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1353
1354   return ip_calculate_l4_checksum (vm, p0, sum0,
1355                                    payload_length_host_byte_order, (u8 *) ip0,
1356                                    ip_header_length, NULL);
1357 }
1358
1359 u32
1360 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1361 {
1362   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1363   udp_header_t *udp0;
1364   u16 sum16;
1365
1366   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1367           || ip0->protocol == IP_PROTOCOL_UDP);
1368
1369   udp0 = (void *) (ip0 + 1);
1370   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1371     {
1372       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1373                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1374       return p0->flags;
1375     }
1376
1377   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1378
1379   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1380                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1381
1382   return p0->flags;
1383 }
1384 #endif
1385
/*
 * ip4-local feature arc: entered from "ip4-local" and "ip4-receive",
 * ending at "ip4-local-end-of-arc". The arc index is read back through
 * vnet_feat_arc_ip4_local in ip4_local_set_next_and_error.
 */
VNET_FEATURE_ARC_INIT (ip4_local) = {
  .arc_name = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"),
  .last_in_arc = "ip4-local-end-of-arc",
};
1391
1392 static inline void
1393 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1394                             ip4_header_t * ip, u8 is_udp, u8 * error,
1395                             u8 * good_tcp_udp)
1396 {
1397   u32 flags0;
1398   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1399   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1400   if (is_udp)
1401     {
1402       udp_header_t *udp;
1403       u32 ip_len, udp_len;
1404       i32 len_diff;
1405       udp = ip4_next_header (ip);
1406       /* Verify UDP length. */
1407       ip_len = clib_net_to_host_u16 (ip->length);
1408       udp_len = clib_net_to_host_u16 (udp->length);
1409
1410       len_diff = ip_len - udp_len;
1411       *good_tcp_udp &= len_diff >= 0;
1412       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1413     }
1414 }
1415
/*
 * Checksum-state helpers for ip4-local. Every macro argument and every
 * expansion is fully parenthesized so the macros can be used with arbitrary
 * pointer expressions and inside larger expressions.
 */

/* True when the buffer has the OFFLOAD flag and TCP or UDP csum offload. */
#define ip4_local_csum_is_offloaded(_b)                                       \
  ((((_b)->flags) & VNET_BUFFER_F_OFFLOAD) &&                                 \
   (vnet_buffer (_b)->oflags &                                                \
    (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))

/*
 * True when a TCP/UDP packet still needs a software checksum check: the
 * checksum has neither been computed already nor been offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                             \
  ((is_tcp_udp) &&                                                            \
   !((((_b)->flags) & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) ||                  \
     ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                           \
  (((((_b)->flags) & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) ||                    \
    ip4_local_csum_is_offloaded (_b)) != 0)
1428
1429 static inline void
1430 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1431                          ip4_header_t * ih, u8 * error)
1432 {
1433   u8 is_udp, is_tcp_udp, good_tcp_udp;
1434
1435   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1436   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1437
1438   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1439     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1440   else
1441     good_tcp_udp = ip4_local_csum_is_valid (b);
1442
1443   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1444   *error = (is_tcp_udp && !good_tcp_udp
1445             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1446 }
1447
/*
 * Two-packet variant of ip4_local_check_l4_csum.
 *
 * b, ih and error are 2-element arrays describing the pair. The validity of
 * each checksum is first taken from the buffer flags; if either packet
 * needs a software check, validation is run for every TCP/UDP packet of the
 * pair whose checksum is not offloaded. For a TCP/UDP packet with a bad
 * checksum, error[i] becomes IP4_ERROR_TCP_CHECKSUM or
 * IP4_ERROR_UDP_CHECKSUM; otherwise error[i] is left as is.
 */
static inline void
ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
                            ip4_header_t ** ih, u8 * error)
{
  u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];

  is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
  is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;

  is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
  is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;

  /* fast-path answer straight from the buffer flags */
  good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
  good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);

  if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
                     || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
    {
      /* slow path: software validation overrides the flag-based answer */
      if (is_tcp_udp[0] && !ip4_local_csum_is_offloaded (b[0]))
        ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
                                    &good_tcp_udp[0]);
      if (is_tcp_udp[1] && !ip4_local_csum_is_offloaded (b[1]))
        ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
                                    &good_tcp_udp[1]);
    }

  /* TCP error code + is_udp selects the TCP or the UDP checksum error */
  error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
              IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
  error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
              IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
}
1479
1480 static inline void
1481 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1482                               vlib_buffer_t * b, u16 * next, u8 error,
1483                               u8 head_of_feature_arc)
1484 {
1485   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1486   u32 next_index;
1487
1488   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1489   b->error = error ? error_node->errors[error] : 0;
1490   if (head_of_feature_arc)
1491     {
1492       next_index = *next;
1493       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1494         {
1495           vnet_feature_arc_start (
1496             arc_index, vnet_buffer (b)->ip.rx_sw_if_index, &next_index, b);
1497           *next = next_index;
1498         }
1499     }
1500 }
1501
/*
 * One-entry cache of the most recent source lookup done by
 * ip4_local_check_src / ip4_local_check_src_x2.
 */
typedef struct
{
  /* The src and fib-index together determine if packet n is the same as n-1 */
  ip4_address_t src;
  u32 fib_index;
  /* load-balance index returned by the cached lookup */
  u32 lbi;
  /* error value that resulted from the cached check */
  u8 error;
  /* non-zero until the first lookup has been stored; forces a lookup */
  u8 first;
} ip4_local_last_check_t;
1511
/*
 * Source check for a single locally-destined packet.
 *
 * Selects the fib index (sw_if_index[VLIB_TX] when set, else the existing
 * ip.fib_index), records the rx interface (overridden by the receive DPO's
 * interface when is_receive_dpo is set and that interface is not ~0), then
 * looks up the source address unless source and fib index match the
 * previous packet cached in last_check. *error0 is updated with
 * IP4_ERROR_SPOOFED_LOCAL_PACKETS or IP4_ERROR_SRC_LOOKUP_MISS when the
 * respective check fails, and only if it was still
 * IP4_ERROR_UNKNOWN_PROTOCOL on entry to that check.
 */
static inline void
ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0,
                     ip4_local_last_check_t *last_check, u8 *error0,
                     int is_receive_dpo)
{
  const dpo_id_t *dpo0;
  load_balance_t *lb0;
  u32 lbi0;

  vnet_buffer (b)->ip.fib_index =
    vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;

  vnet_buffer (b)->ip.rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
  if (is_receive_dpo)
    {
      receive_dpo_t *rd;
      rd = receive_dpo_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
      if (rd->rd_sw_if_index != ~0)
        vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index;
    }

  /*
   * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
   *  adjacency for the destination address (the local interface address).
   * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
   *  adjacency for the source address (the remote sender's address)
   */
  if (PREDICT_TRUE ((last_check->src.as_u32 != ip0->src_address.as_u32)) ||
      (last_check->fib_index != vnet_buffer (b)->ip.fib_index) ||
      last_check->first)
    {
      /* cache miss: look the source up and refresh the cache */
      lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
                                        &ip0->src_address);

      vnet_buffer (b)->ip.adj_index[VLIB_RX] =
        vnet_buffer (b)->ip.adj_index[VLIB_TX];
      vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;

      lb0 = load_balance_get (lbi0);
      dpo0 = load_balance_get_bucket_i (lb0, 0);

      /*
       * Must have a route to source otherwise we drop the packet.
       * ip4 broadcasts are accepted, e.g. to make dhcp client work
       *
       * The checks are:
       *  - the source is a receive => it's from us => bogus, do this
       *    first since it sets a different error code.
       *  - uRPF check for any route to source - accept if passes.
       *  - allow packets destined to the broadcast address from unknown sources
       */

      *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
                  && dpo0->dpoi_type == DPO_RECEIVE) ?
                 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
      *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
                  && !fib_urpf_check_size (lb0->lb_urpf)
                  && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
                 IP4_ERROR_SRC_LOOKUP_MISS : *error0);

      last_check->src.as_u32 = ip0->src_address.as_u32;
      last_check->lbi = lbi0;
      last_check->error = *error0;
      last_check->first = 0;
      last_check->fib_index = vnet_buffer (b)->ip.fib_index;
    }
  else
    {
      /* cache hit: reuse the previous packet's lookup result and error */
      vnet_buffer (b)->ip.adj_index[VLIB_RX] =
        vnet_buffer (b)->ip.adj_index[VLIB_TX];
      vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
      *error0 = last_check->error;
    }
}
1587
/*
 * Two-packet variant of ip4_local_check_src.
 *
 * b, ip and error are 2-element arrays describing the pair. The cached
 * result in last_check is reused only when both packets match it in source
 * address and fib index (and the cache is not in its initial state);
 * otherwise both sources are looked up together and the cache is refreshed
 * from the second packet.
 */
static inline void
ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip,
                        ip4_local_last_check_t *last_check, u8 *error,
                        int is_receive_dpo)
{
  const dpo_id_t *dpo[2];
  load_balance_t *lb[2];
  u32 not_last_hit;
  u32 lbi[2];

  /* accumulate every difference from the cached entry; 0 means cache hit */
  not_last_hit = last_check->first;
  not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
  not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;

  vnet_buffer (b[0])->ip.fib_index =
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[0])->ip.fib_index;

  vnet_buffer (b[1])->ip.fib_index =
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[1])->ip.fib_index;

  not_last_hit |= vnet_buffer (b[0])->ip.fib_index ^ last_check->fib_index;
  not_last_hit |= vnet_buffer (b[1])->ip.fib_index ^ last_check->fib_index;

  vnet_buffer (b[0])->ip.rx_sw_if_index =
    vnet_buffer (b[0])->sw_if_index[VLIB_RX];
  vnet_buffer (b[1])->ip.rx_sw_if_index =
    vnet_buffer (b[1])->sw_if_index[VLIB_RX];
  if (is_receive_dpo)
    {
      /* the receive DPO's interface, when set, overrides the rx interface */
      const receive_dpo_t *rd0, *rd1;
      rd0 = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
      rd1 = receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
      if (rd0->rd_sw_if_index != ~0)
        vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
      if (rd1->rd_sw_if_index != ~0)
        vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
    }

  /*
   * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
   *  adjacency for the destination address (the local interface address).
   * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
   *  adjacency for the source address (the remote sender's address)
   */
  if (PREDICT_TRUE (not_last_hit))
    {
      ip4_fib_forwarding_lookup_x2 (
        vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
        &ip[0]->src_address, &ip[1]->src_address, &lbi[0], &lbi[1]);

      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];

      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];

      lb[0] = load_balance_get (lbi[0]);
      lb[1] = load_balance_get (lbi[1]);

      dpo[0] = load_balance_get_bucket_i (lb[0], 0);
      dpo[1] = load_balance_get_bucket_i (lb[1], 0);

      /* same spoof / uRPF / broadcast checks as the single-packet path */
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   dpo[0]->dpoi_type == DPO_RECEIVE) ?
                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   !fib_urpf_check_size (lb[0]->lb_urpf) &&
                   ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
                  ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);

      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   dpo[1]->dpoi_type == DPO_RECEIVE) ?
                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
                   !fib_urpf_check_size (lb[1]->lb_urpf) &&
                   ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
                  ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);

      /* the second packet of the pair becomes the cached entry */
      last_check->src.as_u32 = ip[1]->src_address.as_u32;
      last_check->lbi = lbi[1];
      last_check->error = error[1];
      last_check->first = 0;
      last_check->fib_index = vnet_buffer (b[1])->ip.fib_index;
    }
  else
    {
      /* cache hit for both packets: reuse the stored lookup and error */
      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;

      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
        vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;

      error[0] = last_check->error;
      error[1] = last_check->error;
    }
}
1692
/*
 * Classification of a packet reaching ip4-local.
 *
 * IP_LOCAL_PACKET_TYPE_L4 must stay zero: the dispatch code in this file
 * tests the type as a boolean (non-zero means "skip the L4 checksum and
 * source checks") and XORs two types to detect a mixed pair.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,   /* ordinary packet, fully checked */
  IP_LOCAL_PACKET_TYPE_NAT = 1,  /* buffer is flagged as NATed */
  IP_LOCAL_PACKET_TYPE_FRAG = 2, /* IP fragment, sent to reassembly */
};
1699
1700 /**
1701  * Determine packet type and next node.
1702  *
1703  * The expectation is that all packets that are not L4 will skip
1704  * checksums and source checks.
1705  */
1706 always_inline u8
1707 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1708 {
1709   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1710
1711   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1712     {
1713       *next = IP_LOCAL_NEXT_REASSEMBLY;
1714       return IP_LOCAL_PACKET_TYPE_FRAG;
1715     }
1716   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1717     {
1718       *next = lm->local_next_by_ip_protocol[ip->protocol];
1719       return IP_LOCAL_PACKET_TYPE_NAT;
1720     }
1721
1722   *next = lm->local_next_by_ip_protocol[ip->protocol];
1723   return IP_LOCAL_PACKET_TYPE_L4;
1724 }
1725
1726 static inline uword
1727 ip4_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1728                   vlib_frame_t *frame, int head_of_feature_arc,
1729                   int is_receive_dpo)
1730 {
1731   u32 *from, n_left_from;
1732   vlib_node_runtime_t *error_node =
1733     vlib_node_get_runtime (vm, ip4_local_node.index);
1734   u16 nexts[VLIB_FRAME_SIZE], *next;
1735   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1736   ip4_header_t *ip[2];
1737   u8 error[2], pt[2];
1738
1739   ip4_local_last_check_t last_check = {
1740     /*
1741      * 0.0.0.0 can appear as the source address of an IP packet,
1742      * as can any other address, hence the need to use the 'first'
1743      * member to make sure the .lbi is initialised for the first
1744      * packet.
1745      */
1746     .src = { .as_u32 = 0 },
1747     .lbi = ~0,
1748     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1749     .first = 1,
1750     .fib_index = 0,
1751   };
1752
1753   from = vlib_frame_vector_args (frame);
1754   n_left_from = frame->n_vectors;
1755
1756   if (node->flags & VLIB_NODE_FLAG_TRACE)
1757     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1758
1759   vlib_get_buffers (vm, from, bufs, n_left_from);
1760   b = bufs;
1761   next = nexts;
1762
1763   while (n_left_from >= 6)
1764     {
1765       u8 not_batch = 0;
1766
1767       /* Prefetch next iteration. */
1768       {
1769         vlib_prefetch_buffer_header (b[4], LOAD);
1770         vlib_prefetch_buffer_header (b[5], LOAD);
1771
1772         clib_prefetch_load (b[4]->data);
1773         clib_prefetch_load (b[5]->data);
1774       }
1775
1776       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1777
1778       ip[0] = vlib_buffer_get_current (b[0]);
1779       ip[1] = vlib_buffer_get_current (b[1]);
1780
1781       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1782       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1783
1784       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1785       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1786
1787       not_batch = pt[0] ^ pt[1];
1788
1789       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1790         goto skip_checks;
1791
1792       if (PREDICT_TRUE (not_batch == 0))
1793         {
1794           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1795           ip4_local_check_src_x2 (b, ip, &last_check, error, is_receive_dpo);
1796         }
1797       else
1798         {
1799           if (!pt[0])
1800             {
1801               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1802               ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1803                                    is_receive_dpo);
1804             }
1805           if (!pt[1])
1806             {
1807               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1808               ip4_local_check_src (b[1], ip[1], &last_check, &error[1],
1809                                    is_receive_dpo);
1810             }
1811         }
1812
1813     skip_checks:
1814
1815       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1816                                     head_of_feature_arc);
1817       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1818                                     head_of_feature_arc);
1819
1820       b += 2;
1821       next += 2;
1822       n_left_from -= 2;
1823     }
1824
1825   while (n_left_from > 0)
1826     {
1827       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1828
1829       ip[0] = vlib_buffer_get_current (b[0]);
1830       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1831       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1832
1833       if (head_of_feature_arc == 0 || pt[0])
1834         goto skip_check;
1835
1836       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1837       ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1838                            is_receive_dpo);
1839
1840     skip_check:
1841
1842       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1843                                     head_of_feature_arc);
1844
1845       b += 1;
1846       next += 1;
1847       n_left_from -= 1;
1848     }
1849
1850   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1851   return frame->n_vectors;
1852 }
1853
1854 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1855                                vlib_frame_t * frame)
1856 {
1857   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1858                            0 /* is_receive_dpo */);
1859 }
1860
1861 VLIB_REGISTER_NODE (ip4_local_node) =
1862 {
1863   .name = "ip4-local",
1864   .vector_size = sizeof (u32),
1865   .format_trace = format_ip4_forward_next_trace,
1866   .n_errors = IP4_N_ERROR,
1867   .error_counters = ip4_error_counters,
1868   .n_next_nodes = IP_LOCAL_N_NEXT,
1869   .next_nodes =
1870   {
1871     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1872     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1873     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1874     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1875     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-local-full-reassembly",
1876   },
1877 };
1878
1879 VLIB_NODE_FN (ip4_receive_local_node)
1880 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1881 {
1882   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1883                            1 /* is_receive_dpo */);
1884 }
1885
1886 VLIB_REGISTER_NODE (ip4_receive_local_node) = {
1887   .name = "ip4-receive",
1888   .vector_size = sizeof (u32),
1889   .format_trace = format_ip4_forward_next_trace,
1890   .sibling_of = "ip4-local"
1891 };
1892
1893 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1894                                           vlib_node_runtime_t * node,
1895                                           vlib_frame_t * frame)
1896 {
1897   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */,
1898                            0 /* is_receive_dpo */);
1899 }
1900
1901 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1902   .name = "ip4-local-end-of-arc",
1903   .vector_size = sizeof (u32),
1904
1905   .format_trace = format_ip4_forward_next_trace,
1906   .sibling_of = "ip4-local",
1907 };
1908
1909 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1910   .arc_name = "ip4-local",
1911   .node_name = "ip4-local-end-of-arc",
1912   .runs_before = 0, /* not before any other features */
1913 };
1914
1915 #ifndef CLIB_MARCH_VARIANT
1916 void
1917 ip4_register_protocol (u32 protocol, u32 node_index)
1918 {
1919   vlib_main_t *vm = vlib_get_main ();
1920   ip4_main_t *im = &ip4_main;
1921   ip_lookup_main_t *lm = &im->lookup_main;
1922
1923   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1924   lm->local_next_by_ip_protocol[protocol] =
1925     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1926 }
1927
1928 void
1929 ip4_unregister_protocol (u32 protocol)
1930 {
1931   ip4_main_t *im = &ip4_main;
1932   ip_lookup_main_t *lm = &im->lookup_main;
1933
1934   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1935   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1936 }
1937 #endif
1938
1939 static clib_error_t *
1940 show_ip_local_command_fn (vlib_main_t * vm,
1941                           unformat_input_t * input, vlib_cli_command_t * cmd)
1942 {
1943   ip4_main_t *im = &ip4_main;
1944   ip_lookup_main_t *lm = &im->lookup_main;
1945   int i;
1946
1947   vlib_cli_output (vm, "Protocols handled by ip4_local");
1948   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1949     {
1950       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1951         {
1952           u32 node_index = vlib_get_node (vm,
1953                                           ip4_local_node.index)->
1954             next_nodes[lm->local_next_by_ip_protocol[i]];
1955           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1956                            format_vlib_node_name, vm, node_index);
1957         }
1958     }
1959   return 0;
1960 }
1961
1962
1963
1964 /*?
1965  * Display the set of protocols handled by the local IPv4 stack.
1966  *
1967  * @cliexpar
1968  * Example of how to display local protocol table:
1969  * @cliexstart{show ip local}
1970  * Protocols handled by ip4_local
1971  * 1
1972  * 17
1973  * 47
1974  * @cliexend
1975 ?*/
1976 VLIB_CLI_COMMAND (show_ip_local, static) =
1977 {
1978   .path = "show ip local",
1979   .function = show_ip_local_command_fn,
1980   .short_help = "show ip local",
1981 };
1982
/*
 * Fixed next-node indices used by the ip4 rewrite code when a packet
 * cannot simply follow the adjacency's own next index.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,       /* initial value of every next slot */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1, /* TTL expired, or too big with DF set */
  IP4_REWRITE_NEXT_FRAGMENT = 2,   /* too big, DF clear */
  IP4_REWRITE_N_NEXT               /* Last: number of entries */
} ip4_rewrite_next_t;
1990
/**
 * The bits of an IPv4 address that are kept when constructing a
 * multicast MAC address: the low-order 23 bits of the address. The
 * address is held in network byte order, so the constant depends on the
 * host's endianness.
 */
#if !(CLIB_ARCH_IS_BIG_ENDIAN)
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2000
2001 always_inline void
2002 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2003                u16 adj_packet_bytes, bool df, u16 * next,
2004                u8 is_midchain, u32 * error)
2005 {
2006   if (packet_len > adj_packet_bytes)
2007     {
2008       *error = IP4_ERROR_MTU_EXCEEDED;
2009       if (df)
2010         {
2011           icmp4_error_set_vnet_buffer
2012             (b, ICMP4_destination_unreachable,
2013              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2014              adj_packet_bytes);
2015           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2016         }
2017       else
2018         {
2019           /* IP fragmentation */
2020           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2021                                    (is_midchain ?
2022                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2023                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2024           *next = IP4_REWRITE_NEXT_FRAGMENT;
2025         }
2026     }
2027 }
2028
2029 /* increment TTL & update checksum.
2030    Works either endian, so no need for byte swap. */
2031 static_always_inline void
2032 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2033 {
2034   i32 ttl;
2035   u32 checksum;
2036   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2037     return;
2038
2039   ttl = ip->ttl;
2040
2041   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2042   checksum += checksum >= 0xffff;
2043
2044   ip->checksum = checksum;
2045   ttl += 1;
2046   ip->ttl = ttl;
2047
2048   ASSERT (ip4_header_checksum_is_valid (ip) ||
2049           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
2050           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
2051 }
2052
2053 /* Decrement TTL & update checksum.
2054    Works either endian, so no need for byte swap. */
2055 static_always_inline void
2056 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2057                             u32 * error)
2058 {
2059   i32 ttl;
2060   u32 checksum;
2061   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2062     return;
2063
2064   ttl = ip->ttl;
2065
2066   /* Input node should have reject packets with ttl 0. */
2067   ASSERT (ip->ttl > 0);
2068
2069   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2070   checksum += checksum >= 0xffff;
2071
2072   ip->checksum = checksum;
2073   ttl -= 1;
2074   ip->ttl = ttl;
2075
2076   /*
2077    * If the ttl drops below 1 when forwarding, generate
2078    * an ICMP response.
2079    */
2080   if (PREDICT_FALSE (ttl <= 0))
2081     {
2082       *error = IP4_ERROR_TIME_EXPIRED;
2083       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2084       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2085                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2086                                    0);
2087       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2088     }
2089
2090   /* Verify checksum. */
2091   ASSERT (ip4_header_checksum_is_valid (ip) ||
2092           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
2093           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
2094 }
2095
2096 always_inline uword
2097 ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
2098                     vlib_frame_t *frame, int do_counters, int is_midchain,
2099                     int is_mcast)
2100 {
2101   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2102   u32 *from = vlib_frame_vector_args (frame);
2103   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2104   u16 nexts[VLIB_FRAME_SIZE], *next;
2105   u32 n_left_from;
2106   vlib_node_runtime_t *error_node =
2107     vlib_node_get_runtime (vm, ip4_input_node.index);
2108
2109   n_left_from = frame->n_vectors;
2110   u32 thread_index = vm->thread_index;
2111
2112   vlib_get_buffers (vm, from, bufs, n_left_from);
2113   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2114
2115 #if (CLIB_N_PREFETCHES >= 8)
2116   if (n_left_from >= 6)
2117     {
2118       int i;
2119       for (i = 2; i < 6; i++)
2120         vlib_prefetch_buffer_header (bufs[i], LOAD);
2121     }
2122
2123   next = nexts;
2124   b = bufs;
2125   while (n_left_from >= 8)
2126     {
2127       const ip_adjacency_t *adj0, *adj1;
2128       ip4_header_t *ip0, *ip1;
2129       u32 rw_len0, error0, adj_index0;
2130       u32 rw_len1, error1, adj_index1;
2131       u32 tx_sw_if_index0, tx_sw_if_index1;
2132       u8 *p;
2133
2134       if (is_midchain)
2135         {
2136           vlib_prefetch_buffer_header (b[6], LOAD);
2137           vlib_prefetch_buffer_header (b[7], LOAD);
2138         }
2139
2140       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2141       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2142
2143       /*
2144        * pre-fetch the per-adjacency counters
2145        */
2146       if (do_counters)
2147         {
2148           vlib_prefetch_combined_counter (&adjacency_counters,
2149                                           thread_index, adj_index0);
2150           vlib_prefetch_combined_counter (&adjacency_counters,
2151                                           thread_index, adj_index1);
2152         }
2153
2154       ip0 = vlib_buffer_get_current (b[0]);
2155       ip1 = vlib_buffer_get_current (b[1]);
2156
2157       error0 = error1 = IP4_ERROR_NONE;
2158
2159       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2160       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2161
2162       /* Rewrite packet header and updates lengths. */
2163       adj0 = adj_get (adj_index0);
2164       adj1 = adj_get (adj_index1);
2165
2166       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2167       rw_len0 = adj0[0].rewrite_header.data_bytes;
2168       rw_len1 = adj1[0].rewrite_header.data_bytes;
2169       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2170       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2171
2172       p = vlib_buffer_get_current (b[2]);
2173       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2174       clib_prefetch_load (p);
2175
2176       p = vlib_buffer_get_current (b[3]);
2177       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2178       clib_prefetch_load (p);
2179
2180       /* Check MTU of outgoing interface. */
2181       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2182       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2183
2184       if (b[0]->flags & VNET_BUFFER_F_GSO)
2185         ip0_len = gso_mtu_sz (b[0]);
2186       if (b[1]->flags & VNET_BUFFER_F_GSO)
2187         ip1_len = gso_mtu_sz (b[1]);
2188
2189       ip4_mtu_check (b[0], ip0_len,
2190                      adj0[0].rewrite_header.max_l3_packet_bytes,
2191                      ip0->flags_and_fragment_offset &
2192                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2193                      next + 0, is_midchain, &error0);
2194       ip4_mtu_check (b[1], ip1_len,
2195                      adj1[0].rewrite_header.max_l3_packet_bytes,
2196                      ip1->flags_and_fragment_offset &
2197                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2198                      next + 1, is_midchain, &error1);
2199
2200       if (is_mcast)
2201         {
2202           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2203                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2204                     IP4_ERROR_SAME_INTERFACE : error0);
2205           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2206                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2207                     IP4_ERROR_SAME_INTERFACE : error1);
2208         }
2209
2210       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2211        * to see the IP header */
2212       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2213         {
2214           u32 next_index = adj0[0].rewrite_header.next_index;
2215           vlib_buffer_advance (b[0], -(word) rw_len0);
2216
2217           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2218           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2219
2220           if (PREDICT_FALSE
2221               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2222             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2223                                                 tx_sw_if_index0,
2224                                                 &next_index, b[0],
2225                                                 adj0->ia_cfg_index);
2226
2227           next[0] = next_index;
2228         }
2229       else
2230         {
2231           b[0]->error = error_node->errors[error0];
2232           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2233             ip4_ttl_inc (b[0], ip0);
2234         }
2235       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2236         {
2237           u32 next_index = adj1[0].rewrite_header.next_index;
2238           vlib_buffer_advance (b[1], -(word) rw_len1);
2239
2240           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2241           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2242
2243           if (PREDICT_FALSE
2244               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2245             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2246                                                 tx_sw_if_index1,
2247                                                 &next_index, b[1],
2248                                                 adj1->ia_cfg_index);
2249           next[1] = next_index;
2250         }
2251       else
2252         {
2253           b[1]->error = error_node->errors[error1];
2254           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2255             ip4_ttl_inc (b[1], ip1);
2256         }
2257
2258       if (is_midchain)
2259         /* Guess we are only writing on ipv4 header. */
2260         vnet_rewrite_two_headers (adj0[0], adj1[0],
2261                                   ip0, ip1, sizeof (ip4_header_t));
2262       else
2263         /* Guess we are only writing on simple Ethernet header. */
2264         vnet_rewrite_two_headers (adj0[0], adj1[0],
2265                                   ip0, ip1, sizeof (ethernet_header_t));
2266
2267       if (do_counters)
2268         {
2269           if (error0 == IP4_ERROR_NONE)
2270             vlib_increment_combined_counter
2271               (&adjacency_counters,
2272                thread_index,
2273                adj_index0, 1,
2274                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2275
2276           if (error1 == IP4_ERROR_NONE)
2277             vlib_increment_combined_counter
2278               (&adjacency_counters,
2279                thread_index,
2280                adj_index1, 1,
2281                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2282         }
2283
2284       if (is_midchain)
2285         {
2286           if (error0 == IP4_ERROR_NONE)
2287             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2288           if (error1 == IP4_ERROR_NONE)
2289             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2290         }
2291
2292       if (is_mcast)
2293         {
2294           /* copy bytes from the IP address into the MAC rewrite */
2295           if (error0 == IP4_ERROR_NONE)
2296             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2297                                         adj0->rewrite_header.dst_mcast_offset,
2298                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2299           if (error1 == IP4_ERROR_NONE)
2300             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2301                                         adj1->rewrite_header.dst_mcast_offset,
2302                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2303         }
2304
2305       next += 2;
2306       b += 2;
2307       n_left_from -= 2;
2308     }
2309 #elif (CLIB_N_PREFETCHES >= 4)
2310   next = nexts;
2311   b = bufs;
2312   while (n_left_from >= 1)
2313     {
2314       ip_adjacency_t *adj0;
2315       ip4_header_t *ip0;
2316       u32 rw_len0, error0, adj_index0;
2317       u32 tx_sw_if_index0;
2318       u8 *p;
2319
2320       /* Prefetch next iteration */
2321       if (PREDICT_TRUE (n_left_from >= 4))
2322         {
2323           ip_adjacency_t *adj2;
2324           u32 adj_index2;
2325
2326           vlib_prefetch_buffer_header (b[3], LOAD);
2327           vlib_prefetch_buffer_data (b[2], LOAD);
2328
2329           /* Prefetch adj->rewrite_header */
2330           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2331           adj2 = adj_get (adj_index2);
2332           p = (u8 *) adj2;
2333           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2334                          LOAD);
2335         }
2336
2337       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2338
2339       /*
2340        * Prefetch the per-adjacency counters
2341        */
2342       if (do_counters)
2343         {
2344           vlib_prefetch_combined_counter (&adjacency_counters,
2345                                           thread_index, adj_index0);
2346         }
2347
2348       ip0 = vlib_buffer_get_current (b[0]);
2349
2350       error0 = IP4_ERROR_NONE;
2351
2352       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2353
2354       /* Rewrite packet header and updates lengths. */
2355       adj0 = adj_get (adj_index0);
2356
2357       /* Rewrite header was prefetched. */
2358       rw_len0 = adj0[0].rewrite_header.data_bytes;
2359       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2360
2361       /* Check MTU of outgoing interface. */
2362       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2363
2364       if (b[0]->flags & VNET_BUFFER_F_GSO)
2365         ip0_len = gso_mtu_sz (b[0]);
2366
2367       ip4_mtu_check (b[0], ip0_len,
2368                      adj0[0].rewrite_header.max_l3_packet_bytes,
2369                      ip0->flags_and_fragment_offset &
2370                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2371                      next + 0, is_midchain, &error0);
2372
2373       if (is_mcast)
2374         {
2375           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2376                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2377                     IP4_ERROR_SAME_INTERFACE : error0);
2378         }
2379
2380       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2381        * to see the IP header */
2382       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2383         {
2384           u32 next_index = adj0[0].rewrite_header.next_index;
2385           vlib_buffer_advance (b[0], -(word) rw_len0);
2386           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2387           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2388
2389           if (PREDICT_FALSE
2390               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2391             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2392                                                 tx_sw_if_index0,
2393                                                 &next_index, b[0],
2394                                                 adj0->ia_cfg_index);
2395           next[0] = next_index;
2396
2397           if (is_midchain)
2398             {
2399               /* Guess we are only writing on ipv4 header. */
2400               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2401             }
2402           else
2403             /* Guess we are only writing on simple Ethernet header. */
2404             vnet_rewrite_one_header (adj0[0], ip0,
2405                                      sizeof (ethernet_header_t));
2406
2407           /*
2408            * Bump the per-adjacency counters
2409            */
2410           if (do_counters)
2411             vlib_increment_combined_counter
2412               (&adjacency_counters,
2413                thread_index,
2414                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2415                                                            b[0]) + rw_len0);
2416
2417           if (is_midchain)
2418             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2419
2420           if (is_mcast)
2421             /* copy bytes from the IP address into the MAC rewrite */
2422             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2423                                         adj0->rewrite_header.dst_mcast_offset,
2424                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2425         }
2426       else
2427         {
2428           b[0]->error = error_node->errors[error0];
2429           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2430             ip4_ttl_inc (b[0], ip0);
2431         }
2432
2433       next += 1;
2434       b += 1;
2435       n_left_from -= 1;
2436     }
2437 #endif
2438
2439   while (n_left_from > 0)
2440     {
2441       ip_adjacency_t *adj0;
2442       ip4_header_t *ip0;
2443       u32 rw_len0, adj_index0, error0;
2444       u32 tx_sw_if_index0;
2445
2446       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2447
2448       adj0 = adj_get (adj_index0);
2449
2450       if (do_counters)
2451         vlib_prefetch_combined_counter (&adjacency_counters,
2452                                         thread_index, adj_index0);
2453
2454       ip0 = vlib_buffer_get_current (b[0]);
2455
2456       error0 = IP4_ERROR_NONE;
2457
2458       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2459
2460
2461       /* Update packet buffer attributes/set output interface. */
2462       rw_len0 = adj0[0].rewrite_header.data_bytes;
2463       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2464
2465       /* Check MTU of outgoing interface. */
2466       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2467       if (b[0]->flags & VNET_BUFFER_F_GSO)
2468         ip0_len = gso_mtu_sz (b[0]);
2469
2470       ip4_mtu_check (b[0], ip0_len,
2471                      adj0[0].rewrite_header.max_l3_packet_bytes,
2472                      ip0->flags_and_fragment_offset &
2473                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2474                      next + 0, is_midchain, &error0);
2475
2476       if (is_mcast)
2477         {
2478           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2479                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2480                     IP4_ERROR_SAME_INTERFACE : error0);
2481         }
2482
2483       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2484        * to see the IP header */
2485       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2486         {
2487           u32 next_index = adj0[0].rewrite_header.next_index;
2488           vlib_buffer_advance (b[0], -(word) rw_len0);
2489           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2490           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2491
2492           if (PREDICT_FALSE
2493               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2494             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2495                                                 tx_sw_if_index0,
2496                                                 &next_index, b[0],
2497                                                 adj0->ia_cfg_index);
2498           next[0] = next_index;
2499
2500           if (is_midchain)
2501             {
2502               /* Guess we are only writing on ipv4 header. */
2503               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2504             }
2505           else
2506             /* Guess we are only writing on simple Ethernet header. */
2507             vnet_rewrite_one_header (adj0[0], ip0,
2508                                      sizeof (ethernet_header_t));
2509
2510           if (do_counters)
2511             vlib_increment_combined_counter
2512               (&adjacency_counters,
2513                thread_index, adj_index0, 1,
2514                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2515
2516           if (is_midchain)
2517             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2518
2519           if (is_mcast)
2520             /* copy bytes from the IP address into the MAC rewrite */
2521             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2522                                         adj0->rewrite_header.dst_mcast_offset,
2523                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2524         }
2525       else
2526         {
2527           b[0]->error = error_node->errors[error0];
2528           /* undo the TTL decrement - we'll be back to do it again */
2529           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2530             ip4_ttl_inc (b[0], ip0);
2531         }
2532
2533       next += 1;
2534       b += 1;
2535       n_left_from -= 1;
2536     }
2537
2538
2539   /* Need to do trace after rewrites to pick up new packet data. */
2540   if (node->flags & VLIB_NODE_FLAG_TRACE)
2541     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2542
2543   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2544   return frame->n_vectors;
2545 }
2546
2547 /** @brief IPv4 rewrite node.
2548     @node ip4-rewrite
2549
2550     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2551     header checksum, fetch the ip adjacency, check the outbound mtu,
2552     apply the adjacency rewrite, and send pkts to the adjacency
2553     rewrite header's rewrite_next_index.
2554
2555     @param vm vlib_main_t corresponding to the current thread
2556     @param node vlib_node_runtime_t
2557     @param frame vlib_frame_t whose contents should be dispatched
2558
2559     @par Graph mechanics: buffer metadata, next index usage
2560
2561     @em Uses:
2562     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2563         - the rewrite adjacency index
2564     - <code>adj->lookup_next_index</code>
2565         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2566           the packet will be dropped.
2567     - <code>adj->rewrite_header</code>
2568         - Rewrite string length, rewrite string, next_index
2569
2570     @em Sets:
2571     - <code>b->current_data, b->current_length</code>
2572         - Updated net of applying the rewrite string
2573
2574     <em>Next Indices:</em>
2575     - <code> adj->rewrite_header.next_index </code>
2576       or @c ip4-drop
2577 */
2578
2579 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2580                                  vlib_frame_t * frame)
2581 {
2582   if (adj_are_counters_enabled ())
2583     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2584   else
2585     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2586 }
2587
2588 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2589                                        vlib_node_runtime_t * node,
2590                                        vlib_frame_t * frame)
2591 {
2592   if (adj_are_counters_enabled ())
2593     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2594   else
2595     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2596 }
2597
2598 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2599                                   vlib_node_runtime_t * node,
2600                                   vlib_frame_t * frame)
2601 {
2602   if (adj_are_counters_enabled ())
2603     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2604   else
2605     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2606 }
2607
2608 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2609                                        vlib_node_runtime_t * node,
2610                                        vlib_frame_t * frame)
2611 {
2612   if (adj_are_counters_enabled ())
2613     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2614   else
2615     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2616 }
2617
2618 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2619                                         vlib_node_runtime_t * node,
2620                                         vlib_frame_t * frame)
2621 {
2622   if (adj_are_counters_enabled ())
2623     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2624   else
2625     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2626 }
2627
2628 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2629   .name = "ip4-rewrite",
2630   .vector_size = sizeof (u32),
2631
2632   .format_trace = format_ip4_rewrite_trace,
2633
2634   .n_next_nodes = IP4_REWRITE_N_NEXT,
2635   .next_nodes = {
2636     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2637     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2638     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2639   },
2640 };
2641
2642 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2643   .name = "ip4-rewrite-bcast",
2644   .vector_size = sizeof (u32),
2645
2646   .format_trace = format_ip4_rewrite_trace,
2647   .sibling_of = "ip4-rewrite",
2648 };
2649
2650 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2651   .name = "ip4-rewrite-mcast",
2652   .vector_size = sizeof (u32),
2653
2654   .format_trace = format_ip4_rewrite_trace,
2655   .sibling_of = "ip4-rewrite",
2656 };
2657
2658 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2659   .name = "ip4-mcast-midchain",
2660   .vector_size = sizeof (u32),
2661
2662   .format_trace = format_ip4_rewrite_trace,
2663   .sibling_of = "ip4-rewrite",
2664 };
2665
2666 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2667   .name = "ip4-midchain",
2668   .vector_size = sizeof (u32),
2669   .format_trace = format_ip4_rewrite_trace,
2670   .sibling_of = "ip4-rewrite",
2671 };
2672
2673 static clib_error_t *
2674 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2675                              unformat_input_t * input,
2676                              vlib_cli_command_t * cmd)
2677 {
2678   int matched = 0;
2679   u32 table_id = 0;
2680   u32 flow_hash_config = 0;
2681   int rv;
2682
2683   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2684     {
2685       if (unformat (input, "table %d", &table_id))
2686         matched = 1;
2687 #define _(a, b, v)                                                            \
2688   else if (unformat (input, #a))                                              \
2689   {                                                                           \
2690     flow_hash_config |= v;                                                    \
2691     matched = 1;                                                              \
2692   }
2693       foreach_flow_hash_bit
2694 #undef _
2695         else
2696         break;
2697     }
2698
2699   if (matched == 0)
2700     return clib_error_return (0, "unknown input `%U'",
2701                               format_unformat_error, input);
2702
2703   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2704   switch (rv)
2705     {
2706     case 0:
2707       break;
2708
2709     case VNET_API_ERROR_NO_SUCH_FIB:
2710       return clib_error_return (0, "no such FIB table %d", table_id);
2711
2712     default:
2713       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2714       break;
2715     }
2716
2717   return 0;
2718 }
2719
2720 /*?
2721  * Configure the set of IPv4 fields used by the flow hash.
2722  *
2723  * @cliexpar
2724  * Example of how to set the flow hash on a given table:
2725  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2726  * Example of how to display the configured flow hash:
2727  * @cliexstart{show ip fib}
2728  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2729  * 0.0.0.0/0
2730  *   unicast-ip4-chain
2731  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2732  *     [0] [@0]: dpo-drop ip6
2733  * 0.0.0.0/32
2734  *   unicast-ip4-chain
2735  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2736  *     [0] [@0]: dpo-drop ip6
2737  * 224.0.0.0/8
2738  *   unicast-ip4-chain
2739  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2740  *     [0] [@0]: dpo-drop ip6
2741  * 6.0.1.2/32
2742  *   unicast-ip4-chain
2743  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2744  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2745  * 7.0.0.1/32
2746  *   unicast-ip4-chain
2747  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2748  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2749  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2750  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2751  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2752  * 240.0.0.0/8
2753  *   unicast-ip4-chain
2754  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2755  *     [0] [@0]: dpo-drop ip6
2756  * 255.255.255.255/32
2757  *   unicast-ip4-chain
2758  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2759  *     [0] [@0]: dpo-drop ip6
2760  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2761  * 0.0.0.0/0
2762  *   unicast-ip4-chain
2763  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2764  *     [0] [@0]: dpo-drop ip6
2765  * 0.0.0.0/32
2766  *   unicast-ip4-chain
2767  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2768  *     [0] [@0]: dpo-drop ip6
2769  * 172.16.1.0/24
2770  *   unicast-ip4-chain
2771  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2772  *     [0] [@4]: ipv4-glean: af_packet0
2773  * 172.16.1.1/32
2774  *   unicast-ip4-chain
2775  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2776  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2777  * 172.16.1.2/32
2778  *   unicast-ip4-chain
2779  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2780  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2781  * 172.16.2.0/24
2782  *   unicast-ip4-chain
2783  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2784  *     [0] [@4]: ipv4-glean: af_packet1
2785  * 172.16.2.1/32
2786  *   unicast-ip4-chain
2787  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2788  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2789  * 224.0.0.0/8
2790  *   unicast-ip4-chain
2791  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2792  *     [0] [@0]: dpo-drop ip6
2793  * 240.0.0.0/8
2794  *   unicast-ip4-chain
2795  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2796  *     [0] [@0]: dpo-drop ip6
2797  * 255.255.255.255/32
2798  *   unicast-ip4-chain
2799  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2800  *     [0] [@0]: dpo-drop ip6
2801  * @cliexend
2802 ?*/
2803 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
2804   .path = "set ip flow-hash",
2805   .short_help = "set ip flow-hash table <table-id> [src] [dst] [sport] "
2806                 "[dport] [proto] [reverse] [gtpv1teid]",
2807   .function = set_ip_flow_hash_command_fn,
2808 };
2809
2810 #ifndef CLIB_MARCH_VARIANT
2811 int
2812 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2813                              u32 table_index)
2814 {
2815   vnet_main_t *vnm = vnet_get_main ();
2816   vnet_interface_main_t *im = &vnm->interface_main;
2817   ip4_main_t *ipm = &ip4_main;
2818   ip_lookup_main_t *lm = &ipm->lookup_main;
2819   vnet_classify_main_t *cm = &vnet_classify_main;
2820   ip4_address_t *if_addr;
2821
2822   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2823     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2824
2825   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2826     return VNET_API_ERROR_NO_SUCH_ENTRY;
2827
2828   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2829   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2830
2831   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2832
2833   if (NULL != if_addr)
2834     {
2835       fib_prefix_t pfx = {
2836         .fp_len = 32,
2837         .fp_proto = FIB_PROTOCOL_IP4,
2838         .fp_addr.ip4 = *if_addr,
2839       };
2840       u32 fib_index;
2841
2842       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2843                                                        sw_if_index);
2844
2845
2846       if (table_index != (u32) ~ 0)
2847         {
2848           dpo_id_t dpo = DPO_INVALID;
2849
2850           dpo_set (&dpo,
2851                    DPO_CLASSIFY,
2852                    DPO_PROTO_IP4,
2853                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2854
2855           fib_table_entry_special_dpo_add (fib_index,
2856                                            &pfx,
2857                                            FIB_SOURCE_CLASSIFY,
2858                                            FIB_ENTRY_FLAG_NONE, &dpo);
2859           dpo_reset (&dpo);
2860         }
2861       else
2862         {
2863           fib_table_entry_special_remove (fib_index,
2864                                           &pfx, FIB_SOURCE_CLASSIFY);
2865         }
2866     }
2867
2868   return 0;
2869 }
2870 #endif
2871
2872 static clib_error_t *
2873 set_ip_classify_command_fn (vlib_main_t * vm,
2874                             unformat_input_t * input,
2875                             vlib_cli_command_t * cmd)
2876 {
2877   u32 table_index = ~0;
2878   int table_index_set = 0;
2879   u32 sw_if_index = ~0;
2880   int rv;
2881
2882   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2883     {
2884       if (unformat (input, "table-index %d", &table_index))
2885         table_index_set = 1;
2886       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2887                          vnet_get_main (), &sw_if_index))
2888         ;
2889       else
2890         break;
2891     }
2892
2893   if (table_index_set == 0)
2894     return clib_error_return (0, "classify table-index must be specified");
2895
2896   if (sw_if_index == ~0)
2897     return clib_error_return (0, "interface / subif must be specified");
2898
2899   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2900
2901   switch (rv)
2902     {
2903     case 0:
2904       break;
2905
2906     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2907       return clib_error_return (0, "No such interface");
2908
2909     case VNET_API_ERROR_NO_SUCH_ENTRY:
2910       return clib_error_return (0, "No such classifier table");
2911     }
2912   return 0;
2913 }
2914
2915 /*?
2916  * Assign a classification table to an interface. The classification
2917  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2918  * commands. Once the table is created, use this command to filter packets
2919  * on an interface.
2920  *
2921  * @cliexpar
2922  * Example of how to assign a classification table to an interface:
2923  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2924 ?*/
2925 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2926 {
2927     .path = "set ip classify",
2928     .short_help =
2929     "set ip classify intfc <interface> table-index <classify-idx>",
2930     .function = set_ip_classify_command_fn,
2931 };
2932
2933 /*
2934  * fd.io coding-style-patch-verification: ON
2935  *
2936  * Local Variables:
2937  * eval: (c-set-style "gnu")
2938  * End:
2939  */