e85c888f6695a0b4c58c6d9f8a66728332f3bdc2
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/receive_dpo.h>
56 #include <vnet/dpo/classify_dpo.h>
57 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
58 #include <vnet/adj/adj_dp.h>
59 #include <vnet/pg/pg.h>
60
61 #include <vnet/ip/ip4_forward.h>
62 #include <vnet/interface_output.h>
63 #include <vnet/classify/vnet_classify.h>
64 #include <vnet/ip/reass/ip4_full_reass.h>
65
66 /** @brief IPv4 lookup node.
67     @node ip4-lookup
68
69     This is the main IPv4 lookup dispatch node.
70
71     @param vm vlib_main_t corresponding to the current thread
72     @param node vlib_node_runtime_t
73     @param frame vlib_frame_t whose contents should be dispatched
74
75     @par Graph mechanics: buffer metadata, next index usage
76
77     @em Uses:
78     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
79         - Indicates the @c sw_if_index value of the interface that the
80           packet was received on.
81     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
82         - When the value is @c ~0 then the node performs a longest prefix
83           match (LPM) for the packet destination address in the FIB attached
84           to the receive interface.
85         - Otherwise perform LPM for the packet destination address in the
86           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
87           value (0, 1, ...) and not a VRF id.
88
89     @em Sets:
90     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
91         - The lookup result adjacency index.
92
93     <em>Next Index:</em>
94     - Dispatches the packet to the node index found in
95       ip_adjacency_t @c adj->lookup_next_index
96       (where @c adj is the lookup result adjacency).
97 */
98 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
99                                 vlib_frame_t * frame)
100 {
101   return ip4_lookup_inline (vm, node, frame);
102 }
103
104 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
105
106 VLIB_REGISTER_NODE (ip4_lookup_node) =
107 {
108   .name = "ip4-lookup",
109   .vector_size = sizeof (u32),
110   .format_trace = format_ip4_lookup_trace,
111   .n_next_nodes = IP_LOOKUP_N_NEXT,
112   .next_nodes = IP4_LOOKUP_NEXT_NODES,
113 };
114
115 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
116                                       vlib_node_runtime_t * node,
117                                       vlib_frame_t * frame)
118 {
119   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
120   u32 n_left, *from;
121   u32 thread_index = vm->thread_index;
122   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
123   u16 nexts[VLIB_FRAME_SIZE], *next;
124
125   from = vlib_frame_vector_args (frame);
126   n_left = frame->n_vectors;
127   next = nexts;
128
129   vlib_get_buffers (vm, from, bufs, n_left);
130
131   while (n_left >= 4)
132     {
133       const load_balance_t *lb0, *lb1;
134       const ip4_header_t *ip0, *ip1;
135       u32 lbi0, hc0, lbi1, hc1;
136       const dpo_id_t *dpo0, *dpo1;
137
138       /* Prefetch next iteration. */
139       {
140         vlib_prefetch_buffer_header (b[2], LOAD);
141         vlib_prefetch_buffer_header (b[3], LOAD);
142
143         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
144         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
145       }
146
147       ip0 = vlib_buffer_get_current (b[0]);
148       ip1 = vlib_buffer_get_current (b[1]);
149       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
150       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
151
152       lb0 = load_balance_get (lbi0);
153       lb1 = load_balance_get (lbi1);
154
155       /*
156        * this node is for via FIBs we can re-use the hash value from the
157        * to node if present.
158        * We don't want to use the same hash value at each level in the recursion
159        * graph as that would lead to polarisation
160        */
161       hc0 = hc1 = 0;
162
163       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
164         {
165           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
166             {
167               hc0 = vnet_buffer (b[0])->ip.flow_hash =
168                 vnet_buffer (b[0])->ip.flow_hash >> 1;
169             }
170           else
171             {
172               hc0 = vnet_buffer (b[0])->ip.flow_hash =
173                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
174             }
175           dpo0 = load_balance_get_fwd_bucket
176             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
177         }
178       else
179         {
180           dpo0 = load_balance_get_bucket_i (lb0, 0);
181         }
182       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
183         {
184           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
185             {
186               hc1 = vnet_buffer (b[1])->ip.flow_hash =
187                 vnet_buffer (b[1])->ip.flow_hash >> 1;
188             }
189           else
190             {
191               hc1 = vnet_buffer (b[1])->ip.flow_hash =
192                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
193             }
194           dpo1 = load_balance_get_fwd_bucket
195             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
196         }
197       else
198         {
199           dpo1 = load_balance_get_bucket_i (lb1, 0);
200         }
201
202       next[0] = dpo0->dpoi_next_node;
203       next[1] = dpo1->dpoi_next_node;
204
205       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
206       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
207
208       vlib_increment_combined_counter
209         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
210       vlib_increment_combined_counter
211         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
212
213       b += 2;
214       next += 2;
215       n_left -= 2;
216     }
217
218   while (n_left > 0)
219     {
220       const load_balance_t *lb0;
221       const ip4_header_t *ip0;
222       const dpo_id_t *dpo0;
223       u32 lbi0, hc0;
224
225       ip0 = vlib_buffer_get_current (b[0]);
226       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
227
228       lb0 = load_balance_get (lbi0);
229
230       hc0 = 0;
231       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
232         {
233           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
234             {
235               hc0 = vnet_buffer (b[0])->ip.flow_hash =
236                 vnet_buffer (b[0])->ip.flow_hash >> 1;
237             }
238           else
239             {
240               hc0 = vnet_buffer (b[0])->ip.flow_hash =
241                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
242             }
243           dpo0 = load_balance_get_fwd_bucket
244             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
245         }
246       else
247         {
248           dpo0 = load_balance_get_bucket_i (lb0, 0);
249         }
250
251       next[0] = dpo0->dpoi_next_node;
252       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
253
254       vlib_increment_combined_counter
255         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
256
257       b += 1;
258       next += 1;
259       n_left -= 1;
260     }
261
262   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
263   if (node->flags & VLIB_NODE_FLAG_TRACE)
264     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
265
266   return frame->n_vectors;
267 }
268
269 VLIB_REGISTER_NODE (ip4_load_balance_node) =
270 {
271   .name = "ip4-load-balance",
272   .vector_size = sizeof (u32),
273   .sibling_of = "ip4-lookup",
274   .format_trace = format_ip4_lookup_trace,
275 };
276
277 #ifndef CLIB_MARCH_VARIANT
278 /* get first interface address */
279 ip4_address_t *
280 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
281                              ip_interface_address_t ** result_ia)
282 {
283   ip_lookup_main_t *lm = &im->lookup_main;
284   ip_interface_address_t *ia = 0;
285   ip4_address_t *result = 0;
286
287   foreach_ip_interface_address
288     (lm, ia, sw_if_index,
289      1 /* honor unnumbered */ ,
290      ({
291        ip4_address_t * a =
292          ip_interface_address_get_address (lm, ia);
293        result = a;
294        break;
295      }));
296   if (result_ia)
297     *result_ia = result ? ia : 0;
298   return result;
299 }
300 #endif
301
302 static void
303 ip4_add_subnet_bcast_route (u32 fib_index,
304                             fib_prefix_t *pfx,
305                             u32 sw_if_index)
306 {
307   vnet_sw_interface_flags_t iflags;
308
309   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
310
311   fib_table_entry_special_remove(fib_index,
312                                  pfx,
313                                  FIB_SOURCE_INTERFACE);
314
315   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
316     {
317       fib_table_entry_update_one_path (fib_index, pfx,
318                                        FIB_SOURCE_INTERFACE,
319                                        FIB_ENTRY_FLAG_NONE,
320                                        DPO_PROTO_IP4,
321                                        /* No next-hop address */
322                                        &ADJ_BCAST_ADDR,
323                                        sw_if_index,
324                                        // invalid FIB index
325                                        ~0,
326                                        1,
327                                        // no out-label stack
328                                        NULL,
329                                        FIB_ROUTE_PATH_FLAG_NONE);
330     }
331   else
332     {
333         fib_table_entry_special_add(fib_index,
334                                     pfx,
335                                     FIB_SOURCE_INTERFACE,
336                                     (FIB_ENTRY_FLAG_DROP |
337                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
338     }
339 }
340
341 static void
342 ip4_add_interface_prefix_routes (ip4_main_t *im,
343                                  u32 sw_if_index,
344                                  u32 fib_index,
345                                  ip_interface_address_t * a)
346 {
347   ip_lookup_main_t *lm = &im->lookup_main;
348   ip_interface_prefix_t *if_prefix;
349   ip4_address_t *address = ip_interface_address_get_address (lm, a);
350
351   ip_interface_prefix_key_t key = {
352     .prefix = {
353       .fp_len = a->address_length,
354       .fp_proto = FIB_PROTOCOL_IP4,
355       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
356     },
357     .sw_if_index = sw_if_index,
358   };
359
360   fib_prefix_t pfx_special = {
361     .fp_proto = FIB_PROTOCOL_IP4,
362   };
363
364   /* If prefix already set on interface, just increment ref count & return */
365   if_prefix = ip_get_interface_prefix (lm, &key);
366   if (if_prefix)
367     {
368       if_prefix->ref_count += 1;
369       return;
370     }
371
372   /* New prefix - allocate a pool entry, initialize it, add to the hash */
373   pool_get (lm->if_prefix_pool, if_prefix);
374   if_prefix->ref_count = 1;
375   if_prefix->src_ia_index = a - lm->if_address_pool;
376   clib_memcpy (&if_prefix->key, &key, sizeof (key));
377   mhash_set (&lm->prefix_to_if_prefix_index, &key,
378              if_prefix - lm->if_prefix_pool, 0 /* old value */);
379
380   pfx_special.fp_len = a->address_length;
381   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
382
383   /* set the glean route for the prefix */
384   fib_table_entry_update_one_path (fib_index, &pfx_special,
385                                    FIB_SOURCE_INTERFACE,
386                                    (FIB_ENTRY_FLAG_CONNECTED |
387                                     FIB_ENTRY_FLAG_ATTACHED),
388                                    DPO_PROTO_IP4,
389                                    /* No next-hop address */
390                                    NULL,
391                                    sw_if_index,
392                                    /* invalid FIB index */
393                                    ~0,
394                                    1,
395                                    /* no out-label stack */
396                                    NULL,
397                                    FIB_ROUTE_PATH_FLAG_NONE);
398
399   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
400   if (a->address_length <= 30)
401     {
402       /* set a drop route for the base address of the prefix */
403       pfx_special.fp_len = 32;
404       pfx_special.fp_addr.ip4.as_u32 =
405         address->as_u32 & im->fib_masks[a->address_length];
406
407       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
408         fib_table_entry_special_add (fib_index, &pfx_special,
409                                      FIB_SOURCE_INTERFACE,
410                                      (FIB_ENTRY_FLAG_DROP |
411                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
412
413       /* set a route for the broadcast address of the prefix */
414       pfx_special.fp_len = 32;
415       pfx_special.fp_addr.ip4.as_u32 =
416         address->as_u32 | ~im->fib_masks[a->address_length];
417       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
418         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
419
420
421     }
422   /* length == 31 - add an attached route for the other address */
423   else if (a->address_length == 31)
424     {
425       pfx_special.fp_len = 32;
426       pfx_special.fp_addr.ip4.as_u32 =
427         address->as_u32 ^ clib_host_to_net_u32(1);
428
429       fib_table_entry_update_one_path (fib_index, &pfx_special,
430                                        FIB_SOURCE_INTERFACE,
431                                        (FIB_ENTRY_FLAG_ATTACHED),
432                                        DPO_PROTO_IP4,
433                                        &pfx_special.fp_addr,
434                                        sw_if_index,
435                                        /* invalid FIB index */
436                                        ~0,
437                                        1,
438                                        NULL,
439                                        FIB_ROUTE_PATH_FLAG_NONE);
440     }
441 }
442
443 static void
444 ip4_add_interface_routes (u32 sw_if_index,
445                           ip4_main_t * im, u32 fib_index,
446                           ip_interface_address_t * a)
447 {
448   ip_lookup_main_t *lm = &im->lookup_main;
449   ip4_address_t *address = ip_interface_address_get_address (lm, a);
450   fib_prefix_t pfx = {
451     .fp_len = 32,
452     .fp_proto = FIB_PROTOCOL_IP4,
453     .fp_addr.ip4 = *address,
454   };
455
456   /* set special routes for the prefix if needed */
457   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
458
459   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
460     {
461       u32 classify_table_index =
462         lm->classify_table_index_by_sw_if_index[sw_if_index];
463       if (classify_table_index != (u32) ~ 0)
464         {
465           dpo_id_t dpo = DPO_INVALID;
466
467           dpo_set (&dpo,
468                    DPO_CLASSIFY,
469                    DPO_PROTO_IP4,
470                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
471
472           fib_table_entry_special_dpo_add (fib_index,
473                                            &pfx,
474                                            FIB_SOURCE_CLASSIFY,
475                                            FIB_ENTRY_FLAG_NONE, &dpo);
476           dpo_reset (&dpo);
477         }
478     }
479
480   fib_table_entry_update_one_path (fib_index, &pfx,
481                                    FIB_SOURCE_INTERFACE,
482                                    (FIB_ENTRY_FLAG_CONNECTED |
483                                     FIB_ENTRY_FLAG_LOCAL),
484                                    DPO_PROTO_IP4,
485                                    &pfx.fp_addr,
486                                    sw_if_index,
487                                    // invalid FIB index
488                                    ~0,
489                                    1, NULL,
490                                    FIB_ROUTE_PATH_FLAG_NONE);
491 }
492
493 static void
494 ip4_del_interface_prefix_routes (ip4_main_t * im,
495                                  u32 sw_if_index,
496                                  u32 fib_index,
497                                  ip4_address_t * address,
498                                  u32 address_length)
499 {
500   ip_lookup_main_t *lm = &im->lookup_main;
501   ip_interface_prefix_t *if_prefix;
502
503   ip_interface_prefix_key_t key = {
504     .prefix = {
505       .fp_len = address_length,
506       .fp_proto = FIB_PROTOCOL_IP4,
507       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
508     },
509     .sw_if_index = sw_if_index,
510   };
511
512   fib_prefix_t pfx_special = {
513     .fp_len = 32,
514     .fp_proto = FIB_PROTOCOL_IP4,
515   };
516
517   if_prefix = ip_get_interface_prefix (lm, &key);
518   if (!if_prefix)
519     {
520       clib_warning ("Prefix not found while deleting %U",
521                     format_ip4_address_and_length, address, address_length);
522       return;
523     }
524
525   if_prefix->ref_count -= 1;
526
527   /*
528    * Routes need to be adjusted if deleting last intf addr in prefix
529    *
530    * We're done now otherwise
531    */
532   if (if_prefix->ref_count > 0)
533     return;
534
535   /* length <= 30, delete glean route, first address, last address */
536   if (address_length <= 30)
537     {
538       /* Less work to do in FIB if we remove the covered /32s first */
539
540       /* first address in prefix */
541       pfx_special.fp_addr.ip4.as_u32 =
542         address->as_u32 & im->fib_masks[address_length];
543       pfx_special.fp_len = 32;
544
545       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
546         fib_table_entry_special_remove (fib_index,
547                                         &pfx_special,
548                                         FIB_SOURCE_INTERFACE);
549
550       /* prefix broadcast address */
551       pfx_special.fp_addr.ip4.as_u32 =
552         address->as_u32 | ~im->fib_masks[address_length];
553       pfx_special.fp_len = 32;
554
555       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
556         fib_table_entry_special_remove (fib_index,
557                                         &pfx_special,
558                                         FIB_SOURCE_INTERFACE);
559     }
560   else if (address_length == 31)
561     {
562       /* length == 31, delete attached route for the other address */
563       pfx_special.fp_addr.ip4.as_u32 =
564         address->as_u32 ^ clib_host_to_net_u32(1);
565
566       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
567     }
568
569   /* remove glean route for prefix */
570   pfx_special.fp_addr.ip4 = *address;
571   pfx_special.fp_len = address_length;
572   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
573
574   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
575   pool_put (lm->if_prefix_pool, if_prefix);
576 }
577
578 static void
579 ip4_del_interface_routes (u32 sw_if_index,
580                           ip4_main_t * im,
581                           u32 fib_index,
582                           ip4_address_t * address, u32 address_length)
583 {
584   fib_prefix_t pfx = {
585     .fp_len = 32,
586     .fp_proto = FIB_PROTOCOL_IP4,
587     .fp_addr.ip4 = *address,
588   };
589
590   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
591
592   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
593                                    address, address_length);
594 }
595
596 #ifndef CLIB_MARCH_VARIANT
597 void
598 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
599 {
600   ip4_main_t *im = &ip4_main;
601   vnet_main_t *vnm = vnet_get_main ();
602   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
603
604   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
605
606   /*
607    * enable/disable only on the 1<->0 transition
608    */
609   if (is_enable)
610     {
611       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
612         return;
613     }
614   else
615     {
616       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
617       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
618         return;
619     }
620   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
621                                !is_enable, 0, 0);
622
623
624   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
625                                sw_if_index, !is_enable, 0, 0);
626
627   if (is_enable)
628     hi->l3_if_count++;
629   else if (hi->l3_if_count)
630     hi->l3_if_count--;
631
632   {
633     ip4_enable_disable_interface_callback_t *cb;
634     vec_foreach (cb, im->enable_disable_interface_callbacks)
635       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
636   }
637 }
638
639 static clib_error_t *
640 ip4_add_del_interface_address_internal (vlib_main_t * vm,
641                                         u32 sw_if_index,
642                                         ip4_address_t * address,
643                                         u32 address_length, u32 is_del)
644 {
645   vnet_main_t *vnm = vnet_get_main ();
646   ip4_main_t *im = &ip4_main;
647   ip_lookup_main_t *lm = &im->lookup_main;
648   clib_error_t *error = 0;
649   u32 if_address_index;
650   ip4_address_fib_t ip4_af, *addr_fib = 0;
651
652   error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
653   if (error)
654     {
655       vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
656       return error;
657     }
658
659   ip4_addr_fib_init (&ip4_af, address,
660                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
661   vec_add1 (addr_fib, ip4_af);
662
663   /*
664    * there is no support for adj-fib handling in the presence of overlapping
665    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
666    * most routers do.
667    */
668   if (!is_del)
669     {
670       /* When adding an address check that it does not conflict
671          with an existing address on any interface in this table. */
672       ip_interface_address_t *ia;
673       vnet_sw_interface_t *sif;
674
675       pool_foreach (sif, vnm->interface_main.sw_interfaces)
676        {
677           if (im->fib_index_by_sw_if_index[sw_if_index] ==
678               im->fib_index_by_sw_if_index[sif->sw_if_index])
679             {
680               foreach_ip_interface_address
681                 (&im->lookup_main, ia, sif->sw_if_index,
682                  0 /* honor unnumbered */ ,
683                  ({
684                    ip4_address_t * x =
685                      ip_interface_address_get_address
686                      (&im->lookup_main, ia);
687
688                    if (ip4_destination_matches_route
689                        (im, address, x, ia->address_length) ||
690                        ip4_destination_matches_route (im,
691                                                       x,
692                                                       address,
693                                                       address_length))
694                      {
695                        /* an intf may have >1 addr from the same prefix */
696                        if ((sw_if_index == sif->sw_if_index) &&
697                            (ia->address_length == address_length) &&
698                            (x->as_u32 != address->as_u32))
699                          continue;
700
701                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
702                          /* if the address we're comparing against is stale
703                           * then the CP has not added this one back yet, maybe
704                           * it never will, so we have to assume it won't and
705                           * ignore it. if it does add it back, then it will fail
706                           * because this one is now present */
707                          continue;
708
709                        /* error if the length or intf was different */
710                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
711
712                        error = clib_error_create
713                          ("failed to add %U on %U which conflicts with %U for interface %U",
714                           format_ip4_address_and_length, address,
715                           address_length,
716                           format_vnet_sw_if_index_name, vnm,
717                           sw_if_index,
718                           format_ip4_address_and_length, x,
719                           ia->address_length,
720                           format_vnet_sw_if_index_name, vnm,
721                           sif->sw_if_index);
722                        goto done;
723                      }
724                  }));
725             }
726       }
727     }
728
729   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
730
731   if (is_del)
732     {
733       if (~0 == if_address_index)
734         {
735           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
736           error = clib_error_create ("%U not found for interface %U",
737                                      lm->format_address_and_length,
738                                      addr_fib, address_length,
739                                      format_vnet_sw_if_index_name, vnm,
740                                      sw_if_index);
741           goto done;
742         }
743
744       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
745                                         address_length, sw_if_index);
746       if (error)
747         goto done;
748     }
749   else
750     {
751       if (~0 != if_address_index)
752         {
753           ip_interface_address_t *ia;
754
755           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
756
757           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
758             {
759               if (ia->sw_if_index == sw_if_index)
760                 {
761                   /* re-adding an address during the replace action.
762                    * consdier this the update. clear the flag and
763                    * we're done */
764                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
765                   goto done;
766                 }
767               else
768                 {
769                   /* The prefix is moving from one interface to another.
770                    * delete the stale and add the new */
771                   ip4_add_del_interface_address_internal (vm,
772                                                           ia->sw_if_index,
773                                                           address,
774                                                           address_length, 1);
775                   ia = NULL;
776                   error = ip_interface_address_add (lm, sw_if_index,
777                                                     addr_fib, address_length,
778                                                     &if_address_index);
779                 }
780             }
781           else
782             {
783               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
784               error = clib_error_create
785                 ("Prefix %U already found on interface %U",
786                  lm->format_address_and_length, addr_fib, address_length,
787                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
788             }
789         }
790       else
791         error = ip_interface_address_add (lm, sw_if_index,
792                                           addr_fib, address_length,
793                                           &if_address_index);
794     }
795
796   if (error)
797     goto done;
798
799   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
800   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
801
802   /* intf addr routes are added/deleted on admin up/down */
803   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
804     {
805       if (is_del)
806         ip4_del_interface_routes (sw_if_index,
807                                   im, ip4_af.fib_index, address,
808                                   address_length);
809       else
810         ip4_add_interface_routes (sw_if_index,
811                                   im, ip4_af.fib_index,
812                                   pool_elt_at_index
813                                   (lm->if_address_pool, if_address_index));
814     }
815
816   ip4_add_del_interface_address_callback_t *cb;
817   vec_foreach (cb, im->add_del_interface_address_callbacks)
818     cb->function (im, cb->function_opaque, sw_if_index,
819                   address, address_length, if_address_index, is_del);
820
821 done:
822   vec_free (addr_fib);
823   return error;
824 }
825
826 clib_error_t *
827 ip4_add_del_interface_address (vlib_main_t * vm,
828                                u32 sw_if_index,
829                                ip4_address_t * address,
830                                u32 address_length, u32 is_del)
831 {
832   return ip4_add_del_interface_address_internal
833     (vm, sw_if_index, address, address_length, is_del);
834 }
835
836 void
837 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
838 {
839   ip_interface_address_t *ia;
840   ip4_main_t *im;
841
842   im = &ip4_main;
843
844   /*
845    * when directed broadcast is enabled, the subnet braodcast route will forward
846    * packets using an adjacency with a broadcast MAC. otherwise it drops
847    */
848   foreach_ip_interface_address(&im->lookup_main, ia,
849                                sw_if_index, 0,
850      ({
851        if (ia->address_length <= 30)
852          {
853            ip4_address_t *ipa;
854
855            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
856
857            fib_prefix_t pfx = {
858              .fp_len = 32,
859              .fp_proto = FIB_PROTOCOL_IP4,
860              .fp_addr = {
861                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
862              },
863            };
864
865            ip4_add_subnet_bcast_route
866              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
867                                                   sw_if_index),
868               &pfx, sw_if_index);
869          }
870      }));
871 }
872 #endif
873
874 static clib_error_t *
875 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
876 {
877   ip4_main_t *im = &ip4_main;
878   ip_interface_address_t *ia;
879   ip4_address_t *a;
880   u32 is_admin_up, fib_index;
881
882   vec_validate_init_empty (im->
883                            lookup_main.if_address_pool_index_by_sw_if_index,
884                            sw_if_index, ~0);
885
886   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
887
888   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
889
890   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
891                                 0 /* honor unnumbered */,
892   ({
893     a = ip_interface_address_get_address (&im->lookup_main, ia);
894     if (is_admin_up)
895       ip4_add_interface_routes (sw_if_index,
896                                 im, fib_index,
897                                 ia);
898     else
899       ip4_del_interface_routes (sw_if_index,
900                                 im, fib_index,
901                                 a, ia->address_length);
902   }));
903
904   return 0;
905 }
906
907 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
908
909 /* Built-in ip4 unicast rx feature path definition */
910 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
911 {
912   .arc_name = "ip4-unicast",
913   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
914   .last_in_arc = "ip4-lookup",
915   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
916 };
917
918 VNET_FEATURE_INIT (ip4_flow_classify, static) =
919 {
920   .arc_name = "ip4-unicast",
921   .node_name = "ip4-flow-classify",
922   .runs_before = VNET_FEATURES ("ip4-inacl"),
923 };
924
925 VNET_FEATURE_INIT (ip4_inacl, static) =
926 {
927   .arc_name = "ip4-unicast",
928   .node_name = "ip4-inacl",
929   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
930 };
931
932 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
933 {
934   .arc_name = "ip4-unicast",
935   .node_name = "ip4-source-and-port-range-check-rx",
936   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
937 };
938
939 VNET_FEATURE_INIT (ip4_policer_classify, static) =
940 {
941   .arc_name = "ip4-unicast",
942   .node_name = "ip4-policer-classify",
943   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
944 };
945
946 VNET_FEATURE_INIT (ip4_ipsec, static) =
947 {
948   .arc_name = "ip4-unicast",
949   .node_name = "ipsec4-input-feature",
950   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
951 };
952
953 VNET_FEATURE_INIT (ip4_vpath, static) =
954 {
955   .arc_name = "ip4-unicast",
956   .node_name = "vpath-input-ip4",
957   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
958 };
959
960 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
961 {
962   .arc_name = "ip4-unicast",
963   .node_name = "ip4-vxlan-bypass",
964   .runs_before = VNET_FEATURES ("ip4-lookup"),
965 };
966
967 VNET_FEATURE_INIT (ip4_not_enabled, static) =
968 {
969   .arc_name = "ip4-unicast",
970   .node_name = "ip4-not-enabled",
971   .runs_before = VNET_FEATURES ("ip4-lookup"),
972 };
973
974 VNET_FEATURE_INIT (ip4_lookup, static) =
975 {
976   .arc_name = "ip4-unicast",
977   .node_name = "ip4-lookup",
978   .runs_before = 0,     /* not before any other features */
979 };
980
981 /* Built-in ip4 multicast rx feature path definition */
982 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
983 {
984   .arc_name = "ip4-multicast",
985   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
986   .last_in_arc = "ip4-mfib-forward-lookup",
987   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
988 };
989
990 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
991 {
992   .arc_name = "ip4-multicast",
993   .node_name = "vpath-input-ip4",
994   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
995 };
996
997 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
998 {
999   .arc_name = "ip4-multicast",
1000   .node_name = "ip4-not-enabled",
1001   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1002 };
1003
1004 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1005 {
1006   .arc_name = "ip4-multicast",
1007   .node_name = "ip4-mfib-forward-lookup",
1008   .runs_before = 0,     /* last feature */
1009 };
1010
1011 /* Source and port-range check ip4 tx feature path definition */
1012 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1013 {
1014   .arc_name = "ip4-output",
1015   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1016   .last_in_arc = "interface-output",
1017   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1018 };
1019
1020 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1021 {
1022   .arc_name = "ip4-output",
1023   .node_name = "ip4-source-and-port-range-check-tx",
1024   .runs_before = VNET_FEATURES ("ip4-outacl"),
1025 };
1026
1027 VNET_FEATURE_INIT (ip4_outacl, static) =
1028 {
1029   .arc_name = "ip4-output",
1030   .node_name = "ip4-outacl",
1031   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
1032 };
1033
1034 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1035 {
1036   .arc_name = "ip4-output",
1037   .node_name = "ipsec4-output-feature",
1038   .runs_before = VNET_FEATURES ("interface-output"),
1039 };
1040
1041 /* Built-in ip4 tx feature path definition */
1042 VNET_FEATURE_INIT (ip4_interface_output, static) =
1043 {
1044   .arc_name = "ip4-output",
1045   .node_name = "interface-output",
1046   .runs_before = 0,     /* not before any other features */
1047 };
1048
1049 static clib_error_t *
1050 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1051 {
1052   ip4_main_t *im = &ip4_main;
1053
1054   vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
1055   vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
1056
1057   if (is_add)
1058     {
1059       /* Fill in lookup tables with default table (0). */
1060       im->fib_index_by_sw_if_index[sw_if_index] = 0;
1061       im->mfib_index_by_sw_if_index[sw_if_index] = 0;
1062     }
1063   else
1064     {
1065       ip4_main_t *im4 = &ip4_main;
1066       ip_lookup_main_t *lm4 = &im4->lookup_main;
1067       ip_interface_address_t *ia = 0;
1068       ip4_address_t *address;
1069       vlib_main_t *vm = vlib_get_main ();
1070
1071       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1072       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1073       ({
1074         address = ip_interface_address_get_address (lm4, ia);
1075         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1076       }));
1077       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1078
1079       if (0 != im4->fib_index_by_sw_if_index[sw_if_index])
1080         fib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
1081       if (0 != im4->mfib_index_by_sw_if_index[sw_if_index])
1082         mfib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
1083
1084       /* Erase the lookup tables just in case */
1085       im4->fib_index_by_sw_if_index[sw_if_index] = ~0;
1086       im4->mfib_index_by_sw_if_index[sw_if_index] = ~0;
1087     }
1088
1089   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1090                                is_add, 0, 0);
1091
1092   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1093                                sw_if_index, is_add, 0, 0);
1094
1095   return /* no error */ 0;
1096 }
1097
1098 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1099
1100 /* Global IP4 main. */
1101 #ifndef CLIB_MARCH_VARIANT
1102 ip4_main_t ip4_main;
1103 #endif /* CLIB_MARCH_VARIANT */
1104
1105 static clib_error_t *
1106 ip4_lookup_init (vlib_main_t * vm)
1107 {
1108   ip4_main_t *im = &ip4_main;
1109   clib_error_t *error;
1110   uword i;
1111
1112   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1113     return error;
1114   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1115     return (error);
1116   if ((error = vlib_call_init_function (vm, fib_module_init)))
1117     return error;
1118   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1119     return error;
1120
1121   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1122     {
1123       u32 m;
1124
1125       if (i < 32)
1126         m = pow2_mask (i) << (32 - i);
1127       else
1128         m = ~0;
1129       im->fib_masks[i] = clib_host_to_net_u32 (m);
1130     }
1131
1132   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1133
1134   /* Create FIB with index 0 and table id of 0. */
1135   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1136                                      FIB_SOURCE_DEFAULT_ROUTE);
1137   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1138                                       MFIB_SOURCE_DEFAULT_ROUTE);
1139
1140   {
1141     pg_node_t *pn;
1142     pn = pg_get_node (ip4_lookup_node.index);
1143     pn->unformat_edit = unformat_pg_ip4_header;
1144   }
1145
1146   {
1147     ethernet_arp_header_t h;
1148
1149     clib_memset (&h, 0, sizeof (h));
1150
1151 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1152 #define _8(f,v) h.f = v;
1153     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1154     _16 (l3_type, ETHERNET_TYPE_IP4);
1155     _8 (n_l2_address_bytes, 6);
1156     _8 (n_l3_address_bytes, 4);
1157     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1158 #undef _16
1159 #undef _8
1160
1161     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1162                                /* data */ &h,
1163                                sizeof (h),
1164                                /* alloc chunk size */ 8,
1165                                "ip4 arp");
1166   }
1167
1168   return error;
1169 }
1170
1171 VLIB_INIT_FUNCTION (ip4_lookup_init);
1172
1173 typedef struct
1174 {
1175   /* Adjacency taken. */
1176   u32 dpo_index;
1177   u32 flow_hash;
1178   u32 fib_index;
1179
1180   /* Packet data, possibly *after* rewrite. */
1181   u8 packet_data[64 - 1 * sizeof (u32)];
1182 }
1183 ip4_forward_next_trace_t;
1184
1185 #ifndef CLIB_MARCH_VARIANT
1186 u8 *
1187 format_ip4_forward_next_trace (u8 * s, va_list * args)
1188 {
1189   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1190   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1191   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1192   u32 indent = format_get_indent (s);
1193   s = format (s, "%U%U",
1194               format_white_space, indent,
1195               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1196   return s;
1197 }
1198 #endif
1199
1200 static u8 *
1201 format_ip4_lookup_trace (u8 * s, va_list * args)
1202 {
1203   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1204   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1205   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1206   u32 indent = format_get_indent (s);
1207
1208   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1209               t->fib_index, t->dpo_index, t->flow_hash);
1210   s = format (s, "\n%U%U",
1211               format_white_space, indent,
1212               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1213   return s;
1214 }
1215
1216 static u8 *
1217 format_ip4_rewrite_trace (u8 * s, va_list * args)
1218 {
1219   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1220   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1221   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1222   u32 indent = format_get_indent (s);
1223
1224   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1225               t->fib_index, t->dpo_index, format_ip_adjacency,
1226               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1227   s = format (s, "\n%U%U",
1228               format_white_space, indent,
1229               format_ip_adjacency_packet_data,
1230               t->packet_data, sizeof (t->packet_data));
1231   return s;
1232 }
1233
1234 #ifndef CLIB_MARCH_VARIANT
1235 /* Common trace function for all ip4-forward next nodes. */
1236 void
1237 ip4_forward_next_trace (vlib_main_t * vm,
1238                         vlib_node_runtime_t * node,
1239                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1240 {
1241   u32 *from, n_left;
1242   ip4_main_t *im = &ip4_main;
1243
1244   n_left = frame->n_vectors;
1245   from = vlib_frame_vector_args (frame);
1246
1247   while (n_left >= 4)
1248     {
1249       u32 bi0, bi1;
1250       vlib_buffer_t *b0, *b1;
1251       ip4_forward_next_trace_t *t0, *t1;
1252
1253       /* Prefetch next iteration. */
1254       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1255       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1256
1257       bi0 = from[0];
1258       bi1 = from[1];
1259
1260       b0 = vlib_get_buffer (vm, bi0);
1261       b1 = vlib_get_buffer (vm, bi1);
1262
1263       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1264         {
1265           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1266           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1267           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1268           t0->fib_index =
1269             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1270              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1271             vec_elt (im->fib_index_by_sw_if_index,
1272                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1273
1274           clib_memcpy_fast (t0->packet_data,
1275                             vlib_buffer_get_current (b0),
1276                             sizeof (t0->packet_data));
1277         }
1278       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1279         {
1280           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1281           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1282           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1283           t1->fib_index =
1284             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1285              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1286             vec_elt (im->fib_index_by_sw_if_index,
1287                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1288           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1289                             sizeof (t1->packet_data));
1290         }
1291       from += 2;
1292       n_left -= 2;
1293     }
1294
1295   while (n_left >= 1)
1296     {
1297       u32 bi0;
1298       vlib_buffer_t *b0;
1299       ip4_forward_next_trace_t *t0;
1300
1301       bi0 = from[0];
1302
1303       b0 = vlib_get_buffer (vm, bi0);
1304
1305       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1306         {
1307           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1308           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1309           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1310           t0->fib_index =
1311             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1312              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1313             vec_elt (im->fib_index_by_sw_if_index,
1314                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1315           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1316                             sizeof (t0->packet_data));
1317         }
1318       from += 1;
1319       n_left -= 1;
1320     }
1321 }
1322
1323 /* Compute TCP/UDP/ICMP4 checksum in software. */
1324 u16
1325 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1326                               ip4_header_t * ip0)
1327 {
1328   ip_csum_t sum0;
1329   u32 ip_header_length, payload_length_host_byte_order;
1330
1331   /* Initialize checksum with ip header. */
1332   ip_header_length = ip4_header_bytes (ip0);
1333   payload_length_host_byte_order =
1334     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1335   sum0 =
1336     clib_host_to_net_u32 (payload_length_host_byte_order +
1337                           (ip0->protocol << 16));
1338
1339   if (BITS (uword) == 32)
1340     {
1341       sum0 =
1342         ip_csum_with_carry (sum0,
1343                             clib_mem_unaligned (&ip0->src_address, u32));
1344       sum0 =
1345         ip_csum_with_carry (sum0,
1346                             clib_mem_unaligned (&ip0->dst_address, u32));
1347     }
1348   else
1349     sum0 =
1350       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1351
1352   return ip_calculate_l4_checksum (vm, p0, sum0,
1353                                    payload_length_host_byte_order, (u8 *) ip0,
1354                                    ip_header_length, NULL);
1355 }
1356
1357 u32
1358 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1359 {
1360   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1361   udp_header_t *udp0;
1362   u16 sum16;
1363
1364   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1365           || ip0->protocol == IP_PROTOCOL_UDP);
1366
1367   udp0 = (void *) (ip0 + 1);
1368   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1369     {
1370       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1371                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1372       return p0->flags;
1373     }
1374
1375   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1376
1377   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1378                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1379
1380   return p0->flags;
1381 }
1382 #endif
1383
1384 VNET_FEATURE_ARC_INIT (ip4_local) = {
1385   .arc_name = "ip4-local",
1386   .start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"),
1387   .last_in_arc = "ip4-local-end-of-arc",
1388 };
1389
1390 static inline void
1391 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1392                             ip4_header_t * ip, u8 is_udp, u8 * error,
1393                             u8 * good_tcp_udp)
1394 {
1395   u32 flags0;
1396   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1397   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1398   if (is_udp)
1399     {
1400       udp_header_t *udp;
1401       u32 ip_len, udp_len;
1402       i32 len_diff;
1403       udp = ip4_next_header (ip);
1404       /* Verify UDP length. */
1405       ip_len = clib_net_to_host_u16 (ip->length);
1406       udp_len = clib_net_to_host_u16 (udp->length);
1407
1408       len_diff = ip_len - udp_len;
1409       *good_tcp_udp &= len_diff >= 0;
1410       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1411     }
1412 }
1413
/*
 * Checksum-state predicates on a buffer pointer expression _b.
 * Every macro argument and every expansion is fully parenthesized so the
 * macros can be used with arbitrary pointer expressions and inside larger
 * expressions.
 */

/* Non-zero when the buffer has the OFFLOAD flag and a TCP or UDP checksum
 * offload bit set in its offload flags. */
#define ip4_local_csum_is_offloaded(_b)                                       \
  (((_b)->flags & VNET_BUFFER_F_OFFLOAD) &&                                   \
   (vnet_buffer (_b)->oflags &                                                \
    (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))

/* Non-zero when a TCP/UDP packet still needs a software checksum check:
 * the checksum has neither been computed already nor been offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                             \
  ((is_tcp_udp) &&                                                            \
   !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) ||                    \
     ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                           \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) ||                      \
    ip4_local_csum_is_offloaded (_b)) != 0)
1426
1427 static inline void
1428 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1429                          ip4_header_t * ih, u8 * error)
1430 {
1431   u8 is_udp, is_tcp_udp, good_tcp_udp;
1432
1433   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1434   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1435
1436   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1437     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1438   else
1439     good_tcp_udp = ip4_local_csum_is_valid (b);
1440
1441   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1442   *error = (is_tcp_udp && !good_tcp_udp
1443             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1444 }
1445
1446 static inline void
1447 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1448                             ip4_header_t ** ih, u8 * error)
1449 {
1450   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1451
1452   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1453   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1454
1455   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1456   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1457
1458   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1459   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1460
1461   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1462                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1463     {
1464       if (is_tcp_udp[0] && !ip4_local_csum_is_offloaded (b[0]))
1465         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1466                                     &good_tcp_udp[0]);
1467       if (is_tcp_udp[1] && !ip4_local_csum_is_offloaded (b[1]))
1468         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1469                                     &good_tcp_udp[1]);
1470     }
1471
1472   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1473               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1474   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1475               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1476 }
1477
1478 static inline void
1479 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1480                               vlib_buffer_t * b, u16 * next, u8 error,
1481                               u8 head_of_feature_arc)
1482 {
1483   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1484   u32 next_index;
1485
1486   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1487   b->error = error ? error_node->errors[error] : 0;
1488   if (head_of_feature_arc)
1489     {
1490       next_index = *next;
1491       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1492         {
1493           vnet_feature_arc_start (
1494             arc_index, vnet_buffer (b)->ip.rx_sw_if_index, &next_index, b);
1495           *next = next_index;
1496         }
1497     }
1498 }
1499
1500 typedef struct
1501 {
1502   /* The src and fib-index together determine if packet n is the same as n-1 */
1503   ip4_address_t src;
1504   u32 fib_index;
1505   u32 lbi;
1506   u8 error;
1507   u8 first;
1508 } ip4_local_last_check_t;
1509
1510 static inline void
1511 ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0,
1512                      ip4_local_last_check_t *last_check, u8 *error0,
1513                      int is_receive_dpo)
1514 {
1515   const dpo_id_t *dpo0;
1516   load_balance_t *lb0;
1517   u32 lbi0;
1518
1519   vnet_buffer (b)->ip.fib_index =
1520     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1521     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1522
1523   vnet_buffer (b)->ip.rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
1524   if (is_receive_dpo)
1525     {
1526       receive_dpo_t *rd;
1527       rd = receive_dpo_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
1528       if (rd->rd_sw_if_index != ~0)
1529         vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index;
1530     }
1531
1532   /*
1533    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1534    *  adjacency for the destination address (the local interface address).
1535    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1536    *  adjacency for the source address (the remote sender's address)
1537    */
1538   if (PREDICT_TRUE ((last_check->src.as_u32 != ip0->src_address.as_u32)) ||
1539       (last_check->fib_index != vnet_buffer (b)->ip.fib_index) ||
1540       last_check->first)
1541     {
1542       lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
1543                                         &ip0->src_address);
1544
1545       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1546         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1547       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1548
1549       lb0 = load_balance_get (lbi0);
1550       dpo0 = load_balance_get_bucket_i (lb0, 0);
1551
1552       /*
1553        * Must have a route to source otherwise we drop the packet.
1554        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1555        *
1556        * The checks are:
1557        *  - the source is a recieve => it's from us => bogus, do this
1558        *    first since it sets a different error code.
1559        *  - uRPF check for any route to source - accept if passes.
1560        *  - allow packets destined to the broadcast address from unknown sources
1561        */
1562
1563       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1564                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1565                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1566       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1567                   && !fib_urpf_check_size (lb0->lb_urpf)
1568                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1569                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1570
1571       last_check->src.as_u32 = ip0->src_address.as_u32;
1572       last_check->lbi = lbi0;
1573       last_check->error = *error0;
1574       last_check->first = 0;
1575       last_check->fib_index = vnet_buffer (b)->ip.fib_index;
1576     }
1577   else
1578     {
1579       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1580         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1581       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1582       *error0 = last_check->error;
1583     }
1584 }
1585
1586 static inline void
1587 ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip,
1588                         ip4_local_last_check_t *last_check, u8 *error,
1589                         int is_receive_dpo)
1590 {
1591   const dpo_id_t *dpo[2];
1592   load_balance_t *lb[2];
1593   u32 not_last_hit;
1594   u32 lbi[2];
1595
1596   not_last_hit = last_check->first;
1597   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1598   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1599
1600   vnet_buffer (b[0])->ip.fib_index =
1601     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1602     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1603     vnet_buffer (b[0])->ip.fib_index;
1604
1605   vnet_buffer (b[1])->ip.fib_index =
1606     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1607     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1608     vnet_buffer (b[1])->ip.fib_index;
1609
1610   not_last_hit |= vnet_buffer (b[0])->ip.fib_index ^ last_check->fib_index;
1611   not_last_hit |= vnet_buffer (b[1])->ip.fib_index ^ last_check->fib_index;
1612
1613   vnet_buffer (b[0])->ip.rx_sw_if_index =
1614     vnet_buffer (b[0])->sw_if_index[VLIB_RX];
1615   vnet_buffer (b[1])->ip.rx_sw_if_index =
1616     vnet_buffer (b[1])->sw_if_index[VLIB_RX];
1617   if (is_receive_dpo)
1618     {
1619       const receive_dpo_t *rd0, *rd1;
1620       rd0 = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
1621       rd1 = receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
1622       if (rd0->rd_sw_if_index != ~0)
1623         vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
1624       if (rd1->rd_sw_if_index != ~0)
1625         vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
1626     }
1627
1628   /*
1629    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1630    *  adjacency for the destination address (the local interface address).
1631    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1632    *  adjacency for the source address (the remote sender's address)
1633    */
1634   if (PREDICT_TRUE (not_last_hit))
1635     {
1636       ip4_fib_forwarding_lookup_x2 (
1637         vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
1638         &ip[0]->src_address, &ip[1]->src_address, &lbi[0], &lbi[1]);
1639
1640       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1641         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1642       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1643
1644       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1645         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1646       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1647
1648       lb[0] = load_balance_get (lbi[0]);
1649       lb[1] = load_balance_get (lbi[1]);
1650
1651       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1652       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1653
1654       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1655                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1656                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1657       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1658                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1659                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1660                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1661
1662       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1663                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1664                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1665       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1666                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1667                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1668                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1669
1670       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1671       last_check->lbi = lbi[1];
1672       last_check->error = error[1];
1673       last_check->first = 0;
1674       last_check->fib_index = vnet_buffer (b[1])->ip.fib_index;
1675     }
1676   else
1677     {
1678       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1679         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1680       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1681
1682       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1683         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1684       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1685
1686       error[0] = last_check->error;
1687       error[1] = last_check->error;
1688     }
1689 }
1690
/*
 * Packet classes returned by ip4_local_classify ().
 *
 * The L4 class has to remain 0: ip4_local_inline () uses the class as a
 * boolean (non-zero means "skip the checksum and source checks").
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1697
1698 /**
1699  * Determine packet type and next node.
1700  *
1701  * The expectation is that all packets that are not L4 will skip
1702  * checksums and source checks.
1703  */
1704 always_inline u8
1705 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1706 {
1707   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1708
1709   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1710     {
1711       *next = IP_LOCAL_NEXT_REASSEMBLY;
1712       return IP_LOCAL_PACKET_TYPE_FRAG;
1713     }
1714   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1715     {
1716       *next = lm->local_next_by_ip_protocol[ip->protocol];
1717       return IP_LOCAL_PACKET_TYPE_NAT;
1718     }
1719
1720   *next = lm->local_next_by_ip_protocol[ip->protocol];
1721   return IP_LOCAL_PACKET_TYPE_L4;
1722 }
1723
1724 static inline uword
1725 ip4_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1726                   vlib_frame_t *frame, int head_of_feature_arc,
1727                   int is_receive_dpo)
1728 {
1729   u32 *from, n_left_from;
1730   vlib_node_runtime_t *error_node =
1731     vlib_node_get_runtime (vm, ip4_local_node.index);
1732   u16 nexts[VLIB_FRAME_SIZE], *next;
1733   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1734   ip4_header_t *ip[2];
1735   u8 error[2], pt[2];
1736
1737   ip4_local_last_check_t last_check = {
1738     /*
1739      * 0.0.0.0 can appear as the source address of an IP packet,
1740      * as can any other address, hence the need to use the 'first'
1741      * member to make sure the .lbi is initialised for the first
1742      * packet.
1743      */
1744     .src = { .as_u32 = 0 },
1745     .lbi = ~0,
1746     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1747     .first = 1,
1748     .fib_index = 0,
1749   };
1750
1751   from = vlib_frame_vector_args (frame);
1752   n_left_from = frame->n_vectors;
1753
1754   if (node->flags & VLIB_NODE_FLAG_TRACE)
1755     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1756
1757   vlib_get_buffers (vm, from, bufs, n_left_from);
1758   b = bufs;
1759   next = nexts;
1760
1761   while (n_left_from >= 6)
1762     {
1763       u8 not_batch = 0;
1764
1765       /* Prefetch next iteration. */
1766       {
1767         vlib_prefetch_buffer_header (b[4], LOAD);
1768         vlib_prefetch_buffer_header (b[5], LOAD);
1769
1770         clib_prefetch_load (b[4]->data);
1771         clib_prefetch_load (b[5]->data);
1772       }
1773
1774       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1775
1776       ip[0] = vlib_buffer_get_current (b[0]);
1777       ip[1] = vlib_buffer_get_current (b[1]);
1778
1779       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1780       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1781
1782       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1783       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1784
1785       not_batch = pt[0] ^ pt[1];
1786
1787       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1788         goto skip_checks;
1789
1790       if (PREDICT_TRUE (not_batch == 0))
1791         {
1792           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1793           ip4_local_check_src_x2 (b, ip, &last_check, error, is_receive_dpo);
1794         }
1795       else
1796         {
1797           if (!pt[0])
1798             {
1799               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1800               ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1801                                    is_receive_dpo);
1802             }
1803           if (!pt[1])
1804             {
1805               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1806               ip4_local_check_src (b[1], ip[1], &last_check, &error[1],
1807                                    is_receive_dpo);
1808             }
1809         }
1810
1811     skip_checks:
1812
1813       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1814                                     head_of_feature_arc);
1815       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1816                                     head_of_feature_arc);
1817
1818       b += 2;
1819       next += 2;
1820       n_left_from -= 2;
1821     }
1822
1823   while (n_left_from > 0)
1824     {
1825       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1826
1827       ip[0] = vlib_buffer_get_current (b[0]);
1828       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1829       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1830
1831       if (head_of_feature_arc == 0 || pt[0])
1832         goto skip_check;
1833
1834       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1835       ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1836                            is_receive_dpo);
1837
1838     skip_check:
1839
1840       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1841                                     head_of_feature_arc);
1842
1843       b += 1;
1844       next += 1;
1845       n_left_from -= 1;
1846     }
1847
1848   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1849   return frame->n_vectors;
1850 }
1851
1852 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1853                                vlib_frame_t * frame)
1854 {
1855   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1856                            0 /* is_receive_dpo */);
1857 }
1858
1859 VLIB_REGISTER_NODE (ip4_local_node) =
1860 {
1861   .name = "ip4-local",
1862   .vector_size = sizeof (u32),
1863   .format_trace = format_ip4_forward_next_trace,
1864   .n_errors = IP4_N_ERROR,
1865   .error_counters = ip4_error_counters,
1866   .n_next_nodes = IP_LOCAL_N_NEXT,
1867   .next_nodes =
1868   {
1869     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1870     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1871     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1872     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1873     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-local-full-reassembly",
1874   },
1875 };
1876
1877 VLIB_NODE_FN (ip4_receive_local_node)
1878 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1879 {
1880   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1881                            1 /* is_receive_dpo */);
1882 }
1883
1884 VLIB_REGISTER_NODE (ip4_receive_local_node) = {
1885   .name = "ip4-receive",
1886   .vector_size = sizeof (u32),
1887   .format_trace = format_ip4_forward_next_trace,
1888   .sibling_of = "ip4-local"
1889 };
1890
1891 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1892                                           vlib_node_runtime_t * node,
1893                                           vlib_frame_t * frame)
1894 {
1895   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */,
1896                            0 /* is_receive_dpo */);
1897 }
1898
1899 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1900   .name = "ip4-local-end-of-arc",
1901   .vector_size = sizeof (u32),
1902
1903   .format_trace = format_ip4_forward_next_trace,
1904   .sibling_of = "ip4-local",
1905 };
1906
1907 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1908   .arc_name = "ip4-local",
1909   .node_name = "ip4-local-end-of-arc",
1910   .runs_before = 0, /* not before any other features */
1911 };
1912
1913 #ifndef CLIB_MARCH_VARIANT
1914 void
1915 ip4_register_protocol (u32 protocol, u32 node_index)
1916 {
1917   vlib_main_t *vm = vlib_get_main ();
1918   ip4_main_t *im = &ip4_main;
1919   ip_lookup_main_t *lm = &im->lookup_main;
1920
1921   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1922   lm->local_next_by_ip_protocol[protocol] =
1923     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1924 }
1925
1926 void
1927 ip4_unregister_protocol (u32 protocol)
1928 {
1929   ip4_main_t *im = &ip4_main;
1930   ip_lookup_main_t *lm = &im->lookup_main;
1931
1932   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1933   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1934 }
1935 #endif
1936
1937 static clib_error_t *
1938 show_ip_local_command_fn (vlib_main_t * vm,
1939                           unformat_input_t * input, vlib_cli_command_t * cmd)
1940 {
1941   ip4_main_t *im = &ip4_main;
1942   ip_lookup_main_t *lm = &im->lookup_main;
1943   int i;
1944
1945   vlib_cli_output (vm, "Protocols handled by ip4_local");
1946   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1947     {
1948       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1949         {
1950           u32 node_index = vlib_get_node (vm,
1951                                           ip4_local_node.index)->
1952             next_nodes[lm->local_next_by_ip_protocol[i]];
1953           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1954                            format_vlib_node_name, vm, node_index);
1955         }
1956     }
1957   return 0;
1958 }
1959
1960
1961
1962 /*?
1963  * Display the set of protocols handled by the local IPv4 stack.
1964  *
1965  * @cliexpar
1966  * Example of how to display local protocol table:
1967  * @cliexstart{show ip local}
1968  * Protocols handled by ip4_local
1969  * 1
1970  * 17
1971  * 47
1972  * @cliexend
1973 ?*/
1974 VLIB_CLI_COMMAND (show_ip_local, static) =
1975 {
1976   .path = "show ip local",
1977   .function = show_ip_local_command_fn,
1978   .short_help = "show ip local",
1979 };
1980
/*
 * Next indices used by the IPv4 rewrite code itself.  Buffers start out
 * with IP4_REWRITE_NEXT_DROP; the TTL and MTU checks may select one of the
 * other two.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR,
  IP4_REWRITE_NEXT_FRAGMENT,
  /* Number of entries above - keep last. */
  IP4_REWRITE_N_NEXT
} ip4_rewrite_next_t;
1988
/**
 * The bits of an IPv4 address (held as a u32 in network byte order) that
 * are masked out to construct a multicast MAC address.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1998
1999 always_inline void
2000 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2001                u16 adj_packet_bytes, bool df, u16 * next,
2002                u8 is_midchain, u32 * error)
2003 {
2004   if (packet_len > adj_packet_bytes)
2005     {
2006       *error = IP4_ERROR_MTU_EXCEEDED;
2007       if (df)
2008         {
2009           icmp4_error_set_vnet_buffer
2010             (b, ICMP4_destination_unreachable,
2011              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2012              adj_packet_bytes);
2013           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2014         }
2015       else
2016         {
2017           /* IP fragmentation */
2018           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2019                                    (is_midchain ?
2020                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2021                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2022           *next = IP4_REWRITE_NEXT_FRAGMENT;
2023         }
2024     }
2025 }
2026
2027 /* increment TTL & update checksum.
2028    Works either endian, so no need for byte swap. */
2029 static_always_inline void
2030 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2031 {
2032   i32 ttl;
2033   u32 checksum;
2034   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2035     return;
2036
2037   ttl = ip->ttl;
2038
2039   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2040   checksum += checksum >= 0xffff;
2041
2042   ip->checksum = checksum;
2043   ttl += 1;
2044   ip->ttl = ttl;
2045
2046   ASSERT (ip4_header_checksum_is_valid (ip) ||
2047           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
2048           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
2049 }
2050
2051 /* Decrement TTL & update checksum.
2052    Works either endian, so no need for byte swap. */
2053 static_always_inline void
2054 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2055                             u32 * error)
2056 {
2057   i32 ttl;
2058   u32 checksum;
2059   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2060     return;
2061
2062   ttl = ip->ttl;
2063
2064   /* Input node should have reject packets with ttl 0. */
2065   ASSERT (ip->ttl > 0);
2066
2067   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2068   checksum += checksum >= 0xffff;
2069
2070   ip->checksum = checksum;
2071   ttl -= 1;
2072   ip->ttl = ttl;
2073
2074   /*
2075    * If the ttl drops below 1 when forwarding, generate
2076    * an ICMP response.
2077    */
2078   if (PREDICT_FALSE (ttl <= 0))
2079     {
2080       *error = IP4_ERROR_TIME_EXPIRED;
2081       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2082       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2083                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2084                                    0);
2085       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2086     }
2087
2088   /* Verify checksum. */
2089   ASSERT (ip4_header_checksum_is_valid (ip) ||
2090           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
2091           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
2092 }
2093
2094 always_inline uword
2095 ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
2096                     vlib_frame_t *frame, int do_counters, int is_midchain,
2097                     int is_mcast)
2098 {
2099   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2100   u32 *from = vlib_frame_vector_args (frame);
2101   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2102   u16 nexts[VLIB_FRAME_SIZE], *next;
2103   u32 n_left_from;
2104   vlib_node_runtime_t *error_node =
2105     vlib_node_get_runtime (vm, ip4_input_node.index);
2106
2107   n_left_from = frame->n_vectors;
2108   u32 thread_index = vm->thread_index;
2109
2110   vlib_get_buffers (vm, from, bufs, n_left_from);
2111   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2112
2113 #if (CLIB_N_PREFETCHES >= 8)
2114   if (n_left_from >= 6)
2115     {
2116       int i;
2117       for (i = 2; i < 6; i++)
2118         vlib_prefetch_buffer_header (bufs[i], LOAD);
2119     }
2120
2121   next = nexts;
2122   b = bufs;
2123   while (n_left_from >= 8)
2124     {
2125       const ip_adjacency_t *adj0, *adj1;
2126       ip4_header_t *ip0, *ip1;
2127       u32 rw_len0, error0, adj_index0;
2128       u32 rw_len1, error1, adj_index1;
2129       u32 tx_sw_if_index0, tx_sw_if_index1;
2130       u8 *p;
2131
2132       if (is_midchain)
2133         {
2134           vlib_prefetch_buffer_header (b[6], LOAD);
2135           vlib_prefetch_buffer_header (b[7], LOAD);
2136         }
2137
2138       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2139       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2140
2141       /*
2142        * pre-fetch the per-adjacency counters
2143        */
2144       if (do_counters)
2145         {
2146           vlib_prefetch_combined_counter (&adjacency_counters,
2147                                           thread_index, adj_index0);
2148           vlib_prefetch_combined_counter (&adjacency_counters,
2149                                           thread_index, adj_index1);
2150         }
2151
2152       ip0 = vlib_buffer_get_current (b[0]);
2153       ip1 = vlib_buffer_get_current (b[1]);
2154
2155       error0 = error1 = IP4_ERROR_NONE;
2156
2157       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2158       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2159
2160       /* Rewrite packet header and updates lengths. */
2161       adj0 = adj_get (adj_index0);
2162       adj1 = adj_get (adj_index1);
2163
2164       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2165       rw_len0 = adj0[0].rewrite_header.data_bytes;
2166       rw_len1 = adj1[0].rewrite_header.data_bytes;
2167       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2168       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2169
2170       p = vlib_buffer_get_current (b[2]);
2171       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2172       clib_prefetch_load (p);
2173
2174       p = vlib_buffer_get_current (b[3]);
2175       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2176       clib_prefetch_load (p);
2177
2178       /* Check MTU of outgoing interface. */
2179       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2180       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2181
2182       if (b[0]->flags & VNET_BUFFER_F_GSO)
2183         ip0_len = gso_mtu_sz (b[0]);
2184       if (b[1]->flags & VNET_BUFFER_F_GSO)
2185         ip1_len = gso_mtu_sz (b[1]);
2186
2187       ip4_mtu_check (b[0], ip0_len,
2188                      adj0[0].rewrite_header.max_l3_packet_bytes,
2189                      ip0->flags_and_fragment_offset &
2190                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2191                      next + 0, is_midchain, &error0);
2192       ip4_mtu_check (b[1], ip1_len,
2193                      adj1[0].rewrite_header.max_l3_packet_bytes,
2194                      ip1->flags_and_fragment_offset &
2195                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2196                      next + 1, is_midchain, &error1);
2197
2198       if (is_mcast)
2199         {
2200           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2201                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2202                     IP4_ERROR_SAME_INTERFACE : error0);
2203           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2204                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2205                     IP4_ERROR_SAME_INTERFACE : error1);
2206         }
2207
2208       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2209        * to see the IP header */
2210       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2211         {
2212           u32 next_index = adj0[0].rewrite_header.next_index;
2213           vlib_buffer_advance (b[0], -(word) rw_len0);
2214
2215           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2216           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2217
2218           if (PREDICT_FALSE
2219               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2220             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2221                                                 tx_sw_if_index0,
2222                                                 &next_index, b[0],
2223                                                 adj0->ia_cfg_index);
2224
2225           next[0] = next_index;
2226         }
2227       else
2228         {
2229           b[0]->error = error_node->errors[error0];
2230           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2231             ip4_ttl_inc (b[0], ip0);
2232         }
2233       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2234         {
2235           u32 next_index = adj1[0].rewrite_header.next_index;
2236           vlib_buffer_advance (b[1], -(word) rw_len1);
2237
2238           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2239           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2240
2241           if (PREDICT_FALSE
2242               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2243             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2244                                                 tx_sw_if_index1,
2245                                                 &next_index, b[1],
2246                                                 adj1->ia_cfg_index);
2247           next[1] = next_index;
2248         }
2249       else
2250         {
2251           b[1]->error = error_node->errors[error1];
2252           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2253             ip4_ttl_inc (b[1], ip1);
2254         }
2255
2256       if (is_midchain)
2257         /* Guess we are only writing on ipv4 header. */
2258         vnet_rewrite_two_headers (adj0[0], adj1[0],
2259                                   ip0, ip1, sizeof (ip4_header_t));
2260       else
2261         /* Guess we are only writing on simple Ethernet header. */
2262         vnet_rewrite_two_headers (adj0[0], adj1[0],
2263                                   ip0, ip1, sizeof (ethernet_header_t));
2264
2265       if (do_counters)
2266         {
2267           if (error0 == IP4_ERROR_NONE)
2268             vlib_increment_combined_counter
2269               (&adjacency_counters,
2270                thread_index,
2271                adj_index0, 1,
2272                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2273
2274           if (error1 == IP4_ERROR_NONE)
2275             vlib_increment_combined_counter
2276               (&adjacency_counters,
2277                thread_index,
2278                adj_index1, 1,
2279                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2280         }
2281
2282       if (is_midchain)
2283         {
2284           if (error0 == IP4_ERROR_NONE)
2285             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2286           if (error1 == IP4_ERROR_NONE)
2287             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2288         }
2289
2290       if (is_mcast)
2291         {
2292           /* copy bytes from the IP address into the MAC rewrite */
2293           if (error0 == IP4_ERROR_NONE)
2294             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2295                                         adj0->rewrite_header.dst_mcast_offset,
2296                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2297           if (error1 == IP4_ERROR_NONE)
2298             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2299                                         adj1->rewrite_header.dst_mcast_offset,
2300                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2301         }
2302
2303       next += 2;
2304       b += 2;
2305       n_left_from -= 2;
2306     }
2307 #elif (CLIB_N_PREFETCHES >= 4)
2308   next = nexts;
2309   b = bufs;
2310   while (n_left_from >= 1)
2311     {
2312       ip_adjacency_t *adj0;
2313       ip4_header_t *ip0;
2314       u32 rw_len0, error0, adj_index0;
2315       u32 tx_sw_if_index0;
2316       u8 *p;
2317
2318       /* Prefetch next iteration */
2319       if (PREDICT_TRUE (n_left_from >= 4))
2320         {
2321           ip_adjacency_t *adj2;
2322           u32 adj_index2;
2323
2324           vlib_prefetch_buffer_header (b[3], LOAD);
2325           vlib_prefetch_buffer_data (b[2], LOAD);
2326
2327           /* Prefetch adj->rewrite_header */
2328           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2329           adj2 = adj_get (adj_index2);
2330           p = (u8 *) adj2;
2331           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2332                          LOAD);
2333         }
2334
2335       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2336
2337       /*
2338        * Prefetch the per-adjacency counters
2339        */
2340       if (do_counters)
2341         {
2342           vlib_prefetch_combined_counter (&adjacency_counters,
2343                                           thread_index, adj_index0);
2344         }
2345
2346       ip0 = vlib_buffer_get_current (b[0]);
2347
2348       error0 = IP4_ERROR_NONE;
2349
2350       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2351
2352       /* Rewrite packet header and updates lengths. */
2353       adj0 = adj_get (adj_index0);
2354
2355       /* Rewrite header was prefetched. */
2356       rw_len0 = adj0[0].rewrite_header.data_bytes;
2357       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2358
2359       /* Check MTU of outgoing interface. */
2360       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2361
2362       if (b[0]->flags & VNET_BUFFER_F_GSO)
2363         ip0_len = gso_mtu_sz (b[0]);
2364
2365       ip4_mtu_check (b[0], ip0_len,
2366                      adj0[0].rewrite_header.max_l3_packet_bytes,
2367                      ip0->flags_and_fragment_offset &
2368                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2369                      next + 0, is_midchain, &error0);
2370
2371       if (is_mcast)
2372         {
2373           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2374                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2375                     IP4_ERROR_SAME_INTERFACE : error0);
2376         }
2377
2378       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2379        * to see the IP header */
2380       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2381         {
2382           u32 next_index = adj0[0].rewrite_header.next_index;
2383           vlib_buffer_advance (b[0], -(word) rw_len0);
2384           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2385           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2386
2387           if (PREDICT_FALSE
2388               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2389             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2390                                                 tx_sw_if_index0,
2391                                                 &next_index, b[0],
2392                                                 adj0->ia_cfg_index);
2393           next[0] = next_index;
2394
2395           if (is_midchain)
2396             {
2397               /* Guess we are only writing on ipv4 header. */
2398               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2399             }
2400           else
2401             /* Guess we are only writing on simple Ethernet header. */
2402             vnet_rewrite_one_header (adj0[0], ip0,
2403                                      sizeof (ethernet_header_t));
2404
2405           /*
2406            * Bump the per-adjacency counters
2407            */
2408           if (do_counters)
2409             vlib_increment_combined_counter
2410               (&adjacency_counters,
2411                thread_index,
2412                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2413                                                            b[0]) + rw_len0);
2414
2415           if (is_midchain)
2416             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2417
2418           if (is_mcast)
2419             /* copy bytes from the IP address into the MAC rewrite */
2420             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2421                                         adj0->rewrite_header.dst_mcast_offset,
2422                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2423         }
2424       else
2425         {
2426           b[0]->error = error_node->errors[error0];
2427           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2428             ip4_ttl_inc (b[0], ip0);
2429         }
2430
2431       next += 1;
2432       b += 1;
2433       n_left_from -= 1;
2434     }
2435 #endif
2436
2437   while (n_left_from > 0)
2438     {
2439       ip_adjacency_t *adj0;
2440       ip4_header_t *ip0;
2441       u32 rw_len0, adj_index0, error0;
2442       u32 tx_sw_if_index0;
2443
2444       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2445
2446       adj0 = adj_get (adj_index0);
2447
2448       if (do_counters)
2449         vlib_prefetch_combined_counter (&adjacency_counters,
2450                                         thread_index, adj_index0);
2451
2452       ip0 = vlib_buffer_get_current (b[0]);
2453
2454       error0 = IP4_ERROR_NONE;
2455
2456       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2457
2458
2459       /* Update packet buffer attributes/set output interface. */
2460       rw_len0 = adj0[0].rewrite_header.data_bytes;
2461       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2462
2463       /* Check MTU of outgoing interface. */
2464       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2465       if (b[0]->flags & VNET_BUFFER_F_GSO)
2466         ip0_len = gso_mtu_sz (b[0]);
2467
2468       ip4_mtu_check (b[0], ip0_len,
2469                      adj0[0].rewrite_header.max_l3_packet_bytes,
2470                      ip0->flags_and_fragment_offset &
2471                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2472                      next + 0, is_midchain, &error0);
2473
2474       if (is_mcast)
2475         {
2476           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2477                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2478                     IP4_ERROR_SAME_INTERFACE : error0);
2479         }
2480
2481       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2482        * to see the IP header */
2483       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2484         {
2485           u32 next_index = adj0[0].rewrite_header.next_index;
2486           vlib_buffer_advance (b[0], -(word) rw_len0);
2487           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2488           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2489
2490           if (PREDICT_FALSE
2491               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2492             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2493                                                 tx_sw_if_index0,
2494                                                 &next_index, b[0],
2495                                                 adj0->ia_cfg_index);
2496           next[0] = next_index;
2497
2498           if (is_midchain)
2499             {
2500               /* Guess we are only writing on ipv4 header. */
2501               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2502             }
2503           else
2504             /* Guess we are only writing on simple Ethernet header. */
2505             vnet_rewrite_one_header (adj0[0], ip0,
2506                                      sizeof (ethernet_header_t));
2507
2508           if (do_counters)
2509             vlib_increment_combined_counter
2510               (&adjacency_counters,
2511                thread_index, adj_index0, 1,
2512                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2513
2514           if (is_midchain)
2515             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2516
2517           if (is_mcast)
2518             /* copy bytes from the IP address into the MAC rewrite */
2519             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2520                                         adj0->rewrite_header.dst_mcast_offset,
2521                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2522         }
2523       else
2524         {
2525           b[0]->error = error_node->errors[error0];
2526           /* undo the TTL decrement - we'll be back to do it again */
2527           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2528             ip4_ttl_inc (b[0], ip0);
2529         }
2530
2531       next += 1;
2532       b += 1;
2533       n_left_from -= 1;
2534     }
2535
2536
2537   /* Need to do trace after rewrites to pick up new packet data. */
2538   if (node->flags & VLIB_NODE_FLAG_TRACE)
2539     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2540
2541   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2542   return frame->n_vectors;
2543 }
2544
2545 /** @brief IPv4 rewrite node.
2546     @node ip4-rewrite
2547
2548     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2549     header checksum, fetch the ip adjacency, check the outbound mtu,
2550     apply the adjacency rewrite, and send pkts to the adjacency
2551     rewrite header's rewrite_next_index.
2552
2553     @param vm vlib_main_t corresponding to the current thread
2554     @param node vlib_node_runtime_t
2555     @param frame vlib_frame_t whose contents should be dispatched
2556
2557     @par Graph mechanics: buffer metadata, next index usage
2558
2559     @em Uses:
2560     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2561         - the rewrite adjacency index
2562     - <code>adj->lookup_next_index</code>
2563         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2564           the packet will be dropped.
2565     - <code>adj->rewrite_header</code>
2566         - Rewrite string length, rewrite string, next_index
2567
2568     @em Sets:
2569     - <code>b->current_data, b->current_length</code>
2570         - Updated net of applying the rewrite string
2571
2572     <em>Next Indices:</em>
2573     - <code> adj->rewrite_header.next_index </code>
2574       or @c ip4-drop
2575 */
2576
2577 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2578                                  vlib_frame_t * frame)
2579 {
2580   if (adj_are_counters_enabled ())
2581     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2582   else
2583     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2584 }
2585
2586 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2587                                        vlib_node_runtime_t * node,
2588                                        vlib_frame_t * frame)
2589 {
2590   if (adj_are_counters_enabled ())
2591     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2592   else
2593     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2594 }
2595
2596 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2597                                   vlib_node_runtime_t * node,
2598                                   vlib_frame_t * frame)
2599 {
2600   if (adj_are_counters_enabled ())
2601     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2602   else
2603     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2604 }
2605
2606 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2607                                        vlib_node_runtime_t * node,
2608                                        vlib_frame_t * frame)
2609 {
2610   if (adj_are_counters_enabled ())
2611     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2612   else
2613     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2614 }
2615
2616 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2617                                         vlib_node_runtime_t * node,
2618                                         vlib_frame_t * frame)
2619 {
2620   if (adj_are_counters_enabled ())
2621     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2622   else
2623     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2624 }
2625
2626 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2627   .name = "ip4-rewrite",
2628   .vector_size = sizeof (u32),
2629
2630   .format_trace = format_ip4_rewrite_trace,
2631
2632   .n_next_nodes = IP4_REWRITE_N_NEXT,
2633   .next_nodes = {
2634     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2635     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2636     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2637   },
2638 };
2639
2640 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2641   .name = "ip4-rewrite-bcast",
2642   .vector_size = sizeof (u32),
2643
2644   .format_trace = format_ip4_rewrite_trace,
2645   .sibling_of = "ip4-rewrite",
2646 };
2647
2648 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2649   .name = "ip4-rewrite-mcast",
2650   .vector_size = sizeof (u32),
2651
2652   .format_trace = format_ip4_rewrite_trace,
2653   .sibling_of = "ip4-rewrite",
2654 };
2655
2656 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2657   .name = "ip4-mcast-midchain",
2658   .vector_size = sizeof (u32),
2659
2660   .format_trace = format_ip4_rewrite_trace,
2661   .sibling_of = "ip4-rewrite",
2662 };
2663
2664 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2665   .name = "ip4-midchain",
2666   .vector_size = sizeof (u32),
2667   .format_trace = format_ip4_rewrite_trace,
2668   .sibling_of = "ip4-rewrite",
2669 };
2670
2671 static clib_error_t *
2672 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2673                              unformat_input_t * input,
2674                              vlib_cli_command_t * cmd)
2675 {
2676   int matched = 0;
2677   u32 table_id = 0;
2678   u32 flow_hash_config = 0;
2679   int rv;
2680
2681   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2682     {
2683       if (unformat (input, "table %d", &table_id))
2684         matched = 1;
2685 #define _(a, b, v)                                                            \
2686   else if (unformat (input, #a))                                              \
2687   {                                                                           \
2688     flow_hash_config |= v;                                                    \
2689     matched = 1;                                                              \
2690   }
2691       foreach_flow_hash_bit
2692 #undef _
2693         else
2694         break;
2695     }
2696
2697   if (matched == 0)
2698     return clib_error_return (0, "unknown input `%U'",
2699                               format_unformat_error, input);
2700
2701   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2702   switch (rv)
2703     {
2704     case 0:
2705       break;
2706
2707     case VNET_API_ERROR_NO_SUCH_FIB:
2708       return clib_error_return (0, "no such FIB table %d", table_id);
2709
2710     default:
2711       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2712       break;
2713     }
2714
2715   return 0;
2716 }
2717
2718 /*?
2719  * Configure the set of IPv4 fields used by the flow hash.
2720  *
2721  * @cliexpar
2722  * Example of how to set the flow hash on a given table:
2723  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2724  * Example of how to display the configured flow hash:
2725  * @cliexstart{show ip fib}
2726  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2727  * 0.0.0.0/0
2728  *   unicast-ip4-chain
2729  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2730  *     [0] [@0]: dpo-drop ip6
2731  * 0.0.0.0/32
2732  *   unicast-ip4-chain
2733  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2734  *     [0] [@0]: dpo-drop ip6
2735  * 224.0.0.0/8
2736  *   unicast-ip4-chain
2737  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2738  *     [0] [@0]: dpo-drop ip6
2739  * 6.0.1.2/32
2740  *   unicast-ip4-chain
2741  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2742  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2743  * 7.0.0.1/32
2744  *   unicast-ip4-chain
2745  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2746  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2747  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2748  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2749  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2750  * 240.0.0.0/8
2751  *   unicast-ip4-chain
2752  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2753  *     [0] [@0]: dpo-drop ip6
2754  * 255.255.255.255/32
2755  *   unicast-ip4-chain
2756  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2757  *     [0] [@0]: dpo-drop ip6
2758  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2759  * 0.0.0.0/0
2760  *   unicast-ip4-chain
2761  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2762  *     [0] [@0]: dpo-drop ip6
2763  * 0.0.0.0/32
2764  *   unicast-ip4-chain
2765  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2766  *     [0] [@0]: dpo-drop ip6
2767  * 172.16.1.0/24
2768  *   unicast-ip4-chain
2769  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2770  *     [0] [@4]: ipv4-glean: af_packet0
2771  * 172.16.1.1/32
2772  *   unicast-ip4-chain
2773  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2774  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2775  * 172.16.1.2/32
2776  *   unicast-ip4-chain
2777  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2778  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2779  * 172.16.2.0/24
2780  *   unicast-ip4-chain
2781  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2782  *     [0] [@4]: ipv4-glean: af_packet1
2783  * 172.16.2.1/32
2784  *   unicast-ip4-chain
2785  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2786  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2787  * 224.0.0.0/8
2788  *   unicast-ip4-chain
2789  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2790  *     [0] [@0]: dpo-drop ip6
2791  * 240.0.0.0/8
2792  *   unicast-ip4-chain
2793  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2794  *     [0] [@0]: dpo-drop ip6
2795  * 255.255.255.255/32
2796  *   unicast-ip4-chain
2797  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2798  *     [0] [@0]: dpo-drop ip6
2799  * @cliexend
2800 ?*/
2801 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
2802   .path = "set ip flow-hash",
2803   .short_help = "set ip flow-hash table <table-id> [src] [dst] [sport] "
2804                 "[dport] [proto] [reverse] [gtpv1teid]",
2805   .function = set_ip_flow_hash_command_fn,
2806 };
2807
2808 #ifndef CLIB_MARCH_VARIANT
2809 int
2810 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2811                              u32 table_index)
2812 {
2813   vnet_main_t *vnm = vnet_get_main ();
2814   vnet_interface_main_t *im = &vnm->interface_main;
2815   ip4_main_t *ipm = &ip4_main;
2816   ip_lookup_main_t *lm = &ipm->lookup_main;
2817   vnet_classify_main_t *cm = &vnet_classify_main;
2818   ip4_address_t *if_addr;
2819
2820   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2821     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2822
2823   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2824     return VNET_API_ERROR_NO_SUCH_ENTRY;
2825
2826   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2827   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2828
2829   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2830
2831   if (NULL != if_addr)
2832     {
2833       fib_prefix_t pfx = {
2834         .fp_len = 32,
2835         .fp_proto = FIB_PROTOCOL_IP4,
2836         .fp_addr.ip4 = *if_addr,
2837       };
2838       u32 fib_index;
2839
2840       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2841                                                        sw_if_index);
2842
2843
2844       if (table_index != (u32) ~ 0)
2845         {
2846           dpo_id_t dpo = DPO_INVALID;
2847
2848           dpo_set (&dpo,
2849                    DPO_CLASSIFY,
2850                    DPO_PROTO_IP4,
2851                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2852
2853           fib_table_entry_special_dpo_add (fib_index,
2854                                            &pfx,
2855                                            FIB_SOURCE_CLASSIFY,
2856                                            FIB_ENTRY_FLAG_NONE, &dpo);
2857           dpo_reset (&dpo);
2858         }
2859       else
2860         {
2861           fib_table_entry_special_remove (fib_index,
2862                                           &pfx, FIB_SOURCE_CLASSIFY);
2863         }
2864     }
2865
2866   return 0;
2867 }
2868 #endif
2869
2870 static clib_error_t *
2871 set_ip_classify_command_fn (vlib_main_t * vm,
2872                             unformat_input_t * input,
2873                             vlib_cli_command_t * cmd)
2874 {
2875   u32 table_index = ~0;
2876   int table_index_set = 0;
2877   u32 sw_if_index = ~0;
2878   int rv;
2879
2880   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2881     {
2882       if (unformat (input, "table-index %d", &table_index))
2883         table_index_set = 1;
2884       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2885                          vnet_get_main (), &sw_if_index))
2886         ;
2887       else
2888         break;
2889     }
2890
2891   if (table_index_set == 0)
2892     return clib_error_return (0, "classify table-index must be specified");
2893
2894   if (sw_if_index == ~0)
2895     return clib_error_return (0, "interface / subif must be specified");
2896
2897   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2898
2899   switch (rv)
2900     {
2901     case 0:
2902       break;
2903
2904     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2905       return clib_error_return (0, "No such interface");
2906
2907     case VNET_API_ERROR_NO_SUCH_ENTRY:
2908       return clib_error_return (0, "No such classifier table");
2909     }
2910   return 0;
2911 }
2912
2913 /*?
2914  * Assign a classification table to an interface. The classification
2915  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2916  * commands. Once the table is created, use this command to filter packets
2917  * on an interface.
2918  *
2919  * @cliexpar
2920  * Example of how to assign a classification table to an interface:
2921  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2922 ?*/
2923 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2924 {
2925     .path = "set ip classify",
2926     .short_help =
2927     "set ip classify intfc <interface> table-index <classify-idx>",
2928     .function = set_ip_classify_command_fn,
2929 };
2930
2931 /*
2932  * fd.io coding-style-patch-verification: ON
2933  *
2934  * Local Variables:
2935  * eval: (c-set-style "gnu")
2936  * End:
2937  */