ip: Use .api declared error counters
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/mfib/ip4_mfib.h>
53 #include <vnet/dpo/load_balance.h>
54 #include <vnet/dpo/load_balance_map.h>
55 #include <vnet/dpo/receive_dpo.h>
56 #include <vnet/dpo/classify_dpo.h>
57 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
58 #include <vnet/adj/adj_dp.h>
59 #include <vnet/pg/pg.h>
60
61 #include <vnet/ip/ip4_forward.h>
62 #include <vnet/interface_output.h>
63 #include <vnet/classify/vnet_classify.h>
64 #include <vnet/ip/reass/ip4_full_reass.h>
65
66 /** @brief IPv4 lookup node.
67     @node ip4-lookup
68
69     This is the main IPv4 lookup dispatch node.
70
71     @param vm vlib_main_t corresponding to the current thread
72     @param node vlib_node_runtime_t
73     @param frame vlib_frame_t whose contents should be dispatched
74
75     @par Graph mechanics: buffer metadata, next index usage
76
77     @em Uses:
78     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
79         - Indicates the @c sw_if_index value of the interface that the
80           packet was received on.
81     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
82         - When the value is @c ~0 then the node performs a longest prefix
83           match (LPM) for the packet destination address in the FIB attached
84           to the receive interface.
85         - Otherwise perform LPM for the packet destination address in the
86           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
87           value (0, 1, ...) and not a VRF id.
88
89     @em Sets:
90     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
91         - The lookup result adjacency index.
92
93     <em>Next Index:</em>
94     - Dispatches the packet to the node index found in
95       ip_adjacency_t @c adj->lookup_next_index
96       (where @c adj is the lookup result adjacency).
97 */
98 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
99                                 vlib_frame_t * frame)
100 {
101   return ip4_lookup_inline (vm, node, frame);
102 }
103
104 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
105
106 /* *INDENT-OFF* */
107 VLIB_REGISTER_NODE (ip4_lookup_node) =
108 {
109   .name = "ip4-lookup",
110   .vector_size = sizeof (u32),
111   .format_trace = format_ip4_lookup_trace,
112   .n_next_nodes = IP_LOOKUP_N_NEXT,
113   .next_nodes = IP4_LOOKUP_NEXT_NODES,
114 };
115 /* *INDENT-ON* */
116
117 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
118                                       vlib_node_runtime_t * node,
119                                       vlib_frame_t * frame)
120 {
121   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
122   u32 n_left, *from;
123   u32 thread_index = vm->thread_index;
124   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
125   u16 nexts[VLIB_FRAME_SIZE], *next;
126
127   from = vlib_frame_vector_args (frame);
128   n_left = frame->n_vectors;
129   next = nexts;
130
131   vlib_get_buffers (vm, from, bufs, n_left);
132
133   while (n_left >= 4)
134     {
135       const load_balance_t *lb0, *lb1;
136       const ip4_header_t *ip0, *ip1;
137       u32 lbi0, hc0, lbi1, hc1;
138       const dpo_id_t *dpo0, *dpo1;
139
140       /* Prefetch next iteration. */
141       {
142         vlib_prefetch_buffer_header (b[2], LOAD);
143         vlib_prefetch_buffer_header (b[3], LOAD);
144
145         CLIB_PREFETCH (b[2]->data, sizeof (ip0[0]), LOAD);
146         CLIB_PREFETCH (b[3]->data, sizeof (ip0[0]), LOAD);
147       }
148
149       ip0 = vlib_buffer_get_current (b[0]);
150       ip1 = vlib_buffer_get_current (b[1]);
151       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
152       lbi1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
153
154       lb0 = load_balance_get (lbi0);
155       lb1 = load_balance_get (lbi1);
156
157       /*
158        * this node is for via FIBs we can re-use the hash value from the
159        * to node if present.
160        * We don't want to use the same hash value at each level in the recursion
161        * graph as that would lead to polarisation
162        */
163       hc0 = hc1 = 0;
164
165       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
166         {
167           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
168             {
169               hc0 = vnet_buffer (b[0])->ip.flow_hash =
170                 vnet_buffer (b[0])->ip.flow_hash >> 1;
171             }
172           else
173             {
174               hc0 = vnet_buffer (b[0])->ip.flow_hash =
175                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
176             }
177           dpo0 = load_balance_get_fwd_bucket
178             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
179         }
180       else
181         {
182           dpo0 = load_balance_get_bucket_i (lb0, 0);
183         }
184       if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
185         {
186           if (PREDICT_TRUE (vnet_buffer (b[1])->ip.flow_hash))
187             {
188               hc1 = vnet_buffer (b[1])->ip.flow_hash =
189                 vnet_buffer (b[1])->ip.flow_hash >> 1;
190             }
191           else
192             {
193               hc1 = vnet_buffer (b[1])->ip.flow_hash =
194                 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
195             }
196           dpo1 = load_balance_get_fwd_bucket
197             (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
198         }
199       else
200         {
201           dpo1 = load_balance_get_bucket_i (lb1, 0);
202         }
203
204       next[0] = dpo0->dpoi_next_node;
205       next[1] = dpo1->dpoi_next_node;
206
207       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
208       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
209
210       vlib_increment_combined_counter
211         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
212       vlib_increment_combined_counter
213         (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, b[1]));
214
215       b += 2;
216       next += 2;
217       n_left -= 2;
218     }
219
220   while (n_left > 0)
221     {
222       const load_balance_t *lb0;
223       const ip4_header_t *ip0;
224       const dpo_id_t *dpo0;
225       u32 lbi0, hc0;
226
227       ip0 = vlib_buffer_get_current (b[0]);
228       lbi0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
229
230       lb0 = load_balance_get (lbi0);
231
232       hc0 = 0;
233       if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
234         {
235           if (PREDICT_TRUE (vnet_buffer (b[0])->ip.flow_hash))
236             {
237               hc0 = vnet_buffer (b[0])->ip.flow_hash =
238                 vnet_buffer (b[0])->ip.flow_hash >> 1;
239             }
240           else
241             {
242               hc0 = vnet_buffer (b[0])->ip.flow_hash =
243                 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
244             }
245           dpo0 = load_balance_get_fwd_bucket
246             (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
247         }
248       else
249         {
250           dpo0 = load_balance_get_bucket_i (lb0, 0);
251         }
252
253       next[0] = dpo0->dpoi_next_node;
254       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
255
256       vlib_increment_combined_counter
257         (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, b[0]));
258
259       b += 1;
260       next += 1;
261       n_left -= 1;
262     }
263
264   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
265   if (node->flags & VLIB_NODE_FLAG_TRACE)
266     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
267
268   return frame->n_vectors;
269 }
270
271 /* *INDENT-OFF* */
272 VLIB_REGISTER_NODE (ip4_load_balance_node) =
273 {
274   .name = "ip4-load-balance",
275   .vector_size = sizeof (u32),
276   .sibling_of = "ip4-lookup",
277   .format_trace = format_ip4_lookup_trace,
278 };
279 /* *INDENT-ON* */
280
281 #ifndef CLIB_MARCH_VARIANT
282 /* get first interface address */
283 ip4_address_t *
284 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
285                              ip_interface_address_t ** result_ia)
286 {
287   ip_lookup_main_t *lm = &im->lookup_main;
288   ip_interface_address_t *ia = 0;
289   ip4_address_t *result = 0;
290
291   /* *INDENT-OFF* */
292   foreach_ip_interface_address
293     (lm, ia, sw_if_index,
294      1 /* honor unnumbered */ ,
295      ({
296        ip4_address_t * a =
297          ip_interface_address_get_address (lm, ia);
298        result = a;
299        break;
300      }));
301   /* *INDENT-OFF* */
302   if (result_ia)
303     *result_ia = result ? ia : 0;
304   return result;
305 }
306 #endif
307
308 static void
309 ip4_add_subnet_bcast_route (u32 fib_index,
310                             fib_prefix_t *pfx,
311                             u32 sw_if_index)
312 {
313   vnet_sw_interface_flags_t iflags;
314
315   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
316
317   fib_table_entry_special_remove(fib_index,
318                                  pfx,
319                                  FIB_SOURCE_INTERFACE);
320
321   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
322     {
323       fib_table_entry_update_one_path (fib_index, pfx,
324                                        FIB_SOURCE_INTERFACE,
325                                        FIB_ENTRY_FLAG_NONE,
326                                        DPO_PROTO_IP4,
327                                        /* No next-hop address */
328                                        &ADJ_BCAST_ADDR,
329                                        sw_if_index,
330                                        // invalid FIB index
331                                        ~0,
332                                        1,
333                                        // no out-label stack
334                                        NULL,
335                                        FIB_ROUTE_PATH_FLAG_NONE);
336     }
337   else
338     {
339         fib_table_entry_special_add(fib_index,
340                                     pfx,
341                                     FIB_SOURCE_INTERFACE,
342                                     (FIB_ENTRY_FLAG_DROP |
343                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
344     }
345 }
346
347 static void
348 ip4_add_interface_prefix_routes (ip4_main_t *im,
349                                  u32 sw_if_index,
350                                  u32 fib_index,
351                                  ip_interface_address_t * a)
352 {
353   ip_lookup_main_t *lm = &im->lookup_main;
354   ip_interface_prefix_t *if_prefix;
355   ip4_address_t *address = ip_interface_address_get_address (lm, a);
356
357   ip_interface_prefix_key_t key = {
358     .prefix = {
359       .fp_len = a->address_length,
360       .fp_proto = FIB_PROTOCOL_IP4,
361       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length],
362     },
363     .sw_if_index = sw_if_index,
364   };
365
366   fib_prefix_t pfx_special = {
367     .fp_proto = FIB_PROTOCOL_IP4,
368   };
369
370   /* If prefix already set on interface, just increment ref count & return */
371   if_prefix = ip_get_interface_prefix (lm, &key);
372   if (if_prefix)
373     {
374       if_prefix->ref_count += 1;
375       return;
376     }
377
378   /* New prefix - allocate a pool entry, initialize it, add to the hash */
379   pool_get (lm->if_prefix_pool, if_prefix);
380   if_prefix->ref_count = 1;
381   if_prefix->src_ia_index = a - lm->if_address_pool;
382   clib_memcpy (&if_prefix->key, &key, sizeof (key));
383   mhash_set (&lm->prefix_to_if_prefix_index, &key,
384              if_prefix - lm->if_prefix_pool, 0 /* old value */);
385
386   pfx_special.fp_len = a->address_length;
387   pfx_special.fp_addr.ip4.as_u32 = address->as_u32;
388
389   /* set the glean route for the prefix */
390   fib_table_entry_update_one_path (fib_index, &pfx_special,
391                                    FIB_SOURCE_INTERFACE,
392                                    (FIB_ENTRY_FLAG_CONNECTED |
393                                     FIB_ENTRY_FLAG_ATTACHED),
394                                    DPO_PROTO_IP4,
395                                    /* No next-hop address */
396                                    NULL,
397                                    sw_if_index,
398                                    /* invalid FIB index */
399                                    ~0,
400                                    1,
401                                    /* no out-label stack */
402                                    NULL,
403                                    FIB_ROUTE_PATH_FLAG_NONE);
404
405   /* length <= 30 - add glean, drop first address, maybe drop bcast address */
406   if (a->address_length <= 30)
407     {
408       /* set a drop route for the base address of the prefix */
409       pfx_special.fp_len = 32;
410       pfx_special.fp_addr.ip4.as_u32 =
411         address->as_u32 & im->fib_masks[a->address_length];
412
413       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
414         fib_table_entry_special_add (fib_index, &pfx_special,
415                                      FIB_SOURCE_INTERFACE,
416                                      (FIB_ENTRY_FLAG_DROP |
417                                       FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
418
419       /* set a route for the broadcast address of the prefix */
420       pfx_special.fp_len = 32;
421       pfx_special.fp_addr.ip4.as_u32 =
422         address->as_u32 | ~im->fib_masks[a->address_length];
423       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
424         ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index);
425
426
427     }
428   /* length == 31 - add an attached route for the other address */
429   else if (a->address_length == 31)
430     {
431       pfx_special.fp_len = 32;
432       pfx_special.fp_addr.ip4.as_u32 =
433         address->as_u32 ^ clib_host_to_net_u32(1);
434
435       fib_table_entry_update_one_path (fib_index, &pfx_special,
436                                        FIB_SOURCE_INTERFACE,
437                                        (FIB_ENTRY_FLAG_ATTACHED),
438                                        DPO_PROTO_IP4,
439                                        &pfx_special.fp_addr,
440                                        sw_if_index,
441                                        /* invalid FIB index */
442                                        ~0,
443                                        1,
444                                        NULL,
445                                        FIB_ROUTE_PATH_FLAG_NONE);
446     }
447 }
448
449 static void
450 ip4_add_interface_routes (u32 sw_if_index,
451                           ip4_main_t * im, u32 fib_index,
452                           ip_interface_address_t * a)
453 {
454   ip_lookup_main_t *lm = &im->lookup_main;
455   ip4_address_t *address = ip_interface_address_get_address (lm, a);
456   fib_prefix_t pfx = {
457     .fp_len = 32,
458     .fp_proto = FIB_PROTOCOL_IP4,
459     .fp_addr.ip4 = *address,
460   };
461
462   /* set special routes for the prefix if needed */
463   ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a);
464
465   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
466     {
467       u32 classify_table_index =
468         lm->classify_table_index_by_sw_if_index[sw_if_index];
469       if (classify_table_index != (u32) ~ 0)
470         {
471           dpo_id_t dpo = DPO_INVALID;
472
473           dpo_set (&dpo,
474                    DPO_CLASSIFY,
475                    DPO_PROTO_IP4,
476                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
477
478           fib_table_entry_special_dpo_add (fib_index,
479                                            &pfx,
480                                            FIB_SOURCE_CLASSIFY,
481                                            FIB_ENTRY_FLAG_NONE, &dpo);
482           dpo_reset (&dpo);
483         }
484     }
485
486   fib_table_entry_update_one_path (fib_index, &pfx,
487                                    FIB_SOURCE_INTERFACE,
488                                    (FIB_ENTRY_FLAG_CONNECTED |
489                                     FIB_ENTRY_FLAG_LOCAL),
490                                    DPO_PROTO_IP4,
491                                    &pfx.fp_addr,
492                                    sw_if_index,
493                                    // invalid FIB index
494                                    ~0,
495                                    1, NULL,
496                                    FIB_ROUTE_PATH_FLAG_NONE);
497 }
498
499 static void
500 ip4_del_interface_prefix_routes (ip4_main_t * im,
501                                  u32 sw_if_index,
502                                  u32 fib_index,
503                                  ip4_address_t * address,
504                                  u32 address_length)
505 {
506   ip_lookup_main_t *lm = &im->lookup_main;
507   ip_interface_prefix_t *if_prefix;
508
509   ip_interface_prefix_key_t key = {
510     .prefix = {
511       .fp_len = address_length,
512       .fp_proto = FIB_PROTOCOL_IP4,
513       .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length],
514     },
515     .sw_if_index = sw_if_index,
516   };
517
518   fib_prefix_t pfx_special = {
519     .fp_len = 32,
520     .fp_proto = FIB_PROTOCOL_IP4,
521   };
522
523   if_prefix = ip_get_interface_prefix (lm, &key);
524   if (!if_prefix)
525     {
526       clib_warning ("Prefix not found while deleting %U",
527                     format_ip4_address_and_length, address, address_length);
528       return;
529     }
530
531   if_prefix->ref_count -= 1;
532
533   /*
534    * Routes need to be adjusted if deleting last intf addr in prefix
535    *
536    * We're done now otherwise
537    */
538   if (if_prefix->ref_count > 0)
539     return;
540
541   /* length <= 30, delete glean route, first address, last address */
542   if (address_length <= 30)
543     {
544       /* Less work to do in FIB if we remove the covered /32s first */
545
546       /* first address in prefix */
547       pfx_special.fp_addr.ip4.as_u32 =
548         address->as_u32 & im->fib_masks[address_length];
549       pfx_special.fp_len = 32;
550
551       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
552         fib_table_entry_special_remove (fib_index,
553                                         &pfx_special,
554                                         FIB_SOURCE_INTERFACE);
555
556       /* prefix broadcast address */
557       pfx_special.fp_addr.ip4.as_u32 =
558         address->as_u32 | ~im->fib_masks[address_length];
559       pfx_special.fp_len = 32;
560
561       if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32)
562         fib_table_entry_special_remove (fib_index,
563                                         &pfx_special,
564                                         FIB_SOURCE_INTERFACE);
565     }
566   else if (address_length == 31)
567     {
568       /* length == 31, delete attached route for the other address */
569       pfx_special.fp_addr.ip4.as_u32 =
570         address->as_u32 ^ clib_host_to_net_u32(1);
571
572       fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
573     }
574
575   /* remove glean route for prefix */
576   pfx_special.fp_addr.ip4 = *address;
577   pfx_special.fp_len = address_length;
578   fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE);
579
580   mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */);
581   pool_put (lm->if_prefix_pool, if_prefix);
582 }
583
584 static void
585 ip4_del_interface_routes (u32 sw_if_index,
586                           ip4_main_t * im,
587                           u32 fib_index,
588                           ip4_address_t * address, u32 address_length)
589 {
590   fib_prefix_t pfx = {
591     .fp_len = 32,
592     .fp_proto = FIB_PROTOCOL_IP4,
593     .fp_addr.ip4 = *address,
594   };
595
596   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
597
598   ip4_del_interface_prefix_routes (im, sw_if_index, fib_index,
599                                    address, address_length);
600 }
601
602 #ifndef CLIB_MARCH_VARIANT
603 void
604 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
605 {
606   ip4_main_t *im = &ip4_main;
607   vnet_main_t *vnm = vnet_get_main ();
608   vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
609
610   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
611
612   /*
613    * enable/disable only on the 1<->0 transition
614    */
615   if (is_enable)
616     {
617       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
618         return;
619     }
620   else
621     {
622       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
623       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
624         return;
625     }
626   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
627                                !is_enable, 0, 0);
628
629
630   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
631                                sw_if_index, !is_enable, 0, 0);
632
633   if (is_enable)
634     hi->l3_if_count++;
635   else if (hi->l3_if_count)
636     hi->l3_if_count--;
637
638   {
639     ip4_enable_disable_interface_callback_t *cb;
640     vec_foreach (cb, im->enable_disable_interface_callbacks)
641       cb->function (im, cb->function_opaque, sw_if_index, is_enable);
642   }
643 }
644
645 static clib_error_t *
646 ip4_add_del_interface_address_internal (vlib_main_t * vm,
647                                         u32 sw_if_index,
648                                         ip4_address_t * address,
649                                         u32 address_length, u32 is_del)
650 {
651   vnet_main_t *vnm = vnet_get_main ();
652   ip4_main_t *im = &ip4_main;
653   ip_lookup_main_t *lm = &im->lookup_main;
654   clib_error_t *error = 0;
655   u32 if_address_index;
656   ip4_address_fib_t ip4_af, *addr_fib = 0;
657
658   error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
659   if (error)
660     {
661       vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
662       return error;
663     }
664
665   ip4_addr_fib_init (&ip4_af, address,
666                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
667   vec_add1 (addr_fib, ip4_af);
668
669   /*
670    * there is no support for adj-fib handling in the presence of overlapping
671    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
672    * most routers do.
673    */
674   /* *INDENT-OFF* */
675   if (!is_del)
676     {
677       /* When adding an address check that it does not conflict
678          with an existing address on any interface in this table. */
679       ip_interface_address_t *ia;
680       vnet_sw_interface_t *sif;
681
682       pool_foreach (sif, vnm->interface_main.sw_interfaces)
683        {
684           if (im->fib_index_by_sw_if_index[sw_if_index] ==
685               im->fib_index_by_sw_if_index[sif->sw_if_index])
686             {
687               foreach_ip_interface_address
688                 (&im->lookup_main, ia, sif->sw_if_index,
689                  0 /* honor unnumbered */ ,
690                  ({
691                    ip4_address_t * x =
692                      ip_interface_address_get_address
693                      (&im->lookup_main, ia);
694
695                    if (ip4_destination_matches_route
696                        (im, address, x, ia->address_length) ||
697                        ip4_destination_matches_route (im,
698                                                       x,
699                                                       address,
700                                                       address_length))
701                      {
702                        /* an intf may have >1 addr from the same prefix */
703                        if ((sw_if_index == sif->sw_if_index) &&
704                            (ia->address_length == address_length) &&
705                            (x->as_u32 != address->as_u32))
706                          continue;
707
708                        if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
709                          /* if the address we're comparing against is stale
710                           * then the CP has not added this one back yet, maybe
711                           * it never will, so we have to assume it won't and
712                           * ignore it. if it does add it back, then it will fail
713                           * because this one is now present */
714                          continue;
715
716                        /* error if the length or intf was different */
717                        vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE;
718
719                        error = clib_error_create
720                          ("failed to add %U on %U which conflicts with %U for interface %U",
721                           format_ip4_address_and_length, address,
722                           address_length,
723                           format_vnet_sw_if_index_name, vnm,
724                           sw_if_index,
725                           format_ip4_address_and_length, x,
726                           ia->address_length,
727                           format_vnet_sw_if_index_name, vnm,
728                           sif->sw_if_index);
729                        goto done;
730                      }
731                  }));
732             }
733       }
734     }
735   /* *INDENT-ON* */
736
737   if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
738
739   if (is_del)
740     {
741       if (~0 == if_address_index)
742         {
743           vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
744           error = clib_error_create ("%U not found for interface %U",
745                                      lm->format_address_and_length,
746                                      addr_fib, address_length,
747                                      format_vnet_sw_if_index_name, vnm,
748                                      sw_if_index);
749           goto done;
750         }
751
752       error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib,
753                                         address_length, sw_if_index);
754       if (error)
755         goto done;
756     }
757   else
758     {
759       if (~0 != if_address_index)
760         {
761           ip_interface_address_t *ia;
762
763           ia = pool_elt_at_index (lm->if_address_pool, if_address_index);
764
765           if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
766             {
767               if (ia->sw_if_index == sw_if_index)
768                 {
769                   /* re-adding an address during the replace action.
770                    * consdier this the update. clear the flag and
771                    * we're done */
772                   ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE;
773                   goto done;
774                 }
775               else
776                 {
777                   /* The prefix is moving from one interface to another.
778                    * delete the stale and add the new */
779                   ip4_add_del_interface_address_internal (vm,
780                                                           ia->sw_if_index,
781                                                           address,
782                                                           address_length, 1);
783                   ia = NULL;
784                   error = ip_interface_address_add (lm, sw_if_index,
785                                                     addr_fib, address_length,
786                                                     &if_address_index);
787                 }
788             }
789           else
790             {
791               vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
792               error = clib_error_create
793                 ("Prefix %U already found on interface %U",
794                  lm->format_address_and_length, addr_fib, address_length,
795                  format_vnet_sw_if_index_name, vnm, ia->sw_if_index);
796             }
797         }
798       else
799         error = ip_interface_address_add (lm, sw_if_index,
800                                           addr_fib, address_length,
801                                           &if_address_index);
802     }
803
804   if (error)
805     goto done;
806
807   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
808   ip4_mfib_interface_enable_disable (sw_if_index, !is_del);
809
810   /* intf addr routes are added/deleted on admin up/down */
811   if (vnet_sw_interface_is_admin_up (vnm, sw_if_index))
812     {
813       if (is_del)
814         ip4_del_interface_routes (sw_if_index,
815                                   im, ip4_af.fib_index, address,
816                                   address_length);
817       else
818         ip4_add_interface_routes (sw_if_index,
819                                   im, ip4_af.fib_index,
820                                   pool_elt_at_index
821                                   (lm->if_address_pool, if_address_index));
822     }
823
824   ip4_add_del_interface_address_callback_t *cb;
825   vec_foreach (cb, im->add_del_interface_address_callbacks)
826     cb->function (im, cb->function_opaque, sw_if_index,
827                   address, address_length, if_address_index, is_del);
828
829 done:
830   vec_free (addr_fib);
831   return error;
832 }
833
834 clib_error_t *
835 ip4_add_del_interface_address (vlib_main_t * vm,
836                                u32 sw_if_index,
837                                ip4_address_t * address,
838                                u32 address_length, u32 is_del)
839 {
840   return ip4_add_del_interface_address_internal
841     (vm, sw_if_index, address, address_length, is_del);
842 }
843
844 void
845 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
846 {
847   ip_interface_address_t *ia;
848   ip4_main_t *im;
849
850   im = &ip4_main;
851
852   /*
853    * when directed broadcast is enabled, the subnet braodcast route will forward
854    * packets using an adjacency with a broadcast MAC. otherwise it drops
855    */
856   /* *INDENT-OFF* */
857   foreach_ip_interface_address(&im->lookup_main, ia,
858                                sw_if_index, 0,
859      ({
860        if (ia->address_length <= 30)
861          {
862            ip4_address_t *ipa;
863
864            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
865
866            fib_prefix_t pfx = {
867              .fp_len = 32,
868              .fp_proto = FIB_PROTOCOL_IP4,
869              .fp_addr = {
870                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
871              },
872            };
873
874            ip4_add_subnet_bcast_route
875              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
876                                                   sw_if_index),
877               &pfx, sw_if_index);
878          }
879      }));
880   /* *INDENT-ON* */
881 }
882 #endif
883
884 static clib_error_t *
885 ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
886 {
887   ip4_main_t *im = &ip4_main;
888   ip_interface_address_t *ia;
889   ip4_address_t *a;
890   u32 is_admin_up, fib_index;
891
892   vec_validate_init_empty (im->
893                            lookup_main.if_address_pool_index_by_sw_if_index,
894                            sw_if_index, ~0);
895
896   is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
897
898   fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
899
900   /* *INDENT-OFF* */
901   foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
902                                 0 /* honor unnumbered */,
903   ({
904     a = ip_interface_address_get_address (&im->lookup_main, ia);
905     if (is_admin_up)
906       ip4_add_interface_routes (sw_if_index,
907                                 im, fib_index,
908                                 ia);
909     else
910       ip4_del_interface_routes (sw_if_index,
911                                 im, fib_index,
912                                 a, ia->address_length);
913   }));
914   /* *INDENT-ON* */
915
916   return 0;
917 }
918
919 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
920
/*
 * Built-in ip4 unicast rx feature path definition.
 *
 * The "ip4-unicast" arc is started from ip4-input and
 * ip4-input-no-checksum and its last node is ip4-lookup. The
 * runs_before entries below declare this ordering:
 *
 *   ip4-flow-classify -> ip4-inacl -> ip4-policer-classify
 *     -> ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass
 *     -> ip4-lookup
 *
 * ip4-source-and-port-range-check-rx is additionally ordered before
 * ip4-policer-classify, and ip4-not-enabled before ip4-lookup.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec4-input-feature",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* Same node name is also registered on the ip4-multicast arc. */
VNET_FEATURE_INIT (ip4_not_enabled, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* Terminal node of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};
993
/*
 * Built-in ip4 multicast rx feature path definition.
 *
 * The "ip4-multicast" arc is started from ip4-input and
 * ip4-input-no-checksum and its last node is ip4-mfib-forward-lookup.
 * Both vpath-input-ip4 and ip4-not-enabled are declared to run before
 * that last node; no ordering is declared between the two.
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .last_in_arc = "ip4-mfib-forward-lookup",
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/* Terminal node of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};
1023
/*
 * Source and port-range check ip4 tx feature path definition.
 *
 * The "ip4-output" arc is started from ip4-rewrite, ip4-midchain and
 * ip4-dvr-dpo and its last node is interface-output. Declared ordering:
 *
 *   ip4-source-and-port-range-check-tx -> ip4-outacl
 *     -> ipsec4-output-feature -> interface-output
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  .last_in_arc = "interface-output",
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ip4-outacl"),
};

VNET_FEATURE_INIT (ip4_outacl, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-outacl",
  .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
};

VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec4-output-feature",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition */
/* Terminal node of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
1061 /* *INDENT-ON* */
1062
1063 static clib_error_t *
1064 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1065 {
1066   ip4_main_t *im = &ip4_main;
1067
1068   vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
1069   vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
1070
1071   if (is_add)
1072     {
1073       /* Fill in lookup tables with default table (0). */
1074       im->fib_index_by_sw_if_index[sw_if_index] = 0;
1075       im->mfib_index_by_sw_if_index[sw_if_index] = 0;
1076     }
1077   else
1078     {
1079       ip4_main_t *im4 = &ip4_main;
1080       ip_lookup_main_t *lm4 = &im4->lookup_main;
1081       ip_interface_address_t *ia = 0;
1082       ip4_address_t *address;
1083       vlib_main_t *vm = vlib_get_main ();
1084
1085       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
1086       /* *INDENT-OFF* */
1087       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
1088       ({
1089         address = ip_interface_address_get_address (lm4, ia);
1090         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1091       }));
1092       /* *INDENT-ON* */
1093       ip4_mfib_interface_enable_disable (sw_if_index, 0);
1094
1095       if (0 != im4->fib_index_by_sw_if_index[sw_if_index])
1096         fib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
1097       if (0 != im4->mfib_index_by_sw_if_index[sw_if_index])
1098         mfib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
1099
1100       /* Erase the lookup tables just in case */
1101       im4->fib_index_by_sw_if_index[sw_if_index] = ~0;
1102       im4->mfib_index_by_sw_if_index[sw_if_index] = ~0;
1103     }
1104
1105   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1106                                is_add, 0, 0);
1107
1108   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1109                                sw_if_index, is_add, 0, 0);
1110
1111   return /* no error */ 0;
1112 }
1113
1114 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1115
/*
 * Global IP4 main: the single instance of the IPv4 state referenced as
 * &ip4_main throughout this file. It is defined only when
 * CLIB_MARCH_VARIANT is not set (presumably so that per-architecture
 * variant builds of this file do not emit duplicate definitions).
 */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
1120
1121 static clib_error_t *
1122 ip4_lookup_init (vlib_main_t * vm)
1123 {
1124   ip4_main_t *im = &ip4_main;
1125   clib_error_t *error;
1126   uword i;
1127
1128   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1129     return error;
1130   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1131     return (error);
1132   if ((error = vlib_call_init_function (vm, fib_module_init)))
1133     return error;
1134   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1135     return error;
1136
1137   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1138     {
1139       u32 m;
1140
1141       if (i < 32)
1142         m = pow2_mask (i) << (32 - i);
1143       else
1144         m = ~0;
1145       im->fib_masks[i] = clib_host_to_net_u32 (m);
1146     }
1147
1148   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1149
1150   /* Create FIB with index 0 and table id of 0. */
1151   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1152                                      FIB_SOURCE_DEFAULT_ROUTE);
1153   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1154                                       MFIB_SOURCE_DEFAULT_ROUTE);
1155
1156   {
1157     pg_node_t *pn;
1158     pn = pg_get_node (ip4_lookup_node.index);
1159     pn->unformat_edit = unformat_pg_ip4_header;
1160   }
1161
1162   {
1163     ethernet_arp_header_t h;
1164
1165     clib_memset (&h, 0, sizeof (h));
1166
1167 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1168 #define _8(f,v) h.f = v;
1169     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1170     _16 (l3_type, ETHERNET_TYPE_IP4);
1171     _8 (n_l2_address_bytes, 6);
1172     _8 (n_l3_address_bytes, 4);
1173     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1174 #undef _16
1175 #undef _8
1176
1177     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1178                                /* data */ &h,
1179                                sizeof (h),
1180                                /* alloc chunk size */ 8,
1181                                "ip4 arp");
1182   }
1183
1184   return error;
1185 }
1186
1187 VLIB_INIT_FUNCTION (ip4_lookup_init);
1188
/*
 * Per-packet trace record shared by the ip4 lookup / rewrite style nodes.
 * Filled in by ip4_forward_next_trace() and printed by the
 * format_ip4_*_trace() functions below.
 */
typedef struct
{
  /* Adjacency taken. (copied from the buffer's ip.adj_index[]) */
  u32 dpo_index;
  /* copied from the buffer's ip.flow_hash */
  u32 flow_hash;
  /*
   * The buffer's sw_if_index[VLIB_TX] when that is set (!= ~0), otherwise
   * the FIB index bound to the RX interface.
   */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1200
1201 #ifndef CLIB_MARCH_VARIANT
1202 u8 *
1203 format_ip4_forward_next_trace (u8 * s, va_list * args)
1204 {
1205   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1206   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1207   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1208   u32 indent = format_get_indent (s);
1209   s = format (s, "%U%U",
1210               format_white_space, indent,
1211               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1212   return s;
1213 }
1214 #endif
1215
1216 static u8 *
1217 format_ip4_lookup_trace (u8 * s, va_list * args)
1218 {
1219   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1220   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1221   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1222   u32 indent = format_get_indent (s);
1223
1224   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1225               t->fib_index, t->dpo_index, t->flow_hash);
1226   s = format (s, "\n%U%U",
1227               format_white_space, indent,
1228               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1229   return s;
1230 }
1231
1232 static u8 *
1233 format_ip4_rewrite_trace (u8 * s, va_list * args)
1234 {
1235   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1236   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1237   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1238   u32 indent = format_get_indent (s);
1239
1240   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1241               t->fib_index, t->dpo_index, format_ip_adjacency,
1242               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1243   s = format (s, "\n%U%U",
1244               format_white_space, indent,
1245               format_ip_adjacency_packet_data,
1246               t->packet_data, sizeof (t->packet_data));
1247   return s;
1248 }
1249
1250 #ifndef CLIB_MARCH_VARIANT
1251 /* Common trace function for all ip4-forward next nodes. */
1252 void
1253 ip4_forward_next_trace (vlib_main_t * vm,
1254                         vlib_node_runtime_t * node,
1255                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1256 {
1257   u32 *from, n_left;
1258   ip4_main_t *im = &ip4_main;
1259
1260   n_left = frame->n_vectors;
1261   from = vlib_frame_vector_args (frame);
1262
1263   while (n_left >= 4)
1264     {
1265       u32 bi0, bi1;
1266       vlib_buffer_t *b0, *b1;
1267       ip4_forward_next_trace_t *t0, *t1;
1268
1269       /* Prefetch next iteration. */
1270       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1271       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1272
1273       bi0 = from[0];
1274       bi1 = from[1];
1275
1276       b0 = vlib_get_buffer (vm, bi0);
1277       b1 = vlib_get_buffer (vm, bi1);
1278
1279       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1280         {
1281           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1282           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1283           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1284           t0->fib_index =
1285             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1286              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1287             vec_elt (im->fib_index_by_sw_if_index,
1288                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1289
1290           clib_memcpy_fast (t0->packet_data,
1291                             vlib_buffer_get_current (b0),
1292                             sizeof (t0->packet_data));
1293         }
1294       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1295         {
1296           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1297           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1298           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1299           t1->fib_index =
1300             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1301              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1302             vec_elt (im->fib_index_by_sw_if_index,
1303                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1304           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1305                             sizeof (t1->packet_data));
1306         }
1307       from += 2;
1308       n_left -= 2;
1309     }
1310
1311   while (n_left >= 1)
1312     {
1313       u32 bi0;
1314       vlib_buffer_t *b0;
1315       ip4_forward_next_trace_t *t0;
1316
1317       bi0 = from[0];
1318
1319       b0 = vlib_get_buffer (vm, bi0);
1320
1321       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1322         {
1323           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1324           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1325           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1326           t0->fib_index =
1327             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1328              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1329             vec_elt (im->fib_index_by_sw_if_index,
1330                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1331           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1332                             sizeof (t0->packet_data));
1333         }
1334       from += 1;
1335       n_left -= 1;
1336     }
1337 }
1338
1339 /* Compute TCP/UDP/ICMP4 checksum in software. */
1340 u16
1341 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1342                               ip4_header_t * ip0)
1343 {
1344   ip_csum_t sum0;
1345   u32 ip_header_length, payload_length_host_byte_order;
1346
1347   /* Initialize checksum with ip header. */
1348   ip_header_length = ip4_header_bytes (ip0);
1349   payload_length_host_byte_order =
1350     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1351   sum0 =
1352     clib_host_to_net_u32 (payload_length_host_byte_order +
1353                           (ip0->protocol << 16));
1354
1355   if (BITS (uword) == 32)
1356     {
1357       sum0 =
1358         ip_csum_with_carry (sum0,
1359                             clib_mem_unaligned (&ip0->src_address, u32));
1360       sum0 =
1361         ip_csum_with_carry (sum0,
1362                             clib_mem_unaligned (&ip0->dst_address, u32));
1363     }
1364   else
1365     sum0 =
1366       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1367
1368   return ip_calculate_l4_checksum (vm, p0, sum0,
1369                                    payload_length_host_byte_order, (u8 *) ip0,
1370                                    ip_header_length, NULL);
1371 }
1372
1373 u32
1374 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1375 {
1376   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1377   udp_header_t *udp0;
1378   u16 sum16;
1379
1380   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1381           || ip0->protocol == IP_PROTOCOL_UDP);
1382
1383   udp0 = (void *) (ip0 + 1);
1384   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1385     {
1386       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1387                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1388       return p0->flags;
1389     }
1390
1391   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1392
1393   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1394                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1395
1396   return p0->flags;
1397 }
1398 #endif
1399
/*
 * "ip4-local" feature arc: started from the ip4-local and ip4-receive
 * nodes, with ip4-local-end-of-arc as its last node. Its arc index is
 * read as vnet_feat_arc_ip4_local.feature_arc_index in
 * ip4_local_set_next_and_error().
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_local) = {
  .arc_name = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"),
  .last_in_arc = "ip4-local-end-of-arc",
};
/* *INDENT-ON* */
1407
1408 static inline void
1409 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1410                             ip4_header_t * ip, u8 is_udp, u8 * error,
1411                             u8 * good_tcp_udp)
1412 {
1413   u32 flags0;
1414   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1415   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1416   if (is_udp)
1417     {
1418       udp_header_t *udp;
1419       u32 ip_len, udp_len;
1420       i32 len_diff;
1421       udp = ip4_next_header (ip);
1422       /* Verify UDP length. */
1423       ip_len = clib_net_to_host_u16 (ip->length);
1424       udp_len = clib_net_to_host_u16 (udp->length);
1425
1426       len_diff = ip_len - udp_len;
1427       *good_tcp_udp &= len_diff >= 0;
1428       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1429     }
1430 }
1431
/*
 * L4 checksum state helpers for ip4-local. Every macro argument and every
 * expansion is fully parenthesized so the macros can be used with
 * arbitrary expressions and inside larger expressions.
 */

/* True when the buffer is flagged for TCP or UDP checksum offload. */
#define ip4_local_csum_is_offloaded(_b)                                       \
  (((_b)->flags & VNET_BUFFER_F_OFFLOAD) &&                                   \
   (vnet_buffer (_b)->oflags &                                                \
    (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)))

/*
 * True when a TCP/UDP packet still needs a software checksum check:
 * the checksum is neither already computed nor offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                             \
  ((is_tcp_udp) &&                                                            \
   !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) ||                    \
     ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                           \
  ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) ||                      \
    ip4_local_csum_is_offloaded (_b)) != 0)
1444
1445 static inline void
1446 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1447                          ip4_header_t * ih, u8 * error)
1448 {
1449   u8 is_udp, is_tcp_udp, good_tcp_udp;
1450
1451   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1452   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1453
1454   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1455     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1456   else
1457     good_tcp_udp = ip4_local_csum_is_valid (b);
1458
1459   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1460   *error = (is_tcp_udp && !good_tcp_udp
1461             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1462 }
1463
1464 static inline void
1465 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1466                             ip4_header_t ** ih, u8 * error)
1467 {
1468   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1469
1470   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1471   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1472
1473   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1474   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1475
1476   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1477   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1478
1479   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1480                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1481     {
1482       if (is_tcp_udp[0])
1483         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1484                                     &good_tcp_udp[0]);
1485       if (is_tcp_udp[1])
1486         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1487                                     &good_tcp_udp[1]);
1488     }
1489
1490   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1491               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1492   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1493               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1494 }
1495
1496 static inline void
1497 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1498                               vlib_buffer_t * b, u16 * next, u8 error,
1499                               u8 head_of_feature_arc)
1500 {
1501   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1502   u32 next_index;
1503
1504   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1505   b->error = error ? error_node->errors[error] : 0;
1506   if (head_of_feature_arc)
1507     {
1508       next_index = *next;
1509       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1510         {
1511           vnet_feature_arc_start (
1512             arc_index, vnet_buffer (b)->ip.rx_sw_if_index, &next_index, b);
1513           *next = next_index;
1514         }
1515     }
1516 }
1517
/*
 * Result of the most recent source-address check, kept by the caller
 * across packets so that ip4_local_check_src() / ip4_local_check_src_x2()
 * can reuse it when the next packet has the same source and fib index.
 */
typedef struct
{
  /* The src and fib-index together determine if packet n is the same as n-1 */
  ip4_address_t src;
  u32 fib_index;
  /* load-balance index found by the source lookup */
  u32 lbi;
  /* error code the check resulted in */
  u8 error;
  /* non-zero until the first lookup has been stored; forces a lookup */
  u8 first;
} ip4_local_last_check_t;
1527
1528 static inline void
1529 ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0,
1530                      ip4_local_last_check_t *last_check, u8 *error0,
1531                      int is_receive_dpo)
1532 {
1533   const dpo_id_t *dpo0;
1534   load_balance_t *lb0;
1535   u32 lbi0;
1536
1537   vnet_buffer (b)->ip.fib_index =
1538     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1539     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1540
1541   vnet_buffer (b)->ip.rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
1542   if (is_receive_dpo)
1543     {
1544       receive_dpo_t *rd;
1545       rd = receive_dpo_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
1546       if (rd->rd_sw_if_index != ~0)
1547         vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index;
1548     }
1549
1550   /*
1551    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1552    *  adjacency for the destination address (the local interface address).
1553    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1554    *  adjacency for the source address (the remote sender's address)
1555    */
1556   if (PREDICT_TRUE ((last_check->src.as_u32 != ip0->src_address.as_u32)) ||
1557       (last_check->fib_index != vnet_buffer (b)->ip.fib_index) ||
1558       last_check->first)
1559     {
1560       lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
1561                                         &ip0->src_address);
1562
1563       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1564         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1565       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1566
1567       lb0 = load_balance_get (lbi0);
1568       dpo0 = load_balance_get_bucket_i (lb0, 0);
1569
1570       /*
1571        * Must have a route to source otherwise we drop the packet.
1572        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1573        *
1574        * The checks are:
1575        *  - the source is a recieve => it's from us => bogus, do this
1576        *    first since it sets a different error code.
1577        *  - uRPF check for any route to source - accept if passes.
1578        *  - allow packets destined to the broadcast address from unknown sources
1579        */
1580
1581       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1582                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1583                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1584       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1585                   && !fib_urpf_check_size (lb0->lb_urpf)
1586                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1587                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1588
1589       last_check->src.as_u32 = ip0->src_address.as_u32;
1590       last_check->lbi = lbi0;
1591       last_check->error = *error0;
1592       last_check->first = 0;
1593       last_check->fib_index = vnet_buffer (b)->ip.fib_index;
1594     }
1595   else
1596     {
1597       vnet_buffer (b)->ip.adj_index[VLIB_RX] =
1598         vnet_buffer (b)->ip.adj_index[VLIB_TX];
1599       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1600       *error0 = last_check->error;
1601     }
1602 }
1603
1604 static inline void
1605 ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip,
1606                         ip4_local_last_check_t *last_check, u8 *error,
1607                         int is_receive_dpo)
1608 {
1609   const dpo_id_t *dpo[2];
1610   load_balance_t *lb[2];
1611   u32 not_last_hit;
1612   u32 lbi[2];
1613
1614   not_last_hit = last_check->first;
1615   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1616   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1617
1618   vnet_buffer (b[0])->ip.fib_index =
1619     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1620     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1621     vnet_buffer (b[0])->ip.fib_index;
1622
1623   vnet_buffer (b[1])->ip.fib_index =
1624     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1625     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1626     vnet_buffer (b[1])->ip.fib_index;
1627
1628   not_last_hit |= vnet_buffer (b[0])->ip.fib_index ^ last_check->fib_index;
1629   not_last_hit |= vnet_buffer (b[1])->ip.fib_index ^ last_check->fib_index;
1630
1631   vnet_buffer (b[0])->ip.rx_sw_if_index =
1632     vnet_buffer (b[0])->sw_if_index[VLIB_RX];
1633   vnet_buffer (b[1])->ip.rx_sw_if_index =
1634     vnet_buffer (b[1])->sw_if_index[VLIB_RX];
1635   if (is_receive_dpo)
1636     {
1637       const receive_dpo_t *rd0, *rd1;
1638       rd0 = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
1639       rd1 = receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
1640       if (rd0->rd_sw_if_index != ~0)
1641         vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
1642       if (rd1->rd_sw_if_index != ~0)
1643         vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
1644     }
1645
1646   /*
1647    * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
1648    *  adjacency for the destination address (the local interface address).
1649    * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
1650    *  adjacency for the source address (the remote sender's address)
1651    */
1652   if (PREDICT_TRUE (not_last_hit))
1653     {
1654       ip4_fib_forwarding_lookup_x2 (
1655         vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
1656         &ip[0]->src_address, &ip[1]->src_address, &lbi[0], &lbi[1]);
1657
1658       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1659         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1660       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1661
1662       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1663         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1664       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1665
1666       lb[0] = load_balance_get (lbi[0]);
1667       lb[1] = load_balance_get (lbi[1]);
1668
1669       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1670       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1671
1672       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1673                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1674                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1675       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1676                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1677                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1678                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1679
1680       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1681                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1682                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1683       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1684                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1685                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1686                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1687
1688       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1689       last_check->lbi = lbi[1];
1690       last_check->error = error[1];
1691       last_check->first = 0;
1692       last_check->fib_index = vnet_buffer (b[1])->ip.fib_index;
1693     }
1694   else
1695     {
1696       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
1697         vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
1698       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1699
1700       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
1701         vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
1702       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1703
1704       error[0] = last_check->error;
1705       error[1] = last_check->error;
1706     }
1707 }
1708
/**
 * Classification assigned to each packet entering the ip4-local path.
 *
 * IP_LOCAL_PACKET_TYPE_L4 has to remain 0: ip4_local_inline () tests a
 * packet's type with '!pt[x]' and detects mixed pairs with
 * 'pt[0] ^ pt[1]'.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,   /* regular packet, dispatched by protocol */
  IP_LOCAL_PACKET_TYPE_NAT = 1,  /* buffer flagged VNET_BUFFER_F_IS_NATED */
  IP_LOCAL_PACKET_TYPE_FRAG = 2, /* IPv4 fragment, sent to reassembly */
};
1715
1716 /**
1717  * Determine packet type and next node.
1718  *
1719  * The expectation is that all packets that are not L4 will skip
1720  * checksums and source checks.
1721  */
1722 always_inline u8
1723 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1724 {
1725   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1726
1727   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1728     {
1729       *next = IP_LOCAL_NEXT_REASSEMBLY;
1730       return IP_LOCAL_PACKET_TYPE_FRAG;
1731     }
1732   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1733     {
1734       *next = lm->local_next_by_ip_protocol[ip->protocol];
1735       return IP_LOCAL_PACKET_TYPE_NAT;
1736     }
1737
1738   *next = lm->local_next_by_ip_protocol[ip->protocol];
1739   return IP_LOCAL_PACKET_TYPE_L4;
1740 }
1741
1742 static inline uword
1743 ip4_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
1744                   vlib_frame_t *frame, int head_of_feature_arc,
1745                   int is_receive_dpo)
1746 {
1747   u32 *from, n_left_from;
1748   vlib_node_runtime_t *error_node =
1749     vlib_node_get_runtime (vm, ip4_local_node.index);
1750   u16 nexts[VLIB_FRAME_SIZE], *next;
1751   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1752   ip4_header_t *ip[2];
1753   u8 error[2], pt[2];
1754
1755   ip4_local_last_check_t last_check = {
1756     /*
1757      * 0.0.0.0 can appear as the source address of an IP packet,
1758      * as can any other address, hence the need to use the 'first'
1759      * member to make sure the .lbi is initialised for the first
1760      * packet.
1761      */
1762     .src = { .as_u32 = 0 },
1763     .lbi = ~0,
1764     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1765     .first = 1,
1766     .fib_index = 0,
1767   };
1768
1769   from = vlib_frame_vector_args (frame);
1770   n_left_from = frame->n_vectors;
1771
1772   if (node->flags & VLIB_NODE_FLAG_TRACE)
1773     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1774
1775   vlib_get_buffers (vm, from, bufs, n_left_from);
1776   b = bufs;
1777   next = nexts;
1778
1779   while (n_left_from >= 6)
1780     {
1781       u8 not_batch = 0;
1782
1783       /* Prefetch next iteration. */
1784       {
1785         vlib_prefetch_buffer_header (b[4], LOAD);
1786         vlib_prefetch_buffer_header (b[5], LOAD);
1787
1788         clib_prefetch_load (b[4]->data);
1789         clib_prefetch_load (b[5]->data);
1790       }
1791
1792       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1793
1794       ip[0] = vlib_buffer_get_current (b[0]);
1795       ip[1] = vlib_buffer_get_current (b[1]);
1796
1797       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1798       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1799
1800       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1801       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1802
1803       not_batch = pt[0] ^ pt[1];
1804
1805       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1806         goto skip_checks;
1807
1808       if (PREDICT_TRUE (not_batch == 0))
1809         {
1810           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1811           ip4_local_check_src_x2 (b, ip, &last_check, error, is_receive_dpo);
1812         }
1813       else
1814         {
1815           if (!pt[0])
1816             {
1817               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1818               ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1819                                    is_receive_dpo);
1820             }
1821           if (!pt[1])
1822             {
1823               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1824               ip4_local_check_src (b[1], ip[1], &last_check, &error[1],
1825                                    is_receive_dpo);
1826             }
1827         }
1828
1829     skip_checks:
1830
1831       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1832                                     head_of_feature_arc);
1833       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1834                                     head_of_feature_arc);
1835
1836       b += 2;
1837       next += 2;
1838       n_left_from -= 2;
1839     }
1840
1841   while (n_left_from > 0)
1842     {
1843       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1844
1845       ip[0] = vlib_buffer_get_current (b[0]);
1846       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1847       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1848
1849       if (head_of_feature_arc == 0 || pt[0])
1850         goto skip_check;
1851
1852       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1853       ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
1854                            is_receive_dpo);
1855
1856     skip_check:
1857
1858       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1859                                     head_of_feature_arc);
1860
1861       b += 1;
1862       next += 1;
1863       n_left_from -= 1;
1864     }
1865
1866   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1867   return frame->n_vectors;
1868 }
1869
1870 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1871                                vlib_frame_t * frame)
1872 {
1873   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1874                            0 /* is_receive_dpo */);
1875 }
1876
1877 VLIB_REGISTER_NODE (ip4_local_node) =
1878 {
1879   .name = "ip4-local",
1880   .vector_size = sizeof (u32),
1881   .format_trace = format_ip4_forward_next_trace,
1882   .n_errors = IP4_N_ERROR,
1883   .error_counters = ip4_error_counters,
1884   .n_next_nodes = IP_LOCAL_N_NEXT,
1885   .next_nodes =
1886   {
1887     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1888     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1889     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1890     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1891     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-local-full-reassembly",
1892   },
1893 };
1894
1895 VLIB_NODE_FN (ip4_receive_local_node)
1896 (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
1897 {
1898   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
1899                            1 /* is_receive_dpo */);
1900 }
1901
1902 VLIB_REGISTER_NODE (ip4_receive_local_node) = {
1903   .name = "ip4-receive",
1904   .vector_size = sizeof (u32),
1905   .format_trace = format_ip4_forward_next_trace,
1906   .sibling_of = "ip4-local"
1907 };
1908
1909 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1910                                           vlib_node_runtime_t * node,
1911                                           vlib_frame_t * frame)
1912 {
1913   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */,
1914                            0 /* is_receive_dpo */);
1915 }
1916
1917 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1918   .name = "ip4-local-end-of-arc",
1919   .vector_size = sizeof (u32),
1920
1921   .format_trace = format_ip4_forward_next_trace,
1922   .sibling_of = "ip4-local",
1923 };
1924
1925 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1926   .arc_name = "ip4-local",
1927   .node_name = "ip4-local-end-of-arc",
1928   .runs_before = 0, /* not before any other features */
1929 };
1930
1931 #ifndef CLIB_MARCH_VARIANT
1932 void
1933 ip4_register_protocol (u32 protocol, u32 node_index)
1934 {
1935   vlib_main_t *vm = vlib_get_main ();
1936   ip4_main_t *im = &ip4_main;
1937   ip_lookup_main_t *lm = &im->lookup_main;
1938
1939   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1940   lm->local_next_by_ip_protocol[protocol] =
1941     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1942 }
1943
1944 void
1945 ip4_unregister_protocol (u32 protocol)
1946 {
1947   ip4_main_t *im = &ip4_main;
1948   ip_lookup_main_t *lm = &im->lookup_main;
1949
1950   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1951   lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
1952 }
1953 #endif
1954
1955 static clib_error_t *
1956 show_ip_local_command_fn (vlib_main_t * vm,
1957                           unformat_input_t * input, vlib_cli_command_t * cmd)
1958 {
1959   ip4_main_t *im = &ip4_main;
1960   ip_lookup_main_t *lm = &im->lookup_main;
1961   int i;
1962
1963   vlib_cli_output (vm, "Protocols handled by ip4_local");
1964   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1965     {
1966       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1967         {
1968           u32 node_index = vlib_get_node (vm,
1969                                           ip4_local_node.index)->
1970             next_nodes[lm->local_next_by_ip_protocol[i]];
1971           vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
1972                            format_vlib_node_name, vm, node_index);
1973         }
1974     }
1975   return 0;
1976 }
1977
1978
1979
1980 /*?
1981  * Display the set of protocols handled by the local IPv4 stack.
1982  *
1983  * @cliexpar
1984  * Example of how to display local protocol table:
1985  * @cliexstart{show ip local}
1986  * Protocols handled by ip4_local
1987  * 1
1988  * 17
1989  * 47
1990  * @cliexend
1991 ?*/
1992 /* *INDENT-OFF* */
1993 VLIB_CLI_COMMAND (show_ip_local, static) =
1994 {
1995   .path = "show ip local",
1996   .function = show_ip_local_command_fn,
1997   .short_help = "show ip local",
1998 };
1999 /* *INDENT-ON* */
2000
/** Next-node slots used by the IPv4 rewrite path in this file. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,       /* default, set before processing */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1, /* TTL expired, or DF set and too big */
  IP4_REWRITE_NEXT_FRAGMENT = 2,   /* too big for the adjacency, DF clear */
  IP4_REWRITE_N_NEXT               /* number of slots; keep last */
} ip4_rewrite_next_t;
2008
/**
 * The bits of an IPv4 destination address that are kept when
 * constructing a multicast MAC address. The constant is written for the
 * host's byte order; both spellings select the same bytes of an address
 * held in network order (00 7f ff ff).
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2018
2019 always_inline void
2020 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2021                u16 adj_packet_bytes, bool df, u16 * next,
2022                u8 is_midchain, u32 * error)
2023 {
2024   if (packet_len > adj_packet_bytes)
2025     {
2026       *error = IP4_ERROR_MTU_EXCEEDED;
2027       if (df)
2028         {
2029           icmp4_error_set_vnet_buffer
2030             (b, ICMP4_destination_unreachable,
2031              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2032              adj_packet_bytes);
2033           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2034         }
2035       else
2036         {
2037           /* IP fragmentation */
2038           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2039                                    (is_midchain ?
2040                                     IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
2041                                     IP_FRAG_NEXT_IP_REWRITE), 0);
2042           *next = IP4_REWRITE_NEXT_FRAGMENT;
2043         }
2044     }
2045 }
2046
2047 /* increment TTL & update checksum.
2048    Works either endian, so no need for byte swap. */
2049 static_always_inline void
2050 ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
2051 {
2052   i32 ttl;
2053   u32 checksum;
2054   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2055     return;
2056
2057   ttl = ip->ttl;
2058
2059   checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
2060   checksum += checksum >= 0xffff;
2061
2062   ip->checksum = checksum;
2063   ttl += 1;
2064   ip->ttl = ttl;
2065
2066   ASSERT (ip4_header_checksum_is_valid (ip) ||
2067           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
2068           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
2069 }
2070
2071 /* Decrement TTL & update checksum.
2072    Works either endian, so no need for byte swap. */
2073 static_always_inline void
2074 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2075                             u32 * error)
2076 {
2077   i32 ttl;
2078   u32 checksum;
2079   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2080     return;
2081
2082   ttl = ip->ttl;
2083
2084   /* Input node should have reject packets with ttl 0. */
2085   ASSERT (ip->ttl > 0);
2086
2087   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2088   checksum += checksum >= 0xffff;
2089
2090   ip->checksum = checksum;
2091   ttl -= 1;
2092   ip->ttl = ttl;
2093
2094   /*
2095    * If the ttl drops below 1 when forwarding, generate
2096    * an ICMP response.
2097    */
2098   if (PREDICT_FALSE (ttl <= 0))
2099     {
2100       *error = IP4_ERROR_TIME_EXPIRED;
2101       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2102       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2103                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2104                                    0);
2105       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2106     }
2107
2108   /* Verify checksum. */
2109   ASSERT (ip4_header_checksum_is_valid (ip) ||
2110           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
2111           (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
2112 }
2113
2114 always_inline uword
2115 ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
2116                     vlib_frame_t *frame, int do_counters, int is_midchain,
2117                     int is_mcast)
2118 {
2119   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2120   u32 *from = vlib_frame_vector_args (frame);
2121   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2122   u16 nexts[VLIB_FRAME_SIZE], *next;
2123   u32 n_left_from;
2124   vlib_node_runtime_t *error_node =
2125     vlib_node_get_runtime (vm, ip4_input_node.index);
2126
2127   n_left_from = frame->n_vectors;
2128   u32 thread_index = vm->thread_index;
2129
2130   vlib_get_buffers (vm, from, bufs, n_left_from);
2131   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2132
2133 #if (CLIB_N_PREFETCHES >= 8)
2134   if (n_left_from >= 6)
2135     {
2136       int i;
2137       for (i = 2; i < 6; i++)
2138         vlib_prefetch_buffer_header (bufs[i], LOAD);
2139     }
2140
2141   next = nexts;
2142   b = bufs;
2143   while (n_left_from >= 8)
2144     {
2145       const ip_adjacency_t *adj0, *adj1;
2146       ip4_header_t *ip0, *ip1;
2147       u32 rw_len0, error0, adj_index0;
2148       u32 rw_len1, error1, adj_index1;
2149       u32 tx_sw_if_index0, tx_sw_if_index1;
2150       u8 *p;
2151
2152       if (is_midchain)
2153         {
2154           vlib_prefetch_buffer_header (b[6], LOAD);
2155           vlib_prefetch_buffer_header (b[7], LOAD);
2156         }
2157
2158       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2159       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2160
2161       /*
2162        * pre-fetch the per-adjacency counters
2163        */
2164       if (do_counters)
2165         {
2166           vlib_prefetch_combined_counter (&adjacency_counters,
2167                                           thread_index, adj_index0);
2168           vlib_prefetch_combined_counter (&adjacency_counters,
2169                                           thread_index, adj_index1);
2170         }
2171
2172       ip0 = vlib_buffer_get_current (b[0]);
2173       ip1 = vlib_buffer_get_current (b[1]);
2174
2175       error0 = error1 = IP4_ERROR_NONE;
2176
2177       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2178       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2179
2180       /* Rewrite packet header and updates lengths. */
2181       adj0 = adj_get (adj_index0);
2182       adj1 = adj_get (adj_index1);
2183
2184       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2185       rw_len0 = adj0[0].rewrite_header.data_bytes;
2186       rw_len1 = adj1[0].rewrite_header.data_bytes;
2187       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2188       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2189
2190       p = vlib_buffer_get_current (b[2]);
2191       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2192       clib_prefetch_load (p);
2193
2194       p = vlib_buffer_get_current (b[3]);
2195       clib_prefetch_store (p - CLIB_CACHE_LINE_BYTES);
2196       clib_prefetch_load (p);
2197
2198       /* Check MTU of outgoing interface. */
2199       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2200       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2201
2202       if (b[0]->flags & VNET_BUFFER_F_GSO)
2203         ip0_len = gso_mtu_sz (b[0]);
2204       if (b[1]->flags & VNET_BUFFER_F_GSO)
2205         ip1_len = gso_mtu_sz (b[1]);
2206
2207       ip4_mtu_check (b[0], ip0_len,
2208                      adj0[0].rewrite_header.max_l3_packet_bytes,
2209                      ip0->flags_and_fragment_offset &
2210                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2211                      next + 0, is_midchain, &error0);
2212       ip4_mtu_check (b[1], ip1_len,
2213                      adj1[0].rewrite_header.max_l3_packet_bytes,
2214                      ip1->flags_and_fragment_offset &
2215                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2216                      next + 1, is_midchain, &error1);
2217
2218       if (is_mcast)
2219         {
2220           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2221                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2222                     IP4_ERROR_SAME_INTERFACE : error0);
2223           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2224                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2225                     IP4_ERROR_SAME_INTERFACE : error1);
2226         }
2227
2228       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2229        * to see the IP header */
2230       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2231         {
2232           u32 next_index = adj0[0].rewrite_header.next_index;
2233           vlib_buffer_advance (b[0], -(word) rw_len0);
2234
2235           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2236           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2237
2238           if (PREDICT_FALSE
2239               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2240             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2241                                                 tx_sw_if_index0,
2242                                                 &next_index, b[0],
2243                                                 adj0->ia_cfg_index);
2244
2245           next[0] = next_index;
2246           if (is_midchain)
2247             vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2248                                         0 /* is_ip6 */ );
2249         }
2250       else
2251         {
2252           b[0]->error = error_node->errors[error0];
2253           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2254             ip4_ttl_inc (b[0], ip0);
2255         }
2256       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2257         {
2258           u32 next_index = adj1[0].rewrite_header.next_index;
2259           vlib_buffer_advance (b[1], -(word) rw_len1);
2260
2261           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2262           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2263
2264           if (PREDICT_FALSE
2265               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2266             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2267                                                 tx_sw_if_index1,
2268                                                 &next_index, b[1],
2269                                                 adj1->ia_cfg_index);
2270           next[1] = next_index;
2271           if (is_midchain)
2272             vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
2273                                         0 /* is_ip6 */ );
2274         }
2275       else
2276         {
2277           b[1]->error = error_node->errors[error1];
2278           if (error1 == IP4_ERROR_MTU_EXCEEDED)
2279             ip4_ttl_inc (b[1], ip1);
2280         }
2281
2282       if (is_midchain)
2283         /* Guess we are only writing on ipv4 header. */
2284         vnet_rewrite_two_headers (adj0[0], adj1[0],
2285                                   ip0, ip1, sizeof (ip4_header_t));
2286       else
2287         /* Guess we are only writing on simple Ethernet header. */
2288         vnet_rewrite_two_headers (adj0[0], adj1[0],
2289                                   ip0, ip1, sizeof (ethernet_header_t));
2290
2291       if (do_counters)
2292         {
2293           if (error0 == IP4_ERROR_NONE)
2294             vlib_increment_combined_counter
2295               (&adjacency_counters,
2296                thread_index,
2297                adj_index0, 1,
2298                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2299
2300           if (error1 == IP4_ERROR_NONE)
2301             vlib_increment_combined_counter
2302               (&adjacency_counters,
2303                thread_index,
2304                adj_index1, 1,
2305                vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2306         }
2307
2308       if (is_midchain)
2309         {
2310           if (error0 == IP4_ERROR_NONE)
2311             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2312           if (error1 == IP4_ERROR_NONE)
2313             adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
2314         }
2315
2316       if (is_mcast)
2317         {
2318           /* copy bytes from the IP address into the MAC rewrite */
2319           if (error0 == IP4_ERROR_NONE)
2320             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2321                                         adj0->rewrite_header.dst_mcast_offset,
2322                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2323           if (error1 == IP4_ERROR_NONE)
2324             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2325                                         adj1->rewrite_header.dst_mcast_offset,
2326                                         &ip1->dst_address.as_u32, (u8 *) ip1);
2327         }
2328
2329       next += 2;
2330       b += 2;
2331       n_left_from -= 2;
2332     }
2333 #elif (CLIB_N_PREFETCHES >= 4)
2334   next = nexts;
2335   b = bufs;
2336   while (n_left_from >= 1)
2337     {
2338       ip_adjacency_t *adj0;
2339       ip4_header_t *ip0;
2340       u32 rw_len0, error0, adj_index0;
2341       u32 tx_sw_if_index0;
2342       u8 *p;
2343
2344       /* Prefetch next iteration */
2345       if (PREDICT_TRUE (n_left_from >= 4))
2346         {
2347           ip_adjacency_t *adj2;
2348           u32 adj_index2;
2349
2350           vlib_prefetch_buffer_header (b[3], LOAD);
2351           vlib_prefetch_buffer_data (b[2], LOAD);
2352
2353           /* Prefetch adj->rewrite_header */
2354           adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
2355           adj2 = adj_get (adj_index2);
2356           p = (u8 *) adj2;
2357           CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
2358                          LOAD);
2359         }
2360
2361       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2362
2363       /*
2364        * Prefetch the per-adjacency counters
2365        */
2366       if (do_counters)
2367         {
2368           vlib_prefetch_combined_counter (&adjacency_counters,
2369                                           thread_index, adj_index0);
2370         }
2371
2372       ip0 = vlib_buffer_get_current (b[0]);
2373
2374       error0 = IP4_ERROR_NONE;
2375
2376       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2377
2378       /* Rewrite packet header and updates lengths. */
2379       adj0 = adj_get (adj_index0);
2380
2381       /* Rewrite header was prefetched. */
2382       rw_len0 = adj0[0].rewrite_header.data_bytes;
2383       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2384
2385       /* Check MTU of outgoing interface. */
2386       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2387
2388       if (b[0]->flags & VNET_BUFFER_F_GSO)
2389         ip0_len = gso_mtu_sz (b[0]);
2390
2391       ip4_mtu_check (b[0], ip0_len,
2392                      adj0[0].rewrite_header.max_l3_packet_bytes,
2393                      ip0->flags_and_fragment_offset &
2394                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2395                      next + 0, is_midchain, &error0);
2396
2397       if (is_mcast)
2398         {
2399           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2400                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2401                     IP4_ERROR_SAME_INTERFACE : error0);
2402         }
2403
2404       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2405        * to see the IP header */
2406       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2407         {
2408           u32 next_index = adj0[0].rewrite_header.next_index;
2409           vlib_buffer_advance (b[0], -(word) rw_len0);
2410           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2411           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2412
2413           if (PREDICT_FALSE
2414               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2415             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2416                                                 tx_sw_if_index0,
2417                                                 &next_index, b[0],
2418                                                 adj0->ia_cfg_index);
2419           next[0] = next_index;
2420
2421           if (is_midchain)
2422             {
2423               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2424                                           0 /* is_ip6 */ );
2425
2426               /* Guess we are only writing on ipv4 header. */
2427               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2428             }
2429           else
2430             /* Guess we are only writing on simple Ethernet header. */
2431             vnet_rewrite_one_header (adj0[0], ip0,
2432                                      sizeof (ethernet_header_t));
2433
2434           /*
2435            * Bump the per-adjacency counters
2436            */
2437           if (do_counters)
2438             vlib_increment_combined_counter
2439               (&adjacency_counters,
2440                thread_index,
2441                adj_index0, 1, vlib_buffer_length_in_chain (vm,
2442                                                            b[0]) + rw_len0);
2443
2444           if (is_midchain)
2445             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2446
2447           if (is_mcast)
2448             /* copy bytes from the IP address into the MAC rewrite */
2449             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2450                                         adj0->rewrite_header.dst_mcast_offset,
2451                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2452         }
2453       else
2454         {
2455           b[0]->error = error_node->errors[error0];
2456           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2457             ip4_ttl_inc (b[0], ip0);
2458         }
2459
2460       next += 1;
2461       b += 1;
2462       n_left_from -= 1;
2463     }
2464 #endif
2465
2466   while (n_left_from > 0)
2467     {
2468       ip_adjacency_t *adj0;
2469       ip4_header_t *ip0;
2470       u32 rw_len0, adj_index0, error0;
2471       u32 tx_sw_if_index0;
2472
2473       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2474
2475       adj0 = adj_get (adj_index0);
2476
2477       if (do_counters)
2478         vlib_prefetch_combined_counter (&adjacency_counters,
2479                                         thread_index, adj_index0);
2480
2481       ip0 = vlib_buffer_get_current (b[0]);
2482
2483       error0 = IP4_ERROR_NONE;
2484
2485       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2486
2487
2488       /* Update packet buffer attributes/set output interface. */
2489       rw_len0 = adj0[0].rewrite_header.data_bytes;
2490       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2491
2492       /* Check MTU of outgoing interface. */
2493       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2494       if (b[0]->flags & VNET_BUFFER_F_GSO)
2495         ip0_len = gso_mtu_sz (b[0]);
2496
2497       ip4_mtu_check (b[0], ip0_len,
2498                      adj0[0].rewrite_header.max_l3_packet_bytes,
2499                      ip0->flags_and_fragment_offset &
2500                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2501                      next + 0, is_midchain, &error0);
2502
2503       if (is_mcast)
2504         {
2505           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2506                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2507                     IP4_ERROR_SAME_INTERFACE : error0);
2508         }
2509
2510       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2511        * to see the IP header */
2512       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2513         {
2514           u32 next_index = adj0[0].rewrite_header.next_index;
2515           vlib_buffer_advance (b[0], -(word) rw_len0);
2516           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2517           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2518
2519           if (PREDICT_FALSE
2520               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2521             vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
2522                                                 tx_sw_if_index0,
2523                                                 &next_index, b[0],
2524                                                 adj0->ia_cfg_index);
2525           next[0] = next_index;
2526
2527           if (is_midchain)
2528             {
2529               /* this acts on the packet that is about to be encapped */
2530               vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
2531                                           0 /* is_ip6 */ );
2532
2533               /* Guess we are only writing on ipv4 header. */
2534               vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
2535             }
2536           else
2537             /* Guess we are only writing on simple Ethernet header. */
2538             vnet_rewrite_one_header (adj0[0], ip0,
2539                                      sizeof (ethernet_header_t));
2540
2541           if (do_counters)
2542             vlib_increment_combined_counter
2543               (&adjacency_counters,
2544                thread_index, adj_index0, 1,
2545                vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2546
2547           if (is_midchain)
2548             adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
2549
2550           if (is_mcast)
2551             /* copy bytes from the IP address into the MAC rewrite */
2552             vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2553                                         adj0->rewrite_header.dst_mcast_offset,
2554                                         &ip0->dst_address.as_u32, (u8 *) ip0);
2555         }
2556       else
2557         {
2558           b[0]->error = error_node->errors[error0];
2559           /* undo the TTL decrement - we'll be back to do it again */
2560           if (error0 == IP4_ERROR_MTU_EXCEEDED)
2561             ip4_ttl_inc (b[0], ip0);
2562         }
2563
2564       next += 1;
2565       b += 1;
2566       n_left_from -= 1;
2567     }
2568
2569
2570   /* Need to do trace after rewrites to pick up new packet data. */
2571   if (node->flags & VLIB_NODE_FLAG_TRACE)
2572     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2573
2574   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2575   return frame->n_vectors;
2576 }
2577
2578 /** @brief IPv4 rewrite node.
2579     @node ip4-rewrite
2580
2581     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2582     header checksum, fetch the ip adjacency, check the outbound mtu,
2583     apply the adjacency rewrite, and send pkts to the adjacency
2584     rewrite header's next_index.
2585
2586     @param vm vlib_main_t corresponding to the current thread
2587     @param node vlib_node_runtime_t
2588     @param frame vlib_frame_t whose contents should be dispatched
2589
2590     @par Graph mechanics: buffer metadata, next index usage
2591
2592     @em Uses:
2593     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2594         - the rewrite adjacency index
2595     - <code>adj->lookup_next_index</code>
2596         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2597           the packet will be dropped.
2598     - <code>adj->rewrite_header</code>
2599         - Rewrite string length, rewrite string, next_index
2600
2601     @em Sets:
2602     - <code>b->current_data, b->current_length</code>
2603         - Updated net of applying the rewrite string
2604
2605     <em>Next Indices:</em>
2606     - <code> adj->rewrite_header.next_index </code>
2607       or @c ip4-drop
2608 */
2609
2610 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2611                                  vlib_frame_t * frame)
2612 {
2613   if (adj_are_counters_enabled ())
2614     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2615   else
2616     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2617 }
2618
2619 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2620                                        vlib_node_runtime_t * node,
2621                                        vlib_frame_t * frame)
2622 {
2623   if (adj_are_counters_enabled ())
2624     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2625   else
2626     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2627 }
2628
2629 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2630                                   vlib_node_runtime_t * node,
2631                                   vlib_frame_t * frame)
2632 {
2633   if (adj_are_counters_enabled ())
2634     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2635   else
2636     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2637 }
2638
2639 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2640                                        vlib_node_runtime_t * node,
2641                                        vlib_frame_t * frame)
2642 {
2643   if (adj_are_counters_enabled ())
2644     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2645   else
2646     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2647 }
2648
2649 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2650                                         vlib_node_runtime_t * node,
2651                                         vlib_frame_t * frame)
2652 {
2653   if (adj_are_counters_enabled ())
2654     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2655   else
2656     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2657 }
2658
2659 /* *INDENT-OFF* */
2660 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2661   .name = "ip4-rewrite",
2662   .vector_size = sizeof (u32),
2663
2664   .format_trace = format_ip4_rewrite_trace,
2665
2666   .n_next_nodes = IP4_REWRITE_N_NEXT,
2667   .next_nodes = {
2668     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2669     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2670     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2671   },
2672 };
2673
2674 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2675   .name = "ip4-rewrite-bcast",
2676   .vector_size = sizeof (u32),
2677
2678   .format_trace = format_ip4_rewrite_trace,
2679   .sibling_of = "ip4-rewrite",
2680 };
2681
2682 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2683   .name = "ip4-rewrite-mcast",
2684   .vector_size = sizeof (u32),
2685
2686   .format_trace = format_ip4_rewrite_trace,
2687   .sibling_of = "ip4-rewrite",
2688 };
2689
2690 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2691   .name = "ip4-mcast-midchain",
2692   .vector_size = sizeof (u32),
2693
2694   .format_trace = format_ip4_rewrite_trace,
2695   .sibling_of = "ip4-rewrite",
2696 };
2697
2698 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2699   .name = "ip4-midchain",
2700   .vector_size = sizeof (u32),
2701   .format_trace = format_ip4_rewrite_trace,
2702   .sibling_of = "ip4-rewrite",
2703 };
2704 /* *INDENT-ON* */
2705
2706 static clib_error_t *
2707 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2708                              unformat_input_t * input,
2709                              vlib_cli_command_t * cmd)
2710 {
2711   int matched = 0;
2712   u32 table_id = 0;
2713   u32 flow_hash_config = 0;
2714   int rv;
2715
2716   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2717     {
2718       if (unformat (input, "table %d", &table_id))
2719         matched = 1;
2720 #define _(a, b, v)                                                            \
2721   else if (unformat (input, #a))                                              \
2722   {                                                                           \
2723     flow_hash_config |= v;                                                    \
2724     matched = 1;                                                              \
2725   }
2726       foreach_flow_hash_bit
2727 #undef _
2728         else
2729         break;
2730     }
2731
2732   if (matched == 0)
2733     return clib_error_return (0, "unknown input `%U'",
2734                               format_unformat_error, input);
2735
2736   rv = ip_flow_hash_set (AF_IP4, table_id, flow_hash_config);
2737   switch (rv)
2738     {
2739     case 0:
2740       break;
2741
2742     case VNET_API_ERROR_NO_SUCH_FIB:
2743       return clib_error_return (0, "no such FIB table %d", table_id);
2744
2745     default:
2746       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2747       break;
2748     }
2749
2750   return 0;
2751 }
2752
2753 /*?
2754  * Configure the set of IPv4 fields used by the flow hash.
2755  *
2756  * @cliexpar
2757  * Example of how to set the flow hash on a given table:
2758  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2759  * Example of how to display the configured flow hash:
2760  * @cliexstart{show ip fib}
2761  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2762  * 0.0.0.0/0
2763  *   unicast-ip4-chain
2764  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2765  *     [0] [@0]: dpo-drop ip6
2766  * 0.0.0.0/32
2767  *   unicast-ip4-chain
2768  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2769  *     [0] [@0]: dpo-drop ip6
2770  * 224.0.0.0/8
2771  *   unicast-ip4-chain
2772  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2773  *     [0] [@0]: dpo-drop ip6
2774  * 6.0.1.2/32
2775  *   unicast-ip4-chain
2776  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2777  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2778  * 7.0.0.1/32
2779  *   unicast-ip4-chain
2780  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2781  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2782  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2783  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2784  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2785  * 240.0.0.0/8
2786  *   unicast-ip4-chain
2787  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2788  *     [0] [@0]: dpo-drop ip6
2789  * 255.255.255.255/32
2790  *   unicast-ip4-chain
2791  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2792  *     [0] [@0]: dpo-drop ip6
2793  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2794  * 0.0.0.0/0
2795  *   unicast-ip4-chain
2796  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2797  *     [0] [@0]: dpo-drop ip6
2798  * 0.0.0.0/32
2799  *   unicast-ip4-chain
2800  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2801  *     [0] [@0]: dpo-drop ip6
2802  * 172.16.1.0/24
2803  *   unicast-ip4-chain
2804  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2805  *     [0] [@4]: ipv4-glean: af_packet0
2806  * 172.16.1.1/32
2807  *   unicast-ip4-chain
2808  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2809  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2810  * 172.16.1.2/32
2811  *   unicast-ip4-chain
2812  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2813  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2814  * 172.16.2.0/24
2815  *   unicast-ip4-chain
2816  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2817  *     [0] [@4]: ipv4-glean: af_packet1
2818  * 172.16.2.1/32
2819  *   unicast-ip4-chain
2820  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2821  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2822  * 224.0.0.0/8
2823  *   unicast-ip4-chain
2824  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2825  *     [0] [@0]: dpo-drop ip6
2826  * 240.0.0.0/8
2827  *   unicast-ip4-chain
2828  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2829  *     [0] [@0]: dpo-drop ip6
2830  * 255.255.255.255/32
2831  *   unicast-ip4-chain
2832  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2833  *     [0] [@0]: dpo-drop ip6
2834  * @cliexend
2835 ?*/
2836 /* *INDENT-OFF* */
2837 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2838 {
2839   .path = "set ip flow-hash",
2840   .short_help =
2841   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2842   .function = set_ip_flow_hash_command_fn,
2843 };
2844 /* *INDENT-ON* */
2845
2846 #ifndef CLIB_MARCH_VARIANT
2847 int
2848 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2849                              u32 table_index)
2850 {
2851   vnet_main_t *vnm = vnet_get_main ();
2852   vnet_interface_main_t *im = &vnm->interface_main;
2853   ip4_main_t *ipm = &ip4_main;
2854   ip_lookup_main_t *lm = &ipm->lookup_main;
2855   vnet_classify_main_t *cm = &vnet_classify_main;
2856   ip4_address_t *if_addr;
2857
2858   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2859     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2860
2861   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2862     return VNET_API_ERROR_NO_SUCH_ENTRY;
2863
2864   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2865   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2866
2867   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2868
2869   if (NULL != if_addr)
2870     {
2871       fib_prefix_t pfx = {
2872         .fp_len = 32,
2873         .fp_proto = FIB_PROTOCOL_IP4,
2874         .fp_addr.ip4 = *if_addr,
2875       };
2876       u32 fib_index;
2877
2878       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2879                                                        sw_if_index);
2880
2881
2882       if (table_index != (u32) ~ 0)
2883         {
2884           dpo_id_t dpo = DPO_INVALID;
2885
2886           dpo_set (&dpo,
2887                    DPO_CLASSIFY,
2888                    DPO_PROTO_IP4,
2889                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2890
2891           fib_table_entry_special_dpo_add (fib_index,
2892                                            &pfx,
2893                                            FIB_SOURCE_CLASSIFY,
2894                                            FIB_ENTRY_FLAG_NONE, &dpo);
2895           dpo_reset (&dpo);
2896         }
2897       else
2898         {
2899           fib_table_entry_special_remove (fib_index,
2900                                           &pfx, FIB_SOURCE_CLASSIFY);
2901         }
2902     }
2903
2904   return 0;
2905 }
2906 #endif
2907
2908 static clib_error_t *
2909 set_ip_classify_command_fn (vlib_main_t * vm,
2910                             unformat_input_t * input,
2911                             vlib_cli_command_t * cmd)
2912 {
2913   u32 table_index = ~0;
2914   int table_index_set = 0;
2915   u32 sw_if_index = ~0;
2916   int rv;
2917
2918   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2919     {
2920       if (unformat (input, "table-index %d", &table_index))
2921         table_index_set = 1;
2922       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2923                          vnet_get_main (), &sw_if_index))
2924         ;
2925       else
2926         break;
2927     }
2928
2929   if (table_index_set == 0)
2930     return clib_error_return (0, "classify table-index must be specified");
2931
2932   if (sw_if_index == ~0)
2933     return clib_error_return (0, "interface / subif must be specified");
2934
2935   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2936
2937   switch (rv)
2938     {
2939     case 0:
2940       break;
2941
2942     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2943       return clib_error_return (0, "No such interface");
2944
2945     case VNET_API_ERROR_NO_SUCH_ENTRY:
2946       return clib_error_return (0, "No such classifier table");
2947     }
2948   return 0;
2949 }
2950
2951 /*?
2952  * Assign a classification table to an interface. The classification
2953  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2954  * commands. Once the table is created, use this command to filter packets
2955  * on an interface.
2956  *
2957  * @cliexpar
2958  * Example of how to assign a classification table to an interface:
2959  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2960 ?*/
2961 /* *INDENT-OFF* */
2962 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2963 {
2964     .path = "set ip classify",
2965     .short_help =
2966     "set ip classify intfc <interface> table-index <classify-idx>",
2967     .function = set_ip_classify_command_fn,
2968 };
2969 /* *INDENT-ON* */
2970
2971 /*
2972  * fd.io coding-style-patch-verification: ON
2973  *
2974  * Local Variables:
2975  * eval: (c-set-style "gnu")
2976  * End:
2977  */